J+J+))3+)kythe.proto.common.SymbolInfo.qualified_namekythe.proto.common.Origin.languagekythe/proto/filecontext.protokythe/proto/metadata.protoc++-cpp-outputassembler-with-cppCheck failed: !writer_.has_value()Output file must have '.kzip' extensionkythe_claimCXXSystemexternal/kythe/kythe/cxx/extractor/cxx_extractor_main.cc/*===---- __clang_cuda_runtime_wrapper.h - CUDA runtime support -------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* * WARNING: This header is intended to be directly -include'd by * the compiler and is not supposed to be included by users. * * CUDA headers are implemented in a way that currently makes it * impossible for user code to #include directly when compiling with * Clang. They present different view of CUDA-supplied functions * depending on where in NVCC's compilation pipeline the headers are * included. Neither of these modes provides function definitions with * correct attributes, so we use preprocessor to force the headers * into a form that Clang can use. * * Similarly to NVCC which -include's cuda_runtime.h, Clang -include's * this file during every CUDA compilation. */ #ifndef __CLANG_CUDA_RUNTIME_WRAPPER_H__ #define __CLANG_CUDA_RUNTIME_WRAPPER_H__ #if defined(__CUDA__) && defined(__clang__) // Include some forward declares that must come before cmath. #include <__clang_cuda_math_forward_declares.h> // Define __CUDACC__ early as libstdc++ standard headers with GNU extensions // enabled depend on it to avoid using __float128, which is unsupported in // CUDA. #define __CUDACC__ // Include some standard headers to avoid CUDA headers including them // while some required macros (like __THROW) are in a weird state. #include #include #include #include #undef __CUDACC__ // Preserve common macros that will be changed below by us or by CUDA // headers. #pragma push_macro("__THROW") #pragma push_macro("__CUDA_ARCH__") // WARNING: Preprocessor hacks below are based on specific details of // CUDA-7.x headers and are not expected to work with any other // version of CUDA headers. #include "cuda.h" #if !defined(CUDA_VERSION) #error "cuda.h did not define CUDA_VERSION" #elif CUDA_VERSION < 7000 #error "Unsupported CUDA version!" #endif #pragma push_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__") #if CUDA_VERSION >= 10000 #define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ #endif // Make largest subset of device functions available during host // compilation. #ifndef __CUDA_ARCH__ #define __CUDA_ARCH__ 9999 #endif #include "__clang_cuda_builtin_vars.h" // No need for device_launch_parameters.h as __clang_cuda_builtin_vars.h above // has taken care of builtin variables declared in the file. #define __DEVICE_LAUNCH_PARAMETERS_H__ // {math,device}_functions.h only have declarations of the // functions. We don't need them as we're going to pull in their // definitions from .hpp files. #define __DEVICE_FUNCTIONS_H__ #define __MATH_FUNCTIONS_H__ #define __COMMON_FUNCTIONS_H__ // device_functions_decls is replaced by __clang_cuda_device_functions.h // included below. 
#define __DEVICE_FUNCTIONS_DECLS_H__ #undef __CUDACC__ #if CUDA_VERSION < 9000 #define __CUDABE__ #else #define __CUDACC__ #define __CUDA_LIBDEVICE__ #endif // Disables definitions of device-side runtime support stubs in // cuda_device_runtime_api.h #include "host_defines.h" #undef __CUDACC__ #include "driver_types.h" #include "host_config.h" // Temporarily replace "nv_weak" with weak, so __attribute__((nv_weak)) in // cuda_device_runtime_api.h ends up being __attribute__((weak)) which is the // functional equivalent of what we need. #pragma push_macro("nv_weak") #define nv_weak weak #undef __CUDABE__ #undef __CUDA_LIBDEVICE__ #define __CUDACC__ #include "cuda_runtime.h" #pragma pop_macro("nv_weak") #undef __CUDACC__ #define __CUDABE__ // CUDA headers use __nvvm_memcpy and __nvvm_memset which Clang does // not have at the moment. Emulate them with a builtin memcpy/memset. #define __nvvm_memcpy(s, d, n, a) __builtin_memcpy(s, d, n) #define __nvvm_memset(d, c, n, a) __builtin_memset(d, c, n) #if CUDA_VERSION < 9000 #include "crt/device_runtime.h" #endif #include "crt/host_runtime.h" // device_runtime.h defines __cxa_* macros that will conflict with // cxxabi.h. // FIXME: redefine these as __device__ functions. #undef __cxa_vec_ctor #undef __cxa_vec_cctor #undef __cxa_vec_dtor #undef __cxa_vec_new #undef __cxa_vec_new2 #undef __cxa_vec_new3 #undef __cxa_vec_delete2 #undef __cxa_vec_delete #undef __cxa_vec_delete3 #undef __cxa_pure_virtual // math_functions.hpp expects this host function be defined on MacOS, but it // ends up not being there because of the games we play here. Just define it // ourselves; it's simple enough. #ifdef __APPLE__ inline __host__ double __signbitd(double x) { return std::signbit(x); } #endif // CUDA 9.1 no longer provides declarations for libdevice functions, so we need // to provide our own. #include <__clang_cuda_libdevice_declares.h> // Wrappers for many device-side standard library functions, incl. math // functions, became compiler builtins in CUDA-9 and have been removed from the // CUDA headers. Clang now provides its own implementation of the wrappers. #if CUDA_VERSION >= 9000 #include <__clang_cuda_device_functions.h> #include <__clang_cuda_math.h> #endif // __THROW is redefined to be empty by device_functions_decls.h in CUDA. Clang's // counterpart does not do it, so we need to make it empty here to keep // following CUDA includes happy. #undef __THROW #define __THROW // CUDA 8.0.41 relies on __USE_FAST_MATH__ and __CUDA_PREC_DIV's values. // Previous versions used to check whether they are defined or not. // CU_DEVICE_INVALID macro is only defined in 8.0.41, so we use it // here to detect the switch. #if defined(CU_DEVICE_INVALID) #if !defined(__USE_FAST_MATH__) #define __USE_FAST_MATH__ 0 #endif #if !defined(__CUDA_PREC_DIV) #define __CUDA_PREC_DIV 0 #endif #endif // Temporarily poison __host__ macro to ensure it's not used by any of // the headers we're about to include. #pragma push_macro("__host__") #define __host__ UNEXPECTED_HOST_ATTRIBUTE // device_functions.hpp and math_functions*.hpp use 'static // __forceinline__' (with no __device__) for definitions of device // functions. Temporarily redefine __forceinline__ to include // __device__. 
#pragma push_macro("__forceinline__") #define __forceinline__ __device__ __inline__ __attribute__((always_inline)) #if CUDA_VERSION < 9000 #include "device_functions.hpp" #endif // math_function.hpp uses the __USE_FAST_MATH__ macro to determine whether we // get the slow-but-accurate or fast-but-inaccurate versions of functions like // sin and exp. This is controlled in clang by -fgpu-approx-transcendentals. // // device_functions.hpp uses __USE_FAST_MATH__ for a different purpose (fast vs. // slow divides), so we need to scope our define carefully here. #pragma push_macro("__USE_FAST_MATH__") #if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__) #define __USE_FAST_MATH__ 1 #endif #if CUDA_VERSION >= 9000 #include "crt/math_functions.hpp" #else #include "math_functions.hpp" #endif #pragma pop_macro("__USE_FAST_MATH__") #if CUDA_VERSION < 9000 #include "math_functions_dbl_ptx3.hpp" #endif #pragma pop_macro("__forceinline__") // Pull in host-only functions that are only available when neither // __CUDACC__ nor __CUDABE__ are defined. #undef __MATH_FUNCTIONS_HPP__ #undef __CUDABE__ #if CUDA_VERSION < 9000 #include "math_functions.hpp" #endif // Alas, additional overloads for these functions are hard to get to. // Considering that we only need these overloads for a few functions, // we can provide them here. static inline float rsqrt(float __a) { return rsqrtf(__a); } static inline float rcbrt(float __a) { return rcbrtf(__a); } static inline float sinpi(float __a) { return sinpif(__a); } static inline float cospi(float __a) { return cospif(__a); } static inline void sincospi(float __a, float *__b, float *__c) { return sincospif(__a, __b, __c); } static inline float erfcinv(float __a) { return erfcinvf(__a); } static inline float normcdfinv(float __a) { return normcdfinvf(__a); } static inline float normcdf(float __a) { return normcdff(__a); } static inline float erfcx(float __a) { return erfcxf(__a); } #if CUDA_VERSION < 9000 // For some reason single-argument variant is not always declared by // CUDA headers. Alas, device_functions.hpp included below needs it. static inline __device__ void __brkpt(int __c) { __brkpt(); } #endif // Now include *.hpp with definitions of various GPU functions. Alas, // a lot of thins get declared/defined with __host__ attribute which // we don't want and we have to define it out. We also have to include // {device,math}_functions.hpp again in order to extract the other // branch of #if/else inside. #define __host__ #undef __CUDABE__ #define __CUDACC__ #if CUDA_VERSION >= 9000 // Some atomic functions became compiler builtins in CUDA-9 , so we need their // declarations. #include "device_atomic_functions.h" #endif #undef __DEVICE_FUNCTIONS_HPP__ #include "device_atomic_functions.hpp" #if CUDA_VERSION >= 9000 #include "crt/device_functions.hpp" #include "crt/device_double_functions.hpp" #else #include "device_functions.hpp" #define __CUDABE__ #include "device_double_functions.h" #undef __CUDABE__ #endif #include "sm_20_atomic_functions.hpp" // Predicate functions used in `__builtin_assume` need to have no side effect. // However, sm_20_intrinsics.hpp doesn't define them with neither pure nor // const attribute. Rename definitions from sm_20_intrinsics.hpp and re-define // them as pure ones. 
#pragma push_macro("__isGlobal") #pragma push_macro("__isShared") #pragma push_macro("__isConstant") #pragma push_macro("__isLocal") #define __isGlobal __ignored_cuda___isGlobal #define __isShared __ignored_cuda___isShared #define __isConstant __ignored_cuda___isConstant #define __isLocal __ignored_cuda___isLocal #include "sm_20_intrinsics.hpp" #pragma pop_macro("__isGlobal") #pragma pop_macro("__isShared") #pragma pop_macro("__isConstant") #pragma pop_macro("__isLocal") #pragma push_macro("__DEVICE__") #define __DEVICE__ static __device__ __forceinline__ __attribute__((const)) __DEVICE__ unsigned int __isGlobal(const void *p) { return __nvvm_isspacep_global(p); } __DEVICE__ unsigned int __isShared(const void *p) { return __nvvm_isspacep_shared(p); } __DEVICE__ unsigned int __isConstant(const void *p) { return __nvvm_isspacep_const(p); } __DEVICE__ unsigned int __isLocal(const void *p) { return __nvvm_isspacep_local(p); } #pragma pop_macro("__DEVICE__") #include "sm_32_atomic_functions.hpp" // Don't include sm_30_intrinsics.h and sm_32_intrinsics.h. These define the // __shfl and __ldg intrinsics using inline (volatile) asm, but we want to // define them using builtins so that the optimizer can reason about and across // these instructions. In particular, using intrinsics for ldg gets us the // [addr+imm] addressing mode, which, although it doesn't actually exist in the // hardware, seems to generate faster machine code because ptxas can more easily // reason about our code. #if CUDA_VERSION >= 8000 #pragma push_macro("__CUDA_ARCH__") #undef __CUDA_ARCH__ #include "sm_60_atomic_functions.hpp" #include "sm_61_intrinsics.hpp" #pragma pop_macro("__CUDA_ARCH__") #endif #undef __MATH_FUNCTIONS_HPP__ // math_functions.hpp defines ::signbit as a __host__ __device__ function. This // conflicts with libstdc++'s constexpr ::signbit, so we have to rename // math_function.hpp's ::signbit. It's guarded by #undef signbit, but that's // conditional on __GNUC__. :) #pragma push_macro("signbit") #pragma push_macro("__GNUC__") #undef __GNUC__ #define signbit __ignored_cuda_signbit // CUDA-9 omits device-side definitions of some math functions if it sees // include guard from math.h wrapper from libstdc++. We have to undo the header // guard temporarily to get the definitions we need. #pragma push_macro("_GLIBCXX_MATH_H") #pragma push_macro("_LIBCPP_VERSION") #if CUDA_VERSION >= 9000 #undef _GLIBCXX_MATH_H // We also need to undo another guard that checks for libc++ 3.8+ #ifdef _LIBCPP_VERSION #define _LIBCPP_VERSION 3700 #endif #endif #if CUDA_VERSION >= 9000 #include "crt/math_functions.hpp" #else #include "math_functions.hpp" #endif #pragma pop_macro("_GLIBCXX_MATH_H") #pragma pop_macro("_LIBCPP_VERSION") #pragma pop_macro("__GNUC__") #pragma pop_macro("signbit") #pragma pop_macro("__host__") // __clang_cuda_texture_intrinsics.h must be included first in order to provide // implementation for __nv_tex_surf_handler that CUDA's headers depend on. // The implementation requires c++11 and only works with CUDA-9 or newer. #if __cplusplus >= 201103L && CUDA_VERSION >= 9000 // clang-format off #include <__clang_cuda_texture_intrinsics.h> // clang-format on #else #if CUDA_VERSION >= 9000 // Provide a hint that texture support needs C++11. 
template struct __nv_tex_needs_cxx11 { const static bool value = false; }; template __host__ __device__ void __nv_tex_surf_handler(const char *name, T *ptr, cudaTextureObject_t obj, float x) { _Static_assert(__nv_tex_needs_cxx11::value, "Texture support requires C++11"); } #else // Textures in CUDA-8 and older are not supported by clang.There's no // convenient way to intercept texture use in these versions, so we can't // produce a meaningful error. The source code that attempts to use textures // will continue to fail as it does now. #endif // CUDA_VERSION #endif // __cplusplus >= 201103L && CUDA_VERSION >= 9000 #include "texture_fetch_functions.h" #include "texture_indirect_functions.h" // Restore state of __CUDA_ARCH__ and __THROW we had on entry. #pragma pop_macro("__CUDA_ARCH__") #pragma pop_macro("__THROW") // Set up compiler macros expected to be seen during compilation. #undef __CUDABE__ #define __CUDACC__ extern "C" { // Device-side CUDA system calls. // http://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls // We need these declarations and wrappers for device-side // malloc/free/printf calls to work without relying on // -fcuda-disable-target-call-checks option. __device__ int vprintf(const char *, const char *); __device__ void free(void *) __attribute((nothrow)); __device__ void *malloc(size_t) __attribute((nothrow)) __attribute__((malloc)); // __assertfail() used to have a `noreturn` attribute. Unfortunately that // contributed to triggering the longstanding bug in ptxas when assert was used // in sufficiently convoluted code. See // https://bugs.llvm.org/show_bug.cgi?id=27738 for the details. __device__ void __assertfail(const char *__message, const char *__file, unsigned __line, const char *__function, size_t __charSize); // In order for standard assert() macro on linux to work we need to // provide device-side __assert_fail() __device__ static inline void __assert_fail(const char *__message, const char *__file, unsigned __line, const char *__function) { __assertfail(__message, __file, __line, __function, sizeof(char)); } // Clang will convert printf into vprintf, but we still need // device-side declaration for it. __device__ int printf(const char *, ...); } // extern "C" // We also need device-side std::malloc and std::free. namespace std { __device__ static inline void free(void *__ptr) { ::free(__ptr); } __device__ static inline void *malloc(size_t __size) { return ::malloc(__size); } } // namespace std // Out-of-line implementations from __clang_cuda_builtin_vars.h. These need to // come after we've pulled in the definition of uint3 and dim3. 
__device__ inline __cuda_builtin_threadIdx_t::operator dim3() const { return dim3(x, y, z); } __device__ inline __cuda_builtin_threadIdx_t::operator uint3() const { return {x, y, z}; } __device__ inline __cuda_builtin_blockIdx_t::operator dim3() const { return dim3(x, y, z); } __device__ inline __cuda_builtin_blockIdx_t::operator uint3() const { return {x, y, z}; } __device__ inline __cuda_builtin_blockDim_t::operator dim3() const { return dim3(x, y, z); } __device__ inline __cuda_builtin_blockDim_t::operator uint3() const { return {x, y, z}; } __device__ inline __cuda_builtin_gridDim_t::operator dim3() const { return dim3(x, y, z); } __device__ inline __cuda_builtin_gridDim_t::operator uint3() const { return {x, y, z}; } #include <__clang_cuda_cmath.h> #include <__clang_cuda_intrinsics.h> #include <__clang_cuda_complex_builtins.h> // curand_mtgp32_kernel helpfully redeclares blockDim and threadIdx in host // mode, giving them their "proper" types of dim3 and uint3. This is // incompatible with the types we give in __clang_cuda_builtin_vars.h. As as // hack, force-include the header (nvcc doesn't include it by default) but // redefine dim3 and uint3 to our builtin types. (Thankfully dim3 and uint3 are // only used here for the redeclarations of blockDim and threadIdx.) #pragma push_macro("dim3") #pragma push_macro("uint3") #define dim3 __cuda_builtin_blockDim_t #define uint3 __cuda_builtin_threadIdx_t #include "curand_mtgp32_kernel.h" #pragma pop_macro("dim3") #pragma pop_macro("uint3") #pragma pop_macro("__USE_FAST_MATH__") #pragma pop_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__") // CUDA runtime uses this undocumented function to access kernel launch // configuration. The declaration is in crt/device_functions.h but that file // includes a lot of other stuff we don't want. Instead, we'll provide our own // declaration for it here. #if CUDA_VERSION >= 9020 extern "C" unsigned __cudaPushCallConfiguration(dim3 gridDim, dim3 blockDim, size_t sharedMem = 0, void *stream = 0); #endif #endif // __CUDA__ #endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__ __stdarg___gnuc_va_list.h/*===---- __stddef_size_t.h - Definition of size_t -------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* * When -fbuiltin-headers-in-system-modules is set this is a non-modular header * and needs to behave as if it was textual. */ #if !defined(_SIZE_T) || \ (__has_feature(modules) && !__building_module(_Builtin_stddef)) #define _SIZE_T typedef __SIZE_TYPE__ size_t; #endif adxintrin.h/*===--------------- amxintrin.h - AMX intrinsics -*- C/C++ -*---------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===------------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif /* __IMMINTRIN_H */ #ifndef __AMXINTRIN_H #define __AMXINTRIN_H #ifdef __x86_64__ /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS_TILE \ __attribute__((__always_inline__, __nodebug__, __target__("amx-tile"))) #define __DEFAULT_FN_ATTRS_INT8 \ __attribute__((__always_inline__, __nodebug__, __target__("amx-int8"))) #define __DEFAULT_FN_ATTRS_BF16 \ __attribute__((__always_inline__, __nodebug__, __target__("amx-bf16"))) #define __DEFAULT_FN_ATTRS_FP16 \ __attribute__((__always_inline__, __nodebug__, __target__("amx-fp16"))) /// Load tile configuration from a 64-byte memory location specified by /// "mem_addr". The tile configuration includes the tile type palette, the /// number of bytes per row, and the number of rows. If the specified /// palette_id is zero, that signifies the init state for both the tile /// config and the tile data, and the tiles are zeroed. Any invalid /// configurations will result in #GP fault. /// /// \headerfile /// /// This intrinsic corresponds to the LDTILECFG instruction. /// /// \param __config /// A pointer to 512-bits configuration static __inline__ void __DEFAULT_FN_ATTRS_TILE _tile_loadconfig(const void *__config) { __builtin_ia32_tile_loadconfig(__config); } /// Stores the current tile configuration to a 64-byte memory location /// specified by "mem_addr". The tile configuration includes the tile type /// palette, the number of bytes per row, and the number of rows. If tiles /// are not configured, all zeroes will be stored to memory. /// /// \headerfile /// /// This intrinsic corresponds to the STTILECFG instruction. /// /// \param __config /// A pointer to 512-bits configuration static __inline__ void __DEFAULT_FN_ATTRS_TILE _tile_storeconfig(void *__config) { __builtin_ia32_tile_storeconfig(__config); } /// Release the tile configuration to return to the init state, which /// releases all storage it currently holds. /// /// \headerfile /// /// This intrinsic corresponds to the TILERELEASE instruction. static __inline__ void __DEFAULT_FN_ATTRS_TILE _tile_release(void) { __builtin_ia32_tilerelease(); } /// Load tile rows from memory specifieid by "base" address and "stride" into /// destination tile "dst" using the tile configuration previously configured /// via "_tile_loadconfig". /// /// \headerfile /// /// This intrinsic corresponds to the TILELOADD instruction. /// /// \param dst /// A destination tile. Max size is 1024 Bytes. /// \param base /// A pointer to base address. /// \param stride /// The stride between the rows' data to be loaded in memory. #define _tile_loadd(dst, base, stride) \ __builtin_ia32_tileloadd64((dst), ((const void *)(base)), \ (__SIZE_TYPE__)(stride)) /// Load tile rows from memory specifieid by "base" address and "stride" into /// destination tile "dst" using the tile configuration previously configured /// via "_tile_loadconfig". This intrinsic provides a hint to the implementation /// that the data will likely not be reused in the near future and the data /// caching can be optimized accordingly. /// /// \headerfile /// /// This intrinsic corresponds to the TILELOADDT1 instruction. /// /// \param dst /// A destination tile. Max size is 1024 Bytes. /// \param base /// A pointer to base address. /// \param stride /// The stride between the rows' data to be loaded in memory. #define _tile_stream_loadd(dst, base, stride) \ __builtin_ia32_tileloaddt164((dst), ((const void *)(base)), \ (__SIZE_TYPE__)(stride)) /// Store the tile specified by "src" to memory specifieid by "base" address and /// "stride" using the tile configuration previously configured via /// "_tile_loadconfig". 
/// /// \headerfile /// /// This intrinsic corresponds to the TILESTORED instruction. /// /// \param dst /// A destination tile. Max size is 1024 Bytes. /// \param base /// A pointer to base address. /// \param stride /// The stride between the rows' data to be stored in memory. #define _tile_stored(dst, base, stride) \ __builtin_ia32_tilestored64((dst), ((void *)(base)), (__SIZE_TYPE__)(stride)) /// Zero the tile specified by "tdest". /// /// \headerfile /// /// This intrinsic corresponds to the TILEZERO instruction. /// /// \param tile /// The destination tile to be zero. Max size is 1024 Bytes. #define _tile_zero(tile) __builtin_ia32_tilezero((tile)) /// Compute dot-product of bytes in tiles with a source/destination accumulator. /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in src0 with /// corresponding signed 8-bit integers in src1, producing 4 intermediate 32-bit /// results. Sum these 4 results with the corresponding 32-bit integer in "dst", /// and store the 32-bit result back to tile "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TDPBSSD instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. #define _tile_dpbssd(dst, src0, src1) \ __builtin_ia32_tdpbssd((dst), (src0), (src1)) /// Compute dot-product of bytes in tiles with a source/destination accumulator. /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in src0 with /// corresponding unsigned 8-bit integers in src1, producing 4 intermediate /// 32-bit results. Sum these 4 results with the corresponding 32-bit integer /// in "dst", and store the 32-bit result back to tile "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TDPBSUD instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. #define _tile_dpbsud(dst, src0, src1) \ __builtin_ia32_tdpbsud((dst), (src0), (src1)) /// Compute dot-product of bytes in tiles with a source/destination accumulator. /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in src0 with /// corresponding signed 8-bit integers in src1, producing 4 intermediate 32-bit /// results. Sum these 4 results with the corresponding 32-bit integer in "dst", /// and store the 32-bit result back to tile "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TDPBUSD instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. #define _tile_dpbusd(dst, src0, src1) \ __builtin_ia32_tdpbusd((dst), (src0), (src1)) /// Compute dot-product of bytes in tiles with a source/destination accumulator. /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in src0 with /// corresponding unsigned 8-bit integers in src1, producing 4 intermediate /// 32-bit results. Sum these 4 results with the corresponding 32-bit integer in /// "dst", and store the 32-bit result back to tile "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TDPBUUD instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. 
#define _tile_dpbuud(dst, src0, src1) \ __builtin_ia32_tdpbuud((dst), (src0), (src1)) /// Compute dot-product of BF16 (16-bit) floating-point pairs in tiles src0 and /// src1, accumulating the intermediate single-precision (32-bit) floating-point /// elements with elements in "dst", and store the 32-bit result back to tile /// "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TDPBF16PS instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. #define _tile_dpbf16ps(dst, src0, src1) \ __builtin_ia32_tdpbf16ps((dst), (src0), (src1)) /// AMX tile register size can be configured, the maximum size is 16x64=1024 /// bytes. Since there is no 2D type in llvm IR, we use vector type to /// represent 2D tile and the fixed size is maximum amx tile register size. typedef int _tile1024i __attribute__((__vector_size__(1024), __aligned__(64))); /// This is internal intrinsic. C/C++ user should avoid calling it directly. static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8 _tile_loadd_internal(unsigned short m, unsigned short n, const void *base, __SIZE_TYPE__ stride) { return __builtin_ia32_tileloadd64_internal(m, n, base, (__SIZE_TYPE__)(stride)); } /// This is internal intrinsic. C/C++ user should avoid calling it directly. static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8 _tile_loaddt1_internal(unsigned short m, unsigned short n, const void *base, __SIZE_TYPE__ stride) { return __builtin_ia32_tileloaddt164_internal(m, n, base, (__SIZE_TYPE__)(stride)); } /// This is internal intrinsic. C/C++ user should avoid calling it directly. static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8 _tile_dpbssd_internal(unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, _tile1024i src1, _tile1024i src2) { return __builtin_ia32_tdpbssd_internal(m, n, k, dst, src1, src2); } /// This is internal intrinsic. C/C++ user should avoid calling it directly. static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8 _tile_dpbsud_internal(unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, _tile1024i src1, _tile1024i src2) { return __builtin_ia32_tdpbsud_internal(m, n, k, dst, src1, src2); } /// This is internal intrinsic. C/C++ user should avoid calling it directly. static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8 _tile_dpbusd_internal(unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, _tile1024i src1, _tile1024i src2) { return __builtin_ia32_tdpbusd_internal(m, n, k, dst, src1, src2); } /// This is internal intrinsic. C/C++ user should avoid calling it directly. static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8 _tile_dpbuud_internal(unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, _tile1024i src1, _tile1024i src2) { return __builtin_ia32_tdpbuud_internal(m, n, k, dst, src1, src2); } /// This is internal intrinsic. C/C++ user should avoid calling it directly. static __inline__ void __DEFAULT_FN_ATTRS_INT8 _tile_stored_internal(unsigned short m, unsigned short n, void *base, __SIZE_TYPE__ stride, _tile1024i tile) { return __builtin_ia32_tilestored64_internal(m, n, base, (__SIZE_TYPE__)(stride), tile); } /// This is internal intrinsic. C/C++ user should avoid calling it directly. 
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_BF16
_tile_dpbf16ps_internal(unsigned short m, unsigned short n, unsigned short k,
                        _tile1024i dst, _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_tdpbf16ps_internal(m, n, k, dst, src1, src2);
}

/// This is an internal intrinsic. C/C++ users should avoid calling it
/// directly.
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_FP16
_tile_dpfp16ps_internal(unsigned short m, unsigned short n, unsigned short k,
                        _tile1024i dst, _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_tdpfp16ps_internal(m, n, k, dst, src1, src2);
}

/// This struct packs the tile shape and the tile data together for the user.
/// We suggest initializing the struct as early as possible, because the
/// compiler depends on the shape information to configure the tiles.
/// Constant shape values are preferred, as they give the compiler more room
/// to optimize.
typedef struct __tile1024i_str {
  const unsigned short row;
  const unsigned short col;
  _tile1024i tile;
} __tile1024i;

/// Load tile rows from memory specified by "base" address and "stride" into
/// destination tile "dst".
///
/// \headerfile
///
/// This intrinsic corresponds to the TILELOADD instruction.
///
/// \param dst
///    A destination tile. Max size is 1024 Bytes.
/// \param base
///    A pointer to base address.
/// \param stride
///    The stride between the rows' data to be loaded in memory.
__DEFAULT_FN_ATTRS_TILE
static __inline__ void __tile_loadd(__tile1024i *dst, const void *base,
                                    __SIZE_TYPE__ stride) {
  dst->tile = _tile_loadd_internal(dst->row, dst->col, base, stride);
}

/// Load tile rows from memory specified by "base" address and "stride" into
/// destination tile "dst". This intrinsic provides a hint to the
/// implementation that the data will likely not be reused in the near future
/// and the data caching can be optimized accordingly.
///
/// \headerfile
///
/// This intrinsic corresponds to the TILELOADDT1 instruction.
///
/// \param dst
///    A destination tile. Max size is 1024 Bytes.
/// \param base
///    A pointer to base address.
/// \param stride
///    The stride between the rows' data to be loaded in memory.
__DEFAULT_FN_ATTRS_TILE
static __inline__ void __tile_stream_loadd(__tile1024i *dst, const void *base,
                                           __SIZE_TYPE__ stride) {
  dst->tile = _tile_loaddt1_internal(dst->row, dst->col, base, stride);
}

/// Compute dot-product of bytes in tiles with a source/destination
/// accumulator. Multiply groups of 4 adjacent pairs of signed 8-bit integers
/// in src0 with corresponding signed 8-bit integers in src1, producing 4
/// intermediate 32-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in "dst", and store the 32-bit result back to tile "dst".
///
/// \headerfile
///
/// This intrinsic corresponds to the TDPBSSD instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_INT8
static __inline__ void __tile_dpbssd(__tile1024i *dst, __tile1024i src0,
                                     __tile1024i src1) {
  dst->tile = _tile_dpbssd_internal(src0.row, src1.col, src0.col, dst->tile,
                                    src0.tile, src1.tile);
}

/// Compute dot-product of bytes in tiles with a source/destination
/// accumulator. Multiply groups of 4 adjacent pairs of signed 8-bit integers
/// in src0 with corresponding unsigned 8-bit integers in src1, producing 4
/// intermediate 32-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in "dst", and store the 32-bit result back to tile "dst".
/// /// \headerfile /// /// This intrinsic corresponds to the TDPBSUD instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. __DEFAULT_FN_ATTRS_INT8 static __inline__ void __tile_dpbsud(__tile1024i *dst, __tile1024i src0, __tile1024i src1) { dst->tile = _tile_dpbsud_internal(src0.row, src1.col, src0.col, dst->tile, src0.tile, src1.tile); } /// Compute dot-product of bytes in tiles with a source/destination accumulator. /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in src0 with /// corresponding signed 8-bit integers in src1, producing 4 intermediate 32-bit /// results. Sum these 4 results with the corresponding 32-bit integer in "dst", /// and store the 32-bit result back to tile "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TDPBUSD instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. __DEFAULT_FN_ATTRS_INT8 static __inline__ void __tile_dpbusd(__tile1024i *dst, __tile1024i src0, __tile1024i src1) { dst->tile = _tile_dpbusd_internal(src0.row, src1.col, src0.col, dst->tile, src0.tile, src1.tile); } /// Compute dot-product of bytes in tiles with a source/destination accumulator. /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in src0 with /// corresponding unsigned 8-bit integers in src1, producing 4 intermediate /// 32-bit results. Sum these 4 results with the corresponding 32-bit integer in /// "dst", and store the 32-bit result back to tile "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TDPBUUD instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. __DEFAULT_FN_ATTRS_INT8 static __inline__ void __tile_dpbuud(__tile1024i *dst, __tile1024i src0, __tile1024i src1) { dst->tile = _tile_dpbuud_internal(src0.row, src1.col, src0.col, dst->tile, src0.tile, src1.tile); } /// Store the tile specified by "src" to memory specifieid by "base" address and /// "stride". /// /// \headerfile /// /// This intrinsic corresponds to the TILESTORED instruction. /// /// \param base /// A pointer to base address. /// \param stride /// The stride between the rows' data to be stored in memory. __DEFAULT_FN_ATTRS_TILE static __inline__ void __tile_stored(void *base, __SIZE_TYPE__ stride, __tile1024i src) { _tile_stored_internal(src.row, src.col, base, stride, src.tile); } /// Zero the tile specified by "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TILEZERO instruction. /// /// \param dst /// The destination tile to be zero. Max size is 1024 Bytes. __DEFAULT_FN_ATTRS_TILE static __inline__ void __tile_zero(__tile1024i *dst) { dst->tile = __builtin_ia32_tilezero_internal(dst->row, dst->col); } /// Compute dot-product of BF16 (16-bit) floating-point pairs in tiles src0 and /// src1, accumulating the intermediate single-precision (32-bit) floating-point /// elements with elements in "dst", and store the 32-bit result back to tile /// "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TDPBF16PS instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. 
Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_BF16
static __inline__ void __tile_dpbf16ps(__tile1024i *dst, __tile1024i src0,
                                       __tile1024i src1) {
  dst->tile = _tile_dpbf16ps_internal(src0.row, src1.col, src0.col, dst->tile,
                                      src0.tile, src1.tile);
}

/// Compute dot-product of FP16 (16-bit) floating-point pairs in tiles src0
/// and src1, accumulating the intermediate single-precision (32-bit)
/// floating-point elements with elements in "dst", and store the 32-bit
/// result back to tile "dst".
///
/// \headerfile
///
/// This intrinsic corresponds to the TDPFP16PS instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_FP16
static __inline__ void __tile_dpfp16ps(__tile1024i *dst, __tile1024i src0,
                                       __tile1024i src1) {
  dst->tile = _tile_dpfp16ps_internal(src0.row, src1.col, src0.col, dst->tile,
                                      src0.tile, src1.tile);
}

#undef __DEFAULT_FN_ATTRS_TILE
#undef __DEFAULT_FN_ATTRS_INT8
#undef __DEFAULT_FN_ATTRS_BF16
#undef __DEFAULT_FN_ATTRS_FP16

#endif /* __x86_64__ */
#endif /* __AMXINTRIN_H */

/*===---- avx512vlbwintrin.h - AVX512VL and AVX512BW intrinsics ------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512VLBWINTRIN_H
#define __AVX512VLBWINTRIN_H

/* Define the default attributes for the functions in this file.
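
   Editor's note: the sketch below is an illustrative addition, not part of
   the original header. It shows the typical flow of the intrinsics defined
   in this file: one of the compare macros produces a k-mask, and the masked
   (_mask_/_maskz_) and blend intrinsics then consume that mask lane by lane.
   The variable names v, limit, k and clamped are hypothetical; the sketch
   assumes AVX512VL and AVX512BW are enabled and immintrin.h is included.

     __m256i v = _mm256_set1_epi8(50);     // hypothetical input: 32 signed bytes
     __m256i limit = _mm256_set1_epi8(20); // hypothetical per-lane upper bound
     __mmask32 k = _mm256_cmple_epi8_mask(v, limit);        // k[i] = (v[i] <= limit[i])
     __m256i clamped = _mm256_mask_blend_epi8(k, limit, v); // k[i] ? v[i] : limit[i], i.e. per-lane min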
*/ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512bw,no-evex512"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512bw,no-evex512"), \ __min_vector_width__(256))) /* Integer compare */ #define _mm_cmp_epi8_mask(a, b, p) \ ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ (__v16qi)(__m128i)(b), (int)(p), \ (__mmask16)-1)) #define _mm_mask_cmp_epi8_mask(m, a, b, p) \ ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ (__v16qi)(__m128i)(b), (int)(p), \ (__mmask16)(m))) #define _mm_cmp_epu8_mask(a, b, p) \ ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ (__v16qi)(__m128i)(b), (int)(p), \ (__mmask16)-1)) #define _mm_mask_cmp_epu8_mask(m, a, b, p) \ ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ (__v16qi)(__m128i)(b), (int)(p), \ (__mmask16)(m))) #define _mm256_cmp_epi8_mask(a, b, p) \ ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ (__v32qi)(__m256i)(b), (int)(p), \ (__mmask32)-1)) #define _mm256_mask_cmp_epi8_mask(m, a, b, p) \ ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ (__v32qi)(__m256i)(b), (int)(p), \ (__mmask32)(m))) #define _mm256_cmp_epu8_mask(a, b, p) \ ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ (__v32qi)(__m256i)(b), (int)(p), \ (__mmask32)-1)) #define _mm256_mask_cmp_epu8_mask(m, a, b, p) \ ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ (__v32qi)(__m256i)(b), (int)(p), \ (__mmask32)(m))) #define _mm_cmp_epi16_mask(a, b, p) \ ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ (__v8hi)(__m128i)(b), (int)(p), \ (__mmask8)-1)) #define _mm_mask_cmp_epi16_mask(m, a, b, p) \ ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ (__v8hi)(__m128i)(b), (int)(p), \ (__mmask8)(m))) #define _mm_cmp_epu16_mask(a, b, p) \ ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ (__v8hi)(__m128i)(b), (int)(p), \ (__mmask8)-1)) #define _mm_mask_cmp_epu16_mask(m, a, b, p) \ ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ (__v8hi)(__m128i)(b), (int)(p), \ (__mmask8)(m))) #define _mm256_cmp_epi16_mask(a, b, p) \ ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ (__v16hi)(__m256i)(b), (int)(p), \ (__mmask16)-1)) #define _mm256_mask_cmp_epi16_mask(m, a, b, p) \ ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ (__v16hi)(__m256i)(b), (int)(p), \ (__mmask16)(m))) #define _mm256_cmp_epu16_mask(a, b, p) \ ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ (__v16hi)(__m256i)(b), (int)(p), \ (__mmask16)-1)) #define _mm256_mask_cmp_epu16_mask(m, a, b, p) \ ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ (__v16hi)(__m256i)(b), (int)(p), \ (__mmask16)(m))) #define _mm_cmpeq_epi8_mask(A, B) \ _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) #define _mm_mask_cmpeq_epi8_mask(k, A, B) \ _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm_cmpge_epi8_mask(A, B) \ _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) #define _mm_mask_cmpge_epi8_mask(k, A, B) \ _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm_cmpgt_epi8_mask(A, B) \ _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) #define _mm_mask_cmpgt_epi8_mask(k, A, B) \ _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm_cmple_epi8_mask(A, B) \ _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) #define _mm_mask_cmple_epi8_mask(k, A, B) \ 
_mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm_cmplt_epi8_mask(A, B) \ _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) #define _mm_mask_cmplt_epi8_mask(k, A, B) \ _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm_cmpneq_epi8_mask(A, B) \ _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) #define _mm_mask_cmpneq_epi8_mask(k, A, B) \ _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) #define _mm256_cmpeq_epi8_mask(A, B) \ _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) #define _mm256_mask_cmpeq_epi8_mask(k, A, B) \ _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm256_cmpge_epi8_mask(A, B) \ _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) #define _mm256_mask_cmpge_epi8_mask(k, A, B) \ _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm256_cmpgt_epi8_mask(A, B) \ _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) #define _mm256_mask_cmpgt_epi8_mask(k, A, B) \ _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm256_cmple_epi8_mask(A, B) \ _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) #define _mm256_mask_cmple_epi8_mask(k, A, B) \ _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm256_cmplt_epi8_mask(A, B) \ _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) #define _mm256_mask_cmplt_epi8_mask(k, A, B) \ _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm256_cmpneq_epi8_mask(A, B) \ _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) #define _mm256_mask_cmpneq_epi8_mask(k, A, B) \ _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) #define _mm_cmpeq_epu8_mask(A, B) \ _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) #define _mm_mask_cmpeq_epu8_mask(k, A, B) \ _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm_cmpge_epu8_mask(A, B) \ _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) #define _mm_mask_cmpge_epu8_mask(k, A, B) \ _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm_cmpgt_epu8_mask(A, B) \ _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) #define _mm_mask_cmpgt_epu8_mask(k, A, B) \ _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm_cmple_epu8_mask(A, B) \ _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) #define _mm_mask_cmple_epu8_mask(k, A, B) \ _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm_cmplt_epu8_mask(A, B) \ _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) #define _mm_mask_cmplt_epu8_mask(k, A, B) \ _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm_cmpneq_epu8_mask(A, B) \ _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) #define _mm_mask_cmpneq_epu8_mask(k, A, B) \ _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) #define _mm256_cmpeq_epu8_mask(A, B) \ _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) #define _mm256_mask_cmpeq_epu8_mask(k, A, B) \ _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm256_cmpge_epu8_mask(A, B) \ _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) #define _mm256_mask_cmpge_epu8_mask(k, A, B) \ _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm256_cmpgt_epu8_mask(A, B) \ _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) #define _mm256_mask_cmpgt_epu8_mask(k, A, B) \ _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm256_cmple_epu8_mask(A, B) \ _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) #define _mm256_mask_cmple_epu8_mask(k, A, B) \ _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm256_cmplt_epu8_mask(A, B) \ _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) #define _mm256_mask_cmplt_epu8_mask(k, A, B) \ _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) #define 
_mm256_cmpneq_epu8_mask(A, B) \ _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) #define _mm256_mask_cmpneq_epu8_mask(k, A, B) \ _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) #define _mm_cmpeq_epi16_mask(A, B) \ _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) #define _mm_mask_cmpeq_epi16_mask(k, A, B) \ _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm_cmpge_epi16_mask(A, B) \ _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) #define _mm_mask_cmpge_epi16_mask(k, A, B) \ _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm_cmpgt_epi16_mask(A, B) \ _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) #define _mm_mask_cmpgt_epi16_mask(k, A, B) \ _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm_cmple_epi16_mask(A, B) \ _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) #define _mm_mask_cmple_epi16_mask(k, A, B) \ _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm_cmplt_epi16_mask(A, B) \ _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) #define _mm_mask_cmplt_epi16_mask(k, A, B) \ _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm_cmpneq_epi16_mask(A, B) \ _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) #define _mm_mask_cmpneq_epi16_mask(k, A, B) \ _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) #define _mm256_cmpeq_epi16_mask(A, B) \ _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) #define _mm256_mask_cmpeq_epi16_mask(k, A, B) \ _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm256_cmpge_epi16_mask(A, B) \ _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) #define _mm256_mask_cmpge_epi16_mask(k, A, B) \ _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm256_cmpgt_epi16_mask(A, B) \ _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) #define _mm256_mask_cmpgt_epi16_mask(k, A, B) \ _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm256_cmple_epi16_mask(A, B) \ _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) #define _mm256_mask_cmple_epi16_mask(k, A, B) \ _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm256_cmplt_epi16_mask(A, B) \ _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) #define _mm256_mask_cmplt_epi16_mask(k, A, B) \ _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm256_cmpneq_epi16_mask(A, B) \ _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) #define _mm256_mask_cmpneq_epi16_mask(k, A, B) \ _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) #define _mm_cmpeq_epu16_mask(A, B) \ _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) #define _mm_mask_cmpeq_epu16_mask(k, A, B) \ _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm_cmpge_epu16_mask(A, B) \ _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) #define _mm_mask_cmpge_epu16_mask(k, A, B) \ _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm_cmpgt_epu16_mask(A, B) \ _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) #define _mm_mask_cmpgt_epu16_mask(k, A, B) \ _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm_cmple_epu16_mask(A, B) \ _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) #define _mm_mask_cmple_epu16_mask(k, A, B) \ _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm_cmplt_epu16_mask(A, B) \ _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) #define _mm_mask_cmplt_epu16_mask(k, A, B) \ _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm_cmpneq_epu16_mask(A, B) \ _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) #define _mm_mask_cmpneq_epu16_mask(k, A, B) \ _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) #define _mm256_cmpeq_epu16_mask(A, B) \ 
_mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) #define _mm256_mask_cmpeq_epu16_mask(k, A, B) \ _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm256_cmpge_epu16_mask(A, B) \ _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) #define _mm256_mask_cmpge_epu16_mask(k, A, B) \ _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm256_cmpgt_epu16_mask(A, B) \ _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) #define _mm256_mask_cmpgt_epu16_mask(k, A, B) \ _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm256_cmple_epu16_mask(A, B) \ _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) #define _mm256_mask_cmple_epu16_mask(k, A, B) \ _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm256_cmplt_epu16_mask(A, B) \ _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) #define _mm256_mask_cmplt_epu16_mask(k, A, B) \ _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm256_cmpneq_epu16_mask(A, B) \ _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) #define _mm256_mask_cmpneq_epu16_mask(k, A, B) \ _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){ return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_add_epi8(__A, __B), (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_add_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_add_epi16(__A, __B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_add_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_sub_epi8(__A, __B), (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_sub_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_sub_epi16(__A, __B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_sub_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_add_epi8(__A, __B), (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_add_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 _mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_add_epi16(__A, __B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_add_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_sub_epi8(__A, __B), (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_sub_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_sub_epi16(__A, __B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_sub_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mullo_epi16(__A, __B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mullo_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mullo_epi16(__A, __B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mullo_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, (__v16qi) __W, (__v16qi) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, (__v32qi) __W, (__v32qi) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, (__v8hi) __W, (__v8hi) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, (__v16hi) __W, (__v16hi) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_abs_epi8(__A), (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi8(__mmask16 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 
(__v16qi)_mm_abs_epi8(__A), (__v16qi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_abs_epi8(__A), (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_abs_epi8(__A), (__v32qi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_abs_epi16(__A), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi16(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_abs_epi16(__A), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_abs_epi16(__A), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_abs_epi16(__A), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_packs_epi32(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_packs_epi32(__A, __B), (__v8hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_packs_epi32(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_packs_epi32(__A, __B), (__v16hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_packs_epi16(__A, __B), (__v16qi)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_packs_epi16(__A, __B), (__v16qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_packs_epi16(__A, __B), (__v32qi)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_packs_epi16(__A, __B), (__v32qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_packus_epi32(__A, __B), (__v8hi)_mm_setzero_si128()); } 
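/* Editor's illustrative sketch, not part of the original header: the
   _mm_mask_* and _mm_maskz_* intrinsics above all follow the same
   select-on-k-mask pattern, so a typical use is to build a mask with one of
   the compare macros and then apply an operation only in the selected lanes.
   The helper name below is hypothetical; the sketch assumes AVX512VL and
   AVX512BW are enabled (e.g. -mavx512vl -mavx512bw) and that this header is
   reached through immintrin.h as usual. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
__editor_example_add_where_b_positive(__m128i __A, __m128i __B) {
  /* k-mask with one bit per byte lane: set where __B[i] > 0 (signed). */
  __mmask16 __k = _mm_cmpgt_epi8_mask(__B, _mm_setzero_si128());
  /* Add only in the selected lanes; lanes with a clear mask bit become 0. */
  return _mm_maskz_add_epi8(__k, __A, __B);
}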
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_packus_epi32(__A, __B), (__v8hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_packus_epi32(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_packus_epi32(__A, __B), (__v16hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_packus_epi16(__A, __B), (__v16qi)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_packus_epi16(__A, __B), (__v16qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_packus_epi16(__A, __B), (__v32qi)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_packus_epi16(__A, __B), (__v32qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_adds_epi8(__A, __B), (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_adds_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_adds_epi8(__A, __B), (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_adds_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_adds_epi16(__A, __B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_adds_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_adds_epi16(__A, __B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 
(__v16hi)_mm256_adds_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_adds_epu8(__A, __B), (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_adds_epu8(__A, __B), (__v16qi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_adds_epu8(__A, __B), (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_adds_epu8(__A, __B), (__v32qi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_adds_epu16(__A, __B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_adds_epu16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_adds_epu16(__A, __B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_adds_epu16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_avg_epu8(__A, __B), (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_avg_epu8(__A, __B), (__v16qi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_avg_epu8(__A, __B), (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_avg_epu8(__A, __B), (__v32qi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_avg_epu16(__A, __B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_avg_epu16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return 
(__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_avg_epu16(__A, __B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_avg_epu16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_max_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_max_epi8(__A, __B), (__v16qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_max_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_max_epi8(__A, __B), (__v32qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_max_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_max_epi16(__A, __B), (__v8hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_max_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_max_epi16(__A, __B), (__v16hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_max_epu8(__A, __B), (__v16qi)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_max_epu8(__A, __B), (__v16qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_max_epu8(__A, __B), (__v32qi)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_max_epu8(__A, __B), (__v32qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_max_epu16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_max_epu16(__A, __B), (__v8hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_max_epu16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_max_epu16(__A, __B), (__v16hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_min_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_min_epi8(__A, __B), (__v16qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_min_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_min_epi8(__A, __B), (__v32qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_min_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_min_epi16(__A, __B), (__v8hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_min_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_min_epi16(__A, __B), (__v16hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_min_epu8(__A, __B), (__v16qi)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_min_epu8(__A, __B), (__v16qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_min_epu8(__A, __B), (__v32qi)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_min_epu8(__A, __B), (__v32qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B) { return 
(__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_min_epu16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_min_epu16(__A, __B), (__v8hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_min_epu16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_min_epu16(__A, __B), (__v16hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_shuffle_epi8(__A, __B), (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_shuffle_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_shuffle_epi8(__A, __B), (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_shuffle_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_subs_epi8(__A, __B), (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_subs_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_subs_epi8(__A, __B), (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_subs_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_subs_epi16(__A, __B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_subs_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_subs_epi16(__A, __B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epi16(__mmask16 
__U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_subs_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_subs_epu8(__A, __B), (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_subs_epu8(__A, __B), (__v16qi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_subs_epu8(__A, __B), (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_subs_epu8(__A, __B), (__v32qi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_subs_epu16(__A, __B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_subs_epu16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_subs_epu16(__A, __B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_subs_epu16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I, (__v8hi) __B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128(__U, (__v8hi)_mm_permutex2var_epi16(__A, __I, __B), (__v8hi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128(__U, (__v8hi)_mm_permutex2var_epi16(__A, __I, __B), (__v8hi)__I); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128(__U, (__v8hi)_mm_permutex2var_epi16(__A, __I, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I, (__v16hi)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256(__U, (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B), (__v16hi)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 
_mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256(__U, (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B), (__v16hi)__I); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256(__U, (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_maddubs_epi16(__X, __Y), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_maddubs_epi16(__X, __Y), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_maddubs_epi16(__X, __Y), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_maddubs_epi16(__X, __Y), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_madd_epi16(__A, __B), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_madd_epi16(__A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_madd_epi16(__A, __B), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_madd_epi16(__A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi16_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A, (__v16qi) _mm_setzero_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi16_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A, (__v16qi) _mm_setzero_si128(), __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi16_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A, (__v16qi) _mm_setzero_si128(), (__mmask16) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi16_epi8 (__mmask16 __M, __m256i __A) { return (__m128i) 
__builtin_ia32_pmovswb256_mask ((__v16hi) __A, (__v16qi) _mm_setzero_si128(), __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi16_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A, (__v16qi) _mm_setzero_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi16_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A, (__v16qi) _mm_setzero_si128(), __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi16_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A, (__v16qi) _mm_setzero_si128(), (__mmask16) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A, (__v16qi) _mm_setzero_si128(), __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi16_epi8 (__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v8hi)__A, __v8qi), (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A, (__v16qi) _mm_setzero_si128(), __M); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovwb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_epi8 (__m256i __A) { return (__m128i)__builtin_convertvector((__v16hi) __A, __v16qi); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm256_cvtepi16_epi8(__A), (__v16qi)__O); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm256_cvtepi16_epi8(__A), (__v16qi)_mm_setzero_si128()); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) { __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) { __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P, (__v16hi) __A, 
__M); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) { __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhrs_epi16(__X, __Y), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhrs_epi16(__X, __Y), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhrs_epi16(__X, __Y), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhrs_epi16(__X, __Y), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhi_epu16(__A, __B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhi_epu16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhi_epu16(__A, __B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhi_epu16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhi_epi16(__A, __B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhi_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhi_epi16(__A, __B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhi_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_unpackhi_epi8(__A, __B), (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_unpackhi_epi8(__A, 
__B), (__v16qi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_unpackhi_epi8(__A, __B), (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_unpackhi_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_unpackhi_epi16(__A, __B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_unpackhi_epi16(__A, __B), (__v8hi) _mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_unpackhi_epi16(__A, __B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_unpackhi_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_unpacklo_epi8(__A, __B), (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_unpacklo_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_unpacklo_epi8(__A, __B), (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_unpacklo_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_unpacklo_epi16(__A, __B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_unpacklo_epi16(__A, __B), (__v8hi) _mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_unpacklo_epi16(__A, __B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_unpacklo_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 
_mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_cvtepi8_epi16(__A), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_cvtepi8_epi16(__A), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_cvtepi8_epi16(__A), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_cvtepi8_epi16(__A), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_cvtepu8_epi16(__A), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_cvtepu8_epi16(__A), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_cvtepu8_epi16(__A), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_cvtepu8_epi16(__A), (__v16hi)_mm256_setzero_si256()); } #define _mm_mask_shufflehi_epi16(W, U, A, imm) \ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ (__v8hi)(__m128i)(W))) #define _mm_maskz_shufflehi_epi16(U, A, imm) \ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ (__v8hi)_mm_setzero_si128())) #define _mm256_mask_shufflehi_epi16(W, U, A, imm) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ (__v16hi)(__m256i)(W))) #define _mm256_maskz_shufflehi_epi16(U, A, imm) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ (__v16hi)_mm256_setzero_si256())) #define _mm_mask_shufflelo_epi16(W, U, A, imm) \ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ (__v8hi)(__m128i)(W))) #define _mm_maskz_shufflelo_epi16(U, A, imm) \ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ (__v8hi)_mm_setzero_si128())) #define _mm256_mask_shufflelo_epi16(W, U, A, imm) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ (__v16hi)_mm256_shufflelo_epi16((A), \ (imm)), \ (__v16hi)(__m256i)(W))) #define _mm256_maskz_shufflelo_epi16(U, A, imm) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ (__v16hi)_mm256_shufflelo_epi16((A), \ (imm)), \ (__v16hi)_mm256_setzero_si256())) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi16(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 
(__v16hi)_mm256_sllv_epi16(__A, __B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_sllv_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psllv8hi((__v8hi)__A, (__v8hi)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_sllv_epi16(__A, __B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_sllv_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_sll_epi16(__A, __B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_sll_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_sll_epi16(__A, __B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_sll_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_slli_epi16(__A, (int)__B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_slli_epi16(__A, (int)__B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_slli_epi16(__A, (int)__B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_slli_epi16(__A, (int)__B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi16(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_srlv_epi16(__A, __B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_srlv_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 _mm_srlv_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psrlv8hi((__v8hi)__A, (__v8hi)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_srlv_epi16(__A, __B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_srlv_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi16(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_srav_epi16(__A, __B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_srav_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psrav8hi((__v8hi)__A, (__v8hi)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_srav_epi16(__A, __B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_srav_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_sra_epi16(__A, __B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_sra_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_sra_epi16(__A, __B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_sra_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_srai_epi16(__A, (int)__B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_srai_epi16(__A, (int)__B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_srai_epi16(__A, (int)__B), 
(__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_srai_epi16(__A, (int)__B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_srl_epi16(__A, __B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_srl_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_srl_epi16(__A, __B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_srl_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_srli_epi16(__A, __B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_srli_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_srli_epi16(__A, __B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_srli_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, (__v8hi) __A, (__v8hi) __W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, (__v8hi) __A, (__v8hi) _mm_setzero_si128 ()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, (__v16hi) __A, (__v16hi) __W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, (__v16hi) __A, (__v16hi) _mm256_setzero_si256 ()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, (__v16qi) __A, (__v16qi) __W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, (__v16qi) __A, (__v16qi) _mm_setzero_si128 ()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A) { return (__m256i) 
__builtin_ia32_selectb_256 ((__mmask32) __U, (__v32qi) __A, (__v32qi) __W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, (__v32qi) __A, (__v32qi) _mm256_setzero_si256 ()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A) { return (__m128i) __builtin_ia32_selectb_128(__M, (__v16qi) _mm_set1_epi8(__A), (__v16qi) __O); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi8 (__mmask16 __M, char __A) { return (__m128i) __builtin_ia32_selectb_128(__M, (__v16qi) _mm_set1_epi8(__A), (__v16qi) _mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A) { return (__m256i) __builtin_ia32_selectb_256(__M, (__v32qi) _mm256_set1_epi8(__A), (__v32qi) __O); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi8 (__mmask32 __M, char __A) { return (__m256i) __builtin_ia32_selectb_256(__M, (__v32qi) _mm256_set1_epi8(__A), (__v32qi) _mm256_setzero_si256()); } static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi16 (void const *__P) { struct __loadu_epi16 { __m128i_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_epi16*)__P)->__v; } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddquhi128_mask ((const __v8hi *) __P, (__v8hi) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddquhi128_mask ((const __v8hi *) __P, (__v8hi) _mm_setzero_si128 (), (__mmask8) __U); } static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi16 (void const *__P) { struct __loadu_epi16 { __m256i_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_epi16*)__P)->__v; } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddquhi256_mask ((const __v16hi *) __P, (__v16hi) __W, (__mmask16) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddquhi256_mask ((const __v16hi *) __P, (__v16hi) _mm256_setzero_si256 (), (__mmask16) __U); } static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi8 (void const *__P) { struct __loadu_epi8 { __m128i_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_epi8*)__P)->__v; } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddquqi128_mask ((const __v16qi *) __P, (__v16qi) __W, (__mmask16) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddquqi128_mask ((const __v16qi *) __P, (__v16qi) _mm_setzero_si128 (), (__mmask16) __U); } static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi8 (void const *__P) { struct __loadu_epi8 { __m256i_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_epi8*)__P)->__v; } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddquqi256_mask ((const 
__v32qi *) __P, (__v32qi) __W, (__mmask32) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddquqi256_mask ((const __v32qi *) __P, (__v32qi) _mm256_setzero_si256 (), (__mmask32) __U); } static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi16 (void *__P, __m128i __A) { struct __storeu_epi16 { __m128i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi16*)__P)->__v = __A; } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_storedquhi128_mask ((__v8hi *) __P, (__v8hi) __A, (__mmask8) __U); } static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi16 (void *__P, __m256i __A) { struct __storeu_epi16 { __m256i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi16*)__P)->__v = __A; } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A) { __builtin_ia32_storedquhi256_mask ((__v16hi *) __P, (__v16hi) __A, (__mmask16) __U); } static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi8 (void *__P, __m128i __A) { struct __storeu_epi8 { __m128i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi8*)__P)->__v = __A; } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A) { __builtin_ia32_storedquqi128_mask ((__v16qi *) __P, (__v16qi) __A, (__mmask16) __U); } static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi8 (void *__P, __m256i __A) { struct __storeu_epi8 { __m256i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi8*)__P)->__v = __A; } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A) { __builtin_ia32_storedquqi256_mask ((__v32qi *) __P, (__v32qi) __A, (__mmask32) __U); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_test_epi8_mask (__m128i __A, __m128i __B) { return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128()); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_test_epi8_mask (__m256i __A, __m256i __B) { return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B), _mm256_setzero_si256()); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpneq_epi8_mask (__U, _mm256_and_si256(__A, __B), _mm256_setzero_si256()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi16_mask (__m128i __A, __m128i __B) { return _mm_cmpneq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi16_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_test_epi16_mask (__m256i __A, __m256i __B) { return _mm256_cmpneq_epi16_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256 ()); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpneq_epi16_mask (__U, _mm256_and_si256(__A, __B), _mm256_setzero_si256()); } static __inline__ __mmask16 
__DEFAULT_FN_ATTRS128 _mm_testn_epi8_mask (__m128i __A, __m128i __B) { return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpeq_epi8_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_testn_epi8_mask (__m256i __A, __m256i __B) { return _mm256_cmpeq_epi8_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpeq_epi8_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi16_mask (__m128i __A, __m128i __B) { return _mm_cmpeq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpeq_epi16_mask (__U, _mm_and_si128(__A, __B), _mm_setzero_si128()); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_testn_epi16_mask (__m256i __A, __m256i __B) { return _mm256_cmpeq_epi16_mask (_mm256_and_si256(__A, __B), _mm256_setzero_si256()); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpeq_epi16_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_movepi8_mask (__m128i __A) { return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_movepi8_mask (__m256i __A) { return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi16_mask (__m128i __A) { return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_movepi16_mask (__m256i __A) { return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi8 (__mmask16 __A) { return (__m128i) __builtin_ia32_cvtmask2b128 (__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi8 (__mmask32 __A) { return (__m256i) __builtin_ia32_cvtmask2b256 (__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi16 (__mmask8 __A) { return (__m128i) __builtin_ia32_cvtmask2w128 (__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi16 (__mmask16 __A) { return (__m256i) __builtin_ia32_cvtmask2w256 (__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectb_128(__M, (__v16qi) _mm_broadcastb_epi8(__A), (__v16qi) __O); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectb_128(__M, (__v16qi) _mm_broadcastb_epi8(__A), (__v16qi) _mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectb_256(__M, (__v32qi) _mm256_broadcastb_epi8(__A), (__v32qi) __O); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectb_256(__M, (__v32qi) 
_mm256_broadcastb_epi8(__A), (__v32qi) _mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128(__M, (__v8hi) _mm_broadcastw_epi16(__A), (__v8hi) __O); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128(__M, (__v8hi) _mm_broadcastw_epi16(__A), (__v8hi) _mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256(__M, (__v16hi) _mm256_broadcastw_epi16(__A), (__v16hi) __O); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256(__M, (__v16hi) _mm256_broadcastw_epi16(__A), (__v16hi) _mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A) { return (__m256i) __builtin_ia32_selectw_256 (__M, (__v16hi) _mm256_set1_epi16(__A), (__v16hi) __O); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi16 (__mmask16 __M, short __A) { return (__m256i) __builtin_ia32_selectw_256(__M, (__v16hi)_mm256_set1_epi16(__A), (__v16hi) _mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A) { return (__m128i) __builtin_ia32_selectw_128(__M, (__v8hi) _mm_set1_epi16(__A), (__v8hi) __O); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi16 (__mmask8 __M, short __A) { return (__m128i) __builtin_ia32_selectw_128(__M, (__v8hi) _mm_set1_epi16(__A), (__v8hi) _mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutexvar_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_permvarhi128((__v8hi) __B, (__v8hi) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_permutexvar_epi16(__A, __B), (__v8hi) _mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_permutexvar_epi16(__A, __B), (__v8hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_permvarhi256((__v16hi) __B, (__v16hi) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_permutexvar_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_permutexvar_epi16(__A, __B), (__v16hi)__W); } #define _mm_mask_alignr_epi8(W, U, A, B, N) \ ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \ (__v16qi)(__m128i)(W))) #define _mm_maskz_alignr_epi8(U, A, B, N) \ ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \ (__v16qi)_mm_setzero_si128())) #define 
_mm256_mask_alignr_epi8(W, U, A, B, N) \ ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \ (__v32qi)(__m256i)(W))) #define _mm256_maskz_alignr_epi8(U, A, B, N) \ ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \ (__v32qi)_mm256_setzero_si256())) #define _mm_dbsad_epu8(A, B, imm) \ ((__m128i)__builtin_ia32_dbpsadbw128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(imm))) #define _mm_mask_dbsad_epu8(W, U, A, B, imm) \ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \ (__v8hi)(__m128i)(W))) #define _mm_maskz_dbsad_epu8(U, A, B, imm) \ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \ (__v8hi)_mm_setzero_si128())) #define _mm256_dbsad_epu8(A, B, imm) \ ((__m256i)__builtin_ia32_dbpsadbw256((__v32qi)(__m256i)(A), \ (__v32qi)(__m256i)(B), (int)(imm))) #define _mm256_mask_dbsad_epu8(W, U, A, B, imm) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ (__v16hi)(__m256i)(W))) #define _mm256_maskz_dbsad_epu8(U, A, B, imm) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ (__v16hi)_mm256_setzero_si256())) static __inline__ short __DEFAULT_FN_ATTRS128 _mm_reduce_add_epi16(__m128i __W) { return __builtin_reduce_add((__v8hi)__W); } static __inline__ short __DEFAULT_FN_ATTRS128 _mm_reduce_mul_epi16(__m128i __W) { return __builtin_reduce_mul((__v8hi)__W); } static __inline__ short __DEFAULT_FN_ATTRS128 _mm_reduce_and_epi16(__m128i __W) { return __builtin_reduce_and((__v8hi)__W); } static __inline__ short __DEFAULT_FN_ATTRS128 _mm_reduce_or_epi16(__m128i __W) { return __builtin_reduce_or((__v8hi)__W); } static __inline__ short __DEFAULT_FN_ATTRS128 _mm_mask_reduce_add_epi16( __mmask8 __M, __m128i __W) { __W = _mm_maskz_mov_epi16(__M, __W); return __builtin_reduce_add((__v8hi)__W); } static __inline__ short __DEFAULT_FN_ATTRS128 _mm_mask_reduce_mul_epi16( __mmask8 __M, __m128i __W) { __W = _mm_mask_mov_epi16(_mm_set1_epi16(1), __M, __W); return __builtin_reduce_mul((__v8hi)__W); } static __inline__ short __DEFAULT_FN_ATTRS128 _mm_mask_reduce_and_epi16( __mmask8 __M, __m128i __W) { __W = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __W); return __builtin_reduce_and((__v8hi)__W); } static __inline__ short __DEFAULT_FN_ATTRS128 _mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W) { __W = _mm_maskz_mov_epi16(__M, __W); return __builtin_reduce_or((__v8hi)__W); } static __inline__ short __DEFAULT_FN_ATTRS128 _mm_reduce_max_epi16(__m128i __V) { return __builtin_reduce_max((__v8hi)__V); } static __inline__ unsigned short __DEFAULT_FN_ATTRS128 _mm_reduce_max_epu16(__m128i __V) { return __builtin_reduce_max((__v8hu)__V); } static __inline__ short __DEFAULT_FN_ATTRS128 _mm_reduce_min_epi16(__m128i __V) { return __builtin_reduce_min((__v8hi)__V); } static __inline__ unsigned short __DEFAULT_FN_ATTRS128 _mm_reduce_min_epu16(__m128i __V) { return __builtin_reduce_min((__v8hu)__V); } static __inline__ short __DEFAULT_FN_ATTRS128 _mm_mask_reduce_max_epi16(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi16(_mm_set1_epi16(-32767-1), __M, __V); return __builtin_reduce_max((__v8hi)__V); } static __inline__ unsigned short __DEFAULT_FN_ATTRS128 _mm_mask_reduce_max_epu16(__mmask16 __M, __m128i __V) { __V = _mm_maskz_mov_epi16(__M, __V); return __builtin_reduce_max((__v8hu)__V); } static __inline__ short 
__DEFAULT_FN_ATTRS128 _mm_mask_reduce_min_epi16(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi16(_mm_set1_epi16(32767), __M, __V); return __builtin_reduce_min((__v8hi)__V); } static __inline__ unsigned short __DEFAULT_FN_ATTRS128 _mm_mask_reduce_min_epu16(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __V); return __builtin_reduce_min((__v8hu)__V); } static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_reduce_add_epi16(__m256i __W) { return __builtin_reduce_add((__v16hi)__W); } static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_reduce_mul_epi16(__m256i __W) { return __builtin_reduce_mul((__v16hi)__W); } static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_reduce_and_epi16(__m256i __W) { return __builtin_reduce_and((__v16hi)__W); } static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_reduce_or_epi16(__m256i __W) { return __builtin_reduce_or((__v16hi)__W); } static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_add_epi16( __mmask16 __M, __m256i __W) { __W = _mm256_maskz_mov_epi16(__M, __W); return __builtin_reduce_add((__v16hi)__W); } static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_mul_epi16( __mmask16 __M, __m256i __W) { __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(1), __M, __W); return __builtin_reduce_mul((__v16hi)__W); } static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_and_epi16( __mmask16 __M, __m256i __W) { __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __W); return __builtin_reduce_and((__v16hi)__W); } static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W) { __W = _mm256_maskz_mov_epi16(__M, __W); return __builtin_reduce_or((__v16hi)__W); } static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_reduce_max_epi16(__m256i __V) { return __builtin_reduce_max((__v16hi)__V); } static __inline__ unsigned short __DEFAULT_FN_ATTRS256 _mm256_reduce_max_epu16(__m256i __V) { return __builtin_reduce_max((__v16hu)__V); } static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_reduce_min_epi16(__m256i __V) { return __builtin_reduce_min((__v16hi)__V); } static __inline__ unsigned short __DEFAULT_FN_ATTRS256 _mm256_reduce_min_epu16(__m256i __V) { return __builtin_reduce_min((__v16hu)__V); } static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __V) { __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-32767-1), __M, __V); return __builtin_reduce_max((__v16hi)__V); } static __inline__ unsigned short __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __V) { __V = _mm256_maskz_mov_epi16(__M, __V); return __builtin_reduce_max((__v16hu)__V); } static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __V) { __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(32767), __M, __V); return __builtin_reduce_min((__v16hi)__V); } static __inline__ unsigned short __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __V) { __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __V); return __builtin_reduce_min((__v16hu)__V); } static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_reduce_add_epi8(__m128i __W) { return __builtin_reduce_add((__v16qs)__W); } static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_reduce_mul_epi8(__m128i __W) { return __builtin_reduce_mul((__v16qs)__W); } static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_reduce_and_epi8(__m128i __W) { return __builtin_reduce_and((__v16qs)__W); } static __inline__ signed char __DEFAULT_FN_ATTRS128 
_mm_reduce_or_epi8(__m128i __W) { return __builtin_reduce_or((__v16qs)__W); } static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W) { __W = _mm_maskz_mov_epi8(__M, __W); return __builtin_reduce_add((__v16qs)__W); } static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W) { __W = _mm_mask_mov_epi8(_mm_set1_epi8(1), __M, __W); return __builtin_reduce_mul((__v16qs)__W); } static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W) { __W = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __W); return __builtin_reduce_and((__v16qs)__W); } static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W) { __W = _mm_maskz_mov_epi8(__M, __W); return __builtin_reduce_or((__v16qs)__W); } static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_reduce_max_epi8(__m128i __V) { return __builtin_reduce_max((__v16qs)__V); } static __inline__ unsigned char __DEFAULT_FN_ATTRS128 _mm_reduce_max_epu8(__m128i __V) { return __builtin_reduce_max((__v16qu)__V); } static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_reduce_min_epi8(__m128i __V) { return __builtin_reduce_min((__v16qs)__V); } static __inline__ unsigned char __DEFAULT_FN_ATTRS128 _mm_reduce_min_epu8(__m128i __V) { return __builtin_reduce_min((__v16qu)__V); } static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi8(_mm_set1_epi8(-127-1), __M, __V); return __builtin_reduce_max((__v16qs)__V); } static __inline__ unsigned char __DEFAULT_FN_ATTRS128 _mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __V) { __V = _mm_maskz_mov_epi8(__M, __V); return __builtin_reduce_max((__v16qu)__V); } static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi8(_mm_set1_epi8(127), __M, __V); return __builtin_reduce_min((__v16qs)__V); } static __inline__ unsigned char __DEFAULT_FN_ATTRS128 _mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __V); return __builtin_reduce_min((__v16qu)__V); } static __inline__ signed char __DEFAULT_FN_ATTRS256 _mm256_reduce_add_epi8(__m256i __W) { return __builtin_reduce_add((__v32qs)__W); } static __inline__ signed char __DEFAULT_FN_ATTRS256 _mm256_reduce_mul_epi8(__m256i __W) { return __builtin_reduce_mul((__v32qs)__W); } static __inline__ signed char __DEFAULT_FN_ATTRS256 _mm256_reduce_and_epi8(__m256i __W) { return __builtin_reduce_and((__v32qs)__W); } static __inline__ signed char __DEFAULT_FN_ATTRS256 _mm256_reduce_or_epi8(__m256i __W) { return __builtin_reduce_or((__v32qs)__W); } static __inline__ signed char __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W) { __W = _mm256_maskz_mov_epi8(__M, __W); return __builtin_reduce_add((__v32qs)__W); } static __inline__ signed char __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W) { __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(1), __M, __W); return __builtin_reduce_mul((__v32qs)__W); } static __inline__ signed char __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W) { __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __W); return __builtin_reduce_and((__v32qs)__W); } static __inline__ signed char __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W) { __W = _mm256_maskz_mov_epi8(__M, __W); return 
__builtin_reduce_or((__v32qs)__W);
}

static __inline__ signed char __DEFAULT_FN_ATTRS256
_mm256_reduce_max_epi8(__m256i __V) {
  return __builtin_reduce_max((__v32qs)__V);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS256
_mm256_reduce_max_epu8(__m256i __V) {
  return __builtin_reduce_max((__v32qu)__V);
}

static __inline__ signed char __DEFAULT_FN_ATTRS256
_mm256_reduce_min_epi8(__m256i __V) {
  return __builtin_reduce_min((__v32qs)__V);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS256
_mm256_reduce_min_epu8(__m256i __V) {
  return __builtin_reduce_min((__v32qu)__V);
}

static __inline__ signed char __DEFAULT_FN_ATTRS256
_mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __V) {
  __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-127-1), __M, __V);
  return __builtin_reduce_max((__v32qs)__V);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS256
_mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __V) {
  __V = _mm256_maskz_mov_epi8(__M, __V);
  return __builtin_reduce_max((__v32qu)__V);
}

static __inline__ signed char __DEFAULT_FN_ATTRS256
_mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __V) {
  __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(127), __M, __V);
  return __builtin_reduce_min((__v32qs)__V);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS256
_mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V) {
  __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __V);
  return __builtin_reduce_min((__v32qu)__V);
}

#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256

#endif /* __AVX512VLBWINTRIN_H */
/*===---- htmintrin.h - Standard header for PowerPC HTM ---------------===*\
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
\*===----------------------------------------------------------------------===*/

#ifndef __HTMINTRIN_H
#define __HTMINTRIN_H

#ifndef __HTM__
#error "HTM instruction set not enabled"
#endif

#ifdef __powerpc__

#include <stdint.h>

typedef uint64_t texasr_t;
typedef uint32_t texasru_t;
typedef uint32_t texasrl_t;
typedef uintptr_t tfiar_t;
typedef uintptr_t tfhar_t;

#define _HTM_STATE(CR0) ((CR0 >> 1) & 0x3)
#define _HTM_NONTRANSACTIONAL 0x0
#define _HTM_SUSPENDED 0x1
#define _HTM_TRANSACTIONAL 0x2

#define _TEXASR_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \
  (((TEXASR) >> (63-(BITNUM))) & ((1<<(SIZE))-1))
#define _TEXASRU_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \
  (((TEXASR) >> (31-(BITNUM))) & ((1<<(SIZE))-1))

#define _TEXASR_FAILURE_CODE(TEXASR) \
  _TEXASR_EXTRACT_BITS(TEXASR, 7, 8)
#define _TEXASRU_FAILURE_CODE(TEXASRU) \
  _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 8)

#define _TEXASR_FAILURE_PERSISTENT(TEXASR) \
  _TEXASR_EXTRACT_BITS(TEXASR, 7, 1)
#define _TEXASRU_FAILURE_PERSISTENT(TEXASRU) \
  _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 1)

#define _TEXASR_DISALLOWED(TEXASR) \
  _TEXASR_EXTRACT_BITS(TEXASR, 8, 1)
#define _TEXASRU_DISALLOWED(TEXASRU) \
  _TEXASRU_EXTRACT_BITS(TEXASRU, 8, 1)

#define _TEXASR_NESTING_OVERFLOW(TEXASR) \
  _TEXASR_EXTRACT_BITS(TEXASR, 9, 1)
#define _TEXASRU_NESTING_OVERFLOW(TEXASRU) \
  _TEXASRU_EXTRACT_BITS(TEXASRU, 9, 1)

#define _TEXASR_FOOTPRINT_OVERFLOW(TEXASR) \
  _TEXASR_EXTRACT_BITS(TEXASR, 10, 1)
#define _TEXASRU_FOOTPRINT_OVERFLOW(TEXASRU) \
  _TEXASRU_EXTRACT_BITS(TEXASRU, 10, 1)

#define _TEXASR_SELF_INDUCED_CONFLICT(TEXASR) \
  _TEXASR_EXTRACT_BITS(TEXASR, 11, 1)
#define _TEXASRU_SELF_INDUCED_CONFLICT(TEXASRU) \
  _TEXASRU_EXTRACT_BITS(TEXASRU, 11, 1)

#define _TEXASR_NON_TRANSACTIONAL_CONFLICT(TEXASR) \
  _TEXASR_EXTRACT_BITS(TEXASR, 12, 1)
#define _TEXASRU_NON_TRANSACTIONAL_CONFLICT(TEXASRU) \
  _TEXASRU_EXTRACT_BITS(TEXASRU, 12, 1)

#define _TEXASR_TRANSACTION_CONFLICT(TEXASR) \
  _TEXASR_EXTRACT_BITS(TEXASR, 13, 1)
#define _TEXASRU_TRANSACTION_CONFLICT(TEXASRU) \
  _TEXASRU_EXTRACT_BITS(TEXASRU, 13, 1)

#define _TEXASR_TRANSLATION_INVALIDATION_CONFLICT(TEXASR) \
  _TEXASR_EXTRACT_BITS(TEXASR, 14, 1)
#define _TEXASRU_TRANSLATION_INVALIDATION_CONFLICT(TEXASRU) \
  _TEXASRU_EXTRACT_BITS(TEXASRU, 14, 1)

#define _TEXASR_IMPLEMENTAION_SPECIFIC(TEXASR) \
  _TEXASR_EXTRACT_BITS(TEXASR, 15, 1)
#define _TEXASRU_IMPLEMENTAION_SPECIFIC(TEXASRU) \
  _TEXASRU_EXTRACT_BITS(TEXASRU, 15, 1)

#define _TEXASR_INSTRUCTION_FETCH_CONFLICT(TEXASR) \
  _TEXASR_EXTRACT_BITS(TEXASR, 16, 1)
#define _TEXASRU_INSTRUCTION_FETCH_CONFLICT(TEXASRU) \
  _TEXASRU_EXTRACT_BITS(TEXASRU, 16, 1)

#define _TEXASR_ABORT(TEXASR) \
  _TEXASR_EXTRACT_BITS(TEXASR, 31, 1)
#define _TEXASRU_ABORT(TEXASRU) \
  _TEXASRU_EXTRACT_BITS(TEXASRU, 31, 1)

#define _TEXASR_SUSPENDED(TEXASR) \
  _TEXASR_EXTRACT_BITS(TEXASR, 32, 1)

#define _TEXASR_PRIVILEGE(TEXASR) \
  _TEXASR_EXTRACT_BITS(TEXASR, 35, 2)

#define _TEXASR_FAILURE_SUMMARY(TEXASR) \
  _TEXASR_EXTRACT_BITS(TEXASR, 36, 1)

#define _TEXASR_TFIAR_EXACT(TEXASR) \
  _TEXASR_EXTRACT_BITS(TEXASR, 37, 1)

#define _TEXASR_ROT(TEXASR) \
  _TEXASR_EXTRACT_BITS(TEXASR, 38, 1)

#define _TEXASR_TRANSACTION_LEVEL(TEXASR) \
  _TEXASR_EXTRACT_BITS(TEXASR, 63, 12)

#endif /* __powerpc */

#ifdef __s390__

/* Condition codes generated by tbegin */
#define _HTM_TBEGIN_STARTED 0
#define _HTM_TBEGIN_INDETERMINATE 1
#define _HTM_TBEGIN_TRANSIENT 2
#define _HTM_TBEGIN_PERSISTENT 3

/* The abort codes below this threshold are reserved for machine use. */
#define _HTM_FIRST_USER_ABORT_CODE 256

/* The transaction diagnostic block as it is defined in the Principles
   of Operation, chapter 5-91. */
struct __htm_tdb {
  unsigned char format;              /*   0 */
  unsigned char flags;
  unsigned char reserved1[4];
  unsigned short nesting_depth;
  unsigned long long abort_code;     /*   8 */
  unsigned long long conflict_token; /*  16 */
  unsigned long long atia;           /*  24 */
  unsigned char eaid;                /*  32 */
  unsigned char dxc;
  unsigned char reserved2[2];
  unsigned int program_int_id;
  unsigned long long exception_id;   /*  40 */
  unsigned long long bea;            /*  48 */
  unsigned char reserved3[72];       /*  56 */
  unsigned long long gprs[16];       /* 128 */
} __attribute__((__packed__, __aligned__ (8)));

/* Helper intrinsics to retry tbegin in case of transient failure. */
static __inline int __attribute__((__always_inline__, __nodebug__))
__builtin_tbegin_retry_null (int __retry)
{
  int cc, i = 0;

  while ((cc = __builtin_tbegin(0)) == _HTM_TBEGIN_TRANSIENT
         && i++ < __retry)
    __builtin_tx_assist(i);

  return cc;
}

static __inline int __attribute__((__always_inline__, __nodebug__))
__builtin_tbegin_retry_tdb (void *__tdb, int __retry)
{
  int cc, i = 0;

  while ((cc = __builtin_tbegin(__tdb)) == _HTM_TBEGIN_TRANSIENT
         && i++ < __retry)
    __builtin_tx_assist(i);

  return cc;
}

#define __builtin_tbegin_retry(tdb, retry) \
  (__builtin_constant_p(tdb == 0) && tdb == 0 ? \
   __builtin_tbegin_retry_null(retry) : \
   __builtin_tbegin_retry_tdb(tdb, retry))

static __inline int __attribute__((__always_inline__, __nodebug__))
__builtin_tbegin_retry_nofloat_null (int __retry)
{
  int cc, i = 0;

  while ((cc = __builtin_tbegin_nofloat(0)) == _HTM_TBEGIN_TRANSIENT
         && i++ < __retry)
    __builtin_tx_assist(i);

  return cc;
}

static __inline int __attribute__((__always_inline__, __nodebug__))
__builtin_tbegin_retry_nofloat_tdb (void *__tdb, int __retry)
{
  int cc, i = 0;

  while ((cc = __builtin_tbegin_nofloat(__tdb)) == _HTM_TBEGIN_TRANSIENT
         && i++ < __retry)
    __builtin_tx_assist(i);

  return cc;
}

#define __builtin_tbegin_retry_nofloat(tdb, retry) \
  (__builtin_constant_p(tdb == 0) && tdb == 0 ? \
   __builtin_tbegin_retry_nofloat_null(retry) : \
   __builtin_tbegin_retry_nofloat_tdb(tdb, retry))

#endif /* __s390__ */

#endif /* __HTMINTRIN_H */
ia32intrin.h
/* ===-------- intrin.h ---------------------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/* Only include this if we're compiling for the windows platform. */
#ifndef _MSC_VER
#include_next <intrin.h>
#else

#ifndef __INTRIN_H
#define __INTRIN_H

/* First include the standard intrinsics. */
#if defined(__i386__) || defined(__x86_64__)
#include <x86intrin.h>
#endif

#if defined(__arm__)
#include <armintrin.h>
#endif

#if defined(__aarch64__)
#include <arm64intrin.h>
#endif

/* For the definition of jmp_buf. */
#if __STDC_HOSTED__
#include <setjmp.h>
#endif

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))

#if __x86_64__
#define __LPTRINT_TYPE__ __int64
#else
#define __LPTRINT_TYPE__ long
#endif

#ifdef __cplusplus
extern "C" {
#endif

#if defined(__MMX__)
/* And the random ones that aren't in those files. */
__m64 _m_from_float(float);
float _m_to_float(__m64);
#endif

/* Other assorted instruction intrinsics.
*/ void __addfsbyte(unsigned long, unsigned char); void __addfsdword(unsigned long, unsigned long); void __addfsword(unsigned long, unsigned short); void __code_seg(const char *); void __cpuid(int[4], int); void __cpuidex(int[4], int, int); __int64 __emul(int, int); unsigned __int64 __emulu(unsigned int, unsigned int); unsigned int __getcallerseflags(void); void __halt(void); unsigned char __inbyte(unsigned short); void __inbytestring(unsigned short, unsigned char *, unsigned long); void __incfsbyte(unsigned long); void __incfsdword(unsigned long); void __incfsword(unsigned long); unsigned long __indword(unsigned short); void __indwordstring(unsigned short, unsigned long *, unsigned long); void __int2c(void); void __invlpg(void *); unsigned short __inword(unsigned short); void __inwordstring(unsigned short, unsigned short *, unsigned long); void __lidt(void *); unsigned __int64 __ll_lshift(unsigned __int64, int); __int64 __ll_rshift(__int64, int); void __movsb(unsigned char *, unsigned char const *, size_t); void __movsd(unsigned long *, unsigned long const *, size_t); void __movsw(unsigned short *, unsigned short const *, size_t); void __nop(void); void __nvreg_restore_fence(void); void __nvreg_save_fence(void); void __outbyte(unsigned short, unsigned char); void __outbytestring(unsigned short, unsigned char *, unsigned long); void __outdword(unsigned short, unsigned long); void __outdwordstring(unsigned short, unsigned long *, unsigned long); void __outword(unsigned short, unsigned short); void __outwordstring(unsigned short, unsigned short *, unsigned long); unsigned long __readcr0(void); unsigned long __readcr2(void); unsigned __LPTRINT_TYPE__ __readcr3(void); unsigned long __readcr4(void); unsigned long __readcr8(void); unsigned int __readdr(unsigned int); #ifdef __i386__ unsigned char __readfsbyte(unsigned long); unsigned short __readfsword(unsigned long); unsigned long __readfsdword(unsigned long); unsigned __int64 __readfsqword(unsigned long); #endif unsigned __int64 __readmsr(unsigned long); unsigned __int64 __readpmc(unsigned long); unsigned long __segmentlimit(unsigned long); void __sidt(void *); void __stosb(unsigned char *, unsigned char, size_t); void __stosd(unsigned long *, unsigned long, size_t); void __stosw(unsigned short *, unsigned short, size_t); void __svm_clgi(void); void __svm_invlpga(void *, int); void __svm_skinit(int); void __svm_stgi(void); void __svm_vmload(size_t); void __svm_vmrun(size_t); void __svm_vmsave(size_t); void __ud2(void); unsigned __int64 __ull_rshift(unsigned __int64, int); void __vmx_off(void); void __vmx_vmptrst(unsigned __int64 *); void __wbinvd(void); void __writecr0(unsigned int); void __writecr3(unsigned __INTPTR_TYPE__); void __writecr4(unsigned int); void __writecr8(unsigned int); void __writedr(unsigned int, unsigned int); void __writefsbyte(unsigned long, unsigned char); void __writefsdword(unsigned long, unsigned long); void __writefsqword(unsigned long, unsigned __int64); void __writefsword(unsigned long, unsigned short); void __writemsr(unsigned long, unsigned __int64); void *_AddressOfReturnAddress(void); unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask); unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask); unsigned char _bittest(long const *, long); unsigned char _bittestandcomplement(long *, long); unsigned char _bittestandreset(long *, long); unsigned char _bittestandset(long *, long); void __cdecl _disable(void); void __cdecl _enable(void); long _InterlockedAddLargeStatistic(__int64 
volatile *_Addend, long _Value); unsigned char _interlockedbittestandreset(long volatile *, long); unsigned char _interlockedbittestandset(long volatile *, long); void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *, void *); void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *, void *); long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long); long _InterlockedExchangeAdd_HLERelease(long volatile *, long); __int64 _InterlockedExchangeAdd64_HLEAcquire(__int64 volatile *, __int64); __int64 _InterlockedExchangeAdd64_HLERelease(__int64 volatile *, __int64); void _ReadBarrier(void); void _ReadWriteBarrier(void); unsigned int _rorx_u32(unsigned int, const unsigned int); int _sarx_i32(int, unsigned int); #if __STDC_HOSTED__ int __cdecl _setjmp(jmp_buf); #endif unsigned int _shlx_u32(unsigned int, unsigned int); unsigned int _shrx_u32(unsigned int, unsigned int); void _Store_HLERelease(long volatile *, long); void _Store64_HLERelease(__int64 volatile *, __int64); void _StorePointer_HLERelease(void *volatile *, void *); void _WriteBarrier(void); unsigned __int32 xbegin(void); void _xend(void); /* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */ #ifdef __x86_64__ void __addgsbyte(unsigned long, unsigned char); void __addgsdword(unsigned long, unsigned long); void __addgsqword(unsigned long, unsigned __int64); void __addgsword(unsigned long, unsigned short); void __faststorefence(void); void __incgsbyte(unsigned long); void __incgsdword(unsigned long); void __incgsqword(unsigned long); void __incgsword(unsigned long); void __movsq(unsigned long long *, unsigned long long const *, size_t); unsigned char __readgsbyte(unsigned long); unsigned long __readgsdword(unsigned long); unsigned __int64 __readgsqword(unsigned long); unsigned short __readgsword(unsigned long); unsigned __int64 __shiftleft128(unsigned __int64 _LowPart, unsigned __int64 _HighPart, unsigned char _Shift); unsigned __int64 __shiftright128(unsigned __int64 _LowPart, unsigned __int64 _HighPart, unsigned char _Shift); void __stosq(unsigned __int64 *, unsigned __int64, size_t); unsigned char __vmx_on(unsigned __int64 *); unsigned char __vmx_vmclear(unsigned __int64 *); unsigned char __vmx_vmlaunch(void); unsigned char __vmx_vmptrld(unsigned __int64 *); unsigned char __vmx_vmread(size_t, size_t *); unsigned char __vmx_vmresume(void); unsigned char __vmx_vmwrite(size_t, size_t); void __writegsbyte(unsigned long, unsigned char); void __writegsdword(unsigned long, unsigned long); void __writegsqword(unsigned long, unsigned __int64); void __writegsword(unsigned long, unsigned short); unsigned char _bittest64(__int64 const *, __int64); unsigned char _bittestandcomplement64(__int64 *, __int64); unsigned char _bittestandreset64(__int64 *, __int64); unsigned char _bittestandset64(__int64 *, __int64); long _InterlockedAnd_np(long volatile *_Value, long _Mask); short _InterlockedAnd16_np(short volatile *_Value, short _Mask); __int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask); char _InterlockedAnd8_np(char volatile *_Value, char _Mask); unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64); unsigned char _interlockedbittestandset64(__int64 volatile *, __int64); long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange, long _Comparand); unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination, __int64 _ExchangeHigh, __int64 _ExchangeLow, __int64 *_ComparandResult); short 
_InterlockedCompareExchange16_np(short volatile *_Destination, short _Exchange, short _Comparand); __int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand); void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination, void *_Exchange, void *_Comparand); long _InterlockedOr_np(long volatile *_Value, long _Mask); short _InterlockedOr16_np(short volatile *_Value, short _Mask); __int64 _InterlockedOr64_np(__int64 volatile *_Value, __int64 _Mask); char _InterlockedOr8_np(char volatile *_Value, char _Mask); long _InterlockedXor_np(long volatile *_Value, long _Mask); short _InterlockedXor16_np(short volatile *_Value, short _Mask); __int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask); char _InterlockedXor8_np(char volatile *_Value, char _Mask); unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int); __int64 _sarx_i64(__int64, unsigned int); unsigned __int64 _shlx_u64(unsigned __int64, unsigned int); unsigned __int64 _shrx_u64(unsigned __int64, unsigned int); __int64 __mulh(__int64, __int64); unsigned __int64 __umulh(unsigned __int64, unsigned __int64); __int64 _mul128(__int64, __int64, __int64*); unsigned __int64 _umul128(unsigned __int64, unsigned __int64, unsigned __int64*); #endif /* __x86_64__ */ #if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask); unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask); #endif #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) __int64 _InterlockedDecrement64(__int64 volatile *_Addend); __int64 _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value); __int64 _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value); __int64 _InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value); __int64 _InterlockedIncrement64(__int64 volatile *_Addend); __int64 _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask); __int64 _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask); __int64 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask); #endif /*----------------------------------------------------------------------------*\ |* Interlocked Exchange Add \*----------------------------------------------------------------------------*/ #if defined(__arm__) || defined(__aarch64__) char _InterlockedExchangeAdd8_acq(char volatile *_Addend, char _Value); char _InterlockedExchangeAdd8_nf(char volatile *_Addend, char _Value); char _InterlockedExchangeAdd8_rel(char volatile *_Addend, char _Value); short _InterlockedExchangeAdd16_acq(short volatile *_Addend, short _Value); short _InterlockedExchangeAdd16_nf(short volatile *_Addend, short _Value); short _InterlockedExchangeAdd16_rel(short volatile *_Addend, short _Value); long _InterlockedExchangeAdd_acq(long volatile *_Addend, long _Value); long _InterlockedExchangeAdd_nf(long volatile *_Addend, long _Value); long _InterlockedExchangeAdd_rel(long volatile *_Addend, long _Value); __int64 _InterlockedExchangeAdd64_acq(__int64 volatile *_Addend, __int64 _Value); __int64 _InterlockedExchangeAdd64_nf(__int64 volatile *_Addend, __int64 _Value); __int64 _InterlockedExchangeAdd64_rel(__int64 volatile *_Addend, __int64 _Value); #endif /*----------------------------------------------------------------------------*\ |* Interlocked Increment \*----------------------------------------------------------------------------*/ #if defined(__arm__) || 
defined(__aarch64__) short _InterlockedIncrement16_acq(short volatile *_Value); short _InterlockedIncrement16_nf(short volatile *_Value); short _InterlockedIncrement16_rel(short volatile *_Value); long _InterlockedIncrement_acq(long volatile *_Value); long _InterlockedIncrement_nf(long volatile *_Value); long _InterlockedIncrement_rel(long volatile *_Value); __int64 _InterlockedIncrement64_acq(__int64 volatile *_Value); __int64 _InterlockedIncrement64_nf(__int64 volatile *_Value); __int64 _InterlockedIncrement64_rel(__int64 volatile *_Value); #endif /*----------------------------------------------------------------------------*\ |* Interlocked Decrement \*----------------------------------------------------------------------------*/ #if defined(__arm__) || defined(__aarch64__) short _InterlockedDecrement16_acq(short volatile *_Value); short _InterlockedDecrement16_nf(short volatile *_Value); short _InterlockedDecrement16_rel(short volatile *_Value); long _InterlockedDecrement_acq(long volatile *_Value); long _InterlockedDecrement_nf(long volatile *_Value); long _InterlockedDecrement_rel(long volatile *_Value); __int64 _InterlockedDecrement64_acq(__int64 volatile *_Value); __int64 _InterlockedDecrement64_nf(__int64 volatile *_Value); __int64 _InterlockedDecrement64_rel(__int64 volatile *_Value); #endif /*----------------------------------------------------------------------------*\ |* Interlocked And \*----------------------------------------------------------------------------*/ #if defined(__arm__) || defined(__aarch64__) char _InterlockedAnd8_acq(char volatile *_Value, char _Mask); char _InterlockedAnd8_nf(char volatile *_Value, char _Mask); char _InterlockedAnd8_rel(char volatile *_Value, char _Mask); short _InterlockedAnd16_acq(short volatile *_Value, short _Mask); short _InterlockedAnd16_nf(short volatile *_Value, short _Mask); short _InterlockedAnd16_rel(short volatile *_Value, short _Mask); long _InterlockedAnd_acq(long volatile *_Value, long _Mask); long _InterlockedAnd_nf(long volatile *_Value, long _Mask); long _InterlockedAnd_rel(long volatile *_Value, long _Mask); __int64 _InterlockedAnd64_acq(__int64 volatile *_Value, __int64 _Mask); __int64 _InterlockedAnd64_nf(__int64 volatile *_Value, __int64 _Mask); __int64 _InterlockedAnd64_rel(__int64 volatile *_Value, __int64 _Mask); #endif /*----------------------------------------------------------------------------*\ |* Bit Counting and Testing \*----------------------------------------------------------------------------*/ #if defined(__arm__) || defined(__aarch64__) unsigned char _interlockedbittestandset_acq(long volatile *_BitBase, long _BitPos); unsigned char _interlockedbittestandset_nf(long volatile *_BitBase, long _BitPos); unsigned char _interlockedbittestandset_rel(long volatile *_BitBase, long _BitPos); unsigned char _interlockedbittestandreset_acq(long volatile *_BitBase, long _BitPos); unsigned char _interlockedbittestandreset_nf(long volatile *_BitBase, long _BitPos); unsigned char _interlockedbittestandreset_rel(long volatile *_BitBase, long _BitPos); #endif /*----------------------------------------------------------------------------*\ |* Interlocked Or \*----------------------------------------------------------------------------*/ #if defined(__arm__) || defined(__aarch64__) char _InterlockedOr8_acq(char volatile *_Value, char _Mask); char _InterlockedOr8_nf(char volatile *_Value, char _Mask); char _InterlockedOr8_rel(char volatile *_Value, char _Mask); short _InterlockedOr16_acq(short volatile *_Value, short 
_Mask); short _InterlockedOr16_nf(short volatile *_Value, short _Mask); short _InterlockedOr16_rel(short volatile *_Value, short _Mask); long _InterlockedOr_acq(long volatile *_Value, long _Mask); long _InterlockedOr_nf(long volatile *_Value, long _Mask); long _InterlockedOr_rel(long volatile *_Value, long _Mask); __int64 _InterlockedOr64_acq(__int64 volatile *_Value, __int64 _Mask); __int64 _InterlockedOr64_nf(__int64 volatile *_Value, __int64 _Mask); __int64 _InterlockedOr64_rel(__int64 volatile *_Value, __int64 _Mask); #endif /*----------------------------------------------------------------------------*\ |* Interlocked Xor \*----------------------------------------------------------------------------*/ #if defined(__arm__) || defined(__aarch64__) char _InterlockedXor8_acq(char volatile *_Value, char _Mask); char _InterlockedXor8_nf(char volatile *_Value, char _Mask); char _InterlockedXor8_rel(char volatile *_Value, char _Mask); short _InterlockedXor16_acq(short volatile *_Value, short _Mask); short _InterlockedXor16_nf(short volatile *_Value, short _Mask); short _InterlockedXor16_rel(short volatile *_Value, short _Mask); long _InterlockedXor_acq(long volatile *_Value, long _Mask); long _InterlockedXor_nf(long volatile *_Value, long _Mask); long _InterlockedXor_rel(long volatile *_Value, long _Mask); __int64 _InterlockedXor64_acq(__int64 volatile *_Value, __int64 _Mask); __int64 _InterlockedXor64_nf(__int64 volatile *_Value, __int64 _Mask); __int64 _InterlockedXor64_rel(__int64 volatile *_Value, __int64 _Mask); #endif /*----------------------------------------------------------------------------*\ |* Interlocked Exchange \*----------------------------------------------------------------------------*/ #if defined(__arm__) || defined(__aarch64__) char _InterlockedExchange8_acq(char volatile *_Target, char _Value); char _InterlockedExchange8_nf(char volatile *_Target, char _Value); char _InterlockedExchange8_rel(char volatile *_Target, char _Value); short _InterlockedExchange16_acq(short volatile *_Target, short _Value); short _InterlockedExchange16_nf(short volatile *_Target, short _Value); short _InterlockedExchange16_rel(short volatile *_Target, short _Value); long _InterlockedExchange_acq(long volatile *_Target, long _Value); long _InterlockedExchange_nf(long volatile *_Target, long _Value); long _InterlockedExchange_rel(long volatile *_Target, long _Value); __int64 _InterlockedExchange64_acq(__int64 volatile *_Target, __int64 _Value); __int64 _InterlockedExchange64_nf(__int64 volatile *_Target, __int64 _Value); __int64 _InterlockedExchange64_rel(__int64 volatile *_Target, __int64 _Value); #endif /*----------------------------------------------------------------------------*\ |* Interlocked Compare Exchange \*----------------------------------------------------------------------------*/ #if defined(__arm__) || defined(__aarch64__) char _InterlockedCompareExchange8_acq(char volatile *_Destination, char _Exchange, char _Comparand); char _InterlockedCompareExchange8_nf(char volatile *_Destination, char _Exchange, char _Comparand); char _InterlockedCompareExchange8_rel(char volatile *_Destination, char _Exchange, char _Comparand); short _InterlockedCompareExchange16_acq(short volatile *_Destination, short _Exchange, short _Comparand); short _InterlockedCompareExchange16_nf(short volatile *_Destination, short _Exchange, short _Comparand); short _InterlockedCompareExchange16_rel(short volatile *_Destination, short _Exchange, short _Comparand); long _InterlockedCompareExchange_acq(long 
volatile *_Destination, long _Exchange, long _Comparand); long _InterlockedCompareExchange_nf(long volatile *_Destination, long _Exchange, long _Comparand); long _InterlockedCompareExchange_rel(long volatile *_Destination, long _Exchange, long _Comparand); __int64 _InterlockedCompareExchange64_acq(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand); __int64 _InterlockedCompareExchange64_nf(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand); __int64 _InterlockedCompareExchange64_rel(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand); #endif #if defined(__x86_64__) || defined(__aarch64__) unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination, __int64 _ExchangeHigh, __int64 _ExchangeLow, __int64 *_ComparandResult); #endif #if defined(__aarch64__) unsigned char _InterlockedCompareExchange128_acq(__int64 volatile *_Destination, __int64 _ExchangeHigh, __int64 _ExchangeLow, __int64 *_ComparandResult); unsigned char _InterlockedCompareExchange128_nf(__int64 volatile *_Destination, __int64 _ExchangeHigh, __int64 _ExchangeLow, __int64 *_ComparandResult); unsigned char _InterlockedCompareExchange128_rel(__int64 volatile *_Destination, __int64 _ExchangeHigh, __int64 _ExchangeLow, __int64 *_ComparandResult); #endif /*----------------------------------------------------------------------------*\ |* movs, stos \*----------------------------------------------------------------------------*/ #if defined(__i386__) || defined(__x86_64__) static __inline__ void __DEFAULT_FN_ATTRS __movsb(unsigned char *__dst, unsigned char const *__src, size_t __n) { #if defined(__x86_64__) __asm__ __volatile__("rep movsb" : "+D"(__dst), "+S"(__src), "+c"(__n) : : "memory"); #else __asm__ __volatile__("xchg {%%esi, %1|%1, esi}\n" "rep movsb\n" "xchg {%%esi, %1|%1, esi}" : "+D"(__dst), "+r"(__src), "+c"(__n) : : "memory"); #endif } static __inline__ void __DEFAULT_FN_ATTRS __movsd(unsigned long *__dst, unsigned long const *__src, size_t __n) { #if defined(__x86_64__) __asm__ __volatile__("rep movs{l|d}" : "+D"(__dst), "+S"(__src), "+c"(__n) : : "memory"); #else __asm__ __volatile__("xchg {%%esi, %1|%1, esi}\n" "rep movs{l|d}\n" "xchg {%%esi, %1|%1, esi}" : "+D"(__dst), "+r"(__src), "+c"(__n) : : "memory"); #endif } static __inline__ void __DEFAULT_FN_ATTRS __movsw(unsigned short *__dst, unsigned short const *__src, size_t __n) { #if defined(__x86_64__) __asm__ __volatile__("rep movsw" : "+D"(__dst), "+S"(__src), "+c"(__n) : : "memory"); #else __asm__ __volatile__("xchg {%%esi, %1|%1, esi}\n" "rep movsw\n" "xchg {%%esi, %1|%1, esi}" : "+D"(__dst), "+r"(__src), "+c"(__n) : : "memory"); #endif } static __inline__ void __DEFAULT_FN_ATTRS __stosd(unsigned long *__dst, unsigned long __x, size_t __n) { __asm__ __volatile__("rep stos{l|d}" : "+D"(__dst), "+c"(__n) : "a"(__x) : "memory"); } static __inline__ void __DEFAULT_FN_ATTRS __stosw(unsigned short *__dst, unsigned short __x, size_t __n) { __asm__ __volatile__("rep stosw" : "+D"(__dst), "+c"(__n) : "a"(__x) : "memory"); } #endif #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS __movsq( unsigned long long *__dst, unsigned long long const *__src, size_t __n) { __asm__ __volatile__("rep movsq" : "+D"(__dst), "+S"(__src), "+c"(__n) : : "memory"); } static __inline__ void __DEFAULT_FN_ATTRS __stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) { __asm__ __volatile__("rep stosq" : "+D"(__dst), "+c"(__n) : "a"(__x) : "memory"); } #endif 
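/*
 * Illustrative sketch (not part of the upstream header): how the rep-string
 * helpers defined above can serve as small fixed-width fill/copy primitives
 * in MSVC-compatibility mode on x86/x86-64. The function and variable names
 * below are hypothetical examples introduced only for this note.
 */
#if 0 /* example only, kept out of compilation */
static void __example_fill_then_copy(void) {
  unsigned long __src[16];
  unsigned long __dst[16];
  __stosd(__src, 0x01020304UL, 16); /* rep stosd: fill 16 dwords with a pattern */
  __movsd(__dst, __src, 16);        /* rep movsd: copy the 16 dwords into __dst */
}
#endif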
/*----------------------------------------------------------------------------*\ |* Misc \*----------------------------------------------------------------------------*/ #if defined(__i386__) || defined(__x86_64__) static __inline__ void __DEFAULT_FN_ATTRS __halt(void) { __asm__ volatile("hlt"); } #endif #if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) static __inline__ void __DEFAULT_FN_ATTRS __nop(void) { __asm__ volatile("nop"); } #endif /*----------------------------------------------------------------------------*\ |* MS AArch64 specific \*----------------------------------------------------------------------------*/ #if defined(__aarch64__) unsigned __int64 __getReg(int); long _InterlockedAdd(long volatile *Addend, long Value); __int64 _InterlockedAdd64(__int64 volatile *Addend, __int64 Value); __int64 _ReadStatusReg(int); void _WriteStatusReg(int, __int64); unsigned short __cdecl _byteswap_ushort(unsigned short val); unsigned long __cdecl _byteswap_ulong (unsigned long val); unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64 val); __int64 __mulh(__int64 __a, __int64 __b); unsigned __int64 __umulh(unsigned __int64 __a, unsigned __int64 __b); void __break(int); void __writex18byte(unsigned long offset, unsigned char data); void __writex18word(unsigned long offset, unsigned short data); void __writex18dword(unsigned long offset, unsigned long data); void __writex18qword(unsigned long offset, unsigned __int64 data); unsigned char __readx18byte(unsigned long offset); unsigned short __readx18word(unsigned long offset); unsigned long __readx18dword(unsigned long offset); unsigned __int64 __readx18qword(unsigned long offset); double _CopyDoubleFromInt64(__int64); float _CopyFloatFromInt32(__int32); __int32 _CopyInt32FromFloat(float); __int64 _CopyInt64FromDouble(double); unsigned int _CountLeadingOnes(unsigned long); unsigned int _CountLeadingOnes64(unsigned __int64); unsigned int _CountLeadingSigns(long); unsigned int _CountLeadingSigns64(__int64); unsigned int _CountLeadingZeros(unsigned long); unsigned int _CountLeadingZeros64(unsigned _int64); unsigned int _CountOneBits(unsigned long); unsigned int _CountOneBits64(unsigned __int64); void __cdecl __prefetch(void *); #endif /*----------------------------------------------------------------------------*\ |* Privileged intrinsics \*----------------------------------------------------------------------------*/ #if defined(__i386__) || defined(__x86_64__) static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS __readmsr(unsigned long __register) { // Loads the contents of a 64-bit model specific register (MSR) specified in // the ECX register into registers EDX:EAX. The EDX register is loaded with // the high-order 32 bits of the MSR and the EAX register is loaded with the // low-order 32 bits. If less than 64 bits are implemented in the MSR being // read, the values returned to EDX:EAX in unimplemented bit locations are // undefined. 
  unsigned long __edx;
  unsigned long __eax;
  __asm__ ("rdmsr" : "=d"(__edx), "=a"(__eax) : "c"(__register));
  return (((unsigned __int64)__edx) << 32) | (unsigned __int64)__eax;
}
#endif

static __inline__ unsigned __LPTRINT_TYPE__ __DEFAULT_FN_ATTRS __readcr3(void) {
  unsigned __LPTRINT_TYPE__ __cr3_val;
  __asm__ __volatile__(
      "mov {%%cr3, %0|%0, cr3}"
      : "=r"(__cr3_val)
      :
      : "memory");
  return __cr3_val;
}

static __inline__ void __DEFAULT_FN_ATTRS
__writecr3(unsigned __INTPTR_TYPE__ __cr3_val) {
  __asm__ ("mov {%0, %%cr3|cr3, %0}" : : "r"(__cr3_val) : "memory");
}

#ifdef __cplusplus
}
#endif

#undef __LPTRINT_TYPE__
#undef __DEFAULT_FN_ATTRS

#endif /* __INTRIN_H */
#endif /* _MSC_VER */

//===-- Wrapper for C standard inttypes.h declarations on the GPU ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef __CLANG_LLVM_LIBC_WRAPPERS_INTTYPES_H__
#define __CLANG_LLVM_LIBC_WRAPPERS_INTTYPES_H__

#if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__)
#error "This file is for GPU offloading compilation only"
#endif

#include_next <inttypes.h>

#if __has_include(<llvm-libc-decls/inttypes.h>)

#if defined(__HIP__) || defined(__CUDA__)
#define __LIBC_ATTRS __attribute__((device))
#endif

#pragma omp begin declare target

#include <llvm-libc-decls/inttypes.h>

#pragma omp end declare target

#undef __LIBC_ATTRS

#endif

#endif // __CLANG_LLVM_LIBC_WRAPPERS_INTTYPES_H__

/*===---- __clang_hip_cmath.h - HIP cmath decls -----------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __CLANG_HIP_CMATH_H__
#define __CLANG_HIP_CMATH_H__

#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
#endif #if !defined(__HIPCC_RTC__) #if defined(__cplusplus) #include #include #include #endif #include #include #endif // !defined(__HIPCC_RTC__) #pragma push_macro("__DEVICE__") #pragma push_macro("__CONSTEXPR__") #ifdef __OPENMP_AMDGCN__ #define __DEVICE__ static __attribute__((always_inline, nothrow)) #define __CONSTEXPR__ constexpr #else #define __DEVICE__ static __device__ inline __attribute__((always_inline)) #define __CONSTEXPR__ #endif // __OPENMP_AMDGCN__ // Start with functions that cannot be defined by DEF macros below. #if defined(__cplusplus) #if defined __OPENMP_AMDGCN__ __DEVICE__ __CONSTEXPR__ float fabs(float __x) { return ::fabsf(__x); } __DEVICE__ __CONSTEXPR__ float sin(float __x) { return ::sinf(__x); } __DEVICE__ __CONSTEXPR__ float cos(float __x) { return ::cosf(__x); } #endif __DEVICE__ __CONSTEXPR__ double abs(double __x) { return ::fabs(__x); } __DEVICE__ __CONSTEXPR__ float abs(float __x) { return ::fabsf(__x); } __DEVICE__ __CONSTEXPR__ long long abs(long long __n) { return ::llabs(__n); } __DEVICE__ __CONSTEXPR__ long abs(long __n) { return ::labs(__n); } __DEVICE__ __CONSTEXPR__ float fma(float __x, float __y, float __z) { return ::fmaf(__x, __y, __z); } #if !defined(__HIPCC_RTC__) // The value returned by fpclassify is platform dependent, therefore it is not // supported by hipRTC. __DEVICE__ __CONSTEXPR__ int fpclassify(float __x) { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } __DEVICE__ __CONSTEXPR__ int fpclassify(double __x) { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } #endif // !defined(__HIPCC_RTC__) __DEVICE__ __CONSTEXPR__ float frexp(float __arg, int *__exp) { return ::frexpf(__arg, __exp); } #if defined(__OPENMP_AMDGCN__) // For OpenMP we work around some old system headers that have non-conforming // `isinf(float)` and `isnan(float)` implementations that return an `int`. We do // this by providing two versions of these functions, differing only in the // return type. To avoid conflicting definitions we disable implicit base // function generation. That means we will end up with two specializations, one // per type, but only one has a base function defined by the system header. #pragma omp begin declare variant match( \ implementation = {extension(disable_implicit_base)}) // FIXME: We lack an extension to customize the mangling of the variants, e.g., // add a suffix. This means we would clash with the names of the variants // (note that we do not create implicit base functions here). To avoid // this clash we add a new trait to some of them that is always true // (this is LLVM after all ;)). It will only influence the mangled name // of the variants inside the inner region and avoid the clash. 
#pragma omp begin declare variant match(implementation = {vendor(llvm)}) __DEVICE__ __CONSTEXPR__ int isinf(float __x) { return ::__isinff(__x); } __DEVICE__ __CONSTEXPR__ int isinf(double __x) { return ::__isinf(__x); } __DEVICE__ __CONSTEXPR__ int isfinite(float __x) { return ::__finitef(__x); } __DEVICE__ __CONSTEXPR__ int isfinite(double __x) { return ::__finite(__x); } __DEVICE__ __CONSTEXPR__ int isnan(float __x) { return ::__isnanf(__x); } __DEVICE__ __CONSTEXPR__ int isnan(double __x) { return ::__isnan(__x); } #pragma omp end declare variant #endif // defined(__OPENMP_AMDGCN__) __DEVICE__ __CONSTEXPR__ bool isinf(float __x) { return ::__isinff(__x); } __DEVICE__ __CONSTEXPR__ bool isinf(double __x) { return ::__isinf(__x); } __DEVICE__ __CONSTEXPR__ bool isfinite(float __x) { return ::__finitef(__x); } __DEVICE__ __CONSTEXPR__ bool isfinite(double __x) { return ::__finite(__x); } __DEVICE__ __CONSTEXPR__ bool isnan(float __x) { return ::__isnanf(__x); } __DEVICE__ __CONSTEXPR__ bool isnan(double __x) { return ::__isnan(__x); } #if defined(__OPENMP_AMDGCN__) #pragma omp end declare variant #endif // defined(__OPENMP_AMDGCN__) __DEVICE__ __CONSTEXPR__ bool isgreater(float __x, float __y) { return __builtin_isgreater(__x, __y); } __DEVICE__ __CONSTEXPR__ bool isgreater(double __x, double __y) { return __builtin_isgreater(__x, __y); } __DEVICE__ __CONSTEXPR__ bool isgreaterequal(float __x, float __y) { return __builtin_isgreaterequal(__x, __y); } __DEVICE__ __CONSTEXPR__ bool isgreaterequal(double __x, double __y) { return __builtin_isgreaterequal(__x, __y); } __DEVICE__ __CONSTEXPR__ bool isless(float __x, float __y) { return __builtin_isless(__x, __y); } __DEVICE__ __CONSTEXPR__ bool isless(double __x, double __y) { return __builtin_isless(__x, __y); } __DEVICE__ __CONSTEXPR__ bool islessequal(float __x, float __y) { return __builtin_islessequal(__x, __y); } __DEVICE__ __CONSTEXPR__ bool islessequal(double __x, double __y) { return __builtin_islessequal(__x, __y); } __DEVICE__ __CONSTEXPR__ bool islessgreater(float __x, float __y) { return __builtin_islessgreater(__x, __y); } __DEVICE__ __CONSTEXPR__ bool islessgreater(double __x, double __y) { return __builtin_islessgreater(__x, __y); } __DEVICE__ __CONSTEXPR__ bool isnormal(float __x) { return __builtin_isnormal(__x); } __DEVICE__ __CONSTEXPR__ bool isnormal(double __x) { return __builtin_isnormal(__x); } __DEVICE__ __CONSTEXPR__ bool isunordered(float __x, float __y) { return __builtin_isunordered(__x, __y); } __DEVICE__ __CONSTEXPR__ bool isunordered(double __x, double __y) { return __builtin_isunordered(__x, __y); } __DEVICE__ __CONSTEXPR__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); } __DEVICE__ __CONSTEXPR__ float pow(float __base, int __iexp) { return ::powif(__base, __iexp); } __DEVICE__ __CONSTEXPR__ double pow(double __base, int __iexp) { return ::powi(__base, __iexp); } __DEVICE__ __CONSTEXPR__ float remquo(float __x, float __y, int *__quo) { return ::remquof(__x, __y, __quo); } __DEVICE__ __CONSTEXPR__ float scalbln(float __x, long int __n) { return ::scalblnf(__x, __n); } __DEVICE__ __CONSTEXPR__ bool signbit(float __x) { return ::__signbitf(__x); } __DEVICE__ __CONSTEXPR__ bool signbit(double __x) { return ::__signbit(__x); } // Notably missing above is nexttoward. We omit it because // ocml doesn't provide an implementation, and we don't want to be in the // business of implementing tricky libm functions in this header. // Other functions. 
__DEVICE__ __CONSTEXPR__ _Float16 fma(_Float16 __x, _Float16 __y, _Float16 __z) { return __builtin_fmaf16(__x, __y, __z); } __DEVICE__ __CONSTEXPR__ _Float16 pow(_Float16 __base, int __iexp) { return __ocml_pown_f16(__base, __iexp); } #ifndef __OPENMP_AMDGCN__ // BEGIN DEF_FUN and HIP_OVERLOAD // BEGIN DEF_FUN #pragma push_macro("__DEF_FUN1") #pragma push_macro("__DEF_FUN2") #pragma push_macro("__DEF_FUN2_FI") // Define cmath functions with float argument and returns __retty. #define __DEF_FUN1(__retty, __func) \ __DEVICE__ __CONSTEXPR__ __retty __func(float __x) { return __func##f(__x); } // Define cmath functions with two float arguments and returns __retty. #define __DEF_FUN2(__retty, __func) \ __DEVICE__ __CONSTEXPR__ __retty __func(float __x, float __y) { \ return __func##f(__x, __y); \ } // Define cmath functions with a float and an int argument and returns __retty. #define __DEF_FUN2_FI(__retty, __func) \ __DEVICE__ __CONSTEXPR__ __retty __func(float __x, int __y) { \ return __func##f(__x, __y); \ } __DEF_FUN1(float, acos) __DEF_FUN1(float, acosh) __DEF_FUN1(float, asin) __DEF_FUN1(float, asinh) __DEF_FUN1(float, atan) __DEF_FUN2(float, atan2) __DEF_FUN1(float, atanh) __DEF_FUN1(float, cbrt) __DEF_FUN1(float, ceil) __DEF_FUN2(float, copysign) __DEF_FUN1(float, cos) __DEF_FUN1(float, cosh) __DEF_FUN1(float, erf) __DEF_FUN1(float, erfc) __DEF_FUN1(float, exp) __DEF_FUN1(float, exp2) __DEF_FUN1(float, expm1) __DEF_FUN1(float, fabs) __DEF_FUN2(float, fdim) __DEF_FUN1(float, floor) __DEF_FUN2(float, fmax) __DEF_FUN2(float, fmin) __DEF_FUN2(float, fmod) __DEF_FUN2(float, hypot) __DEF_FUN1(int, ilogb) __DEF_FUN2_FI(float, ldexp) __DEF_FUN1(float, lgamma) __DEF_FUN1(float, log) __DEF_FUN1(float, log10) __DEF_FUN1(float, log1p) __DEF_FUN1(float, log2) __DEF_FUN1(float, logb) __DEF_FUN1(long long, llrint) __DEF_FUN1(long long, llround) __DEF_FUN1(long, lrint) __DEF_FUN1(long, lround) __DEF_FUN1(float, nearbyint) __DEF_FUN2(float, nextafter) __DEF_FUN2(float, pow) __DEF_FUN2(float, remainder) __DEF_FUN1(float, rint) __DEF_FUN1(float, round) __DEF_FUN2_FI(float, scalbn) __DEF_FUN1(float, sin) __DEF_FUN1(float, sinh) __DEF_FUN1(float, sqrt) __DEF_FUN1(float, tan) __DEF_FUN1(float, tanh) __DEF_FUN1(float, tgamma) __DEF_FUN1(float, trunc) #pragma pop_macro("__DEF_FUN1") #pragma pop_macro("__DEF_FUN2") #pragma pop_macro("__DEF_FUN2_FI") // END DEF_FUN // BEGIN HIP_OVERLOAD #pragma push_macro("__HIP_OVERLOAD1") #pragma push_macro("__HIP_OVERLOAD2") // __hip_enable_if::type is a type function which returns __T if __B is true. 
template struct __hip_enable_if {}; template struct __hip_enable_if { typedef __T type; }; namespace __hip { template struct is_integral { enum { value = 0 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; // ToDo: specializes is_arithmetic<_Float16> template struct is_arithmetic { enum { value = 0 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; struct true_type { static const __constant__ bool value = true; }; struct false_type { static const __constant__ bool value = false; }; template struct is_same : public false_type {}; template struct is_same<__T, __T> : public true_type {}; template struct add_rvalue_reference { typedef __T &&type; }; template typename add_rvalue_reference<__T>::type declval(); // decltype is only available in C++11 and above. #if __cplusplus >= 201103L // __hip_promote template struct __numeric_type { static void __test(...); static _Float16 __test(_Float16); static float __test(float); static double __test(char); static double __test(int); static double __test(unsigned); static double __test(long); static double __test(unsigned long); static double __test(long long); static double __test(unsigned long long); static double __test(double); // No support for long double, use double instead. 
static double __test(long double); typedef decltype(__test(declval<_Tp>())) type; static const bool value = !is_same::value; }; template <> struct __numeric_type { static const bool value = true; }; template ::value &&__numeric_type<_A2>::value &&__numeric_type<_A3>::value> class __promote_imp { public: static const bool value = false; }; template class __promote_imp<_A1, _A2, _A3, true> { private: typedef typename __promote_imp<_A1>::type __type1; typedef typename __promote_imp<_A2>::type __type2; typedef typename __promote_imp<_A3>::type __type3; public: typedef decltype(__type1() + __type2() + __type3()) type; static const bool value = true; }; template class __promote_imp<_A1, _A2, void, true> { private: typedef typename __promote_imp<_A1>::type __type1; typedef typename __promote_imp<_A2>::type __type2; public: typedef decltype(__type1() + __type2()) type; static const bool value = true; }; template class __promote_imp<_A1, void, void, true> { public: typedef typename __numeric_type<_A1>::type type; static const bool value = true; }; template class __promote : public __promote_imp<_A1, _A2, _A3> {}; #endif //__cplusplus >= 201103L } // namespace __hip // __HIP_OVERLOAD1 is used to resolve function calls with integer argument to // avoid compilation error due to ambibuity. e.g. floor(5) is resolved with // floor(double). #define __HIP_OVERLOAD1(__retty, __fn) \ template \ __DEVICE__ __CONSTEXPR__ \ typename __hip_enable_if<__hip::is_integral<__T>::value, __retty>::type \ __fn(__T __x) { \ return ::__fn((double)__x); \ } // __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double // or integer argument to avoid compilation error due to ambibuity. e.g. // max(5.0f, 6.0) is resolved with max(double, double). #if __cplusplus >= 201103L #define __HIP_OVERLOAD2(__retty, __fn) \ template \ __DEVICE__ __CONSTEXPR__ typename __hip_enable_if< \ __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, \ typename __hip::__promote<__T1, __T2>::type>::type \ __fn(__T1 __x, __T2 __y) { \ typedef typename __hip::__promote<__T1, __T2>::type __result_type; \ return __fn((__result_type)__x, (__result_type)__y); \ } #else #define __HIP_OVERLOAD2(__retty, __fn) \ template \ __DEVICE__ __CONSTEXPR__ \ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && \ __hip::is_arithmetic<__T2>::value, \ __retty>::type \ __fn(__T1 __x, __T2 __y) { \ return __fn((double)__x, (double)__y); \ } #endif __HIP_OVERLOAD1(double, acos) __HIP_OVERLOAD1(double, acosh) __HIP_OVERLOAD1(double, asin) __HIP_OVERLOAD1(double, asinh) __HIP_OVERLOAD1(double, atan) __HIP_OVERLOAD2(double, atan2) __HIP_OVERLOAD1(double, atanh) __HIP_OVERLOAD1(double, cbrt) __HIP_OVERLOAD1(double, ceil) __HIP_OVERLOAD2(double, copysign) __HIP_OVERLOAD1(double, cos) __HIP_OVERLOAD1(double, cosh) __HIP_OVERLOAD1(double, erf) __HIP_OVERLOAD1(double, erfc) __HIP_OVERLOAD1(double, exp) __HIP_OVERLOAD1(double, exp2) __HIP_OVERLOAD1(double, expm1) __HIP_OVERLOAD1(double, fabs) __HIP_OVERLOAD2(double, fdim) __HIP_OVERLOAD1(double, floor) __HIP_OVERLOAD2(double, fmax) __HIP_OVERLOAD2(double, fmin) __HIP_OVERLOAD2(double, fmod) #if !defined(__HIPCC_RTC__) __HIP_OVERLOAD1(int, fpclassify) #endif // !defined(__HIPCC_RTC__) __HIP_OVERLOAD2(double, hypot) __HIP_OVERLOAD1(int, ilogb) __HIP_OVERLOAD1(bool, isfinite) __HIP_OVERLOAD2(bool, isgreater) __HIP_OVERLOAD2(bool, isgreaterequal) __HIP_OVERLOAD1(bool, isinf) __HIP_OVERLOAD2(bool, isless) __HIP_OVERLOAD2(bool, islessequal) __HIP_OVERLOAD2(bool, islessgreater) 
__HIP_OVERLOAD1(bool, isnan) __HIP_OVERLOAD1(bool, isnormal) __HIP_OVERLOAD2(bool, isunordered) __HIP_OVERLOAD1(double, lgamma) __HIP_OVERLOAD1(double, log) __HIP_OVERLOAD1(double, log10) __HIP_OVERLOAD1(double, log1p) __HIP_OVERLOAD1(double, log2) __HIP_OVERLOAD1(double, logb) __HIP_OVERLOAD1(long long, llrint) __HIP_OVERLOAD1(long long, llround) __HIP_OVERLOAD1(long, lrint) __HIP_OVERLOAD1(long, lround) __HIP_OVERLOAD1(double, nearbyint) __HIP_OVERLOAD2(double, nextafter) __HIP_OVERLOAD2(double, pow) __HIP_OVERLOAD2(double, remainder) __HIP_OVERLOAD1(double, rint) __HIP_OVERLOAD1(double, round) __HIP_OVERLOAD1(bool, signbit) __HIP_OVERLOAD1(double, sin) __HIP_OVERLOAD1(double, sinh) __HIP_OVERLOAD1(double, sqrt) __HIP_OVERLOAD1(double, tan) __HIP_OVERLOAD1(double, tanh) __HIP_OVERLOAD1(double, tgamma) __HIP_OVERLOAD1(double, trunc) // Overload these but don't add them to std, they are not part of cmath. __HIP_OVERLOAD2(double, max) __HIP_OVERLOAD2(double, min) // Additional Overloads that don't quite match HIP_OVERLOAD. #if __cplusplus >= 201103L template __DEVICE__ __CONSTEXPR__ typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value && __hip::is_arithmetic<__T3>::value, typename __hip::__promote<__T1, __T2, __T3>::type>::type fma(__T1 __x, __T2 __y, __T3 __z) { typedef typename __hip::__promote<__T1, __T2, __T3>::type __result_type; return ::fma((__result_type)__x, (__result_type)__y, (__result_type)__z); } #else template __DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value && __hip::is_arithmetic<__T3>::value, double>::type fma(__T1 __x, __T2 __y, __T3 __z) { return ::fma((double)__x, (double)__y, (double)__z); } #endif template __DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type frexp(__T __x, int *__exp) { return ::frexp((double)__x, __exp); } template __DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type ldexp(__T __x, int __exp) { return ::ldexp((double)__x, __exp); } template __DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type modf(__T __x, double *__exp) { return ::modf((double)__x, __exp); } #if __cplusplus >= 201103L template __DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type remquo(__T1 __x, __T2 __y, int *__quo) { typedef typename __hip::__promote<__T1, __T2>::type __result_type; return ::remquo((__result_type)__x, (__result_type)__y, __quo); } #else template __DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, double>::type remquo(__T1 __x, __T2 __y, int *__quo) { return ::remquo((double)__x, (double)__y, __quo); } #endif template __DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type scalbln(__T __x, long int __exp) { return ::scalbln((double)__x, __exp); } template __DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type scalbn(__T __x, int __exp) { return ::scalbn((double)__x, __exp); } #pragma pop_macro("__HIP_OVERLOAD1") #pragma pop_macro("__HIP_OVERLOAD2") // END HIP_OVERLOAD // END DEF_FUN and HIP_OVERLOAD #endif // ifndef __OPENMP_AMDGCN__ #endif // defined(__cplusplus) #ifndef __OPENMP_AMDGCN__ // Define these overloads inside the namespace our standard library 
uses. #if !defined(__HIPCC_RTC__) #ifdef _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_BEGIN_NAMESPACE_STD #else namespace std { #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif // _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif // _LIBCPP_BEGIN_NAMESPACE_STD // Pull the new overloads we defined above into namespace std. // using ::abs; - This may be considered for C++. using ::acos; using ::acosh; using ::asin; using ::asinh; using ::atan; using ::atan2; using ::atanh; using ::cbrt; using ::ceil; using ::copysign; using ::cos; using ::cosh; using ::erf; using ::erfc; using ::exp; using ::exp2; using ::expm1; using ::fabs; using ::fdim; using ::floor; using ::fma; using ::fmax; using ::fmin; using ::fmod; using ::fpclassify; using ::frexp; using ::hypot; using ::ilogb; using ::isfinite; using ::isgreater; using ::isgreaterequal; using ::isless; using ::islessequal; using ::islessgreater; using ::isnormal; using ::isunordered; using ::ldexp; using ::lgamma; using ::llrint; using ::llround; using ::log; using ::log10; using ::log1p; using ::log2; using ::logb; using ::lrint; using ::lround; using ::modf; // using ::nan; - This may be considered for C++. // using ::nanf; - This may be considered for C++. // using ::nanl; - This is not yet defined. using ::nearbyint; using ::nextafter; // using ::nexttoward; - Omit this since we do not have a definition. using ::pow; using ::remainder; using ::remquo; using ::rint; using ::round; using ::scalbln; using ::scalbn; using ::signbit; using ::sin; using ::sinh; using ::sqrt; using ::tan; using ::tanh; using ::tgamma; using ::trunc; // Well this is fun: We need to pull these symbols in for libc++, but we can't // pull them in with libstdc++, because its ::isinf and ::isnan are different // than its std::isinf and std::isnan. #ifndef __GLIBCXX__ using ::isinf; using ::isnan; #endif // Finally, pull the "foobarf" functions that HIP defines into std. using ::acosf; using ::acoshf; using ::asinf; using ::asinhf; using ::atan2f; using ::atanf; using ::atanhf; using ::cbrtf; using ::ceilf; using ::copysignf; using ::cosf; using ::coshf; using ::erfcf; using ::erff; using ::exp2f; using ::expf; using ::expm1f; using ::fabsf; using ::fdimf; using ::floorf; using ::fmaf; using ::fmaxf; using ::fminf; using ::fmodf; using ::frexpf; using ::hypotf; using ::ilogbf; using ::ldexpf; using ::lgammaf; using ::llrintf; using ::llroundf; using ::log10f; using ::log1pf; using ::log2f; using ::logbf; using ::logf; using ::lrintf; using ::lroundf; using ::modff; using ::nearbyintf; using ::nextafterf; // using ::nexttowardf; - Omit this since we do not have a definition. using ::powf; using ::remainderf; using ::remquof; using ::rintf; using ::roundf; using ::scalblnf; using ::scalbnf; using ::sinf; using ::sinhf; using ::sqrtf; using ::tanf; using ::tanhf; using ::tgammaf; using ::truncf; #ifdef _LIBCPP_END_NAMESPACE_STD _LIBCPP_END_NAMESPACE_STD #else #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_END_NAMESPACE_VERSION #endif // _GLIBCXX_BEGIN_NAMESPACE_VERSION } // namespace std #endif // _LIBCPP_END_NAMESPACE_STD #endif // !defined(__HIPCC_RTC__) // Define device-side math functions from on MSVC. #if !defined(__HIPCC_RTC__) #if defined(_MSC_VER) // Before VS2019, `` is also included in `` and other headers. // But, from VS2019, it's only included in ``. Need to include // `` here to ensure C functions declared there won't be markded as // `__host__` and `__device__` through `` wrapper. 
#include #if defined(__cplusplus) extern "C" { #endif // defined(__cplusplus) __DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Cosh(double x, double y) { return cosh(x) * y; } __DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FCosh(float x, float y) { return coshf(x) * y; } __DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _Dtest(double *p) { return fpclassify(*p); } __DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _FDtest(float *p) { return fpclassify(*p); } __DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Sinh(double x, double y) { return sinh(x) * y; } __DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FSinh(float x, float y) { return sinhf(x) * y; } #if defined(__cplusplus) } #endif // defined(__cplusplus) #endif // defined(_MSC_VER) #endif // !defined(__HIPCC_RTC__) #endif // ifndef __OPENMP_AMDGCN__ #pragma pop_macro("__DEVICE__") #pragma pop_macro("__CONSTEXPR__") #endif // __CLANG_HIP_CMATH_H__ avx512vlbwintrin.h/*===------ cet.h -Control-flow Enforcement Technology feature ------------=== * Add x86 feature with IBT and/or SHSTK bits to ELF program property if they * are enabled. Otherwise, contents in this header file are unused. This file * is mainly design for assembly source code which want to enable CET. * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CET_H #define __CET_H #ifdef __ASSEMBLER__ #ifndef __CET__ # define _CET_ENDBR #endif #ifdef __CET__ # ifdef __LP64__ # if __CET__ & 0x1 # define _CET_ENDBR endbr64 # else # define _CET_ENDBR # endif # else # if __CET__ & 0x1 # define _CET_ENDBR endbr32 # else # define _CET_ENDBR # endif # endif # ifdef __LP64__ # define __PROPERTY_ALIGN 3 # else # define __PROPERTY_ALIGN 2 # endif .pushsection ".note.gnu.property", "a" .p2align __PROPERTY_ALIGN .long 1f - 0f /* name length. */ .long 4f - 1f /* data length. */ /* NT_GNU_PROPERTY_TYPE_0. */ .long 5 /* note type. */ 0: .asciz "GNU" /* vendor name. */ 1: .p2align __PROPERTY_ALIGN /* GNU_PROPERTY_X86_FEATURE_1_AND. */ .long 0xc0000002 /* pr_type. */ .long 3f - 2f /* pr_datasz. */ 2: /* GNU_PROPERTY_X86_FEATURE_1_XXX. */ .long __CET__ 3: .p2align __PROPERTY_ALIGN 4: .popsection #endif #endif #endif /*===---- fxsrintrin.h - FXSR intrinsic ------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __FXSRINTRIN_H #define __FXSRINTRIN_H #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fxsr"))) /// Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte /// memory region pointed to by the input parameter \a __p. /// /// \headerfile /// /// This intrinsic corresponds to the FXSAVE instruction. /// /// \param __p /// A pointer to a 512-byte memory region. The beginning of this memory /// region should be aligned on a 16-byte boundary. 
static __inline__ void __DEFAULT_FN_ATTRS _fxsave(void *__p) { __builtin_ia32_fxsave(__p); } /// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte /// memory region pointed to by the input parameter \a __p. The contents of /// this memory region should have been written to by a previous \c _fxsave /// or \c _fxsave64 intrinsic. /// /// \headerfile /// /// This intrinsic corresponds to the FXRSTOR instruction. /// /// \param __p /// A pointer to a 512-byte memory region. The beginning of this memory /// region should be aligned on a 16-byte boundary. static __inline__ void __DEFAULT_FN_ATTRS _fxrstor(void *__p) { __builtin_ia32_fxrstor(__p); } #ifdef __x86_64__ /// Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte /// memory region pointed to by the input parameter \a __p. /// /// \headerfile /// /// This intrinsic corresponds to the FXSAVE64 instruction. /// /// \param __p /// A pointer to a 512-byte memory region. The beginning of this memory /// region should be aligned on a 16-byte boundary. static __inline__ void __DEFAULT_FN_ATTRS _fxsave64(void *__p) { __builtin_ia32_fxsave64(__p); } /// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte /// memory region pointed to by the input parameter \a __p. The contents of /// this memory region should have been written to by a previous \c _fxsave /// or \c _fxsave64 intrinsic. /// /// \headerfile /// /// This intrinsic corresponds to the FXRSTOR64 instruction. /// /// \param __p /// A pointer to a 512-byte memory region. The beginning of this memory /// region should be aligned on a 16-byte boundary. static __inline__ void __DEFAULT_FN_ATTRS _fxrstor64(void *__p) { __builtin_ia32_fxrstor64(__p); } #endif #undef __DEFAULT_FN_ATTRS #endif htmintrin.h/*===---- pconfigintrin.h - X86 platform configuration ---------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __PCONFIGINTRIN_H #define __PCONFIGINTRIN_H #define __PCONFIG_KEY_PROGRAM 0x00000001 #if __has_extension(gnu_asm) /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("pconfig"))) static __inline unsigned int __DEFAULT_FN_ATTRS _pconfig_u32(unsigned int __leaf, __SIZE_TYPE__ __d[]) { unsigned int __result; __asm__ ("pconfig" : "=a" (__result), "=b" (__d[0]), "=c" (__d[1]), "=d" (__d[2]) : "a" (__leaf), "b" (__d[0]), "c" (__d[1]), "d" (__d[2]) : "cc"); return __result; } #undef __DEFAULT_FN_ATTRS #endif /* __has_extension(gnu_asm) */ #endif sm3intrin.hstdarg.hstddef.h/*===---- stdint.h - Standard header for sized integer types --------------===*\ * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * \*===----------------------------------------------------------------------===*/ #ifndef __CLANG_STDINT_H // AIX system headers need stdint.h to be re-enterable while _STD_TYPES_T // is defined until an inclusion of it without _STD_TYPES_T occurs, in which // case the header guard macro is defined. 
#if !defined(_AIX) || !defined(_STD_TYPES_T) || !defined(__STDC_HOSTED__) #define __CLANG_STDINT_H #endif /* If we're hosted, fall back to the system's stdint.h, which might have * additional definitions. */ #if __STDC_HOSTED__ && __has_include_next() // C99 7.18.3 Limits of other integer types // // Footnote 219, 220: C++ implementations should define these macros only when // __STDC_LIMIT_MACROS is defined before is included. // // Footnote 222: C++ implementations should define these macros only when // __STDC_CONSTANT_MACROS is defined before is included. // // C++11 [cstdint.syn]p2: // // The macros defined by are provided unconditionally. In particular, // the symbols __STDC_LIMIT_MACROS and __STDC_CONSTANT_MACROS (mentioned in // footnotes 219, 220, and 222 in the C standard) play no role in C++. // // C11 removed the problematic footnotes. // // Work around this inconsistency by always defining those macros in C++ mode, // so that a C library implementation which follows the C99 standard can be // used in C++. # ifdef __cplusplus # if !defined(__STDC_LIMIT_MACROS) # define __STDC_LIMIT_MACROS # define __STDC_LIMIT_MACROS_DEFINED_BY_CLANG # endif # if !defined(__STDC_CONSTANT_MACROS) # define __STDC_CONSTANT_MACROS # define __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG # endif # endif # include_next # ifdef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG # undef __STDC_LIMIT_MACROS # undef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG # endif # ifdef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG # undef __STDC_CONSTANT_MACROS # undef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG # endif #else /* C99 7.18.1.1 Exact-width integer types. * C99 7.18.1.2 Minimum-width integer types. * C99 7.18.1.3 Fastest minimum-width integer types. * * The standard requires that exact-width type be defined for 8-, 16-, 32-, and * 64-bit types if they are implemented. Other exact width types are optional. * This implementation defines an exact-width types for every integer width * that is represented in the standard integer types. * * The standard also requires minimum-width types be defined for 8-, 16-, 32-, * and 64-bit widths regardless of whether there are corresponding exact-width * types. * * To accommodate targets that are missing types that are exactly 8, 16, 32, or * 64 bits wide, this implementation takes an approach of cascading * redefinitions, redefining __int_leastN_t to successively smaller exact-width * types. It is therefore important that the types are defined in order of * descending widths. * * We currently assume that the minimum-width types and the fastest * minimum-width types are the same. This is allowed by the standard, but is * suboptimal. * * In violation of the standard, some targets do not implement a type that is * wide enough to represent all of the required widths (8-, 16-, 32-, 64-bit). * To accommodate these targets, a required minimum-width type is only * defined if there exists an exact-width type of equal or greater width. 
*/ #ifdef __INT64_TYPE__ # ifndef __int8_t_defined /* glibc sys/types.h also defines int64_t*/ typedef __INT64_TYPE__ int64_t; # endif /* __int8_t_defined */ typedef __UINT64_TYPE__ uint64_t; # undef __int_least64_t # define __int_least64_t int64_t # undef __uint_least64_t # define __uint_least64_t uint64_t # undef __int_least32_t # define __int_least32_t int64_t # undef __uint_least32_t # define __uint_least32_t uint64_t # undef __int_least16_t # define __int_least16_t int64_t # undef __uint_least16_t # define __uint_least16_t uint64_t # undef __int_least8_t # define __int_least8_t int64_t # undef __uint_least8_t # define __uint_least8_t uint64_t #endif /* __INT64_TYPE__ */ #ifdef __int_least64_t typedef __int_least64_t int_least64_t; typedef __uint_least64_t uint_least64_t; typedef __int_least64_t int_fast64_t; typedef __uint_least64_t uint_fast64_t; #endif /* __int_least64_t */ #ifdef __INT56_TYPE__ typedef __INT56_TYPE__ int56_t; typedef __UINT56_TYPE__ uint56_t; typedef int56_t int_least56_t; typedef uint56_t uint_least56_t; typedef int56_t int_fast56_t; typedef uint56_t uint_fast56_t; # undef __int_least32_t # define __int_least32_t int56_t # undef __uint_least32_t # define __uint_least32_t uint56_t # undef __int_least16_t # define __int_least16_t int56_t # undef __uint_least16_t # define __uint_least16_t uint56_t # undef __int_least8_t # define __int_least8_t int56_t # undef __uint_least8_t # define __uint_least8_t uint56_t #endif /* __INT56_TYPE__ */ #ifdef __INT48_TYPE__ typedef __INT48_TYPE__ int48_t; typedef __UINT48_TYPE__ uint48_t; typedef int48_t int_least48_t; typedef uint48_t uint_least48_t; typedef int48_t int_fast48_t; typedef uint48_t uint_fast48_t; # undef __int_least32_t # define __int_least32_t int48_t # undef __uint_least32_t # define __uint_least32_t uint48_t # undef __int_least16_t # define __int_least16_t int48_t # undef __uint_least16_t # define __uint_least16_t uint48_t # undef __int_least8_t # define __int_least8_t int48_t # undef __uint_least8_t # define __uint_least8_t uint48_t #endif /* __INT48_TYPE__ */ #ifdef __INT40_TYPE__ typedef __INT40_TYPE__ int40_t; typedef __UINT40_TYPE__ uint40_t; typedef int40_t int_least40_t; typedef uint40_t uint_least40_t; typedef int40_t int_fast40_t; typedef uint40_t uint_fast40_t; # undef __int_least32_t # define __int_least32_t int40_t # undef __uint_least32_t # define __uint_least32_t uint40_t # undef __int_least16_t # define __int_least16_t int40_t # undef __uint_least16_t # define __uint_least16_t uint40_t # undef __int_least8_t # define __int_least8_t int40_t # undef __uint_least8_t # define __uint_least8_t uint40_t #endif /* __INT40_TYPE__ */ #ifdef __INT32_TYPE__ # ifndef __int8_t_defined /* glibc sys/types.h also defines int32_t*/ typedef __INT32_TYPE__ int32_t; # endif /* __int8_t_defined */ # ifndef __uint32_t_defined /* more glibc compatibility */ # define __uint32_t_defined typedef __UINT32_TYPE__ uint32_t; # endif /* __uint32_t_defined */ # undef __int_least32_t # define __int_least32_t int32_t # undef __uint_least32_t # define __uint_least32_t uint32_t # undef __int_least16_t # define __int_least16_t int32_t # undef __uint_least16_t # define __uint_least16_t uint32_t # undef __int_least8_t # define __int_least8_t int32_t # undef __uint_least8_t # define __uint_least8_t uint32_t #endif /* __INT32_TYPE__ */ #ifdef __int_least32_t typedef __int_least32_t int_least32_t; typedef __uint_least32_t uint_least32_t; typedef __int_least32_t int_fast32_t; typedef __uint_least32_t uint_fast32_t; #endif /* __int_least32_t */ 
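/*
 * Illustrative, standalone sketch (not part of the original header): how the
 * exact-, least- and fast-width typedefs built up by the cascading
 * redefinitions above are consumed by user code. The demo function name and
 * the printed output are hypothetical; the sketch assumes a hosted toolchain
 * providing <stdint.h>, <inttypes.h> and <stdio.h>.
 */
#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

static void stdint_width_demo(void) {
  int32_t exact = INT32_C(123456);       /* exactly 32 bits, when the target has them */
  int_least32_t least = exact;           /* at least 32 bits; may alias a wider type */
  uint_fast16_t fast = UINT16_C(40000);  /* the "fastest" type of at least 16 bits */
  printf("exact=%" PRId32 " least=%" PRIdLEAST32 " fast=%" PRIuFAST16 "\n",
         exact, least, fast);
}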
#ifdef __INT24_TYPE__ typedef __INT24_TYPE__ int24_t; typedef __UINT24_TYPE__ uint24_t; typedef int24_t int_least24_t; typedef uint24_t uint_least24_t; typedef int24_t int_fast24_t; typedef uint24_t uint_fast24_t; # undef __int_least16_t # define __int_least16_t int24_t # undef __uint_least16_t # define __uint_least16_t uint24_t # undef __int_least8_t # define __int_least8_t int24_t # undef __uint_least8_t # define __uint_least8_t uint24_t #endif /* __INT24_TYPE__ */ #ifdef __INT16_TYPE__ #ifndef __int8_t_defined /* glibc sys/types.h also defines int16_t*/ typedef __INT16_TYPE__ int16_t; #endif /* __int8_t_defined */ typedef __UINT16_TYPE__ uint16_t; # undef __int_least16_t # define __int_least16_t int16_t # undef __uint_least16_t # define __uint_least16_t uint16_t # undef __int_least8_t # define __int_least8_t int16_t # undef __uint_least8_t # define __uint_least8_t uint16_t #endif /* __INT16_TYPE__ */ #ifdef __int_least16_t typedef __int_least16_t int_least16_t; typedef __uint_least16_t uint_least16_t; typedef __int_least16_t int_fast16_t; typedef __uint_least16_t uint_fast16_t; #endif /* __int_least16_t */ #ifdef __INT8_TYPE__ #ifndef __int8_t_defined /* glibc sys/types.h also defines int8_t*/ typedef __INT8_TYPE__ int8_t; #endif /* __int8_t_defined */ typedef __UINT8_TYPE__ uint8_t; # undef __int_least8_t # define __int_least8_t int8_t # undef __uint_least8_t # define __uint_least8_t uint8_t #endif /* __INT8_TYPE__ */ #ifdef __int_least8_t typedef __int_least8_t int_least8_t; typedef __uint_least8_t uint_least8_t; typedef __int_least8_t int_fast8_t; typedef __uint_least8_t uint_fast8_t; #endif /* __int_least8_t */ /* prevent glibc sys/types.h from defining conflicting types */ #ifndef __int8_t_defined # define __int8_t_defined #endif /* __int8_t_defined */ /* C99 7.18.1.4 Integer types capable of holding object pointers. */ #define __stdint_join3(a,b,c) a ## b ## c #ifndef _INTPTR_T #ifndef __intptr_t_defined typedef __INTPTR_TYPE__ intptr_t; #define __intptr_t_defined #define _INTPTR_T #endif #endif #ifndef _UINTPTR_T typedef __UINTPTR_TYPE__ uintptr_t; #define _UINTPTR_T #endif /* C99 7.18.1.5 Greatest-width integer types. */ typedef __INTMAX_TYPE__ intmax_t; typedef __UINTMAX_TYPE__ uintmax_t; /* C99 7.18.4 Macros for minimum-width integer constants. * * The standard requires that integer constant macros be defined for all the * minimum-width types defined above. As 8-, 16-, 32-, and 64-bit minimum-width * types are required, the corresponding integer constant macros are defined * here. This implementation also defines minimum-width types for every other * integer width that the target implements, so corresponding macros are * defined below, too. * * These macros are defined using the same successive-shrinking approach as * the type definitions above. It is likewise important that macros are defined * in order of decending width. * * Note that C++ should not check __STDC_CONSTANT_MACROS here, contrary to the * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]). 
*/ #define __int_c_join(a, b) a ## b #define __int_c(v, suffix) __int_c_join(v, suffix) #define __uint_c(v, suffix) __int_c_join(v##U, suffix) #ifdef __INT64_TYPE__ # undef __int64_c_suffix # undef __int32_c_suffix # undef __int16_c_suffix # undef __int8_c_suffix # ifdef __INT64_C_SUFFIX__ # define __int64_c_suffix __INT64_C_SUFFIX__ # define __int32_c_suffix __INT64_C_SUFFIX__ # define __int16_c_suffix __INT64_C_SUFFIX__ # define __int8_c_suffix __INT64_C_SUFFIX__ # endif /* __INT64_C_SUFFIX__ */ #endif /* __INT64_TYPE__ */ #ifdef __int_least64_t # ifdef __int64_c_suffix # define INT64_C(v) __int_c(v, __int64_c_suffix) # define UINT64_C(v) __uint_c(v, __int64_c_suffix) # else # define INT64_C(v) v # define UINT64_C(v) v ## U # endif /* __int64_c_suffix */ #endif /* __int_least64_t */ #ifdef __INT56_TYPE__ # undef __int32_c_suffix # undef __int16_c_suffix # undef __int8_c_suffix # ifdef __INT56_C_SUFFIX__ # define INT56_C(v) __int_c(v, __INT56_C_SUFFIX__) # define UINT56_C(v) __uint_c(v, __INT56_C_SUFFIX__) # define __int32_c_suffix __INT56_C_SUFFIX__ # define __int16_c_suffix __INT56_C_SUFFIX__ # define __int8_c_suffix __INT56_C_SUFFIX__ # else # define INT56_C(v) v # define UINT56_C(v) v ## U # endif /* __INT56_C_SUFFIX__ */ #endif /* __INT56_TYPE__ */ #ifdef __INT48_TYPE__ # undef __int32_c_suffix # undef __int16_c_suffix # undef __int8_c_suffix # ifdef __INT48_C_SUFFIX__ # define INT48_C(v) __int_c(v, __INT48_C_SUFFIX__) # define UINT48_C(v) __uint_c(v, __INT48_C_SUFFIX__) # define __int32_c_suffix __INT48_C_SUFFIX__ # define __int16_c_suffix __INT48_C_SUFFIX__ # define __int8_c_suffix __INT48_C_SUFFIX__ # else # define INT48_C(v) v # define UINT48_C(v) v ## U # endif /* __INT48_C_SUFFIX__ */ #endif /* __INT48_TYPE__ */ #ifdef __INT40_TYPE__ # undef __int32_c_suffix # undef __int16_c_suffix # undef __int8_c_suffix # ifdef __INT40_C_SUFFIX__ # define INT40_C(v) __int_c(v, __INT40_C_SUFFIX__) # define UINT40_C(v) __uint_c(v, __INT40_C_SUFFIX__) # define __int32_c_suffix __INT40_C_SUFFIX__ # define __int16_c_suffix __INT40_C_SUFFIX__ # define __int8_c_suffix __INT40_C_SUFFIX__ # else # define INT40_C(v) v # define UINT40_C(v) v ## U # endif /* __INT40_C_SUFFIX__ */ #endif /* __INT40_TYPE__ */ #ifdef __INT32_TYPE__ # undef __int32_c_suffix # undef __int16_c_suffix # undef __int8_c_suffix # ifdef __INT32_C_SUFFIX__ # define __int32_c_suffix __INT32_C_SUFFIX__ # define __int16_c_suffix __INT32_C_SUFFIX__ # define __int8_c_suffix __INT32_C_SUFFIX__ # endif /* __INT32_C_SUFFIX__ */ #endif /* __INT32_TYPE__ */ #ifdef __int_least32_t # ifdef __int32_c_suffix # define INT32_C(v) __int_c(v, __int32_c_suffix) # define UINT32_C(v) __uint_c(v, __int32_c_suffix) # else # define INT32_C(v) v # define UINT32_C(v) v ## U # endif /* __int32_c_suffix */ #endif /* __int_least32_t */ #ifdef __INT24_TYPE__ # undef __int16_c_suffix # undef __int8_c_suffix # ifdef __INT24_C_SUFFIX__ # define INT24_C(v) __int_c(v, __INT24_C_SUFFIX__) # define UINT24_C(v) __uint_c(v, __INT24_C_SUFFIX__) # define __int16_c_suffix __INT24_C_SUFFIX__ # define __int8_c_suffix __INT24_C_SUFFIX__ # else # define INT24_C(v) v # define UINT24_C(v) v ## U # endif /* __INT24_C_SUFFIX__ */ #endif /* __INT24_TYPE__ */ #ifdef __INT16_TYPE__ # undef __int16_c_suffix # undef __int8_c_suffix # ifdef __INT16_C_SUFFIX__ # define __int16_c_suffix __INT16_C_SUFFIX__ # define __int8_c_suffix __INT16_C_SUFFIX__ # endif /* __INT16_C_SUFFIX__ */ #endif /* __INT16_TYPE__ */ #ifdef __int_least16_t # ifdef __int16_c_suffix # define INT16_C(v) __int_c(v, 
__int16_c_suffix) # define UINT16_C(v) __uint_c(v, __int16_c_suffix) # else # define INT16_C(v) v # define UINT16_C(v) v ## U # endif /* __int16_c_suffix */ #endif /* __int_least16_t */ #ifdef __INT8_TYPE__ # undef __int8_c_suffix # ifdef __INT8_C_SUFFIX__ # define __int8_c_suffix __INT8_C_SUFFIX__ # endif /* __INT8_C_SUFFIX__ */ #endif /* __INT8_TYPE__ */ #ifdef __int_least8_t # ifdef __int8_c_suffix # define INT8_C(v) __int_c(v, __int8_c_suffix) # define UINT8_C(v) __uint_c(v, __int8_c_suffix) # else # define INT8_C(v) v # define UINT8_C(v) v ## U # endif /* __int8_c_suffix */ #endif /* __int_least8_t */ /* C99 7.18.2.1 Limits of exact-width integer types. * C99 7.18.2.2 Limits of minimum-width integer types. * C99 7.18.2.3 Limits of fastest minimum-width integer types. * * The presence of limit macros are completely optional in C99. This * implementation defines limits for all of the types (exact- and * minimum-width) that it defines above, using the limits of the minimum-width * type for any types that do not have exact-width representations. * * As in the type definitions, this section takes an approach of * successive-shrinking to determine which limits to use for the standard (8, * 16, 32, 64) bit widths when they don't have exact representations. It is * therefore important that the definitions be kept in order of decending * widths. * * Note that C++ should not check __STDC_LIMIT_MACROS here, contrary to the * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]). */ #ifdef __INT64_TYPE__ # define INT64_MAX INT64_C( 9223372036854775807) # define INT64_MIN (-INT64_C( 9223372036854775807)-1) # define UINT64_MAX UINT64_C(18446744073709551615) #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT64_WIDTH 64 # define INT64_WIDTH UINT64_WIDTH # define __UINT_LEAST64_WIDTH UINT64_WIDTH # undef __UINT_LEAST32_WIDTH # define __UINT_LEAST32_WIDTH UINT64_WIDTH # undef __UINT_LEAST16_WIDTH # define __UINT_LEAST16_WIDTH UINT64_WIDTH # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT64_MAX #endif /* __STDC_VERSION__ */ # define __INT_LEAST64_MIN INT64_MIN # define __INT_LEAST64_MAX INT64_MAX # define __UINT_LEAST64_MAX UINT64_MAX # undef __INT_LEAST32_MIN # define __INT_LEAST32_MIN INT64_MIN # undef __INT_LEAST32_MAX # define __INT_LEAST32_MAX INT64_MAX # undef __UINT_LEAST32_MAX # define __UINT_LEAST32_MAX UINT64_MAX # undef __INT_LEAST16_MIN # define __INT_LEAST16_MIN INT64_MIN # undef __INT_LEAST16_MAX # define __INT_LEAST16_MAX INT64_MAX # undef __UINT_LEAST16_MAX # define __UINT_LEAST16_MAX UINT64_MAX # undef __INT_LEAST8_MIN # define __INT_LEAST8_MIN INT64_MIN # undef __INT_LEAST8_MAX # define __INT_LEAST8_MAX INT64_MAX # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT64_MAX #endif /* __INT64_TYPE__ */ #ifdef __INT_LEAST64_MIN # define INT_LEAST64_MIN __INT_LEAST64_MIN # define INT_LEAST64_MAX __INT_LEAST64_MAX # define UINT_LEAST64_MAX __UINT_LEAST64_MAX # define INT_FAST64_MIN __INT_LEAST64_MIN # define INT_FAST64_MAX __INT_LEAST64_MAX # define UINT_FAST64_MAX __UINT_LEAST64_MAX #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT_LEAST64_WIDTH __UINT_LEAST64_WIDTH # define INT_LEAST64_WIDTH UINT_LEAST64_WIDTH # define UINT_FAST64_WIDTH __UINT_LEAST64_WIDTH # define INT_FAST64_WIDTH UINT_FAST64_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT_LEAST64_MIN */ #ifdef __INT56_TYPE__ # define INT56_MAX INT56_C(36028797018963967) # define INT56_MIN (-INT56_C(36028797018963967)-1) # define UINT56_MAX UINT56_C(72057594037927935) 
# define INT_LEAST56_MIN INT56_MIN # define INT_LEAST56_MAX INT56_MAX # define UINT_LEAST56_MAX UINT56_MAX # define INT_FAST56_MIN INT56_MIN # define INT_FAST56_MAX INT56_MAX # define UINT_FAST56_MAX UINT56_MAX # undef __INT_LEAST32_MIN # define __INT_LEAST32_MIN INT56_MIN # undef __INT_LEAST32_MAX # define __INT_LEAST32_MAX INT56_MAX # undef __UINT_LEAST32_MAX # define __UINT_LEAST32_MAX UINT56_MAX # undef __INT_LEAST16_MIN # define __INT_LEAST16_MIN INT56_MIN # undef __INT_LEAST16_MAX # define __INT_LEAST16_MAX INT56_MAX # undef __UINT_LEAST16_MAX # define __UINT_LEAST16_MAX UINT56_MAX # undef __INT_LEAST8_MIN # define __INT_LEAST8_MIN INT56_MIN # undef __INT_LEAST8_MAX # define __INT_LEAST8_MAX INT56_MAX # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT56_MAX #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT56_WIDTH 56 # define INT56_WIDTH UINT56_WIDTH # define UINT_LEAST56_WIDTH UINT56_WIDTH # define INT_LEAST56_WIDTH UINT_LEAST56_WIDTH # define UINT_FAST56_WIDTH UINT56_WIDTH # define INT_FAST56_WIDTH UINT_FAST56_WIDTH # undef __UINT_LEAST32_WIDTH # define __UINT_LEAST32_WIDTH UINT56_WIDTH # undef __UINT_LEAST16_WIDTH # define __UINT_LEAST16_WIDTH UINT56_WIDTH # undef __UINT_LEAST8_WIDTH # define __UINT_LEAST8_WIDTH UINT56_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT56_TYPE__ */ #ifdef __INT48_TYPE__ # define INT48_MAX INT48_C(140737488355327) # define INT48_MIN (-INT48_C(140737488355327)-1) # define UINT48_MAX UINT48_C(281474976710655) # define INT_LEAST48_MIN INT48_MIN # define INT_LEAST48_MAX INT48_MAX # define UINT_LEAST48_MAX UINT48_MAX # define INT_FAST48_MIN INT48_MIN # define INT_FAST48_MAX INT48_MAX # define UINT_FAST48_MAX UINT48_MAX # undef __INT_LEAST32_MIN # define __INT_LEAST32_MIN INT48_MIN # undef __INT_LEAST32_MAX # define __INT_LEAST32_MAX INT48_MAX # undef __UINT_LEAST32_MAX # define __UINT_LEAST32_MAX UINT48_MAX # undef __INT_LEAST16_MIN # define __INT_LEAST16_MIN INT48_MIN # undef __INT_LEAST16_MAX # define __INT_LEAST16_MAX INT48_MAX # undef __UINT_LEAST16_MAX # define __UINT_LEAST16_MAX UINT48_MAX # undef __INT_LEAST8_MIN # define __INT_LEAST8_MIN INT48_MIN # undef __INT_LEAST8_MAX # define __INT_LEAST8_MAX INT48_MAX # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT48_MAX #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L #define UINT48_WIDTH 48 #define INT48_WIDTH UINT48_WIDTH #define UINT_LEAST48_WIDTH UINT48_WIDTH #define INT_LEAST48_WIDTH UINT_LEAST48_WIDTH #define UINT_FAST48_WIDTH UINT48_WIDTH #define INT_FAST48_WIDTH UINT_FAST48_WIDTH #undef __UINT_LEAST32_WIDTH #define __UINT_LEAST32_WIDTH UINT48_WIDTH # undef __UINT_LEAST16_WIDTH #define __UINT_LEAST16_WIDTH UINT48_WIDTH # undef __UINT_LEAST8_WIDTH #define __UINT_LEAST8_WIDTH UINT48_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT48_TYPE__ */ #ifdef __INT40_TYPE__ # define INT40_MAX INT40_C(549755813887) # define INT40_MIN (-INT40_C(549755813887)-1) # define UINT40_MAX UINT40_C(1099511627775) # define INT_LEAST40_MIN INT40_MIN # define INT_LEAST40_MAX INT40_MAX # define UINT_LEAST40_MAX UINT40_MAX # define INT_FAST40_MIN INT40_MIN # define INT_FAST40_MAX INT40_MAX # define UINT_FAST40_MAX UINT40_MAX # undef __INT_LEAST32_MIN # define __INT_LEAST32_MIN INT40_MIN # undef __INT_LEAST32_MAX # define __INT_LEAST32_MAX INT40_MAX # undef __UINT_LEAST32_MAX # define __UINT_LEAST32_MAX UINT40_MAX # undef __INT_LEAST16_MIN # define __INT_LEAST16_MIN INT40_MIN # undef __INT_LEAST16_MAX # define __INT_LEAST16_MAX INT40_MAX # undef __UINT_LEAST16_MAX # 
define __UINT_LEAST16_MAX UINT40_MAX # undef __INT_LEAST8_MIN # define __INT_LEAST8_MIN INT40_MIN # undef __INT_LEAST8_MAX # define __INT_LEAST8_MAX INT40_MAX # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT40_MAX #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT40_WIDTH 40 # define INT40_WIDTH UINT40_WIDTH # define UINT_LEAST40_WIDTH UINT40_WIDTH # define INT_LEAST40_WIDTH UINT_LEAST40_WIDTH # define UINT_FAST40_WIDTH UINT40_WIDTH # define INT_FAST40_WIDTH UINT_FAST40_WIDTH # undef __UINT_LEAST32_WIDTH # define __UINT_LEAST32_WIDTH UINT40_WIDTH # undef __UINT_LEAST16_WIDTH # define __UINT_LEAST16_WIDTH UINT40_WIDTH # undef __UINT_LEAST8_WIDTH # define __UINT_LEAST8_WIDTH UINT40_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT40_TYPE__ */ #ifdef __INT32_TYPE__ # define INT32_MAX INT32_C(2147483647) # define INT32_MIN (-INT32_C(2147483647)-1) # define UINT32_MAX UINT32_C(4294967295) # undef __INT_LEAST32_MIN # define __INT_LEAST32_MIN INT32_MIN # undef __INT_LEAST32_MAX # define __INT_LEAST32_MAX INT32_MAX # undef __UINT_LEAST32_MAX # define __UINT_LEAST32_MAX UINT32_MAX # undef __INT_LEAST16_MIN # define __INT_LEAST16_MIN INT32_MIN # undef __INT_LEAST16_MAX # define __INT_LEAST16_MAX INT32_MAX # undef __UINT_LEAST16_MAX # define __UINT_LEAST16_MAX UINT32_MAX # undef __INT_LEAST8_MIN # define __INT_LEAST8_MIN INT32_MIN # undef __INT_LEAST8_MAX # define __INT_LEAST8_MAX INT32_MAX # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT32_MAX #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT32_WIDTH 32 # define INT32_WIDTH UINT32_WIDTH # undef __UINT_LEAST32_WIDTH # define __UINT_LEAST32_WIDTH UINT32_WIDTH # undef __UINT_LEAST16_WIDTH # define __UINT_LEAST16_WIDTH UINT32_WIDTH # undef __UINT_LEAST8_WIDTH # define __UINT_LEAST8_WIDTH UINT32_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT32_TYPE__ */ #ifdef __INT_LEAST32_MIN # define INT_LEAST32_MIN __INT_LEAST32_MIN # define INT_LEAST32_MAX __INT_LEAST32_MAX # define UINT_LEAST32_MAX __UINT_LEAST32_MAX # define INT_FAST32_MIN __INT_LEAST32_MIN # define INT_FAST32_MAX __INT_LEAST32_MAX # define UINT_FAST32_MAX __UINT_LEAST32_MAX #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT_LEAST32_WIDTH __UINT_LEAST32_WIDTH # define INT_LEAST32_WIDTH UINT_LEAST32_WIDTH # define UINT_FAST32_WIDTH __UINT_LEAST32_WIDTH # define INT_FAST32_WIDTH UINT_FAST32_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT_LEAST32_MIN */ #ifdef __INT24_TYPE__ # define INT24_MAX INT24_C(8388607) # define INT24_MIN (-INT24_C(8388607)-1) # define UINT24_MAX UINT24_C(16777215) # define INT_LEAST24_MIN INT24_MIN # define INT_LEAST24_MAX INT24_MAX # define UINT_LEAST24_MAX UINT24_MAX # define INT_FAST24_MIN INT24_MIN # define INT_FAST24_MAX INT24_MAX # define UINT_FAST24_MAX UINT24_MAX # undef __INT_LEAST16_MIN # define __INT_LEAST16_MIN INT24_MIN # undef __INT_LEAST16_MAX # define __INT_LEAST16_MAX INT24_MAX # undef __UINT_LEAST16_MAX # define __UINT_LEAST16_MAX UINT24_MAX # undef __INT_LEAST8_MIN # define __INT_LEAST8_MIN INT24_MIN # undef __INT_LEAST8_MAX # define __INT_LEAST8_MAX INT24_MAX # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT24_MAX #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT24_WIDTH 24 # define INT24_WIDTH UINT24_WIDTH # define UINT_LEAST24_WIDTH UINT24_WIDTH # define INT_LEAST24_WIDTH UINT_LEAST24_WIDTH # define UINT_FAST24_WIDTH UINT24_WIDTH # define INT_FAST24_WIDTH UINT_FAST24_WIDTH # undef __UINT_LEAST16_WIDTH # define 
__UINT_LEAST16_WIDTH UINT24_WIDTH # undef __UINT_LEAST8_WIDTH # define __UINT_LEAST8_WIDTH UINT24_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT24_TYPE__ */ #ifdef __INT16_TYPE__ #define INT16_MAX INT16_C(32767) #define INT16_MIN (-INT16_C(32767)-1) #define UINT16_MAX UINT16_C(65535) # undef __INT_LEAST16_MIN # define __INT_LEAST16_MIN INT16_MIN # undef __INT_LEAST16_MAX # define __INT_LEAST16_MAX INT16_MAX # undef __UINT_LEAST16_MAX # define __UINT_LEAST16_MAX UINT16_MAX # undef __INT_LEAST8_MIN # define __INT_LEAST8_MIN INT16_MIN # undef __INT_LEAST8_MAX # define __INT_LEAST8_MAX INT16_MAX # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT16_MAX #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT16_WIDTH 16 # define INT16_WIDTH UINT16_WIDTH # undef __UINT_LEAST16_WIDTH # define __UINT_LEAST16_WIDTH UINT16_WIDTH # undef __UINT_LEAST8_WIDTH # define __UINT_LEAST8_WIDTH UINT16_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT16_TYPE__ */ #ifdef __INT_LEAST16_MIN # define INT_LEAST16_MIN __INT_LEAST16_MIN # define INT_LEAST16_MAX __INT_LEAST16_MAX # define UINT_LEAST16_MAX __UINT_LEAST16_MAX # define INT_FAST16_MIN __INT_LEAST16_MIN # define INT_FAST16_MAX __INT_LEAST16_MAX # define UINT_FAST16_MAX __UINT_LEAST16_MAX #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT_LEAST16_WIDTH __UINT_LEAST16_WIDTH # define INT_LEAST16_WIDTH UINT_LEAST16_WIDTH # define UINT_FAST16_WIDTH __UINT_LEAST16_WIDTH # define INT_FAST16_WIDTH UINT_FAST16_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT_LEAST16_MIN */ #ifdef __INT8_TYPE__ # define INT8_MAX INT8_C(127) # define INT8_MIN (-INT8_C(127)-1) # define UINT8_MAX UINT8_C(255) # undef __INT_LEAST8_MIN # define __INT_LEAST8_MIN INT8_MIN # undef __INT_LEAST8_MAX # define __INT_LEAST8_MAX INT8_MAX # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT8_MAX #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT8_WIDTH 8 # define INT8_WIDTH UINT8_WIDTH # undef __UINT_LEAST8_WIDTH # define __UINT_LEAST8_WIDTH UINT8_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT8_TYPE__ */ #ifdef __INT_LEAST8_MIN # define INT_LEAST8_MIN __INT_LEAST8_MIN # define INT_LEAST8_MAX __INT_LEAST8_MAX # define UINT_LEAST8_MAX __UINT_LEAST8_MAX # define INT_FAST8_MIN __INT_LEAST8_MIN # define INT_FAST8_MAX __INT_LEAST8_MAX # define UINT_FAST8_MAX __UINT_LEAST8_MAX #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT_LEAST8_WIDTH __UINT_LEAST8_WIDTH # define INT_LEAST8_WIDTH UINT_LEAST8_WIDTH # define UINT_FAST8_WIDTH __UINT_LEAST8_WIDTH # define INT_FAST8_WIDTH UINT_FAST8_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT_LEAST8_MIN */ /* Some utility macros */ #define __INTN_MIN(n) __stdint_join3( INT, n, _MIN) #define __INTN_MAX(n) __stdint_join3( INT, n, _MAX) #define __UINTN_MAX(n) __stdint_join3(UINT, n, _MAX) #define __INTN_C(n, v) __stdint_join3( INT, n, _C(v)) #define __UINTN_C(n, v) __stdint_join3(UINT, n, _C(v)) /* C99 7.18.2.4 Limits of integer types capable of holding object pointers. */ /* C99 7.18.3 Limits of other integer types. */ #define INTPTR_MIN (-__INTPTR_MAX__-1) #define INTPTR_MAX __INTPTR_MAX__ #define UINTPTR_MAX __UINTPTR_MAX__ #define PTRDIFF_MIN (-__PTRDIFF_MAX__-1) #define PTRDIFF_MAX __PTRDIFF_MAX__ #define SIZE_MAX __SIZE_MAX__ /* C23 7.22.2.4 Width of integer types capable of holding object pointers. 
*/ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L /* NB: The C standard requires that these be the same value, but the compiler exposes separate internal width macros. */ #define INTPTR_WIDTH __INTPTR_WIDTH__ #define UINTPTR_WIDTH __UINTPTR_WIDTH__ #endif /* ISO9899:2011 7.20 (C11 Annex K): Define RSIZE_MAX if __STDC_WANT_LIB_EXT1__ * is enabled. */ #if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 #define RSIZE_MAX (SIZE_MAX >> 1) #endif /* C99 7.18.2.5 Limits of greatest-width integer types. */ #define INTMAX_MIN (-__INTMAX_MAX__-1) #define INTMAX_MAX __INTMAX_MAX__ #define UINTMAX_MAX __UINTMAX_MAX__ /* C23 7.22.2.5 Width of greatest-width integer types. */ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L /* NB: The C standard requires that these be the same value, but the compiler exposes separate internal width macros. */ #define INTMAX_WIDTH __INTMAX_WIDTH__ #define UINTMAX_WIDTH __UINTMAX_WIDTH__ #endif /* C99 7.18.3 Limits of other integer types. */ #define SIG_ATOMIC_MIN __INTN_MIN(__SIG_ATOMIC_WIDTH__) #define SIG_ATOMIC_MAX __INTN_MAX(__SIG_ATOMIC_WIDTH__) #ifdef __WINT_UNSIGNED__ # define WINT_MIN __UINTN_C(__WINT_WIDTH__, 0) # define WINT_MAX __UINTN_MAX(__WINT_WIDTH__) #else # define WINT_MIN __INTN_MIN(__WINT_WIDTH__) # define WINT_MAX __INTN_MAX(__WINT_WIDTH__) #endif #ifndef WCHAR_MAX # define WCHAR_MAX __WCHAR_MAX__ #endif #ifndef WCHAR_MIN # if __WCHAR_MAX__ == __INTN_MAX(__WCHAR_WIDTH__) # define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__) # else # define WCHAR_MIN __UINTN_C(__WCHAR_WIDTH__, 0) # endif #endif /* 7.18.4.2 Macros for greatest-width integer constants. */ #define INTMAX_C(v) __int_c(v, __INTMAX_C_SUFFIX__) #define UINTMAX_C(v) __int_c(v, __UINTMAX_C_SUFFIX__) /* C23 7.22.3.x Width of other integer types. */ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L #define PTRDIFF_WIDTH __PTRDIFF_WIDTH__ #define SIG_ATOMIC_WIDTH __SIG_ATOMIC_WIDTH__ #define SIZE_WIDTH __SIZE_WIDTH__ #define WCHAR_WIDTH __WCHAR_WIDTH__ #define WINT_WIDTH __WINT_WIDTH__ #endif #endif /* __STDC_HOSTED__ */ #endif /* __CLANG_STDINT_H */ tgmath.h/*===---- xsaveintrin.h - XSAVE intrinsic ----------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __XSAVEINTRIN_H #define __XSAVEINTRIN_H #ifdef _MSC_VER #define _XCR_XFEATURE_ENABLED_MASK 0 #endif /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xsave"))) static __inline__ void __DEFAULT_FN_ATTRS _xsave(void *__p, unsigned long long __m) { __builtin_ia32_xsave(__p, __m); } static __inline__ void __DEFAULT_FN_ATTRS _xrstor(void *__p, unsigned long long __m) { __builtin_ia32_xrstor(__p, __m); } #ifndef _MSC_VER #define _xgetbv(A) __builtin_ia32_xgetbv((long long)(A)) #define _xsetbv(A, B) __builtin_ia32_xsetbv((unsigned int)(A), (unsigned long long)(B)) #else #ifdef __cplusplus extern "C" { #endif unsigned __int64 __cdecl _xgetbv(unsigned int); void __cdecl _xsetbv(unsigned int, unsigned __int64); #ifdef __cplusplus } #endif #endif /* _MSC_VER */ #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS _xsave64(void *__p, unsigned long long __m) { __builtin_ia32_xsave64(__p, __m); } static __inline__ void __DEFAULT_FN_ATTRS _xrstor64(void *__p, unsigned long long __m) { __builtin_ia32_xrstor64(__p, __m); } #endif #undef __DEFAULT_FN_ATTRS #endif xtestintrin.h/*===---- arm_mve.h - ARM MVE intrinsics -----------------------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __ARM_MVE_H #define __ARM_MVE_H #if !__ARM_FEATURE_MVE #error "MVE support not enabled" #endif #include #ifdef __cplusplus extern "C" { #endif typedef uint16_t mve_pred16_t; typedef __attribute__((__neon_vector_type__(8), __clang_arm_mve_strict_polymorphism)) int16_t int16x8_t; typedef struct { int16x8_t val[2]; } int16x8x2_t; typedef struct { int16x8_t val[4]; } int16x8x4_t; typedef __attribute__((__neon_vector_type__(4), __clang_arm_mve_strict_polymorphism)) int32_t int32x4_t; typedef struct { int32x4_t val[2]; } int32x4x2_t; typedef struct { int32x4_t val[4]; } int32x4x4_t; typedef __attribute__((__neon_vector_type__(2), __clang_arm_mve_strict_polymorphism)) int64_t int64x2_t; typedef struct { int64x2_t val[2]; } int64x2x2_t; typedef struct { int64x2_t val[4]; } int64x2x4_t; typedef __attribute__((__neon_vector_type__(16), __clang_arm_mve_strict_polymorphism)) int8_t int8x16_t; typedef struct { int8x16_t val[2]; } int8x16x2_t; typedef struct { int8x16_t val[4]; } int8x16x4_t; typedef __attribute__((__neon_vector_type__(8), __clang_arm_mve_strict_polymorphism)) uint16_t uint16x8_t; typedef struct { uint16x8_t val[2]; } uint16x8x2_t; typedef struct { uint16x8_t val[4]; } uint16x8x4_t; typedef __attribute__((__neon_vector_type__(4), __clang_arm_mve_strict_polymorphism)) uint32_t uint32x4_t; typedef struct { uint32x4_t val[2]; } uint32x4x2_t; typedef struct { uint32x4_t val[4]; } uint32x4x4_t; typedef __attribute__((__neon_vector_type__(2), __clang_arm_mve_strict_polymorphism)) uint64_t uint64x2_t; typedef struct { uint64x2_t val[2]; } uint64x2x2_t; typedef struct { uint64x2_t val[4]; } uint64x2x4_t; typedef __attribute__((__neon_vector_type__(16), __clang_arm_mve_strict_polymorphism)) uint8_t uint8x16_t; typedef struct { uint8x16_t val[2]; } uint8x16x2_t; typedef struct { uint8x16_t val[4]; } uint8x16x4_t; static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_asrl))) int64_t __arm_asrl(int64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_lsll))) uint64_t __arm_lsll(uint64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_sqrshr))) int32_t __arm_sqrshr(int32_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_sqrshrl))) int64_t __arm_sqrshrl(int64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_sqrshrl_sat48))) int64_t __arm_sqrshrl_sat48(int64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_sqshl))) int32_t __arm_sqshl(int32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_sqshll))) int64_t __arm_sqshll(int64_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_srshr))) int32_t __arm_srshr(int32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_srshrl))) int64_t __arm_srshrl(int64_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_uqrshl))) uint32_t __arm_uqrshl(uint32_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_uqrshll))) uint64_t __arm_uqrshll(uint64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_uqrshll_sat48))) uint64_t __arm_uqrshll_sat48(uint64_t, int32_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_uqshl))) uint32_t __arm_uqshl(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_uqshll))) uint64_t __arm_uqshll(uint64_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_urshr))) uint32_t __arm_urshr(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_urshrl))) uint64_t __arm_urshrl(uint64_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s16))) uint32_t __arm_vabavq_p_s16(uint32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s16))) uint32_t __arm_vabavq_p(uint32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s32))) uint32_t __arm_vabavq_p_s32(uint32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s32))) uint32_t __arm_vabavq_p(uint32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s8))) uint32_t __arm_vabavq_p_s8(uint32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s8))) uint32_t __arm_vabavq_p(uint32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u16))) uint32_t __arm_vabavq_p_u16(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u16))) uint32_t __arm_vabavq_p(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u32))) uint32_t __arm_vabavq_p_u32(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u32))) uint32_t __arm_vabavq_p(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u8))) uint32_t __arm_vabavq_p_u8(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u8))) uint32_t __arm_vabavq_p(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s16))) uint32_t __arm_vabavq_s16(uint32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s16))) uint32_t __arm_vabavq(uint32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s32))) uint32_t __arm_vabavq_s32(uint32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s32))) uint32_t __arm_vabavq(uint32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s8))) uint32_t __arm_vabavq_s8(uint32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s8))) uint32_t __arm_vabavq(uint32_t, int8x16_t, int8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u16))) uint32_t __arm_vabavq_u16(uint32_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u16))) uint32_t __arm_vabavq(uint32_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u32))) uint32_t __arm_vabavq_u32(uint32_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u32))) uint32_t __arm_vabavq(uint32_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u8))) uint32_t __arm_vabavq_u8(uint32_t, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u8))) uint32_t __arm_vabavq(uint32_t, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s16))) int16x8_t __arm_vabdq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s16))) int16x8_t __arm_vabdq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s32))) int32x4_t __arm_vabdq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s32))) int32x4_t __arm_vabdq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s8))) int8x16_t __arm_vabdq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s8))) int8x16_t __arm_vabdq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u16))) uint16x8_t __arm_vabdq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u16))) uint16x8_t __arm_vabdq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u32))) uint32x4_t __arm_vabdq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u32))) uint32x4_t __arm_vabdq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u8))) uint8x16_t __arm_vabdq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u8))) uint8x16_t __arm_vabdq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s16))) int16x8_t __arm_vabdq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s16))) int16x8_t __arm_vabdq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s32))) int32x4_t __arm_vabdq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s32))) int32x4_t __arm_vabdq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s8))) int8x16_t __arm_vabdq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s8))) int8x16_t __arm_vabdq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u16))) uint16x8_t __arm_vabdq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u16))) uint16x8_t __arm_vabdq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u32))) uint32x4_t __arm_vabdq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u32))) uint32x4_t __arm_vabdq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u8))) uint8x16_t __arm_vabdq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u8))) uint8x16_t __arm_vabdq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s16))) int16x8_t __arm_vabdq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s16))) int16x8_t __arm_vabdq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s32))) int32x4_t __arm_vabdq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s32))) int32x4_t __arm_vabdq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s8))) int8x16_t __arm_vabdq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s8))) int8x16_t __arm_vabdq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u16))) uint16x8_t __arm_vabdq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u16))) uint16x8_t __arm_vabdq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u32))) uint32x4_t __arm_vabdq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u32))) uint32x4_t __arm_vabdq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u8))) uint8x16_t __arm_vabdq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u8))) uint8x16_t __arm_vabdq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s16))) int16x8_t __arm_vabsq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s16))) int16x8_t 
__arm_vabsq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s32))) int32x4_t __arm_vabsq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s32))) int32x4_t __arm_vabsq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s8))) int8x16_t __arm_vabsq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s8))) int8x16_t __arm_vabsq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s16))) int16x8_t __arm_vabsq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s16))) int16x8_t __arm_vabsq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s32))) int32x4_t __arm_vabsq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s32))) int32x4_t __arm_vabsq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s8))) int8x16_t __arm_vabsq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s8))) int8x16_t __arm_vabsq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s16))) int16x8_t __arm_vabsq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s16))) int16x8_t __arm_vabsq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s32))) int32x4_t __arm_vabsq_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s32))) int32x4_t __arm_vabsq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s8))) int8x16_t __arm_vabsq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s8))) int8x16_t __arm_vabsq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadciq_m_s32))) int32x4_t __arm_vadciq_m_s32(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadciq_m_s32))) int32x4_t __arm_vadciq_m(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadciq_m_u32))) uint32x4_t __arm_vadciq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadciq_m_u32))) uint32x4_t __arm_vadciq_m(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadciq_s32))) int32x4_t __arm_vadciq_s32(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadciq_s32))) int32x4_t __arm_vadciq(int32x4_t, int32x4_t, unsigned *); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadciq_u32))) uint32x4_t __arm_vadciq_u32(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadciq_u32))) uint32x4_t __arm_vadciq(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadcq_m_s32))) int32x4_t __arm_vadcq_m_s32(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadcq_m_s32))) int32x4_t __arm_vadcq_m(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadcq_m_u32))) uint32x4_t __arm_vadcq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadcq_m_u32))) uint32x4_t __arm_vadcq_m(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadcq_s32))) int32x4_t __arm_vadcq_s32(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadcq_s32))) int32x4_t __arm_vadcq(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadcq_u32))) uint32x4_t __arm_vadcq_u32(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadcq_u32))) uint32x4_t __arm_vadcq(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_p_s32))) int64_t __arm_vaddlvaq_p_s32(int64_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_p_s32))) int64_t __arm_vaddlvaq_p(int64_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_p_u32))) uint64_t __arm_vaddlvaq_p_u32(uint64_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_p_u32))) uint64_t __arm_vaddlvaq_p(uint64_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_s32))) int64_t __arm_vaddlvaq_s32(int64_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_s32))) int64_t __arm_vaddlvaq(int64_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_u32))) uint64_t __arm_vaddlvaq_u32(uint64_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_u32))) uint64_t __arm_vaddlvaq(uint64_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_p_s32))) int64_t __arm_vaddlvq_p_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_p_s32))) int64_t __arm_vaddlvq_p(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_p_u32))) uint64_t __arm_vaddlvq_p_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_p_u32))) uint64_t 
__arm_vaddlvq_p(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_s32))) int64_t __arm_vaddlvq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_s32))) int64_t __arm_vaddlvq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_u32))) uint64_t __arm_vaddlvq_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_u32))) uint64_t __arm_vaddlvq(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s16))) int16x8_t __arm_vaddq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s16))) int16x8_t __arm_vaddq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s32))) int32x4_t __arm_vaddq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s32))) int32x4_t __arm_vaddq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s8))) int8x16_t __arm_vaddq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s8))) int8x16_t __arm_vaddq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u16))) uint16x8_t __arm_vaddq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u16))) uint16x8_t __arm_vaddq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u32))) uint32x4_t __arm_vaddq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u32))) uint32x4_t __arm_vaddq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u8))) uint8x16_t __arm_vaddq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u8))) uint8x16_t __arm_vaddq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s16))) int16x8_t __arm_vaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s16))) int16x8_t __arm_vaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s32))) int32x4_t __arm_vaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s32))) int32x4_t __arm_vaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s8))) int8x16_t __arm_vaddq_m_s8(int8x16_t, 
int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s8))) int8x16_t __arm_vaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u16))) uint16x8_t __arm_vaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u16))) uint16x8_t __arm_vaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u32))) uint32x4_t __arm_vaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u32))) uint32x4_t __arm_vaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u8))) uint8x16_t __arm_vaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u8))) uint8x16_t __arm_vaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s16))) int16x8_t __arm_vaddq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s16))) int16x8_t __arm_vaddq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s32))) int32x4_t __arm_vaddq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s32))) int32x4_t __arm_vaddq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s8))) int8x16_t __arm_vaddq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s8))) int8x16_t __arm_vaddq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u16))) uint16x8_t __arm_vaddq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u16))) uint16x8_t __arm_vaddq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u32))) uint32x4_t __arm_vaddq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u32))) uint32x4_t __arm_vaddq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u8))) uint8x16_t __arm_vaddq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u8))) uint8x16_t __arm_vaddq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s16))) int16x8_t __arm_vaddq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s16))) int16x8_t __arm_vaddq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s32))) int32x4_t __arm_vaddq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s32))) int32x4_t __arm_vaddq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s8))) int8x16_t __arm_vaddq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s8))) int8x16_t __arm_vaddq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u16))) uint16x8_t __arm_vaddq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u16))) uint16x8_t __arm_vaddq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u32))) uint32x4_t __arm_vaddq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u32))) uint32x4_t __arm_vaddq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u8))) uint8x16_t __arm_vaddq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u8))) uint8x16_t __arm_vaddq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s16))) int16x8_t __arm_vaddq_x_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s16))) int16x8_t __arm_vaddq_x(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s32))) int32x4_t __arm_vaddq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s32))) int32x4_t __arm_vaddq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s8))) int8x16_t __arm_vaddq_x_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s8))) int8x16_t __arm_vaddq_x(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u16))) uint16x8_t __arm_vaddq_x_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u16))) uint16x8_t __arm_vaddq_x(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u32))) uint32x4_t __arm_vaddq_x_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u32))) uint32x4_t __arm_vaddq_x(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u8))) uint8x16_t __arm_vaddq_x_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u8))) uint8x16_t __arm_vaddq_x(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s16))) int16x8_t __arm_vaddq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s16))) int16x8_t 
__arm_vaddq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s32))) int32x4_t __arm_vaddq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s32))) int32x4_t __arm_vaddq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s8))) int8x16_t __arm_vaddq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s8))) int8x16_t __arm_vaddq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u16))) uint16x8_t __arm_vaddq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u16))) uint16x8_t __arm_vaddq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u32))) uint32x4_t __arm_vaddq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u32))) uint32x4_t __arm_vaddq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u8))) uint8x16_t __arm_vaddq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u8))) uint8x16_t __arm_vaddq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s16))) int32_t __arm_vaddvaq_p_s16(int32_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s16))) int32_t __arm_vaddvaq_p(int32_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s32))) int32_t __arm_vaddvaq_p_s32(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s32))) int32_t __arm_vaddvaq_p(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s8))) int32_t __arm_vaddvaq_p_s8(int32_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s8))) int32_t __arm_vaddvaq_p(int32_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u16))) uint32_t __arm_vaddvaq_p_u16(uint32_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u16))) uint32_t __arm_vaddvaq_p(uint32_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u32))) uint32_t __arm_vaddvaq_p_u32(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u32))) uint32_t __arm_vaddvaq_p(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u8))) uint32_t __arm_vaddvaq_p_u8(uint32_t, uint8x16_t, mve_pred16_t); static __inline__ 
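/* Illustrative usage sketch, not part of the declarations above. The vaddq
   family follows the usual MVE naming scheme: the plain form takes two
   vectors, "_n" takes a vector and a scalar, "_m" is the merging predicated
   form whose first argument supplies the result for inactive lanes, and "_x"
   is the predicated form whose inactive lanes are left unspecified. Assuming
   the standard one-predicate-bit-per-byte encoding of mve_pred16_t; the
   helper name is hypothetical:

   static int32x4_t add_bias_to_even_lanes(int32x4_t v, int32_t bias)
   {
       mve_pred16_t even = 0x0F0F;                   // lanes 0 and 2 of an int32x4_t
       return __arm_vaddq_m_n_s32(v, v, bias, even); // unselected lanes keep v
   }
*/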
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u8))) uint32_t __arm_vaddvaq_p(uint32_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s16))) int32_t __arm_vaddvaq_s16(int32_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s16))) int32_t __arm_vaddvaq(int32_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s32))) int32_t __arm_vaddvaq_s32(int32_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s32))) int32_t __arm_vaddvaq(int32_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s8))) int32_t __arm_vaddvaq_s8(int32_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s8))) int32_t __arm_vaddvaq(int32_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u16))) uint32_t __arm_vaddvaq_u16(uint32_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u16))) uint32_t __arm_vaddvaq(uint32_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u32))) uint32_t __arm_vaddvaq_u32(uint32_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u32))) uint32_t __arm_vaddvaq(uint32_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u8))) uint32_t __arm_vaddvaq_u8(uint32_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u8))) uint32_t __arm_vaddvaq(uint32_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s16))) int32_t __arm_vaddvq_p_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s16))) int32_t __arm_vaddvq_p(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s32))) int32_t __arm_vaddvq_p_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s32))) int32_t __arm_vaddvq_p(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s8))) int32_t __arm_vaddvq_p_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s8))) int32_t __arm_vaddvq_p(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u16))) uint32_t __arm_vaddvq_p_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u16))) uint32_t __arm_vaddvq_p(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u32))) uint32_t __arm_vaddvq_p_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u32))) uint32_t __arm_vaddvq_p(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u8))) uint32_t 
__arm_vaddvq_p_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u8))) uint32_t __arm_vaddvq_p(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s16))) int32_t __arm_vaddvq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s16))) int32_t __arm_vaddvq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s32))) int32_t __arm_vaddvq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s32))) int32_t __arm_vaddvq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s8))) int32_t __arm_vaddvq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s8))) int32_t __arm_vaddvq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u16))) uint32_t __arm_vaddvq_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u16))) uint32_t __arm_vaddvq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u32))) uint32_t __arm_vaddvq_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u32))) uint32_t __arm_vaddvq(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u8))) uint32_t __arm_vaddvq_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u8))) uint32_t __arm_vaddvq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s16))) int16x8_t __arm_vandq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s16))) int16x8_t __arm_vandq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s32))) int32x4_t __arm_vandq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s32))) int32x4_t __arm_vandq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s8))) int8x16_t __arm_vandq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s8))) int8x16_t __arm_vandq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u16))) uint16x8_t __arm_vandq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u16))) uint16x8_t __arm_vandq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u32))) uint32x4_t __arm_vandq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u32))) uint32x4_t 
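/* Illustrative usage sketch, not part of the declarations above. vaddvq
   reduces a vector to a scalar sum, vaddvaq folds that sum onto an existing
   accumulator, and the "_p" forms only include predicate-selected lanes.
   A hedged tail-predicated sum; __arm_vctp32q and __arm_vldrwq_z_s32 are
   assumed to be provided elsewhere in this header (they are not declared in
   this excerpt), and the helper name is hypothetical:

   static int32_t sum_s32(const int32_t *p, int n)
   {
       int32_t acc = 0;
       for (; n > 0; n -= 4, p += 4) {
           mve_pred16_t tp = __arm_vctp32q((uint32_t)n); // enable the first n lanes
           int32x4_t v = __arm_vldrwq_z_s32(p, tp);      // inactive lanes load as zero
           acc = __arm_vaddvaq_p_s32(acc, v, tp);        // predicated accumulate
       }
       return acc;
   }
*/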
__arm_vandq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u8))) uint8x16_t __arm_vandq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u8))) uint8x16_t __arm_vandq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_s16))) int16x8_t __arm_vandq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_s16))) int16x8_t __arm_vandq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_s32))) int32x4_t __arm_vandq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_s32))) int32x4_t __arm_vandq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_s8))) int8x16_t __arm_vandq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_s8))) int8x16_t __arm_vandq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_u16))) uint16x8_t __arm_vandq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_u16))) uint16x8_t __arm_vandq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_u32))) uint32x4_t __arm_vandq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_u32))) uint32x4_t __arm_vandq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_u8))) uint8x16_t __arm_vandq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_u8))) uint8x16_t __arm_vandq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s16))) int16x8_t __arm_vandq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s16))) int16x8_t __arm_vandq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s32))) int32x4_t __arm_vandq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s32))) int32x4_t __arm_vandq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s8))) int8x16_t __arm_vandq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s8))) int8x16_t __arm_vandq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u16))) uint16x8_t __arm_vandq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u16))) uint16x8_t __arm_vandq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u32))) uint32x4_t __arm_vandq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u32))) uint32x4_t __arm_vandq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u8))) uint8x16_t __arm_vandq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u8))) uint8x16_t __arm_vandq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_s16))) int16x8_t __arm_vbicq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_s16))) int16x8_t __arm_vbicq_m_n(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_s32))) int32x4_t __arm_vbicq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_s32))) int32x4_t __arm_vbicq_m_n(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_u16))) uint16x8_t __arm_vbicq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_u16))) uint16x8_t __arm_vbicq_m_n(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_u32))) uint32x4_t __arm_vbicq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_u32))) uint32x4_t __arm_vbicq_m_n(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s16))) int16x8_t __arm_vbicq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s16))) int16x8_t __arm_vbicq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s32))) int32x4_t __arm_vbicq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s32))) int32x4_t __arm_vbicq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s8))) int8x16_t __arm_vbicq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s8))) int8x16_t __arm_vbicq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u16))) uint16x8_t __arm_vbicq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u16))) uint16x8_t __arm_vbicq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u32))) uint32x4_t __arm_vbicq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u32))) uint32x4_t __arm_vbicq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u8))) uint8x16_t __arm_vbicq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u8))) uint8x16_t __arm_vbicq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_s16))) int16x8_t __arm_vbicq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_s16))) int16x8_t __arm_vbicq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_s32))) int32x4_t __arm_vbicq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_s32))) int32x4_t __arm_vbicq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_u16))) uint16x8_t __arm_vbicq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_u16))) uint16x8_t __arm_vbicq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_u32))) uint32x4_t __arm_vbicq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_u32))) uint32x4_t __arm_vbicq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s16))) int16x8_t __arm_vbicq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s16))) int16x8_t __arm_vbicq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s32))) int32x4_t __arm_vbicq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s32))) int32x4_t __arm_vbicq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s8))) int8x16_t __arm_vbicq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s8))) int8x16_t __arm_vbicq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u16))) uint16x8_t __arm_vbicq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u16))) uint16x8_t __arm_vbicq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u32))) uint32x4_t __arm_vbicq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u32))) uint32x4_t __arm_vbicq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u8))) uint8x16_t __arm_vbicq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u8))) uint8x16_t __arm_vbicq(uint8x16_t, uint8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s16))) int16x8_t __arm_vbicq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s16))) int16x8_t __arm_vbicq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s32))) int32x4_t __arm_vbicq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s32))) int32x4_t __arm_vbicq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s8))) int8x16_t __arm_vbicq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s8))) int8x16_t __arm_vbicq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u16))) uint16x8_t __arm_vbicq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u16))) uint16x8_t __arm_vbicq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u32))) uint32x4_t __arm_vbicq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u32))) uint32x4_t __arm_vbicq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u8))) uint8x16_t __arm_vbicq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u8))) uint8x16_t __arm_vbicq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s16))) int16x8_t __arm_vbrsrq_m_n_s16(int16x8_t, int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s16))) int16x8_t __arm_vbrsrq_m(int16x8_t, int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s32))) int32x4_t __arm_vbrsrq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s32))) int32x4_t __arm_vbrsrq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s8))) int8x16_t __arm_vbrsrq_m_n_s8(int8x16_t, int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s8))) int8x16_t __arm_vbrsrq_m(int8x16_t, int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u16))) uint16x8_t __arm_vbrsrq_m_n_u16(uint16x8_t, uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u16))) uint16x8_t __arm_vbrsrq_m(uint16x8_t, uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u32))) uint32x4_t __arm_vbrsrq_m_n_u32(uint32x4_t, uint32x4_t, int32_t, 
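/* Illustrative usage sketch, not part of the declarations above. vandq is a
   lane-wise bitwise AND and vbicq is "bit clear", i.e. a & ~b per lane (the
   "_n" forms take a scalar mask instead of a second vector). Helper name is
   hypothetical:

   static uint32x4_t clear_flags(uint32x4_t words, uint32x4_t flags)
   {
       return __arm_vbicq_u32(words, flags);   // words & ~flags in every lane
   }
*/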
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u32))) uint32x4_t __arm_vbrsrq_m(uint32x4_t, uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u8))) uint8x16_t __arm_vbrsrq_m_n_u8(uint8x16_t, uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u8))) uint8x16_t __arm_vbrsrq_m(uint8x16_t, uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s16))) int16x8_t __arm_vbrsrq_n_s16(int16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s16))) int16x8_t __arm_vbrsrq(int16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s32))) int32x4_t __arm_vbrsrq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s32))) int32x4_t __arm_vbrsrq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s8))) int8x16_t __arm_vbrsrq_n_s8(int8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s8))) int8x16_t __arm_vbrsrq(int8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u16))) uint16x8_t __arm_vbrsrq_n_u16(uint16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u16))) uint16x8_t __arm_vbrsrq(uint16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u32))) uint32x4_t __arm_vbrsrq_n_u32(uint32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u32))) uint32x4_t __arm_vbrsrq(uint32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u8))) uint8x16_t __arm_vbrsrq_n_u8(uint8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u8))) uint8x16_t __arm_vbrsrq(uint8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s16))) int16x8_t __arm_vbrsrq_x_n_s16(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s16))) int16x8_t __arm_vbrsrq_x(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s32))) int32x4_t __arm_vbrsrq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s32))) int32x4_t __arm_vbrsrq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s8))) int8x16_t __arm_vbrsrq_x_n_s8(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s8))) int8x16_t __arm_vbrsrq_x(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u16))) uint16x8_t __arm_vbrsrq_x_n_u16(uint16x8_t, int32_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u16))) uint16x8_t __arm_vbrsrq_x(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u32))) uint32x4_t __arm_vbrsrq_x_n_u32(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u32))) uint32x4_t __arm_vbrsrq_x(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u8))) uint8x16_t __arm_vbrsrq_x_n_u8(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u8))) uint8x16_t __arm_vbrsrq_x(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s16))) int16x8_t __arm_vcaddq_rot270_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s16))) int16x8_t __arm_vcaddq_rot270_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s32))) int32x4_t __arm_vcaddq_rot270_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s32))) int32x4_t __arm_vcaddq_rot270_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s8))) int8x16_t __arm_vcaddq_rot270_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s8))) int8x16_t __arm_vcaddq_rot270_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u16))) uint16x8_t __arm_vcaddq_rot270_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u16))) uint16x8_t __arm_vcaddq_rot270_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u32))) uint32x4_t __arm_vcaddq_rot270_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u32))) uint32x4_t __arm_vcaddq_rot270_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u8))) uint8x16_t __arm_vcaddq_rot270_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u8))) uint8x16_t __arm_vcaddq_rot270_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s16))) int16x8_t __arm_vcaddq_rot270_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s16))) int16x8_t __arm_vcaddq_rot270(int16x8_t, int16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s32))) int32x4_t __arm_vcaddq_rot270_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s32))) int32x4_t __arm_vcaddq_rot270(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s8))) int8x16_t __arm_vcaddq_rot270_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s8))) int8x16_t __arm_vcaddq_rot270(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u16))) uint16x8_t __arm_vcaddq_rot270_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u16))) uint16x8_t __arm_vcaddq_rot270(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u32))) uint32x4_t __arm_vcaddq_rot270_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u32))) uint32x4_t __arm_vcaddq_rot270(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u8))) uint8x16_t __arm_vcaddq_rot270_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u8))) uint8x16_t __arm_vcaddq_rot270(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s16))) int16x8_t __arm_vcaddq_rot270_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s16))) int16x8_t __arm_vcaddq_rot270_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s32))) int32x4_t __arm_vcaddq_rot270_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s32))) int32x4_t __arm_vcaddq_rot270_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s8))) int8x16_t __arm_vcaddq_rot270_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s8))) int8x16_t __arm_vcaddq_rot270_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u16))) uint16x8_t __arm_vcaddq_rot270_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u16))) uint16x8_t __arm_vcaddq_rot270_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u32))) uint32x4_t __arm_vcaddq_rot270_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u32))) uint32x4_t __arm_vcaddq_rot270_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u8))) uint8x16_t 
__arm_vcaddq_rot270_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u8))) uint8x16_t __arm_vcaddq_rot270_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s16))) int16x8_t __arm_vcaddq_rot90_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s16))) int16x8_t __arm_vcaddq_rot90_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s32))) int32x4_t __arm_vcaddq_rot90_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s32))) int32x4_t __arm_vcaddq_rot90_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s8))) int8x16_t __arm_vcaddq_rot90_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s8))) int8x16_t __arm_vcaddq_rot90_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u16))) uint16x8_t __arm_vcaddq_rot90_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u16))) uint16x8_t __arm_vcaddq_rot90_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u32))) uint32x4_t __arm_vcaddq_rot90_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u32))) uint32x4_t __arm_vcaddq_rot90_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u8))) uint8x16_t __arm_vcaddq_rot90_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u8))) uint8x16_t __arm_vcaddq_rot90_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s16))) int16x8_t __arm_vcaddq_rot90_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s16))) int16x8_t __arm_vcaddq_rot90(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s32))) int32x4_t __arm_vcaddq_rot90_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s32))) int32x4_t __arm_vcaddq_rot90(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s8))) int8x16_t __arm_vcaddq_rot90_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s8))) int8x16_t __arm_vcaddq_rot90(int8x16_t, int8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u16))) uint16x8_t __arm_vcaddq_rot90_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u16))) uint16x8_t __arm_vcaddq_rot90(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u32))) uint32x4_t __arm_vcaddq_rot90_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u32))) uint32x4_t __arm_vcaddq_rot90(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u8))) uint8x16_t __arm_vcaddq_rot90_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u8))) uint8x16_t __arm_vcaddq_rot90(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s16))) int16x8_t __arm_vcaddq_rot90_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s16))) int16x8_t __arm_vcaddq_rot90_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s32))) int32x4_t __arm_vcaddq_rot90_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s32))) int32x4_t __arm_vcaddq_rot90_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s8))) int8x16_t __arm_vcaddq_rot90_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s8))) int8x16_t __arm_vcaddq_rot90_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u16))) uint16x8_t __arm_vcaddq_rot90_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u16))) uint16x8_t __arm_vcaddq_rot90_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u32))) uint32x4_t __arm_vcaddq_rot90_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u32))) uint32x4_t __arm_vcaddq_rot90_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u8))) uint8x16_t __arm_vcaddq_rot90_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u8))) uint8x16_t __arm_vcaddq_rot90_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s16))) int16x8_t __arm_vclsq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s16))) int16x8_t __arm_vclsq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s32))) int32x4_t 
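/* Illustrative usage sketch, not part of the declarations above. The
   vcaddq_rot90/_rot270 intrinsics add with a 90- or 270-degree rotation on
   interleaved complex data (alternating real/imaginary lanes), i.e. roughly
   a + j*b and a - j*b respectively; the exact per-lane formulas are those of
   the ACLE specification. The helper name is hypothetical and the snippet
   only shows the call shape:

   static int16x8_t cadd_j(int16x8_t a, int16x8_t b)
   {
       return __arm_vcaddq_rot90_s16(a, b);    // a + j*b on interleaved pairs
   }
*/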
__arm_vclsq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s32))) int32x4_t __arm_vclsq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s8))) int8x16_t __arm_vclsq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s8))) int8x16_t __arm_vclsq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s16))) int16x8_t __arm_vclsq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s16))) int16x8_t __arm_vclsq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s32))) int32x4_t __arm_vclsq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s32))) int32x4_t __arm_vclsq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s8))) int8x16_t __arm_vclsq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s8))) int8x16_t __arm_vclsq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s16))) int16x8_t __arm_vclsq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s16))) int16x8_t __arm_vclsq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s32))) int32x4_t __arm_vclsq_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s32))) int32x4_t __arm_vclsq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s8))) int8x16_t __arm_vclsq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s8))) int8x16_t __arm_vclsq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s16))) int16x8_t __arm_vclzq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s16))) int16x8_t __arm_vclzq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s32))) int32x4_t __arm_vclzq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s32))) int32x4_t __arm_vclzq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s8))) int8x16_t __arm_vclzq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s8))) int8x16_t __arm_vclzq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u16))) uint16x8_t __arm_vclzq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u16))) uint16x8_t 
__arm_vclzq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u32))) uint32x4_t __arm_vclzq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u32))) uint32x4_t __arm_vclzq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u8))) uint8x16_t __arm_vclzq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u8))) uint8x16_t __arm_vclzq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s16))) int16x8_t __arm_vclzq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s16))) int16x8_t __arm_vclzq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s32))) int32x4_t __arm_vclzq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s32))) int32x4_t __arm_vclzq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s8))) int8x16_t __arm_vclzq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s8))) int8x16_t __arm_vclzq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u16))) uint16x8_t __arm_vclzq_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u16))) uint16x8_t __arm_vclzq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u32))) uint32x4_t __arm_vclzq_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u32))) uint32x4_t __arm_vclzq(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u8))) uint8x16_t __arm_vclzq_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u8))) uint8x16_t __arm_vclzq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s16))) int16x8_t __arm_vclzq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s16))) int16x8_t __arm_vclzq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s32))) int32x4_t __arm_vclzq_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s32))) int32x4_t __arm_vclzq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s8))) int8x16_t __arm_vclzq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s8))) int8x16_t __arm_vclzq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u16))) uint16x8_t __arm_vclzq_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u16))) uint16x8_t __arm_vclzq_x(uint16x8_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u32))) uint32x4_t __arm_vclzq_x_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u32))) uint32x4_t __arm_vclzq_x(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u8))) uint8x16_t __arm_vclzq_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u8))) uint8x16_t __arm_vclzq_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u16))) mve_pred16_t __arm_vcmpcsq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u16))) mve_pred16_t __arm_vcmpcsq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u32))) mve_pred16_t __arm_vcmpcsq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u32))) mve_pred16_t __arm_vcmpcsq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u8))) mve_pred16_t __arm_vcmpcsq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u8))) mve_pred16_t __arm_vcmpcsq_m(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u16))) mve_pred16_t __arm_vcmpcsq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u16))) mve_pred16_t __arm_vcmpcsq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u32))) mve_pred16_t __arm_vcmpcsq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u32))) mve_pred16_t __arm_vcmpcsq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u8))) mve_pred16_t __arm_vcmpcsq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u8))) mve_pred16_t __arm_vcmpcsq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u16))) mve_pred16_t __arm_vcmpcsq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u16))) mve_pred16_t __arm_vcmpcsq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u32))) mve_pred16_t __arm_vcmpcsq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u32))) mve_pred16_t __arm_vcmpcsq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u8))) mve_pred16_t __arm_vcmpcsq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, 
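/* Illustrative usage sketch, not part of the declarations above. vclzq
   counts leading zero bits per lane and vclsq counts leading sign bits per
   lane (signed types only); a typical use is deriving a per-lane
   normalisation shift. Helper name is hypothetical:

   static uint32x4_t norm_shift(uint32x4_t v)
   {
       return __arm_vclzq_u32(v);   // 32 for an all-zero lane, else leading zeros
   }
*/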
__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u8))) mve_pred16_t __arm_vcmpcsq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u16))) mve_pred16_t __arm_vcmpcsq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u16))) mve_pred16_t __arm_vcmpcsq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u32))) mve_pred16_t __arm_vcmpcsq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u32))) mve_pred16_t __arm_vcmpcsq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u8))) mve_pred16_t __arm_vcmpcsq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u8))) mve_pred16_t __arm_vcmpcsq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s16))) mve_pred16_t __arm_vcmpeqq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s16))) mve_pred16_t __arm_vcmpeqq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s32))) mve_pred16_t __arm_vcmpeqq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s32))) mve_pred16_t __arm_vcmpeqq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s8))) mve_pred16_t __arm_vcmpeqq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s8))) mve_pred16_t __arm_vcmpeqq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u16))) mve_pred16_t __arm_vcmpeqq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u16))) mve_pred16_t __arm_vcmpeqq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u32))) mve_pred16_t __arm_vcmpeqq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u32))) mve_pred16_t __arm_vcmpeqq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u8))) mve_pred16_t __arm_vcmpeqq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u8))) mve_pred16_t __arm_vcmpeqq_m(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s16))) mve_pred16_t __arm_vcmpeqq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s16))) mve_pred16_t __arm_vcmpeqq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s32))) mve_pred16_t 
__arm_vcmpeqq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s32))) mve_pred16_t __arm_vcmpeqq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s8))) mve_pred16_t __arm_vcmpeqq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s8))) mve_pred16_t __arm_vcmpeqq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u16))) mve_pred16_t __arm_vcmpeqq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u16))) mve_pred16_t __arm_vcmpeqq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u32))) mve_pred16_t __arm_vcmpeqq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u32))) mve_pred16_t __arm_vcmpeqq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u8))) mve_pred16_t __arm_vcmpeqq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u8))) mve_pred16_t __arm_vcmpeqq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s16))) mve_pred16_t __arm_vcmpeqq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s16))) mve_pred16_t __arm_vcmpeqq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s32))) mve_pred16_t __arm_vcmpeqq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s32))) mve_pred16_t __arm_vcmpeqq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s8))) mve_pred16_t __arm_vcmpeqq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s8))) mve_pred16_t __arm_vcmpeqq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u16))) mve_pred16_t __arm_vcmpeqq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u16))) mve_pred16_t __arm_vcmpeqq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u32))) mve_pred16_t __arm_vcmpeqq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u32))) mve_pred16_t __arm_vcmpeqq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u8))) mve_pred16_t __arm_vcmpeqq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u8))) mve_pred16_t __arm_vcmpeqq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s16))) 
mve_pred16_t __arm_vcmpeqq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s16))) mve_pred16_t __arm_vcmpeqq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s32))) mve_pred16_t __arm_vcmpeqq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s32))) mve_pred16_t __arm_vcmpeqq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s8))) mve_pred16_t __arm_vcmpeqq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s8))) mve_pred16_t __arm_vcmpeqq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u16))) mve_pred16_t __arm_vcmpeqq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u16))) mve_pred16_t __arm_vcmpeqq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u32))) mve_pred16_t __arm_vcmpeqq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u32))) mve_pred16_t __arm_vcmpeqq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u8))) mve_pred16_t __arm_vcmpeqq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u8))) mve_pred16_t __arm_vcmpeqq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s16))) mve_pred16_t __arm_vcmpgeq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s16))) mve_pred16_t __arm_vcmpgeq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s32))) mve_pred16_t __arm_vcmpgeq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s32))) mve_pred16_t __arm_vcmpgeq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s8))) mve_pred16_t __arm_vcmpgeq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s8))) mve_pred16_t __arm_vcmpgeq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s16))) mve_pred16_t __arm_vcmpgeq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s16))) mve_pred16_t __arm_vcmpgeq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s32))) mve_pred16_t __arm_vcmpgeq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s32))) mve_pred16_t __arm_vcmpgeq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s8))) 
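/*
 * The vcmp* family returns an mve_pred16_t: a 16-bit mask with one bit per
 * byte of the 128-bit vector, so a true 32-bit lane sets four bits and a
 * true 8-bit lane sets one. The _m compare variants take an extra
 * mve_pred16_t and only test lanes that are already active, which lets
 * predicates be chained. A minimal sketch (hypothetical variable names,
 * assumes an MVE-enabled target such as Cortex-M55):
 *
 *     int32x4_t a, b, c;                               // loaded elsewhere
 *     mve_pred16_t eq = __arm_vcmpeqq_s32(a, b);       // a[i] == b[i]
 *     mve_pred16_t gt = __arm_vcmpgtq_m_s32(a, c, eq); // a[i] > c[i], and eq
 */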
mve_pred16_t __arm_vcmpgeq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s8))) mve_pred16_t __arm_vcmpgeq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s16))) mve_pred16_t __arm_vcmpgeq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s16))) mve_pred16_t __arm_vcmpgeq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s32))) mve_pred16_t __arm_vcmpgeq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s32))) mve_pred16_t __arm_vcmpgeq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s8))) mve_pred16_t __arm_vcmpgeq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s8))) mve_pred16_t __arm_vcmpgeq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s16))) mve_pred16_t __arm_vcmpgeq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s16))) mve_pred16_t __arm_vcmpgeq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s32))) mve_pred16_t __arm_vcmpgeq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s32))) mve_pred16_t __arm_vcmpgeq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s8))) mve_pred16_t __arm_vcmpgeq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s8))) mve_pred16_t __arm_vcmpgeq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s16))) mve_pred16_t __arm_vcmpgtq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s16))) mve_pred16_t __arm_vcmpgtq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s32))) mve_pred16_t __arm_vcmpgtq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s32))) mve_pred16_t __arm_vcmpgtq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s8))) mve_pred16_t __arm_vcmpgtq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s8))) mve_pred16_t __arm_vcmpgtq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s16))) mve_pred16_t __arm_vcmpgtq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s16))) mve_pred16_t __arm_vcmpgtq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s32))) 
mve_pred16_t __arm_vcmpgtq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s32))) mve_pred16_t __arm_vcmpgtq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s8))) mve_pred16_t __arm_vcmpgtq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s8))) mve_pred16_t __arm_vcmpgtq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s16))) mve_pred16_t __arm_vcmpgtq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s16))) mve_pred16_t __arm_vcmpgtq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s32))) mve_pred16_t __arm_vcmpgtq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s32))) mve_pred16_t __arm_vcmpgtq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s8))) mve_pred16_t __arm_vcmpgtq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s8))) mve_pred16_t __arm_vcmpgtq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s16))) mve_pred16_t __arm_vcmpgtq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s16))) mve_pred16_t __arm_vcmpgtq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s32))) mve_pred16_t __arm_vcmpgtq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s32))) mve_pred16_t __arm_vcmpgtq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s8))) mve_pred16_t __arm_vcmpgtq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s8))) mve_pred16_t __arm_vcmpgtq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u16))) mve_pred16_t __arm_vcmphiq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u16))) mve_pred16_t __arm_vcmphiq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u32))) mve_pred16_t __arm_vcmphiq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u32))) mve_pred16_t __arm_vcmphiq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u8))) mve_pred16_t __arm_vcmphiq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u8))) mve_pred16_t __arm_vcmphiq_m(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u16))) 
mve_pred16_t __arm_vcmphiq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u16))) mve_pred16_t __arm_vcmphiq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u32))) mve_pred16_t __arm_vcmphiq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u32))) mve_pred16_t __arm_vcmphiq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u8))) mve_pred16_t __arm_vcmphiq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u8))) mve_pred16_t __arm_vcmphiq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u16))) mve_pred16_t __arm_vcmphiq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u16))) mve_pred16_t __arm_vcmphiq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u32))) mve_pred16_t __arm_vcmphiq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u32))) mve_pred16_t __arm_vcmphiq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u8))) mve_pred16_t __arm_vcmphiq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u8))) mve_pred16_t __arm_vcmphiq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u16))) mve_pred16_t __arm_vcmphiq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u16))) mve_pred16_t __arm_vcmphiq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u32))) mve_pred16_t __arm_vcmphiq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u32))) mve_pred16_t __arm_vcmphiq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u8))) mve_pred16_t __arm_vcmphiq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u8))) mve_pred16_t __arm_vcmphiq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s16))) mve_pred16_t __arm_vcmpleq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s16))) mve_pred16_t __arm_vcmpleq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s32))) mve_pred16_t __arm_vcmpleq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s32))) mve_pred16_t __arm_vcmpleq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s8))) mve_pred16_t __arm_vcmpleq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s8))) mve_pred16_t __arm_vcmpleq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s16))) mve_pred16_t __arm_vcmpleq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s16))) mve_pred16_t __arm_vcmpleq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s32))) mve_pred16_t __arm_vcmpleq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s32))) mve_pred16_t __arm_vcmpleq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s8))) mve_pred16_t __arm_vcmpleq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s8))) mve_pred16_t __arm_vcmpleq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s16))) mve_pred16_t __arm_vcmpleq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s16))) mve_pred16_t __arm_vcmpleq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s32))) mve_pred16_t __arm_vcmpleq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s32))) mve_pred16_t __arm_vcmpleq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s8))) mve_pred16_t __arm_vcmpleq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s8))) mve_pred16_t __arm_vcmpleq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s16))) mve_pred16_t __arm_vcmpleq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s16))) mve_pred16_t __arm_vcmpleq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s32))) mve_pred16_t __arm_vcmpleq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s32))) mve_pred16_t __arm_vcmpleq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s8))) mve_pred16_t __arm_vcmpleq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s8))) mve_pred16_t __arm_vcmpleq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s16))) mve_pred16_t __arm_vcmpltq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s16))) mve_pred16_t __arm_vcmpltq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s32))) mve_pred16_t __arm_vcmpltq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s32))) mve_pred16_t __arm_vcmpltq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s8))) mve_pred16_t __arm_vcmpltq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s8))) mve_pred16_t __arm_vcmpltq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s16))) mve_pred16_t __arm_vcmpltq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s16))) mve_pred16_t __arm_vcmpltq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s32))) mve_pred16_t __arm_vcmpltq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s32))) mve_pred16_t __arm_vcmpltq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s8))) mve_pred16_t __arm_vcmpltq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s8))) mve_pred16_t __arm_vcmpltq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s16))) mve_pred16_t __arm_vcmpltq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s16))) mve_pred16_t __arm_vcmpltq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s32))) mve_pred16_t __arm_vcmpltq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s32))) mve_pred16_t __arm_vcmpltq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s8))) mve_pred16_t __arm_vcmpltq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s8))) mve_pred16_t __arm_vcmpltq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s16))) mve_pred16_t __arm_vcmpltq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s16))) mve_pred16_t __arm_vcmpltq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s32))) mve_pred16_t __arm_vcmpltq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s32))) mve_pred16_t __arm_vcmpltq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s8))) mve_pred16_t __arm_vcmpltq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s8))) mve_pred16_t __arm_vcmpltq(int8x16_t, int8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s16))) mve_pred16_t __arm_vcmpneq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s16))) mve_pred16_t __arm_vcmpneq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s32))) mve_pred16_t __arm_vcmpneq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s32))) mve_pred16_t __arm_vcmpneq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s8))) mve_pred16_t __arm_vcmpneq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s8))) mve_pred16_t __arm_vcmpneq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u16))) mve_pred16_t __arm_vcmpneq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u16))) mve_pred16_t __arm_vcmpneq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u32))) mve_pred16_t __arm_vcmpneq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u32))) mve_pred16_t __arm_vcmpneq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u8))) mve_pred16_t __arm_vcmpneq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u8))) mve_pred16_t __arm_vcmpneq_m(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s16))) mve_pred16_t __arm_vcmpneq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s16))) mve_pred16_t __arm_vcmpneq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s32))) mve_pred16_t __arm_vcmpneq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s32))) mve_pred16_t __arm_vcmpneq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s8))) mve_pred16_t __arm_vcmpneq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s8))) mve_pred16_t __arm_vcmpneq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u16))) mve_pred16_t __arm_vcmpneq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u16))) mve_pred16_t __arm_vcmpneq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u32))) mve_pred16_t 
__arm_vcmpneq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u32))) mve_pred16_t __arm_vcmpneq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u8))) mve_pred16_t __arm_vcmpneq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u8))) mve_pred16_t __arm_vcmpneq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s16))) mve_pred16_t __arm_vcmpneq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s16))) mve_pred16_t __arm_vcmpneq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s32))) mve_pred16_t __arm_vcmpneq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s32))) mve_pred16_t __arm_vcmpneq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s8))) mve_pred16_t __arm_vcmpneq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s8))) mve_pred16_t __arm_vcmpneq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u16))) mve_pred16_t __arm_vcmpneq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u16))) mve_pred16_t __arm_vcmpneq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u32))) mve_pred16_t __arm_vcmpneq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u32))) mve_pred16_t __arm_vcmpneq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u8))) mve_pred16_t __arm_vcmpneq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u8))) mve_pred16_t __arm_vcmpneq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s16))) mve_pred16_t __arm_vcmpneq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s16))) mve_pred16_t __arm_vcmpneq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s32))) mve_pred16_t __arm_vcmpneq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s32))) mve_pred16_t __arm_vcmpneq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s8))) mve_pred16_t __arm_vcmpneq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s8))) mve_pred16_t __arm_vcmpneq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u16))) mve_pred16_t __arm_vcmpneq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u16))) mve_pred16_t __arm_vcmpneq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u32))) mve_pred16_t __arm_vcmpneq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u32))) mve_pred16_t __arm_vcmpneq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u8))) mve_pred16_t __arm_vcmpneq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u8))) mve_pred16_t __arm_vcmpneq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_s16))) int16x8_t __arm_vcreateq_s16(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_s32))) int32x4_t __arm_vcreateq_s32(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_s64))) int64x2_t __arm_vcreateq_s64(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_s8))) int8x16_t __arm_vcreateq_s8(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_u16))) uint16x8_t __arm_vcreateq_u16(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_u32))) uint32x4_t __arm_vcreateq_u32(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_u64))) uint64x2_t __arm_vcreateq_u64(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_u8))) uint8x16_t __arm_vcreateq_u8(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp16q))) mve_pred16_t __arm_vctp16q(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp16q_m))) mve_pred16_t __arm_vctp16q_m(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp32q))) mve_pred16_t __arm_vctp32q(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp32q_m))) mve_pred16_t __arm_vctp32q_m(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp64q))) mve_pred16_t __arm_vctp64q(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp64q_m))) mve_pred16_t __arm_vctp64q_m(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp8q))) mve_pred16_t __arm_vctp8q(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp8q_m))) mve_pred16_t __arm_vctp8q_m(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u16))) uint16x8_t __arm_vddupq_m_n_u16(uint16x8_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u16))) uint16x8_t __arm_vddupq_m(uint16x8_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u32))) uint32x4_t __arm_vddupq_m_n_u32(uint32x4_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
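/*
 * __arm_vcreateq_* packs two uint64_t values into one 128-bit vector (the
 * first argument supplies the low 64 bits, the second the high 64 bits), and
 * __arm_vctp8q/16q/32q/64q build the usual tail predicate: the first n lanes
 * active, the rest inactive, with n clamped to the lane count. A hedged
 * sketch for a loop remainder (hypothetical names):
 *
 *     uint32_t remaining = 3;                        // e.g. 3 of 4 lanes left
 *     mve_pred16_t tail = __arm_vctp32q(remaining);  // lanes 0..2 active
 *     // pass `tail` to any _m or _x predicated intrinsic for the final pass
 */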
__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u32))) uint32x4_t __arm_vddupq_m(uint32x4_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u8))) uint8x16_t __arm_vddupq_m_n_u8(uint8x16_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u8))) uint8x16_t __arm_vddupq_m(uint8x16_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u16))) uint16x8_t __arm_vddupq_m_wb_u16(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u16))) uint16x8_t __arm_vddupq_m(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u32))) uint32x4_t __arm_vddupq_m_wb_u32(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u32))) uint32x4_t __arm_vddupq_m(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u8))) uint8x16_t __arm_vddupq_m_wb_u8(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u8))) uint8x16_t __arm_vddupq_m(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u16))) uint16x8_t __arm_vddupq_n_u16(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u16))) uint16x8_t __arm_vddupq_u16(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u32))) uint32x4_t __arm_vddupq_n_u32(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u32))) uint32x4_t __arm_vddupq_u32(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u8))) uint8x16_t __arm_vddupq_n_u8(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u8))) uint8x16_t __arm_vddupq_u8(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u16))) uint16x8_t __arm_vddupq_wb_u16(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u16))) uint16x8_t __arm_vddupq_u16(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u32))) uint32x4_t __arm_vddupq_wb_u32(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u32))) uint32x4_t __arm_vddupq_u32(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u8))) uint8x16_t __arm_vddupq_wb_u8(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u8))) uint8x16_t __arm_vddupq_u8(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u16))) uint16x8_t __arm_vddupq_x_n_u16(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u16))) uint16x8_t __arm_vddupq_x_u16(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u32))) uint32x4_t __arm_vddupq_x_n_u32(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u32))) uint32x4_t __arm_vddupq_x_u32(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u8))) uint8x16_t __arm_vddupq_x_n_u8(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u8))) uint8x16_t __arm_vddupq_x_u8(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u16))) uint16x8_t __arm_vddupq_x_wb_u16(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u16))) uint16x8_t __arm_vddupq_x_u16(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u32))) uint32x4_t __arm_vddupq_x_wb_u32(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u32))) uint32x4_t __arm_vddupq_x_u32(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u8))) uint8x16_t __arm_vddupq_x_wb_u8(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u8))) uint8x16_t __arm_vddupq_x_u8(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s16))) int16x8_t __arm_vdupq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s16))) int16x8_t __arm_vdupq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s32))) int32x4_t __arm_vdupq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s32))) int32x4_t __arm_vdupq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s8))) int8x16_t __arm_vdupq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s8))) int8x16_t __arm_vdupq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u16))) uint16x8_t __arm_vdupq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u16))) uint16x8_t __arm_vdupq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u32))) uint32x4_t __arm_vdupq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u32))) uint32x4_t __arm_vdupq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u8))) uint8x16_t 
__arm_vdupq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u8))) uint8x16_t __arm_vdupq_m(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_s16))) int16x8_t __arm_vdupq_n_s16(int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_s32))) int32x4_t __arm_vdupq_n_s32(int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_s8))) int8x16_t __arm_vdupq_n_s8(int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_u16))) uint16x8_t __arm_vdupq_n_u16(uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_u32))) uint32x4_t __arm_vdupq_n_u32(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_u8))) uint8x16_t __arm_vdupq_n_u8(uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_s16))) int16x8_t __arm_vdupq_x_n_s16(int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_s32))) int32x4_t __arm_vdupq_x_n_s32(int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_s8))) int8x16_t __arm_vdupq_x_n_s8(int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_u16))) uint16x8_t __arm_vdupq_x_n_u16(uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_u32))) uint32x4_t __arm_vdupq_x_n_u32(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_u8))) uint8x16_t __arm_vdupq_x_n_u8(uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u16))) uint16x8_t __arm_vdwdupq_m_n_u16(uint16x8_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u16))) uint16x8_t __arm_vdwdupq_m(uint16x8_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u32))) uint32x4_t __arm_vdwdupq_m_n_u32(uint32x4_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u32))) uint32x4_t __arm_vdwdupq_m(uint32x4_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u8))) uint8x16_t __arm_vdwdupq_m_n_u8(uint8x16_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u8))) uint8x16_t __arm_vdwdupq_m(uint8x16_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u16))) uint16x8_t __arm_vdwdupq_m_wb_u16(uint16x8_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u16))) uint16x8_t __arm_vdwdupq_m(uint16x8_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u32))) uint32x4_t __arm_vdwdupq_m_wb_u32(uint32x4_t, uint32_t *, 
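/*
 * __arm_vdupq_n_* broadcasts a scalar to every lane. The _m form takes an
 * "inactive" vector plus a predicate and overwrites only the active lanes,
 * while the _x form leaves inactive lanes unspecified. Sketch (hypothetical
 * names, MVE-enabled target assumed):
 *
 *     int32x4_t ones  = __arm_vdupq_n_s32(1);             // {1,1,1,1}
 *     mve_pred16_t p  = __arm_vctp32q(2);                 // lanes 0..1 active
 *     int32x4_t mixed = __arm_vdupq_m_n_s32(ones, 7, p);  // {7,7,1,1}
 */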
uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u32))) uint32x4_t __arm_vdwdupq_m(uint32x4_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u8))) uint8x16_t __arm_vdwdupq_m_wb_u8(uint8x16_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u8))) uint8x16_t __arm_vdwdupq_m(uint8x16_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u16))) uint16x8_t __arm_vdwdupq_n_u16(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u16))) uint16x8_t __arm_vdwdupq_u16(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u32))) uint32x4_t __arm_vdwdupq_n_u32(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u32))) uint32x4_t __arm_vdwdupq_u32(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u8))) uint8x16_t __arm_vdwdupq_n_u8(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u8))) uint8x16_t __arm_vdwdupq_u8(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u16))) uint16x8_t __arm_vdwdupq_wb_u16(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u16))) uint16x8_t __arm_vdwdupq_u16(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u32))) uint32x4_t __arm_vdwdupq_wb_u32(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u32))) uint32x4_t __arm_vdwdupq_u32(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u8))) uint8x16_t __arm_vdwdupq_wb_u8(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u8))) uint8x16_t __arm_vdwdupq_u8(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u16))) uint16x8_t __arm_vdwdupq_x_n_u16(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u16))) uint16x8_t __arm_vdwdupq_x_u16(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u32))) uint32x4_t __arm_vdwdupq_x_n_u32(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u32))) uint32x4_t __arm_vdwdupq_x_u32(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u8))) uint8x16_t __arm_vdwdupq_x_n_u8(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u8))) uint8x16_t __arm_vdwdupq_x_u8(uint32_t, 
uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u16))) uint16x8_t __arm_vdwdupq_x_wb_u16(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u16))) uint16x8_t __arm_vdwdupq_x_u16(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u32))) uint32x4_t __arm_vdwdupq_x_wb_u32(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u32))) uint32x4_t __arm_vdwdupq_x_u32(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u8))) uint8x16_t __arm_vdwdupq_x_wb_u8(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u8))) uint8x16_t __arm_vdwdupq_x_u8(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s16))) int16x8_t __arm_veorq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s16))) int16x8_t __arm_veorq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s32))) int32x4_t __arm_veorq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s32))) int32x4_t __arm_veorq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s8))) int8x16_t __arm_veorq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s8))) int8x16_t __arm_veorq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u16))) uint16x8_t __arm_veorq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u16))) uint16x8_t __arm_veorq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u32))) uint32x4_t __arm_veorq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u32))) uint32x4_t __arm_veorq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u8))) uint8x16_t __arm_veorq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u8))) uint8x16_t __arm_veorq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_s16))) int16x8_t __arm_veorq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_s16))) int16x8_t __arm_veorq(int16x8_t, int16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_s32))) int32x4_t __arm_veorq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_s32))) int32x4_t __arm_veorq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_s8))) int8x16_t __arm_veorq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_s8))) int8x16_t __arm_veorq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_u16))) uint16x8_t __arm_veorq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_u16))) uint16x8_t __arm_veorq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_u32))) uint32x4_t __arm_veorq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_u32))) uint32x4_t __arm_veorq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_u8))) uint8x16_t __arm_veorq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_u8))) uint8x16_t __arm_veorq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s16))) int16x8_t __arm_veorq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s16))) int16x8_t __arm_veorq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s32))) int32x4_t __arm_veorq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s32))) int32x4_t __arm_veorq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s8))) int8x16_t __arm_veorq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s8))) int8x16_t __arm_veorq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u16))) uint16x8_t __arm_veorq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u16))) uint16x8_t __arm_veorq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u32))) uint32x4_t __arm_veorq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u32))) uint32x4_t __arm_veorq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u8))) uint8x16_t __arm_veorq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u8))) uint8x16_t __arm_veorq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s16))) int16_t 
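/*
 * __arm_veorq_* is a lane-wise exclusive OR. As with the other binary
 * operations in this header, the _m variant takes (inactive, a, b, predicate)
 * and writes a ^ b only into active lanes, copying the inactive vector
 * elsewhere; the _x variant drops the inactive argument. Sketch with
 * hypothetical names:
 *
 *     uint32x4_t masked = __arm_veorq_u32(key, data);     // key ^ data per lane
 */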
__arm_vgetq_lane_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s16))) int16_t __arm_vgetq_lane(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s32))) int32_t __arm_vgetq_lane_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s32))) int32_t __arm_vgetq_lane(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s64))) int64_t __arm_vgetq_lane_s64(int64x2_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s64))) int64_t __arm_vgetq_lane(int64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s8))) int8_t __arm_vgetq_lane_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s8))) int8_t __arm_vgetq_lane(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u16))) uint16_t __arm_vgetq_lane_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u16))) uint16_t __arm_vgetq_lane(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u32))) uint32_t __arm_vgetq_lane_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u32))) uint32_t __arm_vgetq_lane(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u64))) uint64_t __arm_vgetq_lane_u64(uint64x2_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u64))) uint64_t __arm_vgetq_lane(uint64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u8))) uint8_t __arm_vgetq_lane_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u8))) uint8_t __arm_vgetq_lane(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s16))) int16x8_t __arm_vhaddq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s16))) int16x8_t __arm_vhaddq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s32))) int32x4_t __arm_vhaddq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s32))) int32x4_t __arm_vhaddq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s8))) int8x16_t __arm_vhaddq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s8))) int8x16_t __arm_vhaddq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u16))) uint16x8_t __arm_vhaddq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u16))) uint16x8_t __arm_vhaddq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u32))) uint32x4_t __arm_vhaddq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u32))) uint32x4_t __arm_vhaddq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u8))) uint8x16_t __arm_vhaddq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u8))) uint8x16_t __arm_vhaddq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s16))) int16x8_t __arm_vhaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s16))) int16x8_t __arm_vhaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s32))) int32x4_t __arm_vhaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s32))) int32x4_t __arm_vhaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s8))) int8x16_t __arm_vhaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s8))) int8x16_t __arm_vhaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u16))) uint16x8_t __arm_vhaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u16))) uint16x8_t __arm_vhaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u32))) uint32x4_t __arm_vhaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u32))) uint32x4_t __arm_vhaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u8))) uint8x16_t __arm_vhaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u8))) uint8x16_t __arm_vhaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s16))) int16x8_t __arm_vhaddq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s16))) int16x8_t __arm_vhaddq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s32))) int32x4_t __arm_vhaddq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s32))) int32x4_t __arm_vhaddq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s8))) int8x16_t __arm_vhaddq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s8))) int8x16_t __arm_vhaddq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u16))) uint16x8_t __arm_vhaddq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u16))) uint16x8_t __arm_vhaddq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u32))) uint32x4_t __arm_vhaddq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u32))) uint32x4_t __arm_vhaddq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u8))) uint8x16_t __arm_vhaddq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u8))) uint8x16_t __arm_vhaddq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s16))) int16x8_t __arm_vhaddq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s16))) int16x8_t __arm_vhaddq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s32))) int32x4_t __arm_vhaddq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s32))) int32x4_t __arm_vhaddq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s8))) int8x16_t __arm_vhaddq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s8))) int8x16_t __arm_vhaddq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u16))) uint16x8_t __arm_vhaddq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u16))) uint16x8_t __arm_vhaddq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u32))) uint32x4_t __arm_vhaddq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u32))) uint32x4_t __arm_vhaddq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u8))) uint8x16_t __arm_vhaddq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u8))) uint8x16_t __arm_vhaddq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s16))) int16x8_t __arm_vhaddq_x_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s16))) int16x8_t __arm_vhaddq_x(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s32))) int32x4_t 
__arm_vhaddq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s32))) int32x4_t __arm_vhaddq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s8))) int8x16_t __arm_vhaddq_x_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s8))) int8x16_t __arm_vhaddq_x(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u16))) uint16x8_t __arm_vhaddq_x_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u16))) uint16x8_t __arm_vhaddq_x(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u32))) uint32x4_t __arm_vhaddq_x_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u32))) uint32x4_t __arm_vhaddq_x(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u8))) uint8x16_t __arm_vhaddq_x_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u8))) uint8x16_t __arm_vhaddq_x(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s16))) int16x8_t __arm_vhaddq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s16))) int16x8_t __arm_vhaddq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s32))) int32x4_t __arm_vhaddq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s32))) int32x4_t __arm_vhaddq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s8))) int8x16_t __arm_vhaddq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s8))) int8x16_t __arm_vhaddq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u16))) uint16x8_t __arm_vhaddq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u16))) uint16x8_t __arm_vhaddq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u32))) uint32x4_t __arm_vhaddq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u32))) uint32x4_t __arm_vhaddq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u8))) uint8x16_t __arm_vhaddq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u8))) uint8x16_t __arm_vhaddq_x(uint8x16_t, 
uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s16))) int16x8_t __arm_vhcaddq_rot270_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s16))) int16x8_t __arm_vhcaddq_rot270_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s32))) int32x4_t __arm_vhcaddq_rot270_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s32))) int32x4_t __arm_vhcaddq_rot270_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s8))) int8x16_t __arm_vhcaddq_rot270_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s8))) int8x16_t __arm_vhcaddq_rot270_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s16))) int16x8_t __arm_vhcaddq_rot270_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s16))) int16x8_t __arm_vhcaddq_rot270(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s32))) int32x4_t __arm_vhcaddq_rot270_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s32))) int32x4_t __arm_vhcaddq_rot270(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s8))) int8x16_t __arm_vhcaddq_rot270_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s8))) int8x16_t __arm_vhcaddq_rot270(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s16))) int16x8_t __arm_vhcaddq_rot270_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s16))) int16x8_t __arm_vhcaddq_rot270_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s32))) int32x4_t __arm_vhcaddq_rot270_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s32))) int32x4_t __arm_vhcaddq_rot270_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s8))) int8x16_t __arm_vhcaddq_rot270_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s8))) int8x16_t __arm_vhcaddq_rot270_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s16))) int16x8_t __arm_vhcaddq_rot90_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s16))) 
int16x8_t __arm_vhcaddq_rot90_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s32))) int32x4_t __arm_vhcaddq_rot90_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s32))) int32x4_t __arm_vhcaddq_rot90_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s8))) int8x16_t __arm_vhcaddq_rot90_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s8))) int8x16_t __arm_vhcaddq_rot90_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s16))) int16x8_t __arm_vhcaddq_rot90_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s16))) int16x8_t __arm_vhcaddq_rot90(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s32))) int32x4_t __arm_vhcaddq_rot90_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s32))) int32x4_t __arm_vhcaddq_rot90(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s8))) int8x16_t __arm_vhcaddq_rot90_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s8))) int8x16_t __arm_vhcaddq_rot90(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s16))) int16x8_t __arm_vhcaddq_rot90_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s16))) int16x8_t __arm_vhcaddq_rot90_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s32))) int32x4_t __arm_vhcaddq_rot90_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s32))) int32x4_t __arm_vhcaddq_rot90_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s8))) int8x16_t __arm_vhcaddq_rot90_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s8))) int8x16_t __arm_vhcaddq_rot90_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s16))) int16x8_t __arm_vhsubq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s16))) int16x8_t __arm_vhsubq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s32))) int32x4_t __arm_vhsubq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s32))) int32x4_t 
__arm_vhsubq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s8))) int8x16_t __arm_vhsubq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s8))) int8x16_t __arm_vhsubq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u16))) uint16x8_t __arm_vhsubq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u16))) uint16x8_t __arm_vhsubq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u32))) uint32x4_t __arm_vhsubq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u32))) uint32x4_t __arm_vhsubq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u8))) uint8x16_t __arm_vhsubq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u8))) uint8x16_t __arm_vhsubq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s16))) int16x8_t __arm_vhsubq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s16))) int16x8_t __arm_vhsubq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s32))) int32x4_t __arm_vhsubq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s32))) int32x4_t __arm_vhsubq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s8))) int8x16_t __arm_vhsubq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s8))) int8x16_t __arm_vhsubq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u16))) uint16x8_t __arm_vhsubq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u16))) uint16x8_t __arm_vhsubq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u32))) uint32x4_t __arm_vhsubq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u32))) uint32x4_t __arm_vhsubq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u8))) uint8x16_t __arm_vhsubq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u8))) uint8x16_t __arm_vhsubq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s16))) int16x8_t __arm_vhsubq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s16))) int16x8_t __arm_vhsubq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s32))) int32x4_t __arm_vhsubq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s32))) int32x4_t __arm_vhsubq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s8))) int8x16_t __arm_vhsubq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s8))) int8x16_t __arm_vhsubq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u16))) uint16x8_t __arm_vhsubq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u16))) uint16x8_t __arm_vhsubq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u32))) uint32x4_t __arm_vhsubq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u32))) uint32x4_t __arm_vhsubq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u8))) uint8x16_t __arm_vhsubq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u8))) uint8x16_t __arm_vhsubq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s16))) int16x8_t __arm_vhsubq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s16))) int16x8_t __arm_vhsubq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s32))) int32x4_t __arm_vhsubq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s32))) int32x4_t __arm_vhsubq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s8))) int8x16_t __arm_vhsubq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s8))) int8x16_t __arm_vhsubq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u16))) uint16x8_t __arm_vhsubq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u16))) uint16x8_t __arm_vhsubq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u32))) uint32x4_t __arm_vhsubq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u32))) uint32x4_t __arm_vhsubq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u8))) uint8x16_t __arm_vhsubq_u8(uint8x16_t, 
uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u8))) uint8x16_t __arm_vhsubq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s16))) int16x8_t __arm_vhsubq_x_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s16))) int16x8_t __arm_vhsubq_x(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s32))) int32x4_t __arm_vhsubq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s32))) int32x4_t __arm_vhsubq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s8))) int8x16_t __arm_vhsubq_x_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s8))) int8x16_t __arm_vhsubq_x(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u16))) uint16x8_t __arm_vhsubq_x_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u16))) uint16x8_t __arm_vhsubq_x(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u32))) uint32x4_t __arm_vhsubq_x_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u32))) uint32x4_t __arm_vhsubq_x(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u8))) uint8x16_t __arm_vhsubq_x_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u8))) uint8x16_t __arm_vhsubq_x(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s16))) int16x8_t __arm_vhsubq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s16))) int16x8_t __arm_vhsubq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s32))) int32x4_t __arm_vhsubq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s32))) int32x4_t __arm_vhsubq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s8))) int8x16_t __arm_vhsubq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s8))) int8x16_t __arm_vhsubq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u16))) uint16x8_t __arm_vhsubq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u16))) uint16x8_t __arm_vhsubq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u32))) uint32x4_t __arm_vhsubq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u32))) uint32x4_t __arm_vhsubq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u8))) uint8x16_t __arm_vhsubq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u8))) uint8x16_t __arm_vhsubq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u16))) uint16x8_t __arm_vidupq_m_n_u16(uint16x8_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u16))) uint16x8_t __arm_vidupq_m(uint16x8_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u32))) uint32x4_t __arm_vidupq_m_n_u32(uint32x4_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u32))) uint32x4_t __arm_vidupq_m(uint32x4_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u8))) uint8x16_t __arm_vidupq_m_n_u8(uint8x16_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u8))) uint8x16_t __arm_vidupq_m(uint8x16_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u16))) uint16x8_t __arm_vidupq_m_wb_u16(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u16))) uint16x8_t __arm_vidupq_m(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u32))) uint32x4_t __arm_vidupq_m_wb_u32(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u32))) uint32x4_t __arm_vidupq_m(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u8))) uint8x16_t __arm_vidupq_m_wb_u8(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u8))) uint8x16_t __arm_vidupq_m(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u16))) uint16x8_t __arm_vidupq_n_u16(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u16))) uint16x8_t __arm_vidupq_u16(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u32))) uint32x4_t __arm_vidupq_n_u32(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u32))) uint32x4_t __arm_vidupq_u32(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u8))) uint8x16_t __arm_vidupq_n_u8(uint32_t, int); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u8))) uint8x16_t __arm_vidupq_u8(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u16))) uint16x8_t __arm_vidupq_wb_u16(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u16))) uint16x8_t __arm_vidupq_u16(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u32))) uint32x4_t __arm_vidupq_wb_u32(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u32))) uint32x4_t __arm_vidupq_u32(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u8))) uint8x16_t __arm_vidupq_wb_u8(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u8))) uint8x16_t __arm_vidupq_u8(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u16))) uint16x8_t __arm_vidupq_x_n_u16(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u16))) uint16x8_t __arm_vidupq_x_u16(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u32))) uint32x4_t __arm_vidupq_x_n_u32(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u32))) uint32x4_t __arm_vidupq_x_u32(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u8))) uint8x16_t __arm_vidupq_x_n_u8(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u8))) uint8x16_t __arm_vidupq_x_u8(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u16))) uint16x8_t __arm_vidupq_x_wb_u16(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u16))) uint16x8_t __arm_vidupq_x_u16(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u32))) uint32x4_t __arm_vidupq_x_wb_u32(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u32))) uint32x4_t __arm_vidupq_x_u32(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u8))) uint8x16_t __arm_vidupq_x_wb_u8(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u8))) uint8x16_t __arm_vidupq_x_u8(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u16))) uint16x8_t __arm_viwdupq_m_n_u16(uint16x8_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u16))) uint16x8_t __arm_viwdupq_m(uint16x8_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u32))) uint32x4_t __arm_viwdupq_m_n_u32(uint32x4_t, uint32_t, uint32_t, int, mve_pred16_t); 
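The increment-and-duplicate declarations in this stretch (the vidupq forms and the wrapping viwdupq forms, with their _m, _x and _wb variants) generate per-lane index vectors directly in registers. The short sketch below is illustrative only and is not part of the original text: it uses the prefixed names exactly as declared here, plus __arm_vctp32q and __arm_vstrwq_p_u32, which are assumed to be declared elsewhere in the same header, and it assumes an MVE-enabled Armv8.1-M target (for example Cortex-M55).

#include <arm_mve.h>
#include <stdint.h>

/* Illustrative sketch (not from the original): store 0, 1, 2, ..., n-1 into dst.
 * __arm_vidupq_n_u32(base, 1) yields {base, base+1, base+2, base+3}; the step
 * argument must be a constant 1, 2, 4 or 8.  The loop tail is masked with a
 * VCTP predicate and a predicated contiguous store. */
static void store_iota_u32(uint32_t *dst, int n)
{
    for (int i = 0; i < n; i += 4) {
        mve_pred16_t p = __arm_vctp32q((uint32_t)(n - i));  /* active lanes for this step */
        uint32x4_t v = __arm_vidupq_n_u32((uint32_t)i, 1);  /* {i, i+1, i+2, i+3} */
        __arm_vstrwq_p_u32(dst + i, v, p);                  /* masked store of up to 4 lanes */
    }
}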
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u32))) uint32x4_t __arm_viwdupq_m(uint32x4_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u8))) uint8x16_t __arm_viwdupq_m_n_u8(uint8x16_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u8))) uint8x16_t __arm_viwdupq_m(uint8x16_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u16))) uint16x8_t __arm_viwdupq_m_wb_u16(uint16x8_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u16))) uint16x8_t __arm_viwdupq_m(uint16x8_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u32))) uint32x4_t __arm_viwdupq_m_wb_u32(uint32x4_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u32))) uint32x4_t __arm_viwdupq_m(uint32x4_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u8))) uint8x16_t __arm_viwdupq_m_wb_u8(uint8x16_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u8))) uint8x16_t __arm_viwdupq_m(uint8x16_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u16))) uint16x8_t __arm_viwdupq_n_u16(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u16))) uint16x8_t __arm_viwdupq_u16(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u32))) uint32x4_t __arm_viwdupq_n_u32(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u32))) uint32x4_t __arm_viwdupq_u32(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u8))) uint8x16_t __arm_viwdupq_n_u8(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u8))) uint8x16_t __arm_viwdupq_u8(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u16))) uint16x8_t __arm_viwdupq_wb_u16(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u16))) uint16x8_t __arm_viwdupq_u16(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u32))) uint32x4_t __arm_viwdupq_wb_u32(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u32))) uint32x4_t __arm_viwdupq_u32(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u8))) uint8x16_t __arm_viwdupq_wb_u8(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u8))) uint8x16_t __arm_viwdupq_u8(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u16))) uint16x8_t __arm_viwdupq_x_n_u16(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u16))) uint16x8_t __arm_viwdupq_x_u16(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u32))) uint32x4_t __arm_viwdupq_x_n_u32(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u32))) uint32x4_t __arm_viwdupq_x_u32(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u8))) uint8x16_t __arm_viwdupq_x_n_u8(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u8))) uint8x16_t __arm_viwdupq_x_u8(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u16))) uint16x8_t __arm_viwdupq_x_wb_u16(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u16))) uint16x8_t __arm_viwdupq_x_u16(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u32))) uint32x4_t __arm_viwdupq_x_wb_u32(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u32))) uint32x4_t __arm_viwdupq_x_u32(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u8))) uint8x16_t __arm_viwdupq_x_wb_u8(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u8))) uint8x16_t __arm_viwdupq_x_u8(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s16))) int16x8_t __arm_vld1q_s16(const int16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s16))) int16x8_t __arm_vld1q(const int16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s32))) int32x4_t __arm_vld1q_s32(const int32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s32))) int32x4_t __arm_vld1q(const int32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s8))) int8x16_t __arm_vld1q_s8(const int8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s8))) int8x16_t __arm_vld1q(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u16))) uint16x8_t __arm_vld1q_u16(const uint16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u16))) uint16x8_t __arm_vld1q(const uint16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u32))) uint32x4_t __arm_vld1q_u32(const uint32_t *); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u32))) uint32x4_t __arm_vld1q(const uint32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u8))) uint8x16_t __arm_vld1q_u8(const uint8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u8))) uint8x16_t __arm_vld1q(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s16))) int16x8_t __arm_vld1q_z_s16(const int16_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s16))) int16x8_t __arm_vld1q_z(const int16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s32))) int32x4_t __arm_vld1q_z_s32(const int32_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s32))) int32x4_t __arm_vld1q_z(const int32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s8))) int8x16_t __arm_vld1q_z_s8(const int8_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s8))) int8x16_t __arm_vld1q_z(const int8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u16))) uint16x8_t __arm_vld1q_z_u16(const uint16_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u16))) uint16x8_t __arm_vld1q_z(const uint16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u32))) uint32x4_t __arm_vld1q_z_u32(const uint32_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u32))) uint32x4_t __arm_vld1q_z(const uint32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u8))) uint8x16_t __arm_vld1q_z_u8(const uint8_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u8))) uint8x16_t __arm_vld1q_z(const uint8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s16))) int16x8x2_t __arm_vld2q_s16(const int16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s16))) int16x8x2_t __arm_vld2q(const int16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s32))) int32x4x2_t __arm_vld2q_s32(const int32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s32))) int32x4x2_t __arm_vld2q(const int32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s8))) int8x16x2_t __arm_vld2q_s8(const int8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s8))) int8x16x2_t __arm_vld2q(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u16))) uint16x8x2_t __arm_vld2q_u16(const uint16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u16))) uint16x8x2_t __arm_vld2q(const uint16_t *); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u32))) uint32x4x2_t __arm_vld2q_u32(const uint32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u32))) uint32x4x2_t __arm_vld2q(const uint32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u8))) uint8x16x2_t __arm_vld2q_u8(const uint8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u8))) uint8x16x2_t __arm_vld2q(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s16))) int16x8x4_t __arm_vld4q_s16(const int16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s16))) int16x8x4_t __arm_vld4q(const int16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s32))) int32x4x4_t __arm_vld4q_s32(const int32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s32))) int32x4x4_t __arm_vld4q(const int32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s8))) int8x16x4_t __arm_vld4q_s8(const int8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s8))) int8x16x4_t __arm_vld4q(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u16))) uint16x8x4_t __arm_vld4q_u16(const uint16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u16))) uint16x8x4_t __arm_vld4q(const uint16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u32))) uint32x4x4_t __arm_vld4q_u32(const uint32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u32))) uint32x4x4_t __arm_vld4q(const uint32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u8))) uint8x16x4_t __arm_vld4q_u8(const uint8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u8))) uint8x16x4_t __arm_vld4q(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s16))) int16x8_t __arm_vldrbq_gather_offset_s16(const int8_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s16))) int16x8_t __arm_vldrbq_gather_offset(const int8_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s32))) int32x4_t __arm_vldrbq_gather_offset_s32(const int8_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s32))) int32x4_t __arm_vldrbq_gather_offset(const int8_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s8))) int8x16_t __arm_vldrbq_gather_offset_s8(const int8_t *, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s8))) int8x16_t __arm_vldrbq_gather_offset(const int8_t *, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u16))) uint16x8_t __arm_vldrbq_gather_offset_u16(const uint8_t *, uint16x8_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u16))) uint16x8_t __arm_vldrbq_gather_offset(const uint8_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u32))) uint32x4_t __arm_vldrbq_gather_offset_u32(const uint8_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u32))) uint32x4_t __arm_vldrbq_gather_offset(const uint8_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u8))) uint8x16_t __arm_vldrbq_gather_offset_u8(const uint8_t *, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u8))) uint8x16_t __arm_vldrbq_gather_offset(const uint8_t *, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s16))) int16x8_t __arm_vldrbq_gather_offset_z_s16(const int8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s16))) int16x8_t __arm_vldrbq_gather_offset_z(const int8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s32))) int32x4_t __arm_vldrbq_gather_offset_z_s32(const int8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s32))) int32x4_t __arm_vldrbq_gather_offset_z(const int8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s8))) int8x16_t __arm_vldrbq_gather_offset_z_s8(const int8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s8))) int8x16_t __arm_vldrbq_gather_offset_z(const int8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u16))) uint16x8_t __arm_vldrbq_gather_offset_z_u16(const uint8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u16))) uint16x8_t __arm_vldrbq_gather_offset_z(const uint8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u32))) uint32x4_t __arm_vldrbq_gather_offset_z_u32(const uint8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u32))) uint32x4_t __arm_vldrbq_gather_offset_z(const uint8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u8))) uint8x16_t __arm_vldrbq_gather_offset_z_u8(const uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u8))) uint8x16_t __arm_vldrbq_gather_offset_z(const uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_s16))) int16x8_t __arm_vldrbq_s16(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_s32))) 
int32x4_t __arm_vldrbq_s32(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_s8))) int8x16_t __arm_vldrbq_s8(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_u16))) uint16x8_t __arm_vldrbq_u16(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_u32))) uint32x4_t __arm_vldrbq_u32(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_u8))) uint8x16_t __arm_vldrbq_u8(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_s16))) int16x8_t __arm_vldrbq_z_s16(const int8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_s32))) int32x4_t __arm_vldrbq_z_s32(const int8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_s8))) int8x16_t __arm_vldrbq_z_s8(const int8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_u16))) uint16x8_t __arm_vldrbq_z_u16(const uint8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_u32))) uint32x4_t __arm_vldrbq_z_u32(const uint8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_u8))) uint8x16_t __arm_vldrbq_z_u8(const uint8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_s64))) int64x2_t __arm_vldrdq_gather_base_s64(uint64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_u64))) uint64x2_t __arm_vldrdq_gather_base_u64(uint64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_wb_s64))) int64x2_t __arm_vldrdq_gather_base_wb_s64(uint64x2_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_wb_u64))) uint64x2_t __arm_vldrdq_gather_base_wb_u64(uint64x2_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_wb_z_s64))) int64x2_t __arm_vldrdq_gather_base_wb_z_s64(uint64x2_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_wb_z_u64))) uint64x2_t __arm_vldrdq_gather_base_wb_z_u64(uint64x2_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_z_s64))) int64x2_t __arm_vldrdq_gather_base_z_s64(uint64x2_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_z_u64))) uint64x2_t __arm_vldrdq_gather_base_z_u64(uint64x2_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_s64))) int64x2_t __arm_vldrdq_gather_offset_s64(const int64_t *, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_s64))) int64x2_t __arm_vldrdq_gather_offset(const int64_t *, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_u64))) uint64x2_t __arm_vldrdq_gather_offset_u64(const uint64_t *, uint64x2_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_u64))) uint64x2_t __arm_vldrdq_gather_offset(const uint64_t *, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_z_s64))) int64x2_t __arm_vldrdq_gather_offset_z_s64(const int64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_z_s64))) int64x2_t __arm_vldrdq_gather_offset_z(const int64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_z_u64))) uint64x2_t __arm_vldrdq_gather_offset_z_u64(const uint64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_z_u64))) uint64x2_t __arm_vldrdq_gather_offset_z(const uint64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_s64))) int64x2_t __arm_vldrdq_gather_shifted_offset_s64(const int64_t *, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_s64))) int64x2_t __arm_vldrdq_gather_shifted_offset(const int64_t *, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_u64))) uint64x2_t __arm_vldrdq_gather_shifted_offset_u64(const uint64_t *, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_u64))) uint64x2_t __arm_vldrdq_gather_shifted_offset(const uint64_t *, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_s64))) int64x2_t __arm_vldrdq_gather_shifted_offset_z_s64(const int64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_s64))) int64x2_t __arm_vldrdq_gather_shifted_offset_z(const int64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_u64))) uint64x2_t __arm_vldrdq_gather_shifted_offset_z_u64(const uint64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_u64))) uint64x2_t __arm_vldrdq_gather_shifted_offset_z(const uint64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_s16))) int16x8_t __arm_vldrhq_gather_offset_s16(const int16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_s16))) int16x8_t __arm_vldrhq_gather_offset(const int16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_s32))) int32x4_t __arm_vldrhq_gather_offset_s32(const int16_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_s32))) int32x4_t __arm_vldrhq_gather_offset(const int16_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_u16))) uint16x8_t __arm_vldrhq_gather_offset_u16(const uint16_t *, uint16x8_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_u16))) uint16x8_t __arm_vldrhq_gather_offset(const uint16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_u32))) uint32x4_t __arm_vldrhq_gather_offset_u32(const uint16_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_u32))) uint32x4_t __arm_vldrhq_gather_offset(const uint16_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s16))) int16x8_t __arm_vldrhq_gather_offset_z_s16(const int16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s16))) int16x8_t __arm_vldrhq_gather_offset_z(const int16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s32))) int32x4_t __arm_vldrhq_gather_offset_z_s32(const int16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s32))) int32x4_t __arm_vldrhq_gather_offset_z(const int16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u16))) uint16x8_t __arm_vldrhq_gather_offset_z_u16(const uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u16))) uint16x8_t __arm_vldrhq_gather_offset_z(const uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u32))) uint32x4_t __arm_vldrhq_gather_offset_z_u32(const uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u32))) uint32x4_t __arm_vldrhq_gather_offset_z(const uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s16))) int16x8_t __arm_vldrhq_gather_shifted_offset_s16(const int16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s16))) int16x8_t __arm_vldrhq_gather_shifted_offset(const int16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s32))) int32x4_t __arm_vldrhq_gather_shifted_offset_s32(const int16_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s32))) int32x4_t __arm_vldrhq_gather_shifted_offset(const int16_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u16))) uint16x8_t __arm_vldrhq_gather_shifted_offset_u16(const uint16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u16))) uint16x8_t __arm_vldrhq_gather_shifted_offset(const uint16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u32))) uint32x4_t __arm_vldrhq_gather_shifted_offset_u32(const uint16_t *, uint32x4_t); 
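/*
 * Illustrative usage sketch (editorial addition, not part of the embedded
 * header text): the declarations above expose the MVE half-word gather loads
 * both under explicitly typed names (__arm_vldrhq_gather_offset_u16,
 * __arm_vldrhq_gather_shifted_offset_u16, ...) and as overloaded entry points
 * (__arm_vldrhq_gather_offset, ...). A minimal sketch follows, assuming an
 * MVE-enabled target such as Cortex-M55, that __arm_vctp16q is available from
 * elsewhere in this header, and that the helper name and its arguments are
 * placeholders chosen for illustration only.
 */
#include <arm_mve.h>   /* for a standalone build of this sketch; harmless here due to the include guard */
#include <stdint.h>

/* Gather up to 8 uint16_t table entries. Each lane of `indices` is an element
 * index; the "shifted offset" form scales it by sizeof(uint16_t) into a byte
 * offset. The vctp16q predicate keeps only the first `n` lanes active, and the
 * _z variant zeroes the inactive lanes of the result. */
static inline uint16x8_t gather_first_n_u16(const uint16_t *table,
                                            uint16x8_t indices, uint32_t n)
{
    mve_pred16_t p = __arm_vctp16q(n);  /* predicate: lanes 0..n-1 active */
    return __arm_vldrhq_gather_shifted_offset_z_u16(table, indices, p);
}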
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u32))) uint32x4_t __arm_vldrhq_gather_shifted_offset(const uint16_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s16))) int16x8_t __arm_vldrhq_gather_shifted_offset_z_s16(const int16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s16))) int16x8_t __arm_vldrhq_gather_shifted_offset_z(const int16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s32))) int32x4_t __arm_vldrhq_gather_shifted_offset_z_s32(const int16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s32))) int32x4_t __arm_vldrhq_gather_shifted_offset_z(const int16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u16))) uint16x8_t __arm_vldrhq_gather_shifted_offset_z_u16(const uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u16))) uint16x8_t __arm_vldrhq_gather_shifted_offset_z(const uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u32))) uint32x4_t __arm_vldrhq_gather_shifted_offset_z_u32(const uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u32))) uint32x4_t __arm_vldrhq_gather_shifted_offset_z(const uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_s16))) int16x8_t __arm_vldrhq_s16(const int16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_s32))) int32x4_t __arm_vldrhq_s32(const int16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_u16))) uint16x8_t __arm_vldrhq_u16(const uint16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_u32))) uint32x4_t __arm_vldrhq_u32(const uint16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_z_s16))) int16x8_t __arm_vldrhq_z_s16(const int16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_z_s32))) int32x4_t __arm_vldrhq_z_s32(const int16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_z_u16))) uint16x8_t __arm_vldrhq_z_u16(const uint16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_z_u32))) uint32x4_t __arm_vldrhq_z_u32(const uint16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_s32))) int32x4_t __arm_vldrwq_gather_base_s32(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_u32))) uint32x4_t __arm_vldrwq_gather_base_u32(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_s32))) 
int32x4_t __arm_vldrwq_gather_base_wb_s32(uint32x4_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_u32))) uint32x4_t __arm_vldrwq_gather_base_wb_u32(uint32x4_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_z_s32))) int32x4_t __arm_vldrwq_gather_base_wb_z_s32(uint32x4_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_z_u32))) uint32x4_t __arm_vldrwq_gather_base_wb_z_u32(uint32x4_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_z_s32))) int32x4_t __arm_vldrwq_gather_base_z_s32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_z_u32))) uint32x4_t __arm_vldrwq_gather_base_z_u32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_s32))) int32x4_t __arm_vldrwq_gather_offset_s32(const int32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_s32))) int32x4_t __arm_vldrwq_gather_offset(const int32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_u32))) uint32x4_t __arm_vldrwq_gather_offset_u32(const uint32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_u32))) uint32x4_t __arm_vldrwq_gather_offset(const uint32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_s32))) int32x4_t __arm_vldrwq_gather_offset_z_s32(const int32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_s32))) int32x4_t __arm_vldrwq_gather_offset_z(const int32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_u32))) uint32x4_t __arm_vldrwq_gather_offset_z_u32(const uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_u32))) uint32x4_t __arm_vldrwq_gather_offset_z(const uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_s32))) int32x4_t __arm_vldrwq_gather_shifted_offset_s32(const int32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_s32))) int32x4_t __arm_vldrwq_gather_shifted_offset(const int32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_u32))) uint32x4_t __arm_vldrwq_gather_shifted_offset_u32(const uint32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_u32))) uint32x4_t __arm_vldrwq_gather_shifted_offset(const uint32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_s32))) int32x4_t __arm_vldrwq_gather_shifted_offset_z_s32(const int32_t *, uint32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_s32))) int32x4_t __arm_vldrwq_gather_shifted_offset_z(const int32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_u32))) uint32x4_t __arm_vldrwq_gather_shifted_offset_z_u32(const uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_u32))) uint32x4_t __arm_vldrwq_gather_shifted_offset_z(const uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_s32))) int32x4_t __arm_vldrwq_s32(const int32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_u32))) uint32x4_t __arm_vldrwq_u32(const uint32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_z_s32))) int32x4_t __arm_vldrwq_z_s32(const int32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_z_u32))) uint32x4_t __arm_vldrwq_z_u32(const uint32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s16))) uint16x8_t __arm_vmaxaq_m_s16(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s16))) uint16x8_t __arm_vmaxaq_m(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s32))) uint32x4_t __arm_vmaxaq_m_s32(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s32))) uint32x4_t __arm_vmaxaq_m(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s8))) uint8x16_t __arm_vmaxaq_m_s8(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s8))) uint8x16_t __arm_vmaxaq_m(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s16))) uint16x8_t __arm_vmaxaq_s16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s16))) uint16x8_t __arm_vmaxaq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s32))) uint32x4_t __arm_vmaxaq_s32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s32))) uint32x4_t __arm_vmaxaq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s8))) uint8x16_t __arm_vmaxaq_s8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s8))) uint8x16_t __arm_vmaxaq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s16))) uint16_t __arm_vmaxavq_p_s16(uint16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s16))) uint16_t __arm_vmaxavq_p(uint16_t, int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s32))) uint32_t __arm_vmaxavq_p_s32(uint32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s32))) uint32_t __arm_vmaxavq_p(uint32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s8))) uint8_t __arm_vmaxavq_p_s8(uint8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s8))) uint8_t __arm_vmaxavq_p(uint8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s16))) uint16_t __arm_vmaxavq_s16(uint16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s16))) uint16_t __arm_vmaxavq(uint16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s32))) uint32_t __arm_vmaxavq_s32(uint32_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s32))) uint32_t __arm_vmaxavq(uint32_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s8))) uint8_t __arm_vmaxavq_s8(uint8_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s8))) uint8_t __arm_vmaxavq(uint8_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s16))) int16x8_t __arm_vmaxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s16))) int16x8_t __arm_vmaxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s32))) int32x4_t __arm_vmaxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s32))) int32x4_t __arm_vmaxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s8))) int8x16_t __arm_vmaxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s8))) int8x16_t __arm_vmaxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u16))) uint16x8_t __arm_vmaxq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u16))) uint16x8_t __arm_vmaxq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u32))) uint32x4_t __arm_vmaxq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u32))) uint32x4_t __arm_vmaxq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u8))) uint8x16_t __arm_vmaxq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u8))) uint8x16_t __arm_vmaxq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s16))) int16x8_t __arm_vmaxq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s16))) int16x8_t __arm_vmaxq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s32))) int32x4_t __arm_vmaxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s32))) int32x4_t __arm_vmaxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s8))) int8x16_t __arm_vmaxq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s8))) int8x16_t __arm_vmaxq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u16))) uint16x8_t __arm_vmaxq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u16))) uint16x8_t __arm_vmaxq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u32))) uint32x4_t __arm_vmaxq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u32))) uint32x4_t __arm_vmaxq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u8))) uint8x16_t __arm_vmaxq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u8))) uint8x16_t __arm_vmaxq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s16))) int16x8_t __arm_vmaxq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s16))) int16x8_t __arm_vmaxq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s32))) int32x4_t __arm_vmaxq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s32))) int32x4_t __arm_vmaxq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s8))) int8x16_t __arm_vmaxq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s8))) int8x16_t __arm_vmaxq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u16))) uint16x8_t __arm_vmaxq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u16))) uint16x8_t __arm_vmaxq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u32))) uint32x4_t __arm_vmaxq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u32))) uint32x4_t __arm_vmaxq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u8))) uint8x16_t __arm_vmaxq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u8))) uint8x16_t __arm_vmaxq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s16))) int16_t __arm_vmaxvq_p_s16(int16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s16))) int16_t __arm_vmaxvq_p(int16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s32))) int32_t __arm_vmaxvq_p_s32(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s32))) int32_t __arm_vmaxvq_p(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s8))) int8_t __arm_vmaxvq_p_s8(int8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s8))) int8_t __arm_vmaxvq_p(int8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u16))) uint16_t __arm_vmaxvq_p_u16(uint16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u16))) uint16_t __arm_vmaxvq_p(uint16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u32))) uint32_t __arm_vmaxvq_p_u32(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u32))) uint32_t __arm_vmaxvq_p(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u8))) uint8_t __arm_vmaxvq_p_u8(uint8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u8))) uint8_t __arm_vmaxvq_p(uint8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s16))) int16_t __arm_vmaxvq_s16(int16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s16))) int16_t __arm_vmaxvq(int16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s32))) int32_t __arm_vmaxvq_s32(int32_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s32))) int32_t __arm_vmaxvq(int32_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s8))) int8_t __arm_vmaxvq_s8(int8_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s8))) int8_t __arm_vmaxvq(int8_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u16))) uint16_t __arm_vmaxvq_u16(uint16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u16))) uint16_t __arm_vmaxvq(uint16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u32))) uint32_t __arm_vmaxvq_u32(uint32_t, 
uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u32))) uint32_t __arm_vmaxvq(uint32_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u8))) uint8_t __arm_vmaxvq_u8(uint8_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u8))) uint8_t __arm_vmaxvq(uint8_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s16))) uint16x8_t __arm_vminaq_m_s16(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s16))) uint16x8_t __arm_vminaq_m(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s32))) uint32x4_t __arm_vminaq_m_s32(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s32))) uint32x4_t __arm_vminaq_m(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s8))) uint8x16_t __arm_vminaq_m_s8(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s8))) uint8x16_t __arm_vminaq_m(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s16))) uint16x8_t __arm_vminaq_s16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s16))) uint16x8_t __arm_vminaq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s32))) uint32x4_t __arm_vminaq_s32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s32))) uint32x4_t __arm_vminaq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s8))) uint8x16_t __arm_vminaq_s8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s8))) uint8x16_t __arm_vminaq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s16))) uint16_t __arm_vminavq_p_s16(uint16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s16))) uint16_t __arm_vminavq_p(uint16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s32))) uint32_t __arm_vminavq_p_s32(uint32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s32))) uint32_t __arm_vminavq_p(uint32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s8))) uint8_t __arm_vminavq_p_s8(uint8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s8))) uint8_t __arm_vminavq_p(uint8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s16))) uint16_t __arm_vminavq_s16(uint16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s16))) uint16_t __arm_vminavq(uint16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s32))) uint32_t __arm_vminavq_s32(uint32_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s32))) uint32_t __arm_vminavq(uint32_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s8))) uint8_t __arm_vminavq_s8(uint8_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s8))) uint8_t __arm_vminavq(uint8_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s16))) int16x8_t __arm_vminq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s16))) int16x8_t __arm_vminq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s32))) int32x4_t __arm_vminq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s32))) int32x4_t __arm_vminq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s8))) int8x16_t __arm_vminq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s8))) int8x16_t __arm_vminq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u16))) uint16x8_t __arm_vminq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u16))) uint16x8_t __arm_vminq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u32))) uint32x4_t __arm_vminq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u32))) uint32x4_t __arm_vminq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u8))) uint8x16_t __arm_vminq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u8))) uint8x16_t __arm_vminq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_s16))) int16x8_t __arm_vminq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_s16))) int16x8_t __arm_vminq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_s32))) int32x4_t __arm_vminq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_s32))) int32x4_t __arm_vminq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_s8))) int8x16_t __arm_vminq_s8(int8x16_t, int8x16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_s8))) int8x16_t __arm_vminq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_u16))) uint16x8_t __arm_vminq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_u16))) uint16x8_t __arm_vminq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_u32))) uint32x4_t __arm_vminq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_u32))) uint32x4_t __arm_vminq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_u8))) uint8x16_t __arm_vminq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_u8))) uint8x16_t __arm_vminq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s16))) int16x8_t __arm_vminq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s16))) int16x8_t __arm_vminq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s32))) int32x4_t __arm_vminq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s32))) int32x4_t __arm_vminq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s8))) int8x16_t __arm_vminq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s8))) int8x16_t __arm_vminq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u16))) uint16x8_t __arm_vminq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u16))) uint16x8_t __arm_vminq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u32))) uint32x4_t __arm_vminq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u32))) uint32x4_t __arm_vminq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u8))) uint8x16_t __arm_vminq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u8))) uint8x16_t __arm_vminq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s16))) int16_t __arm_vminvq_p_s16(int16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s16))) int16_t __arm_vminvq_p(int16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s32))) int32_t __arm_vminvq_p_s32(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s32))) int32_t __arm_vminvq_p(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s8))) int8_t __arm_vminvq_p_s8(int8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s8))) int8_t __arm_vminvq_p(int8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u16))) uint16_t __arm_vminvq_p_u16(uint16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u16))) uint16_t __arm_vminvq_p(uint16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u32))) uint32_t __arm_vminvq_p_u32(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u32))) uint32_t __arm_vminvq_p(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u8))) uint8_t __arm_vminvq_p_u8(uint8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u8))) uint8_t __arm_vminvq_p(uint8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s16))) int16_t __arm_vminvq_s16(int16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s16))) int16_t __arm_vminvq(int16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s32))) int32_t __arm_vminvq_s32(int32_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s32))) int32_t __arm_vminvq(int32_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s8))) int8_t __arm_vminvq_s8(int8_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s8))) int8_t __arm_vminvq(int8_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u16))) uint16_t __arm_vminvq_u16(uint16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u16))) uint16_t __arm_vminvq(uint16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u32))) uint32_t __arm_vminvq_u32(uint32_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u32))) uint32_t __arm_vminvq(uint32_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u8))) uint8_t __arm_vminvq_u8(uint8_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u8))) uint8_t __arm_vminvq(uint8_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s16))) int32_t __arm_vmladavaq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s16))) int32_t __arm_vmladavaq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ 
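/*
 * Usage note (editorial sketch, kept entirely inside a comment so the adjacent
 * declaration is not interrupted): __arm_vmaxvq_s8/s16/s32 and the unsigned and
 * vminvq counterparts reduce a whole vector against a running scalar, and the
 * _p forms only consider lanes whose predicate bits are set. A hedged example
 * of a max-reduction over an int16_t buffer, assuming __arm_vctp16q from this
 * header and INT16_MIN from <stdint.h>:
 *
 *   int16_t m = INT16_MIN;
 *   size_t i = 0;
 *   for (; i + 8 <= len; i += 8)
 *       m = __arm_vmaxvq_s16(m, __arm_vldrhq_s16(&buf[i]));
 *   if (i < len) {                                  // predicated tail
 *       mve_pred16_t p = __arm_vctp16q((uint32_t)(len - i));
 *       m = __arm_vmaxvq_p_s16(m, __arm_vldrhq_z_s16(&buf[i], p), p);
 *   }
 */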
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s32))) int32_t __arm_vmladavaq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s32))) int32_t __arm_vmladavaq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s8))) int32_t __arm_vmladavaq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s8))) int32_t __arm_vmladavaq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u16))) uint32_t __arm_vmladavaq_p_u16(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u16))) uint32_t __arm_vmladavaq_p(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u32))) uint32_t __arm_vmladavaq_p_u32(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u32))) uint32_t __arm_vmladavaq_p(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u8))) uint32_t __arm_vmladavaq_p_u8(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u8))) uint32_t __arm_vmladavaq_p(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s16))) int32_t __arm_vmladavaq_s16(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s16))) int32_t __arm_vmladavaq(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s32))) int32_t __arm_vmladavaq_s32(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s32))) int32_t __arm_vmladavaq(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s8))) int32_t __arm_vmladavaq_s8(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s8))) int32_t __arm_vmladavaq(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u16))) uint32_t __arm_vmladavaq_u16(uint32_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u16))) uint32_t __arm_vmladavaq(uint32_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u32))) uint32_t __arm_vmladavaq_u32(uint32_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u32))) uint32_t __arm_vmladavaq(uint32_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u8))) uint32_t __arm_vmladavaq_u8(uint32_t, 
uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u8))) uint32_t __arm_vmladavaq(uint32_t, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s16))) int32_t __arm_vmladavaxq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s16))) int32_t __arm_vmladavaxq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s32))) int32_t __arm_vmladavaxq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s32))) int32_t __arm_vmladavaxq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s8))) int32_t __arm_vmladavaxq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s8))) int32_t __arm_vmladavaxq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s16))) int32_t __arm_vmladavaxq_s16(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s16))) int32_t __arm_vmladavaxq(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s32))) int32_t __arm_vmladavaxq_s32(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s32))) int32_t __arm_vmladavaxq(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s8))) int32_t __arm_vmladavaxq_s8(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s8))) int32_t __arm_vmladavaxq(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s16))) int32_t __arm_vmladavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s16))) int32_t __arm_vmladavq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s32))) int32_t __arm_vmladavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s32))) int32_t __arm_vmladavq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s8))) int32_t __arm_vmladavq_p_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s8))) int32_t __arm_vmladavq_p(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u16))) uint32_t __arm_vmladavq_p_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u16))) uint32_t 
__arm_vmladavq_p(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u32))) uint32_t __arm_vmladavq_p_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u32))) uint32_t __arm_vmladavq_p(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u8))) uint32_t __arm_vmladavq_p_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u8))) uint32_t __arm_vmladavq_p(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s16))) int32_t __arm_vmladavq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s16))) int32_t __arm_vmladavq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s32))) int32_t __arm_vmladavq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s32))) int32_t __arm_vmladavq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s8))) int32_t __arm_vmladavq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s8))) int32_t __arm_vmladavq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u16))) uint32_t __arm_vmladavq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u16))) uint32_t __arm_vmladavq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u32))) uint32_t __arm_vmladavq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u32))) uint32_t __arm_vmladavq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u8))) uint32_t __arm_vmladavq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u8))) uint32_t __arm_vmladavq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s16))) int32_t __arm_vmladavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s16))) int32_t __arm_vmladavxq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s32))) int32_t __arm_vmladavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s32))) int32_t __arm_vmladavxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s8))) int32_t __arm_vmladavxq_p_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s8))) int32_t __arm_vmladavxq_p(int8x16_t, int8x16_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s16))) int32_t __arm_vmladavxq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s16))) int32_t __arm_vmladavxq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s32))) int32_t __arm_vmladavxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s32))) int32_t __arm_vmladavxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s8))) int32_t __arm_vmladavxq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s8))) int32_t __arm_vmladavxq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_s16))) int64_t __arm_vmlaldavaq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_s16))) int64_t __arm_vmlaldavaq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_s32))) int64_t __arm_vmlaldavaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_s32))) int64_t __arm_vmlaldavaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_u16))) uint64_t __arm_vmlaldavaq_p_u16(uint64_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_u16))) uint64_t __arm_vmlaldavaq_p(uint64_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_u32))) uint64_t __arm_vmlaldavaq_p_u32(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_u32))) uint64_t __arm_vmlaldavaq_p(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_s16))) int64_t __arm_vmlaldavaq_s16(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_s16))) int64_t __arm_vmlaldavaq(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_s32))) int64_t __arm_vmlaldavaq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_s32))) int64_t __arm_vmlaldavaq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_u16))) uint64_t __arm_vmlaldavaq_u16(uint64_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_u16))) uint64_t __arm_vmlaldavaq(uint64_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_u32))) uint64_t __arm_vmlaldavaq_u32(uint64_t, uint32x4_t, uint32x4_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_u32))) uint64_t __arm_vmlaldavaq(uint64_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_p_s16))) int64_t __arm_vmlaldavaxq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_p_s16))) int64_t __arm_vmlaldavaxq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_p_s32))) int64_t __arm_vmlaldavaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_p_s32))) int64_t __arm_vmlaldavaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_s16))) int64_t __arm_vmlaldavaxq_s16(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_s16))) int64_t __arm_vmlaldavaxq(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_s32))) int64_t __arm_vmlaldavaxq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_s32))) int64_t __arm_vmlaldavaxq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_s16))) int64_t __arm_vmlaldavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_s16))) int64_t __arm_vmlaldavq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_s32))) int64_t __arm_vmlaldavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_s32))) int64_t __arm_vmlaldavq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_u16))) uint64_t __arm_vmlaldavq_p_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_u16))) uint64_t __arm_vmlaldavq_p(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_u32))) uint64_t __arm_vmlaldavq_p_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_u32))) uint64_t __arm_vmlaldavq_p(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_s16))) int64_t __arm_vmlaldavq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_s16))) int64_t __arm_vmlaldavq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_s32))) int64_t __arm_vmlaldavq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_s32))) int64_t __arm_vmlaldavq(int32x4_t, int32x4_t); static __inline__ 
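/*
 * Usage note (editorial sketch, comment-only so the neighbouring declaration
 * stays intact): __arm_vmlaldavq_s32 multiplies corresponding lanes and sums
 * the products into a 64-bit result, while __arm_vmlaldavaq_s32 additionally
 * folds in an incoming 64-bit accumulator, which makes a widening dot-product
 * loop straightforward. Hedged example, assuming `x`, `y`, and `len` describe
 * buffers processed in full vectors of 4 int32_t elements:
 *
 *   int64_t acc = 0;
 *   for (size_t i = 0; i + 4 <= len; i += 4)
 *       acc = __arm_vmlaldavaq_s32(acc, __arm_vldrwq_s32(&x[i]),
 *                                        __arm_vldrwq_s32(&y[i]));
 */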
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_u16))) uint64_t __arm_vmlaldavq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_u16))) uint64_t __arm_vmlaldavq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_u32))) uint64_t __arm_vmlaldavq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_u32))) uint64_t __arm_vmlaldavq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_p_s16))) int64_t __arm_vmlaldavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_p_s16))) int64_t __arm_vmlaldavxq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_p_s32))) int64_t __arm_vmlaldavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_p_s32))) int64_t __arm_vmlaldavxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_s16))) int64_t __arm_vmlaldavxq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_s16))) int64_t __arm_vmlaldavxq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_s32))) int64_t __arm_vmlaldavxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_s32))) int64_t __arm_vmlaldavxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s16))) int16x8_t __arm_vmlaq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s16))) int16x8_t __arm_vmlaq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s32))) int32x4_t __arm_vmlaq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s32))) int32x4_t __arm_vmlaq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s8))) int8x16_t __arm_vmlaq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s8))) int8x16_t __arm_vmlaq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u16))) uint16x8_t __arm_vmlaq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u16))) uint16x8_t __arm_vmlaq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u32))) uint32x4_t __arm_vmlaq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u32))) uint32x4_t __arm_vmlaq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u8))) uint8x16_t __arm_vmlaq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u8))) uint8x16_t __arm_vmlaq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s16))) int16x8_t __arm_vmlaq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s16))) int16x8_t __arm_vmlaq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s32))) int32x4_t __arm_vmlaq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s32))) int32x4_t __arm_vmlaq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s8))) int8x16_t __arm_vmlaq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s8))) int8x16_t __arm_vmlaq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u16))) uint16x8_t __arm_vmlaq_n_u16(uint16x8_t, uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u16))) uint16x8_t __arm_vmlaq(uint16x8_t, uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u32))) uint32x4_t __arm_vmlaq_n_u32(uint32x4_t, uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u32))) uint32x4_t __arm_vmlaq(uint32x4_t, uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u8))) uint8x16_t __arm_vmlaq_n_u8(uint8x16_t, uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u8))) uint8x16_t __arm_vmlaq(uint8x16_t, uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s16))) int16x8_t __arm_vmlasq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s16))) int16x8_t __arm_vmlasq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s32))) int32x4_t __arm_vmlasq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s32))) int32x4_t __arm_vmlasq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s8))) int8x16_t __arm_vmlasq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s8))) int8x16_t __arm_vmlasq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u16))) uint16x8_t __arm_vmlasq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u16))) uint16x8_t __arm_vmlasq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u32))) uint32x4_t __arm_vmlasq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u32))) uint32x4_t __arm_vmlasq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u8))) uint8x16_t __arm_vmlasq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u8))) uint8x16_t __arm_vmlasq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s16))) int16x8_t __arm_vmlasq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s16))) int16x8_t __arm_vmlasq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s32))) int32x4_t __arm_vmlasq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s32))) int32x4_t __arm_vmlasq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s8))) int8x16_t __arm_vmlasq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s8))) int8x16_t __arm_vmlasq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u16))) uint16x8_t __arm_vmlasq_n_u16(uint16x8_t, uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u16))) uint16x8_t __arm_vmlasq(uint16x8_t, uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u32))) uint32x4_t __arm_vmlasq_n_u32(uint32x4_t, uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u32))) uint32x4_t __arm_vmlasq(uint32x4_t, uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u8))) uint8x16_t __arm_vmlasq_n_u8(uint8x16_t, uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u8))) uint8x16_t __arm_vmlasq(uint8x16_t, uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s16))) int32_t __arm_vmlsdavaq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s16))) int32_t __arm_vmlsdavaq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s32))) int32_t __arm_vmlsdavaq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s32))) int32_t __arm_vmlsdavaq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s8))) int32_t __arm_vmlsdavaq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s8))) int32_t __arm_vmlsdavaq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s16))) int32_t __arm_vmlsdavaq_s16(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s16))) int32_t __arm_vmlsdavaq(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s32))) int32_t __arm_vmlsdavaq_s32(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s32))) int32_t __arm_vmlsdavaq(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s8))) int32_t __arm_vmlsdavaq_s8(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s8))) int32_t __arm_vmlsdavaq(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s16))) int32_t __arm_vmlsdavaxq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s16))) int32_t __arm_vmlsdavaxq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s32))) int32_t __arm_vmlsdavaxq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s32))) int32_t __arm_vmlsdavaxq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s8))) int32_t __arm_vmlsdavaxq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s8))) int32_t __arm_vmlsdavaxq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s16))) int32_t __arm_vmlsdavaxq_s16(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s16))) int32_t __arm_vmlsdavaxq(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s32))) int32_t __arm_vmlsdavaxq_s32(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s32))) int32_t __arm_vmlsdavaxq(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s8))) int32_t __arm_vmlsdavaxq_s8(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s8))) int32_t __arm_vmlsdavaxq(int32_t, int8x16_t, int8x16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s16))) int32_t __arm_vmlsdavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s16))) int32_t __arm_vmlsdavq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s32))) int32_t __arm_vmlsdavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s32))) int32_t __arm_vmlsdavq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s8))) int32_t __arm_vmlsdavq_p_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s8))) int32_t __arm_vmlsdavq_p(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s16))) int32_t __arm_vmlsdavq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s16))) int32_t __arm_vmlsdavq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s32))) int32_t __arm_vmlsdavq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s32))) int32_t __arm_vmlsdavq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s8))) int32_t __arm_vmlsdavq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s8))) int32_t __arm_vmlsdavq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s16))) int32_t __arm_vmlsdavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s16))) int32_t __arm_vmlsdavxq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s32))) int32_t __arm_vmlsdavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s32))) int32_t __arm_vmlsdavxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s8))) int32_t __arm_vmlsdavxq_p_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s8))) int32_t __arm_vmlsdavxq_p(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s16))) int32_t __arm_vmlsdavxq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s16))) int32_t __arm_vmlsdavxq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s32))) int32_t __arm_vmlsdavxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s32))) int32_t __arm_vmlsdavxq(int32x4_t, int32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s8))) int32_t __arm_vmlsdavxq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s8))) int32_t __arm_vmlsdavxq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_p_s16))) int64_t __arm_vmlsldavaq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_p_s16))) int64_t __arm_vmlsldavaq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_p_s32))) int64_t __arm_vmlsldavaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_p_s32))) int64_t __arm_vmlsldavaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_s16))) int64_t __arm_vmlsldavaq_s16(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_s16))) int64_t __arm_vmlsldavaq(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_s32))) int64_t __arm_vmlsldavaq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_s32))) int64_t __arm_vmlsldavaq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_p_s16))) int64_t __arm_vmlsldavaxq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_p_s16))) int64_t __arm_vmlsldavaxq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_p_s32))) int64_t __arm_vmlsldavaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_p_s32))) int64_t __arm_vmlsldavaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_s16))) int64_t __arm_vmlsldavaxq_s16(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_s16))) int64_t __arm_vmlsldavaxq(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_s32))) int64_t __arm_vmlsldavaxq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_s32))) int64_t __arm_vmlsldavaxq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_p_s16))) int64_t __arm_vmlsldavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_p_s16))) int64_t __arm_vmlsldavq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_p_s32))) int64_t __arm_vmlsldavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); 
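/*
 * Illustrative sketch, not part of the generated declarations above: the
 * vmlaldav*/vmlsdav*/vmlsldav* families reduce lane-wise products of two
 * vectors into a widened scalar accumulator, and the _p variants take an
 * mve_pred16_t so that only the enabled lanes contribute. Assuming a
 * translation unit that includes this header and is built for an MVE-capable
 * target, a hypothetical helper (dot16_example is an invented name) could
 * use the s16 forms declared earlier like this:
 */
static inline int64_t dot16_example(int16x8_t a, int16x8_t b, mve_pred16_t p)
{
    int64_t full = __arm_vmlaldavq_s16(a, b);      /* sum of a[i]*b[i] over all 8 lanes */
    int64_t part = __arm_vmlaldavq_p_s16(a, b, p); /* same sum, but only lanes enabled in p */
    return full - part;                            /* contribution of the lanes p disables */
}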
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_p_s32))) int64_t __arm_vmlsldavq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_s16))) int64_t __arm_vmlsldavq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_s16))) int64_t __arm_vmlsldavq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_s32))) int64_t __arm_vmlsldavq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_s32))) int64_t __arm_vmlsldavq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_p_s16))) int64_t __arm_vmlsldavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_p_s16))) int64_t __arm_vmlsldavxq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_p_s32))) int64_t __arm_vmlsldavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_p_s32))) int64_t __arm_vmlsldavxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_s16))) int64_t __arm_vmlsldavxq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_s16))) int64_t __arm_vmlsldavxq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_s32))) int64_t __arm_vmlsldavxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_s32))) int64_t __arm_vmlsldavxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_s16))) int32x4_t __arm_vmovlbq_m_s16(int32x4_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_s16))) int32x4_t __arm_vmovlbq_m(int32x4_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_s8))) int16x8_t __arm_vmovlbq_m_s8(int16x8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_s8))) int16x8_t __arm_vmovlbq_m(int16x8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_u16))) uint32x4_t __arm_vmovlbq_m_u16(uint32x4_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_u16))) uint32x4_t __arm_vmovlbq_m(uint32x4_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_u8))) uint16x8_t __arm_vmovlbq_m_u8(uint16x8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_u8))) uint16x8_t __arm_vmovlbq_m(uint16x8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_s16))) int32x4_t 
__arm_vmovlbq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_s16))) int32x4_t __arm_vmovlbq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_s8))) int16x8_t __arm_vmovlbq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_s8))) int16x8_t __arm_vmovlbq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_u16))) uint32x4_t __arm_vmovlbq_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_u16))) uint32x4_t __arm_vmovlbq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_u8))) uint16x8_t __arm_vmovlbq_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_u8))) uint16x8_t __arm_vmovlbq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_s16))) int32x4_t __arm_vmovlbq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_s16))) int32x4_t __arm_vmovlbq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_s8))) int16x8_t __arm_vmovlbq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_s8))) int16x8_t __arm_vmovlbq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_u16))) uint32x4_t __arm_vmovlbq_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_u16))) uint32x4_t __arm_vmovlbq_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_u8))) uint16x8_t __arm_vmovlbq_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_u8))) uint16x8_t __arm_vmovlbq_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_s16))) int32x4_t __arm_vmovltq_m_s16(int32x4_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_s16))) int32x4_t __arm_vmovltq_m(int32x4_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_s8))) int16x8_t __arm_vmovltq_m_s8(int16x8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_s8))) int16x8_t __arm_vmovltq_m(int16x8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_u16))) uint32x4_t __arm_vmovltq_m_u16(uint32x4_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_u16))) uint32x4_t __arm_vmovltq_m(uint32x4_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_u8))) uint16x8_t __arm_vmovltq_m_u8(uint16x8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_u8))) 
uint16x8_t __arm_vmovltq_m(uint16x8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_s16))) int32x4_t __arm_vmovltq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_s16))) int32x4_t __arm_vmovltq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_s8))) int16x8_t __arm_vmovltq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_s8))) int16x8_t __arm_vmovltq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_u16))) uint32x4_t __arm_vmovltq_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_u16))) uint32x4_t __arm_vmovltq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_u8))) uint16x8_t __arm_vmovltq_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_u8))) uint16x8_t __arm_vmovltq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_s16))) int32x4_t __arm_vmovltq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_s16))) int32x4_t __arm_vmovltq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_s8))) int16x8_t __arm_vmovltq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_s8))) int16x8_t __arm_vmovltq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_u16))) uint32x4_t __arm_vmovltq_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_u16))) uint32x4_t __arm_vmovltq_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_u8))) uint16x8_t __arm_vmovltq_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_u8))) uint16x8_t __arm_vmovltq_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_s16))) int8x16_t __arm_vmovnbq_m_s16(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_s16))) int8x16_t __arm_vmovnbq_m(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_s32))) int16x8_t __arm_vmovnbq_m_s32(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_s32))) int16x8_t __arm_vmovnbq_m(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_u16))) uint8x16_t __arm_vmovnbq_m_u16(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_u16))) uint8x16_t __arm_vmovnbq_m(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_u32))) 
uint16x8_t __arm_vmovnbq_m_u32(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_u32))) uint16x8_t __arm_vmovnbq_m(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_s16))) int8x16_t __arm_vmovnbq_s16(int8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_s16))) int8x16_t __arm_vmovnbq(int8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_s32))) int16x8_t __arm_vmovnbq_s32(int16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_s32))) int16x8_t __arm_vmovnbq(int16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_u16))) uint8x16_t __arm_vmovnbq_u16(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_u16))) uint8x16_t __arm_vmovnbq(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_u32))) uint16x8_t __arm_vmovnbq_u32(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_u32))) uint16x8_t __arm_vmovnbq(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_s16))) int8x16_t __arm_vmovntq_m_s16(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_s16))) int8x16_t __arm_vmovntq_m(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_s32))) int16x8_t __arm_vmovntq_m_s32(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_s32))) int16x8_t __arm_vmovntq_m(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_u16))) uint8x16_t __arm_vmovntq_m_u16(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_u16))) uint8x16_t __arm_vmovntq_m(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_u32))) uint16x8_t __arm_vmovntq_m_u32(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_u32))) uint16x8_t __arm_vmovntq_m(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_s16))) int8x16_t __arm_vmovntq_s16(int8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_s16))) int8x16_t __arm_vmovntq(int8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_s32))) int16x8_t __arm_vmovntq_s32(int16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_s32))) int16x8_t __arm_vmovntq(int16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_u16))) uint8x16_t __arm_vmovntq_u16(uint8x16_t, 
uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_u16))) uint8x16_t __arm_vmovntq(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_u32))) uint16x8_t __arm_vmovntq_u32(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_u32))) uint16x8_t __arm_vmovntq(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s16))) int16x8_t __arm_vmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s16))) int16x8_t __arm_vmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s32))) int32x4_t __arm_vmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s32))) int32x4_t __arm_vmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s8))) int8x16_t __arm_vmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s8))) int8x16_t __arm_vmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u16))) uint16x8_t __arm_vmulhq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u16))) uint16x8_t __arm_vmulhq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u32))) uint32x4_t __arm_vmulhq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u32))) uint32x4_t __arm_vmulhq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u8))) uint8x16_t __arm_vmulhq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u8))) uint8x16_t __arm_vmulhq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s16))) int16x8_t __arm_vmulhq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s16))) int16x8_t __arm_vmulhq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s32))) int32x4_t __arm_vmulhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s32))) int32x4_t __arm_vmulhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s8))) int8x16_t __arm_vmulhq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s8))) int8x16_t __arm_vmulhq(int8x16_t, int8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u16))) uint16x8_t __arm_vmulhq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u16))) uint16x8_t __arm_vmulhq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u32))) uint32x4_t __arm_vmulhq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u32))) uint32x4_t __arm_vmulhq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u8))) uint8x16_t __arm_vmulhq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u8))) uint8x16_t __arm_vmulhq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s16))) int16x8_t __arm_vmulhq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s16))) int16x8_t __arm_vmulhq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s32))) int32x4_t __arm_vmulhq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s32))) int32x4_t __arm_vmulhq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s8))) int8x16_t __arm_vmulhq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s8))) int8x16_t __arm_vmulhq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u16))) uint16x8_t __arm_vmulhq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u16))) uint16x8_t __arm_vmulhq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u32))) uint32x4_t __arm_vmulhq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u32))) uint32x4_t __arm_vmulhq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u8))) uint8x16_t __arm_vmulhq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u8))) uint8x16_t __arm_vmulhq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s16))) int32x4_t __arm_vmullbq_int_m_s16(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s16))) int32x4_t __arm_vmullbq_int_m(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s32))) int64x2_t __arm_vmullbq_int_m_s32(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s32))) int64x2_t __arm_vmullbq_int_m(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s8))) int16x8_t __arm_vmullbq_int_m_s8(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s8))) int16x8_t __arm_vmullbq_int_m(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u16))) uint32x4_t __arm_vmullbq_int_m_u16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u16))) uint32x4_t __arm_vmullbq_int_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u32))) uint64x2_t __arm_vmullbq_int_m_u32(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u32))) uint64x2_t __arm_vmullbq_int_m(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u8))) uint16x8_t __arm_vmullbq_int_m_u8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u8))) uint16x8_t __arm_vmullbq_int_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s16))) int32x4_t __arm_vmullbq_int_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s16))) int32x4_t __arm_vmullbq_int(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s32))) int64x2_t __arm_vmullbq_int_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s32))) int64x2_t __arm_vmullbq_int(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s8))) int16x8_t __arm_vmullbq_int_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s8))) int16x8_t __arm_vmullbq_int(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u16))) uint32x4_t __arm_vmullbq_int_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u16))) uint32x4_t __arm_vmullbq_int(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u32))) uint64x2_t __arm_vmullbq_int_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u32))) uint64x2_t __arm_vmullbq_int(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u8))) uint16x8_t __arm_vmullbq_int_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u8))) uint16x8_t __arm_vmullbq_int(uint8x16_t, 
uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s16))) int32x4_t __arm_vmullbq_int_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s16))) int32x4_t __arm_vmullbq_int_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s32))) int64x2_t __arm_vmullbq_int_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s32))) int64x2_t __arm_vmullbq_int_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s8))) int16x8_t __arm_vmullbq_int_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s8))) int16x8_t __arm_vmullbq_int_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u16))) uint32x4_t __arm_vmullbq_int_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u16))) uint32x4_t __arm_vmullbq_int_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u32))) uint64x2_t __arm_vmullbq_int_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u32))) uint64x2_t __arm_vmullbq_int_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u8))) uint16x8_t __arm_vmullbq_int_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u8))) uint16x8_t __arm_vmullbq_int_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_m_p16))) uint32x4_t __arm_vmullbq_poly_m_p16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_m_p16))) uint32x4_t __arm_vmullbq_poly_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_m_p8))) uint16x8_t __arm_vmullbq_poly_m_p8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_m_p8))) uint16x8_t __arm_vmullbq_poly_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_p16))) uint32x4_t __arm_vmullbq_poly_p16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_p16))) uint32x4_t __arm_vmullbq_poly(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_p8))) uint16x8_t __arm_vmullbq_poly_p8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_p8))) uint16x8_t __arm_vmullbq_poly(uint8x16_t, uint8x16_t); 
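/*
 * Illustrative sketch, not part of the generated declarations above: the
 * vmullb*/vmullt* integer forms widen by multiplying only the bottom
 * (even-numbered) or top (odd-numbered) lanes, so a full 16x16->32 multiply
 * of eight lanes is obtained as two int32x4_t halves. Assuming an
 * MVE-capable target, a hypothetical helper (widen_mul_example is an
 * invented name) built on __arm_vmullbq_int_s16 / __arm_vmulltq_int_s16
 * might look like this:
 */
static inline void widen_mul_example(int16x8_t a, int16x8_t b,
                                     int32x4_t *even, int32x4_t *odd)
{
    *even = __arm_vmullbq_int_s16(a, b); /* 32-bit products of lanes 0, 2, 4, 6 */
    *odd  = __arm_vmulltq_int_s16(a, b); /* 32-bit products of lanes 1, 3, 5, 7 */
}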
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_x_p16))) uint32x4_t __arm_vmullbq_poly_x_p16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_x_p16))) uint32x4_t __arm_vmullbq_poly_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_x_p8))) uint16x8_t __arm_vmullbq_poly_x_p8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_x_p8))) uint16x8_t __arm_vmullbq_poly_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s16))) int32x4_t __arm_vmulltq_int_m_s16(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s16))) int32x4_t __arm_vmulltq_int_m(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s32))) int64x2_t __arm_vmulltq_int_m_s32(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s32))) int64x2_t __arm_vmulltq_int_m(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s8))) int16x8_t __arm_vmulltq_int_m_s8(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s8))) int16x8_t __arm_vmulltq_int_m(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u16))) uint32x4_t __arm_vmulltq_int_m_u16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u16))) uint32x4_t __arm_vmulltq_int_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u32))) uint64x2_t __arm_vmulltq_int_m_u32(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u32))) uint64x2_t __arm_vmulltq_int_m(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u8))) uint16x8_t __arm_vmulltq_int_m_u8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u8))) uint16x8_t __arm_vmulltq_int_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s16))) int32x4_t __arm_vmulltq_int_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s16))) int32x4_t __arm_vmulltq_int(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s32))) int64x2_t __arm_vmulltq_int_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s32))) int64x2_t __arm_vmulltq_int(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s8))) int16x8_t __arm_vmulltq_int_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s8))) int16x8_t __arm_vmulltq_int(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u16))) uint32x4_t __arm_vmulltq_int_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u16))) uint32x4_t __arm_vmulltq_int(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u32))) uint64x2_t __arm_vmulltq_int_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u32))) uint64x2_t __arm_vmulltq_int(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u8))) uint16x8_t __arm_vmulltq_int_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u8))) uint16x8_t __arm_vmulltq_int(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s16))) int32x4_t __arm_vmulltq_int_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s16))) int32x4_t __arm_vmulltq_int_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s32))) int64x2_t __arm_vmulltq_int_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s32))) int64x2_t __arm_vmulltq_int_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s8))) int16x8_t __arm_vmulltq_int_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s8))) int16x8_t __arm_vmulltq_int_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u16))) uint32x4_t __arm_vmulltq_int_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u16))) uint32x4_t __arm_vmulltq_int_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u32))) uint64x2_t __arm_vmulltq_int_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u32))) uint64x2_t __arm_vmulltq_int_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u8))) uint16x8_t __arm_vmulltq_int_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u8))) uint16x8_t __arm_vmulltq_int_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_m_p16))) uint32x4_t __arm_vmulltq_poly_m_p16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_m_p16))) uint32x4_t __arm_vmulltq_poly_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_m_p8))) uint16x8_t __arm_vmulltq_poly_m_p8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_m_p8))) uint16x8_t __arm_vmulltq_poly_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_p16))) uint32x4_t __arm_vmulltq_poly_p16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_p16))) uint32x4_t __arm_vmulltq_poly(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_p8))) uint16x8_t __arm_vmulltq_poly_p8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_p8))) uint16x8_t __arm_vmulltq_poly(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_x_p16))) uint32x4_t __arm_vmulltq_poly_x_p16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_x_p16))) uint32x4_t __arm_vmulltq_poly_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_x_p8))) uint16x8_t __arm_vmulltq_poly_x_p8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_x_p8))) uint16x8_t __arm_vmulltq_poly_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s16))) int16x8_t __arm_vmulq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s16))) int16x8_t __arm_vmulq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s32))) int32x4_t __arm_vmulq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s32))) int32x4_t __arm_vmulq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s8))) int8x16_t __arm_vmulq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s8))) int8x16_t __arm_vmulq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u16))) uint16x8_t __arm_vmulq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u16))) uint16x8_t __arm_vmulq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u32))) uint32x4_t __arm_vmulq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u32))) uint32x4_t __arm_vmulq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u8))) uint8x16_t __arm_vmulq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u8))) uint8x16_t __arm_vmulq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s16))) int16x8_t __arm_vmulq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s16))) int16x8_t __arm_vmulq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s32))) int32x4_t __arm_vmulq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s32))) int32x4_t __arm_vmulq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s8))) int8x16_t __arm_vmulq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s8))) int8x16_t __arm_vmulq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u16))) uint16x8_t __arm_vmulq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u16))) uint16x8_t __arm_vmulq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u32))) uint32x4_t __arm_vmulq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u32))) uint32x4_t __arm_vmulq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u8))) uint8x16_t __arm_vmulq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u8))) uint8x16_t __arm_vmulq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s16))) int16x8_t __arm_vmulq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s16))) int16x8_t __arm_vmulq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s32))) int32x4_t __arm_vmulq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s32))) int32x4_t __arm_vmulq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s8))) int8x16_t __arm_vmulq_n_s8(int8x16_t, int8_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s8))) int8x16_t __arm_vmulq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u16))) uint16x8_t __arm_vmulq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u16))) uint16x8_t __arm_vmulq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u32))) uint32x4_t __arm_vmulq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u32))) uint32x4_t __arm_vmulq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u8))) uint8x16_t __arm_vmulq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u8))) uint8x16_t __arm_vmulq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s16))) int16x8_t __arm_vmulq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s16))) int16x8_t __arm_vmulq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s32))) int32x4_t __arm_vmulq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s32))) int32x4_t __arm_vmulq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s8))) int8x16_t __arm_vmulq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s8))) int8x16_t __arm_vmulq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u16))) uint16x8_t __arm_vmulq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u16))) uint16x8_t __arm_vmulq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u32))) uint32x4_t __arm_vmulq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u32))) uint32x4_t __arm_vmulq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u8))) uint8x16_t __arm_vmulq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u8))) uint8x16_t __arm_vmulq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s16))) int16x8_t __arm_vmulq_x_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s16))) int16x8_t __arm_vmulq_x(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s32))) int32x4_t __arm_vmulq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s32))) int32x4_t __arm_vmulq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s8))) int8x16_t __arm_vmulq_x_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s8))) int8x16_t __arm_vmulq_x(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u16))) uint16x8_t __arm_vmulq_x_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u16))) uint16x8_t __arm_vmulq_x(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u32))) uint32x4_t __arm_vmulq_x_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u32))) uint32x4_t __arm_vmulq_x(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u8))) uint8x16_t __arm_vmulq_x_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u8))) uint8x16_t __arm_vmulq_x(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s16))) int16x8_t __arm_vmulq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s16))) int16x8_t __arm_vmulq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s32))) int32x4_t __arm_vmulq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s32))) int32x4_t __arm_vmulq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s8))) int8x16_t __arm_vmulq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s8))) int8x16_t __arm_vmulq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u16))) uint16x8_t __arm_vmulq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u16))) uint16x8_t __arm_vmulq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u32))) uint32x4_t __arm_vmulq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u32))) uint32x4_t __arm_vmulq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u8))) uint8x16_t __arm_vmulq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u8))) uint8x16_t __arm_vmulq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_s16))) int16x8_t __arm_vmvnq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_s16))) int16x8_t __arm_vmvnq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_s32))) int32x4_t __arm_vmvnq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_s32))) int32x4_t __arm_vmvnq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_u16))) uint16x8_t __arm_vmvnq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_u16))) uint16x8_t __arm_vmvnq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_u32))) uint32x4_t __arm_vmvnq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_u32))) uint32x4_t __arm_vmvnq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s16))) int16x8_t __arm_vmvnq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s16))) int16x8_t __arm_vmvnq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s32))) int32x4_t __arm_vmvnq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s32))) int32x4_t __arm_vmvnq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s8))) int8x16_t __arm_vmvnq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s8))) int8x16_t __arm_vmvnq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u16))) uint16x8_t __arm_vmvnq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u16))) uint16x8_t __arm_vmvnq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u32))) uint32x4_t __arm_vmvnq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u32))) uint32x4_t __arm_vmvnq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u8))) uint8x16_t __arm_vmvnq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u8))) uint8x16_t __arm_vmvnq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_n_s16))) int16x8_t __arm_vmvnq_n_s16(int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_n_s32))) int32x4_t __arm_vmvnq_n_s32(int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_n_u16))) uint16x8_t __arm_vmvnq_n_u16(uint16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_n_u32))) uint32x4_t __arm_vmvnq_n_u32(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s16))) int16x8_t __arm_vmvnq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s16))) int16x8_t __arm_vmvnq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s32))) int32x4_t __arm_vmvnq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s32))) int32x4_t __arm_vmvnq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s8))) int8x16_t __arm_vmvnq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s8))) int8x16_t __arm_vmvnq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u16))) uint16x8_t __arm_vmvnq_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u16))) uint16x8_t __arm_vmvnq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u32))) uint32x4_t __arm_vmvnq_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u32))) uint32x4_t __arm_vmvnq(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u8))) uint8x16_t __arm_vmvnq_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u8))) uint8x16_t __arm_vmvnq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_n_s16))) int16x8_t __arm_vmvnq_x_n_s16(int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_n_s32))) int32x4_t __arm_vmvnq_x_n_s32(int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_n_u16))) uint16x8_t __arm_vmvnq_x_n_u16(uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_n_u32))) uint32x4_t __arm_vmvnq_x_n_u32(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s16))) int16x8_t __arm_vmvnq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s16))) int16x8_t __arm_vmvnq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s32))) int32x4_t __arm_vmvnq_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s32))) int32x4_t __arm_vmvnq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s8))) int8x16_t __arm_vmvnq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s8))) int8x16_t __arm_vmvnq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u16))) uint16x8_t __arm_vmvnq_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u16))) uint16x8_t __arm_vmvnq_x(uint16x8_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u32))) uint32x4_t __arm_vmvnq_x_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u32))) uint32x4_t __arm_vmvnq_x(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u8))) uint8x16_t __arm_vmvnq_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u8))) uint8x16_t __arm_vmvnq_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s16))) int16x8_t __arm_vnegq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s16))) int16x8_t __arm_vnegq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s32))) int32x4_t __arm_vnegq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s32))) int32x4_t __arm_vnegq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s8))) int8x16_t __arm_vnegq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s8))) int8x16_t __arm_vnegq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s16))) int16x8_t __arm_vnegq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s16))) int16x8_t __arm_vnegq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s32))) int32x4_t __arm_vnegq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s32))) int32x4_t __arm_vnegq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s8))) int8x16_t __arm_vnegq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s8))) int8x16_t __arm_vnegq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s16))) int16x8_t __arm_vnegq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s16))) int16x8_t __arm_vnegq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s32))) int32x4_t __arm_vnegq_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s32))) int32x4_t __arm_vnegq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s8))) int8x16_t __arm_vnegq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s8))) int8x16_t __arm_vnegq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s16))) int16x8_t __arm_vornq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s16))) int16x8_t __arm_vornq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s32))) int32x4_t __arm_vornq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s32))) int32x4_t __arm_vornq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s8))) int8x16_t __arm_vornq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s8))) int8x16_t __arm_vornq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u16))) uint16x8_t __arm_vornq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u16))) uint16x8_t __arm_vornq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u32))) uint32x4_t __arm_vornq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u32))) uint32x4_t __arm_vornq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u8))) uint8x16_t __arm_vornq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u8))) uint8x16_t __arm_vornq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_s16))) int16x8_t __arm_vornq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_s16))) int16x8_t __arm_vornq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_s32))) int32x4_t __arm_vornq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_s32))) int32x4_t __arm_vornq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_s8))) int8x16_t __arm_vornq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_s8))) int8x16_t __arm_vornq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_u16))) uint16x8_t __arm_vornq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_u16))) uint16x8_t __arm_vornq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_u32))) uint32x4_t __arm_vornq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_u32))) uint32x4_t __arm_vornq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_u8))) uint8x16_t __arm_vornq_u8(uint8x16_t, uint8x16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_u8))) uint8x16_t __arm_vornq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s16))) int16x8_t __arm_vornq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s16))) int16x8_t __arm_vornq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s32))) int32x4_t __arm_vornq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s32))) int32x4_t __arm_vornq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s8))) int8x16_t __arm_vornq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s8))) int8x16_t __arm_vornq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u16))) uint16x8_t __arm_vornq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u16))) uint16x8_t __arm_vornq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u32))) uint32x4_t __arm_vornq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u32))) uint32x4_t __arm_vornq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u8))) uint8x16_t __arm_vornq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u8))) uint8x16_t __arm_vornq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_s16))) int16x8_t __arm_vorrq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_s16))) int16x8_t __arm_vorrq_m_n(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_s32))) int32x4_t __arm_vorrq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_s32))) int32x4_t __arm_vorrq_m_n(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_u16))) uint16x8_t __arm_vorrq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_u16))) uint16x8_t __arm_vorrq_m_n(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_u32))) uint32x4_t __arm_vorrq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_u32))) uint32x4_t __arm_vorrq_m_n(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s16))) 
int16x8_t __arm_vorrq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s16))) int16x8_t __arm_vorrq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s32))) int32x4_t __arm_vorrq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s32))) int32x4_t __arm_vorrq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s8))) int8x16_t __arm_vorrq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s8))) int8x16_t __arm_vorrq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u16))) uint16x8_t __arm_vorrq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u16))) uint16x8_t __arm_vorrq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u32))) uint32x4_t __arm_vorrq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u32))) uint32x4_t __arm_vorrq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u8))) uint8x16_t __arm_vorrq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u8))) uint8x16_t __arm_vorrq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_s16))) int16x8_t __arm_vorrq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_s16))) int16x8_t __arm_vorrq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_s32))) int32x4_t __arm_vorrq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_s32))) int32x4_t __arm_vorrq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_u16))) uint16x8_t __arm_vorrq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_u16))) uint16x8_t __arm_vorrq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_u32))) uint32x4_t __arm_vorrq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_u32))) uint32x4_t __arm_vorrq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s16))) int16x8_t __arm_vorrq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s16))) int16x8_t __arm_vorrq(int16x8_t, int16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s32))) int32x4_t __arm_vorrq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s32))) int32x4_t __arm_vorrq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s8))) int8x16_t __arm_vorrq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s8))) int8x16_t __arm_vorrq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u16))) uint16x8_t __arm_vorrq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u16))) uint16x8_t __arm_vorrq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u32))) uint32x4_t __arm_vorrq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u32))) uint32x4_t __arm_vorrq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u8))) uint8x16_t __arm_vorrq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u8))) uint8x16_t __arm_vorrq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s16))) int16x8_t __arm_vorrq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s16))) int16x8_t __arm_vorrq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s32))) int32x4_t __arm_vorrq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s32))) int32x4_t __arm_vorrq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s8))) int8x16_t __arm_vorrq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s8))) int8x16_t __arm_vorrq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u16))) uint16x8_t __arm_vorrq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u16))) uint16x8_t __arm_vorrq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u32))) uint32x4_t __arm_vorrq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u32))) uint32x4_t __arm_vorrq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u8))) uint8x16_t __arm_vorrq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u8))) uint8x16_t __arm_vorrq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpnot))) mve_pred16_t 
__arm_vpnot(mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s16))) int16x8_t __arm_vpselq_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s16))) int16x8_t __arm_vpselq(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s32))) int32x4_t __arm_vpselq_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s32))) int32x4_t __arm_vpselq(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s64))) int64x2_t __arm_vpselq_s64(int64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s64))) int64x2_t __arm_vpselq(int64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s8))) int8x16_t __arm_vpselq_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s8))) int8x16_t __arm_vpselq(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u16))) uint16x8_t __arm_vpselq_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u16))) uint16x8_t __arm_vpselq(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u32))) uint32x4_t __arm_vpselq_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u32))) uint32x4_t __arm_vpselq(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u64))) uint64x2_t __arm_vpselq_u64(uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u64))) uint64x2_t __arm_vpselq(uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u8))) uint8x16_t __arm_vpselq_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u8))) uint8x16_t __arm_vpselq(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s16))) int16x8_t __arm_vqabsq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s16))) int16x8_t __arm_vqabsq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s32))) int32x4_t __arm_vqabsq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s32))) int32x4_t __arm_vqabsq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s8))) int8x16_t __arm_vqabsq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s8))) int8x16_t __arm_vqabsq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s16))) int16x8_t __arm_vqabsq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s16))) int16x8_t __arm_vqabsq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s32))) int32x4_t __arm_vqabsq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s32))) int32x4_t __arm_vqabsq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s8))) int8x16_t __arm_vqabsq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s8))) int8x16_t __arm_vqabsq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s16))) int16x8_t __arm_vqaddq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s16))) int16x8_t __arm_vqaddq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s32))) int32x4_t __arm_vqaddq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s32))) int32x4_t __arm_vqaddq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s8))) int8x16_t __arm_vqaddq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s8))) int8x16_t __arm_vqaddq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u16))) uint16x8_t __arm_vqaddq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u16))) uint16x8_t __arm_vqaddq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u32))) uint32x4_t __arm_vqaddq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u32))) uint32x4_t __arm_vqaddq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u8))) uint8x16_t __arm_vqaddq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u8))) uint8x16_t __arm_vqaddq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s16))) int16x8_t __arm_vqaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s16))) int16x8_t __arm_vqaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s32))) int32x4_t 
__arm_vqaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s32))) int32x4_t __arm_vqaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s8))) int8x16_t __arm_vqaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s8))) int8x16_t __arm_vqaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u16))) uint16x8_t __arm_vqaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u16))) uint16x8_t __arm_vqaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u32))) uint32x4_t __arm_vqaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u32))) uint32x4_t __arm_vqaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u8))) uint8x16_t __arm_vqaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u8))) uint8x16_t __arm_vqaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s16))) int16x8_t __arm_vqaddq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s16))) int16x8_t __arm_vqaddq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s32))) int32x4_t __arm_vqaddq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s32))) int32x4_t __arm_vqaddq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s8))) int8x16_t __arm_vqaddq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s8))) int8x16_t __arm_vqaddq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u16))) uint16x8_t __arm_vqaddq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u16))) uint16x8_t __arm_vqaddq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u32))) uint32x4_t __arm_vqaddq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u32))) uint32x4_t __arm_vqaddq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u8))) uint8x16_t __arm_vqaddq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u8))) uint8x16_t __arm_vqaddq(uint8x16_t, uint8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s16))) int16x8_t __arm_vqaddq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s16))) int16x8_t __arm_vqaddq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s32))) int32x4_t __arm_vqaddq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s32))) int32x4_t __arm_vqaddq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s8))) int8x16_t __arm_vqaddq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s8))) int8x16_t __arm_vqaddq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u16))) uint16x8_t __arm_vqaddq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u16))) uint16x8_t __arm_vqaddq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u32))) uint32x4_t __arm_vqaddq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u32))) uint32x4_t __arm_vqaddq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u8))) uint8x16_t __arm_vqaddq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u8))) uint8x16_t __arm_vqaddq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s16))) int16x8_t __arm_vqdmladhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s16))) int16x8_t __arm_vqdmladhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s32))) int32x4_t __arm_vqdmladhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s32))) int32x4_t __arm_vqdmladhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s8))) int8x16_t __arm_vqdmladhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s8))) int8x16_t __arm_vqdmladhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s16))) int16x8_t __arm_vqdmladhq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s16))) int16x8_t __arm_vqdmladhq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s32))) int32x4_t __arm_vqdmladhq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s32))) int32x4_t __arm_vqdmladhq(int32x4_t, int32x4_t, int32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s8))) int8x16_t __arm_vqdmladhq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s8))) int8x16_t __arm_vqdmladhq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s16))) int16x8_t __arm_vqdmladhxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s16))) int16x8_t __arm_vqdmladhxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s32))) int32x4_t __arm_vqdmladhxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s32))) int32x4_t __arm_vqdmladhxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s8))) int8x16_t __arm_vqdmladhxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s8))) int8x16_t __arm_vqdmladhxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s16))) int16x8_t __arm_vqdmladhxq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s16))) int16x8_t __arm_vqdmladhxq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s32))) int32x4_t __arm_vqdmladhxq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s32))) int32x4_t __arm_vqdmladhxq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s8))) int8x16_t __arm_vqdmladhxq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s8))) int8x16_t __arm_vqdmladhxq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s16))) int16x8_t __arm_vqdmlahq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s16))) int16x8_t __arm_vqdmlahq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s32))) int32x4_t __arm_vqdmlahq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s32))) int32x4_t __arm_vqdmlahq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s8))) int8x16_t __arm_vqdmlahq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s8))) int8x16_t __arm_vqdmlahq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s16))) int16x8_t __arm_vqdmlahq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s16))) int16x8_t __arm_vqdmlahq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s32))) int32x4_t __arm_vqdmlahq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s32))) int32x4_t __arm_vqdmlahq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s8))) int8x16_t __arm_vqdmlahq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s8))) int8x16_t __arm_vqdmlahq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s16))) int16x8_t __arm_vqdmlashq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s16))) int16x8_t __arm_vqdmlashq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s32))) int32x4_t __arm_vqdmlashq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s32))) int32x4_t __arm_vqdmlashq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s8))) int8x16_t __arm_vqdmlashq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s8))) int8x16_t __arm_vqdmlashq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s16))) int16x8_t __arm_vqdmlashq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s16))) int16x8_t __arm_vqdmlashq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s32))) int32x4_t __arm_vqdmlashq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s32))) int32x4_t __arm_vqdmlashq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s8))) int8x16_t __arm_vqdmlashq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s8))) int8x16_t __arm_vqdmlashq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s16))) int16x8_t __arm_vqdmlsdhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s16))) int16x8_t __arm_vqdmlsdhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s32))) int32x4_t 
__arm_vqdmlsdhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s32))) int32x4_t __arm_vqdmlsdhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s8))) int8x16_t __arm_vqdmlsdhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s8))) int8x16_t __arm_vqdmlsdhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s16))) int16x8_t __arm_vqdmlsdhq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s16))) int16x8_t __arm_vqdmlsdhq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s32))) int32x4_t __arm_vqdmlsdhq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s32))) int32x4_t __arm_vqdmlsdhq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s8))) int8x16_t __arm_vqdmlsdhq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s8))) int8x16_t __arm_vqdmlsdhq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s16))) int16x8_t __arm_vqdmlsdhxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s16))) int16x8_t __arm_vqdmlsdhxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s32))) int32x4_t __arm_vqdmlsdhxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s32))) int32x4_t __arm_vqdmlsdhxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s8))) int8x16_t __arm_vqdmlsdhxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s8))) int8x16_t __arm_vqdmlsdhxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s16))) int16x8_t __arm_vqdmlsdhxq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s16))) int16x8_t __arm_vqdmlsdhxq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s32))) int32x4_t __arm_vqdmlsdhxq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s32))) int32x4_t __arm_vqdmlsdhxq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s8))) int8x16_t __arm_vqdmlsdhxq_s8(int8x16_t, int8x16_t, int8x16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s8))) int8x16_t __arm_vqdmlsdhxq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s16))) int16x8_t __arm_vqdmulhq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s16))) int16x8_t __arm_vqdmulhq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s32))) int32x4_t __arm_vqdmulhq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s32))) int32x4_t __arm_vqdmulhq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s8))) int8x16_t __arm_vqdmulhq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s8))) int8x16_t __arm_vqdmulhq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s16))) int16x8_t __arm_vqdmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s16))) int16x8_t __arm_vqdmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s32))) int32x4_t __arm_vqdmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s32))) int32x4_t __arm_vqdmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s8))) int8x16_t __arm_vqdmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s8))) int8x16_t __arm_vqdmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s16))) int16x8_t __arm_vqdmulhq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s16))) int16x8_t __arm_vqdmulhq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s32))) int32x4_t __arm_vqdmulhq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s32))) int32x4_t __arm_vqdmulhq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s8))) int8x16_t __arm_vqdmulhq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s8))) int8x16_t __arm_vqdmulhq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s16))) int16x8_t __arm_vqdmulhq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s16))) int16x8_t __arm_vqdmulhq(int16x8_t, int16x8_t); 
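The unpredicated, merging (_m) and don't-care (_x) forms declared above are meant to be combined for loop-tail handling. A minimal usage sketch, assuming a Helium (MVE) target such as Cortex-M55 and that __arm_vctp16q (tail predication) is declared elsewhere in this header as in ACLE MVE; the helper name q15_mac_step is hypothetical and not part of the header. Only __arm_vqdmulhq_s16 and __arm_vqaddq_m_s16 from the declarations above are used.

#include <arm_mve.h>

/* Hypothetical helper: one Q15 saturating multiply-accumulate step with
   tail predication. Lanes at index >= remaining keep the old accumulator. */
static inline int16x8_t q15_mac_step(int16x8_t acc, int16x8_t a, int16x8_t b,
                                     unsigned remaining)
{
    /* Predicate with the low `remaining` lanes active (assumed to be
       declared earlier in this header, per ACLE MVE). */
    mve_pred16_t p = __arm_vctp16q(remaining);

    /* Q15 saturating doubling multiply returning the high half. */
    int16x8_t prod = __arm_vqdmulhq_s16(a, b);

    /* Saturating add merged under the predicate: active lanes become
       sat(acc + prod); inactive lanes take the first ("inactive") argument,
       i.e. the old acc. The __overloadable__ declarations also permit the
       generic spelling __arm_vqaddq_m(acc, acc, prod, p). */
    return __arm_vqaddq_m_s16(acc, acc, prod, p);
}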
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s32))) int32x4_t __arm_vqdmulhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s32))) int32x4_t __arm_vqdmulhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s8))) int8x16_t __arm_vqdmulhq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s8))) int8x16_t __arm_vqdmulhq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_n_s16))) int32x4_t __arm_vqdmullbq_m_n_s16(int32x4_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_n_s16))) int32x4_t __arm_vqdmullbq_m(int32x4_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_n_s32))) int64x2_t __arm_vqdmullbq_m_n_s32(int64x2_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_n_s32))) int64x2_t __arm_vqdmullbq_m(int64x2_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_s16))) int32x4_t __arm_vqdmullbq_m_s16(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_s16))) int32x4_t __arm_vqdmullbq_m(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_s32))) int64x2_t __arm_vqdmullbq_m_s32(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_s32))) int64x2_t __arm_vqdmullbq_m(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_n_s16))) int32x4_t __arm_vqdmullbq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_n_s16))) int32x4_t __arm_vqdmullbq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_n_s32))) int64x2_t __arm_vqdmullbq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_n_s32))) int64x2_t __arm_vqdmullbq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_s16))) int32x4_t __arm_vqdmullbq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_s16))) int32x4_t __arm_vqdmullbq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_s32))) int64x2_t __arm_vqdmullbq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_s32))) int64x2_t __arm_vqdmullbq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_n_s16))) int32x4_t __arm_vqdmulltq_m_n_s16(int32x4_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_n_s16))) int32x4_t __arm_vqdmulltq_m(int32x4_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_n_s32))) int64x2_t __arm_vqdmulltq_m_n_s32(int64x2_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_n_s32))) int64x2_t __arm_vqdmulltq_m(int64x2_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_s16))) int32x4_t __arm_vqdmulltq_m_s16(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_s16))) int32x4_t __arm_vqdmulltq_m(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_s32))) int64x2_t __arm_vqdmulltq_m_s32(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_s32))) int64x2_t __arm_vqdmulltq_m(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_n_s16))) int32x4_t __arm_vqdmulltq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_n_s16))) int32x4_t __arm_vqdmulltq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_n_s32))) int64x2_t __arm_vqdmulltq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_n_s32))) int64x2_t __arm_vqdmulltq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_s16))) int32x4_t __arm_vqdmulltq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_s16))) int32x4_t __arm_vqdmulltq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_s32))) int64x2_t __arm_vqdmulltq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_s32))) int64x2_t __arm_vqdmulltq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_s16))) int8x16_t __arm_vqmovnbq_m_s16(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_s16))) int8x16_t __arm_vqmovnbq_m(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_s32))) int16x8_t __arm_vqmovnbq_m_s32(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_s32))) int16x8_t __arm_vqmovnbq_m(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_u16))) uint8x16_t __arm_vqmovnbq_m_u16(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_u16))) uint8x16_t __arm_vqmovnbq_m(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_u32))) uint16x8_t __arm_vqmovnbq_m_u32(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_u32))) uint16x8_t __arm_vqmovnbq_m(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_s16))) int8x16_t __arm_vqmovnbq_s16(int8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_s16))) int8x16_t __arm_vqmovnbq(int8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_s32))) int16x8_t __arm_vqmovnbq_s32(int16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_s32))) int16x8_t __arm_vqmovnbq(int16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_u16))) uint8x16_t __arm_vqmovnbq_u16(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_u16))) uint8x16_t __arm_vqmovnbq(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_u32))) uint16x8_t __arm_vqmovnbq_u32(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_u32))) uint16x8_t __arm_vqmovnbq(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_s16))) int8x16_t __arm_vqmovntq_m_s16(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_s16))) int8x16_t __arm_vqmovntq_m(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_s32))) int16x8_t __arm_vqmovntq_m_s32(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_s32))) int16x8_t __arm_vqmovntq_m(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_u16))) uint8x16_t __arm_vqmovntq_m_u16(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_u16))) uint8x16_t __arm_vqmovntq_m(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_u32))) uint16x8_t __arm_vqmovntq_m_u32(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_u32))) uint16x8_t __arm_vqmovntq_m(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_s16))) int8x16_t __arm_vqmovntq_s16(int8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_s16))) int8x16_t __arm_vqmovntq(int8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_s32))) int16x8_t __arm_vqmovntq_s32(int16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_s32))) int16x8_t __arm_vqmovntq(int16x8_t, int32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_u16))) uint8x16_t __arm_vqmovntq_u16(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_u16))) uint8x16_t __arm_vqmovntq(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_u32))) uint16x8_t __arm_vqmovntq_u32(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_u32))) uint16x8_t __arm_vqmovntq(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_m_s16))) uint8x16_t __arm_vqmovunbq_m_s16(uint8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_m_s16))) uint8x16_t __arm_vqmovunbq_m(uint8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_m_s32))) uint16x8_t __arm_vqmovunbq_m_s32(uint16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_m_s32))) uint16x8_t __arm_vqmovunbq_m(uint16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_s16))) uint8x16_t __arm_vqmovunbq_s16(uint8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_s16))) uint8x16_t __arm_vqmovunbq(uint8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_s32))) uint16x8_t __arm_vqmovunbq_s32(uint16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_s32))) uint16x8_t __arm_vqmovunbq(uint16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_m_s16))) uint8x16_t __arm_vqmovuntq_m_s16(uint8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_m_s16))) uint8x16_t __arm_vqmovuntq_m(uint8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_m_s32))) uint16x8_t __arm_vqmovuntq_m_s32(uint16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_m_s32))) uint16x8_t __arm_vqmovuntq_m(uint16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_s16))) uint8x16_t __arm_vqmovuntq_s16(uint8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_s16))) uint8x16_t __arm_vqmovuntq(uint8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_s32))) uint16x8_t __arm_vqmovuntq_s32(uint16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_s32))) uint16x8_t __arm_vqmovuntq(uint16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s16))) int16x8_t __arm_vqnegq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s16))) int16x8_t __arm_vqnegq_m(int16x8_t, 
int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s32))) int32x4_t __arm_vqnegq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s32))) int32x4_t __arm_vqnegq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s8))) int8x16_t __arm_vqnegq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s8))) int8x16_t __arm_vqnegq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s16))) int16x8_t __arm_vqnegq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s16))) int16x8_t __arm_vqnegq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s32))) int32x4_t __arm_vqnegq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s32))) int32x4_t __arm_vqnegq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s8))) int8x16_t __arm_vqnegq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s8))) int8x16_t __arm_vqnegq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s16))) int16x8_t __arm_vqrdmladhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s16))) int16x8_t __arm_vqrdmladhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s32))) int32x4_t __arm_vqrdmladhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s32))) int32x4_t __arm_vqrdmladhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s8))) int8x16_t __arm_vqrdmladhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s8))) int8x16_t __arm_vqrdmladhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s16))) int16x8_t __arm_vqrdmladhq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s16))) int16x8_t __arm_vqrdmladhq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s32))) int32x4_t __arm_vqrdmladhq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s32))) int32x4_t __arm_vqrdmladhq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s8))) int8x16_t __arm_vqrdmladhq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s8))) 
int8x16_t __arm_vqrdmladhq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s16))) int16x8_t __arm_vqrdmladhxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s16))) int16x8_t __arm_vqrdmladhxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s32))) int32x4_t __arm_vqrdmladhxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s32))) int32x4_t __arm_vqrdmladhxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s8))) int8x16_t __arm_vqrdmladhxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s8))) int8x16_t __arm_vqrdmladhxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s16))) int16x8_t __arm_vqrdmladhxq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s16))) int16x8_t __arm_vqrdmladhxq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s32))) int32x4_t __arm_vqrdmladhxq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s32))) int32x4_t __arm_vqrdmladhxq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s8))) int8x16_t __arm_vqrdmladhxq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s8))) int8x16_t __arm_vqrdmladhxq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s16))) int16x8_t __arm_vqrdmlahq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s16))) int16x8_t __arm_vqrdmlahq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s32))) int32x4_t __arm_vqrdmlahq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s32))) int32x4_t __arm_vqrdmlahq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s8))) int8x16_t __arm_vqrdmlahq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s8))) int8x16_t __arm_vqrdmlahq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s16))) int16x8_t __arm_vqrdmlahq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s16))) int16x8_t __arm_vqrdmlahq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s32))) int32x4_t __arm_vqrdmlahq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s32))) int32x4_t __arm_vqrdmlahq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s8))) int8x16_t __arm_vqrdmlahq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s8))) int8x16_t __arm_vqrdmlahq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s16))) int16x8_t __arm_vqrdmlashq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s16))) int16x8_t __arm_vqrdmlashq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s32))) int32x4_t __arm_vqrdmlashq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s32))) int32x4_t __arm_vqrdmlashq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s8))) int8x16_t __arm_vqrdmlashq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s8))) int8x16_t __arm_vqrdmlashq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s16))) int16x8_t __arm_vqrdmlashq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s16))) int16x8_t __arm_vqrdmlashq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s32))) int32x4_t __arm_vqrdmlashq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s32))) int32x4_t __arm_vqrdmlashq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s8))) int8x16_t __arm_vqrdmlashq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s8))) int8x16_t __arm_vqrdmlashq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s16))) int16x8_t __arm_vqrdmlsdhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s16))) int16x8_t __arm_vqrdmlsdhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s32))) int32x4_t __arm_vqrdmlsdhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s32))) int32x4_t __arm_vqrdmlsdhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s8))) int8x16_t __arm_vqrdmlsdhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s8))) int8x16_t __arm_vqrdmlsdhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s16))) int16x8_t __arm_vqrdmlsdhq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s16))) int16x8_t __arm_vqrdmlsdhq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s32))) int32x4_t __arm_vqrdmlsdhq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s32))) int32x4_t __arm_vqrdmlsdhq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s8))) int8x16_t __arm_vqrdmlsdhq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s8))) int8x16_t __arm_vqrdmlsdhq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s16))) int16x8_t __arm_vqrdmlsdhxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s16))) int16x8_t __arm_vqrdmlsdhxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s32))) int32x4_t __arm_vqrdmlsdhxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s32))) int32x4_t __arm_vqrdmlsdhxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s8))) int8x16_t __arm_vqrdmlsdhxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s8))) int8x16_t __arm_vqrdmlsdhxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s16))) int16x8_t __arm_vqrdmlsdhxq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s16))) int16x8_t __arm_vqrdmlsdhxq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s32))) int32x4_t __arm_vqrdmlsdhxq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s32))) int32x4_t __arm_vqrdmlsdhxq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s8))) int8x16_t __arm_vqrdmlsdhxq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s8))) int8x16_t __arm_vqrdmlsdhxq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s16))) int16x8_t __arm_vqrdmulhq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s16))) int16x8_t __arm_vqrdmulhq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s32))) int32x4_t __arm_vqrdmulhq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s32))) int32x4_t __arm_vqrdmulhq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s8))) int8x16_t __arm_vqrdmulhq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s8))) int8x16_t __arm_vqrdmulhq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s16))) int16x8_t __arm_vqrdmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s16))) int16x8_t __arm_vqrdmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s32))) int32x4_t __arm_vqrdmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s32))) int32x4_t __arm_vqrdmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s8))) int8x16_t __arm_vqrdmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s8))) int8x16_t __arm_vqrdmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s16))) int16x8_t __arm_vqrdmulhq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s16))) int16x8_t __arm_vqrdmulhq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s32))) int32x4_t __arm_vqrdmulhq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s32))) int32x4_t __arm_vqrdmulhq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s8))) int8x16_t __arm_vqrdmulhq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s8))) int8x16_t __arm_vqrdmulhq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s16))) int16x8_t __arm_vqrdmulhq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s16))) int16x8_t __arm_vqrdmulhq(int16x8_t, int16x8_t); 
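/*
 * Illustrative sketch only (not part of the header): the rounding counterparts
 * declared above, shown on single vectors.  The helper names q15_mul_round and
 * q15_mac_round are hypothetical; the comments loosely paraphrase the
 * VQRDMULH/VQRDMLAH semantics and the code assumes an MVE-enabled target.
 */
#include <arm_mve.h>
#include <stdint.h>

/* r[i] ~ saturate(round((2 * x[i] * coeff) >> 16)): rounding doubling multiply
   returning the high half, vector by scalar. */
static int16x8_t q15_mul_round(int16x8_t x, int16_t coeff)
{
    return __arm_vqrdmulhq_n_s16(x, coeff);
}

/* Same rounded product, but accumulated into acc[i] with saturation, using the
   multiply-accumulate form declared earlier in this header. */
static int16x8_t q15_mac_round(int16x8_t acc, int16x8_t x, int16_t coeff)
{
    return __arm_vqrdmlahq_n_s16(acc, x, coeff);
}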
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s32))) int32x4_t __arm_vqrdmulhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s32))) int32x4_t __arm_vqrdmulhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s8))) int8x16_t __arm_vqrdmulhq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s8))) int8x16_t __arm_vqrdmulhq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s16))) int16x8_t __arm_vqrshlq_m_n_s16(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s16))) int16x8_t __arm_vqrshlq_m_n(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s32))) int32x4_t __arm_vqrshlq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s32))) int32x4_t __arm_vqrshlq_m_n(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s8))) int8x16_t __arm_vqrshlq_m_n_s8(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s8))) int8x16_t __arm_vqrshlq_m_n(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u16))) uint16x8_t __arm_vqrshlq_m_n_u16(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u16))) uint16x8_t __arm_vqrshlq_m_n(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u32))) uint32x4_t __arm_vqrshlq_m_n_u32(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u32))) uint32x4_t __arm_vqrshlq_m_n(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u8))) uint8x16_t __arm_vqrshlq_m_n_u8(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u8))) uint8x16_t __arm_vqrshlq_m_n(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s16))) int16x8_t __arm_vqrshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s16))) int16x8_t __arm_vqrshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s32))) int32x4_t __arm_vqrshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s32))) int32x4_t __arm_vqrshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s8))) int8x16_t __arm_vqrshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s8))) int8x16_t __arm_vqrshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u16))) uint16x8_t __arm_vqrshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u16))) uint16x8_t __arm_vqrshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u32))) uint32x4_t __arm_vqrshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u32))) uint32x4_t __arm_vqrshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u8))) uint8x16_t __arm_vqrshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u8))) uint8x16_t __arm_vqrshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s16))) int16x8_t __arm_vqrshlq_n_s16(int16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s16))) int16x8_t __arm_vqrshlq(int16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s32))) int32x4_t __arm_vqrshlq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s32))) int32x4_t __arm_vqrshlq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s8))) int8x16_t __arm_vqrshlq_n_s8(int8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s8))) int8x16_t __arm_vqrshlq(int8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u16))) uint16x8_t __arm_vqrshlq_n_u16(uint16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u16))) uint16x8_t __arm_vqrshlq(uint16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u32))) uint32x4_t __arm_vqrshlq_n_u32(uint32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u32))) uint32x4_t __arm_vqrshlq(uint32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u8))) uint8x16_t __arm_vqrshlq_n_u8(uint8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u8))) uint8x16_t __arm_vqrshlq(uint8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s16))) int16x8_t __arm_vqrshlq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s16))) int16x8_t __arm_vqrshlq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s32))) int32x4_t __arm_vqrshlq_s32(int32x4_t, int32x4_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s32))) int32x4_t __arm_vqrshlq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s8))) int8x16_t __arm_vqrshlq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s8))) int8x16_t __arm_vqrshlq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u16))) uint16x8_t __arm_vqrshlq_u16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u16))) uint16x8_t __arm_vqrshlq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u32))) uint32x4_t __arm_vqrshlq_u32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u32))) uint32x4_t __arm_vqrshlq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u8))) uint8x16_t __arm_vqrshlq_u8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u8))) uint8x16_t __arm_vqrshlq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_s16))) int8x16_t __arm_vqrshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_s16))) int8x16_t __arm_vqrshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_s32))) int16x8_t __arm_vqrshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_s32))) int16x8_t __arm_vqrshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_u16))) uint8x16_t __arm_vqrshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_u16))) uint8x16_t __arm_vqrshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_u32))) uint16x8_t __arm_vqrshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_u32))) uint16x8_t __arm_vqrshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_s16))) int8x16_t __arm_vqrshrnbq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_s16))) int8x16_t __arm_vqrshrnbq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_s32))) int16x8_t __arm_vqrshrnbq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_s32))) int16x8_t __arm_vqrshrnbq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_u16))) uint8x16_t 
__arm_vqrshrnbq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_u16))) uint8x16_t __arm_vqrshrnbq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_u32))) uint16x8_t __arm_vqrshrnbq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_u32))) uint16x8_t __arm_vqrshrnbq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_s16))) int8x16_t __arm_vqrshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_s16))) int8x16_t __arm_vqrshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_s32))) int16x8_t __arm_vqrshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_s32))) int16x8_t __arm_vqrshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_u16))) uint8x16_t __arm_vqrshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_u16))) uint8x16_t __arm_vqrshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_u32))) uint16x8_t __arm_vqrshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_u32))) uint16x8_t __arm_vqrshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_s16))) int8x16_t __arm_vqrshrntq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_s16))) int8x16_t __arm_vqrshrntq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_s32))) int16x8_t __arm_vqrshrntq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_s32))) int16x8_t __arm_vqrshrntq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_u16))) uint8x16_t __arm_vqrshrntq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_u16))) uint8x16_t __arm_vqrshrntq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_u32))) uint16x8_t __arm_vqrshrntq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_u32))) uint16x8_t __arm_vqrshrntq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_m_n_s16))) uint8x16_t __arm_vqrshrunbq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_m_n_s16))) uint8x16_t __arm_vqrshrunbq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_m_n_s32))) uint16x8_t __arm_vqrshrunbq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_m_n_s32))) uint16x8_t __arm_vqrshrunbq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_n_s16))) uint8x16_t __arm_vqrshrunbq_n_s16(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_n_s16))) uint8x16_t __arm_vqrshrunbq(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_n_s32))) uint16x8_t __arm_vqrshrunbq_n_s32(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_n_s32))) uint16x8_t __arm_vqrshrunbq(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_m_n_s16))) uint8x16_t __arm_vqrshruntq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_m_n_s16))) uint8x16_t __arm_vqrshruntq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_m_n_s32))) uint16x8_t __arm_vqrshruntq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_m_n_s32))) uint16x8_t __arm_vqrshruntq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_n_s16))) uint8x16_t __arm_vqrshruntq_n_s16(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_n_s16))) uint8x16_t __arm_vqrshruntq(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_n_s32))) uint16x8_t __arm_vqrshruntq_n_s32(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_n_s32))) uint16x8_t __arm_vqrshruntq(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s16))) int16x8_t __arm_vqshlq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s16))) int16x8_t __arm_vqshlq_m_n(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s32))) int32x4_t __arm_vqshlq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s32))) int32x4_t __arm_vqshlq_m_n(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s8))) int8x16_t __arm_vqshlq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s8))) 
int8x16_t __arm_vqshlq_m_n(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u16))) uint16x8_t __arm_vqshlq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u16))) uint16x8_t __arm_vqshlq_m_n(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u32))) uint32x4_t __arm_vqshlq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u32))) uint32x4_t __arm_vqshlq_m_n(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u8))) uint8x16_t __arm_vqshlq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u8))) uint8x16_t __arm_vqshlq_m_n(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s16))) int16x8_t __arm_vqshlq_m_r_s16(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s16))) int16x8_t __arm_vqshlq_m_r(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s32))) int32x4_t __arm_vqshlq_m_r_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s32))) int32x4_t __arm_vqshlq_m_r(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s8))) int8x16_t __arm_vqshlq_m_r_s8(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s8))) int8x16_t __arm_vqshlq_m_r(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u16))) uint16x8_t __arm_vqshlq_m_r_u16(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u16))) uint16x8_t __arm_vqshlq_m_r(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u32))) uint32x4_t __arm_vqshlq_m_r_u32(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u32))) uint32x4_t __arm_vqshlq_m_r(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u8))) uint8x16_t __arm_vqshlq_m_r_u8(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u8))) uint8x16_t __arm_vqshlq_m_r(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s16))) int16x8_t __arm_vqshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s16))) int16x8_t __arm_vqshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s32))) int32x4_t __arm_vqshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s32))) int32x4_t __arm_vqshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s8))) int8x16_t __arm_vqshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s8))) int8x16_t __arm_vqshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u16))) uint16x8_t __arm_vqshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u16))) uint16x8_t __arm_vqshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u32))) uint32x4_t __arm_vqshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u32))) uint32x4_t __arm_vqshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u8))) uint8x16_t __arm_vqshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u8))) uint8x16_t __arm_vqshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s16))) int16x8_t __arm_vqshlq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s16))) int16x8_t __arm_vqshlq_n(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s32))) int32x4_t __arm_vqshlq_n_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s32))) int32x4_t __arm_vqshlq_n(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s8))) int8x16_t __arm_vqshlq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s8))) int8x16_t __arm_vqshlq_n(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u16))) uint16x8_t __arm_vqshlq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u16))) uint16x8_t __arm_vqshlq_n(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u32))) uint32x4_t __arm_vqshlq_n_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u32))) uint32x4_t __arm_vqshlq_n(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u8))) uint8x16_t __arm_vqshlq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u8))) uint8x16_t __arm_vqshlq_n(uint8x16_t, int); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s16))) int16x8_t __arm_vqshlq_r_s16(int16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s16))) int16x8_t __arm_vqshlq_r(int16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s32))) int32x4_t __arm_vqshlq_r_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s32))) int32x4_t __arm_vqshlq_r(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s8))) int8x16_t __arm_vqshlq_r_s8(int8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s8))) int8x16_t __arm_vqshlq_r(int8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u16))) uint16x8_t __arm_vqshlq_r_u16(uint16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u16))) uint16x8_t __arm_vqshlq_r(uint16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u32))) uint32x4_t __arm_vqshlq_r_u32(uint32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u32))) uint32x4_t __arm_vqshlq_r(uint32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u8))) uint8x16_t __arm_vqshlq_r_u8(uint8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u8))) uint8x16_t __arm_vqshlq_r(uint8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s16))) int16x8_t __arm_vqshlq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s16))) int16x8_t __arm_vqshlq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s32))) int32x4_t __arm_vqshlq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s32))) int32x4_t __arm_vqshlq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s8))) int8x16_t __arm_vqshlq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s8))) int8x16_t __arm_vqshlq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u16))) uint16x8_t __arm_vqshlq_u16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u16))) uint16x8_t __arm_vqshlq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u32))) uint32x4_t __arm_vqshlq_u32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u32))) uint32x4_t __arm_vqshlq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u8))) uint8x16_t __arm_vqshlq_u8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u8))) uint8x16_t 
__arm_vqshlq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s16))) uint16x8_t __arm_vqshluq_m_n_s16(uint16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s16))) uint16x8_t __arm_vqshluq_m(uint16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s32))) uint32x4_t __arm_vqshluq_m_n_s32(uint32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s32))) uint32x4_t __arm_vqshluq_m(uint32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s8))) uint8x16_t __arm_vqshluq_m_n_s8(uint8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s8))) uint8x16_t __arm_vqshluq_m(uint8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s16))) uint16x8_t __arm_vqshluq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s16))) uint16x8_t __arm_vqshluq(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s32))) uint32x4_t __arm_vqshluq_n_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s32))) uint32x4_t __arm_vqshluq(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s8))) uint8x16_t __arm_vqshluq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s8))) uint8x16_t __arm_vqshluq(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_s16))) int8x16_t __arm_vqshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_s16))) int8x16_t __arm_vqshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_s32))) int16x8_t __arm_vqshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_s32))) int16x8_t __arm_vqshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_u16))) uint8x16_t __arm_vqshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_u16))) uint8x16_t __arm_vqshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_u32))) uint16x8_t __arm_vqshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_u32))) uint16x8_t __arm_vqshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_s16))) int8x16_t 
__arm_vqshrnbq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_s16))) int8x16_t __arm_vqshrnbq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_s32))) int16x8_t __arm_vqshrnbq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_s32))) int16x8_t __arm_vqshrnbq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_u16))) uint8x16_t __arm_vqshrnbq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_u16))) uint8x16_t __arm_vqshrnbq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_u32))) uint16x8_t __arm_vqshrnbq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_u32))) uint16x8_t __arm_vqshrnbq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_s16))) int8x16_t __arm_vqshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_s16))) int8x16_t __arm_vqshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_s32))) int16x8_t __arm_vqshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_s32))) int16x8_t __arm_vqshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_u16))) uint8x16_t __arm_vqshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_u16))) uint8x16_t __arm_vqshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_u32))) uint16x8_t __arm_vqshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_u32))) uint16x8_t __arm_vqshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_s16))) int8x16_t __arm_vqshrntq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_s16))) int8x16_t __arm_vqshrntq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_s32))) int16x8_t __arm_vqshrntq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_s32))) int16x8_t __arm_vqshrntq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_u16))) uint8x16_t __arm_vqshrntq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_u16))) uint8x16_t 
__arm_vqshrntq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_u32))) uint16x8_t __arm_vqshrntq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_u32))) uint16x8_t __arm_vqshrntq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_m_n_s16))) uint8x16_t __arm_vqshrunbq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_m_n_s16))) uint8x16_t __arm_vqshrunbq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_m_n_s32))) uint16x8_t __arm_vqshrunbq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_m_n_s32))) uint16x8_t __arm_vqshrunbq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_n_s16))) uint8x16_t __arm_vqshrunbq_n_s16(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_n_s16))) uint8x16_t __arm_vqshrunbq(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_n_s32))) uint16x8_t __arm_vqshrunbq_n_s32(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_n_s32))) uint16x8_t __arm_vqshrunbq(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_m_n_s16))) uint8x16_t __arm_vqshruntq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_m_n_s16))) uint8x16_t __arm_vqshruntq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_m_n_s32))) uint16x8_t __arm_vqshruntq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_m_n_s32))) uint16x8_t __arm_vqshruntq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_n_s16))) uint8x16_t __arm_vqshruntq_n_s16(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_n_s16))) uint8x16_t __arm_vqshruntq(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_n_s32))) uint16x8_t __arm_vqshruntq_n_s32(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_n_s32))) uint16x8_t __arm_vqshruntq(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s16))) int16x8_t __arm_vqsubq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s16))) int16x8_t __arm_vqsubq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s32))) int32x4_t __arm_vqsubq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s32))) int32x4_t __arm_vqsubq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s8))) int8x16_t __arm_vqsubq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s8))) int8x16_t __arm_vqsubq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u16))) uint16x8_t __arm_vqsubq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u16))) uint16x8_t __arm_vqsubq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u32))) uint32x4_t __arm_vqsubq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u32))) uint32x4_t __arm_vqsubq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u8))) uint8x16_t __arm_vqsubq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u8))) uint8x16_t __arm_vqsubq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s16))) int16x8_t __arm_vqsubq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s16))) int16x8_t __arm_vqsubq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s32))) int32x4_t __arm_vqsubq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s32))) int32x4_t __arm_vqsubq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s8))) int8x16_t __arm_vqsubq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s8))) int8x16_t __arm_vqsubq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u16))) uint16x8_t __arm_vqsubq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u16))) uint16x8_t __arm_vqsubq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u32))) uint32x4_t __arm_vqsubq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u32))) uint32x4_t __arm_vqsubq_m(uint32x4_t, uint32x4_t, uint32x4_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u8))) uint8x16_t __arm_vqsubq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u8))) uint8x16_t __arm_vqsubq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s16))) int16x8_t __arm_vqsubq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s16))) int16x8_t __arm_vqsubq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s32))) int32x4_t __arm_vqsubq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s32))) int32x4_t __arm_vqsubq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s8))) int8x16_t __arm_vqsubq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s8))) int8x16_t __arm_vqsubq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u16))) uint16x8_t __arm_vqsubq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u16))) uint16x8_t __arm_vqsubq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u32))) uint32x4_t __arm_vqsubq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u32))) uint32x4_t __arm_vqsubq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u8))) uint8x16_t __arm_vqsubq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u8))) uint8x16_t __arm_vqsubq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s16))) int16x8_t __arm_vqsubq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s16))) int16x8_t __arm_vqsubq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s32))) int32x4_t __arm_vqsubq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s32))) int32x4_t __arm_vqsubq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s8))) int8x16_t __arm_vqsubq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s8))) int8x16_t __arm_vqsubq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u16))) uint16x8_t __arm_vqsubq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u16))) uint16x8_t __arm_vqsubq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u32))) uint32x4_t __arm_vqsubq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u32))) uint32x4_t __arm_vqsubq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u8))) uint8x16_t __arm_vqsubq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u8))) uint8x16_t __arm_vqsubq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s32))) int16x8_t __arm_vreinterpretq_s16_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s32))) int16x8_t __arm_vreinterpretq_s16(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s64))) int16x8_t __arm_vreinterpretq_s16_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s64))) int16x8_t __arm_vreinterpretq_s16(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s8))) int16x8_t __arm_vreinterpretq_s16_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s8))) int16x8_t __arm_vreinterpretq_s16(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u16))) int16x8_t __arm_vreinterpretq_s16_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u16))) int16x8_t __arm_vreinterpretq_s16(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u32))) int16x8_t __arm_vreinterpretq_s16_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u32))) int16x8_t __arm_vreinterpretq_s16(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u64))) int16x8_t __arm_vreinterpretq_s16_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u64))) int16x8_t __arm_vreinterpretq_s16(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u8))) int16x8_t __arm_vreinterpretq_s16_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u8))) int16x8_t __arm_vreinterpretq_s16(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s16))) int32x4_t __arm_vreinterpretq_s32_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s16))) int32x4_t __arm_vreinterpretq_s32(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s64))) int32x4_t __arm_vreinterpretq_s32_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s64))) int32x4_t __arm_vreinterpretq_s32(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s8))) int32x4_t __arm_vreinterpretq_s32_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s8))) int32x4_t 
__arm_vreinterpretq_s32(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u16))) int32x4_t __arm_vreinterpretq_s32_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u16))) int32x4_t __arm_vreinterpretq_s32(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u32))) int32x4_t __arm_vreinterpretq_s32_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u32))) int32x4_t __arm_vreinterpretq_s32(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u64))) int32x4_t __arm_vreinterpretq_s32_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u64))) int32x4_t __arm_vreinterpretq_s32(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u8))) int32x4_t __arm_vreinterpretq_s32_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u8))) int32x4_t __arm_vreinterpretq_s32(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s16))) int64x2_t __arm_vreinterpretq_s64_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s16))) int64x2_t __arm_vreinterpretq_s64(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s32))) int64x2_t __arm_vreinterpretq_s64_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s32))) int64x2_t __arm_vreinterpretq_s64(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s8))) int64x2_t __arm_vreinterpretq_s64_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s8))) int64x2_t __arm_vreinterpretq_s64(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u16))) int64x2_t __arm_vreinterpretq_s64_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u16))) int64x2_t __arm_vreinterpretq_s64(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u32))) int64x2_t __arm_vreinterpretq_s64_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u32))) int64x2_t __arm_vreinterpretq_s64(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u64))) int64x2_t __arm_vreinterpretq_s64_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u64))) int64x2_t __arm_vreinterpretq_s64(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u8))) int64x2_t __arm_vreinterpretq_s64_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u8))) int64x2_t __arm_vreinterpretq_s64(uint8x16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s16))) int8x16_t __arm_vreinterpretq_s8_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s16))) int8x16_t __arm_vreinterpretq_s8(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s32))) int8x16_t __arm_vreinterpretq_s8_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s32))) int8x16_t __arm_vreinterpretq_s8(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s64))) int8x16_t __arm_vreinterpretq_s8_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s64))) int8x16_t __arm_vreinterpretq_s8(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u16))) int8x16_t __arm_vreinterpretq_s8_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u16))) int8x16_t __arm_vreinterpretq_s8(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u32))) int8x16_t __arm_vreinterpretq_s8_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u32))) int8x16_t __arm_vreinterpretq_s8(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u64))) int8x16_t __arm_vreinterpretq_s8_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u64))) int8x16_t __arm_vreinterpretq_s8(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u8))) int8x16_t __arm_vreinterpretq_s8_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u8))) int8x16_t __arm_vreinterpretq_s8(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s16))) uint16x8_t __arm_vreinterpretq_u16_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s16))) uint16x8_t __arm_vreinterpretq_u16(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s32))) uint16x8_t __arm_vreinterpretq_u16_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s32))) uint16x8_t __arm_vreinterpretq_u16(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s64))) uint16x8_t __arm_vreinterpretq_u16_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s64))) uint16x8_t __arm_vreinterpretq_u16(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s8))) uint16x8_t __arm_vreinterpretq_u16_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s8))) uint16x8_t __arm_vreinterpretq_u16(int8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u32))) uint16x8_t __arm_vreinterpretq_u16_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u32))) uint16x8_t __arm_vreinterpretq_u16(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u64))) uint16x8_t __arm_vreinterpretq_u16_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u64))) uint16x8_t __arm_vreinterpretq_u16(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u8))) uint16x8_t __arm_vreinterpretq_u16_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u8))) uint16x8_t __arm_vreinterpretq_u16(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s16))) uint32x4_t __arm_vreinterpretq_u32_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s16))) uint32x4_t __arm_vreinterpretq_u32(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s32))) uint32x4_t __arm_vreinterpretq_u32_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s32))) uint32x4_t __arm_vreinterpretq_u32(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s64))) uint32x4_t __arm_vreinterpretq_u32_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s64))) uint32x4_t __arm_vreinterpretq_u32(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s8))) uint32x4_t __arm_vreinterpretq_u32_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s8))) uint32x4_t __arm_vreinterpretq_u32(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u16))) uint32x4_t __arm_vreinterpretq_u32_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u16))) uint32x4_t __arm_vreinterpretq_u32(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u64))) uint32x4_t __arm_vreinterpretq_u32_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u64))) uint32x4_t __arm_vreinterpretq_u32(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u8))) uint32x4_t __arm_vreinterpretq_u32_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u8))) uint32x4_t __arm_vreinterpretq_u32(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s16))) uint64x2_t __arm_vreinterpretq_u64_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s16))) uint64x2_t __arm_vreinterpretq_u64(int16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s32))) uint64x2_t __arm_vreinterpretq_u64_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s32))) uint64x2_t __arm_vreinterpretq_u64(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s64))) uint64x2_t __arm_vreinterpretq_u64_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s64))) uint64x2_t __arm_vreinterpretq_u64(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s8))) uint64x2_t __arm_vreinterpretq_u64_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s8))) uint64x2_t __arm_vreinterpretq_u64(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u16))) uint64x2_t __arm_vreinterpretq_u64_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u16))) uint64x2_t __arm_vreinterpretq_u64(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u32))) uint64x2_t __arm_vreinterpretq_u64_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u32))) uint64x2_t __arm_vreinterpretq_u64(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u8))) uint64x2_t __arm_vreinterpretq_u64_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u8))) uint64x2_t __arm_vreinterpretq_u64(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s16))) uint8x16_t __arm_vreinterpretq_u8_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s16))) uint8x16_t __arm_vreinterpretq_u8(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s32))) uint8x16_t __arm_vreinterpretq_u8_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s32))) uint8x16_t __arm_vreinterpretq_u8(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s64))) uint8x16_t __arm_vreinterpretq_u8_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s64))) uint8x16_t __arm_vreinterpretq_u8(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s8))) uint8x16_t __arm_vreinterpretq_u8_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s8))) uint8x16_t __arm_vreinterpretq_u8(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u16))) uint8x16_t __arm_vreinterpretq_u8_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u16))) uint8x16_t __arm_vreinterpretq_u8(uint16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u32))) uint8x16_t __arm_vreinterpretq_u8_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u32))) uint8x16_t __arm_vreinterpretq_u8(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u64))) uint8x16_t __arm_vreinterpretq_u8_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u64))) uint8x16_t __arm_vreinterpretq_u8(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_m_s8))) int8x16_t __arm_vrev16q_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_m_s8))) int8x16_t __arm_vrev16q_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_m_u8))) uint8x16_t __arm_vrev16q_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_m_u8))) uint8x16_t __arm_vrev16q_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_s8))) int8x16_t __arm_vrev16q_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_s8))) int8x16_t __arm_vrev16q(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_u8))) uint8x16_t __arm_vrev16q_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_u8))) uint8x16_t __arm_vrev16q(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_x_s8))) int8x16_t __arm_vrev16q_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_x_s8))) int8x16_t __arm_vrev16q_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_x_u8))) uint8x16_t __arm_vrev16q_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_x_u8))) uint8x16_t __arm_vrev16q_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_s16))) int16x8_t __arm_vrev32q_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_s16))) int16x8_t __arm_vrev32q_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_s8))) int8x16_t __arm_vrev32q_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_s8))) int8x16_t __arm_vrev32q_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_u16))) uint16x8_t __arm_vrev32q_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_u16))) uint16x8_t __arm_vrev32q_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_u8))) uint8x16_t __arm_vrev32q_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_u8))) uint8x16_t __arm_vrev32q_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_s16))) int16x8_t __arm_vrev32q_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_s16))) int16x8_t __arm_vrev32q(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_s8))) int8x16_t __arm_vrev32q_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_s8))) int8x16_t __arm_vrev32q(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_u16))) uint16x8_t __arm_vrev32q_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_u16))) uint16x8_t __arm_vrev32q(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_u8))) uint8x16_t __arm_vrev32q_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_u8))) uint8x16_t __arm_vrev32q(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_s16))) int16x8_t __arm_vrev32q_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_s16))) int16x8_t __arm_vrev32q_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_s8))) int8x16_t __arm_vrev32q_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_s8))) int8x16_t __arm_vrev32q_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_u16))) uint16x8_t __arm_vrev32q_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_u16))) uint16x8_t __arm_vrev32q_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_u8))) uint8x16_t __arm_vrev32q_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_u8))) uint8x16_t __arm_vrev32q_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s16))) int16x8_t __arm_vrev64q_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s16))) int16x8_t __arm_vrev64q_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s32))) int32x4_t __arm_vrev64q_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s32))) int32x4_t __arm_vrev64q_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s8))) int8x16_t __arm_vrev64q_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s8))) int8x16_t __arm_vrev64q_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u16))) uint16x8_t __arm_vrev64q_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u16))) uint16x8_t __arm_vrev64q_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u32))) uint32x4_t __arm_vrev64q_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u32))) uint32x4_t __arm_vrev64q_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u8))) uint8x16_t __arm_vrev64q_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u8))) uint8x16_t __arm_vrev64q_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s16))) int16x8_t __arm_vrev64q_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s16))) int16x8_t __arm_vrev64q(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s32))) int32x4_t __arm_vrev64q_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s32))) int32x4_t __arm_vrev64q(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s8))) int8x16_t __arm_vrev64q_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s8))) int8x16_t __arm_vrev64q(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u16))) uint16x8_t __arm_vrev64q_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u16))) uint16x8_t __arm_vrev64q(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u32))) uint32x4_t __arm_vrev64q_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u32))) uint32x4_t __arm_vrev64q(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u8))) uint8x16_t __arm_vrev64q_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u8))) uint8x16_t __arm_vrev64q(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s16))) int16x8_t __arm_vrev64q_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s16))) int16x8_t __arm_vrev64q_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s32))) int32x4_t __arm_vrev64q_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s32))) int32x4_t __arm_vrev64q_x(int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s8))) int8x16_t __arm_vrev64q_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s8))) int8x16_t __arm_vrev64q_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u16))) uint16x8_t __arm_vrev64q_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u16))) uint16x8_t __arm_vrev64q_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u32))) uint32x4_t __arm_vrev64q_x_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u32))) uint32x4_t __arm_vrev64q_x(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u8))) uint8x16_t __arm_vrev64q_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u8))) uint8x16_t __arm_vrev64q_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s16))) int16x8_t __arm_vrhaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s16))) int16x8_t __arm_vrhaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s32))) int32x4_t __arm_vrhaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s32))) int32x4_t __arm_vrhaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s8))) int8x16_t __arm_vrhaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s8))) int8x16_t __arm_vrhaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u16))) uint16x8_t __arm_vrhaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u16))) uint16x8_t __arm_vrhaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u32))) uint32x4_t __arm_vrhaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u32))) uint32x4_t __arm_vrhaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u8))) uint8x16_t __arm_vrhaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u8))) uint8x16_t __arm_vrhaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s16))) int16x8_t __arm_vrhaddq_s16(int16x8_t, int16x8_t); 
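The declarations above and below this point cover, among others, the saturating shift (__arm_vqshlq_*), rounding-halving add (__arm_vrhaddq_*) and their predicated "_m" variants. A minimal usage sketch follows; it is not part of the original header text, and it assumes these declarations come from Clang's <arm_mve.h> and that the translation unit targets an MVE-capable core (e.g. -mcpu=cortex-m55). The function name average_u8 and its parameters are illustrative only.

#include <arm_mve.h> /* assumed source of the declarations shown in this dump */

/* Illustrative sketch, not part of the original header:
 * rounding-halving average of two u8 vectors, with a merging
 * predicated variant and a saturating immediate shift. */
static inline uint8x16_t average_u8(uint8x16_t a, uint8x16_t b,
                                    uint8x16_t fallback, mve_pred16_t p)
{
    /* Unpredicated form: every lane is computed as (a + b + 1) >> 1. */
    uint8x16_t full = __arm_vrhaddq_u8(a, b);

    /* Merging "_m" form: lanes whose predicate bit is clear are taken
     * from the first ("inactive") argument, here `fallback`. */
    uint8x16_t merged = __arm_vrhaddq_m_u8(fallback, a, b, p);

    /* Saturating left shift by an immediate (must be a constant in range):
     * scales the unpredicated average by 2 with unsigned saturation. */
    uint8x16_t scaled = __arm_vqshlq_n_u8(full, 1);
    (void)scaled; /* shown only to illustrate the _n form */

    return merged;
}

The overloaded spellings declared alongside each type-suffixed name (e.g. __arm_vrhaddq, __arm_vqshlq_n) resolve to the same builtins via __clang_arm_builtin_alias, so the sketch would behave identically with the suffix-free names.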
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s16))) int16x8_t __arm_vrhaddq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s32))) int32x4_t __arm_vrhaddq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s32))) int32x4_t __arm_vrhaddq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s8))) int8x16_t __arm_vrhaddq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s8))) int8x16_t __arm_vrhaddq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u16))) uint16x8_t __arm_vrhaddq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u16))) uint16x8_t __arm_vrhaddq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u32))) uint32x4_t __arm_vrhaddq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u32))) uint32x4_t __arm_vrhaddq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u8))) uint8x16_t __arm_vrhaddq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u8))) uint8x16_t __arm_vrhaddq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s16))) int16x8_t __arm_vrhaddq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s16))) int16x8_t __arm_vrhaddq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s32))) int32x4_t __arm_vrhaddq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s32))) int32x4_t __arm_vrhaddq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s8))) int8x16_t __arm_vrhaddq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s8))) int8x16_t __arm_vrhaddq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u16))) uint16x8_t __arm_vrhaddq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u16))) uint16x8_t __arm_vrhaddq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u32))) uint32x4_t __arm_vrhaddq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u32))) uint32x4_t __arm_vrhaddq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u8))) uint8x16_t __arm_vrhaddq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u8))) uint8x16_t __arm_vrhaddq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_p_s32))) int64_t __arm_vrmlaldavhaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_p_s32))) int64_t __arm_vrmlaldavhaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_p_u32))) uint64_t __arm_vrmlaldavhaq_p_u32(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_p_u32))) uint64_t __arm_vrmlaldavhaq_p(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_s32))) int64_t __arm_vrmlaldavhaq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_s32))) int64_t __arm_vrmlaldavhaq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_u32))) uint64_t __arm_vrmlaldavhaq_u32(uint64_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_u32))) uint64_t __arm_vrmlaldavhaq(uint64_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaxq_p_s32))) int64_t __arm_vrmlaldavhaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaxq_p_s32))) int64_t __arm_vrmlaldavhaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaxq_s32))) int64_t __arm_vrmlaldavhaxq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaxq_s32))) int64_t __arm_vrmlaldavhaxq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_p_s32))) int64_t __arm_vrmlaldavhq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_p_s32))) int64_t __arm_vrmlaldavhq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_p_u32))) uint64_t __arm_vrmlaldavhq_p_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_p_u32))) uint64_t __arm_vrmlaldavhq_p(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_s32))) int64_t __arm_vrmlaldavhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_s32))) int64_t __arm_vrmlaldavhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_u32))) uint64_t __arm_vrmlaldavhq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_u32))) uint64_t __arm_vrmlaldavhq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhxq_p_s32))) int64_t __arm_vrmlaldavhxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhxq_p_s32))) int64_t __arm_vrmlaldavhxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhxq_s32))) int64_t __arm_vrmlaldavhxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhxq_s32))) int64_t __arm_vrmlaldavhxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaq_p_s32))) int64_t __arm_vrmlsldavhaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaq_p_s32))) int64_t __arm_vrmlsldavhaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaq_s32))) int64_t __arm_vrmlsldavhaq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaq_s32))) int64_t __arm_vrmlsldavhaq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaxq_p_s32))) int64_t __arm_vrmlsldavhaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaxq_p_s32))) int64_t __arm_vrmlsldavhaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaxq_s32))) int64_t __arm_vrmlsldavhaxq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaxq_s32))) int64_t __arm_vrmlsldavhaxq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhq_p_s32))) int64_t __arm_vrmlsldavhq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhq_p_s32))) int64_t __arm_vrmlsldavhq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhq_s32))) int64_t __arm_vrmlsldavhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhq_s32))) int64_t __arm_vrmlsldavhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhxq_p_s32))) int64_t __arm_vrmlsldavhxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhxq_p_s32))) int64_t __arm_vrmlsldavhxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhxq_s32))) int64_t __arm_vrmlsldavhxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhxq_s32))) int64_t __arm_vrmlsldavhxq(int32x4_t, int32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s16))) int16x8_t __arm_vrmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s16))) int16x8_t __arm_vrmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s32))) int32x4_t __arm_vrmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s32))) int32x4_t __arm_vrmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s8))) int8x16_t __arm_vrmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s8))) int8x16_t __arm_vrmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u16))) uint16x8_t __arm_vrmulhq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u16))) uint16x8_t __arm_vrmulhq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u32))) uint32x4_t __arm_vrmulhq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u32))) uint32x4_t __arm_vrmulhq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u8))) uint8x16_t __arm_vrmulhq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u8))) uint8x16_t __arm_vrmulhq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s16))) int16x8_t __arm_vrmulhq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s16))) int16x8_t __arm_vrmulhq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s32))) int32x4_t __arm_vrmulhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s32))) int32x4_t __arm_vrmulhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s8))) int8x16_t __arm_vrmulhq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s8))) int8x16_t __arm_vrmulhq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u16))) uint16x8_t __arm_vrmulhq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u16))) uint16x8_t __arm_vrmulhq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u32))) uint32x4_t __arm_vrmulhq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u32))) uint32x4_t __arm_vrmulhq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u8))) uint8x16_t __arm_vrmulhq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u8))) uint8x16_t __arm_vrmulhq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s16))) int16x8_t __arm_vrmulhq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s16))) int16x8_t __arm_vrmulhq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s32))) int32x4_t __arm_vrmulhq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s32))) int32x4_t __arm_vrmulhq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s8))) int8x16_t __arm_vrmulhq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s8))) int8x16_t __arm_vrmulhq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u16))) uint16x8_t __arm_vrmulhq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u16))) uint16x8_t __arm_vrmulhq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u32))) uint32x4_t __arm_vrmulhq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u32))) uint32x4_t __arm_vrmulhq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u8))) uint8x16_t __arm_vrmulhq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u8))) uint8x16_t __arm_vrmulhq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s16))) int16x8_t __arm_vrshlq_m_n_s16(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s16))) int16x8_t __arm_vrshlq_m_n(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s32))) int32x4_t __arm_vrshlq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s32))) int32x4_t __arm_vrshlq_m_n(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s8))) int8x16_t __arm_vrshlq_m_n_s8(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s8))) int8x16_t __arm_vrshlq_m_n(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u16))) uint16x8_t 
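/*
 * Illustrative sketch (my addition, not part of the original header): how the
 * plain, _m (merging) and _x (don't-care) forms of the rounding multiply-high
 * intrinsics declared above might be used. Assumes this file is included as
 * <arm_mve.h> on an MVE-enabled target; __arm_vctp16q and all variable names
 * below are assumptions for the example, not taken from this section.
 *
 *   static int16x8_t scale_q15(int16x8_t a, int16x8_t b, int16x8_t fallback,
 *                              unsigned n_valid)
 *   {
 *       // Unpredicated: every lane gets the rounded high half of a*b.
 *       int16x8_t full = __arm_vrmulhq(a, b);
 *
 *       // Predicate covering only the first n_valid 16-bit lanes (assumed
 *       // to be declared elsewhere in this header).
 *       mve_pred16_t p = __arm_vctp16q(n_valid);
 *
 *       // _m form: lanes where p is false are taken from 'fallback'.
 *       int16x8_t merged = __arm_vrmulhq_m(fallback, a, b, p);
 *
 *       // _x form: lanes where p is false are left unspecified.
 *       int16x8_t dontcare = __arm_vrmulhq_x(a, b, p);
 *
 *       (void)full; (void)dontcare;
 *       return merged;
 *   }
 */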
__arm_vrshlq_m_n_u16(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u16))) uint16x8_t __arm_vrshlq_m_n(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u32))) uint32x4_t __arm_vrshlq_m_n_u32(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u32))) uint32x4_t __arm_vrshlq_m_n(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u8))) uint8x16_t __arm_vrshlq_m_n_u8(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u8))) uint8x16_t __arm_vrshlq_m_n(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s16))) int16x8_t __arm_vrshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s16))) int16x8_t __arm_vrshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s32))) int32x4_t __arm_vrshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s32))) int32x4_t __arm_vrshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s8))) int8x16_t __arm_vrshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s8))) int8x16_t __arm_vrshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u16))) uint16x8_t __arm_vrshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u16))) uint16x8_t __arm_vrshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u32))) uint32x4_t __arm_vrshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u32))) uint32x4_t __arm_vrshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u8))) uint8x16_t __arm_vrshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u8))) uint8x16_t __arm_vrshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s16))) int16x8_t __arm_vrshlq_n_s16(int16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s16))) int16x8_t __arm_vrshlq(int16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s32))) int32x4_t __arm_vrshlq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s32))) int32x4_t __arm_vrshlq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s8))) int8x16_t __arm_vrshlq_n_s8(int8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s8))) int8x16_t __arm_vrshlq(int8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u16))) uint16x8_t __arm_vrshlq_n_u16(uint16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u16))) uint16x8_t __arm_vrshlq(uint16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u32))) uint32x4_t __arm_vrshlq_n_u32(uint32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u32))) uint32x4_t __arm_vrshlq(uint32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u8))) uint8x16_t __arm_vrshlq_n_u8(uint8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u8))) uint8x16_t __arm_vrshlq(uint8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s16))) int16x8_t __arm_vrshlq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s16))) int16x8_t __arm_vrshlq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s32))) int32x4_t __arm_vrshlq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s32))) int32x4_t __arm_vrshlq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s8))) int8x16_t __arm_vrshlq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s8))) int8x16_t __arm_vrshlq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u16))) uint16x8_t __arm_vrshlq_u16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u16))) uint16x8_t __arm_vrshlq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u32))) uint32x4_t __arm_vrshlq_u32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u32))) uint32x4_t __arm_vrshlq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u8))) uint8x16_t __arm_vrshlq_u8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u8))) uint8x16_t __arm_vrshlq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s16))) int16x8_t __arm_vrshlq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s16))) int16x8_t __arm_vrshlq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s32))) int32x4_t __arm_vrshlq_x_s32(int32x4_t, 
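/*
 * Illustrative sketch (my addition): the rounding shift intrinsics declared
 * above take the per-lane shift amounts in a second vector of signed
 * elements even when the data is unsigned, so a negative lane value shifts
 * that lane right with rounding. Variable names and the scalar shift amount
 * are assumptions for the example only.
 *
 *   static uint16x8_t rounded_shift(uint16x8_t v, int16x8_t amounts)
 *   {
 *       // Per-lane shift: positive lane value => left, negative => right.
 *       uint16x8_t per_lane = __arm_vrshlq(v, amounts);
 *
 *       // Scalar form: every lane shifted by the same amount.
 *       uint16x8_t uniform = __arm_vrshlq_n_u16(v, 3);
 *
 *       (void)uniform;
 *       return per_lane;
 *   }
 */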
int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s32))) int32x4_t __arm_vrshlq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s8))) int8x16_t __arm_vrshlq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s8))) int8x16_t __arm_vrshlq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u16))) uint16x8_t __arm_vrshlq_x_u16(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u16))) uint16x8_t __arm_vrshlq_x(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u32))) uint32x4_t __arm_vrshlq_x_u32(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u32))) uint32x4_t __arm_vrshlq_x(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u8))) uint8x16_t __arm_vrshlq_x_u8(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u8))) uint8x16_t __arm_vrshlq_x(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_s16))) int8x16_t __arm_vrshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_s16))) int8x16_t __arm_vrshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_s32))) int16x8_t __arm_vrshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_s32))) int16x8_t __arm_vrshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_u16))) uint8x16_t __arm_vrshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_u16))) uint8x16_t __arm_vrshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_u32))) uint16x8_t __arm_vrshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_u32))) uint16x8_t __arm_vrshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_s16))) int8x16_t __arm_vrshrnbq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_s16))) int8x16_t __arm_vrshrnbq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_s32))) int16x8_t __arm_vrshrnbq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_s32))) int16x8_t __arm_vrshrnbq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_u16))) uint8x16_t __arm_vrshrnbq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_u16))) uint8x16_t __arm_vrshrnbq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_u32))) uint16x8_t __arm_vrshrnbq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_u32))) uint16x8_t __arm_vrshrnbq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_s16))) int8x16_t __arm_vrshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_s16))) int8x16_t __arm_vrshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_s32))) int16x8_t __arm_vrshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_s32))) int16x8_t __arm_vrshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_u16))) uint8x16_t __arm_vrshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_u16))) uint8x16_t __arm_vrshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_u32))) uint16x8_t __arm_vrshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_u32))) uint16x8_t __arm_vrshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_s16))) int8x16_t __arm_vrshrntq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_s16))) int8x16_t __arm_vrshrntq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_s32))) int16x8_t __arm_vrshrntq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_s32))) int16x8_t __arm_vrshrntq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_u16))) uint8x16_t __arm_vrshrntq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_u16))) uint8x16_t __arm_vrshrntq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_u32))) uint16x8_t __arm_vrshrntq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_u32))) uint16x8_t __arm_vrshrntq(uint16x8_t, uint32x4_t, int); static __inline__ 
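/*
 * Illustrative sketch (my addition): the vrshrnbq/vrshrntq pairs declared
 * above narrow a wider vector with a rounding right shift, writing the
 * results into the even ("bottom") or odd ("top") lanes of the destination,
 * which is how two wide vectors are commonly packed back into one narrow
 * vector. __arm_vdupq_n_u8 is assumed to be declared elsewhere in this
 * header; names and the shift count are assumptions for the example.
 *
 *   static uint8x16_t pack_u16_to_u8(uint16x8_t even_lanes, uint16x8_t odd_lanes)
 *   {
 *       uint8x16_t out = __arm_vdupq_n_u8(0);
 *       out = __arm_vrshrnbq(out, even_lanes, 8);  // even lanes <- even_lanes >> 8, rounded
 *       out = __arm_vrshrntq(out, odd_lanes, 8);   // odd  lanes <- odd_lanes  >> 8, rounded
 *       return out;
 *   }
 */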
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s16))) int16x8_t __arm_vrshrq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s16))) int16x8_t __arm_vrshrq_m(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s32))) int32x4_t __arm_vrshrq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s32))) int32x4_t __arm_vrshrq_m(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s8))) int8x16_t __arm_vrshrq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s8))) int8x16_t __arm_vrshrq_m(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u16))) uint16x8_t __arm_vrshrq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u16))) uint16x8_t __arm_vrshrq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u32))) uint32x4_t __arm_vrshrq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u32))) uint32x4_t __arm_vrshrq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u8))) uint8x16_t __arm_vrshrq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u8))) uint8x16_t __arm_vrshrq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s16))) int16x8_t __arm_vrshrq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s16))) int16x8_t __arm_vrshrq(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s32))) int32x4_t __arm_vrshrq_n_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s32))) int32x4_t __arm_vrshrq(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s8))) int8x16_t __arm_vrshrq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s8))) int8x16_t __arm_vrshrq(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u16))) uint16x8_t __arm_vrshrq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u16))) uint16x8_t __arm_vrshrq(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u32))) uint32x4_t __arm_vrshrq_n_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u32))) uint32x4_t __arm_vrshrq(uint32x4_t, int); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u8))) uint8x16_t __arm_vrshrq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u8))) uint8x16_t __arm_vrshrq(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s16))) int16x8_t __arm_vrshrq_x_n_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s16))) int16x8_t __arm_vrshrq_x(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s32))) int32x4_t __arm_vrshrq_x_n_s32(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s32))) int32x4_t __arm_vrshrq_x(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s8))) int8x16_t __arm_vrshrq_x_n_s8(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s8))) int8x16_t __arm_vrshrq_x(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u16))) uint16x8_t __arm_vrshrq_x_n_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u16))) uint16x8_t __arm_vrshrq_x(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u32))) uint32x4_t __arm_vrshrq_x_n_u32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u32))) uint32x4_t __arm_vrshrq_x(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u8))) uint8x16_t __arm_vrshrq_x_n_u8(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u8))) uint8x16_t __arm_vrshrq_x(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_m_s32))) int32x4_t __arm_vsbciq_m_s32(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_m_s32))) int32x4_t __arm_vsbciq_m(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_m_u32))) uint32x4_t __arm_vsbciq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_m_u32))) uint32x4_t __arm_vsbciq_m(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_s32))) int32x4_t __arm_vsbciq_s32(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_s32))) int32x4_t __arm_vsbciq(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_u32))) uint32x4_t __arm_vsbciq_u32(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_u32))) uint32x4_t __arm_vsbciq(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_m_s32))) int32x4_t __arm_vsbcq_m_s32(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_m_s32))) int32x4_t __arm_vsbcq_m(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_m_u32))) uint32x4_t __arm_vsbcq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_m_u32))) uint32x4_t __arm_vsbcq_m(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_s32))) int32x4_t __arm_vsbcq_s32(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_s32))) int32x4_t __arm_vsbcq(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_u32))) uint32x4_t __arm_vsbcq_u32(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_u32))) uint32x4_t __arm_vsbcq(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s16))) int16x8_t __arm_vsetq_lane_s16(int16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s16))) int16x8_t __arm_vsetq_lane(int16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s32))) int32x4_t __arm_vsetq_lane_s32(int32_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s32))) int32x4_t __arm_vsetq_lane(int32_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s64))) int64x2_t __arm_vsetq_lane_s64(int64_t, int64x2_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s64))) int64x2_t __arm_vsetq_lane(int64_t, int64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s8))) int8x16_t __arm_vsetq_lane_s8(int8_t, int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s8))) int8x16_t __arm_vsetq_lane(int8_t, int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u16))) uint16x8_t __arm_vsetq_lane_u16(uint16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u16))) uint16x8_t __arm_vsetq_lane(uint16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u32))) uint32x4_t __arm_vsetq_lane_u32(uint32_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u32))) uint32x4_t __arm_vsetq_lane(uint32_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u64))) uint64x2_t 
__arm_vsetq_lane_u64(uint64_t, uint64x2_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u64))) uint64x2_t __arm_vsetq_lane(uint64_t, uint64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u8))) uint8x16_t __arm_vsetq_lane_u8(uint8_t, uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u8))) uint8x16_t __arm_vsetq_lane(uint8_t, uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s16))) int16x8_t __arm_vshlcq_m_s16(int16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s16))) int16x8_t __arm_vshlcq_m(int16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s32))) int32x4_t __arm_vshlcq_m_s32(int32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s32))) int32x4_t __arm_vshlcq_m(int32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s8))) int8x16_t __arm_vshlcq_m_s8(int8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s8))) int8x16_t __arm_vshlcq_m(int8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u16))) uint16x8_t __arm_vshlcq_m_u16(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u16))) uint16x8_t __arm_vshlcq_m(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u32))) uint32x4_t __arm_vshlcq_m_u32(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u32))) uint32x4_t __arm_vshlcq_m(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u8))) uint8x16_t __arm_vshlcq_m_u8(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u8))) uint8x16_t __arm_vshlcq_m(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s16))) int16x8_t __arm_vshlcq_s16(int16x8_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s16))) int16x8_t __arm_vshlcq(int16x8_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s32))) int32x4_t __arm_vshlcq_s32(int32x4_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s32))) int32x4_t __arm_vshlcq(int32x4_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s8))) int8x16_t __arm_vshlcq_s8(int8x16_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s8))) int8x16_t __arm_vshlcq(int8x16_t, uint32_t *, int); static __inline__ 
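/*
 * Illustrative sketch (my addition): vsetq_lane returns a copy of the input
 * vector with one lane replaced; the lane index is expected to be a
 * compile-time constant. Names below are assumptions for the example.
 *
 *   static uint32x4_t set_first_lane(uint32x4_t v, uint32_t x)
 *   {
 *       return __arm_vsetq_lane(x, v, 0);   // lane 0 <- x, other lanes unchanged
 *   }
 */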
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u16))) uint16x8_t __arm_vshlcq_u16(uint16x8_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u16))) uint16x8_t __arm_vshlcq(uint16x8_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u32))) uint32x4_t __arm_vshlcq_u32(uint32x4_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u32))) uint32x4_t __arm_vshlcq(uint32x4_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u8))) uint8x16_t __arm_vshlcq_u8(uint8x16_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u8))) uint8x16_t __arm_vshlcq(uint8x16_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_s16))) int32x4_t __arm_vshllbq_m_n_s16(int32x4_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_s16))) int32x4_t __arm_vshllbq_m(int32x4_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_s8))) int16x8_t __arm_vshllbq_m_n_s8(int16x8_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_s8))) int16x8_t __arm_vshllbq_m(int16x8_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_u16))) uint32x4_t __arm_vshllbq_m_n_u16(uint32x4_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_u16))) uint32x4_t __arm_vshllbq_m(uint32x4_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_u8))) uint16x8_t __arm_vshllbq_m_n_u8(uint16x8_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_u8))) uint16x8_t __arm_vshllbq_m(uint16x8_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_s16))) int32x4_t __arm_vshllbq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_s16))) int32x4_t __arm_vshllbq(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_s8))) int16x8_t __arm_vshllbq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_s8))) int16x8_t __arm_vshllbq(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_u16))) uint32x4_t __arm_vshllbq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_u16))) uint32x4_t __arm_vshllbq(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_u8))) uint16x8_t __arm_vshllbq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_u8))) uint16x8_t __arm_vshllbq(uint8x16_t, int); static __inline__ 
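/*
 * Illustrative sketch (my addition): the vshlcq intrinsics declared above
 * take the carry by pointer, so bits shifted out of one call can be shifted
 * into the next; my understanding is that the underlying VSHLC instruction
 * shifts the whole 128-bit vector and passes bits across calls through
 * *carry. Treat the exact bit-level behaviour as an assumption here; the
 * calling pattern with a carried uint32_t is the point of the sketch.
 *
 *   static void shift_chain(uint32x4_t *a, uint32x4_t *b)
 *   {
 *       uint32_t carry = 0;                  // carry-in for the first shift
 *       *a = __arm_vshlcq(*a, &carry, 4);    // carry-out of a feeds...
 *       *b = __arm_vshlcq(*b, &carry, 4);    // ...the carry-in of b
 *   }
 */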
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_s16))) int32x4_t __arm_vshllbq_x_n_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_s16))) int32x4_t __arm_vshllbq_x(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_s8))) int16x8_t __arm_vshllbq_x_n_s8(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_s8))) int16x8_t __arm_vshllbq_x(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_u16))) uint32x4_t __arm_vshllbq_x_n_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_u16))) uint32x4_t __arm_vshllbq_x(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_u8))) uint16x8_t __arm_vshllbq_x_n_u8(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_u8))) uint16x8_t __arm_vshllbq_x(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_s16))) int32x4_t __arm_vshlltq_m_n_s16(int32x4_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_s16))) int32x4_t __arm_vshlltq_m(int32x4_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_s8))) int16x8_t __arm_vshlltq_m_n_s8(int16x8_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_s8))) int16x8_t __arm_vshlltq_m(int16x8_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_u16))) uint32x4_t __arm_vshlltq_m_n_u16(uint32x4_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_u16))) uint32x4_t __arm_vshlltq_m(uint32x4_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_u8))) uint16x8_t __arm_vshlltq_m_n_u8(uint16x8_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_u8))) uint16x8_t __arm_vshlltq_m(uint16x8_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_s16))) int32x4_t __arm_vshlltq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_s16))) int32x4_t __arm_vshlltq(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_s8))) int16x8_t __arm_vshlltq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_s8))) int16x8_t __arm_vshlltq(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_u16))) uint32x4_t __arm_vshlltq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_u16))) uint32x4_t __arm_vshlltq(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_u8))) uint16x8_t __arm_vshlltq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_u8))) uint16x8_t __arm_vshlltq(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_s16))) int32x4_t __arm_vshlltq_x_n_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_s16))) int32x4_t __arm_vshlltq_x(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_s8))) int16x8_t __arm_vshlltq_x_n_s8(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_s8))) int16x8_t __arm_vshlltq_x(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_u16))) uint32x4_t __arm_vshlltq_x_n_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_u16))) uint32x4_t __arm_vshlltq_x(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_u8))) uint16x8_t __arm_vshlltq_x_n_u8(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_u8))) uint16x8_t __arm_vshlltq_x(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s16))) int16x8_t __arm_vshlq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s16))) int16x8_t __arm_vshlq_m_n(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s32))) int32x4_t __arm_vshlq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s32))) int32x4_t __arm_vshlq_m_n(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s8))) int8x16_t __arm_vshlq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s8))) int8x16_t __arm_vshlq_m_n(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u16))) uint16x8_t __arm_vshlq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u16))) uint16x8_t __arm_vshlq_m_n(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u32))) uint32x4_t __arm_vshlq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u32))) uint32x4_t __arm_vshlq_m_n(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u8))) 
uint8x16_t __arm_vshlq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u8))) uint8x16_t __arm_vshlq_m_n(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s16))) int16x8_t __arm_vshlq_m_r_s16(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s16))) int16x8_t __arm_vshlq_m_r(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s32))) int32x4_t __arm_vshlq_m_r_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s32))) int32x4_t __arm_vshlq_m_r(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s8))) int8x16_t __arm_vshlq_m_r_s8(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s8))) int8x16_t __arm_vshlq_m_r(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u16))) uint16x8_t __arm_vshlq_m_r_u16(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u16))) uint16x8_t __arm_vshlq_m_r(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u32))) uint32x4_t __arm_vshlq_m_r_u32(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u32))) uint32x4_t __arm_vshlq_m_r(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u8))) uint8x16_t __arm_vshlq_m_r_u8(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u8))) uint8x16_t __arm_vshlq_m_r(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s16))) int16x8_t __arm_vshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s16))) int16x8_t __arm_vshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s32))) int32x4_t __arm_vshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s32))) int32x4_t __arm_vshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s8))) int8x16_t __arm_vshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s8))) int8x16_t __arm_vshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u16))) uint16x8_t __arm_vshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u16))) uint16x8_t __arm_vshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u32))) uint32x4_t __arm_vshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u32))) uint32x4_t __arm_vshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u8))) uint8x16_t __arm_vshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u8))) uint8x16_t __arm_vshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s16))) int16x8_t __arm_vshlq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s16))) int16x8_t __arm_vshlq_n(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s32))) int32x4_t __arm_vshlq_n_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s32))) int32x4_t __arm_vshlq_n(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s8))) int8x16_t __arm_vshlq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s8))) int8x16_t __arm_vshlq_n(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u16))) uint16x8_t __arm_vshlq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u16))) uint16x8_t __arm_vshlq_n(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u32))) uint32x4_t __arm_vshlq_n_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u32))) uint32x4_t __arm_vshlq_n(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u8))) uint8x16_t __arm_vshlq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u8))) uint8x16_t __arm_vshlq_n(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s16))) int16x8_t __arm_vshlq_r_s16(int16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s16))) int16x8_t __arm_vshlq_r(int16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s32))) int32x4_t __arm_vshlq_r_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s32))) int32x4_t __arm_vshlq_r(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s8))) int8x16_t __arm_vshlq_r_s8(int8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s8))) int8x16_t __arm_vshlq_r(int8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u16))) 
uint16x8_t __arm_vshlq_r_u16(uint16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u16))) uint16x8_t __arm_vshlq_r(uint16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u32))) uint32x4_t __arm_vshlq_r_u32(uint32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u32))) uint32x4_t __arm_vshlq_r(uint32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u8))) uint8x16_t __arm_vshlq_r_u8(uint8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u8))) uint8x16_t __arm_vshlq_r(uint8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s16))) int16x8_t __arm_vshlq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s16))) int16x8_t __arm_vshlq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s32))) int32x4_t __arm_vshlq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s32))) int32x4_t __arm_vshlq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s8))) int8x16_t __arm_vshlq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s8))) int8x16_t __arm_vshlq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u16))) uint16x8_t __arm_vshlq_u16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u16))) uint16x8_t __arm_vshlq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u32))) uint32x4_t __arm_vshlq_u32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u32))) uint32x4_t __arm_vshlq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u8))) uint8x16_t __arm_vshlq_u8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u8))) uint8x16_t __arm_vshlq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s16))) int16x8_t __arm_vshlq_x_n_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s16))) int16x8_t __arm_vshlq_x_n(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s32))) int32x4_t __arm_vshlq_x_n_s32(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s32))) int32x4_t __arm_vshlq_x_n(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s8))) int8x16_t __arm_vshlq_x_n_s8(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s8))) int8x16_t __arm_vshlq_x_n(int8x16_t, int, mve_pred16_t); static __inline__ 
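/*
 * Illustrative sketch (my addition): the three unpredicated vshlq flavours
 * declared above differ only in where the shift amount comes from. Names
 * below are assumptions for the example.
 *
 *   static uint16x8_t shift_examples(uint16x8_t v, int16x8_t per_lane, int32_t r)
 *   {
 *       uint16x8_t a = __arm_vshlq_n(v, 2);       // immediate: every lane << 2
 *       uint16x8_t b = __arm_vshlq_r(v, r);       // scalar in a register, same for every lane
 *       uint16x8_t c = __arm_vshlq(v, per_lane);  // per-lane amounts (signed; negative shifts right)
 *       (void)a; (void)b;
 *       return c;
 *   }
 */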
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u16))) uint16x8_t __arm_vshlq_x_n_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u16))) uint16x8_t __arm_vshlq_x_n(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u32))) uint32x4_t __arm_vshlq_x_n_u32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u32))) uint32x4_t __arm_vshlq_x_n(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u8))) uint8x16_t __arm_vshlq_x_n_u8(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u8))) uint8x16_t __arm_vshlq_x_n(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s16))) int16x8_t __arm_vshlq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s16))) int16x8_t __arm_vshlq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s32))) int32x4_t __arm_vshlq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s32))) int32x4_t __arm_vshlq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s8))) int8x16_t __arm_vshlq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s8))) int8x16_t __arm_vshlq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u16))) uint16x8_t __arm_vshlq_x_u16(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u16))) uint16x8_t __arm_vshlq_x(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u32))) uint32x4_t __arm_vshlq_x_u32(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u32))) uint32x4_t __arm_vshlq_x(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u8))) uint8x16_t __arm_vshlq_x_u8(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u8))) uint8x16_t __arm_vshlq_x(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_s16))) int8x16_t __arm_vshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_s16))) int8x16_t __arm_vshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_s32))) int16x8_t __arm_vshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_s32))) int16x8_t __arm_vshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_u16))) uint8x16_t __arm_vshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_u16))) uint8x16_t __arm_vshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_u32))) uint16x8_t __arm_vshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_u32))) uint16x8_t __arm_vshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_s16))) int8x16_t __arm_vshrnbq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_s16))) int8x16_t __arm_vshrnbq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_s32))) int16x8_t __arm_vshrnbq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_s32))) int16x8_t __arm_vshrnbq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_u16))) uint8x16_t __arm_vshrnbq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_u16))) uint8x16_t __arm_vshrnbq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_u32))) uint16x8_t __arm_vshrnbq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_u32))) uint16x8_t __arm_vshrnbq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_s16))) int8x16_t __arm_vshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_s16))) int8x16_t __arm_vshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_s32))) int16x8_t __arm_vshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_s32))) int16x8_t __arm_vshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_u16))) uint8x16_t __arm_vshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_u16))) uint8x16_t __arm_vshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_u32))) uint16x8_t __arm_vshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_u32))) uint16x8_t __arm_vshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_s16))) int8x16_t __arm_vshrntq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_s16))) int8x16_t __arm_vshrntq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_s32))) int16x8_t __arm_vshrntq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_s32))) int16x8_t __arm_vshrntq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_u16))) uint8x16_t __arm_vshrntq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_u16))) uint8x16_t __arm_vshrntq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_u32))) uint16x8_t __arm_vshrntq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_u32))) uint16x8_t __arm_vshrntq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s16))) int16x8_t __arm_vshrq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s16))) int16x8_t __arm_vshrq_m(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s32))) int32x4_t __arm_vshrq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s32))) int32x4_t __arm_vshrq_m(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s8))) int8x16_t __arm_vshrq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s8))) int8x16_t __arm_vshrq_m(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u16))) uint16x8_t __arm_vshrq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u16))) uint16x8_t __arm_vshrq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u32))) uint32x4_t __arm_vshrq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u32))) uint32x4_t __arm_vshrq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u8))) uint8x16_t __arm_vshrq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u8))) uint8x16_t __arm_vshrq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s16))) int16x8_t __arm_vshrq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s16))) int16x8_t __arm_vshrq(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s32))) int32x4_t __arm_vshrq_n_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s32))) int32x4_t __arm_vshrq(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s8))) int8x16_t __arm_vshrq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s8))) int8x16_t __arm_vshrq(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u16))) uint16x8_t __arm_vshrq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u16))) uint16x8_t __arm_vshrq(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u32))) uint32x4_t __arm_vshrq_n_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u32))) uint32x4_t __arm_vshrq(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u8))) uint8x16_t __arm_vshrq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u8))) uint8x16_t __arm_vshrq(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s16))) int16x8_t __arm_vshrq_x_n_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s16))) int16x8_t __arm_vshrq_x(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s32))) int32x4_t __arm_vshrq_x_n_s32(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s32))) int32x4_t __arm_vshrq_x(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s8))) int8x16_t __arm_vshrq_x_n_s8(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s8))) int8x16_t __arm_vshrq_x(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u16))) uint16x8_t __arm_vshrq_x_n_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u16))) uint16x8_t __arm_vshrq_x(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u32))) uint32x4_t __arm_vshrq_x_n_u32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u32))) uint32x4_t __arm_vshrq_x(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u8))) uint8x16_t __arm_vshrq_x_n_u8(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u8))) uint8x16_t __arm_vshrq_x(uint8x16_t, int, mve_pred16_t); static __inline__ 
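/*
 * Editorial aside (not part of the dumped arm_mve.h text): a minimal usage
 * sketch for the vector shift intrinsics declared above, assuming a target
 * with the MVE (Helium) extension and the default ACLE short names (i.e.
 * __ARM_MVE_PRESERVE_USER_NAMESPACE is not defined, so vshlq_r/vshrq_x map to
 * the __arm_-prefixed declarations). The function name example_scale_q16 and
 * its parameters are illustrative only.
 */
#include <arm_mve.h>

static inline int16x8_t example_scale_q16(int16x8_t v, int up, mve_pred16_t p)
{
    /* vshlq_r: shift every 16-bit lane left by a run-time amount held in a
     * general-purpose register. */
    int16x8_t scaled = vshlq_r(v, up);
    /* vshrq_x: arithmetic shift right by an immediate, performed only in the
     * lanes enabled by predicate p; disabled lanes hold an unspecified value. */
    return vshrq_x(scaled, 2, p);
}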
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s16))) int16x8_t __arm_vsliq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s16))) int16x8_t __arm_vsliq_m(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s32))) int32x4_t __arm_vsliq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s32))) int32x4_t __arm_vsliq_m(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s8))) int8x16_t __arm_vsliq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s8))) int8x16_t __arm_vsliq_m(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u16))) uint16x8_t __arm_vsliq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u16))) uint16x8_t __arm_vsliq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u32))) uint32x4_t __arm_vsliq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u32))) uint32x4_t __arm_vsliq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u8))) uint8x16_t __arm_vsliq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u8))) uint8x16_t __arm_vsliq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s16))) int16x8_t __arm_vsliq_n_s16(int16x8_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s16))) int16x8_t __arm_vsliq(int16x8_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s32))) int32x4_t __arm_vsliq_n_s32(int32x4_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s32))) int32x4_t __arm_vsliq(int32x4_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s8))) int8x16_t __arm_vsliq_n_s8(int8x16_t, int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s8))) int8x16_t __arm_vsliq(int8x16_t, int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u16))) uint16x8_t __arm_vsliq_n_u16(uint16x8_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u16))) uint16x8_t __arm_vsliq(uint16x8_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u32))) uint32x4_t __arm_vsliq_n_u32(uint32x4_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u32))) 
uint32x4_t __arm_vsliq(uint32x4_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u8))) uint8x16_t __arm_vsliq_n_u8(uint8x16_t, uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u8))) uint8x16_t __arm_vsliq(uint8x16_t, uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s16))) int16x8_t __arm_vsriq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s16))) int16x8_t __arm_vsriq_m(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s32))) int32x4_t __arm_vsriq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s32))) int32x4_t __arm_vsriq_m(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s8))) int8x16_t __arm_vsriq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s8))) int8x16_t __arm_vsriq_m(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u16))) uint16x8_t __arm_vsriq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u16))) uint16x8_t __arm_vsriq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u32))) uint32x4_t __arm_vsriq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u32))) uint32x4_t __arm_vsriq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u8))) uint8x16_t __arm_vsriq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u8))) uint8x16_t __arm_vsriq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s16))) int16x8_t __arm_vsriq_n_s16(int16x8_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s16))) int16x8_t __arm_vsriq(int16x8_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s32))) int32x4_t __arm_vsriq_n_s32(int32x4_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s32))) int32x4_t __arm_vsriq(int32x4_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s8))) int8x16_t __arm_vsriq_n_s8(int8x16_t, int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s8))) int8x16_t __arm_vsriq(int8x16_t, int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u16))) uint16x8_t __arm_vsriq_n_u16(uint16x8_t, uint16x8_t, int); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u16))) uint16x8_t __arm_vsriq(uint16x8_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u32))) uint32x4_t __arm_vsriq_n_u32(uint32x4_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u32))) uint32x4_t __arm_vsriq(uint32x4_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u8))) uint8x16_t __arm_vsriq_n_u8(uint8x16_t, uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u8))) uint8x16_t __arm_vsriq(uint8x16_t, uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s16))) void __arm_vst1q_p_s16(int16_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s16))) void __arm_vst1q_p(int16_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s32))) void __arm_vst1q_p_s32(int32_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s32))) void __arm_vst1q_p(int32_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s8))) void __arm_vst1q_p_s8(int8_t *, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s8))) void __arm_vst1q_p(int8_t *, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u16))) void __arm_vst1q_p_u16(uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u16))) void __arm_vst1q_p(uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u32))) void __arm_vst1q_p_u32(uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u32))) void __arm_vst1q_p(uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u8))) void __arm_vst1q_p_u8(uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u8))) void __arm_vst1q_p(uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s16))) void __arm_vst1q_s16(int16_t *, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s16))) void __arm_vst1q(int16_t *, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s32))) void __arm_vst1q_s32(int32_t *, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s32))) void __arm_vst1q(int32_t *, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s8))) void __arm_vst1q_s8(int8_t *, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s8))) void __arm_vst1q(int8_t *, int8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u16))) void __arm_vst1q_u16(uint16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u16))) void __arm_vst1q(uint16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u32))) void __arm_vst1q_u32(uint32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u32))) void __arm_vst1q(uint32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u8))) void __arm_vst1q_u8(uint8_t *, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u8))) void __arm_vst1q(uint8_t *, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s16))) void __arm_vst2q_s16(int16_t *, int16x8x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s16))) void __arm_vst2q(int16_t *, int16x8x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s32))) void __arm_vst2q_s32(int32_t *, int32x4x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s32))) void __arm_vst2q(int32_t *, int32x4x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s8))) void __arm_vst2q_s8(int8_t *, int8x16x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s8))) void __arm_vst2q(int8_t *, int8x16x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u16))) void __arm_vst2q_u16(uint16_t *, uint16x8x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u16))) void __arm_vst2q(uint16_t *, uint16x8x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u32))) void __arm_vst2q_u32(uint32_t *, uint32x4x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u32))) void __arm_vst2q(uint32_t *, uint32x4x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u8))) void __arm_vst2q_u8(uint8_t *, uint8x16x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u8))) void __arm_vst2q(uint8_t *, uint8x16x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s16))) void __arm_vst4q_s16(int16_t *, int16x8x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s16))) void __arm_vst4q(int16_t *, int16x8x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s32))) void __arm_vst4q_s32(int32_t *, int32x4x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s32))) void __arm_vst4q(int32_t *, int32x4x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s8))) void __arm_vst4q_s8(int8_t *, int8x16x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s8))) void __arm_vst4q(int8_t *, int8x16x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u16))) void __arm_vst4q_u16(uint16_t *, uint16x8x4_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u16))) void __arm_vst4q(uint16_t *, uint16x8x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u32))) void __arm_vst4q_u32(uint32_t *, uint32x4x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u32))) void __arm_vst4q(uint32_t *, uint32x4x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u8))) void __arm_vst4q_u8(uint8_t *, uint8x16x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u8))) void __arm_vst4q(uint8_t *, uint8x16x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s16))) void __arm_vstrbq_p_s16(int8_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s16))) void __arm_vstrbq_p(int8_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s32))) void __arm_vstrbq_p_s32(int8_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s32))) void __arm_vstrbq_p(int8_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s8))) void __arm_vstrbq_p_s8(int8_t *, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s8))) void __arm_vstrbq_p(int8_t *, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u16))) void __arm_vstrbq_p_u16(uint8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u16))) void __arm_vstrbq_p(uint8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u32))) void __arm_vstrbq_p_u32(uint8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u32))) void __arm_vstrbq_p(uint8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u8))) void __arm_vstrbq_p_u8(uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u8))) void __arm_vstrbq_p(uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s16))) void __arm_vstrbq_s16(int8_t *, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s16))) void __arm_vstrbq(int8_t *, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s32))) void __arm_vstrbq_s32(int8_t *, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s32))) void __arm_vstrbq(int8_t *, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s8))) void __arm_vstrbq_s8(int8_t *, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s8))) void __arm_vstrbq(int8_t *, int8x16_t); static __inline__ 
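/*
 * Editorial aside (not part of the dumped arm_mve.h text): a sketch of the
 * interleaving store intrinsics declared above, under the same MVE/short-name
 * assumptions as the previous aside. example_store_interleaved is an
 * illustrative name; dst is assumed to have room for 16 int16_t values.
 */
#include <arm_mve.h>

static inline void example_store_interleaved(int16_t *dst, int16x8_t a, int16x8_t b)
{
    /* vst2q stores the two registers with 2-way interleaving:
     * dst[0]=a[0], dst[1]=b[0], dst[2]=a[1], dst[3]=b[1], ... */
    int16x8x2_t pair = { { a, b } };
    vst2q(dst, pair);
}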
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s16))) void __arm_vstrbq_scatter_offset_p_s16(int8_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s16))) void __arm_vstrbq_scatter_offset_p(int8_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s32))) void __arm_vstrbq_scatter_offset_p_s32(int8_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s32))) void __arm_vstrbq_scatter_offset_p(int8_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s8))) void __arm_vstrbq_scatter_offset_p_s8(int8_t *, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s8))) void __arm_vstrbq_scatter_offset_p(int8_t *, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u16))) void __arm_vstrbq_scatter_offset_p_u16(uint8_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u16))) void __arm_vstrbq_scatter_offset_p(uint8_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u32))) void __arm_vstrbq_scatter_offset_p_u32(uint8_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u32))) void __arm_vstrbq_scatter_offset_p(uint8_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u8))) void __arm_vstrbq_scatter_offset_p_u8(uint8_t *, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u8))) void __arm_vstrbq_scatter_offset_p(uint8_t *, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s16))) void __arm_vstrbq_scatter_offset_s16(int8_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s16))) void __arm_vstrbq_scatter_offset(int8_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s32))) void __arm_vstrbq_scatter_offset_s32(int8_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s32))) void __arm_vstrbq_scatter_offset(int8_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s8))) void __arm_vstrbq_scatter_offset_s8(int8_t *, uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s8))) void __arm_vstrbq_scatter_offset(int8_t *, uint8x16_t, int8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u16))) void __arm_vstrbq_scatter_offset_u16(uint8_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u16))) void __arm_vstrbq_scatter_offset(uint8_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u32))) void __arm_vstrbq_scatter_offset_u32(uint8_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u32))) void __arm_vstrbq_scatter_offset(uint8_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u8))) void __arm_vstrbq_scatter_offset_u8(uint8_t *, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u8))) void __arm_vstrbq_scatter_offset(uint8_t *, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u16))) void __arm_vstrbq_u16(uint8_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u16))) void __arm_vstrbq(uint8_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u32))) void __arm_vstrbq_u32(uint8_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u32))) void __arm_vstrbq(uint8_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u8))) void __arm_vstrbq_u8(uint8_t *, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u8))) void __arm_vstrbq(uint8_t *, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_p_s64))) void __arm_vstrdq_scatter_base_p_s64(uint64x2_t, int, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_p_s64))) void __arm_vstrdq_scatter_base_p(uint64x2_t, int, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_p_u64))) void __arm_vstrdq_scatter_base_p_u64(uint64x2_t, int, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_p_u64))) void __arm_vstrdq_scatter_base_p(uint64x2_t, int, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_s64))) void __arm_vstrdq_scatter_base_s64(uint64x2_t, int, int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_s64))) void __arm_vstrdq_scatter_base(uint64x2_t, int, int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_u64))) void __arm_vstrdq_scatter_base_u64(uint64x2_t, int, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_u64))) void __arm_vstrdq_scatter_base(uint64x2_t, int, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_s64))) void 
__arm_vstrdq_scatter_base_wb_p_s64(uint64x2_t *, int, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_s64))) void __arm_vstrdq_scatter_base_wb_p(uint64x2_t *, int, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_u64))) void __arm_vstrdq_scatter_base_wb_p_u64(uint64x2_t *, int, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_u64))) void __arm_vstrdq_scatter_base_wb_p(uint64x2_t *, int, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_s64))) void __arm_vstrdq_scatter_base_wb_s64(uint64x2_t *, int, int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_s64))) void __arm_vstrdq_scatter_base_wb(uint64x2_t *, int, int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_u64))) void __arm_vstrdq_scatter_base_wb_u64(uint64x2_t *, int, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_u64))) void __arm_vstrdq_scatter_base_wb(uint64x2_t *, int, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_s64))) void __arm_vstrdq_scatter_offset_p_s64(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_s64))) void __arm_vstrdq_scatter_offset_p(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_u64))) void __arm_vstrdq_scatter_offset_p_u64(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_u64))) void __arm_vstrdq_scatter_offset_p(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_s64))) void __arm_vstrdq_scatter_offset_s64(int64_t *, uint64x2_t, int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_s64))) void __arm_vstrdq_scatter_offset(int64_t *, uint64x2_t, int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_u64))) void __arm_vstrdq_scatter_offset_u64(uint64_t *, uint64x2_t, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_u64))) void __arm_vstrdq_scatter_offset(uint64_t *, uint64x2_t, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_s64))) void __arm_vstrdq_scatter_shifted_offset_p_s64(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_s64))) void __arm_vstrdq_scatter_shifted_offset_p(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_u64))) void 
__arm_vstrdq_scatter_shifted_offset_p_u64(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_u64))) void __arm_vstrdq_scatter_shifted_offset_p(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_s64))) void __arm_vstrdq_scatter_shifted_offset_s64(int64_t *, uint64x2_t, int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_s64))) void __arm_vstrdq_scatter_shifted_offset(int64_t *, uint64x2_t, int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_u64))) void __arm_vstrdq_scatter_shifted_offset_u64(uint64_t *, uint64x2_t, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_u64))) void __arm_vstrdq_scatter_shifted_offset(uint64_t *, uint64x2_t, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_s16))) void __arm_vstrhq_p_s16(int16_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_s16))) void __arm_vstrhq_p(int16_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_s32))) void __arm_vstrhq_p_s32(int16_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_s32))) void __arm_vstrhq_p(int16_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_u16))) void __arm_vstrhq_p_u16(uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_u16))) void __arm_vstrhq_p(uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_u32))) void __arm_vstrhq_p_u32(uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_u32))) void __arm_vstrhq_p(uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_s16))) void __arm_vstrhq_s16(int16_t *, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_s16))) void __arm_vstrhq(int16_t *, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_s32))) void __arm_vstrhq_s32(int16_t *, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_s32))) void __arm_vstrhq(int16_t *, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s16))) void __arm_vstrhq_scatter_offset_p_s16(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s16))) void __arm_vstrhq_scatter_offset_p(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s32))) void 
__arm_vstrhq_scatter_offset_p_s32(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s32))) void __arm_vstrhq_scatter_offset_p(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u16))) void __arm_vstrhq_scatter_offset_p_u16(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u16))) void __arm_vstrhq_scatter_offset_p(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u32))) void __arm_vstrhq_scatter_offset_p_u32(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u32))) void __arm_vstrhq_scatter_offset_p(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_s16))) void __arm_vstrhq_scatter_offset_s16(int16_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_s16))) void __arm_vstrhq_scatter_offset(int16_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_s32))) void __arm_vstrhq_scatter_offset_s32(int16_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_s32))) void __arm_vstrhq_scatter_offset(int16_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_u16))) void __arm_vstrhq_scatter_offset_u16(uint16_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_u16))) void __arm_vstrhq_scatter_offset(uint16_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_u32))) void __arm_vstrhq_scatter_offset_u32(uint16_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_u32))) void __arm_vstrhq_scatter_offset(uint16_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s16))) void __arm_vstrhq_scatter_shifted_offset_p_s16(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s16))) void __arm_vstrhq_scatter_shifted_offset_p(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s32))) void __arm_vstrhq_scatter_shifted_offset_p_s32(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s32))) void __arm_vstrhq_scatter_shifted_offset_p(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u16))) void __arm_vstrhq_scatter_shifted_offset_p_u16(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u16))) void __arm_vstrhq_scatter_shifted_offset_p(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u32))) void __arm_vstrhq_scatter_shifted_offset_p_u32(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u32))) void __arm_vstrhq_scatter_shifted_offset_p(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s16))) void __arm_vstrhq_scatter_shifted_offset_s16(int16_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s16))) void __arm_vstrhq_scatter_shifted_offset(int16_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s32))) void __arm_vstrhq_scatter_shifted_offset_s32(int16_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s32))) void __arm_vstrhq_scatter_shifted_offset(int16_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u16))) void __arm_vstrhq_scatter_shifted_offset_u16(uint16_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u16))) void __arm_vstrhq_scatter_shifted_offset(uint16_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u32))) void __arm_vstrhq_scatter_shifted_offset_u32(uint16_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u32))) void __arm_vstrhq_scatter_shifted_offset(uint16_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_u16))) void __arm_vstrhq_u16(uint16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_u16))) void __arm_vstrhq(uint16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_u32))) void __arm_vstrhq_u32(uint16_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_u32))) void __arm_vstrhq(uint16_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_s32))) void __arm_vstrwq_p_s32(int32_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_s32))) void __arm_vstrwq_p(int32_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_u32))) void __arm_vstrwq_p_u32(uint32_t *, uint32x4_t, mve_pred16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_u32))) void __arm_vstrwq_p(uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_s32))) void __arm_vstrwq_s32(int32_t *, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_s32))) void __arm_vstrwq(int32_t *, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_s32))) void __arm_vstrwq_scatter_base_p_s32(uint32x4_t, int, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_s32))) void __arm_vstrwq_scatter_base_p(uint32x4_t, int, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_u32))) void __arm_vstrwq_scatter_base_p_u32(uint32x4_t, int, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_u32))) void __arm_vstrwq_scatter_base_p(uint32x4_t, int, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_s32))) void __arm_vstrwq_scatter_base_s32(uint32x4_t, int, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_s32))) void __arm_vstrwq_scatter_base(uint32x4_t, int, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_u32))) void __arm_vstrwq_scatter_base_u32(uint32x4_t, int, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_u32))) void __arm_vstrwq_scatter_base(uint32x4_t, int, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_s32))) void __arm_vstrwq_scatter_base_wb_p_s32(uint32x4_t *, int, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_s32))) void __arm_vstrwq_scatter_base_wb_p(uint32x4_t *, int, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_u32))) void __arm_vstrwq_scatter_base_wb_p_u32(uint32x4_t *, int, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_u32))) void __arm_vstrwq_scatter_base_wb_p(uint32x4_t *, int, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_s32))) void __arm_vstrwq_scatter_base_wb_s32(uint32x4_t *, int, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_s32))) void __arm_vstrwq_scatter_base_wb(uint32x4_t *, int, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_u32))) void __arm_vstrwq_scatter_base_wb_u32(uint32x4_t *, int, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_u32))) void __arm_vstrwq_scatter_base_wb(uint32x4_t *, int, uint32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_s32))) void __arm_vstrwq_scatter_offset_p_s32(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_s32))) void __arm_vstrwq_scatter_offset_p(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_u32))) void __arm_vstrwq_scatter_offset_p_u32(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_u32))) void __arm_vstrwq_scatter_offset_p(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_s32))) void __arm_vstrwq_scatter_offset_s32(int32_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_s32))) void __arm_vstrwq_scatter_offset(int32_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_u32))) void __arm_vstrwq_scatter_offset_u32(uint32_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_u32))) void __arm_vstrwq_scatter_offset(uint32_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_s32))) void __arm_vstrwq_scatter_shifted_offset_p_s32(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_s32))) void __arm_vstrwq_scatter_shifted_offset_p(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_u32))) void __arm_vstrwq_scatter_shifted_offset_p_u32(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_u32))) void __arm_vstrwq_scatter_shifted_offset_p(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_s32))) void __arm_vstrwq_scatter_shifted_offset_s32(int32_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_s32))) void __arm_vstrwq_scatter_shifted_offset(int32_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_u32))) void __arm_vstrwq_scatter_shifted_offset_u32(uint32_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_u32))) void __arm_vstrwq_scatter_shifted_offset(uint32_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_u32))) void __arm_vstrwq_u32(uint32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_u32))) void __arm_vstrwq(uint32_t *, uint32x4_t); static __inline__ 
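/*
 * Editorial aside (not part of the dumped arm_mve.h text): a sketch of the
 * predicated and scatter store intrinsics declared above, under the same
 * MVE/short-name assumptions. example_store_tail and example_scatter_words
 * are illustrative names; n is assumed to be between 0 and 8.
 */
#include <arm_mve.h>

/* Tail-predicated narrowing store: vctp16q(n) enables the first n 16-bit
 * lanes, and vstrbq_p truncates each enabled lane to 8 bits before storing. */
static inline void example_store_tail(int8_t *dst, int16x8_t v, int n)
{
    mve_pred16_t p = vctp16q((uint32_t)n);
    vstrbq_p(dst, v, p);
}

/* Scatter store: each 32-bit lane of v is written to base[idx[i]], because the
 * "shifted offset" form scales each lane of idx by the 4-byte element size. */
static inline void example_scatter_words(uint32_t *base, uint32x4_t idx, uint32x4_t v)
{
    vstrwq_scatter_shifted_offset(base, idx, v);
}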
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s16))) int16x8_t __arm_vsubq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s16))) int16x8_t __arm_vsubq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s32))) int32x4_t __arm_vsubq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s32))) int32x4_t __arm_vsubq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s8))) int8x16_t __arm_vsubq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s8))) int8x16_t __arm_vsubq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u16))) uint16x8_t __arm_vsubq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u16))) uint16x8_t __arm_vsubq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u32))) uint32x4_t __arm_vsubq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u32))) uint32x4_t __arm_vsubq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u8))) uint8x16_t __arm_vsubq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u8))) uint8x16_t __arm_vsubq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s16))) int16x8_t __arm_vsubq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s16))) int16x8_t __arm_vsubq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s32))) int32x4_t __arm_vsubq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s32))) int32x4_t __arm_vsubq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s8))) int8x16_t __arm_vsubq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s8))) int8x16_t __arm_vsubq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u16))) uint16x8_t __arm_vsubq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u16))) uint16x8_t __arm_vsubq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u32))) uint32x4_t __arm_vsubq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u32))) uint32x4_t __arm_vsubq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u8))) uint8x16_t __arm_vsubq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u8))) uint8x16_t __arm_vsubq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s16))) int16x8_t __arm_vsubq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s16))) int16x8_t __arm_vsubq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s32))) int32x4_t __arm_vsubq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s32))) int32x4_t __arm_vsubq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s8))) int8x16_t __arm_vsubq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s8))) int8x16_t __arm_vsubq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u16))) uint16x8_t __arm_vsubq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u16))) uint16x8_t __arm_vsubq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u32))) uint32x4_t __arm_vsubq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u32))) uint32x4_t __arm_vsubq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u8))) uint8x16_t __arm_vsubq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u8))) uint8x16_t __arm_vsubq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s16))) int16x8_t __arm_vsubq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s16))) int16x8_t __arm_vsubq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s32))) int32x4_t __arm_vsubq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s32))) int32x4_t __arm_vsubq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s8))) int8x16_t __arm_vsubq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s8))) int8x16_t __arm_vsubq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u16))) uint16x8_t __arm_vsubq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u16))) uint16x8_t __arm_vsubq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u32))) uint32x4_t __arm_vsubq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u32))) uint32x4_t __arm_vsubq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u8))) uint8x16_t __arm_vsubq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u8))) uint8x16_t __arm_vsubq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s16))) int16x8_t __arm_vsubq_x_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s16))) int16x8_t __arm_vsubq_x(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s32))) int32x4_t __arm_vsubq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s32))) int32x4_t __arm_vsubq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s8))) int8x16_t __arm_vsubq_x_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s8))) int8x16_t __arm_vsubq_x(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u16))) uint16x8_t __arm_vsubq_x_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u16))) uint16x8_t __arm_vsubq_x(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u32))) uint32x4_t __arm_vsubq_x_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u32))) uint32x4_t __arm_vsubq_x(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u8))) uint8x16_t __arm_vsubq_x_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u8))) uint8x16_t __arm_vsubq_x(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s16))) int16x8_t __arm_vsubq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s16))) int16x8_t __arm_vsubq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s32))) int32x4_t __arm_vsubq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s32))) int32x4_t __arm_vsubq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s8))) int8x16_t __arm_vsubq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s8))) int8x16_t __arm_vsubq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u16))) uint16x8_t __arm_vsubq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u16))) uint16x8_t __arm_vsubq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u32))) uint32x4_t __arm_vsubq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u32))) uint32x4_t __arm_vsubq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u8))) uint8x16_t __arm_vsubq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u8))) uint8x16_t __arm_vsubq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s16))) int16x8_t __arm_vuninitializedq(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s32))) int32x4_t __arm_vuninitializedq(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s64))) int64x2_t __arm_vuninitializedq(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s8))) int8x16_t __arm_vuninitializedq(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u16))) uint16x8_t __arm_vuninitializedq(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u32))) uint32x4_t __arm_vuninitializedq(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u64))) uint64x2_t __arm_vuninitializedq(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u8))) uint8x16_t __arm_vuninitializedq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_s16))) int16x8_t __arm_vuninitializedq_s16(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_s32))) int32x4_t __arm_vuninitializedq_s32(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_s64))) int64x2_t __arm_vuninitializedq_s64(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_s8))) int8x16_t __arm_vuninitializedq_s8(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_u16))) uint16x8_t __arm_vuninitializedq_u16(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_u32))) uint32x4_t __arm_vuninitializedq_u32(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_u64))) uint64x2_t __arm_vuninitializedq_u64(); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_u8))) uint8x16_t __arm_vuninitializedq_u8();
#if (__ARM_FEATURE_MVE & 2)
typedef __fp16 float16_t;
typedef float float32_t;
typedef __attribute__((__neon_vector_type__(8), __clang_arm_mve_strict_polymorphism)) float16_t float16x8_t;
typedef struct { float16x8_t val[2]; } float16x8x2_t;
typedef struct { float16x8_t val[4]; } float16x8x4_t;
typedef __attribute__((__neon_vector_type__(4), __clang_arm_mve_strict_polymorphism)) float32_t float32x4_t;
typedef struct { float32x4_t val[2]; } float32x4x2_t;
typedef struct { float32x4_t val[4]; } float32x4x4_t;
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_f16))) float16x8_t __arm_vabdq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_f16))) float16x8_t __arm_vabdq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_f32))) float32x4_t __arm_vabdq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_f32))) float32x4_t __arm_vabdq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_f16))) float16x8_t __arm_vabdq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_f16))) float16x8_t __arm_vabdq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_f32))) float32x4_t __arm_vabdq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_f32))) float32x4_t __arm_vabdq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_f16))) float16x8_t __arm_vabdq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_f16))) float16x8_t __arm_vabdq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_f32))) float32x4_t __arm_vabdq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_f32))) float32x4_t __arm_vabdq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_f16))) float16x8_t __arm_vabsq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_f16))) float16x8_t __arm_vabsq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_f32))) float32x4_t __arm_vabsq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_f32))) float32x4_t __arm_vabsq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_f16))) float16x8_t __arm_vabsq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_f16))) float16x8_t __arm_vabsq_m(float16x8_t, float16x8_t, mve_pred16_t); 
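/*
 * Illustrative sketch, not part of the original header: how the predicated
 * "_m" forms declared above are typically used from code that includes
 * arm_mve.h and is built with MVE-FP enabled (__ARM_FEATURE_MVE & 2).
 * The function name below is hypothetical and chosen only for this example.
 */
static inline float32x4_t example_vabd_masked(float32x4_t a, float32x4_t b,
                                              mve_pred16_t p)
{
    /* Lanes enabled by p receive |a - b|; lanes disabled by p are copied from
       the first ("inactive") operand, here a. The polymorphic __arm_vabdq_m
       resolves to __arm_vabdq_m_f32 for float32x4_t operands. */
    return __arm_vabdq_m(a, a, b, p);
}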
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_f32))) float32x4_t __arm_vabsq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_f32))) float32x4_t __arm_vabsq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_f16))) float16x8_t __arm_vabsq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_f16))) float16x8_t __arm_vabsq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_f32))) float32x4_t __arm_vabsq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_f32))) float32x4_t __arm_vabsq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_f16))) float16x8_t __arm_vaddq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_f16))) float16x8_t __arm_vaddq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_f32))) float32x4_t __arm_vaddq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_f32))) float32x4_t __arm_vaddq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_f16))) float16x8_t __arm_vaddq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_f16))) float16x8_t __arm_vaddq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_f32))) float32x4_t __arm_vaddq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_f32))) float32x4_t __arm_vaddq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_f16))) float16x8_t __arm_vaddq_m_n_f16(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_f16))) float16x8_t __arm_vaddq_m(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_f32))) float32x4_t __arm_vaddq_m_n_f32(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_f32))) float32x4_t __arm_vaddq_m(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_f16))) float16x8_t __arm_vaddq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_f16))) float16x8_t __arm_vaddq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_f32))) float32x4_t __arm_vaddq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_f32))) float32x4_t __arm_vaddq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_f16))) float16x8_t __arm_vaddq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_f16))) float16x8_t __arm_vaddq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_f32))) float32x4_t __arm_vaddq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_f32))) float32x4_t __arm_vaddq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_f16))) float16x8_t __arm_vaddq_x_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_f16))) float16x8_t __arm_vaddq_x(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_f32))) float32x4_t __arm_vaddq_x_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_f32))) float32x4_t __arm_vaddq_x(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_f16))) float16x8_t __arm_vandq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_f16))) float16x8_t __arm_vandq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_f32))) float32x4_t __arm_vandq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_f32))) float32x4_t __arm_vandq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_f16))) float16x8_t __arm_vandq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_f16))) float16x8_t __arm_vandq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_f32))) float32x4_t __arm_vandq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_f32))) float32x4_t __arm_vandq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_f16))) float16x8_t __arm_vandq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_f16))) float16x8_t __arm_vandq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_f32))) float32x4_t __arm_vandq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_f32))) float32x4_t __arm_vandq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_f16))) float16x8_t __arm_vbicq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_f16))) float16x8_t __arm_vbicq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_f32))) float32x4_t __arm_vbicq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_f32))) float32x4_t __arm_vbicq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_f16))) float16x8_t __arm_vbicq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_f16))) float16x8_t __arm_vbicq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_f32))) float32x4_t __arm_vbicq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_f32))) float32x4_t __arm_vbicq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_f16))) float16x8_t __arm_vbicq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_f16))) float16x8_t __arm_vbicq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_f32))) float32x4_t __arm_vbicq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_f32))) float32x4_t __arm_vbicq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_f16))) float16x8_t __arm_vbrsrq_m_n_f16(float16x8_t, float16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_f16))) float16x8_t __arm_vbrsrq_m(float16x8_t, float16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_f32))) float32x4_t __arm_vbrsrq_m_n_f32(float32x4_t, float32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_f32))) float32x4_t __arm_vbrsrq_m(float32x4_t, float32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_f16))) float16x8_t __arm_vbrsrq_n_f16(float16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_f16))) float16x8_t __arm_vbrsrq(float16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_f32))) float32x4_t __arm_vbrsrq_n_f32(float32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_f32))) float32x4_t __arm_vbrsrq(float32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_f16))) float16x8_t __arm_vbrsrq_x_n_f16(float16x8_t, int32_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_f16))) float16x8_t __arm_vbrsrq_x(float16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_f32))) float32x4_t __arm_vbrsrq_x_n_f32(float32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_f32))) float32x4_t __arm_vbrsrq_x(float32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_f16))) float16x8_t __arm_vcaddq_rot270_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_f16))) float16x8_t __arm_vcaddq_rot270(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_f32))) float32x4_t __arm_vcaddq_rot270_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_f32))) float32x4_t __arm_vcaddq_rot270(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_f16))) float16x8_t __arm_vcaddq_rot270_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_f16))) float16x8_t __arm_vcaddq_rot270_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_f32))) float32x4_t __arm_vcaddq_rot270_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_f32))) float32x4_t __arm_vcaddq_rot270_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_f16))) float16x8_t __arm_vcaddq_rot270_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_f16))) float16x8_t __arm_vcaddq_rot270_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_f32))) float32x4_t __arm_vcaddq_rot270_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_f32))) float32x4_t __arm_vcaddq_rot270_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_f16))) float16x8_t __arm_vcaddq_rot90_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_f16))) float16x8_t __arm_vcaddq_rot90(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_f32))) float32x4_t __arm_vcaddq_rot90_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_f32))) float32x4_t __arm_vcaddq_rot90(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_f16))) float16x8_t __arm_vcaddq_rot90_m_f16(float16x8_t, float16x8_t, float16x8_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_f16))) float16x8_t __arm_vcaddq_rot90_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_f32))) float32x4_t __arm_vcaddq_rot90_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_f32))) float32x4_t __arm_vcaddq_rot90_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_f16))) float16x8_t __arm_vcaddq_rot90_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_f16))) float16x8_t __arm_vcaddq_rot90_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_f32))) float32x4_t __arm_vcaddq_rot90_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_f32))) float32x4_t __arm_vcaddq_rot90_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_f16))) float16x8_t __arm_vcmlaq_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_f16))) float16x8_t __arm_vcmlaq(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_f32))) float32x4_t __arm_vcmlaq_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_f32))) float32x4_t __arm_vcmlaq(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_m_f16))) float16x8_t __arm_vcmlaq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_m_f16))) float16x8_t __arm_vcmlaq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_m_f32))) float32x4_t __arm_vcmlaq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_m_f32))) float32x4_t __arm_vcmlaq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_f16))) float16x8_t __arm_vcmlaq_rot180_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_f16))) float16x8_t __arm_vcmlaq_rot180(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_f32))) float32x4_t __arm_vcmlaq_rot180_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_f32))) float32x4_t __arm_vcmlaq_rot180(float32x4_t, float32x4_t, float32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_m_f16))) float16x8_t __arm_vcmlaq_rot180_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_m_f16))) float16x8_t __arm_vcmlaq_rot180_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_m_f32))) float32x4_t __arm_vcmlaq_rot180_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_m_f32))) float32x4_t __arm_vcmlaq_rot180_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_f16))) float16x8_t __arm_vcmlaq_rot270_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_f16))) float16x8_t __arm_vcmlaq_rot270(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_f32))) float32x4_t __arm_vcmlaq_rot270_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_f32))) float32x4_t __arm_vcmlaq_rot270(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_m_f16))) float16x8_t __arm_vcmlaq_rot270_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_m_f16))) float16x8_t __arm_vcmlaq_rot270_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_m_f32))) float32x4_t __arm_vcmlaq_rot270_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_m_f32))) float32x4_t __arm_vcmlaq_rot270_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_f16))) float16x8_t __arm_vcmlaq_rot90_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_f16))) float16x8_t __arm_vcmlaq_rot90(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_f32))) float32x4_t __arm_vcmlaq_rot90_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_f32))) float32x4_t __arm_vcmlaq_rot90(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_m_f16))) float16x8_t __arm_vcmlaq_rot90_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_m_f16))) float16x8_t __arm_vcmlaq_rot90_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_m_f32))) float32x4_t 
__arm_vcmlaq_rot90_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_m_f32))) float32x4_t __arm_vcmlaq_rot90_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_f16))) mve_pred16_t __arm_vcmpeqq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_f16))) mve_pred16_t __arm_vcmpeqq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_f32))) mve_pred16_t __arm_vcmpeqq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_f32))) mve_pred16_t __arm_vcmpeqq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_f16))) mve_pred16_t __arm_vcmpeqq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_f16))) mve_pred16_t __arm_vcmpeqq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_f32))) mve_pred16_t __arm_vcmpeqq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_f32))) mve_pred16_t __arm_vcmpeqq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_f16))) mve_pred16_t __arm_vcmpeqq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_f16))) mve_pred16_t __arm_vcmpeqq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_f32))) mve_pred16_t __arm_vcmpeqq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_f32))) mve_pred16_t __arm_vcmpeqq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_f16))) mve_pred16_t __arm_vcmpeqq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_f16))) mve_pred16_t __arm_vcmpeqq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_f32))) mve_pred16_t __arm_vcmpeqq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_f32))) mve_pred16_t __arm_vcmpeqq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_f16))) mve_pred16_t __arm_vcmpgeq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_f16))) mve_pred16_t __arm_vcmpgeq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_f32))) mve_pred16_t __arm_vcmpgeq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_f32))) mve_pred16_t 
__arm_vcmpgeq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_f16))) mve_pred16_t __arm_vcmpgeq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_f16))) mve_pred16_t __arm_vcmpgeq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_f32))) mve_pred16_t __arm_vcmpgeq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_f32))) mve_pred16_t __arm_vcmpgeq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_f16))) mve_pred16_t __arm_vcmpgeq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_f16))) mve_pred16_t __arm_vcmpgeq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_f32))) mve_pred16_t __arm_vcmpgeq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_f32))) mve_pred16_t __arm_vcmpgeq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_f16))) mve_pred16_t __arm_vcmpgeq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_f16))) mve_pred16_t __arm_vcmpgeq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_f32))) mve_pred16_t __arm_vcmpgeq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_f32))) mve_pred16_t __arm_vcmpgeq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_f16))) mve_pred16_t __arm_vcmpgtq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_f16))) mve_pred16_t __arm_vcmpgtq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_f32))) mve_pred16_t __arm_vcmpgtq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_f32))) mve_pred16_t __arm_vcmpgtq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_f16))) mve_pred16_t __arm_vcmpgtq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_f16))) mve_pred16_t __arm_vcmpgtq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_f32))) mve_pred16_t __arm_vcmpgtq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_f32))) mve_pred16_t __arm_vcmpgtq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_f16))) mve_pred16_t __arm_vcmpgtq_m_n_f16(float16x8_t, 
float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_f16))) mve_pred16_t __arm_vcmpgtq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_f32))) mve_pred16_t __arm_vcmpgtq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_f32))) mve_pred16_t __arm_vcmpgtq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_f16))) mve_pred16_t __arm_vcmpgtq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_f16))) mve_pred16_t __arm_vcmpgtq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_f32))) mve_pred16_t __arm_vcmpgtq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_f32))) mve_pred16_t __arm_vcmpgtq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_f16))) mve_pred16_t __arm_vcmpleq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_f16))) mve_pred16_t __arm_vcmpleq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_f32))) mve_pred16_t __arm_vcmpleq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_f32))) mve_pred16_t __arm_vcmpleq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_f16))) mve_pred16_t __arm_vcmpleq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_f16))) mve_pred16_t __arm_vcmpleq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_f32))) mve_pred16_t __arm_vcmpleq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_f32))) mve_pred16_t __arm_vcmpleq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_f16))) mve_pred16_t __arm_vcmpleq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_f16))) mve_pred16_t __arm_vcmpleq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_f32))) mve_pred16_t __arm_vcmpleq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_f32))) mve_pred16_t __arm_vcmpleq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_f16))) mve_pred16_t __arm_vcmpleq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_f16))) mve_pred16_t __arm_vcmpleq(float16x8_t, float16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_f32))) mve_pred16_t __arm_vcmpleq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_f32))) mve_pred16_t __arm_vcmpleq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_f16))) mve_pred16_t __arm_vcmpltq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_f16))) mve_pred16_t __arm_vcmpltq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_f32))) mve_pred16_t __arm_vcmpltq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_f32))) mve_pred16_t __arm_vcmpltq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_f16))) mve_pred16_t __arm_vcmpltq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_f16))) mve_pred16_t __arm_vcmpltq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_f32))) mve_pred16_t __arm_vcmpltq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_f32))) mve_pred16_t __arm_vcmpltq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_f16))) mve_pred16_t __arm_vcmpltq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_f16))) mve_pred16_t __arm_vcmpltq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_f32))) mve_pred16_t __arm_vcmpltq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_f32))) mve_pred16_t __arm_vcmpltq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_f16))) mve_pred16_t __arm_vcmpltq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_f16))) mve_pred16_t __arm_vcmpltq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_f32))) mve_pred16_t __arm_vcmpltq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_f32))) mve_pred16_t __arm_vcmpltq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_f16))) mve_pred16_t __arm_vcmpneq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_f16))) mve_pred16_t __arm_vcmpneq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_f32))) mve_pred16_t __arm_vcmpneq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_f32))) 
mve_pred16_t __arm_vcmpneq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_f16))) mve_pred16_t __arm_vcmpneq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_f16))) mve_pred16_t __arm_vcmpneq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_f32))) mve_pred16_t __arm_vcmpneq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_f32))) mve_pred16_t __arm_vcmpneq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_f16))) mve_pred16_t __arm_vcmpneq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_f16))) mve_pred16_t __arm_vcmpneq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_f32))) mve_pred16_t __arm_vcmpneq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_f32))) mve_pred16_t __arm_vcmpneq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_f16))) mve_pred16_t __arm_vcmpneq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_f16))) mve_pred16_t __arm_vcmpneq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_f32))) mve_pred16_t __arm_vcmpneq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_f32))) mve_pred16_t __arm_vcmpneq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_f16))) float16x8_t __arm_vcmulq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_f16))) float16x8_t __arm_vcmulq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_f32))) float32x4_t __arm_vcmulq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_f32))) float32x4_t __arm_vcmulq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_m_f16))) float16x8_t __arm_vcmulq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_m_f16))) float16x8_t __arm_vcmulq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_m_f32))) float32x4_t __arm_vcmulq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_m_f32))) float32x4_t __arm_vcmulq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_f16))) 
float16x8_t __arm_vcmulq_rot180_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_f16))) float16x8_t __arm_vcmulq_rot180(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_f32))) float32x4_t __arm_vcmulq_rot180_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_f32))) float32x4_t __arm_vcmulq_rot180(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_m_f16))) float16x8_t __arm_vcmulq_rot180_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_m_f16))) float16x8_t __arm_vcmulq_rot180_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_m_f32))) float32x4_t __arm_vcmulq_rot180_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_m_f32))) float32x4_t __arm_vcmulq_rot180_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_x_f16))) float16x8_t __arm_vcmulq_rot180_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_x_f16))) float16x8_t __arm_vcmulq_rot180_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_x_f32))) float32x4_t __arm_vcmulq_rot180_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_x_f32))) float32x4_t __arm_vcmulq_rot180_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_f16))) float16x8_t __arm_vcmulq_rot270_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_f16))) float16x8_t __arm_vcmulq_rot270(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_f32))) float32x4_t __arm_vcmulq_rot270_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_f32))) float32x4_t __arm_vcmulq_rot270(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_m_f16))) float16x8_t __arm_vcmulq_rot270_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_m_f16))) float16x8_t __arm_vcmulq_rot270_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_m_f32))) float32x4_t __arm_vcmulq_rot270_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_m_f32))) float32x4_t __arm_vcmulq_rot270_m(float32x4_t, float32x4_t, 
float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_x_f16))) float16x8_t __arm_vcmulq_rot270_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_x_f16))) float16x8_t __arm_vcmulq_rot270_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_x_f32))) float32x4_t __arm_vcmulq_rot270_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_x_f32))) float32x4_t __arm_vcmulq_rot270_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_f16))) float16x8_t __arm_vcmulq_rot90_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_f16))) float16x8_t __arm_vcmulq_rot90(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_f32))) float32x4_t __arm_vcmulq_rot90_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_f32))) float32x4_t __arm_vcmulq_rot90(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_m_f16))) float16x8_t __arm_vcmulq_rot90_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_m_f16))) float16x8_t __arm_vcmulq_rot90_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_m_f32))) float32x4_t __arm_vcmulq_rot90_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_m_f32))) float32x4_t __arm_vcmulq_rot90_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_x_f16))) float16x8_t __arm_vcmulq_rot90_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_x_f16))) float16x8_t __arm_vcmulq_rot90_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_x_f32))) float32x4_t __arm_vcmulq_rot90_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_x_f32))) float32x4_t __arm_vcmulq_rot90_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_x_f16))) float16x8_t __arm_vcmulq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_x_f16))) float16x8_t __arm_vcmulq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_x_f32))) float32x4_t __arm_vcmulq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_x_f32))) float32x4_t __arm_vcmulq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_f16))) float16x8_t __arm_vcreateq_f16(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_f32))) float32x4_t __arm_vcreateq_f32(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_s16_f16))) int16x8_t __arm_vcvtaq_m_s16_f16(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_s16_f16))) int16x8_t __arm_vcvtaq_m(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_s32_f32))) int32x4_t __arm_vcvtaq_m_s32_f32(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_s32_f32))) int32x4_t __arm_vcvtaq_m(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_u16_f16))) uint16x8_t __arm_vcvtaq_m_u16_f16(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_u16_f16))) uint16x8_t __arm_vcvtaq_m(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_u32_f32))) uint32x4_t __arm_vcvtaq_m_u32_f32(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_u32_f32))) uint32x4_t __arm_vcvtaq_m(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_s16_f16))) int16x8_t __arm_vcvtaq_s16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_s32_f32))) int32x4_t __arm_vcvtaq_s32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_u16_f16))) uint16x8_t __arm_vcvtaq_u16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_u32_f32))) uint32x4_t __arm_vcvtaq_u32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_x_s16_f16))) int16x8_t __arm_vcvtaq_x_s16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_x_s32_f32))) int32x4_t __arm_vcvtaq_x_s32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_x_u16_f16))) uint16x8_t __arm_vcvtaq_x_u16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_x_u32_f32))) uint32x4_t __arm_vcvtaq_x_u32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtbq_f16_f32))) float16x8_t __arm_vcvtbq_f16_f32(float16x8_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtbq_f32_f16))) float32x4_t __arm_vcvtbq_f32_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtbq_m_f16_f32))) float16x8_t __arm_vcvtbq_m_f16_f32(float16x8_t, float32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtbq_m_f32_f16))) float32x4_t __arm_vcvtbq_m_f32_f16(float32x4_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtbq_x_f32_f16))) float32x4_t __arm_vcvtbq_x_f32_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_s16_f16))) int16x8_t __arm_vcvtmq_m_s16_f16(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_s16_f16))) int16x8_t __arm_vcvtmq_m(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_s32_f32))) int32x4_t __arm_vcvtmq_m_s32_f32(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_s32_f32))) int32x4_t __arm_vcvtmq_m(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_u16_f16))) uint16x8_t __arm_vcvtmq_m_u16_f16(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_u16_f16))) uint16x8_t __arm_vcvtmq_m(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_u32_f32))) uint32x4_t __arm_vcvtmq_m_u32_f32(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_u32_f32))) uint32x4_t __arm_vcvtmq_m(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_s16_f16))) int16x8_t __arm_vcvtmq_s16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_s32_f32))) int32x4_t __arm_vcvtmq_s32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_u16_f16))) uint16x8_t __arm_vcvtmq_u16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_u32_f32))) uint32x4_t __arm_vcvtmq_u32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_x_s16_f16))) int16x8_t __arm_vcvtmq_x_s16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_x_s32_f32))) int32x4_t __arm_vcvtmq_x_s32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_x_u16_f16))) uint16x8_t __arm_vcvtmq_x_u16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_x_u32_f32))) uint32x4_t __arm_vcvtmq_x_u32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_s16_f16))) int16x8_t __arm_vcvtnq_m_s16_f16(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_s16_f16))) int16x8_t __arm_vcvtnq_m(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_s32_f32))) int32x4_t __arm_vcvtnq_m_s32_f32(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_s32_f32))) int32x4_t 
__arm_vcvtnq_m(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_u16_f16))) uint16x8_t __arm_vcvtnq_m_u16_f16(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_u16_f16))) uint16x8_t __arm_vcvtnq_m(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_u32_f32))) uint32x4_t __arm_vcvtnq_m_u32_f32(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_u32_f32))) uint32x4_t __arm_vcvtnq_m(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_s16_f16))) int16x8_t __arm_vcvtnq_s16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_s32_f32))) int32x4_t __arm_vcvtnq_s32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_u16_f16))) uint16x8_t __arm_vcvtnq_u16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_u32_f32))) uint32x4_t __arm_vcvtnq_u32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_x_s16_f16))) int16x8_t __arm_vcvtnq_x_s16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_x_s32_f32))) int32x4_t __arm_vcvtnq_x_s32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_x_u16_f16))) uint16x8_t __arm_vcvtnq_x_u16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_x_u32_f32))) uint32x4_t __arm_vcvtnq_x_u32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_s16_f16))) int16x8_t __arm_vcvtpq_m_s16_f16(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_s16_f16))) int16x8_t __arm_vcvtpq_m(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_s32_f32))) int32x4_t __arm_vcvtpq_m_s32_f32(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_s32_f32))) int32x4_t __arm_vcvtpq_m(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_u16_f16))) uint16x8_t __arm_vcvtpq_m_u16_f16(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_u16_f16))) uint16x8_t __arm_vcvtpq_m(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_u32_f32))) uint32x4_t __arm_vcvtpq_m_u32_f32(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_u32_f32))) uint32x4_t __arm_vcvtpq_m(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_s16_f16))) int16x8_t __arm_vcvtpq_s16_f16(float16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_s32_f32))) int32x4_t __arm_vcvtpq_s32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_u16_f16))) uint16x8_t __arm_vcvtpq_u16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_u32_f32))) uint32x4_t __arm_vcvtpq_u32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_x_s16_f16))) int16x8_t __arm_vcvtpq_x_s16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_x_s32_f32))) int32x4_t __arm_vcvtpq_x_s32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_x_u16_f16))) uint16x8_t __arm_vcvtpq_x_u16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_x_u32_f32))) uint32x4_t __arm_vcvtpq_x_u32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f16_s16))) float16x8_t __arm_vcvtq_f16_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f16_s16))) float16x8_t __arm_vcvtq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f16_u16))) float16x8_t __arm_vcvtq_f16_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f16_u16))) float16x8_t __arm_vcvtq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f32_s32))) float32x4_t __arm_vcvtq_f32_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f32_s32))) float32x4_t __arm_vcvtq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f32_u32))) float32x4_t __arm_vcvtq_f32_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f32_u32))) float32x4_t __arm_vcvtq(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f16_s16))) float16x8_t __arm_vcvtq_m_f16_s16(float16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f16_s16))) float16x8_t __arm_vcvtq_m(float16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f16_u16))) float16x8_t __arm_vcvtq_m_f16_u16(float16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f16_u16))) float16x8_t __arm_vcvtq_m(float16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f32_s32))) float32x4_t __arm_vcvtq_m_f32_s32(float32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f32_s32))) float32x4_t __arm_vcvtq_m(float32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f32_u32))) float32x4_t __arm_vcvtq_m_f32_u32(float32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f32_u32))) float32x4_t __arm_vcvtq_m(float32x4_t, 
uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f16_s16))) float16x8_t __arm_vcvtq_m_n_f16_s16(float16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f16_s16))) float16x8_t __arm_vcvtq_m_n(float16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f16_u16))) float16x8_t __arm_vcvtq_m_n_f16_u16(float16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f16_u16))) float16x8_t __arm_vcvtq_m_n(float16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f32_s32))) float32x4_t __arm_vcvtq_m_n_f32_s32(float32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f32_s32))) float32x4_t __arm_vcvtq_m_n(float32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f32_u32))) float32x4_t __arm_vcvtq_m_n_f32_u32(float32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f32_u32))) float32x4_t __arm_vcvtq_m_n(float32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_s16_f16))) int16x8_t __arm_vcvtq_m_n_s16_f16(int16x8_t, float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_s16_f16))) int16x8_t __arm_vcvtq_m_n(int16x8_t, float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_s32_f32))) int32x4_t __arm_vcvtq_m_n_s32_f32(int32x4_t, float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_s32_f32))) int32x4_t __arm_vcvtq_m_n(int32x4_t, float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_u16_f16))) uint16x8_t __arm_vcvtq_m_n_u16_f16(uint16x8_t, float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_u16_f16))) uint16x8_t __arm_vcvtq_m_n(uint16x8_t, float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_u32_f32))) uint32x4_t __arm_vcvtq_m_n_u32_f32(uint32x4_t, float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_u32_f32))) uint32x4_t __arm_vcvtq_m_n(uint32x4_t, float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_s16_f16))) int16x8_t __arm_vcvtq_m_s16_f16(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_s16_f16))) int16x8_t __arm_vcvtq_m(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_s32_f32))) int32x4_t __arm_vcvtq_m_s32_f32(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_s32_f32))) int32x4_t __arm_vcvtq_m(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_u16_f16))) uint16x8_t __arm_vcvtq_m_u16_f16(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_u16_f16))) uint16x8_t __arm_vcvtq_m(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_u32_f32))) uint32x4_t __arm_vcvtq_m_u32_f32(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_u32_f32))) uint32x4_t __arm_vcvtq_m(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f16_s16))) float16x8_t __arm_vcvtq_n_f16_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f16_s16))) float16x8_t __arm_vcvtq_n(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f16_u16))) float16x8_t __arm_vcvtq_n_f16_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f16_u16))) float16x8_t __arm_vcvtq_n(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f32_s32))) float32x4_t __arm_vcvtq_n_f32_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f32_s32))) float32x4_t __arm_vcvtq_n(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f32_u32))) float32x4_t __arm_vcvtq_n_f32_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f32_u32))) float32x4_t __arm_vcvtq_n(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_s16_f16))) int16x8_t __arm_vcvtq_n_s16_f16(float16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_s32_f32))) int32x4_t __arm_vcvtq_n_s32_f32(float32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_u16_f16))) uint16x8_t __arm_vcvtq_n_u16_f16(float16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_u32_f32))) uint32x4_t __arm_vcvtq_n_u32_f32(float32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_s16_f16))) int16x8_t __arm_vcvtq_s16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_s32_f32))) int32x4_t __arm_vcvtq_s32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_u16_f16))) uint16x8_t __arm_vcvtq_u16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_u32_f32))) uint32x4_t __arm_vcvtq_u32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f16_s16))) float16x8_t __arm_vcvtq_x_f16_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f16_s16))) float16x8_t __arm_vcvtq_x(int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f16_u16))) float16x8_t __arm_vcvtq_x_f16_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f16_u16))) float16x8_t __arm_vcvtq_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f32_s32))) float32x4_t __arm_vcvtq_x_f32_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f32_s32))) float32x4_t __arm_vcvtq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f32_u32))) float32x4_t __arm_vcvtq_x_f32_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f32_u32))) float32x4_t __arm_vcvtq_x(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f16_s16))) float16x8_t __arm_vcvtq_x_n_f16_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f16_s16))) float16x8_t __arm_vcvtq_x_n(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f16_u16))) float16x8_t __arm_vcvtq_x_n_f16_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f16_u16))) float16x8_t __arm_vcvtq_x_n(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f32_s32))) float32x4_t __arm_vcvtq_x_n_f32_s32(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f32_s32))) float32x4_t __arm_vcvtq_x_n(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f32_u32))) float32x4_t __arm_vcvtq_x_n_f32_u32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f32_u32))) float32x4_t __arm_vcvtq_x_n(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_s16_f16))) int16x8_t __arm_vcvtq_x_n_s16_f16(float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_s32_f32))) int32x4_t __arm_vcvtq_x_n_s32_f32(float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_u16_f16))) uint16x8_t __arm_vcvtq_x_n_u16_f16(float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_u32_f32))) uint32x4_t __arm_vcvtq_x_n_u32_f32(float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_s16_f16))) int16x8_t __arm_vcvtq_x_s16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_s32_f32))) int32x4_t __arm_vcvtq_x_s32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_u16_f16))) uint16x8_t __arm_vcvtq_x_u16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_u32_f32))) uint32x4_t 
__arm_vcvtq_x_u32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvttq_f16_f32))) float16x8_t __arm_vcvttq_f16_f32(float16x8_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvttq_f32_f16))) float32x4_t __arm_vcvttq_f32_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvttq_m_f16_f32))) float16x8_t __arm_vcvttq_m_f16_f32(float16x8_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvttq_m_f32_f16))) float32x4_t __arm_vcvttq_m_f32_f16(float32x4_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvttq_x_f32_f16))) float32x4_t __arm_vcvttq_x_f32_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_f16))) float16x8_t __arm_vdupq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_f16))) float16x8_t __arm_vdupq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_f32))) float32x4_t __arm_vdupq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_f32))) float32x4_t __arm_vdupq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_f16))) float16x8_t __arm_vdupq_n_f16(float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_f32))) float32x4_t __arm_vdupq_n_f32(float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_f16))) float16x8_t __arm_vdupq_x_n_f16(float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_f32))) float32x4_t __arm_vdupq_x_n_f32(float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_f16))) float16x8_t __arm_veorq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_f16))) float16x8_t __arm_veorq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_f32))) float32x4_t __arm_veorq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_f32))) float32x4_t __arm_veorq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_f16))) float16x8_t __arm_veorq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_f16))) float16x8_t __arm_veorq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_f32))) float32x4_t __arm_veorq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_f32))) float32x4_t __arm_veorq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_f16))) float16x8_t 
__arm_veorq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_f16))) float16x8_t __arm_veorq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_f32))) float32x4_t __arm_veorq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_f32))) float32x4_t __arm_veorq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_f16))) float16x8_t __arm_vfmaq_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_f16))) float16x8_t __arm_vfmaq(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_f32))) float32x4_t __arm_vfmaq_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_f32))) float32x4_t __arm_vfmaq(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_f16))) float16x8_t __arm_vfmaq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_f16))) float16x8_t __arm_vfmaq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_f32))) float32x4_t __arm_vfmaq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_f32))) float32x4_t __arm_vfmaq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_n_f16))) float16x8_t __arm_vfmaq_m_n_f16(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_n_f16))) float16x8_t __arm_vfmaq_m(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_n_f32))) float32x4_t __arm_vfmaq_m_n_f32(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_n_f32))) float32x4_t __arm_vfmaq_m(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_n_f16))) float16x8_t __arm_vfmaq_n_f16(float16x8_t, float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_n_f16))) float16x8_t __arm_vfmaq(float16x8_t, float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_n_f32))) float32x4_t __arm_vfmaq_n_f32(float32x4_t, float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_n_f32))) float32x4_t __arm_vfmaq(float32x4_t, float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_m_n_f16))) float16x8_t __arm_vfmasq_m_n_f16(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_m_n_f16))) float16x8_t __arm_vfmasq_m(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_m_n_f32))) float32x4_t __arm_vfmasq_m_n_f32(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_m_n_f32))) float32x4_t __arm_vfmasq_m(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_n_f16))) float16x8_t __arm_vfmasq_n_f16(float16x8_t, float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_n_f16))) float16x8_t __arm_vfmasq(float16x8_t, float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_n_f32))) float32x4_t __arm_vfmasq_n_f32(float32x4_t, float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_n_f32))) float32x4_t __arm_vfmasq(float32x4_t, float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_f16))) float16x8_t __arm_vfmsq_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_f16))) float16x8_t __arm_vfmsq(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_f32))) float32x4_t __arm_vfmsq_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_f32))) float32x4_t __arm_vfmsq(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_m_f16))) float16x8_t __arm_vfmsq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_m_f16))) float16x8_t __arm_vfmsq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_m_f32))) float32x4_t __arm_vfmsq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_m_f32))) float32x4_t __arm_vfmsq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_f16))) float16_t __arm_vgetq_lane_f16(float16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_f16))) float16_t __arm_vgetq_lane(float16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_f32))) float32_t __arm_vgetq_lane_f32(float32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_f32))) float32_t __arm_vgetq_lane(float32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_f16))) float16x8_t __arm_vld1q_f16(const float16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_f16))) float16x8_t __arm_vld1q(const float16_t *); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_f32))) float32x4_t __arm_vld1q_f32(const float32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_f32))) float32x4_t __arm_vld1q(const float32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_f16))) float16x8_t __arm_vld1q_z_f16(const float16_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_f16))) float16x8_t __arm_vld1q_z(const float16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_f32))) float32x4_t __arm_vld1q_z_f32(const float32_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_f32))) float32x4_t __arm_vld1q_z(const float32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_f16))) float16x8x2_t __arm_vld2q_f16(const float16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_f16))) float16x8x2_t __arm_vld2q(const float16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_f32))) float32x4x2_t __arm_vld2q_f32(const float32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_f32))) float32x4x2_t __arm_vld2q(const float32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_f16))) float16x8x4_t __arm_vld4q_f16(const float16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_f16))) float16x8x4_t __arm_vld4q(const float16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_f32))) float32x4x4_t __arm_vld4q_f32(const float32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_f32))) float32x4x4_t __arm_vld4q(const float32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_f16))) float16x8_t __arm_vldrhq_f16(const float16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_f16))) float16x8_t __arm_vldrhq_gather_offset_f16(const float16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_f16))) float16x8_t __arm_vldrhq_gather_offset(const float16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_f16))) float16x8_t __arm_vldrhq_gather_offset_z_f16(const float16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_f16))) float16x8_t __arm_vldrhq_gather_offset_z(const float16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_f16))) float16x8_t __arm_vldrhq_gather_shifted_offset_f16(const float16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_f16))) float16x8_t __arm_vldrhq_gather_shifted_offset(const float16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_f16))) 
float16x8_t __arm_vldrhq_gather_shifted_offset_z_f16(const float16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_f16))) float16x8_t __arm_vldrhq_gather_shifted_offset_z(const float16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_z_f16))) float16x8_t __arm_vldrhq_z_f16(const float16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_f32))) float32x4_t __arm_vldrwq_f32(const float32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_f32))) float32x4_t __arm_vldrwq_gather_base_f32(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_f32))) float32x4_t __arm_vldrwq_gather_base_wb_f32(uint32x4_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_z_f32))) float32x4_t __arm_vldrwq_gather_base_wb_z_f32(uint32x4_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_z_f32))) float32x4_t __arm_vldrwq_gather_base_z_f32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_f32))) float32x4_t __arm_vldrwq_gather_offset_f32(const float32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_f32))) float32x4_t __arm_vldrwq_gather_offset(const float32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_f32))) float32x4_t __arm_vldrwq_gather_offset_z_f32(const float32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_f32))) float32x4_t __arm_vldrwq_gather_offset_z(const float32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_f32))) float32x4_t __arm_vldrwq_gather_shifted_offset_f32(const float32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_f32))) float32x4_t __arm_vldrwq_gather_shifted_offset(const float32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_f32))) float32x4_t __arm_vldrwq_gather_shifted_offset_z_f32(const float32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_f32))) float32x4_t __arm_vldrwq_gather_shifted_offset_z(const float32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_z_f32))) float32x4_t __arm_vldrwq_z_f32(const float32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_f16))) float16x8_t __arm_vmaxnmaq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_f16))) float16x8_t __arm_vmaxnmaq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_f32))) float32x4_t 
__arm_vmaxnmaq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_f32))) float32x4_t __arm_vmaxnmaq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_m_f16))) float16x8_t __arm_vmaxnmaq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_m_f16))) float16x8_t __arm_vmaxnmaq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_m_f32))) float32x4_t __arm_vmaxnmaq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_m_f32))) float32x4_t __arm_vmaxnmaq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_f16))) float16_t __arm_vmaxnmavq_f16(float16_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_f16))) float16_t __arm_vmaxnmavq(float16_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_f32))) float32_t __arm_vmaxnmavq_f32(float32_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_f32))) float32_t __arm_vmaxnmavq(float32_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_p_f16))) float16_t __arm_vmaxnmavq_p_f16(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_p_f16))) float16_t __arm_vmaxnmavq_p(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_p_f32))) float32_t __arm_vmaxnmavq_p_f32(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_p_f32))) float32_t __arm_vmaxnmavq_p(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_f16))) float16x8_t __arm_vmaxnmq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_f16))) float16x8_t __arm_vmaxnmq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_f32))) float32x4_t __arm_vmaxnmq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_f32))) float32x4_t __arm_vmaxnmq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_m_f16))) float16x8_t __arm_vmaxnmq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_m_f16))) float16x8_t __arm_vmaxnmq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_m_f32))) float32x4_t __arm_vmaxnmq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_m_f32))) float32x4_t 
__arm_vmaxnmq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_x_f16))) float16x8_t __arm_vmaxnmq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_x_f16))) float16x8_t __arm_vmaxnmq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_x_f32))) float32x4_t __arm_vmaxnmq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_x_f32))) float32x4_t __arm_vmaxnmq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_f16))) float16_t __arm_vmaxnmvq_f16(float16_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_f16))) float16_t __arm_vmaxnmvq(float16_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_f32))) float32_t __arm_vmaxnmvq_f32(float32_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_f32))) float32_t __arm_vmaxnmvq(float32_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_p_f16))) float16_t __arm_vmaxnmvq_p_f16(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_p_f16))) float16_t __arm_vmaxnmvq_p(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_p_f32))) float32_t __arm_vmaxnmvq_p_f32(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_p_f32))) float32_t __arm_vmaxnmvq_p(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_f16))) float16x8_t __arm_vminnmaq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_f16))) float16x8_t __arm_vminnmaq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_f32))) float32x4_t __arm_vminnmaq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_f32))) float32x4_t __arm_vminnmaq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_m_f16))) float16x8_t __arm_vminnmaq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_m_f16))) float16x8_t __arm_vminnmaq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_m_f32))) float32x4_t __arm_vminnmaq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_m_f32))) float32x4_t __arm_vminnmaq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_f16))) float16_t __arm_vminnmavq_f16(float16_t, float16x8_t); 
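/*
 * Illustrative usage sketch (editorial addition, not part of the generated
 * declarations above or below): a minimal example of how the predicated
 * floating-point maximum intrinsics declared in this excerpt
 * (__arm_vld1q_z_f32, __arm_vmaxnmavq_p_f32) might be combined to reduce a
 * float32 buffer whose length is not a multiple of four. It assumes an
 * MVE-FP target (e.g. -mcpu=cortex-m55) and that __arm_vctp32q, which is
 * declared elsewhere in this header but outside this excerpt, is available
 * to build the tail predicate.
 */
#include <arm_mve.h>
#include <stdint.h>

static float32_t max_of_buffer(const float32_t *buf, uint32_t n)
{
    float32_t acc = buf[0];                             /* assumes n >= 1 */
    for (uint32_t i = 0; i < n; i += 4) {
        mve_pred16_t p = __arm_vctp32q(n - i);          /* enable only the lanes still in range */
        float32x4_t v = __arm_vld1q_z_f32(buf + i, p);  /* predicated load; inactive lanes read as zero */
        acc = __arm_vmaxnmavq_p_f32(acc, v, p);         /* fold the maximum over active lanes only */
    }
    return acc;
}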
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_f16))) float16_t __arm_vminnmavq(float16_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_f32))) float32_t __arm_vminnmavq_f32(float32_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_f32))) float32_t __arm_vminnmavq(float32_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_p_f16))) float16_t __arm_vminnmavq_p_f16(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_p_f16))) float16_t __arm_vminnmavq_p(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_p_f32))) float32_t __arm_vminnmavq_p_f32(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_p_f32))) float32_t __arm_vminnmavq_p(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_f16))) float16x8_t __arm_vminnmq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_f16))) float16x8_t __arm_vminnmq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_f32))) float32x4_t __arm_vminnmq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_f32))) float32x4_t __arm_vminnmq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_m_f16))) float16x8_t __arm_vminnmq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_m_f16))) float16x8_t __arm_vminnmq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_m_f32))) float32x4_t __arm_vminnmq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_m_f32))) float32x4_t __arm_vminnmq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_x_f16))) float16x8_t __arm_vminnmq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_x_f16))) float16x8_t __arm_vminnmq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_x_f32))) float32x4_t __arm_vminnmq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_x_f32))) float32x4_t __arm_vminnmq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_f16))) float16_t __arm_vminnmvq_f16(float16_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_f16))) float16_t __arm_vminnmvq(float16_t, float16x8_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_f32))) float32_t __arm_vminnmvq_f32(float32_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_f32))) float32_t __arm_vminnmvq(float32_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_p_f16))) float16_t __arm_vminnmvq_p_f16(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_p_f16))) float16_t __arm_vminnmvq_p(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_p_f32))) float32_t __arm_vminnmvq_p_f32(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_p_f32))) float32_t __arm_vminnmvq_p(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_f16))) float16x8_t __arm_vmulq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_f16))) float16x8_t __arm_vmulq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_f32))) float32x4_t __arm_vmulq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_f32))) float32x4_t __arm_vmulq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_f16))) float16x8_t __arm_vmulq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_f16))) float16x8_t __arm_vmulq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_f32))) float32x4_t __arm_vmulq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_f32))) float32x4_t __arm_vmulq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_f16))) float16x8_t __arm_vmulq_m_n_f16(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_f16))) float16x8_t __arm_vmulq_m(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_f32))) float32x4_t __arm_vmulq_m_n_f32(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_f32))) float32x4_t __arm_vmulq_m(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_f16))) float16x8_t __arm_vmulq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_f16))) float16x8_t __arm_vmulq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_f32))) float32x4_t __arm_vmulq_n_f32(float32x4_t, float32_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_f32))) float32x4_t __arm_vmulq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_f16))) float16x8_t __arm_vmulq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_f16))) float16x8_t __arm_vmulq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_f32))) float32x4_t __arm_vmulq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_f32))) float32x4_t __arm_vmulq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_f16))) float16x8_t __arm_vmulq_x_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_f16))) float16x8_t __arm_vmulq_x(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_f32))) float32x4_t __arm_vmulq_x_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_f32))) float32x4_t __arm_vmulq_x(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_f16))) float16x8_t __arm_vnegq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_f16))) float16x8_t __arm_vnegq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_f32))) float32x4_t __arm_vnegq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_f32))) float32x4_t __arm_vnegq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_f16))) float16x8_t __arm_vnegq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_f16))) float16x8_t __arm_vnegq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_f32))) float32x4_t __arm_vnegq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_f32))) float32x4_t __arm_vnegq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_f16))) float16x8_t __arm_vnegq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_f16))) float16x8_t __arm_vnegq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_f32))) float32x4_t __arm_vnegq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_f32))) float32x4_t __arm_vnegq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_f16))) float16x8_t __arm_vornq_f16(float16x8_t, float16x8_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_f16))) float16x8_t __arm_vornq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_f32))) float32x4_t __arm_vornq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_f32))) float32x4_t __arm_vornq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_f16))) float16x8_t __arm_vornq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_f16))) float16x8_t __arm_vornq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_f32))) float32x4_t __arm_vornq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_f32))) float32x4_t __arm_vornq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_f16))) float16x8_t __arm_vornq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_f16))) float16x8_t __arm_vornq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_f32))) float32x4_t __arm_vornq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_f32))) float32x4_t __arm_vornq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_f16))) float16x8_t __arm_vorrq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_f16))) float16x8_t __arm_vorrq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_f32))) float32x4_t __arm_vorrq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_f32))) float32x4_t __arm_vorrq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_f16))) float16x8_t __arm_vorrq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_f16))) float16x8_t __arm_vorrq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_f32))) float32x4_t __arm_vorrq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_f32))) float32x4_t __arm_vorrq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_f16))) float16x8_t __arm_vorrq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_f16))) float16x8_t __arm_vorrq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_f32))) float32x4_t __arm_vorrq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_f32))) float32x4_t __arm_vorrq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_f16))) float16x8_t __arm_vpselq_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_f16))) float16x8_t __arm_vpselq(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_f32))) float32x4_t __arm_vpselq_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_f32))) float32x4_t __arm_vpselq(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_f32))) float16x8_t __arm_vreinterpretq_f16_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_f32))) float16x8_t __arm_vreinterpretq_f16(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s16))) float16x8_t __arm_vreinterpretq_f16_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s16))) float16x8_t __arm_vreinterpretq_f16(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s32))) float16x8_t __arm_vreinterpretq_f16_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s32))) float16x8_t __arm_vreinterpretq_f16(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s64))) float16x8_t __arm_vreinterpretq_f16_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s64))) float16x8_t __arm_vreinterpretq_f16(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s8))) float16x8_t __arm_vreinterpretq_f16_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s8))) float16x8_t __arm_vreinterpretq_f16(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u16))) float16x8_t __arm_vreinterpretq_f16_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u16))) float16x8_t __arm_vreinterpretq_f16(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u32))) float16x8_t __arm_vreinterpretq_f16_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u32))) float16x8_t __arm_vreinterpretq_f16(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u64))) float16x8_t __arm_vreinterpretq_f16_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u64))) float16x8_t 
__arm_vreinterpretq_f16(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u8))) float16x8_t __arm_vreinterpretq_f16_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u8))) float16x8_t __arm_vreinterpretq_f16(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_f16))) float32x4_t __arm_vreinterpretq_f32_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_f16))) float32x4_t __arm_vreinterpretq_f32(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s16))) float32x4_t __arm_vreinterpretq_f32_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s16))) float32x4_t __arm_vreinterpretq_f32(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s32))) float32x4_t __arm_vreinterpretq_f32_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s32))) float32x4_t __arm_vreinterpretq_f32(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s64))) float32x4_t __arm_vreinterpretq_f32_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s64))) float32x4_t __arm_vreinterpretq_f32(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s8))) float32x4_t __arm_vreinterpretq_f32_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s8))) float32x4_t __arm_vreinterpretq_f32(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u16))) float32x4_t __arm_vreinterpretq_f32_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u16))) float32x4_t __arm_vreinterpretq_f32(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u32))) float32x4_t __arm_vreinterpretq_f32_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u32))) float32x4_t __arm_vreinterpretq_f32(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u64))) float32x4_t __arm_vreinterpretq_f32_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u64))) float32x4_t __arm_vreinterpretq_f32(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u8))) float32x4_t __arm_vreinterpretq_f32_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u8))) float32x4_t __arm_vreinterpretq_f32(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_f16))) int16x8_t __arm_vreinterpretq_s16_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_f16))) int16x8_t 
__arm_vreinterpretq_s16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_f32))) int16x8_t __arm_vreinterpretq_s16_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_f32))) int16x8_t __arm_vreinterpretq_s16(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_f16))) int32x4_t __arm_vreinterpretq_s32_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_f16))) int32x4_t __arm_vreinterpretq_s32(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_f32))) int32x4_t __arm_vreinterpretq_s32_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_f32))) int32x4_t __arm_vreinterpretq_s32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_f16))) int64x2_t __arm_vreinterpretq_s64_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_f16))) int64x2_t __arm_vreinterpretq_s64(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_f32))) int64x2_t __arm_vreinterpretq_s64_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_f32))) int64x2_t __arm_vreinterpretq_s64(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_f16))) int8x16_t __arm_vreinterpretq_s8_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_f16))) int8x16_t __arm_vreinterpretq_s8(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_f32))) int8x16_t __arm_vreinterpretq_s8_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_f32))) int8x16_t __arm_vreinterpretq_s8(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_f16))) uint16x8_t __arm_vreinterpretq_u16_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_f16))) uint16x8_t __arm_vreinterpretq_u16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_f32))) uint16x8_t __arm_vreinterpretq_u16_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_f32))) uint16x8_t __arm_vreinterpretq_u16(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_f16))) uint32x4_t __arm_vreinterpretq_u32_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_f16))) uint32x4_t __arm_vreinterpretq_u32(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_f32))) uint32x4_t __arm_vreinterpretq_u32_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_f32))) uint32x4_t 
__arm_vreinterpretq_u32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_f16))) uint64x2_t __arm_vreinterpretq_u64_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_f16))) uint64x2_t __arm_vreinterpretq_u64(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_f32))) uint64x2_t __arm_vreinterpretq_u64_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_f32))) uint64x2_t __arm_vreinterpretq_u64(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f16))) uint8x16_t __arm_vreinterpretq_u8_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f16))) uint8x16_t __arm_vreinterpretq_u8(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f32))) uint8x16_t __arm_vreinterpretq_u8_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f32))) uint8x16_t __arm_vreinterpretq_u8(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_f16))) float16x8_t __arm_vrev32q_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_f16))) float16x8_t __arm_vrev32q(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_f16))) float16x8_t __arm_vrev32q_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_f16))) float16x8_t __arm_vrev32q_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_f16))) float16x8_t __arm_vrev32q_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_f16))) float16x8_t __arm_vrev32q_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_f16))) float16x8_t __arm_vrev64q_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_f16))) float16x8_t __arm_vrev64q(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_f32))) float32x4_t __arm_vrev64q_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_f32))) float32x4_t __arm_vrev64q(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_f16))) float16x8_t __arm_vrev64q_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_f16))) float16x8_t __arm_vrev64q_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_f32))) float32x4_t __arm_vrev64q_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_f32))) float32x4_t __arm_vrev64q_m(float32x4_t, float32x4_t, mve_pred16_t); 
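/*
 * A minimal usage sketch for the predicated single-precision intrinsics
 * declared in this header, assuming an MVE.FP-enabled target (for example
 * -mcpu=cortex-m55 -mfloat-abi=hard) and that the usual predicate and
 * zeroing-load helpers (__arm_vctp32q, __arm_vld1q_z_f32) are available
 * from this header as well.  The function name scale_and_round and its
 * parameters are hypothetical, chosen only for illustration.
 *
 *   #include <arm_mve.h>
 *
 *   void scale_and_round(float32_t *dst, const float32_t *src,
 *                        float32_t k, int n)
 *   {
 *       for (int i = 0; i < n; i += 4) {
 *           // Lane mask covering the remaining (possibly partial) tail.
 *           mve_pred16_t p = __arm_vctp32q((uint32_t)(n - i));
 *           // Zeroing predicated load of up to four float32 lanes.
 *           float32x4_t v = __arm_vld1q_z_f32(src + i, p);
 *           v = __arm_vmulq_n_f32(v, k);   // scale by the scalar k
 *           v = __arm_vrndnq_f32(v);       // round to nearest, ties to even
 *           // Write back only the lanes enabled by the predicate.
 *           __arm_vst1q_p_f32(dst + i, v, p);
 *       }
 *   }
 *
 * The same calls can be spelled with the polymorphic names (__arm_vmulq,
 * __arm_vrndnq, __arm_vst1q_p), which resolve on argument types through the
 * __overloadable__ attribute.
 */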
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_f16))) float16x8_t __arm_vrev64q_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_f16))) float16x8_t __arm_vrev64q_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_f32))) float32x4_t __arm_vrev64q_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_f32))) float32x4_t __arm_vrev64q_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_f16))) float16x8_t __arm_vrndaq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_f16))) float16x8_t __arm_vrndaq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_f32))) float32x4_t __arm_vrndaq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_f32))) float32x4_t __arm_vrndaq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_m_f16))) float16x8_t __arm_vrndaq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_m_f16))) float16x8_t __arm_vrndaq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_m_f32))) float32x4_t __arm_vrndaq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_m_f32))) float32x4_t __arm_vrndaq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_x_f16))) float16x8_t __arm_vrndaq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_x_f16))) float16x8_t __arm_vrndaq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_x_f32))) float32x4_t __arm_vrndaq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_x_f32))) float32x4_t __arm_vrndaq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_f16))) float16x8_t __arm_vrndmq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_f16))) float16x8_t __arm_vrndmq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_f32))) float32x4_t __arm_vrndmq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_f32))) float32x4_t __arm_vrndmq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_m_f16))) float16x8_t __arm_vrndmq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_m_f16))) float16x8_t __arm_vrndmq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_m_f32))) float32x4_t 
__arm_vrndmq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_m_f32))) float32x4_t __arm_vrndmq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_x_f16))) float16x8_t __arm_vrndmq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_x_f16))) float16x8_t __arm_vrndmq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_x_f32))) float32x4_t __arm_vrndmq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_x_f32))) float32x4_t __arm_vrndmq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_f16))) float16x8_t __arm_vrndnq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_f16))) float16x8_t __arm_vrndnq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_f32))) float32x4_t __arm_vrndnq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_f32))) float32x4_t __arm_vrndnq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_m_f16))) float16x8_t __arm_vrndnq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_m_f16))) float16x8_t __arm_vrndnq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_m_f32))) float32x4_t __arm_vrndnq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_m_f32))) float32x4_t __arm_vrndnq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_x_f16))) float16x8_t __arm_vrndnq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_x_f16))) float16x8_t __arm_vrndnq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_x_f32))) float32x4_t __arm_vrndnq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_x_f32))) float32x4_t __arm_vrndnq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_f16))) float16x8_t __arm_vrndpq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_f16))) float16x8_t __arm_vrndpq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_f32))) float32x4_t __arm_vrndpq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_f32))) float32x4_t __arm_vrndpq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_m_f16))) float16x8_t __arm_vrndpq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_m_f16))) float16x8_t __arm_vrndpq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_m_f32))) float32x4_t __arm_vrndpq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_m_f32))) float32x4_t __arm_vrndpq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_x_f16))) float16x8_t __arm_vrndpq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_x_f16))) float16x8_t __arm_vrndpq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_x_f32))) float32x4_t __arm_vrndpq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_x_f32))) float32x4_t __arm_vrndpq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_f16))) float16x8_t __arm_vrndq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_f16))) float16x8_t __arm_vrndq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_f32))) float32x4_t __arm_vrndq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_f32))) float32x4_t __arm_vrndq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_m_f16))) float16x8_t __arm_vrndq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_m_f16))) float16x8_t __arm_vrndq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_m_f32))) float32x4_t __arm_vrndq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_m_f32))) float32x4_t __arm_vrndq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_x_f16))) float16x8_t __arm_vrndq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_x_f16))) float16x8_t __arm_vrndq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_x_f32))) float32x4_t __arm_vrndq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_x_f32))) float32x4_t __arm_vrndq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_f16))) float16x8_t __arm_vrndxq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_f16))) float16x8_t __arm_vrndxq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_f32))) float32x4_t __arm_vrndxq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_f32))) float32x4_t __arm_vrndxq(float32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_m_f16))) float16x8_t __arm_vrndxq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_m_f16))) float16x8_t __arm_vrndxq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_m_f32))) float32x4_t __arm_vrndxq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_m_f32))) float32x4_t __arm_vrndxq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_x_f16))) float16x8_t __arm_vrndxq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_x_f16))) float16x8_t __arm_vrndxq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_x_f32))) float32x4_t __arm_vrndxq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_x_f32))) float32x4_t __arm_vrndxq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_f16))) float16x8_t __arm_vsetq_lane_f16(float16_t, float16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_f16))) float16x8_t __arm_vsetq_lane(float16_t, float16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_f32))) float32x4_t __arm_vsetq_lane_f32(float32_t, float32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_f32))) float32x4_t __arm_vsetq_lane(float32_t, float32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_f16))) void __arm_vst1q_f16(float16_t *, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_f16))) void __arm_vst1q(float16_t *, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_f32))) void __arm_vst1q_f32(float32_t *, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_f32))) void __arm_vst1q(float32_t *, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_f16))) void __arm_vst1q_p_f16(float16_t *, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_f16))) void __arm_vst1q_p(float16_t *, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_f32))) void __arm_vst1q_p_f32(float32_t *, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_f32))) void __arm_vst1q_p(float32_t *, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_f16))) void __arm_vst2q_f16(float16_t *, float16x8x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_f16))) void __arm_vst2q(float16_t *, float16x8x2_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_f32))) void __arm_vst2q_f32(float32_t *, float32x4x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_f32))) void __arm_vst2q(float32_t *, float32x4x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_f16))) void __arm_vst4q_f16(float16_t *, float16x8x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_f16))) void __arm_vst4q(float16_t *, float16x8x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_f32))) void __arm_vst4q_f32(float32_t *, float32x4x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_f32))) void __arm_vst4q(float32_t *, float32x4x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_f16))) void __arm_vstrhq_f16(float16_t *, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_f16))) void __arm_vstrhq(float16_t *, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_f16))) void __arm_vstrhq_p_f16(float16_t *, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_f16))) void __arm_vstrhq_p(float16_t *, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_f16))) void __arm_vstrhq_scatter_offset_f16(float16_t *, uint16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_f16))) void __arm_vstrhq_scatter_offset(float16_t *, uint16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_f16))) void __arm_vstrhq_scatter_offset_p_f16(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_f16))) void __arm_vstrhq_scatter_offset_p(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_f16))) void __arm_vstrhq_scatter_shifted_offset_f16(float16_t *, uint16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_f16))) void __arm_vstrhq_scatter_shifted_offset(float16_t *, uint16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_f16))) void __arm_vstrhq_scatter_shifted_offset_p_f16(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_f16))) void __arm_vstrhq_scatter_shifted_offset_p(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_f32))) void __arm_vstrwq_f32(float32_t *, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_f32))) void __arm_vstrwq(float32_t *, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_f32))) void 
__arm_vstrwq_p_f32(float32_t *, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_f32))) void __arm_vstrwq_p(float32_t *, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_f32))) void __arm_vstrwq_scatter_base_f32(uint32x4_t, int, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_f32))) void __arm_vstrwq_scatter_base(uint32x4_t, int, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_f32))) void __arm_vstrwq_scatter_base_p_f32(uint32x4_t, int, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_f32))) void __arm_vstrwq_scatter_base_p(uint32x4_t, int, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_f32))) void __arm_vstrwq_scatter_base_wb_f32(uint32x4_t *, int, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_f32))) void __arm_vstrwq_scatter_base_wb(uint32x4_t *, int, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_f32))) void __arm_vstrwq_scatter_base_wb_p_f32(uint32x4_t *, int, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_f32))) void __arm_vstrwq_scatter_base_wb_p(uint32x4_t *, int, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_f32))) void __arm_vstrwq_scatter_offset_f32(float32_t *, uint32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_f32))) void __arm_vstrwq_scatter_offset(float32_t *, uint32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_f32))) void __arm_vstrwq_scatter_offset_p_f32(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_f32))) void __arm_vstrwq_scatter_offset_p(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_f32))) void __arm_vstrwq_scatter_shifted_offset_f32(float32_t *, uint32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_f32))) void __arm_vstrwq_scatter_shifted_offset(float32_t *, uint32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_f32))) void __arm_vstrwq_scatter_shifted_offset_p_f32(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_f32))) void __arm_vstrwq_scatter_shifted_offset_p(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_f16))) float16x8_t __arm_vsubq_f16(float16x8_t, float16x8_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_f16))) float16x8_t __arm_vsubq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_f32))) float32x4_t __arm_vsubq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_f32))) float32x4_t __arm_vsubq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_f16))) float16x8_t __arm_vsubq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_f16))) float16x8_t __arm_vsubq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_f32))) float32x4_t __arm_vsubq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_f32))) float32x4_t __arm_vsubq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_f16))) float16x8_t __arm_vsubq_m_n_f16(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_f16))) float16x8_t __arm_vsubq_m(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_f32))) float32x4_t __arm_vsubq_m_n_f32(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_f32))) float32x4_t __arm_vsubq_m(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_f16))) float16x8_t __arm_vsubq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_f16))) float16x8_t __arm_vsubq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_f32))) float32x4_t __arm_vsubq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_f32))) float32x4_t __arm_vsubq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_f16))) float16x8_t __arm_vsubq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_f16))) float16x8_t __arm_vsubq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_f32))) float32x4_t __arm_vsubq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_f32))) float32x4_t __arm_vsubq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_f16))) float16x8_t __arm_vsubq_x_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_f16))) float16x8_t __arm_vsubq_x(float16x8_t, float16_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_f32))) float32x4_t __arm_vsubq_x_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_f32))) float32x4_t __arm_vsubq_x(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_f16))) float16x8_t __arm_vuninitializedq_f16(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_f32))) float32x4_t __arm_vuninitializedq_f32(); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_f16))) float16x8_t __arm_vuninitializedq(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_f32))) float32x4_t __arm_vuninitializedq(float32x4_t);
#endif /* (__ARM_FEATURE_MVE & 2) */
#if (!defined __ARM_MVE_PRESERVE_USER_NAMESPACE)
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_asrl))) int64_t asrl(int64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_lsll))) uint64_t lsll(uint64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_sqrshr))) int32_t sqrshr(int32_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_sqrshrl))) int64_t sqrshrl(int64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_sqrshrl_sat48))) int64_t sqrshrl_sat48(int64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_sqshl))) int32_t sqshl(int32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_sqshll))) int64_t sqshll(int64_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_srshr))) int32_t srshr(int32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_srshrl))) int64_t srshrl(int64_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_uqrshl))) uint32_t uqrshl(uint32_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_uqrshll))) uint64_t uqrshll(uint64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_uqrshll_sat48))) uint64_t uqrshll_sat48(uint64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_uqshl))) uint32_t uqshl(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_uqshll))) uint64_t uqshll(uint64_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_urshr))) uint32_t urshr(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_urshrl))) uint64_t urshrl(uint64_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s16))) uint32_t vabavq_p_s16(uint32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s16))) uint32_t vabavq_p(uint32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s32))) uint32_t vabavq_p_s32(uint32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s32))) uint32_t vabavq_p(uint32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s8))) uint32_t vabavq_p_s8(uint32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s8))) uint32_t vabavq_p(uint32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u16))) uint32_t vabavq_p_u16(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u16))) uint32_t vabavq_p(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u32))) uint32_t vabavq_p_u32(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u32))) uint32_t vabavq_p(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u8))) uint32_t vabavq_p_u8(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u8))) uint32_t vabavq_p(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s16))) uint32_t vabavq_s16(uint32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s16))) uint32_t vabavq(uint32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s32))) uint32_t vabavq_s32(uint32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s32))) uint32_t vabavq(uint32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s8))) uint32_t vabavq_s8(uint32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s8))) uint32_t vabavq(uint32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u16))) uint32_t vabavq_u16(uint32_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u16))) uint32_t vabavq(uint32_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u32))) uint32_t vabavq_u32(uint32_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u32))) uint32_t vabavq(uint32_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u8))) uint32_t vabavq_u8(uint32_t, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u8))) uint32_t vabavq(uint32_t, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s16))) int16x8_t vabdq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s16))) int16x8_t vabdq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s32))) int32x4_t vabdq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s32))) int32x4_t vabdq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s8))) int8x16_t vabdq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s8))) int8x16_t vabdq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u16))) uint16x8_t vabdq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u16))) uint16x8_t vabdq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u32))) uint32x4_t vabdq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u32))) uint32x4_t vabdq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u8))) uint8x16_t vabdq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u8))) uint8x16_t vabdq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s16))) int16x8_t vabdq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s16))) int16x8_t vabdq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s32))) int32x4_t vabdq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s32))) int32x4_t vabdq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s8))) int8x16_t vabdq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s8))) int8x16_t vabdq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u16))) uint16x8_t vabdq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u16))) uint16x8_t vabdq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u32))) uint32x4_t vabdq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u32))) uint32x4_t vabdq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u8))) uint8x16_t vabdq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u8))) uint8x16_t vabdq(uint8x16_t, uint8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s16))) int16x8_t vabdq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s16))) int16x8_t vabdq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s32))) int32x4_t vabdq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s32))) int32x4_t vabdq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s8))) int8x16_t vabdq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s8))) int8x16_t vabdq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u16))) uint16x8_t vabdq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u16))) uint16x8_t vabdq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u32))) uint32x4_t vabdq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u32))) uint32x4_t vabdq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u8))) uint8x16_t vabdq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u8))) uint8x16_t vabdq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s16))) int16x8_t vabsq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s16))) int16x8_t vabsq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s32))) int32x4_t vabsq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s32))) int32x4_t vabsq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s8))) int8x16_t vabsq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s8))) int8x16_t vabsq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s16))) int16x8_t vabsq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s16))) int16x8_t vabsq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s32))) int32x4_t vabsq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s32))) int32x4_t vabsq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s8))) int8x16_t vabsq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s8))) int8x16_t vabsq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s16))) int16x8_t vabsq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s16))) int16x8_t vabsq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s32))) int32x4_t vabsq_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s32))) int32x4_t vabsq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s8))) int8x16_t vabsq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s8))) int8x16_t vabsq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadciq_m_s32))) int32x4_t vadciq_m_s32(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadciq_m_s32))) int32x4_t vadciq_m(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadciq_m_u32))) uint32x4_t vadciq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadciq_m_u32))) uint32x4_t vadciq_m(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadciq_s32))) int32x4_t vadciq_s32(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadciq_s32))) int32x4_t vadciq(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadciq_u32))) uint32x4_t vadciq_u32(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadciq_u32))) uint32x4_t vadciq(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadcq_m_s32))) int32x4_t vadcq_m_s32(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadcq_m_s32))) int32x4_t vadcq_m(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadcq_m_u32))) uint32x4_t vadcq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadcq_m_u32))) uint32x4_t vadcq_m(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadcq_s32))) int32x4_t vadcq_s32(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadcq_s32))) int32x4_t vadcq(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadcq_u32))) uint32x4_t vadcq_u32(uint32x4_t, uint32x4_t, unsigned *); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadcq_u32))) uint32x4_t vadcq(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_p_s32))) int64_t vaddlvaq_p_s32(int64_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_p_s32))) int64_t vaddlvaq_p(int64_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_p_u32))) uint64_t vaddlvaq_p_u32(uint64_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_p_u32))) uint64_t vaddlvaq_p(uint64_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_s32))) int64_t vaddlvaq_s32(int64_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_s32))) int64_t vaddlvaq(int64_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_u32))) uint64_t vaddlvaq_u32(uint64_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_u32))) uint64_t vaddlvaq(uint64_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_p_s32))) int64_t vaddlvq_p_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_p_s32))) int64_t vaddlvq_p(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_p_u32))) uint64_t vaddlvq_p_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_p_u32))) uint64_t vaddlvq_p(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_s32))) int64_t vaddlvq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_s32))) int64_t vaddlvq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_u32))) uint64_t vaddlvq_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_u32))) uint64_t vaddlvq(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s16))) int16x8_t vaddq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s16))) int16x8_t vaddq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s32))) int32x4_t vaddq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s32))) int32x4_t vaddq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s8))) int8x16_t vaddq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s8))) int8x16_t vaddq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u16))) uint16x8_t vaddq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u16))) uint16x8_t vaddq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u32))) uint32x4_t vaddq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u32))) uint32x4_t vaddq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u8))) uint8x16_t vaddq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u8))) uint8x16_t vaddq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s16))) int16x8_t vaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s16))) int16x8_t vaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s32))) int32x4_t vaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s32))) int32x4_t vaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s8))) int8x16_t vaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s8))) int8x16_t vaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u16))) uint16x8_t vaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u16))) uint16x8_t vaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u32))) uint32x4_t vaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u32))) uint32x4_t vaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u8))) uint8x16_t vaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u8))) uint8x16_t vaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s16))) int16x8_t vaddq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s16))) int16x8_t vaddq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s32))) int32x4_t vaddq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s32))) int32x4_t vaddq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s8))) int8x16_t vaddq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s8))) int8x16_t vaddq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u16))) uint16x8_t vaddq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u16))) uint16x8_t vaddq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u32))) uint32x4_t vaddq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u32))) uint32x4_t vaddq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u8))) uint8x16_t vaddq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u8))) uint8x16_t vaddq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s16))) int16x8_t vaddq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s16))) int16x8_t vaddq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s32))) int32x4_t vaddq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s32))) int32x4_t vaddq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s8))) int8x16_t vaddq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s8))) int8x16_t vaddq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u16))) uint16x8_t vaddq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u16))) uint16x8_t vaddq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u32))) uint32x4_t vaddq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u32))) uint32x4_t vaddq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u8))) uint8x16_t vaddq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u8))) uint8x16_t vaddq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s16))) int16x8_t vaddq_x_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s16))) int16x8_t vaddq_x(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s32))) int32x4_t vaddq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s32))) int32x4_t vaddq_x(int32x4_t, int32_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s8))) int8x16_t vaddq_x_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s8))) int8x16_t vaddq_x(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u16))) uint16x8_t vaddq_x_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u16))) uint16x8_t vaddq_x(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u32))) uint32x4_t vaddq_x_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u32))) uint32x4_t vaddq_x(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u8))) uint8x16_t vaddq_x_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u8))) uint8x16_t vaddq_x(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s16))) int16x8_t vaddq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s16))) int16x8_t vaddq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s32))) int32x4_t vaddq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s32))) int32x4_t vaddq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s8))) int8x16_t vaddq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s8))) int8x16_t vaddq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u16))) uint16x8_t vaddq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u16))) uint16x8_t vaddq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u32))) uint32x4_t vaddq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u32))) uint32x4_t vaddq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u8))) uint8x16_t vaddq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u8))) uint8x16_t vaddq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s16))) int32_t vaddvaq_p_s16(int32_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s16))) int32_t vaddvaq_p(int32_t, int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s32))) int32_t vaddvaq_p_s32(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s32))) int32_t vaddvaq_p(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s8))) int32_t vaddvaq_p_s8(int32_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s8))) int32_t vaddvaq_p(int32_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u16))) uint32_t vaddvaq_p_u16(uint32_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u16))) uint32_t vaddvaq_p(uint32_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u32))) uint32_t vaddvaq_p_u32(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u32))) uint32_t vaddvaq_p(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u8))) uint32_t vaddvaq_p_u8(uint32_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u8))) uint32_t vaddvaq_p(uint32_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s16))) int32_t vaddvaq_s16(int32_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s16))) int32_t vaddvaq(int32_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s32))) int32_t vaddvaq_s32(int32_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s32))) int32_t vaddvaq(int32_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s8))) int32_t vaddvaq_s8(int32_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s8))) int32_t vaddvaq(int32_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u16))) uint32_t vaddvaq_u16(uint32_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u16))) uint32_t vaddvaq(uint32_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u32))) uint32_t vaddvaq_u32(uint32_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u32))) uint32_t vaddvaq(uint32_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u8))) uint32_t vaddvaq_u8(uint32_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u8))) uint32_t vaddvaq(uint32_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s16))) int32_t vaddvq_p_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s16))) int32_t vaddvq_p(int16x8_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s32))) int32_t vaddvq_p_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s32))) int32_t vaddvq_p(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s8))) int32_t vaddvq_p_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s8))) int32_t vaddvq_p(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u16))) uint32_t vaddvq_p_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u16))) uint32_t vaddvq_p(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u32))) uint32_t vaddvq_p_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u32))) uint32_t vaddvq_p(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u8))) uint32_t vaddvq_p_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u8))) uint32_t vaddvq_p(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s16))) int32_t vaddvq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s16))) int32_t vaddvq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s32))) int32_t vaddvq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s32))) int32_t vaddvq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s8))) int32_t vaddvq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s8))) int32_t vaddvq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u16))) uint32_t vaddvq_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u16))) uint32_t vaddvq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u32))) uint32_t vaddvq_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u32))) uint32_t vaddvq(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u8))) uint32_t vaddvq_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u8))) uint32_t vaddvq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s16))) int16x8_t vandq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s16))) int16x8_t vandq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s32))) int32x4_t vandq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s32))) int32x4_t vandq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s8))) int8x16_t vandq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s8))) int8x16_t vandq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u16))) uint16x8_t vandq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u16))) uint16x8_t vandq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u32))) uint32x4_t vandq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u32))) uint32x4_t vandq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u8))) uint8x16_t vandq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u8))) uint8x16_t vandq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_s16))) int16x8_t vandq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_s16))) int16x8_t vandq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_s32))) int32x4_t vandq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_s32))) int32x4_t vandq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_s8))) int8x16_t vandq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_s8))) int8x16_t vandq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_u16))) uint16x8_t vandq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_u16))) uint16x8_t vandq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_u32))) uint32x4_t vandq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_u32))) uint32x4_t vandq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_u8))) uint8x16_t vandq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_u8))) uint8x16_t vandq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s16))) int16x8_t vandq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s16))) int16x8_t vandq_x(int16x8_t, int16x8_t, mve_pred16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s32))) int32x4_t vandq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s32))) int32x4_t vandq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s8))) int8x16_t vandq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s8))) int8x16_t vandq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u16))) uint16x8_t vandq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u16))) uint16x8_t vandq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u32))) uint32x4_t vandq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u32))) uint32x4_t vandq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u8))) uint8x16_t vandq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u8))) uint8x16_t vandq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_s16))) int16x8_t vbicq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_s16))) int16x8_t vbicq_m_n(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_s32))) int32x4_t vbicq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_s32))) int32x4_t vbicq_m_n(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_u16))) uint16x8_t vbicq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_u16))) uint16x8_t vbicq_m_n(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_u32))) uint32x4_t vbicq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_u32))) uint32x4_t vbicq_m_n(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s16))) int16x8_t vbicq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s16))) int16x8_t vbicq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s32))) int32x4_t vbicq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s32))) int32x4_t vbicq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); 
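/* Usage sketch (not part of the original header): the overloaded _x and _p
 * forms declared above take an mve_pred16_t lane mask; the _x variants
 * compute only the active lanes (inactive lanes are don't-care) and the _p
 * reductions sum only the active lanes.  The helper name below is
 * hypothetical, and it assumes the caller has already built the predicate
 * (for example with vctp16q(), declared elsewhere in arm_mve.h). */
static __inline__ int32_t example_sad_s16(int16x8_t a, int16x8_t b, mve_pred16_t p)
{
    int16x8_t d = vabdq_x(a, b, p);   /* per-lane |a - b|; inactive lanes undefined */
    return vaddvq_p(d, p);            /* add only the active lanes into a scalar */
}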
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s8))) int8x16_t vbicq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s8))) int8x16_t vbicq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u16))) uint16x8_t vbicq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u16))) uint16x8_t vbicq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u32))) uint32x4_t vbicq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u32))) uint32x4_t vbicq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u8))) uint8x16_t vbicq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u8))) uint8x16_t vbicq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_s16))) int16x8_t vbicq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_s16))) int16x8_t vbicq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_s32))) int32x4_t vbicq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_s32))) int32x4_t vbicq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_u16))) uint16x8_t vbicq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_u16))) uint16x8_t vbicq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_u32))) uint32x4_t vbicq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_u32))) uint32x4_t vbicq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s16))) int16x8_t vbicq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s16))) int16x8_t vbicq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s32))) int32x4_t vbicq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s32))) int32x4_t vbicq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s8))) int8x16_t vbicq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s8))) int8x16_t vbicq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u16))) uint16x8_t vbicq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u16))) uint16x8_t vbicq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u32))) uint32x4_t vbicq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u32))) uint32x4_t vbicq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u8))) uint8x16_t vbicq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u8))) uint8x16_t vbicq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s16))) int16x8_t vbicq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s16))) int16x8_t vbicq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s32))) int32x4_t vbicq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s32))) int32x4_t vbicq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s8))) int8x16_t vbicq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s8))) int8x16_t vbicq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u16))) uint16x8_t vbicq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u16))) uint16x8_t vbicq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u32))) uint32x4_t vbicq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u32))) uint32x4_t vbicq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u8))) uint8x16_t vbicq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u8))) uint8x16_t vbicq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s16))) int16x8_t vbrsrq_m_n_s16(int16x8_t, int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s16))) int16x8_t vbrsrq_m(int16x8_t, int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s32))) int32x4_t vbrsrq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s32))) int32x4_t vbrsrq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s8))) int8x16_t vbrsrq_m_n_s8(int8x16_t, int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s8))) int8x16_t 
vbrsrq_m(int8x16_t, int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u16))) uint16x8_t vbrsrq_m_n_u16(uint16x8_t, uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u16))) uint16x8_t vbrsrq_m(uint16x8_t, uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u32))) uint32x4_t vbrsrq_m_n_u32(uint32x4_t, uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u32))) uint32x4_t vbrsrq_m(uint32x4_t, uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u8))) uint8x16_t vbrsrq_m_n_u8(uint8x16_t, uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u8))) uint8x16_t vbrsrq_m(uint8x16_t, uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s16))) int16x8_t vbrsrq_n_s16(int16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s16))) int16x8_t vbrsrq(int16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s32))) int32x4_t vbrsrq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s32))) int32x4_t vbrsrq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s8))) int8x16_t vbrsrq_n_s8(int8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s8))) int8x16_t vbrsrq(int8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u16))) uint16x8_t vbrsrq_n_u16(uint16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u16))) uint16x8_t vbrsrq(uint16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u32))) uint32x4_t vbrsrq_n_u32(uint32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u32))) uint32x4_t vbrsrq(uint32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u8))) uint8x16_t vbrsrq_n_u8(uint8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u8))) uint8x16_t vbrsrq(uint8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s16))) int16x8_t vbrsrq_x_n_s16(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s16))) int16x8_t vbrsrq_x(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s32))) int32x4_t vbrsrq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s32))) int32x4_t vbrsrq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s8))) int8x16_t vbrsrq_x_n_s8(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s8))) int8x16_t vbrsrq_x(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u16))) uint16x8_t vbrsrq_x_n_u16(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u16))) uint16x8_t vbrsrq_x(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u32))) uint32x4_t vbrsrq_x_n_u32(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u32))) uint32x4_t vbrsrq_x(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u8))) uint8x16_t vbrsrq_x_n_u8(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u8))) uint8x16_t vbrsrq_x(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s16))) int16x8_t vcaddq_rot270_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s16))) int16x8_t vcaddq_rot270_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s32))) int32x4_t vcaddq_rot270_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s32))) int32x4_t vcaddq_rot270_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s8))) int8x16_t vcaddq_rot270_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s8))) int8x16_t vcaddq_rot270_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u16))) uint16x8_t vcaddq_rot270_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u16))) uint16x8_t vcaddq_rot270_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u32))) uint32x4_t vcaddq_rot270_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u32))) uint32x4_t vcaddq_rot270_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u8))) uint8x16_t vcaddq_rot270_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u8))) uint8x16_t vcaddq_rot270_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s16))) int16x8_t vcaddq_rot270_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s16))) int16x8_t vcaddq_rot270(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s32))) int32x4_t vcaddq_rot270_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s32))) int32x4_t vcaddq_rot270(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s8))) int8x16_t vcaddq_rot270_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s8))) int8x16_t vcaddq_rot270(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u16))) uint16x8_t vcaddq_rot270_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u16))) uint16x8_t vcaddq_rot270(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u32))) uint32x4_t vcaddq_rot270_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u32))) uint32x4_t vcaddq_rot270(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u8))) uint8x16_t vcaddq_rot270_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u8))) uint8x16_t vcaddq_rot270(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s16))) int16x8_t vcaddq_rot270_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s16))) int16x8_t vcaddq_rot270_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s32))) int32x4_t vcaddq_rot270_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s32))) int32x4_t vcaddq_rot270_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s8))) int8x16_t vcaddq_rot270_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s8))) int8x16_t vcaddq_rot270_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u16))) uint16x8_t vcaddq_rot270_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u16))) uint16x8_t vcaddq_rot270_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u32))) uint32x4_t vcaddq_rot270_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u32))) 
uint32x4_t vcaddq_rot270_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u8))) uint8x16_t vcaddq_rot270_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u8))) uint8x16_t vcaddq_rot270_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s16))) int16x8_t vcaddq_rot90_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s16))) int16x8_t vcaddq_rot90_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s32))) int32x4_t vcaddq_rot90_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s32))) int32x4_t vcaddq_rot90_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s8))) int8x16_t vcaddq_rot90_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s8))) int8x16_t vcaddq_rot90_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u16))) uint16x8_t vcaddq_rot90_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u16))) uint16x8_t vcaddq_rot90_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u32))) uint32x4_t vcaddq_rot90_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u32))) uint32x4_t vcaddq_rot90_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u8))) uint8x16_t vcaddq_rot90_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u8))) uint8x16_t vcaddq_rot90_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s16))) int16x8_t vcaddq_rot90_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s16))) int16x8_t vcaddq_rot90(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s32))) int32x4_t vcaddq_rot90_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s32))) int32x4_t vcaddq_rot90(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s8))) int8x16_t vcaddq_rot90_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s8))) int8x16_t vcaddq_rot90(int8x16_t, int8x16_t); 
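/* Usage sketch (not part of the original header): vcaddq_rot90 and
 * vcaddq_rot270 treat each vector as interleaved (real, imaginary) pairs and
 * add the second operand rotated by +90 or +270 degrees in the complex
 * plane, i.e. acc + j*z and acc - j*z respectively.  The helper names below
 * are hypothetical and exist only to illustrate the overloads declared above. */
static __inline__ int16x8_t example_add_j_times(int16x8_t acc, int16x8_t z)
{
    return vcaddq_rot90(acc, z);      /* acc + j*z on four interleaved complex int16 values */
}
static __inline__ int16x8_t example_sub_j_times(int16x8_t acc, int16x8_t z)
{
    return vcaddq_rot270(acc, z);     /* acc - j*z */
}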
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u16))) uint16x8_t vcaddq_rot90_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u16))) uint16x8_t vcaddq_rot90(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u32))) uint32x4_t vcaddq_rot90_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u32))) uint32x4_t vcaddq_rot90(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u8))) uint8x16_t vcaddq_rot90_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u8))) uint8x16_t vcaddq_rot90(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s16))) int16x8_t vcaddq_rot90_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s16))) int16x8_t vcaddq_rot90_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s32))) int32x4_t vcaddq_rot90_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s32))) int32x4_t vcaddq_rot90_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s8))) int8x16_t vcaddq_rot90_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s8))) int8x16_t vcaddq_rot90_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u16))) uint16x8_t vcaddq_rot90_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u16))) uint16x8_t vcaddq_rot90_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u32))) uint32x4_t vcaddq_rot90_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u32))) uint32x4_t vcaddq_rot90_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u8))) uint8x16_t vcaddq_rot90_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u8))) uint8x16_t vcaddq_rot90_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s16))) int16x8_t vclsq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s16))) int16x8_t vclsq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s32))) int32x4_t vclsq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s32))) int32x4_t vclsq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s8))) int8x16_t vclsq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s8))) int8x16_t vclsq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s16))) int16x8_t vclsq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s16))) int16x8_t vclsq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s32))) int32x4_t vclsq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s32))) int32x4_t vclsq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s8))) int8x16_t vclsq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s8))) int8x16_t vclsq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s16))) int16x8_t vclsq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s16))) int16x8_t vclsq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s32))) int32x4_t vclsq_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s32))) int32x4_t vclsq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s8))) int8x16_t vclsq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s8))) int8x16_t vclsq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s16))) int16x8_t vclzq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s16))) int16x8_t vclzq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s32))) int32x4_t vclzq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s32))) int32x4_t vclzq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s8))) int8x16_t vclzq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s8))) int8x16_t vclzq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u16))) uint16x8_t vclzq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u16))) uint16x8_t vclzq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u32))) uint32x4_t vclzq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u32))) uint32x4_t vclzq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u8))) uint8x16_t vclzq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u8))) uint8x16_t vclzq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s16))) int16x8_t vclzq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s16))) int16x8_t vclzq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s32))) int32x4_t vclzq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s32))) int32x4_t vclzq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s8))) int8x16_t vclzq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s8))) int8x16_t vclzq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u16))) uint16x8_t vclzq_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u16))) uint16x8_t vclzq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u32))) uint32x4_t vclzq_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u32))) uint32x4_t vclzq(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u8))) uint8x16_t vclzq_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u8))) uint8x16_t vclzq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s16))) int16x8_t vclzq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s16))) int16x8_t vclzq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s32))) int32x4_t vclzq_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s32))) int32x4_t vclzq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s8))) int8x16_t vclzq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s8))) int8x16_t vclzq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u16))) uint16x8_t vclzq_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u16))) uint16x8_t vclzq_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u32))) uint32x4_t vclzq_x_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u32))) uint32x4_t vclzq_x(uint32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u8))) uint8x16_t vclzq_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u8))) uint8x16_t vclzq_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u16))) mve_pred16_t vcmpcsq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u16))) mve_pred16_t vcmpcsq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u32))) mve_pred16_t vcmpcsq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u32))) mve_pred16_t vcmpcsq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u8))) mve_pred16_t vcmpcsq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u8))) mve_pred16_t vcmpcsq_m(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u16))) mve_pred16_t vcmpcsq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u16))) mve_pred16_t vcmpcsq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u32))) mve_pred16_t vcmpcsq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u32))) mve_pred16_t vcmpcsq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u8))) mve_pred16_t vcmpcsq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u8))) mve_pred16_t vcmpcsq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u16))) mve_pred16_t vcmpcsq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u16))) mve_pred16_t vcmpcsq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u32))) mve_pred16_t vcmpcsq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u32))) mve_pred16_t vcmpcsq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u8))) mve_pred16_t vcmpcsq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u8))) mve_pred16_t vcmpcsq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u16))) mve_pred16_t vcmpcsq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u16))) mve_pred16_t vcmpcsq(uint16x8_t, uint16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u32))) mve_pred16_t vcmpcsq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u32))) mve_pred16_t vcmpcsq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u8))) mve_pred16_t vcmpcsq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u8))) mve_pred16_t vcmpcsq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s16))) mve_pred16_t vcmpeqq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s16))) mve_pred16_t vcmpeqq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s32))) mve_pred16_t vcmpeqq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s32))) mve_pred16_t vcmpeqq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s8))) mve_pred16_t vcmpeqq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s8))) mve_pred16_t vcmpeqq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u16))) mve_pred16_t vcmpeqq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u16))) mve_pred16_t vcmpeqq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u32))) mve_pred16_t vcmpeqq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u32))) mve_pred16_t vcmpeqq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u8))) mve_pred16_t vcmpeqq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u8))) mve_pred16_t vcmpeqq_m(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s16))) mve_pred16_t vcmpeqq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s16))) mve_pred16_t vcmpeqq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s32))) mve_pred16_t vcmpeqq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s32))) mve_pred16_t vcmpeqq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s8))) mve_pred16_t vcmpeqq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s8))) mve_pred16_t vcmpeqq_m(int8x16_t, int8x16_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u16))) mve_pred16_t vcmpeqq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u16))) mve_pred16_t vcmpeqq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u32))) mve_pred16_t vcmpeqq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u32))) mve_pred16_t vcmpeqq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u8))) mve_pred16_t vcmpeqq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u8))) mve_pred16_t vcmpeqq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s16))) mve_pred16_t vcmpeqq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s16))) mve_pred16_t vcmpeqq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s32))) mve_pred16_t vcmpeqq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s32))) mve_pred16_t vcmpeqq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s8))) mve_pred16_t vcmpeqq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s8))) mve_pred16_t vcmpeqq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u16))) mve_pred16_t vcmpeqq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u16))) mve_pred16_t vcmpeqq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u32))) mve_pred16_t vcmpeqq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u32))) mve_pred16_t vcmpeqq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u8))) mve_pred16_t vcmpeqq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u8))) mve_pred16_t vcmpeqq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s16))) mve_pred16_t vcmpeqq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s16))) mve_pred16_t vcmpeqq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s32))) mve_pred16_t vcmpeqq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s32))) mve_pred16_t vcmpeqq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s8))) mve_pred16_t vcmpeqq_s8(int8x16_t, int8x16_t); static __inline__ 
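/*
 * Usage sketch (added note, not part of the original header): the vcmpcsq and
 * vcmpeqq families declared above return an mve_pred16_t lane mask rather
 * than a vector.  A hedged example; v, key and p are hypothetical, and the
 * reading that the _m compare forms can only set lanes already active in the
 * incoming predicate is my understanding of Arm's MVE intrinsics, not
 * something this header states.
 *
 *   mve_pred16_t equal_to_key(uint32x4_t v, uint32_t key)
 *   {
 *       return vcmpeqq(v, key);          // per-lane test v[i] == key
 *   }
 *
 *   mve_pred16_t equal_within(uint32x4_t v, uint32_t key, mve_pred16_t p)
 *   {
 *       return vcmpeqq_m(v, key, p);     // lanes outside p stay false
 *   }
 */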
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s8))) mve_pred16_t vcmpeqq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u16))) mve_pred16_t vcmpeqq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u16))) mve_pred16_t vcmpeqq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u32))) mve_pred16_t vcmpeqq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u32))) mve_pred16_t vcmpeqq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u8))) mve_pred16_t vcmpeqq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u8))) mve_pred16_t vcmpeqq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s16))) mve_pred16_t vcmpgeq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s16))) mve_pred16_t vcmpgeq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s32))) mve_pred16_t vcmpgeq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s32))) mve_pred16_t vcmpgeq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s8))) mve_pred16_t vcmpgeq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s8))) mve_pred16_t vcmpgeq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s16))) mve_pred16_t vcmpgeq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s16))) mve_pred16_t vcmpgeq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s32))) mve_pred16_t vcmpgeq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s32))) mve_pred16_t vcmpgeq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s8))) mve_pred16_t vcmpgeq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s8))) mve_pred16_t vcmpgeq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s16))) mve_pred16_t vcmpgeq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s16))) mve_pred16_t vcmpgeq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s32))) mve_pred16_t vcmpgeq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s32))) 
mve_pred16_t vcmpgeq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s8))) mve_pred16_t vcmpgeq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s8))) mve_pred16_t vcmpgeq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s16))) mve_pred16_t vcmpgeq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s16))) mve_pred16_t vcmpgeq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s32))) mve_pred16_t vcmpgeq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s32))) mve_pred16_t vcmpgeq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s8))) mve_pred16_t vcmpgeq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s8))) mve_pred16_t vcmpgeq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s16))) mve_pred16_t vcmpgtq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s16))) mve_pred16_t vcmpgtq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s32))) mve_pred16_t vcmpgtq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s32))) mve_pred16_t vcmpgtq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s8))) mve_pred16_t vcmpgtq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s8))) mve_pred16_t vcmpgtq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s16))) mve_pred16_t vcmpgtq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s16))) mve_pred16_t vcmpgtq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s32))) mve_pred16_t vcmpgtq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s32))) mve_pred16_t vcmpgtq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s8))) mve_pred16_t vcmpgtq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s8))) mve_pred16_t vcmpgtq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s16))) mve_pred16_t vcmpgtq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s16))) mve_pred16_t vcmpgtq(int16x8_t, int16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s32))) mve_pred16_t vcmpgtq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s32))) mve_pred16_t vcmpgtq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s8))) mve_pred16_t vcmpgtq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s8))) mve_pred16_t vcmpgtq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s16))) mve_pred16_t vcmpgtq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s16))) mve_pred16_t vcmpgtq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s32))) mve_pred16_t vcmpgtq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s32))) mve_pred16_t vcmpgtq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s8))) mve_pred16_t vcmpgtq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s8))) mve_pred16_t vcmpgtq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u16))) mve_pred16_t vcmphiq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u16))) mve_pred16_t vcmphiq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u32))) mve_pred16_t vcmphiq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u32))) mve_pred16_t vcmphiq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u8))) mve_pred16_t vcmphiq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u8))) mve_pred16_t vcmphiq_m(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u16))) mve_pred16_t vcmphiq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u16))) mve_pred16_t vcmphiq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u32))) mve_pred16_t vcmphiq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u32))) mve_pred16_t vcmphiq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u8))) mve_pred16_t vcmphiq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u8))) mve_pred16_t vcmphiq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u16))) 
mve_pred16_t vcmphiq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u16))) mve_pred16_t vcmphiq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u32))) mve_pred16_t vcmphiq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u32))) mve_pred16_t vcmphiq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u8))) mve_pred16_t vcmphiq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u8))) mve_pred16_t vcmphiq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u16))) mve_pred16_t vcmphiq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u16))) mve_pred16_t vcmphiq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u32))) mve_pred16_t vcmphiq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u32))) mve_pred16_t vcmphiq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u8))) mve_pred16_t vcmphiq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u8))) mve_pred16_t vcmphiq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s16))) mve_pred16_t vcmpleq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s16))) mve_pred16_t vcmpleq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s32))) mve_pred16_t vcmpleq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s32))) mve_pred16_t vcmpleq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s8))) mve_pred16_t vcmpleq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s8))) mve_pred16_t vcmpleq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s16))) mve_pred16_t vcmpleq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s16))) mve_pred16_t vcmpleq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s32))) mve_pred16_t vcmpleq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s32))) mve_pred16_t vcmpleq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s8))) mve_pred16_t vcmpleq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s8))) mve_pred16_t vcmpleq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s16))) mve_pred16_t vcmpleq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s16))) mve_pred16_t vcmpleq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s32))) mve_pred16_t vcmpleq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s32))) mve_pred16_t vcmpleq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s8))) mve_pred16_t vcmpleq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s8))) mve_pred16_t vcmpleq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s16))) mve_pred16_t vcmpleq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s16))) mve_pred16_t vcmpleq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s32))) mve_pred16_t vcmpleq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s32))) mve_pred16_t vcmpleq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s8))) mve_pred16_t vcmpleq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s8))) mve_pred16_t vcmpleq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s16))) mve_pred16_t vcmpltq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s16))) mve_pred16_t vcmpltq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s32))) mve_pred16_t vcmpltq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s32))) mve_pred16_t vcmpltq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s8))) mve_pred16_t vcmpltq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s8))) mve_pred16_t vcmpltq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s16))) mve_pred16_t vcmpltq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s16))) mve_pred16_t vcmpltq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s32))) mve_pred16_t vcmpltq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s32))) mve_pred16_t vcmpltq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s8))) mve_pred16_t vcmpltq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s8))) mve_pred16_t vcmpltq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s16))) mve_pred16_t vcmpltq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s16))) mve_pred16_t vcmpltq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s32))) mve_pred16_t vcmpltq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s32))) mve_pred16_t vcmpltq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s8))) mve_pred16_t vcmpltq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s8))) mve_pred16_t vcmpltq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s16))) mve_pred16_t vcmpltq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s16))) mve_pred16_t vcmpltq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s32))) mve_pred16_t vcmpltq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s32))) mve_pred16_t vcmpltq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s8))) mve_pred16_t vcmpltq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s8))) mve_pred16_t vcmpltq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s16))) mve_pred16_t vcmpneq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s16))) mve_pred16_t vcmpneq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s32))) mve_pred16_t vcmpneq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s32))) mve_pred16_t vcmpneq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s8))) mve_pred16_t vcmpneq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s8))) mve_pred16_t vcmpneq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u16))) mve_pred16_t vcmpneq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u16))) mve_pred16_t vcmpneq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u32))) mve_pred16_t vcmpneq_m_n_u32(uint32x4_t, uint32_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u32))) mve_pred16_t vcmpneq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u8))) mve_pred16_t vcmpneq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u8))) mve_pred16_t vcmpneq_m(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s16))) mve_pred16_t vcmpneq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s16))) mve_pred16_t vcmpneq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s32))) mve_pred16_t vcmpneq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s32))) mve_pred16_t vcmpneq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s8))) mve_pred16_t vcmpneq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s8))) mve_pred16_t vcmpneq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u16))) mve_pred16_t vcmpneq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u16))) mve_pred16_t vcmpneq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u32))) mve_pred16_t vcmpneq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u32))) mve_pred16_t vcmpneq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u8))) mve_pred16_t vcmpneq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u8))) mve_pred16_t vcmpneq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s16))) mve_pred16_t vcmpneq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s16))) mve_pred16_t vcmpneq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s32))) mve_pred16_t vcmpneq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s32))) mve_pred16_t vcmpneq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s8))) mve_pred16_t vcmpneq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s8))) mve_pred16_t vcmpneq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u16))) mve_pred16_t vcmpneq_n_u16(uint16x8_t, uint16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u16))) mve_pred16_t vcmpneq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u32))) mve_pred16_t vcmpneq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u32))) mve_pred16_t vcmpneq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u8))) mve_pred16_t vcmpneq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u8))) mve_pred16_t vcmpneq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s16))) mve_pred16_t vcmpneq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s16))) mve_pred16_t vcmpneq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s32))) mve_pred16_t vcmpneq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s32))) mve_pred16_t vcmpneq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s8))) mve_pred16_t vcmpneq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s8))) mve_pred16_t vcmpneq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u16))) mve_pred16_t vcmpneq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u16))) mve_pred16_t vcmpneq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u32))) mve_pred16_t vcmpneq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u32))) mve_pred16_t vcmpneq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u8))) mve_pred16_t vcmpneq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u8))) mve_pred16_t vcmpneq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_s16))) int16x8_t vcreateq_s16(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_s32))) int32x4_t vcreateq_s32(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_s64))) int64x2_t vcreateq_s64(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_s8))) int8x16_t vcreateq_s8(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_u16))) uint16x8_t vcreateq_u16(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_u32))) uint32x4_t vcreateq_u32(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_u64))) uint64x2_t vcreateq_u64(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_u8))) 
uint8x16_t vcreateq_u8(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp16q))) mve_pred16_t vctp16q(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp16q_m))) mve_pred16_t vctp16q_m(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp32q))) mve_pred16_t vctp32q(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp32q_m))) mve_pred16_t vctp32q_m(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp64q))) mve_pred16_t vctp64q(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp64q_m))) mve_pred16_t vctp64q_m(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp8q))) mve_pred16_t vctp8q(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp8q_m))) mve_pred16_t vctp8q_m(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u16))) uint16x8_t vddupq_m_n_u16(uint16x8_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u16))) uint16x8_t vddupq_m(uint16x8_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u32))) uint32x4_t vddupq_m_n_u32(uint32x4_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u32))) uint32x4_t vddupq_m(uint32x4_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u8))) uint8x16_t vddupq_m_n_u8(uint8x16_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u8))) uint8x16_t vddupq_m(uint8x16_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u16))) uint16x8_t vddupq_m_wb_u16(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u16))) uint16x8_t vddupq_m(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u32))) uint32x4_t vddupq_m_wb_u32(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u32))) uint32x4_t vddupq_m(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u8))) uint8x16_t vddupq_m_wb_u8(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u8))) uint8x16_t vddupq_m(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u16))) uint16x8_t vddupq_n_u16(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u16))) uint16x8_t vddupq_u16(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u32))) uint32x4_t vddupq_n_u32(uint32_t, int); static __inline__ 
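/*
 * Usage sketch (added note, not part of the original header): vctp32q(n)
 * builds a predicate whose low min(n, 4) lanes are active, the usual tool for
 * handling loop tails, while the vddupq family generates per-lane index
 * sequences.  Hedged example; remaining is hypothetical, and the
 * decrementing-sequence behaviour attributed to vddupq_u32 is my reading of
 * the Arm MVE documentation rather than anything stated by this header.
 *
 *   mve_pred16_t tail = vctp32q(remaining);   // active lanes for a final
 *                                             // partial iteration (n < 4)
 *
 *   uint32x4_t idx = vddupq_u32(12, 4);       // expected to yield
 *                                             // {12, 8, 4, 0}: the start
 *                                             // value decremented by the
 *                                             // immediate step per lane
 */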
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u32))) uint32x4_t vddupq_u32(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u8))) uint8x16_t vddupq_n_u8(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u8))) uint8x16_t vddupq_u8(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u16))) uint16x8_t vddupq_wb_u16(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u16))) uint16x8_t vddupq_u16(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u32))) uint32x4_t vddupq_wb_u32(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u32))) uint32x4_t vddupq_u32(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u8))) uint8x16_t vddupq_wb_u8(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u8))) uint8x16_t vddupq_u8(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u16))) uint16x8_t vddupq_x_n_u16(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u16))) uint16x8_t vddupq_x_u16(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u32))) uint32x4_t vddupq_x_n_u32(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u32))) uint32x4_t vddupq_x_u32(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u8))) uint8x16_t vddupq_x_n_u8(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u8))) uint8x16_t vddupq_x_u8(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u16))) uint16x8_t vddupq_x_wb_u16(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u16))) uint16x8_t vddupq_x_u16(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u32))) uint32x4_t vddupq_x_wb_u32(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u32))) uint32x4_t vddupq_x_u32(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u8))) uint8x16_t vddupq_x_wb_u8(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u8))) uint8x16_t vddupq_x_u8(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s16))) int16x8_t vdupq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s16))) int16x8_t vdupq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s32))) int32x4_t vdupq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s32))) int32x4_t vdupq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s8))) int8x16_t vdupq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s8))) int8x16_t vdupq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u16))) uint16x8_t vdupq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u16))) uint16x8_t vdupq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u32))) uint32x4_t vdupq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u32))) uint32x4_t vdupq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u8))) uint8x16_t vdupq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u8))) uint8x16_t vdupq_m(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_s16))) int16x8_t vdupq_n_s16(int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_s32))) int32x4_t vdupq_n_s32(int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_s8))) int8x16_t vdupq_n_s8(int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_u16))) uint16x8_t vdupq_n_u16(uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_u32))) uint32x4_t vdupq_n_u32(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_u8))) uint8x16_t vdupq_n_u8(uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_s16))) int16x8_t vdupq_x_n_s16(int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_s32))) int32x4_t vdupq_x_n_s32(int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_s8))) int8x16_t vdupq_x_n_s8(int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_u16))) uint16x8_t vdupq_x_n_u16(uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_u32))) uint32x4_t vdupq_x_n_u32(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_u8))) uint8x16_t vdupq_x_n_u8(uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u16))) uint16x8_t vdwdupq_m_n_u16(uint16x8_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u16))) uint16x8_t vdwdupq_m(uint16x8_t, uint32_t, uint32_t, int, mve_pred16_t); static 
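/*
 * Usage sketch (added note, not part of the original header): vdupq_n_*
 * broadcasts a scalar into every lane, and vdupq_m merges the broadcast into
 * an existing vector under a predicate.  Hedged example; inactive, fill and p
 * are hypothetical, and the merge semantics described in the comment reflect
 * my understanding of the _m naming convention, not text from this header.
 *
 *   int32x4_t splat_some(int32x4_t inactive, int32_t fill, mve_pred16_t p)
 *   {
 *       // lanes active in p receive fill; the rest keep inactive's values
 *       return vdupq_m(inactive, fill, p);
 *   }
 *
 *   int32x4_t all_ones = vdupq_n_s32(-1);     // broadcast -1 to all 4 lanes
 */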
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u32))) uint32x4_t vdwdupq_m_n_u32(uint32x4_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u32))) uint32x4_t vdwdupq_m(uint32x4_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u8))) uint8x16_t vdwdupq_m_n_u8(uint8x16_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u8))) uint8x16_t vdwdupq_m(uint8x16_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u16))) uint16x8_t vdwdupq_m_wb_u16(uint16x8_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u16))) uint16x8_t vdwdupq_m(uint16x8_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u32))) uint32x4_t vdwdupq_m_wb_u32(uint32x4_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u32))) uint32x4_t vdwdupq_m(uint32x4_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u8))) uint8x16_t vdwdupq_m_wb_u8(uint8x16_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u8))) uint8x16_t vdwdupq_m(uint8x16_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u16))) uint16x8_t vdwdupq_n_u16(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u16))) uint16x8_t vdwdupq_u16(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u32))) uint32x4_t vdwdupq_n_u32(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u32))) uint32x4_t vdwdupq_u32(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u8))) uint8x16_t vdwdupq_n_u8(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u8))) uint8x16_t vdwdupq_u8(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u16))) uint16x8_t vdwdupq_wb_u16(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u16))) uint16x8_t vdwdupq_u16(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u32))) uint32x4_t vdwdupq_wb_u32(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u32))) uint32x4_t vdwdupq_u32(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u8))) uint8x16_t vdwdupq_wb_u8(uint32_t *, uint32_t, int); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u8))) uint8x16_t vdwdupq_u8(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u16))) uint16x8_t vdwdupq_x_n_u16(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u16))) uint16x8_t vdwdupq_x_u16(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u32))) uint32x4_t vdwdupq_x_n_u32(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u32))) uint32x4_t vdwdupq_x_u32(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u8))) uint8x16_t vdwdupq_x_n_u8(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u8))) uint8x16_t vdwdupq_x_u8(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u16))) uint16x8_t vdwdupq_x_wb_u16(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u16))) uint16x8_t vdwdupq_x_u16(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u32))) uint32x4_t vdwdupq_x_wb_u32(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u32))) uint32x4_t vdwdupq_x_u32(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u8))) uint8x16_t vdwdupq_x_wb_u8(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u8))) uint8x16_t vdwdupq_x_u8(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s16))) int16x8_t veorq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s16))) int16x8_t veorq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s32))) int32x4_t veorq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s32))) int32x4_t veorq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s8))) int8x16_t veorq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s8))) int8x16_t veorq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u16))) uint16x8_t veorq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u16))) uint16x8_t veorq_m(uint16x8_t, uint16x8_t, uint16x8_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u32))) uint32x4_t veorq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u32))) uint32x4_t veorq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u8))) uint8x16_t veorq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u8))) uint8x16_t veorq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_s16))) int16x8_t veorq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_s16))) int16x8_t veorq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_s32))) int32x4_t veorq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_s32))) int32x4_t veorq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_s8))) int8x16_t veorq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_s8))) int8x16_t veorq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_u16))) uint16x8_t veorq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_u16))) uint16x8_t veorq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_u32))) uint32x4_t veorq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_u32))) uint32x4_t veorq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_u8))) uint8x16_t veorq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_u8))) uint8x16_t veorq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s16))) int16x8_t veorq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s16))) int16x8_t veorq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s32))) int32x4_t veorq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s32))) int32x4_t veorq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s8))) int8x16_t veorq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s8))) int8x16_t veorq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u16))) uint16x8_t veorq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u16))) uint16x8_t veorq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u32))) uint32x4_t veorq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u32))) uint32x4_t veorq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u8))) uint8x16_t veorq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u8))) uint8x16_t veorq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s16))) int16_t vgetq_lane_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s16))) int16_t vgetq_lane(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s32))) int32_t vgetq_lane_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s32))) int32_t vgetq_lane(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s64))) int64_t vgetq_lane_s64(int64x2_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s64))) int64_t vgetq_lane(int64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s8))) int8_t vgetq_lane_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s8))) int8_t vgetq_lane(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u16))) uint16_t vgetq_lane_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u16))) uint16_t vgetq_lane(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u32))) uint32_t vgetq_lane_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u32))) uint32_t vgetq_lane(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u64))) uint64_t vgetq_lane_u64(uint64x2_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u64))) uint64_t vgetq_lane(uint64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u8))) uint8_t vgetq_lane_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u8))) uint8_t vgetq_lane(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s16))) int16x8_t vhaddq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s16))) int16x8_t vhaddq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s32))) int32x4_t vhaddq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s32))) int32x4_t vhaddq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s8))) int8x16_t vhaddq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s8))) int8x16_t vhaddq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u16))) uint16x8_t vhaddq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u16))) uint16x8_t vhaddq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u32))) uint32x4_t vhaddq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u32))) uint32x4_t vhaddq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u8))) uint8x16_t vhaddq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u8))) uint8x16_t vhaddq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s16))) int16x8_t vhaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s16))) int16x8_t vhaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s32))) int32x4_t vhaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s32))) int32x4_t vhaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s8))) int8x16_t vhaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s8))) int8x16_t vhaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u16))) uint16x8_t vhaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u16))) uint16x8_t vhaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u32))) uint32x4_t vhaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u32))) uint32x4_t vhaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u8))) uint8x16_t vhaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u8))) uint8x16_t vhaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s16))) int16x8_t vhaddq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s16))) int16x8_t vhaddq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s32))) int32x4_t vhaddq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s32))) int32x4_t vhaddq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s8))) int8x16_t vhaddq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s8))) int8x16_t vhaddq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u16))) uint16x8_t vhaddq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u16))) uint16x8_t vhaddq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u32))) uint32x4_t vhaddq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u32))) uint32x4_t vhaddq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u8))) uint8x16_t vhaddq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u8))) uint8x16_t vhaddq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s16))) int16x8_t vhaddq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s16))) int16x8_t vhaddq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s32))) int32x4_t vhaddq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s32))) int32x4_t vhaddq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s8))) int8x16_t vhaddq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s8))) int8x16_t vhaddq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u16))) uint16x8_t vhaddq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u16))) uint16x8_t vhaddq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u32))) uint32x4_t vhaddq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u32))) uint32x4_t vhaddq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u8))) uint8x16_t vhaddq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u8))) uint8x16_t 
vhaddq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s16))) int16x8_t vhaddq_x_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s16))) int16x8_t vhaddq_x(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s32))) int32x4_t vhaddq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s32))) int32x4_t vhaddq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s8))) int8x16_t vhaddq_x_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s8))) int8x16_t vhaddq_x(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u16))) uint16x8_t vhaddq_x_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u16))) uint16x8_t vhaddq_x(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u32))) uint32x4_t vhaddq_x_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u32))) uint32x4_t vhaddq_x(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u8))) uint8x16_t vhaddq_x_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u8))) uint8x16_t vhaddq_x(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s16))) int16x8_t vhaddq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s16))) int16x8_t vhaddq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s32))) int32x4_t vhaddq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s32))) int32x4_t vhaddq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s8))) int8x16_t vhaddq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s8))) int8x16_t vhaddq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u16))) uint16x8_t vhaddq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u16))) uint16x8_t vhaddq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u32))) uint32x4_t vhaddq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u32))) uint32x4_t vhaddq_x(uint32x4_t, 
uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u8))) uint8x16_t vhaddq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u8))) uint8x16_t vhaddq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s16))) int16x8_t vhcaddq_rot270_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s16))) int16x8_t vhcaddq_rot270_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s32))) int32x4_t vhcaddq_rot270_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s32))) int32x4_t vhcaddq_rot270_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s8))) int8x16_t vhcaddq_rot270_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s8))) int8x16_t vhcaddq_rot270_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s16))) int16x8_t vhcaddq_rot270_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s16))) int16x8_t vhcaddq_rot270(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s32))) int32x4_t vhcaddq_rot270_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s32))) int32x4_t vhcaddq_rot270(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s8))) int8x16_t vhcaddq_rot270_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s8))) int8x16_t vhcaddq_rot270(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s16))) int16x8_t vhcaddq_rot270_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s16))) int16x8_t vhcaddq_rot270_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s32))) int32x4_t vhcaddq_rot270_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s32))) int32x4_t vhcaddq_rot270_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s8))) int8x16_t vhcaddq_rot270_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s8))) int8x16_t vhcaddq_rot270_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s16))) int16x8_t vhcaddq_rot90_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s16))) int16x8_t vhcaddq_rot90_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s32))) int32x4_t vhcaddq_rot90_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s32))) int32x4_t vhcaddq_rot90_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s8))) int8x16_t vhcaddq_rot90_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s8))) int8x16_t vhcaddq_rot90_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s16))) int16x8_t vhcaddq_rot90_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s16))) int16x8_t vhcaddq_rot90(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s32))) int32x4_t vhcaddq_rot90_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s32))) int32x4_t vhcaddq_rot90(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s8))) int8x16_t vhcaddq_rot90_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s8))) int8x16_t vhcaddq_rot90(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s16))) int16x8_t vhcaddq_rot90_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s16))) int16x8_t vhcaddq_rot90_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s32))) int32x4_t vhcaddq_rot90_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s32))) int32x4_t vhcaddq_rot90_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s8))) int8x16_t vhcaddq_rot90_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s8))) int8x16_t vhcaddq_rot90_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s16))) int16x8_t vhsubq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s16))) int16x8_t vhsubq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s32))) int32x4_t vhsubq_m_n_s32(int32x4_t, int32x4_t, 
int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s32))) int32x4_t vhsubq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s8))) int8x16_t vhsubq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s8))) int8x16_t vhsubq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u16))) uint16x8_t vhsubq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u16))) uint16x8_t vhsubq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u32))) uint32x4_t vhsubq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u32))) uint32x4_t vhsubq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u8))) uint8x16_t vhsubq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u8))) uint8x16_t vhsubq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s16))) int16x8_t vhsubq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s16))) int16x8_t vhsubq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s32))) int32x4_t vhsubq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s32))) int32x4_t vhsubq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s8))) int8x16_t vhsubq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s8))) int8x16_t vhsubq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u16))) uint16x8_t vhsubq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u16))) uint16x8_t vhsubq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u32))) uint32x4_t vhsubq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u32))) uint32x4_t vhsubq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u8))) uint8x16_t vhsubq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u8))) uint8x16_t vhsubq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s16))) int16x8_t vhsubq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s16))) int16x8_t vhsubq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s32))) int32x4_t vhsubq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s32))) int32x4_t vhsubq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s8))) int8x16_t vhsubq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s8))) int8x16_t vhsubq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u16))) uint16x8_t vhsubq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u16))) uint16x8_t vhsubq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u32))) uint32x4_t vhsubq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u32))) uint32x4_t vhsubq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u8))) uint8x16_t vhsubq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u8))) uint8x16_t vhsubq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s16))) int16x8_t vhsubq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s16))) int16x8_t vhsubq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s32))) int32x4_t vhsubq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s32))) int32x4_t vhsubq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s8))) int8x16_t vhsubq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s8))) int8x16_t vhsubq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u16))) uint16x8_t vhsubq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u16))) uint16x8_t vhsubq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u32))) uint32x4_t vhsubq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u32))) uint32x4_t vhsubq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u8))) uint8x16_t vhsubq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u8))) uint8x16_t 
vhsubq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s16))) int16x8_t vhsubq_x_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s16))) int16x8_t vhsubq_x(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s32))) int32x4_t vhsubq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s32))) int32x4_t vhsubq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s8))) int8x16_t vhsubq_x_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s8))) int8x16_t vhsubq_x(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u16))) uint16x8_t vhsubq_x_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u16))) uint16x8_t vhsubq_x(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u32))) uint32x4_t vhsubq_x_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u32))) uint32x4_t vhsubq_x(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u8))) uint8x16_t vhsubq_x_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u8))) uint8x16_t vhsubq_x(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s16))) int16x8_t vhsubq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s16))) int16x8_t vhsubq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s32))) int32x4_t vhsubq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s32))) int32x4_t vhsubq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s8))) int8x16_t vhsubq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s8))) int8x16_t vhsubq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u16))) uint16x8_t vhsubq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u16))) uint16x8_t vhsubq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u32))) uint32x4_t vhsubq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u32))) uint32x4_t vhsubq_x(uint32x4_t, 
uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u8))) uint8x16_t vhsubq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u8))) uint8x16_t vhsubq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u16))) uint16x8_t vidupq_m_n_u16(uint16x8_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u16))) uint16x8_t vidupq_m(uint16x8_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u32))) uint32x4_t vidupq_m_n_u32(uint32x4_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u32))) uint32x4_t vidupq_m(uint32x4_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u8))) uint8x16_t vidupq_m_n_u8(uint8x16_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u8))) uint8x16_t vidupq_m(uint8x16_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u16))) uint16x8_t vidupq_m_wb_u16(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u16))) uint16x8_t vidupq_m(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u32))) uint32x4_t vidupq_m_wb_u32(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u32))) uint32x4_t vidupq_m(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u8))) uint8x16_t vidupq_m_wb_u8(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u8))) uint8x16_t vidupq_m(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u16))) uint16x8_t vidupq_n_u16(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u16))) uint16x8_t vidupq_u16(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u32))) uint32x4_t vidupq_n_u32(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u32))) uint32x4_t vidupq_u32(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u8))) uint8x16_t vidupq_n_u8(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u8))) uint8x16_t vidupq_u8(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u16))) uint16x8_t vidupq_wb_u16(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u16))) uint16x8_t vidupq_u16(uint32_t *, int); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u32))) uint32x4_t vidupq_wb_u32(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u32))) uint32x4_t vidupq_u32(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u8))) uint8x16_t vidupq_wb_u8(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u8))) uint8x16_t vidupq_u8(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u16))) uint16x8_t vidupq_x_n_u16(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u16))) uint16x8_t vidupq_x_u16(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u32))) uint32x4_t vidupq_x_n_u32(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u32))) uint32x4_t vidupq_x_u32(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u8))) uint8x16_t vidupq_x_n_u8(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u8))) uint8x16_t vidupq_x_u8(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u16))) uint16x8_t vidupq_x_wb_u16(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u16))) uint16x8_t vidupq_x_u16(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u32))) uint32x4_t vidupq_x_wb_u32(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u32))) uint32x4_t vidupq_x_u32(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u8))) uint8x16_t vidupq_x_wb_u8(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u8))) uint8x16_t vidupq_x_u8(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u16))) uint16x8_t viwdupq_m_n_u16(uint16x8_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u16))) uint16x8_t viwdupq_m(uint16x8_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u32))) uint32x4_t viwdupq_m_n_u32(uint32x4_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u32))) uint32x4_t viwdupq_m(uint32x4_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u8))) uint8x16_t viwdupq_m_n_u8(uint8x16_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u8))) uint8x16_t viwdupq_m(uint8x16_t, uint32_t, uint32_t, int, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u16))) uint16x8_t viwdupq_m_wb_u16(uint16x8_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u16))) uint16x8_t viwdupq_m(uint16x8_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u32))) uint32x4_t viwdupq_m_wb_u32(uint32x4_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u32))) uint32x4_t viwdupq_m(uint32x4_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u8))) uint8x16_t viwdupq_m_wb_u8(uint8x16_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u8))) uint8x16_t viwdupq_m(uint8x16_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u16))) uint16x8_t viwdupq_n_u16(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u16))) uint16x8_t viwdupq_u16(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u32))) uint32x4_t viwdupq_n_u32(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u32))) uint32x4_t viwdupq_u32(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u8))) uint8x16_t viwdupq_n_u8(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u8))) uint8x16_t viwdupq_u8(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u16))) uint16x8_t viwdupq_wb_u16(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u16))) uint16x8_t viwdupq_u16(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u32))) uint32x4_t viwdupq_wb_u32(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u32))) uint32x4_t viwdupq_u32(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u8))) uint8x16_t viwdupq_wb_u8(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u8))) uint8x16_t viwdupq_u8(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u16))) uint16x8_t viwdupq_x_n_u16(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u16))) uint16x8_t viwdupq_x_u16(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u32))) uint32x4_t viwdupq_x_n_u32(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u32))) uint32x4_t viwdupq_x_u32(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u8))) uint8x16_t viwdupq_x_n_u8(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u8))) uint8x16_t viwdupq_x_u8(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u16))) uint16x8_t viwdupq_x_wb_u16(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u16))) uint16x8_t viwdupq_x_u16(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u32))) uint32x4_t viwdupq_x_wb_u32(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u32))) uint32x4_t viwdupq_x_u32(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u8))) uint8x16_t viwdupq_x_wb_u8(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u8))) uint8x16_t viwdupq_x_u8(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s16))) int16x8_t vld1q_s16(const int16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s16))) int16x8_t vld1q(const int16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s32))) int32x4_t vld1q_s32(const int32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s32))) int32x4_t vld1q(const int32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s8))) int8x16_t vld1q_s8(const int8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s8))) int8x16_t vld1q(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u16))) uint16x8_t vld1q_u16(const uint16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u16))) uint16x8_t vld1q(const uint16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u32))) uint32x4_t vld1q_u32(const uint32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u32))) uint32x4_t vld1q(const uint32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u8))) uint8x16_t vld1q_u8(const uint8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u8))) uint8x16_t vld1q(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s16))) int16x8_t vld1q_z_s16(const int16_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s16))) int16x8_t vld1q_z(const int16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s32))) int32x4_t 
vld1q_z_s32(const int32_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s32))) int32x4_t vld1q_z(const int32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s8))) int8x16_t vld1q_z_s8(const int8_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s8))) int8x16_t vld1q_z(const int8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u16))) uint16x8_t vld1q_z_u16(const uint16_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u16))) uint16x8_t vld1q_z(const uint16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u32))) uint32x4_t vld1q_z_u32(const uint32_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u32))) uint32x4_t vld1q_z(const uint32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u8))) uint8x16_t vld1q_z_u8(const uint8_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u8))) uint8x16_t vld1q_z(const uint8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s16))) int16x8x2_t vld2q_s16(const int16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s16))) int16x8x2_t vld2q(const int16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s32))) int32x4x2_t vld2q_s32(const int32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s32))) int32x4x2_t vld2q(const int32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s8))) int8x16x2_t vld2q_s8(const int8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s8))) int8x16x2_t vld2q(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u16))) uint16x8x2_t vld2q_u16(const uint16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u16))) uint16x8x2_t vld2q(const uint16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u32))) uint32x4x2_t vld2q_u32(const uint32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u32))) uint32x4x2_t vld2q(const uint32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u8))) uint8x16x2_t vld2q_u8(const uint8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u8))) uint8x16x2_t vld2q(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s16))) int16x8x4_t vld4q_s16(const int16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s16))) int16x8x4_t vld4q(const int16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s32))) int32x4x4_t vld4q_s32(const int32_t *); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s32))) int32x4x4_t vld4q(const int32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s8))) int8x16x4_t vld4q_s8(const int8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s8))) int8x16x4_t vld4q(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u16))) uint16x8x4_t vld4q_u16(const uint16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u16))) uint16x8x4_t vld4q(const uint16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u32))) uint32x4x4_t vld4q_u32(const uint32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u32))) uint32x4x4_t vld4q(const uint32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u8))) uint8x16x4_t vld4q_u8(const uint8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u8))) uint8x16x4_t vld4q(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s16))) int16x8_t vldrbq_gather_offset_s16(const int8_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s16))) int16x8_t vldrbq_gather_offset(const int8_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s32))) int32x4_t vldrbq_gather_offset_s32(const int8_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s32))) int32x4_t vldrbq_gather_offset(const int8_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s8))) int8x16_t vldrbq_gather_offset_s8(const int8_t *, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s8))) int8x16_t vldrbq_gather_offset(const int8_t *, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u16))) uint16x8_t vldrbq_gather_offset_u16(const uint8_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u16))) uint16x8_t vldrbq_gather_offset(const uint8_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u32))) uint32x4_t vldrbq_gather_offset_u32(const uint8_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u32))) uint32x4_t vldrbq_gather_offset(const uint8_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u8))) uint8x16_t vldrbq_gather_offset_u8(const uint8_t *, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u8))) uint8x16_t vldrbq_gather_offset(const uint8_t *, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s16))) int16x8_t vldrbq_gather_offset_z_s16(const int8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s16))) int16x8_t vldrbq_gather_offset_z(const int8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s32))) int32x4_t vldrbq_gather_offset_z_s32(const int8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s32))) int32x4_t vldrbq_gather_offset_z(const int8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s8))) int8x16_t vldrbq_gather_offset_z_s8(const int8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s8))) int8x16_t vldrbq_gather_offset_z(const int8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u16))) uint16x8_t vldrbq_gather_offset_z_u16(const uint8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u16))) uint16x8_t vldrbq_gather_offset_z(const uint8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u32))) uint32x4_t vldrbq_gather_offset_z_u32(const uint8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u32))) uint32x4_t vldrbq_gather_offset_z(const uint8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u8))) uint8x16_t vldrbq_gather_offset_z_u8(const uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u8))) uint8x16_t vldrbq_gather_offset_z(const uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_s16))) int16x8_t vldrbq_s16(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_s32))) int32x4_t vldrbq_s32(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_s8))) int8x16_t vldrbq_s8(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_u16))) uint16x8_t vldrbq_u16(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_u32))) uint32x4_t vldrbq_u32(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_u8))) uint8x16_t vldrbq_u8(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_s16))) int16x8_t vldrbq_z_s16(const int8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_s32))) int32x4_t vldrbq_z_s32(const int8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_s8))) int8x16_t vldrbq_z_s8(const int8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_u16))) uint16x8_t vldrbq_z_u16(const uint8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_u32))) 
uint32x4_t vldrbq_z_u32(const uint8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_u8))) uint8x16_t vldrbq_z_u8(const uint8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_s64))) int64x2_t vldrdq_gather_base_s64(uint64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_u64))) uint64x2_t vldrdq_gather_base_u64(uint64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_wb_s64))) int64x2_t vldrdq_gather_base_wb_s64(uint64x2_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_wb_u64))) uint64x2_t vldrdq_gather_base_wb_u64(uint64x2_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_wb_z_s64))) int64x2_t vldrdq_gather_base_wb_z_s64(uint64x2_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_wb_z_u64))) uint64x2_t vldrdq_gather_base_wb_z_u64(uint64x2_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_z_s64))) int64x2_t vldrdq_gather_base_z_s64(uint64x2_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_z_u64))) uint64x2_t vldrdq_gather_base_z_u64(uint64x2_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_s64))) int64x2_t vldrdq_gather_offset_s64(const int64_t *, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_s64))) int64x2_t vldrdq_gather_offset(const int64_t *, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_u64))) uint64x2_t vldrdq_gather_offset_u64(const uint64_t *, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_u64))) uint64x2_t vldrdq_gather_offset(const uint64_t *, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_z_s64))) int64x2_t vldrdq_gather_offset_z_s64(const int64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_z_s64))) int64x2_t vldrdq_gather_offset_z(const int64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_z_u64))) uint64x2_t vldrdq_gather_offset_z_u64(const uint64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_z_u64))) uint64x2_t vldrdq_gather_offset_z(const uint64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_s64))) int64x2_t vldrdq_gather_shifted_offset_s64(const int64_t *, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_s64))) int64x2_t vldrdq_gather_shifted_offset(const int64_t *, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_u64))) uint64x2_t 
vldrdq_gather_shifted_offset_u64(const uint64_t *, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_u64))) uint64x2_t vldrdq_gather_shifted_offset(const uint64_t *, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_s64))) int64x2_t vldrdq_gather_shifted_offset_z_s64(const int64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_s64))) int64x2_t vldrdq_gather_shifted_offset_z(const int64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_u64))) uint64x2_t vldrdq_gather_shifted_offset_z_u64(const uint64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_u64))) uint64x2_t vldrdq_gather_shifted_offset_z(const uint64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_s16))) int16x8_t vldrhq_gather_offset_s16(const int16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_s16))) int16x8_t vldrhq_gather_offset(const int16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_s32))) int32x4_t vldrhq_gather_offset_s32(const int16_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_s32))) int32x4_t vldrhq_gather_offset(const int16_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_u16))) uint16x8_t vldrhq_gather_offset_u16(const uint16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_u16))) uint16x8_t vldrhq_gather_offset(const uint16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_u32))) uint32x4_t vldrhq_gather_offset_u32(const uint16_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_u32))) uint32x4_t vldrhq_gather_offset(const uint16_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s16))) int16x8_t vldrhq_gather_offset_z_s16(const int16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s16))) int16x8_t vldrhq_gather_offset_z(const int16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s32))) int32x4_t vldrhq_gather_offset_z_s32(const int16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s32))) int32x4_t vldrhq_gather_offset_z(const int16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u16))) uint16x8_t vldrhq_gather_offset_z_u16(const uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u16))) uint16x8_t vldrhq_gather_offset_z(const uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u32))) uint32x4_t vldrhq_gather_offset_z_u32(const uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u32))) uint32x4_t vldrhq_gather_offset_z(const uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s16))) int16x8_t vldrhq_gather_shifted_offset_s16(const int16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s16))) int16x8_t vldrhq_gather_shifted_offset(const int16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s32))) int32x4_t vldrhq_gather_shifted_offset_s32(const int16_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s32))) int32x4_t vldrhq_gather_shifted_offset(const int16_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u16))) uint16x8_t vldrhq_gather_shifted_offset_u16(const uint16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u16))) uint16x8_t vldrhq_gather_shifted_offset(const uint16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u32))) uint32x4_t vldrhq_gather_shifted_offset_u32(const uint16_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u32))) uint32x4_t vldrhq_gather_shifted_offset(const uint16_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s16))) int16x8_t vldrhq_gather_shifted_offset_z_s16(const int16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s16))) int16x8_t vldrhq_gather_shifted_offset_z(const int16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s32))) int32x4_t vldrhq_gather_shifted_offset_z_s32(const int16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s32))) int32x4_t vldrhq_gather_shifted_offset_z(const int16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u16))) uint16x8_t vldrhq_gather_shifted_offset_z_u16(const uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u16))) uint16x8_t vldrhq_gather_shifted_offset_z(const uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u32))) uint32x4_t 
vldrhq_gather_shifted_offset_z_u32(const uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u32))) uint32x4_t vldrhq_gather_shifted_offset_z(const uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_s16))) int16x8_t vldrhq_s16(const int16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_s32))) int32x4_t vldrhq_s32(const int16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_u16))) uint16x8_t vldrhq_u16(const uint16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_u32))) uint32x4_t vldrhq_u32(const uint16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_z_s16))) int16x8_t vldrhq_z_s16(const int16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_z_s32))) int32x4_t vldrhq_z_s32(const int16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_z_u16))) uint16x8_t vldrhq_z_u16(const uint16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_z_u32))) uint32x4_t vldrhq_z_u32(const uint16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_s32))) int32x4_t vldrwq_gather_base_s32(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_u32))) uint32x4_t vldrwq_gather_base_u32(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_s32))) int32x4_t vldrwq_gather_base_wb_s32(uint32x4_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_u32))) uint32x4_t vldrwq_gather_base_wb_u32(uint32x4_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_z_s32))) int32x4_t vldrwq_gather_base_wb_z_s32(uint32x4_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_z_u32))) uint32x4_t vldrwq_gather_base_wb_z_u32(uint32x4_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_z_s32))) int32x4_t vldrwq_gather_base_z_s32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_z_u32))) uint32x4_t vldrwq_gather_base_z_u32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_s32))) int32x4_t vldrwq_gather_offset_s32(const int32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_s32))) int32x4_t vldrwq_gather_offset(const int32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_u32))) uint32x4_t vldrwq_gather_offset_u32(const uint32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_u32))) uint32x4_t vldrwq_gather_offset(const uint32_t *, uint32x4_t); static __inline__ 
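/*
 * [Editor's note - illustrative sketch, not part of the original header text.]
 * The *_gather_base_* loads above take a vector of absolute addresses rather
 * than a pointer: each lane is loaded from the corresponding 32-bit (vldrwq)
 * or 64-bit (vldrdq) address plus a small immediate byte offset, which must
 * be a compile-time constant. The _wb forms additionally write the updated
 * address vector back through the pointer argument, and the _z forms zero the
 * inactive lanes under the predicate. A hedged sketch, assuming <arm_mve.h>:
 *
 *     // Load one int32_t from each of four absolute addresses.
 *     int32x4_t gather_abs(uint32x4_t addrs) {
 *         return vldrwq_gather_base_s32(addrs, 0);
 *     }
 */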
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_s32))) int32x4_t vldrwq_gather_offset_z_s32(const int32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_s32))) int32x4_t vldrwq_gather_offset_z(const int32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_u32))) uint32x4_t vldrwq_gather_offset_z_u32(const uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_u32))) uint32x4_t vldrwq_gather_offset_z(const uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_s32))) int32x4_t vldrwq_gather_shifted_offset_s32(const int32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_s32))) int32x4_t vldrwq_gather_shifted_offset(const int32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_u32))) uint32x4_t vldrwq_gather_shifted_offset_u32(const uint32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_u32))) uint32x4_t vldrwq_gather_shifted_offset(const uint32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_s32))) int32x4_t vldrwq_gather_shifted_offset_z_s32(const int32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_s32))) int32x4_t vldrwq_gather_shifted_offset_z(const int32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_u32))) uint32x4_t vldrwq_gather_shifted_offset_z_u32(const uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_u32))) uint32x4_t vldrwq_gather_shifted_offset_z(const uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_s32))) int32x4_t vldrwq_s32(const int32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_u32))) uint32x4_t vldrwq_u32(const uint32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_z_s32))) int32x4_t vldrwq_z_s32(const int32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_z_u32))) uint32x4_t vldrwq_z_u32(const uint32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s16))) uint16x8_t vmaxaq_m_s16(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s16))) uint16x8_t vmaxaq_m(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s32))) uint32x4_t vmaxaq_m_s32(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s32))) uint32x4_t 
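/*
 * [Editor's note - illustrative sketch, not part of the original header text.]
 * The *_gather_offset_* forms above add per-lane *byte* offsets to the scalar
 * base pointer, while the *_gather_shifted_offset_* forms treat the offsets
 * as element indices, i.e. they are scaled by the element size before being
 * added. A minimal sketch, assuming <arm_mve.h> is included:
 *
 *     // offs holds byte offsets from base.
 *     uint32x4_t gather_bytes(const uint32_t *base, uint32x4_t offs) {
 *         return vldrwq_gather_offset_u32(base, offs);
 *     }
 *
 *     // idx holds element indices; scaled by 4 for 32-bit elements.
 *     uint32x4_t gather_elems(const uint32_t *base, uint32x4_t idx) {
 *         return vldrwq_gather_shifted_offset_u32(base, idx);
 *     }
 */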
vmaxaq_m(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s8))) uint8x16_t vmaxaq_m_s8(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s8))) uint8x16_t vmaxaq_m(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s16))) uint16x8_t vmaxaq_s16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s16))) uint16x8_t vmaxaq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s32))) uint32x4_t vmaxaq_s32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s32))) uint32x4_t vmaxaq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s8))) uint8x16_t vmaxaq_s8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s8))) uint8x16_t vmaxaq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s16))) uint16_t vmaxavq_p_s16(uint16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s16))) uint16_t vmaxavq_p(uint16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s32))) uint32_t vmaxavq_p_s32(uint32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s32))) uint32_t vmaxavq_p(uint32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s8))) uint8_t vmaxavq_p_s8(uint8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s8))) uint8_t vmaxavq_p(uint8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s16))) uint16_t vmaxavq_s16(uint16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s16))) uint16_t vmaxavq(uint16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s32))) uint32_t vmaxavq_s32(uint32_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s32))) uint32_t vmaxavq(uint32_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s8))) uint8_t vmaxavq_s8(uint8_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s8))) uint8_t vmaxavq(uint8_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s16))) int16x8_t vmaxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s16))) int16x8_t vmaxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s32))) int32x4_t vmaxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
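/*
 * [Editor's note - illustrative sketch, not part of the original header text.]
 * vmaxaq/vmaxavq above are the "absolute" max forms: they compare an unsigned
 * running value against the absolute value of the signed operand, either lane
 * by lane (vmaxaq) or reduced across the vector (vmaxavq). A hedged sketch,
 * assuming <arm_mve.h>:
 *
 *     // Fold the largest |v[i]| into an unsigned running maximum.
 *     uint16_t max_abs(uint16_t running, int16x8_t v) {
 *         return vmaxavq_s16(running, v);
 *     }
 */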
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s32))) int32x4_t vmaxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s8))) int8x16_t vmaxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s8))) int8x16_t vmaxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u16))) uint16x8_t vmaxq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u16))) uint16x8_t vmaxq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u32))) uint32x4_t vmaxq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u32))) uint32x4_t vmaxq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u8))) uint8x16_t vmaxq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u8))) uint8x16_t vmaxq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s16))) int16x8_t vmaxq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s16))) int16x8_t vmaxq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s32))) int32x4_t vmaxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s32))) int32x4_t vmaxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s8))) int8x16_t vmaxq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s8))) int8x16_t vmaxq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u16))) uint16x8_t vmaxq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u16))) uint16x8_t vmaxq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u32))) uint32x4_t vmaxq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u32))) uint32x4_t vmaxq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u8))) uint8x16_t vmaxq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u8))) uint8x16_t vmaxq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s16))) int16x8_t vmaxq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s16))) int16x8_t vmaxq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s32))) int32x4_t vmaxq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s32))) int32x4_t vmaxq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s8))) int8x16_t vmaxq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s8))) int8x16_t vmaxq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u16))) uint16x8_t vmaxq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u16))) uint16x8_t vmaxq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u32))) uint32x4_t vmaxq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u32))) uint32x4_t vmaxq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u8))) uint8x16_t vmaxq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u8))) uint8x16_t vmaxq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s16))) int16_t vmaxvq_p_s16(int16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s16))) int16_t vmaxvq_p(int16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s32))) int32_t vmaxvq_p_s32(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s32))) int32_t vmaxvq_p(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s8))) int8_t vmaxvq_p_s8(int8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s8))) int8_t vmaxvq_p(int8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u16))) uint16_t vmaxvq_p_u16(uint16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u16))) uint16_t vmaxvq_p(uint16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u32))) uint32_t vmaxvq_p_u32(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u32))) uint32_t vmaxvq_p(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u8))) uint8_t vmaxvq_p_u8(uint8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u8))) uint8_t vmaxvq_p(uint8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s16))) int16_t 
vmaxvq_s16(int16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s16))) int16_t vmaxvq(int16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s32))) int32_t vmaxvq_s32(int32_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s32))) int32_t vmaxvq(int32_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s8))) int8_t vmaxvq_s8(int8_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s8))) int8_t vmaxvq(int8_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u16))) uint16_t vmaxvq_u16(uint16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u16))) uint16_t vmaxvq(uint16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u32))) uint32_t vmaxvq_u32(uint32_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u32))) uint32_t vmaxvq(uint32_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u8))) uint8_t vmaxvq_u8(uint8_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u8))) uint8_t vmaxvq(uint8_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s16))) uint16x8_t vminaq_m_s16(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s16))) uint16x8_t vminaq_m(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s32))) uint32x4_t vminaq_m_s32(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s32))) uint32x4_t vminaq_m(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s8))) uint8x16_t vminaq_m_s8(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s8))) uint8x16_t vminaq_m(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s16))) uint16x8_t vminaq_s16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s16))) uint16x8_t vminaq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s32))) uint32x4_t vminaq_s32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s32))) uint32x4_t vminaq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s8))) uint8x16_t vminaq_s8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s8))) uint8x16_t vminaq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s16))) uint16_t vminavq_p_s16(uint16_t, int16x8_t, mve_pred16_t); static 
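/*
 * [Editor's note - illustrative sketch, not part of the original header text.]
 * The suffixes on the declarations above follow the usual MVE pattern:
 * _m ("merging") takes an extra leading operand that supplies the inactive
 * lanes, _x ("don't care") leaves inactive lanes unspecified, _z zeroes them,
 * and _p marks a predicated across-vector reduction. The __overloadable__
 * aliases without a type suffix are the polymorphic spellings. For example,
 * assuming <arm_mve.h> and <stdint.h>:
 *
 *     // Maximum of all lanes of v, folded into an initial value.
 *     int32_t vec_max(int32x4_t v) {
 *         return vmaxvq_s32(INT32_MIN, v);
 *     }
 *
 *     // Lane-wise max where only the predicated lanes matter to the caller.
 *     int32x4_t max_x(int32x4_t a, int32x4_t b, mve_pred16_t p) {
 *         return vmaxq_x_s32(a, b, p);
 *     }
 */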
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s16))) uint16_t vminavq_p(uint16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s32))) uint32_t vminavq_p_s32(uint32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s32))) uint32_t vminavq_p(uint32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s8))) uint8_t vminavq_p_s8(uint8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s8))) uint8_t vminavq_p(uint8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s16))) uint16_t vminavq_s16(uint16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s16))) uint16_t vminavq(uint16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s32))) uint32_t vminavq_s32(uint32_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s32))) uint32_t vminavq(uint32_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s8))) uint8_t vminavq_s8(uint8_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s8))) uint8_t vminavq(uint8_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s16))) int16x8_t vminq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s16))) int16x8_t vminq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s32))) int32x4_t vminq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s32))) int32x4_t vminq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s8))) int8x16_t vminq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s8))) int8x16_t vminq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u16))) uint16x8_t vminq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u16))) uint16x8_t vminq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u32))) uint32x4_t vminq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u32))) uint32x4_t vminq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u8))) uint8x16_t vminq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u8))) uint8x16_t vminq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_s16))) int16x8_t vminq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_s16))) int16x8_t vminq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_s32))) int32x4_t vminq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_s32))) int32x4_t vminq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_s8))) int8x16_t vminq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_s8))) int8x16_t vminq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_u16))) uint16x8_t vminq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_u16))) uint16x8_t vminq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_u32))) uint32x4_t vminq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_u32))) uint32x4_t vminq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_u8))) uint8x16_t vminq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_u8))) uint8x16_t vminq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s16))) int16x8_t vminq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s16))) int16x8_t vminq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s32))) int32x4_t vminq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s32))) int32x4_t vminq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s8))) int8x16_t vminq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s8))) int8x16_t vminq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u16))) uint16x8_t vminq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u16))) uint16x8_t vminq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u32))) uint32x4_t vminq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u32))) uint32x4_t vminq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u8))) uint8x16_t 
vminq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u8))) uint8x16_t vminq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s16))) int16_t vminvq_p_s16(int16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s16))) int16_t vminvq_p(int16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s32))) int32_t vminvq_p_s32(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s32))) int32_t vminvq_p(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s8))) int8_t vminvq_p_s8(int8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s8))) int8_t vminvq_p(int8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u16))) uint16_t vminvq_p_u16(uint16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u16))) uint16_t vminvq_p(uint16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u32))) uint32_t vminvq_p_u32(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u32))) uint32_t vminvq_p(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u8))) uint8_t vminvq_p_u8(uint8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u8))) uint8_t vminvq_p(uint8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s16))) int16_t vminvq_s16(int16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s16))) int16_t vminvq(int16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s32))) int32_t vminvq_s32(int32_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s32))) int32_t vminvq(int32_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s8))) int8_t vminvq_s8(int8_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s8))) int8_t vminvq(int8_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u16))) uint16_t vminvq_u16(uint16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u16))) uint16_t vminvq(uint16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u32))) uint32_t vminvq_u32(uint32_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u32))) uint32_t vminvq(uint32_t, uint32x4_t); static __inline__ 
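/*
 * [Editor's note - illustrative sketch, not part of the original header text.]
 * vminvq/vmaxvq reduce across the vector and fold in the scalar first
 * argument, so a tail-predicated minimum over n elements can seed the
 * reduction with the running result. A sketch, assuming <arm_mve.h> and that
 * vctp32q() (declared elsewhere in this header) builds the tail predicate:
 *
 *     uint32_t min_tail(uint32_t running_min, uint32x4_t v, unsigned n) {
 *         return vminvq_p_u32(running_min, v, vctp32q(n));
 *     }
 */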
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u8))) uint8_t vminvq_u8(uint8_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u8))) uint8_t vminvq(uint8_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s16))) int32_t vmladavaq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s16))) int32_t vmladavaq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s32))) int32_t vmladavaq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s32))) int32_t vmladavaq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s8))) int32_t vmladavaq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s8))) int32_t vmladavaq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u16))) uint32_t vmladavaq_p_u16(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u16))) uint32_t vmladavaq_p(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u32))) uint32_t vmladavaq_p_u32(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u32))) uint32_t vmladavaq_p(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u8))) uint32_t vmladavaq_p_u8(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u8))) uint32_t vmladavaq_p(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s16))) int32_t vmladavaq_s16(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s16))) int32_t vmladavaq(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s32))) int32_t vmladavaq_s32(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s32))) int32_t vmladavaq(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s8))) int32_t vmladavaq_s8(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s8))) int32_t vmladavaq(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u16))) uint32_t vmladavaq_u16(uint32_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u16))) uint32_t 
vmladavaq(uint32_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u32))) uint32_t vmladavaq_u32(uint32_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u32))) uint32_t vmladavaq(uint32_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u8))) uint32_t vmladavaq_u8(uint32_t, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u8))) uint32_t vmladavaq(uint32_t, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s16))) int32_t vmladavaxq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s16))) int32_t vmladavaxq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s32))) int32_t vmladavaxq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s32))) int32_t vmladavaxq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s8))) int32_t vmladavaxq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s8))) int32_t vmladavaxq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s16))) int32_t vmladavaxq_s16(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s16))) int32_t vmladavaxq(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s32))) int32_t vmladavaxq_s32(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s32))) int32_t vmladavaxq(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s8))) int32_t vmladavaxq_s8(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s8))) int32_t vmladavaxq(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s16))) int32_t vmladavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s16))) int32_t vmladavq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s32))) int32_t vmladavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s32))) int32_t vmladavq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s8))) int32_t vmladavq_p_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s8))) int32_t vmladavq_p(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u16))) uint32_t vmladavq_p_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u16))) uint32_t vmladavq_p(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u32))) uint32_t vmladavq_p_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u32))) uint32_t vmladavq_p(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u8))) uint32_t vmladavq_p_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u8))) uint32_t vmladavq_p(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s16))) int32_t vmladavq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s16))) int32_t vmladavq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s32))) int32_t vmladavq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s32))) int32_t vmladavq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s8))) int32_t vmladavq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s8))) int32_t vmladavq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u16))) uint32_t vmladavq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u16))) uint32_t vmladavq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u32))) uint32_t vmladavq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u32))) uint32_t vmladavq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u8))) uint32_t vmladavq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u8))) uint32_t vmladavq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s16))) int32_t vmladavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s16))) int32_t vmladavxq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s32))) int32_t vmladavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s32))) int32_t vmladavxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
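/*
 * [Editor's note - illustrative sketch, not part of the original header text.]
 * vmladavq multiplies the two vectors lane by lane and sums the products into
 * a 32-bit scalar; the vmladavaq forms above add that sum to an existing
 * accumulator, which is the building block of a fixed-point dot product.
 * A minimal sketch, assuming <arm_mve.h>:
 *
 *     int32_t dot8(int16x8_t a, int16x8_t b) { return vmladavq_s16(a, b); }
 *
 *     int32_t dot8_acc(int32_t acc, int16x8_t a, int16x8_t b) {
 *         return vmladavaq_s16(acc, a, b);
 *     }
 */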
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s8))) int32_t vmladavxq_p_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s8))) int32_t vmladavxq_p(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s16))) int32_t vmladavxq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s16))) int32_t vmladavxq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s32))) int32_t vmladavxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s32))) int32_t vmladavxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s8))) int32_t vmladavxq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s8))) int32_t vmladavxq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_s16))) int64_t vmlaldavaq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_s16))) int64_t vmlaldavaq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_s32))) int64_t vmlaldavaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_s32))) int64_t vmlaldavaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_u16))) uint64_t vmlaldavaq_p_u16(uint64_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_u16))) uint64_t vmlaldavaq_p(uint64_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_u32))) uint64_t vmlaldavaq_p_u32(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_u32))) uint64_t vmlaldavaq_p(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_s16))) int64_t vmlaldavaq_s16(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_s16))) int64_t vmlaldavaq(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_s32))) int64_t vmlaldavaq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_s32))) int64_t vmlaldavaq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_u16))) uint64_t vmlaldavaq_u16(uint64_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_u16))) uint64_t vmlaldavaq(uint64_t, uint16x8_t, uint16x8_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_u32))) uint64_t vmlaldavaq_u32(uint64_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_u32))) uint64_t vmlaldavaq(uint64_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_p_s16))) int64_t vmlaldavaxq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_p_s16))) int64_t vmlaldavaxq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_p_s32))) int64_t vmlaldavaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_p_s32))) int64_t vmlaldavaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_s16))) int64_t vmlaldavaxq_s16(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_s16))) int64_t vmlaldavaxq(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_s32))) int64_t vmlaldavaxq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_s32))) int64_t vmlaldavaxq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_s16))) int64_t vmlaldavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_s16))) int64_t vmlaldavq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_s32))) int64_t vmlaldavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_s32))) int64_t vmlaldavq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_u16))) uint64_t vmlaldavq_p_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_u16))) uint64_t vmlaldavq_p(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_u32))) uint64_t vmlaldavq_p_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_u32))) uint64_t vmlaldavq_p(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_s16))) int64_t vmlaldavq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_s16))) int64_t vmlaldavq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_s32))) int64_t vmlaldavq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_s32))) int64_t vmlaldavq(int32x4_t, 
int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_u16))) uint64_t vmlaldavq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_u16))) uint64_t vmlaldavq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_u32))) uint64_t vmlaldavq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_u32))) uint64_t vmlaldavq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_p_s16))) int64_t vmlaldavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_p_s16))) int64_t vmlaldavxq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_p_s32))) int64_t vmlaldavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_p_s32))) int64_t vmlaldavxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_s16))) int64_t vmlaldavxq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_s16))) int64_t vmlaldavxq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_s32))) int64_t vmlaldavxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_s32))) int64_t vmlaldavxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s16))) int16x8_t vmlaq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s16))) int16x8_t vmlaq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s32))) int32x4_t vmlaq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s32))) int32x4_t vmlaq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s8))) int8x16_t vmlaq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s8))) int8x16_t vmlaq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u16))) uint16x8_t vmlaq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u16))) uint16x8_t vmlaq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u32))) uint32x4_t vmlaq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u32))) uint32x4_t vmlaq_m(uint32x4_t, uint32x4_t, 
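/*
 * [Editor's note - illustrative sketch, not part of the original header text.]
 * The vmlaldavq family above is the "long" form of the dot product: products
 * of 16- or 32-bit lanes are accumulated into a 64-bit result, so wider
 * inputs can be reduced without overflow. A minimal sketch, assuming
 * <arm_mve.h>:
 *
 *     int64_t dot4_wide(int32x4_t a, int32x4_t b) {
 *         return vmlaldavq_s32(a, b);
 *     }
 */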
uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u8))) uint8x16_t vmlaq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u8))) uint8x16_t vmlaq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s16))) int16x8_t vmlaq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s16))) int16x8_t vmlaq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s32))) int32x4_t vmlaq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s32))) int32x4_t vmlaq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s8))) int8x16_t vmlaq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s8))) int8x16_t vmlaq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u16))) uint16x8_t vmlaq_n_u16(uint16x8_t, uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u16))) uint16x8_t vmlaq(uint16x8_t, uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u32))) uint32x4_t vmlaq_n_u32(uint32x4_t, uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u32))) uint32x4_t vmlaq(uint32x4_t, uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u8))) uint8x16_t vmlaq_n_u8(uint8x16_t, uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u8))) uint8x16_t vmlaq(uint8x16_t, uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s16))) int16x8_t vmlasq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s16))) int16x8_t vmlasq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s32))) int32x4_t vmlasq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s32))) int32x4_t vmlasq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s8))) int8x16_t vmlasq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s8))) int8x16_t vmlasq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u16))) uint16x8_t vmlasq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u16))) uint16x8_t vmlasq_m(uint16x8_t, 
uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u32))) uint32x4_t vmlasq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u32))) uint32x4_t vmlasq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u8))) uint8x16_t vmlasq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u8))) uint8x16_t vmlasq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s16))) int16x8_t vmlasq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s16))) int16x8_t vmlasq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s32))) int32x4_t vmlasq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s32))) int32x4_t vmlasq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s8))) int8x16_t vmlasq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s8))) int8x16_t vmlasq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u16))) uint16x8_t vmlasq_n_u16(uint16x8_t, uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u16))) uint16x8_t vmlasq(uint16x8_t, uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u32))) uint32x4_t vmlasq_n_u32(uint32x4_t, uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u32))) uint32x4_t vmlasq(uint32x4_t, uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u8))) uint8x16_t vmlasq_n_u8(uint8x16_t, uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u8))) uint8x16_t vmlasq(uint8x16_t, uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s16))) int32_t vmlsdavaq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s16))) int32_t vmlsdavaq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s32))) int32_t vmlsdavaq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s32))) int32_t vmlsdavaq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s8))) int32_t vmlsdavaq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
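/*
 * [Editorial note] vmlaq and vmlasq above are the vector-by-scalar
 * multiply-accumulate forms.  As I read the underlying VMLA/VMLAS
 * instructions, vmlaq(add, m1, scalar) returns add + m1*scalar per lane,
 * while vmlasq(m1, m2, scalar) returns m1*m2 + scalar per lane; the "_m"
 * variants merge, leaving inactive lanes equal to the first
 * (accumulator/destination) argument.  A hedged one-liner:
 *
 *   // y[i] += x[i] * gain on the active lanes only; inactive lanes keep y
 *   int16x8_t step(int16x8_t y, int16x8_t x, int16_t gain, mve_pred16_t p)
 *   {
 *       return vmlaq_m_n_s16(y, x, gain, p);
 *   }
 */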
__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s8))) int32_t vmlsdavaq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s16))) int32_t vmlsdavaq_s16(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s16))) int32_t vmlsdavaq(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s32))) int32_t vmlsdavaq_s32(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s32))) int32_t vmlsdavaq(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s8))) int32_t vmlsdavaq_s8(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s8))) int32_t vmlsdavaq(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s16))) int32_t vmlsdavaxq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s16))) int32_t vmlsdavaxq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s32))) int32_t vmlsdavaxq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s32))) int32_t vmlsdavaxq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s8))) int32_t vmlsdavaxq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s8))) int32_t vmlsdavaxq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s16))) int32_t vmlsdavaxq_s16(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s16))) int32_t vmlsdavaxq(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s32))) int32_t vmlsdavaxq_s32(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s32))) int32_t vmlsdavaxq(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s8))) int32_t vmlsdavaxq_s8(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s8))) int32_t vmlsdavaxq(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s16))) int32_t vmlsdavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s16))) int32_t vmlsdavq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s32))) int32_t vmlsdavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s32))) int32_t vmlsdavq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s8))) int32_t vmlsdavq_p_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s8))) int32_t vmlsdavq_p(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s16))) int32_t vmlsdavq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s16))) int32_t vmlsdavq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s32))) int32_t vmlsdavq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s32))) int32_t vmlsdavq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s8))) int32_t vmlsdavq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s8))) int32_t vmlsdavq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s16))) int32_t vmlsdavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s16))) int32_t vmlsdavxq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s32))) int32_t vmlsdavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s32))) int32_t vmlsdavxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s8))) int32_t vmlsdavxq_p_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s8))) int32_t vmlsdavxq_p(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s16))) int32_t vmlsdavxq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s16))) int32_t vmlsdavxq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s32))) int32_t vmlsdavxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s32))) int32_t vmlsdavxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s8))) int32_t vmlsdavxq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s8))) int32_t vmlsdavxq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_p_s16))) int64_t vmlsldavaq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_p_s16))) int64_t vmlsldavaq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ 
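/*
 * [Editorial note] The vmlsdav* declarations above are the signed-only
 * "multiply-subtract dual accumulate across vector" reductions into a
 * 32-bit scalar: the lane products are combined with alternating sign
 * rather than all added, with the usual "a" (accumulator operand), "x"
 * (exchanged lane pairing) and "_p" (predicated) modifiers.  They are
 * commonly paired with the vmladav*x forms when computing complex dot
 * products.  This gloss of the semantics is editorial, not text from the
 * header itself.
 */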
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_p_s32))) int64_t vmlsldavaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_p_s32))) int64_t vmlsldavaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_s16))) int64_t vmlsldavaq_s16(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_s16))) int64_t vmlsldavaq(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_s32))) int64_t vmlsldavaq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_s32))) int64_t vmlsldavaq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_p_s16))) int64_t vmlsldavaxq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_p_s16))) int64_t vmlsldavaxq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_p_s32))) int64_t vmlsldavaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_p_s32))) int64_t vmlsldavaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_s16))) int64_t vmlsldavaxq_s16(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_s16))) int64_t vmlsldavaxq(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_s32))) int64_t vmlsldavaxq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_s32))) int64_t vmlsldavaxq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_p_s16))) int64_t vmlsldavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_p_s16))) int64_t vmlsldavq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_p_s32))) int64_t vmlsldavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_p_s32))) int64_t vmlsldavq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_s16))) int64_t vmlsldavq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_s16))) int64_t vmlsldavq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_s32))) int64_t vmlsldavq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_s32))) int64_t vmlsldavq(int32x4_t, int32x4_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_p_s16))) int64_t vmlsldavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_p_s16))) int64_t vmlsldavxq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_p_s32))) int64_t vmlsldavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_p_s32))) int64_t vmlsldavxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_s16))) int64_t vmlsldavxq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_s16))) int64_t vmlsldavxq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_s32))) int64_t vmlsldavxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_s32))) int64_t vmlsldavxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_s16))) int32x4_t vmovlbq_m_s16(int32x4_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_s16))) int32x4_t vmovlbq_m(int32x4_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_s8))) int16x8_t vmovlbq_m_s8(int16x8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_s8))) int16x8_t vmovlbq_m(int16x8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_u16))) uint32x4_t vmovlbq_m_u16(uint32x4_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_u16))) uint32x4_t vmovlbq_m(uint32x4_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_u8))) uint16x8_t vmovlbq_m_u8(uint16x8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_u8))) uint16x8_t vmovlbq_m(uint16x8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_s16))) int32x4_t vmovlbq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_s16))) int32x4_t vmovlbq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_s8))) int16x8_t vmovlbq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_s8))) int16x8_t vmovlbq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_u16))) uint32x4_t vmovlbq_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_u16))) uint32x4_t vmovlbq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_u8))) uint16x8_t vmovlbq_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, 
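/*
 * [Editorial note] vmlsldav* above are the long (64-bit accumulator)
 * counterparts of vmlsdav*, again signed-only and limited to s16/s32
 * lanes, with the same a/x/_p modifiers.  For example,
 *   int64_t r = vmlsldavaq_s16(acc, va, vb);
 * accumulates the alternating-sign lane products of va and vb into acc.
 */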
__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_u8))) uint16x8_t vmovlbq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_s16))) int32x4_t vmovlbq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_s16))) int32x4_t vmovlbq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_s8))) int16x8_t vmovlbq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_s8))) int16x8_t vmovlbq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_u16))) uint32x4_t vmovlbq_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_u16))) uint32x4_t vmovlbq_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_u8))) uint16x8_t vmovlbq_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_u8))) uint16x8_t vmovlbq_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_s16))) int32x4_t vmovltq_m_s16(int32x4_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_s16))) int32x4_t vmovltq_m(int32x4_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_s8))) int16x8_t vmovltq_m_s8(int16x8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_s8))) int16x8_t vmovltq_m(int16x8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_u16))) uint32x4_t vmovltq_m_u16(uint32x4_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_u16))) uint32x4_t vmovltq_m(uint32x4_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_u8))) uint16x8_t vmovltq_m_u8(uint16x8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_u8))) uint16x8_t vmovltq_m(uint16x8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_s16))) int32x4_t vmovltq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_s16))) int32x4_t vmovltq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_s8))) int16x8_t vmovltq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_s8))) int16x8_t vmovltq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_u16))) uint32x4_t vmovltq_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_u16))) uint32x4_t vmovltq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_u8))) uint16x8_t vmovltq_u8(uint8x16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_u8))) uint16x8_t vmovltq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_s16))) int32x4_t vmovltq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_s16))) int32x4_t vmovltq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_s8))) int16x8_t vmovltq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_s8))) int16x8_t vmovltq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_u16))) uint32x4_t vmovltq_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_u16))) uint32x4_t vmovltq_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_u8))) uint16x8_t vmovltq_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_u8))) uint16x8_t vmovltq_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_s16))) int8x16_t vmovnbq_m_s16(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_s16))) int8x16_t vmovnbq_m(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_s32))) int16x8_t vmovnbq_m_s32(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_s32))) int16x8_t vmovnbq_m(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_u16))) uint8x16_t vmovnbq_m_u16(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_u16))) uint8x16_t vmovnbq_m(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_u32))) uint16x8_t vmovnbq_m_u32(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_u32))) uint16x8_t vmovnbq_m(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_s16))) int8x16_t vmovnbq_s16(int8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_s16))) int8x16_t vmovnbq(int8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_s32))) int16x8_t vmovnbq_s32(int16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_s32))) int16x8_t vmovnbq(int16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_u16))) uint8x16_t vmovnbq_u16(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_u16))) uint8x16_t vmovnbq(uint8x16_t, uint16x8_t); static __inline__ 
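/*
 * [Editorial note] vmovlbq/vmovltq above are the widening moves:
 * vmovlbq widens the bottom (even-numbered) lanes of its argument to
 * twice the width, vmovltq widens the top (odd-numbered) lanes; "_m"
 * merges into the first argument and "_x" is predicated with don't-care
 * inactive lanes.  A small sketch:
 *
 *   // Split 16 signed bytes into two vectors of 8 sign-extended halfwords.
 *   void widen(int8x16_t v, int16x8_t *lo, int16x8_t *hi)
 *   {
 *       *lo = vmovlbq_s8(v);   // even-numbered byte lanes
 *       *hi = vmovltq_s8(v);   // odd-numbered byte lanes
 *   }
 */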
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_u32))) uint16x8_t vmovnbq_u32(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_u32))) uint16x8_t vmovnbq(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_s16))) int8x16_t vmovntq_m_s16(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_s16))) int8x16_t vmovntq_m(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_s32))) int16x8_t vmovntq_m_s32(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_s32))) int16x8_t vmovntq_m(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_u16))) uint8x16_t vmovntq_m_u16(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_u16))) uint8x16_t vmovntq_m(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_u32))) uint16x8_t vmovntq_m_u32(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_u32))) uint16x8_t vmovntq_m(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_s16))) int8x16_t vmovntq_s16(int8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_s16))) int8x16_t vmovntq(int8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_s32))) int16x8_t vmovntq_s32(int16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_s32))) int16x8_t vmovntq(int16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_u16))) uint8x16_t vmovntq_u16(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_u16))) uint8x16_t vmovntq(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_u32))) uint16x8_t vmovntq_u32(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_u32))) uint16x8_t vmovntq(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s16))) int16x8_t vmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s16))) int16x8_t vmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s32))) int32x4_t vmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s32))) int32x4_t vmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s8))) int8x16_t 
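/*
 * [Editorial note] vmovnbq/vmovntq above go the other way: each narrows
 * the double-width lanes of its second argument (truncating, not
 * saturating) and writes them into the even (bottom) or odd (top) lanes
 * of the first argument, leaving the remaining lanes untouched.  A
 * sketch that re-packs two halfword vectors into one byte vector
 * (vdupq_n_s8 is assumed from elsewhere in this header):
 *
 *   int8x16_t pack(int16x8_t lo, int16x8_t hi)
 *   {
 *       int8x16_t out = vdupq_n_s8(0);
 *       out = vmovnbq_s16(out, lo);   // narrowed lo -> even byte lanes
 *       out = vmovntq_s16(out, hi);   // narrowed hi -> odd byte lanes
 *       return out;
 *   }
 */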
vmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s8))) int8x16_t vmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u16))) uint16x8_t vmulhq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u16))) uint16x8_t vmulhq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u32))) uint32x4_t vmulhq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u32))) uint32x4_t vmulhq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u8))) uint8x16_t vmulhq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u8))) uint8x16_t vmulhq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s16))) int16x8_t vmulhq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s16))) int16x8_t vmulhq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s32))) int32x4_t vmulhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s32))) int32x4_t vmulhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s8))) int8x16_t vmulhq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s8))) int8x16_t vmulhq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u16))) uint16x8_t vmulhq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u16))) uint16x8_t vmulhq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u32))) uint32x4_t vmulhq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u32))) uint32x4_t vmulhq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u8))) uint8x16_t vmulhq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u8))) uint8x16_t vmulhq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s16))) int16x8_t vmulhq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s16))) int16x8_t vmulhq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s32))) int32x4_t vmulhq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s32))) int32x4_t vmulhq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s8))) int8x16_t vmulhq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s8))) int8x16_t vmulhq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u16))) uint16x8_t vmulhq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u16))) uint16x8_t vmulhq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u32))) uint32x4_t vmulhq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u32))) uint32x4_t vmulhq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u8))) uint8x16_t vmulhq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u8))) uint8x16_t vmulhq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s16))) int32x4_t vmullbq_int_m_s16(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s16))) int32x4_t vmullbq_int_m(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s32))) int64x2_t vmullbq_int_m_s32(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s32))) int64x2_t vmullbq_int_m(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s8))) int16x8_t vmullbq_int_m_s8(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s8))) int16x8_t vmullbq_int_m(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u16))) uint32x4_t vmullbq_int_m_u16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u16))) uint32x4_t vmullbq_int_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u32))) uint64x2_t vmullbq_int_m_u32(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u32))) uint64x2_t vmullbq_int_m(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u8))) uint16x8_t vmullbq_int_m_u8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u8))) uint16x8_t 
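/*
 * [Editorial note] vmulhq above returns only the most significant half
 * of each lane's full-width product (plain vmulq, declared further down,
 * keeps the low half), which is useful for fixed-point arithmetic.  For
 * signed 16-bit lanes this is effectively (a[i]*b[i]) >> 16 per lane:
 *
 *   int16x8_t hi = vmulhq_s16(a, b);
 */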
vmullbq_int_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s16))) int32x4_t vmullbq_int_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s16))) int32x4_t vmullbq_int(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s32))) int64x2_t vmullbq_int_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s32))) int64x2_t vmullbq_int(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s8))) int16x8_t vmullbq_int_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s8))) int16x8_t vmullbq_int(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u16))) uint32x4_t vmullbq_int_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u16))) uint32x4_t vmullbq_int(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u32))) uint64x2_t vmullbq_int_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u32))) uint64x2_t vmullbq_int(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u8))) uint16x8_t vmullbq_int_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u8))) uint16x8_t vmullbq_int(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s16))) int32x4_t vmullbq_int_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s16))) int32x4_t vmullbq_int_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s32))) int64x2_t vmullbq_int_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s32))) int64x2_t vmullbq_int_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s8))) int16x8_t vmullbq_int_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s8))) int16x8_t vmullbq_int_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u16))) uint32x4_t vmullbq_int_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u16))) uint32x4_t vmullbq_int_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u32))) uint64x2_t vmullbq_int_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u32))) 
uint64x2_t vmullbq_int_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u8))) uint16x8_t vmullbq_int_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u8))) uint16x8_t vmullbq_int_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_m_p16))) uint32x4_t vmullbq_poly_m_p16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_m_p16))) uint32x4_t vmullbq_poly_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_m_p8))) uint16x8_t vmullbq_poly_m_p8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_m_p8))) uint16x8_t vmullbq_poly_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_p16))) uint32x4_t vmullbq_poly_p16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_p16))) uint32x4_t vmullbq_poly(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_p8))) uint16x8_t vmullbq_poly_p8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_p8))) uint16x8_t vmullbq_poly(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_x_p16))) uint32x4_t vmullbq_poly_x_p16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_x_p16))) uint32x4_t vmullbq_poly_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_x_p8))) uint16x8_t vmullbq_poly_x_p8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_x_p8))) uint16x8_t vmullbq_poly_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s16))) int32x4_t vmulltq_int_m_s16(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s16))) int32x4_t vmulltq_int_m(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s32))) int64x2_t vmulltq_int_m_s32(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s32))) int64x2_t vmulltq_int_m(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s8))) int16x8_t vmulltq_int_m_s8(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s8))) int16x8_t vmulltq_int_m(int16x8_t, int8x16_t, int8x16_t, 
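/*
 * [Editorial note] vmullbq_int above is the widening multiply of the
 * bottom (even-numbered) lanes: each pair of even lanes is multiplied to
 * a double-width product, so int16x8_t inputs yield an int32x4_t.  The
 * vmullbq_poly p8/p16 forms are polynomial (carry-less) multiplies of
 * the kind used in CRC/GF(2) code.  For example:
 *
 *   int32x4_t prod_even = vmullbq_int_s16(a, b);  // products of lanes 0,2,4,6
 */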
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u16))) uint32x4_t vmulltq_int_m_u16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u16))) uint32x4_t vmulltq_int_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u32))) uint64x2_t vmulltq_int_m_u32(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u32))) uint64x2_t vmulltq_int_m(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u8))) uint16x8_t vmulltq_int_m_u8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u8))) uint16x8_t vmulltq_int_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s16))) int32x4_t vmulltq_int_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s16))) int32x4_t vmulltq_int(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s32))) int64x2_t vmulltq_int_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s32))) int64x2_t vmulltq_int(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s8))) int16x8_t vmulltq_int_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s8))) int16x8_t vmulltq_int(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u16))) uint32x4_t vmulltq_int_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u16))) uint32x4_t vmulltq_int(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u32))) uint64x2_t vmulltq_int_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u32))) uint64x2_t vmulltq_int(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u8))) uint16x8_t vmulltq_int_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u8))) uint16x8_t vmulltq_int(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s16))) int32x4_t vmulltq_int_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s16))) int32x4_t vmulltq_int_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s32))) int64x2_t vmulltq_int_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s32))) int64x2_t vmulltq_int_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s8))) int16x8_t vmulltq_int_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s8))) int16x8_t vmulltq_int_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u16))) uint32x4_t vmulltq_int_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u16))) uint32x4_t vmulltq_int_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u32))) uint64x2_t vmulltq_int_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u32))) uint64x2_t vmulltq_int_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u8))) uint16x8_t vmulltq_int_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u8))) uint16x8_t vmulltq_int_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_m_p16))) uint32x4_t vmulltq_poly_m_p16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_m_p16))) uint32x4_t vmulltq_poly_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_m_p8))) uint16x8_t vmulltq_poly_m_p8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_m_p8))) uint16x8_t vmulltq_poly_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_p16))) uint32x4_t vmulltq_poly_p16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_p16))) uint32x4_t vmulltq_poly(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_p8))) uint16x8_t vmulltq_poly_p8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_p8))) uint16x8_t vmulltq_poly(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_x_p16))) uint32x4_t vmulltq_poly_x_p16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_x_p16))) uint32x4_t vmulltq_poly_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_x_p8))) uint16x8_t vmulltq_poly_x_p8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_x_p8))) uint16x8_t vmulltq_poly_x(uint8x16_t, uint8x16_t, 
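/*
 * [Editorial note] vmulltq_int/vmulltq_poly above are the top-lane
 * counterparts of vmullbq_*: they multiply the odd-numbered lanes to
 * double-width results, so the two together cover every lane:
 *
 *   int32x4_t prod_even = vmullbq_int_s16(a, b);  // lanes 0,2,4,6
 *   int32x4_t prod_odd  = vmulltq_int_s16(a, b);  // lanes 1,3,5,7
 */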
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s16))) int16x8_t vmulq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s16))) int16x8_t vmulq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s32))) int32x4_t vmulq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s32))) int32x4_t vmulq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s8))) int8x16_t vmulq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s8))) int8x16_t vmulq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u16))) uint16x8_t vmulq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u16))) uint16x8_t vmulq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u32))) uint32x4_t vmulq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u32))) uint32x4_t vmulq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u8))) uint8x16_t vmulq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u8))) uint8x16_t vmulq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s16))) int16x8_t vmulq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s16))) int16x8_t vmulq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s32))) int32x4_t vmulq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s32))) int32x4_t vmulq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s8))) int8x16_t vmulq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s8))) int8x16_t vmulq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u16))) uint16x8_t vmulq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u16))) uint16x8_t vmulq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u32))) uint32x4_t vmulq_m_u32(uint32x4_t, 
uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u32))) uint32x4_t vmulq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u8))) uint8x16_t vmulq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u8))) uint8x16_t vmulq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s16))) int16x8_t vmulq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s16))) int16x8_t vmulq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s32))) int32x4_t vmulq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s32))) int32x4_t vmulq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s8))) int8x16_t vmulq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s8))) int8x16_t vmulq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u16))) uint16x8_t vmulq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u16))) uint16x8_t vmulq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u32))) uint32x4_t vmulq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u32))) uint32x4_t vmulq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u8))) uint8x16_t vmulq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u8))) uint8x16_t vmulq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s16))) int16x8_t vmulq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s16))) int16x8_t vmulq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s32))) int32x4_t vmulq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s32))) int32x4_t vmulq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s8))) int8x16_t vmulq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s8))) int8x16_t vmulq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u16))) uint16x8_t vmulq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u16))) uint16x8_t vmulq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u32))) uint32x4_t vmulq_u32(uint32x4_t, uint32x4_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u32))) uint32x4_t vmulq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u8))) uint8x16_t vmulq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u8))) uint8x16_t vmulq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s16))) int16x8_t vmulq_x_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s16))) int16x8_t vmulq_x(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s32))) int32x4_t vmulq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s32))) int32x4_t vmulq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s8))) int8x16_t vmulq_x_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s8))) int8x16_t vmulq_x(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u16))) uint16x8_t vmulq_x_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u16))) uint16x8_t vmulq_x(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u32))) uint32x4_t vmulq_x_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u32))) uint32x4_t vmulq_x(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u8))) uint8x16_t vmulq_x_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u8))) uint8x16_t vmulq_x(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s16))) int16x8_t vmulq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s16))) int16x8_t vmulq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s32))) int32x4_t vmulq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s32))) int32x4_t vmulq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s8))) int8x16_t vmulq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s8))) int8x16_t vmulq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u16))) uint16x8_t vmulq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u16))) uint16x8_t 
vmulq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u32))) uint32x4_t vmulq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u32))) uint32x4_t vmulq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u8))) uint8x16_t vmulq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u8))) uint8x16_t vmulq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_s16))) int16x8_t vmvnq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_s16))) int16x8_t vmvnq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_s32))) int32x4_t vmvnq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_s32))) int32x4_t vmvnq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_u16))) uint16x8_t vmvnq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_u16))) uint16x8_t vmvnq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_u32))) uint32x4_t vmvnq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_u32))) uint32x4_t vmvnq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s16))) int16x8_t vmvnq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s16))) int16x8_t vmvnq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s32))) int32x4_t vmvnq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s32))) int32x4_t vmvnq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s8))) int8x16_t vmvnq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s8))) int8x16_t vmvnq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u16))) uint16x8_t vmvnq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u16))) uint16x8_t vmvnq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u32))) uint32x4_t vmvnq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u32))) uint32x4_t vmvnq_m(uint32x4_t, uint32x4_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u8))) uint8x16_t vmvnq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u8))) uint8x16_t vmvnq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_n_s16))) int16x8_t vmvnq_n_s16(int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_n_s32))) int32x4_t vmvnq_n_s32(int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_n_u16))) uint16x8_t vmvnq_n_u16(uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_n_u32))) uint32x4_t vmvnq_n_u32(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s16))) int16x8_t vmvnq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s16))) int16x8_t vmvnq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s32))) int32x4_t vmvnq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s32))) int32x4_t vmvnq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s8))) int8x16_t vmvnq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s8))) int8x16_t vmvnq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u16))) uint16x8_t vmvnq_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u16))) uint16x8_t vmvnq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u32))) uint32x4_t vmvnq_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u32))) uint32x4_t vmvnq(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u8))) uint8x16_t vmvnq_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u8))) uint8x16_t vmvnq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_n_s16))) int16x8_t vmvnq_x_n_s16(int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_n_s32))) int32x4_t vmvnq_x_n_s32(int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_n_u16))) uint16x8_t vmvnq_x_n_u16(uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_n_u32))) uint32x4_t vmvnq_x_n_u32(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s16))) int16x8_t vmvnq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s16))) int16x8_t vmvnq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s32))) int32x4_t vmvnq_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s32))) int32x4_t vmvnq_x(int32x4_t, mve_pred16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s8))) int8x16_t vmvnq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s8))) int8x16_t vmvnq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u16))) uint16x8_t vmvnq_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u16))) uint16x8_t vmvnq_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u32))) uint32x4_t vmvnq_x_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u32))) uint32x4_t vmvnq_x(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u8))) uint8x16_t vmvnq_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u8))) uint8x16_t vmvnq_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s16))) int16x8_t vnegq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s16))) int16x8_t vnegq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s32))) int32x4_t vnegq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s32))) int32x4_t vnegq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s8))) int8x16_t vnegq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s8))) int8x16_t vnegq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s16))) int16x8_t vnegq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s16))) int16x8_t vnegq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s32))) int32x4_t vnegq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s32))) int32x4_t vnegq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s8))) int8x16_t vnegq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s8))) int8x16_t vnegq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s16))) int16x8_t vnegq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s16))) int16x8_t vnegq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s32))) int32x4_t vnegq_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s32))) int32x4_t vnegq_x(int32x4_t, mve_pred16_t); static __inline__ 
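/*
 * Illustrative usage sketch (not part of the generated declarations): vmvnq
 * computes a lane-wise bitwise NOT and vnegq a lane-wise arithmetic negation
 * (signed element types only, as the declarations above show).  The _m forms
 * merge with an "inactive" vector under a predicate; the _x forms leave the
 * disabled lanes unspecified.
 *
 *   int16x8_t negate_enabled_lanes(int16x8_t v, mve_pred16_t p)
 *   {
 *       // Lanes enabled by p are negated; the rest keep their original value
 *       // because the same vector is passed as the inactive operand.
 *       return vnegq_m(v, v, p);
 *   }
 */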
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s8))) int8x16_t vnegq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s8))) int8x16_t vnegq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s16))) int16x8_t vornq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s16))) int16x8_t vornq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s32))) int32x4_t vornq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s32))) int32x4_t vornq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s8))) int8x16_t vornq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s8))) int8x16_t vornq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u16))) uint16x8_t vornq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u16))) uint16x8_t vornq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u32))) uint32x4_t vornq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u32))) uint32x4_t vornq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u8))) uint8x16_t vornq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u8))) uint8x16_t vornq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_s16))) int16x8_t vornq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_s16))) int16x8_t vornq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_s32))) int32x4_t vornq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_s32))) int32x4_t vornq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_s8))) int8x16_t vornq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_s8))) int8x16_t vornq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_u16))) uint16x8_t vornq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_u16))) uint16x8_t vornq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_u32))) uint32x4_t 
vornq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_u32))) uint32x4_t vornq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_u8))) uint8x16_t vornq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_u8))) uint8x16_t vornq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s16))) int16x8_t vornq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s16))) int16x8_t vornq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s32))) int32x4_t vornq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s32))) int32x4_t vornq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s8))) int8x16_t vornq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s8))) int8x16_t vornq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u16))) uint16x8_t vornq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u16))) uint16x8_t vornq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u32))) uint32x4_t vornq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u32))) uint32x4_t vornq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u8))) uint8x16_t vornq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u8))) uint8x16_t vornq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_s16))) int16x8_t vorrq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_s16))) int16x8_t vorrq_m_n(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_s32))) int32x4_t vorrq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_s32))) int32x4_t vorrq_m_n(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_u16))) uint16x8_t vorrq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_u16))) uint16x8_t vorrq_m_n(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_u32))) uint32x4_t vorrq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_u32))) uint32x4_t vorrq_m_n(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s16))) int16x8_t vorrq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s16))) int16x8_t vorrq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s32))) int32x4_t vorrq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s32))) int32x4_t vorrq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s8))) int8x16_t vorrq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s8))) int8x16_t vorrq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u16))) uint16x8_t vorrq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u16))) uint16x8_t vorrq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u32))) uint32x4_t vorrq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u32))) uint32x4_t vorrq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u8))) uint8x16_t vorrq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u8))) uint8x16_t vorrq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_s16))) int16x8_t vorrq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_s16))) int16x8_t vorrq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_s32))) int32x4_t vorrq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_s32))) int32x4_t vorrq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_u16))) uint16x8_t vorrq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_u16))) uint16x8_t vorrq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_u32))) uint32x4_t vorrq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_u32))) uint32x4_t vorrq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s16))) int16x8_t vorrq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s16))) 
int16x8_t vorrq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s32))) int32x4_t vorrq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s32))) int32x4_t vorrq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s8))) int8x16_t vorrq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s8))) int8x16_t vorrq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u16))) uint16x8_t vorrq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u16))) uint16x8_t vorrq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u32))) uint32x4_t vorrq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u32))) uint32x4_t vorrq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u8))) uint8x16_t vorrq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u8))) uint8x16_t vorrq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s16))) int16x8_t vorrq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s16))) int16x8_t vorrq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s32))) int32x4_t vorrq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s32))) int32x4_t vorrq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s8))) int8x16_t vorrq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s8))) int8x16_t vorrq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u16))) uint16x8_t vorrq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u16))) uint16x8_t vorrq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u32))) uint32x4_t vorrq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u32))) uint32x4_t vorrq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u8))) uint8x16_t vorrq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u8))) uint8x16_t vorrq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpnot))) mve_pred16_t vpnot(mve_pred16_t); static __inline__ 
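/*
 * Illustrative usage sketch (not part of the generated declarations): vorrq
 * is a lane-wise bitwise OR, vornq ORs the first operand with the complement
 * of the second (a | ~b), and vpnot flips every bit of an mve_pred16_t
 * predicate, enabling exactly the opposite set of lanes.
 *
 *   uint32x4_t or_in_mask(uint32x4_t bits, uint32x4_t mask, mve_pred16_t p)
 *   {
 *       // Overload resolves to vorrq_m_u32: enabled lanes become bits|mask,
 *       // disabled lanes keep 'bits' (passed as the inactive operand).
 *       return vorrq_m(bits, bits, mask, p);
 *   }
 */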
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s16))) int16x8_t vpselq_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s16))) int16x8_t vpselq(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s32))) int32x4_t vpselq_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s32))) int32x4_t vpselq(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s64))) int64x2_t vpselq_s64(int64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s64))) int64x2_t vpselq(int64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s8))) int8x16_t vpselq_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s8))) int8x16_t vpselq(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u16))) uint16x8_t vpselq_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u16))) uint16x8_t vpselq(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u32))) uint32x4_t vpselq_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u32))) uint32x4_t vpselq(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u64))) uint64x2_t vpselq_u64(uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u64))) uint64x2_t vpselq(uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u8))) uint8x16_t vpselq_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u8))) uint8x16_t vpselq(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s16))) int16x8_t vqabsq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s16))) int16x8_t vqabsq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s32))) int32x4_t vqabsq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s32))) int32x4_t vqabsq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s8))) int8x16_t vqabsq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s8))) int8x16_t vqabsq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s16))) int16x8_t 
vqabsq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s16))) int16x8_t vqabsq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s32))) int32x4_t vqabsq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s32))) int32x4_t vqabsq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s8))) int8x16_t vqabsq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s8))) int8x16_t vqabsq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s16))) int16x8_t vqaddq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s16))) int16x8_t vqaddq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s32))) int32x4_t vqaddq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s32))) int32x4_t vqaddq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s8))) int8x16_t vqaddq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s8))) int8x16_t vqaddq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u16))) uint16x8_t vqaddq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u16))) uint16x8_t vqaddq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u32))) uint32x4_t vqaddq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u32))) uint32x4_t vqaddq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u8))) uint8x16_t vqaddq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u8))) uint8x16_t vqaddq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s16))) int16x8_t vqaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s16))) int16x8_t vqaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s32))) int32x4_t vqaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s32))) int32x4_t vqaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s8))) int8x16_t 
vqaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s8))) int8x16_t vqaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u16))) uint16x8_t vqaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u16))) uint16x8_t vqaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u32))) uint32x4_t vqaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u32))) uint32x4_t vqaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u8))) uint8x16_t vqaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u8))) uint8x16_t vqaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s16))) int16x8_t vqaddq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s16))) int16x8_t vqaddq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s32))) int32x4_t vqaddq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s32))) int32x4_t vqaddq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s8))) int8x16_t vqaddq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s8))) int8x16_t vqaddq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u16))) uint16x8_t vqaddq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u16))) uint16x8_t vqaddq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u32))) uint32x4_t vqaddq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u32))) uint32x4_t vqaddq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u8))) uint8x16_t vqaddq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u8))) uint8x16_t vqaddq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s16))) int16x8_t vqaddq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s16))) int16x8_t vqaddq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s32))) int32x4_t vqaddq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s32))) int32x4_t 
vqaddq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s8))) int8x16_t vqaddq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s8))) int8x16_t vqaddq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u16))) uint16x8_t vqaddq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u16))) uint16x8_t vqaddq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u32))) uint32x4_t vqaddq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u32))) uint32x4_t vqaddq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u8))) uint8x16_t vqaddq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u8))) uint8x16_t vqaddq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s16))) int16x8_t vqdmladhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s16))) int16x8_t vqdmladhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s32))) int32x4_t vqdmladhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s32))) int32x4_t vqdmladhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s8))) int8x16_t vqdmladhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s8))) int8x16_t vqdmladhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s16))) int16x8_t vqdmladhq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s16))) int16x8_t vqdmladhq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s32))) int32x4_t vqdmladhq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s32))) int32x4_t vqdmladhq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s8))) int8x16_t vqdmladhq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s8))) int8x16_t vqdmladhq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s16))) int16x8_t vqdmladhxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s16))) int16x8_t vqdmladhxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static 
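/*
 * Illustrative usage sketch (not part of the generated declarations): vpselq
 * picks each lane from its first operand where the predicate bit is set and
 * from its second operand otherwise, vqabsq is a saturating absolute value,
 * and vqaddq is a saturating add (results clamp to the element type's range
 * instead of wrapping).
 *
 *   int16x8_t saturating_bias(int16x8_t v, int16_t bias, mve_pred16_t p)
 *   {
 *       int16x8_t biased = vqaddq(v, bias);   // vqaddq_n_s16: clamps on overflow
 *       return vpselq(biased, v, p);          // keep the original value where p is 0
 *   }
 */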
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s32))) int32x4_t vqdmladhxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s32))) int32x4_t vqdmladhxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s8))) int8x16_t vqdmladhxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s8))) int8x16_t vqdmladhxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s16))) int16x8_t vqdmladhxq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s16))) int16x8_t vqdmladhxq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s32))) int32x4_t vqdmladhxq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s32))) int32x4_t vqdmladhxq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s8))) int8x16_t vqdmladhxq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s8))) int8x16_t vqdmladhxq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s16))) int16x8_t vqdmlahq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s16))) int16x8_t vqdmlahq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s32))) int32x4_t vqdmlahq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s32))) int32x4_t vqdmlahq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s8))) int8x16_t vqdmlahq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s8))) int8x16_t vqdmlahq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s16))) int16x8_t vqdmlahq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s16))) int16x8_t vqdmlahq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s32))) int32x4_t vqdmlahq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s32))) int32x4_t vqdmlahq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s8))) int8x16_t vqdmlahq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s8))) int8x16_t vqdmlahq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s16))) int16x8_t vqdmlashq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s16))) int16x8_t vqdmlashq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s32))) int32x4_t vqdmlashq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s32))) int32x4_t vqdmlashq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s8))) int8x16_t vqdmlashq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s8))) int8x16_t vqdmlashq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s16))) int16x8_t vqdmlashq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s16))) int16x8_t vqdmlashq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s32))) int32x4_t vqdmlashq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s32))) int32x4_t vqdmlashq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s8))) int8x16_t vqdmlashq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s8))) int8x16_t vqdmlashq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s16))) int16x8_t vqdmlsdhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s16))) int16x8_t vqdmlsdhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s32))) int32x4_t vqdmlsdhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s32))) int32x4_t vqdmlsdhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s8))) int8x16_t vqdmlsdhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s8))) int8x16_t vqdmlsdhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s16))) int16x8_t vqdmlsdhq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s16))) int16x8_t vqdmlsdhq(int16x8_t, int16x8_t, int16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s32))) int32x4_t vqdmlsdhq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s32))) int32x4_t vqdmlsdhq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s8))) int8x16_t vqdmlsdhq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s8))) int8x16_t vqdmlsdhq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s16))) int16x8_t vqdmlsdhxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s16))) int16x8_t vqdmlsdhxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s32))) int32x4_t vqdmlsdhxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s32))) int32x4_t vqdmlsdhxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s8))) int8x16_t vqdmlsdhxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s8))) int8x16_t vqdmlsdhxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s16))) int16x8_t vqdmlsdhxq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s16))) int16x8_t vqdmlsdhxq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s32))) int32x4_t vqdmlsdhxq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s32))) int32x4_t vqdmlsdhxq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s8))) int8x16_t vqdmlsdhxq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s8))) int8x16_t vqdmlsdhxq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s16))) int16x8_t vqdmulhq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s16))) int16x8_t vqdmulhq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s32))) int32x4_t vqdmulhq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s32))) int32x4_t vqdmulhq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s8))) int8x16_t vqdmulhq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ 
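/*
 * Illustrative usage sketch (not part of the generated declarations): the
 * vqdml* and vqdmulh* families provide fixed-point style arithmetic.
 * vqdmulhq doubles the lane-wise product and keeps the saturated high half,
 * i.e. the usual Q15/Q31 fractional multiply; vqdmlahq/vqdmlashq fold such a
 * product into an accumulator, and vqdmlsdhq/vqdmlsdhxq are the subtracting
 * (and lane-exchanging) variants.
 *
 *   // Q15 fractional multiply: roughly (a * b) >> 15, saturated.
 *   int16x8_t q15_mul(int16x8_t a, int16x8_t b)
 *   {
 *       return vqdmulhq(a, b);
 *   }
 */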
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s8))) int8x16_t vqdmulhq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s16))) int16x8_t vqdmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s16))) int16x8_t vqdmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s32))) int32x4_t vqdmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s32))) int32x4_t vqdmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s8))) int8x16_t vqdmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s8))) int8x16_t vqdmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s16))) int16x8_t vqdmulhq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s16))) int16x8_t vqdmulhq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s32))) int32x4_t vqdmulhq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s32))) int32x4_t vqdmulhq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s8))) int8x16_t vqdmulhq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s8))) int8x16_t vqdmulhq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s16))) int16x8_t vqdmulhq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s16))) int16x8_t vqdmulhq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s32))) int32x4_t vqdmulhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s32))) int32x4_t vqdmulhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s8))) int8x16_t vqdmulhq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s8))) int8x16_t vqdmulhq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_n_s16))) int32x4_t vqdmullbq_m_n_s16(int32x4_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_n_s16))) int32x4_t vqdmullbq_m(int32x4_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_n_s32))) int64x2_t vqdmullbq_m_n_s32(int64x2_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_n_s32))) int64x2_t vqdmullbq_m(int64x2_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_s16))) int32x4_t vqdmullbq_m_s16(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_s16))) int32x4_t vqdmullbq_m(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_s32))) int64x2_t vqdmullbq_m_s32(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_s32))) int64x2_t vqdmullbq_m(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_n_s16))) int32x4_t vqdmullbq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_n_s16))) int32x4_t vqdmullbq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_n_s32))) int64x2_t vqdmullbq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_n_s32))) int64x2_t vqdmullbq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_s16))) int32x4_t vqdmullbq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_s16))) int32x4_t vqdmullbq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_s32))) int64x2_t vqdmullbq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_s32))) int64x2_t vqdmullbq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_n_s16))) int32x4_t vqdmulltq_m_n_s16(int32x4_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_n_s16))) int32x4_t vqdmulltq_m(int32x4_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_n_s32))) int64x2_t vqdmulltq_m_n_s32(int64x2_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_n_s32))) int64x2_t vqdmulltq_m(int64x2_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_s16))) int32x4_t vqdmulltq_m_s16(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_s16))) int32x4_t vqdmulltq_m(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_s32))) int64x2_t vqdmulltq_m_s32(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_s32))) int64x2_t vqdmulltq_m(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_n_s16))) int32x4_t 
vqdmulltq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_n_s16))) int32x4_t vqdmulltq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_n_s32))) int64x2_t vqdmulltq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_n_s32))) int64x2_t vqdmulltq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_s16))) int32x4_t vqdmulltq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_s16))) int32x4_t vqdmulltq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_s32))) int64x2_t vqdmulltq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_s32))) int64x2_t vqdmulltq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_s16))) int8x16_t vqmovnbq_m_s16(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_s16))) int8x16_t vqmovnbq_m(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_s32))) int16x8_t vqmovnbq_m_s32(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_s32))) int16x8_t vqmovnbq_m(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_u16))) uint8x16_t vqmovnbq_m_u16(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_u16))) uint8x16_t vqmovnbq_m(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_u32))) uint16x8_t vqmovnbq_m_u32(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_u32))) uint16x8_t vqmovnbq_m(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_s16))) int8x16_t vqmovnbq_s16(int8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_s16))) int8x16_t vqmovnbq(int8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_s32))) int16x8_t vqmovnbq_s32(int16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_s32))) int16x8_t vqmovnbq(int16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_u16))) uint8x16_t vqmovnbq_u16(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_u16))) uint8x16_t vqmovnbq(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_u32))) uint16x8_t vqmovnbq_u32(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_u32))) uint16x8_t 
vqmovnbq(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_s16))) int8x16_t vqmovntq_m_s16(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_s16))) int8x16_t vqmovntq_m(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_s32))) int16x8_t vqmovntq_m_s32(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_s32))) int16x8_t vqmovntq_m(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_u16))) uint8x16_t vqmovntq_m_u16(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_u16))) uint8x16_t vqmovntq_m(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_u32))) uint16x8_t vqmovntq_m_u32(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_u32))) uint16x8_t vqmovntq_m(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_s16))) int8x16_t vqmovntq_s16(int8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_s16))) int8x16_t vqmovntq(int8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_s32))) int16x8_t vqmovntq_s32(int16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_s32))) int16x8_t vqmovntq(int16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_u16))) uint8x16_t vqmovntq_u16(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_u16))) uint8x16_t vqmovntq(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_u32))) uint16x8_t vqmovntq_u32(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_u32))) uint16x8_t vqmovntq(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_m_s16))) uint8x16_t vqmovunbq_m_s16(uint8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_m_s16))) uint8x16_t vqmovunbq_m(uint8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_m_s32))) uint16x8_t vqmovunbq_m_s32(uint16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_m_s32))) uint16x8_t vqmovunbq_m(uint16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_s16))) uint8x16_t vqmovunbq_s16(uint8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_s16))) uint8x16_t vqmovunbq(uint8x16_t, int16x8_t); static __inline__ 
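The vqmovnbq/vqmovntq pairs declared above narrow a wider vector into the even ("bottom") or odd ("top") lanes of a narrower one, saturating on overflow. A minimal usage sketch, assuming an MVE-capable target (e.g. -march=armv8.1-m.main+mve) and the vdupq_n_s8 initialiser declared elsewhere in this header:

#include <arm_mve.h>

/* Interleave two saturated int16x8_t vectors into one int8x16_t:
   lanes of `lo` land in the even byte positions, lanes of `hi` in the odd ones. */
static int8x16_t pack_q15_pair(int16x8_t lo, int16x8_t hi)
{
    int8x16_t r = vqmovnbq(vdupq_n_s8(0), lo); /* bottom (even) lanes */
    return vqmovntq(r, hi);                    /* top (odd) lanes */
}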
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_s32))) uint16x8_t vqmovunbq_s32(uint16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_s32))) uint16x8_t vqmovunbq(uint16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_m_s16))) uint8x16_t vqmovuntq_m_s16(uint8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_m_s16))) uint8x16_t vqmovuntq_m(uint8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_m_s32))) uint16x8_t vqmovuntq_m_s32(uint16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_m_s32))) uint16x8_t vqmovuntq_m(uint16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_s16))) uint8x16_t vqmovuntq_s16(uint8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_s16))) uint8x16_t vqmovuntq(uint8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_s32))) uint16x8_t vqmovuntq_s32(uint16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_s32))) uint16x8_t vqmovuntq(uint16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s16))) int16x8_t vqnegq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s16))) int16x8_t vqnegq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s32))) int32x4_t vqnegq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s32))) int32x4_t vqnegq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s8))) int8x16_t vqnegq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s8))) int8x16_t vqnegq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s16))) int16x8_t vqnegq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s16))) int16x8_t vqnegq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s32))) int32x4_t vqnegq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s32))) int32x4_t vqnegq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s8))) int8x16_t vqnegq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s8))) int8x16_t vqnegq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s16))) int16x8_t vqrdmladhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s16))) int16x8_t vqrdmladhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s32))) int32x4_t vqrdmladhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s32))) int32x4_t vqrdmladhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s8))) int8x16_t vqrdmladhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s8))) int8x16_t vqrdmladhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s16))) int16x8_t vqrdmladhq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s16))) int16x8_t vqrdmladhq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s32))) int32x4_t vqrdmladhq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s32))) int32x4_t vqrdmladhq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s8))) int8x16_t vqrdmladhq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s8))) int8x16_t vqrdmladhq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s16))) int16x8_t vqrdmladhxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s16))) int16x8_t vqrdmladhxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s32))) int32x4_t vqrdmladhxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s32))) int32x4_t vqrdmladhxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s8))) int8x16_t vqrdmladhxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s8))) int8x16_t vqrdmladhxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s16))) int16x8_t vqrdmladhxq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s16))) int16x8_t vqrdmladhxq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s32))) int32x4_t vqrdmladhxq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s32))) int32x4_t vqrdmladhxq(int32x4_t, int32x4_t, int32x4_t); static 
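vqnegq, declared a little earlier in this listing, is the saturating negate: a lane holding the most negative value clamps to the maximum instead of wrapping. A minimal sketch under the same include/target assumptions as the previous example:

#include <arm_mve.h>

/* -INT16_MIN does not fit in int16_t; vqnegq returns INT16_MAX for that lane
   instead of wrapping back to INT16_MIN. */
static int16x8_t negate_saturating(int16x8_t x)
{
    return vqnegq(x);
}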
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s8))) int8x16_t vqrdmladhxq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s8))) int8x16_t vqrdmladhxq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s16))) int16x8_t vqrdmlahq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s16))) int16x8_t vqrdmlahq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s32))) int32x4_t vqrdmlahq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s32))) int32x4_t vqrdmlahq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s8))) int8x16_t vqrdmlahq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s8))) int8x16_t vqrdmlahq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s16))) int16x8_t vqrdmlahq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s16))) int16x8_t vqrdmlahq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s32))) int32x4_t vqrdmlahq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s32))) int32x4_t vqrdmlahq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s8))) int8x16_t vqrdmlahq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s8))) int8x16_t vqrdmlahq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s16))) int16x8_t vqrdmlashq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s16))) int16x8_t vqrdmlashq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s32))) int32x4_t vqrdmlashq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s32))) int32x4_t vqrdmlashq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s8))) int8x16_t vqrdmlashq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s8))) int8x16_t vqrdmlashq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s16))) int16x8_t vqrdmlashq_n_s16(int16x8_t, int16x8_t, 
int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s16))) int16x8_t vqrdmlashq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s32))) int32x4_t vqrdmlashq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s32))) int32x4_t vqrdmlashq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s8))) int8x16_t vqrdmlashq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s8))) int8x16_t vqrdmlashq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s16))) int16x8_t vqrdmlsdhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s16))) int16x8_t vqrdmlsdhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s32))) int32x4_t vqrdmlsdhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s32))) int32x4_t vqrdmlsdhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s8))) int8x16_t vqrdmlsdhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s8))) int8x16_t vqrdmlsdhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s16))) int16x8_t vqrdmlsdhq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s16))) int16x8_t vqrdmlsdhq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s32))) int32x4_t vqrdmlsdhq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s32))) int32x4_t vqrdmlsdhq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s8))) int8x16_t vqrdmlsdhq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s8))) int8x16_t vqrdmlsdhq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s16))) int16x8_t vqrdmlsdhxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s16))) int16x8_t vqrdmlsdhxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s32))) int32x4_t vqrdmlsdhxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s32))) int32x4_t vqrdmlsdhxq_m(int32x4_t, int32x4_t, 
int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s8))) int8x16_t vqrdmlsdhxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s8))) int8x16_t vqrdmlsdhxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s16))) int16x8_t vqrdmlsdhxq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s16))) int16x8_t vqrdmlsdhxq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s32))) int32x4_t vqrdmlsdhxq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s32))) int32x4_t vqrdmlsdhxq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s8))) int8x16_t vqrdmlsdhxq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s8))) int8x16_t vqrdmlsdhxq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s16))) int16x8_t vqrdmulhq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s16))) int16x8_t vqrdmulhq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s32))) int32x4_t vqrdmulhq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s32))) int32x4_t vqrdmulhq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s8))) int8x16_t vqrdmulhq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s8))) int8x16_t vqrdmulhq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s16))) int16x8_t vqrdmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s16))) int16x8_t vqrdmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s32))) int32x4_t vqrdmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s32))) int32x4_t vqrdmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s8))) int8x16_t vqrdmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s8))) int8x16_t vqrdmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ 
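The _m ("merge") variants throughout this listing take an extra leading vector plus an mve_pred16_t: lanes whose predicate bits are set are computed, the remaining lanes are copied from that first argument. A hedged sketch using vqrdmulhq_m (saturating rounding Q15 multiply), with the predicate supplied by the caller:

#include <arm_mve.h>

/* Q15 multiply (2*a*b >> 16, rounded, saturated) on predicated lanes only;
   inactive lanes keep the value from `fallback`. */
static int16x8_t q15_mul_masked(int16x8_t fallback, int16x8_t a, int16x8_t b,
                                mve_pred16_t p)
{
    return vqrdmulhq_m(fallback, a, b, p);
}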
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s16))) int16x8_t vqrdmulhq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s16))) int16x8_t vqrdmulhq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s32))) int32x4_t vqrdmulhq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s32))) int32x4_t vqrdmulhq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s8))) int8x16_t vqrdmulhq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s8))) int8x16_t vqrdmulhq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s16))) int16x8_t vqrdmulhq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s16))) int16x8_t vqrdmulhq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s32))) int32x4_t vqrdmulhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s32))) int32x4_t vqrdmulhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s8))) int8x16_t vqrdmulhq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s8))) int8x16_t vqrdmulhq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s16))) int16x8_t vqrshlq_m_n_s16(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s16))) int16x8_t vqrshlq_m_n(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s32))) int32x4_t vqrshlq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s32))) int32x4_t vqrshlq_m_n(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s8))) int8x16_t vqrshlq_m_n_s8(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s8))) int8x16_t vqrshlq_m_n(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u16))) uint16x8_t vqrshlq_m_n_u16(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u16))) uint16x8_t vqrshlq_m_n(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u32))) uint32x4_t vqrshlq_m_n_u32(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u32))) uint32x4_t vqrshlq_m_n(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u8))) uint8x16_t vqrshlq_m_n_u8(uint8x16_t, int32_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u8))) uint8x16_t vqrshlq_m_n(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s16))) int16x8_t vqrshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s16))) int16x8_t vqrshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s32))) int32x4_t vqrshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s32))) int32x4_t vqrshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s8))) int8x16_t vqrshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s8))) int8x16_t vqrshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u16))) uint16x8_t vqrshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u16))) uint16x8_t vqrshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u32))) uint32x4_t vqrshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u32))) uint32x4_t vqrshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u8))) uint8x16_t vqrshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u8))) uint8x16_t vqrshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s16))) int16x8_t vqrshlq_n_s16(int16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s16))) int16x8_t vqrshlq(int16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s32))) int32x4_t vqrshlq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s32))) int32x4_t vqrshlq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s8))) int8x16_t vqrshlq_n_s8(int8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s8))) int8x16_t vqrshlq(int8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u16))) uint16x8_t vqrshlq_n_u16(uint16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u16))) uint16x8_t vqrshlq(uint16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u32))) uint32x4_t 
vqrshlq_n_u32(uint32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u32))) uint32x4_t vqrshlq(uint32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u8))) uint8x16_t vqrshlq_n_u8(uint8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u8))) uint8x16_t vqrshlq(uint8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s16))) int16x8_t vqrshlq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s16))) int16x8_t vqrshlq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s32))) int32x4_t vqrshlq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s32))) int32x4_t vqrshlq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s8))) int8x16_t vqrshlq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s8))) int8x16_t vqrshlq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u16))) uint16x8_t vqrshlq_u16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u16))) uint16x8_t vqrshlq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u32))) uint32x4_t vqrshlq_u32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u32))) uint32x4_t vqrshlq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u8))) uint8x16_t vqrshlq_u8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u8))) uint8x16_t vqrshlq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_s16))) int8x16_t vqrshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_s16))) int8x16_t vqrshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_s32))) int16x8_t vqrshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_s32))) int16x8_t vqrshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_u16))) uint8x16_t vqrshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_u16))) uint8x16_t vqrshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_u32))) uint16x8_t vqrshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_u32))) 
uint16x8_t vqrshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_s16))) int8x16_t vqrshrnbq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_s16))) int8x16_t vqrshrnbq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_s32))) int16x8_t vqrshrnbq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_s32))) int16x8_t vqrshrnbq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_u16))) uint8x16_t vqrshrnbq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_u16))) uint8x16_t vqrshrnbq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_u32))) uint16x8_t vqrshrnbq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_u32))) uint16x8_t vqrshrnbq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_s16))) int8x16_t vqrshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_s16))) int8x16_t vqrshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_s32))) int16x8_t vqrshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_s32))) int16x8_t vqrshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_u16))) uint8x16_t vqrshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_u16))) uint8x16_t vqrshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_u32))) uint16x8_t vqrshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_u32))) uint16x8_t vqrshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_s16))) int8x16_t vqrshrntq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_s16))) int8x16_t vqrshrntq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_s32))) int16x8_t vqrshrntq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_s32))) int16x8_t vqrshrntq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_u16))) uint8x16_t vqrshrntq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ 
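vqrshrnbq, declared just above, combines a rounding right shift by an immediate (1..8 when narrowing 16-bit lanes to 8-bit) with saturation and insertion into the even byte lanes of the first operand. A minimal sketch, same assumptions as the earlier examples:

#include <arm_mve.h>

/* Convert Q15 samples to Q7: shift right by 8 with rounding, saturate to int8_t,
   and write the results into the even (bottom) byte lanes of `acc`. */
static int8x16_t q15_to_q7_bottom(int8x16_t acc, int16x8_t x)
{
    return vqrshrnbq(acc, x, 8);
}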
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_u16))) uint8x16_t vqrshrntq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_u32))) uint16x8_t vqrshrntq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_u32))) uint16x8_t vqrshrntq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_m_n_s16))) uint8x16_t vqrshrunbq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_m_n_s16))) uint8x16_t vqrshrunbq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_m_n_s32))) uint16x8_t vqrshrunbq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_m_n_s32))) uint16x8_t vqrshrunbq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_n_s16))) uint8x16_t vqrshrunbq_n_s16(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_n_s16))) uint8x16_t vqrshrunbq(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_n_s32))) uint16x8_t vqrshrunbq_n_s32(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_n_s32))) uint16x8_t vqrshrunbq(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_m_n_s16))) uint8x16_t vqrshruntq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_m_n_s16))) uint8x16_t vqrshruntq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_m_n_s32))) uint16x8_t vqrshruntq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_m_n_s32))) uint16x8_t vqrshruntq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_n_s16))) uint8x16_t vqrshruntq_n_s16(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_n_s16))) uint8x16_t vqrshruntq(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_n_s32))) uint16x8_t vqrshruntq_n_s32(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_n_s32))) uint16x8_t vqrshruntq(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s16))) int16x8_t vqshlq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s16))) int16x8_t vqshlq_m_n(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ 
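The vqrshrun* forms declared above perform the same rounding narrow but saturate a signed source into an unsigned result, so negative lanes clamp to 0; this is the usual route from signed intermediates to pixel-like uint8 data. Sketch under the same assumptions:

#include <arm_mve.h>

/* Signed Q15 -> unsigned 8-bit with rounding: negative lanes clamp to 0,
   anything above 255 after the shift clamps to 255. */
static uint8x16_t q15_to_u8_bottom(uint8x16_t acc, int16x8_t x)
{
    return vqrshrunbq(acc, x, 7);
}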
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s32))) int32x4_t vqshlq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s32))) int32x4_t vqshlq_m_n(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s8))) int8x16_t vqshlq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s8))) int8x16_t vqshlq_m_n(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u16))) uint16x8_t vqshlq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u16))) uint16x8_t vqshlq_m_n(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u32))) uint32x4_t vqshlq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u32))) uint32x4_t vqshlq_m_n(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u8))) uint8x16_t vqshlq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u8))) uint8x16_t vqshlq_m_n(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s16))) int16x8_t vqshlq_m_r_s16(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s16))) int16x8_t vqshlq_m_r(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s32))) int32x4_t vqshlq_m_r_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s32))) int32x4_t vqshlq_m_r(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s8))) int8x16_t vqshlq_m_r_s8(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s8))) int8x16_t vqshlq_m_r(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u16))) uint16x8_t vqshlq_m_r_u16(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u16))) uint16x8_t vqshlq_m_r(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u32))) uint32x4_t vqshlq_m_r_u32(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u32))) uint32x4_t vqshlq_m_r(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u8))) uint8x16_t vqshlq_m_r_u8(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u8))) uint8x16_t vqshlq_m_r(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s16))) int16x8_t vqshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s16))) int16x8_t vqshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s32))) int32x4_t vqshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s32))) int32x4_t vqshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s8))) int8x16_t vqshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s8))) int8x16_t vqshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u16))) uint16x8_t vqshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u16))) uint16x8_t vqshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u32))) uint32x4_t vqshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u32))) uint32x4_t vqshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u8))) uint8x16_t vqshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u8))) uint8x16_t vqshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s16))) int16x8_t vqshlq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s16))) int16x8_t vqshlq_n(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s32))) int32x4_t vqshlq_n_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s32))) int32x4_t vqshlq_n(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s8))) int8x16_t vqshlq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s8))) int8x16_t vqshlq_n(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u16))) uint16x8_t vqshlq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u16))) uint16x8_t vqshlq_n(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u32))) uint32x4_t vqshlq_n_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u32))) 
uint32x4_t vqshlq_n(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u8))) uint8x16_t vqshlq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u8))) uint8x16_t vqshlq_n(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s16))) int16x8_t vqshlq_r_s16(int16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s16))) int16x8_t vqshlq_r(int16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s32))) int32x4_t vqshlq_r_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s32))) int32x4_t vqshlq_r(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s8))) int8x16_t vqshlq_r_s8(int8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s8))) int8x16_t vqshlq_r(int8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u16))) uint16x8_t vqshlq_r_u16(uint16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u16))) uint16x8_t vqshlq_r(uint16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u32))) uint32x4_t vqshlq_r_u32(uint32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u32))) uint32x4_t vqshlq_r(uint32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u8))) uint8x16_t vqshlq_r_u8(uint8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u8))) uint8x16_t vqshlq_r(uint8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s16))) int16x8_t vqshlq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s16))) int16x8_t vqshlq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s32))) int32x4_t vqshlq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s32))) int32x4_t vqshlq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s8))) int8x16_t vqshlq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s8))) int8x16_t vqshlq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u16))) uint16x8_t vqshlq_u16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u16))) uint16x8_t vqshlq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u32))) uint32x4_t vqshlq_u32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u32))) uint32x4_t vqshlq(uint32x4_t, int32x4_t); static __inline__ 
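vqshlq_n, declared above, is the saturating left shift by an immediate (0..15 for 16-bit lanes): results that would overflow the lane type clamp to the type's extremes rather than wrapping. Minimal sketch:

#include <arm_mve.h>

/* Multiply each lane by 4 via a saturating left shift; e.g. 0x7FFF stays 0x7FFF
   instead of wrapping to a negative value. */
static int16x8_t scale_by_4_saturating(int16x8_t x)
{
    return vqshlq_n(x, 2);
}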
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u8))) uint8x16_t vqshlq_u8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u8))) uint8x16_t vqshlq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s16))) uint16x8_t vqshluq_m_n_s16(uint16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s16))) uint16x8_t vqshluq_m(uint16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s32))) uint32x4_t vqshluq_m_n_s32(uint32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s32))) uint32x4_t vqshluq_m(uint32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s8))) uint8x16_t vqshluq_m_n_s8(uint8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s8))) uint8x16_t vqshluq_m(uint8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s16))) uint16x8_t vqshluq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s16))) uint16x8_t vqshluq(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s32))) uint32x4_t vqshluq_n_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s32))) uint32x4_t vqshluq(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s8))) uint8x16_t vqshluq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s8))) uint8x16_t vqshluq(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_s16))) int8x16_t vqshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_s16))) int8x16_t vqshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_s32))) int16x8_t vqshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_s32))) int16x8_t vqshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_u16))) uint8x16_t vqshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_u16))) uint8x16_t vqshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_u32))) uint16x8_t vqshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_u32))) uint16x8_t vqshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ 
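vqshluq, declared above, shifts a signed vector left by an immediate and saturates into the corresponding unsigned type, so negative lanes become 0. A sketch under the same assumptions:

#include <arm_mve.h>

/* Double each lane and convert to unsigned with saturation:
   negative inputs clamp to 0, large positives clamp to UINT16_MAX. */
static uint16x8_t double_to_unsigned(int16x8_t x)
{
    return vqshluq(x, 1);
}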
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_s16))) int8x16_t vqshrnbq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_s16))) int8x16_t vqshrnbq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_s32))) int16x8_t vqshrnbq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_s32))) int16x8_t vqshrnbq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_u16))) uint8x16_t vqshrnbq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_u16))) uint8x16_t vqshrnbq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_u32))) uint16x8_t vqshrnbq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_u32))) uint16x8_t vqshrnbq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_s16))) int8x16_t vqshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_s16))) int8x16_t vqshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_s32))) int16x8_t vqshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_s32))) int16x8_t vqshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_u16))) uint8x16_t vqshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_u16))) uint8x16_t vqshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_u32))) uint16x8_t vqshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_u32))) uint16x8_t vqshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_s16))) int8x16_t vqshrntq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_s16))) int8x16_t vqshrntq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_s32))) int16x8_t vqshrntq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_s32))) int16x8_t vqshrntq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_u16))) uint8x16_t vqshrntq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_u16))) uint8x16_t vqshrntq(uint8x16_t, uint16x8_t, int); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_u32))) uint16x8_t vqshrntq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_u32))) uint16x8_t vqshrntq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_m_n_s16))) uint8x16_t vqshrunbq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_m_n_s16))) uint8x16_t vqshrunbq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_m_n_s32))) uint16x8_t vqshrunbq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_m_n_s32))) uint16x8_t vqshrunbq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_n_s16))) uint8x16_t vqshrunbq_n_s16(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_n_s16))) uint8x16_t vqshrunbq(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_n_s32))) uint16x8_t vqshrunbq_n_s32(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_n_s32))) uint16x8_t vqshrunbq(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_m_n_s16))) uint8x16_t vqshruntq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_m_n_s16))) uint8x16_t vqshruntq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_m_n_s32))) uint16x8_t vqshruntq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_m_n_s32))) uint16x8_t vqshruntq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_n_s16))) uint8x16_t vqshruntq_n_s16(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_n_s16))) uint8x16_t vqshruntq(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_n_s32))) uint16x8_t vqshruntq_n_s32(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_n_s32))) uint16x8_t vqshruntq(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s16))) int16x8_t vqsubq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s16))) int16x8_t vqsubq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s32))) int32x4_t vqsubq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s32))) int32x4_t vqsubq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s8))) int8x16_t vqsubq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s8))) int8x16_t vqsubq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u16))) uint16x8_t vqsubq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u16))) uint16x8_t vqsubq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u32))) uint32x4_t vqsubq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u32))) uint32x4_t vqsubq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u8))) uint8x16_t vqsubq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u8))) uint8x16_t vqsubq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s16))) int16x8_t vqsubq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s16))) int16x8_t vqsubq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s32))) int32x4_t vqsubq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s32))) int32x4_t vqsubq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s8))) int8x16_t vqsubq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s8))) int8x16_t vqsubq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u16))) uint16x8_t vqsubq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u16))) uint16x8_t vqsubq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u32))) uint32x4_t vqsubq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u32))) uint32x4_t vqsubq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u8))) uint8x16_t vqsubq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u8))) uint8x16_t vqsubq_m(uint8x16_t, 
uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s16))) int16x8_t vqsubq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s16))) int16x8_t vqsubq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s32))) int32x4_t vqsubq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s32))) int32x4_t vqsubq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s8))) int8x16_t vqsubq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s8))) int8x16_t vqsubq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u16))) uint16x8_t vqsubq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u16))) uint16x8_t vqsubq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u32))) uint32x4_t vqsubq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u32))) uint32x4_t vqsubq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u8))) uint8x16_t vqsubq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u8))) uint8x16_t vqsubq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s16))) int16x8_t vqsubq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s16))) int16x8_t vqsubq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s32))) int32x4_t vqsubq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s32))) int32x4_t vqsubq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s8))) int8x16_t vqsubq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s8))) int8x16_t vqsubq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u16))) uint16x8_t vqsubq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u16))) uint16x8_t vqsubq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u32))) uint32x4_t vqsubq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u32))) uint32x4_t vqsubq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u8))) uint8x16_t vqsubq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u8))) uint8x16_t vqsubq(uint8x16_t, uint8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s32))) int16x8_t vreinterpretq_s16_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s32))) int16x8_t vreinterpretq_s16(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s64))) int16x8_t vreinterpretq_s16_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s64))) int16x8_t vreinterpretq_s16(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s8))) int16x8_t vreinterpretq_s16_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s8))) int16x8_t vreinterpretq_s16(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u16))) int16x8_t vreinterpretq_s16_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u16))) int16x8_t vreinterpretq_s16(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u32))) int16x8_t vreinterpretq_s16_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u32))) int16x8_t vreinterpretq_s16(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u64))) int16x8_t vreinterpretq_s16_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u64))) int16x8_t vreinterpretq_s16(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u8))) int16x8_t vreinterpretq_s16_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u8))) int16x8_t vreinterpretq_s16(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s16))) int32x4_t vreinterpretq_s32_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s16))) int32x4_t vreinterpretq_s32(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s64))) int32x4_t vreinterpretq_s32_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s64))) int32x4_t vreinterpretq_s32(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s8))) int32x4_t vreinterpretq_s32_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s8))) int32x4_t vreinterpretq_s32(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u16))) int32x4_t vreinterpretq_s32_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u16))) int32x4_t vreinterpretq_s32(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u32))) int32x4_t vreinterpretq_s32_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u32))) int32x4_t vreinterpretq_s32(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u64))) int32x4_t vreinterpretq_s32_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u64))) int32x4_t vreinterpretq_s32(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u8))) int32x4_t vreinterpretq_s32_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u8))) int32x4_t vreinterpretq_s32(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s16))) int64x2_t vreinterpretq_s64_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s16))) int64x2_t vreinterpretq_s64(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s32))) int64x2_t vreinterpretq_s64_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s32))) int64x2_t vreinterpretq_s64(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s8))) int64x2_t vreinterpretq_s64_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s8))) int64x2_t vreinterpretq_s64(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u16))) int64x2_t vreinterpretq_s64_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u16))) int64x2_t vreinterpretq_s64(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u32))) int64x2_t vreinterpretq_s64_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u32))) int64x2_t vreinterpretq_s64(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u64))) int64x2_t vreinterpretq_s64_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u64))) int64x2_t vreinterpretq_s64(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u8))) int64x2_t vreinterpretq_s64_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u8))) int64x2_t vreinterpretq_s64(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s16))) int8x16_t vreinterpretq_s8_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s16))) int8x16_t vreinterpretq_s8(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s32))) int8x16_t vreinterpretq_s8_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s32))) int8x16_t vreinterpretq_s8(int32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s64))) int8x16_t vreinterpretq_s8_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s64))) int8x16_t vreinterpretq_s8(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u16))) int8x16_t vreinterpretq_s8_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u16))) int8x16_t vreinterpretq_s8(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u32))) int8x16_t vreinterpretq_s8_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u32))) int8x16_t vreinterpretq_s8(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u64))) int8x16_t vreinterpretq_s8_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u64))) int8x16_t vreinterpretq_s8(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u8))) int8x16_t vreinterpretq_s8_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u8))) int8x16_t vreinterpretq_s8(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s16))) uint16x8_t vreinterpretq_u16_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s16))) uint16x8_t vreinterpretq_u16(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s32))) uint16x8_t vreinterpretq_u16_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s32))) uint16x8_t vreinterpretq_u16(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s64))) uint16x8_t vreinterpretq_u16_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s64))) uint16x8_t vreinterpretq_u16(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s8))) uint16x8_t vreinterpretq_u16_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s8))) uint16x8_t vreinterpretq_u16(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u32))) uint16x8_t vreinterpretq_u16_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u32))) uint16x8_t vreinterpretq_u16(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u64))) uint16x8_t vreinterpretq_u16_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u64))) uint16x8_t vreinterpretq_u16(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u8))) uint16x8_t vreinterpretq_u16_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u8))) uint16x8_t vreinterpretq_u16(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s16))) uint32x4_t vreinterpretq_u32_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s16))) uint32x4_t vreinterpretq_u32(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s32))) uint32x4_t vreinterpretq_u32_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s32))) uint32x4_t vreinterpretq_u32(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s64))) uint32x4_t vreinterpretq_u32_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s64))) uint32x4_t vreinterpretq_u32(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s8))) uint32x4_t vreinterpretq_u32_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s8))) uint32x4_t vreinterpretq_u32(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u16))) uint32x4_t vreinterpretq_u32_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u16))) uint32x4_t vreinterpretq_u32(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u64))) uint32x4_t vreinterpretq_u32_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u64))) uint32x4_t vreinterpretq_u32(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u8))) uint32x4_t vreinterpretq_u32_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u8))) uint32x4_t vreinterpretq_u32(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s16))) uint64x2_t vreinterpretq_u64_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s16))) uint64x2_t vreinterpretq_u64(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s32))) uint64x2_t vreinterpretq_u64_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s32))) uint64x2_t vreinterpretq_u64(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s64))) uint64x2_t vreinterpretq_u64_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s64))) uint64x2_t vreinterpretq_u64(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s8))) uint64x2_t vreinterpretq_u64_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s8))) uint64x2_t vreinterpretq_u64(int8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u16))) uint64x2_t vreinterpretq_u64_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u16))) uint64x2_t vreinterpretq_u64(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u32))) uint64x2_t vreinterpretq_u64_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u32))) uint64x2_t vreinterpretq_u64(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u8))) uint64x2_t vreinterpretq_u64_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u8))) uint64x2_t vreinterpretq_u64(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s16))) uint8x16_t vreinterpretq_u8_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s16))) uint8x16_t vreinterpretq_u8(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s32))) uint8x16_t vreinterpretq_u8_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s32))) uint8x16_t vreinterpretq_u8(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s64))) uint8x16_t vreinterpretq_u8_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s64))) uint8x16_t vreinterpretq_u8(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s8))) uint8x16_t vreinterpretq_u8_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s8))) uint8x16_t vreinterpretq_u8(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u16))) uint8x16_t vreinterpretq_u8_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u16))) uint8x16_t vreinterpretq_u8(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u32))) uint8x16_t vreinterpretq_u8_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u32))) uint8x16_t vreinterpretq_u8(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u64))) uint8x16_t vreinterpretq_u8_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u64))) uint8x16_t vreinterpretq_u8(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_m_s8))) int8x16_t vrev16q_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_m_s8))) int8x16_t vrev16q_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_m_u8))) uint8x16_t vrev16q_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_m_u8))) uint8x16_t vrev16q_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_s8))) int8x16_t vrev16q_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_s8))) int8x16_t vrev16q(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_u8))) uint8x16_t vrev16q_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_u8))) uint8x16_t vrev16q(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_x_s8))) int8x16_t vrev16q_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_x_s8))) int8x16_t vrev16q_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_x_u8))) uint8x16_t vrev16q_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_x_u8))) uint8x16_t vrev16q_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_s16))) int16x8_t vrev32q_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_s16))) int16x8_t vrev32q_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_s8))) int8x16_t vrev32q_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_s8))) int8x16_t vrev32q_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_u16))) uint16x8_t vrev32q_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_u16))) uint16x8_t vrev32q_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_u8))) uint8x16_t vrev32q_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_u8))) uint8x16_t vrev32q_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_s16))) int16x8_t vrev32q_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_s16))) int16x8_t vrev32q(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_s8))) int8x16_t vrev32q_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_s8))) int8x16_t vrev32q(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_u16))) uint16x8_t vrev32q_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_u16))) uint16x8_t vrev32q(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_u8))) uint8x16_t vrev32q_u8(uint8x16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_u8))) uint8x16_t vrev32q(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_s16))) int16x8_t vrev32q_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_s16))) int16x8_t vrev32q_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_s8))) int8x16_t vrev32q_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_s8))) int8x16_t vrev32q_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_u16))) uint16x8_t vrev32q_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_u16))) uint16x8_t vrev32q_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_u8))) uint8x16_t vrev32q_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_u8))) uint8x16_t vrev32q_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s16))) int16x8_t vrev64q_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s16))) int16x8_t vrev64q_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s32))) int32x4_t vrev64q_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s32))) int32x4_t vrev64q_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s8))) int8x16_t vrev64q_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s8))) int8x16_t vrev64q_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u16))) uint16x8_t vrev64q_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u16))) uint16x8_t vrev64q_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u32))) uint32x4_t vrev64q_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u32))) uint32x4_t vrev64q_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u8))) uint8x16_t vrev64q_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u8))) uint8x16_t vrev64q_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s16))) int16x8_t vrev64q_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s16))) int16x8_t vrev64q(int16x8_t); 
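/*
 * Illustrative sketch only, not part of the generated declarations above or below:
 * two hypothetical helpers (names chosen for this example) showing how the
 * reinterpret/reverse and saturating-subtract intrinsics declared earlier are
 * commonly combined in user code.
 */
static __inline__ uint16x8_t example_bswap16_lanes(uint16x8_t v)
{
    /* Reinterpret casts are free at run time; vrev16q reverses the two bytes
       inside every 16-bit container, so this byte-swaps each lane of v. */
    uint8x16_t bytes = vreinterpretq_u8_u16(v);
    bytes = vrev16q_u8(bytes);
    return vreinterpretq_u16_u8(bytes);
}
static __inline__ uint8x16_t example_sat_sub_masked(uint8x16_t a, uint8x16_t b,
                                                    uint8x16_t inactive,
                                                    mve_pred16_t p)
{
    /* Predicated (_m) form: lanes whose predicate bits are set receive the
       saturating difference a - b; the remaining lanes are copied from
       'inactive' unchanged. */
    return vqsubq_m(inactive, a, b, p);
}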
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s32))) int32x4_t vrev64q_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s32))) int32x4_t vrev64q(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s8))) int8x16_t vrev64q_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s8))) int8x16_t vrev64q(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u16))) uint16x8_t vrev64q_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u16))) uint16x8_t vrev64q(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u32))) uint32x4_t vrev64q_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u32))) uint32x4_t vrev64q(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u8))) uint8x16_t vrev64q_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u8))) uint8x16_t vrev64q(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s16))) int16x8_t vrev64q_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s16))) int16x8_t vrev64q_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s32))) int32x4_t vrev64q_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s32))) int32x4_t vrev64q_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s8))) int8x16_t vrev64q_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s8))) int8x16_t vrev64q_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u16))) uint16x8_t vrev64q_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u16))) uint16x8_t vrev64q_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u32))) uint32x4_t vrev64q_x_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u32))) uint32x4_t vrev64q_x(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u8))) uint8x16_t vrev64q_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u8))) uint8x16_t vrev64q_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s16))) int16x8_t vrhaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s16))) int16x8_t vrhaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s32))) int32x4_t vrhaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s32))) int32x4_t vrhaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s8))) int8x16_t vrhaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s8))) int8x16_t vrhaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u16))) uint16x8_t vrhaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u16))) uint16x8_t vrhaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u32))) uint32x4_t vrhaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u32))) uint32x4_t vrhaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u8))) uint8x16_t vrhaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u8))) uint8x16_t vrhaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s16))) int16x8_t vrhaddq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s16))) int16x8_t vrhaddq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s32))) int32x4_t vrhaddq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s32))) int32x4_t vrhaddq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s8))) int8x16_t vrhaddq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s8))) int8x16_t vrhaddq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u16))) uint16x8_t vrhaddq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u16))) uint16x8_t vrhaddq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u32))) uint32x4_t vrhaddq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u32))) uint32x4_t vrhaddq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u8))) uint8x16_t vrhaddq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u8))) uint8x16_t vrhaddq(uint8x16_t, uint8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s16))) int16x8_t vrhaddq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s16))) int16x8_t vrhaddq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s32))) int32x4_t vrhaddq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s32))) int32x4_t vrhaddq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s8))) int8x16_t vrhaddq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s8))) int8x16_t vrhaddq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u16))) uint16x8_t vrhaddq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u16))) uint16x8_t vrhaddq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u32))) uint32x4_t vrhaddq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u32))) uint32x4_t vrhaddq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u8))) uint8x16_t vrhaddq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u8))) uint8x16_t vrhaddq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_p_s32))) int64_t vrmlaldavhaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_p_s32))) int64_t vrmlaldavhaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_p_u32))) uint64_t vrmlaldavhaq_p_u32(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_p_u32))) uint64_t vrmlaldavhaq_p(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_s32))) int64_t vrmlaldavhaq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_s32))) int64_t vrmlaldavhaq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_u32))) uint64_t vrmlaldavhaq_u32(uint64_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_u32))) uint64_t vrmlaldavhaq(uint64_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaxq_p_s32))) int64_t vrmlaldavhaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaxq_p_s32))) int64_t vrmlaldavhaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaxq_s32))) int64_t vrmlaldavhaxq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaxq_s32))) int64_t vrmlaldavhaxq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_p_s32))) int64_t vrmlaldavhq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_p_s32))) int64_t vrmlaldavhq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_p_u32))) uint64_t vrmlaldavhq_p_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_p_u32))) uint64_t vrmlaldavhq_p(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_s32))) int64_t vrmlaldavhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_s32))) int64_t vrmlaldavhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_u32))) uint64_t vrmlaldavhq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_u32))) uint64_t vrmlaldavhq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhxq_p_s32))) int64_t vrmlaldavhxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhxq_p_s32))) int64_t vrmlaldavhxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhxq_s32))) int64_t vrmlaldavhxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhxq_s32))) int64_t vrmlaldavhxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaq_p_s32))) int64_t vrmlsldavhaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaq_p_s32))) int64_t vrmlsldavhaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaq_s32))) int64_t vrmlsldavhaq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaq_s32))) int64_t vrmlsldavhaq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaxq_p_s32))) int64_t vrmlsldavhaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaxq_p_s32))) int64_t vrmlsldavhaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaxq_s32))) int64_t 
vrmlsldavhaxq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaxq_s32))) int64_t vrmlsldavhaxq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhq_p_s32))) int64_t vrmlsldavhq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhq_p_s32))) int64_t vrmlsldavhq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhq_s32))) int64_t vrmlsldavhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhq_s32))) int64_t vrmlsldavhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhxq_p_s32))) int64_t vrmlsldavhxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhxq_p_s32))) int64_t vrmlsldavhxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhxq_s32))) int64_t vrmlsldavhxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhxq_s32))) int64_t vrmlsldavhxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s16))) int16x8_t vrmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s16))) int16x8_t vrmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s32))) int32x4_t vrmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s32))) int32x4_t vrmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s8))) int8x16_t vrmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s8))) int8x16_t vrmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u16))) uint16x8_t vrmulhq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u16))) uint16x8_t vrmulhq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u32))) uint32x4_t vrmulhq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u32))) uint32x4_t vrmulhq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u8))) uint8x16_t vrmulhq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u8))) uint8x16_t vrmulhq_m(uint8x16_t, 
uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s16))) int16x8_t vrmulhq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s16))) int16x8_t vrmulhq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s32))) int32x4_t vrmulhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s32))) int32x4_t vrmulhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s8))) int8x16_t vrmulhq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s8))) int8x16_t vrmulhq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u16))) uint16x8_t vrmulhq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u16))) uint16x8_t vrmulhq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u32))) uint32x4_t vrmulhq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u32))) uint32x4_t vrmulhq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u8))) uint8x16_t vrmulhq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u8))) uint8x16_t vrmulhq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s16))) int16x8_t vrmulhq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s16))) int16x8_t vrmulhq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s32))) int32x4_t vrmulhq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s32))) int32x4_t vrmulhq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s8))) int8x16_t vrmulhq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s8))) int8x16_t vrmulhq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u16))) uint16x8_t vrmulhq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u16))) uint16x8_t vrmulhq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u32))) uint32x4_t vrmulhq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u32))) uint32x4_t vrmulhq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u8))) uint8x16_t vrmulhq_x_u8(uint8x16_t, uint8x16_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u8))) uint8x16_t vrmulhq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s16))) int16x8_t vrshlq_m_n_s16(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s16))) int16x8_t vrshlq_m_n(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s32))) int32x4_t vrshlq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s32))) int32x4_t vrshlq_m_n(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s8))) int8x16_t vrshlq_m_n_s8(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s8))) int8x16_t vrshlq_m_n(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u16))) uint16x8_t vrshlq_m_n_u16(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u16))) uint16x8_t vrshlq_m_n(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u32))) uint32x4_t vrshlq_m_n_u32(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u32))) uint32x4_t vrshlq_m_n(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u8))) uint8x16_t vrshlq_m_n_u8(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u8))) uint8x16_t vrshlq_m_n(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s16))) int16x8_t vrshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s16))) int16x8_t vrshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s32))) int32x4_t vrshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s32))) int32x4_t vrshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s8))) int8x16_t vrshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s8))) int8x16_t vrshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u16))) uint16x8_t vrshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u16))) uint16x8_t vrshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u32))) uint32x4_t vrshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u32))) uint32x4_t vrshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u8))) uint8x16_t vrshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u8))) uint8x16_t vrshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s16))) int16x8_t vrshlq_n_s16(int16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s16))) int16x8_t vrshlq(int16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s32))) int32x4_t vrshlq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s32))) int32x4_t vrshlq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s8))) int8x16_t vrshlq_n_s8(int8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s8))) int8x16_t vrshlq(int8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u16))) uint16x8_t vrshlq_n_u16(uint16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u16))) uint16x8_t vrshlq(uint16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u32))) uint32x4_t vrshlq_n_u32(uint32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u32))) uint32x4_t vrshlq(uint32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u8))) uint8x16_t vrshlq_n_u8(uint8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u8))) uint8x16_t vrshlq(uint8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s16))) int16x8_t vrshlq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s16))) int16x8_t vrshlq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s32))) int32x4_t vrshlq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s32))) int32x4_t vrshlq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s8))) int8x16_t vrshlq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s8))) int8x16_t vrshlq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u16))) uint16x8_t vrshlq_u16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u16))) uint16x8_t vrshlq(uint16x8_t, int16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u32))) uint32x4_t vrshlq_u32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u32))) uint32x4_t vrshlq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u8))) uint8x16_t vrshlq_u8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u8))) uint8x16_t vrshlq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s16))) int16x8_t vrshlq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s16))) int16x8_t vrshlq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s32))) int32x4_t vrshlq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s32))) int32x4_t vrshlq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s8))) int8x16_t vrshlq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s8))) int8x16_t vrshlq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u16))) uint16x8_t vrshlq_x_u16(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u16))) uint16x8_t vrshlq_x(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u32))) uint32x4_t vrshlq_x_u32(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u32))) uint32x4_t vrshlq_x(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u8))) uint8x16_t vrshlq_x_u8(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u8))) uint8x16_t vrshlq_x(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_s16))) int8x16_t vrshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_s16))) int8x16_t vrshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_s32))) int16x8_t vrshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_s32))) int16x8_t vrshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_u16))) uint8x16_t vrshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_u16))) uint8x16_t vrshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ 
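/*
 * Editorial note (not from the original header): vrshlq, declared above,
 * performs an element-wise rounding shift left with per-lane counts taken
 * from the signed second vector; negative counts shift right with rounding.
 * The vrshlq_n/_m_n forms take a single runtime scalar count instead.
 * Illustrative sketch, assuming an MVE-enabled toolchain and <arm_mve.h>:
 *
 *   int16x8_t halve_rounded(int16x8_t v)
 *   {
 *       return vrshlq(v, vdupq_n_s16(-1));   // negative count: rounding shift right by 1
 *   }
 */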
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_u32))) uint16x8_t vrshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_u32))) uint16x8_t vrshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_s16))) int8x16_t vrshrnbq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_s16))) int8x16_t vrshrnbq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_s32))) int16x8_t vrshrnbq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_s32))) int16x8_t vrshrnbq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_u16))) uint8x16_t vrshrnbq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_u16))) uint8x16_t vrshrnbq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_u32))) uint16x8_t vrshrnbq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_u32))) uint16x8_t vrshrnbq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_s16))) int8x16_t vrshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_s16))) int8x16_t vrshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_s32))) int16x8_t vrshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_s32))) int16x8_t vrshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_u16))) uint8x16_t vrshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_u16))) uint8x16_t vrshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_u32))) uint16x8_t vrshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_u32))) uint16x8_t vrshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_s16))) int8x16_t vrshrntq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_s16))) int8x16_t vrshrntq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_s32))) int16x8_t vrshrntq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_s32))) int16x8_t 
vrshrntq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_u16))) uint8x16_t vrshrntq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_u16))) uint8x16_t vrshrntq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_u32))) uint16x8_t vrshrntq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_u32))) uint16x8_t vrshrntq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s16))) int16x8_t vrshrq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s16))) int16x8_t vrshrq_m(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s32))) int32x4_t vrshrq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s32))) int32x4_t vrshrq_m(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s8))) int8x16_t vrshrq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s8))) int8x16_t vrshrq_m(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u16))) uint16x8_t vrshrq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u16))) uint16x8_t vrshrq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u32))) uint32x4_t vrshrq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u32))) uint32x4_t vrshrq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u8))) uint8x16_t vrshrq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u8))) uint8x16_t vrshrq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s16))) int16x8_t vrshrq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s16))) int16x8_t vrshrq(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s32))) int32x4_t vrshrq_n_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s32))) int32x4_t vrshrq(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s8))) int8x16_t vrshrq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s8))) int8x16_t vrshrq(int8x16_t, int); static __inline__ 
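/*
 * Editorial note (not from the original header): vrshrnbq/vrshrntq are
 * rounding shift-right-narrow operations. They halve the element width and
 * write the results into the bottom (even-numbered) or top (odd-numbered)
 * lanes of the first operand, leaving the other lanes untouched, so a b/t
 * pair can pack two wide vectors into one narrow vector. vrshrq, declared
 * next, is the full-width rounding shift right by immediate.
 */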
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u16))) uint16x8_t vrshrq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u16))) uint16x8_t vrshrq(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u32))) uint32x4_t vrshrq_n_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u32))) uint32x4_t vrshrq(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u8))) uint8x16_t vrshrq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u8))) uint8x16_t vrshrq(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s16))) int16x8_t vrshrq_x_n_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s16))) int16x8_t vrshrq_x(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s32))) int32x4_t vrshrq_x_n_s32(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s32))) int32x4_t vrshrq_x(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s8))) int8x16_t vrshrq_x_n_s8(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s8))) int8x16_t vrshrq_x(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u16))) uint16x8_t vrshrq_x_n_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u16))) uint16x8_t vrshrq_x(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u32))) uint32x4_t vrshrq_x_n_u32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u32))) uint32x4_t vrshrq_x(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u8))) uint8x16_t vrshrq_x_n_u8(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u8))) uint8x16_t vrshrq_x(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_m_s32))) int32x4_t vsbciq_m_s32(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_m_s32))) int32x4_t vsbciq_m(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_m_u32))) uint32x4_t vsbciq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_m_u32))) uint32x4_t vsbciq_m(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_s32))) 
int32x4_t vsbciq_s32(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_s32))) int32x4_t vsbciq(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_u32))) uint32x4_t vsbciq_u32(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_u32))) uint32x4_t vsbciq(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_m_s32))) int32x4_t vsbcq_m_s32(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_m_s32))) int32x4_t vsbcq_m(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_m_u32))) uint32x4_t vsbcq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_m_u32))) uint32x4_t vsbcq_m(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_s32))) int32x4_t vsbcq_s32(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_s32))) int32x4_t vsbcq(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_u32))) uint32x4_t vsbcq_u32(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_u32))) uint32x4_t vsbcq(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s16))) int16x8_t vsetq_lane_s16(int16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s16))) int16x8_t vsetq_lane(int16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s32))) int32x4_t vsetq_lane_s32(int32_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s32))) int32x4_t vsetq_lane(int32_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s64))) int64x2_t vsetq_lane_s64(int64_t, int64x2_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s64))) int64x2_t vsetq_lane(int64_t, int64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s8))) int8x16_t vsetq_lane_s8(int8_t, int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s8))) int8x16_t vsetq_lane(int8_t, int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u16))) uint16x8_t vsetq_lane_u16(uint16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u16))) uint16x8_t vsetq_lane(uint16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u32))) uint32x4_t vsetq_lane_u32(uint32_t, uint32x4_t, 
int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u32))) uint32x4_t vsetq_lane(uint32_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u64))) uint64x2_t vsetq_lane_u64(uint64_t, uint64x2_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u64))) uint64x2_t vsetq_lane(uint64_t, uint64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u8))) uint8x16_t vsetq_lane_u8(uint8_t, uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u8))) uint8x16_t vsetq_lane(uint8_t, uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s16))) int16x8_t vshlcq_m_s16(int16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s16))) int16x8_t vshlcq_m(int16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s32))) int32x4_t vshlcq_m_s32(int32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s32))) int32x4_t vshlcq_m(int32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s8))) int8x16_t vshlcq_m_s8(int8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s8))) int8x16_t vshlcq_m(int8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u16))) uint16x8_t vshlcq_m_u16(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u16))) uint16x8_t vshlcq_m(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u32))) uint32x4_t vshlcq_m_u32(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u32))) uint32x4_t vshlcq_m(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u8))) uint8x16_t vshlcq_m_u8(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u8))) uint8x16_t vshlcq_m(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s16))) int16x8_t vshlcq_s16(int16x8_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s16))) int16x8_t vshlcq(int16x8_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s32))) int32x4_t vshlcq_s32(int32x4_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s32))) int32x4_t vshlcq(int32x4_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s8))) int8x16_t vshlcq_s8(int8x16_t, uint32_t *, int); static __inline__ 
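/*
 * Editorial note (not from the original header): vsbcq/vsbciq are
 * subtract-with-borrow operations on 32-bit lanes; the unsigned * argument
 * carries the borrow flag between calls so that wider-than-128-bit
 * subtractions can be chained (vsbciq starts a new chain, vsbcq consumes
 * the flag left by a previous call). vsetq_lane inserts a scalar into the
 * given lane of a vector, and vshlcq shifts the whole vector register left,
 * shifting bits out to and in from the uint32_t * carry argument.
 */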
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s8))) int8x16_t vshlcq(int8x16_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u16))) uint16x8_t vshlcq_u16(uint16x8_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u16))) uint16x8_t vshlcq(uint16x8_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u32))) uint32x4_t vshlcq_u32(uint32x4_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u32))) uint32x4_t vshlcq(uint32x4_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u8))) uint8x16_t vshlcq_u8(uint8x16_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u8))) uint8x16_t vshlcq(uint8x16_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_s16))) int32x4_t vshllbq_m_n_s16(int32x4_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_s16))) int32x4_t vshllbq_m(int32x4_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_s8))) int16x8_t vshllbq_m_n_s8(int16x8_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_s8))) int16x8_t vshllbq_m(int16x8_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_u16))) uint32x4_t vshllbq_m_n_u16(uint32x4_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_u16))) uint32x4_t vshllbq_m(uint32x4_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_u8))) uint16x8_t vshllbq_m_n_u8(uint16x8_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_u8))) uint16x8_t vshllbq_m(uint16x8_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_s16))) int32x4_t vshllbq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_s16))) int32x4_t vshllbq(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_s8))) int16x8_t vshllbq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_s8))) int16x8_t vshllbq(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_u16))) uint32x4_t vshllbq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_u16))) uint32x4_t vshllbq(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_u8))) uint16x8_t vshllbq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_u8))) uint16x8_t vshllbq(uint8x16_t, int); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_s16))) int32x4_t vshllbq_x_n_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_s16))) int32x4_t vshllbq_x(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_s8))) int16x8_t vshllbq_x_n_s8(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_s8))) int16x8_t vshllbq_x(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_u16))) uint32x4_t vshllbq_x_n_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_u16))) uint32x4_t vshllbq_x(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_u8))) uint16x8_t vshllbq_x_n_u8(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_u8))) uint16x8_t vshllbq_x(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_s16))) int32x4_t vshlltq_m_n_s16(int32x4_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_s16))) int32x4_t vshlltq_m(int32x4_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_s8))) int16x8_t vshlltq_m_n_s8(int16x8_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_s8))) int16x8_t vshlltq_m(int16x8_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_u16))) uint32x4_t vshlltq_m_n_u16(uint32x4_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_u16))) uint32x4_t vshlltq_m(uint32x4_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_u8))) uint16x8_t vshlltq_m_n_u8(uint16x8_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_u8))) uint16x8_t vshlltq_m(uint16x8_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_s16))) int32x4_t vshlltq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_s16))) int32x4_t vshlltq(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_s8))) int16x8_t vshlltq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_s8))) int16x8_t vshlltq(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_u16))) uint32x4_t vshlltq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_u16))) uint32x4_t vshlltq(uint16x8_t, int); static __inline__ 
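/*
 * Editorial note (not from the original header): vshllbq/vshlltq are the
 * widening counterparts of the narrowing shifts above: they take the bottom
 * (even) or top (odd) lanes of the source, widen each element to twice its
 * size and shift it left by the given amount. The _m and _x suffixes follow
 * the usual merging / don't-care predication pattern.
 */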
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_u8))) uint16x8_t vshlltq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_u8))) uint16x8_t vshlltq(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_s16))) int32x4_t vshlltq_x_n_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_s16))) int32x4_t vshlltq_x(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_s8))) int16x8_t vshlltq_x_n_s8(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_s8))) int16x8_t vshlltq_x(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_u16))) uint32x4_t vshlltq_x_n_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_u16))) uint32x4_t vshlltq_x(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_u8))) uint16x8_t vshlltq_x_n_u8(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_u8))) uint16x8_t vshlltq_x(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s16))) int16x8_t vshlq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s16))) int16x8_t vshlq_m_n(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s32))) int32x4_t vshlq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s32))) int32x4_t vshlq_m_n(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s8))) int8x16_t vshlq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s8))) int8x16_t vshlq_m_n(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u16))) uint16x8_t vshlq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u16))) uint16x8_t vshlq_m_n(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u32))) uint32x4_t vshlq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u32))) uint32x4_t vshlq_m_n(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u8))) uint8x16_t vshlq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u8))) uint8x16_t vshlq_m_n(uint8x16_t, uint8x16_t, int, mve_pred16_t); 
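/*
 * Illustrative sketch, not part of the original header: it only shows how
 * the type-suffixed and overloaded spellings of the merging predicated
 * shift relate. Assumes an MVE-capable toolchain (for example clang with
 * -mcpu=cortex-m55) and that <arm_mve.h> has been included.
 */
static inline int16x8_t example_vshlq_m_n(int16x8_t inactive, int16x8_t v,
                                          mve_pred16_t p)
{
    /* Lanes whose predicate bits are set receive v << 3; the remaining
       lanes are copied from 'inactive' (the "_m" merging behaviour). */
    int16x8_t a = vshlq_m_n_s16(inactive, v, 3, p);
    /* The overloaded alias resolves to the same builtin. */
    int16x8_t b = vshlq_m_n(inactive, v, 3, p);
    (void)b;   /* both spellings are equivalent */
    return a;
}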
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s16))) int16x8_t vshlq_m_r_s16(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s16))) int16x8_t vshlq_m_r(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s32))) int32x4_t vshlq_m_r_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s32))) int32x4_t vshlq_m_r(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s8))) int8x16_t vshlq_m_r_s8(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s8))) int8x16_t vshlq_m_r(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u16))) uint16x8_t vshlq_m_r_u16(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u16))) uint16x8_t vshlq_m_r(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u32))) uint32x4_t vshlq_m_r_u32(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u32))) uint32x4_t vshlq_m_r(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u8))) uint8x16_t vshlq_m_r_u8(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u8))) uint8x16_t vshlq_m_r(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s16))) int16x8_t vshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s16))) int16x8_t vshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s32))) int32x4_t vshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s32))) int32x4_t vshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s8))) int8x16_t vshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s8))) int8x16_t vshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u16))) uint16x8_t vshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u16))) uint16x8_t vshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u32))) uint32x4_t vshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u32))) uint32x4_t vshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u8))) uint8x16_t vshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u8))) uint8x16_t vshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s16))) int16x8_t vshlq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s16))) int16x8_t vshlq_n(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s32))) int32x4_t vshlq_n_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s32))) int32x4_t vshlq_n(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s8))) int8x16_t vshlq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s8))) int8x16_t vshlq_n(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u16))) uint16x8_t vshlq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u16))) uint16x8_t vshlq_n(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u32))) uint32x4_t vshlq_n_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u32))) uint32x4_t vshlq_n(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u8))) uint8x16_t vshlq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u8))) uint8x16_t vshlq_n(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s16))) int16x8_t vshlq_r_s16(int16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s16))) int16x8_t vshlq_r(int16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s32))) int32x4_t vshlq_r_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s32))) int32x4_t vshlq_r(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s8))) int8x16_t vshlq_r_s8(int8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s8))) int8x16_t vshlq_r(int8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u16))) uint16x8_t vshlq_r_u16(uint16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u16))) uint16x8_t vshlq_r(uint16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u32))) uint32x4_t vshlq_r_u32(uint32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u32))) uint32x4_t vshlq_r(uint32x4_t, 
int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u8))) uint8x16_t vshlq_r_u8(uint8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u8))) uint8x16_t vshlq_r(uint8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s16))) int16x8_t vshlq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s16))) int16x8_t vshlq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s32))) int32x4_t vshlq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s32))) int32x4_t vshlq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s8))) int8x16_t vshlq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s8))) int8x16_t vshlq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u16))) uint16x8_t vshlq_u16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u16))) uint16x8_t vshlq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u32))) uint32x4_t vshlq_u32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u32))) uint32x4_t vshlq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u8))) uint8x16_t vshlq_u8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u8))) uint8x16_t vshlq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s16))) int16x8_t vshlq_x_n_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s16))) int16x8_t vshlq_x_n(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s32))) int32x4_t vshlq_x_n_s32(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s32))) int32x4_t vshlq_x_n(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s8))) int8x16_t vshlq_x_n_s8(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s8))) int8x16_t vshlq_x_n(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u16))) uint16x8_t vshlq_x_n_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u16))) uint16x8_t vshlq_x_n(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u32))) uint32x4_t vshlq_x_n_u32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u32))) uint32x4_t vshlq_x_n(uint32x4_t, int, mve_pred16_t); static 
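/*
 * Editorial note (not from the original header): the vshlq family exposes
 * three ways of giving the shift count - vshlq_n takes a compile-time
 * immediate, vshlq_r takes a runtime scalar applied to every lane (negative
 * values shift right), and plain vshlq takes per-lane counts from a second
 * vector; it is the non-rounding analogue of vrshlq above.
 */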
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u8))) uint8x16_t vshlq_x_n_u8(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u8))) uint8x16_t vshlq_x_n(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s16))) int16x8_t vshlq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s16))) int16x8_t vshlq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s32))) int32x4_t vshlq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s32))) int32x4_t vshlq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s8))) int8x16_t vshlq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s8))) int8x16_t vshlq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u16))) uint16x8_t vshlq_x_u16(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u16))) uint16x8_t vshlq_x(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u32))) uint32x4_t vshlq_x_u32(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u32))) uint32x4_t vshlq_x(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u8))) uint8x16_t vshlq_x_u8(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u8))) uint8x16_t vshlq_x(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_s16))) int8x16_t vshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_s16))) int8x16_t vshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_s32))) int16x8_t vshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_s32))) int16x8_t vshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_u16))) uint8x16_t vshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_u16))) uint8x16_t vshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_u32))) uint16x8_t vshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_u32))) uint16x8_t vshrnbq_m(uint16x8_t, 
uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_s16))) int8x16_t vshrnbq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_s16))) int8x16_t vshrnbq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_s32))) int16x8_t vshrnbq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_s32))) int16x8_t vshrnbq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_u16))) uint8x16_t vshrnbq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_u16))) uint8x16_t vshrnbq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_u32))) uint16x8_t vshrnbq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_u32))) uint16x8_t vshrnbq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_s16))) int8x16_t vshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_s16))) int8x16_t vshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_s32))) int16x8_t vshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_s32))) int16x8_t vshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_u16))) uint8x16_t vshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_u16))) uint8x16_t vshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_u32))) uint16x8_t vshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_u32))) uint16x8_t vshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_s16))) int8x16_t vshrntq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_s16))) int8x16_t vshrntq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_s32))) int16x8_t vshrntq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_s32))) int16x8_t vshrntq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_u16))) uint8x16_t vshrntq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_u16))) uint8x16_t vshrntq(uint8x16_t, uint16x8_t, int); 
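/*
 * Illustrative sketch, not part of the original header: a common use of the
 * b/t narrowing pair is packing two wide vectors into one narrow vector.
 * Assumes an MVE-capable toolchain and that <arm_mve.h> is included; the
 * shift count of 4 is just an example value.
 */
static inline int8x16_t example_pack_s16_to_s8(int16x8_t lo, int16x8_t hi)
{
    int8x16_t r = vdupq_n_s8(0);     /* destination to narrow into */
    r = vshrnbq_n_s16(r, lo, 4);     /* even byte lanes <- (lo >> 4) narrowed */
    r = vshrntq_n_s16(r, hi, 4);     /* odd  byte lanes <- (hi >> 4) narrowed */
    return r;
}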
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_u32))) uint16x8_t vshrntq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_u32))) uint16x8_t vshrntq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s16))) int16x8_t vshrq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s16))) int16x8_t vshrq_m(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s32))) int32x4_t vshrq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s32))) int32x4_t vshrq_m(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s8))) int8x16_t vshrq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s8))) int8x16_t vshrq_m(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u16))) uint16x8_t vshrq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u16))) uint16x8_t vshrq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u32))) uint32x4_t vshrq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u32))) uint32x4_t vshrq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u8))) uint8x16_t vshrq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u8))) uint8x16_t vshrq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s16))) int16x8_t vshrq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s16))) int16x8_t vshrq(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s32))) int32x4_t vshrq_n_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s32))) int32x4_t vshrq(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s8))) int8x16_t vshrq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s8))) int8x16_t vshrq(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u16))) uint16x8_t vshrq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u16))) uint16x8_t vshrq(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u32))) uint32x4_t vshrq_n_u32(uint32x4_t, int); static 
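/*
 * Editorial note (not from the original header): vshrq is the truncating
 * shift right by immediate; vrshrq earlier in this block is the rounding
 * variant of the same operation.
 */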
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u32))) uint32x4_t vshrq(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u8))) uint8x16_t vshrq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u8))) uint8x16_t vshrq(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s16))) int16x8_t vshrq_x_n_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s16))) int16x8_t vshrq_x(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s32))) int32x4_t vshrq_x_n_s32(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s32))) int32x4_t vshrq_x(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s8))) int8x16_t vshrq_x_n_s8(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s8))) int8x16_t vshrq_x(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u16))) uint16x8_t vshrq_x_n_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u16))) uint16x8_t vshrq_x(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u32))) uint32x4_t vshrq_x_n_u32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u32))) uint32x4_t vshrq_x(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u8))) uint8x16_t vshrq_x_n_u8(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u8))) uint8x16_t vshrq_x(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s16))) int16x8_t vsliq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s16))) int16x8_t vsliq_m(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s32))) int32x4_t vsliq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s32))) int32x4_t vsliq_m(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s8))) int8x16_t vsliq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s8))) int8x16_t vsliq_m(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u16))) uint16x8_t vsliq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u16))) 
uint16x8_t vsliq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u32))) uint32x4_t vsliq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u32))) uint32x4_t vsliq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u8))) uint8x16_t vsliq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u8))) uint8x16_t vsliq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s16))) int16x8_t vsliq_n_s16(int16x8_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s16))) int16x8_t vsliq(int16x8_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s32))) int32x4_t vsliq_n_s32(int32x4_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s32))) int32x4_t vsliq(int32x4_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s8))) int8x16_t vsliq_n_s8(int8x16_t, int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s8))) int8x16_t vsliq(int8x16_t, int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u16))) uint16x8_t vsliq_n_u16(uint16x8_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u16))) uint16x8_t vsliq(uint16x8_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u32))) uint32x4_t vsliq_n_u32(uint32x4_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u32))) uint32x4_t vsliq(uint32x4_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u8))) uint8x16_t vsliq_n_u8(uint8x16_t, uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u8))) uint8x16_t vsliq(uint8x16_t, uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s16))) int16x8_t vsriq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s16))) int16x8_t vsriq_m(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s32))) int32x4_t vsriq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s32))) int32x4_t vsriq_m(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s8))) int8x16_t vsriq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s8))) int8x16_t vsriq_m(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u16))) uint16x8_t vsriq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u16))) uint16x8_t vsriq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u32))) uint32x4_t vsriq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u32))) uint32x4_t vsriq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u8))) uint8x16_t vsriq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u8))) uint8x16_t vsriq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s16))) int16x8_t vsriq_n_s16(int16x8_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s16))) int16x8_t vsriq(int16x8_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s32))) int32x4_t vsriq_n_s32(int32x4_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s32))) int32x4_t vsriq(int32x4_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s8))) int8x16_t vsriq_n_s8(int8x16_t, int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s8))) int8x16_t vsriq(int8x16_t, int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u16))) uint16x8_t vsriq_n_u16(uint16x8_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u16))) uint16x8_t vsriq(uint16x8_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u32))) uint32x4_t vsriq_n_u32(uint32x4_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u32))) uint32x4_t vsriq(uint32x4_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u8))) uint8x16_t vsriq_n_u8(uint8x16_t, uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u8))) uint8x16_t vsriq(uint8x16_t, uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s16))) void vst1q_p_s16(int16_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s16))) void vst1q_p(int16_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s32))) void vst1q_p_s32(int32_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s32))) void vst1q_p(int32_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s8))) void vst1q_p_s8(int8_t *, int8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s8))) void vst1q_p(int8_t *, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u16))) void vst1q_p_u16(uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u16))) void vst1q_p(uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u32))) void vst1q_p_u32(uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u32))) void vst1q_p(uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u8))) void vst1q_p_u8(uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u8))) void vst1q_p(uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s16))) void vst1q_s16(int16_t *, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s16))) void vst1q(int16_t *, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s32))) void vst1q_s32(int32_t *, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s32))) void vst1q(int32_t *, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s8))) void vst1q_s8(int8_t *, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s8))) void vst1q(int8_t *, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u16))) void vst1q_u16(uint16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u16))) void vst1q(uint16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u32))) void vst1q_u32(uint32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u32))) void vst1q(uint32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u8))) void vst1q_u8(uint8_t *, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u8))) void vst1q(uint8_t *, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s16))) void vst2q_s16(int16_t *, int16x8x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s16))) void vst2q(int16_t *, int16x8x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s32))) void vst2q_s32(int32_t *, int32x4x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s32))) void vst2q(int32_t *, int32x4x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s8))) void vst2q_s8(int8_t *, int8x16x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s8))) void vst2q(int8_t *, int8x16x2_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u16))) void vst2q_u16(uint16_t *, uint16x8x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u16))) void vst2q(uint16_t *, uint16x8x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u32))) void vst2q_u32(uint32_t *, uint32x4x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u32))) void vst2q(uint32_t *, uint32x4x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u8))) void vst2q_u8(uint8_t *, uint8x16x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u8))) void vst2q(uint8_t *, uint8x16x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s16))) void vst4q_s16(int16_t *, int16x8x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s16))) void vst4q(int16_t *, int16x8x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s32))) void vst4q_s32(int32_t *, int32x4x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s32))) void vst4q(int32_t *, int32x4x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s8))) void vst4q_s8(int8_t *, int8x16x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s8))) void vst4q(int8_t *, int8x16x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u16))) void vst4q_u16(uint16_t *, uint16x8x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u16))) void vst4q(uint16_t *, uint16x8x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u32))) void vst4q_u32(uint32_t *, uint32x4x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u32))) void vst4q(uint32_t *, uint32x4x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u8))) void vst4q_u8(uint8_t *, uint8x16x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u8))) void vst4q(uint8_t *, uint8x16x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s16))) void vstrbq_p_s16(int8_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s16))) void vstrbq_p(int8_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s32))) void vstrbq_p_s32(int8_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s32))) void vstrbq_p(int8_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s8))) void vstrbq_p_s8(int8_t *, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s8))) void vstrbq_p(int8_t *, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u16))) void vstrbq_p_u16(uint8_t *, uint16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u16))) void vstrbq_p(uint8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u32))) void vstrbq_p_u32(uint8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u32))) void vstrbq_p(uint8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u8))) void vstrbq_p_u8(uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u8))) void vstrbq_p(uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s16))) void vstrbq_s16(int8_t *, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s16))) void vstrbq(int8_t *, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s32))) void vstrbq_s32(int8_t *, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s32))) void vstrbq(int8_t *, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s8))) void vstrbq_s8(int8_t *, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s8))) void vstrbq(int8_t *, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s16))) void vstrbq_scatter_offset_p_s16(int8_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s16))) void vstrbq_scatter_offset_p(int8_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s32))) void vstrbq_scatter_offset_p_s32(int8_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s32))) void vstrbq_scatter_offset_p(int8_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s8))) void vstrbq_scatter_offset_p_s8(int8_t *, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s8))) void vstrbq_scatter_offset_p(int8_t *, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u16))) void vstrbq_scatter_offset_p_u16(uint8_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u16))) void vstrbq_scatter_offset_p(uint8_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u32))) void vstrbq_scatter_offset_p_u32(uint8_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u32))) void vstrbq_scatter_offset_p(uint8_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u8))) void vstrbq_scatter_offset_p_u8(uint8_t *, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u8))) void vstrbq_scatter_offset_p(uint8_t *, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s16))) void vstrbq_scatter_offset_s16(int8_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s16))) void vstrbq_scatter_offset(int8_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s32))) void vstrbq_scatter_offset_s32(int8_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s32))) void vstrbq_scatter_offset(int8_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s8))) void vstrbq_scatter_offset_s8(int8_t *, uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s8))) void vstrbq_scatter_offset(int8_t *, uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u16))) void vstrbq_scatter_offset_u16(uint8_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u16))) void vstrbq_scatter_offset(uint8_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u32))) void vstrbq_scatter_offset_u32(uint8_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u32))) void vstrbq_scatter_offset(uint8_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u8))) void vstrbq_scatter_offset_u8(uint8_t *, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u8))) void vstrbq_scatter_offset(uint8_t *, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u16))) void vstrbq_u16(uint8_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u16))) void vstrbq(uint8_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u32))) void vstrbq_u32(uint8_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u32))) void vstrbq(uint8_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u8))) void vstrbq_u8(uint8_t *, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u8))) void vstrbq(uint8_t *, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_p_s64))) void vstrdq_scatter_base_p_s64(uint64x2_t, int, int64x2_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_p_s64))) void vstrdq_scatter_base_p(uint64x2_t, int, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_p_u64))) void vstrdq_scatter_base_p_u64(uint64x2_t, int, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_p_u64))) void vstrdq_scatter_base_p(uint64x2_t, int, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_s64))) void vstrdq_scatter_base_s64(uint64x2_t, int, int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_s64))) void vstrdq_scatter_base(uint64x2_t, int, int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_u64))) void vstrdq_scatter_base_u64(uint64x2_t, int, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_u64))) void vstrdq_scatter_base(uint64x2_t, int, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_s64))) void vstrdq_scatter_base_wb_p_s64(uint64x2_t *, int, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_s64))) void vstrdq_scatter_base_wb_p(uint64x2_t *, int, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_u64))) void vstrdq_scatter_base_wb_p_u64(uint64x2_t *, int, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_u64))) void vstrdq_scatter_base_wb_p(uint64x2_t *, int, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_s64))) void vstrdq_scatter_base_wb_s64(uint64x2_t *, int, int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_s64))) void vstrdq_scatter_base_wb(uint64x2_t *, int, int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_u64))) void vstrdq_scatter_base_wb_u64(uint64x2_t *, int, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_u64))) void vstrdq_scatter_base_wb(uint64x2_t *, int, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_s64))) void vstrdq_scatter_offset_p_s64(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_s64))) void vstrdq_scatter_offset_p(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_u64))) void vstrdq_scatter_offset_p_u64(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_u64))) void vstrdq_scatter_offset_p(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_s64))) void vstrdq_scatter_offset_s64(int64_t *, uint64x2_t, int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_s64))) void vstrdq_scatter_offset(int64_t *, uint64x2_t, int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_u64))) void vstrdq_scatter_offset_u64(uint64_t *, uint64x2_t, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_u64))) void vstrdq_scatter_offset(uint64_t *, uint64x2_t, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_s64))) void vstrdq_scatter_shifted_offset_p_s64(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_s64))) void vstrdq_scatter_shifted_offset_p(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_u64))) void vstrdq_scatter_shifted_offset_p_u64(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_u64))) void vstrdq_scatter_shifted_offset_p(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_s64))) void vstrdq_scatter_shifted_offset_s64(int64_t *, uint64x2_t, int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_s64))) void vstrdq_scatter_shifted_offset(int64_t *, uint64x2_t, int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_u64))) void vstrdq_scatter_shifted_offset_u64(uint64_t *, uint64x2_t, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_u64))) void vstrdq_scatter_shifted_offset(uint64_t *, uint64x2_t, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_s16))) void vstrhq_p_s16(int16_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_s16))) void vstrhq_p(int16_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_s32))) void vstrhq_p_s32(int16_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_s32))) void vstrhq_p(int16_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_u16))) void vstrhq_p_u16(uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_u16))) void vstrhq_p(uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_u32))) void vstrhq_p_u32(uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_u32))) void 
vstrhq_p(uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_s16))) void vstrhq_s16(int16_t *, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_s16))) void vstrhq(int16_t *, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_s32))) void vstrhq_s32(int16_t *, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_s32))) void vstrhq(int16_t *, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s16))) void vstrhq_scatter_offset_p_s16(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s16))) void vstrhq_scatter_offset_p(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s32))) void vstrhq_scatter_offset_p_s32(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s32))) void vstrhq_scatter_offset_p(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u16))) void vstrhq_scatter_offset_p_u16(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u16))) void vstrhq_scatter_offset_p(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u32))) void vstrhq_scatter_offset_p_u32(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u32))) void vstrhq_scatter_offset_p(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_s16))) void vstrhq_scatter_offset_s16(int16_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_s16))) void vstrhq_scatter_offset(int16_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_s32))) void vstrhq_scatter_offset_s32(int16_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_s32))) void vstrhq_scatter_offset(int16_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_u16))) void vstrhq_scatter_offset_u16(uint16_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_u16))) void vstrhq_scatter_offset(uint16_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_u32))) void vstrhq_scatter_offset_u32(uint16_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_u32))) void vstrhq_scatter_offset(uint16_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s16))) void vstrhq_scatter_shifted_offset_p_s16(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s16))) void vstrhq_scatter_shifted_offset_p(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s32))) void vstrhq_scatter_shifted_offset_p_s32(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s32))) void vstrhq_scatter_shifted_offset_p(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u16))) void vstrhq_scatter_shifted_offset_p_u16(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u16))) void vstrhq_scatter_shifted_offset_p(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u32))) void vstrhq_scatter_shifted_offset_p_u32(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u32))) void vstrhq_scatter_shifted_offset_p(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s16))) void vstrhq_scatter_shifted_offset_s16(int16_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s16))) void vstrhq_scatter_shifted_offset(int16_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s32))) void vstrhq_scatter_shifted_offset_s32(int16_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s32))) void vstrhq_scatter_shifted_offset(int16_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u16))) void vstrhq_scatter_shifted_offset_u16(uint16_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u16))) void vstrhq_scatter_shifted_offset(uint16_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u32))) void vstrhq_scatter_shifted_offset_u32(uint16_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u32))) void vstrhq_scatter_shifted_offset(uint16_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_u16))) void vstrhq_u16(uint16_t *, uint16x8_t); 
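/*
 * Illustrative usage sketch (not part of the original header): the
 * declarations above expose each intrinsic both under its type-suffixed
 * name (e.g. vstrhq_p_u16, vshrq_x_n_u16) and as an __overloadable__
 * polymorphic alias (vstrhq_p, vshrq_x).  The hypothetical loop below
 * shows how the predicated forms combine in a tail-predicated loop;
 * vctp16q and vld1q_z are assumed to be declared elsewhere in arm_mve.h.
 * Kept inside #if 0 so it cannot affect a build of this header.
 */
#if 0
static void shift_store_halfwords(uint16_t *dst, const uint16_t *src, int n)
{
    while (n > 0) {
        mve_pred16_t p = vctp16q((uint32_t)n); /* predicate covering the (<= 8) remaining lanes */
        uint16x8_t v = vld1q_z(src, p);        /* predicated load; inactive lanes read as zero  */
        v = vshrq_x(v, 1, p);                  /* predicated unsigned shift right by 1          */
        vstrhq_p(dst, v, p);                   /* store only the active halfword lanes          */
        src += 8;
        dst += 8;
        n -= 8;
    }
}
#endif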
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_u16))) void vstrhq(uint16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_u32))) void vstrhq_u32(uint16_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_u32))) void vstrhq(uint16_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_s32))) void vstrwq_p_s32(int32_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_s32))) void vstrwq_p(int32_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_u32))) void vstrwq_p_u32(uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_u32))) void vstrwq_p(uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_s32))) void vstrwq_s32(int32_t *, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_s32))) void vstrwq(int32_t *, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_s32))) void vstrwq_scatter_base_p_s32(uint32x4_t, int, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_s32))) void vstrwq_scatter_base_p(uint32x4_t, int, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_u32))) void vstrwq_scatter_base_p_u32(uint32x4_t, int, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_u32))) void vstrwq_scatter_base_p(uint32x4_t, int, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_s32))) void vstrwq_scatter_base_s32(uint32x4_t, int, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_s32))) void vstrwq_scatter_base(uint32x4_t, int, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_u32))) void vstrwq_scatter_base_u32(uint32x4_t, int, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_u32))) void vstrwq_scatter_base(uint32x4_t, int, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_s32))) void vstrwq_scatter_base_wb_p_s32(uint32x4_t *, int, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_s32))) void vstrwq_scatter_base_wb_p(uint32x4_t *, int, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_u32))) void vstrwq_scatter_base_wb_p_u32(uint32x4_t *, int, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_u32))) void vstrwq_scatter_base_wb_p(uint32x4_t *, int, uint32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_s32))) void vstrwq_scatter_base_wb_s32(uint32x4_t *, int, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_s32))) void vstrwq_scatter_base_wb(uint32x4_t *, int, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_u32))) void vstrwq_scatter_base_wb_u32(uint32x4_t *, int, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_u32))) void vstrwq_scatter_base_wb(uint32x4_t *, int, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_s32))) void vstrwq_scatter_offset_p_s32(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_s32))) void vstrwq_scatter_offset_p(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_u32))) void vstrwq_scatter_offset_p_u32(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_u32))) void vstrwq_scatter_offset_p(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_s32))) void vstrwq_scatter_offset_s32(int32_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_s32))) void vstrwq_scatter_offset(int32_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_u32))) void vstrwq_scatter_offset_u32(uint32_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_u32))) void vstrwq_scatter_offset(uint32_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_s32))) void vstrwq_scatter_shifted_offset_p_s32(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_s32))) void vstrwq_scatter_shifted_offset_p(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_u32))) void vstrwq_scatter_shifted_offset_p_u32(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_u32))) void vstrwq_scatter_shifted_offset_p(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_s32))) void vstrwq_scatter_shifted_offset_s32(int32_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_s32))) void vstrwq_scatter_shifted_offset(int32_t *, uint32x4_t, int32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_u32))) void vstrwq_scatter_shifted_offset_u32(uint32_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_u32))) void vstrwq_scatter_shifted_offset(uint32_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_u32))) void vstrwq_u32(uint32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_u32))) void vstrwq(uint32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s16))) int16x8_t vsubq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s16))) int16x8_t vsubq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s32))) int32x4_t vsubq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s32))) int32x4_t vsubq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s8))) int8x16_t vsubq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s8))) int8x16_t vsubq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u16))) uint16x8_t vsubq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u16))) uint16x8_t vsubq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u32))) uint32x4_t vsubq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u32))) uint32x4_t vsubq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u8))) uint8x16_t vsubq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u8))) uint8x16_t vsubq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s16))) int16x8_t vsubq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s16))) int16x8_t vsubq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s32))) int32x4_t vsubq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s32))) int32x4_t vsubq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s8))) int8x16_t vsubq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s8))) int8x16_t vsubq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u16))) uint16x8_t vsubq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u16))) uint16x8_t vsubq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u32))) uint32x4_t vsubq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u32))) uint32x4_t vsubq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u8))) uint8x16_t vsubq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u8))) uint8x16_t vsubq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s16))) int16x8_t vsubq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s16))) int16x8_t vsubq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s32))) int32x4_t vsubq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s32))) int32x4_t vsubq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s8))) int8x16_t vsubq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s8))) int8x16_t vsubq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u16))) uint16x8_t vsubq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u16))) uint16x8_t vsubq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u32))) uint32x4_t vsubq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u32))) uint32x4_t vsubq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u8))) uint8x16_t vsubq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u8))) uint8x16_t vsubq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s16))) int16x8_t vsubq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s16))) int16x8_t vsubq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s32))) int32x4_t vsubq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s32))) int32x4_t vsubq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s8))) 
int8x16_t vsubq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s8))) int8x16_t vsubq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u16))) uint16x8_t vsubq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u16))) uint16x8_t vsubq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u32))) uint32x4_t vsubq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u32))) uint32x4_t vsubq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u8))) uint8x16_t vsubq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u8))) uint8x16_t vsubq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s16))) int16x8_t vsubq_x_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s16))) int16x8_t vsubq_x(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s32))) int32x4_t vsubq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s32))) int32x4_t vsubq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s8))) int8x16_t vsubq_x_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s8))) int8x16_t vsubq_x(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u16))) uint16x8_t vsubq_x_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u16))) uint16x8_t vsubq_x(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u32))) uint32x4_t vsubq_x_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u32))) uint32x4_t vsubq_x(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u8))) uint8x16_t vsubq_x_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u8))) uint8x16_t vsubq_x(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s16))) int16x8_t vsubq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s16))) int16x8_t vsubq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s32))) int32x4_t vsubq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s32))) int32x4_t 
vsubq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s8))) int8x16_t vsubq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s8))) int8x16_t vsubq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u16))) uint16x8_t vsubq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u16))) uint16x8_t vsubq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u32))) uint32x4_t vsubq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u32))) uint32x4_t vsubq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u8))) uint8x16_t vsubq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u8))) uint8x16_t vsubq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s16))) int16x8_t vuninitializedq(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s32))) int32x4_t vuninitializedq(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s64))) int64x2_t vuninitializedq(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s8))) int8x16_t vuninitializedq(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u16))) uint16x8_t vuninitializedq(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u32))) uint32x4_t vuninitializedq(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u64))) uint64x2_t vuninitializedq(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u8))) uint8x16_t vuninitializedq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_s16))) int16x8_t vuninitializedq_s16(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_s32))) int32x4_t vuninitializedq_s32(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_s64))) int64x2_t vuninitializedq_s64(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_s8))) int8x16_t vuninitializedq_s8(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_u16))) uint16x8_t vuninitializedq_u16(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_u32))) uint32x4_t vuninitializedq_u32(); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_u64))) uint64x2_t vuninitializedq_u64(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_u8))) uint8x16_t vuninitializedq_u8(); #endif /* (!defined __ARM_MVE_PRESERVE_USER_NAMESPACE) */ #if (__ARM_FEATURE_MVE & 2) && (!defined __ARM_MVE_PRESERVE_USER_NAMESPACE) static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_f16))) float16x8_t vabdq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_f16))) float16x8_t vabdq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_f32))) float32x4_t vabdq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_f32))) float32x4_t vabdq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_f16))) float16x8_t vabdq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_f16))) float16x8_t vabdq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_f32))) float32x4_t vabdq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_f32))) float32x4_t vabdq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_f16))) float16x8_t vabdq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_f16))) float16x8_t vabdq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_f32))) float32x4_t vabdq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_f32))) float32x4_t vabdq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_f16))) float16x8_t vabsq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_f16))) float16x8_t vabsq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_f32))) float32x4_t vabsq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_f32))) float32x4_t vabsq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_f16))) float16x8_t vabsq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_f16))) float16x8_t vabsq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_f32))) float32x4_t vabsq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_f32))) float32x4_t vabsq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_f16))) float16x8_t vabsq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_f16))) float16x8_t vabsq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_f32))) float32x4_t vabsq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_f32))) float32x4_t vabsq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_f16))) float16x8_t vaddq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_f16))) float16x8_t vaddq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_f32))) float32x4_t vaddq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_f32))) float32x4_t vaddq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_f16))) float16x8_t vaddq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_f16))) float16x8_t vaddq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_f32))) float32x4_t vaddq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_f32))) float32x4_t vaddq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_f16))) float16x8_t vaddq_m_n_f16(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_f16))) float16x8_t vaddq_m(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_f32))) float32x4_t vaddq_m_n_f32(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_f32))) float32x4_t vaddq_m(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_f16))) float16x8_t vaddq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_f16))) float16x8_t vaddq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_f32))) float32x4_t vaddq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_f32))) float32x4_t vaddq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_f16))) float16x8_t vaddq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_f16))) float16x8_t vaddq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_f32))) float32x4_t vaddq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_f32))) float32x4_t vaddq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_f16))) float16x8_t vaddq_x_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_f16))) float16x8_t vaddq_x(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_f32))) float32x4_t vaddq_x_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_f32))) float32x4_t vaddq_x(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_f16))) float16x8_t vandq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_f16))) float16x8_t vandq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_f32))) float32x4_t vandq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_f32))) float32x4_t vandq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_f16))) float16x8_t vandq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_f16))) float16x8_t vandq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_f32))) float32x4_t vandq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_f32))) float32x4_t vandq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_f16))) float16x8_t vandq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_f16))) float16x8_t vandq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_f32))) float32x4_t vandq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_f32))) float32x4_t vandq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_f16))) float16x8_t vbicq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_f16))) float16x8_t vbicq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_f32))) float32x4_t vbicq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_f32))) float32x4_t vbicq(float32x4_t, float32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_f16))) float16x8_t vbicq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_f16))) float16x8_t vbicq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_f32))) float32x4_t vbicq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_f32))) float32x4_t vbicq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_f16))) float16x8_t vbicq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_f16))) float16x8_t vbicq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_f32))) float32x4_t vbicq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_f32))) float32x4_t vbicq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_f16))) float16x8_t vbrsrq_m_n_f16(float16x8_t, float16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_f16))) float16x8_t vbrsrq_m(float16x8_t, float16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_f32))) float32x4_t vbrsrq_m_n_f32(float32x4_t, float32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_f32))) float32x4_t vbrsrq_m(float32x4_t, float32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_f16))) float16x8_t vbrsrq_n_f16(float16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_f16))) float16x8_t vbrsrq(float16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_f32))) float32x4_t vbrsrq_n_f32(float32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_f32))) float32x4_t vbrsrq(float32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_f16))) float16x8_t vbrsrq_x_n_f16(float16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_f16))) float16x8_t vbrsrq_x(float16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_f32))) float32x4_t vbrsrq_x_n_f32(float32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_f32))) float32x4_t vbrsrq_x(float32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_f16))) float16x8_t vcaddq_rot270_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_f16))) float16x8_t vcaddq_rot270(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_f32))) float32x4_t vcaddq_rot270_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_f32))) float32x4_t vcaddq_rot270(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_f16))) float16x8_t vcaddq_rot270_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_f16))) float16x8_t vcaddq_rot270_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_f32))) float32x4_t vcaddq_rot270_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_f32))) float32x4_t vcaddq_rot270_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_f16))) float16x8_t vcaddq_rot270_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_f16))) float16x8_t vcaddq_rot270_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_f32))) float32x4_t vcaddq_rot270_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_f32))) float32x4_t vcaddq_rot270_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_f16))) float16x8_t vcaddq_rot90_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_f16))) float16x8_t vcaddq_rot90(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_f32))) float32x4_t vcaddq_rot90_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_f32))) float32x4_t vcaddq_rot90(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_f16))) float16x8_t vcaddq_rot90_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_f16))) float16x8_t vcaddq_rot90_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_f32))) float32x4_t vcaddq_rot90_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_f32))) float32x4_t vcaddq_rot90_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_f16))) float16x8_t vcaddq_rot90_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_f16))) float16x8_t vcaddq_rot90_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_f32))) float32x4_t vcaddq_rot90_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_f32))) float32x4_t vcaddq_rot90_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_f16))) float16x8_t vcmlaq_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_f16))) float16x8_t vcmlaq(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_f32))) float32x4_t vcmlaq_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_f32))) float32x4_t vcmlaq(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_m_f16))) float16x8_t vcmlaq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_m_f16))) float16x8_t vcmlaq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_m_f32))) float32x4_t vcmlaq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_m_f32))) float32x4_t vcmlaq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_f16))) float16x8_t vcmlaq_rot180_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_f16))) float16x8_t vcmlaq_rot180(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_f32))) float32x4_t vcmlaq_rot180_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_f32))) float32x4_t vcmlaq_rot180(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_m_f16))) float16x8_t vcmlaq_rot180_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_m_f16))) float16x8_t vcmlaq_rot180_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_m_f32))) float32x4_t vcmlaq_rot180_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_m_f32))) float32x4_t vcmlaq_rot180_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_f16))) float16x8_t vcmlaq_rot270_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_f16))) float16x8_t vcmlaq_rot270(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_f32))) float32x4_t vcmlaq_rot270_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_f32))) float32x4_t vcmlaq_rot270(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_m_f16))) float16x8_t vcmlaq_rot270_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_m_f16))) float16x8_t vcmlaq_rot270_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_m_f32))) float32x4_t vcmlaq_rot270_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_m_f32))) float32x4_t vcmlaq_rot270_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_f16))) float16x8_t vcmlaq_rot90_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_f16))) float16x8_t vcmlaq_rot90(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_f32))) float32x4_t vcmlaq_rot90_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_f32))) float32x4_t vcmlaq_rot90(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_m_f16))) float16x8_t vcmlaq_rot90_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_m_f16))) float16x8_t vcmlaq_rot90_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_m_f32))) float32x4_t vcmlaq_rot90_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_m_f32))) float32x4_t vcmlaq_rot90_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_f16))) mve_pred16_t vcmpeqq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_f16))) mve_pred16_t vcmpeqq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_f32))) mve_pred16_t vcmpeqq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_f32))) mve_pred16_t vcmpeqq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_f16))) mve_pred16_t vcmpeqq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_f16))) mve_pred16_t vcmpeqq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_f32))) mve_pred16_t vcmpeqq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_f32))) mve_pred16_t vcmpeqq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_f16))) mve_pred16_t vcmpeqq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_f16))) mve_pred16_t vcmpeqq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_f32))) mve_pred16_t vcmpeqq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_f32))) mve_pred16_t vcmpeqq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_f16))) mve_pred16_t vcmpeqq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_f16))) mve_pred16_t vcmpeqq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_f32))) mve_pred16_t vcmpeqq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_f32))) mve_pred16_t vcmpeqq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_f16))) mve_pred16_t vcmpgeq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_f16))) mve_pred16_t vcmpgeq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_f32))) mve_pred16_t vcmpgeq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_f32))) mve_pred16_t vcmpgeq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_f16))) mve_pred16_t vcmpgeq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_f16))) mve_pred16_t vcmpgeq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_f32))) mve_pred16_t vcmpgeq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_f32))) mve_pred16_t vcmpgeq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_f16))) mve_pred16_t vcmpgeq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_f16))) mve_pred16_t vcmpgeq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_f32))) mve_pred16_t vcmpgeq_m_n_f32(float32x4_t, float32_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_f32))) mve_pred16_t vcmpgeq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_f16))) mve_pred16_t vcmpgeq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_f16))) mve_pred16_t vcmpgeq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_f32))) mve_pred16_t vcmpgeq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_f32))) mve_pred16_t vcmpgeq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_f16))) mve_pred16_t vcmpgtq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_f16))) mve_pred16_t vcmpgtq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_f32))) mve_pred16_t vcmpgtq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_f32))) mve_pred16_t vcmpgtq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_f16))) mve_pred16_t vcmpgtq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_f16))) mve_pred16_t vcmpgtq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_f32))) mve_pred16_t vcmpgtq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_f32))) mve_pred16_t vcmpgtq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_f16))) mve_pred16_t vcmpgtq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_f16))) mve_pred16_t vcmpgtq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_f32))) mve_pred16_t vcmpgtq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_f32))) mve_pred16_t vcmpgtq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_f16))) mve_pred16_t vcmpgtq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_f16))) mve_pred16_t vcmpgtq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_f32))) mve_pred16_t vcmpgtq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_f32))) mve_pred16_t vcmpgtq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_f16))) mve_pred16_t vcmpleq_f16(float16x8_t, float16x8_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_f16))) mve_pred16_t vcmpleq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_f32))) mve_pred16_t vcmpleq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_f32))) mve_pred16_t vcmpleq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_f16))) mve_pred16_t vcmpleq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_f16))) mve_pred16_t vcmpleq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_f32))) mve_pred16_t vcmpleq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_f32))) mve_pred16_t vcmpleq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_f16))) mve_pred16_t vcmpleq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_f16))) mve_pred16_t vcmpleq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_f32))) mve_pred16_t vcmpleq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_f32))) mve_pred16_t vcmpleq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_f16))) mve_pred16_t vcmpleq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_f16))) mve_pred16_t vcmpleq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_f32))) mve_pred16_t vcmpleq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_f32))) mve_pred16_t vcmpleq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_f16))) mve_pred16_t vcmpltq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_f16))) mve_pred16_t vcmpltq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_f32))) mve_pred16_t vcmpltq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_f32))) mve_pred16_t vcmpltq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_f16))) mve_pred16_t vcmpltq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_f16))) mve_pred16_t vcmpltq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_f32))) mve_pred16_t vcmpltq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_f32))) mve_pred16_t vcmpltq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_f16))) mve_pred16_t vcmpltq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_f16))) mve_pred16_t vcmpltq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_f32))) mve_pred16_t vcmpltq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_f32))) mve_pred16_t vcmpltq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_f16))) mve_pred16_t vcmpltq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_f16))) mve_pred16_t vcmpltq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_f32))) mve_pred16_t vcmpltq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_f32))) mve_pred16_t vcmpltq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_f16))) mve_pred16_t vcmpneq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_f16))) mve_pred16_t vcmpneq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_f32))) mve_pred16_t vcmpneq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_f32))) mve_pred16_t vcmpneq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_f16))) mve_pred16_t vcmpneq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_f16))) mve_pred16_t vcmpneq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_f32))) mve_pred16_t vcmpneq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_f32))) mve_pred16_t vcmpneq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_f16))) mve_pred16_t vcmpneq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_f16))) mve_pred16_t vcmpneq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_f32))) mve_pred16_t vcmpneq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_f32))) mve_pred16_t vcmpneq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_f16))) mve_pred16_t vcmpneq_n_f16(float16x8_t, float16_t); 
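/* Illustrative usage sketch (not part of the header): the vcmp*q prototypes
 * above return an mve_pred16_t lane mask that the predicated _m/_x forms
 * consume. Assumes a target built with MVE floating point (e.g.
 * -march=armv8.1-m.main+mve.fp) and that user code has included <arm_mve.h>;
 * the helper name below is made up for the example. */
static inline float32x4_t add_where_at_least(float32x4_t a, float32x4_t b,
                                             float32_t limit)
{
    mve_pred16_t p = vcmpgeq(a, limit); /* per-lane test: a[i] >= limit           */
    return vaddq_m(a, a, b, p);         /* active lanes get a+b, inactive keep a  */
}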
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_f16))) mve_pred16_t vcmpneq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_f32))) mve_pred16_t vcmpneq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_f32))) mve_pred16_t vcmpneq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_f16))) float16x8_t vcmulq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_f16))) float16x8_t vcmulq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_f32))) float32x4_t vcmulq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_f32))) float32x4_t vcmulq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_m_f16))) float16x8_t vcmulq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_m_f16))) float16x8_t vcmulq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_m_f32))) float32x4_t vcmulq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_m_f32))) float32x4_t vcmulq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_f16))) float16x8_t vcmulq_rot180_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_f16))) float16x8_t vcmulq_rot180(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_f32))) float32x4_t vcmulq_rot180_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_f32))) float32x4_t vcmulq_rot180(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_m_f16))) float16x8_t vcmulq_rot180_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_m_f16))) float16x8_t vcmulq_rot180_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_m_f32))) float32x4_t vcmulq_rot180_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_m_f32))) float32x4_t vcmulq_rot180_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_x_f16))) float16x8_t vcmulq_rot180_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_x_f16))) float16x8_t vcmulq_rot180_x(float16x8_t, float16x8_t, mve_pred16_t); 
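/* Illustrative sketch (not part of the header): the usual way the vcmulq and
 * vcmlaq_rot* prototypes above combine into a full complex multiply of
 * interleaved [re, im] float32 data. Same assumptions as the previous sketch;
 * the helper name is made up. VCMUL #0 forms the a.re*b partial products and
 * VCMLA #90 adds the a.im*b contributions, so the result lanes hold
 * (ar*br - ai*bi) and (ar*bi + ai*br). */
static inline float32x4_t complex_mul_f32(float32x4_t a, float32x4_t b)
{
    float32x4_t t = vcmulq(a, b);   /* re: ar*br,     im: ar*bi  */
    return vcmlaq_rot90(t, a, b);   /* re: -= ai*bi,  im: += ai*br */
}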
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_x_f32))) float32x4_t vcmulq_rot180_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_x_f32))) float32x4_t vcmulq_rot180_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_f16))) float16x8_t vcmulq_rot270_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_f16))) float16x8_t vcmulq_rot270(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_f32))) float32x4_t vcmulq_rot270_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_f32))) float32x4_t vcmulq_rot270(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_m_f16))) float16x8_t vcmulq_rot270_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_m_f16))) float16x8_t vcmulq_rot270_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_m_f32))) float32x4_t vcmulq_rot270_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_m_f32))) float32x4_t vcmulq_rot270_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_x_f16))) float16x8_t vcmulq_rot270_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_x_f16))) float16x8_t vcmulq_rot270_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_x_f32))) float32x4_t vcmulq_rot270_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_x_f32))) float32x4_t vcmulq_rot270_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_f16))) float16x8_t vcmulq_rot90_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_f16))) float16x8_t vcmulq_rot90(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_f32))) float32x4_t vcmulq_rot90_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_f32))) float32x4_t vcmulq_rot90(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_m_f16))) float16x8_t vcmulq_rot90_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_m_f16))) float16x8_t vcmulq_rot90_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_m_f32))) float32x4_t vcmulq_rot90_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_m_f32))) float32x4_t vcmulq_rot90_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_x_f16))) float16x8_t vcmulq_rot90_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_x_f16))) float16x8_t vcmulq_rot90_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_x_f32))) float32x4_t vcmulq_rot90_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_x_f32))) float32x4_t vcmulq_rot90_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_x_f16))) float16x8_t vcmulq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_x_f16))) float16x8_t vcmulq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_x_f32))) float32x4_t vcmulq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_x_f32))) float32x4_t vcmulq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_f16))) float16x8_t vcreateq_f16(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_f32))) float32x4_t vcreateq_f32(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_s16_f16))) int16x8_t vcvtaq_m_s16_f16(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_s16_f16))) int16x8_t vcvtaq_m(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_s32_f32))) int32x4_t vcvtaq_m_s32_f32(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_s32_f32))) int32x4_t vcvtaq_m(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_u16_f16))) uint16x8_t vcvtaq_m_u16_f16(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_u16_f16))) uint16x8_t vcvtaq_m(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_u32_f32))) uint32x4_t vcvtaq_m_u32_f32(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_u32_f32))) uint32x4_t vcvtaq_m(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_s16_f16))) int16x8_t vcvtaq_s16_f16(float16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_s32_f32))) int32x4_t vcvtaq_s32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_u16_f16))) uint16x8_t vcvtaq_u16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_u32_f32))) uint32x4_t vcvtaq_u32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_x_s16_f16))) int16x8_t vcvtaq_x_s16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_x_s32_f32))) int32x4_t vcvtaq_x_s32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_x_u16_f16))) uint16x8_t vcvtaq_x_u16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_x_u32_f32))) uint32x4_t vcvtaq_x_u32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtbq_f16_f32))) float16x8_t vcvtbq_f16_f32(float16x8_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtbq_f32_f16))) float32x4_t vcvtbq_f32_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtbq_m_f16_f32))) float16x8_t vcvtbq_m_f16_f32(float16x8_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtbq_m_f32_f16))) float32x4_t vcvtbq_m_f32_f16(float32x4_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtbq_x_f32_f16))) float32x4_t vcvtbq_x_f32_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_s16_f16))) int16x8_t vcvtmq_m_s16_f16(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_s16_f16))) int16x8_t vcvtmq_m(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_s32_f32))) int32x4_t vcvtmq_m_s32_f32(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_s32_f32))) int32x4_t vcvtmq_m(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_u16_f16))) uint16x8_t vcvtmq_m_u16_f16(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_u16_f16))) uint16x8_t vcvtmq_m(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_u32_f32))) uint32x4_t vcvtmq_m_u32_f32(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_u32_f32))) uint32x4_t vcvtmq_m(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_s16_f16))) int16x8_t vcvtmq_s16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_s32_f32))) int32x4_t vcvtmq_s32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_u16_f16))) uint16x8_t vcvtmq_u16_f16(float16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_u32_f32))) uint32x4_t vcvtmq_u32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_x_s16_f16))) int16x8_t vcvtmq_x_s16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_x_s32_f32))) int32x4_t vcvtmq_x_s32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_x_u16_f16))) uint16x8_t vcvtmq_x_u16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_x_u32_f32))) uint32x4_t vcvtmq_x_u32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_s16_f16))) int16x8_t vcvtnq_m_s16_f16(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_s16_f16))) int16x8_t vcvtnq_m(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_s32_f32))) int32x4_t vcvtnq_m_s32_f32(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_s32_f32))) int32x4_t vcvtnq_m(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_u16_f16))) uint16x8_t vcvtnq_m_u16_f16(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_u16_f16))) uint16x8_t vcvtnq_m(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_u32_f32))) uint32x4_t vcvtnq_m_u32_f32(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_u32_f32))) uint32x4_t vcvtnq_m(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_s16_f16))) int16x8_t vcvtnq_s16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_s32_f32))) int32x4_t vcvtnq_s32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_u16_f16))) uint16x8_t vcvtnq_u16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_u32_f32))) uint32x4_t vcvtnq_u32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_x_s16_f16))) int16x8_t vcvtnq_x_s16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_x_s32_f32))) int32x4_t vcvtnq_x_s32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_x_u16_f16))) uint16x8_t vcvtnq_x_u16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_x_u32_f32))) uint32x4_t vcvtnq_x_u32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_s16_f16))) int16x8_t vcvtpq_m_s16_f16(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_s16_f16))) int16x8_t vcvtpq_m(int16x8_t, float16x8_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_s32_f32))) int32x4_t vcvtpq_m_s32_f32(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_s32_f32))) int32x4_t vcvtpq_m(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_u16_f16))) uint16x8_t vcvtpq_m_u16_f16(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_u16_f16))) uint16x8_t vcvtpq_m(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_u32_f32))) uint32x4_t vcvtpq_m_u32_f32(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_u32_f32))) uint32x4_t vcvtpq_m(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_s16_f16))) int16x8_t vcvtpq_s16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_s32_f32))) int32x4_t vcvtpq_s32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_u16_f16))) uint16x8_t vcvtpq_u16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_u32_f32))) uint32x4_t vcvtpq_u32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_x_s16_f16))) int16x8_t vcvtpq_x_s16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_x_s32_f32))) int32x4_t vcvtpq_x_s32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_x_u16_f16))) uint16x8_t vcvtpq_x_u16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_x_u32_f32))) uint32x4_t vcvtpq_x_u32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f16_s16))) float16x8_t vcvtq_f16_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f16_s16))) float16x8_t vcvtq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f16_u16))) float16x8_t vcvtq_f16_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f16_u16))) float16x8_t vcvtq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f32_s32))) float32x4_t vcvtq_f32_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f32_s32))) float32x4_t vcvtq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f32_u32))) float32x4_t vcvtq_f32_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f32_u32))) float32x4_t vcvtq(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f16_s16))) float16x8_t vcvtq_m_f16_s16(float16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f16_s16))) float16x8_t vcvtq_m(float16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f16_u16))) float16x8_t vcvtq_m_f16_u16(float16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f16_u16))) float16x8_t vcvtq_m(float16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f32_s32))) float32x4_t vcvtq_m_f32_s32(float32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f32_s32))) float32x4_t vcvtq_m(float32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f32_u32))) float32x4_t vcvtq_m_f32_u32(float32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f32_u32))) float32x4_t vcvtq_m(float32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f16_s16))) float16x8_t vcvtq_m_n_f16_s16(float16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f16_s16))) float16x8_t vcvtq_m_n(float16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f16_u16))) float16x8_t vcvtq_m_n_f16_u16(float16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f16_u16))) float16x8_t vcvtq_m_n(float16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f32_s32))) float32x4_t vcvtq_m_n_f32_s32(float32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f32_s32))) float32x4_t vcvtq_m_n(float32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f32_u32))) float32x4_t vcvtq_m_n_f32_u32(float32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f32_u32))) float32x4_t vcvtq_m_n(float32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_s16_f16))) int16x8_t vcvtq_m_n_s16_f16(int16x8_t, float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_s16_f16))) int16x8_t vcvtq_m_n(int16x8_t, float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_s32_f32))) int32x4_t vcvtq_m_n_s32_f32(int32x4_t, float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_s32_f32))) int32x4_t vcvtq_m_n(int32x4_t, float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_u16_f16))) uint16x8_t vcvtq_m_n_u16_f16(uint16x8_t, float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_u16_f16))) uint16x8_t 
vcvtq_m_n(uint16x8_t, float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_u32_f32))) uint32x4_t vcvtq_m_n_u32_f32(uint32x4_t, float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_u32_f32))) uint32x4_t vcvtq_m_n(uint32x4_t, float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_s16_f16))) int16x8_t vcvtq_m_s16_f16(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_s16_f16))) int16x8_t vcvtq_m(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_s32_f32))) int32x4_t vcvtq_m_s32_f32(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_s32_f32))) int32x4_t vcvtq_m(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_u16_f16))) uint16x8_t vcvtq_m_u16_f16(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_u16_f16))) uint16x8_t vcvtq_m(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_u32_f32))) uint32x4_t vcvtq_m_u32_f32(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_u32_f32))) uint32x4_t vcvtq_m(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f16_s16))) float16x8_t vcvtq_n_f16_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f16_s16))) float16x8_t vcvtq_n(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f16_u16))) float16x8_t vcvtq_n_f16_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f16_u16))) float16x8_t vcvtq_n(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f32_s32))) float32x4_t vcvtq_n_f32_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f32_s32))) float32x4_t vcvtq_n(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f32_u32))) float32x4_t vcvtq_n_f32_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f32_u32))) float32x4_t vcvtq_n(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_s16_f16))) int16x8_t vcvtq_n_s16_f16(float16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_s32_f32))) int32x4_t vcvtq_n_s32_f32(float32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_u16_f16))) uint16x8_t vcvtq_n_u16_f16(float16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_u32_f32))) uint32x4_t vcvtq_n_u32_f32(float32x4_t, int); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_s16_f16))) int16x8_t vcvtq_s16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_s32_f32))) int32x4_t vcvtq_s32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_u16_f16))) uint16x8_t vcvtq_u16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_u32_f32))) uint32x4_t vcvtq_u32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f16_s16))) float16x8_t vcvtq_x_f16_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f16_s16))) float16x8_t vcvtq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f16_u16))) float16x8_t vcvtq_x_f16_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f16_u16))) float16x8_t vcvtq_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f32_s32))) float32x4_t vcvtq_x_f32_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f32_s32))) float32x4_t vcvtq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f32_u32))) float32x4_t vcvtq_x_f32_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f32_u32))) float32x4_t vcvtq_x(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f16_s16))) float16x8_t vcvtq_x_n_f16_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f16_s16))) float16x8_t vcvtq_x_n(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f16_u16))) float16x8_t vcvtq_x_n_f16_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f16_u16))) float16x8_t vcvtq_x_n(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f32_s32))) float32x4_t vcvtq_x_n_f32_s32(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f32_s32))) float32x4_t vcvtq_x_n(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f32_u32))) float32x4_t vcvtq_x_n_f32_u32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f32_u32))) float32x4_t vcvtq_x_n(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_s16_f16))) int16x8_t vcvtq_x_n_s16_f16(float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_s32_f32))) int32x4_t vcvtq_x_n_s32_f32(float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_u16_f16))) uint16x8_t vcvtq_x_n_u16_f16(float16x8_t, int, mve_pred16_t); 
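/* Editor's illustrative sketch, not part of the original header: the vcvt*q
 * declarations above follow the usual MVE suffix pattern — a trailing _n takes
 * a compile-time fraction-bit count (fixed-point conversion), _m takes a merge
 * operand plus an mve_pred16_t predicate, and _x leaves inactive lanes
 * unspecified.  The two hypothetical helpers below only use intrinsics declared
 * above and assume an MVE floating-point target (e.g. Cortex-M55 built with
 * -mcpu=cortex-m55 -mfloat-abi=hard) with <arm_mve.h> available. */
#include <arm_mve.h>

int32x4_t to_q24_8(float32x4_t x)
{
    /* vcvtq_n_s32_f32: convert with 8 fraction bits, i.e. scale by 2^8 and
     * truncate toward zero (Q24.8 fixed point). */
    return vcvtq_n_s32_f32(x, 8);
}

int32x4_t ceil_convert_selected(int32x4_t inactive, float32x4_t x, mve_pred16_t p)
{
    /* vcvtpq_m: round toward +infinity and convert the lanes whose predicate
     * bits are set; lanes whose bits are clear keep the value of 'inactive'. */
    return vcvtpq_m(inactive, x, p);
}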
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_u32_f32))) uint32x4_t vcvtq_x_n_u32_f32(float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_s16_f16))) int16x8_t vcvtq_x_s16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_s32_f32))) int32x4_t vcvtq_x_s32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_u16_f16))) uint16x8_t vcvtq_x_u16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_u32_f32))) uint32x4_t vcvtq_x_u32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvttq_f16_f32))) float16x8_t vcvttq_f16_f32(float16x8_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvttq_f32_f16))) float32x4_t vcvttq_f32_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvttq_m_f16_f32))) float16x8_t vcvttq_m_f16_f32(float16x8_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvttq_m_f32_f16))) float32x4_t vcvttq_m_f32_f16(float32x4_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvttq_x_f32_f16))) float32x4_t vcvttq_x_f32_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_f16))) float16x8_t vdupq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_f16))) float16x8_t vdupq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_f32))) float32x4_t vdupq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_f32))) float32x4_t vdupq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_f16))) float16x8_t vdupq_n_f16(float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_f32))) float32x4_t vdupq_n_f32(float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_f16))) float16x8_t vdupq_x_n_f16(float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_f32))) float32x4_t vdupq_x_n_f32(float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_f16))) float16x8_t veorq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_f16))) float16x8_t veorq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_f32))) float32x4_t veorq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_f32))) float32x4_t veorq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_f16))) float16x8_t veorq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_f16))) float16x8_t veorq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_f32))) float32x4_t veorq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_f32))) float32x4_t veorq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_f16))) float16x8_t veorq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_f16))) float16x8_t veorq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_f32))) float32x4_t veorq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_f32))) float32x4_t veorq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_f16))) float16x8_t vfmaq_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_f16))) float16x8_t vfmaq(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_f32))) float32x4_t vfmaq_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_f32))) float32x4_t vfmaq(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_f16))) float16x8_t vfmaq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_f16))) float16x8_t vfmaq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_f32))) float32x4_t vfmaq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_f32))) float32x4_t vfmaq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_n_f16))) float16x8_t vfmaq_m_n_f16(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_n_f16))) float16x8_t vfmaq_m(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_n_f32))) float32x4_t vfmaq_m_n_f32(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_n_f32))) float32x4_t vfmaq_m(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_n_f16))) float16x8_t vfmaq_n_f16(float16x8_t, float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_n_f16))) float16x8_t vfmaq(float16x8_t, float16x8_t, float16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_n_f32))) float32x4_t vfmaq_n_f32(float32x4_t, float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_n_f32))) float32x4_t vfmaq(float32x4_t, float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_m_n_f16))) float16x8_t vfmasq_m_n_f16(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_m_n_f16))) float16x8_t vfmasq_m(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_m_n_f32))) float32x4_t vfmasq_m_n_f32(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_m_n_f32))) float32x4_t vfmasq_m(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_n_f16))) float16x8_t vfmasq_n_f16(float16x8_t, float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_n_f16))) float16x8_t vfmasq(float16x8_t, float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_n_f32))) float32x4_t vfmasq_n_f32(float32x4_t, float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_n_f32))) float32x4_t vfmasq(float32x4_t, float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_f16))) float16x8_t vfmsq_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_f16))) float16x8_t vfmsq(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_f32))) float32x4_t vfmsq_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_f32))) float32x4_t vfmsq(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_m_f16))) float16x8_t vfmsq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_m_f16))) float16x8_t vfmsq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_m_f32))) float32x4_t vfmsq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_m_f32))) float32x4_t vfmsq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_f16))) float16_t vgetq_lane_f16(float16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_f16))) float16_t vgetq_lane(float16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_f32))) float32_t vgetq_lane_f32(float32x4_t, int); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_f32))) float32_t vgetq_lane(float32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_f16))) float16x8_t vld1q_f16(const float16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_f16))) float16x8_t vld1q(const float16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_f32))) float32x4_t vld1q_f32(const float32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_f32))) float32x4_t vld1q(const float32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_f16))) float16x8_t vld1q_z_f16(const float16_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_f16))) float16x8_t vld1q_z(const float16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_f32))) float32x4_t vld1q_z_f32(const float32_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_f32))) float32x4_t vld1q_z(const float32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_f16))) float16x8x2_t vld2q_f16(const float16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_f16))) float16x8x2_t vld2q(const float16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_f32))) float32x4x2_t vld2q_f32(const float32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_f32))) float32x4x2_t vld2q(const float32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_f16))) float16x8x4_t vld4q_f16(const float16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_f16))) float16x8x4_t vld4q(const float16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_f32))) float32x4x4_t vld4q_f32(const float32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_f32))) float32x4x4_t vld4q(const float32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_f16))) float16x8_t vldrhq_f16(const float16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_f16))) float16x8_t vldrhq_gather_offset_f16(const float16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_f16))) float16x8_t vldrhq_gather_offset(const float16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_f16))) float16x8_t vldrhq_gather_offset_z_f16(const float16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_f16))) float16x8_t vldrhq_gather_offset_z(const float16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_f16))) float16x8_t vldrhq_gather_shifted_offset_f16(const float16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_f16))) float16x8_t vldrhq_gather_shifted_offset(const float16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_f16))) float16x8_t vldrhq_gather_shifted_offset_z_f16(const float16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_f16))) float16x8_t vldrhq_gather_shifted_offset_z(const float16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_z_f16))) float16x8_t vldrhq_z_f16(const float16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_f32))) float32x4_t vldrwq_f32(const float32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_f32))) float32x4_t vldrwq_gather_base_f32(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_f32))) float32x4_t vldrwq_gather_base_wb_f32(uint32x4_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_z_f32))) float32x4_t vldrwq_gather_base_wb_z_f32(uint32x4_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_z_f32))) float32x4_t vldrwq_gather_base_z_f32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_f32))) float32x4_t vldrwq_gather_offset_f32(const float32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_f32))) float32x4_t vldrwq_gather_offset(const float32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_f32))) float32x4_t vldrwq_gather_offset_z_f32(const float32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_f32))) float32x4_t vldrwq_gather_offset_z(const float32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_f32))) float32x4_t vldrwq_gather_shifted_offset_f32(const float32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_f32))) float32x4_t vldrwq_gather_shifted_offset(const float32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_f32))) float32x4_t vldrwq_gather_shifted_offset_z_f32(const float32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_f32))) float32x4_t vldrwq_gather_shifted_offset_z(const float32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_z_f32))) float32x4_t vldrwq_z_f32(const float32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_f16))) float16x8_t vmaxnmaq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_f16))) float16x8_t 
vmaxnmaq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_f32))) float32x4_t vmaxnmaq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_f32))) float32x4_t vmaxnmaq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_m_f16))) float16x8_t vmaxnmaq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_m_f16))) float16x8_t vmaxnmaq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_m_f32))) float32x4_t vmaxnmaq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_m_f32))) float32x4_t vmaxnmaq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_f16))) float16_t vmaxnmavq_f16(float16_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_f16))) float16_t vmaxnmavq(float16_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_f32))) float32_t vmaxnmavq_f32(float32_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_f32))) float32_t vmaxnmavq(float32_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_p_f16))) float16_t vmaxnmavq_p_f16(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_p_f16))) float16_t vmaxnmavq_p(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_p_f32))) float32_t vmaxnmavq_p_f32(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_p_f32))) float32_t vmaxnmavq_p(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_f16))) float16x8_t vmaxnmq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_f16))) float16x8_t vmaxnmq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_f32))) float32x4_t vmaxnmq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_f32))) float32x4_t vmaxnmq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_m_f16))) float16x8_t vmaxnmq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_m_f16))) float16x8_t vmaxnmq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_m_f32))) float32x4_t vmaxnmq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_m_f32))) float32x4_t 
vmaxnmq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_x_f16))) float16x8_t vmaxnmq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_x_f16))) float16x8_t vmaxnmq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_x_f32))) float32x4_t vmaxnmq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_x_f32))) float32x4_t vmaxnmq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_f16))) float16_t vmaxnmvq_f16(float16_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_f16))) float16_t vmaxnmvq(float16_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_f32))) float32_t vmaxnmvq_f32(float32_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_f32))) float32_t vmaxnmvq(float32_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_p_f16))) float16_t vmaxnmvq_p_f16(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_p_f16))) float16_t vmaxnmvq_p(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_p_f32))) float32_t vmaxnmvq_p_f32(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_p_f32))) float32_t vmaxnmvq_p(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_f16))) float16x8_t vminnmaq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_f16))) float16x8_t vminnmaq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_f32))) float32x4_t vminnmaq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_f32))) float32x4_t vminnmaq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_m_f16))) float16x8_t vminnmaq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_m_f16))) float16x8_t vminnmaq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_m_f32))) float32x4_t vminnmaq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_m_f32))) float32x4_t vminnmaq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_f16))) float16_t vminnmavq_f16(float16_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_f16))) float16_t 
vminnmavq(float16_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_f32))) float32_t vminnmavq_f32(float32_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_f32))) float32_t vminnmavq(float32_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_p_f16))) float16_t vminnmavq_p_f16(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_p_f16))) float16_t vminnmavq_p(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_p_f32))) float32_t vminnmavq_p_f32(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_p_f32))) float32_t vminnmavq_p(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_f16))) float16x8_t vminnmq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_f16))) float16x8_t vminnmq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_f32))) float32x4_t vminnmq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_f32))) float32x4_t vminnmq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_m_f16))) float16x8_t vminnmq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_m_f16))) float16x8_t vminnmq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_m_f32))) float32x4_t vminnmq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_m_f32))) float32x4_t vminnmq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_x_f16))) float16x8_t vminnmq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_x_f16))) float16x8_t vminnmq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_x_f32))) float32x4_t vminnmq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_x_f32))) float32x4_t vminnmq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_f16))) float16_t vminnmvq_f16(float16_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_f16))) float16_t vminnmvq(float16_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_f32))) float32_t vminnmvq_f32(float32_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_f32))) 
float32_t vminnmvq(float32_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_p_f16))) float16_t vminnmvq_p_f16(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_p_f16))) float16_t vminnmvq_p(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_p_f32))) float32_t vminnmvq_p_f32(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_p_f32))) float32_t vminnmvq_p(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_f16))) float16x8_t vmulq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_f16))) float16x8_t vmulq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_f32))) float32x4_t vmulq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_f32))) float32x4_t vmulq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_f16))) float16x8_t vmulq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_f16))) float16x8_t vmulq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_f32))) float32x4_t vmulq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_f32))) float32x4_t vmulq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_f16))) float16x8_t vmulq_m_n_f16(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_f16))) float16x8_t vmulq_m(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_f32))) float32x4_t vmulq_m_n_f32(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_f32))) float32x4_t vmulq_m(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_f16))) float16x8_t vmulq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_f16))) float16x8_t vmulq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_f32))) float32x4_t vmulq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_f32))) float32x4_t vmulq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_f16))) float16x8_t vmulq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_f16))) float16x8_t vmulq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_f32))) float32x4_t vmulq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_f32))) float32x4_t vmulq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_f16))) float16x8_t vmulq_x_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_f16))) float16x8_t vmulq_x(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_f32))) float32x4_t vmulq_x_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_f32))) float32x4_t vmulq_x(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_f16))) float16x8_t vnegq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_f16))) float16x8_t vnegq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_f32))) float32x4_t vnegq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_f32))) float32x4_t vnegq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_f16))) float16x8_t vnegq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_f16))) float16x8_t vnegq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_f32))) float32x4_t vnegq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_f32))) float32x4_t vnegq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_f16))) float16x8_t vnegq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_f16))) float16x8_t vnegq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_f32))) float32x4_t vnegq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_f32))) float32x4_t vnegq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_f16))) float16x8_t vornq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_f16))) float16x8_t vornq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_f32))) float32x4_t vornq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_f32))) float32x4_t vornq(float32x4_t, float32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_f16))) float16x8_t vornq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_f16))) float16x8_t vornq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_f32))) float32x4_t vornq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_f32))) float32x4_t vornq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_f16))) float16x8_t vornq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_f16))) float16x8_t vornq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_f32))) float32x4_t vornq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_f32))) float32x4_t vornq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_f16))) float16x8_t vorrq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_f16))) float16x8_t vorrq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_f32))) float32x4_t vorrq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_f32))) float32x4_t vorrq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_f16))) float16x8_t vorrq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_f16))) float16x8_t vorrq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_f32))) float32x4_t vorrq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_f32))) float32x4_t vorrq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_f16))) float16x8_t vorrq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_f16))) float16x8_t vorrq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_f32))) float32x4_t vorrq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_f32))) float32x4_t vorrq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_f16))) float16x8_t vpselq_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_f16))) float16x8_t vpselq(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_f32))) float32x4_t vpselq_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_f32))) float32x4_t vpselq(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_f32))) float16x8_t vreinterpretq_f16_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_f32))) float16x8_t vreinterpretq_f16(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s16))) float16x8_t vreinterpretq_f16_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s16))) float16x8_t vreinterpretq_f16(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s32))) float16x8_t vreinterpretq_f16_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s32))) float16x8_t vreinterpretq_f16(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s64))) float16x8_t vreinterpretq_f16_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s64))) float16x8_t vreinterpretq_f16(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s8))) float16x8_t vreinterpretq_f16_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s8))) float16x8_t vreinterpretq_f16(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u16))) float16x8_t vreinterpretq_f16_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u16))) float16x8_t vreinterpretq_f16(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u32))) float16x8_t vreinterpretq_f16_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u32))) float16x8_t vreinterpretq_f16(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u64))) float16x8_t vreinterpretq_f16_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u64))) float16x8_t vreinterpretq_f16(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u8))) float16x8_t vreinterpretq_f16_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u8))) float16x8_t vreinterpretq_f16(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_f16))) float32x4_t vreinterpretq_f32_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_f16))) float32x4_t vreinterpretq_f32(float16x8_t); 
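/* Editor's illustrative sketch, not part of the original header: a hypothetical
 * reduction built only from intrinsics declared above — vld1q (overloaded load),
 * vmaxnmq (lane-wise IEEE maxNum), vgetq_lane and the across-vector reduction
 * vmaxnmvq.  Assumes an MVE floating-point target, <arm_mve.h> available, and a
 * buffer whose length n is a positive multiple of 4. */
#include <arm_mve.h>

float32_t buffer_max(const float32_t *p, int n)
{
    float32x4_t acc = vld1q(p);               /* vld1q_f32 via the overload   */
    for (int i = 4; i + 4 <= n; i += 4)
        acc = vmaxnmq(acc, vld1q(&p[i]));     /* lane-wise maxNum accumulate  */
    /* Fold the four lanes into a scalar, seeding the reduction with lane 0. */
    return vmaxnmvq(vgetq_lane(acc, 0), acc);
}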
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s16))) float32x4_t vreinterpretq_f32_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s16))) float32x4_t vreinterpretq_f32(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s32))) float32x4_t vreinterpretq_f32_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s32))) float32x4_t vreinterpretq_f32(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s64))) float32x4_t vreinterpretq_f32_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s64))) float32x4_t vreinterpretq_f32(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s8))) float32x4_t vreinterpretq_f32_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s8))) float32x4_t vreinterpretq_f32(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u16))) float32x4_t vreinterpretq_f32_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u16))) float32x4_t vreinterpretq_f32(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u32))) float32x4_t vreinterpretq_f32_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u32))) float32x4_t vreinterpretq_f32(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u64))) float32x4_t vreinterpretq_f32_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u64))) float32x4_t vreinterpretq_f32(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u8))) float32x4_t vreinterpretq_f32_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u8))) float32x4_t vreinterpretq_f32(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_f16))) int16x8_t vreinterpretq_s16_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_f16))) int16x8_t vreinterpretq_s16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_f32))) int16x8_t vreinterpretq_s16_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_f32))) int16x8_t vreinterpretq_s16(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_f16))) int32x4_t vreinterpretq_s32_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_f16))) int32x4_t vreinterpretq_s32(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_f32))) int32x4_t 
vreinterpretq_s32_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_f32))) int32x4_t vreinterpretq_s32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_f16))) int64x2_t vreinterpretq_s64_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_f16))) int64x2_t vreinterpretq_s64(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_f32))) int64x2_t vreinterpretq_s64_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_f32))) int64x2_t vreinterpretq_s64(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_f16))) int8x16_t vreinterpretq_s8_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_f16))) int8x16_t vreinterpretq_s8(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_f32))) int8x16_t vreinterpretq_s8_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_f32))) int8x16_t vreinterpretq_s8(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_f16))) uint16x8_t vreinterpretq_u16_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_f16))) uint16x8_t vreinterpretq_u16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_f32))) uint16x8_t vreinterpretq_u16_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_f32))) uint16x8_t vreinterpretq_u16(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_f16))) uint32x4_t vreinterpretq_u32_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_f16))) uint32x4_t vreinterpretq_u32(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_f32))) uint32x4_t vreinterpretq_u32_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_f32))) uint32x4_t vreinterpretq_u32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_f16))) uint64x2_t vreinterpretq_u64_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_f16))) uint64x2_t vreinterpretq_u64(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_f32))) uint64x2_t vreinterpretq_u64_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_f32))) uint64x2_t vreinterpretq_u64(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f16))) uint8x16_t vreinterpretq_u8_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f16))) uint8x16_t vreinterpretq_u8(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f32))) uint8x16_t vreinterpretq_u8_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f32))) uint8x16_t vreinterpretq_u8(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_f16))) float16x8_t vrev32q_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_f16))) float16x8_t vrev32q(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_f16))) float16x8_t vrev32q_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_f16))) float16x8_t vrev32q_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_f16))) float16x8_t vrev32q_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_f16))) float16x8_t vrev32q_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_f16))) float16x8_t vrev64q_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_f16))) float16x8_t vrev64q(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_f32))) float32x4_t vrev64q_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_f32))) float32x4_t vrev64q(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_f16))) float16x8_t vrev64q_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_f16))) float16x8_t vrev64q_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_f32))) float32x4_t vrev64q_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_f32))) float32x4_t vrev64q_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_f16))) float16x8_t vrev64q_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_f16))) float16x8_t vrev64q_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_f32))) float32x4_t vrev64q_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_f32))) float32x4_t vrev64q_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_f16))) float16x8_t vrndaq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_f16))) float16x8_t vrndaq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_f32))) float32x4_t 
vrndaq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_f32))) float32x4_t vrndaq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_m_f16))) float16x8_t vrndaq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_m_f16))) float16x8_t vrndaq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_m_f32))) float32x4_t vrndaq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_m_f32))) float32x4_t vrndaq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_x_f16))) float16x8_t vrndaq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_x_f16))) float16x8_t vrndaq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_x_f32))) float32x4_t vrndaq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_x_f32))) float32x4_t vrndaq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_f16))) float16x8_t vrndmq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_f16))) float16x8_t vrndmq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_f32))) float32x4_t vrndmq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_f32))) float32x4_t vrndmq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_m_f16))) float16x8_t vrndmq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_m_f16))) float16x8_t vrndmq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_m_f32))) float32x4_t vrndmq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_m_f32))) float32x4_t vrndmq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_x_f16))) float16x8_t vrndmq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_x_f16))) float16x8_t vrndmq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_x_f32))) float32x4_t vrndmq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_x_f32))) float32x4_t vrndmq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_f16))) float16x8_t vrndnq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_f16))) float16x8_t vrndnq(float16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_f32))) float32x4_t vrndnq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_f32))) float32x4_t vrndnq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_m_f16))) float16x8_t vrndnq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_m_f16))) float16x8_t vrndnq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_m_f32))) float32x4_t vrndnq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_m_f32))) float32x4_t vrndnq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_x_f16))) float16x8_t vrndnq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_x_f16))) float16x8_t vrndnq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_x_f32))) float32x4_t vrndnq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_x_f32))) float32x4_t vrndnq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_f16))) float16x8_t vrndpq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_f16))) float16x8_t vrndpq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_f32))) float32x4_t vrndpq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_f32))) float32x4_t vrndpq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_m_f16))) float16x8_t vrndpq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_m_f16))) float16x8_t vrndpq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_m_f32))) float32x4_t vrndpq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_m_f32))) float32x4_t vrndpq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_x_f16))) float16x8_t vrndpq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_x_f16))) float16x8_t vrndpq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_x_f32))) float32x4_t vrndpq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_x_f32))) float32x4_t vrndpq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_f16))) float16x8_t vrndq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_f16))) float16x8_t vrndq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_f32))) float32x4_t vrndq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_f32))) float32x4_t vrndq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_m_f16))) float16x8_t vrndq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_m_f16))) float16x8_t vrndq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_m_f32))) float32x4_t vrndq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_m_f32))) float32x4_t vrndq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_x_f16))) float16x8_t vrndq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_x_f16))) float16x8_t vrndq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_x_f32))) float32x4_t vrndq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_x_f32))) float32x4_t vrndq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_f16))) float16x8_t vrndxq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_f16))) float16x8_t vrndxq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_f32))) float32x4_t vrndxq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_f32))) float32x4_t vrndxq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_m_f16))) float16x8_t vrndxq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_m_f16))) float16x8_t vrndxq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_m_f32))) float32x4_t vrndxq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_m_f32))) float32x4_t vrndxq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_x_f16))) float16x8_t vrndxq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_x_f16))) float16x8_t vrndxq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_x_f32))) float32x4_t vrndxq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_x_f32))) float32x4_t vrndxq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_f16))) float16x8_t 
vsetq_lane_f16(float16_t, float16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_f16))) float16x8_t vsetq_lane(float16_t, float16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_f32))) float32x4_t vsetq_lane_f32(float32_t, float32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_f32))) float32x4_t vsetq_lane(float32_t, float32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_f16))) void vst1q_f16(float16_t *, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_f16))) void vst1q(float16_t *, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_f32))) void vst1q_f32(float32_t *, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_f32))) void vst1q(float32_t *, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_f16))) void vst1q_p_f16(float16_t *, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_f16))) void vst1q_p(float16_t *, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_f32))) void vst1q_p_f32(float32_t *, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_f32))) void vst1q_p(float32_t *, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_f16))) void vst2q_f16(float16_t *, float16x8x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_f16))) void vst2q(float16_t *, float16x8x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_f32))) void vst2q_f32(float32_t *, float32x4x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_f32))) void vst2q(float32_t *, float32x4x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_f16))) void vst4q_f16(float16_t *, float16x8x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_f16))) void vst4q(float16_t *, float16x8x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_f32))) void vst4q_f32(float32_t *, float32x4x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_f32))) void vst4q(float32_t *, float32x4x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_f16))) void vstrhq_f16(float16_t *, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_f16))) void vstrhq(float16_t *, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_f16))) void vstrhq_p_f16(float16_t *, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_f16))) void vstrhq_p(float16_t *, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_f16))) void 
vstrhq_scatter_offset_f16(float16_t *, uint16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_f16))) void vstrhq_scatter_offset(float16_t *, uint16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_f16))) void vstrhq_scatter_offset_p_f16(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_f16))) void vstrhq_scatter_offset_p(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_f16))) void vstrhq_scatter_shifted_offset_f16(float16_t *, uint16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_f16))) void vstrhq_scatter_shifted_offset(float16_t *, uint16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_f16))) void vstrhq_scatter_shifted_offset_p_f16(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_f16))) void vstrhq_scatter_shifted_offset_p(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_f32))) void vstrwq_f32(float32_t *, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_f32))) void vstrwq(float32_t *, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_f32))) void vstrwq_p_f32(float32_t *, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_f32))) void vstrwq_p(float32_t *, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_f32))) void vstrwq_scatter_base_f32(uint32x4_t, int, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_f32))) void vstrwq_scatter_base(uint32x4_t, int, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_f32))) void vstrwq_scatter_base_p_f32(uint32x4_t, int, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_f32))) void vstrwq_scatter_base_p(uint32x4_t, int, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_f32))) void vstrwq_scatter_base_wb_f32(uint32x4_t *, int, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_f32))) void vstrwq_scatter_base_wb(uint32x4_t *, int, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_f32))) void vstrwq_scatter_base_wb_p_f32(uint32x4_t *, int, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_f32))) void vstrwq_scatter_base_wb_p(uint32x4_t *, int, float32x4_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_f32))) void vstrwq_scatter_offset_f32(float32_t *, uint32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_f32))) void vstrwq_scatter_offset(float32_t *, uint32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_f32))) void vstrwq_scatter_offset_p_f32(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_f32))) void vstrwq_scatter_offset_p(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_f32))) void vstrwq_scatter_shifted_offset_f32(float32_t *, uint32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_f32))) void vstrwq_scatter_shifted_offset(float32_t *, uint32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_f32))) void vstrwq_scatter_shifted_offset_p_f32(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_f32))) void vstrwq_scatter_shifted_offset_p(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_f16))) float16x8_t vsubq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_f16))) float16x8_t vsubq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_f32))) float32x4_t vsubq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_f32))) float32x4_t vsubq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_f16))) float16x8_t vsubq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_f16))) float16x8_t vsubq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_f32))) float32x4_t vsubq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_f32))) float32x4_t vsubq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_f16))) float16x8_t vsubq_m_n_f16(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_f16))) float16x8_t vsubq_m(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_f32))) float32x4_t vsubq_m_n_f32(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_f32))) 
float32x4_t vsubq_m(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_f16))) float16x8_t vsubq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_f16))) float16x8_t vsubq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_f32))) float32x4_t vsubq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_f32))) float32x4_t vsubq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_f16))) float16x8_t vsubq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_f16))) float16x8_t vsubq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_f32))) float32x4_t vsubq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_f32))) float32x4_t vsubq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_f16))) float16x8_t vsubq_x_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_f16))) float16x8_t vsubq_x(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_f32))) float32x4_t vsubq_x_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_f32))) float32x4_t vsubq_x(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_f16))) float16x8_t vuninitializedq_f16(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_f32))) float32x4_t vuninitializedq_f32(); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_f16))) float16x8_t vuninitializedq(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_f32))) float32x4_t vuninitializedq(float32x4_t); #endif /* (__ARM_FEATURE_MVE & 2) && (!defined __ARM_MVE_PRESERVE_USER_NAMESPACE) */ #ifdef __cplusplus } /* extern "C" */ #endif #endif /* __ARM_MVE_H */
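/* --------------------------------------------------------------------------
 * Illustrative usage sketch (not part of the original arm_mve.h text above):
 * a minimal example, under stated assumptions, of how the polymorphic and
 * predicated intrinsics declared above are typically called. The helper name
 * mve_round_sub_store_example and its parameters are hypothetical; only the
 * intrinsics (vrndnq, vsubq, vst1q_p) and the types (float32x4_t, float32_t,
 * mve_pred16_t) come from the declarations in the header itself. Assumes an
 * MVE-FP target (e.g. -mcpu=cortex-m55 -mfloat-abi=hard) and that arm_mve.h
 * has been included.
 * ------------------------------------------------------------------------ */
static inline void mve_round_sub_store_example(float32_t *dst, float32x4_t v,
                                               float32_t bias, mve_pred16_t p)
{
    float32x4_t r = vrndnq(v);      /* vrndnq_f32: round to nearest, ties to even  */
    float32x4_t s = vsubq(r, bias); /* polymorphic call resolves to vsubq_n_f32    */
    vst1q_p(dst, s, p);             /* store only the lanes enabled by predicate p */
}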
/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
#endif #ifndef __AVX512VLINTRIN_H #define __AVX512VLINTRIN_H #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,no-evex512"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,no-evex512"), \ __min_vector_width__(256))) typedef short __v2hi __attribute__((__vector_size__(4))); typedef char __v4qi __attribute__((__vector_size__(4))); typedef char __v2qi __attribute__((__vector_size__(2))); /* Integer compare */ #define _mm_cmpeq_epi32_mask(A, B) \ _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ) #define _mm_mask_cmpeq_epi32_mask(k, A, B) \ _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm_cmpge_epi32_mask(A, B) \ _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE) #define _mm_mask_cmpge_epi32_mask(k, A, B) \ _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm_cmpgt_epi32_mask(A, B) \ _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT) #define _mm_mask_cmpgt_epi32_mask(k, A, B) \ _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm_cmple_epi32_mask(A, B) \ _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE) #define _mm_mask_cmple_epi32_mask(k, A, B) \ _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm_cmplt_epi32_mask(A, B) \ _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT) #define _mm_mask_cmplt_epi32_mask(k, A, B) \ _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm_cmpneq_epi32_mask(A, B) \ _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE) #define _mm_mask_cmpneq_epi32_mask(k, A, B) \ _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE) #define _mm256_cmpeq_epi32_mask(A, B) \ _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ) #define _mm256_mask_cmpeq_epi32_mask(k, A, B) \ _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm256_cmpge_epi32_mask(A, B) \ _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE) #define _mm256_mask_cmpge_epi32_mask(k, A, B) \ _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm256_cmpgt_epi32_mask(A, B) \ _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT) #define _mm256_mask_cmpgt_epi32_mask(k, A, B) \ _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm256_cmple_epi32_mask(A, B) \ _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE) #define _mm256_mask_cmple_epi32_mask(k, A, B) \ _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm256_cmplt_epi32_mask(A, B) \ _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT) #define _mm256_mask_cmplt_epi32_mask(k, A, B) \ _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm256_cmpneq_epi32_mask(A, B) \ _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE) #define _mm256_mask_cmpneq_epi32_mask(k, A, B) \ _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE) #define _mm_cmpeq_epu32_mask(A, B) \ _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ) #define _mm_mask_cmpeq_epu32_mask(k, A, B) \ _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm_cmpge_epu32_mask(A, B) \ _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE) #define _mm_mask_cmpge_epu32_mask(k, A, B) \ _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm_cmpgt_epu32_mask(A, B) \ _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT) #define _mm_mask_cmpgt_epu32_mask(k, A, B) \ _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm_cmple_epu32_mask(A, B) \ _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE) #define _mm_mask_cmple_epu32_mask(k, A, B) \ _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm_cmplt_epu32_mask(A, B) \
_mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT) #define _mm_mask_cmplt_epu32_mask(k, A, B) \ _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm_cmpneq_epu32_mask(A, B) \ _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE) #define _mm_mask_cmpneq_epu32_mask(k, A, B) \ _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE) #define _mm256_cmpeq_epu32_mask(A, B) \ _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ) #define _mm256_mask_cmpeq_epu32_mask(k, A, B) \ _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm256_cmpge_epu32_mask(A, B) \ _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE) #define _mm256_mask_cmpge_epu32_mask(k, A, B) \ _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm256_cmpgt_epu32_mask(A, B) \ _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT) #define _mm256_mask_cmpgt_epu32_mask(k, A, B) \ _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm256_cmple_epu32_mask(A, B) \ _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE) #define _mm256_mask_cmple_epu32_mask(k, A, B) \ _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm256_cmplt_epu32_mask(A, B) \ _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT) #define _mm256_mask_cmplt_epu32_mask(k, A, B) \ _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm256_cmpneq_epu32_mask(A, B) \ _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE) #define _mm256_mask_cmpneq_epu32_mask(k, A, B) \ _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE) #define _mm_cmpeq_epi64_mask(A, B) \ _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ) #define _mm_mask_cmpeq_epi64_mask(k, A, B) \ _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm_cmpge_epi64_mask(A, B) \ _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE) #define _mm_mask_cmpge_epi64_mask(k, A, B) \ _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm_cmpgt_epi64_mask(A, B) \ _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT) #define _mm_mask_cmpgt_epi64_mask(k, A, B) \ _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm_cmple_epi64_mask(A, B) \ _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE) #define _mm_mask_cmple_epi64_mask(k, A, B) \ _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm_cmplt_epi64_mask(A, B) \ _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT) #define _mm_mask_cmplt_epi64_mask(k, A, B) \ _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm_cmpneq_epi64_mask(A, B) \ _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE) #define _mm_mask_cmpneq_epi64_mask(k, A, B) \ _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE) #define _mm256_cmpeq_epi64_mask(A, B) \ _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ) #define _mm256_mask_cmpeq_epi64_mask(k, A, B) \ _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm256_cmpge_epi64_mask(A, B) \ _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE) #define _mm256_mask_cmpge_epi64_mask(k, A, B) \ _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm256_cmpgt_epi64_mask(A, B) \ _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT) #define _mm256_mask_cmpgt_epi64_mask(k, A, B) \ _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm256_cmple_epi64_mask(A, B) \ _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE) #define _mm256_mask_cmple_epi64_mask(k, A, B) \ _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm256_cmplt_epi64_mask(A, B) \ _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT) #define _mm256_mask_cmplt_epi64_mask(k, A, B) \ _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT) #define 
_mm256_cmpneq_epi64_mask(A, B) \ _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE) #define _mm256_mask_cmpneq_epi64_mask(k, A, B) \ _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE) #define _mm_cmpeq_epu64_mask(A, B) \ _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ) #define _mm_mask_cmpeq_epu64_mask(k, A, B) \ _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm_cmpge_epu64_mask(A, B) \ _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE) #define _mm_mask_cmpge_epu64_mask(k, A, B) \ _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm_cmpgt_epu64_mask(A, B) \ _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT) #define _mm_mask_cmpgt_epu64_mask(k, A, B) \ _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm_cmple_epu64_mask(A, B) \ _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE) #define _mm_mask_cmple_epu64_mask(k, A, B) \ _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm_cmplt_epu64_mask(A, B) \ _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT) #define _mm_mask_cmplt_epu64_mask(k, A, B) \ _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm_cmpneq_epu64_mask(A, B) \ _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE) #define _mm_mask_cmpneq_epu64_mask(k, A, B) \ _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) #define _mm256_cmpeq_epu64_mask(A, B) \ _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ) #define _mm256_mask_cmpeq_epu64_mask(k, A, B) \ _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm256_cmpge_epu64_mask(A, B) \ _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE) #define _mm256_mask_cmpge_epu64_mask(k, A, B) \ _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm256_cmpgt_epu64_mask(A, B) \ _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT) #define _mm256_mask_cmpgt_epu64_mask(k, A, B) \ _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm256_cmple_epu64_mask(A, B) \ _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE) #define _mm256_mask_cmple_epu64_mask(k, A, B) \ _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm256_cmplt_epu64_mask(A, B) \ _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT) #define _mm256_mask_cmplt_epu64_mask(k, A, B) \ _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm256_cmpneq_epu64_mask(A, B) \ _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE) #define _mm256_mask_cmpneq_epu64_mask(k, A, B) \ _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_add_epi32(__A, __B), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_add_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_add_epi64(__A, __B), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_add_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return 
(__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sub_epi32(__A, __B), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sub_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_sub_epi64(__A, __B), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_sub_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_add_epi32(__A, __B), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_add_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_add_epi64(__A, __B), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_add_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sub_epi32(__A, __B), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sub_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_sub_epi64(__A, __B), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_sub_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_mul_epi32(__X, __Y), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_mul_epi32(__X, __Y), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_mul_epi32(__X, __Y), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 
(__v2di)_mm_mul_epi32(__X, __Y), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_mul_epu32(__X, __Y), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_mul_epu32(__X, __Y), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_mul_epu32(__X, __Y), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_mul_epu32(__X, __Y), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_mullo_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_mullo_epi32(__A, __B), (__v8si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_mullo_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_mullo_epi32(__A, __B), (__v4si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a & (__v8su)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_and_epi32(__A, __B), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_and_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4su)__a & (__v4su)__b); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_and_epi32(__A, __B), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_andnot_epi32(__m256i __A, __m256i __B) { return (__m256i)(~(__v8su)__A & (__v8su)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_andnot_epi32(__A, __B), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi32(__mmask8 __U, 
__m256i __A, __m256i __B) { return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(), __U, __A, __B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_andnot_epi32(__m128i __A, __m128i __B) { return (__m128i)(~(__v4su)__A & (__v4su)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_andnot_epi32(__A, __B), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_or_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a | (__v8su)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_or_epi32(__A, __B), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_or_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4su)__a | (__v4su)__b); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_or_epi32(__A, __B), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a ^ (__v8su)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_xor_epi32(__A, __B), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_xor_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4su)__a ^ (__v4su)__b); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_xor_epi32(__A, __B), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a & (__v4du)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_and_epi64(__A, __B), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_and_epi64(__m128i __a, __m128i 
__b) { return (__m128i)((__v2du)__a & (__v2du)__b); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_and_epi64(__A, __B), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_andnot_epi64(__m256i __A, __m256i __B) { return (__m256i)(~(__v4du)__A & (__v4du)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_andnot_epi64(__A, __B), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(), __U, __A, __B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_andnot_epi64(__m128i __A, __m128i __B) { return (__m128i)(~(__v2du)__A & (__v2du)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_andnot_epi64(__A, __B), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_or_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a | (__v4du)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_or_epi64(__A, __B), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_or_epi64(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a | (__v2du)__b); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_or_epi64(__A, __B), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a ^ (__v4du)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_xor_epi64(__A, __B), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_xor_epi64(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a ^ (__v2du)__b); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_xor_epi64(__A, __B), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B); } #define _mm_cmp_epi32_mask(a, b, p) \ ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ (__v4si)(__m128i)(b), (int)(p), \ (__mmask8)-1)) #define _mm_mask_cmp_epi32_mask(m, a, b, p) \ ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ (__v4si)(__m128i)(b), (int)(p), \ (__mmask8)(m))) #define _mm_cmp_epu32_mask(a, b, p) \ ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ (__v4si)(__m128i)(b), (int)(p), \ (__mmask8)-1)) #define _mm_mask_cmp_epu32_mask(m, a, b, p) \ ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ (__v4si)(__m128i)(b), (int)(p), \ (__mmask8)(m))) #define _mm256_cmp_epi32_mask(a, b, p) \ ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ (__v8si)(__m256i)(b), (int)(p), \ (__mmask8)-1)) #define _mm256_mask_cmp_epi32_mask(m, a, b, p) \ ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ (__v8si)(__m256i)(b), (int)(p), \ (__mmask8)(m))) #define _mm256_cmp_epu32_mask(a, b, p) \ ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ (__v8si)(__m256i)(b), (int)(p), \ (__mmask8)-1)) #define _mm256_mask_cmp_epu32_mask(m, a, b, p) \ ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ (__v8si)(__m256i)(b), (int)(p), \ (__mmask8)(m))) #define _mm_cmp_epi64_mask(a, b, p) \ ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ (__v2di)(__m128i)(b), (int)(p), \ (__mmask8)-1)) #define _mm_mask_cmp_epi64_mask(m, a, b, p) \ ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ (__v2di)(__m128i)(b), (int)(p), \ (__mmask8)(m))) #define _mm_cmp_epu64_mask(a, b, p) \ ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ (__v2di)(__m128i)(b), (int)(p), \ (__mmask8)-1)) #define _mm_mask_cmp_epu64_mask(m, a, b, p) \ ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ (__v2di)(__m128i)(b), (int)(p), \ (__mmask8)(m))) #define _mm256_cmp_epi64_mask(a, b, p) \ ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ (__v4di)(__m256i)(b), (int)(p), \ (__mmask8)-1)) #define _mm256_mask_cmp_epi64_mask(m, a, b, p) \ ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ (__v4di)(__m256i)(b), (int)(p), \ (__mmask8)(m))) #define _mm256_cmp_epu64_mask(a, b, p) \ ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ (__v4di)(__m256i)(b), (int)(p), \ (__mmask8)-1)) #define _mm256_mask_cmp_epu64_mask(m, a, b, p) \ ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ (__v4di)(__m256i)(b), (int)(p), \ (__mmask8)(m))) #define _mm256_cmp_ps_mask(a, b, p) \ ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ (__v8sf)(__m256)(b), (int)(p), \ (__mmask8)-1)) #define _mm256_mask_cmp_ps_mask(m, a, b, p) \ ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ (__v8sf)(__m256)(b), (int)(p), \ (__mmask8)(m))) #define _mm256_cmp_pd_mask(a, b, p) \ ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ (__v4df)(__m256d)(b), (int)(p), \ (__mmask8)-1)) #define _mm256_mask_cmp_pd_mask(m, a, b, p) \ ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ (__v4df)(__m256d)(b), (int)(p), \ (__mmask8)(m))) #define _mm_cmp_ps_mask(a, b, p) \ ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ (__v4sf)(__m128)(b), (int)(p), \ 
(__mmask8)-1)) #define _mm_mask_cmp_ps_mask(m, a, b, p) \ ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ (__v4sf)(__m128)(b), (int)(p), \ (__mmask8)(m))) #define _mm_cmp_pd_mask(a, b, p) \ ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ (__v2df)(__m128d)(b), (int)(p), \ (__mmask8)-1)) #define _mm_mask_cmp_pd_mask(m, a, b, p) \ ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ (__v2df)(__m128d)(b), (int)(p), \ (__mmask8)(m))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, __builtin_ia32_vfmaddpd ((__v2df) __A, (__v2df) __B, (__v2df) __C), (__v2df) __A); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, __builtin_ia32_vfmaddpd ((__v2df) __A, (__v2df) __B, (__v2df) __C), (__v2df) __C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, __builtin_ia32_vfmaddpd ((__v2df) __A, (__v2df) __B, (__v2df) __C), (__v2df)_mm_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, __builtin_ia32_vfmaddpd ((__v2df) __A, (__v2df) __B, -(__v2df) __C), (__v2df) __A); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, __builtin_ia32_vfmaddpd ((__v2df) __A, (__v2df) __B, -(__v2df) __C), (__v2df)_mm_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, __builtin_ia32_vfmaddpd (-(__v2df) __A, (__v2df) __B, (__v2df) __C), (__v2df) __C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, __builtin_ia32_vfmaddpd (-(__v2df) __A, (__v2df) __B, (__v2df) __C), (__v2df)_mm_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, __builtin_ia32_vfmaddpd (-(__v2df) __A, (__v2df) __B, -(__v2df) __C), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, __builtin_ia32_vfmaddpd256 ((__v4df) __A, (__v4df) __B, (__v4df) __C), (__v4df) __A); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, __builtin_ia32_vfmaddpd256 ((__v4df) __A, (__v4df) __B, (__v4df) __C), (__v4df) __C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, __builtin_ia32_vfmaddpd256 ((__v4df) __A, (__v4df) __B, (__v4df) __C), (__v4df)_mm256_setzero_pd()); } static __inline__ __m256d 
__DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, __builtin_ia32_vfmaddpd256 ((__v4df) __A, (__v4df) __B, -(__v4df) __C), (__v4df) __A); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, __builtin_ia32_vfmaddpd256 ((__v4df) __A, (__v4df) __B, -(__v4df) __C), (__v4df)_mm256_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, __builtin_ia32_vfmaddpd256 (-(__v4df) __A, (__v4df) __B, (__v4df) __C), (__v4df) __C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, __builtin_ia32_vfmaddpd256 (-(__v4df) __A, (__v4df) __B, (__v4df) __C), (__v4df)_mm256_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, __builtin_ia32_vfmaddpd256 (-(__v4df) __A, (__v4df) __B, -(__v4df) __C), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, __builtin_ia32_vfmaddps ((__v4sf) __A, (__v4sf) __B, (__v4sf) __C), (__v4sf) __A); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, __builtin_ia32_vfmaddps ((__v4sf) __A, (__v4sf) __B, (__v4sf) __C), (__v4sf) __C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, __builtin_ia32_vfmaddps ((__v4sf) __A, (__v4sf) __B, (__v4sf) __C), (__v4sf)_mm_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, __builtin_ia32_vfmaddps ((__v4sf) __A, (__v4sf) __B, -(__v4sf) __C), (__v4sf) __A); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, __builtin_ia32_vfmaddps ((__v4sf) __A, (__v4sf) __B, -(__v4sf) __C), (__v4sf)_mm_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, __builtin_ia32_vfmaddps (-(__v4sf) __A, (__v4sf) __B, (__v4sf) __C), (__v4sf) __C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, __builtin_ia32_vfmaddps (-(__v4sf) __A, (__v4sf) __B, (__v4sf) __C), (__v4sf)_mm_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, __builtin_ia32_vfmaddps (-(__v4sf) __A, (__v4sf) __B, -(__v4sf) __C), (__v4sf)_mm_setzero_ps()); 
} static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, __builtin_ia32_vfmaddps256 ((__v8sf) __A, (__v8sf) __B, (__v8sf) __C), (__v8sf) __A); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, __builtin_ia32_vfmaddps256 ((__v8sf) __A, (__v8sf) __B, (__v8sf) __C), (__v8sf) __C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, __builtin_ia32_vfmaddps256 ((__v8sf) __A, (__v8sf) __B, (__v8sf) __C), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, __builtin_ia32_vfmaddps256 ((__v8sf) __A, (__v8sf) __B, -(__v8sf) __C), (__v8sf) __A); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, __builtin_ia32_vfmaddps256 ((__v8sf) __A, (__v8sf) __B, -(__v8sf) __C), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, __builtin_ia32_vfmaddps256 (-(__v8sf) __A, (__v8sf) __B, (__v8sf) __C), (__v8sf) __C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, __builtin_ia32_vfmaddps256 (-(__v8sf) __A, (__v8sf) __B, (__v8sf) __C), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, __builtin_ia32_vfmaddps256 (-(__v8sf) __A, (__v8sf) __B, -(__v8sf) __C), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, __builtin_ia32_vfmaddsubpd ((__v2df) __A, (__v2df) __B, (__v2df) __C), (__v2df) __A); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, __builtin_ia32_vfmaddsubpd ((__v2df) __A, (__v2df) __B, (__v2df) __C), (__v2df) __C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, __builtin_ia32_vfmaddsubpd ((__v2df) __A, (__v2df) __B, (__v2df) __C), (__v2df)_mm_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, __builtin_ia32_vfmaddsubpd ((__v2df) __A, (__v2df) __B, -(__v2df) __C), (__v2df) __A); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, __builtin_ia32_vfmaddsubpd ((__v2df) 
__A, (__v2df) __B, -(__v2df) __C), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, (__v4df) __B, (__v4df) __C), (__v4df) __A); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, (__v4df) __B, (__v4df) __C), (__v4df) __C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, (__v4df) __B, (__v4df) __C), (__v4df)_mm256_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, (__v4df) __B, -(__v4df) __C), (__v4df) __A); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, (__v4df) __B, -(__v4df) __C), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, __builtin_ia32_vfmaddsubps ((__v4sf) __A, (__v4sf) __B, (__v4sf) __C), (__v4sf) __A); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, __builtin_ia32_vfmaddsubps ((__v4sf) __A, (__v4sf) __B, (__v4sf) __C), (__v4sf) __C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, __builtin_ia32_vfmaddsubps ((__v4sf) __A, (__v4sf) __B, (__v4sf) __C), (__v4sf)_mm_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, __builtin_ia32_vfmaddsubps ((__v4sf) __A, (__v4sf) __B, -(__v4sf) __C), (__v4sf) __A); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, __builtin_ia32_vfmaddsubps ((__v4sf) __A, (__v4sf) __B, -(__v4sf) __C), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, (__v8sf) __B, (__v8sf) __C), (__v8sf) __A); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, (__v8sf) __B, (__v8sf) __C), (__v8sf) __C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { return (__m256) 
__builtin_ia32_selectps_256((__mmask8) __U, __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, (__v8sf) __B, (__v8sf) __C), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, (__v8sf) __B, -(__v8sf) __C), (__v8sf) __A); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, (__v8sf) __B, -(__v8sf) __C), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, __builtin_ia32_vfmaddpd ((__v2df) __A, (__v2df) __B, -(__v2df) __C), (__v2df) __C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, __builtin_ia32_vfmaddpd256 ((__v4df) __A, (__v4df) __B, -(__v4df) __C), (__v4df) __C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, __builtin_ia32_vfmaddps ((__v4sf) __A, (__v4sf) __B, -(__v4sf) __C), (__v4sf) __C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, __builtin_ia32_vfmaddps256 ((__v8sf) __A, (__v8sf) __B, -(__v8sf) __C), (__v8sf) __C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, __builtin_ia32_vfmaddsubpd ((__v2df) __A, (__v2df) __B, -(__v2df) __C), (__v2df) __C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, (__v4df) __B, -(__v4df) __C), (__v4df) __C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, __builtin_ia32_vfmaddsubps ((__v4sf) __A, (__v4sf) __B, -(__v4sf) __C), (__v4sf) __C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, (__v8sf) __B, -(__v8sf) __C), (__v8sf) __C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, __builtin_ia32_vfmaddpd ((__v2df) __A, -(__v2df) __B, (__v2df) __C), (__v2df) __A); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, __builtin_ia32_vfmaddpd256 ((__v4df) __A, -(__v4df) __B, (__v4df) __C), (__v4df) __A); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 
__C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, __builtin_ia32_vfmaddps ((__v4sf) __A, -(__v4sf) __B, (__v4sf) __C), (__v4sf) __A); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, __builtin_ia32_vfmaddps256 ((__v8sf) __A, -(__v8sf) __B, (__v8sf) __C), (__v8sf) __A); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, __builtin_ia32_vfmaddpd ((__v2df) __A, -(__v2df) __B, -(__v2df) __C), (__v2df) __A); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, __builtin_ia32_vfmaddpd ((__v2df) __A, -(__v2df) __B, -(__v2df) __C), (__v2df) __C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, __builtin_ia32_vfmaddpd256 ((__v4df) __A, -(__v4df) __B, -(__v4df) __C), (__v4df) __A); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, __builtin_ia32_vfmaddpd256 ((__v4df) __A, -(__v4df) __B, -(__v4df) __C), (__v4df) __C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, __builtin_ia32_vfmaddps ((__v4sf) __A, -(__v4sf) __B, -(__v4sf) __C), (__v4sf) __A); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, __builtin_ia32_vfmaddps ((__v4sf) __A, -(__v4sf) __B, -(__v4sf) __C), (__v4sf) __C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, __builtin_ia32_vfmaddps256 ((__v8sf) __A, -(__v8sf) __B, -(__v8sf) __C), (__v8sf) __A); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, __builtin_ia32_vfmaddps256 ((__v8sf) __A, -(__v8sf) __B, -(__v8sf) __C), (__v8sf) __C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_add_pd(__A, __B), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_add_pd(__A, __B), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_add_pd(__A, __B), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_add_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 
_mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_add_ps(__A, __B), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_add_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_add_ps(__A, __B), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_add_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, (__v4si) __W, (__v4si) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, (__v8si) __W, (__v8si) __A); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) { return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, (__v2df) __W, (__v2df) __A); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) { return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, (__v4df) __W, (__v4df) __A); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) { return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, (__v4sf) __W, (__v4sf) __A); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) { return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, (__v8sf) __W, (__v8sf) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, (__v2di) __W, (__v2di) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, (__v4di) __W, (__v4di) __A); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, (__v2df) __W, (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, (__v2df) _mm_setzero_pd (), (__mmask8) __U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, (__v4df) __W, (__mmask8) __U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, (__v2di) __W, (__mmask8) __U); } static 
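/* The _mm*_mask[z]_compress_* forms here pack the lanes selected by the mask
 * into the low elements of the destination (filling the remainder from the
 * passthrough operand, or with zero in the maskz forms), while the
 * _mm*_mask_compressstoreu_* forms further below write only the selected
 * lanes contiguously to an unaligned address.  Sketch, assuming a
 * caller-provided buffer large enough for the active lanes:
 *
 *     int out[4];
 *     _mm_mask_compressstoreu_epi32(out, 0x5, v);  // stores lanes 0 and 2 to out[0..1]
 */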
__inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, (__v2di) _mm_setzero_si128 (), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, (__v4sf) __W, (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, (__v8sf) __W, (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, (__v4si) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, (__v8si) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) { __builtin_ia32_compressstoredf128_mask ((__v2df *) __P, (__v2df) __A, (__mmask8) __U); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) { __builtin_ia32_compressstoredf256_mask ((__v4df *) __P, (__v4df) __A, (__mmask8) __U); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_compressstoredi128_mask ((__v2di *) __P, (__v2di) __A, (__mmask8) __U); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_compressstoredi256_mask ((__v4di *) __P, (__v4di) __A, (__mmask8) __U); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) { __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P, (__v4sf) __A, (__mmask8) __U); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) { __builtin_ia32_compressstoresf256_mask 
((__v8sf *) __P, (__v8sf) __A, (__mmask8) __U); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_compressstoresi128_mask ((__v4si *) __P, (__v4si) __A, (__mmask8) __U); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_compressstoresi256_mask ((__v8si *) __P, (__v8si) __A, (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, (__v2df)_mm_cvtepi32_pd(__A), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, (__v2df)_mm_cvtepi32_pd(__A), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, (__v4df)_mm256_cvtepi32_pd(__A), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, (__v4df)_mm256_cvtepi32_pd(__A), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_cvtepi32_ps(__A), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_cvtepi32_ps(__A), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_cvtepi32_ps(__A), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_cvtepi32_ps(__A), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, (__v4si) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm256_cvtpd_epi32(__A), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm256_cvtpd_epi32(__A), (__v4si)_mm_setzero_si128()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) { return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, (__v4sf) __W, (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) { return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS256 
_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtpd_ps(__A), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtpd_ps(__A), (__v4sf)_mm_setzero_ps()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epu32 (__m128d __A) { return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, (__v4si) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epu32 (__m256d __A) { return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, (__v4si) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) { return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_cvtps_epi32(__A), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_cvtps_epi32(__A), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_cvtps_epi32(__A), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_cvtps_epi32(__A), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtps_pd(__A), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtps_pd(__A), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtps_pd(__A), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtps_pd(__A), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epu32 (__m128 __A) { return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, (__v4si) _mm_setzero_si128 (), 
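/* Note: the cvt{pd,ps}_epu32 conversions here round according to the current
 * MXCSR rounding mode and produce unsigned 32-bit results; the cvtt* variants
 * further below truncate toward zero instead. */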
(__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, (__v4si) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epu32 (__m256 __A) { return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, (__v8si) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) { return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, (__v4si) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm256_cvttpd_epi32(__A), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm256_cvttpd_epi32(__A), (__v4si)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epu32 (__m128d __A) { return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, (__v4si) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epu32 (__m256d __A) { return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, (__v4si) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) { return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_cvttps_epi32(__A), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) { return 
(__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_cvttps_epi32(__A), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_cvttps_epi32(__A), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_cvttps_epi32(__A), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epu32 (__m128 __A) { return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, (__v4si) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epu32 (__m256 __A) { return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, (__v8si) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) { return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepu32_pd (__m128i __A) { return (__m128d) __builtin_convertvector( __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, (__v2df)_mm_cvtepu32_pd(__A), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, (__v2df)_mm_cvtepu32_pd(__A), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_pd (__m128i __A) { return (__m256d)__builtin_convertvector((__v4su)__A, __v4df); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, (__v4df)_mm256_cvtepu32_pd(__A), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, (__v4df)_mm256_cvtepu32_pd(__A), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ps (__m128i __A) { return (__m128)__builtin_convertvector((__v4su)__A, __v4sf); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_cvtepu32_ps(__A), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) { return 
(__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_cvtepu32_ps(__A), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_ps (__m256i __A) { return (__m256)__builtin_convertvector((__v8su)__A, __v8sf); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_cvtepu32_ps(__A), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_cvtepu32_ps(__A), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_div_pd(__A, __B), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_div_pd(__A, __B), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_div_pd(__A, __B), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_div_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_div_ps(__A, __B), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_div_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_div_ps(__A, __B), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_div_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, (__v2df) __W, (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, (__v2df) _mm_setzero_pd (), (__mmask8) __U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, (__v4df) __W, (__mmask8) __U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, (__v2di) __W, (__mmask8) 
__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, (__v2di) _mm_setzero_si128 (), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P, (__v2df) __W, (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P, (__v2df) _mm_setzero_pd (), (__mmask8) __U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P, (__v4df) __W, (__mmask8) __U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P, (__v4df) _mm256_setzero_pd (), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P, (__v2di) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P, (__v2di) _mm_setzero_si128 (), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P, (__v4sf) __W, (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P, (__v4sf) _mm_setzero_ps (), (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P, (__v8sf) __W, (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const 
*__P) { return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P, (__v4si) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P, (__v4si) _mm_setzero_si128 (), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P, (__v8si) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, (__v4sf) __W, (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, (__v8sf) __W, (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, (__v4si) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, (__v8si) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_pd (__m128d __A) { return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, (__v2df) _mm_setzero_pd (), (__mmask8) -1); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, (__v2df) __W, (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, (__v2df) _mm_setzero_pd (), (__mmask8) __U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_getexp_pd (__m256d __A) { return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) -1); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, (__v4df) __W, (__mmask8) __U); } static __inline__ __m256d 
__DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ps (__m128 __A) { return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) -1); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, (__v4sf) __W, (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_getexp_ps (__m256 __A) { return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) -1); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, (__v8sf) __W, (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_max_pd(__A, __B), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_max_pd(__A, __B), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_max_pd(__A, __B), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_max_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_max_ps(__A, __B), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_max_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_max_ps(__A, __B), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_max_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_min_pd(__A, __B), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) { return 
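/* The getexp_{pd,ps} helpers above extract each lane's exponent as a
 * floating-point value, i.e. floor(log2(|x|)) for finite non-zero inputs;
 * for example _mm_getexp_pd(_mm_set1_pd(8.0)) yields 3.0 in both lanes. */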
(__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_min_pd(__A, __B), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_min_pd(__A, __B), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_min_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_min_ps(__A, __B), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_min_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_min_ps(__A, __B), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_min_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_mul_pd(__A, __B), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_mul_pd(__A, __B), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_mul_pd(__A, __B), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_mul_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_mul_ps(__A, __B), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_mul_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_mul_ps(__A, __B), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_mul_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_abs_epi32(__A), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 
_mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_abs_epi32(__A), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_abs_epi32(__A), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_abs_epi32(__A), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_abs_epi64 (__m128i __A) { return (__m128i)__builtin_elementwise_abs((__v2di)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_abs_epi64(__A), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_abs_epi64(__A), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi64 (__m256i __A) { return (__m256i)__builtin_elementwise_abs((__v4di)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_abs_epi64(__A), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_abs_epi64(__A), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_max_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_max_epi32(__A, __B), (__v4si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_max_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_max_epi32(__A, __B), (__v8si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epi64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_max_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_max_epi64(__A, __B), (__v2di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi64 
(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_max_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_max_epi64(__A, __B), (__v4di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_max_epu32(__A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_max_epu32(__A, __B), (__v4si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_max_epu32(__A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_max_epu32(__A, __B), (__v8si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epu64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_max_epu64(__A, __B), (__v2di)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_max_epu64(__A, __B), (__v2di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_max_epu64(__A, __B), (__v4di)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_max_epu64(__A, __B), (__v4di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_min_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_min_epi32(__A, __B), (__v4si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_min_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_min_epi32(__A, __B), (__v8si)__W); 
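/* The 64-bit element forms (_mm*_abs_epi64 and the _{max,min}_ep{i,u}64
 * functions in this block) are new with AVX-512F/VL; clang implements them
 * with the generic __builtin_elementwise_{abs,min,max} builtins and wraps
 * the masked variants in the same per-lane select pattern used above. */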
} static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epi64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_min_epi64(__A, __B), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_min_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_min_epi64(__A, __B), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_min_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_min_epu32(__A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_min_epu32(__A, __B), (__v4si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_min_epu32(__A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_min_epu32(__A, __B), (__v8si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epu64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_min_epu64(__A, __B), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_min_epu64(__A, __B), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_min_epu64(__A, __B), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_min_epu64(__A, __B), (__v4di)_mm256_setzero_si256()); } #define _mm_roundscale_pd(A, imm) \ 
((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ (int)(imm), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1)) #define _mm_mask_roundscale_pd(W, U, A, imm) \ ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ (int)(imm), \ (__v2df)(__m128d)(W), \ (__mmask8)(U))) #define _mm_maskz_roundscale_pd(U, A, imm) \ ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ (int)(imm), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U))) #define _mm256_roundscale_pd(A, imm) \ ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ (int)(imm), \ (__v4df)_mm256_setzero_pd(), \ (__mmask8)-1)) #define _mm256_mask_roundscale_pd(W, U, A, imm) \ ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ (int)(imm), \ (__v4df)(__m256d)(W), \ (__mmask8)(U))) #define _mm256_maskz_roundscale_pd(U, A, imm) \ ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ (int)(imm), \ (__v4df)_mm256_setzero_pd(), \ (__mmask8)(U))) #define _mm_roundscale_ps(A, imm) \ ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1)) #define _mm_mask_roundscale_ps(W, U, A, imm) \ ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ (__v4sf)(__m128)(W), \ (__mmask8)(U))) #define _mm_maskz_roundscale_ps(U, A, imm) \ ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U))) #define _mm256_roundscale_ps(A, imm) \ ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)-1)) #define _mm256_mask_roundscale_ps(W, U, A, imm) \ ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ (__v8sf)(__m256)(W), \ (__mmask8)(U))) #define _mm256_maskz_roundscale_ps(U, A, imm) \ ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_pd (__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), (__mmask8) -1); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, (__v2df) __B, (__v2df) __W, (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), (__mmask8) __U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_scalef_pd (__m256d __A, __m256d __B) { return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, (__v4df) __B, (__v4df) _mm256_setzero_pd (), (__mmask8) -1); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, (__v4df) __B, (__v4df) __W, (__mmask8) __U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, (__v4df) __B, (__v4df) _mm256_setzero_pd (), (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) 
_mm_setzero_ps (), (__mmask8) -1); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) __W, (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps (), (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_scalef_ps (__m256 __A, __m256 __B) { return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, (__v8sf) __B, (__v8sf) _mm256_setzero_ps (), (__mmask8) -1); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, (__v8sf) __B, (__v8sf) __W, (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, (__v8sf) __B, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U); } #define _mm_i64scatter_pd(addr, index, v1, scale) \ __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)-1, \ (__v2di)(__m128i)(index), \ (__v2df)(__m128d)(v1), (int)(scale)) #define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)(mask), \ (__v2di)(__m128i)(index), \ (__v2df)(__m128d)(v1), (int)(scale)) #define _mm_i64scatter_epi64(addr, index, v1, scale) \ __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)-1, \ (__v2di)(__m128i)(index), \ (__v2di)(__m128i)(v1), (int)(scale)) #define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)(mask), \ (__v2di)(__m128i)(index), \ (__v2di)(__m128i)(v1), (int)(scale)) #define _mm256_i64scatter_pd(addr, index, v1, scale) \ __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)-1, \ (__v4di)(__m256i)(index), \ (__v4df)(__m256d)(v1), (int)(scale)) #define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)(mask), \ (__v4di)(__m256i)(index), \ (__v4df)(__m256d)(v1), (int)(scale)) #define _mm256_i64scatter_epi64(addr, index, v1, scale) \ __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)-1, \ (__v4di)(__m256i)(index), \ (__v4di)(__m256i)(v1), (int)(scale)) #define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)(mask), \ (__v4di)(__m256i)(index), \ (__v4di)(__m256i)(v1), (int)(scale)) #define _mm_i64scatter_ps(addr, index, v1, scale) \ __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)-1, \ (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ (int)(scale)) #define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)(mask), \ (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ (int)(scale)) #define _mm_i64scatter_epi32(addr, index, v1, scale) \ __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)-1, \ (__v2di)(__m128i)(index), \ (__v4si)(__m128i)(v1), (int)(scale)) #define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)(mask), \ (__v2di)(__m128i)(index), \ (__v4si)(__m128i)(v1), (int)(scale)) #define _mm256_i64scatter_ps(addr, index, v1, scale) \ __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)-1, \ 
(__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ (int)(scale)) #define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)(mask), \ (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ (int)(scale)) #define _mm256_i64scatter_epi32(addr, index, v1, scale) \ __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)-1, \ (__v4di)(__m256i)(index), \ (__v4si)(__m128i)(v1), (int)(scale)) #define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)(mask), \ (__v4di)(__m256i)(index), \ (__v4si)(__m128i)(v1), (int)(scale)) #define _mm_i32scatter_pd(addr, index, v1, scale) \ __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)-1, \ (__v4si)(__m128i)(index), \ (__v2df)(__m128d)(v1), (int)(scale)) #define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \ __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)(mask), \ (__v4si)(__m128i)(index), \ (__v2df)(__m128d)(v1), (int)(scale)) #define _mm_i32scatter_epi64(addr, index, v1, scale) \ __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)-1, \ (__v4si)(__m128i)(index), \ (__v2di)(__m128i)(v1), (int)(scale)) #define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)(mask), \ (__v4si)(__m128i)(index), \ (__v2di)(__m128i)(v1), (int)(scale)) #define _mm256_i32scatter_pd(addr, index, v1, scale) \ __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)-1, \ (__v4si)(__m128i)(index), \ (__v4df)(__m256d)(v1), (int)(scale)) #define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \ __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)(mask), \ (__v4si)(__m128i)(index), \ (__v4df)(__m256d)(v1), (int)(scale)) #define _mm256_i32scatter_epi64(addr, index, v1, scale) \ __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)-1, \ (__v4si)(__m128i)(index), \ (__v4di)(__m256i)(v1), (int)(scale)) #define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)(mask), \ (__v4si)(__m128i)(index), \ (__v4di)(__m256i)(v1), (int)(scale)) #define _mm_i32scatter_ps(addr, index, v1, scale) \ __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)-1, \ (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ (int)(scale)) #define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \ __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)(mask), \ (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ (int)(scale)) #define _mm_i32scatter_epi32(addr, index, v1, scale) \ __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)-1, \ (__v4si)(__m128i)(index), \ (__v4si)(__m128i)(v1), (int)(scale)) #define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)(mask), \ (__v4si)(__m128i)(index), \ (__v4si)(__m128i)(v1), (int)(scale)) #define _mm256_i32scatter_ps(addr, index, v1, scale) \ __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)-1, \ (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ (int)(scale)) #define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \ __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)(mask), \ (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ (int)(scale)) #define _mm256_i32scatter_epi32(addr, index, v1, scale) \ __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)-1, \ (__v8si)(__m256i)(index), \ (__v8si)(__m256i)(v1), (int)(scale)) #define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ 
__builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)(mask), \ (__v8si)(__m256i)(index), \ (__v8si)(__m256i)(v1), (int)(scale)) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_sqrt_pd(__A), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_sqrt_pd(__A), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_sqrt_pd(__A), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_sqrt_pd(__A), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_sqrt_ps(__A), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_sqrt_ps(__A), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_sqrt_ps(__A), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_sqrt_ps(__A), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_sub_pd(__A, __B), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_sub_pd(__A, __B), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_sub_pd(__A, __B), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_sub_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_sub_ps(__A, __B), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_sub_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_sub_ps(__A, __B), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) { return 
(__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_sub_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I, (__v4si)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_permutex2var_epi32(__A, __I, __B), (__v4si)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_permutex2var_epi32(__A, __I, __B), (__v4si)__I); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_permutex2var_epi32(__A, __I, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I, (__v8si) __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), (__v8si)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), (__v8si)__I); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) { return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I, (__v2df)__B); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128(__U, (__v2df)_mm_permutex2var_pd(__A, __I, __B), (__v2df)__A); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128(__U, (__v2df)_mm_permutex2var_pd(__A, __I, __B), (__v2df)(__m128d)__I); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128(__U, (__v2df)_mm_permutex2var_pd(__A, __I, __B), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) { return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I, (__v4df)__B); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256(__U, (__v4df)_mm256_permutex2var_pd(__A, __I, __B), (__v4df)__A); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, __m256d __B) { return 
(__m256d)__builtin_ia32_selectpd_256(__U, (__v4df)_mm256_permutex2var_pd(__A, __I, __B), (__v4df)(__m256d)__I); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256(__U, (__v4df)_mm256_permutex2var_pd(__A, __I, __B), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) { return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I, (__v4sf)__B); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) { return (__m128)__builtin_ia32_selectps_128(__U, (__v4sf)_mm_permutex2var_ps(__A, __I, __B), (__v4sf)__A); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) { return (__m128)__builtin_ia32_selectps_128(__U, (__v4sf)_mm_permutex2var_ps(__A, __I, __B), (__v4sf)(__m128)__I); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) { return (__m128)__builtin_ia32_selectps_128(__U, (__v4sf)_mm_permutex2var_ps(__A, __I, __B), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) { return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I, (__v8sf) __B); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) { return (__m256)__builtin_ia32_selectps_256(__U, (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), (__v8sf)__A); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, __m256 __B) { return (__m256)__builtin_ia32_selectps_256(__U, (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), (__v8sf)(__m256)__I); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, __m256 __B) { return (__m256)__builtin_ia32_selectps_256(__U, (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I, (__v2di)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, (__v2di)_mm_permutex2var_epi64(__A, __I, __B), (__v2di)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, (__v2di)_mm_permutex2var_epi64(__A, __I, __B), (__v2di)__I); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, (__v2di)_mm_permutex2var_epi64(__A, __I, __B), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I, (__v4di) __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B) { return 
(__m256i)__builtin_ia32_selectq_256(__U, (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), (__v4di)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), (__v4di)__I); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_cvtepi8_epi32(__A), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_cvtepi8_epi32(__A), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_cvtepi8_epi32(__A), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_cvtepi8_epi32(__A), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_cvtepi8_epi64(__A), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_cvtepi8_epi64(__A), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_cvtepi8_epi64(__A), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_cvtepi8_epi64(__A), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_cvtepi32_epi64(__X), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_cvtepi32_epi64(__X), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_cvtepi32_epi64(__X), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_cvtepi32_epi64(__X), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_cvtepi16_epi32(__A), (__v4si)__W); } 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_cvtepi16_epi32(__A), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_cvtepi16_epi32(__A), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_cvtepi16_epi32(__A), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_cvtepi16_epi64(__A), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_cvtepi16_epi64(__A), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_cvtepi16_epi64(__A), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_cvtepi16_epi64(__A), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_cvtepu8_epi32(__A), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_cvtepu8_epi32(__A), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_cvtepu8_epi32(__A), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_cvtepu8_epi32(__A), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_cvtepu8_epi64(__A), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_cvtepu8_epi64(__A), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_cvtepu8_epi64(__A), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_cvtepu8_epi64(__A), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X) { return 
(__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_cvtepu32_epi64(__X), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_cvtepu32_epi64(__X), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_cvtepu32_epi64(__X), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_cvtepu32_epi64(__X), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_cvtepu16_epi32(__A), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_cvtepu16_epi32(__A), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_cvtepu16_epi32(__A), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_cvtepu16_epi32(__A), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_cvtepu16_epi64(__A), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_cvtepu16_epi64(__A), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_cvtepu16_epi64(__A), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_cvtepu16_epi64(__A), (__v4di)_mm256_setzero_si256()); } #define _mm_rol_epi32(a, b) \ ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b))) #define _mm_mask_rol_epi32(w, u, a, b) \ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ (__v4si)_mm_rol_epi32((a), (b)), \ (__v4si)(__m128i)(w))) #define _mm_maskz_rol_epi32(u, a, b) \ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ (__v4si)_mm_rol_epi32((a), (b)), \ (__v4si)_mm_setzero_si128())) #define _mm256_rol_epi32(a, b) \ ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b))) #define _mm256_mask_rol_epi32(w, u, a, b) \ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ (__v8si)_mm256_rol_epi32((a), (b)), \ (__v8si)(__m256i)(w))) #define _mm256_maskz_rol_epi32(u, a, b) \ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ (__v8si)_mm256_rol_epi32((a), (b)), \ (__v8si)_mm256_setzero_si256())) #define _mm_rol_epi64(a, b) \ ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b))) #define 
_mm_mask_rol_epi64(w, u, a, b) \ ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ (__v2di)_mm_rol_epi64((a), (b)), \ (__v2di)(__m128i)(w))) #define _mm_maskz_rol_epi64(u, a, b) \ ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ (__v2di)_mm_rol_epi64((a), (b)), \ (__v2di)_mm_setzero_si128())) #define _mm256_rol_epi64(a, b) \ ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b))) #define _mm256_mask_rol_epi64(w, u, a, b) \ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ (__v4di)_mm256_rol_epi64((a), (b)), \ (__v4di)(__m256i)(w))) #define _mm256_maskz_rol_epi64(u, a, b) \ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ (__v4di)_mm256_rol_epi64((a), (b)), \ (__v4di)_mm256_setzero_si256())) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rolv_epi32 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_rolv_epi32(__A, __B), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_rolv_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rolv_epi32 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_rolv_epi32(__A, __B), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_rolv_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rolv_epi64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, (__v2di)_mm_rolv_epi64(__A, __B), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, (__v2di)_mm_rolv_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rolv_epi64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, (__v4di)_mm256_rolv_epi64(__A, __B), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, (__v4di)_mm256_rolv_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } #define _mm_ror_epi32(a, b) \ ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b))) #define _mm_mask_ror_epi32(w, u, a, b) \ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ (__v4si)_mm_ror_epi32((a), (b)), \ (__v4si)(__m128i)(w))) #define _mm_maskz_ror_epi32(u, a, b) \ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 
(__v4si)_mm_ror_epi32((a), (b)), \ (__v4si)_mm_setzero_si128())) #define _mm256_ror_epi32(a, b) \ ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b))) #define _mm256_mask_ror_epi32(w, u, a, b) \ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ (__v8si)_mm256_ror_epi32((a), (b)), \ (__v8si)(__m256i)(w))) #define _mm256_maskz_ror_epi32(u, a, b) \ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ (__v8si)_mm256_ror_epi32((a), (b)), \ (__v8si)_mm256_setzero_si256())) #define _mm_ror_epi64(a, b) \ ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b))) #define _mm_mask_ror_epi64(w, u, a, b) \ ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ (__v2di)_mm_ror_epi64((a), (b)), \ (__v2di)(__m128i)(w))) #define _mm_maskz_ror_epi64(u, a, b) \ ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ (__v2di)_mm_ror_epi64((a), (b)), \ (__v2di)_mm_setzero_si128())) #define _mm256_ror_epi64(a, b) \ ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b))) #define _mm256_mask_ror_epi64(w, u, a, b) \ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ (__v4di)_mm256_ror_epi64((a), (b)), \ (__v4di)(__m256i)(w))) #define _mm256_maskz_ror_epi64(u, a, b) \ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ (__v4di)_mm256_ror_epi64((a), (b)), \ (__v4di)_mm256_setzero_si256())) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sll_epi32(__A, __B), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sll_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sll_epi32(__A, __B), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sll_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_slli_epi32(__A, (int)__B), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_slli_epi32(__A, (int)__B), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_slli_epi32(__A, (int)__B), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_slli_epi32(__A, (int)__B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_sll_epi64(__A, __B), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 
_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_sll_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_sll_epi64(__A, __B), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_sll_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_slli_epi64(__A, (int)__B), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_slli_epi64(__A, (int)__B), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_slli_epi64(__A, (int)__B), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_slli_epi64(__A, (int)__B), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rorv_epi32 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_rorv_epi32(__A, __B), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_rorv_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rorv_epi32 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_rorv_epi32(__A, __B), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_rorv_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rorv_epi64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, (__v2di)_mm_rorv_epi64(__A, __B), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, (__v2di)_mm_rorv_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 
_mm256_rorv_epi64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, (__v4di)_mm256_rorv_epi64(__A, __B), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, (__v4di)_mm256_rorv_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_sllv_epi64(__X, __Y), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_sllv_epi64(__X, __Y), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_sllv_epi64(__X, __Y), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_sllv_epi64(__X, __Y), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sllv_epi32(__X, __Y), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sllv_epi32(__X, __Y), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sllv_epi32(__X, __Y), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sllv_epi32(__X, __Y), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srlv_epi64(__X, __Y), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srlv_epi64(__X, __Y), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srlv_epi64(__X, __Y), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srlv_epi64(__X, __Y), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return 
(__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srlv_epi32(__X, __Y), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srlv_epi32(__X, __Y), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srlv_epi32(__X, __Y), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srlv_epi32(__X, __Y), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srl_epi32(__A, __B), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srl_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srl_epi32(__A, __B), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srl_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srli_epi32(__A, (int)__B), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srli_epi32(__A, (int)__B), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srli_epi32(__A, (int)__B), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srli_epi32(__A, (int)__B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srl_epi64(__A, __B), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srl_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srl_epi64(__A, __B), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) { 
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srl_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srli_epi64(__A, (int)__B), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srli_epi64(__A, (int)__B), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srli_epi64(__A, (int)__B), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srli_epi64(__A, (int)__B), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srav_epi32(__X, __Y), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srav_epi32(__X, __Y), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srav_epi32(__X, __Y), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srav_epi32(__X, __Y), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srav_epi64(__X, __Y), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srav_epi64(__X, __Y), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srav_epi64(__X, __Y), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srav_epi64(__X, __Y), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, (__v4si) __A, (__v4si) __W); } static 
__inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, (__v4si) __A, (__v4si) _mm_setzero_si128 ()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, (__v8si) __A, (__v8si) __W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, (__v8si) __A, (__v8si) _mm256_setzero_si256 ()); } static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_load_epi32 (void const *__P) { return *(const __m128i *) __P; } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P, (__v4si) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P, (__v4si) _mm_setzero_si128 (), (__mmask8) __U); } static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_load_epi32 (void const *__P) { return *(const __m256i *) __P; } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P, (__v8si) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U); } static __inline void __DEFAULT_FN_ATTRS128 _mm_store_epi32 (void *__P, __m128i __A) { *(__m128i *) __P = __A; } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_movdqa32store128_mask ((__v4si *) __P, (__v4si) __A, (__mmask8) __U); } static __inline void __DEFAULT_FN_ATTRS256 _mm256_store_epi32 (void *__P, __m256i __A) { *(__m256i *) __P = __A; } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_movdqa32store256_mask ((__v8si *) __P, (__v8si) __A, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, (__v2di) __A, (__v2di) __W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, (__v2di) __A, (__v2di) _mm_setzero_si128 ()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, (__v4di) __A, (__v4di) __W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, (__v4di) __A, (__v4di) _mm256_setzero_si256 ()); } static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_load_epi64 (void const *__P) { return *(const __m128i *) __P; } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P, (__v2di) __W, (__mmask8) __U); } static 
__inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P, (__v2di) _mm_setzero_si128 (), (__mmask8) __U); } static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_load_epi64 (void const *__P) { return *(const __m256i *) __P; } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U); } static __inline void __DEFAULT_FN_ATTRS128 _mm_store_epi64 (void *__P, __m128i __A) { *(__m128i *) __P = __A; } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_movdqa64store128_mask ((__v2di *) __P, (__v2di) __A, (__mmask8) __U); } static __inline void __DEFAULT_FN_ATTRS256 _mm256_store_epi64 (void *__P, __m256i __A) { *(__m256i *) __P = __A; } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_movdqa64store256_mask ((__v4di *) __P, (__v4di) __A, (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_movedup_pd(__A), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_movedup_pd(__A), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_movedup_pd(__A), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_movedup_pd(__A), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) { return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si) _mm_set1_epi32(__A), (__v4si)__O); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi32( __mmask8 __M, int __A) { return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si) _mm_set1_epi32(__A), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) { return (__m256i)__builtin_ia32_selectd_256(__M, (__v8si) _mm256_set1_epi32(__A), (__v8si)__O); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi32( __mmask8 __M, int __A) { return (__m256i)__builtin_ia32_selectd_256(__M, (__v8si) _mm256_set1_epi32(__A), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) { return (__m128i) __builtin_ia32_selectq_128(__M, (__v2di) _mm_set1_epi64x(__A), (__v2di) __O); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A) { return (__m128i) __builtin_ia32_selectq_128(__M, (__v2di) _mm_set1_epi64x(__A), (__v2di) _mm_setzero_si128()); } static 
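/*
 * Illustrative usage sketch, not part of the original header: a minimal example
 * of the masked aligned load/store pairs defined above, assuming <immintrin.h>
 * and a target compiled with AVX-512F/VL enabled (e.g. -mavx512vl). Mask bit i
 * controls 32-bit lane i; masked-off lanes of the load take the pass-through
 * operand, and masked-off lanes of the store leave memory untouched. The names
 * `src`, `dst` and the mask value are hypothetical.
 *
 *   #include <immintrin.h>
 *
 *   void copy_even_lanes(const int *src, int *dst)   // both 16-byte aligned
 *   {
 *       __mmask8 even = 0x5;                          // lanes 0 and 2
 *       __m128i fallback = _mm_set1_epi32(-1);        // fills lanes 1 and 3
 *       __m128i v = _mm_mask_load_epi32(fallback, even, src);
 *       _mm_mask_store_epi32(dst, even, v);           // dst[1], dst[3] unchanged
 *   }
 */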
__inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) { return (__m256i) __builtin_ia32_selectq_256(__M, (__v4di) _mm256_set1_epi64x(__A), (__v4di) __O) ; } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) { return (__m256i) __builtin_ia32_selectq_256(__M, (__v4di) _mm256_set1_epi64x(__A), (__v4di) _mm256_setzero_si256()); } #define _mm_fixupimm_pd(A, B, C, imm) \ ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ (__mmask8)-1)) #define _mm_mask_fixupimm_pd(A, U, B, C, imm) \ ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ (__mmask8)(U))) #define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \ ((__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), \ (int)(imm), (__mmask8)(U))) #define _mm256_fixupimm_pd(A, B, C, imm) \ ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), \ (__v4di)(__m256i)(C), (int)(imm), \ (__mmask8)-1)) #define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \ ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), \ (__v4di)(__m256i)(C), (int)(imm), \ (__mmask8)(U))) #define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \ ((__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), \ (__v4di)(__m256i)(C), \ (int)(imm), (__mmask8)(U))) #define _mm_fixupimm_ps(A, B, C, imm) \ ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ (__mmask8)-1)) #define _mm_mask_fixupimm_ps(A, U, B, C, imm) \ ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ (__mmask8)(U))) #define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \ ((__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ (__mmask8)(U))) #define _mm256_fixupimm_ps(A, B, C, imm) \ ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), \ (__v8si)(__m256i)(C), (int)(imm), \ (__mmask8)-1)) #define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \ ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), \ (__v8si)(__m256i)(C), (int)(imm), \ (__mmask8)(U))) #define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \ ((__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), \ (__v8si)(__m256i)(C), (int)(imm), \ (__mmask8)(U))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P, (__v2df) __W, (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_pd (__mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P, (__v2df) _mm_setzero_pd (), (__mmask8) __U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P, (__v4df) __W, (__mmask8) __U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_load_pd (__mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P, (__v4df) 
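/*
 * Illustrative usage sketch, not part of the original header: blending a scalar
 * into selected lanes with _mm_mask_set1_epi32, assuming <immintrin.h> and
 * -mavx512vl. Lanes whose mask bit is 0 keep the value from the pass-through
 * operand; _mm_maskz_set1_epi32 zeroes them instead. Names are hypothetical.
 *
 *   #include <immintrin.h>
 *
 *   __m128i fill_low_half(__m128i old, int value)
 *   {
 *       return _mm_mask_set1_epi32(old, 0x3, value);  // lanes 0,1 = value; 2,3 = old
 *   }
 */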
_mm256_setzero_pd (), (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P, (__v4sf) __W, (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ps (__mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P, (__v4sf) _mm_setzero_ps (), (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P, (__v8sf) __W, (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_load_ps (__mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U); } static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi64 (void const *__P) { struct __loadu_epi64 { __m128i_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_epi64*)__P)->__v; } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P, (__v2di) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P, (__v2di) _mm_setzero_si128 (), (__mmask8) __U); } static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi64 (void const *__P) { struct __loadu_epi64 { __m256i_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_epi64*)__P)->__v; } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U); } static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi32 (void const *__P) { struct __loadu_epi32 { __m128i_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_epi32*)__P)->__v; } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P, (__v4si) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P, (__v4si) _mm_setzero_si128 (), (__mmask8) __U); } static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi32 (void const *__P) { struct __loadu_epi32 { __m256i_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_epi32*)__P)->__v; } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P, (__v8si) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P, (__v8si) 
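/*
 * Illustrative usage sketch, not part of the original header: using the
 * unaligned masked loads above for loop-remainder handling, assuming
 * <immintrin.h> and -mavx512vl. AVX-512 masked loads suppress faults on
 * masked-off elements, so reading a short tail this way should not touch
 * memory past the last valid element; with the maskz form the unused lanes
 * come back as zero. `tail` and `n` are hypothetical.
 *
 *   #include <immintrin.h>
 *
 *   __m128i load_tail(const int *tail, unsigned n)    // n < 4, unaligned OK
 *   {
 *       __mmask8 m = (__mmask8)((1u << n) - 1u);      // low n lanes valid
 *       return _mm_maskz_loadu_epi32(m, tail);        // upper lanes zeroed
 *   }
 */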
_mm256_setzero_si256 (), (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P, (__v2df) __W, (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P, (__v2df) _mm_setzero_pd (), (__mmask8) __U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P, (__v4df) __W, (__mmask8) __U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P, (__v4df) _mm256_setzero_pd (), (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P, (__v4sf) __W, (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P, (__v4sf) _mm_setzero_ps (), (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P, (__v8sf) __W, (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A) { __builtin_ia32_storeapd128_mask ((__v2df *) __P, (__v2df) __A, (__mmask8) __U); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A) { __builtin_ia32_storeapd256_mask ((__v4df *) __P, (__v4df) __A, (__mmask8) __U); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A) { __builtin_ia32_storeaps128_mask ((__v4sf *) __P, (__v4sf) __A, (__mmask8) __U); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A) { __builtin_ia32_storeaps256_mask ((__v8sf *) __P, (__v8sf) __A, (__mmask8) __U); } static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi64 (void *__P, __m128i __A) { struct __storeu_epi64 { __m128i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi64*)__P)->__v = __A; } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_storedqudi128_mask ((__v2di *) __P, (__v2di) __A, (__mmask8) __U); } static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi64 (void *__P, __m256i __A) { struct __storeu_epi64 { __m256i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi64*)__P)->__v = __A; } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_storedqudi256_mask ((__v4di *) __P, (__v4di) __A, (__mmask8) __U); } static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi32 (void *__P, __m128i __A) { struct __storeu_epi32 { __m128i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct 
__storeu_epi32*)__P)->__v = __A; } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_storedqusi128_mask ((__v4si *) __P, (__v4si) __A, (__mmask8) __U); } static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi32 (void *__P, __m256i __A) { struct __storeu_epi32 { __m256i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi32*)__P)->__v = __A; } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_storedqusi256_mask ((__v8si *) __P, (__v8si) __A, (__mmask8) __U); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A) { __builtin_ia32_storeupd128_mask ((__v2df *) __P, (__v2df) __A, (__mmask8) __U); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A) { __builtin_ia32_storeupd256_mask ((__v4df *) __P, (__v4df) __A, (__mmask8) __U); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A) { __builtin_ia32_storeups128_mask ((__v4sf *) __P, (__v4sf) __A, (__mmask8) __U); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) { __builtin_ia32_storeups256_mask ((__v8sf *) __P, (__v8sf) __A, (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpackhi_pd(__A, __B), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpackhi_pd(__A, __B), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpackhi_pd(__A, __B), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpackhi_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpackhi_ps(__A, __B), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpackhi_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpackhi_ps(__A, __B), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpackhi_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpacklo_pd(__A, __B), (__v2df)__W); } static __inline__ __m128d 
__DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpacklo_pd(__A, __B), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpacklo_pd(__A, __B), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpacklo_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpacklo_ps(__A, __B), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpacklo_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpacklo_ps(__A, __B), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpacklo_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_pd (__m128d __A) { return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, (__v2df) _mm_setzero_pd (), (__mmask8) -1); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, (__v2df) __W, (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, (__v2df) _mm_setzero_pd (), (__mmask8) __U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rcp14_pd (__m256d __A) { return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) -1); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, (__v4df) __W, (__mmask8) __U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ps (__m128 __A) { return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) -1); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, (__v4sf) __W, (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rcp14_ps (__m256 __A) { return (__m256) __builtin_ia32_rcp14ps256_mask 
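/*
 * Illustrative usage sketch, not part of the original header: the rcp14 family
 * above computes an approximate reciprocal per lane (the "14" in the name
 * appears to refer to the documented relative-error bound of roughly 2^-14;
 * check the Intel SDM before relying on that figure). A common pattern is one
 * Newton-Raphson step to tighten the estimate, assuming <immintrin.h> and
 * -mavx512vl.
 *
 *   #include <immintrin.h>
 *
 *   __m128 fast_reciprocal(__m128 x)
 *   {
 *       __m128 r = _mm_rcp14_ps(x);                    // ~14-bit estimate
 *       __m128 two = _mm_set1_ps(2.0f);
 *       return _mm_mul_ps(r, _mm_sub_ps(two, _mm_mul_ps(x, r)));  // r*(2 - x*r)
 *   }
 */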
((__v8sf) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) -1); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, (__v8sf) __W, (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U); } #define _mm_mask_permute_pd(W, U, X, C) \ ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ (__v2df)_mm_permute_pd((X), (C)), \ (__v2df)(__m128d)(W))) #define _mm_maskz_permute_pd(U, X, C) \ ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ (__v2df)_mm_permute_pd((X), (C)), \ (__v2df)_mm_setzero_pd())) #define _mm256_mask_permute_pd(W, U, X, C) \ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_permute_pd((X), (C)), \ (__v4df)(__m256d)(W))) #define _mm256_maskz_permute_pd(U, X, C) \ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_permute_pd((X), (C)), \ (__v4df)_mm256_setzero_pd())) #define _mm_mask_permute_ps(W, U, X, C) \ ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ (__v4sf)_mm_permute_ps((X), (C)), \ (__v4sf)(__m128)(W))) #define _mm_maskz_permute_ps(U, X, C) \ ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ (__v4sf)_mm_permute_ps((X), (C)), \ (__v4sf)_mm_setzero_ps())) #define _mm256_mask_permute_ps(W, U, X, C) \ ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_permute_ps((X), (C)), \ (__v8sf)(__m256)(W))) #define _mm256_maskz_permute_ps(U, X, C) \ ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_permute_ps((X), (C)), \ (__v8sf)_mm256_setzero_ps())) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_permutevar_pd(__A, __C), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_permutevar_pd(__A, __C), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_permutevar_pd(__A, __C), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_permutevar_pd(__A, __C), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_permutevar_ps(__A, __C), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_permutevar_ps(__A, __C), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_permutevar_ps(__A, __C), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) { 
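/*
 * Illustrative usage sketch, not part of the original header: the masked
 * permute macros above wrap _mm_permute_ps / _mm256_permute_pd with a per-lane
 * select. With control 0x1B each 32-bit element i of the result is taken from
 * element (3 - i) of the source, i.e. the four floats are reversed; lanes whose
 * mask bit is 0 keep the pass-through value. Assumes <immintrin.h> and
 * -mavx512vl; names are hypothetical.
 *
 *   #include <immintrin.h>
 *
 *   __m128 masked_reverse(__m128 keep, __m128 x)
 *   {
 *       return _mm_mask_permute_ps(keep, 0x7, x, 0x1B);  // lane 3 comes from `keep`
 *   }
 */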
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_permutevar_ps(__A, __C), (__v8sf)_mm256_setzero_ps()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi32_mask (__m128i __A, __m128i __B) { return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi32_mask (__m256i __A, __m256i __B) { return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi64_mask (__m128i __A, __m128i __B) { return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi64_mask (__m256i __A, __m256i __B) { return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi32_mask (__m128i __A, __m128i __B) { return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi32_mask (__m256i __A, __m256i __B) { return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi64_mask (__m128i __A, __m128i __B) { return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi64_mask (__m256i __A, __m256i __B) { return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpackhi_epi32(__A, __B), (__v4si)__W); } static 
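/*
 * Illustrative usage sketch, not part of the original header: as the
 * definitions above show, test_epi32_mask sets mask bit i when (A & B) is
 * nonzero in lane i, and testn_epi32_mask sets it when that AND is zero. A
 * typical use is checking flag bits across lanes, assuming <immintrin.h> and
 * -mavx512vl.
 *
 *   #include <immintrin.h>
 *
 *   int any_lane_has_flag(__m128i values, int flag)
 *   {
 *       __mmask8 m = _mm_test_epi32_mask(values, _mm_set1_epi32(flag));
 *       return m != 0;                    // bit i set => lane i had the flag
 *   }
 */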
__inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpackhi_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpackhi_epi32(__A, __B), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpackhi_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpackhi_epi64(__A, __B), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpackhi_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpackhi_epi64(__A, __B), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpackhi_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpacklo_epi32(__A, __B), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpacklo_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpacklo_epi32(__A, __B), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpacklo_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpacklo_epi64(__A, __B), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpacklo_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpacklo_epi64(__A, __B), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return 
(__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpacklo_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sra_epi32(__A, __B), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sra_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sra_epi32(__A, __B), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sra_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srai_epi32(__A, (int)__B), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srai_epi32(__A, (int)__B), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srai_epi32(__A, (int)__B), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srai_epi32(__A, (int)__B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sra_epi64(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ (__v2di)_mm_sra_epi64(__A, __B), \ (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ (__v2di)_mm_sra_epi64(__A, __B), \ (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi64(__m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ (__v4di)_mm256_sra_epi64(__A, __B), \ (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ (__v4di)_mm256_sra_epi64(__A, __B), \ (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srai_epi64(__m128i __A, unsigned int __imm) { return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, (int)__imm); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 
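/*
 * Illustrative usage sketch, not part of the original header: _mm_sra_epi64 and
 * _mm_srai_epi64 above provide a 64-bit *arithmetic* right shift
 * (sign-extending), which, to my knowledge, has no SSE/AVX2 counterpart.
 * Shifting by 63 broadcasts the sign bit, giving 0 or -1 per lane. Assumes
 * <immintrin.h> and -mavx512vl.
 *
 *   #include <immintrin.h>
 *
 *   __m128i sign_mask_epi64(__m128i v)
 *   {
 *       return _mm_srai_epi64(v, 63);     // all-ones lane where v < 0, else zero
 *   }
 */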
_mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ (__v2di)_mm_srai_epi64(__A, __imm), \ (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ (__v2di)_mm_srai_epi64(__A, __imm), \ (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi64(__m256i __A, unsigned int __imm) { return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, (int)__imm); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __imm) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ (__v4di)_mm256_srai_epi64(__A, __imm), \ (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ (__v4di)_mm256_srai_epi64(__A, __imm), \ (__v4di)_mm256_setzero_si256()); } #define _mm_ternarylogic_epi32(A, B, C, imm) \ ((__m128i)__builtin_ia32_pternlogd128_mask( \ (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \ (unsigned char)(imm), (__mmask8)-1)) #define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \ ((__m128i)__builtin_ia32_pternlogd128_mask( \ (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \ (unsigned char)(imm), (__mmask8)(U))) #define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \ ((__m128i)__builtin_ia32_pternlogd128_maskz( \ (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \ (unsigned char)(imm), (__mmask8)(U))) #define _mm256_ternarylogic_epi32(A, B, C, imm) \ ((__m256i)__builtin_ia32_pternlogd256_mask( \ (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \ (unsigned char)(imm), (__mmask8)-1)) #define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \ ((__m256i)__builtin_ia32_pternlogd256_mask( \ (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \ (unsigned char)(imm), (__mmask8)(U))) #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \ ((__m256i)__builtin_ia32_pternlogd256_maskz( \ (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \ (unsigned char)(imm), (__mmask8)(U))) #define _mm_ternarylogic_epi64(A, B, C, imm) \ ((__m128i)__builtin_ia32_pternlogq128_mask( \ (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \ (unsigned char)(imm), (__mmask8)-1)) #define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \ ((__m128i)__builtin_ia32_pternlogq128_mask( \ (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \ (unsigned char)(imm), (__mmask8)(U))) #define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \ ((__m128i)__builtin_ia32_pternlogq128_maskz( \ (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \ (unsigned char)(imm), (__mmask8)(U))) #define _mm256_ternarylogic_epi64(A, B, C, imm) \ ((__m256i)__builtin_ia32_pternlogq256_mask( \ (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \ (unsigned char)(imm), (__mmask8)-1)) #define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \ ((__m256i)__builtin_ia32_pternlogq256_mask( \ (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \ (unsigned char)(imm), (__mmask8)(U))) #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \ ((__m256i)__builtin_ia32_pternlogq256_maskz( \ (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), 
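/*
 * Illustrative usage sketch, not part of the original header: the ternarylogic
 * macros above expose VPTERNLOG, whose immediate is an 8-entry truth table. If
 * I read the convention correctly, each result bit is imm[(a << 2) | (b << 1) | c],
 * where a, b, c are the corresponding bits of the first, second and third
 * operands, so 0x96 computes a three-way XOR in one instruction (worth
 * verifying against the Intel SDM). Assumes <immintrin.h> and -mavx512vl.
 *
 *   #include <immintrin.h>
 *
 *   __m128i xor3(__m128i a, __m128i b, __m128i c)
 *   {
 *       return _mm_ternarylogic_epi32(a, b, c, 0x96);  // a ^ b ^ c
 *   }
 */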
\ (unsigned char)(imm), (__mmask8)(U))) #define _mm256_shuffle_f32x4(A, B, imm) \ ((__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), (int)(imm))) #define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \ ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ (__v8sf)(__m256)(W))) #define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \ ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ (__v8sf)_mm256_setzero_ps())) #define _mm256_shuffle_f64x2(A, B, imm) \ ((__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), (int)(imm))) #define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ (__v4df)(__m256d)(W))) #define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ (__v4df)_mm256_setzero_pd())) #define _mm256_shuffle_i32x4(A, B, imm) \ ((__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \ (__v8si)(__m256i)(B), (int)(imm))) #define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ (__v8si)(__m256i)(W))) #define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ (__v8si)_mm256_setzero_si256())) #define _mm256_shuffle_i64x2(A, B, imm) \ ((__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \ (__v4di)(__m256i)(B), (int)(imm))) #define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ (__v4di)(__m256i)(W))) #define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ (__v4di)_mm256_setzero_si256())) #define _mm_mask_shuffle_pd(W, U, A, B, M) \ ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ (__v2df)_mm_shuffle_pd((A), (B), (M)), \ (__v2df)(__m128d)(W))) #define _mm_maskz_shuffle_pd(U, A, B, M) \ ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ (__v2df)_mm_shuffle_pd((A), (B), (M)), \ (__v2df)_mm_setzero_pd())) #define _mm256_mask_shuffle_pd(W, U, A, B, M) \ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ (__v4df)(__m256d)(W))) #define _mm256_maskz_shuffle_pd(U, A, B, M) \ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ (__v4df)_mm256_setzero_pd())) #define _mm_mask_shuffle_ps(W, U, A, B, M) \ ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ (__v4sf)(__m128)(W))) #define _mm_maskz_shuffle_ps(U, A, B, M) \ ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ (__v4sf)_mm_setzero_ps())) #define _mm256_mask_shuffle_ps(W, U, A, B, M) \ ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ (__v8sf)(__m256)(W))) #define _mm256_maskz_shuffle_ps(U, A, B, M) \ ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ (__v8sf)_mm256_setzero_ps())) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_pd (__m128d __A) { return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 
(__v2df) _mm_setzero_pd (), (__mmask8) -1); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, (__v2df) __W, (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, (__v2df) _mm_setzero_pd (), (__mmask8) __U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_pd (__m256d __A) { return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) -1); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, (__v4df) __W, (__mmask8) __U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ps (__m128 __A) { return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) -1); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, (__v4sf) __W, (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_ps (__m256 __A) { return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) -1); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, (__v8sf) __W, (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcast_f32x4(__m128 __A) { return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 0, 1, 2, 3, 0, 1, 2, 3); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x4(__A), (__v8sf)__O); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x4(__A), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i32x4(__m128i __A) { return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 2, 3, 0, 1, 2, 3); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x4(__A), (__v8si)__O); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x4(__A), 
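/*
 * Illustrative usage sketch, not part of the original header: as the
 * __builtin_shufflevector index list above shows, _mm256_broadcast_f32x4
 * replicates a 128-bit vector into both halves of a 256-bit vector; the mask_
 * and maskz_ forms then select per 32-bit lane. Assumes <immintrin.h> and
 * -mavx512vl.
 *
 *   #include <immintrin.h>
 *
 *   __m256 splat_quad(__m128 q)
 *   {
 *       return _mm256_broadcast_f32x4(q);   // { q[0..3], q[0..3] }
 *   }
 */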
(__v8si)_mm256_setzero_si256()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256(__M, (__v4df) _mm256_broadcastsd_pd(__A), (__v4df) __O); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256(__M, (__v4df) _mm256_broadcastsd_pd(__A), (__v4df) _mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) { return (__m128)__builtin_ia32_selectps_128(__M, (__v4sf) _mm_broadcastss_ps(__A), (__v4sf) __O); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) { return (__m128)__builtin_ia32_selectps_128(__M, (__v4sf) _mm_broadcastss_ps(__A), (__v4sf) _mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256(__M, (__v8sf) _mm256_broadcastss_ps(__A), (__v8sf) __O); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256(__M, (__v8sf) _mm256_broadcastss_ps(__A), (__v8sf) _mm256_setzero_ps()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si) _mm_broadcastd_epi32(__A), (__v4si) __O); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si) _mm_broadcastd_epi32(__A), (__v4si) _mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256(__M, (__v8si) _mm256_broadcastd_epi32(__A), (__v8si) __O); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256(__M, (__v8si) _mm256_broadcastd_epi32(__A), (__v8si) _mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128(__M, (__v2di) _mm_broadcastq_epi64(__A), (__v2di) __O); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128(__M, (__v2di) _mm_broadcastq_epi64(__A), (__v2di) _mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256(__M, (__v4di) _mm256_broadcastq_epi64(__A), (__v4di) __O); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256(__M, (__v4di) _mm256_broadcastq_epi64(__A), (__v4di) _mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, (__v16qi)_mm_undefined_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, (__v16qi) __O, __M); } static __inline__ 
__m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, (__v16qi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, (__v16qi)_mm_undefined_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, (__v16qi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, (__v8hi)_mm_setzero_si128 (), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, (__v8hi)__O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, (__v8hi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, (__v8hi)_mm_undefined_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, (__v8hi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, (__v8hi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, (__v16qi)_mm_undefined_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, (__v16qi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) 
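/*
 * Illustrative usage sketch, not part of the original header: the
 * cvtsepi32_epi8 family above narrows 32-bit lanes to 8 bits with *signed*
 * saturation (values clamped to [-128, 127]), and the storeu_epi8 variant
 * writes only the narrowed bytes, one per source lane and gated by the mask.
 * Assumes <immintrin.h> and -mavx512vl; `dst` is hypothetical.
 *
 *   #include <immintrin.h>
 *
 *   void narrow4(signed char *dst, __m128i v)    // v holds 4 x int32
 *   {
 *       // e.g. {300, -1, -300, 7} -> {127, -1, -128, 7}
 *       _mm_mask_cvtsepi32_storeu_epi8(dst, 0xF, v);   // writes 4 bytes
 *   }
 */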
__P, (__v2di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, (__v16qi)_mm_undefined_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, (__v16qi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi32 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, (__v4si)_mm_undefined_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, (__v4si) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, (__v4si) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi32 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, (__v4si)_mm_undefined_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, (__v4si)__O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, (__v4si) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, (__v8hi)_mm_undefined_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, (__v8hi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, (__v8hi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, (__v8hi)_mm_undefined_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return 
(__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, (__v8hi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, (__v8hi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, (__v16qi)_mm_undefined_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, (__v16qi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, (__v16qi)_mm_undefined_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, (__v16qi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, (__v8hi)_mm_undefined_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, (__v8hi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, (__v8hi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, (__v8hi) _mm_undefined_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, (__v8hi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, (__v8hi) _mm_setzero_si128 (), __M); } static __inline__ void 
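/*
 * Illustrative usage sketch, not part of the original header: the cvtusepi32 /
 * cvtusepi64 family above narrows with *unsigned* saturation, i.e. the source
 * lanes are treated as unsigned and clamped to the unsigned destination range,
 * so for 32->8 anything above 255 becomes 255 (a bit pattern of -1, read as
 * 0xFFFFFFFF, should therefore also saturate to 255). Assumes <immintrin.h>
 * and -mavx512vl.
 *
 *   #include <immintrin.h>
 *
 *   __m128i narrow_unsigned(__m128i v)            // v holds 4 x uint32
 *   {
 *       // e.g. {7, 255, 256, 0xFFFFFFFF} -> low 4 bytes {7, 255, 255, 255}
 *       return _mm_cvtusepi32_epi8(v);
 *   }
 */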
__DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, (__v16qi)_mm_undefined_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, (__v16qi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, (__v16qi)_mm_undefined_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, (__v16qi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi32 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, (__v4si)_mm_undefined_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, (__v4si) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, (__v4si) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi32 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, (__v4si)_mm_undefined_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, (__v4si) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, (__v4si) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) 
__A, (__v8hi)_mm_undefined_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, (__v8hi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, (__v8hi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, (__v8hi)_mm_undefined_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, (__v8hi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, (__v8hi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi8 (__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, (__v16qi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi8 (__m256i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v8si)__A, __v8qi), (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, (__v16qi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi16 (__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovdw128_mask 
((__v4si) __A, (__v8hi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, (__v8hi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi16 (__m256i __A) { return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, (__v8hi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, (__v8hi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi8 (__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, (__v16qi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi8 (__m256i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, (__v16qi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi32 (__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, (__v4si) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, (__v4si) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi32 (void * __P, 
__mmask8 __M, __m128i __A) { __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi32 (__m256i __A) { return (__m128i)__builtin_convertvector((__v4di)__A, __v4si); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm256_cvtepi64_epi32(__A), (__v4si)__O); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm256_cvtepi64_epi32(__A), (__v4si)_mm_setzero_si128()); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi16 (__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3, 3, 3, 3, 3); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, (__v8hi)__O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, (__v8hi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi16 (__m256i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, (__v8hi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, (__v8hi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); } #define _mm256_extractf32x4_ps(A, imm) \ ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ (int)(imm), \ (__v4sf)_mm_undefined_ps(), \ (__mmask8)-1)) #define _mm256_mask_extractf32x4_ps(W, U, A, imm) \ ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ (int)(imm), \ (__v4sf)(__m128)(W), \ (__mmask8)(U))) #define _mm256_maskz_extractf32x4_ps(U, A, imm) \ ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ (int)(imm), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U))) #define _mm256_extracti32x4_epi32(A, imm) \ ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ (int)(imm), \ (__v4si)_mm_undefined_si128(), \ (__mmask8)-1)) #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \ ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ (int)(imm), \ (__v4si)(__m128i)(W), \ (__mmask8)(U))) #define _mm256_maskz_extracti32x4_epi32(U, A, imm) \ ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ (int)(imm), \ 
(__v4si)_mm_setzero_si128(), \ (__mmask8)(U))) #define _mm256_insertf32x4(A, B, imm) \ ((__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \ (__v4sf)(__m128)(B), (int)(imm))) #define _mm256_mask_insertf32x4(W, U, A, B, imm) \ ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ (__v8sf)(__m256)(W))) #define _mm256_maskz_insertf32x4(U, A, B, imm) \ ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ (__v8sf)_mm256_setzero_ps())) #define _mm256_inserti32x4(A, B, imm) \ ((__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \ (__v4si)(__m128i)(B), (int)(imm))) #define _mm256_mask_inserti32x4(W, U, A, B, imm) \ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ (__v8si)(__m256i)(W))) #define _mm256_maskz_inserti32x4(U, A, B, imm) \ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ (__v8si)_mm256_setzero_si256())) #define _mm_getmant_pd(A, B, C) \ ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ (int)(((C)<<2) | (B)), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1)) #define _mm_mask_getmant_pd(W, U, A, B, C) \ ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ (int)(((C)<<2) | (B)), \ (__v2df)(__m128d)(W), \ (__mmask8)(U))) #define _mm_maskz_getmant_pd(U, A, B, C) \ ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ (int)(((C)<<2) | (B)), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U))) #define _mm256_getmant_pd(A, B, C) \ ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ (int)(((C)<<2) | (B)), \ (__v4df)_mm256_setzero_pd(), \ (__mmask8)-1)) #define _mm256_mask_getmant_pd(W, U, A, B, C) \ ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ (int)(((C)<<2) | (B)), \ (__v4df)(__m256d)(W), \ (__mmask8)(U))) #define _mm256_maskz_getmant_pd(U, A, B, C) \ ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ (int)(((C)<<2) | (B)), \ (__v4df)_mm256_setzero_pd(), \ (__mmask8)(U))) #define _mm_getmant_ps(A, B, C) \ ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ (int)(((C)<<2) | (B)), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1)) #define _mm_mask_getmant_ps(W, U, A, B, C) \ ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ (int)(((C)<<2) | (B)), \ (__v4sf)(__m128)(W), \ (__mmask8)(U))) #define _mm_maskz_getmant_ps(U, A, B, C) \ ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ (int)(((C)<<2) | (B)), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U))) #define _mm256_getmant_ps(A, B, C) \ ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ (int)(((C)<<2) | (B)), \ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)-1)) #define _mm256_mask_getmant_ps(W, U, A, B, C) \ ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ (int)(((C)<<2) | (B)), \ (__v8sf)(__m256)(W), \ (__mmask8)(U))) #define _mm256_maskz_getmant_ps(U, A, B, C) \ ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ (int)(((C)<<2) | (B)), \ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U))) #define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \ ((__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \ (void const *)(addr), \ (__v2di)(__m128i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \ ((__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \ (void const *)(addr), \ 
(__v2di)(__m128i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \ ((__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \ (void const *)(addr), \ (__v4di)(__m256i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \ ((__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \ (void const *)(addr), \ (__v4di)(__m256i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ ((__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \ (void const *)(addr), \ (__v2di)(__m128i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ ((__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \ (void const *)(addr), \ (__v2di)(__m128i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ ((__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \ (void const *)(addr), \ (__v4di)(__m256i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ ((__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \ (void const *)(addr), \ (__v4di)(__m256i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \ ((__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \ (void const *)(addr), \ (__v4si)(__m128i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \ ((__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \ (void const *)(addr), \ (__v4si)(__m128i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \ ((__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \ (void const *)(addr), \ (__v4si)(__m128i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \ ((__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \ (void const *)(addr), \ (__v4si)(__m128i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ ((__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \ (void const *)(addr), \ (__v4si)(__m128i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ ((__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \ (void const *)(addr), \ (__v4si)(__m128i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ ((__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \ (void const *)(addr), \ (__v8si)(__m256i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ ((__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \ (void const *)(addr), \ (__v8si)(__m256i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm256_permutex_pd(X, C) \ ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C))) #define _mm256_mask_permutex_pd(W, U, X, C) \ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_permutex_pd((X), (C)), \ (__v4df)(__m256d)(W))) #define _mm256_maskz_permutex_pd(U, X, C) \ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 
(__v4df)_mm256_permutex_pd((X), (C)), \ (__v4df)_mm256_setzero_pd())) #define _mm256_permutex_epi64(X, C) \ ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C))) #define _mm256_mask_permutex_epi64(W, U, X, C) \ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_permutex_epi64((X), (C)), \ (__v4di)(__m256i)(W))) #define _mm256_maskz_permutex_epi64(U, X, C) \ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_permutex_epi64((X), (C)), \ (__v4di)_mm256_setzero_si256())) static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_permutexvar_pd (__m256i __X, __m256d __Y) { return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_permutexvar_pd(__X, __Y), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_permutexvar_pd(__X, __Y), (__v4df)_mm256_setzero_pd()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_permutexvar_epi64(__X, __Y), (__v4di)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_permutexvar_epi64(__X, __Y), (__v4di)__W); } #define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A)) static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_permutexvar_ps(__X, __Y), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_permutexvar_ps(__X, __Y), (__v8sf)_mm256_setzero_ps()); } #define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A)) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_permutexvar_epi32(__X, __Y), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_permutexvar_epi32(__X, __Y), (__v8si)_mm256_setzero_si256()); } #define _mm_alignr_epi32(A, B, imm) \ ((__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \ (__v4si)(__m128i)(B), (int)(imm))) #define _mm_mask_alignr_epi32(W, U, A, B, imm) \ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ (__v4si)(__m128i)(W))) #define _mm_maskz_alignr_epi32(U, A, B, imm) \ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ (__v4si)_mm_setzero_si128())) #define _mm256_alignr_epi32(A, B, imm) \ 
((__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \ (__v8si)(__m256i)(B), (int)(imm))) #define _mm256_mask_alignr_epi32(W, U, A, B, imm) \ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ (__v8si)(__m256i)(W))) #define _mm256_maskz_alignr_epi32(U, A, B, imm) \ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ (__v8si)_mm256_setzero_si256())) #define _mm_alignr_epi64(A, B, imm) \ ((__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \ (__v2di)(__m128i)(B), (int)(imm))) #define _mm_mask_alignr_epi64(W, U, A, B, imm) \ ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ (__v2di)(__m128i)(W))) #define _mm_maskz_alignr_epi64(U, A, B, imm) \ ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ (__v2di)_mm_setzero_si128())) #define _mm256_alignr_epi64(A, B, imm) \ ((__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \ (__v4di)(__m256i)(B), (int)(imm))) #define _mm256_mask_alignr_epi64(W, U, A, B, imm) \ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ (__v4di)(__m256i)(W))) #define _mm256_maskz_alignr_epi64(U, A, B, imm) \ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ (__v4di)_mm256_setzero_si256())) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_movehdup_ps(__A), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_movehdup_ps(__A), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_movehdup_ps(__A), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_movehdup_ps(__A), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_moveldup_ps(__A), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_moveldup_ps(__A), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_moveldup_ps(__A), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_moveldup_ps(__A), (__v8sf)_mm256_setzero_ps()); } #define _mm256_mask_shuffle_epi32(W, U, A, I) \ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_shuffle_epi32((A), (I)), \ (__v8si)(__m256i)(W))) #define _mm256_maskz_shuffle_epi32(U, A, I) \ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_shuffle_epi32((A), (I)), \ (__v8si)_mm256_setzero_si256())) #define _mm_mask_shuffle_epi32(W, U, 
A, I) \ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm_shuffle_epi32((A), (I)), \ (__v4si)(__m128i)(W))) #define _mm_maskz_shuffle_epi32(U, A, I) \ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm_shuffle_epi32((A), (I)), \ (__v4si)_mm_setzero_si128())) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, (__v2df) __A, (__v2df) __W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, (__v2df) __A, (__v2df) _mm_setzero_pd ()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, (__v4df) __A, (__v4df) __W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, (__v4df) __A, (__v4df) _mm256_setzero_pd ()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, (__v4sf) __A, (__v4sf) __W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, (__v4sf) __A, (__v4sf) _mm_setzero_ps ()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, (__v8sf) __A, (__v8sf) __W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, (__v8sf) __A, (__v8sf) _mm256_setzero_ps ()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, (__v4sf) __W, (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A) { return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, (__v8sf) __W, (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) { return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U); } #define _mm_mask_cvt_roundps_ph(W, U, A, I) \ ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ (__v8hi)(__m128i)(W), \ (__mmask8)(U))) #define _mm_maskz_cvt_roundps_ph(U, A, I) \ ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ (__v8hi)_mm_setzero_si128(), \ (__mmask8)(U))) #define _mm_mask_cvtps_ph _mm_mask_cvt_roundps_ph #define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph #define _mm256_mask_cvt_roundps_ph(W, U, A, I) \ ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ (__v8hi)(__m128i)(W), \ (__mmask8)(U))) #define _mm256_maskz_cvt_roundps_ph(U, A, I) \ ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ (__v8hi)_mm_setzero_si128(), \ (__mmask8)(U))) #define _mm256_mask_cvtps_ph _mm256_mask_cvt_roundps_ph #define 
_mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif /* __AVX512VLINTRIN_H */ builtins.h/*===---- clwbintrin.h - CLWB intrinsic ------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __CLWBINTRIN_H #define __CLWBINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("clwb"))) /// Writes back to memory the cache line (if modified) that contains the /// linear address specified in \a __p from any level of the cache hierarchy in /// the cache coherence domain /// /// \headerfile /// /// This intrinsic corresponds to the CLWB instruction. /// /// \param __p /// A pointer to the memory location used to identify the cache line to be /// written back. static __inline__ void __DEFAULT_FN_ATTRS _mm_clwb(void const *__p) { __builtin_ia32_clwb(__p); } #undef __DEFAULT_FN_ATTRS #endif hresetintrin.homp-tools.h/*===--------------- sm4intrin.h - SM4 intrinsics -----------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif // __IMMINTRIN_H #ifndef __SM4INTRIN_H #define __SM4INTRIN_H /// This intrinsic performs four rounds of SM4 key expansion. The intrinsic /// operates on independent 128-bit lanes. The calculated results are /// stored in \a dst. /// \headerfile /// /// \code /// __m128i _mm_sm4key4_epi32(__m128i __A, __m128i __B) /// \endcode /// /// This intrinsic corresponds to the \c VSM4KEY4 instruction. /// /// \param __A /// A 128-bit vector of [4 x int]. /// \param __B /// A 128-bit vector of [4 x int]. /// \returns /// A 128-bit vector of [4 x int]. 
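///
/// For example (an illustrative sketch only, not part of the original header;
/// the buffer names are hypothetical and follow the operation pseudocode
/// below, which draws the key words from \a __B and the per-round values
/// from \a __A):
/// \code
///   __m128i ck   = _mm_loadu_si128((const __m128i *)round_constants); /* __A */
///   __m128i k    = _mm_loadu_si128((const __m128i *)key_words);       /* __B */
///   __m128i next = _mm_sm4key4_epi32(ck, k); /* four key-expansion rounds */
/// \endcode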
/// /// \code{.operation} /// DEFINE ROL32(dword, n) { /// count := n % 32 /// dest := (dword << count) | (dword >> (32-count)) /// RETURN dest /// } /// DEFINE SBOX_BYTE(dword, i) { /// RETURN sbox[dword.byte[i]] /// } /// DEFINE lower_t(dword) { /// tmp.byte[0] := SBOX_BYTE(dword, 0) /// tmp.byte[1] := SBOX_BYTE(dword, 1) /// tmp.byte[2] := SBOX_BYTE(dword, 2) /// tmp.byte[3] := SBOX_BYTE(dword, 3) /// RETURN tmp /// } /// DEFINE L_KEY(dword) { /// RETURN dword ^ ROL32(dword, 13) ^ ROL32(dword, 23) /// } /// DEFINE T_KEY(dword) { /// RETURN L_KEY(lower_t(dword)) /// } /// DEFINE F_KEY(X0, X1, X2, X3, round_key) { /// RETURN X0 ^ T_KEY(X1 ^ X2 ^ X3 ^ round_key) /// } /// FOR i:= 0 to 0 /// P[0] := __B.xmm[i].dword[0] /// P[1] := __B.xmm[i].dword[1] /// P[2] := __B.xmm[i].dword[2] /// P[3] := __B.xmm[i].dword[3] /// C[0] := F_KEY(P[0], P[1], P[2], P[3], __A.xmm[i].dword[0]) /// C[1] := F_KEY(P[1], P[2], P[3], C[0], __A.xmm[i].dword[1]) /// C[2] := F_KEY(P[2], P[3], C[0], C[1], __A.xmm[i].dword[2]) /// C[3] := F_KEY(P[3], C[0], C[1], C[2], __A.xmm[i].dword[3]) /// DEST.xmm[i].dword[0] := C[0] /// DEST.xmm[i].dword[1] := C[1] /// DEST.xmm[i].dword[2] := C[2] /// DEST.xmm[i].dword[3] := C[3] /// ENDFOR /// DEST[MAX:128] := 0 /// \endcode #define _mm_sm4key4_epi32(A, B) \ (__m128i) __builtin_ia32_vsm4key4128((__v4su)A, (__v4su)B) /// This intrinsic performs four rounds of SM4 key expansion. The intrinsic /// operates on independent 128-bit lanes. The calculated results are /// stored in \a dst. /// \headerfile /// /// \code /// __m256i _mm256_sm4key4_epi32(__m256i __A, __m256i __B) /// \endcode /// /// This intrinsic corresponds to the \c VSM4KEY4 instruction. /// /// \param __A /// A 256-bit vector of [8 x int]. /// \param __B /// A 256-bit vector of [8 x int]. /// \returns /// A 256-bit vector of [8 x int]. /// /// \code{.operation} /// DEFINE ROL32(dword, n) { /// count := n % 32 /// dest := (dword << count) | (dword >> (32-count)) /// RETURN dest /// } /// DEFINE SBOX_BYTE(dword, i) { /// RETURN sbox[dword.byte[i]] /// } /// DEFINE lower_t(dword) { /// tmp.byte[0] := SBOX_BYTE(dword, 0) /// tmp.byte[1] := SBOX_BYTE(dword, 1) /// tmp.byte[2] := SBOX_BYTE(dword, 2) /// tmp.byte[3] := SBOX_BYTE(dword, 3) /// RETURN tmp /// } /// DEFINE L_KEY(dword) { /// RETURN dword ^ ROL32(dword, 13) ^ ROL32(dword, 23) /// } /// DEFINE T_KEY(dword) { /// RETURN L_KEY(lower_t(dword)) /// } /// DEFINE F_KEY(X0, X1, X2, X3, round_key) { /// RETURN X0 ^ T_KEY(X1 ^ X2 ^ X3 ^ round_key) /// } /// FOR i:= 0 to 1 /// P[0] := __B.xmm[i].dword[0] /// P[1] := __B.xmm[i].dword[1] /// P[2] := __B.xmm[i].dword[2] /// P[3] := __B.xmm[i].dword[3] /// C[0] := F_KEY(P[0], P[1], P[2], P[3], __A.xmm[i].dword[0]) /// C[1] := F_KEY(P[1], P[2], P[3], C[0], __A.xmm[i].dword[1]) /// C[2] := F_KEY(P[2], P[3], C[0], C[1], __A.xmm[i].dword[2]) /// C[3] := F_KEY(P[3], C[0], C[1], C[2], __A.xmm[i].dword[3]) /// DEST.xmm[i].dword[0] := C[0] /// DEST.xmm[i].dword[1] := C[1] /// DEST.xmm[i].dword[2] := C[2] /// DEST.xmm[i].dword[3] := C[3] /// ENDFOR /// DEST[MAX:256] := 0 /// \endcode #define _mm256_sm4key4_epi32(A, B) \ (__m256i) __builtin_ia32_vsm4key4256((__v8su)A, (__v8su)B) /// This intrinisc performs four rounds of SM4 encryption. The intrinisc /// operates on independent 128-bit lanes. The calculated results are /// stored in \a dst. /// \headerfile /// /// \code /// __m128i _mm_sm4rnds4_epi32(__m128i __A, __m128i __B) /// \endcode /// /// This intrinsic corresponds to the \c VSM4RNDS4 instruction. 
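///
/// For example (an illustrative sketch only, not part of the original header;
/// pointer names are hypothetical), one call performs four encryption rounds
/// on the data words taken from \a __B using the round keys in \a __A:
/// \code
///   __m128i rk    = _mm_loadu_si128((const __m128i *)round_keys); /* __A */
///   __m128i state = _mm_loadu_si128((const __m128i *)data_words); /* __B */
///   state = _mm_sm4rnds4_epi32(rk, state);
/// \endcode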
/// /// \param __A /// A 128-bit vector of [4 x int]. /// \param __B /// A 128-bit vector of [4 x int]. /// \returns /// A 128-bit vector of [4 x int]. /// /// \code{.operation} /// DEFINE ROL32(dword, n) { /// count := n % 32 /// dest := (dword << count) | (dword >> (32-count)) /// RETURN dest /// } /// DEFINE lower_t(dword) { /// tmp.byte[0] := SBOX_BYTE(dword, 0) /// tmp.byte[1] := SBOX_BYTE(dword, 1) /// tmp.byte[2] := SBOX_BYTE(dword, 2) /// tmp.byte[3] := SBOX_BYTE(dword, 3) /// RETURN tmp /// } /// DEFINE L_RND(dword) { /// tmp := dword /// tmp := tmp ^ ROL32(dword, 2) /// tmp := tmp ^ ROL32(dword, 10) /// tmp := tmp ^ ROL32(dword, 18) /// tmp := tmp ^ ROL32(dword, 24) /// RETURN tmp /// } /// DEFINE T_RND(dword) { /// RETURN L_RND(lower_t(dword)) /// } /// DEFINE F_RND(X0, X1, X2, X3, round_key) { /// RETURN X0 ^ T_RND(X1 ^ X2 ^ X3 ^ round_key) /// } /// FOR i:= 0 to 0 /// P[0] := __B.xmm[i].dword[0] /// P[1] := __B.xmm[i].dword[1] /// P[2] := __B.xmm[i].dword[2] /// P[3] := __B.xmm[i].dword[3] /// C[0] := F_RND(P[0], P[1], P[2], P[3], __A.xmm[i].dword[0]) /// C[1] := F_RND(P[1], P[2], P[3], C[0], __A.xmm[i].dword[1]) /// C[2] := F_RND(P[2], P[3], C[0], C[1], __A.xmm[i].dword[2]) /// C[3] := F_RND(P[3], C[0], C[1], C[2], __A.xmm[i].dword[3]) /// DEST.xmm[i].dword[0] := C[0] /// DEST.xmm[i].dword[1] := C[1] /// DEST.xmm[i].dword[2] := C[2] /// DEST.xmm[i].dword[3] := C[3] /// ENDFOR /// DEST[MAX:128] := 0 /// \endcode #define _mm_sm4rnds4_epi32(A, B) \ (__m128i) __builtin_ia32_vsm4rnds4128((__v4su)A, (__v4su)B) /// This intrinisc performs four rounds of SM4 encryption. The intrinisc /// operates on independent 128-bit lanes. The calculated results are /// stored in \a dst. /// \headerfile /// /// \code /// __m256i _mm256_sm4rnds4_epi32(__m256i __A, __m256i __B) /// \endcode /// /// This intrinsic corresponds to the \c VSM4RNDS4 instruction. /// /// \param __A /// A 256-bit vector of [8 x int]. /// \param __B /// A 256-bit vector of [8 x int]. /// \returns /// A 256-bit vector of [8 x int]. 
/// /// \code{.operation} /// DEFINE ROL32(dword, n) { /// count := n % 32 /// dest := (dword << count) | (dword >> (32-count)) /// RETURN dest /// } /// DEFINE lower_t(dword) { /// tmp.byte[0] := SBOX_BYTE(dword, 0) /// tmp.byte[1] := SBOX_BYTE(dword, 1) /// tmp.byte[2] := SBOX_BYTE(dword, 2) /// tmp.byte[3] := SBOX_BYTE(dword, 3) /// RETURN tmp /// } /// DEFINE L_RND(dword) { /// tmp := dword /// tmp := tmp ^ ROL32(dword, 2) /// tmp := tmp ^ ROL32(dword, 10) /// tmp := tmp ^ ROL32(dword, 18) /// tmp := tmp ^ ROL32(dword, 24) /// RETURN tmp /// } /// DEFINE T_RND(dword) { /// RETURN L_RND(lower_t(dword)) /// } /// DEFINE F_RND(X0, X1, X2, X3, round_key) { /// RETURN X0 ^ T_RND(X1 ^ X2 ^ X3 ^ round_key) /// } /// FOR i:= 0 to 0 /// P[0] := __B.xmm[i].dword[0] /// P[1] := __B.xmm[i].dword[1] /// P[2] := __B.xmm[i].dword[2] /// P[3] := __B.xmm[i].dword[3] /// C[0] := F_RND(P[0], P[1], P[2], P[3], __A.xmm[i].dword[0]) /// C[1] := F_RND(P[1], P[2], P[3], C[0], __A.xmm[i].dword[1]) /// C[2] := F_RND(P[2], P[3], C[0], C[1], __A.xmm[i].dword[2]) /// C[3] := F_RND(P[3], C[0], C[1], C[2], __A.xmm[i].dword[3]) /// DEST.xmm[i].dword[0] := C[0] /// DEST.xmm[i].dword[1] := C[1] /// DEST.xmm[i].dword[2] := C[2] /// DEST.xmm[i].dword[3] := C[3] /// ENDFOR /// DEST[MAX:256] := 0 /// \endcode #define _mm256_sm4rnds4_epi32(A, B) \ (__m256i) __builtin_ia32_vsm4rnds4256((__v8su)A, (__v8su)B) #endif // __SM4INTRIN_H /*===------------------ uintrintrin.h - UINTR intrinsics -------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86GPRINTRIN_H #error "Never use directly; include instead." #endif #ifndef __UINTRINTRIN_H #define __UINTRINTRIN_H /* Define the default attributes for the functions in this file */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("uintr"))) #ifdef __x86_64__ struct __uintr_frame { unsigned long long rip; unsigned long long rflags; unsigned long long rsp; }; /// Clears the user interrupt flag (UIF). Its effect takes place immediately: a /// user interrupt cannot be delivered on the instruction boundary following /// CLUI. Can be executed only if CR4.UINT = 1, the logical processor is in /// 64-bit mode, and software is not executing inside an enclave; otherwise, /// each causes an invalid-opcode exception. Causes a transactional abort if /// executed inside a transactional region; the abort loads EAX as it would /// had it been due to an execution of CLI. /// /// \headerfile /// /// This intrinsic corresponds to the CLUI instruction. /// /// \code{.operation} /// UIF := 0 /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _clui (void) { __builtin_ia32_clui(); } /// Sets the user interrupt flag (UIF). Its effect takes place immediately; a /// user interrupt may be delivered on the instruction boundary following /// STUI. Can be executed only if CR4.UINT = 1, the logical processor is in /// 64-bit mode, and software is not executing inside an enclave; otherwise, /// each causes an invalid-opcode exception. Causes a transactional abort if /// executed inside a transactional region; the abort loads EAX as it would /// had it been due to an execution of STI. /// /// \headerfile /// /// This intrinsic corresponds to the STUI instruction. 
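///
/// For example (an illustrative sketch only, not part of the original header),
/// _clui and _stui can bracket a short region in which user-interrupt
/// delivery must stay suppressed:
/// \code
///   _clui();                       /* UIF := 0, suppress delivery  */
///   /* ... short critical region ... */
///   _stui();                       /* UIF := 1, re-enable delivery */
/// \endcode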
/// /// \code{.operation} /// UIF := 1 /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _stui (void) { __builtin_ia32_stui(); } /// Get the current value of the user interrupt flag (UIF). Can be executed /// regardless of CPL and inside a transactional region. Can be executed only /// if CR4.UINT = 1, the logical processor is in 64-bit mode, and software is /// not executing inside an enclave; otherwise, it causes an invalid-opcode /// exception. /// /// \headerfile /// /// This intrinsic corresponds to the TESTUI instruction. /// /// \returns The current value of the user interrupt flag (UIF). /// /// \code{.operation} /// CF := UIF /// ZF := 0 /// AF := 0 /// OF := 0 /// PF := 0 /// SF := 0 /// dst := CF /// \endcode static __inline__ unsigned char __DEFAULT_FN_ATTRS _testui (void) { return __builtin_ia32_testui(); } /// Send interprocessor user interrupt. Can be executed only if /// CR4.UINT = IA32_UINT_TT[0] = 1, the logical processor is in 64-bit mode, /// and software is not executing inside an enclave; otherwise, it causes an /// invalid-opcode exception. May be executed at any privilege level, all of /// its memory accesses are performed with supervisor privilege. /// /// \headerfile /// /// This intrinsic corresponds to the SENDUIPI instruction /// /// \param __a /// Index of user-interrupt target table entry in user-interrupt target /// table. /// /// \code{.operation} /// IF __a > UITTSZ /// GP (0) /// FI /// tempUITTE := MEM[UITTADDR + (a<<4)] /// // tempUITTE must be valid, and can't have any reserved bit set /// IF (tempUITTE.V == 0 OR tempUITTE[7:1] != 0) /// GP (0) /// FI /// tempUPID := MEM[tempUITTE.UPIDADDR] // under lock /// // tempUPID can't have any reserved bit set /// IF (tempUPID[15:2] != 0 OR tempUPID[31:24] != 0) /// GP (0) // release lock /// FI /// tempUPID.PIR[tempUITTE.UV] := 1; /// IF (tempUPID.SN == 0 AND tempUPID.ON == 0) /// tempUPID.ON := 1 /// sendNotify := 1 /// ELSE /// sendNotify := 0 /// FI /// MEM[tempUITTE.UPIDADDR] := tempUPID // release lock /// IF sendNotify == 1 /// IF IA32_APIC_BASE[10] == 1 // local APIC is in x2APIC mode /// // send ordinary IPI with vector tempUPID.NV to 32-bit physical APIC /// // ID tempUPID.NDST /// SendOrdinaryIPI(tempUPID.NV, tempUPID.NDST) /// ELSE /// // send ordinary IPI with vector tempUPID.NV to 8-bit physical APIC /// // ID tempUPID.NDST[15:8] /// SendOrdinaryIPI(tempUPID.NV, tempUPID.NDST[15:8]) /// FI /// FI /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _senduipi (unsigned long long __a) { __builtin_ia32_senduipi(__a); } #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS #endif /* __UINTRINTRIN_H */ xsavecintrin.hppc_wrappers/x86intrin.h//===-- sanitizer/ubsan_interface.h -----------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is a part of UBSanitizer (UBSan). // // Public interface header. //===----------------------------------------------------------------------===// #ifndef SANITIZER_UBSAN_INTERFACE_H #define SANITIZER_UBSAN_INTERFACE_H #ifdef __cplusplus extern "C" { #endif /// User-provided default option settings. /// /// You can provide your own implementation of this function to return a string /// containing UBSan runtime options (for example, /// verbosity=1:halt_on_error=0). 
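///
/// For example (an illustrative sketch only, not part of this header; the
/// chosen options are arbitrary), a program can supply its own defaults:
/// \code
///   const char *__ubsan_default_options(void) {
///     return "print_stacktrace=1:halt_on_error=0";
///   }
/// \endcode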
/// /// \returns Default options string. const char *SANITIZER_CDECL __ubsan_default_options(void); #ifdef __cplusplus } // extern "C" #endif #endif // SANITIZER_UBSAN_INTERFACE_H 
/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_CUDA_CMATH_H__ #define __CLANG_CUDA_CMATH_H__ #ifndef __CUDA__ #error "This file is for CUDA compilation only." #endif #ifndef __OPENMP_NVPTX__ #include #endif // CUDA lets us use various std math functions on the device side. This file // works in concert with __clang_cuda_math_forward_declares.h to make this work. // // Specifically, the forward-declares header declares __device__ overloads for // these functions in the global namespace, then pulls them into namespace std // with 'using' statements. Then this file implements those functions, after // their implementations have been pulled in. // // It's important that we declare the functions in the global namespace and pull // them into namespace std with using statements, as opposed to simply declaring // these functions in namespace std, because our device functions need to // overload the standard library functions, which may be declared in the global // namespace or in std, depending on the degree of conformance of the stdlib // implementation. Declaring in the global namespace and pulling into namespace // std covers all of the known knowns.
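// For example (illustrative only, not part of the original header): once the
// __device__ overloads below are visible both in the global namespace and in
// namespace std, device code such as
//   __device__ float f(float x) { return std::sin(x) + ::sin(x); }
// resolves both spellings to the __device__ float overload, no matter where
// the host standard library chose to declare sin.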
#ifdef __OPENMP_NVPTX__ #define __DEVICE__ static constexpr __attribute__((always_inline, nothrow)) #else #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline)) #endif __DEVICE__ long long abs(long long __n) { return ::llabs(__n); } __DEVICE__ long abs(long __n) { return ::labs(__n); } __DEVICE__ float abs(float __x) { return ::fabsf(__x); } __DEVICE__ double abs(double __x) { return ::fabs(__x); } __DEVICE__ float acos(float __x) { return ::acosf(__x); } __DEVICE__ float asin(float __x) { return ::asinf(__x); } __DEVICE__ float atan(float __x) { return ::atanf(__x); } __DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); } __DEVICE__ float ceil(float __x) { return ::ceilf(__x); } __DEVICE__ float cos(float __x) { return ::cosf(__x); } __DEVICE__ float cosh(float __x) { return ::coshf(__x); } __DEVICE__ float exp(float __x) { return ::expf(__x); } __DEVICE__ float fabs(float __x) { return ::fabsf(__x); } __DEVICE__ float floor(float __x) { return ::floorf(__x); } __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); } __DEVICE__ int fpclassify(float __x) { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } __DEVICE__ int fpclassify(double __x) { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } __DEVICE__ float frexp(float __arg, int *__exp) { return ::frexpf(__arg, __exp); } // For inscrutable reasons, the CUDA headers define these functions for us on // Windows. #if !defined(_MSC_VER) || defined(__OPENMP_NVPTX__) // For OpenMP we work around some old system headers that have non-conforming // `isinf(float)` and `isnan(float)` implementations that return an `int`. We do // this by providing two versions of these functions, differing only in the // return type. To avoid conflicting definitions we disable implicit base // function generation. That means we will end up with two specializations, one // per type, but only one has a base function defined by the system header. #if defined(__OPENMP_NVPTX__) #pragma omp begin declare variant match( \ implementation = {extension(disable_implicit_base)}) // FIXME: We lack an extension to customize the mangling of the variants, e.g., // add a suffix. This means we would clash with the names of the variants // (note that we do not create implicit base functions here). To avoid // this clash we add a new trait to some of them that is always true // (this is LLVM after all ;)). It will only influence the mangled name // of the variants inside the inner region and avoid the clash. #pragma omp begin declare variant match(implementation = {vendor(llvm)}) __DEVICE__ int isinf(float __x) { return ::__isinff(__x); } __DEVICE__ int isinf(double __x) { return ::__isinf(__x); } __DEVICE__ int isfinite(float __x) { return ::__finitef(__x); } __DEVICE__ int isfinite(double __x) { return ::__isfinited(__x); } __DEVICE__ int isnan(float __x) { return ::__isnanf(__x); } __DEVICE__ int isnan(double __x) { return ::__isnan(__x); } #pragma omp end declare variant #endif __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); } __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); } __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); } // For inscrutable reasons, __finite(), the double-precision version of // __finitef, does not exist when compiling for MacOS. __isfinited is available // everywhere and is just as good. 
__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); } __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); } __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); } #if defined(__OPENMP_NVPTX__) #pragma omp end declare variant #endif #endif __DEVICE__ bool isgreater(float __x, float __y) { return __builtin_isgreater(__x, __y); } __DEVICE__ bool isgreater(double __x, double __y) { return __builtin_isgreater(__x, __y); } __DEVICE__ bool isgreaterequal(float __x, float __y) { return __builtin_isgreaterequal(__x, __y); } __DEVICE__ bool isgreaterequal(double __x, double __y) { return __builtin_isgreaterequal(__x, __y); } __DEVICE__ bool isless(float __x, float __y) { return __builtin_isless(__x, __y); } __DEVICE__ bool isless(double __x, double __y) { return __builtin_isless(__x, __y); } __DEVICE__ bool islessequal(float __x, float __y) { return __builtin_islessequal(__x, __y); } __DEVICE__ bool islessequal(double __x, double __y) { return __builtin_islessequal(__x, __y); } __DEVICE__ bool islessgreater(float __x, float __y) { return __builtin_islessgreater(__x, __y); } __DEVICE__ bool islessgreater(double __x, double __y) { return __builtin_islessgreater(__x, __y); } __DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); } __DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); } __DEVICE__ bool isunordered(float __x, float __y) { return __builtin_isunordered(__x, __y); } __DEVICE__ bool isunordered(double __x, double __y) { return __builtin_isunordered(__x, __y); } __DEVICE__ float ldexp(float __arg, int __exp) { return ::ldexpf(__arg, __exp); } __DEVICE__ float log(float __x) { return ::logf(__x); } __DEVICE__ float log10(float __x) { return ::log10f(__x); } __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); } __DEVICE__ float pow(float __base, float __exp) { return ::powf(__base, __exp); } __DEVICE__ float pow(float __base, int __iexp) { return ::powif(__base, __iexp); } __DEVICE__ double pow(double __base, int __iexp) { return ::powi(__base, __iexp); } __DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); } __DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); } __DEVICE__ float sin(float __x) { return ::sinf(__x); } __DEVICE__ float sinh(float __x) { return ::sinhf(__x); } __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); } __DEVICE__ float tan(float __x) { return ::tanf(__x); } __DEVICE__ float tanh(float __x) { return ::tanhf(__x); } // There was a redefinition error for this this overload in CUDA mode. // We restrict it to OpenMP mode for now, that is where it is actually needed // anyway. #ifdef __OPENMP_NVPTX__ __DEVICE__ float remquo(float __n, float __d, int *__q) { return ::remquof(__n, __d, __q); } #endif // Notably missing above is nexttoward. We omit it because // libdevice doesn't provide an implementation, and we don't want to be in the // business of implementing tricky libm functions in this header. #ifndef __OPENMP_NVPTX__ // Now we've defined everything we promised we'd define in // __clang_cuda_math_forward_declares.h. We need to do two additional things to // fix up our math functions. // // 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define // only sin(float) and sin(double), which means that e.g. sin(0) is // ambiguous. // // 2) Pull the __device__ overloads of "foobarf" math functions into namespace // std. These are defined in the CUDA headers in the global namespace, // independent of everything else we've done here. 
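// For example (illustrative only, not part of the original header): without
// the integer overloads generated below, device code such as
//   __device__ double g() { return std::sin(0); }
// is ambiguous between sin(float) and sin(double); the generated overload
// accepts the integral argument and forwards it as (double) to ::sin.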
// We can't use std::enable_if, because we want to be pre-C++11 compatible. But // we go ahead and unconditionally define functions that are only available when // compiling for C++11 to match the behavior of the CUDA headers. template struct __clang_cuda_enable_if {}; template struct __clang_cuda_enable_if { typedef __T type; }; // Defines an overload of __fn that accepts one integral argument, calls // __fn((double)x), and returns __retty. #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \ template \ __DEVICE__ \ typename __clang_cuda_enable_if::is_integer, \ __retty>::type \ __fn(__T __x) { \ return ::__fn((double)__x); \ } // Defines an overload of __fn that accepts one two arithmetic arguments, calls // __fn((double)x, (double)y), and returns a double. // // Note this is different from OVERLOAD_1, which generates an overload that // accepts only *integral* arguments. #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \ template \ __DEVICE__ typename __clang_cuda_enable_if< \ std::numeric_limits<__T1>::is_specialized && \ std::numeric_limits<__T2>::is_specialized, \ __retty>::type \ __fn(__T1 __x, __T2 __y) { \ return __fn((double)__x, (double)__y); \ } __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan) __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil) __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs) __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor) __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax); __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin); __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify) __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite) __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater); __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf); __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless); __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal); __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal) __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround) 
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint); __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter); __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow); __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc); #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2 // Overloads for functions that don't match the patterns expected by // __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}. template __DEVICE__ typename __clang_cuda_enable_if< std::numeric_limits<__T1>::is_specialized && std::numeric_limits<__T2>::is_specialized && std::numeric_limits<__T3>::is_specialized, double>::type fma(__T1 __x, __T2 __y, __T3 __z) { return std::fma((double)__x, (double)__y, (double)__z); } template __DEVICE__ typename __clang_cuda_enable_if::is_integer, double>::type frexp(__T __x, int *__exp) { return std::frexp((double)__x, __exp); } template __DEVICE__ typename __clang_cuda_enable_if::is_integer, double>::type ldexp(__T __x, int __exp) { return std::ldexp((double)__x, __exp); } template __DEVICE__ typename __clang_cuda_enable_if< std::numeric_limits<__T1>::is_specialized && std::numeric_limits<__T2>::is_specialized, double>::type remquo(__T1 __x, __T2 __y, int *__quo) { return std::remquo((double)__x, (double)__y, __quo); } template __DEVICE__ typename __clang_cuda_enable_if::is_integer, double>::type scalbln(__T __x, long __exp) { return std::scalbln((double)__x, __exp); } template __DEVICE__ typename __clang_cuda_enable_if::is_integer, double>::type scalbn(__T __x, int __exp) { return std::scalbn((double)__x, __exp); } // We need to define these overloads in exactly the namespace our standard // library uses (including the right inline namespace), otherwise they won't be // picked up by other functions in the standard library (e.g. functions in // ). Thus the ugliness below. #ifdef _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_BEGIN_NAMESPACE_STD #else namespace std { #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif #endif // Pull the new overloads we defined above into namespace std. 
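// ---------------------------------------------------------------------------
// Hedged usage sketch (not part of the header): once the using-declarations
// below are in place, device code can spell these functions through std::
// and still reach the promoting overloads defined above, e.g.
//
//   __DEVICE__ double __example(int __i) {
//     return std::fmin(__i, 2.5) + std::sqrt(__i); // both promote __i to double
//   }
//
// The declarations have to land inside the standard library's own inline
// namespace (libc++'s _LIBCPP_BEGIN_NAMESPACE_STD or libstdc++'s versioned
// namespace), otherwise lookups from other standard headers would not find
// them.
// ---------------------------------------------------------------------------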
using ::acos; using ::acosh; using ::asin; using ::asinh; using ::atan; using ::atan2; using ::atanh; using ::cbrt; using ::ceil; using ::copysign; using ::cos; using ::cosh; using ::erf; using ::erfc; using ::exp; using ::exp2; using ::expm1; using ::fabs; using ::fdim; using ::floor; using ::fma; using ::fmax; using ::fmin; using ::fmod; using ::fpclassify; using ::frexp; using ::hypot; using ::ilogb; using ::isfinite; using ::isgreater; using ::isgreaterequal; using ::isless; using ::islessequal; using ::islessgreater; using ::isnormal; using ::isunordered; using ::ldexp; using ::lgamma; using ::llrint; using ::llround; using ::log; using ::log10; using ::log1p; using ::log2; using ::logb; using ::lrint; using ::lround; using ::nearbyint; using ::nextafter; using ::pow; using ::remainder; using ::remquo; using ::rint; using ::round; using ::scalbln; using ::scalbn; using ::signbit; using ::sin; using ::sinh; using ::sqrt; using ::tan; using ::tanh; using ::tgamma; using ::trunc; // Well this is fun: We need to pull these symbols in for libc++, but we can't // pull them in with libstdc++, because its ::isinf and ::isnan are different // than its std::isinf and std::isnan. #ifndef __GLIBCXX__ using ::isinf; using ::isnan; #endif // Finally, pull the "foobarf" functions that CUDA defines in its headers into // namespace std. using ::acosf; using ::acoshf; using ::asinf; using ::asinhf; using ::atan2f; using ::atanf; using ::atanhf; using ::cbrtf; using ::ceilf; using ::copysignf; using ::cosf; using ::coshf; using ::erfcf; using ::erff; using ::exp2f; using ::expf; using ::expm1f; using ::fabsf; using ::fdimf; using ::floorf; using ::fmaf; using ::fmaxf; using ::fminf; using ::fmodf; using ::frexpf; using ::hypotf; using ::ilogbf; using ::ldexpf; using ::lgammaf; using ::llrintf; using ::llroundf; using ::log10f; using ::log1pf; using ::log2f; using ::logbf; using ::logf; using ::lrintf; using ::lroundf; using ::modff; using ::nearbyintf; using ::nextafterf; using ::powf; using ::remainderf; using ::remquof; using ::rintf; using ::roundf; using ::scalblnf; using ::scalbnf; using ::sinf; using ::sinhf; using ::sqrtf; using ::tanf; using ::tanhf; using ::tgammaf; using ::truncf; #ifdef _LIBCPP_END_NAMESPACE_STD _LIBCPP_END_NAMESPACE_STD #else #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_END_NAMESPACE_VERSION #endif } // namespace std #endif #endif // __OPENMP_NVPTX__ #undef __DEVICE__ #endif __clang_hip_cmath.h__stdarg___va_copy.h/*===---- __stddef_rsize_t.h - Definition of rsize_t -----------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* * When -fbuiltin-headers-in-system-modules is set this is a non-modular header * and needs to behave as if it was textual. */ #if !defined(_RSIZE_T) || \ (__has_feature(modules) && !__building_module(_Builtin_stddef)) #define _RSIZE_T typedef __SIZE_TYPE__ rsize_t; #endif __stddef_size_t.havx512vlbf16intrin.h/*===---- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics -------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error \ "Never use directly; include instead." #endif #ifndef __AVX512VPOPCNTDQVLINTRIN_H #define __AVX512VPOPCNTDQVLINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vpopcntdq,avx512vl,no-evex512"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vpopcntdq,avx512vl,no-evex512"), \ __min_vector_width__(256))) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi64(__m128i __A) { return (__m128i)__builtin_ia32_vpopcntq_128((__v2di)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_popcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128( (__mmask8)__U, (__v2di)_mm_popcnt_epi64(__A), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) { return _mm_mask_popcnt_epi64((__m128i)_mm_setzero_si128(), __U, __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi32(__m128i __A) { return (__m128i)__builtin_ia32_vpopcntd_128((__v4si)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_popcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128( (__mmask8)__U, (__v4si)_mm_popcnt_epi32(__A), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) { return _mm_mask_popcnt_epi32((__m128i)_mm_setzero_si128(), __U, __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi64(__m256i __A) { return (__m256i)__builtin_ia32_vpopcntq_256((__v4di)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_popcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256( (__mmask8)__U, (__v4di)_mm256_popcnt_epi64(__A), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) { return _mm256_mask_popcnt_epi64((__m256i)_mm256_setzero_si256(), __U, __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi32(__m256i __A) { return (__m256i)__builtin_ia32_vpopcntd_256((__v8si)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_popcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256( (__mmask8)__U, (__v8si)_mm256_popcnt_epi32(__A), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) { return _mm256_mask_popcnt_epi32((__m256i)_mm256_setzero_si256(), __U, __A); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif limits.hmm3dnow.hunwind.h/*===------------------ vaesintrin.h - VAES intrinsics ---------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __VAESINTRIN_H #define __VAESINTRIN_H /* Default attributes for YMM forms. 
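// ---------------------------------------------------------------------------
// Hedged usage sketch for the per-lane population-count intrinsics above
// (not part of the header; compile with -mavx512vpopcntdq -mavx512vl, the
// values are illustrative):
#include <immintrin.h>
#include <stdio.h>
static void __popcnt_demo(void) {
  __m256i __v = _mm256_set_epi64x(0xFFULL, 0x1ULL, 0x0ULL, 0xF0F0ULL);
  __m256i __c = _mm256_popcnt_epi64(__v);          // popcount per 64-bit lane
  long long __out[4];
  _mm256_storeu_si256((__m256i *)__out, __c);
  printf("%lld %lld %lld %lld\n", __out[0], __out[1], __out[2], __out[3]);
  // prints: 8 0 1 8 (lowest lane first)
}
// ---------------------------------------------------------------------------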
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes"), __min_vector_width__(256))) /* Default attributes for ZMM forms. */ #define __DEFAULT_FN_ATTRS_F \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512f,evex512,vaes"), \ __min_vector_width__(512))) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_aesenc_epi128(__m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_aesenc256((__v4di) __A, (__v4di) __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_aesdec_epi128(__m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_aesdec256((__v4di) __A, (__v4di) __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_aesenclast_epi128(__m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_aesenclast256((__v4di) __A, (__v4di) __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_aesdeclast_epi128(__m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_aesdeclast256((__v4di) __A, (__v4di) __B); } #ifdef __AVX512FINTRIN_H static __inline__ __m512i __DEFAULT_FN_ATTRS_F _mm512_aesenc_epi128(__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_aesenc512((__v8di) __A, (__v8di) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS_F _mm512_aesdec_epi128(__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_aesdec512((__v8di) __A, (__v8di) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS_F _mm512_aesenclast_epi128(__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_aesenclast512((__v8di) __A, (__v8di) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS_F _mm512_aesdeclast_epi128(__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_aesdeclast512((__v8di) __A, (__v8di) __B); } #endif // __AVX512FINTRIN_H #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_F #endif // __VAESINTRIN_H /*===---- xsavecintrin.h - XSAVEC intrinsic --------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __XSAVECINTRIN_H #define __XSAVECINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xsavec"))) /// Performs a full or partial save of processor state to the memory at /// \a __p. The exact state saved depends on the 64-bit mask \a __m and /// processor control register \c XCR0. /// /// \code{.operation} /// mask[62:0] := __m[62:0] AND XCR0[62:0] /// FOR i := 0 TO 62 /// IF mask[i] == 1 /// CASE (i) OF /// 0: save X87 FPU state /// 1: save SSE state /// DEFAULT: __p.Ext_Save_Area[i] := ProcessorState[i] /// FI /// ENDFOR /// __p.Header.XSTATE_BV[62:0] := INIT_FUNCTION(mask[62:0]) /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c XSAVEC instruction. /// /// \param __p /// Pointer to the save area; must be 64-byte aligned. /// \param __m /// A 64-bit mask indicating what state should be saved. static __inline__ void __DEFAULT_FN_ATTRS _xsavec(void *__p, unsigned long long __m) { __builtin_ia32_xsavec(__p, __m); } #ifdef __x86_64__ /// Performs a full or partial save of processor state to the memory at /// \a __p. The exact state saved depends on the 64-bit mask \a __m and /// processor control register \c XCR0. 
/// /// \code{.operation} /// mask[62:0] := __m[62:0] AND XCR0[62:0] /// FOR i := 0 TO 62 /// IF mask[i] == 1 /// CASE (i) OF /// 0: save X87 FPU state /// 1: save SSE state /// DEFAULT: __p.Ext_Save_Area[i] := ProcessorState[i] /// FI /// ENDFOR /// __p.Header.XSTATE_BV[62:0] := INIT_FUNCTION(mask[62:0]) /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c XSAVEC64 instruction. /// /// \param __p /// Pointer to the save area; must be 64-byte aligned. /// \param __m /// A 64-bit mask indicating what state should be saved. static __inline__ void __DEFAULT_FN_ATTRS _xsavec64(void *__p, unsigned long long __m) { __builtin_ia32_xsavec64(__p, __m); } #endif #undef __DEFAULT_FN_ATTRS #endif //===-- Wrapper for C standard stdio.h declarations on the GPU ------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__) #error "This file is for GPU offloading compilation only" #endif #include_next // In some old versions of glibc, other standard headers sometimes define // special macros (e.g., __need_FILE) before including stdio.h to cause stdio.h // to produce special definitions. Future includes of stdio.h when those // special macros are undefined are expected to produce the normal definitions // from stdio.h. // // We do not apply our include guard (__CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__) // unconditionally to the above include_next. Otherwise, after an occurrence of // the first glibc stdio.h use case described above, the include_next would be // skipped for remaining includes of stdio.h, leaving required symbols // undefined. // // We make the following assumptions to handle all use cases: // // 1. If the above include_next produces special glibc definitions, then (a) it // does not produce the normal definitions that we must intercept below, (b) // the current file was included from a glibc header that already defined // __GLIBC__ (usually by including glibc's ), and (c) the above // include_next does not define _STDIO_H. In that case, we skip the rest of // the current file and don't guard against future includes. // 2. If the above include_next produces the normal stdio.h definitions, then // either (a) __GLIBC__ is not defined because C headers are from some other // libc implementation or (b) the above include_next defines _STDIO_H to // prevent the above include_next from having any effect in the future. #if !defined(__GLIBC__) || defined(_STDIO_H) #ifndef __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__ #define __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__ #if __has_include() #if defined(__HIP__) || defined(__CUDA__) #define __LIBC_ATTRS __attribute__((device)) #endif // Some headers provide these as macros. Temporarily undefine them so they do // not conflict with any definitions for the GPU. #pragma push_macro("stdout") #pragma push_macro("stdin") #pragma push_macro("stderr") #undef stdout #undef stderr #undef stdin #pragma omp begin declare target #include #pragma omp end declare target #undef __LIBC_ATTRS // Restore the original macros when compiling on the host. 
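// ---------------------------------------------------------------------------
// Hedged aside (not part of the wrapper): push_macro/pop_macro save and later
// restore a macro definition, which is how a host libc's object-like macros
// for stdout/stdin/stderr survive the GPU declare-target region above, e.g.
//
//   #pragma push_macro("stdout")
//   #undef stdout                  /* plain identifier usable here */
//   /* ... declarations that must not see the host macro ... */
//   #pragma pop_macro("stdout")    /* original definition restored */
// ---------------------------------------------------------------------------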
#if !defined(__NVPTX__) && !defined(__AMDGPU__) #pragma pop_macro("stdout") #pragma pop_macro("stderr") #pragma pop_macro("stdin") #endif #endif #endif // __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__ #endif ppc_wrappers/immintrin.h/*===---- smmintrin.h - Implementation of SSE4 intrinsics on PowerPC -------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* Implemented from the specification included in the Intel C++ Compiler User Guide and Reference, version 9.0. NOTE: This is NOT a complete implementation of the SSE4 intrinsics! */ #ifndef NO_WARN_X86_INTRINSICS /* This header is distributed to simplify porting x86_64 code that makes explicit use of Intel intrinsics to powerpc64/powerpc64le. It is the user's responsibility to determine if the results are acceptable and make additional changes as necessary. Note that much code that uses Intel intrinsics can be rewritten in standard C or GNU C extensions, which are more portable and better optimized across multiple targets. */ #error \ "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." #endif #ifndef SMMINTRIN_H_ #define SMMINTRIN_H_ #if defined(__powerpc64__) && \ (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) #include #include /* Rounding mode macros. */ #define _MM_FROUND_TO_NEAREST_INT 0x00 #define _MM_FROUND_TO_ZERO 0x01 #define _MM_FROUND_TO_POS_INF 0x02 #define _MM_FROUND_TO_NEG_INF 0x03 #define _MM_FROUND_CUR_DIRECTION 0x04 #define _MM_FROUND_NINT (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC) #define _MM_FROUND_FLOOR (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC) #define _MM_FROUND_CEIL (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC) #define _MM_FROUND_TRUNC (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC) #define _MM_FROUND_RINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC) #define _MM_FROUND_NEARBYINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC) #define _MM_FROUND_RAISE_EXC 0x00 #define _MM_FROUND_NO_EXC 0x08 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_round_pd(__m128d __A, int __rounding) { __v2df __r; union { double __fr; long long __fpscr; } __enables_save, __fpscr_save; if (__rounding & _MM_FROUND_NO_EXC) { /* Save enabled exceptions, disable all exceptions, and preserve the rounding mode. */ #ifdef _ARCH_PWR9 __asm__("mffsce %0" : "=f"(__fpscr_save.__fr)); __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; #else __fpscr_save.__fr = __builtin_ppc_mffs(); __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; __fpscr_save.__fpscr &= ~0xf8; __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); #endif /* Insert an artificial "read/write" reference to the variable read below, to ensure the compiler does not schedule a read/use of the variable before the FPSCR is modified, above. This can be removed if and when GCC PR102783 is fixed. 
*/ __asm__("" : "+wa"(__A)); } switch (__rounding) { case _MM_FROUND_TO_NEAREST_INT: #ifdef _ARCH_PWR9 __fpscr_save.__fr = __builtin_ppc_mffsl(); #else __fpscr_save.__fr = __builtin_ppc_mffs(); __fpscr_save.__fpscr &= 0x70007f0ffL; #endif __attribute__((fallthrough)); case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: __builtin_ppc_set_fpscr_rn(0b00); /* Insert an artificial "read/write" reference to the variable read below, to ensure the compiler does not schedule a read/use of the variable before the FPSCR is modified, above. This can be removed if and when GCC PR102783 is fixed. */ __asm__("" : "+wa"(__A)); __r = vec_rint((__v2df)__A); /* Insert an artificial "read" reference to the variable written above, to ensure the compiler does not schedule the computation of the value after the manipulation of the FPSCR, below. This can be removed if and when GCC PR102783 is fixed. */ __asm__("" : : "wa"(__r)); __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr); break; case _MM_FROUND_TO_NEG_INF: case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: __r = vec_floor((__v2df)__A); break; case _MM_FROUND_TO_POS_INF: case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC: __r = vec_ceil((__v2df)__A); break; case _MM_FROUND_TO_ZERO: case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC: __r = vec_trunc((__v2df)__A); break; case _MM_FROUND_CUR_DIRECTION: __r = vec_rint((__v2df)__A); break; } if (__rounding & _MM_FROUND_NO_EXC) { /* Insert an artificial "read" reference to the variable written above, to ensure the compiler does not schedule the computation of the value after the manipulation of the FPSCR, below. This can be removed if and when GCC PR102783 is fixed. */ __asm__("" : : "wa"(__r)); /* Restore enabled exceptions. */ #ifdef _ARCH_PWR9 __fpscr_save.__fr = __builtin_ppc_mffsl(); #else __fpscr_save.__fr = __builtin_ppc_mffs(); __fpscr_save.__fpscr &= 0x70007f0ffL; #endif __fpscr_save.__fpscr |= __enables_save.__fpscr; __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); } return (__m128d)__r; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_round_sd(__m128d __A, __m128d __B, int __rounding) { __B = _mm_round_pd(__B, __rounding); __v2df __r = {((__v2df)__B)[0], ((__v2df)__A)[1]}; return (__m128d)__r; } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_round_ps(__m128 __A, int __rounding) { __v4sf __r; union { double __fr; long long __fpscr; } __enables_save, __fpscr_save; if (__rounding & _MM_FROUND_NO_EXC) { /* Save enabled exceptions, disable all exceptions, and preserve the rounding mode. */ #ifdef _ARCH_PWR9 __asm__("mffsce %0" : "=f"(__fpscr_save.__fr)); __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; #else __fpscr_save.__fr = __builtin_ppc_mffs(); __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; __fpscr_save.__fpscr &= ~0xf8; __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); #endif /* Insert an artificial "read/write" reference to the variable read below, to ensure the compiler does not schedule a read/use of the variable before the FPSCR is modified, above. This can be removed if and when GCC PR102783 is fixed. 
*/ __asm__("" : "+wa"(__A)); } switch (__rounding) { case _MM_FROUND_TO_NEAREST_INT: #ifdef _ARCH_PWR9 __fpscr_save.__fr = __builtin_ppc_mffsl(); #else __fpscr_save.__fr = __builtin_ppc_mffs(); __fpscr_save.__fpscr &= 0x70007f0ffL; #endif __attribute__((fallthrough)); case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: __builtin_ppc_set_fpscr_rn(0b00); /* Insert an artificial "read/write" reference to the variable read below, to ensure the compiler does not schedule a read/use of the variable before the FPSCR is modified, above. This can be removed if and when GCC PR102783 is fixed. */ __asm__("" : "+wa"(__A)); __r = vec_rint((__v4sf)__A); /* Insert an artificial "read" reference to the variable written above, to ensure the compiler does not schedule the computation of the value after the manipulation of the FPSCR, below. This can be removed if and when GCC PR102783 is fixed. */ __asm__("" : : "wa"(__r)); __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr); break; case _MM_FROUND_TO_NEG_INF: case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: __r = vec_floor((__v4sf)__A); break; case _MM_FROUND_TO_POS_INF: case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC: __r = vec_ceil((__v4sf)__A); break; case _MM_FROUND_TO_ZERO: case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC: __r = vec_trunc((__v4sf)__A); break; case _MM_FROUND_CUR_DIRECTION: __r = vec_rint((__v4sf)__A); break; } if (__rounding & _MM_FROUND_NO_EXC) { /* Insert an artificial "read" reference to the variable written above, to ensure the compiler does not schedule the computation of the value after the manipulation of the FPSCR, below. This can be removed if and when GCC PR102783 is fixed. */ __asm__("" : : "wa"(__r)); /* Restore enabled exceptions. */ #ifdef _ARCH_PWR9 __fpscr_save.__fr = __builtin_ppc_mffsl(); #else __fpscr_save.__fr = __builtin_ppc_mffs(); __fpscr_save.__fpscr &= 0x70007f0ffL; #endif __fpscr_save.__fpscr |= __enables_save.__fpscr; __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); } return (__m128)__r; } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_round_ss(__m128 __A, __m128 __B, int __rounding) { __B = _mm_round_ps(__B, __rounding); __v4sf __r = (__v4sf)__A; __r[0] = ((__v4sf)__B)[0]; return (__m128)__r; } #define _mm_ceil_pd(V) _mm_round_pd((V), _MM_FROUND_CEIL) #define _mm_ceil_sd(D, V) _mm_round_sd((D), (V), _MM_FROUND_CEIL) #define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR) #define _mm_floor_sd(D, V) _mm_round_sd((D), (V), _MM_FROUND_FLOOR) #define _mm_ceil_ps(V) _mm_round_ps((V), _MM_FROUND_CEIL) #define _mm_ceil_ss(D, V) _mm_round_ss((D), (V), _MM_FROUND_CEIL) #define _mm_floor_ps(V) _mm_round_ps((V), _MM_FROUND_FLOOR) #define _mm_floor_ss(D, V) _mm_round_ss((D), (V), _MM_FROUND_FLOOR) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_insert_epi8(__m128i const __A, int const __D, int const __N) { __v16qi __result = (__v16qi)__A; __result[__N & 0xf] = __D; return (__m128i)__result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_insert_epi32(__m128i const __A, int const __D, int const __N) { __v4si __result = (__v4si)__A; __result[__N & 3] = __D; return (__m128i)__result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_insert_epi64(__m128i const __A, long long const __D, int const __N) { __v2di __result = (__v2di)__A; __result[__N & 1] = __D; return (__m128i)__result; } extern __inline int __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) _mm_extract_epi8(__m128i __X, const int __N) { return (unsigned char)((__v16qi)__X)[__N & 15]; } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_epi32(__m128i __X, const int __N) { return ((__v4si)__X)[__N & 3]; } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_epi64(__m128i __X, const int __N) { return ((__v2di)__X)[__N & 1]; } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_ps(__m128 __X, const int __N) { return ((__v4si)__X)[__N & 3]; } #ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_blend_epi16(__m128i __A, __m128i __B, const int __imm8) { __v16qu __charmask = vec_splats((unsigned char)__imm8); __charmask = vec_gb(__charmask); __v8hu __shortmask = (__v8hu)vec_unpackh((__v16qi)__charmask); #ifdef __BIG_ENDIAN__ __shortmask = vec_reve(__shortmask); #endif return (__m128i)vec_sel((__v8hu)__A, (__v8hu)__B, __shortmask); } #endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_blendv_epi8(__m128i __A, __m128i __B, __m128i __mask) { #ifdef _ARCH_PWR10 return (__m128i)vec_blendv((__v16qi)__A, (__v16qi)__B, (__v16qu)__mask); #else const __v16qu __seven = vec_splats((unsigned char)0x07); __v16qu __lmask = vec_sra((__v16qu)__mask, __seven); return (__m128i)vec_sel((__v16qi)__A, (__v16qi)__B, __lmask); #endif } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_blend_ps(__m128 __A, __m128 __B, const int __imm8) { __v16qu __pcv[] = { {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, {16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, {0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15}, {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15}, {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 12, 13, 14, 15}, {16, 17, 18, 19, 4, 5, 6, 7, 24, 25, 26, 27, 12, 13, 14, 15}, {0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15}, {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 28, 29, 30, 31}, {16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 28, 29, 30, 31}, {0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31}, {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31}, {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}, {16, 17, 18, 19, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}, {0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, }; __v16qu __r = vec_perm((__v16qu)__A, (__v16qu)__B, __pcv[__imm8]); return (__m128)__r; } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_blendv_ps(__m128 __A, __m128 __B, __m128 __mask) { #ifdef _ARCH_PWR10 return (__m128)vec_blendv((__v4sf)__A, (__v4sf)__B, (__v4su)__mask); #else const __v4si __zero = {0}; const __vector __bool int __boolmask = vec_cmplt((__v4si)__mask, __zero); return (__m128)vec_sel((__v4su)__A, (__v4su)__B, (__v4su)__boolmask); #endif } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_blend_pd(__m128d __A, __m128d __B, const int __imm8) { __v16qu __pcv[] = { {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15}, {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}, {16, 17, 18, 19, 20, 21, 
22, 23, 24, 25, 26, 27, 28, 29, 30, 31}}; __v16qu __r = vec_perm((__v16qu)__A, (__v16qu)__B, __pcv[__imm8]); return (__m128d)__r; } #ifdef _ARCH_PWR8 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_blendv_pd(__m128d __A, __m128d __B, __m128d __mask) { #ifdef _ARCH_PWR10 return (__m128d)vec_blendv((__v2df)__A, (__v2df)__B, (__v2du)__mask); #else const __v2di __zero = {0}; const __vector __bool long long __boolmask = vec_cmplt((__v2di)__mask, __zero); return (__m128d)vec_sel((__v2du)__A, (__v2du)__B, (__v2du)__boolmask); #endif } #endif extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_testz_si128(__m128i __A, __m128i __B) { /* Note: This implementation does NOT set "zero" or "carry" flags. */ const __v16qu __zero = {0}; return vec_all_eq(vec_and((__v16qu)__A, (__v16qu)__B), __zero); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_testc_si128(__m128i __A, __m128i __B) { /* Note: This implementation does NOT set "zero" or "carry" flags. */ const __v16qu __zero = {0}; const __v16qu __notA = vec_nor((__v16qu)__A, (__v16qu)__A); return vec_all_eq(vec_and((__v16qu)__notA, (__v16qu)__B), __zero); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_testnzc_si128(__m128i __A, __m128i __B) { /* Note: This implementation does NOT set "zero" or "carry" flags. */ return _mm_testz_si128(__A, __B) == 0 && _mm_testc_si128(__A, __B) == 0; } #define _mm_test_all_zeros(M, V) _mm_testz_si128((M), (V)) #define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V))) #define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V)) #ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_epi64(__m128i __X, __m128i __Y) { return (__m128i)vec_cmpeq((__v2di)__X, (__v2di)__Y); } #endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_epi8(__m128i __X, __m128i __Y) { return (__m128i)vec_min((__v16qi)__X, (__v16qi)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_epu16(__m128i __X, __m128i __Y) { return (__m128i)vec_min((__v8hu)__X, (__v8hu)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_epi32(__m128i __X, __m128i __Y) { return (__m128i)vec_min((__v4si)__X, (__v4si)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_epu32(__m128i __X, __m128i __Y) { return (__m128i)vec_min((__v4su)__X, (__v4su)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_epi8(__m128i __X, __m128i __Y) { return (__m128i)vec_max((__v16qi)__X, (__v16qi)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_epu16(__m128i __X, __m128i __Y) { return (__m128i)vec_max((__v8hu)__X, (__v8hu)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_epi32(__m128i __X, __m128i __Y) { return (__m128i)vec_max((__v4si)__X, (__v4si)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_epu32(__m128i __X, __m128i __Y) { return (__m128i)vec_max((__v4su)__X, (__v4su)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mullo_epi32(__m128i __X, __m128i 
__Y) { return (__m128i)vec_mul((__v4su)__X, (__v4su)__Y); } #ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mul_epi32(__m128i __X, __m128i __Y) { return (__m128i)vec_mule((__v4si)__X, (__v4si)__Y); } #endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi8_epi16(__m128i __A) { return (__m128i)vec_unpackh((__v16qi)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi8_epi32(__m128i __A) { __A = (__m128i)vec_unpackh((__v16qi)__A); return (__m128i)vec_unpackh((__v8hi)__A); } #ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi8_epi64(__m128i __A) { __A = (__m128i)vec_unpackh((__v16qi)__A); __A = (__m128i)vec_unpackh((__v8hi)__A); return (__m128i)vec_unpackh((__v4si)__A); } #endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi16_epi32(__m128i __A) { return (__m128i)vec_unpackh((__v8hi)__A); } #ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi16_epi64(__m128i __A) { __A = (__m128i)vec_unpackh((__v8hi)__A); return (__m128i)vec_unpackh((__v4si)__A); } #endif #ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi32_epi64(__m128i __A) { return (__m128i)vec_unpackh((__v4si)__A); } #endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepu8_epi16(__m128i __A) { const __v16qu __zero = {0}; #ifdef __LITTLE_ENDIAN__ __A = (__m128i)vec_mergeh((__v16qu)__A, __zero); #else /* __BIG_ENDIAN__. */ __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A); #endif /* __BIG_ENDIAN__. */ return __A; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepu8_epi32(__m128i __A) { const __v16qu __zero = {0}; #ifdef __LITTLE_ENDIAN__ __A = (__m128i)vec_mergeh((__v16qu)__A, __zero); __A = (__m128i)vec_mergeh((__v8hu)__A, (__v8hu)__zero); #else /* __BIG_ENDIAN__. */ __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A); __A = (__m128i)vec_mergeh((__v8hu)__zero, (__v8hu)__A); #endif /* __BIG_ENDIAN__. */ return __A; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepu8_epi64(__m128i __A) { const __v16qu __zero = {0}; #ifdef __LITTLE_ENDIAN__ __A = (__m128i)vec_mergeh((__v16qu)__A, __zero); __A = (__m128i)vec_mergeh((__v8hu)__A, (__v8hu)__zero); __A = (__m128i)vec_mergeh((__v4su)__A, (__v4su)__zero); #else /* __BIG_ENDIAN__. */ __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A); __A = (__m128i)vec_mergeh((__v8hu)__zero, (__v8hu)__A); __A = (__m128i)vec_mergeh((__v4su)__zero, (__v4su)__A); #endif /* __BIG_ENDIAN__. */ return __A; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepu16_epi32(__m128i __A) { const __v8hu __zero = {0}; #ifdef __LITTLE_ENDIAN__ __A = (__m128i)vec_mergeh((__v8hu)__A, __zero); #else /* __BIG_ENDIAN__. */ __A = (__m128i)vec_mergeh(__zero, (__v8hu)__A); #endif /* __BIG_ENDIAN__. */ return __A; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepu16_epi64(__m128i __A) { const __v8hu __zero = {0}; #ifdef __LITTLE_ENDIAN__ __A = (__m128i)vec_mergeh((__v8hu)__A, __zero); __A = (__m128i)vec_mergeh((__v4su)__A, (__v4su)__zero); #else /* __BIG_ENDIAN__. 
*/ __A = (__m128i)vec_mergeh(__zero, (__v8hu)__A); __A = (__m128i)vec_mergeh((__v4su)__zero, (__v4su)__A); #endif /* __BIG_ENDIAN__. */ return __A; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepu32_epi64(__m128i __A) { const __v4su __zero = {0}; #ifdef __LITTLE_ENDIAN__ __A = (__m128i)vec_mergeh((__v4su)__A, __zero); #else /* __BIG_ENDIAN__. */ __A = (__m128i)vec_mergeh(__zero, (__v4su)__A); #endif /* __BIG_ENDIAN__. */ return __A; } /* Return horizontal packed word minimum and its index in bits [15:0] and bits [18:16] respectively. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_minpos_epu16(__m128i __A) { union __u { __m128i __m; __v8hu __uh; }; union __u __u = {.__m = __A}, __r = {.__m = {0}}; unsigned short __ridx = 0; unsigned short __rmin = __u.__uh[__ridx]; unsigned long __i; for (__i = 1; __i < 8; __i++) { if (__u.__uh[__i] < __rmin) { __rmin = __u.__uh[__i]; __ridx = __i; } } __r.__uh[0] = __rmin; __r.__uh[1] = __ridx; return __r.__m; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_packus_epi32(__m128i __X, __m128i __Y) { return (__m128i)vec_packsu((__v4si)__X, (__v4si)__Y); } #ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_epi64(__m128i __X, __m128i __Y) { return (__m128i)vec_cmpgt((__v2di)__X, (__v2di)__Y); } #endif #else #include_next #endif /* defined(__powerpc64__) && \ * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* SMMINTRIN_H_ */ /*===--- x86gprintrin.h - Implementation of X86 GPR intrinsics on PowerPC --=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef X86GPRINTRIN_H_ #define X86GPRINTRIN_H_ #include #include #endif /* X86GPRINTRIN_H_ */ profile/instr_prof_interface.hxray/xray_records.h, 0=The document root must not be followed by other values.[:lower:]CoalesceWalker::ShortVisit called?+CypriotLinear_AMcMultaniNdOld_TurkicSection name '%s' is too long (%zu); section will not be found (even if present).dst != nullptr/proc/self/exeelement not in freelistabslLowLevelAlloc arithmetic overflowbad magic number in Next()a__int128%nebase != kInvalidBaseUNIMPLEMENTEDtree->IsBtree()0123456789ABCDEF%s%p %s %s/config/tzdata/external/boringssl/src/crypto/fipsmodule/bn/random.c%s failed. Expected: AES-CBC-decrypt KATCRYPTO_tls13_hkdf_expand_labelDHkythe.proto.AnalysisResult.summaryError adding compilation: Wrote FileData with mismatched digests: -resource-dirKYTHE_ROOT_DIRECTORYrequired_input differs from the current one. __stddef_offsetof.h__stddef_wchar_t.hammintrin.hamxfp16intrin.h/*===---- arm_vector_types - ARM vector type ------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined(__ARM_NEON_H) && !defined(__ARM_SVE_H) #error "This file should not be used standalone. 
Please include arm_neon.h or arm_sve.h instead" #endif #ifndef __ARM_NEON_TYPES_H #define __ARM_NEON_TYPES_H typedef float float32_t; typedef __fp16 float16_t; #ifdef __aarch64__ typedef double float64_t; #endif typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t; typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t; typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t; typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t; typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t; typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t; typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t; typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t; typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t; typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t; typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t; typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t; typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t; typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t; typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t; typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t; typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t; typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t; typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t; typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t; #ifdef __aarch64__ typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t; typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t; #endif typedef struct int8x8x2_t { int8x8_t val[2]; } int8x8x2_t; typedef struct int8x16x2_t { int8x16_t val[2]; } int8x16x2_t; typedef struct int16x4x2_t { int16x4_t val[2]; } int16x4x2_t; typedef struct int16x8x2_t { int16x8_t val[2]; } int16x8x2_t; typedef struct int32x2x2_t { int32x2_t val[2]; } int32x2x2_t; typedef struct int32x4x2_t { int32x4_t val[2]; } int32x4x2_t; typedef struct int64x1x2_t { int64x1_t val[2]; } int64x1x2_t; typedef struct int64x2x2_t { int64x2_t val[2]; } int64x2x2_t; typedef struct uint8x8x2_t { uint8x8_t val[2]; } uint8x8x2_t; typedef struct uint8x16x2_t { uint8x16_t val[2]; } uint8x16x2_t; typedef struct uint16x4x2_t { uint16x4_t val[2]; } uint16x4x2_t; typedef struct uint16x8x2_t { uint16x8_t val[2]; } uint16x8x2_t; typedef struct uint32x2x2_t { uint32x2_t val[2]; } uint32x2x2_t; typedef struct uint32x4x2_t { uint32x4_t val[2]; } uint32x4x2_t; typedef struct uint64x1x2_t { uint64x1_t val[2]; } uint64x1x2_t; typedef struct uint64x2x2_t { uint64x2_t val[2]; } uint64x2x2_t; typedef struct float16x4x2_t { float16x4_t val[2]; } float16x4x2_t; typedef struct float16x8x2_t { float16x8_t val[2]; } float16x8x2_t; typedef struct float32x2x2_t { float32x2_t val[2]; } float32x2x2_t; typedef struct float32x4x2_t { float32x4_t val[2]; } float32x4x2_t; #ifdef __aarch64__ typedef struct float64x1x2_t { float64x1_t val[2]; } float64x1x2_t; typedef struct float64x2x2_t { float64x2_t val[2]; } float64x2x2_t; #endif typedef struct int8x8x3_t { int8x8_t val[3]; } int8x8x3_t; typedef struct int8x16x3_t { int8x16_t val[3]; } int8x16x3_t; typedef struct int16x4x3_t { int16x4_t val[3]; } int16x4x3_t; typedef struct int16x8x3_t { int16x8_t val[3]; } int16x8x3_t; typedef struct int32x2x3_t { int32x2_t val[3]; } int32x2x3_t; typedef struct int32x4x3_t { int32x4_t val[3]; } int32x4x3_t; typedef struct int64x1x3_t { int64x1_t val[3]; } 
int64x1x3_t; typedef struct int64x2x3_t { int64x2_t val[3]; } int64x2x3_t; typedef struct uint8x8x3_t { uint8x8_t val[3]; } uint8x8x3_t; typedef struct uint8x16x3_t { uint8x16_t val[3]; } uint8x16x3_t; typedef struct uint16x4x3_t { uint16x4_t val[3]; } uint16x4x3_t; typedef struct uint16x8x3_t { uint16x8_t val[3]; } uint16x8x3_t; typedef struct uint32x2x3_t { uint32x2_t val[3]; } uint32x2x3_t; typedef struct uint32x4x3_t { uint32x4_t val[3]; } uint32x4x3_t; typedef struct uint64x1x3_t { uint64x1_t val[3]; } uint64x1x3_t; typedef struct uint64x2x3_t { uint64x2_t val[3]; } uint64x2x3_t; typedef struct float16x4x3_t { float16x4_t val[3]; } float16x4x3_t; typedef struct float16x8x3_t { float16x8_t val[3]; } float16x8x3_t; typedef struct float32x2x3_t { float32x2_t val[3]; } float32x2x3_t; typedef struct float32x4x3_t { float32x4_t val[3]; } float32x4x3_t; #ifdef __aarch64__ typedef struct float64x1x3_t { float64x1_t val[3]; } float64x1x3_t; typedef struct float64x2x3_t { float64x2_t val[3]; } float64x2x3_t; #endif typedef struct int8x8x4_t { int8x8_t val[4]; } int8x8x4_t; typedef struct int8x16x4_t { int8x16_t val[4]; } int8x16x4_t; typedef struct int16x4x4_t { int16x4_t val[4]; } int16x4x4_t; typedef struct int16x8x4_t { int16x8_t val[4]; } int16x8x4_t; typedef struct int32x2x4_t { int32x2_t val[4]; } int32x2x4_t; typedef struct int32x4x4_t { int32x4_t val[4]; } int32x4x4_t; typedef struct int64x1x4_t { int64x1_t val[4]; } int64x1x4_t; typedef struct int64x2x4_t { int64x2_t val[4]; } int64x2x4_t; typedef struct uint8x8x4_t { uint8x8_t val[4]; } uint8x8x4_t; typedef struct uint8x16x4_t { uint8x16_t val[4]; } uint8x16x4_t; typedef struct uint16x4x4_t { uint16x4_t val[4]; } uint16x4x4_t; typedef struct uint16x8x4_t { uint16x8_t val[4]; } uint16x8x4_t; typedef struct uint32x2x4_t { uint32x2_t val[4]; } uint32x2x4_t; typedef struct uint32x4x4_t { uint32x4_t val[4]; } uint32x4x4_t; typedef struct uint64x1x4_t { uint64x1_t val[4]; } uint64x1x4_t; typedef struct uint64x2x4_t { uint64x2_t val[4]; } uint64x2x4_t; typedef struct float16x4x4_t { float16x4_t val[4]; } float16x4x4_t; typedef struct float16x8x4_t { float16x8_t val[4]; } float16x8x4_t; typedef struct float32x2x4_t { float32x2_t val[4]; } float32x2x4_t; typedef struct float32x4x4_t { float32x4_t val[4]; } float32x4x4_t; #ifdef __aarch64__ typedef struct float64x1x4_t { float64x1_t val[4]; } float64x1x4_t; typedef struct float64x2x4_t { float64x2_t val[4]; } float64x2x4_t; #endif typedef __attribute__((neon_vector_type(4))) bfloat16_t bfloat16x4_t; typedef __attribute__((neon_vector_type(8))) bfloat16_t bfloat16x8_t; typedef struct bfloat16x4x2_t { bfloat16x4_t val[2]; } bfloat16x4x2_t; typedef struct bfloat16x8x2_t { bfloat16x8_t val[2]; } bfloat16x8x2_t; typedef struct bfloat16x4x3_t { bfloat16x4_t val[3]; } bfloat16x4x3_t; typedef struct bfloat16x8x3_t { bfloat16x8_t val[3]; } bfloat16x8x3_t; typedef struct bfloat16x4x4_t { bfloat16x4_t val[4]; } bfloat16x4x4_t; typedef struct bfloat16x8x4_t { bfloat16x8_t val[4]; } bfloat16x8x4_t; #endif // __ARM_NEON_TYPES_H /*===------- avx512vpintersectintrin.h - VP2INTERSECT intrinsics ------------=== * * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do 
so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef _AVX512VP2INTERSECT_H #define _AVX512VP2INTERSECT_H #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vp2intersect,evex512"), \ __min_vector_width__(512))) /// Store, in an even/odd pair of mask registers, the indicators of the /// locations of value matches between dwords in operands __a and __b. /// /// \headerfile /// /// This intrinsic corresponds to the VP2INTERSECTD instruction. /// /// \param __a /// A 512-bit vector of [16 x i32]. /// \param __b /// A 512-bit vector of [16 x i32] /// \param __m0 /// A pointer point to 16-bit mask /// \param __m1 /// A pointer point to 16-bit mask static __inline__ void __DEFAULT_FN_ATTRS _mm512_2intersect_epi32(__m512i __a, __m512i __b, __mmask16 *__m0, __mmask16 *__m1) { __builtin_ia32_vp2intersect_d_512((__v16si)__a, (__v16si)__b, __m0, __m1); } /// Store, in an even/odd pair of mask registers, the indicators of the /// locations of value matches between quadwords in operands __a and __b. /// /// \headerfile /// /// This intrinsic corresponds to the VP2INTERSECTQ instruction. /// /// \param __a /// A 512-bit vector of [8 x i64]. /// \param __b /// A 512-bit vector of [8 x i64] /// \param __m0 /// A pointer point to 8-bit mask /// \param __m1 /// A pointer point to 8-bit mask static __inline__ void __DEFAULT_FN_ATTRS _mm512_2intersect_epi64(__m512i __a, __m512i __b, __mmask8 *__m0, __mmask8 *__m1) { __builtin_ia32_vp2intersect_q_512((__v8di)__a, (__v8di)__b, __m0, __m1); } #undef __DEFAULT_FN_ATTRS #endif avxneconvertintrin.hfxsrintrin.h//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
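// ---------------------------------------------------------------------------
// Hedged usage sketch for _mm512_2intersect_epi32 above (not part of the
// header; compile with -mavx512vp2intersect on top of AVX-512F; the inputs
// are illustrative and must each point at 16 readable dwords):
#include <immintrin.h>
static void __intersect_demo(const int *__a, const int *__b,
                             __mmask16 *__hits_in_a, __mmask16 *__hits_in_b) {
  __m512i __va = _mm512_loadu_si512(__a);
  __m512i __vb = _mm512_loadu_si512(__b);
  // Bit i of *__hits_in_a is set when __a[i] equals some __b[j];
  // bit j of *__hits_in_b is set when __b[j] equals some __a[i].
  _mm512_2intersect_epi32(__va, __vb, __hits_in_a, __hits_in_b);
}
// ---------------------------------------------------------------------------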
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef _HEXAGON_CIRC_BREV_INTRINSICS_H_ #define _HEXAGON_CIRC_BREV_INTRINSICS_H_ 1 #include #include /* Circular Load */ /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_load_update_D(Word64 dst, Word64 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_load_update_D(dest,ptr,incr,bufsize,K) \ { ptr = (int64_t *) HEXAGON_circ_ldd (ptr, &(dest), ((((K)+1)<<24)|((bufsize)<<3)), ((incr)*8)); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_load_update_W(Word32 dst, Word32 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_load_update_W(dest,ptr,incr,bufsize,K) \ { ptr = (int *) HEXAGON_circ_ldw (ptr, &(dest), (((K)<<24)|((bufsize)<<2)), ((incr)*4)); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_load_update_H(Word16 dst, Word16 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_load_update_H(dest,ptr,incr,bufsize,K) \ { ptr = (int16_t *) HEXAGON_circ_ldh (ptr, &(dest), ((((K)-1)<<24)|((bufsize)<<1)), ((incr)*2)); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_load_update_UH( UWord16 dst, UWord16 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_load_update_UH(dest,ptr,incr,bufsize,K) \ { ptr = (uint16_t *) HEXAGON_circ_lduh (ptr, &(dest), ((((K)-1)<<24)|((bufsize)<<1)), ((incr)*2)); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_load_update_B(Word8 dst, Word8 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_load_update_B(dest,ptr,incr,bufsize,K) \ { ptr = (int8_t *) HEXAGON_circ_ldb (ptr, &(dest), ((((K)-2)<<24)|(bufsize)), incr); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_load_update_UB(UWord8 dst, UWord8 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_load_update_UB(dest,ptr,incr,bufsize,K) \ { ptr = (uint8_t *) HEXAGON_circ_ldub (ptr, &(dest), ((((K)-2)<<24)|(bufsize)), incr); } /* Circular Store */ /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic 
Prototype: void Q6_circ_store_update_D(Word64 *src, Word64 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_store_update_D(src,ptr,incr,bufsize,K) \ { ptr = (int64_t *) HEXAGON_circ_std (ptr, src, ((((K)+1)<<24)|((bufsize)<<3)), ((incr)*8)); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_store_update_W(Word32 *src, Word32 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_store_update_W(src,ptr,incr,bufsize,K) \ { ptr = (int *) HEXAGON_circ_stw (ptr, src, (((K)<<24)|((bufsize)<<2)), ((incr)*4)); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_store_update_HL(Word16 *src, Word16 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_store_update_HL(src,ptr,incr,bufsize,K) \ { ptr = (int16_t *) HEXAGON_circ_sth (ptr, src, ((((K)-1)<<24)|((bufsize)<<1)), ((incr)*2)); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_store_update_HH(Word16 *src, Word16 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_store_update_HH(src,ptr,incr,bufsize,K) \ { ptr = (int16_t *) HEXAGON_circ_sthhi (ptr, src, ((((K)-1)<<24)|((bufsize)<<1)), ((incr)*2)); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_store_update_B(Word8 *src, Word8 *ptr, UWord32 I4, UWord32 bufsize, UWord64 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_store_update_B(src,ptr,incr,bufsize,K) \ { ptr = (int8_t *) HEXAGON_circ_stb (ptr, src, ((((K)-2)<<24)|(bufsize)), incr); } /* Bit Reverse Load */ /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_load_update_D(Word64 dst, Word64 *ptr, UWord32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_load_update_D(dest,ptr,log2bufsize) \ { ptr = (int64_t *) HEXAGON_brev_ldd (ptr, &(dest), (1<<(16-((log2bufsize) + 3)))); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_load_update_W(Word32 dst, Word32 *ptr, UWord32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_load_update_W(dest,ptr,log2bufsize) \ { ptr = (int *) HEXAGON_brev_ldw (ptr, &(dest), (1<<(16-((log2bufsize) + 2)))); } /* ========================================================================== 
Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_load_update_H(Word16 dst, Word16 *ptr, UWord32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_load_update_H(dest,ptr,log2bufsize) \ { ptr = (int16_t *) HEXAGON_brev_ldh (ptr, &(dest), (1<<(16-((log2bufsize) + 1)))); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_load_update_UH(UWord16 dst, UWord16 *ptr, UWord32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_load_update_UH(dest,ptr,log2bufsize) \ { ptr = (uint16_t *) HEXAGON_brev_lduh (ptr, &(dest), (1<<(16-((log2bufsize) + 1)))); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_load_update_B(Word8 dst, Word8 *ptr, UWord32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_load_update_B(dest,ptr,log2bufsize) \ { ptr = (int8_t *) HEXAGON_brev_ldb (ptr, &(dest), (1<<(16-((log2bufsize))))); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_load_update_UB(UWord8 dst, UWord8 *ptr, UWord32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_load_update_UB(dest,ptr,log2bufsize) \ { ptr = (uint8_t *) HEXAGON_brev_ldub (ptr, &(dest), (1<<(16-((log2bufsize))))); } /* Bit Reverse Store */ /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_store_update_D(Word64 *src, Word64 *ptr, UWord32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_store_update_D(src,ptr,log2bufsize) \ { ptr = (int64_t *) HEXAGON_brev_std (ptr, src, (1<<(16-((log2bufsize) + 3)))); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_store_update_W(Word32 *src, Word32 *ptr, UWord32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_store_update_W(src,ptr,log2bufsize) \ { ptr = (int *) HEXAGON_brev_stw (ptr, src, (1<<(16-((log2bufsize) + 2)))); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_store_update_HL(Word16 *src, Word16 *ptr, Word32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_store_update_HL(src,ptr,log2bufsize) \ { ptr = (int16_t *) HEXAGON_brev_sth (ptr, src, (1<<(16-((log2bufsize) + 1)))); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_store_update_HH(Word16 *src, Word16 *ptr, UWord32 Iu4) 
Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_store_update_HH(src,ptr,log2bufsize) \ { ptr = (int16_t *) HEXAGON_brev_sthhi (ptr, src, (1<<(16-((log2bufsize) + 1)))); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_store_update_B(Word8 *src, Word8 *ptr, UWord32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_store_update_B(src,ptr,log2bufsize) \ { ptr = (int8_t *) HEXAGON_brev_stb (ptr, src, (1<<(16-((log2bufsize))))); } #define HEXAGON_circ_ldd __builtin_circ_ldd #define HEXAGON_circ_ldw __builtin_circ_ldw #define HEXAGON_circ_ldh __builtin_circ_ldh #define HEXAGON_circ_lduh __builtin_circ_lduh #define HEXAGON_circ_ldb __builtin_circ_ldb #define HEXAGON_circ_ldub __builtin_circ_ldub #define HEXAGON_circ_std __builtin_circ_std #define HEXAGON_circ_stw __builtin_circ_stw #define HEXAGON_circ_sth __builtin_circ_sth #define HEXAGON_circ_sthhi __builtin_circ_sthhi #define HEXAGON_circ_stb __builtin_circ_stb #define HEXAGON_brev_ldd __builtin_brev_ldd #define HEXAGON_brev_ldw __builtin_brev_ldw #define HEXAGON_brev_ldh __builtin_brev_ldh #define HEXAGON_brev_lduh __builtin_brev_lduh #define HEXAGON_brev_ldb __builtin_brev_ldb #define HEXAGON_brev_ldub __builtin_brev_ldub #define HEXAGON_brev_std __builtin_brev_std #define HEXAGON_brev_stw __builtin_brev_stw #define HEXAGON_brev_sth __builtin_brev_sth #define HEXAGON_brev_sthhi __builtin_brev_sthhi #define HEXAGON_brev_stb __builtin_brev_stb #ifdef __HVX__ /* ========================================================================== Assembly Syntax: if (Qt) vmem(Rt+#0) = Vs C Intrinsic Prototype: void Q6_vmaskedstoreq_QAV(HVX_VectorPred Qt, HVX_VectorAddress A, HVX_Vector Vs) Instruction Type: COPROC_VMEM Execution Slots: SLOT0 ========================================================================== */ #define Q6_vmaskedstoreq_QAV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaskedstoreq) /* ========================================================================== Assembly Syntax: if (!Qt) vmem(Rt+#0) = Vs C Intrinsic Prototype: void Q6_vmaskedstorenq_QAV(HVX_VectorPred Qt, HVX_VectorAddress A, HVX_Vector Vs) Instruction Type: COPROC_VMEM Execution Slots: SLOT0 ========================================================================== */ #define Q6_vmaskedstorenq_QAV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaskedstorenq) /* ========================================================================== Assembly Syntax: if (Qt) vmem(Rt+#0):nt = Vs C Intrinsic Prototype: void Q6_vmaskedstorentq_QAV(HVX_VectorPred Qt, HVX_VectorAddress A, HVX_Vector Vs) Instruction Type: COPROC_VMEM Execution Slots: SLOT0 ========================================================================== */ #define Q6_vmaskedstorentq_QAV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaskedstorentq) /* ========================================================================== Assembly Syntax: if (!Qt) vmem(Rt+#0):nt = Vs C Intrinsic Prototype: void Q6_vmaskedstorentnq_QAV(HVX_VectorPred Qt, HVX_VectorAddress A, HVX_Vector Vs) Instruction Type: COPROC_VMEM Execution Slots: SLOT0 ========================================================================== */ #define Q6_vmaskedstorentnq_QAV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaskedstorentnq) 
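/* Illustrative usage (editor-added sketch, not part of the generated header):
   a minimal helper showing how the masked-store wrappers defined above can be
   used to split one vector across two destinations by predicate.  It assumes
   an HVX-enabled build in which HVX_Vector and HVX_VectorPred are available
   (e.g. via hexagon_types.h); the helper name hvx_store_split is hypothetical. */
static inline void hvx_store_split(HVX_VectorPred q, HVX_Vector *q_dst,
                                   HVX_Vector *nq_dst, HVX_Vector v)
{
    /* Lanes enabled in q are written to *q_dst ... */
    Q6_vmaskedstoreq_QAV(q, q_dst, v);
    /* ... and the complementary lanes are written to *nq_dst. */
    Q6_vmaskedstorenq_QAV(q, nq_dst, v);
}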
#endif #endif /* #ifndef _HEXAGON_CIRC_BREV_INTRINSICS_H_ */ #ifdef __NOT_DEFINED__ /*** comment block template ***/ /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: ReturnType Intrinsic(ParamType Rs, ParamType Rt) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #endif /*** __NOT_DEFINED__ ***/ //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // Automatically generated file, do not edit! //===----------------------------------------------------------------------===// #ifndef __HEXAGON_PROTOS_H_ #define __HEXAGON_PROTOS_H_ 1 /* ========================================================================== Assembly Syntax: Rd32=abs(Rs32) C Intrinsic Prototype: Word32 Q6_R_abs_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_abs_R __builtin_HEXAGON_A2_abs /* ========================================================================== Assembly Syntax: Rdd32=abs(Rss32) C Intrinsic Prototype: Word64 Q6_P_abs_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_abs_P __builtin_HEXAGON_A2_absp /* ========================================================================== Assembly Syntax: Rd32=abs(Rs32):sat C Intrinsic Prototype: Word32 Q6_R_abs_R_sat(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_abs_R_sat __builtin_HEXAGON_A2_abssat /* ========================================================================== Assembly Syntax: Rd32=add(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_add_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_add_RR __builtin_HEXAGON_A2_add /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.h,Rs32.h):<<16 C Intrinsic Prototype: Word32 Q6_R_add_RhRh_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RhRh_s16 __builtin_HEXAGON_A2_addh_h16_hh /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.h,Rs32.l):<<16 C Intrinsic Prototype: Word32 Q6_R_add_RhRl_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RhRl_s16 __builtin_HEXAGON_A2_addh_h16_hl /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.l,Rs32.h):<<16 C Intrinsic Prototype: Word32 Q6_R_add_RlRh_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RlRh_s16 __builtin_HEXAGON_A2_addh_h16_lh /* 
========================================================================== Assembly Syntax: Rd32=add(Rt32.l,Rs32.l):<<16 C Intrinsic Prototype: Word32 Q6_R_add_RlRl_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RlRl_s16 __builtin_HEXAGON_A2_addh_h16_ll /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.h,Rs32.h):sat:<<16 C Intrinsic Prototype: Word32 Q6_R_add_RhRh_sat_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RhRh_sat_s16 __builtin_HEXAGON_A2_addh_h16_sat_hh /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.h,Rs32.l):sat:<<16 C Intrinsic Prototype: Word32 Q6_R_add_RhRl_sat_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RhRl_sat_s16 __builtin_HEXAGON_A2_addh_h16_sat_hl /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.l,Rs32.h):sat:<<16 C Intrinsic Prototype: Word32 Q6_R_add_RlRh_sat_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RlRh_sat_s16 __builtin_HEXAGON_A2_addh_h16_sat_lh /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.l,Rs32.l):sat:<<16 C Intrinsic Prototype: Word32 Q6_R_add_RlRl_sat_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RlRl_sat_s16 __builtin_HEXAGON_A2_addh_h16_sat_ll /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.l,Rs32.h) C Intrinsic Prototype: Word32 Q6_R_add_RlRh(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RlRh __builtin_HEXAGON_A2_addh_l16_hl /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.l,Rs32.l) C Intrinsic Prototype: Word32 Q6_R_add_RlRl(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RlRl __builtin_HEXAGON_A2_addh_l16_ll /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.l,Rs32.h):sat C Intrinsic Prototype: Word32 Q6_R_add_RlRh_sat(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RlRh_sat __builtin_HEXAGON_A2_addh_l16_sat_hl /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.l,Rs32.l):sat C Intrinsic Prototype: Word32 Q6_R_add_RlRl_sat(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RlRl_sat __builtin_HEXAGON_A2_addh_l16_sat_ll /* ========================================================================== Assembly Syntax: 
Rd32=add(Rs32,#s16) C Intrinsic Prototype: Word32 Q6_R_add_RI(Word32 Rs, Word32 Is16) Instruction Type: ALU32_ADDI Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_add_RI __builtin_HEXAGON_A2_addi /* ========================================================================== Assembly Syntax: Rdd32=add(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_add_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_add_PP __builtin_HEXAGON_A2_addp /* ========================================================================== Assembly Syntax: Rdd32=add(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_add_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_add_PP_sat __builtin_HEXAGON_A2_addpsat /* ========================================================================== Assembly Syntax: Rd32=add(Rs32,Rt32):sat C Intrinsic Prototype: Word32 Q6_R_add_RR_sat(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_add_RR_sat __builtin_HEXAGON_A2_addsat /* ========================================================================== Assembly Syntax: Rdd32=add(Rs32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_add_RP(Word32 Rs, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_add_RP __builtin_HEXAGON_A2_addsp /* ========================================================================== Assembly Syntax: Rd32=and(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_and_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_and_RR __builtin_HEXAGON_A2_and /* ========================================================================== Assembly Syntax: Rd32=and(Rs32,#s10) C Intrinsic Prototype: Word32 Q6_R_and_RI(Word32 Rs, Word32 Is10) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_and_RI __builtin_HEXAGON_A2_andir /* ========================================================================== Assembly Syntax: Rdd32=and(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_and_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_and_PP __builtin_HEXAGON_A2_andp /* ========================================================================== Assembly Syntax: Rd32=aslh(Rs32) C Intrinsic Prototype: Word32 Q6_R_aslh_R(Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_aslh_R __builtin_HEXAGON_A2_aslh /* ========================================================================== Assembly Syntax: Rd32=asrh(Rs32) C Intrinsic Prototype: Word32 Q6_R_asrh_R(Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_asrh_R __builtin_HEXAGON_A2_asrh /* ========================================================================== 
Assembly Syntax: Rd32=combine(Rt32.h,Rs32.h) C Intrinsic Prototype: Word32 Q6_R_combine_RhRh(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_combine_RhRh __builtin_HEXAGON_A2_combine_hh /* ========================================================================== Assembly Syntax: Rd32=combine(Rt32.h,Rs32.l) C Intrinsic Prototype: Word32 Q6_R_combine_RhRl(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_combine_RhRl __builtin_HEXAGON_A2_combine_hl /* ========================================================================== Assembly Syntax: Rd32=combine(Rt32.l,Rs32.h) C Intrinsic Prototype: Word32 Q6_R_combine_RlRh(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_combine_RlRh __builtin_HEXAGON_A2_combine_lh /* ========================================================================== Assembly Syntax: Rd32=combine(Rt32.l,Rs32.l) C Intrinsic Prototype: Word32 Q6_R_combine_RlRl(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_combine_RlRl __builtin_HEXAGON_A2_combine_ll /* ========================================================================== Assembly Syntax: Rdd32=combine(#s8,#S8) C Intrinsic Prototype: Word64 Q6_P_combine_II(Word32 Is8, Word32 IS8) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_combine_II __builtin_HEXAGON_A2_combineii /* ========================================================================== Assembly Syntax: Rdd32=combine(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_combine_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_combine_RR __builtin_HEXAGON_A2_combinew /* ========================================================================== Assembly Syntax: Rd32=max(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_max_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_max_RR __builtin_HEXAGON_A2_max /* ========================================================================== Assembly Syntax: Rdd32=max(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_max_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_max_PP __builtin_HEXAGON_A2_maxp /* ========================================================================== Assembly Syntax: Rd32=maxu(Rs32,Rt32) C Intrinsic Prototype: UWord32 Q6_R_maxu_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_maxu_RR __builtin_HEXAGON_A2_maxu /* ========================================================================== Assembly Syntax: Rdd32=maxu(Rss32,Rtt32) C Intrinsic Prototype: UWord64 Q6_P_maxu_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_maxu_PP __builtin_HEXAGON_A2_maxup /* ========================================================================== Assembly Syntax: Rd32=min(Rt32,Rs32) C Intrinsic Prototype: Word32 Q6_R_min_RR(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_min_RR __builtin_HEXAGON_A2_min /* ========================================================================== Assembly Syntax: Rdd32=min(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_min_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_min_PP __builtin_HEXAGON_A2_minp /* ========================================================================== Assembly Syntax: Rd32=minu(Rt32,Rs32) C Intrinsic Prototype: UWord32 Q6_R_minu_RR(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_minu_RR __builtin_HEXAGON_A2_minu /* ========================================================================== Assembly Syntax: Rdd32=minu(Rtt32,Rss32) C Intrinsic Prototype: UWord64 Q6_P_minu_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_minu_PP __builtin_HEXAGON_A2_minup /* ========================================================================== Assembly Syntax: Rd32=neg(Rs32) C Intrinsic Prototype: Word32 Q6_R_neg_R(Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_neg_R __builtin_HEXAGON_A2_neg /* ========================================================================== Assembly Syntax: Rdd32=neg(Rss32) C Intrinsic Prototype: Word64 Q6_P_neg_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_neg_P __builtin_HEXAGON_A2_negp /* ========================================================================== Assembly Syntax: Rd32=neg(Rs32):sat C Intrinsic Prototype: Word32 Q6_R_neg_R_sat(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_neg_R_sat __builtin_HEXAGON_A2_negsat /* ========================================================================== Assembly Syntax: Rd32=not(Rs32) C Intrinsic Prototype: Word32 Q6_R_not_R(Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_not_R __builtin_HEXAGON_A2_not /* ========================================================================== Assembly Syntax: Rdd32=not(Rss32) C Intrinsic Prototype: Word64 Q6_P_not_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_not_P __builtin_HEXAGON_A2_notp /* ========================================================================== Assembly Syntax: Rd32=or(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_or_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ 
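/* Illustrative usage (editor-added note, not part of the generated header):
   the scalar wrappers in this file expand directly to the corresponding
   __builtin_HEXAGON_* builtins and compose like ordinary C expressions, e.g.

       Word32 hi_sum  = Q6_R_add_RhRh_s16(t, s);             // (t.h + s.h) << 16
       Word32 clamped = Q6_R_max_RR(Q6_R_min_RR(x, hi), lo); // clamp x to [lo, hi]
       Word64 pair    = Q6_P_combine_RR(hi_word, lo_word);   // pack two words

   Word32/Word64 are the integer typedefs used throughout these prototypes;
   the variable names above are hypothetical. */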
#define Q6_R_or_RR __builtin_HEXAGON_A2_or /* ========================================================================== Assembly Syntax: Rd32=or(Rs32,#s10) C Intrinsic Prototype: Word32 Q6_R_or_RI(Word32 Rs, Word32 Is10) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_or_RI __builtin_HEXAGON_A2_orir /* ========================================================================== Assembly Syntax: Rdd32=or(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_or_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_or_PP __builtin_HEXAGON_A2_orp /* ========================================================================== Assembly Syntax: Rd32=round(Rss32):sat C Intrinsic Prototype: Word32 Q6_R_round_P_sat(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_round_P_sat __builtin_HEXAGON_A2_roundsat /* ========================================================================== Assembly Syntax: Rd32=sat(Rss32) C Intrinsic Prototype: Word32 Q6_R_sat_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sat_P __builtin_HEXAGON_A2_sat /* ========================================================================== Assembly Syntax: Rd32=satb(Rs32) C Intrinsic Prototype: Word32 Q6_R_satb_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_satb_R __builtin_HEXAGON_A2_satb /* ========================================================================== Assembly Syntax: Rd32=sath(Rs32) C Intrinsic Prototype: Word32 Q6_R_sath_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sath_R __builtin_HEXAGON_A2_sath /* ========================================================================== Assembly Syntax: Rd32=satub(Rs32) C Intrinsic Prototype: Word32 Q6_R_satub_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_satub_R __builtin_HEXAGON_A2_satub /* ========================================================================== Assembly Syntax: Rd32=satuh(Rs32) C Intrinsic Prototype: Word32 Q6_R_satuh_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_satuh_R __builtin_HEXAGON_A2_satuh /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32,Rs32) C Intrinsic Prototype: Word32 Q6_R_sub_RR(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_sub_RR __builtin_HEXAGON_A2_sub /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.h,Rs32.h):<<16 C Intrinsic Prototype: Word32 Q6_R_sub_RhRh_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RhRh_s16 __builtin_HEXAGON_A2_subh_h16_hh /* 
========================================================================== Assembly Syntax: Rd32=sub(Rt32.h,Rs32.l):<<16 C Intrinsic Prototype: Word32 Q6_R_sub_RhRl_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RhRl_s16 __builtin_HEXAGON_A2_subh_h16_hl /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.l,Rs32.h):<<16 C Intrinsic Prototype: Word32 Q6_R_sub_RlRh_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RlRh_s16 __builtin_HEXAGON_A2_subh_h16_lh /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.l,Rs32.l):<<16 C Intrinsic Prototype: Word32 Q6_R_sub_RlRl_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RlRl_s16 __builtin_HEXAGON_A2_subh_h16_ll /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.h,Rs32.h):sat:<<16 C Intrinsic Prototype: Word32 Q6_R_sub_RhRh_sat_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RhRh_sat_s16 __builtin_HEXAGON_A2_subh_h16_sat_hh /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.h,Rs32.l):sat:<<16 C Intrinsic Prototype: Word32 Q6_R_sub_RhRl_sat_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RhRl_sat_s16 __builtin_HEXAGON_A2_subh_h16_sat_hl /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.l,Rs32.h):sat:<<16 C Intrinsic Prototype: Word32 Q6_R_sub_RlRh_sat_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RlRh_sat_s16 __builtin_HEXAGON_A2_subh_h16_sat_lh /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.l,Rs32.l):sat:<<16 C Intrinsic Prototype: Word32 Q6_R_sub_RlRl_sat_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RlRl_sat_s16 __builtin_HEXAGON_A2_subh_h16_sat_ll /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.l,Rs32.h) C Intrinsic Prototype: Word32 Q6_R_sub_RlRh(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RlRh __builtin_HEXAGON_A2_subh_l16_hl /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.l,Rs32.l) C Intrinsic Prototype: Word32 Q6_R_sub_RlRl(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RlRl __builtin_HEXAGON_A2_subh_l16_ll /* ========================================================================== Assembly Syntax: 
Rd32=sub(Rt32.l,Rs32.h):sat C Intrinsic Prototype: Word32 Q6_R_sub_RlRh_sat(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RlRh_sat __builtin_HEXAGON_A2_subh_l16_sat_hl /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.l,Rs32.l):sat C Intrinsic Prototype: Word32 Q6_R_sub_RlRl_sat(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RlRl_sat __builtin_HEXAGON_A2_subh_l16_sat_ll /* ========================================================================== Assembly Syntax: Rdd32=sub(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_sub_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_sub_PP __builtin_HEXAGON_A2_subp /* ========================================================================== Assembly Syntax: Rd32=sub(#s10,Rs32) C Intrinsic Prototype: Word32 Q6_R_sub_IR(Word32 Is10, Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_sub_IR __builtin_HEXAGON_A2_subri /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32,Rs32):sat C Intrinsic Prototype: Word32 Q6_R_sub_RR_sat(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_sub_RR_sat __builtin_HEXAGON_A2_subsat /* ========================================================================== Assembly Syntax: Rd32=vaddh(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_vaddh_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_vaddh_RR __builtin_HEXAGON_A2_svaddh /* ========================================================================== Assembly Syntax: Rd32=vaddh(Rs32,Rt32):sat C Intrinsic Prototype: Word32 Q6_R_vaddh_RR_sat(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_vaddh_RR_sat __builtin_HEXAGON_A2_svaddhs /* ========================================================================== Assembly Syntax: Rd32=vadduh(Rs32,Rt32):sat C Intrinsic Prototype: Word32 Q6_R_vadduh_RR_sat(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_vadduh_RR_sat __builtin_HEXAGON_A2_svadduhs /* ========================================================================== Assembly Syntax: Rd32=vavgh(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_vavgh_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_vavgh_RR __builtin_HEXAGON_A2_svavgh /* ========================================================================== Assembly Syntax: Rd32=vavgh(Rs32,Rt32):rnd C Intrinsic Prototype: Word32 Q6_R_vavgh_RR_rnd(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 
========================================================================== */ #define Q6_R_vavgh_RR_rnd __builtin_HEXAGON_A2_svavghs /* ========================================================================== Assembly Syntax: Rd32=vnavgh(Rt32,Rs32) C Intrinsic Prototype: Word32 Q6_R_vnavgh_RR(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_vnavgh_RR __builtin_HEXAGON_A2_svnavgh /* ========================================================================== Assembly Syntax: Rd32=vsubh(Rt32,Rs32) C Intrinsic Prototype: Word32 Q6_R_vsubh_RR(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_vsubh_RR __builtin_HEXAGON_A2_svsubh /* ========================================================================== Assembly Syntax: Rd32=vsubh(Rt32,Rs32):sat C Intrinsic Prototype: Word32 Q6_R_vsubh_RR_sat(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_vsubh_RR_sat __builtin_HEXAGON_A2_svsubhs /* ========================================================================== Assembly Syntax: Rd32=vsubuh(Rt32,Rs32):sat C Intrinsic Prototype: Word32 Q6_R_vsubuh_RR_sat(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_vsubuh_RR_sat __builtin_HEXAGON_A2_svsubuhs /* ========================================================================== Assembly Syntax: Rd32=swiz(Rs32) C Intrinsic Prototype: Word32 Q6_R_swiz_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_swiz_R __builtin_HEXAGON_A2_swiz /* ========================================================================== Assembly Syntax: Rd32=sxtb(Rs32) C Intrinsic Prototype: Word32 Q6_R_sxtb_R(Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_sxtb_R __builtin_HEXAGON_A2_sxtb /* ========================================================================== Assembly Syntax: Rd32=sxth(Rs32) C Intrinsic Prototype: Word32 Q6_R_sxth_R(Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_sxth_R __builtin_HEXAGON_A2_sxth /* ========================================================================== Assembly Syntax: Rdd32=sxtw(Rs32) C Intrinsic Prototype: Word64 Q6_P_sxtw_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_sxtw_R __builtin_HEXAGON_A2_sxtw /* ========================================================================== Assembly Syntax: Rd32=Rs32 C Intrinsic Prototype: Word32 Q6_R_equals_R(Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_equals_R __builtin_HEXAGON_A2_tfr /* ========================================================================== Assembly Syntax: Rx32.h=#u16 C Intrinsic Prototype: Word32 Q6_Rh_equals_I(Word32 Rx, Word32 Iu16) Instruction Type: ALU32_2op Execution Slots: SLOT0123 
========================================================================== */ #define Q6_Rh_equals_I __builtin_HEXAGON_A2_tfrih /* ========================================================================== Assembly Syntax: Rx32.l=#u16 C Intrinsic Prototype: Word32 Q6_Rl_equals_I(Word32 Rx, Word32 Iu16) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Rl_equals_I __builtin_HEXAGON_A2_tfril /* ========================================================================== Assembly Syntax: Rdd32=Rss32 C Intrinsic Prototype: Word64 Q6_P_equals_P(Word64 Rss) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_equals_P __builtin_HEXAGON_A2_tfrp /* ========================================================================== Assembly Syntax: Rdd32=#s8 C Intrinsic Prototype: Word64 Q6_P_equals_I(Word32 Is8) Instruction Type: ALU64 Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_equals_I __builtin_HEXAGON_A2_tfrpi /* ========================================================================== Assembly Syntax: Rd32=#s16 C Intrinsic Prototype: Word32 Q6_R_equals_I(Word32 Is16) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_equals_I __builtin_HEXAGON_A2_tfrsi /* ========================================================================== Assembly Syntax: Rdd32=vabsh(Rss32) C Intrinsic Prototype: Word64 Q6_P_vabsh_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vabsh_P __builtin_HEXAGON_A2_vabsh /* ========================================================================== Assembly Syntax: Rdd32=vabsh(Rss32):sat C Intrinsic Prototype: Word64 Q6_P_vabsh_P_sat(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vabsh_P_sat __builtin_HEXAGON_A2_vabshsat /* ========================================================================== Assembly Syntax: Rdd32=vabsw(Rss32) C Intrinsic Prototype: Word64 Q6_P_vabsw_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vabsw_P __builtin_HEXAGON_A2_vabsw /* ========================================================================== Assembly Syntax: Rdd32=vabsw(Rss32):sat C Intrinsic Prototype: Word64 Q6_P_vabsw_P_sat(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vabsw_P_sat __builtin_HEXAGON_A2_vabswsat /* ========================================================================== Assembly Syntax: Rdd32=vaddb(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vaddb_PP(Word64 Rss, Word64 Rtt) Instruction Type: MAPPING Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_vaddb_PP __builtin_HEXAGON_A2_vaddb_map /* ========================================================================== Assembly Syntax: Rdd32=vaddh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vaddh_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_vaddh_PP __builtin_HEXAGON_A2_vaddh /* ========================================================================== Assembly Syntax: Rdd32=vaddh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vaddh_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vaddh_PP_sat __builtin_HEXAGON_A2_vaddhs /* ========================================================================== Assembly Syntax: Rdd32=vaddub(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vaddub_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vaddub_PP __builtin_HEXAGON_A2_vaddub /* ========================================================================== Assembly Syntax: Rdd32=vaddub(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vaddub_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vaddub_PP_sat __builtin_HEXAGON_A2_vaddubs /* ========================================================================== Assembly Syntax: Rdd32=vadduh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vadduh_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vadduh_PP_sat __builtin_HEXAGON_A2_vadduhs /* ========================================================================== Assembly Syntax: Rdd32=vaddw(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vaddw_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vaddw_PP __builtin_HEXAGON_A2_vaddw /* ========================================================================== Assembly Syntax: Rdd32=vaddw(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vaddw_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vaddw_PP_sat __builtin_HEXAGON_A2_vaddws /* ========================================================================== Assembly Syntax: Rdd32=vavgh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vavgh_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavgh_PP __builtin_HEXAGON_A2_vavgh /* ========================================================================== Assembly Syntax: Rdd32=vavgh(Rss32,Rtt32):crnd C Intrinsic Prototype: Word64 Q6_P_vavgh_PP_crnd(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavgh_PP_crnd __builtin_HEXAGON_A2_vavghcr /* ========================================================================== Assembly Syntax: Rdd32=vavgh(Rss32,Rtt32):rnd C Intrinsic Prototype: Word64 Q6_P_vavgh_PP_rnd(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavgh_PP_rnd __builtin_HEXAGON_A2_vavghr /* 
========================================================================== Assembly Syntax: Rdd32=vavgub(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vavgub_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavgub_PP __builtin_HEXAGON_A2_vavgub /* ========================================================================== Assembly Syntax: Rdd32=vavgub(Rss32,Rtt32):rnd C Intrinsic Prototype: Word64 Q6_P_vavgub_PP_rnd(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavgub_PP_rnd __builtin_HEXAGON_A2_vavgubr /* ========================================================================== Assembly Syntax: Rdd32=vavguh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vavguh_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavguh_PP __builtin_HEXAGON_A2_vavguh /* ========================================================================== Assembly Syntax: Rdd32=vavguh(Rss32,Rtt32):rnd C Intrinsic Prototype: Word64 Q6_P_vavguh_PP_rnd(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavguh_PP_rnd __builtin_HEXAGON_A2_vavguhr /* ========================================================================== Assembly Syntax: Rdd32=vavguw(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vavguw_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavguw_PP __builtin_HEXAGON_A2_vavguw /* ========================================================================== Assembly Syntax: Rdd32=vavguw(Rss32,Rtt32):rnd C Intrinsic Prototype: Word64 Q6_P_vavguw_PP_rnd(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavguw_PP_rnd __builtin_HEXAGON_A2_vavguwr /* ========================================================================== Assembly Syntax: Rdd32=vavgw(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vavgw_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavgw_PP __builtin_HEXAGON_A2_vavgw /* ========================================================================== Assembly Syntax: Rdd32=vavgw(Rss32,Rtt32):crnd C Intrinsic Prototype: Word64 Q6_P_vavgw_PP_crnd(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavgw_PP_crnd __builtin_HEXAGON_A2_vavgwcr /* ========================================================================== Assembly Syntax: Rdd32=vavgw(Rss32,Rtt32):rnd C Intrinsic Prototype: Word64 Q6_P_vavgw_PP_rnd(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavgw_PP_rnd __builtin_HEXAGON_A2_vavgwr /* ========================================================================== Assembly Syntax: Pd4=vcmpb.eq(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_vcmpb_eq_PP(Word64 Rss, Word64 Rtt) 
Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpb_eq_PP __builtin_HEXAGON_A2_vcmpbeq /* ========================================================================== Assembly Syntax: Pd4=vcmpb.gtu(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_vcmpb_gtu_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpb_gtu_PP __builtin_HEXAGON_A2_vcmpbgtu /* ========================================================================== Assembly Syntax: Pd4=vcmph.eq(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_vcmph_eq_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmph_eq_PP __builtin_HEXAGON_A2_vcmpheq /* ========================================================================== Assembly Syntax: Pd4=vcmph.gt(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_vcmph_gt_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmph_gt_PP __builtin_HEXAGON_A2_vcmphgt /* ========================================================================== Assembly Syntax: Pd4=vcmph.gtu(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_vcmph_gtu_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmph_gtu_PP __builtin_HEXAGON_A2_vcmphgtu /* ========================================================================== Assembly Syntax: Pd4=vcmpw.eq(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_vcmpw_eq_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpw_eq_PP __builtin_HEXAGON_A2_vcmpweq /* ========================================================================== Assembly Syntax: Pd4=vcmpw.gt(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_vcmpw_gt_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpw_gt_PP __builtin_HEXAGON_A2_vcmpwgt /* ========================================================================== Assembly Syntax: Pd4=vcmpw.gtu(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_vcmpw_gtu_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpw_gtu_PP __builtin_HEXAGON_A2_vcmpwgtu /* ========================================================================== Assembly Syntax: Rdd32=vconj(Rss32):sat C Intrinsic Prototype: Word64 Q6_P_vconj_P_sat(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vconj_P_sat __builtin_HEXAGON_A2_vconj /* ========================================================================== Assembly Syntax: Rdd32=vmaxb(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vmaxb_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmaxb_PP __builtin_HEXAGON_A2_vmaxb /* 
========================================================================== Assembly Syntax: Rdd32=vmaxh(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vmaxh_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmaxh_PP __builtin_HEXAGON_A2_vmaxh /* ========================================================================== Assembly Syntax: Rdd32=vmaxub(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vmaxub_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmaxub_PP __builtin_HEXAGON_A2_vmaxub /* ========================================================================== Assembly Syntax: Rdd32=vmaxuh(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vmaxuh_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmaxuh_PP __builtin_HEXAGON_A2_vmaxuh /* ========================================================================== Assembly Syntax: Rdd32=vmaxuw(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vmaxuw_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmaxuw_PP __builtin_HEXAGON_A2_vmaxuw /* ========================================================================== Assembly Syntax: Rdd32=vmaxw(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vmaxw_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmaxw_PP __builtin_HEXAGON_A2_vmaxw /* ========================================================================== Assembly Syntax: Rdd32=vminb(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vminb_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vminb_PP __builtin_HEXAGON_A2_vminb /* ========================================================================== Assembly Syntax: Rdd32=vminh(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vminh_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vminh_PP __builtin_HEXAGON_A2_vminh /* ========================================================================== Assembly Syntax: Rdd32=vminub(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vminub_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vminub_PP __builtin_HEXAGON_A2_vminub /* ========================================================================== Assembly Syntax: Rdd32=vminuh(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vminuh_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vminuh_PP __builtin_HEXAGON_A2_vminuh /* ========================================================================== Assembly Syntax: Rdd32=vminuw(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vminuw_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_vminuw_PP __builtin_HEXAGON_A2_vminuw /* ========================================================================== Assembly Syntax: Rdd32=vminw(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vminw_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vminw_PP __builtin_HEXAGON_A2_vminw /* ========================================================================== Assembly Syntax: Rdd32=vnavgh(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vnavgh_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vnavgh_PP __builtin_HEXAGON_A2_vnavgh /* ========================================================================== Assembly Syntax: Rdd32=vnavgh(Rtt32,Rss32):crnd:sat C Intrinsic Prototype: Word64 Q6_P_vnavgh_PP_crnd_sat(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vnavgh_PP_crnd_sat __builtin_HEXAGON_A2_vnavghcr /* ========================================================================== Assembly Syntax: Rdd32=vnavgh(Rtt32,Rss32):rnd:sat C Intrinsic Prototype: Word64 Q6_P_vnavgh_PP_rnd_sat(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vnavgh_PP_rnd_sat __builtin_HEXAGON_A2_vnavghr /* ========================================================================== Assembly Syntax: Rdd32=vnavgw(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vnavgw_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vnavgw_PP __builtin_HEXAGON_A2_vnavgw /* ========================================================================== Assembly Syntax: Rdd32=vnavgw(Rtt32,Rss32):crnd:sat C Intrinsic Prototype: Word64 Q6_P_vnavgw_PP_crnd_sat(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vnavgw_PP_crnd_sat __builtin_HEXAGON_A2_vnavgwcr /* ========================================================================== Assembly Syntax: Rdd32=vnavgw(Rtt32,Rss32):rnd:sat C Intrinsic Prototype: Word64 Q6_P_vnavgw_PP_rnd_sat(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vnavgw_PP_rnd_sat __builtin_HEXAGON_A2_vnavgwr /* ========================================================================== Assembly Syntax: Rdd32=vraddub(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vraddub_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vraddub_PP __builtin_HEXAGON_A2_vraddub /* ========================================================================== Assembly Syntax: Rxx32+=vraddub(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vraddubacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vraddubacc_PP 
__builtin_HEXAGON_A2_vraddub_acc /* ========================================================================== Assembly Syntax: Rdd32=vrsadub(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrsadub_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrsadub_PP __builtin_HEXAGON_A2_vrsadub /* ========================================================================== Assembly Syntax: Rxx32+=vrsadub(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrsadubacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrsadubacc_PP __builtin_HEXAGON_A2_vrsadub_acc /* ========================================================================== Assembly Syntax: Rdd32=vsubb(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vsubb_PP(Word64 Rss, Word64 Rtt) Instruction Type: MAPPING Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_vsubb_PP __builtin_HEXAGON_A2_vsubb_map /* ========================================================================== Assembly Syntax: Rdd32=vsubh(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vsubh_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsubh_PP __builtin_HEXAGON_A2_vsubh /* ========================================================================== Assembly Syntax: Rdd32=vsubh(Rtt32,Rss32):sat C Intrinsic Prototype: Word64 Q6_P_vsubh_PP_sat(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsubh_PP_sat __builtin_HEXAGON_A2_vsubhs /* ========================================================================== Assembly Syntax: Rdd32=vsubub(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vsubub_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsubub_PP __builtin_HEXAGON_A2_vsubub /* ========================================================================== Assembly Syntax: Rdd32=vsubub(Rtt32,Rss32):sat C Intrinsic Prototype: Word64 Q6_P_vsubub_PP_sat(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsubub_PP_sat __builtin_HEXAGON_A2_vsububs /* ========================================================================== Assembly Syntax: Rdd32=vsubuh(Rtt32,Rss32):sat C Intrinsic Prototype: Word64 Q6_P_vsubuh_PP_sat(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsubuh_PP_sat __builtin_HEXAGON_A2_vsubuhs /* ========================================================================== Assembly Syntax: Rdd32=vsubw(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vsubw_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsubw_PP __builtin_HEXAGON_A2_vsubw /* ========================================================================== Assembly Syntax: Rdd32=vsubw(Rtt32,Rss32):sat C Intrinsic Prototype: Word64 
Q6_P_vsubw_PP_sat(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsubw_PP_sat __builtin_HEXAGON_A2_vsubws /* ========================================================================== Assembly Syntax: Rd32=xor(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_xor_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_xor_RR __builtin_HEXAGON_A2_xor /* ========================================================================== Assembly Syntax: Rdd32=xor(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_xor_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_xor_PP __builtin_HEXAGON_A2_xorp /* ========================================================================== Assembly Syntax: Rd32=zxtb(Rs32) C Intrinsic Prototype: Word32 Q6_R_zxtb_R(Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_zxtb_R __builtin_HEXAGON_A2_zxtb /* ========================================================================== Assembly Syntax: Rd32=zxth(Rs32) C Intrinsic Prototype: Word32 Q6_R_zxth_R(Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_zxth_R __builtin_HEXAGON_A2_zxth /* ========================================================================== Assembly Syntax: Rd32=and(Rt32,~Rs32) C Intrinsic Prototype: Word32 Q6_R_and_RnR(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_and_RnR __builtin_HEXAGON_A4_andn /* ========================================================================== Assembly Syntax: Rdd32=and(Rtt32,~Rss32) C Intrinsic Prototype: Word64 Q6_P_and_PnP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_and_PnP __builtin_HEXAGON_A4_andnp /* ========================================================================== Assembly Syntax: Rdd32=bitsplit(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_bitsplit_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_bitsplit_RR __builtin_HEXAGON_A4_bitsplit /* ========================================================================== Assembly Syntax: Rdd32=bitsplit(Rs32,#u5) C Intrinsic Prototype: Word64 Q6_P_bitsplit_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_bitsplit_RI __builtin_HEXAGON_A4_bitspliti /* ========================================================================== Assembly Syntax: Pd4=boundscheck(Rs32,Rtt32) C Intrinsic Prototype: Byte Q6_p_boundscheck_RP(Word32 Rs, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_boundscheck_RP __builtin_HEXAGON_A4_boundscheck /* ========================================================================== Assembly 
Syntax: Pd4=cmpb.eq(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmpb_eq_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmpb_eq_RR __builtin_HEXAGON_A4_cmpbeq /* ========================================================================== Assembly Syntax: Pd4=cmpb.eq(Rs32,#u8) C Intrinsic Prototype: Byte Q6_p_cmpb_eq_RI(Word32 Rs, Word32 Iu8) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmpb_eq_RI __builtin_HEXAGON_A4_cmpbeqi /* ========================================================================== Assembly Syntax: Pd4=cmpb.gt(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmpb_gt_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmpb_gt_RR __builtin_HEXAGON_A4_cmpbgt /* ========================================================================== Assembly Syntax: Pd4=cmpb.gt(Rs32,#s8) C Intrinsic Prototype: Byte Q6_p_cmpb_gt_RI(Word32 Rs, Word32 Is8) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmpb_gt_RI __builtin_HEXAGON_A4_cmpbgti /* ========================================================================== Assembly Syntax: Pd4=cmpb.gtu(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmpb_gtu_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmpb_gtu_RR __builtin_HEXAGON_A4_cmpbgtu /* ========================================================================== Assembly Syntax: Pd4=cmpb.gtu(Rs32,#u7) C Intrinsic Prototype: Byte Q6_p_cmpb_gtu_RI(Word32 Rs, Word32 Iu7) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmpb_gtu_RI __builtin_HEXAGON_A4_cmpbgtui /* ========================================================================== Assembly Syntax: Pd4=cmph.eq(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmph_eq_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmph_eq_RR __builtin_HEXAGON_A4_cmpheq /* ========================================================================== Assembly Syntax: Pd4=cmph.eq(Rs32,#s8) C Intrinsic Prototype: Byte Q6_p_cmph_eq_RI(Word32 Rs, Word32 Is8) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmph_eq_RI __builtin_HEXAGON_A4_cmpheqi /* ========================================================================== Assembly Syntax: Pd4=cmph.gt(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmph_gt_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmph_gt_RR __builtin_HEXAGON_A4_cmphgt /* ========================================================================== Assembly Syntax: Pd4=cmph.gt(Rs32,#s8) C Intrinsic Prototype: Byte Q6_p_cmph_gt_RI(Word32 Rs, Word32 Is8) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmph_gt_RI __builtin_HEXAGON_A4_cmphgti /* 
========================================================================== Assembly Syntax: Pd4=cmph.gtu(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmph_gtu_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmph_gtu_RR __builtin_HEXAGON_A4_cmphgtu /* ========================================================================== Assembly Syntax: Pd4=cmph.gtu(Rs32,#u7) C Intrinsic Prototype: Byte Q6_p_cmph_gtu_RI(Word32 Rs, Word32 Iu7) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmph_gtu_RI __builtin_HEXAGON_A4_cmphgtui /* ========================================================================== Assembly Syntax: Rdd32=combine(#s8,Rs32) C Intrinsic Prototype: Word64 Q6_P_combine_IR(Word32 Is8, Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_combine_IR __builtin_HEXAGON_A4_combineir /* ========================================================================== Assembly Syntax: Rdd32=combine(Rs32,#s8) C Intrinsic Prototype: Word64 Q6_P_combine_RI(Word32 Rs, Word32 Is8) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_combine_RI __builtin_HEXAGON_A4_combineri /* ========================================================================== Assembly Syntax: Rd32=cround(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_cround_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cround_RI __builtin_HEXAGON_A4_cround_ri /* ========================================================================== Assembly Syntax: Rd32=cround(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_cround_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cround_RR __builtin_HEXAGON_A4_cround_rr /* ========================================================================== Assembly Syntax: Rd32=modwrap(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_modwrap_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_modwrap_RR __builtin_HEXAGON_A4_modwrapu /* ========================================================================== Assembly Syntax: Rd32=or(Rt32,~Rs32) C Intrinsic Prototype: Word32 Q6_R_or_RnR(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_or_RnR __builtin_HEXAGON_A4_orn /* ========================================================================== Assembly Syntax: Rdd32=or(Rtt32,~Rss32) C Intrinsic Prototype: Word64 Q6_P_or_PnP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_or_PnP __builtin_HEXAGON_A4_ornp /* ========================================================================== Assembly Syntax: Rd32=cmp.eq(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_cmp_eq_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 
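  Usage sketch (illustrative only, not part of the normative description):
  Q6_P_combine_RI and the register-form compare documented here can be
  exercised from plain C once this header is included and the code is built
  with a Hexagon toolchain. The helper name pack_and_test is hypothetical;
  long long / int stand in for the Word64 / Word32 of the prototypes.

      static inline int pack_and_test(int hi, int lo)
      {
          // Rdd32=combine(Rs32,#s8): pack 'hi' into the upper word of a
          // 64-bit register pair and the constant 7 into the lower word.
          long long pair = Q6_P_combine_RI(hi, 7);

          // Rd32=cmp.eq(Rs32,Rt32): materialises the compare result in a
          // general register (1 when equal, 0 otherwise), so it can feed
          // ordinary integer arithmetic directly.
          return Q6_R_cmp_eq_RR(lo, (int)pair);
      }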
========================================================================== */ #define Q6_R_cmp_eq_RR __builtin_HEXAGON_A4_rcmpeq /* ========================================================================== Assembly Syntax: Rd32=cmp.eq(Rs32,#s8) C Intrinsic Prototype: Word32 Q6_R_cmp_eq_RI(Word32 Rs, Word32 Is8) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_cmp_eq_RI __builtin_HEXAGON_A4_rcmpeqi /* ========================================================================== Assembly Syntax: Rd32=!cmp.eq(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_not_cmp_eq_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_not_cmp_eq_RR __builtin_HEXAGON_A4_rcmpneq /* ========================================================================== Assembly Syntax: Rd32=!cmp.eq(Rs32,#s8) C Intrinsic Prototype: Word32 Q6_R_not_cmp_eq_RI(Word32 Rs, Word32 Is8) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_not_cmp_eq_RI __builtin_HEXAGON_A4_rcmpneqi /* ========================================================================== Assembly Syntax: Rd32=round(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_round_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_round_RI __builtin_HEXAGON_A4_round_ri /* ========================================================================== Assembly Syntax: Rd32=round(Rs32,#u5):sat C Intrinsic Prototype: Word32 Q6_R_round_RI_sat(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_round_RI_sat __builtin_HEXAGON_A4_round_ri_sat /* ========================================================================== Assembly Syntax: Rd32=round(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_round_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_round_RR __builtin_HEXAGON_A4_round_rr /* ========================================================================== Assembly Syntax: Rd32=round(Rs32,Rt32):sat C Intrinsic Prototype: Word32 Q6_R_round_RR_sat(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_round_RR_sat __builtin_HEXAGON_A4_round_rr_sat /* ========================================================================== Assembly Syntax: Pd4=tlbmatch(Rss32,Rt32) C Intrinsic Prototype: Byte Q6_p_tlbmatch_PR(Word64 Rss, Word32 Rt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_tlbmatch_PR __builtin_HEXAGON_A4_tlbmatch /* ========================================================================== Assembly Syntax: Pd4=any8(vcmpb.eq(Rss32,Rtt32)) C Intrinsic Prototype: Byte Q6_p_any8_vcmpb_eq_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_any8_vcmpb_eq_PP __builtin_HEXAGON_A4_vcmpbeq_any /* 
========================================================================== Assembly Syntax: Pd4=vcmpb.eq(Rss32,#u8) C Intrinsic Prototype: Byte Q6_p_vcmpb_eq_PI(Word64 Rss, Word32 Iu8) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpb_eq_PI __builtin_HEXAGON_A4_vcmpbeqi /* ========================================================================== Assembly Syntax: Pd4=vcmpb.gt(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_vcmpb_gt_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpb_gt_PP __builtin_HEXAGON_A4_vcmpbgt /* ========================================================================== Assembly Syntax: Pd4=vcmpb.gt(Rss32,#s8) C Intrinsic Prototype: Byte Q6_p_vcmpb_gt_PI(Word64 Rss, Word32 Is8) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpb_gt_PI __builtin_HEXAGON_A4_vcmpbgti /* ========================================================================== Assembly Syntax: Pd4=vcmpb.gtu(Rss32,#u7) C Intrinsic Prototype: Byte Q6_p_vcmpb_gtu_PI(Word64 Rss, Word32 Iu7) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpb_gtu_PI __builtin_HEXAGON_A4_vcmpbgtui /* ========================================================================== Assembly Syntax: Pd4=vcmph.eq(Rss32,#s8) C Intrinsic Prototype: Byte Q6_p_vcmph_eq_PI(Word64 Rss, Word32 Is8) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmph_eq_PI __builtin_HEXAGON_A4_vcmpheqi /* ========================================================================== Assembly Syntax: Pd4=vcmph.gt(Rss32,#s8) C Intrinsic Prototype: Byte Q6_p_vcmph_gt_PI(Word64 Rss, Word32 Is8) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmph_gt_PI __builtin_HEXAGON_A4_vcmphgti /* ========================================================================== Assembly Syntax: Pd4=vcmph.gtu(Rss32,#u7) C Intrinsic Prototype: Byte Q6_p_vcmph_gtu_PI(Word64 Rss, Word32 Iu7) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmph_gtu_PI __builtin_HEXAGON_A4_vcmphgtui /* ========================================================================== Assembly Syntax: Pd4=vcmpw.eq(Rss32,#s8) C Intrinsic Prototype: Byte Q6_p_vcmpw_eq_PI(Word64 Rss, Word32 Is8) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpw_eq_PI __builtin_HEXAGON_A4_vcmpweqi /* ========================================================================== Assembly Syntax: Pd4=vcmpw.gt(Rss32,#s8) C Intrinsic Prototype: Byte Q6_p_vcmpw_gt_PI(Word64 Rss, Word32 Is8) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpw_gt_PI __builtin_HEXAGON_A4_vcmpwgti /* ========================================================================== Assembly Syntax: Pd4=vcmpw.gtu(Rss32,#u7) C Intrinsic Prototype: Byte Q6_p_vcmpw_gtu_PI(Word64 Rss, Word32 Iu7) Instruction Type: ALU64 Execution 
Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpw_gtu_PI __builtin_HEXAGON_A4_vcmpwgtui /* ========================================================================== Assembly Syntax: Rxx32=vrmaxh(Rss32,Ru32) C Intrinsic Prototype: Word64 Q6_P_vrmaxh_PR(Word64 Rxx, Word64 Rss, Word32 Ru) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmaxh_PR __builtin_HEXAGON_A4_vrmaxh /* ========================================================================== Assembly Syntax: Rxx32=vrmaxuh(Rss32,Ru32) C Intrinsic Prototype: Word64 Q6_P_vrmaxuh_PR(Word64 Rxx, Word64 Rss, Word32 Ru) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmaxuh_PR __builtin_HEXAGON_A4_vrmaxuh /* ========================================================================== Assembly Syntax: Rxx32=vrmaxuw(Rss32,Ru32) C Intrinsic Prototype: Word64 Q6_P_vrmaxuw_PR(Word64 Rxx, Word64 Rss, Word32 Ru) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmaxuw_PR __builtin_HEXAGON_A4_vrmaxuw /* ========================================================================== Assembly Syntax: Rxx32=vrmaxw(Rss32,Ru32) C Intrinsic Prototype: Word64 Q6_P_vrmaxw_PR(Word64 Rxx, Word64 Rss, Word32 Ru) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmaxw_PR __builtin_HEXAGON_A4_vrmaxw /* ========================================================================== Assembly Syntax: Rxx32=vrminh(Rss32,Ru32) C Intrinsic Prototype: Word64 Q6_P_vrminh_PR(Word64 Rxx, Word64 Rss, Word32 Ru) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrminh_PR __builtin_HEXAGON_A4_vrminh /* ========================================================================== Assembly Syntax: Rxx32=vrminuh(Rss32,Ru32) C Intrinsic Prototype: Word64 Q6_P_vrminuh_PR(Word64 Rxx, Word64 Rss, Word32 Ru) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrminuh_PR __builtin_HEXAGON_A4_vrminuh /* ========================================================================== Assembly Syntax: Rxx32=vrminuw(Rss32,Ru32) C Intrinsic Prototype: Word64 Q6_P_vrminuw_PR(Word64 Rxx, Word64 Rss, Word32 Ru) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrminuw_PR __builtin_HEXAGON_A4_vrminuw /* ========================================================================== Assembly Syntax: Rxx32=vrminw(Rss32,Ru32) C Intrinsic Prototype: Word64 Q6_P_vrminw_PR(Word64 Rxx, Word64 Rss, Word32 Ru) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrminw_PR __builtin_HEXAGON_A4_vrminw /* ========================================================================== Assembly Syntax: Rd32=vaddhub(Rss32,Rtt32):sat C Intrinsic Prototype: Word32 Q6_R_vaddhub_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vaddhub_PP_sat 
__builtin_HEXAGON_A5_vaddhubs /* ========================================================================== Assembly Syntax: Pd4=all8(Ps4) C Intrinsic Prototype: Byte Q6_p_all8_p(Byte Ps) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_all8_p __builtin_HEXAGON_C2_all8 /* ========================================================================== Assembly Syntax: Pd4=and(Pt4,Ps4) C Intrinsic Prototype: Byte Q6_p_and_pp(Byte Pt, Byte Ps) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_and_pp __builtin_HEXAGON_C2_and /* ========================================================================== Assembly Syntax: Pd4=and(Pt4,!Ps4) C Intrinsic Prototype: Byte Q6_p_and_pnp(Byte Pt, Byte Ps) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_and_pnp __builtin_HEXAGON_C2_andn /* ========================================================================== Assembly Syntax: Pd4=any8(Ps4) C Intrinsic Prototype: Byte Q6_p_any8_p(Byte Ps) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_any8_p __builtin_HEXAGON_C2_any8 /* ========================================================================== Assembly Syntax: Pd4=bitsclr(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_bitsclr_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_bitsclr_RR __builtin_HEXAGON_C2_bitsclr /* ========================================================================== Assembly Syntax: Pd4=bitsclr(Rs32,#u6) C Intrinsic Prototype: Byte Q6_p_bitsclr_RI(Word32 Rs, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_bitsclr_RI __builtin_HEXAGON_C2_bitsclri /* ========================================================================== Assembly Syntax: Pd4=bitsset(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_bitsset_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_bitsset_RR __builtin_HEXAGON_C2_bitsset /* ========================================================================== Assembly Syntax: Pd4=cmp.eq(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmp_eq_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_cmp_eq_RR __builtin_HEXAGON_C2_cmpeq /* ========================================================================== Assembly Syntax: Pd4=cmp.eq(Rs32,#s10) C Intrinsic Prototype: Byte Q6_p_cmp_eq_RI(Word32 Rs, Word32 Is10) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_cmp_eq_RI __builtin_HEXAGON_C2_cmpeqi /* ========================================================================== Assembly Syntax: Pd4=cmp.eq(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_cmp_eq_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmp_eq_PP __builtin_HEXAGON_C2_cmpeqp /* 
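  Usage sketch (illustrative only): predicate registers produced by the vector
  compares in this header carry one result bit per lane, and the C2 predicate
  operations defined just above combine or summarise them without touching
  general registers. The helper name any_byte_is_zero is hypothetical; long
  long stands in for Word64.

      static inline int any_byte_is_zero(long long packed)
      {
          // Pd4=vcmpb.eq(Rss32,#u8): per-byte compare against the
          // immediate 0, one predicate bit per byte lane.
          int p = Q6_p_vcmpb_eq_PI(packed, 0);

          // Pd4=any8(Ps4): true if any of the eight lane bits is set.
          return Q6_p_any8_p(p) != 0;
      }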
========================================================================== Assembly Syntax: Pd4=cmp.ge(Rs32,#s8) C Intrinsic Prototype: Byte Q6_p_cmp_ge_RI(Word32 Rs, Word32 Is8) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_cmp_ge_RI __builtin_HEXAGON_C2_cmpgei /* ========================================================================== Assembly Syntax: Pd4=cmp.geu(Rs32,#u8) C Intrinsic Prototype: Byte Q6_p_cmp_geu_RI(Word32 Rs, Word32 Iu8) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_cmp_geu_RI __builtin_HEXAGON_C2_cmpgeui /* ========================================================================== Assembly Syntax: Pd4=cmp.gt(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmp_gt_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_cmp_gt_RR __builtin_HEXAGON_C2_cmpgt /* ========================================================================== Assembly Syntax: Pd4=cmp.gt(Rs32,#s10) C Intrinsic Prototype: Byte Q6_p_cmp_gt_RI(Word32 Rs, Word32 Is10) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_cmp_gt_RI __builtin_HEXAGON_C2_cmpgti /* ========================================================================== Assembly Syntax: Pd4=cmp.gt(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_cmp_gt_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmp_gt_PP __builtin_HEXAGON_C2_cmpgtp /* ========================================================================== Assembly Syntax: Pd4=cmp.gtu(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmp_gtu_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_cmp_gtu_RR __builtin_HEXAGON_C2_cmpgtu /* ========================================================================== Assembly Syntax: Pd4=cmp.gtu(Rs32,#u9) C Intrinsic Prototype: Byte Q6_p_cmp_gtu_RI(Word32 Rs, Word32 Iu9) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_cmp_gtu_RI __builtin_HEXAGON_C2_cmpgtui /* ========================================================================== Assembly Syntax: Pd4=cmp.gtu(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_cmp_gtu_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmp_gtu_PP __builtin_HEXAGON_C2_cmpgtup /* ========================================================================== Assembly Syntax: Pd4=cmp.lt(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmp_lt_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_cmp_lt_RR __builtin_HEXAGON_C2_cmplt /* ========================================================================== Assembly Syntax: Pd4=cmp.ltu(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmp_ltu_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 
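  Usage sketch (illustrative only): the scalar compares in this group return
  the predicate as a Byte value that is zero when false and non-zero when
  true, so in C it is simplest to test the result for non-zero rather than
  assume a particular bit pattern. The helper name clamp_to_limit is
  hypothetical; unsigned stands in for Word32.

      static inline unsigned clamp_to_limit(unsigned value, unsigned limit)
      {
          // Pd4=cmp.gtu(Rs32,Rt32): unsigned greater-than compare.
          if (Q6_p_cmp_gtu_RR(value, limit))
              return limit;      // value > limit, so saturate to the limit
          return value;          // otherwise keep the original value
      }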
========================================================================== */ #define Q6_p_cmp_ltu_RR __builtin_HEXAGON_C2_cmpltu /* ========================================================================== Assembly Syntax: Rdd32=mask(Pt4) C Intrinsic Prototype: Word64 Q6_P_mask_p(Byte Pt) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mask_p __builtin_HEXAGON_C2_mask /* ========================================================================== Assembly Syntax: Rd32=mux(Pu4,Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_mux_pRR(Byte Pu, Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_mux_pRR __builtin_HEXAGON_C2_mux /* ========================================================================== Assembly Syntax: Rd32=mux(Pu4,#s8,#S8) C Intrinsic Prototype: Word32 Q6_R_mux_pII(Byte Pu, Word32 Is8, Word32 IS8) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_mux_pII __builtin_HEXAGON_C2_muxii /* ========================================================================== Assembly Syntax: Rd32=mux(Pu4,Rs32,#s8) C Intrinsic Prototype: Word32 Q6_R_mux_pRI(Byte Pu, Word32 Rs, Word32 Is8) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_mux_pRI __builtin_HEXAGON_C2_muxir /* ========================================================================== Assembly Syntax: Rd32=mux(Pu4,#s8,Rs32) C Intrinsic Prototype: Word32 Q6_R_mux_pIR(Byte Pu, Word32 Is8, Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_mux_pIR __builtin_HEXAGON_C2_muxri /* ========================================================================== Assembly Syntax: Pd4=not(Ps4) C Intrinsic Prototype: Byte Q6_p_not_p(Byte Ps) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_not_p __builtin_HEXAGON_C2_not /* ========================================================================== Assembly Syntax: Pd4=or(Pt4,Ps4) C Intrinsic Prototype: Byte Q6_p_or_pp(Byte Pt, Byte Ps) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_or_pp __builtin_HEXAGON_C2_or /* ========================================================================== Assembly Syntax: Pd4=or(Pt4,!Ps4) C Intrinsic Prototype: Byte Q6_p_or_pnp(Byte Pt, Byte Ps) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_or_pnp __builtin_HEXAGON_C2_orn /* ========================================================================== Assembly Syntax: Pd4=Ps4 C Intrinsic Prototype: Byte Q6_p_equals_p(Byte Ps) Instruction Type: MAPPING Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_equals_p __builtin_HEXAGON_C2_pxfer_map /* ========================================================================== Assembly Syntax: Rd32=Ps4 C Intrinsic Prototype: Word32 Q6_R_equals_p(Byte Ps) Instruction Type: S_2op Execution Slots: SLOT23 
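  Usage sketch (illustrative only): Q6_R_mux_pRR pairs naturally with the
  predicate-producing compares above to build branch-free selects; the
  predicate can also be copied into a general register with Q6_R_equals_p
  when its raw bits are needed. The helper name select_smaller is
  hypothetical; int stands in for Word32 and Byte.

      static inline int select_smaller(int a, int b)
      {
          // Pd4=cmp.gt(Rs32,Rt32) produces the predicate ...
          int p = Q6_p_cmp_gt_RR(a, b);

          // ... and Rd32=mux(Pu4,Rs32,Rt32) picks b when the predicate is
          // true (a > b) and a otherwise, with no branch.
          return Q6_R_mux_pRR(p, b, a);
      }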
========================================================================== */ #define Q6_R_equals_p __builtin_HEXAGON_C2_tfrpr /* ========================================================================== Assembly Syntax: Pd4=Rs32 C Intrinsic Prototype: Byte Q6_p_equals_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_equals_R __builtin_HEXAGON_C2_tfrrp /* ========================================================================== Assembly Syntax: Rd32=vitpack(Ps4,Pt4) C Intrinsic Prototype: Word32 Q6_R_vitpack_pp(Byte Ps, Byte Pt) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vitpack_pp __builtin_HEXAGON_C2_vitpack /* ========================================================================== Assembly Syntax: Rdd32=vmux(Pu4,Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vmux_pPP(Byte Pu, Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmux_pPP __builtin_HEXAGON_C2_vmux /* ========================================================================== Assembly Syntax: Pd4=xor(Ps4,Pt4) C Intrinsic Prototype: Byte Q6_p_xor_pp(Byte Ps, Byte Pt) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_xor_pp __builtin_HEXAGON_C2_xor /* ========================================================================== Assembly Syntax: Pd4=and(Ps4,and(Pt4,Pu4)) C Intrinsic Prototype: Byte Q6_p_and_and_ppp(Byte Ps, Byte Pt, Byte Pu) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_and_and_ppp __builtin_HEXAGON_C4_and_and /* ========================================================================== Assembly Syntax: Pd4=and(Ps4,and(Pt4,!Pu4)) C Intrinsic Prototype: Byte Q6_p_and_and_ppnp(Byte Ps, Byte Pt, Byte Pu) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_and_and_ppnp __builtin_HEXAGON_C4_and_andn /* ========================================================================== Assembly Syntax: Pd4=and(Ps4,or(Pt4,Pu4)) C Intrinsic Prototype: Byte Q6_p_and_or_ppp(Byte Ps, Byte Pt, Byte Pu) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_and_or_ppp __builtin_HEXAGON_C4_and_or /* ========================================================================== Assembly Syntax: Pd4=and(Ps4,or(Pt4,!Pu4)) C Intrinsic Prototype: Byte Q6_p_and_or_ppnp(Byte Ps, Byte Pt, Byte Pu) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_and_or_ppnp __builtin_HEXAGON_C4_and_orn /* ========================================================================== Assembly Syntax: Pd4=!cmp.gt(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_not_cmp_gt_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_not_cmp_gt_RR __builtin_HEXAGON_C4_cmplte /* ========================================================================== Assembly Syntax: Pd4=!cmp.gt(Rs32,#s10) C Intrinsic Prototype: Byte 
Q6_p_not_cmp_gt_RI(Word32 Rs, Word32 Is10) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_not_cmp_gt_RI __builtin_HEXAGON_C4_cmpltei /* ========================================================================== Assembly Syntax: Pd4=!cmp.gtu(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_not_cmp_gtu_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_not_cmp_gtu_RR __builtin_HEXAGON_C4_cmplteu /* ========================================================================== Assembly Syntax: Pd4=!cmp.gtu(Rs32,#u9) C Intrinsic Prototype: Byte Q6_p_not_cmp_gtu_RI(Word32 Rs, Word32 Iu9) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_not_cmp_gtu_RI __builtin_HEXAGON_C4_cmplteui /* ========================================================================== Assembly Syntax: Pd4=!cmp.eq(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_not_cmp_eq_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_not_cmp_eq_RR __builtin_HEXAGON_C4_cmpneq /* ========================================================================== Assembly Syntax: Pd4=!cmp.eq(Rs32,#s10) C Intrinsic Prototype: Byte Q6_p_not_cmp_eq_RI(Word32 Rs, Word32 Is10) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_not_cmp_eq_RI __builtin_HEXAGON_C4_cmpneqi /* ========================================================================== Assembly Syntax: Pd4=fastcorner9(Ps4,Pt4) C Intrinsic Prototype: Byte Q6_p_fastcorner9_pp(Byte Ps, Byte Pt) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_fastcorner9_pp __builtin_HEXAGON_C4_fastcorner9 /* ========================================================================== Assembly Syntax: Pd4=!fastcorner9(Ps4,Pt4) C Intrinsic Prototype: Byte Q6_p_not_fastcorner9_pp(Byte Ps, Byte Pt) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_not_fastcorner9_pp __builtin_HEXAGON_C4_fastcorner9_not /* ========================================================================== Assembly Syntax: Pd4=!bitsclr(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_not_bitsclr_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_not_bitsclr_RR __builtin_HEXAGON_C4_nbitsclr /* ========================================================================== Assembly Syntax: Pd4=!bitsclr(Rs32,#u6) C Intrinsic Prototype: Byte Q6_p_not_bitsclr_RI(Word32 Rs, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_not_bitsclr_RI __builtin_HEXAGON_C4_nbitsclri /* ========================================================================== Assembly Syntax: Pd4=!bitsset(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_not_bitsset_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ 
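/* Usage sketch (illustrative only): the negated compares and bit-test
   predicates above fold an inversion into a single instruction, so C code
   does not need a separate logical-not. The helper name any_flag_set is
   hypothetical; int stands in for the Word32 of the prototypes.

       static inline int any_flag_set(int status, int mask)
       {
           // Pd4=!bitsclr(Rs32,Rt32) is true when at least one masked bit
           // of 'status' is set; Pd4=bitsset(Rs32,Rt32), defined earlier in
           // this header, is the complementary "all masked bits set" test.
           return Q6_p_not_bitsclr_RR(status, mask) != 0;
       }
*/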
#define Q6_p_not_bitsset_RR __builtin_HEXAGON_C4_nbitsset /* ========================================================================== Assembly Syntax: Pd4=or(Ps4,and(Pt4,Pu4)) C Intrinsic Prototype: Byte Q6_p_or_and_ppp(Byte Ps, Byte Pt, Byte Pu) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_or_and_ppp __builtin_HEXAGON_C4_or_and /* ========================================================================== Assembly Syntax: Pd4=or(Ps4,and(Pt4,!Pu4)) C Intrinsic Prototype: Byte Q6_p_or_and_ppnp(Byte Ps, Byte Pt, Byte Pu) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_or_and_ppnp __builtin_HEXAGON_C4_or_andn /* ========================================================================== Assembly Syntax: Pd4=or(Ps4,or(Pt4,Pu4)) C Intrinsic Prototype: Byte Q6_p_or_or_ppp(Byte Ps, Byte Pt, Byte Pu) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_or_or_ppp __builtin_HEXAGON_C4_or_or /* ========================================================================== Assembly Syntax: Pd4=or(Ps4,or(Pt4,!Pu4)) C Intrinsic Prototype: Byte Q6_p_or_or_ppnp(Byte Ps, Byte Pt, Byte Pu) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_or_or_ppnp __builtin_HEXAGON_C4_or_orn /* ========================================================================== Assembly Syntax: Rdd32=convert_d2df(Rss32) C Intrinsic Prototype: Float64 Q6_P_convert_d2df_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_d2df_P __builtin_HEXAGON_F2_conv_d2df /* ========================================================================== Assembly Syntax: Rd32=convert_d2sf(Rss32) C Intrinsic Prototype: Float32 Q6_R_convert_d2sf_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_d2sf_P __builtin_HEXAGON_F2_conv_d2sf /* ========================================================================== Assembly Syntax: Rdd32=convert_df2d(Rss32) C Intrinsic Prototype: Word64 Q6_P_convert_df2d_P(Float64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_df2d_P __builtin_HEXAGON_F2_conv_df2d /* ========================================================================== Assembly Syntax: Rdd32=convert_df2d(Rss32):chop C Intrinsic Prototype: Word64 Q6_P_convert_df2d_P_chop(Float64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_df2d_P_chop __builtin_HEXAGON_F2_conv_df2d_chop /* ========================================================================== Assembly Syntax: Rd32=convert_df2sf(Rss32) C Intrinsic Prototype: Float32 Q6_R_convert_df2sf_P(Float64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_df2sf_P __builtin_HEXAGON_F2_conv_df2sf /* ========================================================================== Assembly Syntax: Rdd32=convert_df2ud(Rss32) C Intrinsic Prototype: Word64 
Q6_P_convert_df2ud_P(Float64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_df2ud_P __builtin_HEXAGON_F2_conv_df2ud /* ========================================================================== Assembly Syntax: Rdd32=convert_df2ud(Rss32):chop C Intrinsic Prototype: Word64 Q6_P_convert_df2ud_P_chop(Float64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_df2ud_P_chop __builtin_HEXAGON_F2_conv_df2ud_chop /* ========================================================================== Assembly Syntax: Rd32=convert_df2uw(Rss32) C Intrinsic Prototype: Word32 Q6_R_convert_df2uw_P(Float64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_df2uw_P __builtin_HEXAGON_F2_conv_df2uw /* ========================================================================== Assembly Syntax: Rd32=convert_df2uw(Rss32):chop C Intrinsic Prototype: Word32 Q6_R_convert_df2uw_P_chop(Float64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_df2uw_P_chop __builtin_HEXAGON_F2_conv_df2uw_chop /* ========================================================================== Assembly Syntax: Rd32=convert_df2w(Rss32) C Intrinsic Prototype: Word32 Q6_R_convert_df2w_P(Float64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_df2w_P __builtin_HEXAGON_F2_conv_df2w /* ========================================================================== Assembly Syntax: Rd32=convert_df2w(Rss32):chop C Intrinsic Prototype: Word32 Q6_R_convert_df2w_P_chop(Float64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_df2w_P_chop __builtin_HEXAGON_F2_conv_df2w_chop /* ========================================================================== Assembly Syntax: Rdd32=convert_sf2d(Rs32) C Intrinsic Prototype: Word64 Q6_P_convert_sf2d_R(Float32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_sf2d_R __builtin_HEXAGON_F2_conv_sf2d /* ========================================================================== Assembly Syntax: Rdd32=convert_sf2d(Rs32):chop C Intrinsic Prototype: Word64 Q6_P_convert_sf2d_R_chop(Float32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_sf2d_R_chop __builtin_HEXAGON_F2_conv_sf2d_chop /* ========================================================================== Assembly Syntax: Rdd32=convert_sf2df(Rs32) C Intrinsic Prototype: Float64 Q6_P_convert_sf2df_R(Float32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_sf2df_R __builtin_HEXAGON_F2_conv_sf2df /* ========================================================================== Assembly Syntax: Rdd32=convert_sf2ud(Rs32) C Intrinsic Prototype: Word64 Q6_P_convert_sf2ud_R(Float32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 
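  Usage sketch (illustrative only): each convert_* operation in this group has
  a default-rounding form and a :chop form; :chop truncates toward zero,
  which matches a C cast from floating point to an integer type, while the
  default form rounds according to the current rounding mode. The helper
  names below are hypothetical; double / int stand in for Float64 / Word32.

      static inline int round_to_i32(double x)
      {
          // Rd32=convert_df2w(Rss32): convert with the current rounding
          // mode (round-to-nearest-even unless changed).
          return Q6_R_convert_df2w_P(x);
      }

      static inline int truncate_to_i32(double x)
      {
          // Rd32=convert_df2w(Rss32):chop: truncate toward zero, like
          // (int)x in C.
          return Q6_R_convert_df2w_P_chop(x);
      }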
========================================================================== */ #define Q6_P_convert_sf2ud_R __builtin_HEXAGON_F2_conv_sf2ud /* ========================================================================== Assembly Syntax: Rdd32=convert_sf2ud(Rs32):chop C Intrinsic Prototype: Word64 Q6_P_convert_sf2ud_R_chop(Float32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_sf2ud_R_chop __builtin_HEXAGON_F2_conv_sf2ud_chop /* ========================================================================== Assembly Syntax: Rd32=convert_sf2uw(Rs32) C Intrinsic Prototype: Word32 Q6_R_convert_sf2uw_R(Float32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_sf2uw_R __builtin_HEXAGON_F2_conv_sf2uw /* ========================================================================== Assembly Syntax: Rd32=convert_sf2uw(Rs32):chop C Intrinsic Prototype: Word32 Q6_R_convert_sf2uw_R_chop(Float32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_sf2uw_R_chop __builtin_HEXAGON_F2_conv_sf2uw_chop /* ========================================================================== Assembly Syntax: Rd32=convert_sf2w(Rs32) C Intrinsic Prototype: Word32 Q6_R_convert_sf2w_R(Float32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_sf2w_R __builtin_HEXAGON_F2_conv_sf2w /* ========================================================================== Assembly Syntax: Rd32=convert_sf2w(Rs32):chop C Intrinsic Prototype: Word32 Q6_R_convert_sf2w_R_chop(Float32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_sf2w_R_chop __builtin_HEXAGON_F2_conv_sf2w_chop /* ========================================================================== Assembly Syntax: Rdd32=convert_ud2df(Rss32) C Intrinsic Prototype: Float64 Q6_P_convert_ud2df_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_ud2df_P __builtin_HEXAGON_F2_conv_ud2df /* ========================================================================== Assembly Syntax: Rd32=convert_ud2sf(Rss32) C Intrinsic Prototype: Float32 Q6_R_convert_ud2sf_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_ud2sf_P __builtin_HEXAGON_F2_conv_ud2sf /* ========================================================================== Assembly Syntax: Rdd32=convert_uw2df(Rs32) C Intrinsic Prototype: Float64 Q6_P_convert_uw2df_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_uw2df_R __builtin_HEXAGON_F2_conv_uw2df /* ========================================================================== Assembly Syntax: Rd32=convert_uw2sf(Rs32) C Intrinsic Prototype: Float32 Q6_R_convert_uw2sf_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_uw2sf_R __builtin_HEXAGON_F2_conv_uw2sf /* 
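  Usage sketch (illustrative only): the unsigned-to-float conversions defined
  just above behave like a C cast from an unsigned integer type. The helper
  name average_of_counts is hypothetical (and assumes n is non-zero);
  unsigned / float stand in for Word32 / Float32.

      static inline float average_of_counts(unsigned total, unsigned n)
      {
          // Rd32=convert_uw2sf(Rs32): unsigned 32-bit integer to
          // single-precision float.
          float t = Q6_R_convert_uw2sf_R(total);
          float c = Q6_R_convert_uw2sf_R(n);
          return t / c;   // ordinary FP divide on the converted values
      }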
========================================================================== Assembly Syntax: Rdd32=convert_w2df(Rs32) C Intrinsic Prototype: Float64 Q6_P_convert_w2df_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_w2df_R __builtin_HEXAGON_F2_conv_w2df /* ========================================================================== Assembly Syntax: Rd32=convert_w2sf(Rs32) C Intrinsic Prototype: Float32 Q6_R_convert_w2sf_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_w2sf_R __builtin_HEXAGON_F2_conv_w2sf /* ========================================================================== Assembly Syntax: Pd4=dfclass(Rss32,#u5) C Intrinsic Prototype: Byte Q6_p_dfclass_PI(Float64 Rss, Word32 Iu5) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_dfclass_PI __builtin_HEXAGON_F2_dfclass /* ========================================================================== Assembly Syntax: Pd4=dfcmp.eq(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_dfcmp_eq_PP(Float64 Rss, Float64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_dfcmp_eq_PP __builtin_HEXAGON_F2_dfcmpeq /* ========================================================================== Assembly Syntax: Pd4=dfcmp.ge(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_dfcmp_ge_PP(Float64 Rss, Float64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_dfcmp_ge_PP __builtin_HEXAGON_F2_dfcmpge /* ========================================================================== Assembly Syntax: Pd4=dfcmp.gt(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_dfcmp_gt_PP(Float64 Rss, Float64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_dfcmp_gt_PP __builtin_HEXAGON_F2_dfcmpgt /* ========================================================================== Assembly Syntax: Pd4=dfcmp.uo(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_dfcmp_uo_PP(Float64 Rss, Float64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_dfcmp_uo_PP __builtin_HEXAGON_F2_dfcmpuo /* ========================================================================== Assembly Syntax: Rdd32=dfmake(#u10):neg C Intrinsic Prototype: Float64 Q6_P_dfmake_I_neg(Word32 Iu10) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_dfmake_I_neg __builtin_HEXAGON_F2_dfimm_n /* ========================================================================== Assembly Syntax: Rdd32=dfmake(#u10):pos C Intrinsic Prototype: Float64 Q6_P_dfmake_I_pos(Word32 Iu10) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_dfmake_I_pos __builtin_HEXAGON_F2_dfimm_p /* ========================================================================== Assembly Syntax: Rd32=sfadd(Rs32,Rt32) C Intrinsic Prototype: Float32 Q6_R_sfadd_RR(Float32 Rs, Float32 Rt) Instruction Type: M Execution Slots: SLOT23 
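  Usage sketch (illustrative only): dfcmp.uo above is true when either input
  is NaN, which gives a direct way to reject invalid data before doing
  arithmetic with sfadd and friends. The helper name checked_sum is
  hypothetical; double / float stand in for Float64 / Float32.

      static inline float checked_sum(double a, double b, float fallback)
      {
          // Pd4=dfcmp.uo(Rss32,Rtt32): unordered compare, true iff a or b
          // is NaN.
          if (Q6_p_dfcmp_uo_PP(a, b))
              return fallback;

          // Rd32=sfadd(Rs32,Rt32): single-precision add of the narrowed
          // values (Rd32=convert_df2sf is defined earlier in this header).
          return Q6_R_sfadd_RR(Q6_R_convert_df2sf_P(a),
                               Q6_R_convert_df2sf_P(b));
      }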
========================================================================== */ #define Q6_R_sfadd_RR __builtin_HEXAGON_F2_sfadd /* ========================================================================== Assembly Syntax: Pd4=sfclass(Rs32,#u5) C Intrinsic Prototype: Byte Q6_p_sfclass_RI(Float32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_sfclass_RI __builtin_HEXAGON_F2_sfclass /* ========================================================================== Assembly Syntax: Pd4=sfcmp.eq(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_sfcmp_eq_RR(Float32 Rs, Float32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_sfcmp_eq_RR __builtin_HEXAGON_F2_sfcmpeq /* ========================================================================== Assembly Syntax: Pd4=sfcmp.ge(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_sfcmp_ge_RR(Float32 Rs, Float32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_sfcmp_ge_RR __builtin_HEXAGON_F2_sfcmpge /* ========================================================================== Assembly Syntax: Pd4=sfcmp.gt(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_sfcmp_gt_RR(Float32 Rs, Float32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_sfcmp_gt_RR __builtin_HEXAGON_F2_sfcmpgt /* ========================================================================== Assembly Syntax: Pd4=sfcmp.uo(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_sfcmp_uo_RR(Float32 Rs, Float32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_sfcmp_uo_RR __builtin_HEXAGON_F2_sfcmpuo /* ========================================================================== Assembly Syntax: Rd32=sffixupd(Rs32,Rt32) C Intrinsic Prototype: Float32 Q6_R_sffixupd_RR(Float32 Rs, Float32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sffixupd_RR __builtin_HEXAGON_F2_sffixupd /* ========================================================================== Assembly Syntax: Rd32=sffixupn(Rs32,Rt32) C Intrinsic Prototype: Float32 Q6_R_sffixupn_RR(Float32 Rs, Float32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sffixupn_RR __builtin_HEXAGON_F2_sffixupn /* ========================================================================== Assembly Syntax: Rd32=sffixupr(Rs32) C Intrinsic Prototype: Float32 Q6_R_sffixupr_R(Float32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sffixupr_R __builtin_HEXAGON_F2_sffixupr /* ========================================================================== Assembly Syntax: Rx32+=sfmpy(Rs32,Rt32) C Intrinsic Prototype: Float32 Q6_R_sfmpyacc_RR(Float32 Rx, Float32 Rs, Float32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sfmpyacc_RR __builtin_HEXAGON_F2_sffma /* ========================================================================== Assembly Syntax: Rx32+=sfmpy(Rs32,Rt32):lib C 
   Intrinsic Prototype: Float32 Q6_R_sfmpyacc_RR_lib(Float32 Rx, Float32 Rs, Float32 Rt)
   Instruction Type:      M
   Execution Slots:       SLOT23
   ========================================================================== */
#define Q6_R_sfmpyacc_RR_lib __builtin_HEXAGON_F2_sffma_lib

/* ==========================================================================
   Assembly Syntax:       Rx32+=sfmpy(Rs32,Rt32,Pu4):scale
   C Intrinsic Prototype: Float32 Q6_R_sfmpyacc_RRp_scale(Float32 Rx, Float32 Rs, Float32 Rt, Byte Pu)
   Instruction Type:      M
   Execution Slots:       SLOT23
   ========================================================================== */
#define Q6_R_sfmpyacc_RRp_scale __builtin_HEXAGON_F2_sffma_sc

/* ==========================================================================
   Assembly Syntax:       Rx32-=sfmpy(Rs32,Rt32)
   C Intrinsic Prototype: Float32 Q6_R_sfmpynac_RR(Float32 Rx, Float32 Rs, Float32 Rt)
   Instruction Type:      M
   Execution Slots:       SLOT23
   ========================================================================== */
#define Q6_R_sfmpynac_RR __builtin_HEXAGON_F2_sffms

/* ==========================================================================
   Assembly Syntax:       Rx32-=sfmpy(Rs32,Rt32):lib
   C Intrinsic Prototype: Float32 Q6_R_sfmpynac_RR_lib(Float32 Rx, Float32 Rs, Float32 Rt)
   Instruction Type:      M
   Execution Slots:       SLOT23
   ========================================================================== */
#define Q6_R_sfmpynac_RR_lib __builtin_HEXAGON_F2_sffms_lib

/* ==========================================================================
   Assembly Syntax:       Rd32=sfmake(#u10):neg
   C Intrinsic Prototype: Float32 Q6_R_sfmake_I_neg(Word32 Iu10)
   Instruction Type:      ALU64
   Execution Slots:       SLOT23
   ========================================================================== */
#define Q6_R_sfmake_I_neg __builtin_HEXAGON_F2_sfimm_n

/* ==========================================================================
   Assembly Syntax:       Rd32=sfmake(#u10):pos
   C Intrinsic Prototype: Float32 Q6_R_sfmake_I_pos(Word32 Iu10)
   Instruction Type:      ALU64
   Execution Slots:       SLOT23
   ========================================================================== */
#define Q6_R_sfmake_I_pos __builtin_HEXAGON_F2_sfimm_p

/* ==========================================================================
   Assembly Syntax:       Rd32=sfmax(Rs32,Rt32)
   C Intrinsic Prototype: Float32 Q6_R_sfmax_RR(Float32 Rs, Float32 Rt)
   Instruction Type:      M
   Execution Slots:       SLOT23
   ========================================================================== */
#define Q6_R_sfmax_RR __builtin_HEXAGON_F2_sfmax

/* ==========================================================================
   Assembly Syntax:       Rd32=sfmin(Rs32,Rt32)
   C Intrinsic Prototype: Float32 Q6_R_sfmin_RR(Float32 Rs, Float32 Rt)
   Instruction Type:      M
   Execution Slots:       SLOT23
   ========================================================================== */
#define Q6_R_sfmin_RR __builtin_HEXAGON_F2_sfmin

/* ==========================================================================
   Assembly Syntax:       Rd32=sfmpy(Rs32,Rt32)
   C Intrinsic Prototype: Float32 Q6_R_sfmpy_RR(Float32 Rs, Float32 Rt)
   Instruction Type:      M
   Execution Slots:       SLOT23
   ========================================================================== */
#define Q6_R_sfmpy_RR __builtin_HEXAGON_F2_sfmpy

/* ==========================================================================
   Assembly Syntax:       Rd32=sfsub(Rs32,Rt32)
   C Intrinsic Prototype: Float32 Q6_R_sfsub_RR(Float32 Rs, Float32 Rt)
   Instruction Type:      M
   Execution Slots:       SLOT23
   ========================================================================== */
#define Q6_R_sfsub_RR __builtin_HEXAGON_F2_sfsub

/* ==========================================================================
   Assembly Syntax:       Rd32=memb(Rx32++#s4:0:circ(Mu2))
   C Intrinsic Prototype: Word32 Q6_R_memb_IM_circ(void** Rx, Word32 Is4_0, Word32 Mu, void* BaseAddress)
   Instruction Type:      LD
   Execution Slots:       SLOT01
   ========================================================================== */
#define Q6_R_memb_IM_circ __builtin_HEXAGON_L2_loadrb_pci

/* ==========================================================================
   Assembly Syntax:       Rd32=memb(Rx32++I:circ(Mu2))
   C Intrinsic Prototype: Word32 Q6_R_memb_M_circ(void** Rx, Word32 Mu, void* BaseAddress)
   Instruction Type:      LD
   Execution Slots:       SLOT01
   ========================================================================== */
#define Q6_R_memb_M_circ __builtin_HEXAGON_L2_loadrb_pcr

/* ==========================================================================
   Assembly Syntax:       Rdd32=memd(Rx32++#s4:3:circ(Mu2))
   C Intrinsic Prototype: Word64 Q6_P_memd_IM_circ(void** Rx, Word32 Is4_3, Word32 Mu, void* BaseAddress)
   Instruction Type:      LD
   Execution Slots:       SLOT01
   ========================================================================== */
#define Q6_P_memd_IM_circ __builtin_HEXAGON_L2_loadrd_pci

/* ==========================================================================
   Assembly Syntax:       Rdd32=memd(Rx32++I:circ(Mu2))
   C Intrinsic Prototype: Word64 Q6_P_memd_M_circ(void** Rx, Word32 Mu, void* BaseAddress)
   Instruction Type:      LD
   Execution Slots:       SLOT01
   ========================================================================== */
#define Q6_P_memd_M_circ __builtin_HEXAGON_L2_loadrd_pcr

/* ==========================================================================
   Assembly Syntax:       Rd32=memh(Rx32++#s4:1:circ(Mu2))
   C Intrinsic Prototype: Word32 Q6_R_memh_IM_circ(void** Rx, Word32 Is4_1, Word32 Mu, void* BaseAddress)
   Instruction Type:      LD
   Execution Slots:       SLOT01
   ========================================================================== */
#define Q6_R_memh_IM_circ __builtin_HEXAGON_L2_loadrh_pci

/* ==========================================================================
   Assembly Syntax:       Rd32=memh(Rx32++I:circ(Mu2))
   C Intrinsic Prototype: Word32 Q6_R_memh_M_circ(void** Rx, Word32 Mu, void* BaseAddress)
   Instruction Type:      LD
   Execution Slots:       SLOT01
   ========================================================================== */
#define Q6_R_memh_M_circ __builtin_HEXAGON_L2_loadrh_pcr

/* ==========================================================================
   Assembly Syntax:       Rd32=memw(Rx32++#s4:2:circ(Mu2))
   C Intrinsic Prototype: Word32 Q6_R_memw_IM_circ(void** Rx, Word32 Is4_2, Word32 Mu, void* BaseAddress)
   Instruction Type:      LD
   Execution Slots:       SLOT01
   ========================================================================== */
#define Q6_R_memw_IM_circ __builtin_HEXAGON_L2_loadri_pci

/* ==========================================================================
   Assembly Syntax:       Rd32=memw(Rx32++I:circ(Mu2))
   C Intrinsic Prototype: Word32 Q6_R_memw_M_circ(void** Rx, Word32 Mu, void* BaseAddress)
   Instruction Type:      LD
   Execution Slots:       SLOT01
   ========================================================================== */
#define Q6_R_memw_M_circ __builtin_HEXAGON_L2_loadri_pcr

/* ==========================================================================
   Assembly Syntax:       Rd32=memub(Rx32++#s4:0:circ(Mu2))
   C Intrinsic Prototype: Word32 Q6_R_memub_IM_circ(void** Rx, Word32 Is4_0, Word32 Mu, void* BaseAddress)
   Instruction Type:      LD
   Execution Slots:       SLOT01
========================================================================== */ #define Q6_R_memub_IM_circ __builtin_HEXAGON_L2_loadrub_pci /* ========================================================================== Assembly Syntax: Rd32=memub(Rx32++I:circ(Mu2)) C Intrinsic Prototype: Word32 Q6_R_memub_M_circ(void** Rx, Word32 Mu, void* BaseAddress) Instruction Type: LD Execution Slots: SLOT01 ========================================================================== */ #define Q6_R_memub_M_circ __builtin_HEXAGON_L2_loadrub_pcr /* ========================================================================== Assembly Syntax: Rd32=memuh(Rx32++#s4:1:circ(Mu2)) C Intrinsic Prototype: Word32 Q6_R_memuh_IM_circ(void** Rx, Word32 Is4_1, Word32 Mu, void* BaseAddress) Instruction Type: LD Execution Slots: SLOT01 ========================================================================== */ #define Q6_R_memuh_IM_circ __builtin_HEXAGON_L2_loadruh_pci /* ========================================================================== Assembly Syntax: Rd32=memuh(Rx32++I:circ(Mu2)) C Intrinsic Prototype: Word32 Q6_R_memuh_M_circ(void** Rx, Word32 Mu, void* BaseAddress) Instruction Type: LD Execution Slots: SLOT01 ========================================================================== */ #define Q6_R_memuh_M_circ __builtin_HEXAGON_L2_loadruh_pcr /* ========================================================================== Assembly Syntax: Rx32+=add(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_addacc_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_addacc_RR __builtin_HEXAGON_M2_acci /* ========================================================================== Assembly Syntax: Rx32+=add(Rs32,#s8) C Intrinsic Prototype: Word32 Q6_R_addacc_RI(Word32 Rx, Word32 Rs, Word32 Is8) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_addacc_RI __builtin_HEXAGON_M2_accii /* ========================================================================== Assembly Syntax: Rxx32+=cmpyi(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_cmpyiacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpyiacc_RR __builtin_HEXAGON_M2_cmaci_s0 /* ========================================================================== Assembly Syntax: Rxx32+=cmpyr(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_cmpyracc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpyracc_RR __builtin_HEXAGON_M2_cmacr_s0 /* ========================================================================== Assembly Syntax: Rxx32+=cmpy(Rs32,Rt32):sat C Intrinsic Prototype: Word64 Q6_P_cmpyacc_RR_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpyacc_RR_sat __builtin_HEXAGON_M2_cmacs_s0 /* ========================================================================== Assembly Syntax: Rxx32+=cmpy(Rs32,Rt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_cmpyacc_RR_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ 
#define Q6_P_cmpyacc_RR_s1_sat __builtin_HEXAGON_M2_cmacs_s1 /* ========================================================================== Assembly Syntax: Rxx32+=cmpy(Rs32,Rt32*):sat C Intrinsic Prototype: Word64 Q6_P_cmpyacc_RR_conj_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpyacc_RR_conj_sat __builtin_HEXAGON_M2_cmacsc_s0 /* ========================================================================== Assembly Syntax: Rxx32+=cmpy(Rs32,Rt32*):<<1:sat C Intrinsic Prototype: Word64 Q6_P_cmpyacc_RR_conj_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpyacc_RR_conj_s1_sat __builtin_HEXAGON_M2_cmacsc_s1 /* ========================================================================== Assembly Syntax: Rdd32=cmpyi(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_cmpyi_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpyi_RR __builtin_HEXAGON_M2_cmpyi_s0 /* ========================================================================== Assembly Syntax: Rdd32=cmpyr(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_cmpyr_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpyr_RR __builtin_HEXAGON_M2_cmpyr_s0 /* ========================================================================== Assembly Syntax: Rd32=cmpy(Rs32,Rt32):rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpy_RR_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cmpy_RR_rnd_sat __builtin_HEXAGON_M2_cmpyrs_s0 /* ========================================================================== Assembly Syntax: Rd32=cmpy(Rs32,Rt32):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpy_RR_s1_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cmpy_RR_s1_rnd_sat __builtin_HEXAGON_M2_cmpyrs_s1 /* ========================================================================== Assembly Syntax: Rd32=cmpy(Rs32,Rt32*):rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpy_RR_conj_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cmpy_RR_conj_rnd_sat __builtin_HEXAGON_M2_cmpyrsc_s0 /* ========================================================================== Assembly Syntax: Rd32=cmpy(Rs32,Rt32*):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpy_RR_conj_s1_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cmpy_RR_conj_s1_rnd_sat __builtin_HEXAGON_M2_cmpyrsc_s1 /* ========================================================================== Assembly Syntax: Rdd32=cmpy(Rs32,Rt32):sat C Intrinsic Prototype: Word64 Q6_P_cmpy_RR_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpy_RR_sat __builtin_HEXAGON_M2_cmpys_s0 /* 
========================================================================== Assembly Syntax: Rdd32=cmpy(Rs32,Rt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_cmpy_RR_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpy_RR_s1_sat __builtin_HEXAGON_M2_cmpys_s1 /* ========================================================================== Assembly Syntax: Rdd32=cmpy(Rs32,Rt32*):sat C Intrinsic Prototype: Word64 Q6_P_cmpy_RR_conj_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpy_RR_conj_sat __builtin_HEXAGON_M2_cmpysc_s0 /* ========================================================================== Assembly Syntax: Rdd32=cmpy(Rs32,Rt32*):<<1:sat C Intrinsic Prototype: Word64 Q6_P_cmpy_RR_conj_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpy_RR_conj_s1_sat __builtin_HEXAGON_M2_cmpysc_s1 /* ========================================================================== Assembly Syntax: Rxx32-=cmpy(Rs32,Rt32):sat C Intrinsic Prototype: Word64 Q6_P_cmpynac_RR_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpynac_RR_sat __builtin_HEXAGON_M2_cnacs_s0 /* ========================================================================== Assembly Syntax: Rxx32-=cmpy(Rs32,Rt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_cmpynac_RR_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpynac_RR_s1_sat __builtin_HEXAGON_M2_cnacs_s1 /* ========================================================================== Assembly Syntax: Rxx32-=cmpy(Rs32,Rt32*):sat C Intrinsic Prototype: Word64 Q6_P_cmpynac_RR_conj_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpynac_RR_conj_sat __builtin_HEXAGON_M2_cnacsc_s0 /* ========================================================================== Assembly Syntax: Rxx32-=cmpy(Rs32,Rt32*):<<1:sat C Intrinsic Prototype: Word64 Q6_P_cmpynac_RR_conj_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpynac_RR_conj_s1_sat __builtin_HEXAGON_M2_cnacsc_s1 /* ========================================================================== Assembly Syntax: Rxx32+=mpy(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_mpyacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyacc_RR __builtin_HEXAGON_M2_dpmpyss_acc_s0 /* ========================================================================== Assembly Syntax: Rxx32-=mpy(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_mpynac_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpynac_RR __builtin_HEXAGON_M2_dpmpyss_nac_s0 /* 
========================================================================== Assembly Syntax: Rd32=mpy(Rs32,Rt32):rnd C Intrinsic Prototype: Word32 Q6_R_mpy_RR_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RR_rnd __builtin_HEXAGON_M2_dpmpyss_rnd_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_mpy_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RR __builtin_HEXAGON_M2_dpmpyss_s0 /* ========================================================================== Assembly Syntax: Rxx32+=mpyu(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyuacc_RR __builtin_HEXAGON_M2_dpmpyuu_acc_s0 /* ========================================================================== Assembly Syntax: Rxx32-=mpyu(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_mpyunac_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyunac_RR __builtin_HEXAGON_M2_dpmpyuu_nac_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpyu(Rs32,Rt32) C Intrinsic Prototype: UWord64 Q6_P_mpyu_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyu_RR __builtin_HEXAGON_M2_dpmpyuu_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32,Rt32.h):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RRh_s1_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RRh_s1_rnd_sat __builtin_HEXAGON_M2_hmmpyh_rs1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32,Rt32.h):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RRh_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RRh_s1_sat __builtin_HEXAGON_M2_hmmpyh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32,Rt32.l):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RRl_s1_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RRl_s1_rnd_sat __builtin_HEXAGON_M2_hmmpyl_rs1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32,Rt32.l):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RRl_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RRl_s1_sat __builtin_HEXAGON_M2_hmmpyl_s1 /* ========================================================================== Assembly Syntax: Rx32+=mpyi(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_mpyiacc_RR(Word32 Rx, 
Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyiacc_RR __builtin_HEXAGON_M2_maci /* ========================================================================== Assembly Syntax: Rx32-=mpyi(Rs32,#u8) C Intrinsic Prototype: Word32 Q6_R_mpyinac_RI(Word32 Rx, Word32 Rs, Word32 Iu8) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyinac_RI __builtin_HEXAGON_M2_macsin /* ========================================================================== Assembly Syntax: Rx32+=mpyi(Rs32,#u8) C Intrinsic Prototype: Word32 Q6_R_mpyiacc_RI(Word32 Rx, Word32 Rs, Word32 Iu8) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyiacc_RI __builtin_HEXAGON_M2_macsip /* ========================================================================== Assembly Syntax: Rxx32+=vmpywoh(Rss32,Rtt32):rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpywohacc_PP_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywohacc_PP_rnd_sat __builtin_HEXAGON_M2_mmachs_rs0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpywoh(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpywohacc_PP_s1_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywohacc_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmachs_rs1 /* ========================================================================== Assembly Syntax: Rxx32+=vmpywoh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpywohacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywohacc_PP_sat __builtin_HEXAGON_M2_mmachs_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpywoh(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpywohacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywohacc_PP_s1_sat __builtin_HEXAGON_M2_mmachs_s1 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyweh(Rss32,Rtt32):rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpywehacc_PP_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywehacc_PP_rnd_sat __builtin_HEXAGON_M2_mmacls_rs0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyweh(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpywehacc_PP_s1_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywehacc_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmacls_rs1 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyweh(Rss32,Rtt32):sat C Intrinsic 
Prototype: Word64 Q6_P_vmpywehacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywehacc_PP_sat __builtin_HEXAGON_M2_mmacls_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyweh(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpywehacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywehacc_PP_s1_sat __builtin_HEXAGON_M2_mmacls_s1 /* ========================================================================== Assembly Syntax: Rxx32+=vmpywouh(Rss32,Rtt32):rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpywouhacc_PP_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywouhacc_PP_rnd_sat __builtin_HEXAGON_M2_mmacuhs_rs0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpywouh(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpywouhacc_PP_s1_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywouhacc_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmacuhs_rs1 /* ========================================================================== Assembly Syntax: Rxx32+=vmpywouh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpywouhacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywouhacc_PP_sat __builtin_HEXAGON_M2_mmacuhs_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpywouh(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpywouhacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywouhacc_PP_s1_sat __builtin_HEXAGON_M2_mmacuhs_s1 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyweuh(Rss32,Rtt32):rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpyweuhacc_PP_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweuhacc_PP_rnd_sat __builtin_HEXAGON_M2_mmaculs_rs0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpyweuhacc_PP_s1_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweuhacc_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmaculs_rs1 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyweuh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpyweuhacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweuhacc_PP_sat 
__builtin_HEXAGON_M2_mmaculs_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpyweuhacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweuhacc_PP_s1_sat __builtin_HEXAGON_M2_mmaculs_s1 /* ========================================================================== Assembly Syntax: Rdd32=vmpywoh(Rss32,Rtt32):rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpywoh_PP_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywoh_PP_rnd_sat __builtin_HEXAGON_M2_mmpyh_rs0 /* ========================================================================== Assembly Syntax: Rdd32=vmpywoh(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpywoh_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywoh_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmpyh_rs1 /* ========================================================================== Assembly Syntax: Rdd32=vmpywoh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpywoh_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywoh_PP_sat __builtin_HEXAGON_M2_mmpyh_s0 /* ========================================================================== Assembly Syntax: Rdd32=vmpywoh(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpywoh_PP_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywoh_PP_s1_sat __builtin_HEXAGON_M2_mmpyh_s1 /* ========================================================================== Assembly Syntax: Rdd32=vmpyweh(Rss32,Rtt32):rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpyweh_PP_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweh_PP_rnd_sat __builtin_HEXAGON_M2_mmpyl_rs0 /* ========================================================================== Assembly Syntax: Rdd32=vmpyweh(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpyweh_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweh_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmpyl_rs1 /* ========================================================================== Assembly Syntax: Rdd32=vmpyweh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpyweh_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweh_PP_sat __builtin_HEXAGON_M2_mmpyl_s0 /* ========================================================================== Assembly Syntax: Rdd32=vmpyweh(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpyweh_PP_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweh_PP_s1_sat 
__builtin_HEXAGON_M2_mmpyl_s1 /* ========================================================================== Assembly Syntax: Rdd32=vmpywouh(Rss32,Rtt32):rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpywouh_PP_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywouh_PP_rnd_sat __builtin_HEXAGON_M2_mmpyuh_rs0 /* ========================================================================== Assembly Syntax: Rdd32=vmpywouh(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpywouh_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywouh_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmpyuh_rs1 /* ========================================================================== Assembly Syntax: Rdd32=vmpywouh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpywouh_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywouh_PP_sat __builtin_HEXAGON_M2_mmpyuh_s0 /* ========================================================================== Assembly Syntax: Rdd32=vmpywouh(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpywouh_PP_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywouh_PP_s1_sat __builtin_HEXAGON_M2_mmpyuh_s1 /* ========================================================================== Assembly Syntax: Rdd32=vmpyweuh(Rss32,Rtt32):rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpyweuh_PP_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweuh_PP_rnd_sat __builtin_HEXAGON_M2_mmpyul_rs0 /* ========================================================================== Assembly Syntax: Rdd32=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpyweuh_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweuh_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmpyul_rs1 /* ========================================================================== Assembly Syntax: Rdd32=vmpyweuh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpyweuh_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweuh_PP_sat __builtin_HEXAGON_M2_mmpyul_s0 /* ========================================================================== Assembly Syntax: Rdd32=vmpyweuh(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpyweuh_PP_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweuh_PP_s1_sat __builtin_HEXAGON_M2_mmpyul_s1 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.h,Rt32.h) C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRh(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RhRh 
__builtin_HEXAGON_M2_mpy_acc_hh_s0

/* ==========================================================================
   Assembly Syntax:       Rx32+=mpy(Rs32.h,Rt32.h):<<1
   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRh_s1(Word32 Rx, Word32 Rs, Word32 Rt)
   Instruction Type:      M
   Execution Slots:       SLOT23
   ========================================================================== */
#define Q6_R_mpyacc_RhRh_s1 __builtin_HEXAGON_M2_mpy_acc_hh_s1

/* ==========================================================================
   Assembly Syntax:       Rx32+=mpy(Rs32.h,Rt32.l)
   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRl(Word32 Rx, Word32 Rs, Word32 Rt)
   Instruction Type:      M
   Execution Slots:       SLOT23
   ========================================================================== */
#define Q6_R_mpyacc_RhRl __builtin_HEXAGON_M2_mpy_acc_hl_s0

/* ==========================================================================
   Assembly Syntax:       Rx32+=mpy(Rs32.h,Rt32.l):<<1
   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRl_s1(Word32 Rx, Word32 Rs, Word32 Rt)
   Instruction Type:      M
   Execution Slots:       SLOT23
   ========================================================================== */
#define Q6_R_mpyacc_RhRl_s1 __builtin_HEXAGON_M2_mpy_acc_hl_s1

/* ==========================================================================
   Assembly Syntax:       Rx32+=mpy(Rs32.l,Rt32.h)
   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRh(Word32 Rx, Word32 Rs, Word32 Rt)
   Instruction Type:      M
   Execution Slots:       SLOT23
   ========================================================================== */
#define Q6_R_mpyacc_RlRh __builtin_HEXAGON_M2_mpy_acc_lh_s0

/* ==========================================================================
   Assembly Syntax:       Rx32+=mpy(Rs32.l,Rt32.h):<<1
   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRh_s1(Word32 Rx, Word32 Rs, Word32 Rt)
   Instruction Type:      M
   Execution Slots:       SLOT23
   ========================================================================== */
#define Q6_R_mpyacc_RlRh_s1 __builtin_HEXAGON_M2_mpy_acc_lh_s1

/* ==========================================================================
   Assembly Syntax:       Rx32+=mpy(Rs32.l,Rt32.l)
   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRl(Word32 Rx, Word32 Rs, Word32 Rt)
   Instruction Type:      M
   Execution Slots:       SLOT23
   ========================================================================== */
#define Q6_R_mpyacc_RlRl __builtin_HEXAGON_M2_mpy_acc_ll_s0

/* ==========================================================================
   Assembly Syntax:       Rx32+=mpy(Rs32.l,Rt32.l):<<1
   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRl_s1(Word32 Rx, Word32 Rs, Word32 Rt)
   Instruction Type:      M
   Execution Slots:       SLOT23
   ========================================================================== */
#define Q6_R_mpyacc_RlRl_s1 __builtin_HEXAGON_M2_mpy_acc_ll_s1

/* ==========================================================================
   Assembly Syntax:       Rx32+=mpy(Rs32.h,Rt32.h):sat
   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRh_sat(Word32 Rx, Word32 Rs, Word32 Rt)
   Instruction Type:      M
   Execution Slots:       SLOT23
   ========================================================================== */
#define Q6_R_mpyacc_RhRh_sat __builtin_HEXAGON_M2_mpy_acc_sat_hh_s0

/* ==========================================================================
   Assembly Syntax:       Rx32+=mpy(Rs32.h,Rt32.h):<<1:sat
   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRh_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt)
   Instruction Type:      M
   Execution Slots:       SLOT23
   ========================================================================== */
#define Q6_R_mpyacc_RhRh_s1_sat __builtin_HEXAGON_M2_mpy_acc_sat_hh_s1
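/* --------------------------------------------------------------------------
   Usage sketch (illustrative only, not part of the generated intrinsic
   list above): the halfword multiply-accumulate macros defined above can
   be called directly from C.  The hypothetical helper below sums the
   products of the packed signed 16-bit halves of two arrays of 32-bit
   words, i.e. acc += a.l*b.l + a.h*b.h per element, using the
   non-saturating forms (the 32-bit accumulator wraps on overflow).  It
   assumes a Hexagon toolchain in which these __builtin_HEXAGON_* builtins
   are available and plain 'int' matches the Word32 type used in the
   prototypes.
   -------------------------------------------------------------------------- */
static inline int Q6_example_dot16x16(const int *a, const int *b, unsigned n)
{
    int acc = 0;
    unsigned i;
    for (i = 0; i < n; ++i) {
        acc = Q6_R_mpyacc_RlRl(acc, a[i], b[i]);   /* Rx32+=mpy(Rs32.l,Rt32.l) */
        acc = Q6_R_mpyacc_RhRh(acc, a[i], b[i]);   /* Rx32+=mpy(Rs32.h,Rt32.h) */
    }
    return acc;
}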
/* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.h,Rt32.l):sat C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRl_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RhRl_sat __builtin_HEXAGON_M2_mpy_acc_sat_hl_s0 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.h,Rt32.l):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRl_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RhRl_s1_sat __builtin_HEXAGON_M2_mpy_acc_sat_hl_s1 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.l,Rt32.h):sat C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRh_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RlRh_sat __builtin_HEXAGON_M2_mpy_acc_sat_lh_s0 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.l,Rt32.h):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRh_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RlRh_s1_sat __builtin_HEXAGON_M2_mpy_acc_sat_lh_s1 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.l,Rt32.l):sat C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRl_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RlRl_sat __builtin_HEXAGON_M2_mpy_acc_sat_ll_s0 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.l,Rt32.l):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRl_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RlRl_s1_sat __builtin_HEXAGON_M2_mpy_acc_sat_ll_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h) C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRh __builtin_HEXAGON_M2_mpy_hh_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRh_s1 __builtin_HEXAGON_M2_mpy_hh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l) C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRl __builtin_HEXAGON_M2_mpy_hl_s0 /* 
========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRl_s1 __builtin_HEXAGON_M2_mpy_hl_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h) C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRh __builtin_HEXAGON_M2_mpy_lh_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRh_s1 __builtin_HEXAGON_M2_mpy_lh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l) C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRl __builtin_HEXAGON_M2_mpy_ll_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRl_s1 __builtin_HEXAGON_M2_mpy_ll_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.h) C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRh(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RhRh __builtin_HEXAGON_M2_mpy_nac_hh_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RhRh_s1 __builtin_HEXAGON_M2_mpy_nac_hh_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.l) C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRl(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RhRl __builtin_HEXAGON_M2_mpy_nac_hl_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RhRl_s1 __builtin_HEXAGON_M2_mpy_nac_hl_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.h) C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRh(Word32 Rx, 
Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RlRh __builtin_HEXAGON_M2_mpy_nac_lh_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RlRh_s1 __builtin_HEXAGON_M2_mpy_nac_lh_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.l) C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRl(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RlRl __builtin_HEXAGON_M2_mpy_nac_ll_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RlRl_s1 __builtin_HEXAGON_M2_mpy_nac_ll_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.h):sat C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRh_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RhRh_sat __builtin_HEXAGON_M2_mpy_nac_sat_hh_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.h):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRh_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RhRh_s1_sat __builtin_HEXAGON_M2_mpy_nac_sat_hh_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.l):sat C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRl_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RhRl_sat __builtin_HEXAGON_M2_mpy_nac_sat_hl_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.l):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRl_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RhRl_s1_sat __builtin_HEXAGON_M2_mpy_nac_sat_hl_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.h):sat C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRh_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RlRh_sat __builtin_HEXAGON_M2_mpy_nac_sat_lh_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.h):<<1:sat C Intrinsic Prototype: Word32 
Q6_R_mpynac_RlRh_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RlRh_s1_sat __builtin_HEXAGON_M2_mpy_nac_sat_lh_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.l):sat C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRl_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RlRl_sat __builtin_HEXAGON_M2_mpy_nac_sat_ll_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.l):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRl_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RlRl_s1_sat __builtin_HEXAGON_M2_mpy_nac_sat_ll_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h):rnd C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRh_rnd __builtin_HEXAGON_M2_mpy_rnd_hh_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h):<<1:rnd C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_s1_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRh_s1_rnd __builtin_HEXAGON_M2_mpy_rnd_hh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):rnd C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRl_rnd __builtin_HEXAGON_M2_mpy_rnd_hl_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):<<1:rnd C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_s1_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRl_s1_rnd __builtin_HEXAGON_M2_mpy_rnd_hl_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):rnd C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRh_rnd __builtin_HEXAGON_M2_mpy_rnd_lh_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):<<1:rnd C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_s1_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRh_s1_rnd __builtin_HEXAGON_M2_mpy_rnd_lh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):rnd C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_rnd(Word32 Rs, Word32 Rt) Instruction Type: M 
Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRl_rnd __builtin_HEXAGON_M2_mpy_rnd_ll_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):<<1:rnd C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_s1_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRl_s1_rnd __builtin_HEXAGON_M2_mpy_rnd_ll_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h):sat C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRh_sat __builtin_HEXAGON_M2_mpy_sat_hh_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRh_s1_sat __builtin_HEXAGON_M2_mpy_sat_hh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):sat C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRl_sat __builtin_HEXAGON_M2_mpy_sat_hl_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRl_s1_sat __builtin_HEXAGON_M2_mpy_sat_hl_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):sat C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRh_sat __builtin_HEXAGON_M2_mpy_sat_lh_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRh_s1_sat __builtin_HEXAGON_M2_mpy_sat_lh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):sat C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRl_sat __builtin_HEXAGON_M2_mpy_sat_ll_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define 
Q6_R_mpy_RlRl_s1_sat __builtin_HEXAGON_M2_mpy_sat_ll_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h):rnd:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRh_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_hh_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_s1_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRh_s1_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_hh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):rnd:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRl_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_hl_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_s1_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRl_s1_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_hl_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):rnd:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRh_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_lh_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_s1_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRh_s1_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_lh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):rnd:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRl_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_ll_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_s1_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRl_s1_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_ll_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_mpy_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RR 
__builtin_HEXAGON_M2_mpy_up /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32,Rt32):<<1 C Intrinsic Prototype: Word32 Q6_R_mpy_RR_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RR_s1 __builtin_HEXAGON_M2_mpy_up_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32,Rt32):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RR_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RR_s1_sat __builtin_HEXAGON_M2_mpy_up_s1_sat /* ========================================================================== Assembly Syntax: Rxx32+=mpy(Rs32.h,Rt32.h) C Intrinsic Prototype: Word64 Q6_P_mpyacc_RhRh(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyacc_RhRh __builtin_HEXAGON_M2_mpyd_acc_hh_s0 /* ========================================================================== Assembly Syntax: Rxx32+=mpy(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyacc_RhRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyacc_RhRh_s1 __builtin_HEXAGON_M2_mpyd_acc_hh_s1 /* ========================================================================== Assembly Syntax: Rxx32+=mpy(Rs32.h,Rt32.l) C Intrinsic Prototype: Word64 Q6_P_mpyacc_RhRl(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyacc_RhRl __builtin_HEXAGON_M2_mpyd_acc_hl_s0 /* ========================================================================== Assembly Syntax: Rxx32+=mpy(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyacc_RhRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyacc_RhRl_s1 __builtin_HEXAGON_M2_mpyd_acc_hl_s1 /* ========================================================================== Assembly Syntax: Rxx32+=mpy(Rs32.l,Rt32.h) C Intrinsic Prototype: Word64 Q6_P_mpyacc_RlRh(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyacc_RlRh __builtin_HEXAGON_M2_mpyd_acc_lh_s0 /* ========================================================================== Assembly Syntax: Rxx32+=mpy(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyacc_RlRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyacc_RlRh_s1 __builtin_HEXAGON_M2_mpyd_acc_lh_s1 /* ========================================================================== Assembly Syntax: Rxx32+=mpy(Rs32.l,Rt32.l) C Intrinsic Prototype: Word64 Q6_P_mpyacc_RlRl(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyacc_RlRl __builtin_HEXAGON_M2_mpyd_acc_ll_s0 /* 
========================================================================== Assembly Syntax: Rxx32+=mpy(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyacc_RlRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyacc_RlRl_s1 __builtin_HEXAGON_M2_mpyd_acc_ll_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.h) C Intrinsic Prototype: Word64 Q6_P_mpy_RhRh(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RhRh __builtin_HEXAGON_M2_mpyd_hh_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: Word64 Q6_P_mpy_RhRh_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RhRh_s1 __builtin_HEXAGON_M2_mpyd_hh_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.l) C Intrinsic Prototype: Word64 Q6_P_mpy_RhRl(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RhRl __builtin_HEXAGON_M2_mpyd_hl_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: Word64 Q6_P_mpy_RhRl_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RhRl_s1 __builtin_HEXAGON_M2_mpyd_hl_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.h) C Intrinsic Prototype: Word64 Q6_P_mpy_RlRh(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RlRh __builtin_HEXAGON_M2_mpyd_lh_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: Word64 Q6_P_mpy_RlRh_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RlRh_s1 __builtin_HEXAGON_M2_mpyd_lh_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.l) C Intrinsic Prototype: Word64 Q6_P_mpy_RlRl(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RlRl __builtin_HEXAGON_M2_mpyd_ll_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: Word64 Q6_P_mpy_RlRl_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RlRl_s1 __builtin_HEXAGON_M2_mpyd_ll_s1 /* ========================================================================== Assembly Syntax: Rxx32-=mpy(Rs32.h,Rt32.h) C Intrinsic Prototype: Word64 Q6_P_mpynac_RhRh(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M 
Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpynac_RhRh __builtin_HEXAGON_M2_mpyd_nac_hh_s0 /* ========================================================================== Assembly Syntax: Rxx32-=mpy(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: Word64 Q6_P_mpynac_RhRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpynac_RhRh_s1 __builtin_HEXAGON_M2_mpyd_nac_hh_s1 /* ========================================================================== Assembly Syntax: Rxx32-=mpy(Rs32.h,Rt32.l) C Intrinsic Prototype: Word64 Q6_P_mpynac_RhRl(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpynac_RhRl __builtin_HEXAGON_M2_mpyd_nac_hl_s0 /* ========================================================================== Assembly Syntax: Rxx32-=mpy(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: Word64 Q6_P_mpynac_RhRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpynac_RhRl_s1 __builtin_HEXAGON_M2_mpyd_nac_hl_s1 /* ========================================================================== Assembly Syntax: Rxx32-=mpy(Rs32.l,Rt32.h) C Intrinsic Prototype: Word64 Q6_P_mpynac_RlRh(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpynac_RlRh __builtin_HEXAGON_M2_mpyd_nac_lh_s0 /* ========================================================================== Assembly Syntax: Rxx32-=mpy(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: Word64 Q6_P_mpynac_RlRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpynac_RlRh_s1 __builtin_HEXAGON_M2_mpyd_nac_lh_s1 /* ========================================================================== Assembly Syntax: Rxx32-=mpy(Rs32.l,Rt32.l) C Intrinsic Prototype: Word64 Q6_P_mpynac_RlRl(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpynac_RlRl __builtin_HEXAGON_M2_mpyd_nac_ll_s0 /* ========================================================================== Assembly Syntax: Rxx32-=mpy(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: Word64 Q6_P_mpynac_RlRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpynac_RlRl_s1 __builtin_HEXAGON_M2_mpyd_nac_ll_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.h):rnd C Intrinsic Prototype: Word64 Q6_P_mpy_RhRh_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RhRh_rnd __builtin_HEXAGON_M2_mpyd_rnd_hh_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.h):<<1:rnd C Intrinsic Prototype: Word64 Q6_P_mpy_RhRh_s1_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_mpy_RhRh_s1_rnd __builtin_HEXAGON_M2_mpyd_rnd_hh_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.l):rnd C Intrinsic Prototype: Word64 Q6_P_mpy_RhRl_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RhRl_rnd __builtin_HEXAGON_M2_mpyd_rnd_hl_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.l):<<1:rnd C Intrinsic Prototype: Word64 Q6_P_mpy_RhRl_s1_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RhRl_s1_rnd __builtin_HEXAGON_M2_mpyd_rnd_hl_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.h):rnd C Intrinsic Prototype: Word64 Q6_P_mpy_RlRh_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RlRh_rnd __builtin_HEXAGON_M2_mpyd_rnd_lh_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.h):<<1:rnd C Intrinsic Prototype: Word64 Q6_P_mpy_RlRh_s1_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RlRh_s1_rnd __builtin_HEXAGON_M2_mpyd_rnd_lh_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.l):rnd C Intrinsic Prototype: Word64 Q6_P_mpy_RlRl_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RlRl_rnd __builtin_HEXAGON_M2_mpyd_rnd_ll_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.l):<<1:rnd C Intrinsic Prototype: Word64 Q6_P_mpy_RlRl_s1_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RlRl_s1_rnd __builtin_HEXAGON_M2_mpyd_rnd_ll_s1 /* ========================================================================== Assembly Syntax: Rd32=mpyi(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_mpyi_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyi_RR __builtin_HEXAGON_M2_mpyi /* ========================================================================== Assembly Syntax: Rd32=mpyi(Rs32,#m9) C Intrinsic Prototype: Word32 Q6_R_mpyi_RI(Word32 Rs, Word32 Im9) Instruction Type: M Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_mpyi_RI __builtin_HEXAGON_M2_mpysmi /* ========================================================================== Assembly Syntax: Rd32=mpysu(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_mpysu_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpysu_RR __builtin_HEXAGON_M2_mpysu_up /* 
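
   Editorial usage sketch, not part of the original header: minimal wrappers for
   the widening halfword multiply and the plain 32-bit integer multiply documented
   above, assuming Word32/Word64 map to 32-/64-bit integers; helper names are
   illustrative only.

       #include <stdint.h>

       // 16x16 -> 64-bit signed product of the high halfwords: Rdd32=mpy(Rs32.h,Rt32.h)
       static inline int64_t wide_mul_hh(int32_t a, int32_t b) {
           return Q6_P_mpy_RhRh(a, b);
       }

       // Low 32 bits of a 32x32 multiply: Rd32=mpyi(Rs32,Rt32)
       static inline int32_t mul32(int32_t a, int32_t b) {
           return Q6_R_mpyi_RR(a, b);
       }
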
========================================================================== Assembly Syntax: Rx32+=mpyu(Rs32.h,Rt32.h) C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RhRh(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyuacc_RhRh __builtin_HEXAGON_M2_mpyu_acc_hh_s0 /* ========================================================================== Assembly Syntax: Rx32+=mpyu(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RhRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyuacc_RhRh_s1 __builtin_HEXAGON_M2_mpyu_acc_hh_s1 /* ========================================================================== Assembly Syntax: Rx32+=mpyu(Rs32.h,Rt32.l) C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RhRl(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyuacc_RhRl __builtin_HEXAGON_M2_mpyu_acc_hl_s0 /* ========================================================================== Assembly Syntax: Rx32+=mpyu(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RhRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyuacc_RhRl_s1 __builtin_HEXAGON_M2_mpyu_acc_hl_s1 /* ========================================================================== Assembly Syntax: Rx32+=mpyu(Rs32.l,Rt32.h) C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RlRh(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyuacc_RlRh __builtin_HEXAGON_M2_mpyu_acc_lh_s0 /* ========================================================================== Assembly Syntax: Rx32+=mpyu(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RlRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyuacc_RlRh_s1 __builtin_HEXAGON_M2_mpyu_acc_lh_s1 /* ========================================================================== Assembly Syntax: Rx32+=mpyu(Rs32.l,Rt32.l) C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RlRl(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyuacc_RlRl __builtin_HEXAGON_M2_mpyu_acc_ll_s0 /* ========================================================================== Assembly Syntax: Rx32+=mpyu(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RlRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyuacc_RlRl_s1 __builtin_HEXAGON_M2_mpyu_acc_ll_s1 /* ========================================================================== Assembly Syntax: Rd32=mpyu(Rs32.h,Rt32.h) C Intrinsic Prototype: UWord32 Q6_R_mpyu_RhRh(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyu_RhRh __builtin_HEXAGON_M2_mpyu_hh_s0 /* 
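
   Editorial usage sketch, not part of the original header: a one-line wrapper for
   the unsigned halfword multiply-accumulate documented above. Note that the
   prototypes pass register operands as Word32 even for the unsigned forms; the
   wrapper simply forwards its arguments and is illustrative only.

       #include <stdint.h>

       // Rx32+=mpyu(Rs32.l,Rt32.l): accumulate the unsigned product of the low halfwords
       static inline int32_t umac_ll(int32_t acc, int32_t a, int32_t b) {
           return Q6_R_mpyuacc_RlRl(acc, a, b);
       }
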
========================================================================== Assembly Syntax: Rd32=mpyu(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: UWord32 Q6_R_mpyu_RhRh_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyu_RhRh_s1 __builtin_HEXAGON_M2_mpyu_hh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpyu(Rs32.h,Rt32.l) C Intrinsic Prototype: UWord32 Q6_R_mpyu_RhRl(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyu_RhRl __builtin_HEXAGON_M2_mpyu_hl_s0 /* ========================================================================== Assembly Syntax: Rd32=mpyu(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: UWord32 Q6_R_mpyu_RhRl_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyu_RhRl_s1 __builtin_HEXAGON_M2_mpyu_hl_s1 /* ========================================================================== Assembly Syntax: Rd32=mpyu(Rs32.l,Rt32.h) C Intrinsic Prototype: UWord32 Q6_R_mpyu_RlRh(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyu_RlRh __builtin_HEXAGON_M2_mpyu_lh_s0 /* ========================================================================== Assembly Syntax: Rd32=mpyu(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: UWord32 Q6_R_mpyu_RlRh_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyu_RlRh_s1 __builtin_HEXAGON_M2_mpyu_lh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpyu(Rs32.l,Rt32.l) C Intrinsic Prototype: UWord32 Q6_R_mpyu_RlRl(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyu_RlRl __builtin_HEXAGON_M2_mpyu_ll_s0 /* ========================================================================== Assembly Syntax: Rd32=mpyu(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: UWord32 Q6_R_mpyu_RlRl_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyu_RlRl_s1 __builtin_HEXAGON_M2_mpyu_ll_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpyu(Rs32.h,Rt32.h) C Intrinsic Prototype: Word32 Q6_R_mpyunac_RhRh(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyunac_RhRh __builtin_HEXAGON_M2_mpyu_nac_hh_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpyu(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: Word32 Q6_R_mpyunac_RhRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyunac_RhRh_s1 __builtin_HEXAGON_M2_mpyu_nac_hh_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpyu(Rs32.h,Rt32.l) C Intrinsic Prototype: Word32 Q6_R_mpyunac_RhRl(Word32 Rx, 
Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyunac_RhRl __builtin_HEXAGON_M2_mpyu_nac_hl_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpyu(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: Word32 Q6_R_mpyunac_RhRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyunac_RhRl_s1 __builtin_HEXAGON_M2_mpyu_nac_hl_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpyu(Rs32.l,Rt32.h) C Intrinsic Prototype: Word32 Q6_R_mpyunac_RlRh(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyunac_RlRh __builtin_HEXAGON_M2_mpyu_nac_lh_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpyu(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: Word32 Q6_R_mpyunac_RlRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyunac_RlRh_s1 __builtin_HEXAGON_M2_mpyu_nac_lh_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpyu(Rs32.l,Rt32.l) C Intrinsic Prototype: Word32 Q6_R_mpyunac_RlRl(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyunac_RlRl __builtin_HEXAGON_M2_mpyu_nac_ll_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpyu(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: Word32 Q6_R_mpyunac_RlRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyunac_RlRl_s1 __builtin_HEXAGON_M2_mpyu_nac_ll_s1 /* ========================================================================== Assembly Syntax: Rd32=mpyu(Rs32,Rt32) C Intrinsic Prototype: UWord32 Q6_R_mpyu_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyu_RR __builtin_HEXAGON_M2_mpyu_up /* ========================================================================== Assembly Syntax: Rxx32+=mpyu(Rs32.h,Rt32.h) C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RhRh(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyuacc_RhRh __builtin_HEXAGON_M2_mpyud_acc_hh_s0 /* ========================================================================== Assembly Syntax: Rxx32+=mpyu(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RhRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyuacc_RhRh_s1 __builtin_HEXAGON_M2_mpyud_acc_hh_s1 /* ========================================================================== Assembly Syntax: Rxx32+=mpyu(Rs32.h,Rt32.l) C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RhRl(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: 
SLOT23 ========================================================================== */ #define Q6_P_mpyuacc_RhRl __builtin_HEXAGON_M2_mpyud_acc_hl_s0 /* ========================================================================== Assembly Syntax: Rxx32+=mpyu(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RhRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyuacc_RhRl_s1 __builtin_HEXAGON_M2_mpyud_acc_hl_s1 /* ========================================================================== Assembly Syntax: Rxx32+=mpyu(Rs32.l,Rt32.h) C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RlRh(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyuacc_RlRh __builtin_HEXAGON_M2_mpyud_acc_lh_s0 /* ========================================================================== Assembly Syntax: Rxx32+=mpyu(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RlRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyuacc_RlRh_s1 __builtin_HEXAGON_M2_mpyud_acc_lh_s1 /* ========================================================================== Assembly Syntax: Rxx32+=mpyu(Rs32.l,Rt32.l) C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RlRl(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyuacc_RlRl __builtin_HEXAGON_M2_mpyud_acc_ll_s0 /* ========================================================================== Assembly Syntax: Rxx32+=mpyu(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RlRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyuacc_RlRl_s1 __builtin_HEXAGON_M2_mpyud_acc_ll_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpyu(Rs32.h,Rt32.h) C Intrinsic Prototype: UWord64 Q6_P_mpyu_RhRh(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyu_RhRh __builtin_HEXAGON_M2_mpyud_hh_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpyu(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: UWord64 Q6_P_mpyu_RhRh_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyu_RhRh_s1 __builtin_HEXAGON_M2_mpyud_hh_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpyu(Rs32.h,Rt32.l) C Intrinsic Prototype: UWord64 Q6_P_mpyu_RhRl(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyu_RhRl __builtin_HEXAGON_M2_mpyud_hl_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpyu(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: UWord64 Q6_P_mpyu_RhRl_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ 
#define Q6_P_mpyu_RhRl_s1 __builtin_HEXAGON_M2_mpyud_hl_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpyu(Rs32.l,Rt32.h) C Intrinsic Prototype: UWord64 Q6_P_mpyu_RlRh(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyu_RlRh __builtin_HEXAGON_M2_mpyud_lh_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpyu(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: UWord64 Q6_P_mpyu_RlRh_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyu_RlRh_s1 __builtin_HEXAGON_M2_mpyud_lh_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpyu(Rs32.l,Rt32.l) C Intrinsic Prototype: UWord64 Q6_P_mpyu_RlRl(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyu_RlRl __builtin_HEXAGON_M2_mpyud_ll_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpyu(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: UWord64 Q6_P_mpyu_RlRl_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyu_RlRl_s1 __builtin_HEXAGON_M2_mpyud_ll_s1 /* ========================================================================== Assembly Syntax: Rxx32-=mpyu(Rs32.h,Rt32.h) C Intrinsic Prototype: Word64 Q6_P_mpyunac_RhRh(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyunac_RhRh __builtin_HEXAGON_M2_mpyud_nac_hh_s0 /* ========================================================================== Assembly Syntax: Rxx32-=mpyu(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyunac_RhRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyunac_RhRh_s1 __builtin_HEXAGON_M2_mpyud_nac_hh_s1 /* ========================================================================== Assembly Syntax: Rxx32-=mpyu(Rs32.h,Rt32.l) C Intrinsic Prototype: Word64 Q6_P_mpyunac_RhRl(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyunac_RhRl __builtin_HEXAGON_M2_mpyud_nac_hl_s0 /* ========================================================================== Assembly Syntax: Rxx32-=mpyu(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyunac_RhRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyunac_RhRl_s1 __builtin_HEXAGON_M2_mpyud_nac_hl_s1 /* ========================================================================== Assembly Syntax: Rxx32-=mpyu(Rs32.l,Rt32.h) C Intrinsic Prototype: Word64 Q6_P_mpyunac_RlRh(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyunac_RlRh __builtin_HEXAGON_M2_mpyud_nac_lh_s0 /* 
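
   Editorial usage sketch, not part of the original header: the four widening
   unsigned halfword multiplies above can be combined into a full 32x32 -> 64-bit
   unsigned product. This is purely illustrative (a compiler normally emits this
   pattern for a plain 64-bit multiply) and assumes, per the Hexagon Programmer's
   Reference, that mpyu on .h/.l operands multiplies the unsigned 16-bit fields.

       #include <stdint.h>

       // Full unsigned 32x32 -> 64-bit product from halfword pieces; a and b are
       // register bit patterns (the Word32 prototypes are signed, mpyu treats the
       // halfwords as unsigned).
       static inline uint64_t umul32x32(int32_t a, int32_t b) {
           uint64_t hh = (uint64_t)Q6_P_mpyu_RhRh(a, b);   // a.h * b.h
           uint64_t hl = (uint64_t)Q6_P_mpyu_RhRl(a, b);   // a.h * b.l
           uint64_t lh = (uint64_t)Q6_P_mpyu_RlRh(a, b);   // a.l * b.h
           uint64_t ll = (uint64_t)Q6_P_mpyu_RlRl(a, b);   // a.l * b.l
           return (hh << 32) + ((hl + lh) << 16) + ll;     // exact modulo 2^64
       }
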
========================================================================== Assembly Syntax: Rxx32-=mpyu(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyunac_RlRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyunac_RlRh_s1 __builtin_HEXAGON_M2_mpyud_nac_lh_s1 /* ========================================================================== Assembly Syntax: Rxx32-=mpyu(Rs32.l,Rt32.l) C Intrinsic Prototype: Word64 Q6_P_mpyunac_RlRl(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyunac_RlRl __builtin_HEXAGON_M2_mpyud_nac_ll_s0 /* ========================================================================== Assembly Syntax: Rxx32-=mpyu(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyunac_RlRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyunac_RlRl_s1 __builtin_HEXAGON_M2_mpyud_nac_ll_s1 /* ========================================================================== Assembly Syntax: Rd32=mpyui(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_mpyui_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_mpyui_RR __builtin_HEXAGON_M2_mpyui /* ========================================================================== Assembly Syntax: Rx32-=add(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_addnac_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_addnac_RR __builtin_HEXAGON_M2_nacci /* ========================================================================== Assembly Syntax: Rx32-=add(Rs32,#s8) C Intrinsic Prototype: Word32 Q6_R_addnac_RI(Word32 Rx, Word32 Rs, Word32 Is8) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_addnac_RI __builtin_HEXAGON_M2_naccii /* ========================================================================== Assembly Syntax: Rx32+=sub(Rt32,Rs32) C Intrinsic Prototype: Word32 Q6_R_subacc_RR(Word32 Rx, Word32 Rt, Word32 Rs) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_subacc_RR __builtin_HEXAGON_M2_subacc /* ========================================================================== Assembly Syntax: Rdd32=vabsdiffh(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vabsdiffh_PP(Word64 Rtt, Word64 Rss) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vabsdiffh_PP __builtin_HEXAGON_M2_vabsdiffh /* ========================================================================== Assembly Syntax: Rdd32=vabsdiffw(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vabsdiffw_PP(Word64 Rtt, Word64 Rss) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vabsdiffw_PP __builtin_HEXAGON_M2_vabsdiffw /* ========================================================================== Assembly Syntax: Rxx32+=vcmpyi(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 
Q6_P_vcmpyiacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vcmpyiacc_PP_sat __builtin_HEXAGON_M2_vcmac_s0_sat_i /* ========================================================================== Assembly Syntax: Rxx32+=vcmpyr(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vcmpyracc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vcmpyracc_PP_sat __builtin_HEXAGON_M2_vcmac_s0_sat_r /* ========================================================================== Assembly Syntax: Rdd32=vcmpyi(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vcmpyi_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vcmpyi_PP_sat __builtin_HEXAGON_M2_vcmpy_s0_sat_i /* ========================================================================== Assembly Syntax: Rdd32=vcmpyr(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vcmpyr_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vcmpyr_PP_sat __builtin_HEXAGON_M2_vcmpy_s0_sat_r /* ========================================================================== Assembly Syntax: Rdd32=vcmpyi(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vcmpyi_PP_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vcmpyi_PP_s1_sat __builtin_HEXAGON_M2_vcmpy_s1_sat_i /* ========================================================================== Assembly Syntax: Rdd32=vcmpyr(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vcmpyr_PP_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vcmpyr_PP_s1_sat __builtin_HEXAGON_M2_vcmpy_s1_sat_r /* ========================================================================== Assembly Syntax: Rxx32+=vdmpy(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vdmpyacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vdmpyacc_PP_sat __builtin_HEXAGON_M2_vdmacs_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vdmpy(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vdmpyacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vdmpyacc_PP_s1_sat __builtin_HEXAGON_M2_vdmacs_s1 /* ========================================================================== Assembly Syntax: Rd32=vdmpy(Rss32,Rtt32):rnd:sat C Intrinsic Prototype: Word32 Q6_R_vdmpy_PP_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vdmpy_PP_rnd_sat __builtin_HEXAGON_M2_vdmpyrs_s0 /* ========================================================================== Assembly Syntax: Rd32=vdmpy(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word32 
Q6_R_vdmpy_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vdmpy_PP_s1_rnd_sat __builtin_HEXAGON_M2_vdmpyrs_s1 /* ========================================================================== Assembly Syntax: Rdd32=vdmpy(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vdmpy_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vdmpy_PP_sat __builtin_HEXAGON_M2_vdmpys_s0 /* ========================================================================== Assembly Syntax: Rdd32=vdmpy(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vdmpy_PP_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vdmpy_PP_s1_sat __builtin_HEXAGON_M2_vdmpys_s1 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyh(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vmpyhacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyhacc_RR __builtin_HEXAGON_M2_vmac2 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyeh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vmpyehacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyehacc_PP __builtin_HEXAGON_M2_vmac2es /* ========================================================================== Assembly Syntax: Rxx32+=vmpyeh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpyehacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyehacc_PP_sat __builtin_HEXAGON_M2_vmac2es_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyeh(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpyehacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyehacc_PP_s1_sat __builtin_HEXAGON_M2_vmac2es_s1 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyh(Rs32,Rt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpyhacc_RR_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyhacc_RR_sat __builtin_HEXAGON_M2_vmac2s_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyh(Rs32,Rt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpyhacc_RR_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyhacc_RR_s1_sat __builtin_HEXAGON_M2_vmac2s_s1 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyhsu(Rs32,Rt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpyhsuacc_RR_sat(Word64 Rxx, Word32 Rs, 
Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyhsuacc_RR_sat __builtin_HEXAGON_M2_vmac2su_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyhsu(Rs32,Rt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpyhsuacc_RR_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyhsuacc_RR_s1_sat __builtin_HEXAGON_M2_vmac2su_s1 /* ========================================================================== Assembly Syntax: Rdd32=vmpyeh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpyeh_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyeh_PP_sat __builtin_HEXAGON_M2_vmpy2es_s0 /* ========================================================================== Assembly Syntax: Rdd32=vmpyeh(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpyeh_PP_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyeh_PP_s1_sat __builtin_HEXAGON_M2_vmpy2es_s1 /* ========================================================================== Assembly Syntax: Rdd32=vmpyh(Rs32,Rt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpyh_RR_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyh_RR_sat __builtin_HEXAGON_M2_vmpy2s_s0 /* ========================================================================== Assembly Syntax: Rd32=vmpyh(Rs32,Rt32):rnd:sat C Intrinsic Prototype: Word32 Q6_R_vmpyh_RR_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vmpyh_RR_rnd_sat __builtin_HEXAGON_M2_vmpy2s_s0pack /* ========================================================================== Assembly Syntax: Rdd32=vmpyh(Rs32,Rt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpyh_RR_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyh_RR_s1_sat __builtin_HEXAGON_M2_vmpy2s_s1 /* ========================================================================== Assembly Syntax: Rd32=vmpyh(Rs32,Rt32):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_vmpyh_RR_s1_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vmpyh_RR_s1_rnd_sat __builtin_HEXAGON_M2_vmpy2s_s1pack /* ========================================================================== Assembly Syntax: Rdd32=vmpyhsu(Rs32,Rt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpyhsu_RR_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyhsu_RR_sat __builtin_HEXAGON_M2_vmpy2su_s0 /* ========================================================================== Assembly Syntax: Rdd32=vmpyhsu(Rs32,Rt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpyhsu_RR_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_vmpyhsu_RR_s1_sat __builtin_HEXAGON_M2_vmpy2su_s1 /* ========================================================================== Assembly Syntax: Rd32=vraddh(Rss32,Rtt32) C Intrinsic Prototype: Word32 Q6_R_vraddh_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vraddh_PP __builtin_HEXAGON_M2_vraddh /* ========================================================================== Assembly Syntax: Rd32=vradduh(Rss32,Rtt32) C Intrinsic Prototype: Word32 Q6_R_vradduh_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vradduh_PP __builtin_HEXAGON_M2_vradduh /* ========================================================================== Assembly Syntax: Rxx32+=vrcmpyi(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrcmpyiacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcmpyiacc_PP __builtin_HEXAGON_M2_vrcmaci_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vrcmpyi(Rss32,Rtt32*) C Intrinsic Prototype: Word64 Q6_P_vrcmpyiacc_PP_conj(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcmpyiacc_PP_conj __builtin_HEXAGON_M2_vrcmaci_s0c /* ========================================================================== Assembly Syntax: Rxx32+=vrcmpyr(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrcmpyracc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcmpyracc_PP __builtin_HEXAGON_M2_vrcmacr_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vrcmpyr(Rss32,Rtt32*) C Intrinsic Prototype: Word64 Q6_P_vrcmpyracc_PP_conj(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcmpyracc_PP_conj __builtin_HEXAGON_M2_vrcmacr_s0c /* ========================================================================== Assembly Syntax: Rdd32=vrcmpyi(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrcmpyi_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcmpyi_PP __builtin_HEXAGON_M2_vrcmpyi_s0 /* ========================================================================== Assembly Syntax: Rdd32=vrcmpyi(Rss32,Rtt32*) C Intrinsic Prototype: Word64 Q6_P_vrcmpyi_PP_conj(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcmpyi_PP_conj __builtin_HEXAGON_M2_vrcmpyi_s0c /* ========================================================================== Assembly Syntax: Rdd32=vrcmpyr(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrcmpyr_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcmpyr_PP 
__builtin_HEXAGON_M2_vrcmpyr_s0 /* ========================================================================== Assembly Syntax: Rdd32=vrcmpyr(Rss32,Rtt32*) C Intrinsic Prototype: Word64 Q6_P_vrcmpyr_PP_conj(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcmpyr_PP_conj __builtin_HEXAGON_M2_vrcmpyr_s0c /* ========================================================================== Assembly Syntax: Rxx32+=vrcmpys(Rss32,Rt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vrcmpysacc_PR_s1_sat(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: M Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_vrcmpysacc_PR_s1_sat __builtin_HEXAGON_M2_vrcmpys_acc_s1 /* ========================================================================== Assembly Syntax: Rdd32=vrcmpys(Rss32,Rt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vrcmpys_PR_s1_sat(Word64 Rss, Word32 Rt) Instruction Type: M Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_vrcmpys_PR_s1_sat __builtin_HEXAGON_M2_vrcmpys_s1 /* ========================================================================== Assembly Syntax: Rd32=vrcmpys(Rss32,Rt32):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_vrcmpys_PR_s1_rnd_sat(Word64 Rss, Word32 Rt) Instruction Type: M Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_vrcmpys_PR_s1_rnd_sat __builtin_HEXAGON_M2_vrcmpys_s1rp /* ========================================================================== Assembly Syntax: Rxx32+=vrmpyh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrmpyhacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpyhacc_PP __builtin_HEXAGON_M2_vrmac_s0 /* ========================================================================== Assembly Syntax: Rdd32=vrmpyh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrmpyh_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpyh_PP __builtin_HEXAGON_M2_vrmpy_s0 /* ========================================================================== Assembly Syntax: Rx32^=xor(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_xorxacc_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_xorxacc_RR __builtin_HEXAGON_M2_xor_xacc /* ========================================================================== Assembly Syntax: Rx32&=and(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_andand_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_andand_RR __builtin_HEXAGON_M4_and_and /* ========================================================================== Assembly Syntax: Rx32&=and(Rs32,~Rt32) C Intrinsic Prototype: Word32 Q6_R_andand_RnR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_andand_RnR __builtin_HEXAGON_M4_and_andn /* 
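
   Editorial usage sketch, not part of the original header: the vector reduce
   multiply-accumulate above (Rxx32+=vrmpyh(Rss32,Rtt32)) sums the products of the
   corresponding signed 16-bit lanes of its 64-bit operands into the accumulator,
   which makes it a natural dot-product primitive. The loop below is a sketch under
   that reading of the instruction; the function name and array layout are
   assumptions.

       #include <stddef.h>
       #include <stdint.h>

       // Dot product over arrays of packed int16 lanes, four lanes per 64-bit word
       static inline int64_t dot16(const int64_t *x, const int64_t *y, size_t n64) {
           int64_t acc = 0;
           for (size_t i = 0; i < n64; ++i)
               acc = Q6_P_vrmpyhacc_PP(acc, x[i], y[i]);   // Rxx += vrmpyh(Rss, Rtt)
           return acc;
       }
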
========================================================================== Assembly Syntax: Rx32&=or(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_orand_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_orand_RR __builtin_HEXAGON_M4_and_or /* ========================================================================== Assembly Syntax: Rx32&=xor(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_xorand_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_xorand_RR __builtin_HEXAGON_M4_and_xor /* ========================================================================== Assembly Syntax: Rd32=cmpyiwh(Rss32,Rt32):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpyiwh_PR_s1_rnd_sat(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cmpyiwh_PR_s1_rnd_sat __builtin_HEXAGON_M4_cmpyi_wh /* ========================================================================== Assembly Syntax: Rd32=cmpyiwh(Rss32,Rt32*):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpyiwh_PR_conj_s1_rnd_sat(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cmpyiwh_PR_conj_s1_rnd_sat __builtin_HEXAGON_M4_cmpyi_whc /* ========================================================================== Assembly Syntax: Rd32=cmpyrwh(Rss32,Rt32):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpyrwh_PR_s1_rnd_sat(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cmpyrwh_PR_s1_rnd_sat __builtin_HEXAGON_M4_cmpyr_wh /* ========================================================================== Assembly Syntax: Rd32=cmpyrwh(Rss32,Rt32*):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpyrwh_PR_conj_s1_rnd_sat(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cmpyrwh_PR_conj_s1_rnd_sat __builtin_HEXAGON_M4_cmpyr_whc /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32,Rt32):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpyacc_RR_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RR_s1_sat __builtin_HEXAGON_M4_mac_up_s1_sat /* ========================================================================== Assembly Syntax: Rd32=add(#u6,mpyi(Rs32,#U6)) C Intrinsic Prototype: Word32 Q6_R_add_mpyi_IRI(Word32 Iu6, Word32 Rs, Word32 IU6) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_mpyi_IRI __builtin_HEXAGON_M4_mpyri_addi /* ========================================================================== Assembly Syntax: Rd32=add(Ru32,mpyi(Rs32,#u6)) C Intrinsic Prototype: Word32 Q6_R_add_mpyi_RRI(Word32 Ru, Word32 Rs, Word32 Iu6) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_mpyi_RRI __builtin_HEXAGON_M4_mpyri_addr /* 
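
   Editorial usage sketch, not part of the original header: the compound
   add-plus-multiply-immediate form above (Rd32=add(Ru32,mpyi(Rs32,#u6))) folds a
   small constant stride into a single instruction, which is handy for record or
   row addressing. The stride value and helper name below are illustrative; the
   immediate must fit the #u6 field (0..63).

       #include <stdint.h>

       // offset = base + row * 24, for a hypothetical 24-byte record
       static inline int32_t row_offset(int32_t base, int32_t row) {
           return Q6_R_add_mpyi_RRI(base, row, 24);        // Rd = add(Ru, mpyi(Rs, #24))
       }
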
========================================================================== Assembly Syntax: Rd32=add(Ru32,mpyi(#u6:2,Rs32)) C Intrinsic Prototype: Word32 Q6_R_add_mpyi_RIR(Word32 Ru, Word32 Iu6_2, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_mpyi_RIR __builtin_HEXAGON_M4_mpyri_addr_u2 /* ========================================================================== Assembly Syntax: Rd32=add(#u6,mpyi(Rs32,Rt32)) C Intrinsic Prototype: Word32 Q6_R_add_mpyi_IRR(Word32 Iu6, Word32 Rs, Word32 Rt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_mpyi_IRR __builtin_HEXAGON_M4_mpyrr_addi /* ========================================================================== Assembly Syntax: Ry32=add(Ru32,mpyi(Ry32,Rs32)) C Intrinsic Prototype: Word32 Q6_R_add_mpyi_RRR(Word32 Ru, Word32 Ry, Word32 Rs) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_mpyi_RRR __builtin_HEXAGON_M4_mpyrr_addr /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32,Rt32):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpynac_RR_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RR_s1_sat __builtin_HEXAGON_M4_nac_up_s1_sat /* ========================================================================== Assembly Syntax: Rx32|=and(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_andor_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_andor_RR __builtin_HEXAGON_M4_or_and /* ========================================================================== Assembly Syntax: Rx32|=and(Rs32,~Rt32) C Intrinsic Prototype: Word32 Q6_R_andor_RnR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_andor_RnR __builtin_HEXAGON_M4_or_andn /* ========================================================================== Assembly Syntax: Rx32|=or(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_oror_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_oror_RR __builtin_HEXAGON_M4_or_or /* ========================================================================== Assembly Syntax: Rx32|=xor(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_xoror_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_xoror_RR __builtin_HEXAGON_M4_or_xor /* ========================================================================== Assembly Syntax: Rdd32=pmpyw(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_pmpyw_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_pmpyw_RR __builtin_HEXAGON_M4_pmpyw /* ========================================================================== Assembly Syntax: Rxx32^=pmpyw(Rs32,Rt32) C Intrinsic Prototype: Word64 
Q6_P_pmpywxacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_pmpywxacc_RR __builtin_HEXAGON_M4_pmpyw_acc /* ========================================================================== Assembly Syntax: Rdd32=vpmpyh(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vpmpyh_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vpmpyh_RR __builtin_HEXAGON_M4_vpmpyh /* ========================================================================== Assembly Syntax: Rxx32^=vpmpyh(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vpmpyhxacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vpmpyhxacc_RR __builtin_HEXAGON_M4_vpmpyh_acc /* ========================================================================== Assembly Syntax: Rxx32+=vrmpyweh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrmpywehacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpywehacc_PP __builtin_HEXAGON_M4_vrmpyeh_acc_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vrmpyweh(Rss32,Rtt32):<<1 C Intrinsic Prototype: Word64 Q6_P_vrmpywehacc_PP_s1(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpywehacc_PP_s1 __builtin_HEXAGON_M4_vrmpyeh_acc_s1 /* ========================================================================== Assembly Syntax: Rdd32=vrmpyweh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrmpyweh_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpyweh_PP __builtin_HEXAGON_M4_vrmpyeh_s0 /* ========================================================================== Assembly Syntax: Rdd32=vrmpyweh(Rss32,Rtt32):<<1 C Intrinsic Prototype: Word64 Q6_P_vrmpyweh_PP_s1(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpyweh_PP_s1 __builtin_HEXAGON_M4_vrmpyeh_s1 /* ========================================================================== Assembly Syntax: Rxx32+=vrmpywoh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrmpywohacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpywohacc_PP __builtin_HEXAGON_M4_vrmpyoh_acc_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vrmpywoh(Rss32,Rtt32):<<1 C Intrinsic Prototype: Word64 Q6_P_vrmpywohacc_PP_s1(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpywohacc_PP_s1 __builtin_HEXAGON_M4_vrmpyoh_acc_s1 /* ========================================================================== Assembly Syntax: Rdd32=vrmpywoh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrmpywoh_PP(Word64 Rss, Word64 Rtt) Instruction Type: M 
Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpywoh_PP __builtin_HEXAGON_M4_vrmpyoh_s0 /* ========================================================================== Assembly Syntax: Rdd32=vrmpywoh(Rss32,Rtt32):<<1 C Intrinsic Prototype: Word64 Q6_P_vrmpywoh_PP_s1(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpywoh_PP_s1 __builtin_HEXAGON_M4_vrmpyoh_s1 /* ========================================================================== Assembly Syntax: Rx32^=and(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_andxacc_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_andxacc_RR __builtin_HEXAGON_M4_xor_and /* ========================================================================== Assembly Syntax: Rx32^=and(Rs32,~Rt32) C Intrinsic Prototype: Word32 Q6_R_andxacc_RnR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_andxacc_RnR __builtin_HEXAGON_M4_xor_andn /* ========================================================================== Assembly Syntax: Rx32^=or(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_orxacc_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_orxacc_RR __builtin_HEXAGON_M4_xor_or /* ========================================================================== Assembly Syntax: Rxx32^=xor(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_xorxacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_xorxacc_PP __builtin_HEXAGON_M4_xor_xacc /* ========================================================================== Assembly Syntax: Rxx32+=vdmpybsu(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vdmpybsuacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vdmpybsuacc_PP_sat __builtin_HEXAGON_M5_vdmacbsu /* ========================================================================== Assembly Syntax: Rdd32=vdmpybsu(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vdmpybsu_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vdmpybsu_PP_sat __builtin_HEXAGON_M5_vdmpybsu /* ========================================================================== Assembly Syntax: Rxx32+=vmpybsu(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vmpybsuacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpybsuacc_RR __builtin_HEXAGON_M5_vmacbsu /* ========================================================================== Assembly Syntax: Rxx32+=vmpybu(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vmpybuacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpybuacc_RR 
__builtin_HEXAGON_M5_vmacbuu /* ========================================================================== Assembly Syntax: Rdd32=vmpybsu(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vmpybsu_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpybsu_RR __builtin_HEXAGON_M5_vmpybsu /* ========================================================================== Assembly Syntax: Rdd32=vmpybu(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vmpybu_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpybu_RR __builtin_HEXAGON_M5_vmpybuu /* ========================================================================== Assembly Syntax: Rxx32+=vrmpybsu(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrmpybsuacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpybsuacc_PP __builtin_HEXAGON_M5_vrmacbsu /* ========================================================================== Assembly Syntax: Rxx32+=vrmpybu(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrmpybuacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpybuacc_PP __builtin_HEXAGON_M5_vrmacbuu /* ========================================================================== Assembly Syntax: Rdd32=vrmpybsu(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrmpybsu_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpybsu_PP __builtin_HEXAGON_M5_vrmpybsu /* ========================================================================== Assembly Syntax: Rdd32=vrmpybu(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrmpybu_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpybu_PP __builtin_HEXAGON_M5_vrmpybuu /* ========================================================================== Assembly Syntax: Rd32=addasl(Rt32,Rs32,#u3) C Intrinsic Prototype: Word32 Q6_R_addasl_RRI(Word32 Rt, Word32 Rs, Word32 Iu3) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_addasl_RRI __builtin_HEXAGON_S2_addasl_rrri /* ========================================================================== Assembly Syntax: Rdd32=asl(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_asl_PI(Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asl_PI __builtin_HEXAGON_S2_asl_i_p /* ========================================================================== Assembly Syntax: Rxx32+=asl(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_aslacc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_aslacc_PI __builtin_HEXAGON_S2_asl_i_p_acc /* ========================================================================== Assembly Syntax: Rxx32&=asl(Rss32,#u6) C Intrinsic Prototype: Word64 
Q6_P_asland_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asland_PI __builtin_HEXAGON_S2_asl_i_p_and /* ========================================================================== Assembly Syntax: Rxx32-=asl(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_aslnac_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_aslnac_PI __builtin_HEXAGON_S2_asl_i_p_nac /* ========================================================================== Assembly Syntax: Rxx32|=asl(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_aslor_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_aslor_PI __builtin_HEXAGON_S2_asl_i_p_or /* ========================================================================== Assembly Syntax: Rxx32^=asl(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_aslxacc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_aslxacc_PI __builtin_HEXAGON_S2_asl_i_p_xacc /* ========================================================================== Assembly Syntax: Rd32=asl(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_asl_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asl_RI __builtin_HEXAGON_S2_asl_i_r /* ========================================================================== Assembly Syntax: Rx32+=asl(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_aslacc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_aslacc_RI __builtin_HEXAGON_S2_asl_i_r_acc /* ========================================================================== Assembly Syntax: Rx32&=asl(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_asland_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asland_RI __builtin_HEXAGON_S2_asl_i_r_and /* ========================================================================== Assembly Syntax: Rx32-=asl(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_aslnac_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_aslnac_RI __builtin_HEXAGON_S2_asl_i_r_nac /* ========================================================================== Assembly Syntax: Rx32|=asl(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_aslor_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_aslor_RI __builtin_HEXAGON_S2_asl_i_r_or /* ========================================================================== Assembly Syntax: Rd32=asl(Rs32,#u5):sat C Intrinsic Prototype: Word32 Q6_R_asl_RI_sat(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define 
Q6_R_asl_RI_sat __builtin_HEXAGON_S2_asl_i_r_sat /* ========================================================================== Assembly Syntax: Rx32^=asl(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_aslxacc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_aslxacc_RI __builtin_HEXAGON_S2_asl_i_r_xacc /* ========================================================================== Assembly Syntax: Rdd32=vaslh(Rss32,#u4) C Intrinsic Prototype: Word64 Q6_P_vaslh_PI(Word64 Rss, Word32 Iu4) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vaslh_PI __builtin_HEXAGON_S2_asl_i_vh /* ========================================================================== Assembly Syntax: Rdd32=vaslw(Rss32,#u5) C Intrinsic Prototype: Word64 Q6_P_vaslw_PI(Word64 Rss, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vaslw_PI __builtin_HEXAGON_S2_asl_i_vw /* ========================================================================== Assembly Syntax: Rdd32=asl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_asl_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asl_PR __builtin_HEXAGON_S2_asl_r_p /* ========================================================================== Assembly Syntax: Rxx32+=asl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_aslacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_aslacc_PR __builtin_HEXAGON_S2_asl_r_p_acc /* ========================================================================== Assembly Syntax: Rxx32&=asl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_asland_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asland_PR __builtin_HEXAGON_S2_asl_r_p_and /* ========================================================================== Assembly Syntax: Rxx32-=asl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_aslnac_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_aslnac_PR __builtin_HEXAGON_S2_asl_r_p_nac /* ========================================================================== Assembly Syntax: Rxx32|=asl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_aslor_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_aslor_PR __builtin_HEXAGON_S2_asl_r_p_or /* ========================================================================== Assembly Syntax: Rxx32^=asl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_aslxacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_aslxacc_PR __builtin_HEXAGON_S2_asl_r_p_xor /* ========================================================================== Assembly Syntax: Rd32=asl(Rs32,Rt32) C Intrinsic Prototype: 
Word32 Q6_R_asl_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asl_RR __builtin_HEXAGON_S2_asl_r_r /* ========================================================================== Assembly Syntax: Rx32+=asl(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_aslacc_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_aslacc_RR __builtin_HEXAGON_S2_asl_r_r_acc /* ========================================================================== Assembly Syntax: Rx32&=asl(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_asland_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asland_RR __builtin_HEXAGON_S2_asl_r_r_and /* ========================================================================== Assembly Syntax: Rx32-=asl(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_aslnac_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_aslnac_RR __builtin_HEXAGON_S2_asl_r_r_nac /* ========================================================================== Assembly Syntax: Rx32|=asl(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_aslor_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_aslor_RR __builtin_HEXAGON_S2_asl_r_r_or /* ========================================================================== Assembly Syntax: Rd32=asl(Rs32,Rt32):sat C Intrinsic Prototype: Word32 Q6_R_asl_RR_sat(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asl_RR_sat __builtin_HEXAGON_S2_asl_r_r_sat /* ========================================================================== Assembly Syntax: Rdd32=vaslh(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vaslh_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vaslh_PR __builtin_HEXAGON_S2_asl_r_vh /* ========================================================================== Assembly Syntax: Rdd32=vaslw(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vaslw_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vaslw_PR __builtin_HEXAGON_S2_asl_r_vw /* ========================================================================== Assembly Syntax: Rdd32=asr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_asr_PI(Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asr_PI __builtin_HEXAGON_S2_asr_i_p /* ========================================================================== Assembly Syntax: Rxx32+=asr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_asracc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asracc_PI __builtin_HEXAGON_S2_asr_i_p_acc /* 
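==========================================================================
   Illustrative usage (added sketch, not part of the original listing):
   the shift-and-accumulate intrinsics defined above can be called like
   ordinary C functions.  Word32/Word64 are assumed to be the fixed-width
   typedefs provided by the accompanying Hexagon types header.
   ========================================================================== */

static inline Word64 example_shift_accumulate(Word64 acc, Word64 x)
{
    /* acc += asl(x,#4): 64-bit shift-left-and-accumulate (Q6_P_aslacc_PI) */
    acc = Q6_P_aslacc_PI(acc, x, 4);
    /* Saturating 32-bit shift-left of the low word (Q6_R_asl_RI_sat),
       widened back to 64 bits before the final add. */
    return acc + (Word64)Q6_R_asl_RI_sat((Word32)x, 2);
}

/*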
========================================================================== Assembly Syntax: Rxx32&=asr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_asrand_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asrand_PI __builtin_HEXAGON_S2_asr_i_p_and /* ========================================================================== Assembly Syntax: Rxx32-=asr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_asrnac_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asrnac_PI __builtin_HEXAGON_S2_asr_i_p_nac /* ========================================================================== Assembly Syntax: Rxx32|=asr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_asror_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asror_PI __builtin_HEXAGON_S2_asr_i_p_or /* ========================================================================== Assembly Syntax: Rdd32=asr(Rss32,#u6):rnd C Intrinsic Prototype: Word64 Q6_P_asr_PI_rnd(Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asr_PI_rnd __builtin_HEXAGON_S2_asr_i_p_rnd /* ========================================================================== Assembly Syntax: Rdd32=asrrnd(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_asrrnd_PI(Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_asrrnd_PI __builtin_HEXAGON_S2_asr_i_p_rnd_goodsyntax /* ========================================================================== Assembly Syntax: Rd32=asr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_asr_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asr_RI __builtin_HEXAGON_S2_asr_i_r /* ========================================================================== Assembly Syntax: Rx32+=asr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_asracc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asracc_RI __builtin_HEXAGON_S2_asr_i_r_acc /* ========================================================================== Assembly Syntax: Rx32&=asr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_asrand_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asrand_RI __builtin_HEXAGON_S2_asr_i_r_and /* ========================================================================== Assembly Syntax: Rx32-=asr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_asrnac_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asrnac_RI __builtin_HEXAGON_S2_asr_i_r_nac /* ========================================================================== Assembly Syntax: Rx32|=asr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_asror_RI(Word32 Rx, Word32 Rs, 
Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asror_RI __builtin_HEXAGON_S2_asr_i_r_or /* ========================================================================== Assembly Syntax: Rd32=asr(Rs32,#u5):rnd C Intrinsic Prototype: Word32 Q6_R_asr_RI_rnd(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asr_RI_rnd __builtin_HEXAGON_S2_asr_i_r_rnd /* ========================================================================== Assembly Syntax: Rd32=asrrnd(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_asrrnd_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_asrrnd_RI __builtin_HEXAGON_S2_asr_i_r_rnd_goodsyntax /* ========================================================================== Assembly Syntax: Rd32=vasrw(Rss32,#u5) C Intrinsic Prototype: Word32 Q6_R_vasrw_PI(Word64 Rss, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vasrw_PI __builtin_HEXAGON_S2_asr_i_svw_trun /* ========================================================================== Assembly Syntax: Rdd32=vasrh(Rss32,#u4) C Intrinsic Prototype: Word64 Q6_P_vasrh_PI(Word64 Rss, Word32 Iu4) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vasrh_PI __builtin_HEXAGON_S2_asr_i_vh /* ========================================================================== Assembly Syntax: Rdd32=vasrw(Rss32,#u5) C Intrinsic Prototype: Word64 Q6_P_vasrw_PI(Word64 Rss, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vasrw_PI __builtin_HEXAGON_S2_asr_i_vw /* ========================================================================== Assembly Syntax: Rdd32=asr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_asr_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asr_PR __builtin_HEXAGON_S2_asr_r_p /* ========================================================================== Assembly Syntax: Rxx32+=asr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_asracc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asracc_PR __builtin_HEXAGON_S2_asr_r_p_acc /* ========================================================================== Assembly Syntax: Rxx32&=asr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_asrand_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asrand_PR __builtin_HEXAGON_S2_asr_r_p_and /* ========================================================================== Assembly Syntax: Rxx32-=asr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_asrnac_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asrnac_PR __builtin_HEXAGON_S2_asr_r_p_nac /* 
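==========================================================================
   Illustrative usage (added sketch, not part of the original listing):
   rounding arithmetic shifts are the usual way to drop fixed-point
   fraction bits.  Word32/Word64 are assumed to come from the accompanying
   Hexagon types header.
   ========================================================================== */

static inline Word32 example_q15_to_int(Word32 q15)
{
    /* Rd32=asr(Rs32,#15):rnd -- round to nearest while dropping 15
       fraction bits (Q6_R_asr_RI_rnd). */
    return Q6_R_asr_RI_rnd(q15, 15);
}

static inline Word32 example_pack_words(Word64 pair)
{
    /* Rd32=vasrw(Rss32,#u5) -- arithmetic-shift each 32-bit word of the
       pair right by 8 and pack the truncated low halfwords into one
       register (Q6_R_vasrw_PI). */
    return Q6_R_vasrw_PI(pair, 8);
}

/*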
========================================================================== Assembly Syntax: Rxx32|=asr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_asror_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asror_PR __builtin_HEXAGON_S2_asr_r_p_or /* ========================================================================== Assembly Syntax: Rxx32^=asr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_asrxacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asrxacc_PR __builtin_HEXAGON_S2_asr_r_p_xor /* ========================================================================== Assembly Syntax: Rd32=asr(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_asr_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asr_RR __builtin_HEXAGON_S2_asr_r_r /* ========================================================================== Assembly Syntax: Rx32+=asr(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_asracc_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asracc_RR __builtin_HEXAGON_S2_asr_r_r_acc /* ========================================================================== Assembly Syntax: Rx32&=asr(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_asrand_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asrand_RR __builtin_HEXAGON_S2_asr_r_r_and /* ========================================================================== Assembly Syntax: Rx32-=asr(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_asrnac_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asrnac_RR __builtin_HEXAGON_S2_asr_r_r_nac /* ========================================================================== Assembly Syntax: Rx32|=asr(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_asror_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asror_RR __builtin_HEXAGON_S2_asr_r_r_or /* ========================================================================== Assembly Syntax: Rd32=asr(Rs32,Rt32):sat C Intrinsic Prototype: Word32 Q6_R_asr_RR_sat(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asr_RR_sat __builtin_HEXAGON_S2_asr_r_r_sat /* ========================================================================== Assembly Syntax: Rd32=vasrw(Rss32,Rt32) C Intrinsic Prototype: Word32 Q6_R_vasrw_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vasrw_PR __builtin_HEXAGON_S2_asr_r_svw_trun /* ========================================================================== Assembly Syntax: Rdd32=vasrh(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vasrh_PR(Word64 Rss, Word32 Rt) Instruction Type: 
S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vasrh_PR __builtin_HEXAGON_S2_asr_r_vh /* ========================================================================== Assembly Syntax: Rdd32=vasrw(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vasrw_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vasrw_PR __builtin_HEXAGON_S2_asr_r_vw /* ========================================================================== Assembly Syntax: Rd32=brev(Rs32) C Intrinsic Prototype: Word32 Q6_R_brev_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_brev_R __builtin_HEXAGON_S2_brev /* ========================================================================== Assembly Syntax: Rdd32=brev(Rss32) C Intrinsic Prototype: Word64 Q6_P_brev_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_brev_P __builtin_HEXAGON_S2_brevp /* ========================================================================== Assembly Syntax: Rd32=cl0(Rs32) C Intrinsic Prototype: Word32 Q6_R_cl0_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cl0_R __builtin_HEXAGON_S2_cl0 /* ========================================================================== Assembly Syntax: Rd32=cl0(Rss32) C Intrinsic Prototype: Word32 Q6_R_cl0_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cl0_P __builtin_HEXAGON_S2_cl0p /* ========================================================================== Assembly Syntax: Rd32=cl1(Rs32) C Intrinsic Prototype: Word32 Q6_R_cl1_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cl1_R __builtin_HEXAGON_S2_cl1 /* ========================================================================== Assembly Syntax: Rd32=cl1(Rss32) C Intrinsic Prototype: Word32 Q6_R_cl1_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cl1_P __builtin_HEXAGON_S2_cl1p /* ========================================================================== Assembly Syntax: Rd32=clb(Rs32) C Intrinsic Prototype: Word32 Q6_R_clb_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_clb_R __builtin_HEXAGON_S2_clb /* ========================================================================== Assembly Syntax: Rd32=normamt(Rs32) C Intrinsic Prototype: Word32 Q6_R_normamt_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_normamt_R __builtin_HEXAGON_S2_clbnorm /* ========================================================================== Assembly Syntax: Rd32=clb(Rss32) C Intrinsic Prototype: Word32 Q6_R_clb_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_clb_P __builtin_HEXAGON_S2_clbp /* 
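==========================================================================
   Illustrative usage (added sketch, not part of the original listing):
   the bit-counting intrinsics above make common bit-twiddling helpers
   one-liners.  Word32 is assumed to come from the accompanying Hexagon
   types header.
   ========================================================================== */

static inline Word32 example_floor_log2(Word32 x)
{
    /* For x > 0: floor(log2(x)) = 31 - cl0(x), i.e. 31 minus the number
       of leading zero bits (Q6_R_cl0_R). */
    return 31 - Q6_R_cl0_R(x);
}

static inline Word32 example_normalize(Word32 x)
{
    /* normamt(Rs32) gives the left-shift amount needed to normalize a
       signed word; applying it with asl(Rs32,Rt32) left-justifies the
       value (Q6_R_normamt_R, Q6_R_asl_RR). */
    return Q6_R_asl_RR(x, Q6_R_normamt_R(x));
}

/*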
========================================================================== Assembly Syntax: Rd32=clrbit(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_clrbit_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_clrbit_RI __builtin_HEXAGON_S2_clrbit_i /* ========================================================================== Assembly Syntax: Rd32=clrbit(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_clrbit_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_clrbit_RR __builtin_HEXAGON_S2_clrbit_r /* ========================================================================== Assembly Syntax: Rd32=ct0(Rs32) C Intrinsic Prototype: Word32 Q6_R_ct0_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_ct0_R __builtin_HEXAGON_S2_ct0 /* ========================================================================== Assembly Syntax: Rd32=ct0(Rss32) C Intrinsic Prototype: Word32 Q6_R_ct0_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_ct0_P __builtin_HEXAGON_S2_ct0p /* ========================================================================== Assembly Syntax: Rd32=ct1(Rs32) C Intrinsic Prototype: Word32 Q6_R_ct1_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_ct1_R __builtin_HEXAGON_S2_ct1 /* ========================================================================== Assembly Syntax: Rd32=ct1(Rss32) C Intrinsic Prototype: Word32 Q6_R_ct1_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_ct1_P __builtin_HEXAGON_S2_ct1p /* ========================================================================== Assembly Syntax: Rdd32=deinterleave(Rss32) C Intrinsic Prototype: Word64 Q6_P_deinterleave_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_deinterleave_P __builtin_HEXAGON_S2_deinterleave /* ========================================================================== Assembly Syntax: Rd32=extractu(Rs32,#u5,#U5) C Intrinsic Prototype: Word32 Q6_R_extractu_RII(Word32 Rs, Word32 Iu5, Word32 IU5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_extractu_RII __builtin_HEXAGON_S2_extractu /* ========================================================================== Assembly Syntax: Rd32=extractu(Rs32,Rtt32) C Intrinsic Prototype: Word32 Q6_R_extractu_RP(Word32 Rs, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_extractu_RP __builtin_HEXAGON_S2_extractu_rp /* ========================================================================== Assembly Syntax: Rdd32=extractu(Rss32,#u6,#U6) C Intrinsic Prototype: Word64 Q6_P_extractu_PII(Word64 Rss, Word32 Iu6, Word32 IU6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_extractu_PII 
__builtin_HEXAGON_S2_extractup /* ========================================================================== Assembly Syntax: Rdd32=extractu(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_extractu_PP(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_extractu_PP __builtin_HEXAGON_S2_extractup_rp /* ========================================================================== Assembly Syntax: Rx32=insert(Rs32,#u5,#U5) C Intrinsic Prototype: Word32 Q6_R_insert_RII(Word32 Rx, Word32 Rs, Word32 Iu5, Word32 IU5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_insert_RII __builtin_HEXAGON_S2_insert /* ========================================================================== Assembly Syntax: Rx32=insert(Rs32,Rtt32) C Intrinsic Prototype: Word32 Q6_R_insert_RP(Word32 Rx, Word32 Rs, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_insert_RP __builtin_HEXAGON_S2_insert_rp /* ========================================================================== Assembly Syntax: Rxx32=insert(Rss32,#u6,#U6) C Intrinsic Prototype: Word64 Q6_P_insert_PII(Word64 Rxx, Word64 Rss, Word32 Iu6, Word32 IU6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_insert_PII __builtin_HEXAGON_S2_insertp /* ========================================================================== Assembly Syntax: Rxx32=insert(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_insert_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_insert_PP __builtin_HEXAGON_S2_insertp_rp /* ========================================================================== Assembly Syntax: Rdd32=interleave(Rss32) C Intrinsic Prototype: Word64 Q6_P_interleave_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_interleave_P __builtin_HEXAGON_S2_interleave /* ========================================================================== Assembly Syntax: Rdd32=lfs(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_lfs_PP(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lfs_PP __builtin_HEXAGON_S2_lfsp /* ========================================================================== Assembly Syntax: Rdd32=lsl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lsl_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsl_PR __builtin_HEXAGON_S2_lsl_r_p /* ========================================================================== Assembly Syntax: Rxx32+=lsl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lslacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lslacc_PR __builtin_HEXAGON_S2_lsl_r_p_acc /* ========================================================================== Assembly Syntax: Rxx32&=lsl(Rss32,Rt32) C Intrinsic Prototype: 
Word64 Q6_P_lsland_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsland_PR __builtin_HEXAGON_S2_lsl_r_p_and /* ========================================================================== Assembly Syntax: Rxx32-=lsl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lslnac_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lslnac_PR __builtin_HEXAGON_S2_lsl_r_p_nac /* ========================================================================== Assembly Syntax: Rxx32|=lsl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lslor_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lslor_PR __builtin_HEXAGON_S2_lsl_r_p_or /* ========================================================================== Assembly Syntax: Rxx32^=lsl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lslxacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lslxacc_PR __builtin_HEXAGON_S2_lsl_r_p_xor /* ========================================================================== Assembly Syntax: Rd32=lsl(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_lsl_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsl_RR __builtin_HEXAGON_S2_lsl_r_r /* ========================================================================== Assembly Syntax: Rx32+=lsl(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_lslacc_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lslacc_RR __builtin_HEXAGON_S2_lsl_r_r_acc /* ========================================================================== Assembly Syntax: Rx32&=lsl(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_lsland_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsland_RR __builtin_HEXAGON_S2_lsl_r_r_and /* ========================================================================== Assembly Syntax: Rx32-=lsl(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_lslnac_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lslnac_RR __builtin_HEXAGON_S2_lsl_r_r_nac /* ========================================================================== Assembly Syntax: Rx32|=lsl(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_lslor_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lslor_RR __builtin_HEXAGON_S2_lsl_r_r_or /* ========================================================================== Assembly Syntax: Rdd32=vlslh(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vlslh_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define 
Q6_P_vlslh_PR __builtin_HEXAGON_S2_lsl_r_vh /* ========================================================================== Assembly Syntax: Rdd32=vlslw(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vlslw_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vlslw_PR __builtin_HEXAGON_S2_lsl_r_vw /* ========================================================================== Assembly Syntax: Rdd32=lsr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_lsr_PI(Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsr_PI __builtin_HEXAGON_S2_lsr_i_p /* ========================================================================== Assembly Syntax: Rxx32+=lsr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_lsracc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsracc_PI __builtin_HEXAGON_S2_lsr_i_p_acc /* ========================================================================== Assembly Syntax: Rxx32&=lsr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_lsrand_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsrand_PI __builtin_HEXAGON_S2_lsr_i_p_and /* ========================================================================== Assembly Syntax: Rxx32-=lsr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_lsrnac_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsrnac_PI __builtin_HEXAGON_S2_lsr_i_p_nac /* ========================================================================== Assembly Syntax: Rxx32|=lsr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_lsror_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsror_PI __builtin_HEXAGON_S2_lsr_i_p_or /* ========================================================================== Assembly Syntax: Rxx32^=lsr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_lsrxacc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsrxacc_PI __builtin_HEXAGON_S2_lsr_i_p_xacc /* ========================================================================== Assembly Syntax: Rd32=lsr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_lsr_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsr_RI __builtin_HEXAGON_S2_lsr_i_r /* ========================================================================== Assembly Syntax: Rx32+=lsr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_lsracc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsracc_RI __builtin_HEXAGON_S2_lsr_i_r_acc /* ========================================================================== Assembly Syntax: Rx32&=lsr(Rs32,#u5) C Intrinsic Prototype: Word32 
Q6_R_lsrand_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsrand_RI __builtin_HEXAGON_S2_lsr_i_r_and /* ========================================================================== Assembly Syntax: Rx32-=lsr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_lsrnac_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsrnac_RI __builtin_HEXAGON_S2_lsr_i_r_nac /* ========================================================================== Assembly Syntax: Rx32|=lsr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_lsror_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsror_RI __builtin_HEXAGON_S2_lsr_i_r_or /* ========================================================================== Assembly Syntax: Rx32^=lsr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_lsrxacc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsrxacc_RI __builtin_HEXAGON_S2_lsr_i_r_xacc /* ========================================================================== Assembly Syntax: Rdd32=vlsrh(Rss32,#u4) C Intrinsic Prototype: Word64 Q6_P_vlsrh_PI(Word64 Rss, Word32 Iu4) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vlsrh_PI __builtin_HEXAGON_S2_lsr_i_vh /* ========================================================================== Assembly Syntax: Rdd32=vlsrw(Rss32,#u5) C Intrinsic Prototype: Word64 Q6_P_vlsrw_PI(Word64 Rss, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vlsrw_PI __builtin_HEXAGON_S2_lsr_i_vw /* ========================================================================== Assembly Syntax: Rdd32=lsr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lsr_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsr_PR __builtin_HEXAGON_S2_lsr_r_p /* ========================================================================== Assembly Syntax: Rxx32+=lsr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lsracc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsracc_PR __builtin_HEXAGON_S2_lsr_r_p_acc /* ========================================================================== Assembly Syntax: Rxx32&=lsr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lsrand_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsrand_PR __builtin_HEXAGON_S2_lsr_r_p_and /* ========================================================================== Assembly Syntax: Rxx32-=lsr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lsrnac_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsrnac_PR 
__builtin_HEXAGON_S2_lsr_r_p_nac /* ========================================================================== Assembly Syntax: Rxx32|=lsr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lsror_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsror_PR __builtin_HEXAGON_S2_lsr_r_p_or /* ========================================================================== Assembly Syntax: Rxx32^=lsr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lsrxacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsrxacc_PR __builtin_HEXAGON_S2_lsr_r_p_xor /* ========================================================================== Assembly Syntax: Rd32=lsr(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_lsr_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsr_RR __builtin_HEXAGON_S2_lsr_r_r /* ========================================================================== Assembly Syntax: Rx32+=lsr(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_lsracc_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsracc_RR __builtin_HEXAGON_S2_lsr_r_r_acc /* ========================================================================== Assembly Syntax: Rx32&=lsr(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_lsrand_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsrand_RR __builtin_HEXAGON_S2_lsr_r_r_and /* ========================================================================== Assembly Syntax: Rx32-=lsr(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_lsrnac_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsrnac_RR __builtin_HEXAGON_S2_lsr_r_r_nac /* ========================================================================== Assembly Syntax: Rx32|=lsr(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_lsror_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsror_RR __builtin_HEXAGON_S2_lsr_r_r_or /* ========================================================================== Assembly Syntax: Rdd32=vlsrh(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vlsrh_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vlsrh_PR __builtin_HEXAGON_S2_lsr_r_vh /* ========================================================================== Assembly Syntax: Rdd32=vlsrw(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vlsrw_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vlsrw_PR __builtin_HEXAGON_S2_lsr_r_vw /* ========================================================================== Assembly Syntax: Rdd32=packhl(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_packhl_RR(Word32 Rs, 
Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_packhl_RR __builtin_HEXAGON_S2_packhl /* ========================================================================== Assembly Syntax: Rd32=parity(Rss32,Rtt32) C Intrinsic Prototype: Word32 Q6_R_parity_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_parity_PP __builtin_HEXAGON_S2_parityp /* ========================================================================== Assembly Syntax: Rd32=setbit(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_setbit_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_setbit_RI __builtin_HEXAGON_S2_setbit_i /* ========================================================================== Assembly Syntax: Rd32=setbit(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_setbit_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_setbit_RR __builtin_HEXAGON_S2_setbit_r /* ========================================================================== Assembly Syntax: Rdd32=shuffeb(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_shuffeb_PP(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_shuffeb_PP __builtin_HEXAGON_S2_shuffeb /* ========================================================================== Assembly Syntax: Rdd32=shuffeh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_shuffeh_PP(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_shuffeh_PP __builtin_HEXAGON_S2_shuffeh /* ========================================================================== Assembly Syntax: Rdd32=shuffob(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_shuffob_PP(Word64 Rtt, Word64 Rss) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_shuffob_PP __builtin_HEXAGON_S2_shuffob /* ========================================================================== Assembly Syntax: Rdd32=shuffoh(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_shuffoh_PP(Word64 Rtt, Word64 Rss) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_shuffoh_PP __builtin_HEXAGON_S2_shuffoh /* ========================================================================== Assembly Syntax: memb(Rx32++#s4:0:circ(Mu2))=Rt32 C Intrinsic Prototype: void Q6_memb_IMR_circ(void** Rx, Word32 Is4_0, Word32 Mu, Word32 Rt, void* BaseAddress) Instruction Type: ST Execution Slots: SLOT01 ========================================================================== */ #define Q6_memb_IMR_circ __builtin_HEXAGON_S2_storerb_pci /* ========================================================================== Assembly Syntax: memb(Rx32++I:circ(Mu2))=Rt32 C Intrinsic Prototype: void Q6_memb_MR_circ(void** Rx, Word32 Mu, Word32 Rt, void* BaseAddress) Instruction Type: ST Execution Slots: SLOT01 ========================================================================== */ #define 
Q6_memb_MR_circ __builtin_HEXAGON_S2_storerb_pcr /* ========================================================================== Assembly Syntax: memd(Rx32++#s4:3:circ(Mu2))=Rtt32 C Intrinsic Prototype: void Q6_memd_IMP_circ(void** Rx, Word32 Is4_3, Word32 Mu, Word64 Rtt, void* BaseAddress) Instruction Type: ST Execution Slots: SLOT01 ========================================================================== */ #define Q6_memd_IMP_circ __builtin_HEXAGON_S2_storerd_pci /* ========================================================================== Assembly Syntax: memd(Rx32++I:circ(Mu2))=Rtt32 C Intrinsic Prototype: void Q6_memd_MP_circ(void** Rx, Word32 Mu, Word64 Rtt, void* BaseAddress) Instruction Type: ST Execution Slots: SLOT01 ========================================================================== */ #define Q6_memd_MP_circ __builtin_HEXAGON_S2_storerd_pcr /* ========================================================================== Assembly Syntax: memh(Rx32++#s4:1:circ(Mu2))=Rt32.h C Intrinsic Prototype: void Q6_memh_IMRh_circ(void** Rx, Word32 Is4_1, Word32 Mu, Word32 Rt, void* BaseAddress) Instruction Type: ST Execution Slots: SLOT01 ========================================================================== */ #define Q6_memh_IMRh_circ __builtin_HEXAGON_S2_storerf_pci /* ========================================================================== Assembly Syntax: memh(Rx32++I:circ(Mu2))=Rt32.h C Intrinsic Prototype: void Q6_memh_MRh_circ(void** Rx, Word32 Mu, Word32 Rt, void* BaseAddress) Instruction Type: ST Execution Slots: SLOT01 ========================================================================== */ #define Q6_memh_MRh_circ __builtin_HEXAGON_S2_storerf_pcr /* ========================================================================== Assembly Syntax: memh(Rx32++#s4:1:circ(Mu2))=Rt32 C Intrinsic Prototype: void Q6_memh_IMR_circ(void** Rx, Word32 Is4_1, Word32 Mu, Word32 Rt, void* BaseAddress) Instruction Type: ST Execution Slots: SLOT01 ========================================================================== */ #define Q6_memh_IMR_circ __builtin_HEXAGON_S2_storerh_pci /* ========================================================================== Assembly Syntax: memh(Rx32++I:circ(Mu2))=Rt32 C Intrinsic Prototype: void Q6_memh_MR_circ(void** Rx, Word32 Mu, Word32 Rt, void* BaseAddress) Instruction Type: ST Execution Slots: SLOT01 ========================================================================== */ #define Q6_memh_MR_circ __builtin_HEXAGON_S2_storerh_pcr /* ========================================================================== Assembly Syntax: memw(Rx32++#s4:2:circ(Mu2))=Rt32 C Intrinsic Prototype: void Q6_memw_IMR_circ(void** Rx, Word32 Is4_2, Word32 Mu, Word32 Rt, void* BaseAddress) Instruction Type: ST Execution Slots: SLOT01 ========================================================================== */ #define Q6_memw_IMR_circ __builtin_HEXAGON_S2_storeri_pci /* ========================================================================== Assembly Syntax: memw(Rx32++I:circ(Mu2))=Rt32 C Intrinsic Prototype: void Q6_memw_MR_circ(void** Rx, Word32 Mu, Word32 Rt, void* BaseAddress) Instruction Type: ST Execution Slots: SLOT01 ========================================================================== */ #define Q6_memw_MR_circ __builtin_HEXAGON_S2_storeri_pcr /* ========================================================================== Assembly Syntax: Rd32=vsathb(Rs32) C Intrinsic Prototype: Word32 Q6_R_vsathb_R(Word32 Rs) Instruction Type: S_2op Execution Slots: 
SLOT23 ========================================================================== */ #define Q6_R_vsathb_R __builtin_HEXAGON_S2_svsathb /* ========================================================================== Assembly Syntax: Rd32=vsathub(Rs32) C Intrinsic Prototype: Word32 Q6_R_vsathub_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vsathub_R __builtin_HEXAGON_S2_svsathub /* ========================================================================== Assembly Syntax: Rx32=tableidxb(Rs32,#u4,#U5) C Intrinsic Prototype: Word32 Q6_R_tableidxb_RII(Word32 Rx, Word32 Rs, Word32 Iu4, Word32 IU5) Instruction Type: S_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_tableidxb_RII __builtin_HEXAGON_S2_tableidxb_goodsyntax /* ========================================================================== Assembly Syntax: Rx32=tableidxd(Rs32,#u4,#U5) C Intrinsic Prototype: Word32 Q6_R_tableidxd_RII(Word32 Rx, Word32 Rs, Word32 Iu4, Word32 IU5) Instruction Type: S_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_tableidxd_RII __builtin_HEXAGON_S2_tableidxd_goodsyntax /* ========================================================================== Assembly Syntax: Rx32=tableidxh(Rs32,#u4,#U5) C Intrinsic Prototype: Word32 Q6_R_tableidxh_RII(Word32 Rx, Word32 Rs, Word32 Iu4, Word32 IU5) Instruction Type: S_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_tableidxh_RII __builtin_HEXAGON_S2_tableidxh_goodsyntax /* ========================================================================== Assembly Syntax: Rx32=tableidxw(Rs32,#u4,#U5) C Intrinsic Prototype: Word32 Q6_R_tableidxw_RII(Word32 Rx, Word32 Rs, Word32 Iu4, Word32 IU5) Instruction Type: S_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_tableidxw_RII __builtin_HEXAGON_S2_tableidxw_goodsyntax /* ========================================================================== Assembly Syntax: Rd32=togglebit(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_togglebit_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_togglebit_RI __builtin_HEXAGON_S2_togglebit_i /* ========================================================================== Assembly Syntax: Rd32=togglebit(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_togglebit_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_togglebit_RR __builtin_HEXAGON_S2_togglebit_r /* ========================================================================== Assembly Syntax: Pd4=tstbit(Rs32,#u5) C Intrinsic Prototype: Byte Q6_p_tstbit_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_tstbit_RI __builtin_HEXAGON_S2_tstbit_i /* ========================================================================== Assembly Syntax: Pd4=tstbit(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_tstbit_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 
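   Usage sketch (editorial addition, not part of the original header): the
   single-bit intrinsics above compose in the obvious way; for a hypothetical
   Word32 `flags` word and Word32 `bitpos` index:
       Word32 flags = 0;
       flags = Q6_R_setbit_RI(flags, 3);              // set bit 3
       if (Q6_p_tstbit_RI(flags, 3))                  // non-zero if bit set
           flags = Q6_R_togglebit_RR(flags, bitpos);  // flip a runtime bit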
========================================================================== */ #define Q6_p_tstbit_RR __builtin_HEXAGON_S2_tstbit_r /* ========================================================================== Assembly Syntax: Rdd32=valignb(Rtt32,Rss32,#u3) C Intrinsic Prototype: Word64 Q6_P_valignb_PPI(Word64 Rtt, Word64 Rss, Word32 Iu3) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_valignb_PPI __builtin_HEXAGON_S2_valignib /* ========================================================================== Assembly Syntax: Rdd32=valignb(Rtt32,Rss32,Pu4) C Intrinsic Prototype: Word64 Q6_P_valignb_PPp(Word64 Rtt, Word64 Rss, Byte Pu) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_valignb_PPp __builtin_HEXAGON_S2_valignrb /* ========================================================================== Assembly Syntax: Rdd32=vcnegh(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vcnegh_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vcnegh_PR __builtin_HEXAGON_S2_vcnegh /* ========================================================================== Assembly Syntax: Rdd32=vcrotate(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vcrotate_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vcrotate_PR __builtin_HEXAGON_S2_vcrotate /* ========================================================================== Assembly Syntax: Rxx32+=vrcnegh(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vrcneghacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcneghacc_PR __builtin_HEXAGON_S2_vrcnegh /* ========================================================================== Assembly Syntax: Rd32=vrndwh(Rss32) C Intrinsic Prototype: Word32 Q6_R_vrndwh_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vrndwh_P __builtin_HEXAGON_S2_vrndpackwh /* ========================================================================== Assembly Syntax: Rd32=vrndwh(Rss32):sat C Intrinsic Prototype: Word32 Q6_R_vrndwh_P_sat(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vrndwh_P_sat __builtin_HEXAGON_S2_vrndpackwhs /* ========================================================================== Assembly Syntax: Rd32=vsathb(Rss32) C Intrinsic Prototype: Word32 Q6_R_vsathb_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vsathb_P __builtin_HEXAGON_S2_vsathb /* ========================================================================== Assembly Syntax: Rdd32=vsathb(Rss32) C Intrinsic Prototype: Word64 Q6_P_vsathb_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsathb_P __builtin_HEXAGON_S2_vsathb_nopack /* ========================================================================== Assembly Syntax: 
Rd32=vsathub(Rss32) C Intrinsic Prototype: Word32 Q6_R_vsathub_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vsathub_P __builtin_HEXAGON_S2_vsathub /* ========================================================================== Assembly Syntax: Rdd32=vsathub(Rss32) C Intrinsic Prototype: Word64 Q6_P_vsathub_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsathub_P __builtin_HEXAGON_S2_vsathub_nopack /* ========================================================================== Assembly Syntax: Rd32=vsatwh(Rss32) C Intrinsic Prototype: Word32 Q6_R_vsatwh_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vsatwh_P __builtin_HEXAGON_S2_vsatwh /* ========================================================================== Assembly Syntax: Rdd32=vsatwh(Rss32) C Intrinsic Prototype: Word64 Q6_P_vsatwh_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsatwh_P __builtin_HEXAGON_S2_vsatwh_nopack /* ========================================================================== Assembly Syntax: Rd32=vsatwuh(Rss32) C Intrinsic Prototype: Word32 Q6_R_vsatwuh_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vsatwuh_P __builtin_HEXAGON_S2_vsatwuh /* ========================================================================== Assembly Syntax: Rdd32=vsatwuh(Rss32) C Intrinsic Prototype: Word64 Q6_P_vsatwuh_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsatwuh_P __builtin_HEXAGON_S2_vsatwuh_nopack /* ========================================================================== Assembly Syntax: Rd32=vsplatb(Rs32) C Intrinsic Prototype: Word32 Q6_R_vsplatb_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vsplatb_R __builtin_HEXAGON_S2_vsplatrb /* ========================================================================== Assembly Syntax: Rdd32=vsplath(Rs32) C Intrinsic Prototype: Word64 Q6_P_vsplath_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsplath_R __builtin_HEXAGON_S2_vsplatrh /* ========================================================================== Assembly Syntax: Rdd32=vspliceb(Rss32,Rtt32,#u3) C Intrinsic Prototype: Word64 Q6_P_vspliceb_PPI(Word64 Rss, Word64 Rtt, Word32 Iu3) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vspliceb_PPI __builtin_HEXAGON_S2_vspliceib /* ========================================================================== Assembly Syntax: Rdd32=vspliceb(Rss32,Rtt32,Pu4) C Intrinsic Prototype: Word64 Q6_P_vspliceb_PPp(Word64 Rss, Word64 Rtt, Byte Pu) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vspliceb_PPp __builtin_HEXAGON_S2_vsplicerb /* 
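   Editorial usage sketch for the splat intrinsics above (not part of the
   original header): Q6_R_vsplatb_R replicates the low byte of its argument
   into all four byte lanes of a Word32, and Q6_P_vsplath_R replicates the low
   halfword into all four halfword lanes of a Word64, e.g.
       Word32 b4 = Q6_R_vsplatb_R(0x5A);     // expected 0x5A5A5A5A
       Word64 h4 = Q6_P_vsplath_R(0x1234);   // expected 0x1234123412341234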
========================================================================== Assembly Syntax: Rdd32=vsxtbh(Rs32) C Intrinsic Prototype: Word64 Q6_P_vsxtbh_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsxtbh_R __builtin_HEXAGON_S2_vsxtbh /* ========================================================================== Assembly Syntax: Rdd32=vsxthw(Rs32) C Intrinsic Prototype: Word64 Q6_P_vsxthw_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsxthw_R __builtin_HEXAGON_S2_vsxthw /* ========================================================================== Assembly Syntax: Rd32=vtrunehb(Rss32) C Intrinsic Prototype: Word32 Q6_R_vtrunehb_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vtrunehb_P __builtin_HEXAGON_S2_vtrunehb /* ========================================================================== Assembly Syntax: Rdd32=vtrunewh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vtrunewh_PP(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vtrunewh_PP __builtin_HEXAGON_S2_vtrunewh /* ========================================================================== Assembly Syntax: Rd32=vtrunohb(Rss32) C Intrinsic Prototype: Word32 Q6_R_vtrunohb_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vtrunohb_P __builtin_HEXAGON_S2_vtrunohb /* ========================================================================== Assembly Syntax: Rdd32=vtrunowh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vtrunowh_PP(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vtrunowh_PP __builtin_HEXAGON_S2_vtrunowh /* ========================================================================== Assembly Syntax: Rdd32=vzxtbh(Rs32) C Intrinsic Prototype: Word64 Q6_P_vzxtbh_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vzxtbh_R __builtin_HEXAGON_S2_vzxtbh /* ========================================================================== Assembly Syntax: Rdd32=vzxthw(Rs32) C Intrinsic Prototype: Word64 Q6_P_vzxthw_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vzxthw_R __builtin_HEXAGON_S2_vzxthw /* ========================================================================== Assembly Syntax: Rd32=add(Rs32,add(Ru32,#s6)) C Intrinsic Prototype: Word32 Q6_R_add_add_RRI(Word32 Rs, Word32 Ru, Word32 Is6) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_add_RRI __builtin_HEXAGON_S4_addaddi /* ========================================================================== Assembly Syntax: Rx32=add(#u8,asl(Rx32,#U5)) C Intrinsic Prototype: Word32 Q6_R_add_asl_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ 
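/* Editorial usage sketch for the widen/narrow intrinsics above (not part of
   the original header): Q6_P_vzxtbh_R zero-extends the four bytes of a Word32
   into the four halfwords of a Word64, and Q6_R_vtrunehb_P packs the even
   (low) byte of each halfword back into a Word32, so the two should
   round-trip:
       Word64 wide   = Q6_P_vzxtbh_R(0x04030201);   // 0x0004000300020001
       Word32 narrow = Q6_R_vtrunehb_P(wide);       // 0x04030201 again     */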
#define Q6_R_add_asl_IRI __builtin_HEXAGON_S4_addi_asl_ri /* ========================================================================== Assembly Syntax: Rx32=add(#u8,lsr(Rx32,#U5)) C Intrinsic Prototype: Word32 Q6_R_add_lsr_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_lsr_IRI __builtin_HEXAGON_S4_addi_lsr_ri /* ========================================================================== Assembly Syntax: Rx32=and(#u8,asl(Rx32,#U5)) C Intrinsic Prototype: Word32 Q6_R_and_asl_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_and_asl_IRI __builtin_HEXAGON_S4_andi_asl_ri /* ========================================================================== Assembly Syntax: Rx32=and(#u8,lsr(Rx32,#U5)) C Intrinsic Prototype: Word32 Q6_R_and_lsr_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_and_lsr_IRI __builtin_HEXAGON_S4_andi_lsr_ri /* ========================================================================== Assembly Syntax: Rd32=add(clb(Rs32),#s6) C Intrinsic Prototype: Word32 Q6_R_add_clb_RI(Word32 Rs, Word32 Is6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_clb_RI __builtin_HEXAGON_S4_clbaddi /* ========================================================================== Assembly Syntax: Rd32=add(clb(Rss32),#s6) C Intrinsic Prototype: Word32 Q6_R_add_clb_PI(Word64 Rss, Word32 Is6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_clb_PI __builtin_HEXAGON_S4_clbpaddi /* ========================================================================== Assembly Syntax: Rd32=normamt(Rss32) C Intrinsic Prototype: Word32 Q6_R_normamt_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_normamt_P __builtin_HEXAGON_S4_clbpnorm /* ========================================================================== Assembly Syntax: Rd32=extract(Rs32,#u5,#U5) C Intrinsic Prototype: Word32 Q6_R_extract_RII(Word32 Rs, Word32 Iu5, Word32 IU5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_extract_RII __builtin_HEXAGON_S4_extract /* ========================================================================== Assembly Syntax: Rd32=extract(Rs32,Rtt32) C Intrinsic Prototype: Word32 Q6_R_extract_RP(Word32 Rs, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_extract_RP __builtin_HEXAGON_S4_extract_rp /* ========================================================================== Assembly Syntax: Rdd32=extract(Rss32,#u6,#U6) C Intrinsic Prototype: Word64 Q6_P_extract_PII(Word64 Rss, Word32 Iu6, Word32 IU6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_extract_PII __builtin_HEXAGON_S4_extractp /* ========================================================================== Assembly Syntax: 
Rdd32=extract(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_extract_PP(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_extract_PP __builtin_HEXAGON_S4_extractp_rp /* ========================================================================== Assembly Syntax: Rd32=lsl(#s6,Rt32) C Intrinsic Prototype: Word32 Q6_R_lsl_IR(Word32 Is6, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsl_IR __builtin_HEXAGON_S4_lsli /* ========================================================================== Assembly Syntax: Pd4=!tstbit(Rs32,#u5) C Intrinsic Prototype: Byte Q6_p_not_tstbit_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_not_tstbit_RI __builtin_HEXAGON_S4_ntstbit_i /* ========================================================================== Assembly Syntax: Pd4=!tstbit(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_not_tstbit_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_not_tstbit_RR __builtin_HEXAGON_S4_ntstbit_r /* ========================================================================== Assembly Syntax: Rx32|=and(Rs32,#s10) C Intrinsic Prototype: Word32 Q6_R_andor_RI(Word32 Rx, Word32 Rs, Word32 Is10) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_andor_RI __builtin_HEXAGON_S4_or_andi /* ========================================================================== Assembly Syntax: Rx32=or(Ru32,and(Rx32,#s10)) C Intrinsic Prototype: Word32 Q6_R_or_and_RRI(Word32 Ru, Word32 Rx, Word32 Is10) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_or_and_RRI __builtin_HEXAGON_S4_or_andix /* ========================================================================== Assembly Syntax: Rx32|=or(Rs32,#s10) C Intrinsic Prototype: Word32 Q6_R_oror_RI(Word32 Rx, Word32 Rs, Word32 Is10) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_oror_RI __builtin_HEXAGON_S4_or_ori /* ========================================================================== Assembly Syntax: Rx32=or(#u8,asl(Rx32,#U5)) C Intrinsic Prototype: Word32 Q6_R_or_asl_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_or_asl_IRI __builtin_HEXAGON_S4_ori_asl_ri /* ========================================================================== Assembly Syntax: Rx32=or(#u8,lsr(Rx32,#U5)) C Intrinsic Prototype: Word32 Q6_R_or_lsr_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_or_lsr_IRI __builtin_HEXAGON_S4_ori_lsr_ri /* ========================================================================== Assembly Syntax: Rd32=parity(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_parity_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU64 Execution Slots: SLOT23 
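   Usage sketch (editorial addition, not part of the original header):
   Q6_R_extract_RII, documented above, pulls a bit-field out of a register;
   assuming the first immediate is the field width and the second its bit
   offset, extracting a 5-bit field at bit 8 of a hypothetical Word32 `insn`
   would read:
       Word32 field = Q6_R_extract_RII(insn, 5, 8);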
========================================================================== */ #define Q6_R_parity_RR __builtin_HEXAGON_S4_parity /* ========================================================================== Assembly Syntax: Rd32=add(Rs32,sub(#s6,Ru32)) C Intrinsic Prototype: Word32 Q6_R_add_sub_RIR(Word32 Rs, Word32 Is6, Word32 Ru) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_sub_RIR __builtin_HEXAGON_S4_subaddi /* ========================================================================== Assembly Syntax: Rx32=sub(#u8,asl(Rx32,#U5)) C Intrinsic Prototype: Word32 Q6_R_sub_asl_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_asl_IRI __builtin_HEXAGON_S4_subi_asl_ri /* ========================================================================== Assembly Syntax: Rx32=sub(#u8,lsr(Rx32,#U5)) C Intrinsic Prototype: Word32 Q6_R_sub_lsr_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_lsr_IRI __builtin_HEXAGON_S4_subi_lsr_ri /* ========================================================================== Assembly Syntax: Rdd32=vrcrotate(Rss32,Rt32,#u2) C Intrinsic Prototype: Word64 Q6_P_vrcrotate_PRI(Word64 Rss, Word32 Rt, Word32 Iu2) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcrotate_PRI __builtin_HEXAGON_S4_vrcrotate /* ========================================================================== Assembly Syntax: Rxx32+=vrcrotate(Rss32,Rt32,#u2) C Intrinsic Prototype: Word64 Q6_P_vrcrotateacc_PRI(Word64 Rxx, Word64 Rss, Word32 Rt, Word32 Iu2) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcrotateacc_PRI __builtin_HEXAGON_S4_vrcrotate_acc /* ========================================================================== Assembly Syntax: Rdd32=vxaddsubh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vxaddsubh_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vxaddsubh_PP_sat __builtin_HEXAGON_S4_vxaddsubh /* ========================================================================== Assembly Syntax: Rdd32=vxaddsubh(Rss32,Rtt32):rnd:>>1:sat C Intrinsic Prototype: Word64 Q6_P_vxaddsubh_PP_rnd_rs1_sat(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vxaddsubh_PP_rnd_rs1_sat __builtin_HEXAGON_S4_vxaddsubhr /* ========================================================================== Assembly Syntax: Rdd32=vxaddsubw(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vxaddsubw_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vxaddsubw_PP_sat __builtin_HEXAGON_S4_vxaddsubw /* ========================================================================== Assembly Syntax: Rdd32=vxsubaddh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vxsubaddh_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 
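   Usage sketch (editorial addition, not part of the original header):
   Q6_R_add_sub_RIR, documented above, evaluates Rs + (#s6 - Ru) in a single
   instruction; with hypothetical Word32 variables `total` and `used`:
       Word32 remaining = Q6_R_add_sub_RIR(total, 8, used);  // total + (8 - used)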
========================================================================== */ #define Q6_P_vxsubaddh_PP_sat __builtin_HEXAGON_S4_vxsubaddh /* ========================================================================== Assembly Syntax: Rdd32=vxsubaddh(Rss32,Rtt32):rnd:>>1:sat C Intrinsic Prototype: Word64 Q6_P_vxsubaddh_PP_rnd_rs1_sat(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vxsubaddh_PP_rnd_rs1_sat __builtin_HEXAGON_S4_vxsubaddhr /* ========================================================================== Assembly Syntax: Rdd32=vxsubaddw(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vxsubaddw_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vxsubaddw_PP_sat __builtin_HEXAGON_S4_vxsubaddw /* ========================================================================== Assembly Syntax: Rd32=vasrhub(Rss32,#u4):rnd:sat C Intrinsic Prototype: Word32 Q6_R_vasrhub_PI_rnd_sat(Word64 Rss, Word32 Iu4) Instruction Type: S_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_vasrhub_PI_rnd_sat __builtin_HEXAGON_S5_asrhub_rnd_sat_goodsyntax /* ========================================================================== Assembly Syntax: Rd32=vasrhub(Rss32,#u4):sat C Intrinsic Prototype: Word32 Q6_R_vasrhub_PI_sat(Word64 Rss, Word32 Iu4) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vasrhub_PI_sat __builtin_HEXAGON_S5_asrhub_sat /* ========================================================================== Assembly Syntax: Rd32=popcount(Rss32) C Intrinsic Prototype: Word32 Q6_R_popcount_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_popcount_P __builtin_HEXAGON_S5_popcountp /* ========================================================================== Assembly Syntax: Rdd32=vasrh(Rss32,#u4):rnd C Intrinsic Prototype: Word64 Q6_P_vasrh_PI_rnd(Word64 Rss, Word32 Iu4) Instruction Type: S_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_vasrh_PI_rnd __builtin_HEXAGON_S5_vasrhrnd_goodsyntax /* ========================================================================== Assembly Syntax: dccleana(Rs32) C Intrinsic Prototype: void Q6_dccleana_A(Address Rs) Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_dccleana_A __builtin_HEXAGON_Y2_dccleana /* ========================================================================== Assembly Syntax: dccleaninva(Rs32) C Intrinsic Prototype: void Q6_dccleaninva_A(Address Rs) Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_dccleaninva_A __builtin_HEXAGON_Y2_dccleaninva /* ========================================================================== Assembly Syntax: dcfetch(Rs32) C Intrinsic Prototype: void Q6_dcfetch_A(Address Rs) Instruction Type: MAPPING Execution Slots: SLOT0123 ========================================================================== */ #define Q6_dcfetch_A __builtin_HEXAGON_Y2_dcfetch /* 
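   Editorial usage sketch for the intrinsics above (not part of the original
   header): Q6_R_popcount_P counts set bits across a 64-bit value and
   Q6_dcfetch_A issues a data-cache prefetch hint for an address:
       Word32 ones = Q6_R_popcount_P(0xFF00FF00FF00FF00ULL);  // expected 32
       Q6_dcfetch_A((Address)ptr);        // `ptr` is an assumed pointer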
========================================================================== Assembly Syntax: dcinva(Rs32) C Intrinsic Prototype: void Q6_dcinva_A(Address Rs) Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_dcinva_A __builtin_HEXAGON_Y2_dcinva /* ========================================================================== Assembly Syntax: dczeroa(Rs32) C Intrinsic Prototype: void Q6_dczeroa_A(Address Rs) Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_dczeroa_A __builtin_HEXAGON_Y2_dczeroa /* ========================================================================== Assembly Syntax: l2fetch(Rs32,Rt32) C Intrinsic Prototype: void Q6_l2fetch_AR(Address Rs, Word32 Rt) Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_l2fetch_AR __builtin_HEXAGON_Y4_l2fetch /* ========================================================================== Assembly Syntax: l2fetch(Rs32,Rtt32) C Intrinsic Prototype: void Q6_l2fetch_AP(Address Rs, Word64 Rtt) Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_l2fetch_AP __builtin_HEXAGON_Y5_l2fetch #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rdd32=rol(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_rol_PI(Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_rol_PI __builtin_HEXAGON_S6_rol_i_p #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rxx32+=rol(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_rolacc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_rolacc_PI __builtin_HEXAGON_S6_rol_i_p_acc #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rxx32&=rol(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_roland_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_roland_PI __builtin_HEXAGON_S6_rol_i_p_and #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rxx32-=rol(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_rolnac_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_rolnac_PI __builtin_HEXAGON_S6_rol_i_p_nac #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rxx32|=rol(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_rolor_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_rolor_PI __builtin_HEXAGON_S6_rol_i_p_or #endif /* __HEXAGON_ARCH___ >= 
60 */ #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rxx32^=rol(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_rolxacc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_rolxacc_PI __builtin_HEXAGON_S6_rol_i_p_xacc #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rd32=rol(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_rol_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_rol_RI __builtin_HEXAGON_S6_rol_i_r #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rx32+=rol(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_rolacc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_rolacc_RI __builtin_HEXAGON_S6_rol_i_r_acc #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rx32&=rol(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_roland_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_roland_RI __builtin_HEXAGON_S6_rol_i_r_and #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rx32-=rol(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_rolnac_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_rolnac_RI __builtin_HEXAGON_S6_rol_i_r_nac #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rx32|=rol(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_rolor_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_rolor_RI __builtin_HEXAGON_S6_rol_i_r_or #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rx32^=rol(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_rolxacc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_rolxacc_RI __builtin_HEXAGON_S6_rol_i_r_xacc #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Rdd32=vabsdiffb(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vabsdiffb_PP(Word64 Rtt, Word64 Rss) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vabsdiffb_PP __builtin_HEXAGON_M6_vabsdiffb #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HEXAGON_ARCH__ >= 62 /* 
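   Editorial usage sketch for the rotate-left intrinsics above (not part of
   the original header, available when __HEXAGON_ARCH__ >= 60): Q6_R_rol_RI
   rotates a 32-bit value left by an immediate amount, e.g.
       Word32 r = Q6_R_rol_RI(0x80000001, 1);   // expected 0x00000003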
========================================================================== Assembly Syntax: Rdd32=vabsdiffub(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vabsdiffub_PP(Word64 Rtt, Word64 Rss) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vabsdiffub_PP __builtin_HEXAGON_M6_vabsdiffub #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HEXAGON_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Rdd32=vsplatb(Rs32) C Intrinsic Prototype: Word64 Q6_P_vsplatb_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsplatb_R __builtin_HEXAGON_S6_vsplatrbp #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HEXAGON_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Rdd32=vtrunehb(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vtrunehb_PP(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vtrunehb_PP __builtin_HEXAGON_S6_vtrunehb_ppp #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HEXAGON_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Rdd32=vtrunohb(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vtrunohb_PP(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vtrunohb_PP __builtin_HEXAGON_S6_vtrunohb_ppp #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HEXAGON_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Pd4=!any8(vcmpb.eq(Rss32,Rtt32)) C Intrinsic Prototype: Byte Q6_p_not_any8_vcmpb_eq_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_not_any8_vcmpb_eq_PP __builtin_HEXAGON_A6_vcmpbeq_notany #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HEXAGON_ARCH__ >= 66 /* ========================================================================== Assembly Syntax: Rdd32=dfadd(Rss32,Rtt32) C Intrinsic Prototype: Float64 Q6_P_dfadd_PP(Float64 Rss, Float64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_dfadd_PP __builtin_HEXAGON_F2_dfadd #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HEXAGON_ARCH__ >= 66 /* ========================================================================== Assembly Syntax: Rdd32=dfsub(Rss32,Rtt32) C Intrinsic Prototype: Float64 Q6_P_dfsub_PP(Float64 Rss, Float64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_dfsub_PP __builtin_HEXAGON_F2_dfsub #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HEXAGON_ARCH__ >= 66 /* ========================================================================== Assembly Syntax: Rx32-=mpyi(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_mpyinac_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyinac_RR __builtin_HEXAGON_M2_mnaci #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HEXAGON_ARCH__ >= 66 /* 
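   Editorial usage sketch for the v66 double-precision helpers above (not part
   of the original header): Q6_P_dfadd_PP and Q6_P_dfsub_PP take and return
   Float64 values and are only available at the right architecture level, so
   uses are typically guarded the same way as the definitions:
       #if __HEXAGON_ARCH__ >= 66
       Float64 sum = Q6_P_dfadd_PP(a, b);   // `a`, `b` are assumed Float64s
       #endif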
========================================================================== Assembly Syntax: Rd32=mask(#u5,#U5) C Intrinsic Prototype: Word32 Q6_R_mask_II(Word32 Iu5, Word32 IU5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mask_II __builtin_HEXAGON_S2_mask #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rd32=clip(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_clip_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_clip_RI __builtin_HEXAGON_A7_clip #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rdd32=cround(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_cround_PI(Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cround_PI __builtin_HEXAGON_A7_croundd_ri #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rdd32=cround(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_cround_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cround_PR __builtin_HEXAGON_A7_croundd_rr #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rdd32=vclip(Rss32,#u5) C Intrinsic Prototype: Word64 Q6_P_vclip_PI(Word64 Rss, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vclip_PI __builtin_HEXAGON_A7_vclip #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 /* ========================================================================== Assembly Syntax: Rdd32=dfmax(Rss32,Rtt32) C Intrinsic Prototype: Float64 Q6_P_dfmax_PP(Float64 Rss, Float64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_dfmax_PP __builtin_HEXAGON_F2_dfmax #endif /* __HEXAGON_ARCH___ >= 67 */ #if __HEXAGON_ARCH__ >= 67 /* ========================================================================== Assembly Syntax: Rdd32=dfmin(Rss32,Rtt32) C Intrinsic Prototype: Float64 Q6_P_dfmin_PP(Float64 Rss, Float64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_dfmin_PP __builtin_HEXAGON_F2_dfmin #endif /* __HEXAGON_ARCH___ >= 67 */ #if __HEXAGON_ARCH__ >= 67 /* ========================================================================== Assembly Syntax: Rdd32=dfmpyfix(Rss32,Rtt32) C Intrinsic Prototype: Float64 Q6_P_dfmpyfix_PP(Float64 Rss, Float64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_dfmpyfix_PP 
__builtin_HEXAGON_F2_dfmpyfix #endif /* __HEXAGON_ARCH___ >= 67 */ #if __HEXAGON_ARCH__ >= 67 /* ========================================================================== Assembly Syntax: Rxx32+=dfmpyhh(Rss32,Rtt32) C Intrinsic Prototype: Float64 Q6_P_dfmpyhhacc_PP(Float64 Rxx, Float64 Rss, Float64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_dfmpyhhacc_PP __builtin_HEXAGON_F2_dfmpyhh #endif /* __HEXAGON_ARCH___ >= 67 */ #if __HEXAGON_ARCH__ >= 67 /* ========================================================================== Assembly Syntax: Rxx32+=dfmpylh(Rss32,Rtt32) C Intrinsic Prototype: Float64 Q6_P_dfmpylhacc_PP(Float64 Rxx, Float64 Rss, Float64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_dfmpylhacc_PP __builtin_HEXAGON_F2_dfmpylh #endif /* __HEXAGON_ARCH___ >= 67 */ #if __HEXAGON_ARCH__ >= 67 /* ========================================================================== Assembly Syntax: Rdd32=dfmpyll(Rss32,Rtt32) C Intrinsic Prototype: Float64 Q6_P_dfmpyll_PP(Float64 Rss, Float64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_dfmpyll_PP __builtin_HEXAGON_F2_dfmpyll #endif /* __HEXAGON_ARCH___ >= 67 */ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rdd32=cmpyiw(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_cmpyiw_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_P_cmpyiw_PP __builtin_HEXAGON_M7_dcmpyiw #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rxx32+=cmpyiw(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_cmpyiwacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_P_cmpyiwacc_PP __builtin_HEXAGON_M7_dcmpyiw_acc #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rdd32=cmpyiw(Rss32,Rtt32*) C Intrinsic Prototype: Word64 Q6_P_cmpyiw_PP_conj(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_P_cmpyiw_PP_conj __builtin_HEXAGON_M7_dcmpyiwc #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rxx32+=cmpyiw(Rss32,Rtt32*) C Intrinsic Prototype: Word64 Q6_P_cmpyiwacc_PP_conj(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_P_cmpyiwacc_PP_conj __builtin_HEXAGON_M7_dcmpyiwc_acc #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== 
Assembly Syntax: Rdd32=cmpyrw(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_cmpyrw_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_P_cmpyrw_PP __builtin_HEXAGON_M7_dcmpyrw #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rxx32+=cmpyrw(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_cmpyrwacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_P_cmpyrwacc_PP __builtin_HEXAGON_M7_dcmpyrw_acc #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rdd32=cmpyrw(Rss32,Rtt32*) C Intrinsic Prototype: Word64 Q6_P_cmpyrw_PP_conj(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_P_cmpyrw_PP_conj __builtin_HEXAGON_M7_dcmpyrwc #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rxx32+=cmpyrw(Rss32,Rtt32*) C Intrinsic Prototype: Word64 Q6_P_cmpyrwacc_PP_conj(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_P_cmpyrwacc_PP_conj __builtin_HEXAGON_M7_dcmpyrwc_acc #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rdd32=vdmpyw(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vdmpyw_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_P_vdmpyw_PP __builtin_HEXAGON_M7_vdmpy #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rxx32+=vdmpyw(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vdmpywacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_P_vdmpywacc_PP __builtin_HEXAGON_M7_vdmpy_acc #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rd32=cmpyiw(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word32 Q6_R_cmpyiw_PP_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_R_cmpyiw_PP_s1_sat __builtin_HEXAGON_M7_wcmpyiw #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rd32=cmpyiw(Rss32,Rtt32):<<1:rnd:sat C 
Intrinsic Prototype: Word32 Q6_R_cmpyiw_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_R_cmpyiw_PP_s1_rnd_sat __builtin_HEXAGON_M7_wcmpyiw_rnd #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rd32=cmpyiw(Rss32,Rtt32*):<<1:sat C Intrinsic Prototype: Word32 Q6_R_cmpyiw_PP_conj_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_R_cmpyiw_PP_conj_s1_sat __builtin_HEXAGON_M7_wcmpyiwc #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rd32=cmpyiw(Rss32,Rtt32*):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpyiw_PP_conj_s1_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_R_cmpyiw_PP_conj_s1_rnd_sat __builtin_HEXAGON_M7_wcmpyiwc_rnd #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rd32=cmpyrw(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word32 Q6_R_cmpyrw_PP_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_R_cmpyrw_PP_s1_sat __builtin_HEXAGON_M7_wcmpyrw #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rd32=cmpyrw(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpyrw_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_R_cmpyrw_PP_s1_rnd_sat __builtin_HEXAGON_M7_wcmpyrw_rnd #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rd32=cmpyrw(Rss32,Rtt32*):<<1:sat C Intrinsic Prototype: Word32 Q6_R_cmpyrw_PP_conj_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_R_cmpyrw_PP_conj_s1_sat __builtin_HEXAGON_M7_wcmpyrwc #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rd32=cmpyrw(Rss32,Rtt32*):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpyrw_PP_conj_s1_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_R_cmpyrw_PP_conj_s1_rnd_sat __builtin_HEXAGON_M7_wcmpyrwc_rnd #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 68 /* 
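   Editorial note for the audio complex-multiply intrinsics above (not part of
   the original header): they are defined only when __HEXAGON_ARCH__ >= 67 and
   __HEXAGON_AUDIO__ is defined, so portable code should guard calls the same
   way:
       #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
       Word32 re = Q6_R_cmpyrw_PP_s1_rnd_sat(x, y);   // assumed Word64 x, y
       #endif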
========================================================================== Assembly Syntax: dmlink(Rs32,Rt32) C Intrinsic Prototype: void Q6_dmlink_AA(Address Rs, Address Rt) Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_dmlink_AA __builtin_HEXAGON_Y6_dmlink #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HEXAGON_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Rd32=dmpause C Intrinsic Prototype: Word32 Q6_R_dmpause() Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_R_dmpause __builtin_HEXAGON_Y6_dmpause #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HEXAGON_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Rd32=dmpoll C Intrinsic Prototype: Word32 Q6_R_dmpoll() Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_R_dmpoll __builtin_HEXAGON_Y6_dmpoll #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HEXAGON_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: dmresume(Rs32) C Intrinsic Prototype: void Q6_dmresume_A(Address Rs) Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_dmresume_A __builtin_HEXAGON_Y6_dmresume #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HEXAGON_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: dmstart(Rs32) C Intrinsic Prototype: void Q6_dmstart_A(Address Rs) Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_dmstart_A __builtin_HEXAGON_Y6_dmstart #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HEXAGON_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Rd32=dmwait C Intrinsic Prototype: Word32 Q6_R_dmwait() Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_R_dmwait __builtin_HEXAGON_Y6_dmwait #endif /* __HEXAGON_ARCH___ >= 68 */ #include #ifdef __HVX__ #include #endif /* __HVX__ */ #endif /******************************************************************************/ /* (c) 2020 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ /* */ /******************************************************************************/ #ifndef HEXAGON_TYPES_H #define HEXAGON_TYPES_H #include /* Hexagon names */ #define HEXAGON_Vect HEXAGON_Vect64 #define HEXAGON_V_GET_D HEXAGON_V64_GET_D #define HEXAGON_V_GET_UD HEXAGON_V64_GET_UD #define HEXAGON_V_GET_W0 HEXAGON_V64_GET_W0 #define HEXAGON_V_GET_W1 HEXAGON_V64_GET_W1 #define HEXAGON_V_GET_UW0 HEXAGON_V64_GET_UW0 #define HEXAGON_V_GET_UW1 HEXAGON_V64_GET_UW1 #define HEXAGON_V_GET_H0 HEXAGON_V64_GET_H0 #define HEXAGON_V_GET_H1 HEXAGON_V64_GET_H1 #define HEXAGON_V_GET_H2 HEXAGON_V64_GET_H2 #define HEXAGON_V_GET_H3 HEXAGON_V64_GET_H3 #define HEXAGON_V_GET_UH0 HEXAGON_V64_GET_UH0 #define HEXAGON_V_GET_UH1 HEXAGON_V64_GET_UH1 #define HEXAGON_V_GET_UH2 HEXAGON_V64_GET_UH2 #define HEXAGON_V_GET_UH3 HEXAGON_V64_GET_UH3 #define HEXAGON_V_GET_B0 HEXAGON_V64_GET_B0 #define HEXAGON_V_GET_B1 HEXAGON_V64_GET_B1 #define HEXAGON_V_GET_B2 HEXAGON_V64_GET_B2 #define HEXAGON_V_GET_B3 HEXAGON_V64_GET_B3 #define HEXAGON_V_GET_B4 HEXAGON_V64_GET_B4 #define HEXAGON_V_GET_B5 HEXAGON_V64_GET_B5 #define HEXAGON_V_GET_B6 HEXAGON_V64_GET_B6 #define HEXAGON_V_GET_B7 HEXAGON_V64_GET_B7 #define HEXAGON_V_GET_UB0 HEXAGON_V64_GET_UB0 #define HEXAGON_V_GET_UB1 HEXAGON_V64_GET_UB1 #define HEXAGON_V_GET_UB2 HEXAGON_V64_GET_UB2 #define HEXAGON_V_GET_UB3 HEXAGON_V64_GET_UB3 #define HEXAGON_V_GET_UB4 HEXAGON_V64_GET_UB4 #define HEXAGON_V_GET_UB5 HEXAGON_V64_GET_UB5 #define HEXAGON_V_GET_UB6 HEXAGON_V64_GET_UB6 #define HEXAGON_V_GET_UB7 HEXAGON_V64_GET_UB7 #define HEXAGON_V_PUT_D HEXAGON_V64_PUT_D #define HEXAGON_V_PUT_W0 HEXAGON_V64_PUT_W0 #define HEXAGON_V_PUT_W1 HEXAGON_V64_PUT_W1 #define HEXAGON_V_PUT_H0 HEXAGON_V64_PUT_H0 #define HEXAGON_V_PUT_H1 HEXAGON_V64_PUT_H1 #define HEXAGON_V_PUT_H2 HEXAGON_V64_PUT_H2 #define HEXAGON_V_PUT_H3 HEXAGON_V64_PUT_H3 #define HEXAGON_V_PUT_B0 HEXAGON_V64_PUT_B0 #define HEXAGON_V_PUT_B1 HEXAGON_V64_PUT_B1 #define HEXAGON_V_PUT_B2 HEXAGON_V64_PUT_B2 #define HEXAGON_V_PUT_B3 HEXAGON_V64_PUT_B3 #define HEXAGON_V_PUT_B4 HEXAGON_V64_PUT_B4 #define HEXAGON_V_PUT_B5 HEXAGON_V64_PUT_B5 #define HEXAGON_V_PUT_B6 HEXAGON_V64_PUT_B6 #define HEXAGON_V_PUT_B7 HEXAGON_V64_PUT_B7 #define HEXAGON_V_CREATE_D HEXAGON_V64_CREATE_D #define HEXAGON_V_CREATE_W HEXAGON_V64_CREATE_W #define HEXAGON_V_CREATE_H HEXAGON_V64_CREATE_H #define HEXAGON_V_CREATE_B HEXAGON_V64_CREATE_B #ifdef __cplusplus #define HEXAGON_VectC HEXAGON_Vect64C #endif /* __cplusplus */ /* 64 Bit Vectors */ typedef long long __attribute__((__may_alias__)) HEXAGON_Vect64; /* Extract doubleword macros */ #define HEXAGON_V64_GET_D(v) (v) #define HEXAGON_V64_GET_UD(v) ((unsigned long long)(v)) /* Extract word macros */ #define HEXAGON_V64_GET_W0(v) \ __extension__({ \ union { \ long long d; \ int w[2]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.w[0]; \ }) #define HEXAGON_V64_GET_W1(v) \ __extension__({ \ union { \ long long d; \ int w[2]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.w[1]; \ }) #define HEXAGON_V64_GET_UW0(v) \ __extension__({ \ union { \ long long d; \ unsigned int uw[2]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.uw[0]; \ }) #define HEXAGON_V64_GET_UW1(v) \ __extension__({ \ union { \ long long d; \ unsigned int uw[2]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.uw[1]; \ }) /* Extract half word 
macros */ #define HEXAGON_V64_GET_H0(v) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.h[0]; \ }) #define HEXAGON_V64_GET_H1(v) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.h[1]; \ }) #define HEXAGON_V64_GET_H2(v) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.h[2]; \ }) #define HEXAGON_V64_GET_H3(v) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.h[3]; \ }) #define HEXAGON_V64_GET_UH0(v) \ __extension__({ \ union { \ long long d; \ unsigned short uh[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.uh[0]; \ }) #define HEXAGON_V64_GET_UH1(v) \ __extension__({ \ union { \ long long d; \ unsigned short uh[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.uh[1]; \ }) #define HEXAGON_V64_GET_UH2(v) \ __extension__({ \ union { \ long long d; \ unsigned short uh[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.uh[2]; \ }) #define HEXAGON_V64_GET_UH3(v) \ __extension__({ \ union { \ long long d; \ unsigned short uh[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.uh[3]; \ }) /* Extract byte macros */ #define HEXAGON_V64_GET_B0(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[0]; \ }) #define HEXAGON_V64_GET_B1(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[1]; \ }) #define HEXAGON_V64_GET_B2(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[2]; \ }) #define HEXAGON_V64_GET_B3(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[3]; \ }) #define HEXAGON_V64_GET_B4(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[4]; \ }) #define HEXAGON_V64_GET_B5(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[5]; \ }) #define HEXAGON_V64_GET_B6(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[6]; \ }) #define HEXAGON_V64_GET_B7(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[7]; \ }) #define HEXAGON_V64_GET_UB0(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.ub[0]; \ }) #define 
HEXAGON_V64_GET_UB1(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.ub[1]; \ }) #define HEXAGON_V64_GET_UB2(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.ub[2]; \ }) #define HEXAGON_V64_GET_UB3(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.ub[3]; \ }) #define HEXAGON_V64_GET_UB4(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.ub[4]; \ }) #define HEXAGON_V64_GET_UB5(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.ub[5]; \ }) #define HEXAGON_V64_GET_UB6(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.ub[6]; \ }) #define HEXAGON_V64_GET_UB7(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.ub[7]; \ }) /* NOTE: All set macros return a HEXAGON_Vect64 type */ /* Set doubleword macro */ #define HEXAGON_V64_PUT_D(v, new) (new) /* Set word macros */ #ifdef __hexagon__ #define HEXAGON_V64_PUT_W0(v, new) \ __extension__({ \ union { \ long long d; \ int w[2]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.w[0] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_W1(v, new) \ __extension__({ \ union { \ long long d; \ int w[2]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.w[1] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #else /* !__hexagon__ */ #define HEXAGON_V64_PUT_W0(v, new) \ (((v) & 0xffffffff00000000LL) | ((HEXAGON_Vect64)((unsigned int)(new)))) #define HEXAGON_V64_PUT_W1(v, new) \ (((v) & 0x00000000ffffffffLL) | (((HEXAGON_Vect64)(new)) << 32LL)) #endif /* !__hexagon__ */ /* Set half word macros */ #ifdef __hexagon__ #define HEXAGON_V64_PUT_H0(v, new) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.h[0] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_H1(v, new) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.h[1] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_H2(v, new) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.h[2] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_H3(v, new) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.h[3] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #else /* !__hexagon__ */ #define HEXAGON_V64_PUT_H0(v, new) \ (((v) & 0xffffffffffff0000LL) | ((HEXAGON_Vect64)((unsigned short)(new)))) 
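/* Illustrative example, not part of the original header: a minimal round trip
 * through the half-word insert/extract macros defined in this file. The
 * HEXAGON_V64_EXAMPLES guard is a hypothetical name and is never defined, so
 * the function below is normally not compiled; like the scalar fallback
 * macros around it, it assumes a little-endian host. */
#ifdef HEXAGON_V64_EXAMPLES
static __inline__ short hexagon_v64_halfword_roundtrip(HEXAGON_Vect64 v) {
  /* Overwrite half-word lane 0, leaving lanes 1..3 untouched, then read the
   * same lane back with the corresponding extract macro. */
  HEXAGON_Vect64 r = HEXAGON_V64_PUT_H0(v, (short)0x1234);
  return HEXAGON_V64_GET_H0(r); /* 0x1234 on a little-endian host */
}
#endif /* HEXAGON_V64_EXAMPLES */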
#define HEXAGON_V64_PUT_H1(v, new) \ (((v) & 0xffffffff0000ffffLL) | (((HEXAGON_Vect64)((unsigned short)(new))) << 16LL)) #define HEXAGON_V64_PUT_H2(v, new) \ (((v) & 0xffff0000ffffffffLL) | (((HEXAGON_Vect64)((unsigned short)(new))) << 32LL)) #define HEXAGON_V64_PUT_H3(v, new) \ (((v) & 0x0000ffffffffffffLL) | (((HEXAGON_Vect64)(new)) << 48LL)) #endif /* !__hexagon__ */ /* Set byte macros */ #ifdef __hexagon__ #define HEXAGON_V64_PUT_B0(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[0] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_B1(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[1] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_B2(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[2] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_B3(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[3] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_B4(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[4] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_B5(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[5] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_B6(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[6] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_B7(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[7] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #else /* !__hexagon__ */ #define HEXAGON_V64_PUT_B0(v, new) \ (((v) & 0xffffffffffffff00LL) | ((HEXAGON_Vect64)((unsigned char)(new)))) #define HEXAGON_V64_PUT_B1(v, new) \ (((v) & 0xffffffffffff00ffLL) | (((HEXAGON_Vect64)((unsigned char)(new))) << 8LL)) #define HEXAGON_V64_PUT_B2(v, new) \ (((v) & 0xffffffffff00ffffLL) | (((HEXAGON_Vect64)((unsigned char)(new))) << 16LL)) #define HEXAGON_V64_PUT_B3(v, new) \ (((v) & 0xffffffff00ffffffLL) | (((HEXAGON_Vect64)((unsigned char)(new))) << 24LL)) #define HEXAGON_V64_PUT_B4(v, new) \ (((v) & 0xffffff00ffffffffLL) | (((HEXAGON_Vect64)((unsigned char)(new))) << 32LL)) #define HEXAGON_V64_PUT_B5(v, new) \ (((v) & 0xffff00ffffffffffLL) | (((HEXAGON_Vect64)((unsigned char)(new))) << 40LL)) #define HEXAGON_V64_PUT_B6(v, new) \ (((v) & 0xff00ffffffffffffLL) | (((HEXAGON_Vect64)((unsigned char)(new))) << 48LL)) #define HEXAGON_V64_PUT_B7(v, new) \ (((v) & 0x00ffffffffffffffLL) | (((HEXAGON_Vect64)(new)) << 56LL)) #endif /* !__hexagon__ */ /* NOTE: All create macros return a HEXAGON_Vect64 type */ /* Create from a doubleword */ #define HEXAGON_V64_CREATE_D(d) (d) /* Create from words */ #ifdef __hexagon__ #define 
HEXAGON_V64_CREATE_W(w1, w0) \ __extension__({ \ union { \ long long d; \ int w[2]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.w[0] = (w0); \ _HEXAGON_V64_internal_union.w[1] = (w1); \ _HEXAGON_V64_internal_union.d; \ }) #else /* !__hexagon__ */ #define HEXAGON_V64_CREATE_W(w1, w0) \ ((((HEXAGON_Vect64)(w1)) << 32LL) | ((HEXAGON_Vect64)((w0) & 0xffffffff))) #endif /* !__hexagon__ */ /* Create from half words */ #ifdef __hexagon__ #define HEXAGON_V64_CREATE_H(h3, h2, h1, h0) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.h[0] = (h0); \ _HEXAGON_V64_internal_union.h[1] = (h1); \ _HEXAGON_V64_internal_union.h[2] = (h2); \ _HEXAGON_V64_internal_union.h[3] = (h3); \ _HEXAGON_V64_internal_union.d; \ }) #else /* !__hexagon__ */ #define HEXAGON_V64_CREATE_H(h3, h2, h1, h0) \ ((((HEXAGON_Vect64)(h3)) << 48LL) | (((HEXAGON_Vect64)((h2) & 0xffff)) << 32LL) | \ (((HEXAGON_Vect64)((h1) & 0xffff)) << 16LL) | ((HEXAGON_Vect64)((h0) & 0xffff))) #endif /* !__hexagon__ */ /* Create from bytes */ #ifdef __hexagon__ #define HEXAGON_V64_CREATE_B(b7, b6, b5, b4, b3, b2, b1, b0) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.b[0] = (b0); \ _HEXAGON_V64_internal_union.b[1] = (b1); \ _HEXAGON_V64_internal_union.b[2] = (b2); \ _HEXAGON_V64_internal_union.b[3] = (b3); \ _HEXAGON_V64_internal_union.b[4] = (b4); \ _HEXAGON_V64_internal_union.b[5] = (b5); \ _HEXAGON_V64_internal_union.b[6] = (b6); \ _HEXAGON_V64_internal_union.b[7] = (b7); \ _HEXAGON_V64_internal_union.d; \ }) #else /* !__hexagon__ */ #define HEXAGON_V64_CREATE_B(b7, b6, b5, b4, b3, b2, b1, b0) \ ((((HEXAGON_Vect64)(b7)) << 56LL) | (((HEXAGON_Vect64)((b6) & 0xff)) << 48LL) | \ (((HEXAGON_Vect64)((b5) & 0xff)) << 40LL) | (((HEXAGON_Vect64)((b4) & 0xff)) << 32LL) | \ (((HEXAGON_Vect64)((b3) & 0xff)) << 24LL) | (((HEXAGON_Vect64)((b2) & 0xff)) << 16LL) | \ (((HEXAGON_Vect64)((b1) & 0xff)) << 8LL) | ((HEXAGON_Vect64)((b0) & 0xff))) #endif /* !__hexagon__ */ #ifdef __cplusplus class HEXAGON_Vect64C { public: // Constructors HEXAGON_Vect64C(long long d = 0) : data(d) {}; HEXAGON_Vect64C(int w1, int w0) : data(HEXAGON_V64_CREATE_W(w1, w0)) {}; HEXAGON_Vect64C(short h3, short h2, short h1, short h0) : data(HEXAGON_V64_CREATE_H(h3, h2, h1, h0)) {}; HEXAGON_Vect64C(signed char b7, signed char b6, signed char b5, signed char b4, signed char b3, signed char b2, signed char b1, signed char b0) : data(HEXAGON_V64_CREATE_B(b7, b6, b5, b4, b3, b2, b1, b0)) {}; HEXAGON_Vect64C(const HEXAGON_Vect64C &v) : data(v.data) {}; HEXAGON_Vect64C &operator=(const HEXAGON_Vect64C &v) { data = v.data; return *this; }; operator long long() { return data; }; // Extract doubleword methods long long D(void) { return HEXAGON_V64_GET_D(data); }; unsigned long long UD(void) { return HEXAGON_V64_GET_UD(data); }; // Extract word methods int W0(void) { return HEXAGON_V64_GET_W0(data); }; int W1(void) { return HEXAGON_V64_GET_W1(data); }; unsigned int UW0(void) { return HEXAGON_V64_GET_UW0(data); }; unsigned int UW1(void) { return HEXAGON_V64_GET_UW1(data); }; // Extract half word methods short H0(void) { return HEXAGON_V64_GET_H0(data); }; short H1(void) { return HEXAGON_V64_GET_H1(data); }; short H2(void) { return HEXAGON_V64_GET_H2(data); }; short H3(void) { return HEXAGON_V64_GET_H3(data); }; unsigned short UH0(void) { return HEXAGON_V64_GET_UH0(data); }; unsigned short UH1(void) { return HEXAGON_V64_GET_UH1(data); }; 
unsigned short UH2(void) { return HEXAGON_V64_GET_UH2(data); }; unsigned short UH3(void) { return HEXAGON_V64_GET_UH3(data); }; // Extract byte methods signed char B0(void) { return HEXAGON_V64_GET_B0(data); }; signed char B1(void) { return HEXAGON_V64_GET_B1(data); }; signed char B2(void) { return HEXAGON_V64_GET_B2(data); }; signed char B3(void) { return HEXAGON_V64_GET_B3(data); }; signed char B4(void) { return HEXAGON_V64_GET_B4(data); }; signed char B5(void) { return HEXAGON_V64_GET_B5(data); }; signed char B6(void) { return HEXAGON_V64_GET_B6(data); }; signed char B7(void) { return HEXAGON_V64_GET_B7(data); }; unsigned char UB0(void) { return HEXAGON_V64_GET_UB0(data); }; unsigned char UB1(void) { return HEXAGON_V64_GET_UB1(data); }; unsigned char UB2(void) { return HEXAGON_V64_GET_UB2(data); }; unsigned char UB3(void) { return HEXAGON_V64_GET_UB3(data); }; unsigned char UB4(void) { return HEXAGON_V64_GET_UB4(data); }; unsigned char UB5(void) { return HEXAGON_V64_GET_UB5(data); }; unsigned char UB6(void) { return HEXAGON_V64_GET_UB6(data); }; unsigned char UB7(void) { return HEXAGON_V64_GET_UB7(data); }; // NOTE: All set methods return a HEXAGON_Vect64C type // Set doubleword method HEXAGON_Vect64C D(long long d) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_D(data, d)); }; // Set word methods HEXAGON_Vect64C W0(int w) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_W0(data, w)); }; HEXAGON_Vect64C W1(int w) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_W1(data, w)); }; // Set half word methods HEXAGON_Vect64C H0(short h) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_H0(data, h)); }; HEXAGON_Vect64C H1(short h) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_H1(data, h)); }; HEXAGON_Vect64C H2(short h) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_H2(data, h)); }; HEXAGON_Vect64C H3(short h) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_H3(data, h)); }; // Set byte methods HEXAGON_Vect64C B0(signed char b) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_B0(data, b)); }; HEXAGON_Vect64C B1(signed char b) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_B1(data, b)); }; HEXAGON_Vect64C B2(signed char b) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_B2(data, b)); }; HEXAGON_Vect64C B3(signed char b) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_B3(data, b)); }; HEXAGON_Vect64C B4(signed char b) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_B4(data, b)); }; HEXAGON_Vect64C B5(signed char b) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_B5(data, b)); }; HEXAGON_Vect64C B6(signed char b) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_B6(data, b)); }; HEXAGON_Vect64C B7(signed char b) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_B7(data, b)); }; private: long long data; }; #endif /* __cplusplus */ /* 32 Bit Vectors */ typedef int HEXAGON_Vect32; /* Extract word macros */ #define HEXAGON_V32_GET_W(v) (v) #define HEXAGON_V32_GET_UW(v) ((unsigned int)(v)) /* Extract half word macros */ #define HEXAGON_V32_GET_H0(v) \ __extension__({ \ union { \ int w; \ short h[2]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.h[0]; \ }) #define HEXAGON_V32_GET_H1(v) \ __extension__({ \ union { \ int w; \ short h[2]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.h[1]; \ }) #define HEXAGON_V32_GET_UH0(v) \ __extension__({ \ union { \ int w; \ unsigned short uh[2]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.uh[0]; \ }) #define HEXAGON_V32_GET_UH1(v) \ __extension__({ \ union { \ int w; \ unsigned short uh[2]; \ } 
_HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.uh[1]; \ }) /* Extract byte macros */ #define HEXAGON_V32_GET_B0(v) \ __extension__({ \ union { \ int w; \ signed char b[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.b[0]; \ }) #define HEXAGON_V32_GET_B1(v) \ __extension__({ \ union { \ int w; \ signed char b[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.b[1]; \ }) #define HEXAGON_V32_GET_B2(v) \ __extension__({ \ union { \ int w; \ signed char b[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.b[2]; \ }) #define HEXAGON_V32_GET_B3(v) \ __extension__({ \ union { \ int w; \ signed char b[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.b[3]; \ }) #define HEXAGON_V32_GET_UB0(v) \ __extension__({ \ union { \ int w; \ unsigned char ub[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.ub[0]; \ }) #define HEXAGON_V32_GET_UB1(v) \ __extension__({ \ union { \ int w; \ unsigned char ub[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.ub[1]; \ }) #define HEXAGON_V32_GET_UB2(v) \ __extension__({ \ union { \ int w; \ unsigned char ub[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.ub[2]; \ }) #define HEXAGON_V32_GET_UB3(v) \ __extension__({ \ union { \ int w; \ unsigned char ub[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.ub[3]; \ }) /* NOTE: All set macros return a HEXAGON_Vect32 type */ /* Set word macro */ #define HEXAGON_V32_PUT_W(v, new) (new) /* Set half word macros */ #ifdef __hexagon__ #define HEXAGON_V32_PUT_H0(v, new) \ __extension__({ \ union { \ int w; \ short h[2]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.h[0] = (new); \ _HEXAGON_V32_internal_union.w; \ }) #define HEXAGON_V32_PUT_H1(v, new) \ __extension__({ \ union { \ int w; \ short h[2]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.h[1] = (new); \ _HEXAGON_V32_internal_union.w; \ }) #else /* !__hexagon__ */ #define HEXAGON_V32_PUT_H0(v, new) \ (((v) & 0xffff0000) | ((HEXAGON_Vect32)((unsigned short)(new)))) #define HEXAGON_V32_PUT_H1(v, new) (((v) & 0x0000ffff) | (((HEXAGON_Vect32)(new)) << 16)) #endif /* !__hexagon__ */ /* Set byte macros */ #ifdef __hexagon__ #define HEXAGON_V32_PUT_B0(v, new) \ __extension__({ \ union { \ int w; \ char b[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.b[0] = (new); \ _HEXAGON_V32_internal_union.w; \ }) #define HEXAGON_V32_PUT_B1(v, new) \ __extension__({ \ union { \ int w; \ char b[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.b[1] = (new); \ _HEXAGON_V32_internal_union.w; \ }) #define HEXAGON_V32_PUT_B2(v, new) \ __extension__({ \ union { \ int w; \ char b[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.b[2] = (new); \ _HEXAGON_V32_internal_union.w; \ }) #define HEXAGON_V32_PUT_B3(v, new) \ __extension__({ \ union { \ int w; \ char b[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = 
(v); \ _HEXAGON_V32_internal_union.b[3] = (new); \ _HEXAGON_V32_internal_union.w; \ }) #else /* !__hexagon__ */ #define HEXAGON_V32_PUT_B0(v, new) \ (((v) & 0xffffff00) | ((HEXAGON_Vect32)((unsigned char)(new)))) #define HEXAGON_V32_PUT_B1(v, new) \ (((v) & 0xffff00ff) | (((HEXAGON_Vect32)((unsigned char)(new))) << 8)) #define HEXAGON_V32_PUT_B2(v, new) \ (((v) & 0xff00ffff) | (((HEXAGON_Vect32)((unsigned char)(new))) << 16)) #define HEXAGON_V32_PUT_B3(v, new) (((v) & 0x00ffffff) | (((HEXAGON_Vect32)(new)) << 24)) #endif /* !__hexagon__ */ /* NOTE: All create macros return a HEXAGON_Vect32 type */ /* Create from a word */ #define HEXAGON_V32_CREATE_W(w) (w) /* Create from half words */ #ifdef __hexagon__ #define HEXAGON_V32_CREATE_H(h1, h0) \ __extension__({ \ union { \ long long d; \ short h[2]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.h[0] = (h0); \ _HEXAGON_V32_internal_union.h[1] = (h1); \ _HEXAGON_V32_internal_union.d; \ }) #else /* !__hexagon__ */ #define HEXAGON_V32_CREATE_H(h1, h0) \ ((((HEXAGON_Vect32)(h1)) << 16) | ((HEXAGON_Vect32)((h0) & 0xffff))) #endif /* !__hexagon__ */ /* Create from bytes */ #ifdef __hexagon__ #define HEXAGON_V32_CREATE_B(b3, b2, b1, b0) \ __extension__({ \ union { \ long long d; \ char b[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.b[0] = (b0); \ _HEXAGON_V32_internal_union.b[1] = (b1); \ _HEXAGON_V32_internal_union.b[2] = (b2); \ _HEXAGON_V32_internal_union.b[3] = (b3); \ _HEXAGON_V32_internal_union.d; \ }) #else /* !__hexagon__ */ #define HEXAGON_V32_CREATE_B(b3, b2, b1, b0) \ ((((HEXAGON_Vect32)(b3)) << 24) | (((HEXAGON_Vect32)((b2) & 0xff)) << 16) | \ (((HEXAGON_Vect32)((b1) & 0xff)) << 8) | ((HEXAGON_Vect32)((b0) & 0xff))) #endif /* !__hexagon__ */ #ifdef __cplusplus class HEXAGON_Vect32C { public: // Constructors HEXAGON_Vect32C(int w = 0) : data(w) {}; HEXAGON_Vect32C(short h1, short h0) : data(HEXAGON_V32_CREATE_H(h1, h0)) {}; HEXAGON_Vect32C(signed char b3, signed char b2, signed char b1, signed char b0) : data(HEXAGON_V32_CREATE_B(b3, b2, b1, b0)) {}; HEXAGON_Vect32C(const HEXAGON_Vect32C &v) : data(v.data) {}; HEXAGON_Vect32C &operator=(const HEXAGON_Vect32C &v) { data = v.data; return *this; }; operator int() { return data; }; // Extract word methods int W(void) { return HEXAGON_V32_GET_W(data); }; unsigned int UW(void) { return HEXAGON_V32_GET_UW(data); }; // Extract half word methods short H0(void) { return HEXAGON_V32_GET_H0(data); }; short H1(void) { return HEXAGON_V32_GET_H1(data); }; unsigned short UH0(void) { return HEXAGON_V32_GET_UH0(data); }; unsigned short UH1(void) { return HEXAGON_V32_GET_UH1(data); }; // Extract byte methods signed char B0(void) { return HEXAGON_V32_GET_B0(data); }; signed char B1(void) { return HEXAGON_V32_GET_B1(data); }; signed char B2(void) { return HEXAGON_V32_GET_B2(data); }; signed char B3(void) { return HEXAGON_V32_GET_B3(data); }; unsigned char UB0(void) { return HEXAGON_V32_GET_UB0(data); }; unsigned char UB1(void) { return HEXAGON_V32_GET_UB1(data); }; unsigned char UB2(void) { return HEXAGON_V32_GET_UB2(data); }; unsigned char UB3(void) { return HEXAGON_V32_GET_UB3(data); }; // NOTE: All set methods return a HEXAGON_Vect32C type // Set word method HEXAGON_Vect32C W(int w) { return HEXAGON_Vect32C(HEXAGON_V32_PUT_W(data, w)); }; // Set half word methods HEXAGON_Vect32C H0(short h) { return HEXAGON_Vect32C(HEXAGON_V32_PUT_H0(data, h)); }; HEXAGON_Vect32C H1(short h) { return HEXAGON_Vect32C(HEXAGON_V32_PUT_H1(data, h)); }; // Set byte methods HEXAGON_Vect32C 
B0(signed char b) { return HEXAGON_Vect32C(HEXAGON_V32_PUT_B0(data, b)); }; HEXAGON_Vect32C B1(signed char b) { return HEXAGON_Vect32C(HEXAGON_V32_PUT_B1(data, b)); }; HEXAGON_Vect32C B2(signed char b) { return HEXAGON_Vect32C(HEXAGON_V32_PUT_B2(data, b)); }; HEXAGON_Vect32C B3(signed char b) { return HEXAGON_Vect32C(HEXAGON_V32_PUT_B3(data, b)); }; private: int data; }; #endif /* __cplusplus */ // V65 Vector types #if __HVX_ARCH__ >= 65 #if defined __HVX__ && (__HVX_LENGTH__ == 128) typedef long HEXAGON_VecPred128 __attribute__((__vector_size__(128))) __attribute__((aligned(128))); typedef long HEXAGON_Vect1024 __attribute__((__vector_size__(128))) __attribute__((aligned(128))); typedef long HEXAGON_Vect2048 __attribute__((__vector_size__(256))) __attribute__((aligned(256))); typedef long HEXAGON_UVect1024 __attribute__((__vector_size__(128))) __attribute__((aligned(4))); typedef long HEXAGON_UVect2048 __attribute__((__vector_size__(256))) __attribute__((aligned(4))); #define HVX_VectorPred HEXAGON_VecPred128 #define HVX_Vector HEXAGON_Vect1024 #define HVX_VectorPair HEXAGON_Vect2048 #define HVX_UVector HEXAGON_UVect1024 #define HVX_UVectorPair HEXAGON_UVect2048 #else /* defined __HVX__ && (__HVX_LENGTH__ == 128) */ #if defined __HVX__ && (__HVX_LENGTH__ == 64) typedef long HEXAGON_VecPred64 __attribute__((__vector_size__(64))) __attribute__((aligned(64))); typedef long HEXAGON_Vect512 __attribute__((__vector_size__(64))) __attribute__((aligned(64))); typedef long HEXAGON_Vect1024 __attribute__((__vector_size__(128))) __attribute__((aligned(128))); typedef long HEXAGON_UVect512 __attribute__((__vector_size__(64))) __attribute__((aligned(4))); typedef long HEXAGON_UVect1024 __attribute__((__vector_size__(128))) __attribute__((aligned(4))); #define HVX_VectorPred HEXAGON_VecPred64 #define HVX_Vector HEXAGON_Vect512 #define HVX_VectorPair HEXAGON_Vect1024 #define HVX_UVector HEXAGON_UVect512 #define HVX_UVectorPair HEXAGON_UVect1024 #endif /* defined __HVX__ && (__HVX_LENGTH__ == 64) */ #endif /* defined __HVX__ && (__HVX_LENGTH__ == 128) */ #endif /* __HVX_ARCH__ >= 65 */ /* Predicates */ typedef int HEXAGON_Pred; /*** *** backward compatibility aliases ***/ /* Old names */ #define Q6Vect Q6Vect64 #define Q6V_GET_D Q6V64_GET_D #define Q6V_GET_UD Q6V64_GET_UD #define Q6V_GET_W0 Q6V64_GET_W0 #define Q6V_GET_W1 Q6V64_GET_W1 #define Q6V_GET_UW0 Q6V64_GET_UW0 #define Q6V_GET_UW1 Q6V64_GET_UW1 #define Q6V_GET_H0 Q6V64_GET_H0 #define Q6V_GET_H1 Q6V64_GET_H1 #define Q6V_GET_H2 Q6V64_GET_H2 #define Q6V_GET_H3 Q6V64_GET_H3 #define Q6V_GET_UH0 Q6V64_GET_UH0 #define Q6V_GET_UH1 Q6V64_GET_UH1 #define Q6V_GET_UH2 Q6V64_GET_UH2 #define Q6V_GET_UH3 Q6V64_GET_UH3 #define Q6V_GET_B0 Q6V64_GET_B0 #define Q6V_GET_B1 Q6V64_GET_B1 #define Q6V_GET_B2 Q6V64_GET_B2 #define Q6V_GET_B3 Q6V64_GET_B3 #define Q6V_GET_B4 Q6V64_GET_B4 #define Q6V_GET_B5 Q6V64_GET_B5 #define Q6V_GET_B6 Q6V64_GET_B6 #define Q6V_GET_B7 Q6V64_GET_B7 #define Q6V_GET_UB0 Q6V64_GET_UB0 #define Q6V_GET_UB1 Q6V64_GET_UB1 #define Q6V_GET_UB2 Q6V64_GET_UB2 #define Q6V_GET_UB3 Q6V64_GET_UB3 #define Q6V_GET_UB4 Q6V64_GET_UB4 #define Q6V_GET_UB5 Q6V64_GET_UB5 #define Q6V_GET_UB6 Q6V64_GET_UB6 #define Q6V_GET_UB7 Q6V64_GET_UB7 #define Q6V_PUT_D Q6V64_PUT_D #define Q6V_PUT_W0 Q6V64_PUT_W0 #define Q6V_PUT_W1 Q6V64_PUT_W1 #define Q6V_PUT_H0 Q6V64_PUT_H0 #define Q6V_PUT_H1 Q6V64_PUT_H1 #define Q6V_PUT_H2 Q6V64_PUT_H2 #define Q6V_PUT_H3 Q6V64_PUT_H3 #define Q6V_PUT_B0 Q6V64_PUT_B0 #define Q6V_PUT_B1 Q6V64_PUT_B1 #define Q6V_PUT_B2 Q6V64_PUT_B2 #define 
Q6V_PUT_B3 Q6V64_PUT_B3 #define Q6V_PUT_B4 Q6V64_PUT_B4 #define Q6V_PUT_B5 Q6V64_PUT_B5 #define Q6V_PUT_B6 Q6V64_PUT_B6 #define Q6V_PUT_B7 Q6V64_PUT_B7 #define Q6V_CREATE_D Q6V64_CREATE_D #define Q6V_CREATE_W Q6V64_CREATE_W #define Q6V_CREATE_H Q6V64_CREATE_H #define Q6V_CREATE_B Q6V64_CREATE_B #ifdef __cplusplus #define Q6VectC Q6Vect64C #endif /* __cplusplus */ /* 64 Bit Vectors */ typedef long long __attribute__((__may_alias__)) Q6Vect64; /* Extract doubleword macros */ #define Q6V64_GET_D(v) (v) #define Q6V64_GET_UD(v) ((unsigned long long)(v)) /* Extract word macros */ #define Q6V64_GET_W0(v) \ __extension__({ \ union { \ long long d; \ int w[2]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.w[0]; \ }) #define Q6V64_GET_W1(v) \ __extension__({ \ union { \ long long d; \ int w[2]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.w[1]; \ }) #define Q6V64_GET_UW0(v) \ __extension__({ \ union { \ long long d; \ unsigned int uw[2]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.uw[0]; \ }) #define Q6V64_GET_UW1(v) \ __extension__({ \ union { \ long long d; \ unsigned int uw[2]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.uw[1]; \ }) /* Extract half word macros */ #define Q6V64_GET_H0(v) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.h[0]; \ }) #define Q6V64_GET_H1(v) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.h[1]; \ }) #define Q6V64_GET_H2(v) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.h[2]; \ }) #define Q6V64_GET_H3(v) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.h[3]; \ }) #define Q6V64_GET_UH0(v) \ __extension__({ \ union { \ long long d; \ unsigned short uh[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.uh[0]; \ }) #define Q6V64_GET_UH1(v) \ __extension__({ \ union { \ long long d; \ unsigned short uh[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.uh[1]; \ }) #define Q6V64_GET_UH2(v) \ __extension__({ \ union { \ long long d; \ unsigned short uh[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.uh[2]; \ }) #define Q6V64_GET_UH3(v) \ __extension__({ \ union { \ long long d; \ unsigned short uh[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.uh[3]; \ }) /* Extract byte macros */ #define Q6V64_GET_B0(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[0]; \ }) #define Q6V64_GET_B1(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[1]; \ }) #define Q6V64_GET_B2(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[2]; \ }) #define Q6V64_GET_B3(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[3]; \ }) #define 
Q6V64_GET_B4(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[4]; \ }) #define Q6V64_GET_B5(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[5]; \ }) #define Q6V64_GET_B6(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[6]; \ }) #define Q6V64_GET_B7(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[7]; \ }) #define Q6V64_GET_UB0(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.ub[0]; \ }) #define Q6V64_GET_UB1(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.ub[1]; \ }) #define Q6V64_GET_UB2(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.ub[2]; \ }) #define Q6V64_GET_UB3(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.ub[3]; \ }) #define Q6V64_GET_UB4(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.ub[4]; \ }) #define Q6V64_GET_UB5(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.ub[5]; \ }) #define Q6V64_GET_UB6(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.ub[6]; \ }) #define Q6V64_GET_UB7(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.ub[7]; \ }) /* NOTE: All set macros return a Q6Vect64 type */ /* Set doubleword macro */ #define Q6V64_PUT_D(v, new) (new) /* Set word macros */ #ifdef __qdsp6__ #define Q6V64_PUT_W0(v, new) \ __extension__({ \ union { \ long long d; \ int w[2]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.w[0] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_W1(v, new) \ __extension__({ \ union { \ long long d; \ int w[2]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.w[1] = (new); \ _Q6V64_internal_union.d; \ }) #else /* !__qdsp6__ */ #define Q6V64_PUT_W0(v, new) \ (((v) & 0xffffffff00000000LL) | ((Q6Vect64)((unsigned int)(new)))) #define Q6V64_PUT_W1(v, new) \ (((v) & 0x00000000ffffffffLL) | (((Q6Vect64)(new)) << 32LL)) #endif /* !__qdsp6__ */ /* Set half word macros */ #ifdef __qdsp6__ #define Q6V64_PUT_H0(v, new) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.h[0] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_H1(v, new) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.h[1] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_H2(v, new) \ __extension__({ 
\ union { \ long long d; \ short h[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.h[2] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_H3(v, new) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.h[3] = (new); \ _Q6V64_internal_union.d; \ }) #else /* !__qdsp6__ */ #define Q6V64_PUT_H0(v, new) \ (((v) & 0xffffffffffff0000LL) | ((Q6Vect64)((unsigned short)(new)))) #define Q6V64_PUT_H1(v, new) \ (((v) & 0xffffffff0000ffffLL) | (((Q6Vect64)((unsigned short)(new))) << 16LL)) #define Q6V64_PUT_H2(v, new) \ (((v) & 0xffff0000ffffffffLL) | (((Q6Vect64)((unsigned short)(new))) << 32LL)) #define Q6V64_PUT_H3(v, new) \ (((v) & 0x0000ffffffffffffLL) | (((Q6Vect64)(new)) << 48LL)) #endif /* !__qdsp6__ */ /* Set byte macros */ #ifdef __qdsp6__ #define Q6V64_PUT_B0(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[0] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_B1(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[1] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_B2(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[2] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_B3(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[3] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_B4(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[4] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_B5(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[5] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_B6(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[6] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_B7(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[7] = (new); \ _Q6V64_internal_union.d; \ }) #else /* !__qdsp6__ */ #define Q6V64_PUT_B0(v, new) \ (((v) & 0xffffffffffffff00LL) | ((Q6Vect64)((unsigned char)(new)))) #define Q6V64_PUT_B1(v, new) \ (((v) & 0xffffffffffff00ffLL) | (((Q6Vect64)((unsigned char)(new))) << 8LL)) #define Q6V64_PUT_B2(v, new) \ (((v) & 0xffffffffff00ffffLL) | (((Q6Vect64)((unsigned char)(new))) << 16LL)) #define Q6V64_PUT_B3(v, new) \ (((v) & 0xffffffff00ffffffLL) | (((Q6Vect64)((unsigned char)(new))) << 24LL)) #define Q6V64_PUT_B4(v, new) \ (((v) & 0xffffff00ffffffffLL) | (((Q6Vect64)((unsigned char)(new))) << 32LL)) #define Q6V64_PUT_B5(v, new) \ (((v) & 0xffff00ffffffffffLL) | (((Q6Vect64)((unsigned char)(new))) << 40LL)) #define Q6V64_PUT_B6(v, new) \ (((v) & 0xff00ffffffffffffLL) | (((Q6Vect64)((unsigned char)(new))) << 48LL)) #define Q6V64_PUT_B7(v, new) \ (((v) & 0x00ffffffffffffffLL) | (((Q6Vect64)(new)) << 56LL)) #endif /* !__qdsp6__ */ /* NOTE: All create macros return a Q6Vect64 type */ /* 
Create from a doubleword */ #define Q6V64_CREATE_D(d) (d) /* Create from words */ #ifdef __qdsp6__ #define Q6V64_CREATE_W(w1, w0) \ __extension__({ \ union { \ long long d; \ int w[2]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.w[0] = (w0); \ _Q6V64_internal_union.w[1] = (w1); \ _Q6V64_internal_union.d; \ }) #else /* !__qdsp6__ */ #define Q6V64_CREATE_W(w1, w0) \ ((((Q6Vect64)(w1)) << 32LL) | ((Q6Vect64)((w0) & 0xffffffff))) #endif /* !__qdsp6__ */ /* Create from half words */ #ifdef __qdsp6__ #define Q6V64_CREATE_H(h3, h2, h1, h0) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.h[0] = (h0); \ _Q6V64_internal_union.h[1] = (h1); \ _Q6V64_internal_union.h[2] = (h2); \ _Q6V64_internal_union.h[3] = (h3); \ _Q6V64_internal_union.d; \ }) #else /* !__qdsp6__ */ #define Q6V64_CREATE_H(h3, h2, h1, h0) \ ((((Q6Vect64)(h3)) << 48LL) | (((Q6Vect64)((h2) & 0xffff)) << 32LL) | \ (((Q6Vect64)((h1) & 0xffff)) << 16LL) | ((Q6Vect64)((h0) & 0xffff))) #endif /* !__qdsp6__ */ /* Create from bytes */ #ifdef __qdsp6__ #define Q6V64_CREATE_B(b7, b6, b5, b4, b3, b2, b1, b0) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.b[0] = (b0); \ _Q6V64_internal_union.b[1] = (b1); \ _Q6V64_internal_union.b[2] = (b2); \ _Q6V64_internal_union.b[3] = (b3); \ _Q6V64_internal_union.b[4] = (b4); \ _Q6V64_internal_union.b[5] = (b5); \ _Q6V64_internal_union.b[6] = (b6); \ _Q6V64_internal_union.b[7] = (b7); \ _Q6V64_internal_union.d; \ }) #else /* !__qdsp6__ */ #define Q6V64_CREATE_B(b7, b6, b5, b4, b3, b2, b1, b0) \ ((((Q6Vect64)(b7)) << 56LL) | (((Q6Vect64)((b6) & 0xff)) << 48LL) | \ (((Q6Vect64)((b5) & 0xff)) << 40LL) | (((Q6Vect64)((b4) & 0xff)) << 32LL) | \ (((Q6Vect64)((b3) & 0xff)) << 24LL) | (((Q6Vect64)((b2) & 0xff)) << 16LL) | \ (((Q6Vect64)((b1) & 0xff)) << 8LL) | ((Q6Vect64)((b0) & 0xff))) #endif /* !__qdsp6__ */ #ifdef __cplusplus class Q6Vect64C { public: // Constructors Q6Vect64C(long long d = 0) : data(d) {}; Q6Vect64C(int w1, int w0) : data(Q6V64_CREATE_W(w1, w0)) {}; Q6Vect64C(short h3, short h2, short h1, short h0) : data(Q6V64_CREATE_H(h3, h2, h1, h0)) {}; Q6Vect64C(signed char b7, signed char b6, signed char b5, signed char b4, signed char b3, signed char b2, signed char b1, signed char b0) : data(Q6V64_CREATE_B(b7, b6, b5, b4, b3, b2, b1, b0)) {}; Q6Vect64C(const Q6Vect64C &v) : data(v.data) {}; Q6Vect64C &operator=(const Q6Vect64C &v) { data = v.data; return *this; }; operator long long() { return data; }; // Extract doubleword methods long long D(void) { return Q6V64_GET_D(data); }; unsigned long long UD(void) { return Q6V64_GET_UD(data); }; // Extract word methods int W0(void) { return Q6V64_GET_W0(data); }; int W1(void) { return Q6V64_GET_W1(data); }; unsigned int UW0(void) { return Q6V64_GET_UW0(data); }; unsigned int UW1(void) { return Q6V64_GET_UW1(data); }; // Extract half word methods short H0(void) { return Q6V64_GET_H0(data); }; short H1(void) { return Q6V64_GET_H1(data); }; short H2(void) { return Q6V64_GET_H2(data); }; short H3(void) { return Q6V64_GET_H3(data); }; unsigned short UH0(void) { return Q6V64_GET_UH0(data); }; unsigned short UH1(void) { return Q6V64_GET_UH1(data); }; unsigned short UH2(void) { return Q6V64_GET_UH2(data); }; unsigned short UH3(void) { return Q6V64_GET_UH3(data); }; // Extract byte methods signed char B0(void) { return Q6V64_GET_B0(data); }; signed char B1(void) { return Q6V64_GET_B1(data); }; signed char B2(void) { return Q6V64_GET_B2(data); }; 
signed char B3(void) { return Q6V64_GET_B3(data); }; signed char B4(void) { return Q6V64_GET_B4(data); }; signed char B5(void) { return Q6V64_GET_B5(data); }; signed char B6(void) { return Q6V64_GET_B6(data); }; signed char B7(void) { return Q6V64_GET_B7(data); }; unsigned char UB0(void) { return Q6V64_GET_UB0(data); }; unsigned char UB1(void) { return Q6V64_GET_UB1(data); }; unsigned char UB2(void) { return Q6V64_GET_UB2(data); }; unsigned char UB3(void) { return Q6V64_GET_UB3(data); }; unsigned char UB4(void) { return Q6V64_GET_UB4(data); }; unsigned char UB5(void) { return Q6V64_GET_UB5(data); }; unsigned char UB6(void) { return Q6V64_GET_UB6(data); }; unsigned char UB7(void) { return Q6V64_GET_UB7(data); }; // NOTE: All set methods return a Q6Vect64C type // Set doubleword method Q6Vect64C D(long long d) { return Q6Vect64C(Q6V64_PUT_D(data, d)); }; // Set word methods Q6Vect64C W0(int w) { return Q6Vect64C(Q6V64_PUT_W0(data, w)); }; Q6Vect64C W1(int w) { return Q6Vect64C(Q6V64_PUT_W1(data, w)); }; // Set half word methods Q6Vect64C H0(short h) { return Q6Vect64C(Q6V64_PUT_H0(data, h)); }; Q6Vect64C H1(short h) { return Q6Vect64C(Q6V64_PUT_H1(data, h)); }; Q6Vect64C H2(short h) { return Q6Vect64C(Q6V64_PUT_H2(data, h)); }; Q6Vect64C H3(short h) { return Q6Vect64C(Q6V64_PUT_H3(data, h)); }; // Set byte methods Q6Vect64C B0(signed char b) { return Q6Vect64C(Q6V64_PUT_B0(data, b)); }; Q6Vect64C B1(signed char b) { return Q6Vect64C(Q6V64_PUT_B1(data, b)); }; Q6Vect64C B2(signed char b) { return Q6Vect64C(Q6V64_PUT_B2(data, b)); }; Q6Vect64C B3(signed char b) { return Q6Vect64C(Q6V64_PUT_B3(data, b)); }; Q6Vect64C B4(signed char b) { return Q6Vect64C(Q6V64_PUT_B4(data, b)); }; Q6Vect64C B5(signed char b) { return Q6Vect64C(Q6V64_PUT_B5(data, b)); }; Q6Vect64C B6(signed char b) { return Q6Vect64C(Q6V64_PUT_B6(data, b)); }; Q6Vect64C B7(signed char b) { return Q6Vect64C(Q6V64_PUT_B7(data, b)); }; private: long long data; }; #endif /* __cplusplus */ /* 32 Bit Vectors */ typedef int Q6Vect32; /* Extract word macros */ #define Q6V32_GET_W(v) (v) #define Q6V32_GET_UW(v) ((unsigned int)(v)) /* Extract half word macros */ #define Q6V32_GET_H0(v) \ __extension__({ \ union { \ int w; \ short h[2]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.h[0]; \ }) #define Q6V32_GET_H1(v) \ __extension__({ \ union { \ int w; \ short h[2]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.h[1]; \ }) #define Q6V32_GET_UH0(v) \ __extension__({ \ union { \ int w; \ unsigned short uh[2]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.uh[0]; \ }) #define Q6V32_GET_UH1(v) \ __extension__({ \ union { \ int w; \ unsigned short uh[2]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.uh[1]; \ }) /* Extract byte macros */ #define Q6V32_GET_B0(v) \ __extension__({ \ union { \ int w; \ signed char b[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.b[0]; \ }) #define Q6V32_GET_B1(v) \ __extension__({ \ union { \ int w; \ signed char b[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.b[1]; \ }) #define Q6V32_GET_B2(v) \ __extension__({ \ union { \ int w; \ signed char b[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.b[2]; \ }) #define Q6V32_GET_B3(v) \ __extension__({ \ union { \ int w; \ signed char b[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ 
_Q6V32_internal_union.b[3]; \ }) #define Q6V32_GET_UB0(v) \ __extension__({ \ union { \ int w; \ unsigned char ub[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.ub[0]; \ }) #define Q6V32_GET_UB1(v) \ __extension__({ \ union { \ int w; \ unsigned char ub[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.ub[1]; \ }) #define Q6V32_GET_UB2(v) \ __extension__({ \ union { \ int w; \ unsigned char ub[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.ub[2]; \ }) #define Q6V32_GET_UB3(v) \ __extension__({ \ union { \ int w; \ unsigned char ub[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.ub[3]; \ }) /* NOTE: All set macros return a Q6Vect32 type */ /* Set word macro */ #define Q6V32_PUT_W(v, new) (new) /* Set half word macros */ #ifdef __qdsp6__ #define Q6V32_PUT_H0(v, new) \ __extension__({ \ union { \ int w; \ short h[2]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.h[0] = (new); \ _Q6V32_internal_union.w; \ }) #define Q6V32_PUT_H1(v, new) \ __extension__({ \ union { \ int w; \ short h[2]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.h[1] = (new); \ _Q6V32_internal_union.w; \ }) #else /* !__qdsp6__ */ #define Q6V32_PUT_H0(v, new) \ (((v) & 0xffff0000) | ((Q6Vect32)((unsigned short)(new)))) #define Q6V32_PUT_H1(v, new) (((v) & 0x0000ffff) | (((Q6Vect32)(new)) << 16)) #endif /* !__qdsp6__ */ /* Set byte macros */ #ifdef __qdsp6__ #define Q6V32_PUT_B0(v, new) \ __extension__({ \ union { \ int w; \ char b[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.b[0] = (new); \ _Q6V32_internal_union.w; \ }) #define Q6V32_PUT_B1(v, new) \ __extension__({ \ union { \ int w; \ char b[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.b[1] = (new); \ _Q6V32_internal_union.w; \ }) #define Q6V32_PUT_B2(v, new) \ __extension__({ \ union { \ int w; \ char b[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.b[2] = (new); \ _Q6V32_internal_union.w; \ }) #define Q6V32_PUT_B3(v, new) \ __extension__({ \ union { \ int w; \ char b[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.b[3] = (new); \ _Q6V32_internal_union.w; \ }) #else /* !__qdsp6__ */ #define Q6V32_PUT_B0(v, new) \ (((v) & 0xffffff00) | ((Q6Vect32)((unsigned char)(new)))) #define Q6V32_PUT_B1(v, new) \ (((v) & 0xffff00ff) | (((Q6Vect32)((unsigned char)(new))) << 8)) #define Q6V32_PUT_B2(v, new) \ (((v) & 0xff00ffff) | (((Q6Vect32)((unsigned char)(new))) << 16)) #define Q6V32_PUT_B3(v, new) (((v) & 0x00ffffff) | (((Q6Vect32)(new)) << 24)) #endif /* !__qdsp6__ */ /* NOTE: All create macros return a Q6Vect32 type */ /* Create from a word */ #define Q6V32_CREATE_W(w) (w) /* Create from half words */ #ifdef __qdsp6__ #define Q6V32_CREATE_H(h1, h0) \ __extension__({ \ union { \ long long d; \ short h[2]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.h[0] = (h0); \ _Q6V32_internal_union.h[1] = (h1); \ _Q6V32_internal_union.d; \ }) #else /* !__qdsp6__ */ #define Q6V32_CREATE_H(h1, h0) \ ((((Q6Vect32)(h1)) << 16) | ((Q6Vect32)((h0) & 0xffff))) #endif /* !__qdsp6__ */ /* Create from bytes */ #ifdef __qdsp6__ #define Q6V32_CREATE_B(b3, b2, b1, b0) \ __extension__({ \ union { \ long long d; \ char b[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.b[0] = (b0); \ 
_Q6V32_internal_union.b[1] = (b1); \ _Q6V32_internal_union.b[2] = (b2); \ _Q6V32_internal_union.b[3] = (b3); \ _Q6V32_internal_union.d; \ }) #else /* !__qdsp6__ */ #define Q6V32_CREATE_B(b3, b2, b1, b0) \ ((((Q6Vect32)(b3)) << 24) | (((Q6Vect32)((b2) & 0xff)) << 16) | \ (((Q6Vect32)((b1) & 0xff)) << 8) | ((Q6Vect32)((b0) & 0xff))) #endif /* !__qdsp6__ */ #ifdef __cplusplus class Q6Vect32C { public: // Constructors Q6Vect32C(int w = 0) : data(w) {}; Q6Vect32C(short h1, short h0) : data(Q6V32_CREATE_H(h1, h0)) {}; Q6Vect32C(signed char b3, signed char b2, signed char b1, signed char b0) : data(Q6V32_CREATE_B(b3, b2, b1, b0)) {}; Q6Vect32C(const Q6Vect32C &v) : data(v.data) {}; Q6Vect32C &operator=(const Q6Vect32C &v) { data = v.data; return *this; }; operator int() { return data; }; // Extract word methods int W(void) { return Q6V32_GET_W(data); }; unsigned int UW(void) { return Q6V32_GET_UW(data); }; // Extract half word methods short H0(void) { return Q6V32_GET_H0(data); }; short H1(void) { return Q6V32_GET_H1(data); }; unsigned short UH0(void) { return Q6V32_GET_UH0(data); }; unsigned short UH1(void) { return Q6V32_GET_UH1(data); }; // Extract byte methods signed char B0(void) { return Q6V32_GET_B0(data); }; signed char B1(void) { return Q6V32_GET_B1(data); }; signed char B2(void) { return Q6V32_GET_B2(data); }; signed char B3(void) { return Q6V32_GET_B3(data); }; unsigned char UB0(void) { return Q6V32_GET_UB0(data); }; unsigned char UB1(void) { return Q6V32_GET_UB1(data); }; unsigned char UB2(void) { return Q6V32_GET_UB2(data); }; unsigned char UB3(void) { return Q6V32_GET_UB3(data); }; // NOTE: All set methods return a Q6Vect32C type // Set word method Q6Vect32C W(int w) { return Q6Vect32C(Q6V32_PUT_W(data, w)); }; // Set half word methods Q6Vect32C H0(short h) { return Q6Vect32C(Q6V32_PUT_H0(data, h)); }; Q6Vect32C H1(short h) { return Q6Vect32C(Q6V32_PUT_H1(data, h)); }; // Set byte methods Q6Vect32C B0(signed char b) { return Q6Vect32C(Q6V32_PUT_B0(data, b)); }; Q6Vect32C B1(signed char b) { return Q6Vect32C(Q6V32_PUT_B1(data, b)); }; Q6Vect32C B2(signed char b) { return Q6Vect32C(Q6V32_PUT_B2(data, b)); }; Q6Vect32C B3(signed char b) { return Q6Vect32C(Q6V32_PUT_B3(data, b)); }; private: int data; }; #endif /* __cplusplus */ // V65 Vector types #if __HVX_ARCH__ >= 65 #if defined __HVX__ && (__HVX_LENGTH__ == 128) typedef long Q6VecPred128 __attribute__((__vector_size__(128))) __attribute__((aligned(128))); typedef long Q6Vect1024 __attribute__((__vector_size__(128))) __attribute__((aligned(128))); typedef long Q6Vect2048 __attribute__((__vector_size__(256))) __attribute__((aligned(256))); #else /* defined __HVX__ && (__HVX_LENGTH__ == 128) */ #if defined __HVX__ && (__HVX_LENGTH__ == 64) typedef long Q6VecPred64 __attribute__((__vector_size__(64))) __attribute__((aligned(64))); typedef long Q6Vect512 __attribute__((__vector_size__(64))) __attribute__((aligned(64))); typedef long Q6Vect1024 __attribute__((__vector_size__(128))) __attribute__((aligned(128))); #endif /* defined __HVX__ && (__HVX_LENGTH__ == 64) */ #endif /* defined __HVX__ && (__HVX_LENGTH__ == 128) */ #endif /* __HVX_ARCH__ >= 65 */ /* Predicates */ typedef int Q6Pred; #ifdef __HVX__ // Extract HVX VectorPair macro. #define HEXAGON_HVX_GET_W(v) (v) // Extract HVX Vector macros. 
#define HEXAGON_HVX_GET_V0(v) \ __extension__({ \ union { \ HVX_VectorPair W; \ HVX_Vector V[2]; \ } _HEXAGON_HVX_internal_union; \ _HEXAGON_HVX_internal_union.W = (v); \ _HEXAGON_HVX_internal_union.V[0]; \ }) #define HEXAGON_HVX_GET_V1(v) \ __extension__({ \ union { \ HVX_VectorPair W; \ HVX_Vector V[2]; \ } _HEXAGON_HVX_internal_union; \ _HEXAGON_HVX_internal_union.W = (v); \ _HEXAGON_HVX_internal_union.V[1]; \ }) #define HEXAGON_HVX_GET_P(v) \ __extension__({ \ union { \ HVX_VectorPair W; \ HVX_VectorPred P[2]; \ } _HEXAGON_HVX_internal_union; \ _HEXAGON_HVX_internal_union.W = (v); \ _HEXAGON_HVX_internal_union.P[0]; \ }) // Set HVX VectorPair macro. #define HEXAGON_HVX_PUT_W(v, new) (new) // Set HVX Vector macros. #define HEXAGON_HVX_PUT_V0(v, new) \ __extension__({ \ union { \ HVX_VectorPair W; \ HVX_Vector V[2]; \ } _HEXAGON_HVX_internal_union; \ _HEXAGON_HVX_internal_union.W = (v); \ _HEXAGON_HVX_internal_union.V[0] = (new); \ _HEXAGON_HVX_internal_union.W; \ }) #define HEXAGON_HVX_PUT_V1(v, new) \ __extension__({ \ union { \ HVX_VectorPair W; \ HVX_Vector V[2]; \ } _HEXAGON_HVX_internal_union; \ _HEXAGON_HVX_internal_union.W = (v); \ _HEXAGON_HVX_internal_union.V[1] = (new); \ _HEXAGON_HVX_internal_union.W; \ }) #define HEXAGON_HVX_PUT_P(v, new) \ __extension__({ \ union { \ HVX_VectorPair W; \ HVX_VectorPred P[2]; \ } _HEXAGON_HVX_internal_union; \ _HEXAGON_HVX_internal_union.W = (v); \ _HEXAGON_HVX_internal_union.P[0] = (new); \ _HEXAGON_HVX_internal_union.W; \ }) #define HEXAGON_HVX_CREATE_W(v1, v0) \ __extension__({ \ union { \ HVX_VectorPair W; \ HVX_Vector V[2]; \ } _HEXAGON_HVX_internal_union; \ _HEXAGON_HVX_internal_union.V[0] = (v0); \ _HEXAGON_HVX_internal_union.V[1] = (v1); \ _HEXAGON_HVX_internal_union.W; \ }) #ifdef __cplusplus class HVX_Vect { public: // Constructors. // Default. HVX_Vect() : data(Q6_W_vcombine_VV(Q6_V_vzero(), Q6_V_vzero())){}; // Custom constructors. HVX_Vect(HVX_VectorPair W) : data(W){}; HVX_Vect(HVX_Vector v1, HVX_Vector v0) : data(HEXAGON_HVX_CREATE_W(v1, v0)){}; // Copy constructor. HVX_Vect(const HVX_Vect &W) = default; // Move constructor. HVX_Vect(HVX_Vect &&W) = default; // Assignment operator. HVX_Vect &operator=(const HVX_Vect &W) = default; operator HVX_VectorPair() { return data; }; // Extract VectorPair method. HVX_VectorPair W(void) { return HEXAGON_HVX_GET_W(data); }; // Extract Vector methods. HVX_Vector V0(void) { return HEXAGON_HVX_GET_V0(data); }; HVX_Vector V1(void) { return HEXAGON_HVX_GET_V1(data); }; HVX_VectorPred P(void) { return HEXAGON_HVX_GET_P(data); }; // NOTE: All set methods return a HVX_Vect type. // Set HVX VectorPair method. HVX_Vect W(HVX_VectorPair w) { return HVX_Vect(HEXAGON_HVX_PUT_W(data, w)); }; // Set HVX Vector methods. 
HVX_Vect V0(HVX_Vector v) { return HVX_Vect(HEXAGON_HVX_PUT_V0(data, v)); }; HVX_Vect V1(HVX_Vector v) { return HVX_Vect(HEXAGON_HVX_PUT_V1(data, v)); }; HVX_Vect P(HVX_VectorPred p) { return HVX_Vect(HEXAGON_HVX_PUT_P(data, p)); }; private: HVX_VectorPair data; }; #endif /* __cplusplus */ #endif /* __HVX__ */ #define HEXAGON_UDMA_DM0_STATUS_IDLE 0x00000000 #define HEXAGON_UDMA_DM0_STATUS_RUN 0x00000001 #define HEXAGON_UDMA_DM0_STATUS_ERROR 0x00000002 #define HEXAGON_UDMA_DESC_DSTATE_INCOMPLETE 0 #define HEXAGON_UDMA_DESC_DSTATE_COMPLETE 1 #define HEXAGON_UDMA_DESC_ORDER_NOORDER 0 #define HEXAGON_UDMA_DESC_ORDER_ORDER 1 #define HEXAGON_UDMA_DESC_BYPASS_OFF 0 #define HEXAGON_UDMA_DESC_BYPASS_ON 1 #define HEXAGON_UDMA_DESC_COMP_NONE 0 #define HEXAGON_UDMA_DESC_COMP_DLBC 1 #define HEXAGON_UDMA_DESC_DESCTYPE_TYPE0 0 #define HEXAGON_UDMA_DESC_DESCTYPE_TYPE1 1 typedef struct hexagon_udma_descriptor_type0_s { void *next; unsigned int length:24; unsigned int desctype:2; unsigned int dstcomp:1; unsigned int srccomp:1; unsigned int dstbypass:1; unsigned int srcbypass:1; unsigned int order:1; unsigned int dstate:1; void *src; void *dst; } hexagon_udma_descriptor_type0_t; typedef struct hexagon_udma_descriptor_type1_s { void *next; unsigned int length:24; unsigned int desctype:2; unsigned int dstcomp:1; unsigned int srccomp:1; unsigned int dstbypass:1; unsigned int srcbypass:1; unsigned int order:1; unsigned int dstate:1; void *src; void *dst; unsigned int allocation:28; unsigned int padding:4; unsigned int roiwidth:16; unsigned int roiheight:16; unsigned int srcstride:16; unsigned int dststride:16; unsigned int srcwidthoffset:16; unsigned int dstwidthoffset:16; } hexagon_udma_descriptor_type1_t; #endif /* !HEXAGON_TYPES_H */ iso646.hpkuintrin.h/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __PMMINTRIN_H #define __PMMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif #include /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("sse3,no-evex512"), __min_vector_width__(128))) /// Loads data from an unaligned memory location to elements in a 128-bit /// vector. /// /// If the address of the data is not 16-byte aligned, the instruction may /// read two adjacent aligned blocks of memory to retrieve the requested /// data. /// /// \headerfile /// /// This intrinsic corresponds to the VLDDQU instruction. /// /// \param __p /// A pointer to a 128-bit integer vector containing integer values. /// \returns A 128-bit vector containing the moved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_lddqu_si128(__m128i_u const *__p) { return (__m128i)__builtin_ia32_lddqu((char const *)__p); } /// Adds the even-indexed values and subtracts the odd-indexed values of /// two 128-bit vectors of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VADDSUBPS instruction. /// /// \param __a /// A 128-bit vector of [4 x float] containing the left source operand. /// \param __b /// A 128-bit vector of [4 x float] containing the right source operand. 
/// \returns A 128-bit vector of [4 x float] containing the alternating sums and /// differences of both operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_addsub_ps(__m128 __a, __m128 __b) { return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b); } /// Horizontally adds the adjacent pairs of values contained in two /// 128-bit vectors of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VHADDPS instruction. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the source operands. /// The horizontal sums of the values are stored in the lower bits of the /// destination. /// \param __b /// A 128-bit vector of [4 x float] containing one of the source operands. /// The horizontal sums of the values are stored in the upper bits of the /// destination. /// \returns A 128-bit vector of [4 x float] containing the horizontal sums of /// both operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_hadd_ps(__m128 __a, __m128 __b) { return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b); } /// Horizontally subtracts the adjacent pairs of values contained in two /// 128-bit vectors of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VHSUBPS instruction. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the source operands. /// The horizontal differences between the values are stored in the lower /// bits of the destination. /// \param __b /// A 128-bit vector of [4 x float] containing one of the source operands. /// The horizontal differences between the values are stored in the upper /// bits of the destination. /// \returns A 128-bit vector of [4 x float] containing the horizontal /// differences of both operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_hsub_ps(__m128 __a, __m128 __b) { return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b); } /// Moves and duplicates odd-indexed values from a 128-bit vector /// of [4 x float] to float values stored in a 128-bit vector of /// [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVSHDUP instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. \n /// Bits [127:96] of the source are written to bits [127:96] and [95:64] of /// the destination. \n /// Bits [63:32] of the source are written to bits [63:32] and [31:0] of the /// destination. /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated /// values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movehdup_ps(__m128 __a) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3); } /// Duplicates even-indexed values from a 128-bit vector of /// [4 x float] to float values stored in a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVSLDUP instruction. /// /// \param __a /// A 128-bit vector of [4 x float] \n /// Bits [95:64] of the source are written to bits [127:96] and [95:64] of /// the destination. \n /// Bits [31:0] of the source are written to bits [63:32] and [31:0] of the /// destination. /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated /// values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_moveldup_ps(__m128 __a) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2); } /// Adds the even-indexed values and subtracts the odd-indexed values of /// two 128-bit vectors of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VADDSUBPD instruction. 
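/* Added illustrative sketch, not part of the original header: the classic use
 * of _mm_moveldup_ps, _mm_movehdup_ps and _mm_addsub_ps is multiplying two
 * packed complex numbers stored as { re0, im0, re1, im1 }. An SSE3-enabled
 * build and the helper name are assumptions. */
#include <pmmintrin.h>

static inline __m128 complex_mul_ps(__m128 a, __m128 b) {
  __m128 re = _mm_moveldup_ps(a);           /* { a.re0, a.re0, a.re1, a.re1 } */
  __m128 im = _mm_movehdup_ps(a);           /* { a.im0, a.im0, a.im1, a.im1 } */
  __m128 sw = _mm_shuffle_ps(b, b, 0xB1);   /* swap re/im within each pair    */
  /* Subtract in the even lanes, add in the odd lanes:
     { re*br - im*bi, re*bi + im*br, ... } for each complex pair. */
  return _mm_addsub_ps(_mm_mul_ps(re, b), _mm_mul_ps(im, sw));
}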
/// /// \param __a /// A 128-bit vector of [2 x double] containing the left source operand. /// \param __b /// A 128-bit vector of [2 x double] containing the right source operand. /// \returns A 128-bit vector of [2 x double] containing the alternating sums /// and differences of both operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_addsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b); } /// Horizontally adds the pairs of values contained in two 128-bit /// vectors of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VHADDPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the source operands. /// The horizontal sum of the values is stored in the lower bits of the /// destination. /// \param __b /// A 128-bit vector of [2 x double] containing one of the source operands. /// The horizontal sum of the values is stored in the upper bits of the /// destination. /// \returns A 128-bit vector of [2 x double] containing the horizontal sums of /// both operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_hadd_pd(__m128d __a, __m128d __b) { return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b); } /// Horizontally subtracts the pairs of values contained in two 128-bit /// vectors of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VHSUBPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the source operands. /// The horizontal difference of the values is stored in the lower bits of /// the destination. /// \param __b /// A 128-bit vector of [2 x double] containing one of the source operands. /// The horizontal difference of the values is stored in the upper bits of /// the destination. /// \returns A 128-bit vector of [2 x double] containing the horizontal /// differences of both operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_hsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b); } /// Moves and duplicates one double-precision value to double-precision /// values stored in a 128-bit vector of [2 x double]. /// /// \headerfile /// /// \code /// __m128d _mm_loaddup_pd(double const *dp); /// \endcode /// /// This intrinsic corresponds to the VMOVDDUP instruction. /// /// \param dp /// A pointer to a double-precision value to be moved and duplicated. /// \returns A 128-bit vector of [2 x double] containing the moved and /// duplicated values. #define _mm_loaddup_pd(dp) _mm_load1_pd(dp) /// Moves and duplicates the double-precision value in the lower bits of /// a 128-bit vector of [2 x double] to double-precision values stored in a /// 128-bit vector of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDDUP instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. Bits [63:0] are written to bits /// [127:64] and [63:0] of the destination. /// \returns A 128-bit vector of [2 x double] containing the moved and /// duplicated values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_movedup_pd(__m128d __a) { return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); } /// Establishes a linear address memory range to be monitored and puts /// the processor in the monitor event pending state. Data stored in the /// monitored address range causes the processor to exit the pending state. /// /// The \c MONITOR instruction can be used in kernel mode, and in other modes /// if MSR C001_0015h[MonMwaitUserEn] is set. 
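/* Added illustrative sketch, not part of the original header: summing an
 * array of doubles two lanes at a time and collapsing the accumulator with
 * _mm_hadd_pd. An even element count, an SSE3-enabled build and the helper
 * name are assumptions. */
#include <pmmintrin.h>
#include <stddef.h>

static inline double sum_doubles_sse3(const double *p, size_t n) {
  __m128d acc = _mm_setzero_pd();
  for (size_t i = 0; i + 2 <= n; i += 2)
    acc = _mm_add_pd(acc, _mm_loadu_pd(p + i));
  acc = _mm_hadd_pd(acc, acc);      /* both lanes now hold lane0 + lane1 */
  return _mm_cvtsd_f64(acc);
}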
/// /// \headerfile /// /// This intrinsic corresponds to the \c MONITOR instruction. /// /// \param __p /// The memory range to be monitored. The size of the range is determined by /// CPUID function 0000_0005h. /// \param __extensions /// Optional extensions for the monitoring state. /// \param __hints /// Optional hints for the monitoring state. static __inline__ void __DEFAULT_FN_ATTRS _mm_monitor(void const *__p, unsigned __extensions, unsigned __hints) { __builtin_ia32_monitor(__p, __extensions, __hints); } /// Used with the \c MONITOR instruction to wait while the processor is in /// the monitor event pending state. Data stored in the monitored address /// range, or an interrupt, causes the processor to exit the pending state. /// /// The \c MWAIT instruction can be used in kernel mode, and in other modes if /// MSR C001_0015h[MonMwaitUserEn] is set. /// /// \headerfile /// /// This intrinsic corresponds to the \c MWAIT instruction. /// /// \param __extensions /// Optional extensions for the monitoring state, which can vary by /// processor. /// \param __hints /// Optional hints for the monitoring state, which can vary by processor. static __inline__ void __DEFAULT_FN_ATTRS _mm_mwait(unsigned __extensions, unsigned __hints) { __builtin_ia32_mwait(__extensions, __hints); } #undef __DEFAULT_FN_ATTRS #endif /* __PMMINTRIN_H */ /*===--------------- serializeintrin.h - serialize intrinsics --------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __SERIALIZEINTRIN_H #define __SERIALIZEINTRIN_H /// Serialize instruction fetch and execution. /// /// \headerfile /// /// This intrinsic corresponds to the SERIALIZE instruction. /// static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("serialize"))) _serialize (void) { __builtin_ia32_serialize (); } #endif /* __SERIALIZEINTRIN_H */ sifive_vector.hstdbool.h/*===--------------- usermsrintrin.h - USERMSR intrinsics -----------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86GPRINTRIN_H #error "Never use directly; include instead." #endif // __X86GPRINTRIN_H #ifndef __USERMSRINTRIN_H #define __USERMSRINTRIN_H #ifdef __x86_64__ /// Reads the contents of a 64-bit MSR specified in \a __A into \a dst. /// /// This intrinsic corresponds to the URDMSR instruction. /// \param __A /// An unsigned long long. /// /// \code{.operation} /// DEST := MSR[__A] /// \endcode static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("usermsr"))) _urdmsr(unsigned long long __A) { return __builtin_ia32_urdmsr(__A); } /// Writes the contents of \a __B into the 64-bit MSR specified in \a __A. /// /// This intrinsic corresponds to the UWRMSR instruction. /// /// \param __A /// An unsigned long long. /// \param __B /// An unsigned long long. 
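/* Added illustrative sketch, not part of the original header: a wait loop
 * that arms MONITOR on a flag's cache line and parks the core in MWAIT until
 * the line is written. This assumes a context in which MONITOR/MWAIT are
 * permitted (see the notes above) and that the flag fits in the monitored
 * range; the function and variable names are hypothetical. */
#include <pmmintrin.h>

static void wait_for_flag(volatile int *flag) {
  while (!*flag) {
    _mm_monitor((const void *)flag, 0, 0);
    if (!*flag)                  /* re-check to avoid sleeping past the store */
      _mm_mwait(0, 0);
  }
}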
/// /// \code{.operation} /// MSR[__A] := __B /// \endcode static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("usermsr"))) _uwrmsr(unsigned long long __A, unsigned long long __B) { return __builtin_ia32_uwrmsr(__A, __B); } #endif // __x86_64__ #endif // __USERMSRINTRIN_H llvm_libc_wrappers/inttypes.hppc_wrappers/mmintrin.hsanitizer/scudo_interface.h@\w+@wbexternal/kythe/kythe/cxx/common/kzip_writer_aosp.ccprefer-relativeexternal/regex-re2/re2/re2.ccinvalid rewrite pattern: MakasarManichaeanUgaritic%s: errno=%d%s: wrong elf type: %dhaven't left Arena regionbad arena pointer in AddToFreelist(){unnamed type#nDhUnexpected node type: , end = Enqueue to list failedwaitp != nullptr @external/boringssl/src/crypto/fipsmodule/bn/add.cexternal/boringssl/src/crypto/fipsmodule/bn/montgomery.cSHA-512 KATfailed to open /dev/urandomAES-CBC-encrypt KATEVP_AEAD_CTX_seal for AES-128-GCM failed. %luNAENGINE routinesECDHCIPHERabkythe.proto.CompilationUnit.entry_contextkythe.proto.common.Diagnostic.messagekythe.proto.common.Diagnostic.context_urlkythe.proto.ScanRequest.fact_prefix-pie) due to its inclusion in compiler argument: not_MSMode#ifndef__clang_cuda_texture_intrinsics.h__stdarg_va_list.h__stddef_rsize_t.h/*===------------- amxfp16intrin.h - AMX_FP16 intrinsics -*- C++ -*---------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===------------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; use instead." #endif /* __IMMINTRIN_H */ #ifndef __AMX_FP16INTRIN_H #define __AMX_FP16INTRIN_H #ifdef __x86_64__ /// Compute dot-product of FP16 (16-bit) floating-point pairs in tiles \a a /// and \a b, accumulating the intermediate single-precision (32-bit) /// floating-point elements with elements in \a dst, and store the 32-bit /// result back to tile \a dst. /// /// \headerfile /// /// \code /// void _tile_dpfp16ps (__tile dst, __tile a, __tile b) /// \endcode /// /// \code{.operation} /// FOR m := 0 TO dst.rows - 1 /// tmp := dst.row[m] /// FOR k := 0 TO (a.colsb / 4) - 1 /// FOR n := 0 TO (dst.colsb / 4) - 1 /// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * /// FP32(b.row[k].fp16[2*n+0]) /// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * /// FP32(b.row[k].fp16[2*n+1]) /// ENDFOR /// ENDFOR /// write_row_and_zero(dst, m, tmp, dst.colsb) /// ENDFOR /// zero_upper_rows(dst, dst.rows) /// zero_tileconfig_start() /// \endcode /// /// This intrinsic corresponds to the \c TDPFP16PS instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param a /// The 1st source tile. Max size is 1024 Bytes. /// \param b /// The 2nd source tile. Max size is 1024 Bytes. 
#define _tile_dpfp16ps(dst, a, b) \ __builtin_ia32_tdpfp16ps(dst, a, b) #endif /* __x86_64__ */ #endif /* __AMX_FP16INTRIN_H */ /*===---- arm_fp16.h - ARM FP16 intrinsics ---------------------------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * *===-----------------------------------------------------------------------=== */ #ifndef __ARM_FP16_H #define __ARM_FP16_H #include typedef __fp16 float16_t; #define __ai static __inline__ __attribute__((__always_inline__, __nodebug__)) #if defined(__aarch64__) #define vabdh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vabdh_f16(__s0, __s1); \ __ret; \ }) #define vabsh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vabsh_f16(__s0); \ __ret; \ }) #define vaddh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vaddh_f16(__s0, __s1); \ __ret; \ }) #define vcageh_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (uint16_t) __builtin_neon_vcageh_f16(__s0, __s1); \ __ret; \ }) #define vcagth_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (uint16_t) __builtin_neon_vcagth_f16(__s0, __s1); \ __ret; \ }) #define vcaleh_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (uint16_t) __builtin_neon_vcaleh_f16(__s0, __s1); \ __ret; \ }) #define vcalth_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (uint16_t) __builtin_neon_vcalth_f16(__s0, __s1); \ __ret; \ }) #define vceqh_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (uint16_t) __builtin_neon_vceqh_f16(__s0, __s1); \ __ret; \ }) #define vceqzh_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vceqzh_f16(__s0); \ __ret; \ }) #define vcgeh_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (uint16_t) __builtin_neon_vcgeh_f16(__s0, __s1); \ __ret; \ }) #define vcgezh_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vcgezh_f16(__s0); \ __ret; \ }) #define vcgth_f16(__p0, __p1) __extension__ ({ \ 
uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (uint16_t) __builtin_neon_vcgth_f16(__s0, __s1); \ __ret; \ }) #define vcgtzh_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vcgtzh_f16(__s0); \ __ret; \ }) #define vcleh_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (uint16_t) __builtin_neon_vcleh_f16(__s0, __s1); \ __ret; \ }) #define vclezh_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vclezh_f16(__s0); \ __ret; \ }) #define vclth_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (uint16_t) __builtin_neon_vclth_f16(__s0, __s1); \ __ret; \ }) #define vcltzh_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vcltzh_f16(__s0); \ __ret; \ }) #define vcvth_n_s16_f16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ float16_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vcvth_n_s16_f16(__s0, __p1); \ __ret; \ }) #define vcvth_n_s32_f16(__p0, __p1) __extension__ ({ \ int32_t __ret; \ float16_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vcvth_n_s32_f16(__s0, __p1); \ __ret; \ }) #define vcvth_n_s64_f16(__p0, __p1) __extension__ ({ \ int64_t __ret; \ float16_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vcvth_n_s64_f16(__s0, __p1); \ __ret; \ }) #define vcvth_n_u16_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vcvth_n_u16_f16(__s0, __p1); \ __ret; \ }) #define vcvth_n_u32_f16(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vcvth_n_u32_f16(__s0, __p1); \ __ret; \ }) #define vcvth_n_u64_f16(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vcvth_n_u64_f16(__s0, __p1); \ __ret; \ }) #define vcvth_s16_f16(__p0) __extension__ ({ \ int16_t __ret; \ float16_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vcvth_s16_f16(__s0); \ __ret; \ }) #define vcvth_s32_f16(__p0) __extension__ ({ \ int32_t __ret; \ float16_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vcvth_s32_f16(__s0); \ __ret; \ }) #define vcvth_s64_f16(__p0) __extension__ ({ \ int64_t __ret; \ float16_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vcvth_s64_f16(__s0); \ __ret; \ }) #define vcvth_u16_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vcvth_u16_f16(__s0); \ __ret; \ }) #define vcvth_u32_f16(__p0) __extension__ ({ \ uint32_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vcvth_u32_f16(__s0); \ __ret; \ }) #define vcvth_u64_f16(__p0) __extension__ ({ \ uint64_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vcvth_u64_f16(__s0); \ __ret; \ }) #define vcvtah_s16_f16(__p0) __extension__ ({ \ int16_t __ret; \ float16_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vcvtah_s16_f16(__s0); \ __ret; \ }) #define vcvtah_s32_f16(__p0) __extension__ ({ \ int32_t __ret; \ float16_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vcvtah_s32_f16(__s0); \ __ret; \ }) #define vcvtah_s64_f16(__p0) __extension__ ({ \ int64_t __ret; \ float16_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vcvtah_s64_f16(__s0); \ __ret; \ }) #define vcvtah_u16_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) 
__builtin_neon_vcvtah_u16_f16(__s0); \ __ret; \ }) #define vcvtah_u32_f16(__p0) __extension__ ({ \ uint32_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vcvtah_u32_f16(__s0); \ __ret; \ }) #define vcvtah_u64_f16(__p0) __extension__ ({ \ uint64_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vcvtah_u64_f16(__s0); \ __ret; \ }) #define vcvth_f16_u16(__p0) __extension__ ({ \ float16_t __ret; \ uint16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_f16_u16(__s0); \ __ret; \ }) #define vcvth_f16_s16(__p0) __extension__ ({ \ float16_t __ret; \ int16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_f16_s16(__s0); \ __ret; \ }) #define vcvth_f16_u32(__p0) __extension__ ({ \ float16_t __ret; \ uint32_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_f16_u32(__s0); \ __ret; \ }) #define vcvth_f16_s32(__p0) __extension__ ({ \ float16_t __ret; \ int32_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_f16_s32(__s0); \ __ret; \ }) #define vcvth_f16_u64(__p0) __extension__ ({ \ float16_t __ret; \ uint64_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_f16_u64(__s0); \ __ret; \ }) #define vcvth_f16_s64(__p0) __extension__ ({ \ float16_t __ret; \ int64_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_f16_s64(__s0); \ __ret; \ }) #define vcvth_n_f16_u32(__p0, __p1) __extension__ ({ \ float16_t __ret; \ uint32_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_n_f16_u32(__s0, __p1); \ __ret; \ }) #define vcvth_n_f16_s32(__p0, __p1) __extension__ ({ \ float16_t __ret; \ int32_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_n_f16_s32(__s0, __p1); \ __ret; \ }) #define vcvth_n_f16_u64(__p0, __p1) __extension__ ({ \ float16_t __ret; \ uint64_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_n_f16_u64(__s0, __p1); \ __ret; \ }) #define vcvth_n_f16_s64(__p0, __p1) __extension__ ({ \ float16_t __ret; \ int64_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_n_f16_s64(__s0, __p1); \ __ret; \ }) #define vcvth_n_f16_u16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ uint16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_n_f16_u16(__s0, __p1); \ __ret; \ }) #define vcvth_n_f16_s16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ int16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_n_f16_s16(__s0, __p1); \ __ret; \ }) #define vcvtmh_s16_f16(__p0) __extension__ ({ \ int16_t __ret; \ float16_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vcvtmh_s16_f16(__s0); \ __ret; \ }) #define vcvtmh_s32_f16(__p0) __extension__ ({ \ int32_t __ret; \ float16_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vcvtmh_s32_f16(__s0); \ __ret; \ }) #define vcvtmh_s64_f16(__p0) __extension__ ({ \ int64_t __ret; \ float16_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vcvtmh_s64_f16(__s0); \ __ret; \ }) #define vcvtmh_u16_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vcvtmh_u16_f16(__s0); \ __ret; \ }) #define vcvtmh_u32_f16(__p0) __extension__ ({ \ uint32_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vcvtmh_u32_f16(__s0); \ __ret; \ }) #define vcvtmh_u64_f16(__p0) __extension__ ({ \ uint64_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vcvtmh_u64_f16(__s0); \ __ret; \ }) #define vcvtnh_s16_f16(__p0) __extension__ ({ \ int16_t __ret; \ float16_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vcvtnh_s16_f16(__s0); \ __ret; \ }) #define vcvtnh_s32_f16(__p0) __extension__ ({ \ 
int32_t __ret; \ float16_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vcvtnh_s32_f16(__s0); \ __ret; \ }) #define vcvtnh_s64_f16(__p0) __extension__ ({ \ int64_t __ret; \ float16_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vcvtnh_s64_f16(__s0); \ __ret; \ }) #define vcvtnh_u16_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vcvtnh_u16_f16(__s0); \ __ret; \ }) #define vcvtnh_u32_f16(__p0) __extension__ ({ \ uint32_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vcvtnh_u32_f16(__s0); \ __ret; \ }) #define vcvtnh_u64_f16(__p0) __extension__ ({ \ uint64_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vcvtnh_u64_f16(__s0); \ __ret; \ }) #define vcvtph_s16_f16(__p0) __extension__ ({ \ int16_t __ret; \ float16_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vcvtph_s16_f16(__s0); \ __ret; \ }) #define vcvtph_s32_f16(__p0) __extension__ ({ \ int32_t __ret; \ float16_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vcvtph_s32_f16(__s0); \ __ret; \ }) #define vcvtph_s64_f16(__p0) __extension__ ({ \ int64_t __ret; \ float16_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vcvtph_s64_f16(__s0); \ __ret; \ }) #define vcvtph_u16_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vcvtph_u16_f16(__s0); \ __ret; \ }) #define vcvtph_u32_f16(__p0) __extension__ ({ \ uint32_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vcvtph_u32_f16(__s0); \ __ret; \ }) #define vcvtph_u64_f16(__p0) __extension__ ({ \ uint64_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vcvtph_u64_f16(__s0); \ __ret; \ }) #define vdivh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vdivh_f16(__s0, __s1); \ __ret; \ }) #define vfmah_f16(__p0, __p1, __p2) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16_t __s2 = __p2; \ __ret = (float16_t) __builtin_neon_vfmah_f16(__s0, __s1, __s2); \ __ret; \ }) #define vfmsh_f16(__p0, __p1, __p2) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16_t __s2 = __p2; \ __ret = (float16_t) __builtin_neon_vfmsh_f16(__s0, __s1, __s2); \ __ret; \ }) #define vmaxh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vmaxh_f16(__s0, __s1); \ __ret; \ }) #define vmaxnmh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vmaxnmh_f16(__s0, __s1); \ __ret; \ }) #define vminh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vminh_f16(__s0, __s1); \ __ret; \ }) #define vminnmh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vminnmh_f16(__s0, __s1); \ __ret; \ }) #define vmulh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vmulh_f16(__s0, __s1); \ __ret; \ }) #define vmulxh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vmulxh_f16(__s0, __s1); \ __ret; \ }) #define vnegh_f16(__p0) __extension__ ({ \ 
float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vnegh_f16(__s0); \ __ret; \ }) #define vrecpeh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vrecpeh_f16(__s0); \ __ret; \ }) #define vrecpsh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vrecpsh_f16(__s0, __s1); \ __ret; \ }) #define vrecpxh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vrecpxh_f16(__s0); \ __ret; \ }) #define vrndh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vrndh_f16(__s0); \ __ret; \ }) #define vrndah_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vrndah_f16(__s0); \ __ret; \ }) #define vrndih_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vrndih_f16(__s0); \ __ret; \ }) #define vrndmh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vrndmh_f16(__s0); \ __ret; \ }) #define vrndnh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vrndnh_f16(__s0); \ __ret; \ }) #define vrndph_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vrndph_f16(__s0); \ __ret; \ }) #define vrndxh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vrndxh_f16(__s0); \ __ret; \ }) #define vrsqrteh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vrsqrteh_f16(__s0); \ __ret; \ }) #define vrsqrtsh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vrsqrtsh_f16(__s0, __s1); \ __ret; \ }) #define vsqrth_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vsqrth_f16(__s0); \ __ret; \ }) #define vsubh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vsubh_f16(__s0, __s1); \ __ret; \ }) #endif #undef __ai #endif /* __ARM_FP16_H */ /*===------------- avx512ifmaintrin.h - IFMA intrinsics ------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __IFMAINTRIN_H #define __IFMAINTRIN_H /* Define the default attributes for the functions in this file. 
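/* Added illustrative sketch, not part of the original arm_fp16.h: a scalar
 * half-precision a*x + y built from vfmah_f16, which fuses the multiply and
 * add with a single rounding. An AArch64 target with the FP16 scalar
 * extension (e.g. -march=armv8.2-a+fp16) and the helper name are
 * assumptions. */
#include <arm_fp16.h>

static inline float16_t axpy_f16(float16_t a, float16_t x, float16_t y) {
  /* vfmah_f16(acc, m1, m2) computes acc + m1 * m2. */
  return vfmah_f16(y, a, x);
}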
*/ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512ifma,evex512"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z) { return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di) __X, (__v8di) __Y, (__v8di) __Z); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512(__M, (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) { return (__m512i)__builtin_ia32_selectq_512(__M, (__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z) { return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di) __X, (__v8di) __Y, (__v8di) __Z); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512(__M, (__v8di)_mm512_madd52lo_epu64(__W, __X, __Y), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) { return (__m512i)__builtin_ia32_selectq_512(__M, (__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z), (__v8di)_mm512_setzero_si512()); } #undef __DEFAULT_FN_ATTRS #endif avx512vbmiintrin.hmwaitxintrin.hopencl-c-base.h/*===---- popcntintrin.h - POPCNT intrinsics -------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __POPCNTINTRIN_H #define __POPCNTINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("popcnt"))) #if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr #else #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS #endif /// Counts the number of bits in the source operand having a value of 1. /// /// \headerfile /// /// This intrinsic corresponds to the POPCNT instruction. /// /// \param __A /// An unsigned 32-bit integer operand. /// \returns A 32-bit integer containing the number of bits with value 1 in the /// source operand. static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_popcnt_u32(unsigned int __A) { return __builtin_popcount(__A); } #ifdef __x86_64__ /// Counts the number of bits in the source operand having a value of 1. /// /// \headerfile /// /// This intrinsic corresponds to the POPCNT instruction. /// /// \param __A /// An unsigned 64-bit integer operand. /// \returns A 64-bit integer containing the number of bits with value 1 in the /// source operand. 
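/* Added illustrative sketch, not part of the original avx512ifmaintrin.h:
 * accumulating the low and high 52-bit halves of packed 52-bit products, as
 * done for multi-precision arithmetic with radix-2^52 limbs. An
 * AVX512IFMA-enabled build and the helper name are assumptions. */
#include <immintrin.h>

static inline void madd52_limbs(__m512i a, __m512i b,
                                __m512i *lo_acc, __m512i *hi_acc) {
  /* Only bits [51:0] of each 64-bit lane of a and b enter the product. */
  *lo_acc = _mm512_madd52lo_epu64(*lo_acc, a, b); /* += low  52 bits of a*b */
  *hi_acc = _mm512_madd52hi_epu64(*hi_acc, a, b); /* += high 52 bits of a*b */
}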
static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR _mm_popcnt_u64(unsigned long long __A) { return __builtin_popcountll(__A); } #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif /* __POPCNTINTRIN_H */ /*===---- sgxintrin.h - X86 SGX intrinsics configuration -------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __SGXINTRIN_H #define __SGXINTRIN_H #if __has_extension(gnu_asm) /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("sgx"))) static __inline unsigned int __DEFAULT_FN_ATTRS _enclu_u32(unsigned int __leaf, __SIZE_TYPE__ __d[]) { unsigned int __result; __asm__ ("enclu" : "=a" (__result), "=b" (__d[0]), "=c" (__d[1]), "=d" (__d[2]) : "a" (__leaf), "b" (__d[0]), "c" (__d[1]), "d" (__d[2]) : "cc"); return __result; } static __inline unsigned int __DEFAULT_FN_ATTRS _encls_u32(unsigned int __leaf, __SIZE_TYPE__ __d[]) { unsigned int __result; __asm__ ("encls" : "=a" (__result), "=b" (__d[0]), "=c" (__d[1]), "=d" (__d[2]) : "a" (__leaf), "b" (__d[0]), "c" (__d[1]), "d" (__d[2]) : "cc"); return __result; } static __inline unsigned int __DEFAULT_FN_ATTRS _enclv_u32(unsigned int __leaf, __SIZE_TYPE__ __d[]) { unsigned int __result; __asm__ ("enclv" : "=a" (__result), "=b" (__d[0]), "=c" (__d[1]), "=d" (__d[2]) : "a" (__leaf), "b" (__d[0]), "c" (__d[1]), "d" (__d[2]) : "cc"); return __result; } #undef __DEFAULT_FN_ATTRS #endif /* __has_extension(gnu_asm) */ #endif /*===---- stdarg.h - Variable argument handling ----------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* * This header is designed to be included multiple times. If any of the __need_ * macros are defined, then only that subset of interfaces are provided. This * can be useful for POSIX headers that need to not expose all of stdarg.h, but * need to use some of its interfaces. Otherwise this header provides all of * the expected interfaces. * * When clang modules are enabled, this header is a textual header. It ignores * its header guard so that multiple submodules can export its interfaces. * Take module SM with submodules A and B, whose headers both include stdarg.h * When SM.A builds, __STDARG_H will be defined. When SM.B builds, the * definition from SM.A will leak when building without local submodule * visibility. stdarg.h wouldn't include any of its implementation headers, and * SM.B wouldn't import any of the stdarg modules, and SM.B's `export *` * wouldn't export any stdarg interfaces as expected. However, since stdarg.h * ignores its header guard when building with modules, it all works as * expected. * * When clang modules are not enabled, the header guards can function in the * normal simple fashion. 
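/* Added illustrative sketch, not part of the original popcntintrin.h:
 * counting set bits across an array of 64-bit words, using _mm_popcnt_u64
 * where available and two _mm_popcnt_u32 calls otherwise. A POPCNT-enabled
 * build (-mpopcnt) and the helper name are assumptions. */
#include <popcntintrin.h>
#include <stddef.h>
#include <stdint.h>

static inline uint64_t count_bits(const uint64_t *words, size_t n) {
  uint64_t total = 0;
  for (size_t i = 0; i < n; ++i) {
#ifdef __x86_64__
    total += (uint64_t)_mm_popcnt_u64(words[i]);
#else
    total += (uint64_t)_mm_popcnt_u32((uint32_t)words[i]);
    total += (uint64_t)_mm_popcnt_u32((uint32_t)(words[i] >> 32));
#endif
  }
  return total;
}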
*/ #if !defined(__STDARG_H) || __has_feature(modules) || \ defined(__need___va_list) || defined(__need_va_list) || \ defined(__need_va_arg) || defined(__need___va_copy) || \ defined(__need_va_copy) #if !defined(__need___va_list) && !defined(__need_va_list) && \ !defined(__need_va_arg) && !defined(__need___va_copy) && \ !defined(__need_va_copy) #define __STDARG_H #define __need___va_list #define __need_va_list #define __need_va_arg #define __need___va_copy /* GCC always defines __va_copy, but does not define va_copy unless in c99 mode * or -ansi is not specified, since it was not part of C90. */ #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \ !defined(__STRICT_ANSI__) #define __need_va_copy #endif #endif #ifdef __need___va_list #include <__stdarg___gnuc_va_list.h> #undef __need___va_list #endif /* defined(__need___va_list) */ #ifdef __need_va_list #include <__stdarg_va_list.h> #undef __need_va_list #endif /* defined(__need_va_list) */ #ifdef __need_va_arg #include <__stdarg_va_arg.h> #undef __need_va_arg #endif /* defined(__need_va_arg) */ #ifdef __need___va_copy #include <__stdarg___va_copy.h> #undef __need___va_copy #endif /* defined(__need___va_copy) */ #ifdef __need_va_copy #include <__stdarg_va_copy.h> #undef __need_va_copy #endif /* defined(__need_va_copy) */ #endif vadefs.hxsaveoptintrin.hcuda_wrappers/bits/basic_string.hfuzzer/FuzzedDataProvider.hopenmp_wrappers/complex_cmath.hfseek(handle, 0, 2)Miss exponent in number.external/kythe/kythe/cxx/common/path_utils.cc2prefer-real, in regexp fail(%p)Cannot use SearchOnePass for unanchored matches.kRegexpCapture cap() == 0{%d}BengaliBrahmiDeseretGranthaHiraganaNabataeanOld_North_ArabianShavianTirhutaZsi < prev->levelsDvDnstd::nullptr_tchar8_t|=12%s@ %*p %9d bad pointer0 <= index && static_cast(index) <= verdefnum_\x cannot be followed by a non-hex digit\u must be followed by 4 hex digits: \h->skip == nullptrEnqueue failedexternal/boringssl/src/crypto/fipsmodule/cipher/aead.cP-256SYSX509BIORANDECDSA routinesSHOULD_NOT_HAVE_BEEN_CALLEDexternal/protobuf/src/google/protobuf/map_field_inl.hkythe.proto.common.SymbolInfo.base_namef77-cpp-inputcfg-temporary-dtors=true/Check failed: !current_files_.top().file_path.empty()Resource directory is SystemObjCXXSystem/*===---- __stddef_unreachable.h - Definition of unreachable ---------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* * When -fbuiltin-headers-in-system-modules is set this is a non-modular header * and needs to behave as if it was textual. */ #if !defined(unreachable) || \ (__has_feature(modules) && !__building_module(_Builtin_stddef)) #define unreachable() __builtin_unreachable() #endif /*===------------ avx512bf16intrin.h - AVX512_BF16 intrinsics --------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." 
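/* Added illustrative sketch, not part of the original stdarg.h: a small
 * variadic helper exercising the va_list machinery declared above. The
 * function name is hypothetical. */
#include <stdarg.h>

static int sum_ints(int count, ...) {
  va_list ap;
  int total = 0;
  va_start(ap, count);             /* arm ap at the first variadic argument */
  for (int i = 0; i < count; ++i)
    total += va_arg(ap, int);      /* each variadic argument is read as int */
  va_end(ap);
  return total;
}
/* Usage: sum_ints(3, 1, 2, 3) evaluates to 6. */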
#endif #ifdef __SSE2__ #ifndef __AVX512BF16INTRIN_H #define __AVX512BF16INTRIN_H typedef __bf16 __v32bf __attribute__((__vector_size__(64), __aligned__(64))); typedef __bf16 __m512bh __attribute__((__vector_size__(64), __aligned__(64))); typedef __bf16 __bfloat16 __attribute__((deprecated("use __bf16 instead"))); #define __DEFAULT_FN_ATTRS512 \ __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16,evex512"), \ __min_vector_width__(512))) #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512bf16,no-evex512"))) /// Convert One BF16 Data to One Single Float Data. /// /// \headerfile /// /// This intrinsic does not correspond to a specific instruction. /// /// \param __A /// A bfloat data. /// \returns A float data whose sign field and exponent field keep unchanged, /// and fraction field is extended to 23 bits. static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtsbh_ss(__bf16 __A) { return __builtin_ia32_cvtsbf162ss_32(__A); } /// Convert Two Packed Single Data to One Packed BF16 Data. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTNE2PS2BF16 instructions. /// /// \param __A /// A 512-bit vector of [16 x float]. /// \param __B /// A 512-bit vector of [16 x float]. /// \returns A 512-bit vector of [32 x bfloat] whose lower 256 bits come from /// conversion of __B, and higher 256 bits come from conversion of __A. static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_cvtne2ps_pbh(__m512 __A, __m512 __B) { return (__m512bh)__builtin_ia32_cvtne2ps2bf16_512((__v16sf) __A, (__v16sf) __B); } /// Convert Two Packed Single Data to One Packed BF16 Data. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTNE2PS2BF16 instructions. /// /// \param __A /// A 512-bit vector of [16 x float]. /// \param __B /// A 512-bit vector of [16 x float]. /// \param __W /// A 512-bit vector of [32 x bfloat]. /// \param __U /// A 32-bit mask value specifying what is chosen for each element. /// A 1 means conversion of __A or __B. A 0 means element from __W. /// \returns A 512-bit vector of [32 x bfloat] whose lower 256 bits come from /// conversion of __B, and higher 256 bits come from conversion of __A. static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_cvtne2ps_pbh(__m512bh __W, __mmask32 __U, __m512 __A, __m512 __B) { return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, (__v32bf)_mm512_cvtne2ps_pbh(__A, __B), (__v32bf)__W); } /// Convert Two Packed Single Data to One Packed BF16 Data. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTNE2PS2BF16 instructions. /// /// \param __A /// A 512-bit vector of [16 x float]. /// \param __B /// A 512-bit vector of [16 x float]. /// \param __U /// A 32-bit mask value specifying what is chosen for each element. /// A 1 means conversion of __A or __B. A 0 means element is zero. /// \returns A 512-bit vector of [32 x bfloat] whose lower 256 bits come from /// conversion of __B, and higher 256 bits come from conversion of __A. static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtne2ps_pbh(__mmask32 __U, __m512 __A, __m512 __B) { return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, (__v32bf)_mm512_cvtne2ps_pbh(__A, __B), (__v32bf)_mm512_setzero_si512()); } /// Convert Packed Single Data to Packed BF16 Data. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTNEPS2BF16 instructions. /// /// \param __A /// A 512-bit vector of [16 x float]. /// \returns A 256-bit vector of [16 x bfloat] come from conversion of __A. 
static __inline__ __m256bh __DEFAULT_FN_ATTRS512 _mm512_cvtneps_pbh(__m512 __A) { return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A, (__v16bf)_mm256_undefined_si256(), (__mmask16)-1); } /// Convert Packed Single Data to Packed BF16 Data. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTNEPS2BF16 instructions. /// /// \param __A /// A 512-bit vector of [16 x float]. /// \param __W /// A 256-bit vector of [16 x bfloat]. /// \param __U /// A 16-bit mask value specifying what is chosen for each element. /// A 1 means conversion of __A. A 0 means element from __W. /// \returns A 256-bit vector of [16 x bfloat] come from conversion of __A. static __inline__ __m256bh __DEFAULT_FN_ATTRS512 _mm512_mask_cvtneps_pbh(__m256bh __W, __mmask16 __U, __m512 __A) { return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A, (__v16bf)__W, (__mmask16)__U); } /// Convert Packed Single Data to Packed BF16 Data. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTNEPS2BF16 instructions. /// /// \param __A /// A 512-bit vector of [16 x float]. /// \param __U /// A 16-bit mask value specifying what is chosen for each element. /// A 1 means conversion of __A. A 0 means element is zero. /// \returns A 256-bit vector of [16 x bfloat] come from conversion of __A. static __inline__ __m256bh __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtneps_pbh(__mmask16 __U, __m512 __A) { return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A, (__v16bf)_mm256_setzero_si256(), (__mmask16)__U); } /// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. /// /// \headerfile /// /// This intrinsic corresponds to the VDPBF16PS instructions. /// /// \param __A /// A 512-bit vector of [32 x bfloat]. /// \param __B /// A 512-bit vector of [32 x bfloat]. /// \param __D /// A 512-bit vector of [16 x float]. /// \returns A 512-bit vector of [16 x float] comes from Dot Product of /// __A, __B and __D static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_dpbf16_ps(__m512 __D, __m512bh __A, __m512bh __B) { return (__m512)__builtin_ia32_dpbf16ps_512((__v16sf) __D, (__v32bf) __A, (__v32bf) __B); } /// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. /// /// \headerfile /// /// This intrinsic corresponds to the VDPBF16PS instructions. /// /// \param __A /// A 512-bit vector of [32 x bfloat]. /// \param __B /// A 512-bit vector of [32 x bfloat]. /// \param __D /// A 512-bit vector of [16 x float]. /// \param __U /// A 16-bit mask value specifying what is chosen for each element. /// A 1 means __A and __B's dot product accumulated with __D. A 0 means __D. /// \returns A 512-bit vector of [16 x float] comes from Dot Product of /// __A, __B and __D static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_dpbf16_ps(__m512 __D, __mmask16 __U, __m512bh __A, __m512bh __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_dpbf16_ps(__D, __A, __B), (__v16sf)__D); } /// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. /// /// \headerfile /// /// This intrinsic corresponds to the VDPBF16PS instructions. /// /// \param __A /// A 512-bit vector of [32 x bfloat]. /// \param __B /// A 512-bit vector of [32 x bfloat]. /// \param __D /// A 512-bit vector of [16 x float]. /// \param __U /// A 16-bit mask value specifying what is chosen for each element. /// A 1 means __A and __B's dot product accumulated with __D. A 0 means 0. 
/// \returns A 512-bit vector of [16 x float] comes from Dot Product of /// __A, __B and __D static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_dpbf16_ps(__mmask16 __U, __m512 __D, __m512bh __A, __m512bh __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_dpbf16_ps(__D, __A, __B), (__v16sf)_mm512_setzero_si512()); } /// Convert Packed BF16 Data to Packed float Data. /// /// \headerfile /// /// \param __A /// A 256-bit vector of [16 x bfloat]. /// \returns A 512-bit vector of [16 x float] come from conversion of __A static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpbh_ps(__m256bh __A) { return _mm512_castsi512_ps((__m512i)_mm512_slli_epi32( (__m512i)_mm512_cvtepi16_epi32((__m256i)__A), 16)); } /// Convert Packed BF16 Data to Packed float Data using zeroing mask. /// /// \headerfile /// /// \param __U /// A 16-bit mask. Elements are zeroed out when the corresponding mask /// bit is not set. /// \param __A /// A 256-bit vector of [16 x bfloat]. /// \returns A 512-bit vector of [16 x float] come from conversion of __A static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpbh_ps(__mmask16 __U, __m256bh __A) { return _mm512_castsi512_ps((__m512i)_mm512_slli_epi32( (__m512i)_mm512_maskz_cvtepi16_epi32((__mmask16)__U, (__m256i)__A), 16)); } /// Convert Packed BF16 Data to Packed float Data using merging mask. /// /// \headerfile /// /// \param __S /// A 512-bit vector of [16 x float]. Elements are copied from __S when /// the corresponding mask bit is not set. /// \param __U /// A 16-bit mask. /// \param __A /// A 256-bit vector of [16 x bfloat]. /// \returns A 512-bit vector of [16 x float] come from conversion of __A static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpbh_ps(__m512 __S, __mmask16 __U, __m256bh __A) { return _mm512_castsi512_ps((__m512i)_mm512_mask_slli_epi32( (__m512i)__S, (__mmask16)__U, (__m512i)_mm512_cvtepi16_epi32((__m256i)__A), 16)); } #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS512 #endif #endif avx512bwintrin.h/*===--------------- cmpccxaddintrin.h - CMPCCXADD intrinsics--------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86GPRINTRIN_H #error \ "Never use directly; include instead." #endif // __X86GPRINTRIN_H #ifndef __CMPCCXADDINTRIN_H #define __CMPCCXADDINTRIN_H #ifdef __x86_64__ typedef enum { _CMPCCX_O, /* Overflow. */ _CMPCCX_NO, /* No overflow. */ _CMPCCX_B, /* Below. */ _CMPCCX_NB, /* Not below. */ _CMPCCX_Z, /* Zero. */ _CMPCCX_NZ, /* Not zero. */ _CMPCCX_BE, /* Below or equal. */ _CMPCCX_NBE, /* Neither below nor equal. */ _CMPCCX_S, /* Sign. */ _CMPCCX_NS, /* No sign. */ _CMPCCX_P, /* Parity. */ _CMPCCX_NP, /* No parity. */ _CMPCCX_L, /* Less. */ _CMPCCX_NL, /* Not less. */ _CMPCCX_LE, /* Less or equal. */ _CMPCCX_NLE, /* Neither less nor equal. */ } _CMPCCX_ENUM; /// Compares the value from the memory __A with the value of __B. If the /// specified condition __D is met, then add the third operand __C to the /// __A and write it into __A, else the value of __A is unchanged. The return /// value is the original value of __A. /// /// \headerfile /// /// This intrinsic corresponds to the \c CMPCCXADD instructions. /// /// \param __A /// __A pointer specifying the memory address. /// /// \param __B /// A integer operand. 
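/* Added illustrative sketch, not part of the original avx512bf16intrin.h:
 * one step of a BF16 dot-product accumulation -- convert two float vectors
 * to packed BF16 with _mm512_cvtne2ps_pbh and accumulate their pairwise
 * products into a float accumulator with _mm512_dpbf16_ps. An
 * AVX512BF16-enabled build and the helper name are assumptions. */
#include <immintrin.h>

static inline __m512 bf16_dot_step(__m512 acc, __m512 a_lo, __m512 a_hi,
                                   __m512 b_lo, __m512 b_hi) {
  __m512bh a = _mm512_cvtne2ps_pbh(a_hi, a_lo); /* low 256 bits from a_lo   */
  __m512bh b = _mm512_cvtne2ps_pbh(b_hi, b_lo);
  return _mm512_dpbf16_ps(acc, a, b);           /* acc += sum of bf16 pairs */
}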
/// /// \param __C /// A integer operand. /// /// \param __D /// The specified condition. /// /// \returns a integer which is the original value of first operand. #define _cmpccxadd_epi32(__A, __B, __C, __D) \ ((int)(__builtin_ia32_cmpccxadd32((void *)(__A), (int)(__B), (int)(__C), \ (int)(__D)))) #define _cmpccxadd_epi64(__A, __B, __C, __D) \ ((long long)(__builtin_ia32_cmpccxadd64((void *)(__A), (long long)(__B), \ (long long)(__C), (int)(__D)))) #endif // __x86_64__ #endif // __CMPCCXADDINTRIN_H /*===---- float.h - Characteristics of floating point types ----------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_FLOAT_H #define __CLANG_FLOAT_H /* If we're on MinGW, fall back to the system's float.h, which might have * additional definitions provided for Windows. * For more details see http://msdn.microsoft.com/en-us/library/y0ybw9fy.aspx * * Also fall back on Darwin and AIX to allow additional definitions and * implementation-defined values. */ #if (defined(__APPLE__) || defined(__MINGW32__) || defined(_MSC_VER) || \ defined(_AIX)) && \ __STDC_HOSTED__ && __has_include_next() /* Prior to Apple's 10.7 SDK, float.h SDK header used to apply an extra level * of #include_next to keep Metrowerks compilers happy. Avoid this * extra indirection. */ #ifdef __APPLE__ #define _FLOAT_H_ #endif # include_next /* Undefine anything that we'll be redefining below. */ # undef FLT_EVAL_METHOD # undef FLT_ROUNDS # undef FLT_RADIX # undef FLT_MANT_DIG # undef DBL_MANT_DIG # undef LDBL_MANT_DIG #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ !defined(__STRICT_ANSI__) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \ (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE)) # undef DECIMAL_DIG # endif # undef FLT_DIG # undef DBL_DIG # undef LDBL_DIG # undef FLT_MIN_EXP # undef DBL_MIN_EXP # undef LDBL_MIN_EXP # undef FLT_MIN_10_EXP # undef DBL_MIN_10_EXP # undef LDBL_MIN_10_EXP # undef FLT_MAX_EXP # undef DBL_MAX_EXP # undef LDBL_MAX_EXP # undef FLT_MAX_10_EXP # undef DBL_MAX_10_EXP # undef LDBL_MAX_10_EXP # undef FLT_MAX # undef DBL_MAX # undef LDBL_MAX # undef FLT_EPSILON # undef DBL_EPSILON # undef LDBL_EPSILON # undef FLT_MIN # undef DBL_MIN # undef LDBL_MIN #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \ !defined(__STRICT_ANSI__) || \ (defined(__cplusplus) && __cplusplus >= 201703L) || \ (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE)) # undef FLT_TRUE_MIN # undef DBL_TRUE_MIN # undef LDBL_TRUE_MIN # undef FLT_DECIMAL_DIG # undef DBL_DECIMAL_DIG # undef LDBL_DECIMAL_DIG # undef FLT_HAS_SUBNORM # undef DBL_HAS_SUBNORM # undef LDBL_HAS_SUBNORM # endif #endif /* Characteristics of floating point types, C99 5.2.4.2.2 */ #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ (defined(__cplusplus) && __cplusplus >= 201103L) #define FLT_EVAL_METHOD __FLT_EVAL_METHOD__ #endif #define FLT_ROUNDS (__builtin_flt_rounds()) #define FLT_RADIX __FLT_RADIX__ #define FLT_MANT_DIG __FLT_MANT_DIG__ #define DBL_MANT_DIG __DBL_MANT_DIG__ #define LDBL_MANT_DIG __LDBL_MANT_DIG__ #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ !defined(__STRICT_ANSI__) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \ (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE)) # define 
DECIMAL_DIG __DECIMAL_DIG__ #endif #define FLT_DIG __FLT_DIG__ #define DBL_DIG __DBL_DIG__ #define LDBL_DIG __LDBL_DIG__ #define FLT_MIN_EXP __FLT_MIN_EXP__ #define DBL_MIN_EXP __DBL_MIN_EXP__ #define LDBL_MIN_EXP __LDBL_MIN_EXP__ #define FLT_MIN_10_EXP __FLT_MIN_10_EXP__ #define DBL_MIN_10_EXP __DBL_MIN_10_EXP__ #define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__ #define FLT_MAX_EXP __FLT_MAX_EXP__ #define DBL_MAX_EXP __DBL_MAX_EXP__ #define LDBL_MAX_EXP __LDBL_MAX_EXP__ #define FLT_MAX_10_EXP __FLT_MAX_10_EXP__ #define DBL_MAX_10_EXP __DBL_MAX_10_EXP__ #define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__ #define FLT_MAX __FLT_MAX__ #define DBL_MAX __DBL_MAX__ #define LDBL_MAX __LDBL_MAX__ #define FLT_EPSILON __FLT_EPSILON__ #define DBL_EPSILON __DBL_EPSILON__ #define LDBL_EPSILON __LDBL_EPSILON__ #define FLT_MIN __FLT_MIN__ #define DBL_MIN __DBL_MIN__ #define LDBL_MIN __LDBL_MIN__ #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \ !defined(__STRICT_ANSI__) || \ (defined(__cplusplus) && __cplusplus >= 201703L) || \ (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE)) # define FLT_TRUE_MIN __FLT_DENORM_MIN__ # define DBL_TRUE_MIN __DBL_DENORM_MIN__ # define LDBL_TRUE_MIN __LDBL_DENORM_MIN__ # define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__ # define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__ # define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__ # define FLT_HAS_SUBNORM __FLT_HAS_DENORM__ # define DBL_HAS_SUBNORM __DBL_HAS_DENORM__ # define LDBL_HAS_SUBNORM __LDBL_HAS_DENORM__ #endif #ifdef __STDC_WANT_IEC_60559_TYPES_EXT__ # define FLT16_MANT_DIG __FLT16_MANT_DIG__ # define FLT16_DECIMAL_DIG __FLT16_DECIMAL_DIG__ # define FLT16_DIG __FLT16_DIG__ # define FLT16_MIN_EXP __FLT16_MIN_EXP__ # define FLT16_MIN_10_EXP __FLT16_MIN_10_EXP__ # define FLT16_MAX_EXP __FLT16_MAX_EXP__ # define FLT16_MAX_10_EXP __FLT16_MAX_10_EXP__ # define FLT16_MAX __FLT16_MAX__ # define FLT16_EPSILON __FLT16_EPSILON__ # define FLT16_MIN __FLT16_MIN__ # define FLT16_TRUE_MIN __FLT16_TRUE_MIN__ #endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */ #endif /* __CLANG_FLOAT_H */ movdirintrin.hprfchwintrin.hshaintrin.h/*===---- shaintrin.h - SHA intrinsics -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __SHAINTRIN_H #define __SHAINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha"), __min_vector_width__(128))) /// Performs four iterations of the inner loop of the SHA-1 message digest /// algorithm using the starting SHA-1 state (A, B, C, D) from the 128-bit /// vector of [4 x i32] in \a V1 and the next four 32-bit elements of the /// message from the 128-bit vector of [4 x i32] in \a V2. Note that the /// SHA-1 state variable E must have already been added to \a V2 /// (\c _mm_sha1nexte_epu32() can perform this step). Returns the updated /// SHA-1 state (A, B, C, D) as a 128-bit vector of [4 x i32]. /// /// The SHA-1 algorithm has an inner loop of 80 iterations, twenty each /// with a different combining function and rounding constant. This /// intrinsic performs four iterations using a combining function and /// rounding constant selected by \a M[1:0]. 
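/* Added illustrative sketch, not part of the original float.h: probing a few
 * of the characteristics defined above, including the fact that FLT_EPSILON
 * is the gap between 1.0f and the next representable float. */
#include <float.h>
#include <stdio.h>

int main(void) {
  printf("FLT_MANT_DIG = %d, FLT_EPSILON = %g, DBL_MAX = %g\n",
         FLT_MANT_DIG, (double)FLT_EPSILON, DBL_MAX);
  /* 1.0f + FLT_EPSILON is the smallest float strictly greater than 1.0f. */
  printf("1.0f + FLT_EPSILON > 1.0f: %d\n", (1.0f + FLT_EPSILON) > 1.0f);
  return 0;
}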
/// /// \headerfile /// /// \code /// __m128i _mm_sha1rnds4_epu32(__m128i V1, __m128i V2, const int M); /// \endcode /// /// This intrinsic corresponds to the \c SHA1RNDS4 instruction. /// /// \param V1 /// A 128-bit vector of [4 x i32] containing the initial SHA-1 state. /// \param V2 /// A 128-bit vector of [4 x i32] containing the next four elements of /// the message, plus SHA-1 state variable E. /// \param M /// An immediate value where bits [1:0] select among four possible /// combining functions and rounding constants (not specified here). /// \returns A 128-bit vector of [4 x i32] containing the updated SHA-1 state. #define _mm_sha1rnds4_epu32(V1, V2, M) \ __builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), (__v4si)(__m128i)(V2), (M)) /// Calculates the SHA-1 state variable E from the SHA-1 state variables in /// the 128-bit vector of [4 x i32] in \a __X, adds that to the next set of /// four message elements in the 128-bit vector of [4 x i32] in \a __Y, and /// returns the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c SHA1NEXTE instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] containing the current SHA-1 state. /// \param __Y /// A 128-bit vector of [4 x i32] containing the next four elements of the /// message. /// \returns A 128-bit vector of [4 x i32] containing the updated SHA-1 /// values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha1nexte_epu32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_sha1nexte((__v4si)__X, (__v4si)__Y); } /// Performs an intermediate calculation for deriving the next four SHA-1 /// message elements using previous message elements from the 128-bit /// vectors of [4 x i32] in \a __X and \a __Y, and returns the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c SHA1MSG1 instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] containing previous message elements. /// \param __Y /// A 128-bit vector of [4 x i32] containing previous message elements. /// \returns A 128-bit vector of [4 x i32] containing the derived SHA-1 /// elements. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha1msg1_epu32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_sha1msg1((__v4si)__X, (__v4si)__Y); } /// Performs the final calculation for deriving the next four SHA-1 message /// elements using previous message elements from the 128-bit vectors of /// [4 x i32] in \a __X and \a __Y, and returns the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c SHA1MSG2 instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] containing an intermediate result. /// \param __Y /// A 128-bit vector of [4 x i32] containing previous message values. /// \returns A 128-bit vector of [4 x i32] containing the updated SHA-1 /// values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha1msg2_epu32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_sha1msg2((__v4si)__X, (__v4si)__Y); } /// Performs two rounds of SHA-256 operation using the following inputs: a /// starting SHA-256 state (C, D, G, H) from the 128-bit vector of /// [4 x i32] in \a __X; a starting SHA-256 state (A, B, E, F) from the /// 128-bit vector of [4 x i32] in \a __Y; and a pre-computed sum of the /// next two message elements (unsigned 32-bit integers) and corresponding /// rounding constants from the 128-bit vector of [4 x i32] in \a __Z. /// Returns the updated SHA-256 state (A, B, E, F) as a 128-bit vector of /// [4 x i32]. /// /// The SHA-256 algorithm has a core loop of 64 iterations. 
This intrinsic /// performs two of those iterations. /// /// \headerfile /// /// This intrinsic corresponds to the \c SHA256RNDS2 instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] containing part of the initial SHA-256 /// state. /// \param __Y /// A 128-bit vector of [4 x i32] containing part of the initial SHA-256 /// state. /// \param __Z /// A 128-bit vector of [4 x i32] containing additional input to the /// SHA-256 operation. /// \returns A 128-bit vector of [4 x i32] containing the updated SHA-256 state. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha256rnds2_epu32(__m128i __X, __m128i __Y, __m128i __Z) { return (__m128i)__builtin_ia32_sha256rnds2((__v4si)__X, (__v4si)__Y, (__v4si)__Z); } /// Performs an intermediate calculation for deriving the next four SHA-256 /// message elements using previous message elements from the 128-bit /// vectors of [4 x i32] in \a __X and \a __Y, and returns the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c SHA256MSG1 instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] containing previous message elements. /// \param __Y /// A 128-bit vector of [4 x i32] containing previous message elements. /// \returns A 128-bit vector of [4 x i32] containing the updated SHA-256 /// values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha256msg1_epu32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_sha256msg1((__v4si)__X, (__v4si)__Y); } /// Performs the final calculation for deriving the next four SHA-256 message /// elements using previous message elements from the 128-bit vectors of /// [4 x i32] in \a __X and \a __Y, and returns the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c SHA256MSG2 instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] containing an intermediate result. /// \param __Y /// A 128-bit vector of [4 x i32] containing previous message values. /// \returns A 128-bit vector of [4 x i32] containing the updated SHA-256 /// values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha256msg2_epu32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_sha256msg2((__v4si)__X, (__v4si)__Y); } #undef __DEFAULT_FN_ATTRS #endif /* __SHAINTRIN_H */ /*===---- stddef.h - Basic type definitions --------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* * This header is designed to be included multiple times. If any of the __need_ * macros are defined, then only that subset of interfaces are provided. This * can be useful for POSIX headers that need to not expose all of stddef.h, but * need to use some of its interfaces. Otherwise this header provides all of * the expected interfaces. * * When clang modules are enabled, this header is a textual header. It ignores * its header guard so that multiple submodules can export its interfaces. * Take module SM with submodules A and B, whose headers both include stddef.h. * When SM.A builds, __STDDEF_H will be defined. When SM.B builds, the * definition from SM.A will leak when building without local submodule * visibility. stddef.h wouldn't include any of its implementation headers, and * SM.B wouldn't import any of the stddef modules, and SM.B's `export *` * wouldn't export any stddef interfaces as expected. 
However, since stddef.h * ignores its header guard when building with modules, it all works as * expected. * * When clang modules are not enabled, the header guards can function in the * normal simple fashion. */ #if !defined(__STDDEF_H) || __has_feature(modules) || \ (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1) || \ defined(__need_ptrdiff_t) || defined(__need_size_t) || \ defined(__need_rsize_t) || defined(__need_wchar_t) || \ defined(__need_NULL) || defined(__need_nullptr_t) || \ defined(__need_unreachable) || defined(__need_max_align_t) || \ defined(__need_offsetof) || defined(__need_wint_t) #if !defined(__need_ptrdiff_t) && !defined(__need_size_t) && \ !defined(__need_rsize_t) && !defined(__need_wchar_t) && \ !defined(__need_NULL) && !defined(__need_nullptr_t) && \ !defined(__need_unreachable) && !defined(__need_max_align_t) && \ !defined(__need_offsetof) && !defined(__need_wint_t) #define __STDDEF_H #define __need_ptrdiff_t #define __need_size_t /* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is * enabled. */ #if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 #define __need_rsize_t #endif #define __need_wchar_t #define __need_NULL #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \ defined(__cplusplus) #define __need_nullptr_t #endif #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L #define __need_unreachable #endif #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \ (defined(__cplusplus) && __cplusplus >= 201103L) #define __need_max_align_t #endif #define __need_offsetof /* wint_t is provided by and not . It's here * for compatibility, but must be explicitly requested. Therefore * __need_wint_t is intentionally not defined here. */ #endif #if defined(__need_ptrdiff_t) #include <__stddef_ptrdiff_t.h> #undef __need_ptrdiff_t #endif /* defined(__need_ptrdiff_t) */ #if defined(__need_size_t) #include <__stddef_size_t.h> #undef __need_size_t #endif /*defined(__need_size_t) */ #if defined(__need_rsize_t) #include <__stddef_rsize_t.h> #undef __need_rsize_t #endif /* defined(__need_rsize_t) */ #if defined(__need_wchar_t) #include <__stddef_wchar_t.h> #undef __need_wchar_t #endif /* defined(__need_wchar_t) */ #if defined(__need_NULL) #include <__stddef_null.h> #undef __need_NULL #endif /* defined(__need_NULL) */ #if defined(__need_nullptr_t) #include <__stddef_nullptr_t.h> #undef __need_nullptr_t #endif /* defined(__need_nullptr_t) */ #if defined(__need_unreachable) #include <__stddef_unreachable.h> #undef __need_unreachable #endif /* defined(__need_unreachable) */ #if defined(__need_max_align_t) #include <__stddef_max_align_t.h> #undef __need_max_align_t #endif /* defined(__need_max_align_t) */ #if defined(__need_offsetof) #include <__stddef_offsetof.h> #undef __need_offsetof #endif /* defined(__need_offsetof) */ /* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use __WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */ #if defined(__need_wint_t) #include <__stddef_wint_t.h> #undef __need_wint_t #endif /* __need_wint_t */ #endif /*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __TMMINTRIN_H #define __TMMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif #include /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("ssse3,no-evex512"), __min_vector_width__(64))) #define __DEFAULT_FN_ATTRS_MMX \ __attribute__((__always_inline__, __nodebug__, \ __target__("mmx,ssse3,no-evex512"), \ __min_vector_width__(64))) /// Computes the absolute value of each of the packed 8-bit signed /// integers in the source operand and stores the 8-bit unsigned integer /// results in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c PABSB instruction. /// /// \param __a /// A 64-bit vector of [8 x i8]. /// \returns A 64-bit integer vector containing the absolute values of the /// elements in the operand. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi8(__m64 __a) { return (__m64)__builtin_ia32_pabsb((__v8qi)__a); } /// Computes the absolute value of each of the packed 8-bit signed /// integers in the source operand and stores the 8-bit unsigned integer /// results in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPABSB instruction. /// /// \param __a /// A 128-bit vector of [16 x i8]. /// \returns A 128-bit integer vector containing the absolute values of the /// elements in the operand. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi8(__m128i __a) { return (__m128i)__builtin_elementwise_abs((__v16qs)__a); } /// Computes the absolute value of each of the packed 16-bit signed /// integers in the source operand and stores the 16-bit unsigned integer /// results in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c PABSW instruction. /// /// \param __a /// A 64-bit vector of [4 x i16]. /// \returns A 64-bit integer vector containing the absolute values of the /// elements in the operand. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi16(__m64 __a) { return (__m64)__builtin_ia32_pabsw((__v4hi)__a); } /// Computes the absolute value of each of the packed 16-bit signed /// integers in the source operand and stores the 16-bit unsigned integer /// results in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPABSW instruction. /// /// \param __a /// A 128-bit vector of [8 x i16]. /// \returns A 128-bit integer vector containing the absolute values of the /// elements in the operand. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi16(__m128i __a) { return (__m128i)__builtin_elementwise_abs((__v8hi)__a); } /// Computes the absolute value of each of the packed 32-bit signed /// integers in the source operand and stores the 32-bit unsigned integer /// results in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c PABSD instruction. /// /// \param __a /// A 64-bit vector of [2 x i32]. /// \returns A 64-bit integer vector containing the absolute values of the /// elements in the operand. 
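///
/// For example (an illustrative sketch using hypothetical values):
/// \code
/// __m64 r = _mm_abs_pi32(_mm_set_pi32(-5, 7)); // r holds the absolute values 5 and 7
/// \endcode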
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi32(__m64 __a) { return (__m64)__builtin_ia32_pabsd((__v2si)__a); } /// Computes the absolute value of each of the packed 32-bit signed /// integers in the source operand and stores the 32-bit unsigned integer /// results in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPABSD instruction. /// /// \param __a /// A 128-bit vector of [4 x i32]. /// \returns A 128-bit integer vector containing the absolute values of the /// elements in the operand. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi32(__m128i __a) { return (__m128i)__builtin_elementwise_abs((__v4si)__a); } /// Concatenates the two 128-bit integer vector operands, and /// right-shifts the result by the number of bytes specified in the immediate /// operand. /// /// \headerfile /// /// \code /// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n); /// \endcode /// /// This intrinsic corresponds to the \c PALIGNR instruction. /// /// \param a /// A 128-bit vector of [16 x i8] containing one of the source operands. /// \param b /// A 128-bit vector of [16 x i8] containing one of the source operands. /// \param n /// An immediate operand specifying how many bytes to right-shift the result. /// \returns A 128-bit integer vector containing the concatenated right-shifted /// value. #define _mm_alignr_epi8(a, b, n) \ ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \ (__v16qi)(__m128i)(b), (n))) /// Concatenates the two 64-bit integer vector operands, and right-shifts /// the result by the number of bytes specified in the immediate operand. /// /// \headerfile /// /// \code /// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n); /// \endcode /// /// This intrinsic corresponds to the \c PALIGNR instruction. /// /// \param a /// A 64-bit vector of [8 x i8] containing one of the source operands. /// \param b /// A 64-bit vector of [8 x i8] containing one of the source operands. /// \param n /// An immediate operand specifying how many bytes to right-shift the result. /// \returns A 64-bit integer vector containing the concatenated right-shifted /// value. #define _mm_alignr_pi8(a, b, n) \ ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))) /// Horizontally adds the adjacent pairs of values contained in 2 packed /// 128-bit vectors of [8 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHADDW instruction. /// /// \param __a /// A 128-bit vector of [8 x i16] containing one of the source operands. The /// horizontal sums of the values are stored in the lower bits of the /// destination. /// \param __b /// A 128-bit vector of [8 x i16] containing one of the source operands. The /// horizontal sums of the values are stored in the upper bits of the /// destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of /// both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadd_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally adds the adjacent pairs of values contained in 2 packed /// 128-bit vectors of [4 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHADDD instruction. /// /// \param __a /// A 128-bit vector of [4 x i32] containing one of the source operands. The /// horizontal sums of the values are stored in the lower bits of the /// destination. /// \param __b /// A 128-bit vector of [4 x i32] containing one of the source operands. 
The /// horizontal sums of the values are stored in the upper bits of the /// destination. /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of /// both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadd_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); } /// Horizontally adds the adjacent pairs of values contained in 2 packed /// 64-bit vectors of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the \c PHADDW instruction. /// /// \param __a /// A 64-bit vector of [4 x i16] containing one of the source operands. The /// horizontal sums of the values are stored in the lower bits of the /// destination. /// \param __b /// A 64-bit vector of [4 x i16] containing one of the source operands. The /// horizontal sums of the values are stored in the upper bits of the /// destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both /// operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadd_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b); } /// Horizontally adds the adjacent pairs of values contained in 2 packed /// 64-bit vectors of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the \c PHADDD instruction. /// /// \param __a /// A 64-bit vector of [2 x i32] containing one of the source operands. The /// horizontal sums of the values are stored in the lower bits of the /// destination. /// \param __b /// A 64-bit vector of [2 x i32] containing one of the source operands. The /// horizontal sums of the values are stored in the upper bits of the /// destination. /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both /// operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadd_pi32(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b); } /// Horizontally adds, with saturation, the adjacent pairs of values contained /// in two packed 128-bit vectors of [8 x i16]. /// /// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums /// less than 0x8000 are saturated to 0x8000. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHADDSW instruction. /// /// \param __a /// A 128-bit vector of [8 x i16] containing one of the source operands. The /// horizontal sums of the values are stored in the lower bits of the /// destination. /// \param __b /// A 128-bit vector of [8 x i16] containing one of the source operands. The /// horizontal sums of the values are stored in the upper bits of the /// destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated /// sums of both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadds_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally adds, with saturation, the adjacent pairs of values contained /// in two packed 64-bit vectors of [4 x i16]. /// /// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums /// less than 0x8000 are saturated to 0x8000. /// /// \headerfile /// /// This intrinsic corresponds to the \c PHADDSW instruction. /// /// \param __a /// A 64-bit vector of [4 x i16] containing one of the source operands. The /// horizontal sums of the values are stored in the lower bits of the /// destination. /// \param __b /// A 64-bit vector of [4 x i16] containing one of the source operands. 
The /// horizontal sums of the values are stored in the upper bits of the /// destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated /// sums of both operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadds_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 128-bit vectors of [8 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHSUBW instruction. /// /// \param __a /// A 128-bit vector of [8 x i16] containing one of the source operands. The /// horizontal differences between the values are stored in the lower bits of /// the destination. /// \param __b /// A 128-bit vector of [8 x i16] containing one of the source operands. The /// horizontal differences between the values are stored in the upper bits of /// the destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences /// of both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsub_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 128-bit vectors of [4 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHSUBD instruction. /// /// \param __a /// A 128-bit vector of [4 x i32] containing one of the source operands. The /// horizontal differences between the values are stored in the lower bits of /// the destination. /// \param __b /// A 128-bit vector of [4 x i32] containing one of the source operands. The /// horizontal differences between the values are stored in the upper bits of /// the destination. /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences /// of both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsub_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 64-bit vectors of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the \c PHSUBW instruction. /// /// \param __a /// A 64-bit vector of [4 x i16] containing one of the source operands. The /// horizontal differences between the values are stored in the lower bits of /// the destination. /// \param __b /// A 64-bit vector of [4 x i16] containing one of the source operands. The /// horizontal differences between the values are stored in the upper bits of /// the destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences /// of both operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsub_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 64-bit vectors of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the \c PHSUBD instruction. /// /// \param __a /// A 64-bit vector of [2 x i32] containing one of the source operands. The /// horizontal differences between the values are stored in the lower bits of /// the destination. /// \param __b /// A 64-bit vector of [2 x i32] containing one of the source operands. The /// horizontal differences between the values are stored in the upper bits of /// the destination. /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences /// of both operands. 
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsub_pi32(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b); } /// Horizontally subtracts, with saturation, the adjacent pairs of values /// contained in two packed 128-bit vectors of [8 x i16]. /// /// Positive differences greater than 0x7FFF are saturated to 0x7FFF. /// Negative differences less than 0x8000 are saturated to 0x8000. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHSUBSW instruction. /// /// \param __a /// A 128-bit vector of [8 x i16] containing one of the source operands. The /// horizontal differences between the values are stored in the lower bits of /// the destination. /// \param __b /// A 128-bit vector of [8 x i16] containing one of the source operands. The /// horizontal differences between the values are stored in the upper bits of /// the destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated /// differences of both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsubs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally subtracts, with saturation, the adjacent pairs of values /// contained in two packed 64-bit vectors of [4 x i16]. /// /// Positive differences greater than 0x7FFF are saturated to 0x7FFF. /// Negative differences less than 0x8000 are saturated to 0x8000. /// /// \headerfile /// /// This intrinsic corresponds to the \c PHSUBSW instruction. /// /// \param __a /// A 64-bit vector of [4 x i16] containing one of the source operands. The /// horizontal differences between the values are stored in the lower bits of /// the destination. /// \param __b /// A 64-bit vector of [4 x i16] containing one of the source operands. The /// horizontal differences between the values are stored in the upper bits of /// the destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated /// differences of both operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsubs_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b); } /// Multiplies corresponding pairs of packed 8-bit unsigned integer /// values contained in the first source operand and packed 8-bit signed /// integer values contained in the second source operand, adds pairs of /// contiguous products with signed saturation, and writes the 16-bit sums to /// the corresponding bits in the destination. /// /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of /// both operands are multiplied, and the sum of both results is written to /// bits [15:0] of the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMADDUBSW instruction. /// /// \param __a /// A 128-bit integer vector containing the first source operand. /// \param __b /// A 128-bit integer vector containing the second source operand. 
/// \returns A 128-bit integer vector containing the sums of products of both /// operands: \n /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n /// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n /// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n /// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n /// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maddubs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b); } /// Multiplies corresponding pairs of packed 8-bit unsigned integer /// values contained in the first source operand and packed 8-bit signed /// integer values contained in the second source operand, adds pairs of /// contiguous products with signed saturation, and writes the 16-bit sums to /// the corresponding bits in the destination. /// /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of /// both operands are multiplied, and the sum of both results is written to /// bits [15:0] of the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c PMADDUBSW instruction. /// /// \param __a /// A 64-bit integer vector containing the first source operand. /// \param __b /// A 64-bit integer vector containing the second source operand. /// \returns A 64-bit integer vector containing the sums of products of both /// operands: \n /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_maddubs_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b); } /// Multiplies packed 16-bit signed integer values, truncates the 32-bit /// products to the 18 most significant bits by right-shifting, rounds the /// truncated value by adding 1, and writes bits [16:1] to the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMULHRSW instruction. /// /// \param __a /// A 128-bit vector of [8 x i16] containing one of the source operands. /// \param __b /// A 128-bit vector of [8 x i16] containing one of the source operands. /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled /// products of both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhrs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); } /// Multiplies packed 16-bit signed integer values, truncates the 32-bit /// products to the 18 most significant bits by right-shifting, rounds the /// truncated value by adding 1, and writes bits [16:1] to the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c PMULHRSW instruction. /// /// \param __a /// A 64-bit vector of [4 x i16] containing one of the source operands. /// \param __b /// A 64-bit vector of [4 x i16] containing one of the source operands. /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled /// products of both operands. 
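///
/// A scalar model of each lane (an illustrative restatement of the operation
/// described above, not additional behavior):
/// \code
/// r[i] = (short)(((int)__a[i] * (int)__b[i] + 0x4000) >> 15);
/// \endcode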
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mulhrs_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b); } /// Copies the 8-bit integers from a 128-bit integer vector to the /// destination or clears 8-bit values in the destination, as specified by /// the second source operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSHUFB instruction. /// /// \param __a /// A 128-bit integer vector containing the values to be copied. /// \param __b /// A 128-bit integer vector containing control bytes corresponding to /// positions in the destination: /// Bit 7: \n /// 1: Clear the corresponding byte in the destination. \n /// 0: Copy the selected source byte to the corresponding byte in the /// destination. \n /// Bits [6:4] Reserved. \n /// Bits [3:0] select the source byte to be copied. /// \returns A 128-bit integer vector containing the copied or cleared values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shuffle_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); } /// Copies the 8-bit integers from a 64-bit integer vector to the /// destination or clears 8-bit values in the destination, as specified by /// the second source operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c PSHUFB instruction. /// /// \param __a /// A 64-bit integer vector containing the values to be copied. /// \param __b /// A 64-bit integer vector containing control bytes corresponding to /// positions in the destination: /// Bit 7: \n /// 1: Clear the corresponding byte in the destination. \n /// 0: Copy the selected source byte to the corresponding byte in the /// destination. \n /// Bits [3:0] select the source byte to be copied. /// \returns A 64-bit integer vector containing the copied or cleared values. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_shuffle_pi8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b); } /// For each 8-bit integer in the first source operand, perform one of /// the following actions as specified by the second source operand. /// /// If the byte in the second source is negative, calculate the two's /// complement of the corresponding byte in the first source, and write that /// value to the destination. If the byte in the second source is positive, /// copy the corresponding byte from the first source to the destination. If /// the byte in the second source is zero, clear the corresponding byte in /// the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSIGNB instruction. /// /// \param __a /// A 128-bit integer vector containing the values to be copied. /// \param __b /// A 128-bit integer vector containing control bytes corresponding to /// positions in the destination. /// \returns A 128-bit integer vector containing the resultant values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); } /// For each 16-bit integer in the first source operand, perform one of /// the following actions as specified by the second source operand. /// /// If the word in the second source is negative, calculate the two's /// complement of the corresponding word in the first source, and write that /// value to the destination. If the word in the second source is positive, /// copy the corresponding word from the first source to the destination. 
If /// the word in the second source is zero, clear the corresponding word in /// the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSIGNW instruction. /// /// \param __a /// A 128-bit integer vector containing the values to be copied. /// \param __b /// A 128-bit integer vector containing control words corresponding to /// positions in the destination. /// \returns A 128-bit integer vector containing the resultant values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); } /// For each 32-bit integer in the first source operand, perform one of /// the following actions as specified by the second source operand. /// /// If the doubleword in the second source is negative, calculate the two's /// complement of the corresponding word in the first source, and write that /// value to the destination. If the doubleword in the second source is /// positive, copy the corresponding word from the first source to the /// destination. If the doubleword in the second source is zero, clear the /// corresponding word in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSIGND instruction. /// /// \param __a /// A 128-bit integer vector containing the values to be copied. /// \param __b /// A 128-bit integer vector containing control doublewords corresponding to /// positions in the destination. /// \returns A 128-bit integer vector containing the resultant values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); } /// For each 8-bit integer in the first source operand, perform one of /// the following actions as specified by the second source operand. /// /// If the byte in the second source is negative, calculate the two's /// complement of the corresponding byte in the first source, and write that /// value to the destination. If the byte in the second source is positive, /// copy the corresponding byte from the first source to the destination. If /// the byte in the second source is zero, clear the corresponding byte in /// the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c PSIGNB instruction. /// /// \param __a /// A 64-bit integer vector containing the values to be copied. /// \param __b /// A 64-bit integer vector containing control bytes corresponding to /// positions in the destination. /// \returns A 64-bit integer vector containing the resultant values. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b); } /// For each 16-bit integer in the first source operand, perform one of /// the following actions as specified by the second source operand. /// /// If the word in the second source is negative, calculate the two's /// complement of the corresponding word in the first source, and write that /// value to the destination. If the word in the second source is positive, /// copy the corresponding word from the first source to the destination. If /// the word in the second source is zero, clear the corresponding word in /// the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c PSIGNW instruction. /// /// \param __a /// A 64-bit integer vector containing the values to be copied. 
/// \param __b /// A 64-bit integer vector containing control words corresponding to /// positions in the destination. /// \returns A 64-bit integer vector containing the resultant values. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b); } /// For each 32-bit integer in the first source operand, perform one of /// the following actions as specified by the second source operand. /// /// If the doubleword in the second source is negative, calculate the two's /// complement of the corresponding doubleword in the first source, and /// write that value to the destination. If the doubleword in the second /// source is positive, copy the corresponding doubleword from the first /// source to the destination. If the doubleword in the second source is /// zero, clear the corresponding doubleword in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c PSIGND instruction. /// /// \param __a /// A 64-bit integer vector containing the values to be copied. /// \param __b /// A 64-bit integer vector containing two control doublewords corresponding /// to positions in the destination. /// \returns A 64-bit integer vector containing the resultant values. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi32(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b); } #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_MMX #endif /* __TMMINTRIN_H */ //===-- Wrapper for C standard time.h declarations on the GPU -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__ #define __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__ #if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__) #error "This file is for GPU offloading compilation only" #endif #include_next #if __has_include() #if defined(__HIP__) || defined(__CUDA__) #define __LIBC_ATTRS __attribute__((device)) #endif #pragma omp begin declare target _Static_assert(sizeof(clock_t) == sizeof(long), "ABI mismatch!"); #include #pragma omp end declare target #endif #endif // __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__ /*===---- __stddef_wint.h - Definition of wint_t ---------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef _WINT_T #define _WINT_T typedef __WINT_TYPE__ wint_t; #endif /*===---- arm_neon_sve_bridge.h - ARM NEON SVE Bridge intrinsics -----------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __ARM_NEON_SVE_BRIDGE_H #define __ARM_NEON_SVE_BRIDGE_H #include #include #ifdef __cplusplus extern "C" { #endif /* Function attributes */ #define __ai static __inline__ __attribute__((__always_inline__, __nodebug__)) #define __aio \ static __inline__ \ __attribute__((__always_inline__, __nodebug__, __overloadable__)) __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s8))) svint8_t svset_neonq(svint8_t, int8x16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s16))) svint16_t svset_neonq(svint16_t, int16x8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s32))) svint32_t svset_neonq(svint32_t, int32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s64))) svint64_t svset_neonq(svint64_t, int64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u8))) svuint8_t svset_neonq(svuint8_t, uint8x16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u16))) svuint16_t svset_neonq(svuint16_t, uint16x8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u32))) svuint32_t svset_neonq(svuint32_t, uint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u64))) svuint64_t svset_neonq(svuint64_t, uint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f16))) svfloat16_t svset_neonq(svfloat16_t, float16x8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f32))) svfloat32_t svset_neonq(svfloat32_t, float32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f64))) svfloat64_t svset_neonq(svfloat64_t, float64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s8))) svint8_t svset_neonq_s8(svint8_t, int8x16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s16))) svint16_t svset_neonq_s16(svint16_t, int16x8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s32))) svint32_t svset_neonq_s32(svint32_t, int32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s64))) svint64_t svset_neonq_s64(svint64_t, int64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u8))) svuint8_t svset_neonq_u8(svuint8_t, uint8x16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u16))) svuint16_t svset_neonq_u16(svuint16_t, uint16x8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u32))) svuint32_t svset_neonq_u32(svuint32_t, uint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u64))) svuint64_t svset_neonq_u64(svuint64_t, uint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f16))) svfloat16_t svset_neonq_f16(svfloat16_t, float16x8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f32))) 
svfloat32_t svset_neonq_f32(svfloat32_t, float32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f64))) svfloat64_t svset_neonq_f64(svfloat64_t, float64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s8))) int8x16_t svget_neonq(svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s16))) int16x8_t svget_neonq(svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s32))) int32x4_t svget_neonq(svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s64))) int64x2_t svget_neonq(svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u8))) uint8x16_t svget_neonq(svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u16))) uint16x8_t svget_neonq(svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u32))) uint32x4_t svget_neonq(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u64))) uint64x2_t svget_neonq(svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f16))) float16x8_t svget_neonq(svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f32))) float32x4_t svget_neonq(svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f64))) float64x2_t svget_neonq(svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s8))) int8x16_t svget_neonq_s8(svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s16))) int16x8_t svget_neonq_s16(svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s32))) int32x4_t svget_neonq_s32(svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s64))) int64x2_t svget_neonq_s64(svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u8))) uint8x16_t svget_neonq_u8(svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u16))) uint16x8_t svget_neonq_u16(svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u32))) uint32x4_t svget_neonq_u32(svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u64))) uint64x2_t svget_neonq_u64(svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f16))) float16x8_t svget_neonq_f16(svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f32))) float32x4_t svget_neonq_f32(svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f64))) float64x2_t svget_neonq_f64(svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s8))) svint8_t svdup_neonq(int8x16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s16))) svint16_t svdup_neonq(int16x8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s32))) svint32_t svdup_neonq(int32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s64))) svint64_t svdup_neonq(int64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u8))) svuint8_t svdup_neonq(uint8x16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u16))) svuint16_t svdup_neonq(uint16x8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u32))) svuint32_t 
svdup_neonq(uint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u64))) svuint64_t svdup_neonq(uint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f16))) svfloat16_t svdup_neonq(float16x8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f32))) svfloat32_t svdup_neonq(float32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f64))) svfloat64_t svdup_neonq(float64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s8))) svint8_t svdup_neonq_s8(int8x16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s16))) svint16_t svdup_neonq_s16(int16x8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s32))) svint32_t svdup_neonq_s32(int32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s64))) svint64_t svdup_neonq_s64(int64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u8))) svuint8_t svdup_neonq_u8(uint8x16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u16))) svuint16_t svdup_neonq_u16(uint16x8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u32))) svuint32_t svdup_neonq_u32(uint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u64))) svuint64_t svdup_neonq_u64(uint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f16))) svfloat16_t svdup_neonq_f16(float16x8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f32))) svfloat32_t svdup_neonq_f32(float32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f64))) svfloat64_t svdup_neonq_f64(float64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_bf16))) svbfloat16_t svset_neonq(svbfloat16_t, bfloat16x8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_bf16))) svbfloat16_t svset_neonq_bf16(svbfloat16_t, bfloat16x8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_bf16))) bfloat16x8_t svget_neonq(svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_bf16))) bfloat16x8_t svget_neonq_bf16(svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_bf16))) svbfloat16_t svdup_neonq(bfloat16x8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_bf16))) svbfloat16_t svdup_neonq_bf16(bfloat16x8_t); #undef __ai #undef __aio #ifdef __cplusplus } // extern "C" #endif #endif //__ARM_NEON_SVE_BRIDGE_H /*===---- lwpintrin.h - LWP intrinsics -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86INTRIN_H #error "Never use directly; include instead." #endif #ifndef __LWPINTRIN_H #define __LWPINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lwp"))) /// Parses the LWPCB at the specified address and enables /// profiling if valid. /// /// \headerfile /// /// This intrinsic corresponds to the LLWPCB instruction. 
/// /// \param __addr /// Address to the new Lightweight Profiling Control Block (LWPCB). If the /// LWPCB is valid, writes the address into the LWP_CBADDR MSR and enables /// Lightweight Profiling. static __inline__ void __DEFAULT_FN_ATTRS __llwpcb (void *__addr) { __builtin_ia32_llwpcb(__addr); } /// Flushes the LWP state to memory and returns the address of the LWPCB. /// /// \headerfile /// /// This intrinsic corresponds to the SLWPCB instruction. /// /// \return /// Address to the current Lightweight Profiling Control Block (LWPCB). /// If LWP is not currently enabled, returns NULL. static __inline__ void* __DEFAULT_FN_ATTRS __slwpcb (void) { return __builtin_ia32_slwpcb(); } /// Inserts programmed event record into the LWP event ring buffer /// and advances the ring buffer pointer. /// /// \headerfile /// /// This intrinsic corresponds to the LWPINS instruction. /// /// \param DATA2 /// A 32-bit value is zero-extended and inserted into the 64-bit Data2 field. /// \param DATA1 /// A 32-bit value is inserted into the 32-bit Data1 field. /// \param FLAGS /// A 32-bit immediate value is inserted into the 32-bit Flags field. /// \returns If the ring buffer is full and LWP is running in Synchronized Mode, /// the event record overwrites the last record in the buffer, the MissedEvents /// counter in the LWPCB is incremented, the head pointer is not advanced, and /// 1 is returned. Otherwise 0 is returned. #define __lwpins32(DATA2, DATA1, FLAGS) \ (__builtin_ia32_lwpins32((unsigned int) (DATA2), (unsigned int) (DATA1), \ (unsigned int) (FLAGS))) /// Decrements the LWP programmed value sample event counter. If the result is /// negative, inserts an event record into the LWP event ring buffer in memory /// and advances the ring buffer pointer. /// /// \headerfile /// /// This intrinsic corresponds to the LWPVAL instruction. /// /// \param DATA2 /// A 32-bit value is zero-extended and inserted into the 64-bit Data2 field. /// \param DATA1 /// A 32-bit value is inserted into the 32-bit Data1 field. /// \param FLAGS /// A 32-bit immediate value is inserted into the 32-bit Flags field. #define __lwpval32(DATA2, DATA1, FLAGS) \ (__builtin_ia32_lwpval32((unsigned int) (DATA2), (unsigned int) (DATA1), \ (unsigned int) (FLAGS))) #ifdef __x86_64__ /// Inserts programmed event record into the LWP event ring buffer /// and advances the ring buffer pointer. /// /// \headerfile /// /// This intrinsic corresponds to the LWPINS instruction. /// /// \param DATA2 /// A 64-bit value is inserted into the 64-bit Data2 field. /// \param DATA1 /// A 32-bit value is inserted into the 32-bit Data1 field. /// \param FLAGS /// A 32-bit immediate value is inserted into the 32-bit Flags field. /// \returns If the ring buffer is full and LWP is running in Synchronized Mode, /// the event record overwrites the last record in the buffer, the MissedEvents /// counter in the LWPCB is incremented, the head pointer is not advanced, and /// 1 is returned. Otherwise 0 is returned. #define __lwpins64(DATA2, DATA1, FLAGS) \ (__builtin_ia32_lwpins64((unsigned long long) (DATA2), (unsigned int) (DATA1), \ (unsigned int) (FLAGS))) /// Decrements the LWP programmed value sample event counter. If the result is /// negative, inserts an event record into the LWP event ring buffer in memory /// and advances the ring buffer pointer. /// /// \headerfile /// /// This intrinsic corresponds to the LWPVAL instruction. /// /// \param DATA2 /// A 64-bit value is and inserted into the 64-bit Data2 field. 
/// \param DATA1 /// A 32-bit value is inserted into the 32-bit Data1 field. /// \param FLAGS /// A 32-bit immediate value is inserted into the 32-bit Flags field. #define __lwpval64(DATA2, DATA1, FLAGS) \ (__builtin_ia32_lwpval64((unsigned long long) (DATA2), (unsigned int) (DATA1), \ (unsigned int) (FLAGS))) #endif #undef __DEFAULT_FN_ATTRS #endif /* __LWPINTRIN_H */ /*===---- mm3dnow.h - 3DNow! intrinsics ------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef _MM3DNOW_H_INCLUDED #define _MM3DNOW_H_INCLUDED #include #include typedef float __v2sf __attribute__((__vector_size__(8))); /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow"), __min_vector_width__(64))) static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("3dnow"))) _m_femms(void) { __builtin_ia32_femms(); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pavgusb(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pavgusb((__v8qi)__m1, (__v8qi)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pf2id(__m64 __m) { return (__m64)__builtin_ia32_pf2id((__v2sf)__m); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfacc(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfacc((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfadd(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfadd((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfcmpeq(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfcmpge(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfcmpge((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfcmpgt(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfmax(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfmax((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfmin(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfmin((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfmul(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfmul((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfrcp(__m64 __m) { return (__m64)__builtin_ia32_pfrcp((__v2sf)__m); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfrcpit1(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfrcpit2(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfrsqrt(__m64 __m) { return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__m); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfrsqrtit1(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfsub(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfsub((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfsubr(__m64 __m1, __m64 __m2) { return 
(__m64)__builtin_ia32_pfsubr((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pi2fd(__m64 __m) { return (__m64)__builtin_ia32_pi2fd((__v2si)__m); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pmulhrw(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2); } /* Handle the 3dnowa instructions here. */ #undef __DEFAULT_FN_ATTRS #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa"), __min_vector_width__(64))) static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pf2iw(__m64 __m) { return (__m64)__builtin_ia32_pf2iw((__v2sf)__m); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfnacc(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfnacc((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfpnacc(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfpnacc((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pi2fw(__m64 __m) { return (__m64)__builtin_ia32_pi2fw((__v2si)__m); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pswapdsf(__m64 __m) { return (__m64)__builtin_ia32_pswapdsf((__v2sf)__m); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pswapdsi(__m64 __m) { return (__m64)__builtin_ia32_pswapdsi((__v2si)__m); } #undef __DEFAULT_FN_ATTRS #endif /*===---- mmintrin.h - MMX intrinsics --------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __MMINTRIN_H #define __MMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8))); typedef long long __v1di __attribute__((__vector_size__(8))); typedef int __v2si __attribute__((__vector_size__(8))); typedef short __v4hi __attribute__((__vector_size__(8))); typedef char __v8qi __attribute__((__vector_size__(8))); /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("mmx,no-evex512"), \ __min_vector_width__(64))) /// Clears the MMX state by setting the state of the x87 stack registers /// to empty. /// /// \headerfile /// /// This intrinsic corresponds to the EMMS instruction. /// static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("mmx,no-evex512"))) _mm_empty(void) { __builtin_ia32_emms(); } /// Constructs a 64-bit integer vector, setting the lower 32 bits to the /// value of the 32-bit integer parameter and setting the upper 32 bits to 0. /// /// \headerfile /// /// This intrinsic corresponds to the MOVD instruction. /// /// \param __i /// A 32-bit integer value. /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the /// parameter. The upper 32 bits are set to 0. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtsi32_si64(int __i) { return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); } /// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit /// signed integer. /// /// \headerfile /// /// This intrinsic corresponds to the MOVD instruction. /// /// \param __m /// A 64-bit integer vector. /// \returns A 32-bit signed integer value containing the lower 32 bits of the /// parameter. 
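/* Illustrative usage sketch (not part of the original header): round-tripping
 * scalar values through the __m64 type with the conversion intrinsics
 * described here. Assumes an x86/x86-64 target with MMX enabled and is meant
 * to be compiled as a standalone program, not inside this header. */
#include <mmintrin.h>
#include <stdio.h>

int main(void) {
  __m64 v32 = _mm_cvtsi32_si64(0x12345678);           /* lower 32 bits set, upper 32 zeroed */
  __m64 v64 = _mm_cvtsi64_m64(0x0123456789abcdefLL);  /* bitwise copy of a 64-bit value */

  int lo = _mm_cvtsi64_si32(v32);                     /* recover the lower 32 bits */
  long long all = _mm_cvtm64_si64(v64);               /* recover the full 64-bit pattern */

  _mm_empty();                                        /* clear MMX state before using x87 FP */
  printf("%#x %#llx\n", lo, all);                     /* expected: 0x12345678 0x123456789abcdef */
  return 0;
}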
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi64_si32(__m64 __m) { return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); } /// Casts a 64-bit signed integer value into a 64-bit integer vector. /// /// \headerfile /// /// This intrinsic corresponds to the MOVQ instruction. /// /// \param __i /// A 64-bit signed integer. /// \returns A 64-bit integer vector containing the same bitwise pattern as the /// parameter. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtsi64_m64(long long __i) { return (__m64)__i; } /// Casts a 64-bit integer vector into a 64-bit signed integer value. /// /// \headerfile /// /// This intrinsic corresponds to the MOVQ instruction. /// /// \param __m /// A 64-bit integer vector. /// \returns A 64-bit signed integer containing the same bitwise pattern as the /// parameter. static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtm64_si64(__m64 __m) { return (long long)__m; } /// Converts, with saturation, 16-bit signed integers from both 64-bit integer /// vector parameters of [4 x i16] into 8-bit signed integer values, and /// constructs a 64-bit integer vector of [8 x i8] as the result. /// /// Positive values greater than 0x7F are saturated to 0x7F. Negative values /// less than 0x80 are saturated to 0x80. /// /// \headerfile /// /// This intrinsic corresponds to the PACKSSWB instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are /// written to the lower 32 bits of the result. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are /// written to the upper 32 bits of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the converted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); } /// Converts, with saturation, 32-bit signed integers from both 64-bit integer /// vector parameters of [2 x i32] into 16-bit signed integer values, and /// constructs a 64-bit integer vector of [4 x i16] as the result. /// /// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative /// values less than 0x8000 are saturated to 0x8000. /// /// \headerfile /// /// This intrinsic corresponds to the PACKSSDW instruction. /// /// \param __m1 /// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are /// written to the lower 32 bits of the result. /// \param __m2 /// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are /// written to the upper 32 bits of the result. /// \returns A 64-bit integer vector of [4 x i16] containing the converted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); } /// Converts, with saturation, 16-bit signed integers from both 64-bit integer /// vector parameters of [4 x i16] into 8-bit unsigned integer values, and /// constructs a 64-bit integer vector of [8 x i8] as the result. /// /// Values greater than 0xFF are saturated to 0xFF. Values less than 0 are /// saturated to 0. /// /// \headerfile /// /// This intrinsic corresponds to the PACKUSWB instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are /// written to the lower 32 bits of the result. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are /// written to the upper 32 bits of the result. 
/// \returns A 64-bit integer vector of [8 x i8] containing the converted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pu16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); } /// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] /// and interleaves them into a 64-bit integer vector of [8 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the PUNPCKHBW instruction. /// /// \param __m1 /// A 64-bit integer vector of [8 x i8]. \n /// Bits [39:32] are written to bits [7:0] of the result. \n /// Bits [47:40] are written to bits [23:16] of the result. \n /// Bits [55:48] are written to bits [39:32] of the result. \n /// Bits [63:56] are written to bits [55:48] of the result. /// \param __m2 /// A 64-bit integer vector of [8 x i8]. /// Bits [39:32] are written to bits [15:8] of the result. \n /// Bits [47:40] are written to bits [31:24] of the result. \n /// Bits [55:48] are written to bits [47:40] of the result. \n /// Bits [63:56] are written to bits [63:56] of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); } /// Unpacks the upper 32 bits from two 64-bit integer vectors of /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PUNPCKHWD instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. /// Bits [47:32] are written to bits [15:0] of the result. \n /// Bits [63:48] are written to bits [47:32] of the result. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. /// Bits [47:32] are written to bits [31:16] of the result. \n /// Bits [63:48] are written to bits [63:48] of the result. /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); } /// Unpacks the upper 32 bits from two 64-bit integer vectors of /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the PUNPCKHDQ instruction. /// /// \param __m1 /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to /// the lower 32 bits of the result. /// \param __m2 /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to /// the upper 32 bits of the result. /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); } /// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] /// and interleaves them into a 64-bit integer vector of [8 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the PUNPCKLBW instruction. /// /// \param __m1 /// A 64-bit integer vector of [8 x i8]. /// Bits [7:0] are written to bits [7:0] of the result. \n /// Bits [15:8] are written to bits [23:16] of the result. \n /// Bits [23:16] are written to bits [39:32] of the result. \n /// Bits [31:24] are written to bits [55:48] of the result. /// \param __m2 /// A 64-bit integer vector of [8 x i8]. /// Bits [7:0] are written to bits [15:8] of the result. 
\n /// Bits [15:8] are written to bits [31:24] of the result. \n /// Bits [23:16] are written to bits [47:40] of the result. \n /// Bits [31:24] are written to bits [63:56] of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); } /// Unpacks the lower 32 bits from two 64-bit integer vectors of /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PUNPCKLWD instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. /// Bits [15:0] are written to bits [15:0] of the result. \n /// Bits [31:16] are written to bits [47:32] of the result. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. /// Bits [15:0] are written to bits [31:16] of the result. \n /// Bits [31:16] are written to bits [63:48] of the result. /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); } /// Unpacks the lower 32 bits from two 64-bit integer vectors of /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the PUNPCKLDQ instruction. /// /// \param __m1 /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to /// the lower 32 bits of the result. /// \param __m2 /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to /// the upper 32 bits of the result. /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); } /// Adds each 8-bit integer element of the first 64-bit integer vector /// of [8 x i8] to the corresponding 8-bit integer element of the second /// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are /// packed into a 64-bit integer vector of [8 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the PADDB instruction. /// /// \param __m1 /// A 64-bit integer vector of [8 x i8]. /// \param __m2 /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both /// parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); } /// Adds each 16-bit integer element of the first 64-bit integer vector /// of [4 x i16] to the corresponding 16-bit integer element of the second /// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are /// packed into a 64-bit integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PADDW instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both /// parameters. 
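/* Illustrative usage sketch (not part of the original header): packing four
 * 16-bit values down to bytes with signed saturation, then interleaving the
 * low halves of two byte vectors. Values outside [-128, 127] clamp to the
 * nearest representable byte, as described above. Standalone program. */
#include <mmintrin.h>
#include <stdio.h>

int main(void) {
  __m64 wide = _mm_set_pi16(300, -200, 100, -5);   /* 300 saturates to 127, -200 to -128 */
  __m64 packed = _mm_packs_pi16(wide, _mm_setzero_si64());

  __m64 a = _mm_set_pi8(7, 6, 5, 4, 3, 2, 1, 0);
  __m64 b = _mm_set_pi8(17, 16, 15, 14, 13, 12, 11, 10);
  __m64 interleaved = _mm_unpacklo_pi8(a, b);      /* bytes, low to high: 0,10,1,11,2,12,3,13 */

  printf("%#llx %#llx\n", _mm_cvtm64_si64(packed), _mm_cvtm64_si64(interleaved));
  _mm_empty();
  return 0;
}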
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); } /// Adds each 32-bit integer element of the first 64-bit integer vector /// of [2 x i32] to the corresponding 32-bit integer element of the second /// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are /// packed into a 64-bit integer vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the PADDD instruction. /// /// \param __m1 /// A 64-bit integer vector of [2 x i32]. /// \param __m2 /// A 64-bit integer vector of [2 x i32]. /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both /// parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); } /// Adds, with saturation, each 8-bit signed integer element of the first /// 64-bit integer vector of [8 x i8] to the corresponding 8-bit signed /// integer element of the second 64-bit integer vector of [8 x i8]. /// /// Positive sums greater than 0x7F are saturated to 0x7F. Negative sums /// less than 0x80 are saturated to 0x80. The results are packed into a /// 64-bit integer vector of [8 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the PADDSB instruction. /// /// \param __m1 /// A 64-bit integer vector of [8 x i8]. /// \param __m2 /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums /// of both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); } /// Adds, with saturation, each 16-bit signed integer element of the first /// 64-bit integer vector of [4 x i16] to the corresponding 16-bit signed /// integer element of the second 64-bit integer vector of [4 x i16]. /// /// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums /// less than 0x8000 are saturated to 0x8000. The results are packed into a /// 64-bit integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PADDSW instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums /// of both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); } /// Adds, with saturation, each 8-bit unsigned integer element of the first /// 64-bit integer vector of [8 x i8] to the corresponding 8-bit unsigned /// integer element of the second 64-bit integer vector of [8 x i8]. /// /// Sums greater than 0xFF are saturated to 0xFF. The results are packed /// into a 64-bit integer vector of [8 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the PADDUSB instruction. /// /// \param __m1 /// A 64-bit integer vector of [8 x i8]. /// \param __m2 /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated /// unsigned sums of both parameters. 
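/* Illustrative usage sketch (not part of the original header): signed versus
 * unsigned saturating byte addition. Signed sums clamp at 0x7F/0x80 and
 * unsigned sums clamp at 0xFF, as the descriptions above state. Standalone
 * program. */
#include <mmintrin.h>
#include <stdio.h>

int main(void) {
  __m64 a = _mm_set1_pi8(100);
  __m64 b = _mm_set1_pi8(50);

  __m64 s = _mm_adds_pi8(a, b);   /* signed: 100 + 50 saturates to 127 in every lane */
  __m64 u = _mm_adds_pu8(_mm_set1_pi8((char)200), _mm_set1_pi8(100));
                                  /* unsigned: 200 + 100 saturates to 255 in every lane */

  printf("%#llx %#llx\n", _mm_cvtm64_si64(s), _mm_cvtm64_si64(u));
  _mm_empty();
  return 0;
}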
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pu8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); } /// Adds, with saturation, each 16-bit unsigned integer element of the first /// 64-bit integer vector of [4 x i16] to the corresponding 16-bit unsigned /// integer element of the second 64-bit integer vector of [4 x i16]. /// /// Sums greater than 0xFFFF are saturated to 0xFFFF. The results are packed /// into a 64-bit integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PADDUSW instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated /// unsigned sums of both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pu16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); } /// Subtracts each 8-bit integer element of the second 64-bit integer /// vector of [8 x i8] from the corresponding 8-bit integer element of the /// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results /// are packed into a 64-bit integer vector of [8 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the PSUBB instruction. /// /// \param __m1 /// A 64-bit integer vector of [8 x i8] containing the minuends. /// \param __m2 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. /// \returns A 64-bit integer vector of [8 x i8] containing the differences of /// both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); } /// Subtracts each 16-bit integer element of the second 64-bit integer /// vector of [4 x i16] from the corresponding 16-bit integer element of the /// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the /// results are packed into a 64-bit integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PSUBW instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16] containing the minuends. /// \param __m2 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. /// \returns A 64-bit integer vector of [4 x i16] containing the differences of /// both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); } /// Subtracts each 32-bit integer element of the second 64-bit integer /// vector of [2 x i32] from the corresponding 32-bit integer element of the /// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the /// results are packed into a 64-bit integer vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the PSUBD instruction. /// /// \param __m1 /// A 64-bit integer vector of [2 x i32] containing the minuends. /// \param __m2 /// A 64-bit integer vector of [2 x i32] containing the subtrahends. /// \returns A 64-bit integer vector of [2 x i32] containing the differences of /// both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); } /// Subtracts, with saturation, each 8-bit signed integer element of the second /// 64-bit integer vector of [8 x i8] from the corresponding 8-bit signed /// integer element of the first 64-bit integer vector of [8 x i8]. 
/// /// Positive results greater than 0x7F are saturated to 0x7F. Negative /// results less than 0x80 are saturated to 0x80. The results are packed /// into a 64-bit integer vector of [8 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the PSUBSB instruction. /// /// \param __m1 /// A 64-bit integer vector of [8 x i8] containing the minuends. /// \param __m2 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated /// differences of both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); } /// Subtracts, with saturation, each 16-bit signed integer element of the /// second 64-bit integer vector of [4 x i16] from the corresponding 16-bit /// signed integer element of the first 64-bit integer vector of [4 x i16]. /// /// Positive results greater than 0x7FFF are saturated to 0x7FFF. Negative /// results less than 0x8000 are saturated to 0x8000. The results are packed /// into a 64-bit integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PSUBSW instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16] containing the minuends. /// \param __m2 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated /// differences of both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); } /// Subtracts each 8-bit unsigned integer element of the second 64-bit /// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer /// element of the first 64-bit integer vector of [8 x i8]. /// /// If an element of the first vector is less than the corresponding element /// of the second vector, the result is saturated to 0. The results are /// packed into a 64-bit integer vector of [8 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the PSUBUSB instruction. /// /// \param __m1 /// A 64-bit integer vector of [8 x i8] containing the minuends. /// \param __m2 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated /// differences of both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pu8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); } /// Subtracts each 16-bit unsigned integer element of the second 64-bit /// integer vector of [4 x i16] from the corresponding 16-bit unsigned /// integer element of the first 64-bit integer vector of [4 x i16]. /// /// If an element of the first vector is less than the corresponding element /// of the second vector, the result is saturated to 0. The results are /// packed into a 64-bit integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PSUBUSW instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16] containing the minuends. /// \param __m2 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated /// differences of both parameters. 
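/* Illustrative usage sketch (not part of the original header): unsigned
 * saturating subtraction. Lanes where the subtrahend exceeds the minuend
 * clamp to 0 instead of wrapping, per the description above. Standalone
 * program. */
#include <mmintrin.h>
#include <stdio.h>

int main(void) {
  __m64 m = _mm_set_pi16(10, 20, 30, 40);
  __m64 s = _mm_set_pi16(15, 15, 15, 15);

  __m64 d = _mm_subs_pu16(m, s);  /* lanes, low to high: 40-15=25, 30-15=15, 20-15=5, 10-15 -> 0 */

  printf("%#llx\n", _mm_cvtm64_si64(d));
  _mm_empty();
  return 0;
}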
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pu16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); } /// Multiplies each 16-bit signed integer element of the first 64-bit /// integer vector of [4 x i16] by the corresponding 16-bit signed integer /// element of the second 64-bit integer vector of [4 x i16] and get four /// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums. /// The lower 32 bits of these two sums are packed into a 64-bit integer /// vector of [2 x i32]. /// /// For example, bits [15:0] of both parameters are multiplied, bits [31:16] /// of both parameters are multiplied, and the sum of both results is written /// to bits [31:0] of the result. /// /// \headerfile /// /// This intrinsic corresponds to the PMADDWD instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [2 x i32] containing the sums of /// products of both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_madd_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); } /// Multiplies each 16-bit signed integer element of the first 64-bit /// integer vector of [4 x i16] by the corresponding 16-bit signed integer /// element of the second 64-bit integer vector of [4 x i16]. Packs the upper /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PMULHW instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits /// of the products of both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mulhi_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); } /// Multiplies each 16-bit signed integer element of the first 64-bit /// integer vector of [4 x i16] by the corresponding 16-bit signed integer /// element of the second 64-bit integer vector of [4 x i16]. Packs the lower /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PMULLW instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits /// of the products of both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mullo_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); } /// Left-shifts each 16-bit signed integer element of the first /// parameter, which is a 64-bit integer vector of [4 x i16], by the number /// of bits specified by the second parameter, which is a 64-bit integer. The /// lower 16 bits of the results are packed into a 64-bit integer vector of /// [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PSLLW instruction. /// /// \param __m /// A 64-bit integer vector of [4 x i16]. /// \param __count /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted /// values. If \a __count is greater or equal to 16, the result is set to all /// 0. 
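/* Illustrative usage sketch (not part of the original header): _mm_madd_pi16
 * multiplies corresponding 16-bit lanes and then sums adjacent 32-bit
 * products, which makes it a building block for short dot products.
 * Standalone program. */
#include <mmintrin.h>
#include <stdio.h>

int main(void) {
  __m64 a = _mm_set_pi16(4, 3, 2, 1);   /* lanes, low to high: 1, 2, 3, 4 */
  __m64 b = _mm_set_pi16(8, 7, 6, 5);   /* lanes, low to high: 5, 6, 7, 8 */

  /* low 32-bit result = 1*5 + 2*6 = 17, high 32-bit result = 3*7 + 4*8 = 53 */
  __m64 sums = _mm_madd_pi16(a, b);

  int lo = _mm_cvtsi64_si32(sums);                      /* 17 */
  int hi = _mm_cvtsi64_si32(_mm_srli_si64(sums, 32));   /* 53 */
  printf("%d %d\n", lo, hi);
  _mm_empty();
  return 0;
}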
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_pi16(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); } /// Left-shifts each 16-bit signed integer element of a 64-bit integer /// vector of [4 x i16] by the number of bits specified by a 32-bit integer. /// The lower 16 bits of the results are packed into a 64-bit integer vector /// of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PSLLW instruction. /// /// \param __m /// A 64-bit integer vector of [4 x i16]. /// \param __count /// A 32-bit integer value. /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted /// values. If \a __count is greater or equal to 16, the result is set to all /// 0. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_pi16(__m64 __m, int __count) { return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); } /// Left-shifts each 32-bit signed integer element of the first /// parameter, which is a 64-bit integer vector of [2 x i32], by the number /// of bits specified by the second parameter, which is a 64-bit integer. The /// lower 32 bits of the results are packed into a 64-bit integer vector of /// [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the PSLLD instruction. /// /// \param __m /// A 64-bit integer vector of [2 x i32]. /// \param __count /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted /// values. If \a __count is greater or equal to 32, the result is set to all /// 0. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_pi32(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); } /// Left-shifts each 32-bit signed integer element of a 64-bit integer /// vector of [2 x i32] by the number of bits specified by a 32-bit integer. /// The lower 32 bits of the results are packed into a 64-bit integer vector /// of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the PSLLD instruction. /// /// \param __m /// A 64-bit integer vector of [2 x i32]. /// \param __count /// A 32-bit integer value. /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted /// values. If \a __count is greater or equal to 32, the result is set to all /// 0. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_pi32(__m64 __m, int __count) { return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); } /// Left-shifts the first 64-bit integer parameter by the number of bits /// specified by the second 64-bit integer parameter. The lower 64 bits of /// result are returned. /// /// \headerfile /// /// This intrinsic corresponds to the PSLLQ instruction. /// /// \param __m /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \param __count /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector containing the left-shifted value. If /// \a __count is greater or equal to 64, the result is set to 0. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_si64(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_psllq((__v1di)__m, __count); } /// Left-shifts the first parameter, which is a 64-bit integer, by the /// number of bits specified by the second parameter, which is a 32-bit /// integer. The lower 64 bits of result are returned. /// /// \headerfile /// /// This intrinsic corresponds to the PSLLQ instruction. /// /// \param __m /// A 64-bit integer vector interpreted as a single 64-bit integer. 
/// \param __count /// A 32-bit integer value. /// \returns A 64-bit integer vector containing the left-shifted value. If /// \a __count is greater or equal to 64, the result is set to 0. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_si64(__m64 __m, int __count) { return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count); } /// Right-shifts each 16-bit integer element of the first parameter, /// which is a 64-bit integer vector of [4 x i16], by the number of bits /// specified by the second parameter, which is a 64-bit integer. /// /// High-order bits are filled with the sign bit of the initial value of each /// 16-bit element. The 16-bit results are packed into a 64-bit integer /// vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PSRAW instruction. /// /// \param __m /// A 64-bit integer vector of [4 x i16]. /// \param __count /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sra_pi16(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); } /// Right-shifts each 16-bit integer element of a 64-bit integer vector /// of [4 x i16] by the number of bits specified by a 32-bit integer. /// /// High-order bits are filled with the sign bit of the initial value of each /// 16-bit element. The 16-bit results are packed into a 64-bit integer /// vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PSRAW instruction. /// /// \param __m /// A 64-bit integer vector of [4 x i16]. /// \param __count /// A 32-bit integer value. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srai_pi16(__m64 __m, int __count) { return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); } /// Right-shifts each 32-bit integer element of the first parameter, /// which is a 64-bit integer vector of [2 x i32], by the number of bits /// specified by the second parameter, which is a 64-bit integer. /// /// High-order bits are filled with the sign bit of the initial value of each /// 32-bit element. The 32-bit results are packed into a 64-bit integer /// vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the PSRAD instruction. /// /// \param __m /// A 64-bit integer vector of [2 x i32]. /// \param __count /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sra_pi32(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); } /// Right-shifts each 32-bit integer element of a 64-bit integer vector /// of [2 x i32] by the number of bits specified by a 32-bit integer. /// /// High-order bits are filled with the sign bit of the initial value of each /// 32-bit element. The 32-bit results are packed into a 64-bit integer /// vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the PSRAD instruction. /// /// \param __m /// A 64-bit integer vector of [2 x i32]. /// \param __count /// A 32-bit integer value. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted /// values. 
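/* Illustrative usage sketch (not part of the original header): the difference
 * between the arithmetic right shifts above (sign bit replicated into the
 * high bits) and the logical right shifts documented next (high bits
 * cleared). Standalone program. */
#include <mmintrin.h>
#include <stdio.h>

int main(void) {
  __m64 v = _mm_set1_pi16(-32768);    /* every lane is 0x8000 */

  __m64 arith = _mm_srai_pi16(v, 4);  /* sign-filled: each lane becomes 0xF800 (-2048) */
  __m64 logic = _mm_srli_pi16(v, 4);  /* zero-filled: each lane becomes 0x0800 (2048) */

  printf("%#llx %#llx\n", _mm_cvtm64_si64(arith), _mm_cvtm64_si64(logic));
  _mm_empty();
  return 0;
}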
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srai_pi32(__m64 __m, int __count) { return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); } /// Right-shifts each 16-bit integer element of the first parameter, /// which is a 64-bit integer vector of [4 x i16], by the number of bits /// specified by the second parameter, which is a 64-bit integer. /// /// High-order bits are cleared. The 16-bit results are packed into a 64-bit /// integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PSRLW instruction. /// /// \param __m /// A 64-bit integer vector of [4 x i16]. /// \param __count /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_pi16(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); } /// Right-shifts each 16-bit integer element of a 64-bit integer vector /// of [4 x i16] by the number of bits specified by a 32-bit integer. /// /// High-order bits are cleared. The 16-bit results are packed into a 64-bit /// integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PSRLW instruction. /// /// \param __m /// A 64-bit integer vector of [4 x i16]. /// \param __count /// A 32-bit integer value. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_pi16(__m64 __m, int __count) { return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); } /// Right-shifts each 32-bit integer element of the first parameter, /// which is a 64-bit integer vector of [2 x i32], by the number of bits /// specified by the second parameter, which is a 64-bit integer. /// /// High-order bits are cleared. The 32-bit results are packed into a 64-bit /// integer vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the PSRLD instruction. /// /// \param __m /// A 64-bit integer vector of [2 x i32]. /// \param __count /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_pi32(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); } /// Right-shifts each 32-bit integer element of a 64-bit integer vector /// of [2 x i32] by the number of bits specified by a 32-bit integer. /// /// High-order bits are cleared. The 32-bit results are packed into a 64-bit /// integer vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the PSRLD instruction. /// /// \param __m /// A 64-bit integer vector of [2 x i32]. /// \param __count /// A 32-bit integer value. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_pi32(__m64 __m, int __count) { return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); } /// Right-shifts the first 64-bit integer parameter by the number of bits /// specified by the second 64-bit integer parameter. /// /// High-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the PSRLQ instruction. /// /// \param __m /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \param __count /// A 64-bit integer vector interpreted as a single 64-bit integer. 
/// \returns A 64-bit integer vector containing the right-shifted value. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_si64(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count); } /// Right-shifts the first parameter, which is a 64-bit integer, by the /// number of bits specified by the second parameter, which is a 32-bit /// integer. /// /// High-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the PSRLQ instruction. /// /// \param __m /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \param __count /// A 32-bit integer value. /// \returns A 64-bit integer vector containing the right-shifted value. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_si64(__m64 __m, int __count) { return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count); } /// Performs a bitwise AND of two 64-bit integer vectors. /// /// \headerfile /// /// This intrinsic corresponds to the PAND instruction. /// /// \param __m1 /// A 64-bit integer vector. /// \param __m2 /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise AND of both /// parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_and_si64(__m64 __m1, __m64 __m2) { return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2); } /// Performs a bitwise NOT of the first 64-bit integer vector, and then /// performs a bitwise AND of the intermediate result and the second 64-bit /// integer vector. /// /// \headerfile /// /// This intrinsic corresponds to the PANDN instruction. /// /// \param __m1 /// A 64-bit integer vector. The one's complement of this parameter is used /// in the bitwise AND. /// \param __m2 /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise AND of the second /// parameter and the one's complement of the first parameter. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_andnot_si64(__m64 __m1, __m64 __m2) { return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2); } /// Performs a bitwise OR of two 64-bit integer vectors. /// /// \headerfile /// /// This intrinsic corresponds to the POR instruction. /// /// \param __m1 /// A 64-bit integer vector. /// \param __m2 /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise OR of both /// parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_or_si64(__m64 __m1, __m64 __m2) { return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2); } /// Performs a bitwise exclusive OR of two 64-bit integer vectors. /// /// \headerfile /// /// This intrinsic corresponds to the PXOR instruction. /// /// \param __m1 /// A 64-bit integer vector. /// \param __m2 /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both /// parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_xor_si64(__m64 __m1, __m64 __m2) { return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2); } /// Compares the 8-bit integer elements of two 64-bit integer vectors of /// [8 x i8] to determine if the element of the first vector is equal to the /// corresponding element of the second vector. /// /// The comparison yields 0 for false, 0xFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the PCMPEQB instruction. /// /// \param __m1 /// A 64-bit integer vector of [8 x i8]. /// \param __m2 /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the comparison /// results. 
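/* Illustrative usage sketch (not part of the original header): the comparison
 * intrinsics documented below produce all-ones/all-zeros lane masks, which
 * combine with the bitwise operations above to select between two vectors
 * without branches. Standalone program. */
#include <mmintrin.h>
#include <stdio.h>

int main(void) {
  __m64 a = _mm_set_pi16(1, 50, 3, 70);
  __m64 b = _mm_set_pi16(40, 2, 60, 4);

  __m64 mask = _mm_cmpgt_pi16(a, b);                  /* 0xFFFF where a > b, else 0 */
  __m64 max  = _mm_or_si64(_mm_and_si64(mask, a),     /* take a where a > b ...    */
                           _mm_andnot_si64(mask, b)); /* ... and b elsewhere       */

  printf("%#llx\n", _mm_cvtm64_si64(max));            /* per-lane maximum of a and b */
  _mm_empty();
  return 0;
}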
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); } /// Compares the 16-bit integer elements of two 64-bit integer vectors of /// [4 x i16] to determine if the element of the first vector is equal to the /// corresponding element of the second vector. /// /// The comparison yields 0 for false, 0xFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the PCMPEQW instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the comparison /// results. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); } /// Compares the 32-bit integer elements of two 64-bit integer vectors of /// [2 x i32] to determine if the element of the first vector is equal to the /// corresponding element of the second vector. /// /// The comparison yields 0 for false, 0xFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the PCMPEQD instruction. /// /// \param __m1 /// A 64-bit integer vector of [2 x i32]. /// \param __m2 /// A 64-bit integer vector of [2 x i32]. /// \returns A 64-bit integer vector of [2 x i32] containing the comparison /// results. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); } /// Compares the 8-bit integer elements of two 64-bit integer vectors of /// [8 x i8] to determine if the element of the first vector is greater than /// the corresponding element of the second vector. /// /// The comparison yields 0 for false, 0xFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the PCMPGTB instruction. /// /// \param __m1 /// A 64-bit integer vector of [8 x i8]. /// \param __m2 /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the comparison /// results. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); } /// Compares the 16-bit integer elements of two 64-bit integer vectors of /// [4 x i16] to determine if the element of the first vector is greater than /// the corresponding element of the second vector. /// /// The comparison yields 0 for false, 0xFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the PCMPGTW instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the comparison /// results. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); } /// Compares the 32-bit integer elements of two 64-bit integer vectors of /// [2 x i32] to determine if the element of the first vector is greater than /// the corresponding element of the second vector. /// /// The comparison yields 0 for false, 0xFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the PCMPGTD instruction. /// /// \param __m1 /// A 64-bit integer vector of [2 x i32]. /// \param __m2 /// A 64-bit integer vector of [2 x i32]. /// \returns A 64-bit integer vector of [2 x i32] containing the comparison /// results. 
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); } /// Constructs a 64-bit integer vector initialized to zero. /// /// \headerfile /// /// This intrinsic corresponds to the PXOR instruction. /// /// \returns An initialized 64-bit integer vector with all elements set to zero. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setzero_si64(void) { return __extension__ (__m64){ 0LL }; } /// Constructs a 64-bit integer vector initialized with the specified /// 32-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __i1 /// A 32-bit integer value used to initialize the upper 32 bits of the /// result. /// \param __i0 /// A 32-bit integer value used to initialize the lower 32 bits of the /// result. /// \returns An initialized 64-bit integer vector. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi32(int __i1, int __i0) { return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); } /// Constructs a 64-bit integer vector initialized with the specified /// 16-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __s3 /// A 16-bit integer value used to initialize bits [63:48] of the result. /// \param __s2 /// A 16-bit integer value used to initialize bits [47:32] of the result. /// \param __s1 /// A 16-bit integer value used to initialize bits [31:16] of the result. /// \param __s0 /// A 16-bit integer value used to initialize bits [15:0] of the result. /// \returns An initialized 64-bit integer vector. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) { return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); } /// Constructs a 64-bit integer vector initialized with the specified /// 8-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __b7 /// An 8-bit integer value used to initialize bits [63:56] of the result. /// \param __b6 /// An 8-bit integer value used to initialize bits [55:48] of the result. /// \param __b5 /// An 8-bit integer value used to initialize bits [47:40] of the result. /// \param __b4 /// An 8-bit integer value used to initialize bits [39:32] of the result. /// \param __b3 /// An 8-bit integer value used to initialize bits [31:24] of the result. /// \param __b2 /// An 8-bit integer value used to initialize bits [23:16] of the result. /// \param __b1 /// An 8-bit integer value used to initialize bits [15:8] of the result. /// \param __b0 /// An 8-bit integer value used to initialize bits [7:0] of the result. /// \returns An initialized 64-bit integer vector. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0) { return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7); } /// Constructs a 64-bit integer vector of [2 x i32], with each of the /// 32-bit integer vector elements set to the specified 32-bit integer /// value. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __i /// A 32-bit integer value used to initialize each vector element of the /// result. 
/// \returns An initialized 64-bit integer vector of [2 x i32]. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi32(int __i) { return _mm_set_pi32(__i, __i); } /// Constructs a 64-bit integer vector of [4 x i16], with each of the /// 16-bit integer vector elements set to the specified 16-bit integer /// value. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __w /// A 16-bit integer value used to initialize each vector element of the /// result. /// \returns An initialized 64-bit integer vector of [4 x i16]. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi16(short __w) { return _mm_set_pi16(__w, __w, __w, __w); } /// Constructs a 64-bit integer vector of [8 x i8], with each of the /// 8-bit integer vector elements set to the specified 8-bit integer value. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __b /// An 8-bit integer value used to initialize each vector element of the /// result. /// \returns An initialized 64-bit integer vector of [8 x i8]. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi8(char __b) { return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); } /// Constructs a 64-bit integer vector, initialized in reverse order with /// the specified 32-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __i0 /// A 32-bit integer value used to initialize the lower 32 bits of the /// result. /// \param __i1 /// A 32-bit integer value used to initialize the upper 32 bits of the /// result. /// \returns An initialized 64-bit integer vector. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi32(int __i0, int __i1) { return _mm_set_pi32(__i1, __i0); } /// Constructs a 64-bit integer vector, initialized in reverse order with /// the specified 16-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __w0 /// A 16-bit integer value used to initialize bits [15:0] of the result. /// \param __w1 /// A 16-bit integer value used to initialize bits [31:16] of the result. /// \param __w2 /// A 16-bit integer value used to initialize bits [47:32] of the result. /// \param __w3 /// A 16-bit integer value used to initialize bits [63:48] of the result. /// \returns An initialized 64-bit integer vector. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) { return _mm_set_pi16(__w3, __w2, __w1, __w0); } /// Constructs a 64-bit integer vector, initialized in reverse order with /// the specified 8-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __b0 /// An 8-bit integer value used to initialize bits [7:0] of the result. /// \param __b1 /// An 8-bit integer value used to initialize bits [15:8] of the result. /// \param __b2 /// An 8-bit integer value used to initialize bits [23:16] of the result. /// \param __b3 /// An 8-bit integer value used to initialize bits [31:24] of the result. /// \param __b4 /// An 8-bit integer value used to initialize bits [39:32] of the result. /// \param __b5 /// An 8-bit integer value used to initialize bits [47:40] of the result. /// \param __b6 /// An 8-bit integer value used to initialize bits [55:48] of the result. 
/// \param __b7 /// An 8-bit integer value used to initialize bits [63:56] of the result. /// \returns An initialized 64-bit integer vector. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7) { return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); } #undef __DEFAULT_FN_ATTRS /* Aliases for compatibility. */ #define _m_empty _mm_empty #define _m_from_int _mm_cvtsi32_si64 #define _m_from_int64 _mm_cvtsi64_m64 #define _m_to_int _mm_cvtsi64_si32 #define _m_to_int64 _mm_cvtm64_si64 #define _m_packsswb _mm_packs_pi16 #define _m_packssdw _mm_packs_pi32 #define _m_packuswb _mm_packs_pu16 #define _m_punpckhbw _mm_unpackhi_pi8 #define _m_punpckhwd _mm_unpackhi_pi16 #define _m_punpckhdq _mm_unpackhi_pi32 #define _m_punpcklbw _mm_unpacklo_pi8 #define _m_punpcklwd _mm_unpacklo_pi16 #define _m_punpckldq _mm_unpacklo_pi32 #define _m_paddb _mm_add_pi8 #define _m_paddw _mm_add_pi16 #define _m_paddd _mm_add_pi32 #define _m_paddsb _mm_adds_pi8 #define _m_paddsw _mm_adds_pi16 #define _m_paddusb _mm_adds_pu8 #define _m_paddusw _mm_adds_pu16 #define _m_psubb _mm_sub_pi8 #define _m_psubw _mm_sub_pi16 #define _m_psubd _mm_sub_pi32 #define _m_psubsb _mm_subs_pi8 #define _m_psubsw _mm_subs_pi16 #define _m_psubusb _mm_subs_pu8 #define _m_psubusw _mm_subs_pu16 #define _m_pmaddwd _mm_madd_pi16 #define _m_pmulhw _mm_mulhi_pi16 #define _m_pmullw _mm_mullo_pi16 #define _m_psllw _mm_sll_pi16 #define _m_psllwi _mm_slli_pi16 #define _m_pslld _mm_sll_pi32 #define _m_pslldi _mm_slli_pi32 #define _m_psllq _mm_sll_si64 #define _m_psllqi _mm_slli_si64 #define _m_psraw _mm_sra_pi16 #define _m_psrawi _mm_srai_pi16 #define _m_psrad _mm_sra_pi32 #define _m_psradi _mm_srai_pi32 #define _m_psrlw _mm_srl_pi16 #define _m_psrlwi _mm_srli_pi16 #define _m_psrld _mm_srl_pi32 #define _m_psrldi _mm_srli_pi32 #define _m_psrlq _mm_srl_si64 #define _m_psrlqi _mm_srli_si64 #define _m_pand _mm_and_si64 #define _m_pandn _mm_andnot_si64 #define _m_por _mm_or_si64 #define _m_pxor _mm_xor_si64 #define _m_pcmpeqb _mm_cmpeq_pi8 #define _m_pcmpeqw _mm_cmpeq_pi16 #define _m_pcmpeqd _mm_cmpeq_pi32 #define _m_pcmpgtb _mm_cmpgt_pi8 #define _m_pcmpgtw _mm_cmpgt_pi16 #define _m_pcmpgtd _mm_cmpgt_pi32 #endif /* __MMINTRIN_H */ /*===---- __stdarg_va_copy.h - Definition of va_copy------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef va_copy #define va_copy(dest, src) __builtin_va_copy(dest, src) #endif /*===---- arm_neon.h - ARM Neon intrinsics ---------------------------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * *===-----------------------------------------------------------------------=== */ #ifndef __ARM_NEON_H #define __ARM_NEON_H #ifndef __ARM_FP #error "NEON intrinsics not available with the soft-float ABI. Please use -mfloat-abi=softfp or -mfloat-abi=hard" #else #if !defined(__ARM_NEON) #error "NEON support not enabled" #else #include #include #include #ifdef __aarch64__ typedef uint8_t poly8_t; typedef uint16_t poly16_t; typedef uint64_t poly64_t; typedef __uint128_t poly128_t; #else typedef int8_t poly8_t; typedef int16_t poly16_t; typedef int64_t poly64_t; #endif typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t; typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t; typedef __attribute__((neon_polyvector_type(4))) poly16_t poly16x4_t; typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t; typedef __attribute__((neon_polyvector_type(1))) poly64_t poly64x1_t; typedef __attribute__((neon_polyvector_type(2))) poly64_t poly64x2_t; typedef struct poly8x8x2_t { poly8x8_t val[2]; } poly8x8x2_t; typedef struct poly8x16x2_t { poly8x16_t val[2]; } poly8x16x2_t; typedef struct poly16x4x2_t { poly16x4_t val[2]; } poly16x4x2_t; typedef struct poly16x8x2_t { poly16x8_t val[2]; } poly16x8x2_t; typedef struct poly64x1x2_t { poly64x1_t val[2]; } poly64x1x2_t; typedef struct poly64x2x2_t { poly64x2_t val[2]; } poly64x2x2_t; typedef struct poly8x8x3_t { poly8x8_t val[3]; } poly8x8x3_t; typedef struct poly8x16x3_t { poly8x16_t val[3]; } poly8x16x3_t; typedef struct poly16x4x3_t { poly16x4_t val[3]; } poly16x4x3_t; typedef struct poly16x8x3_t { poly16x8_t val[3]; } poly16x8x3_t; typedef struct poly64x1x3_t { poly64x1_t val[3]; } poly64x1x3_t; typedef struct poly64x2x3_t { poly64x2_t val[3]; } poly64x2x3_t; typedef struct poly8x8x4_t { poly8x8_t val[4]; } poly8x8x4_t; typedef struct poly8x16x4_t { poly8x16_t val[4]; } poly8x16x4_t; typedef struct poly16x4x4_t { poly16x4_t val[4]; } poly16x4x4_t; typedef struct poly16x8x4_t { poly16x8_t val[4]; } poly16x8x4_t; typedef struct poly64x1x4_t { poly64x1_t val[4]; } poly64x1x4_t; typedef struct poly64x2x4_t { poly64x2_t val[4]; } poly64x2x4_t; #define __ai static __inline__ 
__attribute__((__always_inline__, __nodebug__)) #ifdef __LITTLE_ENDIAN__ #define splat_lane_p8(__p0, __p1) __extension__ ({ \ poly8x8_t __ret; \ poly8x8_t __s0 = __p0; \ __ret = (poly8x8_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 4); \ __ret; \ }) #else #define splat_lane_p8(__p0, __p1) __extension__ ({ \ poly8x8_t __ret; \ poly8x8_t __s0 = __p0; \ poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly8x8_t) __builtin_neon_splat_lane_v((int8x8_t)__rev0, __p1, 4); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splat_lane_p8(__p0, __p1) __extension__ ({ \ poly8x8_t __ret; \ poly8x8_t __s0 = __p0; \ __ret = (poly8x8_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 4); \ __ret; \ }) #endif #define splat_lane_p64(__p0, __p1) __extension__ ({ \ poly64x1_t __ret; \ poly64x1_t __s0 = __p0; \ __ret = (poly64x1_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define splat_lane_p16(__p0, __p1) __extension__ ({ \ poly16x4_t __ret; \ poly16x4_t __s0 = __p0; \ __ret = (poly16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 5); \ __ret; \ }) #else #define splat_lane_p16(__p0, __p1) __extension__ ({ \ poly16x4_t __ret; \ poly16x4_t __s0 = __p0; \ poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (poly16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__rev0, __p1, 5); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splat_lane_p16(__p0, __p1) __extension__ ({ \ poly16x4_t __ret; \ poly16x4_t __s0 = __p0; \ __ret = (poly16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 5); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_lane_p8(__p0, __p1) __extension__ ({ \ poly8x16_t __ret; \ poly8x8_t __s0 = __p0; \ __ret = (poly8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 4); \ __ret; \ }) #else #define splatq_lane_p8(__p0, __p1) __extension__ ({ \ poly8x16_t __ret; \ poly8x8_t __s0 = __p0; \ poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)__rev0, __p1, 4); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_lane_p8(__p0, __p1) __extension__ ({ \ poly8x16_t __ret; \ poly8x8_t __s0 = __p0; \ __ret = (poly8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 4); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_lane_p64(__p0, __p1) __extension__ ({ \ poly64x2_t __ret; \ poly64x1_t __s0 = __p0; \ __ret = (poly64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 6); \ __ret; \ }) #else #define splatq_lane_p64(__p0, __p1) __extension__ ({ \ poly64x2_t __ret; \ poly64x1_t __s0 = __p0; \ __ret = (poly64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 6); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_splatq_lane_p64(__p0, __p1) __extension__ ({ \ poly64x2_t __ret; \ poly64x1_t __s0 = __p0; \ __ret = (poly64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 6); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_lane_p16(__p0, __p1) __extension__ ({ \ poly16x8_t __ret; \ poly16x4_t __s0 = __p0; \ __ret = (poly16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 5); \ __ret; \ }) #else #define splatq_lane_p16(__p0, __p1) __extension__ ({ \ poly16x8_t __ret; \ poly16x4_t 
__s0 = __p0; \ poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (poly16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__rev0, __p1, 5); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_lane_p16(__p0, __p1) __extension__ ({ \ poly16x8_t __ret; \ poly16x4_t __s0 = __p0; \ __ret = (poly16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 5); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_lane_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x8_t __s0 = __p0; \ __ret = (uint8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 16); \ __ret; \ }) #else #define splatq_lane_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)__rev0, __p1, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_lane_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x8_t __s0 = __p0; \ __ret = (uint8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 16); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_lane_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x2_t __s0 = __p0; \ __ret = (uint32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 18); \ __ret; \ }) #else #define splatq_lane_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)__rev0, __p1, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_lane_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x2_t __s0 = __p0; \ __ret = (uint32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 18); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_lane_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x1_t __s0 = __p0; \ __ret = (uint64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) #else #define splatq_lane_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x1_t __s0 = __p0; \ __ret = (uint64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 19); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_splatq_lane_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x1_t __s0 = __p0; \ __ret = (uint64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_lane_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x4_t __s0 = __p0; \ __ret = (uint16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 17); \ __ret; \ }) #else #define splatq_lane_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__rev0, __p1, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_lane_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x4_t __s0 = __p0; \ __ret = (uint16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 17); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_lane_s8(__p0, __p1) 
__extension__ ({ \ int8x16_t __ret; \ int8x8_t __s0 = __p0; \ __ret = (int8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 0); \ __ret; \ }) #else #define splatq_lane_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)__rev0, __p1, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_lane_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x8_t __s0 = __p0; \ __ret = (int8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_lane_f64(__p0, __p1) __extension__ ({ \ float64x2_t __ret; \ float64x1_t __s0 = __p0; \ __ret = (float64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 10); \ __ret; \ }) #else #define splatq_lane_f64(__p0, __p1) __extension__ ({ \ float64x2_t __ret; \ float64x1_t __s0 = __p0; \ __ret = (float64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 10); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_splatq_lane_f64(__p0, __p1) __extension__ ({ \ float64x2_t __ret; \ float64x1_t __s0 = __p0; \ __ret = (float64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 10); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_lane_f32(__p0, __p1) __extension__ ({ \ float32x4_t __ret; \ float32x2_t __s0 = __p0; \ __ret = (float32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 9); \ __ret; \ }) #else #define splatq_lane_f32(__p0, __p1) __extension__ ({ \ float32x4_t __ret; \ float32x2_t __s0 = __p0; \ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (float32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)__rev0, __p1, 9); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_lane_f32(__p0, __p1) __extension__ ({ \ float32x4_t __ret; \ float32x2_t __s0 = __p0; \ __ret = (float32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 9); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_lane_f16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ float16x4_t __s0 = __p0; \ __ret = (float16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 8); \ __ret; \ }) #else #define splatq_lane_f16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (float16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__rev0, __p1, 8); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_lane_f16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ float16x4_t __s0 = __p0; \ __ret = (float16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 8); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_lane_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x2_t __s0 = __p0; \ __ret = (int32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 2); \ __ret; \ }) #else #define splatq_lane_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)__rev0, __p1, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define 
__noswap_splatq_lane_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x2_t __s0 = __p0; \ __ret = (int32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_lane_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x1_t __s0 = __p0; \ __ret = (int64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) #else #define splatq_lane_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x1_t __s0 = __p0; \ __ret = (int64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 3); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_splatq_lane_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x1_t __s0 = __p0; \ __ret = (int64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_lane_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x4_t __s0 = __p0; \ __ret = (int16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 1); \ __ret; \ }) #else #define splatq_lane_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__rev0, __p1, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_lane_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x4_t __s0 = __p0; \ __ret = (int16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_lane_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 16); \ __ret; \ }) #else #define splat_lane_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x8_t) __builtin_neon_splat_lane_v((int8x8_t)__rev0, __p1, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splat_lane_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 16); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_lane_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 18); \ __ret; \ }) #else #define splat_lane_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint32x2_t) __builtin_neon_splat_lane_v((int8x8_t)__rev0, __p1, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_splat_lane_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 18); \ __ret; \ }) #endif #define splat_lane_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __ret; \ uint64x1_t __s0 = __p0; \ __ret = (uint64x1_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define splat_lane_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 17); \ __ret; \ }) #else 
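/* Descriptive note on the pattern used throughout these splat/dup helpers:
 * each macro is emitted twice. The __LITTLE_ENDIAN__ branch passes the vector
 * straight to the __builtin_neon_splat_lane_v / splatq_lane_v builtin, while
 * the #else (big-endian) branch first reverses the lane order with
 * __builtin_shufflevector, calls the same builtin, and then reverses the
 * result back so lane numbering stays consistent across endiannesses. The
 * __noswap_* forms skip that double reversal; they appear to exist for use by
 * other wrappers whose operands have already been reversed. */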
#define splat_lane_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__rev0, __p1, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splat_lane_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 17); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_lane_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ __ret = (int8x8_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 0); \ __ret; \ }) #else #define splat_lane_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x8_t) __builtin_neon_splat_lane_v((int8x8_t)__rev0, __p1, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splat_lane_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ __ret = (int8x8_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 0); \ __ret; \ }) #endif #define splat_lane_f64(__p0, __p1) __extension__ ({ \ float64x1_t __ret; \ float64x1_t __s0 = __p0; \ __ret = (float64x1_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 10); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define splat_lane_f32(__p0, __p1) __extension__ ({ \ float32x2_t __ret; \ float32x2_t __s0 = __p0; \ __ret = (float32x2_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 9); \ __ret; \ }) #else #define splat_lane_f32(__p0, __p1) __extension__ ({ \ float32x2_t __ret; \ float32x2_t __s0 = __p0; \ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (float32x2_t) __builtin_neon_splat_lane_v((int8x8_t)__rev0, __p1, 9); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_splat_lane_f32(__p0, __p1) __extension__ ({ \ float32x2_t __ret; \ float32x2_t __s0 = __p0; \ __ret = (float32x2_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 9); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_lane_f16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ __ret = (float16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 8); \ __ret; \ }) #else #define splat_lane_f16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (float16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__rev0, __p1, 8); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splat_lane_f16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ __ret = (float16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 8); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_lane_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ __ret = (int32x2_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 2); \ __ret; \ }) #else #define splat_lane_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int32x2_t) __builtin_neon_splat_lane_v((int8x8_t)__rev0, __p1, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define 
__noswap_splat_lane_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ __ret = (int32x2_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 2); \ __ret; \ }) #endif #define splat_lane_s64(__p0, __p1) __extension__ ({ \ int64x1_t __ret; \ int64x1_t __s0 = __p0; \ __ret = (int64x1_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define splat_lane_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ __ret = (int16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 1); \ __ret; \ }) #else #define splat_lane_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__rev0, __p1, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splat_lane_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ __ret = (int16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_laneq_p8(__p0, __p1) __extension__ ({ \ poly8x8_t __ret; \ poly8x16_t __s0 = __p0; \ __ret = (poly8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 36); \ __ret; \ }) #else #define splat_laneq_p8(__p0, __p1) __extension__ ({ \ poly8x8_t __ret; \ poly8x16_t __s0 = __p0; \ poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 36); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splat_laneq_p8(__p0, __p1) __extension__ ({ \ poly8x8_t __ret; \ poly8x16_t __s0 = __p0; \ __ret = (poly8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 36); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_laneq_p64(__p0, __p1) __extension__ ({ \ poly64x1_t __ret; \ poly64x2_t __s0 = __p0; \ __ret = (poly64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 38); \ __ret; \ }) #else #define splat_laneq_p64(__p0, __p1) __extension__ ({ \ poly64x1_t __ret; \ poly64x2_t __s0 = __p0; \ poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (poly64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 38); \ __ret; \ }) #define __noswap_splat_laneq_p64(__p0, __p1) __extension__ ({ \ poly64x1_t __ret; \ poly64x2_t __s0 = __p0; \ __ret = (poly64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 38); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_laneq_p16(__p0, __p1) __extension__ ({ \ poly16x4_t __ret; \ poly16x8_t __s0 = __p0; \ __ret = (poly16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 37); \ __ret; \ }) #else #define splat_laneq_p16(__p0, __p1) __extension__ ({ \ poly16x4_t __ret; \ poly16x8_t __s0 = __p0; \ poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 37); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splat_laneq_p16(__p0, __p1) __extension__ ({ \ poly16x4_t __ret; \ poly16x8_t __s0 = __p0; \ __ret = (poly16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 37); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_laneq_p8(__p0, __p1) __extension__ ({ \ poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ __ret = 
(poly8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 36); \ __ret; \ }) #else #define splatq_laneq_p8(__p0, __p1) __extension__ ({ \ poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 36); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_laneq_p8(__p0, __p1) __extension__ ({ \ poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ __ret = (poly8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 36); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_laneq_p64(__p0, __p1) __extension__ ({ \ poly64x2_t __ret; \ poly64x2_t __s0 = __p0; \ __ret = (poly64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 38); \ __ret; \ }) #else #define splatq_laneq_p64(__p0, __p1) __extension__ ({ \ poly64x2_t __ret; \ poly64x2_t __s0 = __p0; \ poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (poly64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 38); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_splatq_laneq_p64(__p0, __p1) __extension__ ({ \ poly64x2_t __ret; \ poly64x2_t __s0 = __p0; \ __ret = (poly64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 38); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_laneq_p16(__p0, __p1) __extension__ ({ \ poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ __ret = (poly16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 37); \ __ret; \ }) #else #define splatq_laneq_p16(__p0, __p1) __extension__ ({ \ poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 37); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_laneq_p16(__p0, __p1) __extension__ ({ \ poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ __ret = (poly16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 37); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_laneq_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ __ret = (uint8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 48); \ __ret; \ }) #else #define splatq_laneq_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 48); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_laneq_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ __ret = (uint8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 48); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_laneq_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ __ret = (uint32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 50); \ __ret; \ }) #else #define splatq_laneq_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = 
(uint32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_laneq_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ __ret = (uint32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 50); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_laneq_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ __ret = (uint64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 51); \ __ret; \ }) #else #define splatq_laneq_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_splatq_laneq_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ __ret = (uint64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 51); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_laneq_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ __ret = (uint16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 49); \ __ret; \ }) #else #define splatq_laneq_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 49); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_laneq_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ __ret = (uint16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 49); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_laneq_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ __ret = (int8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 32); \ __ret; \ }) #else #define splatq_laneq_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 32); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_laneq_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ __ret = (int8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 32); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_laneq_f64(__p0, __p1) __extension__ ({ \ float64x2_t __ret; \ float64x2_t __s0 = __p0; \ __ret = (float64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 42); \ __ret; \ }) #else #define splatq_laneq_f64(__p0, __p1) __extension__ ({ \ float64x2_t __ret; \ float64x2_t __s0 = __p0; \ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (float64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 42); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_splatq_laneq_f64(__p0, __p1) __extension__ ({ \ float64x2_t __ret; \ float64x2_t __s0 = __p0; \ __ret = (float64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 42); \ __ret; \ }) #endif #ifdef 
__LITTLE_ENDIAN__ #define splatq_laneq_f32(__p0, __p1) __extension__ ({ \ float32x4_t __ret; \ float32x4_t __s0 = __p0; \ __ret = (float32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 41); \ __ret; \ }) #else #define splatq_laneq_f32(__p0, __p1) __extension__ ({ \ float32x4_t __ret; \ float32x4_t __s0 = __p0; \ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (float32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 41); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_laneq_f32(__p0, __p1) __extension__ ({ \ float32x4_t __ret; \ float32x4_t __s0 = __p0; \ __ret = (float32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 41); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_laneq_f16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ __ret = (float16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 40); \ __ret; \ }) #else #define splatq_laneq_f16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (float16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 40); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_laneq_f16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ __ret = (float16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 40); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_laneq_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (int32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 34); \ __ret; \ }) #else #define splatq_laneq_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_laneq_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (int32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 34); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_laneq_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (int64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 35); \ __ret; \ }) #else #define splatq_laneq_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 35); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_splatq_laneq_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (int64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 35); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_laneq_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (int16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 33); \ __ret; \ }) #else #define splatq_laneq_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16x8_t) 
__builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 33); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_laneq_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (int16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 33); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_laneq_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x16_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 48); \ __ret; \ }) #else #define splat_laneq_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 48); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splat_laneq_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x16_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 48); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_laneq_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x4_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 50); \ __ret; \ }) #else #define splat_laneq_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_splat_laneq_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x4_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 50); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_laneq_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __ret; \ uint64x2_t __s0 = __p0; \ __ret = (uint64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 51); \ __ret; \ }) #else #define splat_laneq_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 51); \ __ret; \ }) #define __noswap_splat_laneq_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __ret; \ uint64x2_t __s0 = __p0; \ __ret = (uint64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 51); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_laneq_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x8_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 49); \ __ret; \ }) #else #define splat_laneq_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 49); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splat_laneq_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x8_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 49); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_laneq_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x16_t __s0 = __p0; \ __ret = (int8x8_t) 
__builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 32); \ __ret; \ }) #else #define splat_laneq_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 32); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splat_laneq_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x16_t __s0 = __p0; \ __ret = (int8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 32); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_laneq_f64(__p0, __p1) __extension__ ({ \ float64x1_t __ret; \ float64x2_t __s0 = __p0; \ __ret = (float64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 42); \ __ret; \ }) #else #define splat_laneq_f64(__p0, __p1) __extension__ ({ \ float64x1_t __ret; \ float64x2_t __s0 = __p0; \ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (float64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 42); \ __ret; \ }) #define __noswap_splat_laneq_f64(__p0, __p1) __extension__ ({ \ float64x1_t __ret; \ float64x2_t __s0 = __p0; \ __ret = (float64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 42); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_laneq_f32(__p0, __p1) __extension__ ({ \ float32x2_t __ret; \ float32x4_t __s0 = __p0; \ __ret = (float32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 41); \ __ret; \ }) #else #define splat_laneq_f32(__p0, __p1) __extension__ ({ \ float32x2_t __ret; \ float32x4_t __s0 = __p0; \ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (float32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 41); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_splat_laneq_f32(__p0, __p1) __extension__ ({ \ float32x2_t __ret; \ float32x4_t __s0 = __p0; \ __ret = (float32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 41); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_laneq_f16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ float16x8_t __s0 = __p0; \ __ret = (float16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 40); \ __ret; \ }) #else #define splat_laneq_f16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (float16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 40); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splat_laneq_f16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ float16x8_t __s0 = __p0; \ __ret = (float16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 40); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_laneq_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (int32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 34); \ __ret; \ }) #else #define splat_laneq_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_splat_laneq_s32(__p0, __p1) __extension__ ({ \ int32x2_t 
__ret; \ int32x4_t __s0 = __p0; \ __ret = (int32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 34); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_laneq_s64(__p0, __p1) __extension__ ({ \ int64x1_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (int64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 35); \ __ret; \ }) #else #define splat_laneq_s64(__p0, __p1) __extension__ ({ \ int64x1_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 35); \ __ret; \ }) #define __noswap_splat_laneq_s64(__p0, __p1) __extension__ ({ \ int64x1_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (int64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 35); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_laneq_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (int16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 33); \ __ret; \ }) #else #define splat_laneq_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 33); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splat_laneq_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (int16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 33); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vabdq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vabdq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai uint8x16_t __noswap_vabdq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vabdq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vabdq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai uint32x4_t __noswap_vabdq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vabdq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8_t 
vabdq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai uint16x8_t __noswap_vabdq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vabdq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16_t vabdq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai int8x16_t __noswap_vabdq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vabdq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #else __ai float32x4_t vabdq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vabdq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4_t vabdq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vabdq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vabdq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8_t vabdq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; 
} __ai int16x8_t __noswap_vabdq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vabd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vabd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai uint8x8_t __noswap_vabd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vabd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vabd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai uint32x2_t __noswap_vabd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vabd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4_t vabd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai uint16x4_t __noswap_vabd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vabd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vabd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai int8x8_t __noswap_vabd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vabd_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } #else 
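/* vabd/vabdq compute the lane-wise absolute difference of their two operands.
 * A minimal usage sketch (illustrative only; assumes vdup_n_u8 from this same
 * header):
 *
 *   uint8x8_t a = vdup_n_u8(10);
 *   uint8x8_t b = vdup_n_u8(3);
 *   uint8x8_t d = vabd_u8(a, b);   // every lane holds |10 - 3| == 7
 *
 * As with the splat helpers, the #else branch below is the big-endian variant:
 * it reverses the input lanes, calls the builtin, and reverses the result. */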
__ai float32x2_t vabd_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float32x2_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vabd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2_t vabd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int32x2_t __noswap_vabd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vabd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4_t vabd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int16x4_t __noswap_vabd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vabsq_s8(int8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 32); return __ret; } #else __ai int8x16_t vabsq_s8(int8x16_t __p0) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vabsq_f32(float32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 41); return __ret; } #else __ai float32x4_t vabsq_f32(float32x4_t __p0) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vabsq_s32(int32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 34); return __ret; } #else __ai int32x4_t vabsq_s32(int32x4_t __p0) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vabsq_s16(int16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 33); return __ret; } #else __ai 
int16x8_t vabsq_s16(int16x8_t __p0) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vabs_s8(int8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vabs_v((int8x8_t)__p0, 0); return __ret; } #else __ai int8x8_t vabs_s8(int8x8_t __p0) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vabs_v((int8x8_t)__rev0, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vabs_f32(float32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vabs_v((int8x8_t)__p0, 9); return __ret; } #else __ai float32x2_t vabs_f32(float32x2_t __p0) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32x2_t) __builtin_neon_vabs_v((int8x8_t)__rev0, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vabs_s32(int32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vabs_v((int8x8_t)__p0, 2); return __ret; } #else __ai int32x2_t vabs_s32(int32x2_t __p0) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int32x2_t) __builtin_neon_vabs_v((int8x8_t)__rev0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vabs_s16(int16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vabs_v((int8x8_t)__p0, 1); return __ret; } #else __ai int16x4_t vabs_s16(int16x4_t __p0) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vabs_v((int8x8_t)__rev0, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __p0 + __p1; return __ret; } #else __ai uint8x16_t vaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __rev1; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __p0 + __p1; return __ret; } #else __ai uint32x4_t vaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 + __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vaddq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = __p0 + __p1; return __ret; } #else __ai uint64x2_t vaddq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 + __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __p0 + __p1; return __ret; } #else __ai uint16x8_t vaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __p0 + __p1; return __ret; } #else __ai int8x16_t vaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __rev1; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vaddq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = __p0 + __p1; return __ret; } #else __ai float32x4_t vaddq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 + __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __p0 + __p1; return __ret; } #else __ai int32x4_t vaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 + __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vaddq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = __p0 + __p1; return __ret; } #else __ai int64x2_t vaddq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 + __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __p0 + __p1; return __ret; } #else __ai int16x8_t vaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __p0 + __p1; return __ret; } #else __ai uint8x8_t vadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 
= __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __p0 + __p1; return __ret; } #else __ai uint32x2_t vadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 + __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vadd_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = __p0 + __p1; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __p0 + __p1; return __ret; } #else __ai uint16x4_t vadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 + __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __p0 + __p1; return __ret; } #else __ai int8x8_t vadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vadd_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = __p0 + __p1; return __ret; } #else __ai float32x2_t vadd_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 + __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = __p0 + __p1; return __ret; } #else __ai int32x2_t vadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 + __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vadd_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = __p0 + __p1; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __p0 + __p1; return __ret; } #else __ai int16x4_t vadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 + __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vadd_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vadd_v((int8x8_t)__p0, (int8x8_t)__p1, 4); return __ret; } #else __ai poly8x8_t vadd_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; 
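/* Big-endian variant: reverse the lane order of the inputs with __builtin_shufflevector, apply the operation, then reverse the lanes of the result back so behaviour matches the little-endian definition above. The same reverse/operate/reverse pattern is used by all of the #else branches in this header. */ 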
poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif __ai poly64x1_t vadd_p64(poly64x1_t __p0, poly64x1_t __p1) { poly64x1_t __ret; __ret = (poly64x1_t) __builtin_neon_vadd_v((int8x8_t)__p0, (int8x8_t)__p1, 6); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vadd_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; __ret = (poly16x4_t) __builtin_neon_vadd_v((int8x8_t)__p0, (int8x8_t)__p1, 5); return __ret; } #else __ai poly16x4_t vadd_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (poly16x4_t) __builtin_neon_vadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 5); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vaddq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; __ret = (poly8x16_t) __builtin_neon_vaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 36); return __ret; } #else __ai poly8x16_t vaddq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x16_t) __builtin_neon_vaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 36); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vaddq_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; __ret = (poly64x2_t) __builtin_neon_vaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 38); return __ret; } #else __ai poly64x2_t vaddq_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (poly64x2_t) __builtin_neon_vaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 38); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vaddq_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; __ret = (poly16x8_t) __builtin_neon_vaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 37); return __ret; } #else __ai poly16x8_t vaddq_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly16x8_t) __builtin_neon_vaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 37); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vaddhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); return __ret; } #else __ai uint16x4_t vaddhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 
0); __ret = (uint16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai uint16x4_t __noswap_vaddhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vaddhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); return __ret; } #else __ai uint32x2_t vaddhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vaddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai uint32x2_t __noswap_vaddhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vaddhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); return __ret; } #else __ai uint8x8_t vaddhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vaddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai uint8x8_t __noswap_vaddhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vaddhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); return __ret; } #else __ai int16x4_t vaddhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int16x4_t __noswap_vaddhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vaddhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); return __ret; } #else __ai int32x2_t vaddhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vaddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int32x2_t __noswap_vaddhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); return __ret; } #endif #ifdef 
__LITTLE_ENDIAN__ __ai int8x8_t vaddhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); return __ret; } #else __ai int8x8_t vaddhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vaddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai int8x8_t __noswap_vaddhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vandq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __p0 & __p1; return __ret; } #else __ai uint8x16_t vandq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 & __rev1; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vandq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __p0 & __p1; return __ret; } #else __ai uint32x4_t vandq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 & __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vandq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = __p0 & __p1; return __ret; } #else __ai uint64x2_t vandq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 & __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vandq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __p0 & __p1; return __ret; } #else __ai uint16x8_t vandq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 & __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vandq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __p0 & __p1; return __ret; } #else __ai int8x16_t vandq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 & __rev1; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t 
vandq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __p0 & __p1; return __ret; } #else __ai int32x4_t vandq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 & __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vandq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = __p0 & __p1; return __ret; } #else __ai int64x2_t vandq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 & __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vandq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __p0 & __p1; return __ret; } #else __ai int16x8_t vandq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 & __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vand_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __p0 & __p1; return __ret; } #else __ai uint8x8_t vand_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 & __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vand_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __p0 & __p1; return __ret; } #else __ai uint32x2_t vand_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 & __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vand_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = __p0 & __p1; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vand_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __p0 & __p1; return __ret; } #else __ai uint16x4_t vand_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 & __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vand_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __p0 & __p1; return __ret; } #else __ai int8x8_t vand_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 & __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t 
vand_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = __p0 & __p1; return __ret; } #else __ai int32x2_t vand_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 & __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vand_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = __p0 & __p1; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vand_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __p0 & __p1; return __ret; } #else __ai int16x4_t vand_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 & __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vbicq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __p0 & ~__p1; return __ret; } #else __ai uint8x16_t vbicq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 & ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vbicq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __p0 & ~__p1; return __ret; } #else __ai uint32x4_t vbicq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 & ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vbicq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = __p0 & ~__p1; return __ret; } #else __ai uint64x2_t vbicq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 & ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vbicq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __p0 & ~__p1; return __ret; } #else __ai uint16x8_t vbicq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 & ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vbicq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __p0 & ~__p1; return __ret; } #else __ai int8x16_t vbicq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 
4, 3, 2, 1, 0); __ret = __rev0 & ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vbicq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __p0 & ~__p1; return __ret; } #else __ai int32x4_t vbicq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 & ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vbicq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = __p0 & ~__p1; return __ret; } #else __ai int64x2_t vbicq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 & ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vbicq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __p0 & ~__p1; return __ret; } #else __ai int16x8_t vbicq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 & ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vbic_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __p0 & ~__p1; return __ret; } #else __ai uint8x8_t vbic_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 & ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vbic_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __p0 & ~__p1; return __ret; } #else __ai uint32x2_t vbic_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 & ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vbic_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = __p0 & ~__p1; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vbic_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __p0 & ~__p1; return __ret; } #else __ai uint16x4_t vbic_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 & ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vbic_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __p0 & ~__p1; return __ret; } #else __ai int8x8_t vbic_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 & ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vbic_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = __p0 & ~__p1; return __ret; } #else __ai int32x2_t vbic_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 & ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vbic_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = __p0 & ~__p1; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vbic_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __p0 & ~__p1; return __ret; } #else __ai int16x4_t vbic_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 & ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vbsl_p8(uint8x8_t __p0, poly8x8_t __p1, poly8x8_t __p2) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 4); return __ret; } #else __ai poly8x8_t vbsl_p8(uint8x8_t __p0, poly8x8_t __p1, poly8x8_t __p2) { poly8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vbsl_p16(uint16x4_t __p0, poly16x4_t __p1, poly16x4_t __p2) { poly16x4_t __ret; __ret = (poly16x4_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 5); return __ret; } #else __ai poly16x4_t vbsl_p16(uint16x4_t __p0, poly16x4_t __p1, poly16x4_t __p2) { poly16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); poly16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (poly16x4_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 5); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vbslq_p8(uint8x16_t __p0, poly8x16_t __p1, poly8x16_t __p2) { poly8x16_t __ret; __ret = (poly8x16_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 36); return __ret; } #else __ai poly8x16_t vbslq_p8(uint8x16_t __p0, poly8x16_t __p1, poly8x16_t __p2) { poly8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x16_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 
(int8x16_t)__rev2, 36); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vbslq_p16(uint16x8_t __p0, poly16x8_t __p1, poly16x8_t __p2) { poly16x8_t __ret; __ret = (poly16x8_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 37); return __ret; } #else __ai poly16x8_t vbslq_p16(uint16x8_t __p0, poly16x8_t __p1, poly16x8_t __p2) { poly16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); poly16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly16x8_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 37); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vbslq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 48); return __ret; } #else __ai uint8x16_t vbslq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vbslq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); return __ret; } #else __ai uint32x4_t vbslq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vbslq_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 51); return __ret; } #else __ai uint64x2_t vbslq_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (uint64x2_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vbslq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; __ret = (uint16x8_t) 
__builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 49); return __ret; } #else __ai uint16x8_t vbslq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vbslq_s8(uint8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 32); return __ret; } #else __ai int8x16_t vbslq_s8(uint8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vbslq_f32(uint32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #else __ai float32x4_t vbslq_f32(uint32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vbslq_s32(uint32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); return __ret; } #else __ai int32x4_t vbslq_s32(uint32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vbslq_s64(uint64x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 35); return __ret; } #else __ai int64x2_t vbslq_s64(uint64x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 1, 0); int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (int64x2_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vbslq_s16(uint16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 33); return __ret; } #else __ai int16x8_t vbslq_s16(uint16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vbsl_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 16); return __ret; } #else __ai uint8x8_t vbsl_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vbsl_u32(uint32x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 18); return __ret; } #else __ai uint32x2_t vbsl_u32(uint32x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (uint32x2_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vbsl_u64(uint64x1_t __p0, uint64x1_t __p1, uint64x1_t __p2) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 19); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vbsl_u16(uint16x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 17); return __ret; } #else __ai uint16x4_t vbsl_u16(uint16x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif 
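/* Editor's note: the vbsl/vbslq family above is a per-bit select -- each result bit comes from the second operand where the corresponding mask bit is 1 and from the third operand where it is 0, i.e. (mask & b) | (~mask & c). A minimal usage sketch follows; it is illustrative only, is not part of the generated header, and assumes a NEON target where <arm_neon.h> is usable, so it is kept compiled out. */ 
#if 0 
#include <arm_neon.h> 
/* Lane-wise "pick the larger": compare to build an all-ones/all-zeros mask, then select. */ 
static inline float32x4_t pick_larger(float32x4_t a, float32x4_t b) { 
  uint32x4_t mask = vcgeq_f32(a, b);   /* per lane: a >= b ? 0xFFFFFFFF : 0 */ 
  return vbslq_f32(mask, a, b);        /* per lane: (mask & a) | (~mask & b) */ 
} 
#endif 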
#ifdef __LITTLE_ENDIAN__ __ai int8x8_t vbsl_s8(uint8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 0); return __ret; } #else __ai int8x8_t vbsl_s8(uint8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vbsl_f32(uint32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #else __ai float32x2_t vbsl_f32(uint32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (float32x2_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vbsl_s32(uint32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); return __ret; } #else __ai int32x2_t vbsl_s32(uint32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (int32x2_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vbsl_s64(uint64x1_t __p0, int64x1_t __p1, int64x1_t __p2) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 3); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vbsl_s16(uint16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 1); return __ret; } #else __ai int16x4_t vbsl_s16(uint16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcageq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vcageq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vcageq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 
= __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vcageq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcage_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vcage_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vcage_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vcage_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcagtq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vcagtq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vcagtq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vcagtq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcagt_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vcagt_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vcagt_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vcagt_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcaleq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vcaleq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vcaleq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vcaleq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcale_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vcale_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vcale_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vcale_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcaltq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vcaltq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vcaltq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; 
float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vcaltq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcalt_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vcalt_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vcalt_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vcalt_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vceq_p8(poly8x8_t __p0, poly8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0 == __p1); return __ret; } #else __ai uint8x8_t vceq_p8(poly8x8_t __p0, poly8x8_t __p1) { uint8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vceqq_p8(poly8x16_t __p0, poly8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0 == __p1); return __ret; } #else __ai uint8x16_t vceqq_p8(poly8x16_t __p0, poly8x16_t __p1) { uint8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vceqq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0 == __p1); return __ret; } #else __ai uint8x16_t vceqq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vceqq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 == __p1); return __ret; } #else __ai uint32x4_t vceqq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vceqq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 == __p1); return __ret; } #else __ai uint16x8_t vceqq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vceqq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0 == __p1); return __ret; } #else __ai uint8x16_t vceqq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vceqq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 == __p1); return __ret; } #else __ai uint32x4_t vceqq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vceqq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 == __p1); return __ret; } #else __ai uint32x4_t vceqq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vceqq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 == __p1); return __ret; } #else __ai uint16x8_t vceqq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vceq_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0 == __p1); return __ret; } #else __ai uint8x8_t vceq_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vceq_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 == __p1); return __ret; } #else __ai uint32x2_t vceq_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef 
__LITTLE_ENDIAN__ __ai uint16x4_t vceq_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 == __p1); return __ret; } #else __ai uint16x4_t vceq_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vceq_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0 == __p1); return __ret; } #else __ai uint8x8_t vceq_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vceq_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 == __p1); return __ret; } #else __ai uint32x2_t vceq_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vceq_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 == __p1); return __ret; } #else __ai uint32x2_t vceq_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vceq_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 == __p1); return __ret; } #else __ai uint16x4_t vceq_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vcgeq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0 >= __p1); return __ret; } #else __ai uint8x16_t vcgeq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcgeq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 >= __p1); return __ret; } #else __ai uint32x4_t vcgeq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vcgeq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 >= __p1); return __ret; } #else __ai uint16x8_t vcgeq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vcgeq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0 >= __p1); return __ret; } #else __ai uint8x16_t vcgeq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcgeq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 >= __p1); return __ret; } #else __ai uint32x4_t vcgeq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcgeq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 >= __p1); return __ret; } #else __ai uint32x4_t vcgeq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vcgeq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 >= __p1); return __ret; } #else __ai uint16x8_t vcgeq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vcge_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0 >= __p1); return __ret; } #else __ai uint8x8_t vcge_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcge_u32(uint32x2_t 
__p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 >= __p1); return __ret; } #else __ai uint32x2_t vcge_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vcge_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 >= __p1); return __ret; } #else __ai uint16x4_t vcge_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vcge_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0 >= __p1); return __ret; } #else __ai uint8x8_t vcge_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcge_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 >= __p1); return __ret; } #else __ai uint32x2_t vcge_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcge_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 >= __p1); return __ret; } #else __ai uint32x2_t vcge_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vcge_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 >= __p1); return __ret; } #else __ai uint16x4_t vcge_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vcgtq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0 > __p1); return __ret; } #else __ai uint8x16_t vcgtq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, 
__ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcgtq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 > __p1); return __ret; } #else __ai uint32x4_t vcgtq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vcgtq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 > __p1); return __ret; } #else __ai uint16x8_t vcgtq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vcgtq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0 > __p1); return __ret; } #else __ai uint8x16_t vcgtq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcgtq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 > __p1); return __ret; } #else __ai uint32x4_t vcgtq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcgtq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 > __p1); return __ret; } #else __ai uint32x4_t vcgtq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vcgtq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 > __p1); return __ret; } #else __ai uint16x8_t vcgtq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vcgt_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0 > __p1); return __ret; } #else __ai uint8x8_t 
vcgt_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcgt_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 > __p1); return __ret; } #else __ai uint32x2_t vcgt_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vcgt_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 > __p1); return __ret; } #else __ai uint16x4_t vcgt_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vcgt_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0 > __p1); return __ret; } #else __ai uint8x8_t vcgt_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcgt_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 > __p1); return __ret; } #else __ai uint32x2_t vcgt_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcgt_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 > __p1); return __ret; } #else __ai uint32x2_t vcgt_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vcgt_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 > __p1); return __ret; } #else __ai uint16x4_t vcgt_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vcleq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0 <= __p1); 
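/* Editor's annotation (added; not part of the original header): the vceq/vcge/
 * vcgt/vcle/vclt intrinsics in this region lower to the plain vector relational
 * operator, as in the assignment just above. Each lane of the unsigned result
 * is set to all ones (0xFF, 0xFFFF, or 0xFFFFFFFF depending on lane width) when
 * the predicate holds for that lane and to zero otherwise, which is why the
 * result type is always an unsigned vector with the same lane width as the
 * operands, even for the signed and floating-point variants. */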
return __ret; } #else __ai uint8x16_t vcleq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t)(__rev0 <= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcleq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 <= __p1); return __ret; } #else __ai uint32x4_t vcleq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t)(__rev0 <= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vcleq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 <= __p1); return __ret; } #else __ai uint16x8_t vcleq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t)(__rev0 <= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vcleq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0 <= __p1); return __ret; } #else __ai uint8x16_t vcleq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t)(__rev0 <= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcleq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 <= __p1); return __ret; } #else __ai uint32x4_t vcleq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t)(__rev0 <= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcleq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 <= __p1); return __ret; } #else __ai uint32x4_t vcleq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t)(__rev0 <= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vcleq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 <= __p1); return __ret; } #else __ai uint16x8_t vcleq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 
7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t)(__rev0 <= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vcle_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0 <= __p1); return __ret; } #else __ai uint8x8_t vcle_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t)(__rev0 <= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcle_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 <= __p1); return __ret; } #else __ai uint32x2_t vcle_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t)(__rev0 <= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vcle_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 <= __p1); return __ret; } #else __ai uint16x4_t vcle_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t)(__rev0 <= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vcle_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0 <= __p1); return __ret; } #else __ai uint8x8_t vcle_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t)(__rev0 <= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcle_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 <= __p1); return __ret; } #else __ai uint32x2_t vcle_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t)(__rev0 <= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcle_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 <= __p1); return __ret; } #else __ai uint32x2_t vcle_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t)(__rev0 <= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vcle_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 <= __p1); return __ret; } #else __ai uint16x4_t vcle_s16(int16x4_t __p0, int16x4_t __p1) { 
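/* Editor's annotation (added; not part of the original header): every #else
 * (big-endian) variant in this file follows the pattern shown below: reverse
 * the lane order of each operand with __builtin_shufflevector, perform the
 * operation on the reversed values, then reverse the result back. This keeps
 * the lane numbering observed by user code consistent with the architectural
 * lane order that the underlying vector operations and NEON builtins assume,
 * regardless of target endianness. */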
uint16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t)(__rev0 <= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vclsq_u8(uint8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 32); return __ret; } #else __ai int8x16_t vclsq_u8(uint8x16_t __p0) { int8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vclsq_v((int8x16_t)__rev0, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vclsq_u32(uint32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 34); return __ret; } #else __ai int32x4_t vclsq_u32(uint32x4_t __p0) { int32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vclsq_v((int8x16_t)__rev0, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vclsq_u16(uint16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 33); return __ret; } #else __ai int16x8_t vclsq_u16(uint16x8_t __p0) { int16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vclsq_v((int8x16_t)__rev0, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vclsq_s8(int8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 32); return __ret; } #else __ai int8x16_t vclsq_s8(int8x16_t __p0) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vclsq_v((int8x16_t)__rev0, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vclsq_s32(int32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 34); return __ret; } #else __ai int32x4_t vclsq_s32(int32x4_t __p0) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vclsq_v((int8x16_t)__rev0, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vclsq_s16(int16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 33); return __ret; } #else __ai int16x8_t vclsq_s16(int16x8_t __p0) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vclsq_v((int8x16_t)__rev0, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vcls_u8(uint8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vcls_v((int8x8_t)__p0, 0); return __ret; } #else __ai int8x8_t vcls_u8(uint8x8_t __p0) { int8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 
2, 1, 0); __ret = (int8x8_t) __builtin_neon_vcls_v((int8x8_t)__rev0, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vcls_u32(uint32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vcls_v((int8x8_t)__p0, 2); return __ret; } #else __ai int32x2_t vcls_u32(uint32x2_t __p0) { int32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int32x2_t) __builtin_neon_vcls_v((int8x8_t)__rev0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vcls_u16(uint16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vcls_v((int8x8_t)__p0, 1); return __ret; } #else __ai int16x4_t vcls_u16(uint16x4_t __p0) { int16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vcls_v((int8x8_t)__rev0, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vcls_s8(int8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vcls_v((int8x8_t)__p0, 0); return __ret; } #else __ai int8x8_t vcls_s8(int8x8_t __p0) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vcls_v((int8x8_t)__rev0, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vcls_s32(int32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vcls_v((int8x8_t)__p0, 2); return __ret; } #else __ai int32x2_t vcls_s32(int32x2_t __p0) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int32x2_t) __builtin_neon_vcls_v((int8x8_t)__rev0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vcls_s16(int16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vcls_v((int8x8_t)__p0, 1); return __ret; } #else __ai int16x4_t vcls_s16(int16x4_t __p0) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vcls_v((int8x8_t)__rev0, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vcltq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0 < __p1); return __ret; } #else __ai uint8x16_t vcltq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t)(__rev0 < __rev1); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcltq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 < __p1); return __ret; } #else __ai uint32x4_t vcltq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t)(__rev0 < __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return 
__ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vcltq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 < __p1); return __ret; } #else __ai uint16x8_t vcltq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t)(__rev0 < __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vcltq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0 < __p1); return __ret; } #else __ai uint8x16_t vcltq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t)(__rev0 < __rev1); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcltq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 < __p1); return __ret; } #else __ai uint32x4_t vcltq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t)(__rev0 < __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcltq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 < __p1); return __ret; } #else __ai uint32x4_t vcltq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t)(__rev0 < __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vcltq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 < __p1); return __ret; } #else __ai uint16x8_t vcltq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t)(__rev0 < __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vclt_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0 < __p1); return __ret; } #else __ai uint8x8_t vclt_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t)(__rev0 < __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vclt_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 < __p1); return __ret; } #else __ai uint32x2_t vclt_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t 
__ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t)(__rev0 < __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vclt_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 < __p1); return __ret; } #else __ai uint16x4_t vclt_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t)(__rev0 < __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vclt_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0 < __p1); return __ret; } #else __ai uint8x8_t vclt_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t)(__rev0 < __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vclt_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 < __p1); return __ret; } #else __ai uint32x2_t vclt_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t)(__rev0 < __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vclt_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 < __p1); return __ret; } #else __ai uint32x2_t vclt_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t)(__rev0 < __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vclt_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 < __p1); return __ret; } #else __ai uint16x4_t vclt_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t)(__rev0 < __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vclzq_u8(uint8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 48); return __ret; } #else __ai uint8x16_t vclzq_u8(uint8x16_t __p0) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vclzq_v((int8x16_t)__rev0, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vclzq_u32(uint32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 50); return __ret; } #else __ai uint32x4_t 
vclzq_u32(uint32x4_t __p0) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vclzq_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vclzq_u16(uint16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 49); return __ret; } #else __ai uint16x8_t vclzq_u16(uint16x8_t __p0) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vclzq_v((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vclzq_s8(int8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 32); return __ret; } #else __ai int8x16_t vclzq_s8(int8x16_t __p0) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vclzq_v((int8x16_t)__rev0, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vclzq_s32(int32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 34); return __ret; } #else __ai int32x4_t vclzq_s32(int32x4_t __p0) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vclzq_v((int8x16_t)__rev0, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vclzq_s16(int16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 33); return __ret; } #else __ai int16x8_t vclzq_s16(int16x8_t __p0) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vclzq_v((int8x16_t)__rev0, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vclz_u8(uint8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vclz_v((int8x8_t)__p0, 16); return __ret; } #else __ai uint8x8_t vclz_u8(uint8x8_t __p0) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vclz_v((int8x8_t)__rev0, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vclz_u32(uint32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vclz_v((int8x8_t)__p0, 18); return __ret; } #else __ai uint32x2_t vclz_u32(uint32x2_t __p0) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vclz_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vclz_u16(uint16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vclz_v((int8x8_t)__p0, 17); return __ret; } #else __ai uint16x4_t vclz_u16(uint16x4_t __p0) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vclz_v((int8x8_t)__rev0, 17); __ret = 
__builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vclz_s8(int8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vclz_v((int8x8_t)__p0, 0); return __ret; } #else __ai int8x8_t vclz_s8(int8x8_t __p0) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vclz_v((int8x8_t)__rev0, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vclz_s32(int32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vclz_v((int8x8_t)__p0, 2); return __ret; } #else __ai int32x2_t vclz_s32(int32x2_t __p0) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int32x2_t) __builtin_neon_vclz_v((int8x8_t)__rev0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vclz_s16(int16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vclz_v((int8x8_t)__p0, 1); return __ret; } #else __ai int16x4_t vclz_s16(int16x4_t __p0) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vclz_v((int8x8_t)__rev0, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vcnt_p8(poly8x8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vcnt_v((int8x8_t)__p0, 4); return __ret; } #else __ai poly8x8_t vcnt_p8(poly8x8_t __p0) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vcnt_v((int8x8_t)__rev0, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vcntq_p8(poly8x16_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t) __builtin_neon_vcntq_v((int8x16_t)__p0, 36); return __ret; } #else __ai poly8x16_t vcntq_p8(poly8x16_t __p0) { poly8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x16_t) __builtin_neon_vcntq_v((int8x16_t)__rev0, 36); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vcntq_u8(uint8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vcntq_v((int8x16_t)__p0, 48); return __ret; } #else __ai uint8x16_t vcntq_u8(uint8x16_t __p0) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vcntq_v((int8x16_t)__rev0, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vcntq_s8(int8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vcntq_v((int8x16_t)__p0, 32); return __ret; } #else __ai int8x16_t vcntq_s8(int8x16_t __p0) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vcntq_v((int8x16_t)__rev0, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ 
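/* Editor's sketch (added for illustration; not part of the original header).
 * A typical way to consume the all-ones/all-zeros masks produced by the
 * comparison intrinsics above is bitwise select. The helper name below is
 * made up for this example; it assumes vcgtq_f32 (defined above) and
 * vbslq_f32 (defined earlier in the header's usual alphabetical layout) are
 * already visible at this point in the little-endian branch. */
static inline float32x4_t neon_example_max_f32(float32x4_t __a, float32x4_t __b) {
  uint32x4_t __gt = vcgtq_f32(__a, __b); /* per-lane mask: all ones where __a > __b */
  return vbslq_f32(__gt, __a, __b);      /* select __a where the mask is set, else __b */
}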
__ai uint8x8_t vcnt_u8(uint8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vcnt_v((int8x8_t)__p0, 16); return __ret; } #else __ai uint8x8_t vcnt_u8(uint8x8_t __p0) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vcnt_v((int8x8_t)__rev0, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vcnt_s8(int8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vcnt_v((int8x8_t)__p0, 0); return __ret; } #else __ai int8x8_t vcnt_s8(int8x8_t __p0) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vcnt_v((int8x8_t)__rev0, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vcombine_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); return __ret; } #else __ai poly8x16_t vcombine_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x16_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vcombine_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7); return __ret; } #else __ai poly16x8_t vcombine_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x8_t __ret; poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vcombine_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); return __ret; } #else __ai uint8x16_t vcombine_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x16_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai uint8x16_t __noswap_vcombine_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcombine_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3); return __ret; } #else __ai uint32x4_t vcombine_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x4_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = 
__builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai uint32x4_t __noswap_vcombine_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcombine_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1); return __ret; } #else __ai uint64x2_t vcombine_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vcombine_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7); return __ret; } #else __ai uint16x8_t vcombine_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x8_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai uint16x8_t __noswap_vcombine_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vcombine_s8(int8x8_t __p0, int8x8_t __p1) { int8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); return __ret; } #else __ai int8x16_t vcombine_s8(int8x8_t __p0, int8x8_t __p1) { int8x16_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai int8x16_t __noswap_vcombine_s8(int8x8_t __p0, int8x8_t __p1) { int8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vcombine_f32(float32x2_t __p0, float32x2_t __p1) { float32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3); return __ret; } #else __ai float32x4_t vcombine_f32(float32x2_t __p0, float32x2_t __p1) { float32x4_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai float32x4_t __noswap_vcombine_f32(float32x2_t __p0, float32x2_t __p1) { float32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float16x8_t vcombine_f16(float16x4_t __p0, float16x4_t __p1) { float16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7); return __ret; } #else __ai float16x8_t vcombine_f16(float16x4_t __p0, float16x4_t __p1) { float16x8_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); 
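/* Editor's annotation (added; not part of the original header): the vcombine_*
 * group concatenates two 64-bit (D-register) vectors into one 128-bit
 * (Q-register) vector. The index list 0 .. 2N-1 passed to
 * __builtin_shufflevector selects all lanes of the first operand followed by
 * all lanes of the second, i.e. a straight concatenation. The accompanying
 * __noswap_vcombine_* helpers concatenate without any lane reversal so that
 * other big-endian intrinsic bodies can combine already-reversed halves
 * without paying for an extra reversal. */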
__ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai float16x8_t __noswap_vcombine_f16(float16x4_t __p0, float16x4_t __p1) { float16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vcombine_s32(int32x2_t __p0, int32x2_t __p1) { int32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3); return __ret; } #else __ai int32x4_t vcombine_s32(int32x2_t __p0, int32x2_t __p1) { int32x4_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vcombine_s32(int32x2_t __p0, int32x2_t __p1) { int32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vcombine_s64(int64x1_t __p0, int64x1_t __p1) { int64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1); return __ret; } #else __ai int64x2_t vcombine_s64(int64x1_t __p0, int64x1_t __p1) { int64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vcombine_s16(int16x4_t __p0, int16x4_t __p1) { int16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7); return __ret; } #else __ai int16x8_t vcombine_s16(int16x4_t __p0, int16x4_t __p1) { int16x8_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai int16x8_t __noswap_vcombine_s16(int16x4_t __p0, int16x4_t __p1) { int16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7); return __ret; } #endif #define vcreate_p8(__p0) __extension__ ({ \ poly8x8_t __ret; \ uint64_t __promote = __p0; \ __ret = (poly8x8_t)(__promote); \ __ret; \ }) #define vcreate_p16(__p0) __extension__ ({ \ poly16x4_t __ret; \ uint64_t __promote = __p0; \ __ret = (poly16x4_t)(__promote); \ __ret; \ }) #define vcreate_u8(__p0) __extension__ ({ \ uint8x8_t __ret; \ uint64_t __promote = __p0; \ __ret = (uint8x8_t)(__promote); \ __ret; \ }) #define vcreate_u32(__p0) __extension__ ({ \ uint32x2_t __ret; \ uint64_t __promote = __p0; \ __ret = (uint32x2_t)(__promote); \ __ret; \ }) #define vcreate_u64(__p0) __extension__ ({ \ uint64x1_t __ret; \ uint64_t __promote = __p0; \ __ret = (uint64x1_t)(__promote); \ __ret; \ }) #define vcreate_u16(__p0) __extension__ ({ \ uint16x4_t __ret; \ uint64_t __promote = __p0; \ __ret = (uint16x4_t)(__promote); \ __ret; \ }) #define vcreate_s8(__p0) __extension__ ({ \ int8x8_t __ret; \ uint64_t __promote = __p0; \ __ret = (int8x8_t)(__promote); \ __ret; \ }) #define vcreate_f32(__p0) __extension__ ({ \ float32x2_t __ret; \ uint64_t __promote = __p0; \ __ret = (float32x2_t)(__promote); \ __ret; \ }) #define vcreate_f16(__p0) __extension__ ({ \ float16x4_t __ret; \ uint64_t __promote = __p0; \ __ret = (float16x4_t)(__promote); \ __ret; \ }) #define vcreate_s32(__p0) __extension__ ({ \ int32x2_t __ret; \ uint64_t __promote = 
__p0; \ __ret = (int32x2_t)(__promote); \ __ret; \ }) #define vcreate_s64(__p0) __extension__ ({ \ int64x1_t __ret; \ uint64_t __promote = __p0; \ __ret = (int64x1_t)(__promote); \ __ret; \ }) #define vcreate_s16(__p0) __extension__ ({ \ int16x4_t __ret; \ uint64_t __promote = __p0; \ __ret = (int16x4_t)(__promote); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vcvtq_f32_u32(uint32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcvtq_f32_v((int8x16_t)__p0, 50); return __ret; } #else __ai float32x4_t vcvtq_f32_u32(uint32x4_t __p0) { float32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vcvtq_f32_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vcvtq_f32_s32(int32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcvtq_f32_v((int8x16_t)__p0, 34); return __ret; } #else __ai float32x4_t vcvtq_f32_s32(int32x4_t __p0) { float32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vcvtq_f32_v((int8x16_t)__rev0, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vcvt_f32_u32(uint32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcvt_f32_v((int8x8_t)__p0, 18); return __ret; } #else __ai float32x2_t vcvt_f32_u32(uint32x2_t __p0) { float32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32x2_t) __builtin_neon_vcvt_f32_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vcvt_f32_s32(int32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcvt_f32_v((int8x8_t)__p0, 2); return __ret; } #else __ai float32x2_t vcvt_f32_s32(int32x2_t __p0) { float32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32x2_t) __builtin_neon_vcvt_f32_v((int8x8_t)__rev0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vcvtq_n_f32_u32(__p0, __p1) __extension__ ({ \ float32x4_t __ret; \ uint32x4_t __s0 = __p0; \ __ret = (float32x4_t) __builtin_neon_vcvtq_n_f32_v((int8x16_t)__s0, __p1, 50); \ __ret; \ }) #else #define vcvtq_n_f32_u32(__p0, __p1) __extension__ ({ \ float32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (float32x4_t) __builtin_neon_vcvtq_n_f32_v((int8x16_t)__rev0, __p1, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcvtq_n_f32_s32(__p0, __p1) __extension__ ({ \ float32x4_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (float32x4_t) __builtin_neon_vcvtq_n_f32_v((int8x16_t)__s0, __p1, 34); \ __ret; \ }) #else #define vcvtq_n_f32_s32(__p0, __p1) __extension__ ({ \ float32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (float32x4_t) __builtin_neon_vcvtq_n_f32_v((int8x16_t)__rev0, __p1, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcvt_n_f32_u32(__p0, __p1) __extension__ ({ \ float32x2_t __ret; \ uint32x2_t __s0 = __p0; \ __ret = (float32x2_t) 
__builtin_neon_vcvt_n_f32_v((int8x8_t)__s0, __p1, 18); \ __ret; \ }) #else #define vcvt_n_f32_u32(__p0, __p1) __extension__ ({ \ float32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (float32x2_t) __builtin_neon_vcvt_n_f32_v((int8x8_t)__rev0, __p1, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcvt_n_f32_s32(__p0, __p1) __extension__ ({ \ float32x2_t __ret; \ int32x2_t __s0 = __p0; \ __ret = (float32x2_t) __builtin_neon_vcvt_n_f32_v((int8x8_t)__s0, __p1, 2); \ __ret; \ }) #else #define vcvt_n_f32_s32(__p0, __p1) __extension__ ({ \ float32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (float32x2_t) __builtin_neon_vcvt_n_f32_v((int8x8_t)__rev0, __p1, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcvtq_n_s32_f32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ float32x4_t __s0 = __p0; \ __ret = (int32x4_t) __builtin_neon_vcvtq_n_s32_v((int8x16_t)__s0, __p1, 34); \ __ret; \ }) #else #define vcvtq_n_s32_f32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ float32x4_t __s0 = __p0; \ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int32x4_t) __builtin_neon_vcvtq_n_s32_v((int8x16_t)__rev0, __p1, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcvt_n_s32_f32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ float32x2_t __s0 = __p0; \ __ret = (int32x2_t) __builtin_neon_vcvt_n_s32_v((int8x8_t)__s0, __p1, 2); \ __ret; \ }) #else #define vcvt_n_s32_f32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ float32x2_t __s0 = __p0; \ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int32x2_t) __builtin_neon_vcvt_n_s32_v((int8x8_t)__rev0, __p1, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcvtq_n_u32_f32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ float32x4_t __s0 = __p0; \ __ret = (uint32x4_t) __builtin_neon_vcvtq_n_u32_v((int8x16_t)__s0, __p1, 50); \ __ret; \ }) #else #define vcvtq_n_u32_f32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ float32x4_t __s0 = __p0; \ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint32x4_t) __builtin_neon_vcvtq_n_u32_v((int8x16_t)__rev0, __p1, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcvt_n_u32_f32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ float32x2_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_vcvt_n_u32_v((int8x8_t)__s0, __p1, 18); \ __ret; \ }) #else #define vcvt_n_u32_f32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ float32x2_t __s0 = __p0; \ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint32x2_t) __builtin_neon_vcvt_n_u32_v((int8x8_t)__rev0, __p1, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vcvtq_s32_f32(float32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vcvtq_s32_v((int8x16_t)__p0, 34); return __ret; } #else __ai int32x4_t vcvtq_s32_f32(float32x4_t __p0) { int32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int32x4_t) 
__builtin_neon_vcvtq_s32_v((int8x16_t)__rev0, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vcvt_s32_f32(float32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vcvt_s32_v((int8x8_t)__p0, 2); return __ret; } #else __ai int32x2_t vcvt_s32_f32(float32x2_t __p0) { int32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int32x2_t) __builtin_neon_vcvt_s32_v((int8x8_t)__rev0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcvtq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vcvtq_u32_v((int8x16_t)__p0, 50); return __ret; } #else __ai uint32x4_t vcvtq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vcvtq_u32_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcvt_u32_f32(float32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vcvt_u32_v((int8x8_t)__p0, 18); return __ret; } #else __ai uint32x2_t vcvt_u32_f32(float32x2_t __p0) { uint32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vcvt_u32_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vdup_lane_p8(__p0_0, __p1_0) __extension__ ({ \ poly8x8_t __ret_0; \ poly8x8_t __s0_0 = __p0_0; \ __ret_0 = splat_lane_p8(__s0_0, __p1_0); \ __ret_0; \ }) #else #define vdup_lane_p8(__p0_1, __p1_1) __extension__ ({ \ poly8x8_t __ret_1; \ poly8x8_t __s0_1 = __p0_1; \ poly8x8_t __rev0_1; __rev0_1 = __builtin_shufflevector(__s0_1, __s0_1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_1 = __noswap_splat_lane_p8(__rev0_1, __p1_1); \ __ret_1 = __builtin_shufflevector(__ret_1, __ret_1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_1; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_lane_p16(__p0_2, __p1_2) __extension__ ({ \ poly16x4_t __ret_2; \ poly16x4_t __s0_2 = __p0_2; \ __ret_2 = splat_lane_p16(__s0_2, __p1_2); \ __ret_2; \ }) #else #define vdup_lane_p16(__p0_3, __p1_3) __extension__ ({ \ poly16x4_t __ret_3; \ poly16x4_t __s0_3 = __p0_3; \ poly16x4_t __rev0_3; __rev0_3 = __builtin_shufflevector(__s0_3, __s0_3, 3, 2, 1, 0); \ __ret_3 = __noswap_splat_lane_p16(__rev0_3, __p1_3); \ __ret_3 = __builtin_shufflevector(__ret_3, __ret_3, 3, 2, 1, 0); \ __ret_3; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_lane_p8(__p0_4, __p1_4) __extension__ ({ \ poly8x16_t __ret_4; \ poly8x8_t __s0_4 = __p0_4; \ __ret_4 = splatq_lane_p8(__s0_4, __p1_4); \ __ret_4; \ }) #else #define vdupq_lane_p8(__p0_5, __p1_5) __extension__ ({ \ poly8x16_t __ret_5; \ poly8x8_t __s0_5 = __p0_5; \ poly8x8_t __rev0_5; __rev0_5 = __builtin_shufflevector(__s0_5, __s0_5, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_5 = __noswap_splatq_lane_p8(__rev0_5, __p1_5); \ __ret_5 = __builtin_shufflevector(__ret_5, __ret_5, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_5; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_lane_p16(__p0_6, __p1_6) __extension__ ({ \ poly16x8_t __ret_6; \ poly16x4_t __s0_6 = __p0_6; \ __ret_6 = splatq_lane_p16(__s0_6, __p1_6); \ __ret_6; \ }) #else #define vdupq_lane_p16(__p0_7, __p1_7) __extension__ ({ \ poly16x8_t __ret_7; \ poly16x4_t __s0_7 = __p0_7; \ poly16x4_t __rev0_7; 
__rev0_7 = __builtin_shufflevector(__s0_7, __s0_7, 3, 2, 1, 0); \ __ret_7 = __noswap_splatq_lane_p16(__rev0_7, __p1_7); \ __ret_7 = __builtin_shufflevector(__ret_7, __ret_7, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_7; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_lane_u8(__p0_8, __p1_8) __extension__ ({ \ uint8x16_t __ret_8; \ uint8x8_t __s0_8 = __p0_8; \ __ret_8 = splatq_lane_u8(__s0_8, __p1_8); \ __ret_8; \ }) #else #define vdupq_lane_u8(__p0_9, __p1_9) __extension__ ({ \ uint8x16_t __ret_9; \ uint8x8_t __s0_9 = __p0_9; \ uint8x8_t __rev0_9; __rev0_9 = __builtin_shufflevector(__s0_9, __s0_9, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_9 = __noswap_splatq_lane_u8(__rev0_9, __p1_9); \ __ret_9 = __builtin_shufflevector(__ret_9, __ret_9, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_9; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_lane_u32(__p0_10, __p1_10) __extension__ ({ \ uint32x4_t __ret_10; \ uint32x2_t __s0_10 = __p0_10; \ __ret_10 = splatq_lane_u32(__s0_10, __p1_10); \ __ret_10; \ }) #else #define vdupq_lane_u32(__p0_11, __p1_11) __extension__ ({ \ uint32x4_t __ret_11; \ uint32x2_t __s0_11 = __p0_11; \ uint32x2_t __rev0_11; __rev0_11 = __builtin_shufflevector(__s0_11, __s0_11, 1, 0); \ __ret_11 = __noswap_splatq_lane_u32(__rev0_11, __p1_11); \ __ret_11 = __builtin_shufflevector(__ret_11, __ret_11, 3, 2, 1, 0); \ __ret_11; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_lane_u64(__p0_12, __p1_12) __extension__ ({ \ uint64x2_t __ret_12; \ uint64x1_t __s0_12 = __p0_12; \ __ret_12 = splatq_lane_u64(__s0_12, __p1_12); \ __ret_12; \ }) #else #define vdupq_lane_u64(__p0_13, __p1_13) __extension__ ({ \ uint64x2_t __ret_13; \ uint64x1_t __s0_13 = __p0_13; \ __ret_13 = __noswap_splatq_lane_u64(__s0_13, __p1_13); \ __ret_13 = __builtin_shufflevector(__ret_13, __ret_13, 1, 0); \ __ret_13; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_lane_u16(__p0_14, __p1_14) __extension__ ({ \ uint16x8_t __ret_14; \ uint16x4_t __s0_14 = __p0_14; \ __ret_14 = splatq_lane_u16(__s0_14, __p1_14); \ __ret_14; \ }) #else #define vdupq_lane_u16(__p0_15, __p1_15) __extension__ ({ \ uint16x8_t __ret_15; \ uint16x4_t __s0_15 = __p0_15; \ uint16x4_t __rev0_15; __rev0_15 = __builtin_shufflevector(__s0_15, __s0_15, 3, 2, 1, 0); \ __ret_15 = __noswap_splatq_lane_u16(__rev0_15, __p1_15); \ __ret_15 = __builtin_shufflevector(__ret_15, __ret_15, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_15; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_lane_s8(__p0_16, __p1_16) __extension__ ({ \ int8x16_t __ret_16; \ int8x8_t __s0_16 = __p0_16; \ __ret_16 = splatq_lane_s8(__s0_16, __p1_16); \ __ret_16; \ }) #else #define vdupq_lane_s8(__p0_17, __p1_17) __extension__ ({ \ int8x16_t __ret_17; \ int8x8_t __s0_17 = __p0_17; \ int8x8_t __rev0_17; __rev0_17 = __builtin_shufflevector(__s0_17, __s0_17, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_17 = __noswap_splatq_lane_s8(__rev0_17, __p1_17); \ __ret_17 = __builtin_shufflevector(__ret_17, __ret_17, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_17; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_lane_f32(__p0_18, __p1_18) __extension__ ({ \ float32x4_t __ret_18; \ float32x2_t __s0_18 = __p0_18; \ __ret_18 = splatq_lane_f32(__s0_18, __p1_18); \ __ret_18; \ }) #else #define vdupq_lane_f32(__p0_19, __p1_19) __extension__ ({ \ float32x4_t __ret_19; \ float32x2_t __s0_19 = __p0_19; \ float32x2_t __rev0_19; __rev0_19 = __builtin_shufflevector(__s0_19, __s0_19, 1, 0); \ __ret_19 = __noswap_splatq_lane_f32(__rev0_19, __p1_19); \ __ret_19 = __builtin_shufflevector(__ret_19, __ret_19, 3, 
2, 1, 0); \ __ret_19; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_lane_f16(__p0_20, __p1_20) __extension__ ({ \ float16x8_t __ret_20; \ float16x4_t __s0_20 = __p0_20; \ __ret_20 = splatq_lane_f16(__s0_20, __p1_20); \ __ret_20; \ }) #else #define vdupq_lane_f16(__p0_21, __p1_21) __extension__ ({ \ float16x8_t __ret_21; \ float16x4_t __s0_21 = __p0_21; \ float16x4_t __rev0_21; __rev0_21 = __builtin_shufflevector(__s0_21, __s0_21, 3, 2, 1, 0); \ __ret_21 = __noswap_splatq_lane_f16(__rev0_21, __p1_21); \ __ret_21 = __builtin_shufflevector(__ret_21, __ret_21, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_21; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_lane_s32(__p0_22, __p1_22) __extension__ ({ \ int32x4_t __ret_22; \ int32x2_t __s0_22 = __p0_22; \ __ret_22 = splatq_lane_s32(__s0_22, __p1_22); \ __ret_22; \ }) #else #define vdupq_lane_s32(__p0_23, __p1_23) __extension__ ({ \ int32x4_t __ret_23; \ int32x2_t __s0_23 = __p0_23; \ int32x2_t __rev0_23; __rev0_23 = __builtin_shufflevector(__s0_23, __s0_23, 1, 0); \ __ret_23 = __noswap_splatq_lane_s32(__rev0_23, __p1_23); \ __ret_23 = __builtin_shufflevector(__ret_23, __ret_23, 3, 2, 1, 0); \ __ret_23; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_lane_s64(__p0_24, __p1_24) __extension__ ({ \ int64x2_t __ret_24; \ int64x1_t __s0_24 = __p0_24; \ __ret_24 = splatq_lane_s64(__s0_24, __p1_24); \ __ret_24; \ }) #else #define vdupq_lane_s64(__p0_25, __p1_25) __extension__ ({ \ int64x2_t __ret_25; \ int64x1_t __s0_25 = __p0_25; \ __ret_25 = __noswap_splatq_lane_s64(__s0_25, __p1_25); \ __ret_25 = __builtin_shufflevector(__ret_25, __ret_25, 1, 0); \ __ret_25; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_lane_s16(__p0_26, __p1_26) __extension__ ({ \ int16x8_t __ret_26; \ int16x4_t __s0_26 = __p0_26; \ __ret_26 = splatq_lane_s16(__s0_26, __p1_26); \ __ret_26; \ }) #else #define vdupq_lane_s16(__p0_27, __p1_27) __extension__ ({ \ int16x8_t __ret_27; \ int16x4_t __s0_27 = __p0_27; \ int16x4_t __rev0_27; __rev0_27 = __builtin_shufflevector(__s0_27, __s0_27, 3, 2, 1, 0); \ __ret_27 = __noswap_splatq_lane_s16(__rev0_27, __p1_27); \ __ret_27 = __builtin_shufflevector(__ret_27, __ret_27, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_27; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_lane_u8(__p0_28, __p1_28) __extension__ ({ \ uint8x8_t __ret_28; \ uint8x8_t __s0_28 = __p0_28; \ __ret_28 = splat_lane_u8(__s0_28, __p1_28); \ __ret_28; \ }) #else #define vdup_lane_u8(__p0_29, __p1_29) __extension__ ({ \ uint8x8_t __ret_29; \ uint8x8_t __s0_29 = __p0_29; \ uint8x8_t __rev0_29; __rev0_29 = __builtin_shufflevector(__s0_29, __s0_29, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_29 = __noswap_splat_lane_u8(__rev0_29, __p1_29); \ __ret_29 = __builtin_shufflevector(__ret_29, __ret_29, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_29; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_lane_u32(__p0_30, __p1_30) __extension__ ({ \ uint32x2_t __ret_30; \ uint32x2_t __s0_30 = __p0_30; \ __ret_30 = splat_lane_u32(__s0_30, __p1_30); \ __ret_30; \ }) #else #define vdup_lane_u32(__p0_31, __p1_31) __extension__ ({ \ uint32x2_t __ret_31; \ uint32x2_t __s0_31 = __p0_31; \ uint32x2_t __rev0_31; __rev0_31 = __builtin_shufflevector(__s0_31, __s0_31, 1, 0); \ __ret_31 = __noswap_splat_lane_u32(__rev0_31, __p1_31); \ __ret_31 = __builtin_shufflevector(__ret_31, __ret_31, 1, 0); \ __ret_31; \ }) #endif #define vdup_lane_u64(__p0_32, __p1_32) __extension__ ({ \ uint64x1_t __ret_32; \ uint64x1_t __s0_32 = __p0_32; \ __ret_32 = splat_lane_u64(__s0_32, __p1_32); \ __ret_32; \ }) #ifdef __LITTLE_ENDIAN__ #define 
vdup_lane_u16(__p0_33, __p1_33) __extension__ ({ \ uint16x4_t __ret_33; \ uint16x4_t __s0_33 = __p0_33; \ __ret_33 = splat_lane_u16(__s0_33, __p1_33); \ __ret_33; \ }) #else #define vdup_lane_u16(__p0_34, __p1_34) __extension__ ({ \ uint16x4_t __ret_34; \ uint16x4_t __s0_34 = __p0_34; \ uint16x4_t __rev0_34; __rev0_34 = __builtin_shufflevector(__s0_34, __s0_34, 3, 2, 1, 0); \ __ret_34 = __noswap_splat_lane_u16(__rev0_34, __p1_34); \ __ret_34 = __builtin_shufflevector(__ret_34, __ret_34, 3, 2, 1, 0); \ __ret_34; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_lane_s8(__p0_35, __p1_35) __extension__ ({ \ int8x8_t __ret_35; \ int8x8_t __s0_35 = __p0_35; \ __ret_35 = splat_lane_s8(__s0_35, __p1_35); \ __ret_35; \ }) #else #define vdup_lane_s8(__p0_36, __p1_36) __extension__ ({ \ int8x8_t __ret_36; \ int8x8_t __s0_36 = __p0_36; \ int8x8_t __rev0_36; __rev0_36 = __builtin_shufflevector(__s0_36, __s0_36, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_36 = __noswap_splat_lane_s8(__rev0_36, __p1_36); \ __ret_36 = __builtin_shufflevector(__ret_36, __ret_36, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_36; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_lane_f32(__p0_37, __p1_37) __extension__ ({ \ float32x2_t __ret_37; \ float32x2_t __s0_37 = __p0_37; \ __ret_37 = splat_lane_f32(__s0_37, __p1_37); \ __ret_37; \ }) #else #define vdup_lane_f32(__p0_38, __p1_38) __extension__ ({ \ float32x2_t __ret_38; \ float32x2_t __s0_38 = __p0_38; \ float32x2_t __rev0_38; __rev0_38 = __builtin_shufflevector(__s0_38, __s0_38, 1, 0); \ __ret_38 = __noswap_splat_lane_f32(__rev0_38, __p1_38); \ __ret_38 = __builtin_shufflevector(__ret_38, __ret_38, 1, 0); \ __ret_38; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_lane_f16(__p0_39, __p1_39) __extension__ ({ \ float16x4_t __ret_39; \ float16x4_t __s0_39 = __p0_39; \ __ret_39 = splat_lane_f16(__s0_39, __p1_39); \ __ret_39; \ }) #else #define vdup_lane_f16(__p0_40, __p1_40) __extension__ ({ \ float16x4_t __ret_40; \ float16x4_t __s0_40 = __p0_40; \ float16x4_t __rev0_40; __rev0_40 = __builtin_shufflevector(__s0_40, __s0_40, 3, 2, 1, 0); \ __ret_40 = __noswap_splat_lane_f16(__rev0_40, __p1_40); \ __ret_40 = __builtin_shufflevector(__ret_40, __ret_40, 3, 2, 1, 0); \ __ret_40; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_lane_s32(__p0_41, __p1_41) __extension__ ({ \ int32x2_t __ret_41; \ int32x2_t __s0_41 = __p0_41; \ __ret_41 = splat_lane_s32(__s0_41, __p1_41); \ __ret_41; \ }) #else #define vdup_lane_s32(__p0_42, __p1_42) __extension__ ({ \ int32x2_t __ret_42; \ int32x2_t __s0_42 = __p0_42; \ int32x2_t __rev0_42; __rev0_42 = __builtin_shufflevector(__s0_42, __s0_42, 1, 0); \ __ret_42 = __noswap_splat_lane_s32(__rev0_42, __p1_42); \ __ret_42 = __builtin_shufflevector(__ret_42, __ret_42, 1, 0); \ __ret_42; \ }) #endif #define vdup_lane_s64(__p0_43, __p1_43) __extension__ ({ \ int64x1_t __ret_43; \ int64x1_t __s0_43 = __p0_43; \ __ret_43 = splat_lane_s64(__s0_43, __p1_43); \ __ret_43; \ }) #ifdef __LITTLE_ENDIAN__ #define vdup_lane_s16(__p0_44, __p1_44) __extension__ ({ \ int16x4_t __ret_44; \ int16x4_t __s0_44 = __p0_44; \ __ret_44 = splat_lane_s16(__s0_44, __p1_44); \ __ret_44; \ }) #else #define vdup_lane_s16(__p0_45, __p1_45) __extension__ ({ \ int16x4_t __ret_45; \ int16x4_t __s0_45 = __p0_45; \ int16x4_t __rev0_45; __rev0_45 = __builtin_shufflevector(__s0_45, __s0_45, 3, 2, 1, 0); \ __ret_45 = __noswap_splat_lane_s16(__rev0_45, __p1_45); \ __ret_45 = __builtin_shufflevector(__ret_45, __ret_45, 3, 2, 1, 0); \ __ret_45; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t 
vdup_n_p8(poly8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } #else __ai poly8x8_t vdup_n_p8(poly8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vdup_n_p16(poly16_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t) {__p0, __p0, __p0, __p0}; return __ret; } #else __ai poly16x4_t vdup_n_p16(poly16_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t) {__p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vdupq_n_p8(poly8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } #else __ai poly8x16_t vdupq_n_p8(poly8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vdupq_n_p16(poly16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } #else __ai poly16x8_t vdupq_n_p16(poly16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vdupq_n_u8(uint8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } #else __ai uint8x16_t vdupq_n_u8(uint8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vdupq_n_u32(uint32_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) {__p0, __p0, __p0, __p0}; return __ret; } #else __ai uint32x4_t vdupq_n_u32(uint32_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) {__p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vdupq_n_u64(uint64_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) {__p0, __p0}; return __ret; } #else __ai uint64x2_t vdupq_n_u64(uint64_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) {__p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vdupq_n_u16(uint16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } #else __ai uint16x8_t vdupq_n_u16(uint16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vdupq_n_s8(int8_t __p0) { int8x16_t __ret; __ret = (int8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } #else __ai int8x16_t vdupq_n_s8(int8_t __p0) { int8x16_t __ret; __ret = (int8x16_t) 
{__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vdupq_n_f32(float32_t __p0) { float32x4_t __ret; __ret = (float32x4_t) {__p0, __p0, __p0, __p0}; return __ret; } #else __ai float32x4_t vdupq_n_f32(float32_t __p0) { float32x4_t __ret; __ret = (float32x4_t) {__p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_n_f16(__p0) __extension__ ({ \ float16x8_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16x8_t) {__s0, __s0, __s0, __s0, __s0, __s0, __s0, __s0}; \ __ret; \ }) #else #define vdupq_n_f16(__p0) __extension__ ({ \ float16x8_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16x8_t) {__s0, __s0, __s0, __s0, __s0, __s0, __s0, __s0}; \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vdupq_n_s32(int32_t __p0) { int32x4_t __ret; __ret = (int32x4_t) {__p0, __p0, __p0, __p0}; return __ret; } #else __ai int32x4_t vdupq_n_s32(int32_t __p0) { int32x4_t __ret; __ret = (int32x4_t) {__p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vdupq_n_s64(int64_t __p0) { int64x2_t __ret; __ret = (int64x2_t) {__p0, __p0}; return __ret; } #else __ai int64x2_t vdupq_n_s64(int64_t __p0) { int64x2_t __ret; __ret = (int64x2_t) {__p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vdupq_n_s16(int16_t __p0) { int16x8_t __ret; __ret = (int16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } #else __ai int16x8_t vdupq_n_s16(int16_t __p0) { int16x8_t __ret; __ret = (int16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vdup_n_u8(uint8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } #else __ai uint8x8_t vdup_n_u8(uint8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vdup_n_u32(uint32_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) {__p0, __p0}; return __ret; } #else __ai uint32x2_t vdup_n_u32(uint32_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) {__p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vdup_n_u64(uint64_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) {__p0}; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vdup_n_u16(uint16_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) {__p0, __p0, __p0, __p0}; return __ret; } #else __ai uint16x4_t vdup_n_u16(uint16_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) {__p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vdup_n_s8(int8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } #else __ai int8x8_t vdup_n_s8(int8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, 
__p0}; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vdup_n_f32(float32_t __p0) { float32x2_t __ret; __ret = (float32x2_t) {__p0, __p0}; return __ret; } #else __ai float32x2_t vdup_n_f32(float32_t __p0) { float32x2_t __ret; __ret = (float32x2_t) {__p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vdup_n_f16(__p0) __extension__ ({ \ float16x4_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16x4_t) {__s0, __s0, __s0, __s0}; \ __ret; \ }) #else #define vdup_n_f16(__p0) __extension__ ({ \ float16x4_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16x4_t) {__s0, __s0, __s0, __s0}; \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vdup_n_s32(int32_t __p0) { int32x2_t __ret; __ret = (int32x2_t) {__p0, __p0}; return __ret; } #else __ai int32x2_t vdup_n_s32(int32_t __p0) { int32x2_t __ret; __ret = (int32x2_t) {__p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vdup_n_s64(int64_t __p0) { int64x1_t __ret; __ret = (int64x1_t) {__p0}; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vdup_n_s16(int16_t __p0) { int16x4_t __ret; __ret = (int16x4_t) {__p0, __p0, __p0, __p0}; return __ret; } #else __ai int16x4_t vdup_n_s16(int16_t __p0) { int16x4_t __ret; __ret = (int16x4_t) {__p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t veorq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __p0 ^ __p1; return __ret; } #else __ai uint8x16_t veorq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 ^ __rev1; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t veorq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __p0 ^ __p1; return __ret; } #else __ai uint32x4_t veorq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 ^ __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t veorq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = __p0 ^ __p1; return __ret; } #else __ai uint64x2_t veorq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 ^ __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t veorq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __p0 ^ __p1; return __ret; } #else __ai uint16x8_t veorq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 
0); __ret = __rev0 ^ __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t veorq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __p0 ^ __p1; return __ret; } #else __ai int8x16_t veorq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 ^ __rev1; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t veorq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __p0 ^ __p1; return __ret; } #else __ai int32x4_t veorq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 ^ __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t veorq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = __p0 ^ __p1; return __ret; } #else __ai int64x2_t veorq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 ^ __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t veorq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __p0 ^ __p1; return __ret; } #else __ai int16x8_t veorq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 ^ __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t veor_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __p0 ^ __p1; return __ret; } #else __ai uint8x8_t veor_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 ^ __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t veor_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __p0 ^ __p1; return __ret; } #else __ai uint32x2_t veor_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 ^ __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t veor_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = __p0 ^ __p1; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t veor_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __p0 ^ __p1; return __ret; } #else __ai uint16x4_t veor_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 ^ __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t veor_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __p0 ^ __p1; return __ret; } #else __ai int8x8_t veor_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 ^ __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t veor_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = __p0 ^ __p1; return __ret; } #else __ai int32x2_t veor_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 ^ __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t veor_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = __p0 ^ __p1; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int16x4_t veor_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __p0 ^ __p1; return __ret; } #else __ai int16x4_t veor_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 ^ __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vext_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8_t __ret; \ poly8x8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ __ret = (poly8x8_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 4); \ __ret; \ }) #else #define vext_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8_t __ret; \ poly8x8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly8x8_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 4); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vext_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4_t __ret; \ poly16x4_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ __ret = (poly16x4_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 5); \ __ret; \ }) #else #define vext_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4_t __ret; \ poly16x4_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (poly16x4_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 5); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vextq_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ __ret = (poly8x16_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 36); \ __ret; \ }) #else #define vextq_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16_t 
__ret; \ poly8x16_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly8x16_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 36); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vextq_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ __ret = (poly16x8_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 37); \ __ret; \ }) #else #define vextq_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly16x8_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 37); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vextq_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ __ret = (uint8x16_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \ __ret; \ }) #else #define vextq_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x16_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vextq_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ __ret = (uint32x4_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 50); \ __ret; \ }) #else #define vextq_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (uint32x4_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vextq_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ __ret = (uint64x2_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \ __ret; \ }) #else #define vextq_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (uint64x2_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef 
__LITTLE_ENDIAN__ #define vextq_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ __ret = (uint16x8_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \ __ret; \ }) #else #define vextq_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint16x8_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vextq_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ __ret = (int8x16_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \ __ret; \ }) #else #define vextq_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x16_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vextq_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4_t __ret; \ float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ __ret = (float32x4_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 41); \ __ret; \ }) #else #define vextq_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4_t __ret; \ float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (float32x4_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 41); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vextq_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ __ret = (int32x4_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \ __ret; \ }) #else #define vextq_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int32x4_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vextq_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ __ret = (int64x2_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 35); \ __ret; \ }) #else #define vextq_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ 
__ret = (int64x2_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 35); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vextq_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ __ret = (int16x8_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ __ret; \ }) #else #define vextq_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16x8_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vext_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ __ret = (uint8x8_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 16); \ __ret; \ }) #else #define vext_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x8_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vext_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ __ret = (uint32x2_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 18); \ __ret; \ }) #else #define vext_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (uint32x2_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vext_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __ret; \ uint64x1_t __s0 = __p0; \ uint64x1_t __s1 = __p1; \ __ret = (uint64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vext_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ __ret = (uint16x4_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 17); \ __ret; \ }) #else #define vext_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vext_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ __ret = (int8x8_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 0); \ __ret; \ }) #else #define vext_s8(__p0, 
__p1, __p2) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x8_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vext_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2_t __ret; \ float32x2_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ __ret = (float32x2_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 9); \ __ret; \ }) #else #define vext_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2_t __ret; \ float32x2_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (float32x2_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 9); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vext_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ __ret = (int32x2_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \ __ret; \ }) #else #define vext_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (int32x2_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vext_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1_t __ret; \ int64x1_t __s0 = __p0; \ int64x1_t __s1 = __p1; \ __ret = (int64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vext_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ __ret = (int16x4_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \ __ret; \ }) #else #define vext_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vget_high_p8(poly8x16_t __p0) { poly8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 8, 9, 10, 11, 12, 13, 14, 15); return __ret; } #else __ai poly8x8_t vget_high_p8(poly8x16_t __p0) { poly8x8_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 8, 9, 10, 11, 12, 13, 14, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai poly8x8_t __noswap_vget_high_p8(poly8x16_t __p0) { poly8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 8, 9, 10, 11, 12, 13, 14, 15); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vget_high_p16(poly16x8_t __p0) { poly16x4_t 
__ret; __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7); return __ret; } #else __ai poly16x4_t vget_high_p16(poly16x8_t __p0) { poly16x4_t __ret; poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 4, 5, 6, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vget_high_u8(uint8x16_t __p0) { uint8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 8, 9, 10, 11, 12, 13, 14, 15); return __ret; } #else __ai uint8x8_t vget_high_u8(uint8x16_t __p0) { uint8x8_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 8, 9, 10, 11, 12, 13, 14, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai uint8x8_t __noswap_vget_high_u8(uint8x16_t __p0) { uint8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 8, 9, 10, 11, 12, 13, 14, 15); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vget_high_u32(uint32x4_t __p0) { uint32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 2, 3); return __ret; } #else __ai uint32x2_t vget_high_u32(uint32x4_t __p0) { uint32x2_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 2, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai uint32x2_t __noswap_vget_high_u32(uint32x4_t __p0) { uint32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 2, 3); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vget_high_u64(uint64x2_t __p0) { uint64x1_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1); return __ret; } #else __ai uint64x1_t vget_high_u64(uint64x2_t __p0) { uint64x1_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vget_high_u16(uint16x8_t __p0) { uint16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7); return __ret; } #else __ai uint16x4_t vget_high_u16(uint16x8_t __p0) { uint16x4_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 4, 5, 6, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai uint16x4_t __noswap_vget_high_u16(uint16x8_t __p0) { uint16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vget_high_s8(int8x16_t __p0) { int8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 8, 9, 10, 11, 12, 13, 14, 15); return __ret; } #else __ai int8x8_t vget_high_s8(int8x16_t __p0) { int8x8_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 8, 9, 10, 11, 12, 13, 14, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai int8x8_t __noswap_vget_high_s8(int8x16_t __p0) { int8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 8, 9, 10, 11, 12, 13, 14, 15); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vget_high_f32(float32x4_t __p0) { float32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 2, 3); return __ret; } #else __ai float32x2_t 
vget_high_f32(float32x4_t __p0) { float32x2_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 2, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai float32x2_t __noswap_vget_high_f32(float32x4_t __p0) { float32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 2, 3); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float16x4_t vget_high_f16(float16x8_t __p0) { float16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7); return __ret; } #else __ai float16x4_t vget_high_f16(float16x8_t __p0) { float16x4_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 4, 5, 6, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai float16x4_t __noswap_vget_high_f16(float16x8_t __p0) { float16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vget_high_s32(int32x4_t __p0) { int32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 2, 3); return __ret; } #else __ai int32x2_t vget_high_s32(int32x4_t __p0) { int32x2_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 2, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int32x2_t __noswap_vget_high_s32(int32x4_t __p0) { int32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 2, 3); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x1_t vget_high_s64(int64x2_t __p0) { int64x1_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1); return __ret; } #else __ai int64x1_t vget_high_s64(int64x2_t __p0) { int64x1_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vget_high_s16(int16x8_t __p0) { int16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7); return __ret; } #else __ai int16x4_t vget_high_s16(int16x8_t __p0) { int16x4_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 4, 5, 6, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) { int16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vget_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x8_t __s0 = __p0; \ __ret = (poly8_t) __builtin_neon_vget_lane_i8((poly8x8_t)__s0, __p1); \ __ret; \ }) #else #define vget_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x8_t __s0 = __p0; \ poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly8_t) __builtin_neon_vget_lane_i8((poly8x8_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vget_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x8_t __s0 = __p0; \ __ret = (poly8_t) __builtin_neon_vget_lane_i8((poly8x8_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vget_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x4_t __s0 = __p0; \ __ret = (poly16_t) __builtin_neon_vget_lane_i16((poly16x4_t)__s0, __p1); \ __ret; \ }) #else #define vget_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ 
poly16x4_t __s0 = __p0; \ poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (poly16_t) __builtin_neon_vget_lane_i16((poly16x4_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vget_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x4_t __s0 = __p0; \ __ret = (poly16_t) __builtin_neon_vget_lane_i16((poly16x4_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vgetq_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x16_t __s0 = __p0; \ __ret = (poly8_t) __builtin_neon_vgetq_lane_i8((poly8x16_t)__s0, __p1); \ __ret; \ }) #else #define vgetq_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x16_t __s0 = __p0; \ poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly8_t) __builtin_neon_vgetq_lane_i8((poly8x16_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vgetq_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x16_t __s0 = __p0; \ __ret = (poly8_t) __builtin_neon_vgetq_lane_i8((poly8x16_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vgetq_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x8_t __s0 = __p0; \ __ret = (poly16_t) __builtin_neon_vgetq_lane_i16((poly16x8_t)__s0, __p1); \ __ret; \ }) #else #define vgetq_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x8_t __s0 = __p0; \ poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly16_t) __builtin_neon_vgetq_lane_i16((poly16x8_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vgetq_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x8_t __s0 = __p0; \ __ret = (poly16_t) __builtin_neon_vgetq_lane_i16((poly16x8_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vgetq_lane_u8(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint8x16_t __s0 = __p0; \ __ret = (uint8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__s0, __p1); \ __ret; \ }) #else #define vgetq_lane_u8(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vgetq_lane_u8(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint8x16_t __s0 = __p0; \ __ret = (uint8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vgetq_lane_u32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint32x4_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, __p1); \ __ret; \ }) #else #define vgetq_lane_u32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vgetq_lane_u32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint32x4_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vgetq_lane_u64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ uint64x2_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vgetq_lane_i64((int64x2_t)__s0, __p1); \ __ret; \ }) #else #define vgetq_lane_u64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __rev0; __rev0 = 
__builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint64_t) __builtin_neon_vgetq_lane_i64((int64x2_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vgetq_lane_u64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ uint64x2_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vgetq_lane_i64((int64x2_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vgetq_lane_u16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint16x8_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vgetq_lane_i16((int16x8_t)__s0, __p1); \ __ret; \ }) #else #define vgetq_lane_u16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint16_t) __builtin_neon_vgetq_lane_i16((int16x8_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vgetq_lane_u16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint16x8_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vgetq_lane_i16((int16x8_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vgetq_lane_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8x16_t __s0 = __p0; \ __ret = (int8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__s0, __p1); \ __ret; \ }) #else #define vgetq_lane_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vgetq_lane_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8x16_t __s0 = __p0; \ __ret = (int8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vgetq_lane_f32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ float32x4_t __s0 = __p0; \ __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, __p1); \ __ret; \ }) #else #define vgetq_lane_f32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ float32x4_t __s0 = __p0; \ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vgetq_lane_f32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ float32x4_t __s0 = __p0; \ __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vgetq_lane_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, __p1); \ __ret; \ }) #else #define vgetq_lane_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vgetq_lane_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vgetq_lane_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vgetq_lane_i64((int64x2_t)__s0, __p1); \ __ret; \ }) #else #define vgetq_lane_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int64_t) __builtin_neon_vgetq_lane_i64((int64x2_t)__rev0, 
__p1); \ __ret; \ }) #define __noswap_vgetq_lane_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vgetq_lane_i64((int64x2_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vgetq_lane_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vgetq_lane_i16((int16x8_t)__s0, __p1); \ __ret; \ }) #else #define vgetq_lane_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16_t) __builtin_neon_vgetq_lane_i16((int16x8_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vgetq_lane_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vgetq_lane_i16((int16x8_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vget_lane_u8(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint8x8_t __s0 = __p0; \ __ret = (uint8_t) __builtin_neon_vget_lane_i8((int8x8_t)__s0, __p1); \ __ret; \ }) #else #define vget_lane_u8(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8_t) __builtin_neon_vget_lane_i8((int8x8_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vget_lane_u8(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint8x8_t __s0 = __p0; \ __ret = (uint8_t) __builtin_neon_vget_lane_i8((int8x8_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vget_lane_u32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint32x2_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vget_lane_i32((int32x2_t)__s0, __p1); \ __ret; \ }) #else #define vget_lane_u32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint32_t) __builtin_neon_vget_lane_i32((int32x2_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vget_lane_u32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint32x2_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vget_lane_i32((int32x2_t)__s0, __p1); \ __ret; \ }) #endif #define vget_lane_u64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ uint64x1_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vget_lane_i64((int64x1_t)__s0, __p1); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vget_lane_u16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint16x4_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vget_lane_i16((int16x4_t)__s0, __p1); \ __ret; \ }) #else #define vget_lane_u16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint16_t) __builtin_neon_vget_lane_i16((int16x4_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vget_lane_u16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint16x4_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vget_lane_i16((int16x4_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vget_lane_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8x8_t __s0 = __p0; \ __ret = (int8_t) __builtin_neon_vget_lane_i8((int8x8_t)__s0, __p1); \ __ret; \ }) #else #define vget_lane_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8_t) __builtin_neon_vget_lane_i8((int8x8_t)__rev0, 
__p1); \ __ret; \ }) #define __noswap_vget_lane_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8x8_t __s0 = __p0; \ __ret = (int8_t) __builtin_neon_vget_lane_i8((int8x8_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vget_lane_f32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ float32x2_t __s0 = __p0; \ __ret = (float32_t) __builtin_neon_vget_lane_f32((float32x2_t)__s0, __p1); \ __ret; \ }) #else #define vget_lane_f32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ float32x2_t __s0 = __p0; \ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (float32_t) __builtin_neon_vget_lane_f32((float32x2_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vget_lane_f32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ float32x2_t __s0 = __p0; \ __ret = (float32_t) __builtin_neon_vget_lane_f32((float32x2_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vget_lane_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32x2_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vget_lane_i32((int32x2_t)__s0, __p1); \ __ret; \ }) #else #define vget_lane_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int32_t) __builtin_neon_vget_lane_i32((int32x2_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vget_lane_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32x2_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vget_lane_i32((int32x2_t)__s0, __p1); \ __ret; \ }) #endif #define vget_lane_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64x1_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vget_lane_i64((int64x1_t)__s0, __p1); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vget_lane_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16x4_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vget_lane_i16((int16x4_t)__s0, __p1); \ __ret; \ }) #else #define vget_lane_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int16_t) __builtin_neon_vget_lane_i16((int16x4_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vget_lane_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16x4_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vget_lane_i16((int16x4_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vget_low_p8(poly8x16_t __p0) { poly8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3, 4, 5, 6, 7); return __ret; } #else __ai poly8x8_t vget_low_p8(poly8x16_t __p0) { poly8x8_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3, 4, 5, 6, 7); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vget_low_p16(poly16x8_t __p0) { poly16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3); return __ret; } #else __ai poly16x4_t vget_low_p16(poly16x8_t __p0) { poly16x4_t __ret; poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vget_low_u8(uint8x16_t __p0) { uint8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3, 4, 5, 6, 
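Every lane-extraction macro above follows the same endianness pattern used throughout this header: on little-endian targets the Clang builtin is called directly, on big-endian targets the operand is first lane-reversed with __builtin_shufflevector so that lane numbering matches the in-memory element order, and a __noswap_ variant is kept for internal callers whose operands are already reversed. The lane index must be an integer constant expression. A minimal usage sketch (hypothetical data, assuming a NEON-capable target; vld1_s16 is defined further down in this header):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    int16_t data[4] = { 10, 20, 30, 40 };
    int16x4_t v = vld1_s16(data);          /* load four 16-bit lanes */
    int16_t lane2 = vget_lane_s16(v, 2);   /* extract lane 2 -> 30   */
    printf("lane 2 = %d\n", (int)lane2);
    return 0;
}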
7); return __ret; } #else __ai uint8x8_t vget_low_u8(uint8x16_t __p0) { uint8x8_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3, 4, 5, 6, 7); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vget_low_u32(uint32x4_t __p0) { uint32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1); return __ret; } #else __ai uint32x2_t vget_low_u32(uint32x4_t __p0) { uint32x2_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vget_low_u64(uint64x2_t __p0) { uint64x1_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0); return __ret; } #else __ai uint64x1_t vget_low_u64(uint64x2_t __p0) { uint64x1_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vget_low_u16(uint16x8_t __p0) { uint16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3); return __ret; } #else __ai uint16x4_t vget_low_u16(uint16x8_t __p0) { uint16x4_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vget_low_s8(int8x16_t __p0) { int8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3, 4, 5, 6, 7); return __ret; } #else __ai int8x8_t vget_low_s8(int8x16_t __p0) { int8x8_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3, 4, 5, 6, 7); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vget_low_f32(float32x4_t __p0) { float32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1); return __ret; } #else __ai float32x2_t vget_low_f32(float32x4_t __p0) { float32x2_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float16x4_t vget_low_f16(float16x8_t __p0) { float16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3); return __ret; } #else __ai float16x4_t vget_low_f16(float16x8_t __p0) { float16x4_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vget_low_s32(int32x4_t __p0) { int32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1); return __ret; } #else __ai int32x2_t vget_low_s32(int32x4_t __p0) { int32x2_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ 
__ai int64x1_t vget_low_s64(int64x2_t __p0) { int64x1_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0); return __ret; } #else __ai int64x1_t vget_low_s64(int64x2_t __p0) { int64x1_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vget_low_s16(int16x8_t __p0) { int16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3); return __ret; } #else __ai int16x4_t vget_low_s16(int16x8_t __p0) { int16x4_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vhaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vhaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vhaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vhaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vhaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8_t vhaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vhaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16_t vhaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef 
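vget_low_* returns the low half of a 128-bit (q) vector as a 64-bit (d) vector. In the big-endian branch the input is reversed, the low lanes are selected, and the result is reversed again, so "low" always means lanes 0..N/2-1 in the architectural numbering rather than the lowest-addressed bytes. A minimal sketch of splitting a q register into halves (hypothetical buffer; vget_high_u8 and vld1q_u8 are defined elsewhere in this header):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    uint8_t bytes[16];
    for (int i = 0; i < 16; ++i) bytes[i] = (uint8_t)i;

    uint8x16_t q  = vld1q_u8(bytes);   /* lanes 0..15 */
    uint8x8_t  lo = vget_low_u8(q);    /* lanes 0..7  */
    uint8x8_t  hi = vget_high_u8(q);   /* lanes 8..15 */

    printf("lo[0]=%u hi[0]=%u\n",
           (unsigned)vget_lane_u8(lo, 0), (unsigned)vget_lane_u8(hi, 0));
    return 0;
}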
__LITTLE_ENDIAN__ __ai int32x4_t vhaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4_t vhaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vhaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8_t vhaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vhadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vhadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vhadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vhadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vhadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4_t vhadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vhadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vhadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) 
__builtin_neon_vhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vhadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2_t vhadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vhadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4_t vhadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vhsubq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vhsubq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vhsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vhsubq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vhsubq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vhsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vhsubq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8_t vhsubq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vhsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vhsubq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else __ai 
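vhadd/vhaddq implement a halving add: each pair of lanes is summed in double-width precision and the result is shifted right by one, so the addition can never overflow the element type (the related vrhadd intrinsics, defined elsewhere in this header, round instead of truncating). The small integer passed as the last argument of __builtin_neon_vhadd_v / vhaddq_v (0, 1, 2, 16, 17, 18, 32, 33, 34, 48, 49, 50, ...) is Clang's internal element-type code, not a lane count. A sketch of averaging two byte vectors without overflow (hypothetical data; vld1_u8 and vst1_u8 are defined elsewhere in the header):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    uint8_t row0[8] = { 10, 20, 30, 40, 50, 60, 70, 255 };
    uint8_t row1[8] = { 30, 20, 10,  0, 50, 60, 70, 255 };

    uint8x8_t avg = vhadd_u8(vld1_u8(row0), vld1_u8(row1));
    /* lane 7 is (255 + 255) >> 1 = 255, not a wrapped 8-bit sum */

    uint8_t out[8];
    vst1_u8(out, avg);
    for (int i = 0; i < 8; ++i) printf("%u ", out[i]);
    printf("\n");
    return 0;
}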
int8x16_t vhsubq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vhsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vhsubq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4_t vhsubq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vhsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vhsubq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8_t vhsubq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vhsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vhsub_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vhsub_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vhsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vhsub_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vhsub_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vhsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vhsub_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4_t vhsub_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vhsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, 
__ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vhsub_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vhsub_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vhsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vhsub_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2_t vhsub_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vhsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vhsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vld1_p8(__p0) __extension__ ({ \ poly8x8_t __ret; \ __ret = (poly8x8_t) __builtin_neon_vld1_v(__p0, 4); \ __ret; \ }) #else #define vld1_p8(__p0) __extension__ ({ \ poly8x8_t __ret; \ __ret = (poly8x8_t) __builtin_neon_vld1_v(__p0, 4); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_p16(__p0) __extension__ ({ \ poly16x4_t __ret; \ __ret = (poly16x4_t) __builtin_neon_vld1_v(__p0, 5); \ __ret; \ }) #else #define vld1_p16(__p0) __extension__ ({ \ poly16x4_t __ret; \ __ret = (poly16x4_t) __builtin_neon_vld1_v(__p0, 5); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_p8(__p0) __extension__ ({ \ poly8x16_t __ret; \ __ret = (poly8x16_t) __builtin_neon_vld1q_v(__p0, 36); \ __ret; \ }) #else #define vld1q_p8(__p0) __extension__ ({ \ poly8x16_t __ret; \ __ret = (poly8x16_t) __builtin_neon_vld1q_v(__p0, 36); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_p16(__p0) __extension__ ({ \ poly16x8_t __ret; \ __ret = (poly16x8_t) __builtin_neon_vld1q_v(__p0, 37); \ __ret; \ }) #else #define vld1q_p16(__p0) __extension__ ({ \ poly16x8_t __ret; \ __ret = (poly16x8_t) __builtin_neon_vld1q_v(__p0, 37); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_u8(__p0) __extension__ ({ \ uint8x16_t __ret; \ __ret = (uint8x16_t) __builtin_neon_vld1q_v(__p0, 48); \ __ret; \ }) #else #define vld1q_u8(__p0) __extension__ ({ \ uint8x16_t __ret; \ 
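vhsub/vhsubq are the subtracting counterparts: each lane computes (a - b) >> 1 in double-width arithmetic, which keeps even extreme signed differences representable. A minimal sketch under the same assumptions as the previous example (vst1_s16 is defined later in the header):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    int16_t xs[4] = {  32767, 100, -100, 0 };
    int16_t ys[4] = { -32768, 300,  100, 0 };

    int16x4_t d = vhsub_s16(vld1_s16(xs), vld1_s16(ys));
    /* lane 0 is (32767 - (-32768)) >> 1 = 32767, although the
       full 17-bit difference would not fit in int16_t */

    int16_t out[4];
    vst1_s16(out, d);
    for (int i = 0; i < 4; ++i) printf("%d ", out[i]);
    printf("\n");
    return 0;
}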
__ret = (uint8x16_t) __builtin_neon_vld1q_v(__p0, 48); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_u32(__p0) __extension__ ({ \ uint32x4_t __ret; \ __ret = (uint32x4_t) __builtin_neon_vld1q_v(__p0, 50); \ __ret; \ }) #else #define vld1q_u32(__p0) __extension__ ({ \ uint32x4_t __ret; \ __ret = (uint32x4_t) __builtin_neon_vld1q_v(__p0, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_u64(__p0) __extension__ ({ \ uint64x2_t __ret; \ __ret = (uint64x2_t) __builtin_neon_vld1q_v(__p0, 51); \ __ret; \ }) #else #define vld1q_u64(__p0) __extension__ ({ \ uint64x2_t __ret; \ __ret = (uint64x2_t) __builtin_neon_vld1q_v(__p0, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_u16(__p0) __extension__ ({ \ uint16x8_t __ret; \ __ret = (uint16x8_t) __builtin_neon_vld1q_v(__p0, 49); \ __ret; \ }) #else #define vld1q_u16(__p0) __extension__ ({ \ uint16x8_t __ret; \ __ret = (uint16x8_t) __builtin_neon_vld1q_v(__p0, 49); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_s8(__p0) __extension__ ({ \ int8x16_t __ret; \ __ret = (int8x16_t) __builtin_neon_vld1q_v(__p0, 32); \ __ret; \ }) #else #define vld1q_s8(__p0) __extension__ ({ \ int8x16_t __ret; \ __ret = (int8x16_t) __builtin_neon_vld1q_v(__p0, 32); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_f32(__p0) __extension__ ({ \ float32x4_t __ret; \ __ret = (float32x4_t) __builtin_neon_vld1q_v(__p0, 41); \ __ret; \ }) #else #define vld1q_f32(__p0) __extension__ ({ \ float32x4_t __ret; \ __ret = (float32x4_t) __builtin_neon_vld1q_v(__p0, 41); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_s32(__p0) __extension__ ({ \ int32x4_t __ret; \ __ret = (int32x4_t) __builtin_neon_vld1q_v(__p0, 34); \ __ret; \ }) #else #define vld1q_s32(__p0) __extension__ ({ \ int32x4_t __ret; \ __ret = (int32x4_t) __builtin_neon_vld1q_v(__p0, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_s64(__p0) __extension__ ({ \ int64x2_t __ret; \ __ret = (int64x2_t) __builtin_neon_vld1q_v(__p0, 35); \ __ret; \ }) #else #define vld1q_s64(__p0) __extension__ ({ \ int64x2_t __ret; \ __ret = (int64x2_t) __builtin_neon_vld1q_v(__p0, 35); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_s16(__p0) __extension__ ({ \ int16x8_t __ret; \ __ret = (int16x8_t) __builtin_neon_vld1q_v(__p0, 33); \ __ret; \ }) #else #define vld1q_s16(__p0) __extension__ ({ \ int16x8_t __ret; \ __ret = (int16x8_t) __builtin_neon_vld1q_v(__p0, 33); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_u8(__p0) __extension__ ({ \ uint8x8_t __ret; \ __ret = (uint8x8_t) __builtin_neon_vld1_v(__p0, 16); \ __ret; \ }) #else #define vld1_u8(__p0) __extension__ ({ \ uint8x8_t __ret; \ __ret = (uint8x8_t) __builtin_neon_vld1_v(__p0, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_u32(__p0) __extension__ ({ \ 
uint32x2_t __ret; \ __ret = (uint32x2_t) __builtin_neon_vld1_v(__p0, 18); \ __ret; \ }) #else #define vld1_u32(__p0) __extension__ ({ \ uint32x2_t __ret; \ __ret = (uint32x2_t) __builtin_neon_vld1_v(__p0, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vld1_u64(__p0) __extension__ ({ \ uint64x1_t __ret; \ __ret = (uint64x1_t) __builtin_neon_vld1_v(__p0, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1_u16(__p0) __extension__ ({ \ uint16x4_t __ret; \ __ret = (uint16x4_t) __builtin_neon_vld1_v(__p0, 17); \ __ret; \ }) #else #define vld1_u16(__p0) __extension__ ({ \ uint16x4_t __ret; \ __ret = (uint16x4_t) __builtin_neon_vld1_v(__p0, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_s8(__p0) __extension__ ({ \ int8x8_t __ret; \ __ret = (int8x8_t) __builtin_neon_vld1_v(__p0, 0); \ __ret; \ }) #else #define vld1_s8(__p0) __extension__ ({ \ int8x8_t __ret; \ __ret = (int8x8_t) __builtin_neon_vld1_v(__p0, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_f32(__p0) __extension__ ({ \ float32x2_t __ret; \ __ret = (float32x2_t) __builtin_neon_vld1_v(__p0, 9); \ __ret; \ }) #else #define vld1_f32(__p0) __extension__ ({ \ float32x2_t __ret; \ __ret = (float32x2_t) __builtin_neon_vld1_v(__p0, 9); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_s32(__p0) __extension__ ({ \ int32x2_t __ret; \ __ret = (int32x2_t) __builtin_neon_vld1_v(__p0, 2); \ __ret; \ }) #else #define vld1_s32(__p0) __extension__ ({ \ int32x2_t __ret; \ __ret = (int32x2_t) __builtin_neon_vld1_v(__p0, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vld1_s64(__p0) __extension__ ({ \ int64x1_t __ret; \ __ret = (int64x1_t) __builtin_neon_vld1_v(__p0, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1_s16(__p0) __extension__ ({ \ int16x4_t __ret; \ __ret = (int16x4_t) __builtin_neon_vld1_v(__p0, 1); \ __ret; \ }) #else #define vld1_s16(__p0) __extension__ ({ \ int16x4_t __ret; \ __ret = (int16x4_t) __builtin_neon_vld1_v(__p0, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_dup_p8(__p0) __extension__ ({ \ poly8x8_t __ret; \ __ret = (poly8x8_t) __builtin_neon_vld1_dup_v(__p0, 4); \ __ret; \ }) #else #define vld1_dup_p8(__p0) __extension__ ({ \ poly8x8_t __ret; \ __ret = (poly8x8_t) __builtin_neon_vld1_dup_v(__p0, 4); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_dup_p16(__p0) __extension__ ({ \ poly16x4_t __ret; \ __ret = (poly16x4_t) __builtin_neon_vld1_dup_v(__p0, 5); \ __ret; \ }) #else #define vld1_dup_p16(__p0) __extension__ ({ \ poly16x4_t __ret; \ __ret = (poly16x4_t) __builtin_neon_vld1_dup_v(__p0, 5); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_p8(__p0) __extension__ ({ \ poly8x16_t __ret; \ __ret = (poly8x16_t) __builtin_neon_vld1q_dup_v(__p0, 36); \ __ret; \ }) #else #define vld1q_dup_p8(__p0) __extension__ ({ \ poly8x16_t __ret; \ __ret = (poly8x16_t) __builtin_neon_vld1q_dup_v(__p0, 36); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_p16(__p0) __extension__ 
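vld1/vld1q load a full 64-bit or 128-bit vector from a pointer. On big-endian targets the result is lane-reversed immediately after the load so that lane 0 always corresponds to the lowest address, matching little-endian behaviour; the single-element 64-bit loads (vld1_u64, vld1_s64) need no swap and therefore have no #else branch. A sketch of a load/compute/store round trip (hypothetical buffers; vmulq_n_f32 and vst1q_f32 are defined elsewhere in this header):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    float src[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
    float dst[4];

    float32x4_t v = vld1q_f32(src);   /* load 4 floats         */
    v = vmulq_n_f32(v, 2.0f);         /* scale every lane by 2 */
    vst1q_f32(dst, v);                /* store 4 floats        */

    for (int i = 0; i < 4; ++i) printf("%.1f ", dst[i]);
    printf("\n");
    return 0;
}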
({ \ poly16x8_t __ret; \ __ret = (poly16x8_t) __builtin_neon_vld1q_dup_v(__p0, 37); \ __ret; \ }) #else #define vld1q_dup_p16(__p0) __extension__ ({ \ poly16x8_t __ret; \ __ret = (poly16x8_t) __builtin_neon_vld1q_dup_v(__p0, 37); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_u8(__p0) __extension__ ({ \ uint8x16_t __ret; \ __ret = (uint8x16_t) __builtin_neon_vld1q_dup_v(__p0, 48); \ __ret; \ }) #else #define vld1q_dup_u8(__p0) __extension__ ({ \ uint8x16_t __ret; \ __ret = (uint8x16_t) __builtin_neon_vld1q_dup_v(__p0, 48); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_u32(__p0) __extension__ ({ \ uint32x4_t __ret; \ __ret = (uint32x4_t) __builtin_neon_vld1q_dup_v(__p0, 50); \ __ret; \ }) #else #define vld1q_dup_u32(__p0) __extension__ ({ \ uint32x4_t __ret; \ __ret = (uint32x4_t) __builtin_neon_vld1q_dup_v(__p0, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_u64(__p0) __extension__ ({ \ uint64x2_t __ret; \ __ret = (uint64x2_t) __builtin_neon_vld1q_dup_v(__p0, 51); \ __ret; \ }) #else #define vld1q_dup_u64(__p0) __extension__ ({ \ uint64x2_t __ret; \ __ret = (uint64x2_t) __builtin_neon_vld1q_dup_v(__p0, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_u16(__p0) __extension__ ({ \ uint16x8_t __ret; \ __ret = (uint16x8_t) __builtin_neon_vld1q_dup_v(__p0, 49); \ __ret; \ }) #else #define vld1q_dup_u16(__p0) __extension__ ({ \ uint16x8_t __ret; \ __ret = (uint16x8_t) __builtin_neon_vld1q_dup_v(__p0, 49); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_s8(__p0) __extension__ ({ \ int8x16_t __ret; \ __ret = (int8x16_t) __builtin_neon_vld1q_dup_v(__p0, 32); \ __ret; \ }) #else #define vld1q_dup_s8(__p0) __extension__ ({ \ int8x16_t __ret; \ __ret = (int8x16_t) __builtin_neon_vld1q_dup_v(__p0, 32); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_f32(__p0) __extension__ ({ \ float32x4_t __ret; \ __ret = (float32x4_t) __builtin_neon_vld1q_dup_v(__p0, 41); \ __ret; \ }) #else #define vld1q_dup_f32(__p0) __extension__ ({ \ float32x4_t __ret; \ __ret = (float32x4_t) __builtin_neon_vld1q_dup_v(__p0, 41); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_s32(__p0) __extension__ ({ \ int32x4_t __ret; \ __ret = (int32x4_t) __builtin_neon_vld1q_dup_v(__p0, 34); \ __ret; \ }) #else #define vld1q_dup_s32(__p0) __extension__ ({ \ int32x4_t __ret; \ __ret = (int32x4_t) __builtin_neon_vld1q_dup_v(__p0, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_s64(__p0) __extension__ ({ \ int64x2_t __ret; \ __ret = (int64x2_t) __builtin_neon_vld1q_dup_v(__p0, 35); \ __ret; \ }) #else #define vld1q_dup_s64(__p0) __extension__ ({ \ int64x2_t __ret; \ __ret = (int64x2_t) __builtin_neon_vld1q_dup_v(__p0, 35); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_s16(__p0) __extension__ ({ \ int16x8_t __ret; \ __ret = (int16x8_t) 
__builtin_neon_vld1q_dup_v(__p0, 33); \ __ret; \ }) #else #define vld1q_dup_s16(__p0) __extension__ ({ \ int16x8_t __ret; \ __ret = (int16x8_t) __builtin_neon_vld1q_dup_v(__p0, 33); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_dup_u8(__p0) __extension__ ({ \ uint8x8_t __ret; \ __ret = (uint8x8_t) __builtin_neon_vld1_dup_v(__p0, 16); \ __ret; \ }) #else #define vld1_dup_u8(__p0) __extension__ ({ \ uint8x8_t __ret; \ __ret = (uint8x8_t) __builtin_neon_vld1_dup_v(__p0, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_dup_u32(__p0) __extension__ ({ \ uint32x2_t __ret; \ __ret = (uint32x2_t) __builtin_neon_vld1_dup_v(__p0, 18); \ __ret; \ }) #else #define vld1_dup_u32(__p0) __extension__ ({ \ uint32x2_t __ret; \ __ret = (uint32x2_t) __builtin_neon_vld1_dup_v(__p0, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vld1_dup_u64(__p0) __extension__ ({ \ uint64x1_t __ret; \ __ret = (uint64x1_t) __builtin_neon_vld1_dup_v(__p0, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1_dup_u16(__p0) __extension__ ({ \ uint16x4_t __ret; \ __ret = (uint16x4_t) __builtin_neon_vld1_dup_v(__p0, 17); \ __ret; \ }) #else #define vld1_dup_u16(__p0) __extension__ ({ \ uint16x4_t __ret; \ __ret = (uint16x4_t) __builtin_neon_vld1_dup_v(__p0, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_dup_s8(__p0) __extension__ ({ \ int8x8_t __ret; \ __ret = (int8x8_t) __builtin_neon_vld1_dup_v(__p0, 0); \ __ret; \ }) #else #define vld1_dup_s8(__p0) __extension__ ({ \ int8x8_t __ret; \ __ret = (int8x8_t) __builtin_neon_vld1_dup_v(__p0, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_dup_f32(__p0) __extension__ ({ \ float32x2_t __ret; \ __ret = (float32x2_t) __builtin_neon_vld1_dup_v(__p0, 9); \ __ret; \ }) #else #define vld1_dup_f32(__p0) __extension__ ({ \ float32x2_t __ret; \ __ret = (float32x2_t) __builtin_neon_vld1_dup_v(__p0, 9); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_dup_s32(__p0) __extension__ ({ \ int32x2_t __ret; \ __ret = (int32x2_t) __builtin_neon_vld1_dup_v(__p0, 2); \ __ret; \ }) #else #define vld1_dup_s32(__p0) __extension__ ({ \ int32x2_t __ret; \ __ret = (int32x2_t) __builtin_neon_vld1_dup_v(__p0, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vld1_dup_s64(__p0) __extension__ ({ \ int64x1_t __ret; \ __ret = (int64x1_t) __builtin_neon_vld1_dup_v(__p0, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1_dup_s16(__p0) __extension__ ({ \ int16x4_t __ret; \ __ret = (int16x4_t) __builtin_neon_vld1_dup_v(__p0, 1); \ __ret; \ }) #else #define vld1_dup_s16(__p0) __extension__ ({ \ int16x4_t __ret; \ __ret = (int16x4_t) __builtin_neon_vld1_dup_v(__p0, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8_t __ret; \ poly8x8_t __s1 = __p1; \ __ret = (poly8x8_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 4); \ __ret; \ }) #else #define vld1_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8_t __ret; \ poly8x8_t __s1 = __p1; \ poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 
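vld1_dup/vld1q_dup load a single element and broadcast it to every lane of the result, which is the usual way to splat a scalar coefficient that lives in memory. A minimal sketch (hypothetical names; vmulq_f32 and vst1q_f32 are defined elsewhere in this header):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    const float coeff = 0.5f;
    float xs[4]  = { 2.0f, 4.0f, 6.0f, 8.0f };
    float out[4];

    float32x4_t c = vld1q_dup_f32(&coeff);   /* {0.5, 0.5, 0.5, 0.5} */
    vst1q_f32(out, vmulq_f32(vld1q_f32(xs), c));

    for (int i = 0; i < 4; ++i) printf("%.1f ", out[i]);
    printf("\n");
    return 0;
}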
3, 2, 1, 0); \ __ret = (poly8x8_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 4); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4_t __ret; \ poly16x4_t __s1 = __p1; \ __ret = (poly16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 5); \ __ret; \ }) #else #define vld1_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4_t __ret; \ poly16x4_t __s1 = __p1; \ poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (poly16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 5); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16_t __ret; \ poly8x16_t __s1 = __p1; \ __ret = (poly8x16_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 36); \ __ret; \ }) #else #define vld1q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16_t __ret; \ poly8x16_t __s1 = __p1; \ poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly8x16_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 36); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8_t __ret; \ poly16x8_t __s1 = __p1; \ __ret = (poly16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 37); \ __ret; \ }) #else #define vld1q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8_t __ret; \ poly16x8_t __s1 = __p1; \ poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 37); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s1 = __p1; \ __ret = (uint8x16_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 48); \ __ret; \ }) #else #define vld1q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s1 = __p1; \ uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x16_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 48); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s1 = __p1; \ __ret = (uint32x4_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 50); \ __ret; \ }) #else #define vld1q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s1 = __p1; \ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (uint32x4_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s1 = __p1; \ __ret = (uint64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 51); \ __ret; \ }) #else #define 
vld1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s1 = __p1; \ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (uint64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s1 = __p1; \ __ret = (uint16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 49); \ __ret; \ }) #else #define vld1q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s1 = __p1; \ uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 49); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s1 = __p1; \ __ret = (int8x16_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 32); \ __ret; \ }) #else #define vld1q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s1 = __p1; \ int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x16_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 32); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4_t __ret; \ float32x4_t __s1 = __p1; \ __ret = (float32x4_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 41); \ __ret; \ }) #else #define vld1q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4_t __ret; \ float32x4_t __s1 = __p1; \ float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (float32x4_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 41); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s1 = __p1; \ __ret = (int32x4_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 34); \ __ret; \ }) #else #define vld1q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s1 = __p1; \ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int32x4_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s1 = __p1; \ __ret = (int64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 35); \ __ret; \ }) #else #define vld1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s1 = __p1; \ int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (int64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 35); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s1 = __p1; \ __ret = (int16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 33); \ 
__ret; \ }) #else #define vld1q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s1 = __p1; \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 33); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s1 = __p1; \ __ret = (uint8x8_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 16); \ __ret; \ }) #else #define vld1_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s1 = __p1; \ uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x8_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s1 = __p1; \ __ret = (uint32x2_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 18); \ __ret; \ }) #else #define vld1_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s1 = __p1; \ uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (uint32x2_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vld1_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __ret; \ uint64x1_t __s1 = __p1; \ __ret = (uint64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s1 = __p1; \ __ret = (uint16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 17); \ __ret; \ }) #else #define vld1_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s1 = __p1; \ uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s1 = __p1; \ __ret = (int8x8_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 0); \ __ret; \ }) #else #define vld1_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s1 = __p1; \ int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x8_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2_t __ret; \ float32x2_t __s1 = __p1; \ __ret = (float32x2_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 9); \ __ret; \ }) #else #define vld1_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2_t __ret; \ float32x2_t __s1 = __p1; \ float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (float32x2_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 9); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_lane_s32(__p0, __p1, 
__p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s1 = __p1; \ __ret = (int32x2_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 2); \ __ret; \ }) #else #define vld1_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s1 = __p1; \ int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (int32x2_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vld1_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1_t __ret; \ int64x1_t __s1 = __p1; \ __ret = (int64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s1 = __p1; \ __ret = (int16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 1); \ __ret; \ }) #else #define vld1_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s1 = __p1; \ int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_p8_x2(__p0) __extension__ ({ \ poly8x8x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 4); \ __ret; \ }) #else #define vld1_p8_x2(__p0) __extension__ ({ \ poly8x8x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 4); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_p16_x2(__p0) __extension__ ({ \ poly16x4x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 5); \ __ret; \ }) #else #define vld1_p16_x2(__p0) __extension__ ({ \ poly16x4x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 5); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_p8_x2(__p0) __extension__ ({ \ poly8x16x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 36); \ __ret; \ }) #else #define vld1q_p8_x2(__p0) __extension__ ({ \ poly8x16x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 36); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_p16_x2(__p0) __extension__ ({ \ poly16x8x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 37); \ __ret; \ }) #else #define vld1q_p16_x2(__p0) __extension__ ({ \ poly16x8x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 37); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_u8_x2(__p0) __extension__ ({ \ uint8x16x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 48); \ __ret; \ }) #else #define vld1q_u8_x2(__p0) __extension__ ({ \ uint8x16x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 48); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 
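vld1_lane/vld1q_lane load one element from memory into a chosen lane of an existing vector and leave the remaining lanes untouched; as with the extract macros, the lane index must be a constant, and the big-endian branches reverse both the incoming vector and the result around the builtin call. A minimal sketch (hypothetical data; vst1_u16 is defined later in the header):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    uint16_t pool[4] = { 100, 200, 300, 400 };
    uint16_t extra   = 999;

    uint16x4_t v = vld1_u16(pool);        /* {100, 200, 300, 400} */
    v = vld1_lane_u16(&extra, v, 2);      /* {100, 200, 999, 400} */

    uint16_t out[4];
    vst1_u16(out, v);
    for (int i = 0; i < 4; ++i) printf("%u ", out[i]);
    printf("\n");
    return 0;
}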
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_u32_x2(__p0) __extension__ ({ \ uint32x4x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 50); \ __ret; \ }) #else #define vld1q_u32_x2(__p0) __extension__ ({ \ uint32x4x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 50); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_u64_x2(__p0) __extension__ ({ \ uint64x2x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 51); \ __ret; \ }) #else #define vld1q_u64_x2(__p0) __extension__ ({ \ uint64x2x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 51); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_u16_x2(__p0) __extension__ ({ \ uint16x8x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 49); \ __ret; \ }) #else #define vld1q_u16_x2(__p0) __extension__ ({ \ uint16x8x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 49); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_s8_x2(__p0) __extension__ ({ \ int8x16x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 32); \ __ret; \ }) #else #define vld1q_s8_x2(__p0) __extension__ ({ \ int8x16x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 32); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_f32_x2(__p0) __extension__ ({ \ float32x4x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 41); \ __ret; \ }) #else #define vld1q_f32_x2(__p0) __extension__ ({ \ float32x4x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 41); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_s32_x2(__p0) __extension__ ({ \ int32x4x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 34); \ __ret; \ }) #else #define vld1q_s32_x2(__p0) __extension__ ({ \ int32x4x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 34); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_s64_x2(__p0) __extension__ ({ \ int64x2x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 35); \ __ret; \ }) #else #define vld1q_s64_x2(__p0) __extension__ ({ \ int64x2x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 35); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_s16_x2(__p0) __extension__ ({ \ 
int16x8x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 33); \ __ret; \ }) #else #define vld1q_s16_x2(__p0) __extension__ ({ \ int16x8x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 33); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_u8_x2(__p0) __extension__ ({ \ uint8x8x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 16); \ __ret; \ }) #else #define vld1_u8_x2(__p0) __extension__ ({ \ uint8x8x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 16); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_u32_x2(__p0) __extension__ ({ \ uint32x2x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 18); \ __ret; \ }) #else #define vld1_u32_x2(__p0) __extension__ ({ \ uint32x2x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 18); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #define vld1_u64_x2(__p0) __extension__ ({ \ uint64x1x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1_u16_x2(__p0) __extension__ ({ \ uint16x4x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 17); \ __ret; \ }) #else #define vld1_u16_x2(__p0) __extension__ ({ \ uint16x4x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 17); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_s8_x2(__p0) __extension__ ({ \ int8x8x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 0); \ __ret; \ }) #else #define vld1_s8_x2(__p0) __extension__ ({ \ int8x8x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 0); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_f32_x2(__p0) __extension__ ({ \ float32x2x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 9); \ __ret; \ }) #else #define vld1_f32_x2(__p0) __extension__ ({ \ float32x2x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 9); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_s32_x2(__p0) __extension__ ({ \ int32x2x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 2); \ __ret; \ }) #else #define vld1_s32_x2(__p0) __extension__ ({ \ int32x2x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 2); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #define vld1_s64_x2(__p0) __extension__ ({ \ int64x1x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1_s16_x2(__p0) __extension__ ({ \ int16x4x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 1); \ __ret; \ }) #else #define 
vld1_s16_x2(__p0) __extension__ ({ \ int16x4x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 1); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_p8_x3(__p0) __extension__ ({ \ poly8x8x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 4); \ __ret; \ }) #else #define vld1_p8_x3(__p0) __extension__ ({ \ poly8x8x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 4); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_p16_x3(__p0) __extension__ ({ \ poly16x4x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 5); \ __ret; \ }) #else #define vld1_p16_x3(__p0) __extension__ ({ \ poly16x4x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 5); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_p8_x3(__p0) __extension__ ({ \ poly8x16x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 36); \ __ret; \ }) #else #define vld1q_p8_x3(__p0) __extension__ ({ \ poly8x16x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 36); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_p16_x3(__p0) __extension__ ({ \ poly16x8x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 37); \ __ret; \ }) #else #define vld1q_p16_x3(__p0) __extension__ ({ \ poly16x8x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 37); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_u8_x3(__p0) __extension__ ({ \ uint8x16x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 48); \ __ret; \ }) #else #define vld1q_u8_x3(__p0) __extension__ ({ \ uint8x16x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 48); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_u32_x3(__p0) __extension__ ({ \ uint32x4x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 50); \ __ret; \ }) #else #define vld1q_u32_x3(__p0) __extension__ ({ \ uint32x4x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, 
__p0, 50); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_u64_x3(__p0) __extension__ ({ \ uint64x2x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 51); \ __ret; \ }) #else #define vld1q_u64_x3(__p0) __extension__ ({ \ uint64x2x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 51); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_u16_x3(__p0) __extension__ ({ \ uint16x8x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 49); \ __ret; \ }) #else #define vld1q_u16_x3(__p0) __extension__ ({ \ uint16x8x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 49); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_s8_x3(__p0) __extension__ ({ \ int8x16x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 32); \ __ret; \ }) #else #define vld1q_s8_x3(__p0) __extension__ ({ \ int8x16x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 32); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_f32_x3(__p0) __extension__ ({ \ float32x4x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 41); \ __ret; \ }) #else #define vld1q_f32_x3(__p0) __extension__ ({ \ float32x4x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 41); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_s32_x3(__p0) __extension__ ({ \ int32x4x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 34); \ __ret; \ }) #else #define vld1q_s32_x3(__p0) __extension__ ({ \ int32x4x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 34); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_s64_x3(__p0) __extension__ ({ \ int64x2x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 35); \ __ret; \ }) #else #define vld1q_s64_x3(__p0) __extension__ ({ \ int64x2x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 35); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ 
__ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_s16_x3(__p0) __extension__ ({ \ int16x8x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 33); \ __ret; \ }) #else #define vld1q_s16_x3(__p0) __extension__ ({ \ int16x8x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 33); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_u8_x3(__p0) __extension__ ({ \ uint8x8x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 16); \ __ret; \ }) #else #define vld1_u8_x3(__p0) __extension__ ({ \ uint8x8x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 16); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_u32_x3(__p0) __extension__ ({ \ uint32x2x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 18); \ __ret; \ }) #else #define vld1_u32_x3(__p0) __extension__ ({ \ uint32x2x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 18); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #define vld1_u64_x3(__p0) __extension__ ({ \ uint64x1x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1_u16_x3(__p0) __extension__ ({ \ uint16x4x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 17); \ __ret; \ }) #else #define vld1_u16_x3(__p0) __extension__ ({ \ uint16x4x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 17); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_s8_x3(__p0) __extension__ ({ \ int8x8x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 0); \ __ret; \ }) #else #define vld1_s8_x3(__p0) __extension__ ({ \ int8x8x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 0); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_f32_x3(__p0) __extension__ ({ \ float32x2x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 9); \ __ret; \ }) #else #define vld1_f32_x3(__p0) __extension__ ({ \ float32x2x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 9); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_s32_x3(__p0) 
__extension__ ({ \ int32x2x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 2); \ __ret; \ }) #else #define vld1_s32_x3(__p0) __extension__ ({ \ int32x2x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 2); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #define vld1_s64_x3(__p0) __extension__ ({ \ int64x1x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1_s16_x3(__p0) __extension__ ({ \ int16x4x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 1); \ __ret; \ }) #else #define vld1_s16_x3(__p0) __extension__ ({ \ int16x4x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 1); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_p8_x4(__p0) __extension__ ({ \ poly8x8x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 4); \ __ret; \ }) #else #define vld1_p8_x4(__p0) __extension__ ({ \ poly8x8x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 4); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_p16_x4(__p0) __extension__ ({ \ poly16x4x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 5); \ __ret; \ }) #else #define vld1_p16_x4(__p0) __extension__ ({ \ poly16x4x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 5); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_p8_x4(__p0) __extension__ ({ \ poly8x16x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 36); \ __ret; \ }) #else #define vld1q_p8_x4(__p0) __extension__ ({ \ poly8x16x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 36); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_p16_x4(__p0) __extension__ ({ \ poly16x8x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 37); \ __ret; \ }) #else #define vld1q_p16_x4(__p0) __extension__ ({ \ poly16x8x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 37); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ 
__ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_u8_x4(__p0) __extension__ ({ \ uint8x16x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 48); \ __ret; \ }) #else #define vld1q_u8_x4(__p0) __extension__ ({ \ uint8x16x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 48); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_u32_x4(__p0) __extension__ ({ \ uint32x4x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 50); \ __ret; \ }) #else #define vld1q_u32_x4(__p0) __extension__ ({ \ uint32x4x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 50); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_u64_x4(__p0) __extension__ ({ \ uint64x2x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 51); \ __ret; \ }) #else #define vld1q_u64_x4(__p0) __extension__ ({ \ uint64x2x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 51); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_u16_x4(__p0) __extension__ ({ \ uint16x8x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 49); \ __ret; \ }) #else #define vld1q_u16_x4(__p0) __extension__ ({ \ uint16x8x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 49); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_s8_x4(__p0) __extension__ ({ \ int8x16x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 32); \ __ret; \ }) #else #define vld1q_s8_x4(__p0) __extension__ ({ \ int8x16x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 32); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], 
__ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_f32_x4(__p0) __extension__ ({ \ float32x4x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 41); \ __ret; \ }) #else #define vld1q_f32_x4(__p0) __extension__ ({ \ float32x4x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 41); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_s32_x4(__p0) __extension__ ({ \ int32x4x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 34); \ __ret; \ }) #else #define vld1q_s32_x4(__p0) __extension__ ({ \ int32x4x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 34); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_s64_x4(__p0) __extension__ ({ \ int64x2x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 35); \ __ret; \ }) #else #define vld1q_s64_x4(__p0) __extension__ ({ \ int64x2x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 35); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_s16_x4(__p0) __extension__ ({ \ int16x8x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 33); \ __ret; \ }) #else #define vld1q_s16_x4(__p0) __extension__ ({ \ int16x8x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 33); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_u8_x4(__p0) __extension__ ({ \ uint8x8x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 16); \ __ret; \ }) #else #define vld1_u8_x4(__p0) __extension__ ({ \ uint8x8x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 16); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_u32_x4(__p0) __extension__ ({ \ uint32x2x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 18); \ __ret; \ }) #else #define 
vld1_u32_x4(__p0) __extension__ ({ \ uint32x2x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 18); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #define vld1_u64_x4(__p0) __extension__ ({ \ uint64x1x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1_u16_x4(__p0) __extension__ ({ \ uint16x4x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 17); \ __ret; \ }) #else #define vld1_u16_x4(__p0) __extension__ ({ \ uint16x4x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 17); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_s8_x4(__p0) __extension__ ({ \ int8x8x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 0); \ __ret; \ }) #else #define vld1_s8_x4(__p0) __extension__ ({ \ int8x8x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 0); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_f32_x4(__p0) __extension__ ({ \ float32x2x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 9); \ __ret; \ }) #else #define vld1_f32_x4(__p0) __extension__ ({ \ float32x2x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 9); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_s32_x4(__p0) __extension__ ({ \ int32x2x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 2); \ __ret; \ }) #else #define vld1_s32_x4(__p0) __extension__ ({ \ int32x2x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 2); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #define vld1_s64_x4(__p0) __extension__ ({ \ int64x1x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1_s16_x4(__p0) __extension__ ({ \ int16x4x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 1); \ __ret; \ }) #else #define vld1_s16_x4(__p0) __extension__ ({ \ int16x4x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 1); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 
3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_p8(__p0) __extension__ ({ \ poly8x8x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 4); \ __ret; \ }) #else #define vld2_p8(__p0) __extension__ ({ \ poly8x8x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 4); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_p16(__p0) __extension__ ({ \ poly16x4x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 5); \ __ret; \ }) #else #define vld2_p16(__p0) __extension__ ({ \ poly16x4x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 5); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_p8(__p0) __extension__ ({ \ poly8x16x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 36); \ __ret; \ }) #else #define vld2q_p8(__p0) __extension__ ({ \ poly8x16x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 36); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_p16(__p0) __extension__ ({ \ poly16x8x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 37); \ __ret; \ }) #else #define vld2q_p16(__p0) __extension__ ({ \ poly16x8x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 37); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_u8(__p0) __extension__ ({ \ uint8x16x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 48); \ __ret; \ }) #else #define vld2q_u8(__p0) __extension__ ({ \ uint8x16x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 48); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_u32(__p0) __extension__ ({ \ uint32x4x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 50); \ __ret; \ }) #else #define vld2q_u32(__p0) __extension__ ({ \ uint32x4x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 50); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_u16(__p0) __extension__ ({ \ uint16x8x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 49); \ __ret; \ }) #else #define vld2q_u16(__p0) __extension__ ({ \ uint16x8x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 49); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef 
__LITTLE_ENDIAN__ #define vld2q_s8(__p0) __extension__ ({ \ int8x16x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 32); \ __ret; \ }) #else #define vld2q_s8(__p0) __extension__ ({ \ int8x16x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 32); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_f32(__p0) __extension__ ({ \ float32x4x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 41); \ __ret; \ }) #else #define vld2q_f32(__p0) __extension__ ({ \ float32x4x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 41); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_s32(__p0) __extension__ ({ \ int32x4x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 34); \ __ret; \ }) #else #define vld2q_s32(__p0) __extension__ ({ \ int32x4x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 34); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_s16(__p0) __extension__ ({ \ int16x8x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 33); \ __ret; \ }) #else #define vld2q_s16(__p0) __extension__ ({ \ int16x8x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 33); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_u8(__p0) __extension__ ({ \ uint8x8x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 16); \ __ret; \ }) #else #define vld2_u8(__p0) __extension__ ({ \ uint8x8x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 16); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_u32(__p0) __extension__ ({ \ uint32x2x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 18); \ __ret; \ }) #else #define vld2_u32(__p0) __extension__ ({ \ uint32x2x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 18); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #define vld2_u64(__p0) __extension__ ({ \ uint64x1x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld2_u16(__p0) __extension__ ({ \ uint16x4x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 17); \ __ret; \ }) #else #define vld2_u16(__p0) __extension__ ({ \ uint16x4x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 17); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_s8(__p0) __extension__ ({ \ int8x8x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 0); \ __ret; \ }) #else #define vld2_s8(__p0) __extension__ ({ \ int8x8x2_t __ret; \ 
__builtin_neon_vld2_v(&__ret, __p0, 0); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_f32(__p0) __extension__ ({ \ float32x2x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 9); \ __ret; \ }) #else #define vld2_f32(__p0) __extension__ ({ \ float32x2x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 9); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_s32(__p0) __extension__ ({ \ int32x2x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 2); \ __ret; \ }) #else #define vld2_s32(__p0) __extension__ ({ \ int32x2x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 2); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #define vld2_s64(__p0) __extension__ ({ \ int64x1x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld2_s16(__p0) __extension__ ({ \ int16x4x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 1); \ __ret; \ }) #else #define vld2_s16(__p0) __extension__ ({ \ int16x4x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 1); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_dup_p8(__p0) __extension__ ({ \ poly8x8x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 4); \ __ret; \ }) #else #define vld2_dup_p8(__p0) __extension__ ({ \ poly8x8x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 4); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_dup_p16(__p0) __extension__ ({ \ poly16x4x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 5); \ __ret; \ }) #else #define vld2_dup_p16(__p0) __extension__ ({ \ poly16x4x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 5); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_dup_p8(__p0) __extension__ ({ \ poly8x16x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 36); \ __ret; \ }) #else #define vld2q_dup_p8(__p0) __extension__ ({ \ poly8x16x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 36); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_dup_p16(__p0) __extension__ ({ \ poly16x8x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 37); \ __ret; \ }) #else #define vld2q_dup_p16(__p0) __extension__ ({ \ poly16x8x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 37); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = 
__builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_dup_u8(__p0) __extension__ ({ \ uint8x16x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 48); \ __ret; \ }) #else #define vld2q_dup_u8(__p0) __extension__ ({ \ uint8x16x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 48); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_dup_u32(__p0) __extension__ ({ \ uint32x4x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 50); \ __ret; \ }) #else #define vld2q_dup_u32(__p0) __extension__ ({ \ uint32x4x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 50); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_dup_u64(__p0) __extension__ ({ \ uint64x2x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 51); \ __ret; \ }) #else #define vld2q_dup_u64(__p0) __extension__ ({ \ uint64x2x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 51); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_dup_u16(__p0) __extension__ ({ \ uint16x8x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 49); \ __ret; \ }) #else #define vld2q_dup_u16(__p0) __extension__ ({ \ uint16x8x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 49); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_dup_s8(__p0) __extension__ ({ \ int8x16x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 32); \ __ret; \ }) #else #define vld2q_dup_s8(__p0) __extension__ ({ \ int8x16x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 32); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_dup_f32(__p0) __extension__ ({ \ float32x4x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 41); \ __ret; \ }) #else #define vld2q_dup_f32(__p0) __extension__ ({ \ float32x4x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 41); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_dup_s32(__p0) __extension__ ({ \ int32x4x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 34); \ __ret; \ }) #else #define vld2q_dup_s32(__p0) __extension__ ({ \ int32x4x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 34); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define 
vld2q_dup_s64(__p0) __extension__ ({ \ int64x2x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 35); \ __ret; \ }) #else #define vld2q_dup_s64(__p0) __extension__ ({ \ int64x2x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 35); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_dup_s16(__p0) __extension__ ({ \ int16x8x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 33); \ __ret; \ }) #else #define vld2q_dup_s16(__p0) __extension__ ({ \ int16x8x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 33); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_dup_u8(__p0) __extension__ ({ \ uint8x8x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 16); \ __ret; \ }) #else #define vld2_dup_u8(__p0) __extension__ ({ \ uint8x8x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 16); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_dup_u32(__p0) __extension__ ({ \ uint32x2x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 18); \ __ret; \ }) #else #define vld2_dup_u32(__p0) __extension__ ({ \ uint32x2x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 18); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #define vld2_dup_u64(__p0) __extension__ ({ \ uint64x1x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld2_dup_u16(__p0) __extension__ ({ \ uint16x4x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 17); \ __ret; \ }) #else #define vld2_dup_u16(__p0) __extension__ ({ \ uint16x4x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 17); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_dup_s8(__p0) __extension__ ({ \ int8x8x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 0); \ __ret; \ }) #else #define vld2_dup_s8(__p0) __extension__ ({ \ int8x8x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 0); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_dup_f32(__p0) __extension__ ({ \ float32x2x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 9); \ __ret; \ }) #else #define vld2_dup_f32(__p0) __extension__ ({ \ float32x2x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 9); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_dup_s32(__p0) __extension__ ({ \ int32x2x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 2); \ __ret; \ }) #else #define vld2_dup_s32(__p0) __extension__ ({ \ int32x2x2_t __ret; \ 
__builtin_neon_vld2_dup_v(&__ret, __p0, 2); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #define vld2_dup_s64(__p0) __extension__ ({ \ int64x1x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld2_dup_s16(__p0) __extension__ ({ \ int16x4x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 1); \ __ret; \ }) #else #define vld2_dup_s16(__p0) __extension__ ({ \ int16x4x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 1); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8x2_t __ret; \ poly8x8x2_t __s1 = __p1; \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 4); \ __ret; \ }) #else #define vld2_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8x2_t __ret; \ poly8x8x2_t __s1 = __p1; \ poly8x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 4); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x2_t __ret; \ poly16x4x2_t __s1 = __p1; \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 5); \ __ret; \ }) #else #define vld2_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x2_t __ret; \ poly16x4x2_t __s1 = __p1; \ poly16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 5); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x2_t __ret; \ poly16x8x2_t __s1 = __p1; \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 37); \ __ret; \ }) #else #define vld2q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x2_t __ret; \ poly16x8x2_t __s1 = __p1; \ poly16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 37); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4x2_t __ret; \ uint32x4x2_t __s1 = __p1; \ 
__builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 50); \ __ret; \ }) #else #define vld2q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4x2_t __ret; \ uint32x4x2_t __s1 = __p1; \ uint32x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 50); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x2_t __ret; \ uint16x8x2_t __s1 = __p1; \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 49); \ __ret; \ }) #else #define vld2q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x2_t __ret; \ uint16x8x2_t __s1 = __p1; \ uint16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 49); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x2_t __ret; \ float32x4x2_t __s1 = __p1; \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 41); \ __ret; \ }) #else #define vld2q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x2_t __ret; \ float32x4x2_t __s1 = __p1; \ float32x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 41); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x2_t __ret; \ int32x4x2_t __s1 = __p1; \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 34); \ __ret; \ }) #else #define vld2q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x2_t __ret; \ int32x4x2_t __s1 = __p1; \ int32x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 34); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x2_t __ret; \ int16x8x2_t __s1 = __p1; \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 33); \ __ret; \ }) #else #define 
vld2q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x2_t __ret; \ int16x8x2_t __s1 = __p1; \ int16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 33); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x2_t __ret; \ uint8x8x2_t __s1 = __p1; \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 16); \ __ret; \ }) #else #define vld2_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x2_t __ret; \ uint8x8x2_t __s1 = __p1; \ uint8x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 16); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2x2_t __ret; \ uint32x2x2_t __s1 = __p1; \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 18); \ __ret; \ }) #else #define vld2_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2x2_t __ret; \ uint32x2x2_t __s1 = __p1; \ uint32x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 18); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x2_t __ret; \ uint16x4x2_t __s1 = __p1; \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 17); \ __ret; \ }) #else #define vld2_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x2_t __ret; \ uint16x4x2_t __s1 = __p1; \ uint16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 17); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x2_t __ret; \ int8x8x2_t __s1 = __p1; \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 0); \ __ret; \ }) #else #define vld2_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x2_t __ret; \ int8x8x2_t __s1 = __p1; \ int8x8x2_t __rev1; \ __rev1.val[0] = 
__builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 0); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x2_t __ret; \ float32x2x2_t __s1 = __p1; \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 9); \ __ret; \ }) #else #define vld2_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x2_t __ret; \ float32x2x2_t __s1 = __p1; \ float32x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 9); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x2_t __ret; \ int32x2x2_t __s1 = __p1; \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 2); \ __ret; \ }) #else #define vld2_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x2_t __ret; \ int32x2x2_t __s1 = __p1; \ int32x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 2); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x2_t __ret; \ int16x4x2_t __s1 = __p1; \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 1); \ __ret; \ }) #else #define vld2_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x2_t __ret; \ int16x4x2_t __s1 = __p1; \ int16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 1); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_p8(__p0) __extension__ ({ \ poly8x8x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 4); \ __ret; \ }) #else #define vld3_p8(__p0) __extension__ ({ \ poly8x8x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 4); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_p16(__p0) __extension__ ({ \ 
poly16x4x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 5); \ __ret; \ }) #else #define vld3_p16(__p0) __extension__ ({ \ poly16x4x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 5); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_p8(__p0) __extension__ ({ \ poly8x16x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 36); \ __ret; \ }) #else #define vld3q_p8(__p0) __extension__ ({ \ poly8x16x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 36); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_p16(__p0) __extension__ ({ \ poly16x8x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 37); \ __ret; \ }) #else #define vld3q_p16(__p0) __extension__ ({ \ poly16x8x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 37); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_u8(__p0) __extension__ ({ \ uint8x16x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 48); \ __ret; \ }) #else #define vld3q_u8(__p0) __extension__ ({ \ uint8x16x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 48); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_u32(__p0) __extension__ ({ \ uint32x4x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 50); \ __ret; \ }) #else #define vld3q_u32(__p0) __extension__ ({ \ uint32x4x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 50); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_u16(__p0) __extension__ ({ \ uint16x8x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 49); \ __ret; \ }) #else #define vld3q_u16(__p0) __extension__ ({ \ uint16x8x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 49); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_s8(__p0) __extension__ ({ \ int8x16x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 32); \ __ret; 
\ }) #else #define vld3q_s8(__p0) __extension__ ({ \ int8x16x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 32); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_f32(__p0) __extension__ ({ \ float32x4x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 41); \ __ret; \ }) #else #define vld3q_f32(__p0) __extension__ ({ \ float32x4x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 41); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_s32(__p0) __extension__ ({ \ int32x4x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 34); \ __ret; \ }) #else #define vld3q_s32(__p0) __extension__ ({ \ int32x4x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 34); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_s16(__p0) __extension__ ({ \ int16x8x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 33); \ __ret; \ }) #else #define vld3q_s16(__p0) __extension__ ({ \ int16x8x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 33); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_u8(__p0) __extension__ ({ \ uint8x8x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 16); \ __ret; \ }) #else #define vld3_u8(__p0) __extension__ ({ \ uint8x8x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 16); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_u32(__p0) __extension__ ({ \ uint32x2x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 18); \ __ret; \ }) #else #define vld3_u32(__p0) __extension__ ({ \ uint32x2x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 18); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #define vld3_u64(__p0) __extension__ ({ \ uint64x1x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld3_u16(__p0) __extension__ ({ \ uint16x4x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 17); \ __ret; \ }) #else #define vld3_u16(__p0) __extension__ ({ \ uint16x4x3_t __ret; \ __builtin_neon_vld3_v(&__ret, 
__p0, 17); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_s8(__p0) __extension__ ({ \ int8x8x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 0); \ __ret; \ }) #else #define vld3_s8(__p0) __extension__ ({ \ int8x8x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 0); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_f32(__p0) __extension__ ({ \ float32x2x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 9); \ __ret; \ }) #else #define vld3_f32(__p0) __extension__ ({ \ float32x2x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 9); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_s32(__p0) __extension__ ({ \ int32x2x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 2); \ __ret; \ }) #else #define vld3_s32(__p0) __extension__ ({ \ int32x2x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 2); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #define vld3_s64(__p0) __extension__ ({ \ int64x1x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld3_s16(__p0) __extension__ ({ \ int16x4x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 1); \ __ret; \ }) #else #define vld3_s16(__p0) __extension__ ({ \ int16x4x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 1); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_dup_p8(__p0) __extension__ ({ \ poly8x8x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 4); \ __ret; \ }) #else #define vld3_dup_p8(__p0) __extension__ ({ \ poly8x8x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 4); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_dup_p16(__p0) __extension__ ({ \ poly16x4x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 5); \ __ret; \ }) #else #define vld3_dup_p16(__p0) __extension__ ({ \ poly16x4x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 5); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 
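/*
 * Illustrative usage sketch (editor's addition, not part of the header text
 * above): the vld3/vld3q macros defined so far de-interleave groups of three
 * elements from memory into the .val[0..2] members of an NxMx3 struct, with
 * the big-endian variants restoring lane order via __builtin_shufflevector.
 * The function below is hypothetical and assumes a separate translation unit
 * compiled for a NEON target that includes <arm_neon.h>.
 */
#include <arm_neon.h>

/* Sum the red channel of 8 interleaved RGB pixels (24 bytes: R,G,B,R,G,B,...). */
static inline unsigned sum_red_rgb8(const uint8_t *rgb)
{
    uint8x8x3_t pix = vld3_u8(rgb);          /* val[0]=R, val[1]=G, val[2]=B */
    uint16x4_t s16 = vpaddl_u8(pix.val[0]);  /* pairwise widen: 8 -> 4 lanes */
    uint32x2_t s32 = vpaddl_u16(s16);        /* 4 -> 2 lanes                 */
    uint64x1_t s64 = vpaddl_u32(s32);        /* 2 -> 1 lane                  */
    return (unsigned)vget_lane_u64(s64, 0);
}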
2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_dup_p8(__p0) __extension__ ({ \ poly8x16x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 36); \ __ret; \ }) #else #define vld3q_dup_p8(__p0) __extension__ ({ \ poly8x16x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 36); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_dup_p16(__p0) __extension__ ({ \ poly16x8x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 37); \ __ret; \ }) #else #define vld3q_dup_p16(__p0) __extension__ ({ \ poly16x8x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 37); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_dup_u8(__p0) __extension__ ({ \ uint8x16x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 48); \ __ret; \ }) #else #define vld3q_dup_u8(__p0) __extension__ ({ \ uint8x16x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 48); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_dup_u32(__p0) __extension__ ({ \ uint32x4x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 50); \ __ret; \ }) #else #define vld3q_dup_u32(__p0) __extension__ ({ \ uint32x4x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 50); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_dup_u64(__p0) __extension__ ({ \ uint64x2x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 51); \ __ret; \ }) #else #define vld3q_dup_u64(__p0) __extension__ ({ \ uint64x2x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 51); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_dup_u16(__p0) __extension__ ({ \ uint16x8x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 49); \ __ret; \ }) #else #define vld3q_dup_u16(__p0) __extension__ ({ \ uint16x8x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 49); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 
6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_dup_s8(__p0) __extension__ ({ \ int8x16x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 32); \ __ret; \ }) #else #define vld3q_dup_s8(__p0) __extension__ ({ \ int8x16x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 32); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_dup_f32(__p0) __extension__ ({ \ float32x4x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 41); \ __ret; \ }) #else #define vld3q_dup_f32(__p0) __extension__ ({ \ float32x4x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 41); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_dup_s32(__p0) __extension__ ({ \ int32x4x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 34); \ __ret; \ }) #else #define vld3q_dup_s32(__p0) __extension__ ({ \ int32x4x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 34); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_dup_s64(__p0) __extension__ ({ \ int64x2x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 35); \ __ret; \ }) #else #define vld3q_dup_s64(__p0) __extension__ ({ \ int64x2x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 35); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_dup_s16(__p0) __extension__ ({ \ int16x8x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 33); \ __ret; \ }) #else #define vld3q_dup_s16(__p0) __extension__ ({ \ int16x8x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 33); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_dup_u8(__p0) __extension__ ({ \ uint8x8x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 16); \ __ret; \ }) #else #define vld3_dup_u8(__p0) __extension__ ({ \ uint8x8x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 16); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_dup_u32(__p0) __extension__ ({ \ uint32x2x3_t 
__ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 18); \ __ret; \ }) #else #define vld3_dup_u32(__p0) __extension__ ({ \ uint32x2x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 18); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #define vld3_dup_u64(__p0) __extension__ ({ \ uint64x1x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld3_dup_u16(__p0) __extension__ ({ \ uint16x4x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 17); \ __ret; \ }) #else #define vld3_dup_u16(__p0) __extension__ ({ \ uint16x4x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 17); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_dup_s8(__p0) __extension__ ({ \ int8x8x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 0); \ __ret; \ }) #else #define vld3_dup_s8(__p0) __extension__ ({ \ int8x8x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 0); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_dup_f32(__p0) __extension__ ({ \ float32x2x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 9); \ __ret; \ }) #else #define vld3_dup_f32(__p0) __extension__ ({ \ float32x2x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 9); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_dup_s32(__p0) __extension__ ({ \ int32x2x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 2); \ __ret; \ }) #else #define vld3_dup_s32(__p0) __extension__ ({ \ int32x2x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 2); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #define vld3_dup_s64(__p0) __extension__ ({ \ int64x1x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld3_dup_s16(__p0) __extension__ ({ \ int16x4x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 1); \ __ret; \ }) #else #define vld3_dup_s16(__p0) __extension__ ({ \ int16x4x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 1); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8x3_t __ret; \ poly8x8x3_t __s1 = __p1; \ __builtin_neon_vld3_lane_v(&__ret, __p0, 
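/*
 * Illustrative usage sketch (editor's addition): vld3_dup_* loads one group of
 * three elements and replicates each element across all lanes of its vector,
 * which is handy for comparing a whole row against a single key value.
 * Hypothetical example; assumes <arm_neon.h> on a NEON target.
 */
#include <arm_neon.h>

/* Return 0xFF in each lane where an interleaved RGB pixel equals key[0..2]. */
static inline uint8x8_t match_colour_row(const uint8_t *row, const uint8_t *key)
{
    uint8x8x3_t pix = vld3_u8(row);       /* 8 de-interleaved RGB pixels      */
    uint8x8x3_t k   = vld3_dup_u8(key);   /* key[0..2], each splat to 8 lanes */
    uint8x8_t m = vceq_u8(pix.val[0], k.val[0]);
    m = vand_u8(m, vceq_u8(pix.val[1], k.val[1]));
    m = vand_u8(m, vceq_u8(pix.val[2], k.val[2]));
    return m;
}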
(int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 4); \ __ret; \ }) #else #define vld3_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8x3_t __ret; \ poly8x8x3_t __s1 = __p1; \ poly8x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 4); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x3_t __ret; \ poly16x4x3_t __s1 = __p1; \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 5); \ __ret; \ }) #else #define vld3_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x3_t __ret; \ poly16x4x3_t __s1 = __p1; \ poly16x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 5); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x3_t __ret; \ poly16x8x3_t __s1 = __p1; \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 37); \ __ret; \ }) #else #define vld3q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x3_t __ret; \ poly16x8x3_t __s1 = __p1; \ poly16x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 37); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4x3_t __ret; \ uint32x4x3_t __s1 = __p1; \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 50); \ __ret; \ }) #else #define vld3q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4x3_t __ret; \ uint32x4x3_t __s1 = __p1; \ uint32x4x3_t __rev1; \ __rev1.val[0] = 
__builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 50); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x3_t __ret; \ uint16x8x3_t __s1 = __p1; \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 49); \ __ret; \ }) #else #define vld3q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x3_t __ret; \ uint16x8x3_t __s1 = __p1; \ uint16x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 49); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x3_t __ret; \ float32x4x3_t __s1 = __p1; \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 41); \ __ret; \ }) #else #define vld3q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x3_t __ret; \ float32x4x3_t __s1 = __p1; \ float32x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 41); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x3_t __ret; \ int32x4x3_t __s1 = __p1; \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 34); \ __ret; \ }) #else #define vld3q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x3_t __ret; \ int32x4x3_t __s1 = __p1; \ int32x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], 
(int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 34); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x3_t __ret; \ int16x8x3_t __s1 = __p1; \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 33); \ __ret; \ }) #else #define vld3q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x3_t __ret; \ int16x8x3_t __s1 = __p1; \ int16x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 33); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x3_t __ret; \ uint8x8x3_t __s1 = __p1; \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 16); \ __ret; \ }) #else #define vld3_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x3_t __ret; \ uint8x8x3_t __s1 = __p1; \ uint8x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 16); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2x3_t __ret; \ uint32x2x3_t __s1 = __p1; \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 18); \ __ret; \ }) #else #define vld3_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2x3_t __ret; \ uint32x2x3_t __s1 = __p1; \ uint32x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 18); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); 
\ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x3_t __ret; \ uint16x4x3_t __s1 = __p1; \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 17); \ __ret; \ }) #else #define vld3_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x3_t __ret; \ uint16x4x3_t __s1 = __p1; \ uint16x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 17); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x3_t __ret; \ int8x8x3_t __s1 = __p1; \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 0); \ __ret; \ }) #else #define vld3_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x3_t __ret; \ int8x8x3_t __s1 = __p1; \ int8x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 0); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x3_t __ret; \ float32x2x3_t __s1 = __p1; \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 9); \ __ret; \ }) #else #define vld3_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x3_t __ret; \ float32x2x3_t __s1 = __p1; \ float32x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 9); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x3_t __ret; \ int32x2x3_t __s1 = __p1; \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 2); \ __ret; \ }) #else #define vld3_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x3_t __ret; \ int32x2x3_t __s1 = 
__p1; \ int32x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 2); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x3_t __ret; \ int16x4x3_t __s1 = __p1; \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 1); \ __ret; \ }) #else #define vld3_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x3_t __ret; \ int16x4x3_t __s1 = __p1; \ int16x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 1); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_p8(__p0) __extension__ ({ \ poly8x8x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 4); \ __ret; \ }) #else #define vld4_p8(__p0) __extension__ ({ \ poly8x8x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 4); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_p16(__p0) __extension__ ({ \ poly16x4x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 5); \ __ret; \ }) #else #define vld4_p16(__p0) __extension__ ({ \ poly16x4x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 5); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_p8(__p0) __extension__ ({ \ poly8x16x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 36); \ __ret; \ }) #else #define vld4q_p8(__p0) __extension__ ({ \ poly8x16x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 36); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); 
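/*
 * Illustrative usage sketch (editor's addition): vld3_lane_* reloads exactly
 * one lane of each of the three vectors from three consecutive elements in
 * memory, passing the remaining lanes through from the source operand (the
 * big-endian variants reverse, load, then re-reverse). Hypothetical example;
 * assumes <arm_neon.h> on a NEON target.
 */
#include <arm_neon.h>

/* Overwrite lane 2 of acc.val[0..2] with src[0..2]; other lanes are unchanged. */
static inline uint16x4x3_t patch_third_pixel(const uint16_t *src, uint16x4x3_t acc)
{
    return vld3_lane_u16(src, acc, 2);
}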
\ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_p16(__p0) __extension__ ({ \ poly16x8x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 37); \ __ret; \ }) #else #define vld4q_p16(__p0) __extension__ ({ \ poly16x8x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 37); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_u8(__p0) __extension__ ({ \ uint8x16x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 48); \ __ret; \ }) #else #define vld4q_u8(__p0) __extension__ ({ \ uint8x16x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 48); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_u32(__p0) __extension__ ({ \ uint32x4x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 50); \ __ret; \ }) #else #define vld4q_u32(__p0) __extension__ ({ \ uint32x4x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 50); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_u16(__p0) __extension__ ({ \ uint16x8x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 49); \ __ret; \ }) #else #define vld4q_u16(__p0) __extension__ ({ \ uint16x8x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 49); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_s8(__p0) __extension__ ({ \ int8x16x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 32); \ __ret; \ }) #else #define vld4q_s8(__p0) __extension__ ({ \ int8x16x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 32); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 
13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_f32(__p0) __extension__ ({ \ float32x4x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 41); \ __ret; \ }) #else #define vld4q_f32(__p0) __extension__ ({ \ float32x4x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 41); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_s32(__p0) __extension__ ({ \ int32x4x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 34); \ __ret; \ }) #else #define vld4q_s32(__p0) __extension__ ({ \ int32x4x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 34); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_s16(__p0) __extension__ ({ \ int16x8x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 33); \ __ret; \ }) #else #define vld4q_s16(__p0) __extension__ ({ \ int16x8x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 33); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_u8(__p0) __extension__ ({ \ uint8x8x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 16); \ __ret; \ }) #else #define vld4_u8(__p0) __extension__ ({ \ uint8x8x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 16); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_u32(__p0) __extension__ ({ \ uint32x2x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 18); \ __ret; \ }) #else #define vld4_u32(__p0) __extension__ ({ \ uint32x2x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 18); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #define vld4_u64(__p0) __extension__ ({ \ uint64x1x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld4_u16(__p0) __extension__ ({ \ uint16x4x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 17); \ __ret; \ }) #else #define vld4_u16(__p0) __extension__ ({ \ uint16x4x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 17); \ \ 
__ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_s8(__p0) __extension__ ({ \ int8x8x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 0); \ __ret; \ }) #else #define vld4_s8(__p0) __extension__ ({ \ int8x8x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 0); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_f32(__p0) __extension__ ({ \ float32x2x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 9); \ __ret; \ }) #else #define vld4_f32(__p0) __extension__ ({ \ float32x2x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 9); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_s32(__p0) __extension__ ({ \ int32x2x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 2); \ __ret; \ }) #else #define vld4_s32(__p0) __extension__ ({ \ int32x2x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 2); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #define vld4_s64(__p0) __extension__ ({ \ int64x1x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld4_s16(__p0) __extension__ ({ \ int16x4x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 1); \ __ret; \ }) #else #define vld4_s16(__p0) __extension__ ({ \ int16x4x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 1); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_dup_p8(__p0) __extension__ ({ \ poly8x8x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 4); \ __ret; \ }) #else #define vld4_dup_p8(__p0) __extension__ ({ \ poly8x8x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 4); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef 
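/*
 * Illustrative usage sketch (editor's addition): the vld4/vld4q macros above
 * follow the same pattern as vld3 but de-interleave groups of four elements
 * into .val[0..3]. Hypothetical example; assumes <arm_neon.h> on a NEON target.
 */
#include <arm_neon.h>

/* Extract the alpha bytes of 8 interleaved RGBA pixels (32 bytes). */
static inline uint8x8_t extract_alpha8(const uint8_t *rgba)
{
    uint8x8x4_t pix = vld4_u8(rgba);   /* val[0]=R, val[1]=G, val[2]=B, val[3]=A */
    return pix.val[3];
}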
__LITTLE_ENDIAN__ #define vld4_dup_p16(__p0) __extension__ ({ \ poly16x4x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 5); \ __ret; \ }) #else #define vld4_dup_p16(__p0) __extension__ ({ \ poly16x4x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 5); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_dup_p8(__p0) __extension__ ({ \ poly8x16x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 36); \ __ret; \ }) #else #define vld4q_dup_p8(__p0) __extension__ ({ \ poly8x16x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 36); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_dup_p16(__p0) __extension__ ({ \ poly16x8x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 37); \ __ret; \ }) #else #define vld4q_dup_p16(__p0) __extension__ ({ \ poly16x8x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 37); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_dup_u8(__p0) __extension__ ({ \ uint8x16x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 48); \ __ret; \ }) #else #define vld4q_dup_u8(__p0) __extension__ ({ \ uint8x16x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 48); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_dup_u32(__p0) __extension__ ({ \ uint32x4x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 50); \ __ret; \ }) #else #define vld4q_dup_u32(__p0) __extension__ ({ \ uint32x4x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 50); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_dup_u64(__p0) 
__extension__ ({ \ uint64x2x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 51); \ __ret; \ }) #else #define vld4q_dup_u64(__p0) __extension__ ({ \ uint64x2x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 51); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_dup_u16(__p0) __extension__ ({ \ uint16x8x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 49); \ __ret; \ }) #else #define vld4q_dup_u16(__p0) __extension__ ({ \ uint16x8x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 49); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_dup_s8(__p0) __extension__ ({ \ int8x16x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 32); \ __ret; \ }) #else #define vld4q_dup_s8(__p0) __extension__ ({ \ int8x16x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 32); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_dup_f32(__p0) __extension__ ({ \ float32x4x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 41); \ __ret; \ }) #else #define vld4q_dup_f32(__p0) __extension__ ({ \ float32x4x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 41); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_dup_s32(__p0) __extension__ ({ \ int32x4x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 34); \ __ret; \ }) #else #define vld4q_dup_s32(__p0) __extension__ ({ \ int32x4x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 34); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_dup_s64(__p0) __extension__ ({ \ int64x2x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 35); \ __ret; \ }) #else #define vld4q_dup_s64(__p0) __extension__ ({ \ int64x2x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 35); \ \ __ret.val[0] 
= __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_dup_s16(__p0) __extension__ ({ \ int16x8x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 33); \ __ret; \ }) #else #define vld4q_dup_s16(__p0) __extension__ ({ \ int16x8x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 33); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_dup_u8(__p0) __extension__ ({ \ uint8x8x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 16); \ __ret; \ }) #else #define vld4_dup_u8(__p0) __extension__ ({ \ uint8x8x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 16); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_dup_u32(__p0) __extension__ ({ \ uint32x2x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 18); \ __ret; \ }) #else #define vld4_dup_u32(__p0) __extension__ ({ \ uint32x2x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 18); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #define vld4_dup_u64(__p0) __extension__ ({ \ uint64x1x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld4_dup_u16(__p0) __extension__ ({ \ uint16x4x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 17); \ __ret; \ }) #else #define vld4_dup_u16(__p0) __extension__ ({ \ uint16x4x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 17); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_dup_s8(__p0) __extension__ ({ \ int8x8x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 0); \ __ret; \ }) #else #define vld4_dup_s8(__p0) __extension__ ({ \ int8x8x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 0); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ 
__ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_dup_f32(__p0) __extension__ ({ \ float32x2x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 9); \ __ret; \ }) #else #define vld4_dup_f32(__p0) __extension__ ({ \ float32x2x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 9); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_dup_s32(__p0) __extension__ ({ \ int32x2x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 2); \ __ret; \ }) #else #define vld4_dup_s32(__p0) __extension__ ({ \ int32x2x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 2); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #define vld4_dup_s64(__p0) __extension__ ({ \ int64x1x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld4_dup_s16(__p0) __extension__ ({ \ int16x4x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 1); \ __ret; \ }) #else #define vld4_dup_s16(__p0) __extension__ ({ \ int16x4x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 1); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8x4_t __ret; \ poly8x8x4_t __s1 = __p1; \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 4); \ __ret; \ }) #else #define vld4_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8x4_t __ret; \ poly8x8x4_t __s1 = __p1; \ poly8x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 4); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x4_t __ret; \ poly16x4x4_t __s1 = __p1; \ 
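/*
 * Illustrative usage sketch (editor's addition): vld4_dup_* reads one group of
 * four elements and broadcasts each to every lane of its vector, so a single
 * RGBA pixel can be compared or blended against a whole row. Hypothetical
 * example; assumes <arm_neon.h> on a NEON target.
 */
#include <arm_neon.h>

/* Splat one RGBA pixel: val[i] holds rgba[i] replicated in all 8 lanes. */
static inline uint8x8x4_t splat_rgba_pixel(const uint8_t *rgba)
{
    return vld4_dup_u8(rgba);
}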
__builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 5); \ __ret; \ }) #else #define vld4_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x4_t __ret; \ poly16x4x4_t __s1 = __p1; \ poly16x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 5); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x4_t __ret; \ poly16x8x4_t __s1 = __p1; \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 37); \ __ret; \ }) #else #define vld4q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x4_t __ret; \ poly16x8x4_t __s1 = __p1; \ poly16x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 37); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4x4_t __ret; \ uint32x4x4_t __s1 = __p1; \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 50); \ __ret; \ }) #else #define vld4q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4x4_t __ret; \ uint32x4x4_t __s1 = __p1; \ uint32x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 50); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], 
__ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x4_t __ret; \ uint16x8x4_t __s1 = __p1; \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 49); \ __ret; \ }) #else #define vld4q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x4_t __ret; \ uint16x8x4_t __s1 = __p1; \ uint16x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 49); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x4_t __ret; \ float32x4x4_t __s1 = __p1; \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 41); \ __ret; \ }) #else #define vld4q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x4_t __ret; \ float32x4x4_t __s1 = __p1; \ float32x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 41); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x4_t __ret; \ int32x4x4_t __s1 = __p1; \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 34); \ __ret; \ }) #else #define vld4q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x4_t __ret; \ int32x4x4_t __s1 = __p1; \ int32x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = 
__builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 34); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x4_t __ret; \ int16x8x4_t __s1 = __p1; \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 33); \ __ret; \ }) #else #define vld4q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x4_t __ret; \ int16x8x4_t __s1 = __p1; \ int16x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 33); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x4_t __ret; \ uint8x8x4_t __s1 = __p1; \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 16); \ __ret; \ }) #else #define vld4_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x4_t __ret; \ uint8x8x4_t __s1 = __p1; \ uint8x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 16); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2x4_t __ret; \ uint32x2x4_t __s1 = __p1; \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 
18); \ __ret; \ }) #else #define vld4_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2x4_t __ret; \ uint32x2x4_t __s1 = __p1; \ uint32x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 18); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x4_t __ret; \ uint16x4x4_t __s1 = __p1; \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 17); \ __ret; \ }) #else #define vld4_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x4_t __ret; \ uint16x4x4_t __s1 = __p1; \ uint16x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 17); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x4_t __ret; \ int8x8x4_t __s1 = __p1; \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 0); \ __ret; \ }) #else #define vld4_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x4_t __ret; \ int8x8x4_t __s1 = __p1; \ int8x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 0); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef 
__LITTLE_ENDIAN__ #define vld4_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x4_t __ret; \ float32x2x4_t __s1 = __p1; \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 9); \ __ret; \ }) #else #define vld4_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x4_t __ret; \ float32x2x4_t __s1 = __p1; \ float32x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 9); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x4_t __ret; \ int32x2x4_t __s1 = __p1; \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 2); \ __ret; \ }) #else #define vld4_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x4_t __ret; \ int32x2x4_t __s1 = __p1; \ int32x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 2); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x4_t __ret; \ int16x4x4_t __s1 = __p1; \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 1); \ __ret; \ }) #else #define vld4_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x4_t __ret; \ int16x4x4_t __s1 = __p1; \ int16x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 1); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); 
\ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vmaxq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vmaxq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmaxq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vmaxq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vmaxq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8_t vmaxq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vmaxq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16_t vmaxq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vmaxq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #else __ai float32x4_t vmaxq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmaxq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; 
__ret = (int32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4_t vmaxq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vmaxq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8_t vmaxq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vmax_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vmax_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vmax_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vmax_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vmax_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4_t vmax_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vmax_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vmax_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return 
__ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vmax_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } #else __ai float32x2_t vmax_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float32x2_t) __builtin_neon_vmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vmax_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2_t vmax_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vmax_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4_t vmax_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vminq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vminq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vminq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vminq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vminq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8_t vminq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vminq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16_t vminq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vminq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #else __ai float32x4_t vminq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vminq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4_t vminq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vminq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8_t vminq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vmin_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vmin_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vmin_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t 
__ret; __ret = (uint32x2_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vmin_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vmin_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4_t vmin_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vmin_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vmin_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vmin_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } #else __ai float32x2_t vmin_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float32x2_t) __builtin_neon_vmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vmin_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2_t vmin_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vmin_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4_t vmin_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vmlaq_u8(uint8x16_t __p0, uint8x16_t __p1, 
uint8x16_t __p2) { uint8x16_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } #else __ai uint8x16_t vmlaq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmlaq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } #else __ai uint32x4_t vmlaq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __rev0 + __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vmlaq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } #else __ai uint16x8_t vmlaq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vmlaq_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } #else __ai int8x16_t vmlaq_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vmlaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } #else __ai float32x4_t vmlaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __rev0 + __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmlaq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } #else __ai 
int32x4_t vmlaq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __rev0 + __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vmlaq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } #else __ai int16x8_t vmlaq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vmla_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } #else __ai uint8x8_t vmla_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vmla_u32(uint32x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint32x2_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } #else __ai uint32x2_t vmla_u32(uint32x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __rev0 + __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vmla_u16(uint16x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint16x4_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } #else __ai uint16x4_t vmla_u16(uint16x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __rev0 + __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vmla_s8(int8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } #else __ai int8x8_t vmla_s8(int8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return 
__ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vmla_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } #else __ai float32x2_t vmla_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __rev0 + __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vmla_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } #else __ai int32x2_t vmla_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __rev0 + __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vmla_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } #else __ai int16x4_t vmla_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __rev0 + __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vmlaq_lane_u32(__p0_46, __p1_46, __p2_46, __p3_46) __extension__ ({ \ uint32x4_t __ret_46; \ uint32x4_t __s0_46 = __p0_46; \ uint32x4_t __s1_46 = __p1_46; \ uint32x2_t __s2_46 = __p2_46; \ __ret_46 = __s0_46 + __s1_46 * splatq_lane_u32(__s2_46, __p3_46); \ __ret_46; \ }) #else #define vmlaq_lane_u32(__p0_47, __p1_47, __p2_47, __p3_47) __extension__ ({ \ uint32x4_t __ret_47; \ uint32x4_t __s0_47 = __p0_47; \ uint32x4_t __s1_47 = __p1_47; \ uint32x2_t __s2_47 = __p2_47; \ uint32x4_t __rev0_47; __rev0_47 = __builtin_shufflevector(__s0_47, __s0_47, 3, 2, 1, 0); \ uint32x4_t __rev1_47; __rev1_47 = __builtin_shufflevector(__s1_47, __s1_47, 3, 2, 1, 0); \ uint32x2_t __rev2_47; __rev2_47 = __builtin_shufflevector(__s2_47, __s2_47, 1, 0); \ __ret_47 = __rev0_47 + __rev1_47 * __noswap_splatq_lane_u32(__rev2_47, __p3_47); \ __ret_47 = __builtin_shufflevector(__ret_47, __ret_47, 3, 2, 1, 0); \ __ret_47; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlaq_lane_u16(__p0_48, __p1_48, __p2_48, __p3_48) __extension__ ({ \ uint16x8_t __ret_48; \ uint16x8_t __s0_48 = __p0_48; \ uint16x8_t __s1_48 = __p1_48; \ uint16x4_t __s2_48 = __p2_48; \ __ret_48 = __s0_48 + __s1_48 * splatq_lane_u16(__s2_48, __p3_48); \ __ret_48; \ }) #else #define vmlaq_lane_u16(__p0_49, __p1_49, __p2_49, __p3_49) __extension__ ({ \ uint16x8_t __ret_49; \ uint16x8_t __s0_49 = __p0_49; \ uint16x8_t __s1_49 = __p1_49; \ uint16x4_t __s2_49 = __p2_49; \ uint16x8_t __rev0_49; __rev0_49 = __builtin_shufflevector(__s0_49, __s0_49, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev1_49; __rev1_49 = __builtin_shufflevector(__s1_49, __s1_49, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x4_t __rev2_49; __rev2_49 = __builtin_shufflevector(__s2_49, __s2_49, 3, 2, 1, 0); \ 
__ret_49 = __rev0_49 + __rev1_49 * __noswap_splatq_lane_u16(__rev2_49, __p3_49); \ __ret_49 = __builtin_shufflevector(__ret_49, __ret_49, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_49; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlaq_lane_f32(__p0_50, __p1_50, __p2_50, __p3_50) __extension__ ({ \ float32x4_t __ret_50; \ float32x4_t __s0_50 = __p0_50; \ float32x4_t __s1_50 = __p1_50; \ float32x2_t __s2_50 = __p2_50; \ __ret_50 = __s0_50 + __s1_50 * splatq_lane_f32(__s2_50, __p3_50); \ __ret_50; \ }) #else #define vmlaq_lane_f32(__p0_51, __p1_51, __p2_51, __p3_51) __extension__ ({ \ float32x4_t __ret_51; \ float32x4_t __s0_51 = __p0_51; \ float32x4_t __s1_51 = __p1_51; \ float32x2_t __s2_51 = __p2_51; \ float32x4_t __rev0_51; __rev0_51 = __builtin_shufflevector(__s0_51, __s0_51, 3, 2, 1, 0); \ float32x4_t __rev1_51; __rev1_51 = __builtin_shufflevector(__s1_51, __s1_51, 3, 2, 1, 0); \ float32x2_t __rev2_51; __rev2_51 = __builtin_shufflevector(__s2_51, __s2_51, 1, 0); \ __ret_51 = __rev0_51 + __rev1_51 * __noswap_splatq_lane_f32(__rev2_51, __p3_51); \ __ret_51 = __builtin_shufflevector(__ret_51, __ret_51, 3, 2, 1, 0); \ __ret_51; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlaq_lane_s32(__p0_52, __p1_52, __p2_52, __p3_52) __extension__ ({ \ int32x4_t __ret_52; \ int32x4_t __s0_52 = __p0_52; \ int32x4_t __s1_52 = __p1_52; \ int32x2_t __s2_52 = __p2_52; \ __ret_52 = __s0_52 + __s1_52 * splatq_lane_s32(__s2_52, __p3_52); \ __ret_52; \ }) #else #define vmlaq_lane_s32(__p0_53, __p1_53, __p2_53, __p3_53) __extension__ ({ \ int32x4_t __ret_53; \ int32x4_t __s0_53 = __p0_53; \ int32x4_t __s1_53 = __p1_53; \ int32x2_t __s2_53 = __p2_53; \ int32x4_t __rev0_53; __rev0_53 = __builtin_shufflevector(__s0_53, __s0_53, 3, 2, 1, 0); \ int32x4_t __rev1_53; __rev1_53 = __builtin_shufflevector(__s1_53, __s1_53, 3, 2, 1, 0); \ int32x2_t __rev2_53; __rev2_53 = __builtin_shufflevector(__s2_53, __s2_53, 1, 0); \ __ret_53 = __rev0_53 + __rev1_53 * __noswap_splatq_lane_s32(__rev2_53, __p3_53); \ __ret_53 = __builtin_shufflevector(__ret_53, __ret_53, 3, 2, 1, 0); \ __ret_53; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlaq_lane_s16(__p0_54, __p1_54, __p2_54, __p3_54) __extension__ ({ \ int16x8_t __ret_54; \ int16x8_t __s0_54 = __p0_54; \ int16x8_t __s1_54 = __p1_54; \ int16x4_t __s2_54 = __p2_54; \ __ret_54 = __s0_54 + __s1_54 * splatq_lane_s16(__s2_54, __p3_54); \ __ret_54; \ }) #else #define vmlaq_lane_s16(__p0_55, __p1_55, __p2_55, __p3_55) __extension__ ({ \ int16x8_t __ret_55; \ int16x8_t __s0_55 = __p0_55; \ int16x8_t __s1_55 = __p1_55; \ int16x4_t __s2_55 = __p2_55; \ int16x8_t __rev0_55; __rev0_55 = __builtin_shufflevector(__s0_55, __s0_55, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1_55; __rev1_55 = __builtin_shufflevector(__s1_55, __s1_55, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x4_t __rev2_55; __rev2_55 = __builtin_shufflevector(__s2_55, __s2_55, 3, 2, 1, 0); \ __ret_55 = __rev0_55 + __rev1_55 * __noswap_splatq_lane_s16(__rev2_55, __p3_55); \ __ret_55 = __builtin_shufflevector(__ret_55, __ret_55, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_55; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmla_lane_u32(__p0_56, __p1_56, __p2_56, __p3_56) __extension__ ({ \ uint32x2_t __ret_56; \ uint32x2_t __s0_56 = __p0_56; \ uint32x2_t __s1_56 = __p1_56; \ uint32x2_t __s2_56 = __p2_56; \ __ret_56 = __s0_56 + __s1_56 * splat_lane_u32(__s2_56, __p3_56); \ __ret_56; \ }) #else #define vmla_lane_u32(__p0_57, __p1_57, __p2_57, __p3_57) __extension__ ({ \ uint32x2_t __ret_57; \ uint32x2_t __s0_57 = __p0_57; \ uint32x2_t __s1_57 = __p1_57; \ uint32x2_t 
__s2_57 = __p2_57; \ uint32x2_t __rev0_57; __rev0_57 = __builtin_shufflevector(__s0_57, __s0_57, 1, 0); \ uint32x2_t __rev1_57; __rev1_57 = __builtin_shufflevector(__s1_57, __s1_57, 1, 0); \ uint32x2_t __rev2_57; __rev2_57 = __builtin_shufflevector(__s2_57, __s2_57, 1, 0); \ __ret_57 = __rev0_57 + __rev1_57 * __noswap_splat_lane_u32(__rev2_57, __p3_57); \ __ret_57 = __builtin_shufflevector(__ret_57, __ret_57, 1, 0); \ __ret_57; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmla_lane_u16(__p0_58, __p1_58, __p2_58, __p3_58) __extension__ ({ \ uint16x4_t __ret_58; \ uint16x4_t __s0_58 = __p0_58; \ uint16x4_t __s1_58 = __p1_58; \ uint16x4_t __s2_58 = __p2_58; \ __ret_58 = __s0_58 + __s1_58 * splat_lane_u16(__s2_58, __p3_58); \ __ret_58; \ }) #else #define vmla_lane_u16(__p0_59, __p1_59, __p2_59, __p3_59) __extension__ ({ \ uint16x4_t __ret_59; \ uint16x4_t __s0_59 = __p0_59; \ uint16x4_t __s1_59 = __p1_59; \ uint16x4_t __s2_59 = __p2_59; \ uint16x4_t __rev0_59; __rev0_59 = __builtin_shufflevector(__s0_59, __s0_59, 3, 2, 1, 0); \ uint16x4_t __rev1_59; __rev1_59 = __builtin_shufflevector(__s1_59, __s1_59, 3, 2, 1, 0); \ uint16x4_t __rev2_59; __rev2_59 = __builtin_shufflevector(__s2_59, __s2_59, 3, 2, 1, 0); \ __ret_59 = __rev0_59 + __rev1_59 * __noswap_splat_lane_u16(__rev2_59, __p3_59); \ __ret_59 = __builtin_shufflevector(__ret_59, __ret_59, 3, 2, 1, 0); \ __ret_59; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmla_lane_f32(__p0_60, __p1_60, __p2_60, __p3_60) __extension__ ({ \ float32x2_t __ret_60; \ float32x2_t __s0_60 = __p0_60; \ float32x2_t __s1_60 = __p1_60; \ float32x2_t __s2_60 = __p2_60; \ __ret_60 = __s0_60 + __s1_60 * splat_lane_f32(__s2_60, __p3_60); \ __ret_60; \ }) #else #define vmla_lane_f32(__p0_61, __p1_61, __p2_61, __p3_61) __extension__ ({ \ float32x2_t __ret_61; \ float32x2_t __s0_61 = __p0_61; \ float32x2_t __s1_61 = __p1_61; \ float32x2_t __s2_61 = __p2_61; \ float32x2_t __rev0_61; __rev0_61 = __builtin_shufflevector(__s0_61, __s0_61, 1, 0); \ float32x2_t __rev1_61; __rev1_61 = __builtin_shufflevector(__s1_61, __s1_61, 1, 0); \ float32x2_t __rev2_61; __rev2_61 = __builtin_shufflevector(__s2_61, __s2_61, 1, 0); \ __ret_61 = __rev0_61 + __rev1_61 * __noswap_splat_lane_f32(__rev2_61, __p3_61); \ __ret_61 = __builtin_shufflevector(__ret_61, __ret_61, 1, 0); \ __ret_61; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmla_lane_s32(__p0_62, __p1_62, __p2_62, __p3_62) __extension__ ({ \ int32x2_t __ret_62; \ int32x2_t __s0_62 = __p0_62; \ int32x2_t __s1_62 = __p1_62; \ int32x2_t __s2_62 = __p2_62; \ __ret_62 = __s0_62 + __s1_62 * splat_lane_s32(__s2_62, __p3_62); \ __ret_62; \ }) #else #define vmla_lane_s32(__p0_63, __p1_63, __p2_63, __p3_63) __extension__ ({ \ int32x2_t __ret_63; \ int32x2_t __s0_63 = __p0_63; \ int32x2_t __s1_63 = __p1_63; \ int32x2_t __s2_63 = __p2_63; \ int32x2_t __rev0_63; __rev0_63 = __builtin_shufflevector(__s0_63, __s0_63, 1, 0); \ int32x2_t __rev1_63; __rev1_63 = __builtin_shufflevector(__s1_63, __s1_63, 1, 0); \ int32x2_t __rev2_63; __rev2_63 = __builtin_shufflevector(__s2_63, __s2_63, 1, 0); \ __ret_63 = __rev0_63 + __rev1_63 * __noswap_splat_lane_s32(__rev2_63, __p3_63); \ __ret_63 = __builtin_shufflevector(__ret_63, __ret_63, 1, 0); \ __ret_63; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmla_lane_s16(__p0_64, __p1_64, __p2_64, __p3_64) __extension__ ({ \ int16x4_t __ret_64; \ int16x4_t __s0_64 = __p0_64; \ int16x4_t __s1_64 = __p1_64; \ int16x4_t __s2_64 = __p2_64; \ __ret_64 = __s0_64 + __s1_64 * splat_lane_s16(__s2_64, __p3_64); \ __ret_64; \ }) 
#else #define vmla_lane_s16(__p0_65, __p1_65, __p2_65, __p3_65) __extension__ ({ \ int16x4_t __ret_65; \ int16x4_t __s0_65 = __p0_65; \ int16x4_t __s1_65 = __p1_65; \ int16x4_t __s2_65 = __p2_65; \ int16x4_t __rev0_65; __rev0_65 = __builtin_shufflevector(__s0_65, __s0_65, 3, 2, 1, 0); \ int16x4_t __rev1_65; __rev1_65 = __builtin_shufflevector(__s1_65, __s1_65, 3, 2, 1, 0); \ int16x4_t __rev2_65; __rev2_65 = __builtin_shufflevector(__s2_65, __s2_65, 3, 2, 1, 0); \ __ret_65 = __rev0_65 + __rev1_65 * __noswap_splat_lane_s16(__rev2_65, __p3_65); \ __ret_65 = __builtin_shufflevector(__ret_65, __ret_65, 3, 2, 1, 0); \ __ret_65; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmlaq_n_u32(uint32x4_t __p0, uint32x4_t __p1, uint32_t __p2) { uint32x4_t __ret; __ret = __p0 + __p1 * (uint32x4_t) {__p2, __p2, __p2, __p2}; return __ret; } #else __ai uint32x4_t vmlaq_n_u32(uint32x4_t __p0, uint32x4_t __p1, uint32_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 + __rev1 * (uint32x4_t) {__p2, __p2, __p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vmlaq_n_u16(uint16x8_t __p0, uint16x8_t __p1, uint16_t __p2) { uint16x8_t __ret; __ret = __p0 + __p1 * (uint16x8_t) {__p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2}; return __ret; } #else __ai uint16x8_t vmlaq_n_u16(uint16x8_t __p0, uint16x8_t __p1, uint16_t __p2) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __rev1 * (uint16x8_t) {__p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vmlaq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { float32x4_t __ret; __ret = __p0 + __p1 * (float32x4_t) {__p2, __p2, __p2, __p2}; return __ret; } #else __ai float32x4_t vmlaq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 + __rev1 * (float32x4_t) {__p2, __p2, __p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmlaq_n_s32(int32x4_t __p0, int32x4_t __p1, int32_t __p2) { int32x4_t __ret; __ret = __p0 + __p1 * (int32x4_t) {__p2, __p2, __p2, __p2}; return __ret; } #else __ai int32x4_t vmlaq_n_s32(int32x4_t __p0, int32x4_t __p1, int32_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 + __rev1 * (int32x4_t) {__p2, __p2, __p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vmlaq_n_s16(int16x8_t __p0, int16x8_t __p1, int16_t __p2) { int16x8_t __ret; __ret = __p0 + __p1 * (int16x8_t) {__p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2}; return __ret; } #else __ai int16x8_t vmlaq_n_s16(int16x8_t __p0, int16x8_t __p1, int16_t __p2) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t 
__rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __rev1 * (int16x8_t) {__p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vmla_n_u32(uint32x2_t __p0, uint32x2_t __p1, uint32_t __p2) { uint32x2_t __ret; __ret = __p0 + __p1 * (uint32x2_t) {__p2, __p2}; return __ret; } #else __ai uint32x2_t vmla_n_u32(uint32x2_t __p0, uint32x2_t __p1, uint32_t __p2) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 + __rev1 * (uint32x2_t) {__p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vmla_n_u16(uint16x4_t __p0, uint16x4_t __p1, uint16_t __p2) { uint16x4_t __ret; __ret = __p0 + __p1 * (uint16x4_t) {__p2, __p2, __p2, __p2}; return __ret; } #else __ai uint16x4_t vmla_n_u16(uint16x4_t __p0, uint16x4_t __p1, uint16_t __p2) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 + __rev1 * (uint16x4_t) {__p2, __p2, __p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vmla_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { float32x2_t __ret; __ret = __p0 + __p1 * (float32x2_t) {__p2, __p2}; return __ret; } #else __ai float32x2_t vmla_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 + __rev1 * (float32x2_t) {__p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vmla_n_s32(int32x2_t __p0, int32x2_t __p1, int32_t __p2) { int32x2_t __ret; __ret = __p0 + __p1 * (int32x2_t) {__p2, __p2}; return __ret; } #else __ai int32x2_t vmla_n_s32(int32x2_t __p0, int32x2_t __p1, int32_t __p2) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 + __rev1 * (int32x2_t) {__p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vmla_n_s16(int16x4_t __p0, int16x4_t __p1, int16_t __p2) { int16x4_t __ret; __ret = __p0 + __p1 * (int16x4_t) {__p2, __p2, __p2, __p2}; return __ret; } #else __ai int16x4_t vmla_n_s16(int16x4_t __p0, int16x4_t __p1, int16_t __p2) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 + __rev1 * (int16x4_t) {__p2, __p2, __p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vmlsq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } #else __ai uint8x16_t vmlsq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmlsq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } #else __ai uint32x4_t vmlsq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __rev0 - __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vmlsq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } #else __ai uint16x8_t vmlsq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vmlsq_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } #else __ai int8x16_t vmlsq_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vmlsq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } #else __ai float32x4_t vmlsq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __rev0 - __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmlsq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } #else __ai int32x4_t vmlsq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __rev0 - 
__rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vmlsq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } #else __ai int16x8_t vmlsq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vmls_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } #else __ai uint8x8_t vmls_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vmls_u32(uint32x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint32x2_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } #else __ai uint32x2_t vmls_u32(uint32x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __rev0 - __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vmls_u16(uint16x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint16x4_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } #else __ai uint16x4_t vmls_u16(uint16x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __rev0 - __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vmls_s8(int8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } #else __ai int8x8_t vmls_s8(int8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vmls_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } #else __ai float32x2_t vmls_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; float32x2_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __rev0 - __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vmls_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } #else __ai int32x2_t vmls_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __rev0 - __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vmls_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } #else __ai int16x4_t vmls_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __rev0 - __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vmlsq_lane_u32(__p0_66, __p1_66, __p2_66, __p3_66) __extension__ ({ \ uint32x4_t __ret_66; \ uint32x4_t __s0_66 = __p0_66; \ uint32x4_t __s1_66 = __p1_66; \ uint32x2_t __s2_66 = __p2_66; \ __ret_66 = __s0_66 - __s1_66 * splatq_lane_u32(__s2_66, __p3_66); \ __ret_66; \ }) #else #define vmlsq_lane_u32(__p0_67, __p1_67, __p2_67, __p3_67) __extension__ ({ \ uint32x4_t __ret_67; \ uint32x4_t __s0_67 = __p0_67; \ uint32x4_t __s1_67 = __p1_67; \ uint32x2_t __s2_67 = __p2_67; \ uint32x4_t __rev0_67; __rev0_67 = __builtin_shufflevector(__s0_67, __s0_67, 3, 2, 1, 0); \ uint32x4_t __rev1_67; __rev1_67 = __builtin_shufflevector(__s1_67, __s1_67, 3, 2, 1, 0); \ uint32x2_t __rev2_67; __rev2_67 = __builtin_shufflevector(__s2_67, __s2_67, 1, 0); \ __ret_67 = __rev0_67 - __rev1_67 * __noswap_splatq_lane_u32(__rev2_67, __p3_67); \ __ret_67 = __builtin_shufflevector(__ret_67, __ret_67, 3, 2, 1, 0); \ __ret_67; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsq_lane_u16(__p0_68, __p1_68, __p2_68, __p3_68) __extension__ ({ \ uint16x8_t __ret_68; \ uint16x8_t __s0_68 = __p0_68; \ uint16x8_t __s1_68 = __p1_68; \ uint16x4_t __s2_68 = __p2_68; \ __ret_68 = __s0_68 - __s1_68 * splatq_lane_u16(__s2_68, __p3_68); \ __ret_68; \ }) #else #define vmlsq_lane_u16(__p0_69, __p1_69, __p2_69, __p3_69) __extension__ ({ \ uint16x8_t __ret_69; \ uint16x8_t __s0_69 = __p0_69; \ uint16x8_t __s1_69 = __p1_69; \ uint16x4_t __s2_69 = __p2_69; \ uint16x8_t __rev0_69; __rev0_69 = __builtin_shufflevector(__s0_69, __s0_69, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev1_69; __rev1_69 = __builtin_shufflevector(__s1_69, __s1_69, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x4_t __rev2_69; __rev2_69 = __builtin_shufflevector(__s2_69, __s2_69, 3, 2, 1, 0); \ __ret_69 = __rev0_69 - __rev1_69 * __noswap_splatq_lane_u16(__rev2_69, __p3_69); \ __ret_69 = __builtin_shufflevector(__ret_69, __ret_69, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_69; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsq_lane_f32(__p0_70, __p1_70, __p2_70, __p3_70) __extension__ ({ \ float32x4_t __ret_70; \ 
float32x4_t __s0_70 = __p0_70; \ float32x4_t __s1_70 = __p1_70; \ float32x2_t __s2_70 = __p2_70; \ __ret_70 = __s0_70 - __s1_70 * splatq_lane_f32(__s2_70, __p3_70); \ __ret_70; \ }) #else #define vmlsq_lane_f32(__p0_71, __p1_71, __p2_71, __p3_71) __extension__ ({ \ float32x4_t __ret_71; \ float32x4_t __s0_71 = __p0_71; \ float32x4_t __s1_71 = __p1_71; \ float32x2_t __s2_71 = __p2_71; \ float32x4_t __rev0_71; __rev0_71 = __builtin_shufflevector(__s0_71, __s0_71, 3, 2, 1, 0); \ float32x4_t __rev1_71; __rev1_71 = __builtin_shufflevector(__s1_71, __s1_71, 3, 2, 1, 0); \ float32x2_t __rev2_71; __rev2_71 = __builtin_shufflevector(__s2_71, __s2_71, 1, 0); \ __ret_71 = __rev0_71 - __rev1_71 * __noswap_splatq_lane_f32(__rev2_71, __p3_71); \ __ret_71 = __builtin_shufflevector(__ret_71, __ret_71, 3, 2, 1, 0); \ __ret_71; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsq_lane_s32(__p0_72, __p1_72, __p2_72, __p3_72) __extension__ ({ \ int32x4_t __ret_72; \ int32x4_t __s0_72 = __p0_72; \ int32x4_t __s1_72 = __p1_72; \ int32x2_t __s2_72 = __p2_72; \ __ret_72 = __s0_72 - __s1_72 * splatq_lane_s32(__s2_72, __p3_72); \ __ret_72; \ }) #else #define vmlsq_lane_s32(__p0_73, __p1_73, __p2_73, __p3_73) __extension__ ({ \ int32x4_t __ret_73; \ int32x4_t __s0_73 = __p0_73; \ int32x4_t __s1_73 = __p1_73; \ int32x2_t __s2_73 = __p2_73; \ int32x4_t __rev0_73; __rev0_73 = __builtin_shufflevector(__s0_73, __s0_73, 3, 2, 1, 0); \ int32x4_t __rev1_73; __rev1_73 = __builtin_shufflevector(__s1_73, __s1_73, 3, 2, 1, 0); \ int32x2_t __rev2_73; __rev2_73 = __builtin_shufflevector(__s2_73, __s2_73, 1, 0); \ __ret_73 = __rev0_73 - __rev1_73 * __noswap_splatq_lane_s32(__rev2_73, __p3_73); \ __ret_73 = __builtin_shufflevector(__ret_73, __ret_73, 3, 2, 1, 0); \ __ret_73; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsq_lane_s16(__p0_74, __p1_74, __p2_74, __p3_74) __extension__ ({ \ int16x8_t __ret_74; \ int16x8_t __s0_74 = __p0_74; \ int16x8_t __s1_74 = __p1_74; \ int16x4_t __s2_74 = __p2_74; \ __ret_74 = __s0_74 - __s1_74 * splatq_lane_s16(__s2_74, __p3_74); \ __ret_74; \ }) #else #define vmlsq_lane_s16(__p0_75, __p1_75, __p2_75, __p3_75) __extension__ ({ \ int16x8_t __ret_75; \ int16x8_t __s0_75 = __p0_75; \ int16x8_t __s1_75 = __p1_75; \ int16x4_t __s2_75 = __p2_75; \ int16x8_t __rev0_75; __rev0_75 = __builtin_shufflevector(__s0_75, __s0_75, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1_75; __rev1_75 = __builtin_shufflevector(__s1_75, __s1_75, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x4_t __rev2_75; __rev2_75 = __builtin_shufflevector(__s2_75, __s2_75, 3, 2, 1, 0); \ __ret_75 = __rev0_75 - __rev1_75 * __noswap_splatq_lane_s16(__rev2_75, __p3_75); \ __ret_75 = __builtin_shufflevector(__ret_75, __ret_75, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_75; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmls_lane_u32(__p0_76, __p1_76, __p2_76, __p3_76) __extension__ ({ \ uint32x2_t __ret_76; \ uint32x2_t __s0_76 = __p0_76; \ uint32x2_t __s1_76 = __p1_76; \ uint32x2_t __s2_76 = __p2_76; \ __ret_76 = __s0_76 - __s1_76 * splat_lane_u32(__s2_76, __p3_76); \ __ret_76; \ }) #else #define vmls_lane_u32(__p0_77, __p1_77, __p2_77, __p3_77) __extension__ ({ \ uint32x2_t __ret_77; \ uint32x2_t __s0_77 = __p0_77; \ uint32x2_t __s1_77 = __p1_77; \ uint32x2_t __s2_77 = __p2_77; \ uint32x2_t __rev0_77; __rev0_77 = __builtin_shufflevector(__s0_77, __s0_77, 1, 0); \ uint32x2_t __rev1_77; __rev1_77 = __builtin_shufflevector(__s1_77, __s1_77, 1, 0); \ uint32x2_t __rev2_77; __rev2_77 = __builtin_shufflevector(__s2_77, __s2_77, 1, 0); \ __ret_77 = __rev0_77 - __rev1_77 * 
__noswap_splat_lane_u32(__rev2_77, __p3_77); \ __ret_77 = __builtin_shufflevector(__ret_77, __ret_77, 1, 0); \ __ret_77; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmls_lane_u16(__p0_78, __p1_78, __p2_78, __p3_78) __extension__ ({ \ uint16x4_t __ret_78; \ uint16x4_t __s0_78 = __p0_78; \ uint16x4_t __s1_78 = __p1_78; \ uint16x4_t __s2_78 = __p2_78; \ __ret_78 = __s0_78 - __s1_78 * splat_lane_u16(__s2_78, __p3_78); \ __ret_78; \ }) #else #define vmls_lane_u16(__p0_79, __p1_79, __p2_79, __p3_79) __extension__ ({ \ uint16x4_t __ret_79; \ uint16x4_t __s0_79 = __p0_79; \ uint16x4_t __s1_79 = __p1_79; \ uint16x4_t __s2_79 = __p2_79; \ uint16x4_t __rev0_79; __rev0_79 = __builtin_shufflevector(__s0_79, __s0_79, 3, 2, 1, 0); \ uint16x4_t __rev1_79; __rev1_79 = __builtin_shufflevector(__s1_79, __s1_79, 3, 2, 1, 0); \ uint16x4_t __rev2_79; __rev2_79 = __builtin_shufflevector(__s2_79, __s2_79, 3, 2, 1, 0); \ __ret_79 = __rev0_79 - __rev1_79 * __noswap_splat_lane_u16(__rev2_79, __p3_79); \ __ret_79 = __builtin_shufflevector(__ret_79, __ret_79, 3, 2, 1, 0); \ __ret_79; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmls_lane_f32(__p0_80, __p1_80, __p2_80, __p3_80) __extension__ ({ \ float32x2_t __ret_80; \ float32x2_t __s0_80 = __p0_80; \ float32x2_t __s1_80 = __p1_80; \ float32x2_t __s2_80 = __p2_80; \ __ret_80 = __s0_80 - __s1_80 * splat_lane_f32(__s2_80, __p3_80); \ __ret_80; \ }) #else #define vmls_lane_f32(__p0_81, __p1_81, __p2_81, __p3_81) __extension__ ({ \ float32x2_t __ret_81; \ float32x2_t __s0_81 = __p0_81; \ float32x2_t __s1_81 = __p1_81; \ float32x2_t __s2_81 = __p2_81; \ float32x2_t __rev0_81; __rev0_81 = __builtin_shufflevector(__s0_81, __s0_81, 1, 0); \ float32x2_t __rev1_81; __rev1_81 = __builtin_shufflevector(__s1_81, __s1_81, 1, 0); \ float32x2_t __rev2_81; __rev2_81 = __builtin_shufflevector(__s2_81, __s2_81, 1, 0); \ __ret_81 = __rev0_81 - __rev1_81 * __noswap_splat_lane_f32(__rev2_81, __p3_81); \ __ret_81 = __builtin_shufflevector(__ret_81, __ret_81, 1, 0); \ __ret_81; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmls_lane_s32(__p0_82, __p1_82, __p2_82, __p3_82) __extension__ ({ \ int32x2_t __ret_82; \ int32x2_t __s0_82 = __p0_82; \ int32x2_t __s1_82 = __p1_82; \ int32x2_t __s2_82 = __p2_82; \ __ret_82 = __s0_82 - __s1_82 * splat_lane_s32(__s2_82, __p3_82); \ __ret_82; \ }) #else #define vmls_lane_s32(__p0_83, __p1_83, __p2_83, __p3_83) __extension__ ({ \ int32x2_t __ret_83; \ int32x2_t __s0_83 = __p0_83; \ int32x2_t __s1_83 = __p1_83; \ int32x2_t __s2_83 = __p2_83; \ int32x2_t __rev0_83; __rev0_83 = __builtin_shufflevector(__s0_83, __s0_83, 1, 0); \ int32x2_t __rev1_83; __rev1_83 = __builtin_shufflevector(__s1_83, __s1_83, 1, 0); \ int32x2_t __rev2_83; __rev2_83 = __builtin_shufflevector(__s2_83, __s2_83, 1, 0); \ __ret_83 = __rev0_83 - __rev1_83 * __noswap_splat_lane_s32(__rev2_83, __p3_83); \ __ret_83 = __builtin_shufflevector(__ret_83, __ret_83, 1, 0); \ __ret_83; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmls_lane_s16(__p0_84, __p1_84, __p2_84, __p3_84) __extension__ ({ \ int16x4_t __ret_84; \ int16x4_t __s0_84 = __p0_84; \ int16x4_t __s1_84 = __p1_84; \ int16x4_t __s2_84 = __p2_84; \ __ret_84 = __s0_84 - __s1_84 * splat_lane_s16(__s2_84, __p3_84); \ __ret_84; \ }) #else #define vmls_lane_s16(__p0_85, __p1_85, __p2_85, __p3_85) __extension__ ({ \ int16x4_t __ret_85; \ int16x4_t __s0_85 = __p0_85; \ int16x4_t __s1_85 = __p1_85; \ int16x4_t __s2_85 = __p2_85; \ int16x4_t __rev0_85; __rev0_85 = __builtin_shufflevector(__s0_85, __s0_85, 3, 2, 1, 0); \ int16x4_t __rev1_85; 
__rev1_85 = __builtin_shufflevector(__s1_85, __s1_85, 3, 2, 1, 0); \ int16x4_t __rev2_85; __rev2_85 = __builtin_shufflevector(__s2_85, __s2_85, 3, 2, 1, 0); \ __ret_85 = __rev0_85 - __rev1_85 * __noswap_splat_lane_s16(__rev2_85, __p3_85); \ __ret_85 = __builtin_shufflevector(__ret_85, __ret_85, 3, 2, 1, 0); \ __ret_85; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmlsq_n_u32(uint32x4_t __p0, uint32x4_t __p1, uint32_t __p2) { uint32x4_t __ret; __ret = __p0 - __p1 * (uint32x4_t) {__p2, __p2, __p2, __p2}; return __ret; } #else __ai uint32x4_t vmlsq_n_u32(uint32x4_t __p0, uint32x4_t __p1, uint32_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 - __rev1 * (uint32x4_t) {__p2, __p2, __p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vmlsq_n_u16(uint16x8_t __p0, uint16x8_t __p1, uint16_t __p2) { uint16x8_t __ret; __ret = __p0 - __p1 * (uint16x8_t) {__p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2}; return __ret; } #else __ai uint16x8_t vmlsq_n_u16(uint16x8_t __p0, uint16x8_t __p1, uint16_t __p2) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __rev1 * (uint16x8_t) {__p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vmlsq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { float32x4_t __ret; __ret = __p0 - __p1 * (float32x4_t) {__p2, __p2, __p2, __p2}; return __ret; } #else __ai float32x4_t vmlsq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 - __rev1 * (float32x4_t) {__p2, __p2, __p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmlsq_n_s32(int32x4_t __p0, int32x4_t __p1, int32_t __p2) { int32x4_t __ret; __ret = __p0 - __p1 * (int32x4_t) {__p2, __p2, __p2, __p2}; return __ret; } #else __ai int32x4_t vmlsq_n_s32(int32x4_t __p0, int32x4_t __p1, int32_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 - __rev1 * (int32x4_t) {__p2, __p2, __p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vmlsq_n_s16(int16x8_t __p0, int16x8_t __p1, int16_t __p2) { int16x8_t __ret; __ret = __p0 - __p1 * (int16x8_t) {__p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2}; return __ret; } #else __ai int16x8_t vmlsq_n_s16(int16x8_t __p0, int16x8_t __p1, int16_t __p2) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __rev1 * (int16x8_t) {__p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t 
vmls_n_u32(uint32x2_t __p0, uint32x2_t __p1, uint32_t __p2) { uint32x2_t __ret; __ret = __p0 - __p1 * (uint32x2_t) {__p2, __p2}; return __ret; } #else __ai uint32x2_t vmls_n_u32(uint32x2_t __p0, uint32x2_t __p1, uint32_t __p2) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 - __rev1 * (uint32x2_t) {__p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vmls_n_u16(uint16x4_t __p0, uint16x4_t __p1, uint16_t __p2) { uint16x4_t __ret; __ret = __p0 - __p1 * (uint16x4_t) {__p2, __p2, __p2, __p2}; return __ret; } #else __ai uint16x4_t vmls_n_u16(uint16x4_t __p0, uint16x4_t __p1, uint16_t __p2) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 - __rev1 * (uint16x4_t) {__p2, __p2, __p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vmls_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { float32x2_t __ret; __ret = __p0 - __p1 * (float32x2_t) {__p2, __p2}; return __ret; } #else __ai float32x2_t vmls_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 - __rev1 * (float32x2_t) {__p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vmls_n_s32(int32x2_t __p0, int32x2_t __p1, int32_t __p2) { int32x2_t __ret; __ret = __p0 - __p1 * (int32x2_t) {__p2, __p2}; return __ret; } #else __ai int32x2_t vmls_n_s32(int32x2_t __p0, int32x2_t __p1, int32_t __p2) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 - __rev1 * (int32x2_t) {__p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vmls_n_s16(int16x4_t __p0, int16x4_t __p1, int16_t __p2) { int16x4_t __ret; __ret = __p0 - __p1 * (int16x4_t) {__p2, __p2, __p2, __p2}; return __ret; } #else __ai int16x4_t vmls_n_s16(int16x4_t __p0, int16x4_t __p1, int16_t __p2) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 - __rev1 * (int16x4_t) {__p2, __p2, __p2, __p2}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vmov_n_p8(poly8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } #else __ai poly8x8_t vmov_n_p8(poly8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vmov_n_p16(poly16_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t) {__p0, __p0, __p0, __p0}; return __ret; } #else __ai poly16x4_t vmov_n_p16(poly16_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t) {__p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return 
__ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vmovq_n_p8(poly8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } #else __ai poly8x16_t vmovq_n_p8(poly8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vmovq_n_p16(poly16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } #else __ai poly16x8_t vmovq_n_p16(poly16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vmovq_n_u8(uint8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } #else __ai uint8x16_t vmovq_n_u8(uint8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmovq_n_u32(uint32_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) {__p0, __p0, __p0, __p0}; return __ret; } #else __ai uint32x4_t vmovq_n_u32(uint32_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) {__p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vmovq_n_u64(uint64_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) {__p0, __p0}; return __ret; } #else __ai uint64x2_t vmovq_n_u64(uint64_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) {__p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vmovq_n_u16(uint16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } #else __ai uint16x8_t vmovq_n_u16(uint16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vmovq_n_s8(int8_t __p0) { int8x16_t __ret; __ret = (int8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } #else __ai int8x16_t vmovq_n_s8(int8_t __p0) { int8x16_t __ret; __ret = (int8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vmovq_n_f32(float32_t __p0) { float32x4_t __ret; __ret = (float32x4_t) {__p0, __p0, __p0, __p0}; return __ret; } #else __ai float32x4_t vmovq_n_f32(float32_t __p0) { float32x4_t __ret; __ret = (float32x4_t) {__p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vmovq_n_f16(__p0) __extension__ ({ \ float16x8_t __ret; \ float16_t __s0 = 
__p0; \ __ret = (float16x8_t) {__s0, __s0, __s0, __s0, __s0, __s0, __s0, __s0}; \ __ret; \ }) #else #define vmovq_n_f16(__p0) __extension__ ({ \ float16x8_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16x8_t) {__s0, __s0, __s0, __s0, __s0, __s0, __s0, __s0}; \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmovq_n_s32(int32_t __p0) { int32x4_t __ret; __ret = (int32x4_t) {__p0, __p0, __p0, __p0}; return __ret; } #else __ai int32x4_t vmovq_n_s32(int32_t __p0) { int32x4_t __ret; __ret = (int32x4_t) {__p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vmovq_n_s64(int64_t __p0) { int64x2_t __ret; __ret = (int64x2_t) {__p0, __p0}; return __ret; } #else __ai int64x2_t vmovq_n_s64(int64_t __p0) { int64x2_t __ret; __ret = (int64x2_t) {__p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vmovq_n_s16(int16_t __p0) { int16x8_t __ret; __ret = (int16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } #else __ai int16x8_t vmovq_n_s16(int16_t __p0) { int16x8_t __ret; __ret = (int16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vmov_n_u8(uint8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } #else __ai uint8x8_t vmov_n_u8(uint8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vmov_n_u32(uint32_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) {__p0, __p0}; return __ret; } #else __ai uint32x2_t vmov_n_u32(uint32_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) {__p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vmov_n_u64(uint64_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) {__p0}; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vmov_n_u16(uint16_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) {__p0, __p0, __p0, __p0}; return __ret; } #else __ai uint16x4_t vmov_n_u16(uint16_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) {__p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vmov_n_s8(int8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } #else __ai int8x8_t vmov_n_s8(int8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vmov_n_f32(float32_t __p0) { float32x2_t __ret; __ret = (float32x2_t) {__p0, __p0}; return __ret; } #else __ai float32x2_t vmov_n_f32(float32_t __p0) { float32x2_t __ret; __ret = (float32x2_t) {__p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vmov_n_f16(__p0) __extension__ ({ \ float16x4_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16x4_t) {__s0, __s0, __s0, __s0}; \ __ret; \ }) #else #define vmov_n_f16(__p0) __extension__ ({ \ float16x4_t __ret; \ float16_t __s0 
= __p0; \ __ret = (float16x4_t) {__s0, __s0, __s0, __s0}; \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vmov_n_s32(int32_t __p0) { int32x2_t __ret; __ret = (int32x2_t) {__p0, __p0}; return __ret; } #else __ai int32x2_t vmov_n_s32(int32_t __p0) { int32x2_t __ret; __ret = (int32x2_t) {__p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vmov_n_s64(int64_t __p0) { int64x1_t __ret; __ret = (int64x1_t) {__p0}; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vmov_n_s16(int16_t __p0) { int16x4_t __ret; __ret = (int16x4_t) {__p0, __p0, __p0, __p0}; return __ret; } #else __ai int16x4_t vmov_n_s16(int16_t __p0) { int16x4_t __ret; __ret = (int16x4_t) {__p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vmovl_u8(uint8x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 49); return __ret; } #else __ai uint16x8_t vmovl_u8(uint8x8_t __p0) { uint16x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vmovl_v((int8x8_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai uint16x8_t __noswap_vmovl_u8(uint8x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 49); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vmovl_u32(uint32x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 51); return __ret; } #else __ai uint64x2_t vmovl_u32(uint32x2_t __p0) { uint64x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64x2_t) __builtin_neon_vmovl_v((int8x8_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai uint64x2_t __noswap_vmovl_u32(uint32x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 51); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmovl_u16(uint16x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 50); return __ret; } #else __ai uint32x4_t vmovl_u16(uint16x4_t __p0) { uint32x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vmovl_v((int8x8_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai uint32x4_t __noswap_vmovl_u16(uint16x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 50); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vmovl_s8(int8x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 33); return __ret; } #else __ai int16x8_t vmovl_s8(int8x8_t __p0) { int16x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vmovl_v((int8x8_t)__rev0, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai int16x8_t __noswap_vmovl_s8(int8x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 33); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vmovl_s32(int32x2_t __p0) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 35); return __ret; } #else __ai int64x2_t 
vmovl_s32(int32x2_t __p0) { int64x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int64x2_t) __builtin_neon_vmovl_v((int8x8_t)__rev0, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int64x2_t __noswap_vmovl_s32(int32x2_t __p0) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 35); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmovl_s16(int16x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 34); return __ret; } #else __ai int32x4_t vmovl_s16(int16x4_t __p0) { int32x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vmovl_v((int8x8_t)__rev0, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vmovl_s16(int16x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 34); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vmovn_u32(uint32x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 17); return __ret; } #else __ai uint16x4_t vmovn_u32(uint32x4_t __p0) { uint16x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vmovn_v((int8x16_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai uint16x4_t __noswap_vmovn_u32(uint32x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 17); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vmovn_u64(uint64x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 18); return __ret; } #else __ai uint32x2_t vmovn_u64(uint64x2_t __p0) { uint32x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vmovn_v((int8x16_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai uint32x2_t __noswap_vmovn_u64(uint64x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 18); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vmovn_u16(uint16x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 16); return __ret; } #else __ai uint8x8_t vmovn_u16(uint16x8_t __p0) { uint8x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vmovn_v((int8x16_t)__rev0, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai uint8x8_t __noswap_vmovn_u16(uint16x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 16); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vmovn_s32(int32x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 1); return __ret; } #else __ai int16x4_t vmovn_s32(int32x4_t __p0) { int16x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vmovn_v((int8x16_t)__rev0, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int16x4_t __noswap_vmovn_s32(int32x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vmovn_s64(int64x2_t __p0) { int32x2_t 
__ret; __ret = (int32x2_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 2); return __ret; } #else __ai int32x2_t vmovn_s64(int64x2_t __p0) { int32x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int32x2_t) __builtin_neon_vmovn_v((int8x16_t)__rev0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int32x2_t __noswap_vmovn_s64(int64x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vmovn_s16(int16x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 0); return __ret; } #else __ai int8x8_t vmovn_s16(int16x8_t __p0) { int8x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vmovn_v((int8x16_t)__rev0, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai int8x8_t __noswap_vmovn_s16(int16x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vmulq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __p0 * __p1; return __ret; } #else __ai uint8x16_t vmulq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 * __rev1; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmulq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __p0 * __p1; return __ret; } #else __ai uint32x4_t vmulq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 * __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vmulq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __p0 * __p1; return __ret; } #else __ai uint16x8_t vmulq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 * __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vmulq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __p0 * __p1; return __ret; } #else __ai int8x16_t vmulq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 * __rev1; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vmulq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = __p0 * __p1; return __ret; } #else __ai float32x4_t vmulq_f32(float32x4_t 
__p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 * __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmulq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __p0 * __p1; return __ret; } #else __ai int32x4_t vmulq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 * __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vmulq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __p0 * __p1; return __ret; } #else __ai int16x8_t vmulq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 * __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vmul_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __p0 * __p1; return __ret; } #else __ai uint8x8_t vmul_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 * __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vmul_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __p0 * __p1; return __ret; } #else __ai uint32x2_t vmul_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 * __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vmul_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __p0 * __p1; return __ret; } #else __ai uint16x4_t vmul_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 * __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vmul_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __p0 * __p1; return __ret; } #else __ai int8x8_t vmul_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 * __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vmul_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = __p0 * __p1; return __ret; } #else __ai float32x2_t vmul_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 * __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vmul_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = __p0 * __p1; return __ret; } #else __ai int32x2_t vmul_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 * __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vmul_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __p0 * __p1; return __ret; } #else __ai int16x4_t vmul_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 * __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vmul_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vmul_v((int8x8_t)__p0, (int8x8_t)__p1, 4); return __ret; } #else __ai poly8x8_t vmul_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vmul_v((int8x8_t)__rev0, (int8x8_t)__rev1, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vmulq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; __ret = (poly8x16_t) __builtin_neon_vmulq_v((int8x16_t)__p0, (int8x16_t)__p1, 36); return __ret; } #else __ai poly8x16_t vmulq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x16_t) __builtin_neon_vmulq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 36); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vmulq_lane_u32(__p0_86, __p1_86, __p2_86) __extension__ ({ \ uint32x4_t __ret_86; \ uint32x4_t __s0_86 = __p0_86; \ uint32x2_t __s1_86 = __p1_86; \ __ret_86 = __s0_86 * splatq_lane_u32(__s1_86, __p2_86); \ __ret_86; \ }) #else #define vmulq_lane_u32(__p0_87, __p1_87, __p2_87) __extension__ ({ \ uint32x4_t __ret_87; \ uint32x4_t __s0_87 = __p0_87; \ uint32x2_t __s1_87 = __p1_87; \ uint32x4_t __rev0_87; __rev0_87 = __builtin_shufflevector(__s0_87, __s0_87, 3, 2, 1, 0); \ uint32x2_t __rev1_87; __rev1_87 = __builtin_shufflevector(__s1_87, __s1_87, 1, 0); \ __ret_87 = __rev0_87 * __noswap_splatq_lane_u32(__rev1_87, __p2_87); \ __ret_87 = __builtin_shufflevector(__ret_87, __ret_87, 3, 2, 1, 0); \ __ret_87; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulq_lane_u16(__p0_88, __p1_88, __p2_88) __extension__ ({ \ uint16x8_t __ret_88; \ uint16x8_t __s0_88 = __p0_88; \ uint16x4_t __s1_88 = __p1_88; \ __ret_88 = __s0_88 * splatq_lane_u16(__s1_88, __p2_88); \ __ret_88; \ }) #else #define 
vmulq_lane_u16(__p0_89, __p1_89, __p2_89) __extension__ ({ \ uint16x8_t __ret_89; \ uint16x8_t __s0_89 = __p0_89; \ uint16x4_t __s1_89 = __p1_89; \ uint16x8_t __rev0_89; __rev0_89 = __builtin_shufflevector(__s0_89, __s0_89, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x4_t __rev1_89; __rev1_89 = __builtin_shufflevector(__s1_89, __s1_89, 3, 2, 1, 0); \ __ret_89 = __rev0_89 * __noswap_splatq_lane_u16(__rev1_89, __p2_89); \ __ret_89 = __builtin_shufflevector(__ret_89, __ret_89, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_89; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulq_lane_f32(__p0_90, __p1_90, __p2_90) __extension__ ({ \ float32x4_t __ret_90; \ float32x4_t __s0_90 = __p0_90; \ float32x2_t __s1_90 = __p1_90; \ __ret_90 = __s0_90 * splatq_lane_f32(__s1_90, __p2_90); \ __ret_90; \ }) #else #define vmulq_lane_f32(__p0_91, __p1_91, __p2_91) __extension__ ({ \ float32x4_t __ret_91; \ float32x4_t __s0_91 = __p0_91; \ float32x2_t __s1_91 = __p1_91; \ float32x4_t __rev0_91; __rev0_91 = __builtin_shufflevector(__s0_91, __s0_91, 3, 2, 1, 0); \ float32x2_t __rev1_91; __rev1_91 = __builtin_shufflevector(__s1_91, __s1_91, 1, 0); \ __ret_91 = __rev0_91 * __noswap_splatq_lane_f32(__rev1_91, __p2_91); \ __ret_91 = __builtin_shufflevector(__ret_91, __ret_91, 3, 2, 1, 0); \ __ret_91; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulq_lane_s32(__p0_92, __p1_92, __p2_92) __extension__ ({ \ int32x4_t __ret_92; \ int32x4_t __s0_92 = __p0_92; \ int32x2_t __s1_92 = __p1_92; \ __ret_92 = __s0_92 * splatq_lane_s32(__s1_92, __p2_92); \ __ret_92; \ }) #else #define vmulq_lane_s32(__p0_93, __p1_93, __p2_93) __extension__ ({ \ int32x4_t __ret_93; \ int32x4_t __s0_93 = __p0_93; \ int32x2_t __s1_93 = __p1_93; \ int32x4_t __rev0_93; __rev0_93 = __builtin_shufflevector(__s0_93, __s0_93, 3, 2, 1, 0); \ int32x2_t __rev1_93; __rev1_93 = __builtin_shufflevector(__s1_93, __s1_93, 1, 0); \ __ret_93 = __rev0_93 * __noswap_splatq_lane_s32(__rev1_93, __p2_93); \ __ret_93 = __builtin_shufflevector(__ret_93, __ret_93, 3, 2, 1, 0); \ __ret_93; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulq_lane_s16(__p0_94, __p1_94, __p2_94) __extension__ ({ \ int16x8_t __ret_94; \ int16x8_t __s0_94 = __p0_94; \ int16x4_t __s1_94 = __p1_94; \ __ret_94 = __s0_94 * splatq_lane_s16(__s1_94, __p2_94); \ __ret_94; \ }) #else #define vmulq_lane_s16(__p0_95, __p1_95, __p2_95) __extension__ ({ \ int16x8_t __ret_95; \ int16x8_t __s0_95 = __p0_95; \ int16x4_t __s1_95 = __p1_95; \ int16x8_t __rev0_95; __rev0_95 = __builtin_shufflevector(__s0_95, __s0_95, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x4_t __rev1_95; __rev1_95 = __builtin_shufflevector(__s1_95, __s1_95, 3, 2, 1, 0); \ __ret_95 = __rev0_95 * __noswap_splatq_lane_s16(__rev1_95, __p2_95); \ __ret_95 = __builtin_shufflevector(__ret_95, __ret_95, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_95; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmul_lane_u32(__p0_96, __p1_96, __p2_96) __extension__ ({ \ uint32x2_t __ret_96; \ uint32x2_t __s0_96 = __p0_96; \ uint32x2_t __s1_96 = __p1_96; \ __ret_96 = __s0_96 * splat_lane_u32(__s1_96, __p2_96); \ __ret_96; \ }) #else #define vmul_lane_u32(__p0_97, __p1_97, __p2_97) __extension__ ({ \ uint32x2_t __ret_97; \ uint32x2_t __s0_97 = __p0_97; \ uint32x2_t __s1_97 = __p1_97; \ uint32x2_t __rev0_97; __rev0_97 = __builtin_shufflevector(__s0_97, __s0_97, 1, 0); \ uint32x2_t __rev1_97; __rev1_97 = __builtin_shufflevector(__s1_97, __s1_97, 1, 0); \ __ret_97 = __rev0_97 * __noswap_splat_lane_u32(__rev1_97, __p2_97); \ __ret_97 = __builtin_shufflevector(__ret_97, __ret_97, 1, 0); \ __ret_97; \ }) #endif #ifdef 
__LITTLE_ENDIAN__ #define vmul_lane_u16(__p0_98, __p1_98, __p2_98) __extension__ ({ \ uint16x4_t __ret_98; \ uint16x4_t __s0_98 = __p0_98; \ uint16x4_t __s1_98 = __p1_98; \ __ret_98 = __s0_98 * splat_lane_u16(__s1_98, __p2_98); \ __ret_98; \ }) #else #define vmul_lane_u16(__p0_99, __p1_99, __p2_99) __extension__ ({ \ uint16x4_t __ret_99; \ uint16x4_t __s0_99 = __p0_99; \ uint16x4_t __s1_99 = __p1_99; \ uint16x4_t __rev0_99; __rev0_99 = __builtin_shufflevector(__s0_99, __s0_99, 3, 2, 1, 0); \ uint16x4_t __rev1_99; __rev1_99 = __builtin_shufflevector(__s1_99, __s1_99, 3, 2, 1, 0); \ __ret_99 = __rev0_99 * __noswap_splat_lane_u16(__rev1_99, __p2_99); \ __ret_99 = __builtin_shufflevector(__ret_99, __ret_99, 3, 2, 1, 0); \ __ret_99; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmul_lane_f32(__p0_100, __p1_100, __p2_100) __extension__ ({ \ float32x2_t __ret_100; \ float32x2_t __s0_100 = __p0_100; \ float32x2_t __s1_100 = __p1_100; \ __ret_100 = __s0_100 * splat_lane_f32(__s1_100, __p2_100); \ __ret_100; \ }) #else #define vmul_lane_f32(__p0_101, __p1_101, __p2_101) __extension__ ({ \ float32x2_t __ret_101; \ float32x2_t __s0_101 = __p0_101; \ float32x2_t __s1_101 = __p1_101; \ float32x2_t __rev0_101; __rev0_101 = __builtin_shufflevector(__s0_101, __s0_101, 1, 0); \ float32x2_t __rev1_101; __rev1_101 = __builtin_shufflevector(__s1_101, __s1_101, 1, 0); \ __ret_101 = __rev0_101 * __noswap_splat_lane_f32(__rev1_101, __p2_101); \ __ret_101 = __builtin_shufflevector(__ret_101, __ret_101, 1, 0); \ __ret_101; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmul_lane_s32(__p0_102, __p1_102, __p2_102) __extension__ ({ \ int32x2_t __ret_102; \ int32x2_t __s0_102 = __p0_102; \ int32x2_t __s1_102 = __p1_102; \ __ret_102 = __s0_102 * splat_lane_s32(__s1_102, __p2_102); \ __ret_102; \ }) #else #define vmul_lane_s32(__p0_103, __p1_103, __p2_103) __extension__ ({ \ int32x2_t __ret_103; \ int32x2_t __s0_103 = __p0_103; \ int32x2_t __s1_103 = __p1_103; \ int32x2_t __rev0_103; __rev0_103 = __builtin_shufflevector(__s0_103, __s0_103, 1, 0); \ int32x2_t __rev1_103; __rev1_103 = __builtin_shufflevector(__s1_103, __s1_103, 1, 0); \ __ret_103 = __rev0_103 * __noswap_splat_lane_s32(__rev1_103, __p2_103); \ __ret_103 = __builtin_shufflevector(__ret_103, __ret_103, 1, 0); \ __ret_103; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmul_lane_s16(__p0_104, __p1_104, __p2_104) __extension__ ({ \ int16x4_t __ret_104; \ int16x4_t __s0_104 = __p0_104; \ int16x4_t __s1_104 = __p1_104; \ __ret_104 = __s0_104 * splat_lane_s16(__s1_104, __p2_104); \ __ret_104; \ }) #else #define vmul_lane_s16(__p0_105, __p1_105, __p2_105) __extension__ ({ \ int16x4_t __ret_105; \ int16x4_t __s0_105 = __p0_105; \ int16x4_t __s1_105 = __p1_105; \ int16x4_t __rev0_105; __rev0_105 = __builtin_shufflevector(__s0_105, __s0_105, 3, 2, 1, 0); \ int16x4_t __rev1_105; __rev1_105 = __builtin_shufflevector(__s1_105, __s1_105, 3, 2, 1, 0); \ __ret_105 = __rev0_105 * __noswap_splat_lane_s16(__rev1_105, __p2_105); \ __ret_105 = __builtin_shufflevector(__ret_105, __ret_105, 3, 2, 1, 0); \ __ret_105; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmulq_n_u32(uint32x4_t __p0, uint32_t __p1) { uint32x4_t __ret; __ret = __p0 * (uint32x4_t) {__p1, __p1, __p1, __p1}; return __ret; } #else __ai uint32x4_t vmulq_n_u32(uint32x4_t __p0, uint32_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __rev0 * (uint32x4_t) {__p1, __p1, __p1, __p1}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; 
} #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vmulq_n_u16(uint16x8_t __p0, uint16_t __p1) { uint16x8_t __ret; __ret = __p0 * (uint16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}; return __ret; } #else __ai uint16x8_t vmulq_n_u16(uint16x8_t __p0, uint16_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 * (uint16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vmulq_n_f32(float32x4_t __p0, float32_t __p1) { float32x4_t __ret; __ret = __p0 * (float32x4_t) {__p1, __p1, __p1, __p1}; return __ret; } #else __ai float32x4_t vmulq_n_f32(float32x4_t __p0, float32_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __rev0 * (float32x4_t) {__p1, __p1, __p1, __p1}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmulq_n_s32(int32x4_t __p0, int32_t __p1) { int32x4_t __ret; __ret = __p0 * (int32x4_t) {__p1, __p1, __p1, __p1}; return __ret; } #else __ai int32x4_t vmulq_n_s32(int32x4_t __p0, int32_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __rev0 * (int32x4_t) {__p1, __p1, __p1, __p1}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vmulq_n_s16(int16x8_t __p0, int16_t __p1) { int16x8_t __ret; __ret = __p0 * (int16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}; return __ret; } #else __ai int16x8_t vmulq_n_s16(int16x8_t __p0, int16_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 * (int16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vmul_n_u32(uint32x2_t __p0, uint32_t __p1) { uint32x2_t __ret; __ret = __p0 * (uint32x2_t) {__p1, __p1}; return __ret; } #else __ai uint32x2_t vmul_n_u32(uint32x2_t __p0, uint32_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = __rev0 * (uint32x2_t) {__p1, __p1}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vmul_n_u16(uint16x4_t __p0, uint16_t __p1) { uint16x4_t __ret; __ret = __p0 * (uint16x4_t) {__p1, __p1, __p1, __p1}; return __ret; } #else __ai uint16x4_t vmul_n_u16(uint16x4_t __p0, uint16_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __rev0 * (uint16x4_t) {__p1, __p1, __p1, __p1}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vmul_n_f32(float32x2_t __p0, float32_t __p1) { float32x2_t __ret; __ret = __p0 * (float32x2_t) {__p1, __p1}; return __ret; } #else __ai float32x2_t vmul_n_f32(float32x2_t __p0, float32_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = __rev0 * (float32x2_t) {__p1, __p1}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vmul_n_s32(int32x2_t __p0, int32_t __p1) { int32x2_t __ret; __ret = __p0 * (int32x2_t) {__p1, __p1}; return 
__ret; } #else __ai int32x2_t vmul_n_s32(int32x2_t __p0, int32_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = __rev0 * (int32x2_t) {__p1, __p1}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vmul_n_s16(int16x4_t __p0, int16_t __p1) { int16x4_t __ret; __ret = __p0 * (int16x4_t) {__p1, __p1, __p1, __p1}; return __ret; } #else __ai int16x4_t vmul_n_s16(int16x4_t __p0, int16_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __rev0 * (int16x4_t) {__p1, __p1, __p1, __p1}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vmull_p8(poly8x8_t __p0, poly8x8_t __p1) { poly16x8_t __ret; __ret = (poly16x8_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 37); return __ret; } #else __ai poly16x8_t vmull_p8(poly8x8_t __p0, poly8x8_t __p1) { poly16x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly16x8_t) __builtin_neon_vmull_v((int8x8_t)__rev0, (int8x8_t)__rev1, 37); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai poly16x8_t __noswap_vmull_p8(poly8x8_t __p0, poly8x8_t __p1) { poly16x8_t __ret; __ret = (poly16x8_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 37); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vmull_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 49); return __ret; } #else __ai uint16x8_t vmull_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vmull_v((int8x8_t)__rev0, (int8x8_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai uint16x8_t __noswap_vmull_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 49); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vmull_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 51); return __ret; } #else __ai uint64x2_t vmull_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t) __builtin_neon_vmull_v((int8x8_t)__rev0, (int8x8_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai uint64x2_t __noswap_vmull_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 51); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmull_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 50); return __ret; } #else __ai uint32x4_t vmull_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vmull_v((int8x8_t)__rev0, (int8x8_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai uint32x4_t __noswap_vmull_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 50); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vmull_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 33); return __ret; } #else __ai int16x8_t vmull_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vmull_v((int8x8_t)__rev0, (int8x8_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai int16x8_t __noswap_vmull_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 33); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vmull_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 35); return __ret; } #else __ai int64x2_t vmull_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int64x2_t) __builtin_neon_vmull_v((int8x8_t)__rev0, (int8x8_t)__rev1, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int64x2_t __noswap_vmull_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 35); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmull_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 34); return __ret; } #else __ai int32x4_t vmull_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vmull_v((int8x8_t)__rev0, (int8x8_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vmull_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 34); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vmull_lane_u32(__p0_106, __p1_106, __p2_106) __extension__ ({ \ uint64x2_t __ret_106; \ uint32x2_t __s0_106 = __p0_106; \ uint32x2_t __s1_106 = __p1_106; \ __ret_106 = vmull_u32(__s0_106, splat_lane_u32(__s1_106, __p2_106)); \ __ret_106; \ }) #else #define vmull_lane_u32(__p0_107, __p1_107, __p2_107) __extension__ ({ \ uint64x2_t __ret_107; \ uint32x2_t __s0_107 = __p0_107; \ uint32x2_t __s1_107 = __p1_107; \ uint32x2_t __rev0_107; __rev0_107 = __builtin_shufflevector(__s0_107, __s0_107, 1, 0); \ uint32x2_t __rev1_107; __rev1_107 = __builtin_shufflevector(__s1_107, __s1_107, 1, 0); \ __ret_107 = __noswap_vmull_u32(__rev0_107, __noswap_splat_lane_u32(__rev1_107, __p2_107)); \ __ret_107 = __builtin_shufflevector(__ret_107, __ret_107, 1, 0); \ __ret_107; \ }) #endif 
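/* Illustrative usage sketch (not part of the original header text above): the vmull_* intrinsics
 * defined here are widening multiplies that take 64-bit D-register vectors and return a 128-bit
 * Q-register result with doubled element width; the *_lane_* forms broadcast one lane of the
 * second operand. The helper name below is hypothetical, and the sketch assumes an ARM target
 * with NEON support where <arm_neon.h> provides these intrinsics. */
#ifdef __ARM_NEON
#include <arm_neon.h>
static inline uint64x2_t example_widening_mul_u32(uint32x2_t a, uint32x2_t b)
{
  uint64x2_t prod = vmull_u32(a, b);         /* per-lane a[i] * b[i], widened to 64-bit lanes */
  uint64x2_t lane = vmull_lane_u32(a, b, 1); /* a[i] * b[1], lane 1 of b broadcast            */
  return vaddq_u64(prod, lane);              /* combine the two widened products              */
}
#endif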
#ifdef __LITTLE_ENDIAN__ #define vmull_lane_u16(__p0_108, __p1_108, __p2_108) __extension__ ({ \ uint32x4_t __ret_108; \ uint16x4_t __s0_108 = __p0_108; \ uint16x4_t __s1_108 = __p1_108; \ __ret_108 = vmull_u16(__s0_108, splat_lane_u16(__s1_108, __p2_108)); \ __ret_108; \ }) #else #define vmull_lane_u16(__p0_109, __p1_109, __p2_109) __extension__ ({ \ uint32x4_t __ret_109; \ uint16x4_t __s0_109 = __p0_109; \ uint16x4_t __s1_109 = __p1_109; \ uint16x4_t __rev0_109; __rev0_109 = __builtin_shufflevector(__s0_109, __s0_109, 3, 2, 1, 0); \ uint16x4_t __rev1_109; __rev1_109 = __builtin_shufflevector(__s1_109, __s1_109, 3, 2, 1, 0); \ __ret_109 = __noswap_vmull_u16(__rev0_109, __noswap_splat_lane_u16(__rev1_109, __p2_109)); \ __ret_109 = __builtin_shufflevector(__ret_109, __ret_109, 3, 2, 1, 0); \ __ret_109; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmull_lane_s32(__p0_110, __p1_110, __p2_110) __extension__ ({ \ int64x2_t __ret_110; \ int32x2_t __s0_110 = __p0_110; \ int32x2_t __s1_110 = __p1_110; \ __ret_110 = vmull_s32(__s0_110, splat_lane_s32(__s1_110, __p2_110)); \ __ret_110; \ }) #else #define vmull_lane_s32(__p0_111, __p1_111, __p2_111) __extension__ ({ \ int64x2_t __ret_111; \ int32x2_t __s0_111 = __p0_111; \ int32x2_t __s1_111 = __p1_111; \ int32x2_t __rev0_111; __rev0_111 = __builtin_shufflevector(__s0_111, __s0_111, 1, 0); \ int32x2_t __rev1_111; __rev1_111 = __builtin_shufflevector(__s1_111, __s1_111, 1, 0); \ __ret_111 = __noswap_vmull_s32(__rev0_111, __noswap_splat_lane_s32(__rev1_111, __p2_111)); \ __ret_111 = __builtin_shufflevector(__ret_111, __ret_111, 1, 0); \ __ret_111; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmull_lane_s16(__p0_112, __p1_112, __p2_112) __extension__ ({ \ int32x4_t __ret_112; \ int16x4_t __s0_112 = __p0_112; \ int16x4_t __s1_112 = __p1_112; \ __ret_112 = vmull_s16(__s0_112, splat_lane_s16(__s1_112, __p2_112)); \ __ret_112; \ }) #else #define vmull_lane_s16(__p0_113, __p1_113, __p2_113) __extension__ ({ \ int32x4_t __ret_113; \ int16x4_t __s0_113 = __p0_113; \ int16x4_t __s1_113 = __p1_113; \ int16x4_t __rev0_113; __rev0_113 = __builtin_shufflevector(__s0_113, __s0_113, 3, 2, 1, 0); \ int16x4_t __rev1_113; __rev1_113 = __builtin_shufflevector(__s1_113, __s1_113, 3, 2, 1, 0); \ __ret_113 = __noswap_vmull_s16(__rev0_113, __noswap_splat_lane_s16(__rev1_113, __p2_113)); \ __ret_113 = __builtin_shufflevector(__ret_113, __ret_113, 3, 2, 1, 0); \ __ret_113; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vmull_n_u32(uint32x2_t __p0, uint32_t __p1) { uint64x2_t __ret; __ret = vmull_u32(__p0, (uint32x2_t) {__p1, __p1}); return __ret; } #else __ai uint64x2_t vmull_n_u32(uint32x2_t __p0, uint32_t __p1) { uint64x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = __noswap_vmull_u32(__rev0, (uint32x2_t) {__p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai uint64x2_t __noswap_vmull_n_u32(uint32x2_t __p0, uint32_t __p1) { uint64x2_t __ret; __ret = __noswap_vmull_u32(__p0, (uint32x2_t) {__p1, __p1}); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmull_n_u16(uint16x4_t __p0, uint16_t __p1) { uint32x4_t __ret; __ret = vmull_u16(__p0, (uint16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } #else __ai uint32x4_t vmull_n_u16(uint16x4_t __p0, uint16_t __p1) { uint32x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __noswap_vmull_u16(__rev0, (uint16x4_t) {__p1, __p1, __p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 
0); return __ret; } __ai uint32x4_t __noswap_vmull_n_u16(uint16x4_t __p0, uint16_t __p1) { uint32x4_t __ret; __ret = __noswap_vmull_u16(__p0, (uint16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vmull_n_s32(int32x2_t __p0, int32_t __p1) { int64x2_t __ret; __ret = vmull_s32(__p0, (int32x2_t) {__p1, __p1}); return __ret; } #else __ai int64x2_t vmull_n_s32(int32x2_t __p0, int32_t __p1) { int64x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = __noswap_vmull_s32(__rev0, (int32x2_t) {__p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int64x2_t __noswap_vmull_n_s32(int32x2_t __p0, int32_t __p1) { int64x2_t __ret; __ret = __noswap_vmull_s32(__p0, (int32x2_t) {__p1, __p1}); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmull_n_s16(int16x4_t __p0, int16_t __p1) { int32x4_t __ret; __ret = vmull_s16(__p0, (int16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } #else __ai int32x4_t vmull_n_s16(int16x4_t __p0, int16_t __p1) { int32x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __noswap_vmull_s16(__rev0, (int16x4_t) {__p1, __p1, __p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vmull_n_s16(int16x4_t __p0, int16_t __p1) { int32x4_t __ret; __ret = __noswap_vmull_s16(__p0, (int16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vmvn_p8(poly8x8_t __p0) { poly8x8_t __ret; __ret = ~__p0; return __ret; } #else __ai poly8x8_t vmvn_p8(poly8x8_t __p0) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = ~__rev0; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vmvnq_p8(poly8x16_t __p0) { poly8x16_t __ret; __ret = ~__p0; return __ret; } #else __ai poly8x16_t vmvnq_p8(poly8x16_t __p0) { poly8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = ~__rev0; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vmvnq_u8(uint8x16_t __p0) { uint8x16_t __ret; __ret = ~__p0; return __ret; } #else __ai uint8x16_t vmvnq_u8(uint8x16_t __p0) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = ~__rev0; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmvnq_u32(uint32x4_t __p0) { uint32x4_t __ret; __ret = ~__p0; return __ret; } #else __ai uint32x4_t vmvnq_u32(uint32x4_t __p0) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = ~__rev0; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vmvnq_u16(uint16x8_t __p0) { uint16x8_t __ret; __ret = ~__p0; return __ret; } #else __ai uint16x8_t vmvnq_u16(uint16x8_t __p0) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = ~__rev0; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t 
vmvnq_s8(int8x16_t __p0) { int8x16_t __ret; __ret = ~__p0; return __ret; } #else __ai int8x16_t vmvnq_s8(int8x16_t __p0) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = ~__rev0; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmvnq_s32(int32x4_t __p0) { int32x4_t __ret; __ret = ~__p0; return __ret; } #else __ai int32x4_t vmvnq_s32(int32x4_t __p0) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = ~__rev0; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vmvnq_s16(int16x8_t __p0) { int16x8_t __ret; __ret = ~__p0; return __ret; } #else __ai int16x8_t vmvnq_s16(int16x8_t __p0) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = ~__rev0; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vmvn_u8(uint8x8_t __p0) { uint8x8_t __ret; __ret = ~__p0; return __ret; } #else __ai uint8x8_t vmvn_u8(uint8x8_t __p0) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = ~__rev0; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vmvn_u32(uint32x2_t __p0) { uint32x2_t __ret; __ret = ~__p0; return __ret; } #else __ai uint32x2_t vmvn_u32(uint32x2_t __p0) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = ~__rev0; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vmvn_u16(uint16x4_t __p0) { uint16x4_t __ret; __ret = ~__p0; return __ret; } #else __ai uint16x4_t vmvn_u16(uint16x4_t __p0) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = ~__rev0; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vmvn_s8(int8x8_t __p0) { int8x8_t __ret; __ret = ~__p0; return __ret; } #else __ai int8x8_t vmvn_s8(int8x8_t __p0) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = ~__rev0; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vmvn_s32(int32x2_t __p0) { int32x2_t __ret; __ret = ~__p0; return __ret; } #else __ai int32x2_t vmvn_s32(int32x2_t __p0) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = ~__rev0; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vmvn_s16(int16x4_t __p0) { int16x4_t __ret; __ret = ~__p0; return __ret; } #else __ai int16x4_t vmvn_s16(int16x4_t __p0) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = ~__rev0; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vnegq_s8(int8x16_t __p0) { int8x16_t __ret; __ret = -__p0; return __ret; } #else __ai int8x16_t vnegq_s8(int8x16_t __p0) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 
11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = -__rev0; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vnegq_f32(float32x4_t __p0) { float32x4_t __ret; __ret = -__p0; return __ret; } #else __ai float32x4_t vnegq_f32(float32x4_t __p0) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = -__rev0; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vnegq_s32(int32x4_t __p0) { int32x4_t __ret; __ret = -__p0; return __ret; } #else __ai int32x4_t vnegq_s32(int32x4_t __p0) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = -__rev0; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vnegq_s16(int16x8_t __p0) { int16x8_t __ret; __ret = -__p0; return __ret; } #else __ai int16x8_t vnegq_s16(int16x8_t __p0) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = -__rev0; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vneg_s8(int8x8_t __p0) { int8x8_t __ret; __ret = -__p0; return __ret; } #else __ai int8x8_t vneg_s8(int8x8_t __p0) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = -__rev0; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vneg_f32(float32x2_t __p0) { float32x2_t __ret; __ret = -__p0; return __ret; } #else __ai float32x2_t vneg_f32(float32x2_t __p0) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = -__rev0; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vneg_s32(int32x2_t __p0) { int32x2_t __ret; __ret = -__p0; return __ret; } #else __ai int32x2_t vneg_s32(int32x2_t __p0) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = -__rev0; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vneg_s16(int16x4_t __p0) { int16x4_t __ret; __ret = -__p0; return __ret; } #else __ai int16x4_t vneg_s16(int16x4_t __p0) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = -__rev0; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vornq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __p0 | ~__p1; return __ret; } #else __ai uint8x16_t vornq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 | ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vornq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __p0 | ~__p1; return __ret; } #else __ai uint32x4_t vornq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t 
__ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 | ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vornq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = __p0 | ~__p1; return __ret; } #else __ai uint64x2_t vornq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 | ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vornq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __p0 | ~__p1; return __ret; } #else __ai uint16x8_t vornq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 | ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vornq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __p0 | ~__p1; return __ret; } #else __ai int8x16_t vornq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 | ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vornq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __p0 | ~__p1; return __ret; } #else __ai int32x4_t vornq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 | ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vornq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = __p0 | ~__p1; return __ret; } #else __ai int64x2_t vornq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 | ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vornq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __p0 | ~__p1; return __ret; } #else __ai int16x8_t vornq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 | ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vorn_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __p0 | ~__p1; return __ret; } #else __ai uint8x8_t vorn_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; 
uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 | ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vorn_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __p0 | ~__p1; return __ret; } #else __ai uint32x2_t vorn_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 | ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vorn_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = __p0 | ~__p1; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vorn_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __p0 | ~__p1; return __ret; } #else __ai uint16x4_t vorn_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 | ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vorn_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __p0 | ~__p1; return __ret; } #else __ai int8x8_t vorn_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 | ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vorn_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = __p0 | ~__p1; return __ret; } #else __ai int32x2_t vorn_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 | ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vorn_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = __p0 | ~__p1; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vorn_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __p0 | ~__p1; return __ret; } #else __ai int16x4_t vorn_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 | ~__rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vorrq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __p0 | __p1; return __ret; } #else __ai uint8x16_t vorrq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 | __rev1; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef 
__LITTLE_ENDIAN__ __ai uint32x4_t vorrq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __p0 | __p1; return __ret; } #else __ai uint32x4_t vorrq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 | __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vorrq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = __p0 | __p1; return __ret; } #else __ai uint64x2_t vorrq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 | __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vorrq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __p0 | __p1; return __ret; } #else __ai uint16x8_t vorrq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 | __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vorrq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __p0 | __p1; return __ret; } #else __ai int8x16_t vorrq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 | __rev1; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vorrq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __p0 | __p1; return __ret; } #else __ai int32x4_t vorrq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 | __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vorrq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = __p0 | __p1; return __ret; } #else __ai int64x2_t vorrq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 | __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vorrq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __p0 | __p1; return __ret; } #else __ai int16x8_t vorrq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 | __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef 
__LITTLE_ENDIAN__ __ai uint8x8_t vorr_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __p0 | __p1; return __ret; } #else __ai uint8x8_t vorr_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 | __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vorr_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __p0 | __p1; return __ret; } #else __ai uint32x2_t vorr_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 | __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vorr_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = __p0 | __p1; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vorr_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __p0 | __p1; return __ret; } #else __ai uint16x4_t vorr_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 | __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vorr_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __p0 | __p1; return __ret; } #else __ai int8x8_t vorr_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 | __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vorr_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = __p0 | __p1; return __ret; } #else __ai int32x2_t vorr_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 | __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vorr_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = __p0 | __p1; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vorr_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __p0 | __p1; return __ret; } #else __ai int16x4_t vorr_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 | __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vpadalq_u8(uint16x8_t __p0, uint8x16_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vpadalq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8_t vpadalq_u8(uint16x8_t __p0, uint8x16_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vpadalq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vpadalq_u32(uint64x2_t __p0, uint32x4_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vpadalq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else __ai uint64x2_t vpadalq_u32(uint64x2_t __p0, uint32x4_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint64x2_t) __builtin_neon_vpadalq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vpadalq_u16(uint32x4_t __p0, uint16x8_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vpadalq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vpadalq_u16(uint32x4_t __p0, uint16x8_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vpadalq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vpadalq_s8(int16x8_t __p0, int8x16_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vpadalq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8_t vpadalq_s8(int16x8_t __p0, int8x16_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vpadalq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vpadalq_s32(int64x2_t __p0, int32x4_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vpadalq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); return __ret; } #else __ai int64x2_t vpadalq_s32(int64x2_t __p0, int32x4_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int64x2_t) __builtin_neon_vpadalq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vpadalq_s16(int32x4_t __p0, int16x8_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vpadalq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4_t vpadalq_s16(int32x4_t __p0, int16x8_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vpadalq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vpadal_u8(uint16x4_t __p0, uint8x8_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) 
__builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4_t vpadal_u8(uint16x4_t __p0, uint8x8_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vpadal_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vpadal_u32(uint64x1_t __p0, uint32x2_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #else __ai uint64x1_t vpadal_u32(uint64x1_t __p0, uint32x2_t __p1) { uint64x1_t __ret; uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x1_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__rev1, 19); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vpadal_u16(uint32x2_t __p0, uint16x4_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vpadal_u16(uint32x2_t __p0, uint16x4_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x2_t) __builtin_neon_vpadal_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vpadal_s8(int16x4_t __p0, int8x8_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4_t vpadal_s8(int16x4_t __p0, int8x8_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vpadal_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x1_t vpadal_s32(int64x1_t __p0, int32x2_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } #else __ai int64x1_t vpadal_s32(int64x1_t __p0, int32x2_t __p1) { int64x1_t __ret; int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int64x1_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__rev1, 3); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vpadal_s16(int32x2_t __p0, int16x4_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2_t vpadal_s16(int32x2_t __p0, int16x4_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x2_t) __builtin_neon_vpadal_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vpadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vpadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 
7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vpadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vpadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vpadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vpadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vpadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4_t vpadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vpadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vpadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vpadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vpadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vpadd_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } #else __ai float32x2_t vpadd_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float32x2_t) __builtin_neon_vpadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vpadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2_t vpadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vpadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vpadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4_t vpadd_s16(int16x4_t __p0, int16x4_t __p1) { 
int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vpadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vpaddlq_u8(uint8x16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vpaddlq_v((int8x16_t)__p0, 49); return __ret; } #else __ai uint16x8_t vpaddlq_u8(uint8x16_t __p0) { uint16x8_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vpaddlq_v((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vpaddlq_u32(uint32x4_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vpaddlq_v((int8x16_t)__p0, 51); return __ret; } #else __ai uint64x2_t vpaddlq_u32(uint32x4_t __p0) { uint64x2_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint64x2_t) __builtin_neon_vpaddlq_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vpaddlq_u16(uint16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vpaddlq_v((int8x16_t)__p0, 50); return __ret; } #else __ai uint32x4_t vpaddlq_u16(uint16x8_t __p0) { uint32x4_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vpaddlq_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vpaddlq_s8(int8x16_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vpaddlq_v((int8x16_t)__p0, 33); return __ret; } #else __ai int16x8_t vpaddlq_s8(int8x16_t __p0) { int16x8_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vpaddlq_v((int8x16_t)__rev0, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vpaddlq_s32(int32x4_t __p0) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vpaddlq_v((int8x16_t)__p0, 35); return __ret; } #else __ai int64x2_t vpaddlq_s32(int32x4_t __p0) { int64x2_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int64x2_t) __builtin_neon_vpaddlq_v((int8x16_t)__rev0, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vpaddlq_s16(int16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vpaddlq_v((int8x16_t)__p0, 34); return __ret; } #else __ai int32x4_t vpaddlq_s16(int16x8_t __p0) { int32x4_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vpaddlq_v((int8x16_t)__rev0, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vpaddl_u8(uint8x8_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vpaddl_v((int8x8_t)__p0, 17); return __ret; } #else __ai uint16x4_t vpaddl_u8(uint8x8_t __p0) { uint16x4_t __ret; uint8x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vpaddl_v((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vpaddl_u32(uint32x2_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vpaddl_v((int8x8_t)__p0, 19); return __ret; } #else __ai uint64x1_t vpaddl_u32(uint32x2_t __p0) { uint64x1_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64x1_t) __builtin_neon_vpaddl_v((int8x8_t)__rev0, 19); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vpaddl_u16(uint16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vpaddl_v((int8x8_t)__p0, 18); return __ret; } #else __ai uint32x2_t vpaddl_u16(uint16x4_t __p0) { uint32x2_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x2_t) __builtin_neon_vpaddl_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vpaddl_s8(int8x8_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vpaddl_v((int8x8_t)__p0, 1); return __ret; } #else __ai int16x4_t vpaddl_s8(int8x8_t __p0) { int16x4_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vpaddl_v((int8x8_t)__rev0, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x1_t vpaddl_s32(int32x2_t __p0) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vpaddl_v((int8x8_t)__p0, 3); return __ret; } #else __ai int64x1_t vpaddl_s32(int32x2_t __p0) { int64x1_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int64x1_t) __builtin_neon_vpaddl_v((int8x8_t)__rev0, 3); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vpaddl_s16(int16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vpaddl_v((int8x8_t)__p0, 2); return __ret; } #else __ai int32x2_t vpaddl_s16(int16x4_t __p0) { int32x2_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int32x2_t) __builtin_neon_vpaddl_v((int8x8_t)__rev0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vpmax_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vpmax_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vpmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vpmax_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vpmax_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vpmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = 
__builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vpmax_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4_t vpmax_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vpmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vpmax_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vpmax_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vpmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vpmax_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } #else __ai float32x2_t vpmax_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float32x2_t) __builtin_neon_vpmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vpmax_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2_t vpmax_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vpmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vpmax_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4_t vpmax_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vpmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vpmin_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vpmin_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) 
__builtin_neon_vpmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vpmin_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vpmin_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vpmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vpmin_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4_t vpmin_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vpmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vpmin_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vpmin_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vpmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vpmin_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } #else __ai float32x2_t vpmin_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float32x2_t) __builtin_neon_vpmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vpmin_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2_t vpmin_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vpmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vpmin_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4_t vpmin_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vpmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vqabsq_s8(int8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqabsq_v((int8x16_t)__p0, 32); return __ret; } #else __ai int8x16_t vqabsq_s8(int8x16_t __p0) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vqabsq_v((int8x16_t)__rev0, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqabsq_s32(int32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqabsq_v((int8x16_t)__p0, 34); return __ret; } #else __ai int32x4_t vqabsq_s32(int32x4_t __p0) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vqabsq_v((int8x16_t)__rev0, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vqabsq_s16(int16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqabsq_v((int8x16_t)__p0, 33); return __ret; } #else __ai int16x8_t vqabsq_s16(int16x8_t __p0) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vqabsq_v((int8x16_t)__rev0, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vqabs_s8(int8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqabs_v((int8x8_t)__p0, 0); return __ret; } #else __ai int8x8_t vqabs_s8(int8x8_t __p0) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vqabs_v((int8x8_t)__rev0, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vqabs_s32(int32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqabs_v((int8x8_t)__p0, 2); return __ret; } #else __ai int32x2_t vqabs_s32(int32x2_t __p0) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int32x2_t) __builtin_neon_vqabs_v((int8x8_t)__rev0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vqabs_s16(int16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqabs_v((int8x8_t)__p0, 1); return __ret; } #else __ai int16x4_t vqabs_s16(int16x4_t __p0) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vqabs_v((int8x8_t)__rev0, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vqaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vqaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 
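/*
 * vpmin mirrors vpmax but keeps the smaller element of each adjacent pair.
 * The vqabs family is a saturating absolute value: unlike plain negation of
 * negatives, the one value with no positive counterpart clamps instead of
 * wrapping. Usage sketch (illustrative values only):
 *
 *   int8x8_t v = (int8x8_t) {-128, -1, 0, 1, 127, -64, 64, -2};
 *   int8x8_t a = vqabs_s8(v);   // {127, 1, 0, 1, 127, 64, 64, 2}
 */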
11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vqaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vqaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vqaddq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else __ai uint64x2_t vqaddq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t) __builtin_neon_vqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vqaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8_t vqaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vqaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16_t vqaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4_t vqaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqaddq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t 
__ret; __ret = (int64x2_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); return __ret; } #else __ai int64x2_t vqaddq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int64x2_t) __builtin_neon_vqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vqaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8_t vqaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vqadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vqadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vqadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vqadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vqadd_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vqadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4_t vqadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vqadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vqadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, 
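/*
 * Saturating add: the vqadd and vqaddq forms clamp to the type's range on
 * overflow instead of wrapping. Usage sketch (illustrative values only):
 *
 *   int16x8_t a = (int16x8_t) {32000, -32000, 100, -100, 1, -1, 0, 30000};
 *   int16x8_t b = (int16x8_t) {1000, -1000, 27, -27, 1, -1, 0, 3000};
 *   int16x8_t s = vqaddq_s16(a, b);
 *   // {32767, -32768, 127, -127, 2, -2, 0, 32767}
 */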
__p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vqadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2_t vqadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vqadd_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vqadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4_t vqadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqdmlal_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vqdmlal_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 35); return __ret; } #else __ai int64x2_t vqdmlal_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (int64x2_t) __builtin_neon_vqdmlal_v((int8x16_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int64x2_t __noswap_vqdmlal_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vqdmlal_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 35); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqdmlal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqdmlal_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 34); return __ret; } #else __ai int32x4_t vqdmlal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vqdmlal_v((int8x16_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vqdmlal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqdmlal_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 34); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlal_lane_s32(__p0_114, __p1_114, __p2_114, __p3_114) 
__extension__ ({ \ int64x2_t __ret_114; \ int64x2_t __s0_114 = __p0_114; \ int32x2_t __s1_114 = __p1_114; \ int32x2_t __s2_114 = __p2_114; \ __ret_114 = vqdmlal_s32(__s0_114, __s1_114, splat_lane_s32(__s2_114, __p3_114)); \ __ret_114; \ }) #else #define vqdmlal_lane_s32(__p0_115, __p1_115, __p2_115, __p3_115) __extension__ ({ \ int64x2_t __ret_115; \ int64x2_t __s0_115 = __p0_115; \ int32x2_t __s1_115 = __p1_115; \ int32x2_t __s2_115 = __p2_115; \ int64x2_t __rev0_115; __rev0_115 = __builtin_shufflevector(__s0_115, __s0_115, 1, 0); \ int32x2_t __rev1_115; __rev1_115 = __builtin_shufflevector(__s1_115, __s1_115, 1, 0); \ int32x2_t __rev2_115; __rev2_115 = __builtin_shufflevector(__s2_115, __s2_115, 1, 0); \ __ret_115 = __noswap_vqdmlal_s32(__rev0_115, __rev1_115, __noswap_splat_lane_s32(__rev2_115, __p3_115)); \ __ret_115 = __builtin_shufflevector(__ret_115, __ret_115, 1, 0); \ __ret_115; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlal_lane_s16(__p0_116, __p1_116, __p2_116, __p3_116) __extension__ ({ \ int32x4_t __ret_116; \ int32x4_t __s0_116 = __p0_116; \ int16x4_t __s1_116 = __p1_116; \ int16x4_t __s2_116 = __p2_116; \ __ret_116 = vqdmlal_s16(__s0_116, __s1_116, splat_lane_s16(__s2_116, __p3_116)); \ __ret_116; \ }) #else #define vqdmlal_lane_s16(__p0_117, __p1_117, __p2_117, __p3_117) __extension__ ({ \ int32x4_t __ret_117; \ int32x4_t __s0_117 = __p0_117; \ int16x4_t __s1_117 = __p1_117; \ int16x4_t __s2_117 = __p2_117; \ int32x4_t __rev0_117; __rev0_117 = __builtin_shufflevector(__s0_117, __s0_117, 3, 2, 1, 0); \ int16x4_t __rev1_117; __rev1_117 = __builtin_shufflevector(__s1_117, __s1_117, 3, 2, 1, 0); \ int16x4_t __rev2_117; __rev2_117 = __builtin_shufflevector(__s2_117, __s2_117, 3, 2, 1, 0); \ __ret_117 = __noswap_vqdmlal_s16(__rev0_117, __rev1_117, __noswap_splat_lane_s16(__rev2_117, __p3_117)); \ __ret_117 = __builtin_shufflevector(__ret_117, __ret_117, 3, 2, 1, 0); \ __ret_117; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqdmlal_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; __ret = vqdmlal_s32(__p0, __p1, (int32x2_t) {__p2, __p2}); return __ret; } #else __ai int64x2_t vqdmlal_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __noswap_vqdmlal_s32(__rev0, __rev1, (int32x2_t) {__p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int64x2_t __noswap_vqdmlal_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; __ret = __noswap_vqdmlal_s32(__p0, __p1, (int32x2_t) {__p2, __p2}); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqdmlal_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; __ret = vqdmlal_s16(__p0, __p1, (int16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } #else __ai int32x4_t vqdmlal_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vqdmlal_s16(__rev0, __rev1, (int16x4_t) {__p2, __p2, __p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vqdmlal_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; __ret = __noswap_vqdmlal_s16(__p0, __p1, (int16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } #endif #ifdef 
__LITTLE_ENDIAN__ __ai int64x2_t vqdmlsl_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vqdmlsl_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 35); return __ret; } #else __ai int64x2_t vqdmlsl_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (int64x2_t) __builtin_neon_vqdmlsl_v((int8x16_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int64x2_t __noswap_vqdmlsl_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vqdmlsl_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 35); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqdmlsl_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqdmlsl_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 34); return __ret; } #else __ai int32x4_t vqdmlsl_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vqdmlsl_v((int8x16_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vqdmlsl_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqdmlsl_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 34); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlsl_lane_s32(__p0_118, __p1_118, __p2_118, __p3_118) __extension__ ({ \ int64x2_t __ret_118; \ int64x2_t __s0_118 = __p0_118; \ int32x2_t __s1_118 = __p1_118; \ int32x2_t __s2_118 = __p2_118; \ __ret_118 = vqdmlsl_s32(__s0_118, __s1_118, splat_lane_s32(__s2_118, __p3_118)); \ __ret_118; \ }) #else #define vqdmlsl_lane_s32(__p0_119, __p1_119, __p2_119, __p3_119) __extension__ ({ \ int64x2_t __ret_119; \ int64x2_t __s0_119 = __p0_119; \ int32x2_t __s1_119 = __p1_119; \ int32x2_t __s2_119 = __p2_119; \ int64x2_t __rev0_119; __rev0_119 = __builtin_shufflevector(__s0_119, __s0_119, 1, 0); \ int32x2_t __rev1_119; __rev1_119 = __builtin_shufflevector(__s1_119, __s1_119, 1, 0); \ int32x2_t __rev2_119; __rev2_119 = __builtin_shufflevector(__s2_119, __s2_119, 1, 0); \ __ret_119 = __noswap_vqdmlsl_s32(__rev0_119, __rev1_119, __noswap_splat_lane_s32(__rev2_119, __p3_119)); \ __ret_119 = __builtin_shufflevector(__ret_119, __ret_119, 1, 0); \ __ret_119; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlsl_lane_s16(__p0_120, __p1_120, __p2_120, __p3_120) __extension__ ({ \ int32x4_t __ret_120; \ int32x4_t __s0_120 = __p0_120; \ int16x4_t __s1_120 = __p1_120; \ int16x4_t __s2_120 = __p2_120; \ __ret_120 = vqdmlsl_s16(__s0_120, __s1_120, splat_lane_s16(__s2_120, __p3_120)); \ __ret_120; \ }) #else #define vqdmlsl_lane_s16(__p0_121, __p1_121, __p2_121, __p3_121) __extension__ ({ \ int32x4_t __ret_121; \ int32x4_t __s0_121 = __p0_121; \ int16x4_t __s1_121 = __p1_121; \ int16x4_t __s2_121 = __p2_121; \ int32x4_t __rev0_121; __rev0_121 = __builtin_shufflevector(__s0_121, __s0_121, 3, 2, 1, 0); \ int16x4_t 
__rev1_121; __rev1_121 = __builtin_shufflevector(__s1_121, __s1_121, 3, 2, 1, 0); \ int16x4_t __rev2_121; __rev2_121 = __builtin_shufflevector(__s2_121, __s2_121, 3, 2, 1, 0); \ __ret_121 = __noswap_vqdmlsl_s16(__rev0_121, __rev1_121, __noswap_splat_lane_s16(__rev2_121, __p3_121)); \ __ret_121 = __builtin_shufflevector(__ret_121, __ret_121, 3, 2, 1, 0); \ __ret_121; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqdmlsl_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; __ret = vqdmlsl_s32(__p0, __p1, (int32x2_t) {__p2, __p2}); return __ret; } #else __ai int64x2_t vqdmlsl_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __noswap_vqdmlsl_s32(__rev0, __rev1, (int32x2_t) {__p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int64x2_t __noswap_vqdmlsl_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; __ret = __noswap_vqdmlsl_s32(__p0, __p1, (int32x2_t) {__p2, __p2}); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqdmlsl_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; __ret = vqdmlsl_s16(__p0, __p1, (int16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } #else __ai int32x4_t vqdmlsl_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vqdmlsl_s16(__rev0, __rev1, (int16x4_t) {__p2, __p2, __p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vqdmlsl_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; __ret = __noswap_vqdmlsl_s16(__p0, __p1, (int16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqdmulhq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqdmulhq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4_t vqdmulhq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vqdmulhq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vqdmulhq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqdmulhq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vqdmulhq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqdmulhq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8_t vqdmulhq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vqdmulhq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai int16x8_t __noswap_vqdmulhq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) 
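/*
 * Saturating doubling multiply-accumulate long: vqdmlal_s16 adds
 * saturate(2 * a[i] * b[i]) into a widened 32-bit accumulator, and vqdmlsl
 * subtracts instead of adding; the lane and _n forms just broadcast one
 * operand. Usage sketch (illustrative values only):
 *
 *   int32x4_t acc = (int32x4_t) {0, 0, 0, 1000};
 *   int16x4_t a   = (int16x4_t) {3, -4, 100, 20};
 *   int16x4_t b   = (int16x4_t) {5, 6, 7, 8};
 *   int32x4_t r   = vqdmlal_s16(acc, a, b);   // {30, -48, 1400, 1320}
 */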
__builtin_neon_vqdmulhq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vqdmulh_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqdmulh_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2_t vqdmulh_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vqdmulh_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int32x2_t __noswap_vqdmulh_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqdmulh_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vqdmulh_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqdmulh_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4_t vqdmulh_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vqdmulh_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int16x4_t __noswap_vqdmulh_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqdmulh_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqdmulhq_n_s32(int32x4_t __p0, int32_t __p1) { int32x4_t __ret; __ret = vqdmulhq_s32(__p0, (int32x4_t) {__p1, __p1, __p1, __p1}); return __ret; } #else __ai int32x4_t vqdmulhq_n_s32(int32x4_t __p0, int32_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __noswap_vqdmulhq_s32(__rev0, (int32x4_t) {__p1, __p1, __p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vqdmulhq_n_s16(int16x8_t __p0, int16_t __p1) { int16x8_t __ret; __ret = vqdmulhq_s16(__p0, (int16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}); return __ret; } #else __ai int16x8_t vqdmulhq_n_s16(int16x8_t __p0, int16_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vqdmulhq_s16(__rev0, (int16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vqdmulh_n_s32(int32x2_t __p0, int32_t __p1) { int32x2_t __ret; __ret = vqdmulh_s32(__p0, (int32x2_t) {__p1, __p1}); return __ret; } #else __ai int32x2_t vqdmulh_n_s32(int32x2_t __p0, int32_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = __noswap_vqdmulh_s32(__rev0, (int32x2_t) {__p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vqdmulh_n_s16(int16x4_t __p0, int16_t __p1) { int16x4_t __ret; __ret = vqdmulh_s16(__p0, (int16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } #else __ai int16x4_t vqdmulh_n_s16(int16x4_t __p0, int16_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = 
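/*
 * Saturating doubling multiply high: vqdmulh_s16 returns the high 16 bits of
 * 2 * a[i] * b[i], which is the usual Q15 fixed-point multiply (0x4000
 * represents 0.5). Usage sketch (illustrative values only):
 *
 *   int16x4_t a = (int16x4_t) {0x4000, 0x2000, -0x4000, 0x7FFF};
 *   int16x4_t b = (int16x4_t) {0x4000, 0x4000, 0x4000, 0x7FFF};
 *   int16x4_t r = vqdmulh_s16(a, b);   // {0x2000, 0x1000, -0x2000, 0x7FFE}
 */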
__noswap_vqdmulh_s16(__rev0, (int16x4_t) {__p1, __p1, __p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqdmull_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vqdmull_v((int8x8_t)__p0, (int8x8_t)__p1, 35); return __ret; } #else __ai int64x2_t vqdmull_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int64x2_t) __builtin_neon_vqdmull_v((int8x8_t)__rev0, (int8x8_t)__rev1, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int64x2_t __noswap_vqdmull_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vqdmull_v((int8x8_t)__p0, (int8x8_t)__p1, 35); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqdmull_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqdmull_v((int8x8_t)__p0, (int8x8_t)__p1, 34); return __ret; } #else __ai int32x4_t vqdmull_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vqdmull_v((int8x8_t)__rev0, (int8x8_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vqdmull_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqdmull_v((int8x8_t)__p0, (int8x8_t)__p1, 34); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vqdmull_lane_s32(__p0_122, __p1_122, __p2_122) __extension__ ({ \ int64x2_t __ret_122; \ int32x2_t __s0_122 = __p0_122; \ int32x2_t __s1_122 = __p1_122; \ __ret_122 = vqdmull_s32(__s0_122, splat_lane_s32(__s1_122, __p2_122)); \ __ret_122; \ }) #else #define vqdmull_lane_s32(__p0_123, __p1_123, __p2_123) __extension__ ({ \ int64x2_t __ret_123; \ int32x2_t __s0_123 = __p0_123; \ int32x2_t __s1_123 = __p1_123; \ int32x2_t __rev0_123; __rev0_123 = __builtin_shufflevector(__s0_123, __s0_123, 1, 0); \ int32x2_t __rev1_123; __rev1_123 = __builtin_shufflevector(__s1_123, __s1_123, 1, 0); \ __ret_123 = __noswap_vqdmull_s32(__rev0_123, __noswap_splat_lane_s32(__rev1_123, __p2_123)); \ __ret_123 = __builtin_shufflevector(__ret_123, __ret_123, 1, 0); \ __ret_123; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmull_lane_s16(__p0_124, __p1_124, __p2_124) __extension__ ({ \ int32x4_t __ret_124; \ int16x4_t __s0_124 = __p0_124; \ int16x4_t __s1_124 = __p1_124; \ __ret_124 = vqdmull_s16(__s0_124, splat_lane_s16(__s1_124, __p2_124)); \ __ret_124; \ }) #else #define vqdmull_lane_s16(__p0_125, __p1_125, __p2_125) __extension__ ({ \ int32x4_t __ret_125; \ int16x4_t __s0_125 = __p0_125; \ int16x4_t __s1_125 = __p1_125; \ int16x4_t __rev0_125; __rev0_125 = __builtin_shufflevector(__s0_125, __s0_125, 3, 2, 1, 0); \ int16x4_t __rev1_125; __rev1_125 = __builtin_shufflevector(__s1_125, __s1_125, 3, 2, 1, 0); \ __ret_125 = __noswap_vqdmull_s16(__rev0_125, __noswap_splat_lane_s16(__rev1_125, __p2_125)); \ __ret_125 = __builtin_shufflevector(__ret_125, __ret_125, 3, 2, 1, 0); \ __ret_125; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqdmull_n_s32(int32x2_t __p0, int32_t __p1) { int64x2_t __ret; __ret = vqdmull_s32(__p0, (int32x2_t) {__p1, __p1}); return __ret; } #else __ai int64x2_t 
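/*
 * Saturating doubling multiply long: vqdmull_s16 widens each product to
 * 32 bits and returns saturate(2 * a[i] * b[i]); only INT16_MIN * INT16_MIN
 * can actually hit the clamp. Usage sketch (illustrative values only):
 *
 *   int16x4_t a = (int16x4_t) {3, -4, 1000, -32768};
 *   int16x4_t b = (int16x4_t) {5, 6, 1000, -32768};
 *   int32x4_t r = vqdmull_s16(a, b);   // {30, -48, 2000000, 2147483647}
 */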
vqdmull_n_s32(int32x2_t __p0, int32_t __p1) { int64x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = __noswap_vqdmull_s32(__rev0, (int32x2_t) {__p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int64x2_t __noswap_vqdmull_n_s32(int32x2_t __p0, int32_t __p1) { int64x2_t __ret; __ret = __noswap_vqdmull_s32(__p0, (int32x2_t) {__p1, __p1}); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqdmull_n_s16(int16x4_t __p0, int16_t __p1) { int32x4_t __ret; __ret = vqdmull_s16(__p0, (int16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } #else __ai int32x4_t vqdmull_n_s16(int16x4_t __p0, int16_t __p1) { int32x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __noswap_vqdmull_s16(__rev0, (int16x4_t) {__p1, __p1, __p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vqdmull_n_s16(int16x4_t __p0, int16_t __p1) { int32x4_t __ret; __ret = __noswap_vqdmull_s16(__p0, (int16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vqmovn_u32(uint32x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 17); return __ret; } #else __ai uint16x4_t vqmovn_u32(uint32x4_t __p0) { uint16x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vqmovn_v((int8x16_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai uint16x4_t __noswap_vqmovn_u32(uint32x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 17); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vqmovn_u64(uint64x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 18); return __ret; } #else __ai uint32x2_t vqmovn_u64(uint64x2_t __p0) { uint32x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vqmovn_v((int8x16_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai uint32x2_t __noswap_vqmovn_u64(uint64x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 18); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vqmovn_u16(uint16x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 16); return __ret; } #else __ai uint8x8_t vqmovn_u16(uint16x8_t __p0) { uint8x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vqmovn_v((int8x16_t)__rev0, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai uint8x8_t __noswap_vqmovn_u16(uint16x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 16); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vqmovn_s32(int32x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 1); return __ret; } #else __ai int16x4_t vqmovn_s32(int32x4_t __p0) { int16x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vqmovn_v((int8x16_t)__rev0, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int16x4_t __noswap_vqmovn_s32(int32x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) 
__builtin_neon_vqmovn_v((int8x16_t)__p0, 1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vqmovn_s64(int64x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 2); return __ret; } #else __ai int32x2_t vqmovn_s64(int64x2_t __p0) { int32x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int32x2_t) __builtin_neon_vqmovn_v((int8x16_t)__rev0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int32x2_t __noswap_vqmovn_s64(int64x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vqmovn_s16(int16x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 0); return __ret; } #else __ai int8x8_t vqmovn_s16(int16x8_t __p0) { int8x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vqmovn_v((int8x16_t)__rev0, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai int8x8_t __noswap_vqmovn_s16(int16x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vqmovun_s32(int32x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vqmovun_v((int8x16_t)__p0, 17); return __ret; } #else __ai uint16x4_t vqmovun_s32(int32x4_t __p0) { uint16x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vqmovun_v((int8x16_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai uint16x4_t __noswap_vqmovun_s32(int32x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vqmovun_v((int8x16_t)__p0, 17); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vqmovun_s64(int64x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vqmovun_v((int8x16_t)__p0, 18); return __ret; } #else __ai uint32x2_t vqmovun_s64(int64x2_t __p0) { uint32x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vqmovun_v((int8x16_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai uint32x2_t __noswap_vqmovun_s64(int64x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vqmovun_v((int8x16_t)__p0, 18); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vqmovun_s16(int16x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqmovun_v((int8x16_t)__p0, 16); return __ret; } #else __ai uint8x8_t vqmovun_s16(int16x8_t __p0) { uint8x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vqmovun_v((int8x16_t)__rev0, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai uint8x8_t __noswap_vqmovun_s16(int16x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqmovun_v((int8x16_t)__p0, 16); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vqnegq_s8(int8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqnegq_v((int8x16_t)__p0, 32); return __ret; } #else __ai int8x16_t vqnegq_s8(int8x16_t __p0) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) 
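/*
 * Saturating narrow: vqmovn_s32 packs each 32-bit lane into 16 bits with
 * signed saturation, while vqmovun_s32 saturates to the unsigned range, so
 * negative inputs clamp to 0. Usage sketch (illustrative values only):
 *
 *   int32x4_t  v  = (int32x4_t) {70000, -70000, 123, -1};
 *   int16x4_t  n  = vqmovn_s32(v);    // {32767, -32768, 123, -1}
 *   uint16x4_t un = vqmovun_s32(v);   // {65535, 0, 123, 0}
 */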
__builtin_neon_vqnegq_v((int8x16_t)__rev0, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqnegq_s32(int32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqnegq_v((int8x16_t)__p0, 34); return __ret; } #else __ai int32x4_t vqnegq_s32(int32x4_t __p0) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vqnegq_v((int8x16_t)__rev0, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vqnegq_s16(int16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqnegq_v((int8x16_t)__p0, 33); return __ret; } #else __ai int16x8_t vqnegq_s16(int16x8_t __p0) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vqnegq_v((int8x16_t)__rev0, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vqneg_s8(int8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqneg_v((int8x8_t)__p0, 0); return __ret; } #else __ai int8x8_t vqneg_s8(int8x8_t __p0) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vqneg_v((int8x8_t)__rev0, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vqneg_s32(int32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqneg_v((int8x8_t)__p0, 2); return __ret; } #else __ai int32x2_t vqneg_s32(int32x2_t __p0) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int32x2_t) __builtin_neon_vqneg_v((int8x8_t)__rev0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vqneg_s16(int16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqneg_v((int8x8_t)__p0, 1); return __ret; } #else __ai int16x4_t vqneg_s16(int16x4_t __p0) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vqneg_v((int8x8_t)__rev0, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqrdmulhq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqrdmulhq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4_t vqrdmulhq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vqrdmulhq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vqrdmulhq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqrdmulhq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vqrdmulhq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqrdmulhq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8_t vqrdmulhq_s16(int16x8_t __p0, int16x8_t 
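/*
 * Saturating negate: vqneg_s8 behaves like ordinary negation except that
 * INT8_MIN maps to INT8_MAX instead of overflowing back to itself.
 * Usage sketch (illustrative values only):
 *
 *   int8x8_t v = (int8x8_t) {-128, -1, 0, 1, 127, -5, 5, -128};
 *   int8x8_t n = vqneg_s8(v);   // {127, 1, 0, -1, -127, 5, -5, 127}
 */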
__p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vqrdmulhq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai int16x8_t __noswap_vqrdmulhq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqrdmulhq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vqrdmulh_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqrdmulh_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2_t vqrdmulh_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vqrdmulh_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int32x2_t __noswap_vqrdmulh_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqrdmulh_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vqrdmulh_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqrdmulh_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4_t vqrdmulh_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vqrdmulh_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int16x4_t __noswap_vqrdmulh_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqrdmulh_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqrdmulhq_n_s32(int32x4_t __p0, int32_t __p1) { int32x4_t __ret; __ret = vqrdmulhq_s32(__p0, (int32x4_t) {__p1, __p1, __p1, __p1}); return __ret; } #else __ai int32x4_t vqrdmulhq_n_s32(int32x4_t __p0, int32_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __noswap_vqrdmulhq_s32(__rev0, (int32x4_t) {__p1, __p1, __p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vqrdmulhq_n_s16(int16x8_t __p0, int16_t __p1) { int16x8_t __ret; __ret = vqrdmulhq_s16(__p0, (int16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}); return __ret; } #else __ai int16x8_t vqrdmulhq_n_s16(int16x8_t __p0, int16_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vqrdmulhq_s16(__rev0, (int16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vqrdmulh_n_s32(int32x2_t __p0, int32_t __p1) { int32x2_t __ret; __ret = vqrdmulh_s32(__p0, (int32x2_t) {__p1, __p1}); return __ret; } #else __ai int32x2_t vqrdmulh_n_s32(int32x2_t __p0, int32_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, 
__p0, 1, 0); __ret = __noswap_vqrdmulh_s32(__rev0, (int32x2_t) {__p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vqrdmulh_n_s16(int16x4_t __p0, int16_t __p1) { int16x4_t __ret; __ret = vqrdmulh_s16(__p0, (int16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } #else __ai int16x4_t vqrdmulh_n_s16(int16x4_t __p0, int16_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __noswap_vqrdmulh_s16(__rev0, (int16x4_t) {__p1, __p1, __p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vqrshlq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vqrshlq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vqrshlq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vqrshlq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vqrshlq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else __ai uint64x2_t vqrshlq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vqrshlq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8_t vqrshlq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vqrshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16_t vqrshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t 
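/*
 * Saturating rounding doubling multiply high: vqrdmulh_s16 is vqdmulh_s16
 * with rounding, i.e. (2 * a[i] * b[i] + 0x8000) >> 16, which removes the
 * truncation bias of a Q15 multiply. Usage sketch (illustrative values only):
 *
 *   int16x4_t a = (int16x4_t) {0x4000, 3, -3, 0x7FFF};
 *   int16x4_t b = (int16x4_t) {0x4000, 3, 3, 0x7FFF};
 *   int16x4_t r = vqrdmulh_s16(a, b);   // {0x2000, 0, 0, 0x7FFF}
 */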
__ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqrshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4_t vqrshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqrshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); return __ret; } #else __ai int64x2_t vqrshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int64x2_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vqrshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8_t vqrshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vqrshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vqrshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vqrshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vqrshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t 
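/*
 * Saturating rounding shift by a signed, per-lane count: in the vqrshl forms
 * a positive count shifts left with saturation and a negative count shifts
 * right with rounding. Usage sketch (illustrative values only):
 *
 *   uint32x2_t v = (uint32x2_t) {0xFFFFFFFFu, 5};
 *   int32x2_t  s = (int32x2_t) {1, -1};
 *   uint32x2_t r = vqrshl_u32(v, s);   // {0xFFFFFFFF (saturated), 3}
 */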
vqrshl_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vqrshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4_t vqrshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vqrshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vqrshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vqrshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2_t vqrshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vqrshl_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vqrshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4_t vqrshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vqrshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 17); \ __ret; \ }) #else #define vqrshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vqrshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 17); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrshrn_n_u64(__p0, 
__p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 18); \ __ret; \ }) #else #define vqrshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vqrshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 18); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 16); \ __ret; \ }) #else #define vqrshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vqrshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 16); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (int16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 1); \ __ret; \ }) #else #define vqrshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vqrshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (int16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (int32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 2); \ __ret; \ }) #else #define vqrshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vqrshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (int32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (int8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 0); \ __ret; \ }) #else #define vqrshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vqrshrn_n_s16(__p0, __p1) 
__extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (int8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrshrun_n_s32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 17); \ __ret; \ }) #else #define vqrshrun_n_s32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__rev0, __p1, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vqrshrun_n_s32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 17); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrshrun_n_s64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 18); \ __ret; \ }) #else #define vqrshrun_n_s64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint32x2_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__rev0, __p1, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vqrshrun_n_s64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 18); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrshrun_n_s16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 16); \ __ret; \ }) #else #define vqrshrun_n_s16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x8_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__rev0, __p1, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vqrshrun_n_s16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 16); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vqshlq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vqshlq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vqshlq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vqshlq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t 
__rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vqshlq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else __ai uint64x2_t vqshlq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vqshlq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8_t vqshlq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vqshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16_t vqshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4_t vqshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); return __ret; } #else __ai int64x2_t vqshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int64x2_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vqshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) 
__builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8_t vqshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vqshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vqshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vqshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vqshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vqshl_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vqshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4_t vqshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vqshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vqshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vqshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2_t vqshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); 
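/* Editor's annotation, not part of the original header: a hedged usage sketch
 * for the variable-count saturating shifts (vqshl, vqrshl) defined above.
 * It assumes user code that includes <arm_neon.h>; the function name is
 * illustrative only.  Negative per-lane shift counts shift right, vqrshl
 * additionally rounds the result, and out-of-range results saturate. */
static inline int16x4_t example_vqrshl_scale_down(int16x4_t v)
{
    /* Shift every lane right by 3 with rounding and saturation
     * (a shift count of -3 in each lane means "right by 3"). */
    int16x4_t counts = vdup_n_s16(-3);
    return vqrshl_s16(v, counts);
}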
__ret = (int32x2_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vqshl_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vqshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4_t vqshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vqshlq_n_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ __ret = (uint8x16_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 48); \ __ret; \ }) #else #define vqshlq_n_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x16_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 48); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshlq_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ __ret = (uint32x4_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 50); \ __ret; \ }) #else #define vqshlq_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint32x4_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshlq_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ __ret = (uint64x2_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 51); \ __ret; \ }) #else #define vqshlq_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint64x2_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshlq_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ __ret = (uint16x8_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 49); \ __ret; \ }) #else #define vqshlq_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint16x8_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 49); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshlq_n_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ __ret = (int8x16_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 32); \ __ret; \ }) #else #define vqshlq_n_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t 
__rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x16_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 32); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshlq_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (int32x4_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 34); \ __ret; \ }) #else #define vqshlq_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int32x4_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshlq_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (int64x2_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 35); \ __ret; \ }) #else #define vqshlq_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int64x2_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 35); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshlq_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (int16x8_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 33); \ __ret; \ }) #else #define vqshlq_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16x8_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 33); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshl_n_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 16); \ __ret; \ }) #else #define vqshl_n_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x8_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshl_n_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 18); \ __ret; \ }) #else #define vqshl_n_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint32x2_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vqshl_n_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __ret; \ uint64x1_t __s0 = __p0; \ __ret = (uint64x1_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vqshl_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 17); \ __ret; \ }) #else #define vqshl_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = 
__p0; \ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshl_n_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ __ret = (int8x8_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 0); \ __ret; \ }) #else #define vqshl_n_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x8_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshl_n_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ __ret = (int32x2_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 2); \ __ret; \ }) #else #define vqshl_n_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int32x2_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vqshl_n_s64(__p0, __p1) __extension__ ({ \ int64x1_t __ret; \ int64x1_t __s0 = __p0; \ __ret = (int64x1_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vqshl_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ __ret = (int16x4_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 1); \ __ret; \ }) #else #define vqshl_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshluq_n_s8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ int8x16_t __s0 = __p0; \ __ret = (uint8x16_t) __builtin_neon_vqshluq_n_v((int8x16_t)__s0, __p1, 48); \ __ret; \ }) #else #define vqshluq_n_s8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x16_t) __builtin_neon_vqshluq_n_v((int8x16_t)__rev0, __p1, 48); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshluq_n_s32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (uint32x4_t) __builtin_neon_vqshluq_n_v((int8x16_t)__s0, __p1, 50); \ __ret; \ }) #else #define vqshluq_n_s32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint32x4_t) __builtin_neon_vqshluq_n_v((int8x16_t)__rev0, __p1, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshluq_n_s64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (uint64x2_t) __builtin_neon_vqshluq_n_v((int8x16_t)__s0, __p1, 51); \ __ret; \ }) #else #define vqshluq_n_s64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ int64x2_t __s0 = __p0; \ 
int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint64x2_t) __builtin_neon_vqshluq_n_v((int8x16_t)__rev0, __p1, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshluq_n_s16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (uint16x8_t) __builtin_neon_vqshluq_n_v((int8x16_t)__s0, __p1, 49); \ __ret; \ }) #else #define vqshluq_n_s16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint16x8_t) __builtin_neon_vqshluq_n_v((int8x16_t)__rev0, __p1, 49); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshlu_n_s8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ int8x8_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_vqshlu_n_v((int8x8_t)__s0, __p1, 16); \ __ret; \ }) #else #define vqshlu_n_s8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x8_t) __builtin_neon_vqshlu_n_v((int8x8_t)__rev0, __p1, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshlu_n_s32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ int32x2_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_vqshlu_n_v((int8x8_t)__s0, __p1, 18); \ __ret; \ }) #else #define vqshlu_n_s32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint32x2_t) __builtin_neon_vqshlu_n_v((int8x8_t)__rev0, __p1, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vqshlu_n_s64(__p0, __p1) __extension__ ({ \ uint64x1_t __ret; \ int64x1_t __s0 = __p0; \ __ret = (uint64x1_t) __builtin_neon_vqshlu_n_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vqshlu_n_s16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ int16x4_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_vqshlu_n_v((int8x8_t)__s0, __p1, 17); \ __ret; \ }) #else #define vqshlu_n_s16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_vqshlu_n_v((int8x8_t)__rev0, __p1, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 17); \ __ret; \ }) #else #define vqshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vqshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 17); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ __ret = (uint32x2_t) 
__builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 18); \ __ret; \ }) #else #define vqshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vqshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 18); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 16); \ __ret; \ }) #else #define vqshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vqshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 16); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (int16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 1); \ __ret; \ }) #else #define vqshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vqshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (int16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (int32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 2); \ __ret; \ }) #else #define vqshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vqshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (int32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (int8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 0); \ __ret; \ }) #else #define vqshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vqshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (int8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 
0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshrun_n_s32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 17); \ __ret; \ }) #else #define vqshrun_n_s32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_vqshrun_n_v((int8x16_t)__rev0, __p1, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vqshrun_n_s32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 17); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshrun_n_s64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 18); \ __ret; \ }) #else #define vqshrun_n_s64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint32x2_t) __builtin_neon_vqshrun_n_v((int8x16_t)__rev0, __p1, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vqshrun_n_s64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 18); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshrun_n_s16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 16); \ __ret; \ }) #else #define vqshrun_n_s16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x8_t) __builtin_neon_vqshrun_n_v((int8x16_t)__rev0, __p1, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vqshrun_n_s16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 16); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vqsubq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vqsubq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vqsubq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vqsubq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, 
(int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vqsubq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else __ai uint64x2_t vqsubq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vqsubq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8_t vqsubq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vqsubq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16_t vqsubq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqsubq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4_t vqsubq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqsubq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); return __ret; } #else __ai int64x2_t vqsubq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int64x2_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vqsubq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8_t vqsubq_s16(int16x8_t __p0, int16x8_t __p1) { 
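/* Editor's annotation, not part of the original header: a hedged sketch of the
 * saturating narrowing shifts (vqshrn_n, vqrshrn_n, vqshrun_n, vqrshrun_n)
 * defined earlier in this header.  It assumes user code that includes
 * <arm_neon.h>; the function name is illustrative.  The immediate shift count
 * must stay in the architecturally allowed range (1..16 for 32->16 narrowing). */
static inline int16x4_t example_narrow_fixed_point(int32x4_t acc)
{
    /* Round, shift right by 8, and saturate each 32-bit lane into a signed
     * 16-bit lane, e.g. when packing fixed-point accumulators. */
    return vqrshrn_n_s32(acc, 8);
}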
int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vqsub_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vqsub_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vqsub_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vqsub_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vqsub_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vqsub_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4_t vqsub_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vqsub_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vqsub_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vqsub_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2_t vqsub_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); 
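/* Editor's annotation, not part of the original header: a hedged sketch of the
 * saturating subtraction (vqsub) intrinsics defined above.  It assumes user
 * code that includes <arm_neon.h>; the function name is illustrative.
 * Unsigned lanes clamp at 0 instead of wrapping; signed lanes clamp at the
 * type's minimum and maximum. */
static inline uint8x16_t example_clamped_difference(uint8x16_t a, uint8x16_t b)
{
    /* Per-lane a - b, with any negative result clamped to 0. */
    return vqsubq_u8(a, b);
}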
return __ret; } #endif __ai int64x1_t vqsub_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vqsub_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4_t vqsub_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vraddhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); return __ret; } #else __ai uint16x4_t vraddhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai uint16x4_t __noswap_vraddhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vraddhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); return __ret; } #else __ai uint32x2_t vraddhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai uint32x2_t __noswap_vraddhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vraddhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); return __ret; } #else __ai uint8x8_t vraddhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai uint8x8_t __noswap_vraddhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vraddhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); return __ret; } #else __ai int16x4_t vraddhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; int32x4_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int16x4_t __noswap_vraddhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vraddhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); return __ret; } #else __ai int32x2_t vraddhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int32x2_t __noswap_vraddhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vraddhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); return __ret; } #else __ai int8x8_t vraddhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai int8x8_t __noswap_vraddhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vrecpeq_u32(uint32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vrecpeq_v((int8x16_t)__p0, 50); return __ret; } #else __ai uint32x4_t vrecpeq_u32(uint32x4_t __p0) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vrecpeq_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vrecpeq_f32(float32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vrecpeq_v((int8x16_t)__p0, 41); return __ret; } #else __ai float32x4_t vrecpeq_f32(float32x4_t __p0) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vrecpeq_v((int8x16_t)__rev0, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vrecpe_u32(uint32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vrecpe_v((int8x8_t)__p0, 18); return __ret; } #else __ai uint32x2_t vrecpe_u32(uint32x2_t __p0) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vrecpe_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vrecpe_f32(float32x2_t 
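/* Editor's annotation, not part of the original header: a hedged sketch of the
 * rounding add-and-narrow-high (vraddhn) intrinsics defined above.  It assumes
 * user code that includes <arm_neon.h>; the function name is illustrative. */
static inline uint16x4_t example_add_keep_high_half(uint32x4_t a, uint32x4_t b)
{
    /* Add the 32-bit lanes, then keep the upper 16 bits of each sum,
     * rounded (1 << 15 is added before the upper half is taken). */
    return vraddhn_u32(a, b);
}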
__p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vrecpe_v((int8x8_t)__p0, 9); return __ret; } #else __ai float32x2_t vrecpe_f32(float32x2_t __p0) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32x2_t) __builtin_neon_vrecpe_v((int8x8_t)__rev0, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vrecpsq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vrecpsq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #else __ai float32x4_t vrecpsq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vrecpsq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vrecps_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vrecps_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } #else __ai float32x2_t vrecps_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float32x2_t) __builtin_neon_vrecps_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vrev16_p8(poly8x8_t __p0) { poly8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6); return __ret; } #else __ai poly8x8_t vrev16_p8(poly8x8_t __p0) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vrev16q_p8(poly8x16_t __p0) { poly8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); return __ret; } #else __ai poly8x16_t vrev16q_p8(poly8x16_t __p0) { poly8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vrev16q_u8(uint8x16_t __p0) { uint8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); return __ret; } #else __ai uint8x16_t vrev16q_u8(uint8x16_t __p0) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vrev16q_s8(int8x16_t __p0) { int8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); 
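/* Editor's annotation, not part of the original header: a hedged sketch of how
 * the reciprocal estimate/step pair (vrecpe, vrecps) defined above is commonly
 * combined.  It assumes user code that includes <arm_neon.h>; the function
 * name is illustrative.  vrecps(d, x) computes 2 - d*x, so each multiply by it
 * is one Newton-Raphson refinement of the estimate of 1/d. */
static inline float32x4_t example_reciprocal_f32(float32x4_t d)
{
    float32x4_t x = vrecpeq_f32(d);       /* coarse estimate of 1/d            */
    x = vmulq_f32(x, vrecpsq_f32(d, x));  /* first refinement step             */
    x = vmulq_f32(x, vrecpsq_f32(d, x));  /* second step, near full precision  */
    return x;
}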
return __ret; } #else __ai int8x16_t vrev16q_s8(int8x16_t __p0) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vrev16_u8(uint8x8_t __p0) { uint8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6); return __ret; } #else __ai uint8x8_t vrev16_u8(uint8x8_t __p0) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vrev16_s8(int8x8_t __p0) { int8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6); return __ret; } #else __ai int8x8_t vrev16_s8(int8x8_t __p0) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vrev32_p8(poly8x8_t __p0) { poly8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); return __ret; } #else __ai poly8x8_t vrev32_p8(poly8x8_t __p0) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vrev32_p16(poly16x4_t __p0) { poly16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2); return __ret; } #else __ai poly16x4_t vrev32_p16(poly16x4_t __p0) { poly16x4_t __ret; poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vrev32q_p8(poly8x16_t __p0) { poly8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); return __ret; } #else __ai poly8x16_t vrev32q_p8(poly8x16_t __p0) { poly8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vrev32q_p16(poly16x8_t __p0) { poly16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6); return __ret; } #else __ai poly16x8_t vrev32q_p16(poly16x8_t __p0) { poly16x8_t __ret; poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vrev32q_u8(uint8x16_t __p0) { uint8x16_t 
__ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); return __ret; } #else __ai uint8x16_t vrev32q_u8(uint8x16_t __p0) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vrev32q_u16(uint16x8_t __p0) { uint16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6); return __ret; } #else __ai uint16x8_t vrev32q_u16(uint16x8_t __p0) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vrev32q_s8(int8x16_t __p0) { int8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); return __ret; } #else __ai int8x16_t vrev32q_s8(int8x16_t __p0) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vrev32q_s16(int16x8_t __p0) { int16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6); return __ret; } #else __ai int16x8_t vrev32q_s16(int16x8_t __p0) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vrev32_u8(uint8x8_t __p0) { uint8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); return __ret; } #else __ai uint8x8_t vrev32_u8(uint8x8_t __p0) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vrev32_u16(uint16x4_t __p0) { uint16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2); return __ret; } #else __ai uint16x4_t vrev32_u16(uint16x4_t __p0) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vrev32_s8(int8x8_t __p0) { int8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); return __ret; } #else __ai int8x8_t vrev32_s8(int8x8_t __p0) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); 
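/* Editor's annotation, not part of the original header: a hedged sketch of the
 * element-reversal intrinsics (vrev16, vrev32) defined above.  It assumes user
 * code that includes <arm_neon.h>; the function name is illustrative.
 * Reversing the bytes within each 16-bit halfword is a vector byte swap of
 * 16-bit data. */
static inline uint16x8_t example_bswap16_lanes(uint16x8_t v)
{
    /* Reinterpret as bytes, reverse the two bytes inside every halfword,
     * then reinterpret back as 16-bit lanes. */
    return vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(v)));
}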
return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vrev32_s16(int16x4_t __p0) { int16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2); return __ret; } #else __ai int16x4_t vrev32_s16(int16x4_t __p0) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vrev64_p8(poly8x8_t __p0) { poly8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #else __ai poly8x8_t vrev64_p8(poly8x8_t __p0) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vrev64_p16(poly16x4_t __p0) { poly16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); return __ret; } #else __ai poly16x4_t vrev64_p16(poly16x4_t __p0) { poly16x4_t __ret; poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vrev64q_p8(poly8x16_t __p0) { poly8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); return __ret; } #else __ai poly8x16_t vrev64q_p8(poly8x16_t __p0) { poly8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vrev64q_p16(poly16x8_t __p0) { poly16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); return __ret; } #else __ai poly16x8_t vrev64q_p16(poly16x8_t __p0) { poly16x8_t __ret; poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vrev64q_u8(uint8x16_t __p0) { uint8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); return __ret; } #else __ai uint8x16_t vrev64q_u8(uint8x16_t __p0) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vrev64q_u32(uint32x4_t __p0) { uint32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2); return __ret; } #else __ai uint32x4_t vrev64q_u32(uint32x4_t __p0) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2); __ret = 
__builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vrev64q_u16(uint16x8_t __p0) { uint16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); return __ret; } #else __ai uint16x8_t vrev64q_u16(uint16x8_t __p0) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vrev64q_s8(int8x16_t __p0) { int8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); return __ret; } #else __ai int8x16_t vrev64q_s8(int8x16_t __p0) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vrev64q_f32(float32x4_t __p0) { float32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2); return __ret; } #else __ai float32x4_t vrev64q_f32(float32x4_t __p0) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vrev64q_s32(int32x4_t __p0) { int32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2); return __ret; } #else __ai int32x4_t vrev64q_s32(int32x4_t __p0) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vrev64q_s16(int16x8_t __p0) { int16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); return __ret; } #else __ai int16x8_t vrev64q_s16(int16x8_t __p0) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vrev64_u8(uint8x8_t __p0) { uint8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #else __ai uint8x8_t vrev64_u8(uint8x8_t __p0) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vrev64_u32(uint32x2_t __p0) { uint32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0); return __ret; } #else __ai uint32x2_t vrev64_u32(uint32x2_t __p0) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t 
vrev64_u16(uint16x4_t __p0) { uint16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); return __ret; } #else __ai uint16x4_t vrev64_u16(uint16x4_t __p0) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vrev64_s8(int8x8_t __p0) { int8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #else __ai int8x8_t vrev64_s8(int8x8_t __p0) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vrev64_f32(float32x2_t __p0) { float32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0); return __ret; } #else __ai float32x2_t vrev64_f32(float32x2_t __p0) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vrev64_s32(int32x2_t __p0) { int32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0); return __ret; } #else __ai int32x2_t vrev64_s32(int32x2_t __p0) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vrev64_s16(int16x4_t __p0) { int16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); return __ret; } #else __ai int16x4_t vrev64_s16(int16x4_t __p0) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vrhaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vrhaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vrhaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vrhaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } 
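/* Illustrative note (not part of the original header): the vrev32/vrev64
 * families above reverse the order of elements within each 32-bit or 64-bit
 * group of a vector, and vrhadd performs a rounding halving add,
 * (a + b + 1) >> 1, without intermediate overflow. On big-endian targets each
 * intrinsic first normalizes lane order with __builtin_shufflevector, invokes
 * the builtin, then restores the original lane order. A minimal usage sketch,
 * assuming a NEON-enabled ARM target; byteswap32x4 and average_rows are
 * hypothetical helper names, not defined by this header:
 *
 *   // Byte-swap each 32-bit lane of a vector, e.g. for endianness conversion.
 *   static inline uint8x16_t byteswap32x4(uint8x16_t v) {
 *     return vrev32q_u8(v);   // bytes 0,1,2,3 -> 3,2,1,0 within every 32-bit word
 *   }
 *
 *   // Average two pixel rows with rounding, staying in 8 bits per channel.
 *   static inline uint8x16_t average_rows(uint8x16_t a, uint8x16_t b) {
 *     return vrhaddq_u8(a, b);  // per-lane (a + b + 1) >> 1
 *   }
 */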
#endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vrhaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8_t vrhaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vrhaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16_t vrhaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vrhaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4_t vrhaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vrhaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8_t vrhaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vrhadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vrhadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vrhadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vrhadd_u32(uint32x2_t __p0, uint32x2_t 
__p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vrhadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4_t vrhadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vrhadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vrhadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vrhadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2_t vrhadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vrhadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4_t vrhadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vrshlq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vrshlq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vrshlq_u32(uint32x4_t __p0, 
int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vrshlq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vrshlq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else __ai uint64x2_t vrshlq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vrshlq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8_t vrshlq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vrshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16_t vrshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vrshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4_t vrshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vrshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); return __ret; } #else __ai int64x2_t vrshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret 
= (int64x2_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vrshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8_t vrshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vrshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vrshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vrshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vrshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vrshl_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vrshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4_t vrshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vrshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vrshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vrshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) 
__builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2_t vrshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vrshl_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vrshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4_t vrshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vrshrq_n_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ __ret = (uint8x16_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 48); \ __ret; \ }) #else #define vrshrq_n_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x16_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 48); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshrq_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ __ret = (uint32x4_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 50); \ __ret; \ }) #else #define vrshrq_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint32x4_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshrq_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ __ret = (uint64x2_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 51); \ __ret; \ }) #else #define vrshrq_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint64x2_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshrq_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ __ret = (uint16x8_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 49); \ __ret; \ }) #else #define vrshrq_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint16x8_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 49); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef 
__LITTLE_ENDIAN__ #define vrshrq_n_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ __ret = (int8x16_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 32); \ __ret; \ }) #else #define vrshrq_n_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x16_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 32); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshrq_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (int32x4_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 34); \ __ret; \ }) #else #define vrshrq_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int32x4_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshrq_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (int64x2_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 35); \ __ret; \ }) #else #define vrshrq_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int64x2_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 35); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshrq_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (int16x8_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 33); \ __ret; \ }) #else #define vrshrq_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16x8_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 33); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshr_n_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 16); \ __ret; \ }) #else #define vrshr_n_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x8_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshr_n_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 18); \ __ret; \ }) #else #define vrshr_n_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint32x2_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vrshr_n_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __ret; \ uint64x1_t __s0 = __p0; \ __ret = (uint64x1_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 19); \ __ret; \ 
}) #ifdef __LITTLE_ENDIAN__ #define vrshr_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 17); \ __ret; \ }) #else #define vrshr_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshr_n_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ __ret = (int8x8_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 0); \ __ret; \ }) #else #define vrshr_n_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x8_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshr_n_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ __ret = (int32x2_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 2); \ __ret; \ }) #else #define vrshr_n_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int32x2_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vrshr_n_s64(__p0, __p1) __extension__ ({ \ int64x1_t __ret; \ int64x1_t __s0 = __p0; \ __ret = (int64x1_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vrshr_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ __ret = (int16x4_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 1); \ __ret; \ }) #else #define vrshr_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 17); \ __ret; \ }) #else #define vrshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vrshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 17); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 18); \ __ret; \ }) #else #define vrshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint32x2_t) 
__builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vrshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 18); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 16); \ __ret; \ }) #else #define vrshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vrshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 16); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (int16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 1); \ __ret; \ }) #else #define vrshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vrshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (int16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (int32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 2); \ __ret; \ }) #else #define vrshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vrshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (int32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (int8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 0); \ __ret; \ }) #else #define vrshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vrshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (int8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vrsqrteq_u32(uint32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vrsqrteq_v((int8x16_t)__p0, 50); return __ret; } #else __ai uint32x4_t vrsqrteq_u32(uint32x4_t __p0) { uint32x4_t 
__ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vrsqrteq_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vrsqrteq_f32(float32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vrsqrteq_v((int8x16_t)__p0, 41); return __ret; } #else __ai float32x4_t vrsqrteq_f32(float32x4_t __p0) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vrsqrteq_v((int8x16_t)__rev0, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vrsqrte_u32(uint32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vrsqrte_v((int8x8_t)__p0, 18); return __ret; } #else __ai uint32x2_t vrsqrte_u32(uint32x2_t __p0) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vrsqrte_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vrsqrte_f32(float32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vrsqrte_v((int8x8_t)__p0, 9); return __ret; } #else __ai float32x2_t vrsqrte_f32(float32x2_t __p0) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32x2_t) __builtin_neon_vrsqrte_v((int8x8_t)__rev0, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vrsqrtsq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #else __ai float32x4_t vrsqrtsq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vrsqrts_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vrsqrts_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } #else __ai float32x2_t vrsqrts_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float32x2_t) __builtin_neon_vrsqrts_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vrsraq_n_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ __ret = (uint8x16_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \ __ret; \ }) #else #define vrsraq_n_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x16_t) 
__builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrsraq_n_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ __ret = (uint32x4_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 50); \ __ret; \ }) #else #define vrsraq_n_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (uint32x4_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrsraq_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ __ret = (uint64x2_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \ __ret; \ }) #else #define vrsraq_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (uint64x2_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrsraq_n_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ __ret = (uint16x8_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \ __ret; \ }) #else #define vrsraq_n_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint16x8_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrsraq_n_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ __ret = (int8x16_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \ __ret; \ }) #else #define vrsraq_n_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x16_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrsraq_n_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ __ret = (int32x4_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \ __ret; \ }) #else #define vrsraq_n_s32(__p0, 
__p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int32x4_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrsraq_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ __ret = (int64x2_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 35); \ __ret; \ }) #else #define vrsraq_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (int64x2_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 35); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrsraq_n_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ __ret = (int16x8_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ __ret; \ }) #else #define vrsraq_n_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16x8_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrsra_n_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ __ret = (uint8x8_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 16); \ __ret; \ }) #else #define vrsra_n_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x8_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrsra_n_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ __ret = (uint32x2_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 18); \ __ret; \ }) #else #define vrsra_n_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (uint32x2_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vrsra_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __ret; \ uint64x1_t __s0 = __p0; \ uint64x1_t __s1 = __p1; \ __ret = (uint64x1_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, 
(int8x8_t)__s1, __p2, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vrsra_n_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ __ret = (uint16x4_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 17); \ __ret; \ }) #else #define vrsra_n_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrsra_n_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ __ret = (int8x8_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 0); \ __ret; \ }) #else #define vrsra_n_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x8_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrsra_n_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ __ret = (int32x2_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \ __ret; \ }) #else #define vrsra_n_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (int32x2_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vrsra_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1_t __ret; \ int64x1_t __s0 = __p0; \ int64x1_t __s1 = __p1; \ __ret = (int64x1_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vrsra_n_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ __ret = (int16x4_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \ __ret; \ }) #else #define vrsra_n_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vrsubhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); return __ret; } #else __ai uint16x4_t vrsubhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai uint16x4_t __noswap_vrsubhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vrsubhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); return __ret; } #else __ai uint32x2_t vrsubhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai uint32x2_t __noswap_vrsubhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vrsubhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); return __ret; } #else __ai uint8x8_t vrsubhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai uint8x8_t __noswap_vrsubhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vrsubhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); return __ret; } #else __ai int16x4_t vrsubhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int16x4_t __noswap_vrsubhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vrsubhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); return __ret; } #else __ai int32x2_t vrsubhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int32x2_t __noswap_vrsubhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) 
__builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); return __ret; } #else __ai int8x8_t vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vset_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8_t __ret; \ poly8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ __ret = (poly8x8_t) __builtin_neon_vset_lane_i8(__s0, (poly8x8_t)__s1, __p2); \ __ret; \ }) #else #define vset_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8_t __ret; \ poly8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly8x8_t) __builtin_neon_vset_lane_i8(__s0, (poly8x8_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vset_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8_t __ret; \ poly8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ __ret = (poly8x8_t) __builtin_neon_vset_lane_i8(__s0, (poly8x8_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vset_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4_t __ret; \ poly16_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ __ret = (poly16x4_t) __builtin_neon_vset_lane_i16(__s0, (poly16x4_t)__s1, __p2); \ __ret; \ }) #else #define vset_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4_t __ret; \ poly16_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (poly16x4_t) __builtin_neon_vset_lane_i16(__s0, (poly16x4_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vset_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4_t __ret; \ poly16_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ __ret = (poly16x4_t) __builtin_neon_vset_lane_i16(__s0, (poly16x4_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsetq_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16_t __ret; \ poly8_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ __ret = (poly8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (poly8x16_t)__s1, __p2); \ __ret; \ }) #else #define vsetq_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16_t __ret; \ poly8_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (poly8x16_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vsetq_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16_t __ret; \ poly8_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ __ret = (poly8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (poly8x16_t)__s1, __p2); \ 
__ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsetq_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8_t __ret; \ poly16_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ __ret = (poly16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (poly16x8_t)__s1, __p2); \ __ret; \ }) #else #define vsetq_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8_t __ret; \ poly16_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (poly16x8_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vsetq_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8_t __ret; \ poly16_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ __ret = (poly16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (poly16x8_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsetq_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __ret; \ uint8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ __ret = (uint8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ __ret; \ }) #else #define vsetq_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __ret; \ uint8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vsetq_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __ret; \ uint8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ __ret = (uint8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsetq_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __ret; \ uint32_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ __ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__s1, __p2); \ __ret; \ }) #else #define vsetq_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __ret; \ uint32_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vsetq_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __ret; \ uint32_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ __ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsetq_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ __ret = (uint64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int64x2_t)__s1, __p2); \ __ret; \ }) #else #define vsetq_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (uint64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int64x2_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vsetq_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ __ret = (uint64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int64x2_t)__s1, __p2); \ __ret; \ 
}) #endif #ifdef __LITTLE_ENDIAN__ #define vsetq_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __ret; \ uint16_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ __ret = (uint16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int16x8_t)__s1, __p2); \ __ret; \ }) #else #define vsetq_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __ret; \ uint16_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int16x8_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vsetq_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __ret; \ uint16_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ __ret = (uint16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int16x8_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsetq_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __ret; \ int8_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ __ret = (int8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ __ret; \ }) #else #define vsetq_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __ret; \ int8_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vsetq_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __ret; \ int8_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ __ret = (int8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsetq_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4_t __ret; \ float32_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ __ret = (float32x4_t) __builtin_neon_vsetq_lane_f32(__s0, (float32x4_t)__s1, __p2); \ __ret; \ }) #else #define vsetq_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4_t __ret; \ float32_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (float32x4_t) __builtin_neon_vsetq_lane_f32(__s0, (float32x4_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vsetq_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4_t __ret; \ float32_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ __ret = (float32x4_t) __builtin_neon_vsetq_lane_f32(__s0, (float32x4_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsetq_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ __ret = (int32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__s1, __p2); \ __ret; \ }) #else #define vsetq_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vsetq_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ __ret = (int32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__s1, __p2); \ __ret; \ }) 
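/* Illustrative usage sketch (assumed user-side code, not part of this header; the helper name is hypothetical): vsetq_lane_* returns a copy of a 128-bit vector with one lane replaced by a scalar, and the lane index must be a compile-time constant in range for the element type. vdupq_n_f32 is the broadcast intrinsic defined elsewhere in this header. */
static inline float32x4_t unit_x_example(void) {
  /* Build {1.0f, 0.0f, 0.0f, 0.0f}: broadcast zero, then overwrite lane 0. */
  return vsetq_lane_f32(1.0f, vdupq_n_f32(0.0f), 0);
}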
#endif #ifdef __LITTLE_ENDIAN__ #define vsetq_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ __ret = (int64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int64x2_t)__s1, __p2); \ __ret; \ }) #else #define vsetq_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (int64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int64x2_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vsetq_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ __ret = (int64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int64x2_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsetq_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ __ret = (int16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int16x8_t)__s1, __p2); \ __ret; \ }) #else #define vsetq_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int16x8_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vsetq_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ __ret = (int16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int16x8_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vset_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __ret; \ uint8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ __ret = (uint8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ __ret; \ }) #else #define vset_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __ret; \ uint8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vset_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __ret; \ uint8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ __ret = (uint8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vset_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __ret; \ uint32_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ __ret = (uint32x2_t) __builtin_neon_vset_lane_i32(__s0, (int32x2_t)__s1, __p2); \ __ret; \ }) #else #define vset_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __ret; \ uint32_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (uint32x2_t) __builtin_neon_vset_lane_i32(__s0, (int32x2_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vset_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __ret; \ uint32_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ __ret = (uint32x2_t) __builtin_neon_vset_lane_i32(__s0, (int32x2_t)__s1, __p2); \ __ret; \ }) #endif #define vset_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __ret; \ uint64_t __s0 = __p0; \ uint64x1_t __s1 = __p1; 
\ __ret = (uint64x1_t) __builtin_neon_vset_lane_i64(__s0, (int64x1_t)__s1, __p2); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vset_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __ret; \ uint16_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ __ret = (uint16x4_t) __builtin_neon_vset_lane_i16(__s0, (int16x4_t)__s1, __p2); \ __ret; \ }) #else #define vset_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __ret; \ uint16_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_vset_lane_i16(__s0, (int16x4_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vset_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __ret; \ uint16_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ __ret = (uint16x4_t) __builtin_neon_vset_lane_i16(__s0, (int16x4_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vset_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __ret; \ int8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ __ret = (int8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ __ret; \ }) #else #define vset_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __ret; \ int8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vset_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __ret; \ int8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ __ret = (int8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vset_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2_t __ret; \ float32_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ __ret = (float32x2_t) __builtin_neon_vset_lane_f32(__s0, (float32x2_t)__s1, __p2); \ __ret; \ }) #else #define vset_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2_t __ret; \ float32_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (float32x2_t) __builtin_neon_vset_lane_f32(__s0, (float32x2_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vset_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2_t __ret; \ float32_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ __ret = (float32x2_t) __builtin_neon_vset_lane_f32(__s0, (float32x2_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vset_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ __ret = (int32x2_t) __builtin_neon_vset_lane_i32(__s0, (int32x2_t)__s1, __p2); \ __ret; \ }) #else #define vset_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (int32x2_t) __builtin_neon_vset_lane_i32(__s0, (int32x2_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vset_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ __ret = (int32x2_t) __builtin_neon_vset_lane_i32(__s0, (int32x2_t)__s1, __p2); \ __ret; \ }) #endif #define vset_lane_s64(__p0, __p1, __p2) __extension__ 
({ \ int64x1_t __ret; \ int64_t __s0 = __p0; \ int64x1_t __s1 = __p1; \ __ret = (int64x1_t) __builtin_neon_vset_lane_i64(__s0, (int64x1_t)__s1, __p2); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vset_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ __ret = (int16x4_t) __builtin_neon_vset_lane_i16(__s0, (int16x4_t)__s1, __p2); \ __ret; \ }) #else #define vset_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_vset_lane_i16(__s0, (int16x4_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vset_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ __ret = (int16x4_t) __builtin_neon_vset_lane_i16(__s0, (int16x4_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vshlq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vshlq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vshlq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vshlq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vshlq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else __ai uint64x2_t vshlq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vshlq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8_t vshlq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 
4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16_t vshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4_t vshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); return __ret; } #else __ai int64x2_t vshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int64x2_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8_t vshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, 
__p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vshl_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4_t vshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2_t vshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vshl_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vshlq_n_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ __ret = (uint8x16_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 48); \ __ret; \ }) #else #define vshlq_n_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x16_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 48); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 
6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshlq_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ __ret = (uint32x4_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 50); \ __ret; \ }) #else #define vshlq_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint32x4_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshlq_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ __ret = (uint64x2_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 51); \ __ret; \ }) #else #define vshlq_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint64x2_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshlq_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ __ret = (uint16x8_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 49); \ __ret; \ }) #else #define vshlq_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint16x8_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 49); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshlq_n_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ __ret = (int8x16_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 32); \ __ret; \ }) #else #define vshlq_n_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x16_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 32); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshlq_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (int32x4_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 34); \ __ret; \ }) #else #define vshlq_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int32x4_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshlq_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (int64x2_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 35); \ __ret; \ }) #else #define vshlq_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int64x2_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 35); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshlq_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (int16x8_t) 
__builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 33); \ __ret; \ }) #else #define vshlq_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16x8_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 33); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshl_n_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 16); \ __ret; \ }) #else #define vshl_n_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x8_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshl_n_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 18); \ __ret; \ }) #else #define vshl_n_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint32x2_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vshl_n_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __ret; \ uint64x1_t __s0 = __p0; \ __ret = (uint64x1_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vshl_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 17); \ __ret; \ }) #else #define vshl_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshl_n_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ __ret = (int8x8_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 0); \ __ret; \ }) #else #define vshl_n_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x8_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshl_n_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ __ret = (int32x2_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 2); \ __ret; \ }) #else #define vshl_n_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int32x2_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vshl_n_s64(__p0, __p1) __extension__ ({ \ int64x1_t __ret; \ int64x1_t __s0 = __p0; \ __ret = (int64x1_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vshl_n_s16(__p0, __p1) 
__extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ __ret = (int16x4_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 1); \ __ret; \ }) #else #define vshl_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshll_n_u8(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint8x8_t __s0 = __p0; \ __ret = (uint16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 49); \ __ret; \ }) #else #define vshll_n_u8(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 49); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vshll_n_u8(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint8x8_t __s0 = __p0; \ __ret = (uint16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 49); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshll_n_u32(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint32x2_t __s0 = __p0; \ __ret = (uint64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 51); \ __ret; \ }) #else #define vshll_n_u32(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vshll_n_u32(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint32x2_t __s0 = __p0; \ __ret = (uint64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 51); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshll_n_u16(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint16x4_t __s0 = __p0; \ __ret = (uint32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 50); \ __ret; \ }) #else #define vshll_n_u16(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vshll_n_u16(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint16x4_t __s0 = __p0; \ __ret = (uint32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 50); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshll_n_s8(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int8x8_t __s0 = __p0; \ __ret = (int16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 33); \ __ret; \ }) #else #define vshll_n_s8(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 33); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vshll_n_s8(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int8x8_t __s0 = __p0; \ __ret = (int16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 33); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshll_n_s32(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ 
int32x2_t __s0 = __p0; \ __ret = (int64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 35); \ __ret; \ }) #else #define vshll_n_s32(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 35); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vshll_n_s32(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int32x2_t __s0 = __p0; \ __ret = (int64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 35); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshll_n_s16(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int16x4_t __s0 = __p0; \ __ret = (int32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 34); \ __ret; \ }) #else #define vshll_n_s16(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vshll_n_s16(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int16x4_t __s0 = __p0; \ __ret = (int32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 34); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshrq_n_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ __ret = (uint8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 48); \ __ret; \ }) #else #define vshrq_n_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 48); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshrq_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ __ret = (uint32x4_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 50); \ __ret; \ }) #else #define vshrq_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint32x4_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshrq_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ __ret = (uint64x2_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 51); \ __ret; \ }) #else #define vshrq_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint64x2_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshrq_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ __ret = (uint16x8_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 49); \ __ret; \ }) #else #define vshrq_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint16x8_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 49); 
\ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshrq_n_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ __ret = (int8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 32); \ __ret; \ }) #else #define vshrq_n_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 32); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshrq_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (int32x4_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 34); \ __ret; \ }) #else #define vshrq_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int32x4_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshrq_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (int64x2_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 35); \ __ret; \ }) #else #define vshrq_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int64x2_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 35); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshrq_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (int16x8_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 33); \ __ret; \ }) #else #define vshrq_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16x8_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 33); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshr_n_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 16); \ __ret; \ }) #else #define vshr_n_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x8_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshr_n_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 18); \ __ret; \ }) #else #define vshr_n_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint32x2_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vshr_n_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __ret; \ uint64x1_t __s0 = __p0; \ __ret = 
(uint64x1_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vshr_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 17); \ __ret; \ }) #else #define vshr_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshr_n_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ __ret = (int8x8_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 0); \ __ret; \ }) #else #define vshr_n_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x8_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshr_n_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ __ret = (int32x2_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 2); \ __ret; \ }) #else #define vshr_n_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int32x2_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vshr_n_s64(__p0, __p1) __extension__ ({ \ int64x1_t __ret; \ int64x1_t __s0 = __p0; \ __ret = (int64x1_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vshr_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ __ret = (int16x4_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 1); \ __ret; \ }) #else #define vshr_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 17); \ __ret; \ }) #else #define vshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 17); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 18); \ __ret; \ }) #else #define vshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = 
(uint32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ __ret = (uint32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 18); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 16); \ __ret; \ }) #else #define vshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ __ret = (uint8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 16); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (int16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 1); \ __ret; \ }) #else #define vshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (int16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (int32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 2); \ __ret; \ }) #else #define vshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (int32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (int8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 0); \ __ret; \ }) #else #define vshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (int8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsli_n_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8_t __ret; \ poly8x8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ __ret = (poly8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 4); \ __ret; \ }) #else 
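/* Illustrative usage sketch (assumed user-side code, not part of this header; the helper name is hypothetical): vshrn_n_* shifts each lane right by a constant and narrows it to half width, which pairs naturally with a widening operation. vaddl_u8 (defined elsewhere in this header) adds two uint8x8_t vectors into a uint16x8_t, so no lane overflows before the final shift-and-narrow. */
static inline uint8x8_t average_u8_example(uint8x8_t a, uint8x8_t b) {
  uint16x8_t sum = vaddl_u8(a, b);   /* widening add: u8 + u8 -> u16 */
  return vshrn_n_u16(sum, 1);        /* (a + b) >> 1, narrowed back to u8 */
}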
#define vsli_n_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8_t __ret; \ poly8x8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 4); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsli_n_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4_t __ret; \ poly16x4_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ __ret = (poly16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 5); \ __ret; \ }) #else #define vsli_n_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4_t __ret; \ poly16x4_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (poly16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 5); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsliq_n_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ __ret = (poly8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 36); \ __ret; \ }) #else #define vsliq_n_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 36); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsliq_n_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ __ret = (poly16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 37); \ __ret; \ }) #else #define vsliq_n_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 37); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsliq_n_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ __ret = (uint8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \ __ret; \ }) #else #define vsliq_n_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); 
\ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsliq_n_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ __ret = (uint32x4_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 50); \ __ret; \ }) #else #define vsliq_n_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (uint32x4_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsliq_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ __ret = (uint64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \ __ret; \ }) #else #define vsliq_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (uint64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsliq_n_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ __ret = (uint16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \ __ret; \ }) #else #define vsliq_n_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsliq_n_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ __ret = (int8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \ __ret; \ }) #else #define vsliq_n_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsliq_n_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ __ret = (int32x4_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \ __ret; \ }) #else #define vsliq_n_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = 
__p1; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int32x4_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsliq_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ __ret = (int64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 35); \ __ret; \ }) #else #define vsliq_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (int64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 35); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsliq_n_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ __ret = (int16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ __ret; \ }) #else #define vsliq_n_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsli_n_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ __ret = (uint8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 16); \ __ret; \ }) #else #define vsli_n_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsli_n_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ __ret = (uint32x2_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 18); \ __ret; \ }) #else #define vsli_n_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (uint32x2_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vsli_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __ret; \ uint64x1_t __s0 = __p0; \ uint64x1_t __s1 = __p1; \ __ret = (uint64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vsli_n_u16(__p0, __p1, __p2) __extension__ ({ \ 
uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ __ret = (uint16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 17); \ __ret; \ }) #else #define vsli_n_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsli_n_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ __ret = (int8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 0); \ __ret; \ }) #else #define vsli_n_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsli_n_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ __ret = (int32x2_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \ __ret; \ }) #else #define vsli_n_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (int32x2_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vsli_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1_t __ret; \ int64x1_t __s0 = __p0; \ int64x1_t __s1 = __p1; \ __ret = (int64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vsli_n_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ __ret = (int16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \ __ret; \ }) #else #define vsli_n_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsraq_n_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ __ret = (uint8x16_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \ __ret; \ }) #else #define vsraq_n_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __rev1; __rev1 = 
__builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x16_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsraq_n_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ __ret = (uint32x4_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 50); \ __ret; \ }) #else #define vsraq_n_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (uint32x4_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsraq_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ __ret = (uint64x2_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \ __ret; \ }) #else #define vsraq_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (uint64x2_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsraq_n_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ __ret = (uint16x8_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \ __ret; \ }) #else #define vsraq_n_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint16x8_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsraq_n_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ __ret = (int8x16_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \ __ret; \ }) #else #define vsraq_n_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x16_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsraq_n_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ __ret = (int32x4_t) 
__builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \ __ret; \ }) #else #define vsraq_n_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int32x4_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsraq_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ __ret = (int64x2_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 35); \ __ret; \ }) #else #define vsraq_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (int64x2_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 35); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsraq_n_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ __ret = (int16x8_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ __ret; \ }) #else #define vsraq_n_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16x8_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsra_n_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ __ret = (uint8x8_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 16); \ __ret; \ }) #else #define vsra_n_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x8_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsra_n_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ __ret = (uint32x2_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 18); \ __ret; \ }) #else #define vsra_n_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (uint32x2_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vsra_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __ret; \ uint64x1_t __s0 = __p0; \ 
uint64x1_t __s1 = __p1; \ __ret = (uint64x1_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vsra_n_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ __ret = (uint16x4_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 17); \ __ret; \ }) #else #define vsra_n_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsra_n_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ __ret = (int8x8_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 0); \ __ret; \ }) #else #define vsra_n_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x8_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsra_n_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ __ret = (int32x2_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \ __ret; \ }) #else #define vsra_n_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (int32x2_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vsra_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1_t __ret; \ int64x1_t __s0 = __p0; \ int64x1_t __s1 = __p1; \ __ret = (int64x1_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vsra_n_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ __ret = (int16x4_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \ __ret; \ }) #else #define vsra_n_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsri_n_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8_t __ret; \ poly8x8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ __ret = (poly8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 4); \ __ret; \ }) #else #define vsri_n_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8_t __ret; \ poly8x8_t __s0 = 
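/*
 * Illustrative example (editor's addition): the vsra_n/vsraq_n "shift right
 * and accumulate" intrinsics defined above compute a + (b >> n) per lane,
 * using an arithmetic shift for signed element types.  A minimal sketch of
 * adding a scaled correction term; the helper name is the editor's.
 */
#include <arm_neon.h>

static inline int16x8_t add_eighth_s16x8(int16x8_t acc, int16x8_t err)
{
    return vsraq_n_s16(acc, err, 3);   /* acc + (err >> 3), lane by lane */
}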
__p0; \ poly8x8_t __s1 = __p1; \ poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 4); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsri_n_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4_t __ret; \ poly16x4_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ __ret = (poly16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 5); \ __ret; \ }) #else #define vsri_n_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4_t __ret; \ poly16x4_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (poly16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 5); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsriq_n_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ __ret = (poly8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 36); \ __ret; \ }) #else #define vsriq_n_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 36); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsriq_n_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ __ret = (poly16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 37); \ __ret; \ }) #else #define vsriq_n_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 37); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsriq_n_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ __ret = (uint8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \ __ret; \ }) #else #define vsriq_n_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 
3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsriq_n_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ __ret = (uint32x4_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 50); \ __ret; \ }) #else #define vsriq_n_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (uint32x4_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsriq_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ __ret = (uint64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \ __ret; \ }) #else #define vsriq_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (uint64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsriq_n_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ __ret = (uint16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \ __ret; \ }) #else #define vsriq_n_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsriq_n_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ __ret = (int8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \ __ret; \ }) #else #define vsriq_n_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsriq_n_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ __ret = (int32x4_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \ __ret; \ }) #else #define vsriq_n_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ 
int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int32x4_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsriq_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ __ret = (int64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 35); \ __ret; \ }) #else #define vsriq_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (int64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 35); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsriq_n_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ __ret = (int16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ __ret; \ }) #else #define vsriq_n_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsri_n_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ __ret = (uint8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 16); \ __ret; \ }) #else #define vsri_n_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 16); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsri_n_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ __ret = (uint32x2_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 18); \ __ret; \ }) #else #define vsri_n_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (uint32x2_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 18); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vsri_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __ret; \ uint64x1_t __s0 = __p0; \ uint64x1_t __s1 = __p1; \ __ret = (uint64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vsri_n_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ __ret = 
(uint16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 17); \ __ret; \ }) #else #define vsri_n_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsri_n_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ __ret = (int8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 0); \ __ret; \ }) #else #define vsri_n_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 0); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsri_n_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ __ret = (int32x2_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \ __ret; \ }) #else #define vsri_n_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (int32x2_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vsri_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1_t __ret; \ int64x1_t __s0 = __p0; \ int64x1_t __s1 = __p1; \ __ret = (int64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vsri_n_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ __ret = (int16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \ __ret; \ }) #else #define vsri_n_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_p8(__p0, __p1) __extension__ ({ \ poly8x8_t __s1 = __p1; \ __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 4); \ }) #else #define vst1_p8(__p0, __p1) __extension__ ({ \ poly8x8_t __s1 = __p1; \ poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_p16(__p0, __p1) __extension__ ({ \ poly16x4_t __s1 = __p1; \ __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 5); \ }) #else #define vst1_p16(__p0, __p1) __extension__ ({ \ poly16x4_t __s1 = __p1; \ poly16x4_t __rev1; 
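/*
 * Illustrative example (editor's addition): vsri_n/vsriq_n ("shift right and
 * insert", defined above) compute, per lane, b >> n with the top n bits of
 * the first operand preserved.  Paired with a left shift this gives a
 * lane-wise rotate right, mirroring the vsli-based rotate left shown
 * earlier.  Sketch only; the helper name is the editor's.
 */
#include <arm_neon.h>

static inline uint32x4_t rotr7_u32x4(uint32x4_t x)
{
    /* (x >> 7) | (x << 25): x << 25 occupies exactly the top 7 bits that
     * vsriq_n_u32 preserves from its first operand. */
    return vsriq_n_u32(vshlq_n_u32(x, 25), x, 7);
}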
__rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_p8(__p0, __p1) __extension__ ({ \ poly8x16_t __s1 = __p1; \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 36); \ }) #else #define vst1q_p8(__p0, __p1) __extension__ ({ \ poly8x16_t __s1 = __p1; \ poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_p16(__p0, __p1) __extension__ ({ \ poly16x8_t __s1 = __p1; \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 37); \ }) #else #define vst1q_p16(__p0, __p1) __extension__ ({ \ poly16x8_t __s1 = __p1; \ poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __s1 = __p1; \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 48); \ }) #else #define vst1q_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __s1 = __p1; \ uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __s1 = __p1; \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 50); \ }) #else #define vst1q_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __s1 = __p1; \ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __s1 = __p1; \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 51); \ }) #else #define vst1q_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __s1 = __p1; \ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __s1 = __p1; \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 49); \ }) #else #define vst1q_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __s1 = __p1; \ uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s8(__p0, __p1) __extension__ ({ \ int8x16_t __s1 = __p1; \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 32); \ }) #else #define vst1q_s8(__p0, __p1) __extension__ ({ \ int8x16_t __s1 = __p1; \ int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f32(__p0, __p1) __extension__ ({ \ float32x4_t __s1 = __p1; \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 41); \ }) #else #define vst1q_f32(__p0, __p1) __extension__ ({ \ float32x4_t __s1 = __p1; \ float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s32(__p0, __p1) __extension__ ({ \ int32x4_t __s1 = __p1; \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 34); \ }) #else #define vst1q_s32(__p0, __p1) __extension__ ({ \ int32x4_t __s1 = __p1; \ int32x4_t 
__rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s64(__p0, __p1) __extension__ ({ \ int64x2_t __s1 = __p1; \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 35); \ }) #else #define vst1q_s64(__p0, __p1) __extension__ ({ \ int64x2_t __s1 = __p1; \ int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 35); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s16(__p0, __p1) __extension__ ({ \ int16x8_t __s1 = __p1; \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 33); \ }) #else #define vst1q_s16(__p0, __p1) __extension__ ({ \ int16x8_t __s1 = __p1; \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __s1 = __p1; \ __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 16); \ }) #else #define vst1_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __s1 = __p1; \ uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __s1 = __p1; \ __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 18); \ }) #else #define vst1_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __s1 = __p1; \ uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 18); \ }) #endif #define vst1_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __s1 = __p1; \ __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 19); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __s1 = __p1; \ __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 17); \ }) #else #define vst1_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __s1 = __p1; \ uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_s8(__p0, __p1) __extension__ ({ \ int8x8_t __s1 = __p1; \ __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 0); \ }) #else #define vst1_s8(__p0, __p1) __extension__ ({ \ int8x8_t __s1 = __p1; \ int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_f32(__p0, __p1) __extension__ ({ \ float32x2_t __s1 = __p1; \ __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 9); \ }) #else #define vst1_f32(__p0, __p1) __extension__ ({ \ float32x2_t __s1 = __p1; \ float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_s32(__p0, __p1) __extension__ ({ \ int32x2_t __s1 = __p1; \ __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 2); \ }) #else #define vst1_s32(__p0, __p1) __extension__ ({ \ int32x2_t __s1 = __p1; \ int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 2); \ }) #endif #define vst1_s64(__p0, __p1) __extension__ ({ \ int64x1_t __s1 = __p1; \ __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 3); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1_s16(__p0, __p1) __extension__ ({ \ int16x4_t __s1 = __p1; \ __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 1); \ }) #else #define 
vst1_s16(__p0, __p1) __extension__ ({ \ int16x4_t __s1 = __p1; \ int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8_t __s1 = __p1; \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 4); \ }) #else #define vst1_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8_t __s1 = __p1; \ poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4_t __s1 = __p1; \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 5); \ }) #else #define vst1_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4_t __s1 = __p1; \ poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16_t __s1 = __p1; \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 36); \ }) #else #define vst1q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16_t __s1 = __p1; \ poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8_t __s1 = __p1; \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 37); \ }) #else #define vst1q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8_t __s1 = __p1; \ poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __s1 = __p1; \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 48); \ }) #else #define vst1q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __s1 = __p1; \ uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __s1 = __p1; \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 50); \ }) #else #define vst1q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __s1 = __p1; \ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __s1 = __p1; \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 51); \ }) #else #define vst1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __s1 = __p1; \ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __s1 = __p1; \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 49); \ }) #else #define vst1q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __s1 = __p1; \ uint16x8_t __rev1; __rev1 = 
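/*
 * Illustrative example (editor's addition): the vst1/vst1q full-vector store
 * intrinsics defined above write a whole register to memory, lane 0 at the
 * lowest address, with no interleaving.  Minimal sketch; the helper name is
 * the editor's.
 */
#include <arm_neon.h>

static inline void store4_u32(uint32_t *dst, uint32x4_t v)
{
    vst1q_u32(dst, v);   /* dst[0..3] = lanes 0..3 of v */
}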
__builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __s1 = __p1; \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 32); \ }) #else #define vst1q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __s1 = __p1; \ int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4_t __s1 = __p1; \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 41); \ }) #else #define vst1q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4_t __s1 = __p1; \ float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __s1 = __p1; \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 34); \ }) #else #define vst1q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __s1 = __p1; \ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __s1 = __p1; \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 35); \ }) #else #define vst1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __s1 = __p1; \ int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 35); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __s1 = __p1; \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 33); \ }) #else #define vst1q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __s1 = __p1; \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __s1 = __p1; \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 16); \ }) #else #define vst1_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __s1 = __p1; \ uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __s1 = __p1; \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 18); \ }) #else #define vst1_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __s1 = __p1; \ uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 18); \ }) #endif #define vst1_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __s1 = __p1; \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 19); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __s1 = __p1; \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 17); \ }) #else #define vst1_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __s1 = __p1; 
\ uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __s1 = __p1; \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 0); \ }) #else #define vst1_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __s1 = __p1; \ int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2_t __s1 = __p1; \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 9); \ }) #else #define vst1_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2_t __s1 = __p1; \ float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __s1 = __p1; \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 2); \ }) #else #define vst1_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __s1 = __p1; \ int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 2); \ }) #endif #define vst1_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1_t __s1 = __p1; \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 3); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __s1 = __p1; \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 1); \ }) #else #define vst1_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __s1 = __p1; \ int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_p8_x2(__p0, __p1) __extension__ ({ \ poly8x8x2_t __s1 = __p1; \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 4); \ }) #else #define vst1_p8_x2(__p0, __p1) __extension__ ({ \ poly8x8x2_t __s1 = __p1; \ poly8x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_p16_x2(__p0, __p1) __extension__ ({ \ poly16x4x2_t __s1 = __p1; \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 5); \ }) #else #define vst1_p16_x2(__p0, __p1) __extension__ ({ \ poly16x4x2_t __s1 = __p1; \ poly16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_p8_x2(__p0, __p1) __extension__ ({ \ poly8x16x2_t __s1 = __p1; \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 36); \ }) #else #define vst1q_p8_x2(__p0, __p1) __extension__ ({ \ poly8x16x2_t __s1 = __p1; \ poly8x16x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = 
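/*
 * Illustrative example (editor's addition): the vst1_lane/vst1q_lane
 * variants defined above write a single selected lane rather than the whole
 * vector; the lane index must be a compile-time constant.  Sketch only; the
 * helper name is the editor's.
 */
#include <arm_neon.h>

static inline void store_lane2_u16(uint16_t *dst, uint16x4_t v)
{
    vst1_lane_u16(dst, v, 2);   /* *dst = v[2]; nothing else is written */
}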
__builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_p16_x2(__p0, __p1) __extension__ ({ \ poly16x8x2_t __s1 = __p1; \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 37); \ }) #else #define vst1q_p16_x2(__p0, __p1) __extension__ ({ \ poly16x8x2_t __s1 = __p1; \ poly16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u8_x2(__p0, __p1) __extension__ ({ \ uint8x16x2_t __s1 = __p1; \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 48); \ }) #else #define vst1q_u8_x2(__p0, __p1) __extension__ ({ \ uint8x16x2_t __s1 = __p1; \ uint8x16x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u32_x2(__p0, __p1) __extension__ ({ \ uint32x4x2_t __s1 = __p1; \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 50); \ }) #else #define vst1q_u32_x2(__p0, __p1) __extension__ ({ \ uint32x4x2_t __s1 = __p1; \ uint32x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u64_x2(__p0, __p1) __extension__ ({ \ uint64x2x2_t __s1 = __p1; \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 51); \ }) #else #define vst1q_u64_x2(__p0, __p1) __extension__ ({ \ uint64x2x2_t __s1 = __p1; \ uint64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u16_x2(__p0, __p1) __extension__ ({ \ uint16x8x2_t __s1 = __p1; \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 49); \ }) #else #define vst1q_u16_x2(__p0, __p1) __extension__ ({ \ uint16x8x2_t __s1 = __p1; \ uint16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s8_x2(__p0, __p1) __extension__ ({ \ int8x16x2_t __s1 = __p1; \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 32); \ }) #else #define vst1q_s8_x2(__p0, __p1) __extension__ ({ \ int8x16x2_t __s1 = __p1; \ int8x16x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 
8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f32_x2(__p0, __p1) __extension__ ({ \ float32x4x2_t __s1 = __p1; \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 41); \ }) #else #define vst1q_f32_x2(__p0, __p1) __extension__ ({ \ float32x4x2_t __s1 = __p1; \ float32x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s32_x2(__p0, __p1) __extension__ ({ \ int32x4x2_t __s1 = __p1; \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 34); \ }) #else #define vst1q_s32_x2(__p0, __p1) __extension__ ({ \ int32x4x2_t __s1 = __p1; \ int32x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s64_x2(__p0, __p1) __extension__ ({ \ int64x2x2_t __s1 = __p1; \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 35); \ }) #else #define vst1q_s64_x2(__p0, __p1) __extension__ ({ \ int64x2x2_t __s1 = __p1; \ int64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 35); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s16_x2(__p0, __p1) __extension__ ({ \ int16x8x2_t __s1 = __p1; \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 33); \ }) #else #define vst1q_s16_x2(__p0, __p1) __extension__ ({ \ int16x8x2_t __s1 = __p1; \ int16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_u8_x2(__p0, __p1) __extension__ ({ \ uint8x8x2_t __s1 = __p1; \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 16); \ }) #else #define vst1_u8_x2(__p0, __p1) __extension__ ({ \ uint8x8x2_t __s1 = __p1; \ uint8x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_u32_x2(__p0, __p1) __extension__ ({ \ uint32x2x2_t __s1 = __p1; \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 18); \ }) #else #define vst1_u32_x2(__p0, __p1) __extension__ ({ \ uint32x2x2_t __s1 = __p1; \ uint32x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ 
__builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 18); \ }) #endif #define vst1_u64_x2(__p0, __p1) __extension__ ({ \ uint64x1x2_t __s1 = __p1; \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 19); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1_u16_x2(__p0, __p1) __extension__ ({ \ uint16x4x2_t __s1 = __p1; \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 17); \ }) #else #define vst1_u16_x2(__p0, __p1) __extension__ ({ \ uint16x4x2_t __s1 = __p1; \ uint16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_s8_x2(__p0, __p1) __extension__ ({ \ int8x8x2_t __s1 = __p1; \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 0); \ }) #else #define vst1_s8_x2(__p0, __p1) __extension__ ({ \ int8x8x2_t __s1 = __p1; \ int8x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_f32_x2(__p0, __p1) __extension__ ({ \ float32x2x2_t __s1 = __p1; \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 9); \ }) #else #define vst1_f32_x2(__p0, __p1) __extension__ ({ \ float32x2x2_t __s1 = __p1; \ float32x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_s32_x2(__p0, __p1) __extension__ ({ \ int32x2x2_t __s1 = __p1; \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 2); \ }) #else #define vst1_s32_x2(__p0, __p1) __extension__ ({ \ int32x2x2_t __s1 = __p1; \ int32x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 2); \ }) #endif #define vst1_s64_x2(__p0, __p1) __extension__ ({ \ int64x1x2_t __s1 = __p1; \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 3); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1_s16_x2(__p0, __p1) __extension__ ({ \ int16x4x2_t __s1 = __p1; \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 1); \ }) #else #define vst1_s16_x2(__p0, __p1) __extension__ ({ \ int16x4x2_t __s1 = __p1; \ int16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_p8_x3(__p0, __p1) __extension__ ({ \ poly8x8x3_t __s1 = __p1; \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 4); \ }) #else #define vst1_p8_x3(__p0, __p1) __extension__ ({ \ poly8x8x3_t __s1 = __p1; \ poly8x8x3_t __rev1; \ __rev1.val[0] = 
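/*
 * Illustrative example (editor's addition): the vst1_*_x2/vst1q_*_x2 forms
 * defined above store two registers to consecutive memory without
 * interleaving (unlike vst2).  Sketch only; the helper name is the editor's.
 */
#include <arm_neon.h>

static inline void store8_u32(uint32_t *dst, uint32x4_t lo, uint32x4_t hi)
{
    uint32x4x2_t pair = { { lo, hi } };
    vst1q_u32_x2(dst, pair);   /* dst[0..3] = lo, dst[4..7] = hi */
}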
__builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_p16_x3(__p0, __p1) __extension__ ({ \ poly16x4x3_t __s1 = __p1; \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 5); \ }) #else #define vst1_p16_x3(__p0, __p1) __extension__ ({ \ poly16x4x3_t __s1 = __p1; \ poly16x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_p8_x3(__p0, __p1) __extension__ ({ \ poly8x16x3_t __s1 = __p1; \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 36); \ }) #else #define vst1q_p8_x3(__p0, __p1) __extension__ ({ \ poly8x16x3_t __s1 = __p1; \ poly8x16x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_p16_x3(__p0, __p1) __extension__ ({ \ poly16x8x3_t __s1 = __p1; \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 37); \ }) #else #define vst1q_p16_x3(__p0, __p1) __extension__ ({ \ poly16x8x3_t __s1 = __p1; \ poly16x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u8_x3(__p0, __p1) __extension__ ({ \ uint8x16x3_t __s1 = __p1; \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 48); \ }) #else #define vst1q_u8_x3(__p0, __p1) __extension__ ({ \ uint8x16x3_t __s1 = __p1; \ uint8x16x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u32_x3(__p0, __p1) __extension__ ({ \ uint32x4x3_t __s1 = __p1; \ 
__builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 50); \ }) #else #define vst1q_u32_x3(__p0, __p1) __extension__ ({ \ uint32x4x3_t __s1 = __p1; \ uint32x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u64_x3(__p0, __p1) __extension__ ({ \ uint64x2x3_t __s1 = __p1; \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 51); \ }) #else #define vst1q_u64_x3(__p0, __p1) __extension__ ({ \ uint64x2x3_t __s1 = __p1; \ uint64x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u16_x3(__p0, __p1) __extension__ ({ \ uint16x8x3_t __s1 = __p1; \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 49); \ }) #else #define vst1q_u16_x3(__p0, __p1) __extension__ ({ \ uint16x8x3_t __s1 = __p1; \ uint16x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s8_x3(__p0, __p1) __extension__ ({ \ int8x16x3_t __s1 = __p1; \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 32); \ }) #else #define vst1q_s8_x3(__p0, __p1) __extension__ ({ \ int8x16x3_t __s1 = __p1; \ int8x16x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f32_x3(__p0, __p1) __extension__ ({ \ float32x4x3_t __s1 = __p1; \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 41); \ }) #else #define vst1q_f32_x3(__p0, __p1) __extension__ ({ \ float32x4x3_t __s1 = __p1; \ float32x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ 
#define vst1q_s32_x3(__p0, __p1) __extension__ ({ \ int32x4x3_t __s1 = __p1; \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 34); \ }) #else #define vst1q_s32_x3(__p0, __p1) __extension__ ({ \ int32x4x3_t __s1 = __p1; \ int32x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s64_x3(__p0, __p1) __extension__ ({ \ int64x2x3_t __s1 = __p1; \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 35); \ }) #else #define vst1q_s64_x3(__p0, __p1) __extension__ ({ \ int64x2x3_t __s1 = __p1; \ int64x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 35); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s16_x3(__p0, __p1) __extension__ ({ \ int16x8x3_t __s1 = __p1; \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 33); \ }) #else #define vst1q_s16_x3(__p0, __p1) __extension__ ({ \ int16x8x3_t __s1 = __p1; \ int16x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_u8_x3(__p0, __p1) __extension__ ({ \ uint8x8x3_t __s1 = __p1; \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 16); \ }) #else #define vst1_u8_x3(__p0, __p1) __extension__ ({ \ uint8x8x3_t __s1 = __p1; \ uint8x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_u32_x3(__p0, __p1) __extension__ ({ \ uint32x2x3_t __s1 = __p1; \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 18); \ }) #else #define vst1_u32_x3(__p0, __p1) __extension__ ({ \ uint32x2x3_t __s1 = __p1; \ uint32x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 18); \ }) #endif #define vst1_u64_x3(__p0, __p1) __extension__ ({ \ uint64x1x3_t __s1 = __p1; \ 
__builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 19); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1_u16_x3(__p0, __p1) __extension__ ({ \ uint16x4x3_t __s1 = __p1; \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 17); \ }) #else #define vst1_u16_x3(__p0, __p1) __extension__ ({ \ uint16x4x3_t __s1 = __p1; \ uint16x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_s8_x3(__p0, __p1) __extension__ ({ \ int8x8x3_t __s1 = __p1; \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 0); \ }) #else #define vst1_s8_x3(__p0, __p1) __extension__ ({ \ int8x8x3_t __s1 = __p1; \ int8x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_f32_x3(__p0, __p1) __extension__ ({ \ float32x2x3_t __s1 = __p1; \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 9); \ }) #else #define vst1_f32_x3(__p0, __p1) __extension__ ({ \ float32x2x3_t __s1 = __p1; \ float32x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_s32_x3(__p0, __p1) __extension__ ({ \ int32x2x3_t __s1 = __p1; \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 2); \ }) #else #define vst1_s32_x3(__p0, __p1) __extension__ ({ \ int32x2x3_t __s1 = __p1; \ int32x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 2); \ }) #endif #define vst1_s64_x3(__p0, __p1) __extension__ ({ \ int64x1x3_t __s1 = __p1; \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 3); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1_s16_x3(__p0, __p1) __extension__ ({ \ int16x4x3_t __s1 = __p1; \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 1); \ }) #else #define vst1_s16_x3(__p0, __p1) __extension__ ({ \ int16x4x3_t __s1 = __p1; \ int16x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = 
__builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_p8_x4(__p0, __p1) __extension__ ({ \ poly8x8x4_t __s1 = __p1; \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 4); \ }) #else #define vst1_p8_x4(__p0, __p1) __extension__ ({ \ poly8x8x4_t __s1 = __p1; \ poly8x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_p16_x4(__p0, __p1) __extension__ ({ \ poly16x4x4_t __s1 = __p1; \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 5); \ }) #else #define vst1_p16_x4(__p0, __p1) __extension__ ({ \ poly16x4x4_t __s1 = __p1; \ poly16x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_p8_x4(__p0, __p1) __extension__ ({ \ poly8x16x4_t __s1 = __p1; \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 36); \ }) #else #define vst1q_p8_x4(__p0, __p1) __extension__ ({ \ poly8x16x4_t __s1 = __p1; \ poly8x16x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_p16_x4(__p0, __p1) __extension__ ({ \ poly16x8x4_t __s1 = __p1; \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 37); \ }) #else #define vst1q_p16_x4(__p0, __p1) __extension__ ({ \ poly16x8x4_t __s1 = __p1; \ poly16x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 
6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u8_x4(__p0, __p1) __extension__ ({ \ uint8x16x4_t __s1 = __p1; \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 48); \ }) #else #define vst1q_u8_x4(__p0, __p1) __extension__ ({ \ uint8x16x4_t __s1 = __p1; \ uint8x16x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u32_x4(__p0, __p1) __extension__ ({ \ uint32x4x4_t __s1 = __p1; \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 50); \ }) #else #define vst1q_u32_x4(__p0, __p1) __extension__ ({ \ uint32x4x4_t __s1 = __p1; \ uint32x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u64_x4(__p0, __p1) __extension__ ({ \ uint64x2x4_t __s1 = __p1; \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 51); \ }) #else #define vst1q_u64_x4(__p0, __p1) __extension__ ({ \ uint64x2x4_t __s1 = __p1; \ uint64x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u16_x4(__p0, __p1) __extension__ ({ \ uint16x8x4_t __s1 = __p1; \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 49); \ }) #else #define vst1q_u16_x4(__p0, __p1) __extension__ ({ \ uint16x8x4_t __s1 = __p1; \ uint16x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ 
__builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s8_x4(__p0, __p1) __extension__ ({ \ int8x16x4_t __s1 = __p1; \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 32); \ }) #else #define vst1q_s8_x4(__p0, __p1) __extension__ ({ \ int8x16x4_t __s1 = __p1; \ int8x16x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f32_x4(__p0, __p1) __extension__ ({ \ float32x4x4_t __s1 = __p1; \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 41); \ }) #else #define vst1q_f32_x4(__p0, __p1) __extension__ ({ \ float32x4x4_t __s1 = __p1; \ float32x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s32_x4(__p0, __p1) __extension__ ({ \ int32x4x4_t __s1 = __p1; \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 34); \ }) #else #define vst1q_s32_x4(__p0, __p1) __extension__ ({ \ int32x4x4_t __s1 = __p1; \ int32x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s64_x4(__p0, __p1) __extension__ ({ \ int64x2x4_t __s1 = __p1; \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 35); \ }) #else #define vst1q_s64_x4(__p0, __p1) __extension__ ({ \ int64x2x4_t __s1 = __p1; \ int64x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 
(int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 35); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s16_x4(__p0, __p1) __extension__ ({ \ int16x8x4_t __s1 = __p1; \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 33); \ }) #else #define vst1q_s16_x4(__p0, __p1) __extension__ ({ \ int16x8x4_t __s1 = __p1; \ int16x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_u8_x4(__p0, __p1) __extension__ ({ \ uint8x8x4_t __s1 = __p1; \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 16); \ }) #else #define vst1_u8_x4(__p0, __p1) __extension__ ({ \ uint8x8x4_t __s1 = __p1; \ uint8x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_u32_x4(__p0, __p1) __extension__ ({ \ uint32x2x4_t __s1 = __p1; \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 18); \ }) #else #define vst1_u32_x4(__p0, __p1) __extension__ ({ \ uint32x2x4_t __s1 = __p1; \ uint32x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 18); \ }) #endif #define vst1_u64_x4(__p0, __p1) __extension__ ({ \ uint64x1x4_t __s1 = __p1; \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 19); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1_u16_x4(__p0, __p1) __extension__ ({ \ uint16x4x4_t __s1 = __p1; \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 17); \ }) #else #define vst1_u16_x4(__p0, __p1) __extension__ ({ \ uint16x4x4_t __s1 = __p1; \ uint16x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], 
(int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_s8_x4(__p0, __p1) __extension__ ({ \ int8x8x4_t __s1 = __p1; \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 0); \ }) #else #define vst1_s8_x4(__p0, __p1) __extension__ ({ \ int8x8x4_t __s1 = __p1; \ int8x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_f32_x4(__p0, __p1) __extension__ ({ \ float32x2x4_t __s1 = __p1; \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 9); \ }) #else #define vst1_f32_x4(__p0, __p1) __extension__ ({ \ float32x2x4_t __s1 = __p1; \ float32x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_s32_x4(__p0, __p1) __extension__ ({ \ int32x2x4_t __s1 = __p1; \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 2); \ }) #else #define vst1_s32_x4(__p0, __p1) __extension__ ({ \ int32x2x4_t __s1 = __p1; \ int32x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 2); \ }) #endif #define vst1_s64_x4(__p0, __p1) __extension__ ({ \ int64x1x4_t __s1 = __p1; \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 3); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1_s16_x4(__p0, __p1) __extension__ ({ \ int16x4x4_t __s1 = __p1; \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 1); \ }) #else #define vst1_s16_x4(__p0, __p1) __extension__ ({ \ int16x4x4_t __s1 = __p1; \ int16x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 1); \ }) #endif 
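The vst1_xN / vst1q_xN macros above store a group of two, three, or four NEON registers to consecutive memory; the #else (big-endian) branches first reverse the lane order of each register with __builtin_shufflevector so the resulting memory layout matches the little-endian case before calling the __builtin_neon_vst1*_x*_v builtin. The following is a minimal usage sketch, not taken from this header: it assumes an ARM target built with NEON enabled and that the companion vld1q_u8_x3 / vld1_s16_x4 load intrinsics are available in the same header; buffer names and sizes are illustrative only.

/* usage sketch for the multi-register store intrinsics defined above
 * (assumes an ARM/NEON target, e.g. compiled with -mfpu=neon) */
#include <arm_neon.h>
#include <stdio.h>
#include <stdint.h>

int main(void) {
    uint8_t src[48], dst[48];           /* 3 x 16 bytes for the q-form x3 group */
    for (int i = 0; i < 48; ++i) src[i] = (uint8_t)i;

    /* load three consecutive 128-bit registers, then store them back;
     * on big-endian builds the vst1q_u8_x3 macro reverses each register's
     * lanes before the builtin call, so dst ends up byte-identical to src
     * regardless of endianness */
    uint8x16x3_t v = vld1q_u8_x3(src);  /* assumed companion load intrinsic */
    vst1q_u8_x3(dst, v);

    /* same pattern for a 4-register group of 16-bit signed lanes (d-form) */
    int16_t s16src[16], s16dst[16];     /* 4 x 4 halfwords */
    for (int i = 0; i < 16; ++i) s16src[i] = (int16_t)(i * 3);
    int16x4x4_t w = vld1_s16_x4(s16src);
    vst1_s16_x4(s16dst, w);

    printf("dst[0]=%u dst[47]=%u s16dst[15]=%d\n",
           (unsigned)dst[0], (unsigned)dst[47], (int)s16dst[15]);
    return 0;
}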
#ifdef __LITTLE_ENDIAN__ #define vst2_p8(__p0, __p1) __extension__ ({ \ poly8x8x2_t __s1 = __p1; \ __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 4); \ }) #else #define vst2_p8(__p0, __p1) __extension__ ({ \ poly8x8x2_t __s1 = __p1; \ poly8x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_p16(__p0, __p1) __extension__ ({ \ poly16x4x2_t __s1 = __p1; \ __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 5); \ }) #else #define vst2_p16(__p0, __p1) __extension__ ({ \ poly16x4x2_t __s1 = __p1; \ poly16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_p8(__p0, __p1) __extension__ ({ \ poly8x16x2_t __s1 = __p1; \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 36); \ }) #else #define vst2q_p8(__p0, __p1) __extension__ ({ \ poly8x16x2_t __s1 = __p1; \ poly8x16x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_p16(__p0, __p1) __extension__ ({ \ poly16x8x2_t __s1 = __p1; \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 37); \ }) #else #define vst2q_p16(__p0, __p1) __extension__ ({ \ poly16x8x2_t __s1 = __p1; \ poly16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_u8(__p0, __p1) __extension__ ({ \ uint8x16x2_t __s1 = __p1; \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 48); \ }) #else #define vst2q_u8(__p0, __p1) __extension__ ({ \ uint8x16x2_t __s1 = __p1; \ uint8x16x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_u32(__p0, __p1) __extension__ ({ \ uint32x4x2_t __s1 = __p1; \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 50); \ }) #else #define vst2q_u32(__p0, __p1) __extension__ ({ \ uint32x4x2_t __s1 = __p1; \ uint32x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define 
vst2q_u16(__p0, __p1) __extension__ ({ \ uint16x8x2_t __s1 = __p1; \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 49); \ }) #else #define vst2q_u16(__p0, __p1) __extension__ ({ \ uint16x8x2_t __s1 = __p1; \ uint16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_s8(__p0, __p1) __extension__ ({ \ int8x16x2_t __s1 = __p1; \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 32); \ }) #else #define vst2q_s8(__p0, __p1) __extension__ ({ \ int8x16x2_t __s1 = __p1; \ int8x16x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_f32(__p0, __p1) __extension__ ({ \ float32x4x2_t __s1 = __p1; \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 41); \ }) #else #define vst2q_f32(__p0, __p1) __extension__ ({ \ float32x4x2_t __s1 = __p1; \ float32x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_s32(__p0, __p1) __extension__ ({ \ int32x4x2_t __s1 = __p1; \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 34); \ }) #else #define vst2q_s32(__p0, __p1) __extension__ ({ \ int32x4x2_t __s1 = __p1; \ int32x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_s16(__p0, __p1) __extension__ ({ \ int16x8x2_t __s1 = __p1; \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 33); \ }) #else #define vst2q_s16(__p0, __p1) __extension__ ({ \ int16x8x2_t __s1 = __p1; \ int16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_u8(__p0, __p1) __extension__ ({ \ uint8x8x2_t __s1 = __p1; \ __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 16); \ }) #else #define vst2_u8(__p0, __p1) __extension__ ({ \ uint8x8x2_t __s1 = __p1; \ uint8x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_u32(__p0, __p1) __extension__ ({ \ uint32x2x2_t __s1 = __p1; \ 
__builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 18); \ }) #else #define vst2_u32(__p0, __p1) __extension__ ({ \ uint32x2x2_t __s1 = __p1; \ uint32x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 18); \ }) #endif #define vst2_u64(__p0, __p1) __extension__ ({ \ uint64x1x2_t __s1 = __p1; \ __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 19); \ }) #ifdef __LITTLE_ENDIAN__ #define vst2_u16(__p0, __p1) __extension__ ({ \ uint16x4x2_t __s1 = __p1; \ __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 17); \ }) #else #define vst2_u16(__p0, __p1) __extension__ ({ \ uint16x4x2_t __s1 = __p1; \ uint16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_s8(__p0, __p1) __extension__ ({ \ int8x8x2_t __s1 = __p1; \ __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 0); \ }) #else #define vst2_s8(__p0, __p1) __extension__ ({ \ int8x8x2_t __s1 = __p1; \ int8x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_f32(__p0, __p1) __extension__ ({ \ float32x2x2_t __s1 = __p1; \ __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 9); \ }) #else #define vst2_f32(__p0, __p1) __extension__ ({ \ float32x2x2_t __s1 = __p1; \ float32x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_s32(__p0, __p1) __extension__ ({ \ int32x2x2_t __s1 = __p1; \ __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 2); \ }) #else #define vst2_s32(__p0, __p1) __extension__ ({ \ int32x2x2_t __s1 = __p1; \ int32x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 2); \ }) #endif #define vst2_s64(__p0, __p1) __extension__ ({ \ int64x1x2_t __s1 = __p1; \ __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 3); \ }) #ifdef __LITTLE_ENDIAN__ #define vst2_s16(__p0, __p1) __extension__ ({ \ int16x4x2_t __s1 = __p1; \ __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 1); \ }) #else #define vst2_s16(__p0, __p1) __extension__ ({ \ int16x4x2_t __s1 = __p1; \ int16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_p8(__p0, __p1, __p2) __extension__ ({ \ 
poly8x8x2_t __s1 = __p1; \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 4); \ }) #else #define vst2_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8x2_t __s1 = __p1; \ poly8x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x2_t __s1 = __p1; \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 5); \ }) #else #define vst2_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x2_t __s1 = __p1; \ poly16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x2_t __s1 = __p1; \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 37); \ }) #else #define vst2q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x2_t __s1 = __p1; \ poly16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4x2_t __s1 = __p1; \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 50); \ }) #else #define vst2q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4x2_t __s1 = __p1; \ uint32x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x2_t __s1 = __p1; \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 49); \ }) #else #define vst2q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x2_t __s1 = __p1; \ uint16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x2_t __s1 = __p1; \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 41); \ }) #else #define vst2q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x2_t __s1 = __p1; \ float32x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 41); \ }) 
#endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x2_t __s1 = __p1; \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 34); \ }) #else #define vst2q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x2_t __s1 = __p1; \ int32x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x2_t __s1 = __p1; \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 33); \ }) #else #define vst2q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x2_t __s1 = __p1; \ int16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x2_t __s1 = __p1; \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 16); \ }) #else #define vst2_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x2_t __s1 = __p1; \ uint8x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2x2_t __s1 = __p1; \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 18); \ }) #else #define vst2_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2x2_t __s1 = __p1; \ uint32x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 18); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x2_t __s1 = __p1; \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 17); \ }) #else #define vst2_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x2_t __s1 = __p1; \ uint16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x2_t __s1 = __p1; \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 0); \ }) #else #define vst2_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x2_t __s1 = __p1; \ int8x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2_lane_v(__p0, 
(int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x2_t __s1 = __p1; \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 9); \ }) #else #define vst2_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x2_t __s1 = __p1; \ float32x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x2_t __s1 = __p1; \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 2); \ }) #else #define vst2_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x2_t __s1 = __p1; \ int32x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 2); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x2_t __s1 = __p1; \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 1); \ }) #else #define vst2_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x2_t __s1 = __p1; \ int16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_p8(__p0, __p1) __extension__ ({ \ poly8x8x3_t __s1 = __p1; \ __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 4); \ }) #else #define vst3_p8(__p0, __p1) __extension__ ({ \ poly8x8x3_t __s1 = __p1; \ poly8x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_p16(__p0, __p1) __extension__ ({ \ poly16x4x3_t __s1 = __p1; \ __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 5); \ }) #else #define vst3_p16(__p0, __p1) __extension__ ({ \ poly16x4x3_t __s1 = __p1; \ poly16x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_p8(__p0, __p1) __extension__ ({ \ poly8x16x3_t __s1 = __p1; \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 36); \ }) #else #define vst3q_p8(__p0, __p1) __extension__ ({ \ poly8x16x3_t __s1 = __p1; \ poly8x16x3_t __rev1; \ __rev1.val[0] = 
__builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_p16(__p0, __p1) __extension__ ({ \ poly16x8x3_t __s1 = __p1; \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 37); \ }) #else #define vst3q_p16(__p0, __p1) __extension__ ({ \ poly16x8x3_t __s1 = __p1; \ poly16x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_u8(__p0, __p1) __extension__ ({ \ uint8x16x3_t __s1 = __p1; \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 48); \ }) #else #define vst3q_u8(__p0, __p1) __extension__ ({ \ uint8x16x3_t __s1 = __p1; \ uint8x16x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_u32(__p0, __p1) __extension__ ({ \ uint32x4x3_t __s1 = __p1; \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 50); \ }) #else #define vst3q_u32(__p0, __p1) __extension__ ({ \ uint32x4x3_t __s1 = __p1; \ uint32x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_u16(__p0, __p1) __extension__ ({ \ uint16x8x3_t __s1 = __p1; \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 49); \ }) #else #define vst3q_u16(__p0, __p1) __extension__ ({ \ uint16x8x3_t __s1 = __p1; \ uint16x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_s8(__p0, __p1) __extension__ ({ \ int8x16x3_t __s1 = __p1; \ __builtin_neon_vst3q_v(__p0, 
(int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 32); \ }) #else #define vst3q_s8(__p0, __p1) __extension__ ({ \ int8x16x3_t __s1 = __p1; \ int8x16x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_f32(__p0, __p1) __extension__ ({ \ float32x4x3_t __s1 = __p1; \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 41); \ }) #else #define vst3q_f32(__p0, __p1) __extension__ ({ \ float32x4x3_t __s1 = __p1; \ float32x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_s32(__p0, __p1) __extension__ ({ \ int32x4x3_t __s1 = __p1; \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 34); \ }) #else #define vst3q_s32(__p0, __p1) __extension__ ({ \ int32x4x3_t __s1 = __p1; \ int32x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_s16(__p0, __p1) __extension__ ({ \ int16x8x3_t __s1 = __p1; \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 33); \ }) #else #define vst3q_s16(__p0, __p1) __extension__ ({ \ int16x8x3_t __s1 = __p1; \ int16x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_u8(__p0, __p1) __extension__ ({ \ uint8x8x3_t __s1 = __p1; \ __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 16); \ }) #else #define vst3_u8(__p0, __p1) __extension__ ({ \ uint8x8x3_t __s1 = __p1; \ uint8x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_u32(__p0, __p1) __extension__ ({ \ 
uint32x2x3_t __s1 = __p1; \ __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 18); \ }) #else #define vst3_u32(__p0, __p1) __extension__ ({ \ uint32x2x3_t __s1 = __p1; \ uint32x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 18); \ }) #endif #define vst3_u64(__p0, __p1) __extension__ ({ \ uint64x1x3_t __s1 = __p1; \ __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 19); \ }) #ifdef __LITTLE_ENDIAN__ #define vst3_u16(__p0, __p1) __extension__ ({ \ uint16x4x3_t __s1 = __p1; \ __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 17); \ }) #else #define vst3_u16(__p0, __p1) __extension__ ({ \ uint16x4x3_t __s1 = __p1; \ uint16x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_s8(__p0, __p1) __extension__ ({ \ int8x8x3_t __s1 = __p1; \ __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 0); \ }) #else #define vst3_s8(__p0, __p1) __extension__ ({ \ int8x8x3_t __s1 = __p1; \ int8x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_f32(__p0, __p1) __extension__ ({ \ float32x2x3_t __s1 = __p1; \ __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 9); \ }) #else #define vst3_f32(__p0, __p1) __extension__ ({ \ float32x2x3_t __s1 = __p1; \ float32x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_s32(__p0, __p1) __extension__ ({ \ int32x2x3_t __s1 = __p1; \ __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 2); \ }) #else #define vst3_s32(__p0, __p1) __extension__ ({ \ int32x2x3_t __s1 = __p1; \ int32x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 2); \ }) #endif #define vst3_s64(__p0, __p1) __extension__ ({ \ int64x1x3_t __s1 = __p1; \ 
__builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 3); \ }) #ifdef __LITTLE_ENDIAN__ #define vst3_s16(__p0, __p1) __extension__ ({ \ int16x4x3_t __s1 = __p1; \ __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 1); \ }) #else #define vst3_s16(__p0, __p1) __extension__ ({ \ int16x4x3_t __s1 = __p1; \ int16x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8x3_t __s1 = __p1; \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 4); \ }) #else #define vst3_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8x3_t __s1 = __p1; \ poly8x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x3_t __s1 = __p1; \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 5); \ }) #else #define vst3_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x3_t __s1 = __p1; \ poly16x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x3_t __s1 = __p1; \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 37); \ }) #else #define vst3q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x3_t __s1 = __p1; \ poly16x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4x3_t __s1 = __p1; \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 50); \ }) #else #define vst3q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4x3_t __s1 = __p1; \ uint32x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = 
__builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x3_t __s1 = __p1; \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 49); \ }) #else #define vst3q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x3_t __s1 = __p1; \ uint16x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x3_t __s1 = __p1; \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 41); \ }) #else #define vst3q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x3_t __s1 = __p1; \ float32x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x3_t __s1 = __p1; \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 34); \ }) #else #define vst3q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x3_t __s1 = __p1; \ int32x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x3_t __s1 = __p1; \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 33); \ }) #else #define vst3q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x3_t __s1 = __p1; \ int16x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x3_t __s1 = __p1; \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 16); \ }) #else #define vst3_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x3_t __s1 = __p1; \ uint8x8x3_t __rev1; \ __rev1.val[0] = 
__builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2x3_t __s1 = __p1; \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 18); \ }) #else #define vst3_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2x3_t __s1 = __p1; \ uint32x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 18); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x3_t __s1 = __p1; \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 17); \ }) #else #define vst3_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x3_t __s1 = __p1; \ uint16x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x3_t __s1 = __p1; \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 0); \ }) #else #define vst3_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x3_t __s1 = __p1; \ int8x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x3_t __s1 = __p1; \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 9); \ }) #else #define vst3_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x3_t __s1 = __p1; \ float32x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x3_t __s1 = __p1; \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 2); \ }) #else #define vst3_lane_s32(__p0, __p1, __p2) __extension__ 
({ \ int32x2x3_t __s1 = __p1; \ int32x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 2); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x3_t __s1 = __p1; \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 1); \ }) #else #define vst3_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x3_t __s1 = __p1; \ int16x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_p8(__p0, __p1) __extension__ ({ \ poly8x8x4_t __s1 = __p1; \ __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 4); \ }) #else #define vst4_p8(__p0, __p1) __extension__ ({ \ poly8x8x4_t __s1 = __p1; \ poly8x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_p16(__p0, __p1) __extension__ ({ \ poly16x4x4_t __s1 = __p1; \ __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 5); \ }) #else #define vst4_p16(__p0, __p1) __extension__ ({ \ poly16x4x4_t __s1 = __p1; \ poly16x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_p8(__p0, __p1) __extension__ ({ \ poly8x16x4_t __s1 = __p1; \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 36); \ }) #else #define vst4q_p8(__p0, __p1) __extension__ ({ \ poly8x16x4_t __s1 = __p1; \ poly8x16x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 
14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_p16(__p0, __p1) __extension__ ({ \ poly16x8x4_t __s1 = __p1; \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 37); \ }) #else #define vst4q_p16(__p0, __p1) __extension__ ({ \ poly16x8x4_t __s1 = __p1; \ poly16x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_u8(__p0, __p1) __extension__ ({ \ uint8x16x4_t __s1 = __p1; \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 48); \ }) #else #define vst4q_u8(__p0, __p1) __extension__ ({ \ uint8x16x4_t __s1 = __p1; \ uint8x16x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_u32(__p0, __p1) __extension__ ({ \ uint32x4x4_t __s1 = __p1; \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 50); \ }) #else #define vst4q_u32(__p0, __p1) __extension__ ({ \ uint32x4x4_t __s1 = __p1; \ uint32x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_u16(__p0, __p1) __extension__ ({ \ uint16x8x4_t __s1 = __p1; \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 49); \ }) #else #define vst4q_u16(__p0, __p1) __extension__ ({ \ uint16x8x4_t __s1 = __p1; \ uint16x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], 
__s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_s8(__p0, __p1) __extension__ ({ \ int8x16x4_t __s1 = __p1; \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 32); \ }) #else #define vst4q_s8(__p0, __p1) __extension__ ({ \ int8x16x4_t __s1 = __p1; \ int8x16x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_f32(__p0, __p1) __extension__ ({ \ float32x4x4_t __s1 = __p1; \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 41); \ }) #else #define vst4q_f32(__p0, __p1) __extension__ ({ \ float32x4x4_t __s1 = __p1; \ float32x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_s32(__p0, __p1) __extension__ ({ \ int32x4x4_t __s1 = __p1; \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 34); \ }) #else #define vst4q_s32(__p0, __p1) __extension__ ({ \ int32x4x4_t __s1 = __p1; \ int32x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_s16(__p0, __p1) __extension__ ({ \ int16x8x4_t __s1 = __p1; \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 33); \ }) #else #define vst4q_s16(__p0, __p1) __extension__ ({ \ int16x8x4_t __s1 = __p1; \ int16x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4q_v(__p0, 
(int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_u8(__p0, __p1) __extension__ ({ \ uint8x8x4_t __s1 = __p1; \ __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 16); \ }) #else #define vst4_u8(__p0, __p1) __extension__ ({ \ uint8x8x4_t __s1 = __p1; \ uint8x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_u32(__p0, __p1) __extension__ ({ \ uint32x2x4_t __s1 = __p1; \ __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 18); \ }) #else #define vst4_u32(__p0, __p1) __extension__ ({ \ uint32x2x4_t __s1 = __p1; \ uint32x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 18); \ }) #endif #define vst4_u64(__p0, __p1) __extension__ ({ \ uint64x1x4_t __s1 = __p1; \ __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 19); \ }) #ifdef __LITTLE_ENDIAN__ #define vst4_u16(__p0, __p1) __extension__ ({ \ uint16x4x4_t __s1 = __p1; \ __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 17); \ }) #else #define vst4_u16(__p0, __p1) __extension__ ({ \ uint16x4x4_t __s1 = __p1; \ uint16x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_s8(__p0, __p1) __extension__ ({ \ int8x8x4_t __s1 = __p1; \ __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 0); \ }) #else #define vst4_s8(__p0, __p1) __extension__ ({ \ int8x8x4_t __s1 = __p1; \ int8x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 
(int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_f32(__p0, __p1) __extension__ ({ \ float32x2x4_t __s1 = __p1; \ __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 9); \ }) #else #define vst4_f32(__p0, __p1) __extension__ ({ \ float32x2x4_t __s1 = __p1; \ float32x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_s32(__p0, __p1) __extension__ ({ \ int32x2x4_t __s1 = __p1; \ __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 2); \ }) #else #define vst4_s32(__p0, __p1) __extension__ ({ \ int32x2x4_t __s1 = __p1; \ int32x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 2); \ }) #endif #define vst4_s64(__p0, __p1) __extension__ ({ \ int64x1x4_t __s1 = __p1; \ __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 3); \ }) #ifdef __LITTLE_ENDIAN__ #define vst4_s16(__p0, __p1) __extension__ ({ \ int16x4x4_t __s1 = __p1; \ __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 1); \ }) #else #define vst4_s16(__p0, __p1) __extension__ ({ \ int16x4x4_t __s1 = __p1; \ int16x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8x4_t __s1 = __p1; \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 4); \ }) #else #define vst4_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8x4_t __s1 = __p1; \ poly8x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define 
vst4_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x4_t __s1 = __p1; \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 5); \ }) #else #define vst4_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x4_t __s1 = __p1; \ poly16x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x4_t __s1 = __p1; \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 37); \ }) #else #define vst4q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x4_t __s1 = __p1; \ poly16x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4x4_t __s1 = __p1; \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 50); \ }) #else #define vst4q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4x4_t __s1 = __p1; \ uint32x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x4_t __s1 = __p1; \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 49); \ }) #else #define vst4q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x4_t __s1 = __p1; \ uint16x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_f32(__p0, __p1, 
__p2) __extension__ ({ \ float32x4x4_t __s1 = __p1; \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 41); \ }) #else #define vst4q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x4_t __s1 = __p1; \ float32x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x4_t __s1 = __p1; \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 34); \ }) #else #define vst4q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x4_t __s1 = __p1; \ int32x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x4_t __s1 = __p1; \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 33); \ }) #else #define vst4q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x4_t __s1 = __p1; \ int16x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x4_t __s1 = __p1; \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 16); \ }) #else #define vst4_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x4_t __s1 = __p1; \ uint8x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_u32(__p0, __p1, __p2) __extension__ ({ \ 
uint32x2x4_t __s1 = __p1; \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 18); \ }) #else #define vst4_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2x4_t __s1 = __p1; \ uint32x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 18); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x4_t __s1 = __p1; \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 17); \ }) #else #define vst4_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x4_t __s1 = __p1; \ uint16x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x4_t __s1 = __p1; \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 0); \ }) #else #define vst4_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x4_t __s1 = __p1; \ int8x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x4_t __s1 = __p1; \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 9); \ }) #else #define vst4_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x4_t __s1 = __p1; \ float32x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x4_t __s1 = __p1; \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 2); \ }) 
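// The interleaving store intrinsics above all follow one pattern: on little-endian
// targets the vector tuple is handed straight to the builtin, while the big-endian
// variant first reverses the lanes of every tuple member with __builtin_shufflevector
// so that the element order seen by the vst2/vst3/vst4 builtin matches the in-memory
// order the instruction expects. A minimal usage sketch, kept as a comment so it does
// not alter the header, with illustrative (assumed) variable names only:
//
//   int32_t out[4];                       // receives one element from each vector
//   int32x2x4_t q = { .val = { vdup_n_s32(0), vdup_n_s32(1),
//                              vdup_n_s32(2), vdup_n_s32(3) } };
//   vst4_lane_s32(out, q, 1);             // stores lane 1 of all four vectors, interleaved
//
// The same lane-reversal compensation recurs in the arithmetic helpers (vsubq_*,
// vsubhn_*, vsubl_*, vsubw_*) and the table lookups (vtbl*/vtbx*) defined below.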
#else #define vst4_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x4_t __s1 = __p1; \ int32x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 2); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x4_t __s1 = __p1; \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 1); \ }) #else #define vst4_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x4_t __s1 = __p1; \ int16x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vsubq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __p0 - __p1; return __ret; } #else __ai uint8x16_t vsubq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __rev1; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vsubq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __p0 - __p1; return __ret; } #else __ai uint32x4_t vsubq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 - __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vsubq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = __p0 - __p1; return __ret; } #else __ai uint64x2_t vsubq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 - __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vsubq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __p0 - __p1; return __ret; } #else __ai uint16x8_t vsubq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t 
vsubq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __p0 - __p1; return __ret; } #else __ai int8x16_t vsubq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __rev1; __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vsubq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = __p0 - __p1; return __ret; } #else __ai float32x4_t vsubq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 - __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vsubq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __p0 - __p1; return __ret; } #else __ai int32x4_t vsubq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 - __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vsubq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = __p0 - __p1; return __ret; } #else __ai int64x2_t vsubq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 - __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vsubq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __p0 - __p1; return __ret; } #else __ai int16x8_t vsubq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vsub_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __p0 - __p1; return __ret; } #else __ai uint8x8_t vsub_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vsub_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __p0 - __p1; return __ret; } #else __ai uint32x2_t vsub_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 - __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vsub_u64(uint64x1_t __p0, uint64x1_t 
__p1) { uint64x1_t __ret; __ret = __p0 - __p1; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vsub_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __p0 - __p1; return __ret; } #else __ai uint16x4_t vsub_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 - __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vsub_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __p0 - __p1; return __ret; } #else __ai int8x8_t vsub_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vsub_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = __p0 - __p1; return __ret; } #else __ai float32x2_t vsub_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 - __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vsub_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = __p0 - __p1; return __ret; } #else __ai int32x2_t vsub_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 - __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vsub_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = __p0 - __p1; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vsub_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __p0 - __p1; return __ret; } #else __ai int16x4_t vsub_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 - __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vsubhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); return __ret; } #else __ai uint16x4_t vsubhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai uint16x4_t __noswap_vsubhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vsubhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, 
(int8x16_t)__p1, 18); return __ret; } #else __ai uint32x2_t vsubhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai uint32x2_t __noswap_vsubhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vsubhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); return __ret; } #else __ai uint8x8_t vsubhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai uint8x8_t __noswap_vsubhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vsubhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); return __ret; } #else __ai int16x4_t vsubhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int16x4_t __noswap_vsubhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vsubhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); return __ret; } #else __ai int32x2_t vsubhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int32x2_t __noswap_vsubhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); return __ret; } #else __ai int8x8_t vsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 0); 
__ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai int8x8_t __noswap_vsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vsubl_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; __ret = vmovl_u8(__p0) - vmovl_u8(__p1); return __ret; } #else __ai uint16x8_t vsubl_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmovl_u8(__rev0) - __noswap_vmovl_u8(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vsubl_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; __ret = vmovl_u32(__p0) - vmovl_u32(__p1); return __ret; } #else __ai uint64x2_t vsubl_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __noswap_vmovl_u32(__rev0) - __noswap_vmovl_u32(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vsubl_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; __ret = vmovl_u16(__p0) - vmovl_u16(__p1); return __ret; } #else __ai uint32x4_t vsubl_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vmovl_u16(__rev0) - __noswap_vmovl_u16(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vsubl_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; __ret = vmovl_s8(__p0) - vmovl_s8(__p1); return __ret; } #else __ai int16x8_t vsubl_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmovl_s8(__rev0) - __noswap_vmovl_s8(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vsubl_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; __ret = vmovl_s32(__p0) - vmovl_s32(__p1); return __ret; } #else __ai int64x2_t vsubl_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __noswap_vmovl_s32(__rev0) - __noswap_vmovl_s32(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vsubl_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; __ret = vmovl_s16(__p0) - vmovl_s16(__p1); return __ret; } #else __ai int32x4_t vsubl_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vmovl_s16(__rev0) - __noswap_vmovl_s16(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; 
} #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vsubw_u8(uint16x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; __ret = __p0 - vmovl_u8(__p1); return __ret; } #else __ai uint16x8_t vsubw_u8(uint16x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __noswap_vmovl_u8(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vsubw_u32(uint64x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; __ret = __p0 - vmovl_u32(__p1); return __ret; } #else __ai uint64x2_t vsubw_u32(uint64x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 - __noswap_vmovl_u32(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vsubw_u16(uint32x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; __ret = __p0 - vmovl_u16(__p1); return __ret; } #else __ai uint32x4_t vsubw_u16(uint32x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 - __noswap_vmovl_u16(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vsubw_s8(int16x8_t __p0, int8x8_t __p1) { int16x8_t __ret; __ret = __p0 - vmovl_s8(__p1); return __ret; } #else __ai int16x8_t vsubw_s8(int16x8_t __p0, int8x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __noswap_vmovl_s8(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vsubw_s32(int64x2_t __p0, int32x2_t __p1) { int64x2_t __ret; __ret = __p0 - vmovl_s32(__p1); return __ret; } #else __ai int64x2_t vsubw_s32(int64x2_t __p0, int32x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 - __noswap_vmovl_s32(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vsubw_s16(int32x4_t __p0, int16x4_t __p1) { int32x4_t __ret; __ret = __p0 - vmovl_s16(__p1); return __ret; } #else __ai int32x4_t vsubw_s16(int32x4_t __p0, int16x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 - __noswap_vmovl_s16(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vtbl1_p8(poly8x8_t __p0, uint8x8_t __p1) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vtbl1_v((int8x8_t)__p0, (int8x8_t)__p1, 4); return __ret; } #else __ai poly8x8_t vtbl1_p8(poly8x8_t __p0, uint8x8_t __p1) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 
5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vtbl1_v((int8x8_t)__rev0, (int8x8_t)__rev1, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vtbl1_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtbl1_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vtbl1_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vtbl1_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vtbl1_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vtbl1_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vtbl1_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vtbl1_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vtbl2_p8(poly8x8x2_t __p0, uint8x8_t __p1) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vtbl2_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p1, 4); return __ret; } #else __ai poly8x8_t vtbl2_p8(poly8x8x2_t __p0, uint8x8_t __p1) { poly8x8_t __ret; poly8x8x2_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vtbl2_v((int8x8_t)__rev0.val[0], (int8x8_t)__rev0.val[1], (int8x8_t)__rev1, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vtbl2_u8(uint8x8x2_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtbl2_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vtbl2_u8(uint8x8x2_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8x2_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vtbl2_v((int8x8_t)__rev0.val[0], (int8x8_t)__rev0.val[1], (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vtbl2_s8(int8x8x2_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vtbl2_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vtbl2_s8(int8x8x2_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8x2_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 7, 6, 5, 4, 
3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vtbl2_v((int8x8_t)__rev0.val[0], (int8x8_t)__rev0.val[1], (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vtbl3_p8(poly8x8x3_t __p0, uint8x8_t __p1) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vtbl3_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p0.val[2], (int8x8_t)__p1, 4); return __ret; } #else __ai poly8x8_t vtbl3_p8(poly8x8x3_t __p0, uint8x8_t __p1) { poly8x8_t __ret; poly8x8x3_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vtbl3_v((int8x8_t)__rev0.val[0], (int8x8_t)__rev0.val[1], (int8x8_t)__rev0.val[2], (int8x8_t)__rev1, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vtbl3_u8(uint8x8x3_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtbl3_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p0.val[2], (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vtbl3_u8(uint8x8x3_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8x3_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vtbl3_v((int8x8_t)__rev0.val[0], (int8x8_t)__rev0.val[1], (int8x8_t)__rev0.val[2], (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vtbl3_s8(int8x8x3_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vtbl3_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p0.val[2], (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vtbl3_s8(int8x8x3_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8x3_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vtbl3_v((int8x8_t)__rev0.val[0], (int8x8_t)__rev0.val[1], (int8x8_t)__rev0.val[2], (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vtbl4_p8(poly8x8x4_t __p0, uint8x8_t __p1) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vtbl4_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p0.val[2], (int8x8_t)__p0.val[3], (int8x8_t)__p1, 4); return __ret; } #else __ai poly8x8_t vtbl4_p8(poly8x8x4_t __p0, uint8x8_t __p1) { poly8x8_t __ret; poly8x8x4_t __rev0; __rev0.val[0] = 
__builtin_shufflevector(__p0.val[0], __p0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vtbl4_v((int8x8_t)__rev0.val[0], (int8x8_t)__rev0.val[1], (int8x8_t)__rev0.val[2], (int8x8_t)__rev0.val[3], (int8x8_t)__rev1, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vtbl4_u8(uint8x8x4_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtbl4_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p0.val[2], (int8x8_t)__p0.val[3], (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vtbl4_u8(uint8x8x4_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8x4_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vtbl4_v((int8x8_t)__rev0.val[0], (int8x8_t)__rev0.val[1], (int8x8_t)__rev0.val[2], (int8x8_t)__rev0.val[3], (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vtbl4_s8(int8x8x4_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vtbl4_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p0.val[2], (int8x8_t)__p0.val[3], (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vtbl4_s8(int8x8x4_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8x4_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vtbl4_v((int8x8_t)__rev0.val[0], (int8x8_t)__rev0.val[1], (int8x8_t)__rev0.val[2], (int8x8_t)__rev0.val[3], (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vtbx1_p8(poly8x8_t __p0, poly8x8_t __p1, uint8x8_t __p2) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vtbx1_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 4); return __ret; } #else __ai poly8x8_t vtbx1_p8(poly8x8_t __p0, poly8x8_t __p1, uint8x8_t __p2) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vtbx1_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 4); __ret 
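/* -- Editorial note (illustrative only) --
 * vtbl1_* through vtbl4_* above are byte-wise table lookups: each lane of
 * the index vector selects a byte from an 8-, 16-, 24- or 32-byte table
 * (one to four d-registers held in the x2/x3/x4 structs); an out-of-range
 * index produces 0. A minimal sketch, assuming GNU C vector initializers:
 *
 *   uint8x8_t tbl = {10, 11, 12, 13, 14, 15, 16, 17};
 *   uint8x8_t idx = { 7,  0,  3,  3, 200,  1,  2,  5};
 *   uint8x8_t r   = vtbl1_u8(tbl, idx);
 *   // r == {17, 10, 13, 13, 0, 11, 12, 15}; index 200 is out of range -> 0
 */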
= __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vtbx1_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtbx1_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 16); return __ret; } #else __ai uint8x8_t vtbx1_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vtbx1_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vtbx1_s8(int8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vtbx1_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 0); return __ret; } #else __ai int8x8_t vtbx1_s8(int8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vtbx1_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vtbx2_p8(poly8x8_t __p0, poly8x8x2_t __p1, uint8x8_t __p2) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vtbx2_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p2, 4); return __ret; } #else __ai poly8x8_t vtbx2_p8(poly8x8_t __p0, poly8x8x2_t __p1, uint8x8_t __p2) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8x2_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vtbx2_v((int8x8_t)__rev0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev2, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vtbx2_u8(uint8x8_t __p0, uint8x8x2_t __p1, uint8x8_t __p2) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtbx2_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p2, 16); return __ret; } #else __ai uint8x8_t vtbx2_u8(uint8x8_t __p0, uint8x8x2_t __p1, uint8x8_t __p2) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8x2_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vtbx2_v((int8x8_t)__rev0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev2, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return 
__ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vtbx2_s8(int8x8_t __p0, int8x8x2_t __p1, int8x8_t __p2) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vtbx2_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p2, 0); return __ret; } #else __ai int8x8_t vtbx2_s8(int8x8_t __p0, int8x8x2_t __p1, int8x8_t __p2) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8x2_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vtbx2_v((int8x8_t)__rev0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev2, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vtbx3_p8(poly8x8_t __p0, poly8x8x3_t __p1, uint8x8_t __p2) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vtbx3_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p1.val[2], (int8x8_t)__p2, 4); return __ret; } #else __ai poly8x8_t vtbx3_p8(poly8x8_t __p0, poly8x8x3_t __p1, uint8x8_t __p2) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8x3_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vtbx3_v((int8x8_t)__rev0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev2, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vtbx3_u8(uint8x8_t __p0, uint8x8x3_t __p1, uint8x8_t __p2) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtbx3_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p1.val[2], (int8x8_t)__p2, 16); return __ret; } #else __ai uint8x8_t vtbx3_u8(uint8x8_t __p0, uint8x8x3_t __p1, uint8x8_t __p2) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8x3_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vtbx3_v((int8x8_t)__rev0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev2, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vtbx3_s8(int8x8_t __p0, int8x8x3_t __p1, int8x8_t __p2) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vtbx3_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p1.val[2], (int8x8_t)__p2, 0); return __ret; } #else __ai int8x8_t vtbx3_s8(int8x8_t __p0, int8x8x3_t __p1, int8x8_t __p2) { int8x8_t __ret; int8x8_t 
__rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8x3_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vtbx3_v((int8x8_t)__rev0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev2, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vtbx4_p8(poly8x8_t __p0, poly8x8x4_t __p1, uint8x8_t __p2) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vtbx4_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p1.val[2], (int8x8_t)__p1.val[3], (int8x8_t)__p2, 4); return __ret; } #else __ai poly8x8_t vtbx4_p8(poly8x8_t __p0, poly8x8x4_t __p1, uint8x8_t __p2) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8x4_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vtbx4_v((int8x8_t)__rev0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], (int8x8_t)__rev2, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vtbx4_u8(uint8x8_t __p0, uint8x8x4_t __p1, uint8x8_t __p2) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtbx4_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p1.val[2], (int8x8_t)__p1.val[3], (int8x8_t)__p2, 16); return __ret; } #else __ai uint8x8_t vtbx4_u8(uint8x8_t __p0, uint8x8x4_t __p1, uint8x8_t __p2) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8x4_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vtbx4_v((int8x8_t)__rev0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], (int8x8_t)__rev2, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vtbx4_s8(int8x8_t __p0, int8x8x4_t __p1, int8x8_t __p2) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vtbx4_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p1.val[2], (int8x8_t)__p1.val[3], (int8x8_t)__p2, 0); return __ret; } #else __ai int8x8_t vtbx4_s8(int8x8_t __p0, int8x8x4_t __p1, 
int8x8_t __p2) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8x4_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vtbx4_v((int8x8_t)__rev0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], (int8x8_t)__rev2, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8x2_t vtrn_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8x2_t __ret; __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 4); return __ret; } #else __ai poly8x8x2_t vtrn_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8x2_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 4); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x4x2_t vtrn_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4x2_t __ret; __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 5); return __ret; } #else __ai poly16x4x2_t vtrn_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4x2_t __ret; poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 5); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16x2_t vtrnq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16x2_t __ret; __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 36); return __ret; } #else __ai poly8x16x2_t vtrnq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16x2_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 36); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x8x2_t vtrnq_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8x2_t __ret; __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 37); return __ret; } #else __ai poly16x8x2_t vtrnq_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8x2_t __ret; poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly16x8_t __rev1; __rev1 = 
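/* -- Editorial note (illustrative only) --
 * vtbx1_* through vtbx4_* above are the "extended" table lookups: they
 * behave like vtbl1_*..vtbl4_*, except that an out-of-range index leaves
 * the corresponding byte of the first argument unchanged instead of
 * writing 0. A minimal sketch:
 *
 *   uint8x8_t keep = vdup_n_u8(99);
 *   uint8x8_t tbl  = {10, 11, 12, 13, 14, 15, 16, 17};
 *   uint8x8_t idx  = { 0, 200,  1, 200,  2, 200,  3, 200};
 *   uint8x8_t r    = vtbx1_u8(keep, tbl, idx);
 *   // r == {10, 99, 11, 99, 12, 99, 13, 99}
 */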
__builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 37); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16x2_t vtrnq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16x2_t __ret; __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16x2_t vtrnq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16x2_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4x2_t vtrnq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4x2_t __ret; __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4x2_t vtrnq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4x2_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8x2_t vtrnq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8x2_t __ret; __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8x2_t vtrnq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8x2_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16x2_t vtrnq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16x2_t __ret; __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16x2_t vtrnq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16x2_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4x2_t 
vtrnq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4x2_t __ret; __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #else __ai float32x4x2_t vtrnq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4x2_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 41); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4x2_t vtrnq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4x2_t __ret; __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4x2_t vtrnq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4x2_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8x2_t vtrnq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8x2_t __ret; __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8x2_t vtrnq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8x2_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8x2_t vtrn_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8x2_t __ret; __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8x2_t vtrn_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8x2_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2x2_t vtrn_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2x2_t __ret; __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2x2_t vtrn_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4x2_t vtrn_u16(uint16x4_t __p0, uint16x4_t 
__p1) { uint16x4x2_t __ret; __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4x2_t vtrn_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4x2_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8x2_t vtrn_s8(int8x8_t __p0, int8x8_t __p1) { int8x8x2_t __ret; __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8x2_t vtrn_s8(int8x8_t __p0, int8x8_t __p1) { int8x8x2_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2x2_t vtrn_f32(float32x2_t __p0, float32x2_t __p1) { float32x2x2_t __ret; __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } #else __ai float32x2x2_t vtrn_f32(float32x2_t __p0, float32x2_t __p1) { float32x2x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 9); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2x2_t vtrn_s32(int32x2_t __p0, int32x2_t __p1) { int32x2x2_t __ret; __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2x2_t vtrn_s32(int32x2_t __p0, int32x2_t __p1) { int32x2x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4x2_t vtrn_s16(int16x4_t __p0, int16x4_t __p1) { int16x4x2_t __ret; __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4x2_t vtrn_s16(int16x4_t __p0, int16x4_t __p1) { int16x4x2_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vtst_p8(poly8x8_t __p0, poly8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vtst_p8(poly8x8_t 
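/* -- Editorial note (illustrative only) --
 * The vtrn_*/vtrnq_* intrinsics above transpose pairs of lanes from two
 * vectors and return both results in a two-element struct: val[0] holds
 * the even-numbered lanes of the inputs interleaved, val[1] the
 * odd-numbered lanes. A minimal sketch:
 *
 *   uint8x8_t a = { 0,  1,  2,  3,  4,  5,  6,  7};
 *   uint8x8_t b = {10, 11, 12, 13, 14, 15, 16, 17};
 *   uint8x8x2_t t = vtrn_u8(a, b);
 *   // t.val[0] == {0, 10, 2, 12, 4, 14, 6, 16}
 *   // t.val[1] == {1, 11, 3, 13, 5, 15, 7, 17}
 */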
__p0, poly8x8_t __p1) { uint8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vtst_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vtst_p16(poly16x4_t __p0, poly16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4_t vtst_p16(poly16x4_t __p0, poly16x4_t __p1) { uint16x4_t __ret; poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vtst_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vtstq_p8(poly8x16_t __p0, poly8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vtstq_p8(poly8x16_t __p0, poly8x16_t __p1) { uint8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vtstq_p16(poly16x8_t __p0, poly16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8_t vtstq_p16(poly16x8_t __p0, poly16x8_t __p1) { uint16x8_t __ret; poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vtstq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vtstq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vtstq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vtstq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 
3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vtstq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8_t vtstq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vtstq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vtstq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vtstq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vtstq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vtstq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8_t vtstq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vtst_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vtst_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vtst_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vtst_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = 
(uint32x2_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vtst_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vtst_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vtst_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4_t vtst_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vtst_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vtst_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vtst_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vtst_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vtst_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2_t vtst_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint32x2_t) __builtin_neon_vtst_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vtst_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4_t vtst_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vtst_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8x2_t vuzp_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8x2_t __ret; __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 4); return __ret; } #else __ai poly8x8x2_t vuzp_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8x2_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 4); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 
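/* -- Editorial note (illustrative only) --
 * The vtst_*/vtstq_* intrinsics above are bitwise tests: each result lane
 * is all ones when the corresponding lanes of the two inputs share at
 * least one set bit ((a & b) != 0), and all zeros otherwise. A minimal
 * sketch:
 *
 *   uint8x8_t a = vdup_n_u8(0x0F);
 *   uint8x8_t r = vtst_u8(a, vdup_n_u8(0x10));   // all lanes 0x00: no common bits
 *   uint8x8_t s = vtst_u8(a, vdup_n_u8(0x01));   // all lanes 0xFF
 */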
7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x4x2_t vuzp_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4x2_t __ret; __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 5); return __ret; } #else __ai poly16x4x2_t vuzp_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4x2_t __ret; poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 5); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16x2_t vuzpq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16x2_t __ret; __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 36); return __ret; } #else __ai poly8x16x2_t vuzpq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16x2_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 36); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x8x2_t vuzpq_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8x2_t __ret; __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 37); return __ret; } #else __ai poly16x8x2_t vuzpq_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8x2_t __ret; poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 37); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16x2_t vuzpq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16x2_t __ret; __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16x2_t vuzpq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16x2_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4x2_t vuzpq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4x2_t __ret; __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4x2_t vuzpq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4x2_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 
2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8x2_t vuzpq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8x2_t __ret; __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8x2_t vuzpq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8x2_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16x2_t vuzpq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16x2_t __ret; __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16x2_t vuzpq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16x2_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4x2_t vuzpq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4x2_t __ret; __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #else __ai float32x4x2_t vuzpq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4x2_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 41); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4x2_t vuzpq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4x2_t __ret; __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4x2_t vuzpq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4x2_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8x2_t vuzpq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8x2_t __ret; __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8x2_t 
vuzpq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8x2_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8x2_t vuzp_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8x2_t __ret; __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8x2_t vuzp_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8x2_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2x2_t vuzp_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2x2_t __ret; __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2x2_t vuzp_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4x2_t vuzp_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4x2_t __ret; __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4x2_t vuzp_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4x2_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8x2_t vuzp_s8(int8x8_t __p0, int8x8_t __p1) { int8x8x2_t __ret; __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8x2_t vuzp_s8(int8x8_t __p0, int8x8_t __p1) { int8x8x2_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2x2_t vuzp_f32(float32x2_t __p0, float32x2_t __p1) { float32x2x2_t __ret; __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } #else __ai float32x2x2_t vuzp_f32(float32x2_t __p0, 
float32x2_t __p1) { float32x2x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 9); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2x2_t vuzp_s32(int32x2_t __p0, int32x2_t __p1) { int32x2x2_t __ret; __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2x2_t vuzp_s32(int32x2_t __p0, int32x2_t __p1) { int32x2x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4x2_t vuzp_s16(int16x4_t __p0, int16x4_t __p1) { int16x4x2_t __ret; __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4x2_t vuzp_s16(int16x4_t __p0, int16x4_t __p1) { int16x4x2_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8x2_t vzip_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8x2_t __ret; __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 4); return __ret; } #else __ai poly8x8x2_t vzip_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8x2_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 4); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x4x2_t vzip_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4x2_t __ret; __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 5); return __ret; } #else __ai poly16x4x2_t vzip_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4x2_t __ret; poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 5); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16x2_t vzipq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16x2_t __ret; __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 36); return __ret; } #else __ai poly8x16x2_t vzipq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16x2_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 
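/* -- Editorial note (illustrative only) --
 * The vuzp_*/vuzpq_* intrinsics above de-interleave two vectors: val[0]
 * receives the even-indexed lanes of each input, val[1] the odd-indexed
 * lanes. A minimal sketch:
 *
 *   uint8x8_t a = { 0,  1,  2,  3,  4,  5,  6,  7};
 *   uint8x8_t b = {10, 11, 12, 13, 14, 15, 16, 17};
 *   uint8x8x2_t u = vuzp_u8(a, b);
 *   // u.val[0] == {0, 2, 4, 6, 10, 12, 14, 16}
 *   // u.val[1] == {1, 3, 5, 7, 11, 13, 15, 17}
 */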
2, 1, 0); poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 36); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x8x2_t vzipq_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8x2_t __ret; __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 37); return __ret; } #else __ai poly16x8x2_t vzipq_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8x2_t __ret; poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 37); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16x2_t vzipq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16x2_t __ret; __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16x2_t vzipq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16x2_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4x2_t vzipq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4x2_t __ret; __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4x2_t vzipq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4x2_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8x2_t vzipq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8x2_t __ret; __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8x2_t vzipq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8x2_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai 
int8x16x2_t vzipq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16x2_t __ret; __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16x2_t vzipq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16x2_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4x2_t vzipq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4x2_t __ret; __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #else __ai float32x4x2_t vzipq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4x2_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 41); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4x2_t vzipq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4x2_t __ret; __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4x2_t vzipq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4x2_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8x2_t vzipq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8x2_t __ret; __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8x2_t vzipq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8x2_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8x2_t vzip_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8x2_t __ret; __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8x2_t vzip_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8x2_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 
0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2x2_t vzip_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2x2_t __ret; __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else __ai uint32x2x2_t vzip_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4x2_t vzip_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4x2_t __ret; __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai uint16x4x2_t vzip_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4x2_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8x2_t vzip_s8(int8x8_t __p0, int8x8_t __p1) { int8x8x2_t __ret; __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8x2_t vzip_s8(int8x8_t __p0, int8x8_t __p1) { int8x8x2_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2x2_t vzip_f32(float32x2_t __p0, float32x2_t __p1) { float32x2x2_t __ret; __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } #else __ai float32x2x2_t vzip_f32(float32x2_t __p0, float32x2_t __p1) { float32x2x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 9); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2x2_t vzip_s32(int32x2_t __p0, int32x2_t __p1) { int32x2x2_t __ret; __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2x2_t vzip_s32(int32x2_t __p0, int32x2_t __p1) { int32x2x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4x2_t 
vzip_s16(int16x4_t __p0, int16x4_t __p1) { int16x4x2_t __ret; __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4x2_t vzip_s16(int16x4_t __p0, int16x4_t __p1) { int16x4x2_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define splatq_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8_t __ret; \ bfloat16x4_t __s0 = __p0; \ __ret = (bfloat16x8_t) __builtin_neon_splatq_lane_bf16((int8x8_t)__s0, __p1, 11); \ __ret; \ }) #else #define splatq_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8_t __ret; \ bfloat16x4_t __s0 = __p0; \ bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (bfloat16x8_t) __builtin_neon_splatq_lane_bf16((int8x8_t)__rev0, __p1, 11); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8_t __ret; \ bfloat16x4_t __s0 = __p0; \ __ret = (bfloat16x8_t) __builtin_neon_splatq_lane_bf16((int8x8_t)__s0, __p1, 11); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4_t __ret; \ bfloat16x4_t __s0 = __p0; \ __ret = (bfloat16x4_t) __builtin_neon_splat_lane_bf16((int8x8_t)__s0, __p1, 11); \ __ret; \ }) #else #define splat_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4_t __ret; \ bfloat16x4_t __s0 = __p0; \ bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (bfloat16x4_t) __builtin_neon_splat_lane_bf16((int8x8_t)__rev0, __p1, 11); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splat_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4_t __ret; \ bfloat16x4_t __s0 = __p0; \ __ret = (bfloat16x4_t) __builtin_neon_splat_lane_bf16((int8x8_t)__s0, __p1, 11); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splatq_laneq_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8_t __ret; \ bfloat16x8_t __s0 = __p0; \ __ret = (bfloat16x8_t) __builtin_neon_splatq_laneq_bf16((int8x16_t)__s0, __p1, 43); \ __ret; \ }) #else #define splatq_laneq_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8_t __ret; \ bfloat16x8_t __s0 = __p0; \ bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (bfloat16x8_t) __builtin_neon_splatq_laneq_bf16((int8x16_t)__rev0, __p1, 43); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splatq_laneq_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8_t __ret; \ bfloat16x8_t __s0 = __p0; \ __ret = (bfloat16x8_t) __builtin_neon_splatq_laneq_bf16((int8x16_t)__s0, __p1, 43); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define splat_laneq_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4_t __ret; \ bfloat16x8_t __s0 = __p0; \ __ret = (bfloat16x4_t) __builtin_neon_splat_laneq_bf16((int8x16_t)__s0, __p1, 43); \ __ret; \ }) #else #define splat_laneq_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4_t __ret; \ bfloat16x8_t __s0 = __p0; \ bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (bfloat16x4_t) 
__builtin_neon_splat_laneq_bf16((int8x16_t)__rev0, __p1, 43); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_splat_laneq_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4_t __ret; \ bfloat16x8_t __s0 = __p0; \ __ret = (bfloat16x4_t) __builtin_neon_splat_laneq_bf16((int8x16_t)__s0, __p1, 43); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) float32x4_t vbfdotq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vbfdotq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #else __ai __attribute__((target("bf16"))) float32x4_t vbfdotq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); bfloat16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vbfdotq_f32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("bf16"))) float32x4_t __noswap_vbfdotq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vbfdotq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) float32x2_t vbfdot_f32(float32x2_t __p0, bfloat16x4_t __p1, bfloat16x4_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vbfdot_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #else __ai __attribute__((target("bf16"))) float32x2_t vbfdot_f32(float32x2_t __p0, bfloat16x4_t __p1, bfloat16x4_t __p2) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); bfloat16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); bfloat16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (float32x2_t) __builtin_neon_vbfdot_f32((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai __attribute__((target("bf16"))) float32x2_t __noswap_vbfdot_f32(float32x2_t __p0, bfloat16x4_t __p1, bfloat16x4_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vbfdot_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vbfdotq_lane_f32(__p0_126, __p1_126, __p2_126, __p3_126) __extension__ ({ \ float32x4_t __ret_126; \ float32x4_t __s0_126 = __p0_126; \ bfloat16x8_t __s1_126 = __p1_126; \ bfloat16x4_t __s2_126 = __p2_126; \ bfloat16x4_t __reint_126 = __s2_126; \ float32x4_t __reint1_126 = splatq_lane_f32(*(float32x2_t *) &__reint_126, __p3_126); \ __ret_126 = vbfdotq_f32(__s0_126, __s1_126, *(bfloat16x8_t *) &__reint1_126); \ __ret_126; \ }) #else #define vbfdotq_lane_f32(__p0_127, __p1_127, __p2_127, __p3_127) __extension__ ({ \ float32x4_t __ret_127; \ float32x4_t __s0_127 = __p0_127; \ bfloat16x8_t __s1_127 = __p1_127; \ bfloat16x4_t __s2_127 = __p2_127; \ float32x4_t __rev0_127; __rev0_127 = __builtin_shufflevector(__s0_127, __s0_127, 3, 2, 1, 0); \ bfloat16x8_t __rev1_127; __rev1_127 = __builtin_shufflevector(__s1_127, __s1_127, 7, 6, 5, 4, 3, 2, 1, 0); \ bfloat16x4_t __rev2_127; __rev2_127 = 
__builtin_shufflevector(__s2_127, __s2_127, 3, 2, 1, 0); \ bfloat16x4_t __reint_127 = __rev2_127; \ float32x4_t __reint1_127 = __noswap_splatq_lane_f32(*(float32x2_t *) &__reint_127, __p3_127); \ __ret_127 = __noswap_vbfdotq_f32(__rev0_127, __rev1_127, *(bfloat16x8_t *) &__reint1_127); \ __ret_127 = __builtin_shufflevector(__ret_127, __ret_127, 3, 2, 1, 0); \ __ret_127; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vbfdot_lane_f32(__p0_128, __p1_128, __p2_128, __p3_128) __extension__ ({ \ float32x2_t __ret_128; \ float32x2_t __s0_128 = __p0_128; \ bfloat16x4_t __s1_128 = __p1_128; \ bfloat16x4_t __s2_128 = __p2_128; \ bfloat16x4_t __reint_128 = __s2_128; \ float32x2_t __reint1_128 = splat_lane_f32(*(float32x2_t *) &__reint_128, __p3_128); \ __ret_128 = vbfdot_f32(__s0_128, __s1_128, *(bfloat16x4_t *) &__reint1_128); \ __ret_128; \ }) #else #define vbfdot_lane_f32(__p0_129, __p1_129, __p2_129, __p3_129) __extension__ ({ \ float32x2_t __ret_129; \ float32x2_t __s0_129 = __p0_129; \ bfloat16x4_t __s1_129 = __p1_129; \ bfloat16x4_t __s2_129 = __p2_129; \ float32x2_t __rev0_129; __rev0_129 = __builtin_shufflevector(__s0_129, __s0_129, 1, 0); \ bfloat16x4_t __rev1_129; __rev1_129 = __builtin_shufflevector(__s1_129, __s1_129, 3, 2, 1, 0); \ bfloat16x4_t __rev2_129; __rev2_129 = __builtin_shufflevector(__s2_129, __s2_129, 3, 2, 1, 0); \ bfloat16x4_t __reint_129 = __rev2_129; \ float32x2_t __reint1_129 = __noswap_splat_lane_f32(*(float32x2_t *) &__reint_129, __p3_129); \ __ret_129 = __noswap_vbfdot_f32(__rev0_129, __rev1_129, *(bfloat16x4_t *) &__reint1_129); \ __ret_129 = __builtin_shufflevector(__ret_129, __ret_129, 1, 0); \ __ret_129; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vbfdotq_laneq_f32(__p0_130, __p1_130, __p2_130, __p3_130) __extension__ ({ \ float32x4_t __ret_130; \ float32x4_t __s0_130 = __p0_130; \ bfloat16x8_t __s1_130 = __p1_130; \ bfloat16x8_t __s2_130 = __p2_130; \ bfloat16x8_t __reint_130 = __s2_130; \ float32x4_t __reint1_130 = splatq_laneq_f32(*(float32x4_t *) &__reint_130, __p3_130); \ __ret_130 = vbfdotq_f32(__s0_130, __s1_130, *(bfloat16x8_t *) &__reint1_130); \ __ret_130; \ }) #else #define vbfdotq_laneq_f32(__p0_131, __p1_131, __p2_131, __p3_131) __extension__ ({ \ float32x4_t __ret_131; \ float32x4_t __s0_131 = __p0_131; \ bfloat16x8_t __s1_131 = __p1_131; \ bfloat16x8_t __s2_131 = __p2_131; \ float32x4_t __rev0_131; __rev0_131 = __builtin_shufflevector(__s0_131, __s0_131, 3, 2, 1, 0); \ bfloat16x8_t __rev1_131; __rev1_131 = __builtin_shufflevector(__s1_131, __s1_131, 7, 6, 5, 4, 3, 2, 1, 0); \ bfloat16x8_t __rev2_131; __rev2_131 = __builtin_shufflevector(__s2_131, __s2_131, 7, 6, 5, 4, 3, 2, 1, 0); \ bfloat16x8_t __reint_131 = __rev2_131; \ float32x4_t __reint1_131 = __noswap_splatq_laneq_f32(*(float32x4_t *) &__reint_131, __p3_131); \ __ret_131 = __noswap_vbfdotq_f32(__rev0_131, __rev1_131, *(bfloat16x8_t *) &__reint1_131); \ __ret_131 = __builtin_shufflevector(__ret_131, __ret_131, 3, 2, 1, 0); \ __ret_131; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vbfdot_laneq_f32(__p0_132, __p1_132, __p2_132, __p3_132) __extension__ ({ \ float32x2_t __ret_132; \ float32x2_t __s0_132 = __p0_132; \ bfloat16x4_t __s1_132 = __p1_132; \ bfloat16x8_t __s2_132 = __p2_132; \ bfloat16x8_t __reint_132 = __s2_132; \ float32x2_t __reint1_132 = splat_laneq_f32(*(float32x4_t *) &__reint_132, __p3_132); \ __ret_132 = vbfdot_f32(__s0_132, __s1_132, *(bfloat16x4_t *) &__reint1_132); \ __ret_132; \ }) #else #define vbfdot_laneq_f32(__p0_133, __p1_133, __p2_133, __p3_133) __extension__ ({ \ 
float32x2_t __ret_133; \ float32x2_t __s0_133 = __p0_133; \ bfloat16x4_t __s1_133 = __p1_133; \ bfloat16x8_t __s2_133 = __p2_133; \ float32x2_t __rev0_133; __rev0_133 = __builtin_shufflevector(__s0_133, __s0_133, 1, 0); \ bfloat16x4_t __rev1_133; __rev1_133 = __builtin_shufflevector(__s1_133, __s1_133, 3, 2, 1, 0); \ bfloat16x8_t __rev2_133; __rev2_133 = __builtin_shufflevector(__s2_133, __s2_133, 7, 6, 5, 4, 3, 2, 1, 0); \ bfloat16x8_t __reint_133 = __rev2_133; \ float32x2_t __reint1_133 = __noswap_splat_laneq_f32(*(float32x4_t *) &__reint_133, __p3_133); \ __ret_133 = __noswap_vbfdot_f32(__rev0_133, __rev1_133, *(bfloat16x4_t *) &__reint1_133); \ __ret_133 = __builtin_shufflevector(__ret_133, __ret_133, 1, 0); \ __ret_133; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) float32x4_t vbfmlalbq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vbfmlalbq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #else __ai __attribute__((target("bf16"))) float32x4_t vbfmlalbq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); bfloat16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vbfmlalbq_f32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("bf16"))) float32x4_t __noswap_vbfmlalbq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vbfmlalbq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) float32x4_t vbfmlaltq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vbfmlaltq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #else __ai __attribute__((target("bf16"))) float32x4_t vbfmlaltq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); bfloat16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vbfmlaltq_f32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("bf16"))) float32x4_t __noswap_vbfmlaltq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vbfmlaltq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) float32x4_t vbfmmlaq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vbfmmlaq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #else __ai __attribute__((target("bf16"))) float32x4_t vbfmmlaq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; float32x4_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); bfloat16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vbfmmlaq_f32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) bfloat16x8_t vcombine_bf16(bfloat16x4_t __p0, bfloat16x4_t __p1) { bfloat16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7); return __ret; } #else __ai __attribute__((target("bf16"))) bfloat16x8_t vcombine_bf16(bfloat16x4_t __p0, bfloat16x4_t __p1) { bfloat16x8_t __ret; bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); bfloat16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t __noswap_vcombine_bf16(bfloat16x4_t __p0, bfloat16x4_t __p1) { bfloat16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7); return __ret; } #endif #define vcreate_bf16(__p0) __extension__ ({ \ bfloat16x4_t __ret; \ uint64_t __promote = __p0; \ __ret = (bfloat16x4_t)(__promote); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) float32x4_t vcvt_f32_bf16(bfloat16x4_t __p0_134) { float32x4_t __ret_134; bfloat16x4_t __reint_134 = __p0_134; int32x4_t __reint1_134 = vshll_n_s16(*(int16x4_t *) &__reint_134, 16); __ret_134 = *(float32x4_t *) &__reint1_134; return __ret_134; } #else __ai __attribute__((target("bf16"))) float32x4_t vcvt_f32_bf16(bfloat16x4_t __p0_135) { float32x4_t __ret_135; bfloat16x4_t __rev0_135; __rev0_135 = __builtin_shufflevector(__p0_135, __p0_135, 3, 2, 1, 0); bfloat16x4_t __reint_135 = __rev0_135; int32x4_t __reint1_135 = __noswap_vshll_n_s16(*(int16x4_t *) &__reint_135, 16); __ret_135 = *(float32x4_t *) &__reint1_135; __ret_135 = __builtin_shufflevector(__ret_135, __ret_135, 3, 2, 1, 0); return __ret_135; } __ai __attribute__((target("bf16"))) float32x4_t __noswap_vcvt_f32_bf16(bfloat16x4_t __p0_136) { float32x4_t __ret_136; bfloat16x4_t __reint_136 = __p0_136; int32x4_t __reint1_136 = __noswap_vshll_n_s16(*(int16x4_t *) &__reint_136, 16); __ret_136 = *(float32x4_t *) &__reint1_136; return __ret_136; } #endif __ai __attribute__((target("bf16"))) float32_t vcvtah_f32_bf16(bfloat16_t __p0) { float32_t __ret; bfloat16_t __reint = __p0; int32_t __reint1 = *(int32_t *) &__reint << 16; __ret = *(float32_t *) &__reint1; return __ret; } __ai __attribute__((target("bf16"))) bfloat16_t vcvth_bf16_f32(float32_t __p0) { bfloat16_t __ret; __ret = (bfloat16_t) __builtin_neon_vcvth_bf16_f32(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ #define vduph_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16_t __ret; \ bfloat16x4_t __s0 = __p0; \ __ret = (bfloat16_t) __builtin_neon_vduph_lane_bf16((bfloat16x4_t)__s0, __p1); \ __ret; \ }) #else #define vduph_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16_t __ret; \ bfloat16x4_t __s0 = __p0; \ bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (bfloat16_t) __builtin_neon_vduph_lane_bf16((bfloat16x4_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_lane_bf16(__p0_137, __p1_137) __extension__ 
({ \ bfloat16x8_t __ret_137; \ bfloat16x4_t __s0_137 = __p0_137; \ __ret_137 = splatq_lane_bf16(__s0_137, __p1_137); \ __ret_137; \ }) #else #define vdupq_lane_bf16(__p0_138, __p1_138) __extension__ ({ \ bfloat16x8_t __ret_138; \ bfloat16x4_t __s0_138 = __p0_138; \ bfloat16x4_t __rev0_138; __rev0_138 = __builtin_shufflevector(__s0_138, __s0_138, 3, 2, 1, 0); \ __ret_138 = __noswap_splatq_lane_bf16(__rev0_138, __p1_138); \ __ret_138 = __builtin_shufflevector(__ret_138, __ret_138, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_138; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_lane_bf16(__p0_139, __p1_139) __extension__ ({ \ bfloat16x4_t __ret_139; \ bfloat16x4_t __s0_139 = __p0_139; \ __ret_139 = splat_lane_bf16(__s0_139, __p1_139); \ __ret_139; \ }) #else #define vdup_lane_bf16(__p0_140, __p1_140) __extension__ ({ \ bfloat16x4_t __ret_140; \ bfloat16x4_t __s0_140 = __p0_140; \ bfloat16x4_t __rev0_140; __rev0_140 = __builtin_shufflevector(__s0_140, __s0_140, 3, 2, 1, 0); \ __ret_140 = __noswap_splat_lane_bf16(__rev0_140, __p1_140); \ __ret_140 = __builtin_shufflevector(__ret_140, __ret_140, 3, 2, 1, 0); \ __ret_140; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vduph_laneq_bf16(__p0, __p1) __extension__ ({ \ bfloat16_t __ret; \ bfloat16x8_t __s0 = __p0; \ __ret = (bfloat16_t) __builtin_neon_vduph_laneq_bf16((bfloat16x8_t)__s0, __p1); \ __ret; \ }) #else #define vduph_laneq_bf16(__p0, __p1) __extension__ ({ \ bfloat16_t __ret; \ bfloat16x8_t __s0 = __p0; \ bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (bfloat16_t) __builtin_neon_vduph_laneq_bf16((bfloat16x8_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_laneq_bf16(__p0_141, __p1_141) __extension__ ({ \ bfloat16x8_t __ret_141; \ bfloat16x8_t __s0_141 = __p0_141; \ __ret_141 = splatq_laneq_bf16(__s0_141, __p1_141); \ __ret_141; \ }) #else #define vdupq_laneq_bf16(__p0_142, __p1_142) __extension__ ({ \ bfloat16x8_t __ret_142; \ bfloat16x8_t __s0_142 = __p0_142; \ bfloat16x8_t __rev0_142; __rev0_142 = __builtin_shufflevector(__s0_142, __s0_142, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_142 = __noswap_splatq_laneq_bf16(__rev0_142, __p1_142); \ __ret_142 = __builtin_shufflevector(__ret_142, __ret_142, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_142; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_laneq_bf16(__p0_143, __p1_143) __extension__ ({ \ bfloat16x4_t __ret_143; \ bfloat16x8_t __s0_143 = __p0_143; \ __ret_143 = splat_laneq_bf16(__s0_143, __p1_143); \ __ret_143; \ }) #else #define vdup_laneq_bf16(__p0_144, __p1_144) __extension__ ({ \ bfloat16x4_t __ret_144; \ bfloat16x8_t __s0_144 = __p0_144; \ bfloat16x8_t __rev0_144; __rev0_144 = __builtin_shufflevector(__s0_144, __s0_144, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_144 = __noswap_splat_laneq_bf16(__rev0_144, __p1_144); \ __ret_144 = __builtin_shufflevector(__ret_144, __ret_144, 3, 2, 1, 0); \ __ret_144; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) bfloat16x8_t vdupq_n_bf16(bfloat16_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } #else __ai __attribute__((target("bf16"))) bfloat16x8_t vdupq_n_bf16(bfloat16_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) bfloat16x4_t vdup_n_bf16(bfloat16_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t) 
{__p0, __p0, __p0, __p0}; return __ret; } #else __ai __attribute__((target("bf16"))) bfloat16x4_t vdup_n_bf16(bfloat16_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t) {__p0, __p0, __p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) bfloat16x4_t vget_high_bf16(bfloat16x8_t __p0) { bfloat16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7); return __ret; } #else __ai __attribute__((target("bf16"))) bfloat16x4_t vget_high_bf16(bfloat16x8_t __p0) { bfloat16x4_t __ret; bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 4, 5, 6, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t __noswap_vget_high_bf16(bfloat16x8_t __p0) { bfloat16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vgetq_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16_t __ret; \ bfloat16x8_t __s0 = __p0; \ __ret = (bfloat16_t) __builtin_neon_vgetq_lane_bf16((bfloat16x8_t)__s0, __p1); \ __ret; \ }) #else #define vgetq_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16_t __ret; \ bfloat16x8_t __s0 = __p0; \ bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (bfloat16_t) __builtin_neon_vgetq_lane_bf16((bfloat16x8_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vgetq_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16_t __ret; \ bfloat16x8_t __s0 = __p0; \ __ret = (bfloat16_t) __builtin_neon_vgetq_lane_bf16((bfloat16x8_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vget_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16_t __ret; \ bfloat16x4_t __s0 = __p0; \ __ret = (bfloat16_t) __builtin_neon_vget_lane_bf16((bfloat16x4_t)__s0, __p1); \ __ret; \ }) #else #define vget_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16_t __ret; \ bfloat16x4_t __s0 = __p0; \ bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (bfloat16_t) __builtin_neon_vget_lane_bf16((bfloat16x4_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vget_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16_t __ret; \ bfloat16x4_t __s0 = __p0; \ __ret = (bfloat16_t) __builtin_neon_vget_lane_bf16((bfloat16x4_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) bfloat16x4_t vget_low_bf16(bfloat16x8_t __p0) { bfloat16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3); return __ret; } #else __ai __attribute__((target("bf16"))) bfloat16x4_t vget_low_bf16(bfloat16x8_t __p0) { bfloat16x4_t __ret; bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t __noswap_vget_low_bf16(bfloat16x8_t __p0) { bfloat16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_bf16(__p0) __extension__ ({ \ bfloat16x8_t __ret; \ __ret = (bfloat16x8_t) __builtin_neon_vld1q_bf16(__p0, 43); \ __ret; \ }) #else #define vld1q_bf16(__p0) __extension__ ({ \ bfloat16x8_t __ret; \ __ret = (bfloat16x8_t) __builtin_neon_vld1q_bf16(__p0, 43); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 
2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_bf16(__p0) __extension__ ({ \ bfloat16x4_t __ret; \ __ret = (bfloat16x4_t) __builtin_neon_vld1_bf16(__p0, 11); \ __ret; \ }) #else #define vld1_bf16(__p0) __extension__ ({ \ bfloat16x4_t __ret; \ __ret = (bfloat16x4_t) __builtin_neon_vld1_bf16(__p0, 11); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_bf16(__p0) __extension__ ({ \ bfloat16x8_t __ret; \ __ret = (bfloat16x8_t) __builtin_neon_vld1q_dup_bf16(__p0, 43); \ __ret; \ }) #else #define vld1q_dup_bf16(__p0) __extension__ ({ \ bfloat16x8_t __ret; \ __ret = (bfloat16x8_t) __builtin_neon_vld1q_dup_bf16(__p0, 43); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_dup_bf16(__p0) __extension__ ({ \ bfloat16x4_t __ret; \ __ret = (bfloat16x4_t) __builtin_neon_vld1_dup_bf16(__p0, 11); \ __ret; \ }) #else #define vld1_dup_bf16(__p0) __extension__ ({ \ bfloat16x4_t __ret; \ __ret = (bfloat16x4_t) __builtin_neon_vld1_dup_bf16(__p0, 11); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8_t __ret; \ bfloat16x8_t __s1 = __p1; \ __ret = (bfloat16x8_t) __builtin_neon_vld1q_lane_bf16(__p0, (int8x16_t)__s1, __p2, 43); \ __ret; \ }) #else #define vld1q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8_t __ret; \ bfloat16x8_t __s1 = __p1; \ bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (bfloat16x8_t) __builtin_neon_vld1q_lane_bf16(__p0, (int8x16_t)__rev1, __p2, 43); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4_t __ret; \ bfloat16x4_t __s1 = __p1; \ __ret = (bfloat16x4_t) __builtin_neon_vld1_lane_bf16(__p0, (int8x8_t)__s1, __p2, 11); \ __ret; \ }) #else #define vld1_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4_t __ret; \ bfloat16x4_t __s1 = __p1; \ bfloat16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (bfloat16x4_t) __builtin_neon_vld1_lane_bf16(__p0, (int8x8_t)__rev1, __p2, 11); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_bf16_x2(__p0) __extension__ ({ \ bfloat16x8x2_t __ret; \ __builtin_neon_vld1q_bf16_x2(&__ret, __p0, 43); \ __ret; \ }) #else #define vld1q_bf16_x2(__p0) __extension__ ({ \ bfloat16x8x2_t __ret; \ __builtin_neon_vld1q_bf16_x2(&__ret, __p0, 43); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_bf16_x2(__p0) __extension__ ({ \ bfloat16x4x2_t __ret; \ __builtin_neon_vld1_bf16_x2(&__ret, __p0, 11); \ __ret; \ }) #else #define vld1_bf16_x2(__p0) __extension__ ({ \ bfloat16x4x2_t __ret; \ __builtin_neon_vld1_bf16_x2(&__ret, __p0, 11); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_bf16_x3(__p0) __extension__ ({ \ bfloat16x8x3_t __ret; \ __builtin_neon_vld1q_bf16_x3(&__ret, __p0, 43); \ __ret; \ 
}) #else #define vld1q_bf16_x3(__p0) __extension__ ({ \ bfloat16x8x3_t __ret; \ __builtin_neon_vld1q_bf16_x3(&__ret, __p0, 43); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_bf16_x3(__p0) __extension__ ({ \ bfloat16x4x3_t __ret; \ __builtin_neon_vld1_bf16_x3(&__ret, __p0, 11); \ __ret; \ }) #else #define vld1_bf16_x3(__p0) __extension__ ({ \ bfloat16x4x3_t __ret; \ __builtin_neon_vld1_bf16_x3(&__ret, __p0, 11); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_bf16_x4(__p0) __extension__ ({ \ bfloat16x8x4_t __ret; \ __builtin_neon_vld1q_bf16_x4(&__ret, __p0, 43); \ __ret; \ }) #else #define vld1q_bf16_x4(__p0) __extension__ ({ \ bfloat16x8x4_t __ret; \ __builtin_neon_vld1q_bf16_x4(&__ret, __p0, 43); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_bf16_x4(__p0) __extension__ ({ \ bfloat16x4x4_t __ret; \ __builtin_neon_vld1_bf16_x4(&__ret, __p0, 11); \ __ret; \ }) #else #define vld1_bf16_x4(__p0) __extension__ ({ \ bfloat16x4x4_t __ret; \ __builtin_neon_vld1_bf16_x4(&__ret, __p0, 11); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_bf16(__p0) __extension__ ({ \ bfloat16x8x2_t __ret; \ __builtin_neon_vld2q_bf16(&__ret, __p0, 43); \ __ret; \ }) #else #define vld2q_bf16(__p0) __extension__ ({ \ bfloat16x8x2_t __ret; \ __builtin_neon_vld2q_bf16(&__ret, __p0, 43); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_bf16(__p0) __extension__ ({ \ bfloat16x4x2_t __ret; \ __builtin_neon_vld2_bf16(&__ret, __p0, 11); \ __ret; \ }) #else #define vld2_bf16(__p0) __extension__ ({ \ bfloat16x4x2_t __ret; \ __builtin_neon_vld2_bf16(&__ret, __p0, 11); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_dup_bf16(__p0) __extension__ ({ \ bfloat16x8x2_t __ret; \ __builtin_neon_vld2q_dup_bf16(&__ret, __p0, 43); \ __ret; \ }) #else #define vld2q_dup_bf16(__p0) __extension__ ({ \ bfloat16x8x2_t __ret; \ __builtin_neon_vld2q_dup_bf16(&__ret, __p0, 43); \ \ __ret.val[0] = 
__builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_dup_bf16(__p0) __extension__ ({ \ bfloat16x4x2_t __ret; \ __builtin_neon_vld2_dup_bf16(&__ret, __p0, 11); \ __ret; \ }) #else #define vld2_dup_bf16(__p0) __extension__ ({ \ bfloat16x4x2_t __ret; \ __builtin_neon_vld2_dup_bf16(&__ret, __p0, 11); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x2_t __ret; \ bfloat16x8x2_t __s1 = __p1; \ __builtin_neon_vld2q_lane_bf16(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 43); \ __ret; \ }) #else #define vld2q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x2_t __ret; \ bfloat16x8x2_t __s1 = __p1; \ bfloat16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld2q_lane_bf16(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 43); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x2_t __ret; \ bfloat16x4x2_t __s1 = __p1; \ __builtin_neon_vld2_lane_bf16(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 11); \ __ret; \ }) #else #define vld2_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x2_t __ret; \ bfloat16x4x2_t __s1 = __p1; \ bfloat16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vld2_lane_bf16(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 11); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_bf16(__p0) __extension__ ({ \ bfloat16x8x3_t __ret; \ __builtin_neon_vld3q_bf16(&__ret, __p0, 43); \ __ret; \ }) #else #define vld3q_bf16(__p0) __extension__ ({ \ bfloat16x8x3_t __ret; \ __builtin_neon_vld3q_bf16(&__ret, __p0, 43); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_bf16(__p0) __extension__ ({ \ bfloat16x4x3_t __ret; \ __builtin_neon_vld3_bf16(&__ret, __p0, 11); \ __ret; \ }) #else #define vld3_bf16(__p0) __extension__ ({ \ bfloat16x4x3_t __ret; \ __builtin_neon_vld3_bf16(&__ret, __p0, 11); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef 
__LITTLE_ENDIAN__ #define vld3q_dup_bf16(__p0) __extension__ ({ \ bfloat16x8x3_t __ret; \ __builtin_neon_vld3q_dup_bf16(&__ret, __p0, 43); \ __ret; \ }) #else #define vld3q_dup_bf16(__p0) __extension__ ({ \ bfloat16x8x3_t __ret; \ __builtin_neon_vld3q_dup_bf16(&__ret, __p0, 43); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_dup_bf16(__p0) __extension__ ({ \ bfloat16x4x3_t __ret; \ __builtin_neon_vld3_dup_bf16(&__ret, __p0, 11); \ __ret; \ }) #else #define vld3_dup_bf16(__p0) __extension__ ({ \ bfloat16x4x3_t __ret; \ __builtin_neon_vld3_dup_bf16(&__ret, __p0, 11); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x3_t __ret; \ bfloat16x8x3_t __s1 = __p1; \ __builtin_neon_vld3q_lane_bf16(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 43); \ __ret; \ }) #else #define vld3q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x3_t __ret; \ bfloat16x8x3_t __s1 = __p1; \ bfloat16x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld3q_lane_bf16(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 43); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x3_t __ret; \ bfloat16x4x3_t __s1 = __p1; \ __builtin_neon_vld3_lane_bf16(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 11); \ __ret; \ }) #else #define vld3_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x3_t __ret; \ bfloat16x4x3_t __s1 = __p1; \ bfloat16x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vld3_lane_bf16(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 11); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_bf16(__p0) __extension__ ({ \ bfloat16x8x4_t __ret; \ __builtin_neon_vld4q_bf16(&__ret, __p0, 43); \ __ret; \ }) #else #define vld4q_bf16(__p0) __extension__ ({ \ 
bfloat16x8x4_t __ret; \ __builtin_neon_vld4q_bf16(&__ret, __p0, 43); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_bf16(__p0) __extension__ ({ \ bfloat16x4x4_t __ret; \ __builtin_neon_vld4_bf16(&__ret, __p0, 11); \ __ret; \ }) #else #define vld4_bf16(__p0) __extension__ ({ \ bfloat16x4x4_t __ret; \ __builtin_neon_vld4_bf16(&__ret, __p0, 11); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_dup_bf16(__p0) __extension__ ({ \ bfloat16x8x4_t __ret; \ __builtin_neon_vld4q_dup_bf16(&__ret, __p0, 43); \ __ret; \ }) #else #define vld4q_dup_bf16(__p0) __extension__ ({ \ bfloat16x8x4_t __ret; \ __builtin_neon_vld4q_dup_bf16(&__ret, __p0, 43); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_dup_bf16(__p0) __extension__ ({ \ bfloat16x4x4_t __ret; \ __builtin_neon_vld4_dup_bf16(&__ret, __p0, 11); \ __ret; \ }) #else #define vld4_dup_bf16(__p0) __extension__ ({ \ bfloat16x4x4_t __ret; \ __builtin_neon_vld4_dup_bf16(&__ret, __p0, 11); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x4_t __ret; \ bfloat16x8x4_t __s1 = __p1; \ __builtin_neon_vld4q_lane_bf16(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 43); \ __ret; \ }) #else #define vld4q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x4_t __ret; \ bfloat16x8x4_t __s1 = __p1; \ bfloat16x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld4q_lane_bf16(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 43); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 
7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x4_t __ret; \ bfloat16x4x4_t __s1 = __p1; \ __builtin_neon_vld4_lane_bf16(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 11); \ __ret; \ }) #else #define vld4_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x4_t __ret; \ bfloat16x4x4_t __s1 = __p1; \ bfloat16x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vld4_lane_bf16(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 11); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsetq_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8_t __ret; \ bfloat16_t __s0 = __p0; \ bfloat16x8_t __s1 = __p1; \ __ret = (bfloat16x8_t) __builtin_neon_vsetq_lane_bf16(__s0, (bfloat16x8_t)__s1, __p2); \ __ret; \ }) #else #define vsetq_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8_t __ret; \ bfloat16_t __s0 = __p0; \ bfloat16x8_t __s1 = __p1; \ bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (bfloat16x8_t) __builtin_neon_vsetq_lane_bf16(__s0, (bfloat16x8_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vsetq_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8_t __ret; \ bfloat16_t __s0 = __p0; \ bfloat16x8_t __s1 = __p1; \ __ret = (bfloat16x8_t) __builtin_neon_vsetq_lane_bf16(__s0, (bfloat16x8_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vset_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4_t __ret; \ bfloat16_t __s0 = __p0; \ bfloat16x4_t __s1 = __p1; \ __ret = (bfloat16x4_t) __builtin_neon_vset_lane_bf16(__s0, (bfloat16x4_t)__s1, __p2); \ __ret; \ }) #else #define vset_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4_t __ret; \ bfloat16_t __s0 = __p0; \ bfloat16x4_t __s1 = __p1; \ bfloat16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (bfloat16x4_t) __builtin_neon_vset_lane_bf16(__s0, (bfloat16x4_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vset_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4_t __ret; \ bfloat16_t __s0 = __p0; \ bfloat16x4_t __s1 = __p1; \ __ret = (bfloat16x4_t) __builtin_neon_vset_lane_bf16(__s0, (bfloat16x4_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8_t __s1 = __p1; \ __builtin_neon_vst1q_bf16(__p0, (int8x16_t)__s1, 43); \ }) #else #define vst1q_bf16(__p0, __p1) __extension__ ({ \ 
bfloat16x8_t __s1 = __p1; \ bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_bf16(__p0, (int8x16_t)__rev1, 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4_t __s1 = __p1; \ __builtin_neon_vst1_bf16(__p0, (int8x8_t)__s1, 11); \ }) #else #define vst1_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4_t __s1 = __p1; \ bfloat16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __builtin_neon_vst1_bf16(__p0, (int8x8_t)__rev1, 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8_t __s1 = __p1; \ __builtin_neon_vst1q_lane_bf16(__p0, (int8x16_t)__s1, __p2, 43); \ }) #else #define vst1q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8_t __s1 = __p1; \ bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_lane_bf16(__p0, (int8x16_t)__rev1, __p2, 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4_t __s1 = __p1; \ __builtin_neon_vst1_lane_bf16(__p0, (int8x8_t)__s1, __p2, 11); \ }) #else #define vst1_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4_t __s1 = __p1; \ bfloat16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __builtin_neon_vst1_lane_bf16(__p0, (int8x8_t)__rev1, __p2, 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_bf16_x2(__p0, __p1) __extension__ ({ \ bfloat16x8x2_t __s1 = __p1; \ __builtin_neon_vst1q_bf16_x2(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 43); \ }) #else #define vst1q_bf16_x2(__p0, __p1) __extension__ ({ \ bfloat16x8x2_t __s1 = __p1; \ bfloat16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_bf16_x2(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_bf16_x2(__p0, __p1) __extension__ ({ \ bfloat16x4x2_t __s1 = __p1; \ __builtin_neon_vst1_bf16_x2(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 11); \ }) #else #define vst1_bf16_x2(__p0, __p1) __extension__ ({ \ bfloat16x4x2_t __s1 = __p1; \ bfloat16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst1_bf16_x2(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_bf16_x3(__p0, __p1) __extension__ ({ \ bfloat16x8x3_t __s1 = __p1; \ __builtin_neon_vst1q_bf16_x3(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 43); \ }) #else #define vst1q_bf16_x3(__p0, __p1) __extension__ ({ \ bfloat16x8x3_t __s1 = __p1; \ bfloat16x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_bf16_x3(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_bf16_x3(__p0, __p1) __extension__ ({ \ bfloat16x4x3_t __s1 = __p1; \ __builtin_neon_vst1_bf16_x3(__p0, (int8x8_t)__s1.val[0], 
(int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 11); \ }) #else #define vst1_bf16_x3(__p0, __p1) __extension__ ({ \ bfloat16x4x3_t __s1 = __p1; \ bfloat16x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst1_bf16_x3(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_bf16_x4(__p0, __p1) __extension__ ({ \ bfloat16x8x4_t __s1 = __p1; \ __builtin_neon_vst1q_bf16_x4(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 43); \ }) #else #define vst1q_bf16_x4(__p0, __p1) __extension__ ({ \ bfloat16x8x4_t __s1 = __p1; \ bfloat16x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_bf16_x4(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_bf16_x4(__p0, __p1) __extension__ ({ \ bfloat16x4x4_t __s1 = __p1; \ __builtin_neon_vst1_bf16_x4(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 11); \ }) #else #define vst1_bf16_x4(__p0, __p1) __extension__ ({ \ bfloat16x4x4_t __s1 = __p1; \ bfloat16x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst1_bf16_x4(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8x2_t __s1 = __p1; \ __builtin_neon_vst2q_bf16(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 43); \ }) #else #define vst2q_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8x2_t __s1 = __p1; \ bfloat16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2q_bf16(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4x2_t __s1 = __p1; \ __builtin_neon_vst2_bf16(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 11); \ }) #else #define vst2_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4x2_t __s1 = __p1; \ bfloat16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst2_bf16(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x2_t __s1 = __p1; \ 
__builtin_neon_vst2q_lane_bf16(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 43); \ }) #else #define vst2q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x2_t __s1 = __p1; \ bfloat16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2q_lane_bf16(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x2_t __s1 = __p1; \ __builtin_neon_vst2_lane_bf16(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 11); \ }) #else #define vst2_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x2_t __s1 = __p1; \ bfloat16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst2_lane_bf16(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8x3_t __s1 = __p1; \ __builtin_neon_vst3q_bf16(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 43); \ }) #else #define vst3q_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8x3_t __s1 = __p1; \ bfloat16x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3q_bf16(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4x3_t __s1 = __p1; \ __builtin_neon_vst3_bf16(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 11); \ }) #else #define vst3_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4x3_t __s1 = __p1; \ bfloat16x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst3_bf16(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x3_t __s1 = __p1; \ __builtin_neon_vst3q_lane_bf16(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 43); \ }) #else #define vst3q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x3_t __s1 = __p1; \ bfloat16x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3q_lane_bf16(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x3_t __s1 = __p1; \ __builtin_neon_vst3_lane_bf16(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 
__p2, 11); \ }) #else #define vst3_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x3_t __s1 = __p1; \ bfloat16x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst3_lane_bf16(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8x4_t __s1 = __p1; \ __builtin_neon_vst4q_bf16(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 43); \ }) #else #define vst4q_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8x4_t __s1 = __p1; \ bfloat16x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4q_bf16(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4x4_t __s1 = __p1; \ __builtin_neon_vst4_bf16(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 11); \ }) #else #define vst4_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4x4_t __s1 = __p1; \ bfloat16x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst4_bf16(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x4_t __s1 = __p1; \ __builtin_neon_vst4q_lane_bf16(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 43); \ }) #else #define vst4q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x4_t __s1 = __p1; \ bfloat16x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4q_lane_bf16(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x4_t __s1 = __p1; \ __builtin_neon_vst4_lane_bf16(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 11); \ }) #else #define vst4_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x4_t __s1 = __p1; \ bfloat16x4x4_t __rev1; \ __rev1.val[0] = 
__builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst4_lane_bf16(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("dotprod"))) uint32x4_t vdotq_u32(uint32x4_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vdotq_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); return __ret; } #else __ai __attribute__((target("dotprod"))) uint32x4_t vdotq_u32(uint32x4_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vdotq_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("dotprod"))) uint32x4_t __noswap_vdotq_u32(uint32x4_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vdotq_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("dotprod"))) int32x4_t vdotq_s32(int32x4_t __p0, int8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vdotq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); return __ret; } #else __ai __attribute__((target("dotprod"))) int32x4_t vdotq_s32(int32x4_t __p0, int8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vdotq_s32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("dotprod"))) int32x4_t __noswap_vdotq_s32(int32x4_t __p0, int8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vdotq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("dotprod"))) uint32x2_t vdot_u32(uint32x2_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vdot_u32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 18); return __ret; } #else __ai __attribute__((target("dotprod"))) uint32x2_t vdot_u32(uint32x2_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint32x2_t) __builtin_neon_vdot_u32((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 18); 
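/* Endianness pattern used throughout this header: every "#else" (big-endian) variant reverses the lane order of its vector arguments with __builtin_shufflevector, invokes the same builtin, and reverses the result, so callers see little-endian lane numbering on either endianness. The __noswap_* copies are internal helpers that other intrinsics (e.g. the *_lane_* macros below) call once their operands are already reversed. */
/* Minimal usage sketch for the dot-product intrinsics above -- a hypothetical example, assuming uint8x8_t a, b and a target compiled with the "dotprod" feature:
     uint32x2_t acc = vdup_n_u32(0);
     acc = vdot_u32(acc, a, b);   // each 32-bit lane accumulates a 4-way u8*u8 dot product
*/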
__ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai __attribute__((target("dotprod"))) uint32x2_t __noswap_vdot_u32(uint32x2_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vdot_u32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 18); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("dotprod"))) int32x2_t vdot_s32(int32x2_t __p0, int8x8_t __p1, int8x8_t __p2) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vdot_s32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); return __ret; } #else __ai __attribute__((target("dotprod"))) int32x2_t vdot_s32(int32x2_t __p0, int8x8_t __p1, int8x8_t __p2) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int32x2_t) __builtin_neon_vdot_s32((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai __attribute__((target("dotprod"))) int32x2_t __noswap_vdot_s32(int32x2_t __p0, int8x8_t __p1, int8x8_t __p2) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vdot_s32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vdotq_lane_u32(__p0_145, __p1_145, __p2_145, __p3_145) __extension__ ({ \ uint32x4_t __ret_145; \ uint32x4_t __s0_145 = __p0_145; \ uint8x16_t __s1_145 = __p1_145; \ uint8x8_t __s2_145 = __p2_145; \ uint8x8_t __reint_145 = __s2_145; \ uint32x4_t __reint1_145 = splatq_lane_u32(*(uint32x2_t *) &__reint_145, __p3_145); \ __ret_145 = vdotq_u32(__s0_145, __s1_145, *(uint8x16_t *) &__reint1_145); \ __ret_145; \ }) #else #define vdotq_lane_u32(__p0_146, __p1_146, __p2_146, __p3_146) __extension__ ({ \ uint32x4_t __ret_146; \ uint32x4_t __s0_146 = __p0_146; \ uint8x16_t __s1_146 = __p1_146; \ uint8x8_t __s2_146 = __p2_146; \ uint32x4_t __rev0_146; __rev0_146 = __builtin_shufflevector(__s0_146, __s0_146, 3, 2, 1, 0); \ uint8x16_t __rev1_146; __rev1_146 = __builtin_shufflevector(__s1_146, __s1_146, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __rev2_146; __rev2_146 = __builtin_shufflevector(__s2_146, __s2_146, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __reint_146 = __rev2_146; \ uint32x4_t __reint1_146 = __noswap_splatq_lane_u32(*(uint32x2_t *) &__reint_146, __p3_146); \ __ret_146 = __noswap_vdotq_u32(__rev0_146, __rev1_146, *(uint8x16_t *) &__reint1_146); \ __ret_146 = __builtin_shufflevector(__ret_146, __ret_146, 3, 2, 1, 0); \ __ret_146; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdotq_lane_s32(__p0_147, __p1_147, __p2_147, __p3_147) __extension__ ({ \ int32x4_t __ret_147; \ int32x4_t __s0_147 = __p0_147; \ int8x16_t __s1_147 = __p1_147; \ int8x8_t __s2_147 = __p2_147; \ int8x8_t __reint_147 = __s2_147; \ int32x4_t __reint1_147 = splatq_lane_s32(*(int32x2_t *) &__reint_147, __p3_147); \ __ret_147 = vdotq_s32(__s0_147, __s1_147, *(int8x16_t *) &__reint1_147); \ __ret_147; \ }) #else #define vdotq_lane_s32(__p0_148, __p1_148, __p2_148, __p3_148) __extension__ ({ \ int32x4_t __ret_148; \ int32x4_t __s0_148 = __p0_148; \ int8x16_t __s1_148 = __p1_148; \ int8x8_t __s2_148 = __p2_148; \ int32x4_t __rev0_148; __rev0_148 = __builtin_shufflevector(__s0_148, __s0_148, 3, 2, 1, 0); \ int8x16_t __rev1_148; __rev1_148 = __builtin_shufflevector(__s1_148, __s1_148, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 
5, 4, 3, 2, 1, 0); \ int8x8_t __rev2_148; __rev2_148 = __builtin_shufflevector(__s2_148, __s2_148, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x8_t __reint_148 = __rev2_148; \ int32x4_t __reint1_148 = __noswap_splatq_lane_s32(*(int32x2_t *) &__reint_148, __p3_148); \ __ret_148 = __noswap_vdotq_s32(__rev0_148, __rev1_148, *(int8x16_t *) &__reint1_148); \ __ret_148 = __builtin_shufflevector(__ret_148, __ret_148, 3, 2, 1, 0); \ __ret_148; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdot_lane_u32(__p0_149, __p1_149, __p2_149, __p3_149) __extension__ ({ \ uint32x2_t __ret_149; \ uint32x2_t __s0_149 = __p0_149; \ uint8x8_t __s1_149 = __p1_149; \ uint8x8_t __s2_149 = __p2_149; \ uint8x8_t __reint_149 = __s2_149; \ uint32x2_t __reint1_149 = splat_lane_u32(*(uint32x2_t *) &__reint_149, __p3_149); \ __ret_149 = vdot_u32(__s0_149, __s1_149, *(uint8x8_t *) &__reint1_149); \ __ret_149; \ }) #else #define vdot_lane_u32(__p0_150, __p1_150, __p2_150, __p3_150) __extension__ ({ \ uint32x2_t __ret_150; \ uint32x2_t __s0_150 = __p0_150; \ uint8x8_t __s1_150 = __p1_150; \ uint8x8_t __s2_150 = __p2_150; \ uint32x2_t __rev0_150; __rev0_150 = __builtin_shufflevector(__s0_150, __s0_150, 1, 0); \ uint8x8_t __rev1_150; __rev1_150 = __builtin_shufflevector(__s1_150, __s1_150, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __rev2_150; __rev2_150 = __builtin_shufflevector(__s2_150, __s2_150, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __reint_150 = __rev2_150; \ uint32x2_t __reint1_150 = __noswap_splat_lane_u32(*(uint32x2_t *) &__reint_150, __p3_150); \ __ret_150 = __noswap_vdot_u32(__rev0_150, __rev1_150, *(uint8x8_t *) &__reint1_150); \ __ret_150 = __builtin_shufflevector(__ret_150, __ret_150, 1, 0); \ __ret_150; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdot_lane_s32(__p0_151, __p1_151, __p2_151, __p3_151) __extension__ ({ \ int32x2_t __ret_151; \ int32x2_t __s0_151 = __p0_151; \ int8x8_t __s1_151 = __p1_151; \ int8x8_t __s2_151 = __p2_151; \ int8x8_t __reint_151 = __s2_151; \ int32x2_t __reint1_151 = splat_lane_s32(*(int32x2_t *) &__reint_151, __p3_151); \ __ret_151 = vdot_s32(__s0_151, __s1_151, *(int8x8_t *) &__reint1_151); \ __ret_151; \ }) #else #define vdot_lane_s32(__p0_152, __p1_152, __p2_152, __p3_152) __extension__ ({ \ int32x2_t __ret_152; \ int32x2_t __s0_152 = __p0_152; \ int8x8_t __s1_152 = __p1_152; \ int8x8_t __s2_152 = __p2_152; \ int32x2_t __rev0_152; __rev0_152 = __builtin_shufflevector(__s0_152, __s0_152, 1, 0); \ int8x8_t __rev1_152; __rev1_152 = __builtin_shufflevector(__s1_152, __s1_152, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x8_t __rev2_152; __rev2_152 = __builtin_shufflevector(__s2_152, __s2_152, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x8_t __reint_152 = __rev2_152; \ int32x2_t __reint1_152 = __noswap_splat_lane_s32(*(int32x2_t *) &__reint_152, __p3_152); \ __ret_152 = __noswap_vdot_s32(__rev0_152, __rev1_152, *(int8x8_t *) &__reint1_152); \ __ret_152 = __builtin_shufflevector(__ret_152, __ret_152, 1, 0); \ __ret_152; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vabdq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vabdq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vabdq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) 
__builtin_neon_vabdq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vabd_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vabd_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vabd_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vabd_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vabsq_f16(float16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vabsq_f16((int8x16_t)__p0, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vabsq_f16(float16x8_t __p0) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vabsq_f16((int8x16_t)__rev0, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vabs_f16(float16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vabs_f16((int8x8_t)__p0, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vabs_f16(float16x4_t __p0) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vabs_f16((int8x8_t)__rev0, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vaddq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = __p0 + __p1; return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vaddq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vadd_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = __p0 + __p1; return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vadd_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 + __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vbslq_f16(uint16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vbslq_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vbslq_f16(uint16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { 
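/* vbsl ("bitwise select"): for every bit set in the mask __p0 the result takes that bit from __p1, otherwise from __p2. The comparison intrinsics below produce exactly the all-ones / all-zeros lane masks this expects. */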
float16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vbslq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vbsl_f16(uint16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vbsl_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vbsl_f16(uint16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vbsl_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x8_t vcageq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcageq_f16((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x8_t vcageq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vcageq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x4_t vcage_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcage_f16((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x4_t vcage_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vcage_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x8_t vcagtq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcagtq_f16((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x8_t vcagtq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vcagtq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) 
uint16x4_t vcagt_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcagt_f16((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x4_t vcagt_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vcagt_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x8_t vcaleq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcaleq_f16((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x8_t vcaleq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vcaleq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x4_t vcale_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcale_f16((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x4_t vcale_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vcale_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x8_t vcaltq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcaltq_f16((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x8_t vcaltq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vcaltq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x4_t vcalt_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcalt_f16((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x4_t vcalt_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vcalt_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x8_t 
vceqq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 == __p1); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x8_t vceqq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x4_t vceq_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 == __p1); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x4_t vceq_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x8_t vceqzq_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vceqzq_f16((int8x16_t)__p0, 49); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x8_t vceqzq_f16(float16x8_t __p0) { uint16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vceqzq_f16((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x4_t vceqz_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vceqz_f16((int8x8_t)__p0, 17); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x4_t vceqz_f16(float16x4_t __p0) { uint16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vceqz_f16((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x8_t vcgeq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 >= __p1); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x8_t vcgeq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x4_t vcge_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 >= __p1); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x4_t vcge_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai 
__attribute__((target("fullfp16"))) uint16x8_t vcgezq_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcgezq_f16((int8x16_t)__p0, 49); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x8_t vcgezq_f16(float16x8_t __p0) { uint16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vcgezq_f16((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x4_t vcgez_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcgez_f16((int8x8_t)__p0, 17); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x4_t vcgez_f16(float16x4_t __p0) { uint16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vcgez_f16((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x8_t vcgtq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 > __p1); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x8_t vcgtq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x4_t vcgt_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 > __p1); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x4_t vcgt_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x8_t vcgtzq_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcgtzq_f16((int8x16_t)__p0, 49); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x8_t vcgtzq_f16(float16x8_t __p0) { uint16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vcgtzq_f16((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x4_t vcgtz_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcgtz_f16((int8x8_t)__p0, 17); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x4_t vcgtz_f16(float16x4_t __p0) { uint16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vcgtz_f16((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x8_t vcleq_f16(float16x8_t __p0, float16x8_t __p1) { 
uint16x8_t __ret; __ret = (uint16x8_t)(__p0 <= __p1); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x8_t vcleq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t)(__rev0 <= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x4_t vcle_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 <= __p1); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x4_t vcle_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t)(__rev0 <= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x8_t vclezq_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vclezq_f16((int8x16_t)__p0, 49); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x8_t vclezq_f16(float16x8_t __p0) { uint16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vclezq_f16((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x4_t vclez_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vclez_f16((int8x8_t)__p0, 17); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x4_t vclez_f16(float16x4_t __p0) { uint16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vclez_f16((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x8_t vcltq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 < __p1); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x8_t vcltq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t)(__rev0 < __rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x4_t vclt_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 < __p1); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x4_t vclt_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint16x4_t)(__rev0 < __rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x8_t vcltzq_f16(float16x8_t __p0) { 
uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcltzq_f16((int8x16_t)__p0, 49); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x8_t vcltzq_f16(float16x8_t __p0) { uint16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vcltzq_f16((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x4_t vcltz_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcltz_f16((int8x8_t)__p0, 17); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x4_t vcltz_f16(float16x4_t __p0) { uint16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vcltz_f16((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vcvtq_f16_u16(uint16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vcvtq_f16_u16((int8x16_t)__p0, 49); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vcvtq_f16_u16(uint16x8_t __p0) { float16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vcvtq_f16_u16((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vcvtq_f16_s16(int16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vcvtq_f16_s16((int8x16_t)__p0, 33); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vcvtq_f16_s16(int16x8_t __p0) { float16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vcvtq_f16_s16((int8x16_t)__rev0, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vcvt_f16_u16(uint16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcvt_f16_u16((int8x8_t)__p0, 17); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vcvt_f16_u16(uint16x4_t __p0) { float16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vcvt_f16_u16((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vcvt_f16_s16(int16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcvt_f16_s16((int8x8_t)__p0, 1); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vcvt_f16_s16(int16x4_t __p0) { float16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vcvt_f16_s16((int8x8_t)__rev0, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vcvtq_n_f16_u16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ uint16x8_t __s0 = __p0; \ __ret = (float16x8_t) __builtin_neon_vcvtq_n_f16_u16((int8x16_t)__s0, __p1, 49); \ __ret; \ }) #else #define vcvtq_n_f16_u16(__p0, 
__p1) __extension__ ({ \ float16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (float16x8_t) __builtin_neon_vcvtq_n_f16_u16((int8x16_t)__rev0, __p1, 49); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcvtq_n_f16_s16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (float16x8_t) __builtin_neon_vcvtq_n_f16_s16((int8x16_t)__s0, __p1, 33); \ __ret; \ }) #else #define vcvtq_n_f16_s16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (float16x8_t) __builtin_neon_vcvtq_n_f16_s16((int8x16_t)__rev0, __p1, 33); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcvt_n_f16_u16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ uint16x4_t __s0 = __p0; \ __ret = (float16x4_t) __builtin_neon_vcvt_n_f16_u16((int8x8_t)__s0, __p1, 17); \ __ret; \ }) #else #define vcvt_n_f16_u16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (float16x4_t) __builtin_neon_vcvt_n_f16_u16((int8x8_t)__rev0, __p1, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcvt_n_f16_s16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ int16x4_t __s0 = __p0; \ __ret = (float16x4_t) __builtin_neon_vcvt_n_f16_s16((int8x8_t)__s0, __p1, 1); \ __ret; \ }) #else #define vcvt_n_f16_s16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (float16x4_t) __builtin_neon_vcvt_n_f16_s16((int8x8_t)__rev0, __p1, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcvtq_n_s16_f16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ float16x8_t __s0 = __p0; \ __ret = (int16x8_t) __builtin_neon_vcvtq_n_s16_f16((int8x16_t)__s0, __p1, 33); \ __ret; \ }) #else #define vcvtq_n_s16_f16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16x8_t) __builtin_neon_vcvtq_n_s16_f16((int8x16_t)__rev0, __p1, 33); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcvt_n_s16_f16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ float16x4_t __s0 = __p0; \ __ret = (int16x4_t) __builtin_neon_vcvt_n_s16_f16((int8x8_t)__s0, __p1, 1); \ __ret; \ }) #else #define vcvt_n_s16_f16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_vcvt_n_s16_f16((int8x8_t)__rev0, __p1, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcvtq_n_u16_f16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ float16x8_t __s0 = __p0; \ __ret = (uint16x8_t) __builtin_neon_vcvtq_n_u16_f16((int8x16_t)__s0, __p1, 49); \ __ret; \ }) #else #define vcvtq_n_u16_f16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ float16x8_t __s0 = __p0; \ 
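/* The vcvt*_n_* conversions treat __p1 as a number of fractional bits, i.e. they convert between float16 and fixed-point values scaled by 2^__p1; __p1 must be a compile-time constant in the range accepted by the builtin. */ \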
float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint16x8_t) __builtin_neon_vcvtq_n_u16_f16((int8x16_t)__rev0, __p1, 49); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcvt_n_u16_f16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ float16x4_t __s0 = __p0; \ __ret = (uint16x4_t) __builtin_neon_vcvt_n_u16_f16((int8x8_t)__s0, __p1, 17); \ __ret; \ }) #else #define vcvt_n_u16_f16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint16x4_t) __builtin_neon_vcvt_n_u16_f16((int8x8_t)__rev0, __p1, 17); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) int16x8_t vcvtq_s16_f16(float16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vcvtq_s16_f16((int8x16_t)__p0, 33); return __ret; } #else __ai __attribute__((target("fullfp16"))) int16x8_t vcvtq_s16_f16(float16x8_t __p0) { int16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vcvtq_s16_f16((int8x16_t)__rev0, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) int16x4_t vcvt_s16_f16(float16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vcvt_s16_f16((int8x8_t)__p0, 1); return __ret; } #else __ai __attribute__((target("fullfp16"))) int16x4_t vcvt_s16_f16(float16x4_t __p0) { int16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vcvt_s16_f16((int8x8_t)__rev0, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x8_t vcvtq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcvtq_u16_f16((int8x16_t)__p0, 49); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x8_t vcvtq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vcvtq_u16_f16((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x4_t vcvt_u16_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcvt_u16_f16((int8x8_t)__p0, 17); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x4_t vcvt_u16_f16(float16x4_t __p0) { uint16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vcvt_u16_f16((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) int16x8_t vcvtaq_s16_f16(float16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vcvtaq_s16_f16((int8x16_t)__p0, 33); return __ret; } #else __ai __attribute__((target("fullfp16"))) int16x8_t vcvtaq_s16_f16(float16x8_t __p0) { int16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) 
__builtin_neon_vcvtaq_s16_f16((int8x16_t)__rev0, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) int16x4_t vcvta_s16_f16(float16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vcvta_s16_f16((int8x8_t)__p0, 1); return __ret; } #else __ai __attribute__((target("fullfp16"))) int16x4_t vcvta_s16_f16(float16x4_t __p0) { int16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vcvta_s16_f16((int8x8_t)__rev0, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x8_t vcvtaq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcvtaq_u16_f16((int8x16_t)__p0, 49); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x8_t vcvtaq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vcvtaq_u16_f16((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x4_t vcvta_u16_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcvta_u16_f16((int8x8_t)__p0, 17); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x4_t vcvta_u16_f16(float16x4_t __p0) { uint16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vcvta_u16_f16((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) int16x8_t vcvtmq_s16_f16(float16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vcvtmq_s16_f16((int8x16_t)__p0, 33); return __ret; } #else __ai __attribute__((target("fullfp16"))) int16x8_t vcvtmq_s16_f16(float16x8_t __p0) { int16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vcvtmq_s16_f16((int8x16_t)__rev0, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) int16x4_t vcvtm_s16_f16(float16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vcvtm_s16_f16((int8x8_t)__p0, 1); return __ret; } #else __ai __attribute__((target("fullfp16"))) int16x4_t vcvtm_s16_f16(float16x4_t __p0) { int16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vcvtm_s16_f16((int8x8_t)__rev0, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x8_t vcvtmq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcvtmq_u16_f16((int8x16_t)__p0, 49); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x8_t vcvtmq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vcvtmq_u16_f16((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return 
__ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x4_t vcvtm_u16_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcvtm_u16_f16((int8x8_t)__p0, 17); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x4_t vcvtm_u16_f16(float16x4_t __p0) { uint16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vcvtm_u16_f16((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) int16x8_t vcvtnq_s16_f16(float16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vcvtnq_s16_f16((int8x16_t)__p0, 33); return __ret; } #else __ai __attribute__((target("fullfp16"))) int16x8_t vcvtnq_s16_f16(float16x8_t __p0) { int16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vcvtnq_s16_f16((int8x16_t)__rev0, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) int16x4_t vcvtn_s16_f16(float16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vcvtn_s16_f16((int8x8_t)__p0, 1); return __ret; } #else __ai __attribute__((target("fullfp16"))) int16x4_t vcvtn_s16_f16(float16x4_t __p0) { int16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vcvtn_s16_f16((int8x8_t)__rev0, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x8_t vcvtnq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcvtnq_u16_f16((int8x16_t)__p0, 49); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x8_t vcvtnq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vcvtnq_u16_f16((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x4_t vcvtn_u16_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcvtn_u16_f16((int8x8_t)__p0, 17); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x4_t vcvtn_u16_f16(float16x4_t __p0) { uint16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vcvtn_u16_f16((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) int16x8_t vcvtpq_s16_f16(float16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vcvtpq_s16_f16((int8x16_t)__p0, 33); return __ret; } #else __ai __attribute__((target("fullfp16"))) int16x8_t vcvtpq_s16_f16(float16x8_t __p0) { int16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vcvtpq_s16_f16((int8x16_t)__rev0, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) int16x4_t vcvtp_s16_f16(float16x4_t __p0) { 
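/* Rounding-mode conversion family: plain vcvt truncates toward zero, vcvta rounds to nearest with ties away from zero, vcvtm rounds toward minus infinity, vcvtn rounds to nearest even, and vcvtp rounds toward plus infinity. */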
int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vcvtp_s16_f16((int8x8_t)__p0, 1); return __ret; } #else __ai __attribute__((target("fullfp16"))) int16x4_t vcvtp_s16_f16(float16x4_t __p0) { int16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vcvtp_s16_f16((int8x8_t)__rev0, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x8_t vcvtpq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcvtpq_u16_f16((int8x16_t)__p0, 49); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x8_t vcvtpq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vcvtpq_u16_f16((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) uint16x4_t vcvtp_u16_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcvtp_u16_f16((int8x8_t)__p0, 17); return __ret; } #else __ai __attribute__((target("fullfp16"))) uint16x4_t vcvtp_u16_f16(float16x4_t __p0) { uint16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vcvtp_u16_f16((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vextq_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ __ret = (float16x8_t) __builtin_neon_vextq_f16((int8x16_t)__s0, (int8x16_t)__s1, __p2, 40); \ __ret; \ }) #else #define vextq_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (float16x8_t) __builtin_neon_vextq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 40); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vext_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ __ret = (float16x4_t) __builtin_neon_vext_f16((int8x8_t)__s0, (int8x8_t)__s1, __p2, 8); \ __ret; \ }) #else #define vext_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (float16x4_t) __builtin_neon_vext_f16((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 8); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vfmaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vfmaq_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vfmaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; float16x8_t __rev0; __rev0 = 
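/* vfma_f16(a, b, c) computes a + b*c as a single fused multiply-add; vfms is expressed below as vfma with the second operand negated. */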
__builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vfmaq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("fullfp16"))) float16x8_t __noswap_vfmaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vfmaq_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vfma_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vfma_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vfma_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vfma_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("fullfp16"))) float16x4_t __noswap_vfma_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vfma_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vfmsq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; __ret = vfmaq_f16(__p0, -__p1, __p2); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vfmsq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vfmaq_f16(__rev0, -__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vfms_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; __ret = vfma_f16(__p0, -__p1, __p2); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vfms_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __noswap_vfma_f16(__rev0, -__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vmaxq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vmaxq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return 
__ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vmaxq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vmaxq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vmax_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vmax_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vmax_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vmax_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vminq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vminq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vminq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vminq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vmin_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vmin_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vmin_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vmin_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vmulq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = __p0 * __p1; return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vmulq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 * __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vmul_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = __p0 * __p1; return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vmul_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; 
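  /* vfma[q]_f16(a, b, c) is a fused multiply-add computing a + b * c per lane, and
     vfms[q]_f16 is defined above as vfma with the second operand negated, i.e. a - b * c.
     The __noswap_* variants are internal helpers that skip the big-endian lane reversal
     so they can be called from code whose operands are already reversed. vmax/vmin take
     the per-lane maximum/minimum, and vmul[q]_f16 lowers directly to the vector *
     operator. */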
__rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 * __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vmulq_lane_f16(__p0_153, __p1_153, __p2_153) __extension__ ({ \ float16x8_t __ret_153; \ float16x8_t __s0_153 = __p0_153; \ float16x4_t __s1_153 = __p1_153; \ __ret_153 = __s0_153 * splatq_lane_f16(__s1_153, __p2_153); \ __ret_153; \ }) #else #define vmulq_lane_f16(__p0_154, __p1_154, __p2_154) __extension__ ({ \ float16x8_t __ret_154; \ float16x8_t __s0_154 = __p0_154; \ float16x4_t __s1_154 = __p1_154; \ float16x8_t __rev0_154; __rev0_154 = __builtin_shufflevector(__s0_154, __s0_154, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x4_t __rev1_154; __rev1_154 = __builtin_shufflevector(__s1_154, __s1_154, 3, 2, 1, 0); \ __ret_154 = __rev0_154 * __noswap_splatq_lane_f16(__rev1_154, __p2_154); \ __ret_154 = __builtin_shufflevector(__ret_154, __ret_154, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_154; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmul_lane_f16(__p0_155, __p1_155, __p2_155) __extension__ ({ \ float16x4_t __ret_155; \ float16x4_t __s0_155 = __p0_155; \ float16x4_t __s1_155 = __p1_155; \ __ret_155 = __s0_155 * splat_lane_f16(__s1_155, __p2_155); \ __ret_155; \ }) #else #define vmul_lane_f16(__p0_156, __p1_156, __p2_156) __extension__ ({ \ float16x4_t __ret_156; \ float16x4_t __s0_156 = __p0_156; \ float16x4_t __s1_156 = __p1_156; \ float16x4_t __rev0_156; __rev0_156 = __builtin_shufflevector(__s0_156, __s0_156, 3, 2, 1, 0); \ float16x4_t __rev1_156; __rev1_156 = __builtin_shufflevector(__s1_156, __s1_156, 3, 2, 1, 0); \ __ret_156 = __rev0_156 * __noswap_splat_lane_f16(__rev1_156, __p2_156); \ __ret_156 = __builtin_shufflevector(__ret_156, __ret_156, 3, 2, 1, 0); \ __ret_156; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulq_n_f16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = __s0 * (float16x8_t) {__s1, __s1, __s1, __s1, __s1, __s1, __s1, __s1}; \ __ret; \ }) #else #define vmulq_n_f16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = __rev0 * (float16x8_t) {__s1, __s1, __s1, __s1, __s1, __s1, __s1, __s1}; \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmul_n_f16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = __s0 * (float16x4_t) {__s1, __s1, __s1, __s1}; \ __ret; \ }) #else #define vmul_n_f16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = __rev0 * (float16x4_t) {__s1, __s1, __s1, __s1}; \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vnegq_f16(float16x8_t __p0) { float16x8_t __ret; __ret = -__p0; return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vnegq_f16(float16x8_t __p0) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = -__rev0; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef 
__LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vneg_f16(float16x4_t __p0) { float16x4_t __ret; __ret = -__p0; return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vneg_f16(float16x4_t __p0) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = -__rev0; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vpadd_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vpadd_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vpadd_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vpadd_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vpmax_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vpmax_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vpmax_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vpmax_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vpmin_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vpmin_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vpmin_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vpmin_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vrecpeq_f16(float16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vrecpeq_f16((int8x16_t)__p0, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vrecpeq_f16(float16x8_t __p0) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vrecpeq_f16((int8x16_t)__rev0, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vrecpe_f16(float16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vrecpe_f16((int8x8_t)__p0, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vrecpe_f16(float16x4_t __p0) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float16x4_t) 
__builtin_neon_vrecpe_f16((int8x8_t)__rev0, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vrecpsq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vrecpsq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vrecpsq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vrecpsq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vrecps_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vrecps_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vrecps_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vrecps_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vrev64q_f16(float16x8_t __p0) { float16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vrev64q_f16(float16x8_t __p0) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vrev64_f16(float16x4_t __p0) { float16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vrev64_f16(float16x4_t __p0) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vrsqrteq_f16(float16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vrsqrteq_f16((int8x16_t)__p0, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vrsqrteq_f16(float16x8_t __p0) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vrsqrteq_f16((int8x16_t)__rev0, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vrsqrte_f16(float16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vrsqrte_f16((int8x8_t)__p0, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t 
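/* vrecpe[q]_f16 and vrsqrte[q]_f16 produce initial estimates of 1/x and 1/sqrt(x);
   vrecps[q]_f16 and vrsqrts[q]_f16 compute the Newton-Raphson step terms (2 - a*b and
   (3 - a*b)/2) used to refine those estimates. vrev64[q]_f16 reverses the lane order
   within each 64-bit half of the vector, and vpadd/vpmax/vpmin perform pairwise
   add/max/min over adjacent lane pairs of the concatenated operands. */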
vrsqrte_f16(float16x4_t __p0) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vrsqrte_f16((int8x8_t)__rev0, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vrsqrtsq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vrsqrtsq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vrsqrtsq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vrsqrtsq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vrsqrts_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vrsqrts_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vrsqrts_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vrsqrts_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vsubq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = __p0 - __p1; return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vsubq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vsub_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = __p0 - __p1; return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vsub_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 - __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8x2_t vtrnq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8x2_t __ret; __builtin_neon_vtrnq_f16(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8x2_t vtrnq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8x2_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vtrnq_f16(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret.val[0] = 
__builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4x2_t vtrn_f16(float16x4_t __p0, float16x4_t __p1) { float16x4x2_t __ret; __builtin_neon_vtrn_f16(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4x2_t vtrn_f16(float16x4_t __p0, float16x4_t __p1) { float16x4x2_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vtrn_f16(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8x2_t vuzpq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8x2_t __ret; __builtin_neon_vuzpq_f16(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8x2_t vuzpq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8x2_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vuzpq_f16(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4x2_t vuzp_f16(float16x4_t __p0, float16x4_t __p1) { float16x4x2_t __ret; __builtin_neon_vuzp_f16(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4x2_t vuzp_f16(float16x4_t __p0, float16x4_t __p1) { float16x4x2_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vuzp_f16(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8x2_t vzipq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8x2_t __ret; __builtin_neon_vzipq_f16(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8x2_t vzipq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8x2_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __builtin_neon_vzipq_f16(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4x2_t vzip_f16(float16x4_t __p0, float16x4_t __p1) { float16x4x2_t __ret; 
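  /* vtrn (transpose), vuzp (de-interleave) and vzip (interleave) each return a
     two-vector struct (float16x4x2_t / float16x8x2_t); the builtin writes both result
     vectors through the &__ret out-parameter, which is why the big-endian variants
     reverse __ret.val[0] and __ret.val[1] separately after the call. */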
__builtin_neon_vzip_f16(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4x2_t vzip_f16(float16x4_t __p0, float16x4_t __p1) { float16x4x2_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __builtin_neon_vzip_f16(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("i8mm"))) uint32x4_t vmmlaq_u32(uint32x4_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vmmlaq_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); return __ret; } #else __ai __attribute__((target("i8mm"))) uint32x4_t vmmlaq_u32(uint32x4_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vmmlaq_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("i8mm"))) int32x4_t vmmlaq_s32(int32x4_t __p0, int8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vmmlaq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); return __ret; } #else __ai __attribute__((target("i8mm"))) int32x4_t vmmlaq_s32(int32x4_t __p0, int8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vmmlaq_s32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("i8mm"))) int32x4_t vusdotq_s32(int32x4_t __p0, uint8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vusdotq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); return __ret; } #else __ai __attribute__((target("i8mm"))) int32x4_t vusdotq_s32(int32x4_t __p0, uint8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vusdotq_s32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("i8mm"))) int32x4_t __noswap_vusdotq_s32(int32x4_t __p0, uint8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vusdotq_s32((int8x16_t)__p0, 
(int8x16_t)__p1, (int8x16_t)__p2, 34); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("i8mm"))) int32x2_t vusdot_s32(int32x2_t __p0, uint8x8_t __p1, int8x8_t __p2) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vusdot_s32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); return __ret; } #else __ai __attribute__((target("i8mm"))) int32x2_t vusdot_s32(int32x2_t __p0, uint8x8_t __p1, int8x8_t __p2) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int32x2_t) __builtin_neon_vusdot_s32((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai __attribute__((target("i8mm"))) int32x2_t __noswap_vusdot_s32(int32x2_t __p0, uint8x8_t __p1, int8x8_t __p2) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vusdot_s32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vusdotq_lane_s32(__p0_157, __p1_157, __p2_157, __p3_157) __extension__ ({ \ int32x4_t __ret_157; \ int32x4_t __s0_157 = __p0_157; \ uint8x16_t __s1_157 = __p1_157; \ int8x8_t __s2_157 = __p2_157; \ int8x8_t __reint_157 = __s2_157; \ __ret_157 = vusdotq_s32(__s0_157, __s1_157, (int8x16_t)(splatq_lane_s32(*(int32x2_t *) &__reint_157, __p3_157))); \ __ret_157; \ }) #else #define vusdotq_lane_s32(__p0_158, __p1_158, __p2_158, __p3_158) __extension__ ({ \ int32x4_t __ret_158; \ int32x4_t __s0_158 = __p0_158; \ uint8x16_t __s1_158 = __p1_158; \ int8x8_t __s2_158 = __p2_158; \ int32x4_t __rev0_158; __rev0_158 = __builtin_shufflevector(__s0_158, __s0_158, 3, 2, 1, 0); \ uint8x16_t __rev1_158; __rev1_158 = __builtin_shufflevector(__s1_158, __s1_158, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x8_t __rev2_158; __rev2_158 = __builtin_shufflevector(__s2_158, __s2_158, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x8_t __reint_158 = __rev2_158; \ __ret_158 = __noswap_vusdotq_s32(__rev0_158, __rev1_158, (int8x16_t)(__noswap_splatq_lane_s32(*(int32x2_t *) &__reint_158, __p3_158))); \ __ret_158 = __builtin_shufflevector(__ret_158, __ret_158, 3, 2, 1, 0); \ __ret_158; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vusdot_lane_s32(__p0_159, __p1_159, __p2_159, __p3_159) __extension__ ({ \ int32x2_t __ret_159; \ int32x2_t __s0_159 = __p0_159; \ uint8x8_t __s1_159 = __p1_159; \ int8x8_t __s2_159 = __p2_159; \ int8x8_t __reint_159 = __s2_159; \ __ret_159 = vusdot_s32(__s0_159, __s1_159, (int8x8_t)(splat_lane_s32(*(int32x2_t *) &__reint_159, __p3_159))); \ __ret_159; \ }) #else #define vusdot_lane_s32(__p0_160, __p1_160, __p2_160, __p3_160) __extension__ ({ \ int32x2_t __ret_160; \ int32x2_t __s0_160 = __p0_160; \ uint8x8_t __s1_160 = __p1_160; \ int8x8_t __s2_160 = __p2_160; \ int32x2_t __rev0_160; __rev0_160 = __builtin_shufflevector(__s0_160, __s0_160, 1, 0); \ uint8x8_t __rev1_160; __rev1_160 = __builtin_shufflevector(__s1_160, __s1_160, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x8_t __rev2_160; __rev2_160 = __builtin_shufflevector(__s2_160, __s2_160, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x8_t __reint_160 = __rev2_160; \ __ret_160 = __noswap_vusdot_s32(__rev0_160, __rev1_160, (int8x8_t)(__noswap_splat_lane_s32(*(int32x2_t *) &__reint_160, __p3_160))); \ __ret_160 = __builtin_shufflevector(__ret_160, __ret_160, 1, 0); \ __ret_160; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai 
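/* The target("i8mm") block provides the 8-bit integer matrix-multiply extension:
   vmmlaq_{u,s}32 accumulate a 2x8 by 8x2 matrix product of 8-bit elements into the four
   32-bit lanes of the accumulator (UMMLA/SMMLA), vusdot[q]_s32 is the mixed-sign
   (unsigned by signed) dot product, and vusmmlaq_s32 below is the mixed-sign matrix form
   (USMMLA). The _lane variants reinterpret the 8-bit index vector as int32x2_t, splat
   the selected 32-bit lane (four bytes), and reinterpret back before the dot product.
   The target("v8.1a") block that follows adds vqrdmlah/vqrdmlsh: saturating rounding
   doubling multiply returning the high half, accumulated into or subtracted from the
   first operand (SQRDMLAH/SQRDMLSH), plus the matching _lane forms. */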
__attribute__((target("i8mm"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, uint8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vusmmlaq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); return __ret; } #else __ai __attribute__((target("i8mm"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, uint8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vusmmlaq_s32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.1a"))) int32x4_t vqrdmlahq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqrdmlahq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); return __ret; } #else __ai __attribute__((target("v8.1a"))) int32x4_t vqrdmlahq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vqrdmlahq_s32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("v8.1a"))) int32x4_t __noswap_vqrdmlahq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqrdmlahq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.1a"))) int16x8_t vqrdmlahq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqrdmlahq_s16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 33); return __ret; } #else __ai __attribute__((target("v8.1a"))) int16x8_t vqrdmlahq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vqrdmlahq_s16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("v8.1a"))) int16x8_t __noswap_vqrdmlahq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqrdmlahq_s16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 33); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.1a"))) int32x2_t vqrdmlah_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqrdmlah_s32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); return __ret; } #else __ai __attribute__((target("v8.1a"))) int32x2_t vqrdmlah_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; int32x2_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (int32x2_t) __builtin_neon_vqrdmlah_s32((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai __attribute__((target("v8.1a"))) int32x2_t __noswap_vqrdmlah_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqrdmlah_s32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.1a"))) int16x4_t vqrdmlah_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqrdmlah_s16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 1); return __ret; } #else __ai __attribute__((target("v8.1a"))) int16x4_t vqrdmlah_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vqrdmlah_s16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("v8.1a"))) int16x4_t __noswap_vqrdmlah_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqrdmlah_s16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlahq_lane_s32(__p0_161, __p1_161, __p2_161, __p3_161) __extension__ ({ \ int32x4_t __ret_161; \ int32x4_t __s0_161 = __p0_161; \ int32x4_t __s1_161 = __p1_161; \ int32x2_t __s2_161 = __p2_161; \ __ret_161 = vqrdmlahq_s32(__s0_161, __s1_161, splatq_lane_s32(__s2_161, __p3_161)); \ __ret_161; \ }) #else #define vqrdmlahq_lane_s32(__p0_162, __p1_162, __p2_162, __p3_162) __extension__ ({ \ int32x4_t __ret_162; \ int32x4_t __s0_162 = __p0_162; \ int32x4_t __s1_162 = __p1_162; \ int32x2_t __s2_162 = __p2_162; \ int32x4_t __rev0_162; __rev0_162 = __builtin_shufflevector(__s0_162, __s0_162, 3, 2, 1, 0); \ int32x4_t __rev1_162; __rev1_162 = __builtin_shufflevector(__s1_162, __s1_162, 3, 2, 1, 0); \ int32x2_t __rev2_162; __rev2_162 = __builtin_shufflevector(__s2_162, __s2_162, 1, 0); \ __ret_162 = __noswap_vqrdmlahq_s32(__rev0_162, __rev1_162, __noswap_splatq_lane_s32(__rev2_162, __p3_162)); \ __ret_162 = __builtin_shufflevector(__ret_162, __ret_162, 3, 2, 1, 0); \ __ret_162; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlahq_lane_s16(__p0_163, __p1_163, __p2_163, __p3_163) __extension__ ({ \ int16x8_t __ret_163; \ int16x8_t __s0_163 = __p0_163; \ int16x8_t __s1_163 = __p1_163; \ int16x4_t __s2_163 = __p2_163; \ __ret_163 = vqrdmlahq_s16(__s0_163, __s1_163, splatq_lane_s16(__s2_163, __p3_163)); \ __ret_163; \ }) #else #define vqrdmlahq_lane_s16(__p0_164, __p1_164, __p2_164, __p3_164) __extension__ ({ \ int16x8_t __ret_164; \ int16x8_t __s0_164 = __p0_164; \ int16x8_t __s1_164 = __p1_164; \ int16x4_t __s2_164 = __p2_164; \ int16x8_t __rev0_164; __rev0_164 = __builtin_shufflevector(__s0_164, __s0_164, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1_164; __rev1_164 = __builtin_shufflevector(__s1_164, __s1_164, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x4_t __rev2_164; __rev2_164 = __builtin_shufflevector(__s2_164, 
__s2_164, 3, 2, 1, 0); \ __ret_164 = __noswap_vqrdmlahq_s16(__rev0_164, __rev1_164, __noswap_splatq_lane_s16(__rev2_164, __p3_164)); \ __ret_164 = __builtin_shufflevector(__ret_164, __ret_164, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_164; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlah_lane_s32(__p0_165, __p1_165, __p2_165, __p3_165) __extension__ ({ \ int32x2_t __ret_165; \ int32x2_t __s0_165 = __p0_165; \ int32x2_t __s1_165 = __p1_165; \ int32x2_t __s2_165 = __p2_165; \ __ret_165 = vqrdmlah_s32(__s0_165, __s1_165, splat_lane_s32(__s2_165, __p3_165)); \ __ret_165; \ }) #else #define vqrdmlah_lane_s32(__p0_166, __p1_166, __p2_166, __p3_166) __extension__ ({ \ int32x2_t __ret_166; \ int32x2_t __s0_166 = __p0_166; \ int32x2_t __s1_166 = __p1_166; \ int32x2_t __s2_166 = __p2_166; \ int32x2_t __rev0_166; __rev0_166 = __builtin_shufflevector(__s0_166, __s0_166, 1, 0); \ int32x2_t __rev1_166; __rev1_166 = __builtin_shufflevector(__s1_166, __s1_166, 1, 0); \ int32x2_t __rev2_166; __rev2_166 = __builtin_shufflevector(__s2_166, __s2_166, 1, 0); \ __ret_166 = __noswap_vqrdmlah_s32(__rev0_166, __rev1_166, __noswap_splat_lane_s32(__rev2_166, __p3_166)); \ __ret_166 = __builtin_shufflevector(__ret_166, __ret_166, 1, 0); \ __ret_166; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlah_lane_s16(__p0_167, __p1_167, __p2_167, __p3_167) __extension__ ({ \ int16x4_t __ret_167; \ int16x4_t __s0_167 = __p0_167; \ int16x4_t __s1_167 = __p1_167; \ int16x4_t __s2_167 = __p2_167; \ __ret_167 = vqrdmlah_s16(__s0_167, __s1_167, splat_lane_s16(__s2_167, __p3_167)); \ __ret_167; \ }) #else #define vqrdmlah_lane_s16(__p0_168, __p1_168, __p2_168, __p3_168) __extension__ ({ \ int16x4_t __ret_168; \ int16x4_t __s0_168 = __p0_168; \ int16x4_t __s1_168 = __p1_168; \ int16x4_t __s2_168 = __p2_168; \ int16x4_t __rev0_168; __rev0_168 = __builtin_shufflevector(__s0_168, __s0_168, 3, 2, 1, 0); \ int16x4_t __rev1_168; __rev1_168 = __builtin_shufflevector(__s1_168, __s1_168, 3, 2, 1, 0); \ int16x4_t __rev2_168; __rev2_168 = __builtin_shufflevector(__s2_168, __s2_168, 3, 2, 1, 0); \ __ret_168 = __noswap_vqrdmlah_s16(__rev0_168, __rev1_168, __noswap_splat_lane_s16(__rev2_168, __p3_168)); \ __ret_168 = __builtin_shufflevector(__ret_168, __ret_168, 3, 2, 1, 0); \ __ret_168; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.1a"))) int32x4_t vqrdmlshq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqrdmlshq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); return __ret; } #else __ai __attribute__((target("v8.1a"))) int32x4_t vqrdmlshq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vqrdmlshq_s32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("v8.1a"))) int32x4_t __noswap_vqrdmlshq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqrdmlshq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.1a"))) int16x8_t vqrdmlshq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; __ret = (int16x8_t) 
__builtin_neon_vqrdmlshq_s16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 33); return __ret; } #else __ai __attribute__((target("v8.1a"))) int16x8_t vqrdmlshq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vqrdmlshq_s16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("v8.1a"))) int16x8_t __noswap_vqrdmlshq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqrdmlshq_s16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 33); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.1a"))) int32x2_t vqrdmlsh_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqrdmlsh_s32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); return __ret; } #else __ai __attribute__((target("v8.1a"))) int32x2_t vqrdmlsh_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (int32x2_t) __builtin_neon_vqrdmlsh_s32((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai __attribute__((target("v8.1a"))) int32x2_t __noswap_vqrdmlsh_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqrdmlsh_s32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.1a"))) int16x4_t vqrdmlsh_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqrdmlsh_s16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 1); return __ret; } #else __ai __attribute__((target("v8.1a"))) int16x4_t vqrdmlsh_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vqrdmlsh_s16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("v8.1a"))) int16x4_t __noswap_vqrdmlsh_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqrdmlsh_s16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlshq_lane_s32(__p0_169, __p1_169, __p2_169, __p3_169) __extension__ ({ \ int32x4_t __ret_169; \ int32x4_t __s0_169 = __p0_169; \ int32x4_t __s1_169 = __p1_169; \ int32x2_t __s2_169 = __p2_169; \ __ret_169 = vqrdmlshq_s32(__s0_169, __s1_169, splatq_lane_s32(__s2_169, __p3_169)); \ __ret_169; \ }) #else #define vqrdmlshq_lane_s32(__p0_170, __p1_170, __p2_170, __p3_170) __extension__ ({ \ int32x4_t __ret_170; \ int32x4_t __s0_170 = 
__p0_170; \ int32x4_t __s1_170 = __p1_170; \ int32x2_t __s2_170 = __p2_170; \ int32x4_t __rev0_170; __rev0_170 = __builtin_shufflevector(__s0_170, __s0_170, 3, 2, 1, 0); \ int32x4_t __rev1_170; __rev1_170 = __builtin_shufflevector(__s1_170, __s1_170, 3, 2, 1, 0); \ int32x2_t __rev2_170; __rev2_170 = __builtin_shufflevector(__s2_170, __s2_170, 1, 0); \ __ret_170 = __noswap_vqrdmlshq_s32(__rev0_170, __rev1_170, __noswap_splatq_lane_s32(__rev2_170, __p3_170)); \ __ret_170 = __builtin_shufflevector(__ret_170, __ret_170, 3, 2, 1, 0); \ __ret_170; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlshq_lane_s16(__p0_171, __p1_171, __p2_171, __p3_171) __extension__ ({ \ int16x8_t __ret_171; \ int16x8_t __s0_171 = __p0_171; \ int16x8_t __s1_171 = __p1_171; \ int16x4_t __s2_171 = __p2_171; \ __ret_171 = vqrdmlshq_s16(__s0_171, __s1_171, splatq_lane_s16(__s2_171, __p3_171)); \ __ret_171; \ }) #else #define vqrdmlshq_lane_s16(__p0_172, __p1_172, __p2_172, __p3_172) __extension__ ({ \ int16x8_t __ret_172; \ int16x8_t __s0_172 = __p0_172; \ int16x8_t __s1_172 = __p1_172; \ int16x4_t __s2_172 = __p2_172; \ int16x8_t __rev0_172; __rev0_172 = __builtin_shufflevector(__s0_172, __s0_172, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1_172; __rev1_172 = __builtin_shufflevector(__s1_172, __s1_172, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x4_t __rev2_172; __rev2_172 = __builtin_shufflevector(__s2_172, __s2_172, 3, 2, 1, 0); \ __ret_172 = __noswap_vqrdmlshq_s16(__rev0_172, __rev1_172, __noswap_splatq_lane_s16(__rev2_172, __p3_172)); \ __ret_172 = __builtin_shufflevector(__ret_172, __ret_172, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_172; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlsh_lane_s32(__p0_173, __p1_173, __p2_173, __p3_173) __extension__ ({ \ int32x2_t __ret_173; \ int32x2_t __s0_173 = __p0_173; \ int32x2_t __s1_173 = __p1_173; \ int32x2_t __s2_173 = __p2_173; \ __ret_173 = vqrdmlsh_s32(__s0_173, __s1_173, splat_lane_s32(__s2_173, __p3_173)); \ __ret_173; \ }) #else #define vqrdmlsh_lane_s32(__p0_174, __p1_174, __p2_174, __p3_174) __extension__ ({ \ int32x2_t __ret_174; \ int32x2_t __s0_174 = __p0_174; \ int32x2_t __s1_174 = __p1_174; \ int32x2_t __s2_174 = __p2_174; \ int32x2_t __rev0_174; __rev0_174 = __builtin_shufflevector(__s0_174, __s0_174, 1, 0); \ int32x2_t __rev1_174; __rev1_174 = __builtin_shufflevector(__s1_174, __s1_174, 1, 0); \ int32x2_t __rev2_174; __rev2_174 = __builtin_shufflevector(__s2_174, __s2_174, 1, 0); \ __ret_174 = __noswap_vqrdmlsh_s32(__rev0_174, __rev1_174, __noswap_splat_lane_s32(__rev2_174, __p3_174)); \ __ret_174 = __builtin_shufflevector(__ret_174, __ret_174, 1, 0); \ __ret_174; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlsh_lane_s16(__p0_175, __p1_175, __p2_175, __p3_175) __extension__ ({ \ int16x4_t __ret_175; \ int16x4_t __s0_175 = __p0_175; \ int16x4_t __s1_175 = __p1_175; \ int16x4_t __s2_175 = __p2_175; \ __ret_175 = vqrdmlsh_s16(__s0_175, __s1_175, splat_lane_s16(__s2_175, __p3_175)); \ __ret_175; \ }) #else #define vqrdmlsh_lane_s16(__p0_176, __p1_176, __p2_176, __p3_176) __extension__ ({ \ int16x4_t __ret_176; \ int16x4_t __s0_176 = __p0_176; \ int16x4_t __s1_176 = __p1_176; \ int16x4_t __s2_176 = __p2_176; \ int16x4_t __rev0_176; __rev0_176 = __builtin_shufflevector(__s0_176, __s0_176, 3, 2, 1, 0); \ int16x4_t __rev1_176; __rev1_176 = __builtin_shufflevector(__s1_176, __s1_176, 3, 2, 1, 0); \ int16x4_t __rev2_176; __rev2_176 = __builtin_shufflevector(__s2_176, __s2_176, 3, 2, 1, 0); \ __ret_176 = __noswap_vqrdmlsh_s16(__rev0_176, __rev1_176, 
__noswap_splat_lane_s16(__rev2_176, __p3_176)); \ __ret_176 = __builtin_shufflevector(__ret_176, __ret_176, 3, 2, 1, 0); \ __ret_176; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a"))) float32x2_t vcadd_rot270_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcadd_rot270_f32((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } #else __ai __attribute__((target("v8.3a"))) float32x2_t vcadd_rot270_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float32x2_t) __builtin_neon_vcadd_rot270_f32((int8x8_t)__rev0, (int8x8_t)__rev1, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a"))) float32x2_t vcadd_rot90_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcadd_rot90_f32((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } #else __ai __attribute__((target("v8.3a"))) float32x2_t vcadd_rot90_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float32x2_t) __builtin_neon_vcadd_rot90_f32((int8x8_t)__rev0, (int8x8_t)__rev1, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a"))) float32x4_t vcaddq_rot270_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcaddq_rot270_f32((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #else __ai __attribute__((target("v8.3a"))) float32x4_t vcaddq_rot270_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vcaddq_rot270_f32((int8x16_t)__rev0, (int8x16_t)__rev1, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a"))) float32x4_t vcaddq_rot90_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcaddq_rot90_f32((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #else __ai __attribute__((target("v8.3a"))) float32x4_t vcaddq_rot90_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vcaddq_rot90_f32((int8x16_t)__rev0, (int8x16_t)__rev1, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a"))) float32x4_t vcmlaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcmlaq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #else __ai __attribute__((target("v8.3a"))) float32x4_t vcmlaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, 
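  /* The target("v8.3a") block implements the complex-arithmetic extension: even lanes
     hold real parts and odd lanes imaginary parts. vcadd[q]_rot90/_rot270 add the
     second operand after rotating each of its complex elements by 90 or 270 degrees,
     and vcmla[q]_f32 accumulates one partial product of a complex multiply; issuing the
     plain and _rot90 forms back to back yields a full complex multiply-accumulate. */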
__p1, 3, 2, 1, 0); float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vcmlaq_f32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("v8.3a"))) float32x4_t __noswap_vcmlaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcmlaq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a"))) float32x2_t vcmla_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcmla_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #else __ai __attribute__((target("v8.3a"))) float32x2_t vcmla_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (float32x2_t) __builtin_neon_vcmla_f32((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai __attribute__((target("v8.3a"))) float32x2_t __noswap_vcmla_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcmla_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vcmla_lane_f32(__p0_177, __p1_177, __p2_177, __p3_177) __extension__ ({ \ float32x2_t __ret_177; \ float32x2_t __s0_177 = __p0_177; \ float32x2_t __s1_177 = __p1_177; \ float32x2_t __s2_177 = __p2_177; \ float32x2_t __reint_177 = __s2_177; \ uint64x1_t __reint1_177 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_177, __p3_177)}; \ __ret_177 = vcmla_f32(__s0_177, __s1_177, *(float32x2_t *) &__reint1_177); \ __ret_177; \ }) #else #define vcmla_lane_f32(__p0_178, __p1_178, __p2_178, __p3_178) __extension__ ({ \ float32x2_t __ret_178; \ float32x2_t __s0_178 = __p0_178; \ float32x2_t __s1_178 = __p1_178; \ float32x2_t __s2_178 = __p2_178; \ float32x2_t __rev0_178; __rev0_178 = __builtin_shufflevector(__s0_178, __s0_178, 1, 0); \ float32x2_t __rev1_178; __rev1_178 = __builtin_shufflevector(__s1_178, __s1_178, 1, 0); \ float32x2_t __rev2_178; __rev2_178 = __builtin_shufflevector(__s2_178, __s2_178, 1, 0); \ float32x2_t __reint_178 = __rev2_178; \ uint64x1_t __reint1_178 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_178, __p3_178)}; \ __ret_178 = __noswap_vcmla_f32(__rev0_178, __rev1_178, *(float32x2_t *) &__reint1_178); \ __ret_178 = __builtin_shufflevector(__ret_178, __ret_178, 1, 0); \ __ret_178; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmlaq_lane_f32(__p0_179, __p1_179, __p2_179, __p3_179) __extension__ ({ \ float32x4_t __ret_179; \ float32x4_t __s0_179 = __p0_179; \ float32x4_t __s1_179 = __p1_179; \ float32x2_t __s2_179 = __p2_179; \ float32x2_t __reint_179 = __s2_179; \ uint64x2_t __reint1_179 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_179, __p3_179), vget_lane_u64(*(uint64x1_t *) &__reint_179, __p3_179)}; \ __ret_179 = vcmlaq_f32(__s0_179, __s1_179, *(float32x4_t *) &__reint1_179); \ __ret_179; \ }) #else #define vcmlaq_lane_f32(__p0_180, __p1_180, __p2_180, __p3_180) __extension__ ({ \ float32x4_t __ret_180; \ float32x4_t 
__s0_180 = __p0_180; \ float32x4_t __s1_180 = __p1_180; \ float32x2_t __s2_180 = __p2_180; \ float32x4_t __rev0_180; __rev0_180 = __builtin_shufflevector(__s0_180, __s0_180, 3, 2, 1, 0); \ float32x4_t __rev1_180; __rev1_180 = __builtin_shufflevector(__s1_180, __s1_180, 3, 2, 1, 0); \ float32x2_t __rev2_180; __rev2_180 = __builtin_shufflevector(__s2_180, __s2_180, 1, 0); \ float32x2_t __reint_180 = __rev2_180; \ uint64x2_t __reint1_180 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_180, __p3_180), vget_lane_u64(*(uint64x1_t *) &__reint_180, __p3_180)}; \ __ret_180 = __noswap_vcmlaq_f32(__rev0_180, __rev1_180, *(float32x4_t *) &__reint1_180); \ __ret_180 = __builtin_shufflevector(__ret_180, __ret_180, 3, 2, 1, 0); \ __ret_180; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmla_laneq_f32(__p0_181, __p1_181, __p2_181, __p3_181) __extension__ ({ \ float32x2_t __ret_181; \ float32x2_t __s0_181 = __p0_181; \ float32x2_t __s1_181 = __p1_181; \ float32x4_t __s2_181 = __p2_181; \ float32x4_t __reint_181 = __s2_181; \ uint64x1_t __reint1_181 = (uint64x1_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_181, __p3_181)}; \ __ret_181 = vcmla_f32(__s0_181, __s1_181, *(float32x2_t *) &__reint1_181); \ __ret_181; \ }) #else #define vcmla_laneq_f32(__p0_182, __p1_182, __p2_182, __p3_182) __extension__ ({ \ float32x2_t __ret_182; \ float32x2_t __s0_182 = __p0_182; \ float32x2_t __s1_182 = __p1_182; \ float32x4_t __s2_182 = __p2_182; \ float32x2_t __rev0_182; __rev0_182 = __builtin_shufflevector(__s0_182, __s0_182, 1, 0); \ float32x2_t __rev1_182; __rev1_182 = __builtin_shufflevector(__s1_182, __s1_182, 1, 0); \ float32x4_t __rev2_182; __rev2_182 = __builtin_shufflevector(__s2_182, __s2_182, 3, 2, 1, 0); \ float32x4_t __reint_182 = __rev2_182; \ uint64x1_t __reint1_182 = (uint64x1_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_182, __p3_182)}; \ __ret_182 = __noswap_vcmla_f32(__rev0_182, __rev1_182, *(float32x2_t *) &__reint1_182); \ __ret_182 = __builtin_shufflevector(__ret_182, __ret_182, 1, 0); \ __ret_182; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmlaq_laneq_f32(__p0_183, __p1_183, __p2_183, __p3_183) __extension__ ({ \ float32x4_t __ret_183; \ float32x4_t __s0_183 = __p0_183; \ float32x4_t __s1_183 = __p1_183; \ float32x4_t __s2_183 = __p2_183; \ float32x4_t __reint_183 = __s2_183; \ uint64x2_t __reint1_183 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_183, __p3_183), vgetq_lane_u64(*(uint64x2_t *) &__reint_183, __p3_183)}; \ __ret_183 = vcmlaq_f32(__s0_183, __s1_183, *(float32x4_t *) &__reint1_183); \ __ret_183; \ }) #else #define vcmlaq_laneq_f32(__p0_184, __p1_184, __p2_184, __p3_184) __extension__ ({ \ float32x4_t __ret_184; \ float32x4_t __s0_184 = __p0_184; \ float32x4_t __s1_184 = __p1_184; \ float32x4_t __s2_184 = __p2_184; \ float32x4_t __rev0_184; __rev0_184 = __builtin_shufflevector(__s0_184, __s0_184, 3, 2, 1, 0); \ float32x4_t __rev1_184; __rev1_184 = __builtin_shufflevector(__s1_184, __s1_184, 3, 2, 1, 0); \ float32x4_t __rev2_184; __rev2_184 = __builtin_shufflevector(__s2_184, __s2_184, 3, 2, 1, 0); \ float32x4_t __reint_184 = __rev2_184; \ uint64x2_t __reint1_184 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_184, __p3_184), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_184, __p3_184)}; \ __ret_184 = __noswap_vcmlaq_f32(__rev0_184, __rev1_184, *(float32x4_t *) &__reint1_184); \ __ret_184 = __builtin_shufflevector(__ret_184, __ret_184, 3, 2, 1, 0); \ __ret_184; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a"))) float32x4_t 
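/* The _lane/_laneq forms of vcmla* select one complex (real, imaginary) pair from the
   third operand: the pair is reinterpreted as a single 64-bit lane, broadcast with
   vget_lane_u64/vgetq_lane_u64, and reinterpreted back to float32 before the call.
   The _rot180 variants defined next apply that rotation to the second multiplicand
   before the multiply-accumulate, and the other rotation variants defined alongside
   them follow the same pattern. */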
vcmlaq_rot180_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcmlaq_rot180_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #else __ai __attribute__((target("v8.3a"))) float32x4_t vcmlaq_rot180_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vcmlaq_rot180_f32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("v8.3a"))) float32x4_t __noswap_vcmlaq_rot180_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcmlaq_rot180_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a"))) float32x2_t vcmla_rot180_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcmla_rot180_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #else __ai __attribute__((target("v8.3a"))) float32x2_t vcmla_rot180_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (float32x2_t) __builtin_neon_vcmla_rot180_f32((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai __attribute__((target("v8.3a"))) float32x2_t __noswap_vcmla_rot180_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcmla_rot180_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vcmla_rot180_lane_f32(__p0_185, __p1_185, __p2_185, __p3_185) __extension__ ({ \ float32x2_t __ret_185; \ float32x2_t __s0_185 = __p0_185; \ float32x2_t __s1_185 = __p1_185; \ float32x2_t __s2_185 = __p2_185; \ float32x2_t __reint_185 = __s2_185; \ uint64x1_t __reint1_185 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_185, __p3_185)}; \ __ret_185 = vcmla_rot180_f32(__s0_185, __s1_185, *(float32x2_t *) &__reint1_185); \ __ret_185; \ }) #else #define vcmla_rot180_lane_f32(__p0_186, __p1_186, __p2_186, __p3_186) __extension__ ({ \ float32x2_t __ret_186; \ float32x2_t __s0_186 = __p0_186; \ float32x2_t __s1_186 = __p1_186; \ float32x2_t __s2_186 = __p2_186; \ float32x2_t __rev0_186; __rev0_186 = __builtin_shufflevector(__s0_186, __s0_186, 1, 0); \ float32x2_t __rev1_186; __rev1_186 = __builtin_shufflevector(__s1_186, __s1_186, 1, 0); \ float32x2_t __rev2_186; __rev2_186 = __builtin_shufflevector(__s2_186, __s2_186, 1, 0); \ float32x2_t __reint_186 = __rev2_186; \ uint64x1_t __reint1_186 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_186, __p3_186)}; \ __ret_186 = __noswap_vcmla_rot180_f32(__rev0_186, __rev1_186, *(float32x2_t *) &__reint1_186); \ __ret_186 = __builtin_shufflevector(__ret_186, __ret_186, 1, 0); \ __ret_186; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define 
vcmlaq_rot180_lane_f32(__p0_187, __p1_187, __p2_187, __p3_187) __extension__ ({ \ float32x4_t __ret_187; \ float32x4_t __s0_187 = __p0_187; \ float32x4_t __s1_187 = __p1_187; \ float32x2_t __s2_187 = __p2_187; \ float32x2_t __reint_187 = __s2_187; \ uint64x2_t __reint1_187 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_187, __p3_187), vget_lane_u64(*(uint64x1_t *) &__reint_187, __p3_187)}; \ __ret_187 = vcmlaq_rot180_f32(__s0_187, __s1_187, *(float32x4_t *) &__reint1_187); \ __ret_187; \ }) #else #define vcmlaq_rot180_lane_f32(__p0_188, __p1_188, __p2_188, __p3_188) __extension__ ({ \ float32x4_t __ret_188; \ float32x4_t __s0_188 = __p0_188; \ float32x4_t __s1_188 = __p1_188; \ float32x2_t __s2_188 = __p2_188; \ float32x4_t __rev0_188; __rev0_188 = __builtin_shufflevector(__s0_188, __s0_188, 3, 2, 1, 0); \ float32x4_t __rev1_188; __rev1_188 = __builtin_shufflevector(__s1_188, __s1_188, 3, 2, 1, 0); \ float32x2_t __rev2_188; __rev2_188 = __builtin_shufflevector(__s2_188, __s2_188, 1, 0); \ float32x2_t __reint_188 = __rev2_188; \ uint64x2_t __reint1_188 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_188, __p3_188), vget_lane_u64(*(uint64x1_t *) &__reint_188, __p3_188)}; \ __ret_188 = __noswap_vcmlaq_rot180_f32(__rev0_188, __rev1_188, *(float32x4_t *) &__reint1_188); \ __ret_188 = __builtin_shufflevector(__ret_188, __ret_188, 3, 2, 1, 0); \ __ret_188; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmla_rot180_laneq_f32(__p0_189, __p1_189, __p2_189, __p3_189) __extension__ ({ \ float32x2_t __ret_189; \ float32x2_t __s0_189 = __p0_189; \ float32x2_t __s1_189 = __p1_189; \ float32x4_t __s2_189 = __p2_189; \ float32x4_t __reint_189 = __s2_189; \ uint64x1_t __reint1_189 = (uint64x1_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_189, __p3_189)}; \ __ret_189 = vcmla_rot180_f32(__s0_189, __s1_189, *(float32x2_t *) &__reint1_189); \ __ret_189; \ }) #else #define vcmla_rot180_laneq_f32(__p0_190, __p1_190, __p2_190, __p3_190) __extension__ ({ \ float32x2_t __ret_190; \ float32x2_t __s0_190 = __p0_190; \ float32x2_t __s1_190 = __p1_190; \ float32x4_t __s2_190 = __p2_190; \ float32x2_t __rev0_190; __rev0_190 = __builtin_shufflevector(__s0_190, __s0_190, 1, 0); \ float32x2_t __rev1_190; __rev1_190 = __builtin_shufflevector(__s1_190, __s1_190, 1, 0); \ float32x4_t __rev2_190; __rev2_190 = __builtin_shufflevector(__s2_190, __s2_190, 3, 2, 1, 0); \ float32x4_t __reint_190 = __rev2_190; \ uint64x1_t __reint1_190 = (uint64x1_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_190, __p3_190)}; \ __ret_190 = __noswap_vcmla_rot180_f32(__rev0_190, __rev1_190, *(float32x2_t *) &__reint1_190); \ __ret_190 = __builtin_shufflevector(__ret_190, __ret_190, 1, 0); \ __ret_190; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmlaq_rot180_laneq_f32(__p0_191, __p1_191, __p2_191, __p3_191) __extension__ ({ \ float32x4_t __ret_191; \ float32x4_t __s0_191 = __p0_191; \ float32x4_t __s1_191 = __p1_191; \ float32x4_t __s2_191 = __p2_191; \ float32x4_t __reint_191 = __s2_191; \ uint64x2_t __reint1_191 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_191, __p3_191), vgetq_lane_u64(*(uint64x2_t *) &__reint_191, __p3_191)}; \ __ret_191 = vcmlaq_rot180_f32(__s0_191, __s1_191, *(float32x4_t *) &__reint1_191); \ __ret_191; \ }) #else #define vcmlaq_rot180_laneq_f32(__p0_192, __p1_192, __p2_192, __p3_192) __extension__ ({ \ float32x4_t __ret_192; \ float32x4_t __s0_192 = __p0_192; \ float32x4_t __s1_192 = __p1_192; \ float32x4_t __s2_192 = __p2_192; \ float32x4_t __rev0_192; __rev0_192 = 
__builtin_shufflevector(__s0_192, __s0_192, 3, 2, 1, 0); \ float32x4_t __rev1_192; __rev1_192 = __builtin_shufflevector(__s1_192, __s1_192, 3, 2, 1, 0); \ float32x4_t __rev2_192; __rev2_192 = __builtin_shufflevector(__s2_192, __s2_192, 3, 2, 1, 0); \ float32x4_t __reint_192 = __rev2_192; \ uint64x2_t __reint1_192 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_192, __p3_192), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_192, __p3_192)}; \ __ret_192 = __noswap_vcmlaq_rot180_f32(__rev0_192, __rev1_192, *(float32x4_t *) &__reint1_192); \ __ret_192 = __builtin_shufflevector(__ret_192, __ret_192, 3, 2, 1, 0); \ __ret_192; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a"))) float32x4_t vcmlaq_rot270_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcmlaq_rot270_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #else __ai __attribute__((target("v8.3a"))) float32x4_t vcmlaq_rot270_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vcmlaq_rot270_f32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("v8.3a"))) float32x4_t __noswap_vcmlaq_rot270_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcmlaq_rot270_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a"))) float32x2_t vcmla_rot270_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcmla_rot270_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #else __ai __attribute__((target("v8.3a"))) float32x2_t vcmla_rot270_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (float32x2_t) __builtin_neon_vcmla_rot270_f32((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai __attribute__((target("v8.3a"))) float32x2_t __noswap_vcmla_rot270_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcmla_rot270_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vcmla_rot270_lane_f32(__p0_193, __p1_193, __p2_193, __p3_193) __extension__ ({ \ float32x2_t __ret_193; \ float32x2_t __s0_193 = __p0_193; \ float32x2_t __s1_193 = __p1_193; \ float32x2_t __s2_193 = __p2_193; \ float32x2_t __reint_193 = __s2_193; \ uint64x1_t __reint1_193 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_193, __p3_193)}; \ __ret_193 = vcmla_rot270_f32(__s0_193, __s1_193, *(float32x2_t *) &__reint1_193); \ __ret_193; \ }) #else #define vcmla_rot270_lane_f32(__p0_194, __p1_194, __p2_194, __p3_194) __extension__ ({ \ float32x2_t __ret_194; \ float32x2_t __s0_194 = 
__p0_194; \ float32x2_t __s1_194 = __p1_194; \ float32x2_t __s2_194 = __p2_194; \ float32x2_t __rev0_194; __rev0_194 = __builtin_shufflevector(__s0_194, __s0_194, 1, 0); \ float32x2_t __rev1_194; __rev1_194 = __builtin_shufflevector(__s1_194, __s1_194, 1, 0); \ float32x2_t __rev2_194; __rev2_194 = __builtin_shufflevector(__s2_194, __s2_194, 1, 0); \ float32x2_t __reint_194 = __rev2_194; \ uint64x1_t __reint1_194 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_194, __p3_194)}; \ __ret_194 = __noswap_vcmla_rot270_f32(__rev0_194, __rev1_194, *(float32x2_t *) &__reint1_194); \ __ret_194 = __builtin_shufflevector(__ret_194, __ret_194, 1, 0); \ __ret_194; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmlaq_rot270_lane_f32(__p0_195, __p1_195, __p2_195, __p3_195) __extension__ ({ \ float32x4_t __ret_195; \ float32x4_t __s0_195 = __p0_195; \ float32x4_t __s1_195 = __p1_195; \ float32x2_t __s2_195 = __p2_195; \ float32x2_t __reint_195 = __s2_195; \ uint64x2_t __reint1_195 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_195, __p3_195), vget_lane_u64(*(uint64x1_t *) &__reint_195, __p3_195)}; \ __ret_195 = vcmlaq_rot270_f32(__s0_195, __s1_195, *(float32x4_t *) &__reint1_195); \ __ret_195; \ }) #else #define vcmlaq_rot270_lane_f32(__p0_196, __p1_196, __p2_196, __p3_196) __extension__ ({ \ float32x4_t __ret_196; \ float32x4_t __s0_196 = __p0_196; \ float32x4_t __s1_196 = __p1_196; \ float32x2_t __s2_196 = __p2_196; \ float32x4_t __rev0_196; __rev0_196 = __builtin_shufflevector(__s0_196, __s0_196, 3, 2, 1, 0); \ float32x4_t __rev1_196; __rev1_196 = __builtin_shufflevector(__s1_196, __s1_196, 3, 2, 1, 0); \ float32x2_t __rev2_196; __rev2_196 = __builtin_shufflevector(__s2_196, __s2_196, 1, 0); \ float32x2_t __reint_196 = __rev2_196; \ uint64x2_t __reint1_196 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_196, __p3_196), vget_lane_u64(*(uint64x1_t *) &__reint_196, __p3_196)}; \ __ret_196 = __noswap_vcmlaq_rot270_f32(__rev0_196, __rev1_196, *(float32x4_t *) &__reint1_196); \ __ret_196 = __builtin_shufflevector(__ret_196, __ret_196, 3, 2, 1, 0); \ __ret_196; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmla_rot270_laneq_f32(__p0_197, __p1_197, __p2_197, __p3_197) __extension__ ({ \ float32x2_t __ret_197; \ float32x2_t __s0_197 = __p0_197; \ float32x2_t __s1_197 = __p1_197; \ float32x4_t __s2_197 = __p2_197; \ float32x4_t __reint_197 = __s2_197; \ uint64x1_t __reint1_197 = (uint64x1_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_197, __p3_197)}; \ __ret_197 = vcmla_rot270_f32(__s0_197, __s1_197, *(float32x2_t *) &__reint1_197); \ __ret_197; \ }) #else #define vcmla_rot270_laneq_f32(__p0_198, __p1_198, __p2_198, __p3_198) __extension__ ({ \ float32x2_t __ret_198; \ float32x2_t __s0_198 = __p0_198; \ float32x2_t __s1_198 = __p1_198; \ float32x4_t __s2_198 = __p2_198; \ float32x2_t __rev0_198; __rev0_198 = __builtin_shufflevector(__s0_198, __s0_198, 1, 0); \ float32x2_t __rev1_198; __rev1_198 = __builtin_shufflevector(__s1_198, __s1_198, 1, 0); \ float32x4_t __rev2_198; __rev2_198 = __builtin_shufflevector(__s2_198, __s2_198, 3, 2, 1, 0); \ float32x4_t __reint_198 = __rev2_198; \ uint64x1_t __reint1_198 = (uint64x1_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_198, __p3_198)}; \ __ret_198 = __noswap_vcmla_rot270_f32(__rev0_198, __rev1_198, *(float32x2_t *) &__reint1_198); \ __ret_198 = __builtin_shufflevector(__ret_198, __ret_198, 1, 0); \ __ret_198; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmlaq_rot270_laneq_f32(__p0_199, __p1_199, __p2_199, __p3_199) __extension__ ({ \ 
float32x4_t __ret_199; \ float32x4_t __s0_199 = __p0_199; \ float32x4_t __s1_199 = __p1_199; \ float32x4_t __s2_199 = __p2_199; \ float32x4_t __reint_199 = __s2_199; \ uint64x2_t __reint1_199 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_199, __p3_199), vgetq_lane_u64(*(uint64x2_t *) &__reint_199, __p3_199)}; \ __ret_199 = vcmlaq_rot270_f32(__s0_199, __s1_199, *(float32x4_t *) &__reint1_199); \ __ret_199; \ }) #else #define vcmlaq_rot270_laneq_f32(__p0_200, __p1_200, __p2_200, __p3_200) __extension__ ({ \ float32x4_t __ret_200; \ float32x4_t __s0_200 = __p0_200; \ float32x4_t __s1_200 = __p1_200; \ float32x4_t __s2_200 = __p2_200; \ float32x4_t __rev0_200; __rev0_200 = __builtin_shufflevector(__s0_200, __s0_200, 3, 2, 1, 0); \ float32x4_t __rev1_200; __rev1_200 = __builtin_shufflevector(__s1_200, __s1_200, 3, 2, 1, 0); \ float32x4_t __rev2_200; __rev2_200 = __builtin_shufflevector(__s2_200, __s2_200, 3, 2, 1, 0); \ float32x4_t __reint_200 = __rev2_200; \ uint64x2_t __reint1_200 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_200, __p3_200), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_200, __p3_200)}; \ __ret_200 = __noswap_vcmlaq_rot270_f32(__rev0_200, __rev1_200, *(float32x4_t *) &__reint1_200); \ __ret_200 = __builtin_shufflevector(__ret_200, __ret_200, 3, 2, 1, 0); \ __ret_200; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a"))) float32x4_t vcmlaq_rot90_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcmlaq_rot90_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #else __ai __attribute__((target("v8.3a"))) float32x4_t vcmlaq_rot90_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vcmlaq_rot90_f32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("v8.3a"))) float32x4_t __noswap_vcmlaq_rot90_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcmlaq_rot90_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a"))) float32x2_t vcmla_rot90_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcmla_rot90_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #else __ai __attribute__((target("v8.3a"))) float32x2_t vcmla_rot90_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (float32x2_t) __builtin_neon_vcmla_rot90_f32((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai __attribute__((target("v8.3a"))) float32x2_t __noswap_vcmla_rot90_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcmla_rot90_f32((int8x8_t)__p0, 
(int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vcmla_rot90_lane_f32(__p0_201, __p1_201, __p2_201, __p3_201) __extension__ ({ \ float32x2_t __ret_201; \ float32x2_t __s0_201 = __p0_201; \ float32x2_t __s1_201 = __p1_201; \ float32x2_t __s2_201 = __p2_201; \ float32x2_t __reint_201 = __s2_201; \ uint64x1_t __reint1_201 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_201, __p3_201)}; \ __ret_201 = vcmla_rot90_f32(__s0_201, __s1_201, *(float32x2_t *) &__reint1_201); \ __ret_201; \ }) #else #define vcmla_rot90_lane_f32(__p0_202, __p1_202, __p2_202, __p3_202) __extension__ ({ \ float32x2_t __ret_202; \ float32x2_t __s0_202 = __p0_202; \ float32x2_t __s1_202 = __p1_202; \ float32x2_t __s2_202 = __p2_202; \ float32x2_t __rev0_202; __rev0_202 = __builtin_shufflevector(__s0_202, __s0_202, 1, 0); \ float32x2_t __rev1_202; __rev1_202 = __builtin_shufflevector(__s1_202, __s1_202, 1, 0); \ float32x2_t __rev2_202; __rev2_202 = __builtin_shufflevector(__s2_202, __s2_202, 1, 0); \ float32x2_t __reint_202 = __rev2_202; \ uint64x1_t __reint1_202 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_202, __p3_202)}; \ __ret_202 = __noswap_vcmla_rot90_f32(__rev0_202, __rev1_202, *(float32x2_t *) &__reint1_202); \ __ret_202 = __builtin_shufflevector(__ret_202, __ret_202, 1, 0); \ __ret_202; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmlaq_rot90_lane_f32(__p0_203, __p1_203, __p2_203, __p3_203) __extension__ ({ \ float32x4_t __ret_203; \ float32x4_t __s0_203 = __p0_203; \ float32x4_t __s1_203 = __p1_203; \ float32x2_t __s2_203 = __p2_203; \ float32x2_t __reint_203 = __s2_203; \ uint64x2_t __reint1_203 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_203, __p3_203), vget_lane_u64(*(uint64x1_t *) &__reint_203, __p3_203)}; \ __ret_203 = vcmlaq_rot90_f32(__s0_203, __s1_203, *(float32x4_t *) &__reint1_203); \ __ret_203; \ }) #else #define vcmlaq_rot90_lane_f32(__p0_204, __p1_204, __p2_204, __p3_204) __extension__ ({ \ float32x4_t __ret_204; \ float32x4_t __s0_204 = __p0_204; \ float32x4_t __s1_204 = __p1_204; \ float32x2_t __s2_204 = __p2_204; \ float32x4_t __rev0_204; __rev0_204 = __builtin_shufflevector(__s0_204, __s0_204, 3, 2, 1, 0); \ float32x4_t __rev1_204; __rev1_204 = __builtin_shufflevector(__s1_204, __s1_204, 3, 2, 1, 0); \ float32x2_t __rev2_204; __rev2_204 = __builtin_shufflevector(__s2_204, __s2_204, 1, 0); \ float32x2_t __reint_204 = __rev2_204; \ uint64x2_t __reint1_204 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_204, __p3_204), vget_lane_u64(*(uint64x1_t *) &__reint_204, __p3_204)}; \ __ret_204 = __noswap_vcmlaq_rot90_f32(__rev0_204, __rev1_204, *(float32x4_t *) &__reint1_204); \ __ret_204 = __builtin_shufflevector(__ret_204, __ret_204, 3, 2, 1, 0); \ __ret_204; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmla_rot90_laneq_f32(__p0_205, __p1_205, __p2_205, __p3_205) __extension__ ({ \ float32x2_t __ret_205; \ float32x2_t __s0_205 = __p0_205; \ float32x2_t __s1_205 = __p1_205; \ float32x4_t __s2_205 = __p2_205; \ float32x4_t __reint_205 = __s2_205; \ uint64x1_t __reint1_205 = (uint64x1_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_205, __p3_205)}; \ __ret_205 = vcmla_rot90_f32(__s0_205, __s1_205, *(float32x2_t *) &__reint1_205); \ __ret_205; \ }) #else #define vcmla_rot90_laneq_f32(__p0_206, __p1_206, __p2_206, __p3_206) __extension__ ({ \ float32x2_t __ret_206; \ float32x2_t __s0_206 = __p0_206; \ float32x2_t __s1_206 = __p1_206; \ float32x4_t __s2_206 = __p2_206; \ float32x2_t __rev0_206; __rev0_206 = 
__builtin_shufflevector(__s0_206, __s0_206, 1, 0); \ float32x2_t __rev1_206; __rev1_206 = __builtin_shufflevector(__s1_206, __s1_206, 1, 0); \ float32x4_t __rev2_206; __rev2_206 = __builtin_shufflevector(__s2_206, __s2_206, 3, 2, 1, 0); \ float32x4_t __reint_206 = __rev2_206; \ uint64x1_t __reint1_206 = (uint64x1_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_206, __p3_206)}; \ __ret_206 = __noswap_vcmla_rot90_f32(__rev0_206, __rev1_206, *(float32x2_t *) &__reint1_206); \ __ret_206 = __builtin_shufflevector(__ret_206, __ret_206, 1, 0); \ __ret_206; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmlaq_rot90_laneq_f32(__p0_207, __p1_207, __p2_207, __p3_207) __extension__ ({ \ float32x4_t __ret_207; \ float32x4_t __s0_207 = __p0_207; \ float32x4_t __s1_207 = __p1_207; \ float32x4_t __s2_207 = __p2_207; \ float32x4_t __reint_207 = __s2_207; \ uint64x2_t __reint1_207 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_207, __p3_207), vgetq_lane_u64(*(uint64x2_t *) &__reint_207, __p3_207)}; \ __ret_207 = vcmlaq_rot90_f32(__s0_207, __s1_207, *(float32x4_t *) &__reint1_207); \ __ret_207; \ }) #else #define vcmlaq_rot90_laneq_f32(__p0_208, __p1_208, __p2_208, __p3_208) __extension__ ({ \ float32x4_t __ret_208; \ float32x4_t __s0_208 = __p0_208; \ float32x4_t __s1_208 = __p1_208; \ float32x4_t __s2_208 = __p2_208; \ float32x4_t __rev0_208; __rev0_208 = __builtin_shufflevector(__s0_208, __s0_208, 3, 2, 1, 0); \ float32x4_t __rev1_208; __rev1_208 = __builtin_shufflevector(__s1_208, __s1_208, 3, 2, 1, 0); \ float32x4_t __rev2_208; __rev2_208 = __builtin_shufflevector(__s2_208, __s2_208, 3, 2, 1, 0); \ float32x4_t __reint_208 = __rev2_208; \ uint64x2_t __reint1_208 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_208, __p3_208), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_208, __p3_208)}; \ __ret_208 = __noswap_vcmlaq_rot90_f32(__rev0_208, __rev1_208, *(float32x4_t *) &__reint1_208); \ __ret_208 = __builtin_shufflevector(__ret_208, __ret_208, 3, 2, 1, 0); \ __ret_208; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16"))) float16x4_t vcadd_rot270_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcadd_rot270_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16"))) float16x4_t vcadd_rot270_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vcadd_rot270_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16"))) float16x4_t vcadd_rot90_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcadd_rot90_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16"))) float16x4_t vcadd_rot90_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vcadd_rot90_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai 
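/*
 * Note (editorial sketch, not part of the generated header): the
 * vcmla*_lane_* / vcmla*_laneq_* macros above select one complex element
 * (a real/imaginary pair) from the last vector operand and broadcast it to
 * every complex element before calling the plain vcmla form. Because an f32
 * pair is 64 bits (an f16 pair is 32 bits), the macros reinterpret the
 * source as uint64/uint32 lanes, duplicate the chosen lane, and reinterpret
 * back. Conceptually:
 *
 *   // vcmlaq_laneq_f32(acc, a, b, 1) behaves like
 *   //   vcmlaq_f32(acc, a, <b with complex element 1 broadcast to all pairs>)
 *   // where the broadcast is built from the 64-bit lane holding {re1, im1}.
 */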
__attribute__((target("v8.3a,fullfp16"))) float16x8_t vcaddq_rot270_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vcaddq_rot270_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16"))) float16x8_t vcaddq_rot270_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vcaddq_rot270_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16"))) float16x8_t vcaddq_rot90_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vcaddq_rot90_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16"))) float16x8_t vcaddq_rot90_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vcaddq_rot90_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16"))) float16x8_t vcmlaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vcmlaq_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16"))) float16x8_t vcmlaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vcmlaq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("v8.3a,fullfp16"))) float16x8_t __noswap_vcmlaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vcmlaq_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16"))) float16x4_t vcmla_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcmla_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16"))) float16x4_t vcmla_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vcmla_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); 
return __ret; } __ai __attribute__((target("v8.3a,fullfp16"))) float16x4_t __noswap_vcmla_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcmla_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vcmla_lane_f16(__p0_209, __p1_209, __p2_209, __p3_209) __extension__ ({ \ float16x4_t __ret_209; \ float16x4_t __s0_209 = __p0_209; \ float16x4_t __s1_209 = __p1_209; \ float16x4_t __s2_209 = __p2_209; \ float16x4_t __reint_209 = __s2_209; \ uint32x2_t __reint1_209 = (uint32x2_t) {vget_lane_u32(*(uint32x2_t *) &__reint_209, __p3_209), vget_lane_u32(*(uint32x2_t *) &__reint_209, __p3_209)}; \ __ret_209 = vcmla_f16(__s0_209, __s1_209, *(float16x4_t *) &__reint1_209); \ __ret_209; \ }) #else #define vcmla_lane_f16(__p0_210, __p1_210, __p2_210, __p3_210) __extension__ ({ \ float16x4_t __ret_210; \ float16x4_t __s0_210 = __p0_210; \ float16x4_t __s1_210 = __p1_210; \ float16x4_t __s2_210 = __p2_210; \ float16x4_t __rev0_210; __rev0_210 = __builtin_shufflevector(__s0_210, __s0_210, 3, 2, 1, 0); \ float16x4_t __rev1_210; __rev1_210 = __builtin_shufflevector(__s1_210, __s1_210, 3, 2, 1, 0); \ float16x4_t __rev2_210; __rev2_210 = __builtin_shufflevector(__s2_210, __s2_210, 3, 2, 1, 0); \ float16x4_t __reint_210 = __rev2_210; \ uint32x2_t __reint1_210 = (uint32x2_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_210, __p3_210), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_210, __p3_210)}; \ __ret_210 = __noswap_vcmla_f16(__rev0_210, __rev1_210, *(float16x4_t *) &__reint1_210); \ __ret_210 = __builtin_shufflevector(__ret_210, __ret_210, 3, 2, 1, 0); \ __ret_210; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmlaq_lane_f16(__p0_211, __p1_211, __p2_211, __p3_211) __extension__ ({ \ float16x8_t __ret_211; \ float16x8_t __s0_211 = __p0_211; \ float16x8_t __s1_211 = __p1_211; \ float16x4_t __s2_211 = __p2_211; \ float16x4_t __reint_211 = __s2_211; \ uint32x4_t __reint1_211 = (uint32x4_t) {vget_lane_u32(*(uint32x2_t *) &__reint_211, __p3_211), vget_lane_u32(*(uint32x2_t *) &__reint_211, __p3_211), vget_lane_u32(*(uint32x2_t *) &__reint_211, __p3_211), vget_lane_u32(*(uint32x2_t *) &__reint_211, __p3_211)}; \ __ret_211 = vcmlaq_f16(__s0_211, __s1_211, *(float16x8_t *) &__reint1_211); \ __ret_211; \ }) #else #define vcmlaq_lane_f16(__p0_212, __p1_212, __p2_212, __p3_212) __extension__ ({ \ float16x8_t __ret_212; \ float16x8_t __s0_212 = __p0_212; \ float16x8_t __s1_212 = __p1_212; \ float16x4_t __s2_212 = __p2_212; \ float16x8_t __rev0_212; __rev0_212 = __builtin_shufflevector(__s0_212, __s0_212, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1_212; __rev1_212 = __builtin_shufflevector(__s1_212, __s1_212, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x4_t __rev2_212; __rev2_212 = __builtin_shufflevector(__s2_212, __s2_212, 3, 2, 1, 0); \ float16x4_t __reint_212 = __rev2_212; \ uint32x4_t __reint1_212 = (uint32x4_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_212, __p3_212), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_212, __p3_212), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_212, __p3_212), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_212, __p3_212)}; \ __ret_212 = __noswap_vcmlaq_f16(__rev0_212, __rev1_212, *(float16x8_t *) &__reint1_212); \ __ret_212 = __builtin_shufflevector(__ret_212, __ret_212, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_212; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmla_laneq_f16(__p0_213, __p1_213, __p2_213, __p3_213) __extension__ ({ \ float16x4_t __ret_213; \ 
float16x4_t __s0_213 = __p0_213; \ float16x4_t __s1_213 = __p1_213; \ float16x8_t __s2_213 = __p2_213; \ float16x8_t __reint_213 = __s2_213; \ uint32x2_t __reint1_213 = (uint32x2_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_213, __p3_213), vgetq_lane_u32(*(uint32x4_t *) &__reint_213, __p3_213)}; \ __ret_213 = vcmla_f16(__s0_213, __s1_213, *(float16x4_t *) &__reint1_213); \ __ret_213; \ }) #else #define vcmla_laneq_f16(__p0_214, __p1_214, __p2_214, __p3_214) __extension__ ({ \ float16x4_t __ret_214; \ float16x4_t __s0_214 = __p0_214; \ float16x4_t __s1_214 = __p1_214; \ float16x8_t __s2_214 = __p2_214; \ float16x4_t __rev0_214; __rev0_214 = __builtin_shufflevector(__s0_214, __s0_214, 3, 2, 1, 0); \ float16x4_t __rev1_214; __rev1_214 = __builtin_shufflevector(__s1_214, __s1_214, 3, 2, 1, 0); \ float16x8_t __rev2_214; __rev2_214 = __builtin_shufflevector(__s2_214, __s2_214, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __reint_214 = __rev2_214; \ uint32x2_t __reint1_214 = (uint32x2_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_214, __p3_214), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_214, __p3_214)}; \ __ret_214 = __noswap_vcmla_f16(__rev0_214, __rev1_214, *(float16x4_t *) &__reint1_214); \ __ret_214 = __builtin_shufflevector(__ret_214, __ret_214, 3, 2, 1, 0); \ __ret_214; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmlaq_laneq_f16(__p0_215, __p1_215, __p2_215, __p3_215) __extension__ ({ \ float16x8_t __ret_215; \ float16x8_t __s0_215 = __p0_215; \ float16x8_t __s1_215 = __p1_215; \ float16x8_t __s2_215 = __p2_215; \ float16x8_t __reint_215 = __s2_215; \ uint32x4_t __reint1_215 = (uint32x4_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_215, __p3_215), vgetq_lane_u32(*(uint32x4_t *) &__reint_215, __p3_215), vgetq_lane_u32(*(uint32x4_t *) &__reint_215, __p3_215), vgetq_lane_u32(*(uint32x4_t *) &__reint_215, __p3_215)}; \ __ret_215 = vcmlaq_f16(__s0_215, __s1_215, *(float16x8_t *) &__reint1_215); \ __ret_215; \ }) #else #define vcmlaq_laneq_f16(__p0_216, __p1_216, __p2_216, __p3_216) __extension__ ({ \ float16x8_t __ret_216; \ float16x8_t __s0_216 = __p0_216; \ float16x8_t __s1_216 = __p1_216; \ float16x8_t __s2_216 = __p2_216; \ float16x8_t __rev0_216; __rev0_216 = __builtin_shufflevector(__s0_216, __s0_216, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1_216; __rev1_216 = __builtin_shufflevector(__s1_216, __s1_216, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev2_216; __rev2_216 = __builtin_shufflevector(__s2_216, __s2_216, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __reint_216 = __rev2_216; \ uint32x4_t __reint1_216 = (uint32x4_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_216, __p3_216), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_216, __p3_216), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_216, __p3_216), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_216, __p3_216)}; \ __ret_216 = __noswap_vcmlaq_f16(__rev0_216, __rev1_216, *(float16x8_t *) &__reint1_216); \ __ret_216 = __builtin_shufflevector(__ret_216, __ret_216, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_216; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16"))) float16x8_t vcmlaq_rot180_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vcmlaq_rot180_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16"))) float16x8_t vcmlaq_rot180_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, 
__p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vcmlaq_rot180_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("v8.3a,fullfp16"))) float16x8_t __noswap_vcmlaq_rot180_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vcmlaq_rot180_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16"))) float16x4_t vcmla_rot180_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcmla_rot180_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16"))) float16x4_t vcmla_rot180_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vcmla_rot180_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("v8.3a,fullfp16"))) float16x4_t __noswap_vcmla_rot180_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcmla_rot180_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vcmla_rot180_lane_f16(__p0_217, __p1_217, __p2_217, __p3_217) __extension__ ({ \ float16x4_t __ret_217; \ float16x4_t __s0_217 = __p0_217; \ float16x4_t __s1_217 = __p1_217; \ float16x4_t __s2_217 = __p2_217; \ float16x4_t __reint_217 = __s2_217; \ uint32x2_t __reint1_217 = (uint32x2_t) {vget_lane_u32(*(uint32x2_t *) &__reint_217, __p3_217), vget_lane_u32(*(uint32x2_t *) &__reint_217, __p3_217)}; \ __ret_217 = vcmla_rot180_f16(__s0_217, __s1_217, *(float16x4_t *) &__reint1_217); \ __ret_217; \ }) #else #define vcmla_rot180_lane_f16(__p0_218, __p1_218, __p2_218, __p3_218) __extension__ ({ \ float16x4_t __ret_218; \ float16x4_t __s0_218 = __p0_218; \ float16x4_t __s1_218 = __p1_218; \ float16x4_t __s2_218 = __p2_218; \ float16x4_t __rev0_218; __rev0_218 = __builtin_shufflevector(__s0_218, __s0_218, 3, 2, 1, 0); \ float16x4_t __rev1_218; __rev1_218 = __builtin_shufflevector(__s1_218, __s1_218, 3, 2, 1, 0); \ float16x4_t __rev2_218; __rev2_218 = __builtin_shufflevector(__s2_218, __s2_218, 3, 2, 1, 0); \ float16x4_t __reint_218 = __rev2_218; \ uint32x2_t __reint1_218 = (uint32x2_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_218, __p3_218), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_218, __p3_218)}; \ __ret_218 = __noswap_vcmla_rot180_f16(__rev0_218, __rev1_218, *(float16x4_t *) &__reint1_218); \ __ret_218 = __builtin_shufflevector(__ret_218, __ret_218, 3, 2, 1, 0); \ __ret_218; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmlaq_rot180_lane_f16(__p0_219, __p1_219, __p2_219, __p3_219) __extension__ ({ \ float16x8_t __ret_219; \ float16x8_t __s0_219 = __p0_219; \ float16x8_t __s1_219 = __p1_219; \ float16x4_t 
__s2_219 = __p2_219; \ float16x4_t __reint_219 = __s2_219; \ uint32x4_t __reint1_219 = (uint32x4_t) {vget_lane_u32(*(uint32x2_t *) &__reint_219, __p3_219), vget_lane_u32(*(uint32x2_t *) &__reint_219, __p3_219), vget_lane_u32(*(uint32x2_t *) &__reint_219, __p3_219), vget_lane_u32(*(uint32x2_t *) &__reint_219, __p3_219)}; \ __ret_219 = vcmlaq_rot180_f16(__s0_219, __s1_219, *(float16x8_t *) &__reint1_219); \ __ret_219; \ }) #else #define vcmlaq_rot180_lane_f16(__p0_220, __p1_220, __p2_220, __p3_220) __extension__ ({ \ float16x8_t __ret_220; \ float16x8_t __s0_220 = __p0_220; \ float16x8_t __s1_220 = __p1_220; \ float16x4_t __s2_220 = __p2_220; \ float16x8_t __rev0_220; __rev0_220 = __builtin_shufflevector(__s0_220, __s0_220, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1_220; __rev1_220 = __builtin_shufflevector(__s1_220, __s1_220, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x4_t __rev2_220; __rev2_220 = __builtin_shufflevector(__s2_220, __s2_220, 3, 2, 1, 0); \ float16x4_t __reint_220 = __rev2_220; \ uint32x4_t __reint1_220 = (uint32x4_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_220, __p3_220), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_220, __p3_220), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_220, __p3_220), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_220, __p3_220)}; \ __ret_220 = __noswap_vcmlaq_rot180_f16(__rev0_220, __rev1_220, *(float16x8_t *) &__reint1_220); \ __ret_220 = __builtin_shufflevector(__ret_220, __ret_220, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_220; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmla_rot180_laneq_f16(__p0_221, __p1_221, __p2_221, __p3_221) __extension__ ({ \ float16x4_t __ret_221; \ float16x4_t __s0_221 = __p0_221; \ float16x4_t __s1_221 = __p1_221; \ float16x8_t __s2_221 = __p2_221; \ float16x8_t __reint_221 = __s2_221; \ uint32x2_t __reint1_221 = (uint32x2_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_221, __p3_221), vgetq_lane_u32(*(uint32x4_t *) &__reint_221, __p3_221)}; \ __ret_221 = vcmla_rot180_f16(__s0_221, __s1_221, *(float16x4_t *) &__reint1_221); \ __ret_221; \ }) #else #define vcmla_rot180_laneq_f16(__p0_222, __p1_222, __p2_222, __p3_222) __extension__ ({ \ float16x4_t __ret_222; \ float16x4_t __s0_222 = __p0_222; \ float16x4_t __s1_222 = __p1_222; \ float16x8_t __s2_222 = __p2_222; \ float16x4_t __rev0_222; __rev0_222 = __builtin_shufflevector(__s0_222, __s0_222, 3, 2, 1, 0); \ float16x4_t __rev1_222; __rev1_222 = __builtin_shufflevector(__s1_222, __s1_222, 3, 2, 1, 0); \ float16x8_t __rev2_222; __rev2_222 = __builtin_shufflevector(__s2_222, __s2_222, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __reint_222 = __rev2_222; \ uint32x2_t __reint1_222 = (uint32x2_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_222, __p3_222), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_222, __p3_222)}; \ __ret_222 = __noswap_vcmla_rot180_f16(__rev0_222, __rev1_222, *(float16x4_t *) &__reint1_222); \ __ret_222 = __builtin_shufflevector(__ret_222, __ret_222, 3, 2, 1, 0); \ __ret_222; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmlaq_rot180_laneq_f16(__p0_223, __p1_223, __p2_223, __p3_223) __extension__ ({ \ float16x8_t __ret_223; \ float16x8_t __s0_223 = __p0_223; \ float16x8_t __s1_223 = __p1_223; \ float16x8_t __s2_223 = __p2_223; \ float16x8_t __reint_223 = __s2_223; \ uint32x4_t __reint1_223 = (uint32x4_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_223, __p3_223), vgetq_lane_u32(*(uint32x4_t *) &__reint_223, __p3_223), vgetq_lane_u32(*(uint32x4_t *) &__reint_223, __p3_223), vgetq_lane_u32(*(uint32x4_t *) &__reint_223, __p3_223)}; \ __ret_223 = 
vcmlaq_rot180_f16(__s0_223, __s1_223, *(float16x8_t *) &__reint1_223); \ __ret_223; \ }) #else #define vcmlaq_rot180_laneq_f16(__p0_224, __p1_224, __p2_224, __p3_224) __extension__ ({ \ float16x8_t __ret_224; \ float16x8_t __s0_224 = __p0_224; \ float16x8_t __s1_224 = __p1_224; \ float16x8_t __s2_224 = __p2_224; \ float16x8_t __rev0_224; __rev0_224 = __builtin_shufflevector(__s0_224, __s0_224, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1_224; __rev1_224 = __builtin_shufflevector(__s1_224, __s1_224, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev2_224; __rev2_224 = __builtin_shufflevector(__s2_224, __s2_224, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __reint_224 = __rev2_224; \ uint32x4_t __reint1_224 = (uint32x4_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_224, __p3_224), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_224, __p3_224), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_224, __p3_224), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_224, __p3_224)}; \ __ret_224 = __noswap_vcmlaq_rot180_f16(__rev0_224, __rev1_224, *(float16x8_t *) &__reint1_224); \ __ret_224 = __builtin_shufflevector(__ret_224, __ret_224, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_224; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16"))) float16x8_t vcmlaq_rot270_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vcmlaq_rot270_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16"))) float16x8_t vcmlaq_rot270_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vcmlaq_rot270_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("v8.3a,fullfp16"))) float16x8_t __noswap_vcmlaq_rot270_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vcmlaq_rot270_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16"))) float16x4_t vcmla_rot270_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcmla_rot270_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16"))) float16x4_t vcmla_rot270_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vcmla_rot270_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("v8.3a,fullfp16"))) float16x4_t __noswap_vcmla_rot270_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcmla_rot270_f16((int8x8_t)__p0, (int8x8_t)__p1, 
(int8x8_t)__p2, 8); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vcmla_rot270_lane_f16(__p0_225, __p1_225, __p2_225, __p3_225) __extension__ ({ \ float16x4_t __ret_225; \ float16x4_t __s0_225 = __p0_225; \ float16x4_t __s1_225 = __p1_225; \ float16x4_t __s2_225 = __p2_225; \ float16x4_t __reint_225 = __s2_225; \ uint32x2_t __reint1_225 = (uint32x2_t) {vget_lane_u32(*(uint32x2_t *) &__reint_225, __p3_225), vget_lane_u32(*(uint32x2_t *) &__reint_225, __p3_225)}; \ __ret_225 = vcmla_rot270_f16(__s0_225, __s1_225, *(float16x4_t *) &__reint1_225); \ __ret_225; \ }) #else #define vcmla_rot270_lane_f16(__p0_226, __p1_226, __p2_226, __p3_226) __extension__ ({ \ float16x4_t __ret_226; \ float16x4_t __s0_226 = __p0_226; \ float16x4_t __s1_226 = __p1_226; \ float16x4_t __s2_226 = __p2_226; \ float16x4_t __rev0_226; __rev0_226 = __builtin_shufflevector(__s0_226, __s0_226, 3, 2, 1, 0); \ float16x4_t __rev1_226; __rev1_226 = __builtin_shufflevector(__s1_226, __s1_226, 3, 2, 1, 0); \ float16x4_t __rev2_226; __rev2_226 = __builtin_shufflevector(__s2_226, __s2_226, 3, 2, 1, 0); \ float16x4_t __reint_226 = __rev2_226; \ uint32x2_t __reint1_226 = (uint32x2_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_226, __p3_226), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_226, __p3_226)}; \ __ret_226 = __noswap_vcmla_rot270_f16(__rev0_226, __rev1_226, *(float16x4_t *) &__reint1_226); \ __ret_226 = __builtin_shufflevector(__ret_226, __ret_226, 3, 2, 1, 0); \ __ret_226; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmlaq_rot270_lane_f16(__p0_227, __p1_227, __p2_227, __p3_227) __extension__ ({ \ float16x8_t __ret_227; \ float16x8_t __s0_227 = __p0_227; \ float16x8_t __s1_227 = __p1_227; \ float16x4_t __s2_227 = __p2_227; \ float16x4_t __reint_227 = __s2_227; \ uint32x4_t __reint1_227 = (uint32x4_t) {vget_lane_u32(*(uint32x2_t *) &__reint_227, __p3_227), vget_lane_u32(*(uint32x2_t *) &__reint_227, __p3_227), vget_lane_u32(*(uint32x2_t *) &__reint_227, __p3_227), vget_lane_u32(*(uint32x2_t *) &__reint_227, __p3_227)}; \ __ret_227 = vcmlaq_rot270_f16(__s0_227, __s1_227, *(float16x8_t *) &__reint1_227); \ __ret_227; \ }) #else #define vcmlaq_rot270_lane_f16(__p0_228, __p1_228, __p2_228, __p3_228) __extension__ ({ \ float16x8_t __ret_228; \ float16x8_t __s0_228 = __p0_228; \ float16x8_t __s1_228 = __p1_228; \ float16x4_t __s2_228 = __p2_228; \ float16x8_t __rev0_228; __rev0_228 = __builtin_shufflevector(__s0_228, __s0_228, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1_228; __rev1_228 = __builtin_shufflevector(__s1_228, __s1_228, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x4_t __rev2_228; __rev2_228 = __builtin_shufflevector(__s2_228, __s2_228, 3, 2, 1, 0); \ float16x4_t __reint_228 = __rev2_228; \ uint32x4_t __reint1_228 = (uint32x4_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_228, __p3_228), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_228, __p3_228), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_228, __p3_228), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_228, __p3_228)}; \ __ret_228 = __noswap_vcmlaq_rot270_f16(__rev0_228, __rev1_228, *(float16x8_t *) &__reint1_228); \ __ret_228 = __builtin_shufflevector(__ret_228, __ret_228, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_228; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmla_rot270_laneq_f16(__p0_229, __p1_229, __p2_229, __p3_229) __extension__ ({ \ float16x4_t __ret_229; \ float16x4_t __s0_229 = __p0_229; \ float16x4_t __s1_229 = __p1_229; \ float16x8_t __s2_229 = __p2_229; \ float16x8_t __reint_229 = __s2_229; \ uint32x2_t __reint1_229 = (uint32x2_t) 
{vgetq_lane_u32(*(uint32x4_t *) &__reint_229, __p3_229), vgetq_lane_u32(*(uint32x4_t *) &__reint_229, __p3_229)}; \ __ret_229 = vcmla_rot270_f16(__s0_229, __s1_229, *(float16x4_t *) &__reint1_229); \ __ret_229; \ }) #else #define vcmla_rot270_laneq_f16(__p0_230, __p1_230, __p2_230, __p3_230) __extension__ ({ \ float16x4_t __ret_230; \ float16x4_t __s0_230 = __p0_230; \ float16x4_t __s1_230 = __p1_230; \ float16x8_t __s2_230 = __p2_230; \ float16x4_t __rev0_230; __rev0_230 = __builtin_shufflevector(__s0_230, __s0_230, 3, 2, 1, 0); \ float16x4_t __rev1_230; __rev1_230 = __builtin_shufflevector(__s1_230, __s1_230, 3, 2, 1, 0); \ float16x8_t __rev2_230; __rev2_230 = __builtin_shufflevector(__s2_230, __s2_230, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __reint_230 = __rev2_230; \ uint32x2_t __reint1_230 = (uint32x2_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_230, __p3_230), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_230, __p3_230)}; \ __ret_230 = __noswap_vcmla_rot270_f16(__rev0_230, __rev1_230, *(float16x4_t *) &__reint1_230); \ __ret_230 = __builtin_shufflevector(__ret_230, __ret_230, 3, 2, 1, 0); \ __ret_230; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmlaq_rot270_laneq_f16(__p0_231, __p1_231, __p2_231, __p3_231) __extension__ ({ \ float16x8_t __ret_231; \ float16x8_t __s0_231 = __p0_231; \ float16x8_t __s1_231 = __p1_231; \ float16x8_t __s2_231 = __p2_231; \ float16x8_t __reint_231 = __s2_231; \ uint32x4_t __reint1_231 = (uint32x4_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_231, __p3_231), vgetq_lane_u32(*(uint32x4_t *) &__reint_231, __p3_231), vgetq_lane_u32(*(uint32x4_t *) &__reint_231, __p3_231), vgetq_lane_u32(*(uint32x4_t *) &__reint_231, __p3_231)}; \ __ret_231 = vcmlaq_rot270_f16(__s0_231, __s1_231, *(float16x8_t *) &__reint1_231); \ __ret_231; \ }) #else #define vcmlaq_rot270_laneq_f16(__p0_232, __p1_232, __p2_232, __p3_232) __extension__ ({ \ float16x8_t __ret_232; \ float16x8_t __s0_232 = __p0_232; \ float16x8_t __s1_232 = __p1_232; \ float16x8_t __s2_232 = __p2_232; \ float16x8_t __rev0_232; __rev0_232 = __builtin_shufflevector(__s0_232, __s0_232, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1_232; __rev1_232 = __builtin_shufflevector(__s1_232, __s1_232, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev2_232; __rev2_232 = __builtin_shufflevector(__s2_232, __s2_232, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __reint_232 = __rev2_232; \ uint32x4_t __reint1_232 = (uint32x4_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_232, __p3_232), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_232, __p3_232), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_232, __p3_232), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_232, __p3_232)}; \ __ret_232 = __noswap_vcmlaq_rot270_f16(__rev0_232, __rev1_232, *(float16x8_t *) &__reint1_232); \ __ret_232 = __builtin_shufflevector(__ret_232, __ret_232, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_232; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16"))) float16x8_t vcmlaq_rot90_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vcmlaq_rot90_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16"))) float16x8_t vcmlaq_rot90_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t 
__rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vcmlaq_rot90_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("v8.3a,fullfp16"))) float16x8_t __noswap_vcmlaq_rot90_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vcmlaq_rot90_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16"))) float16x4_t vcmla_rot90_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcmla_rot90_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16"))) float16x4_t vcmla_rot90_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vcmla_rot90_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("v8.3a,fullfp16"))) float16x4_t __noswap_vcmla_rot90_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcmla_rot90_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vcmla_rot90_lane_f16(__p0_233, __p1_233, __p2_233, __p3_233) __extension__ ({ \ float16x4_t __ret_233; \ float16x4_t __s0_233 = __p0_233; \ float16x4_t __s1_233 = __p1_233; \ float16x4_t __s2_233 = __p2_233; \ float16x4_t __reint_233 = __s2_233; \ uint32x2_t __reint1_233 = (uint32x2_t) {vget_lane_u32(*(uint32x2_t *) &__reint_233, __p3_233), vget_lane_u32(*(uint32x2_t *) &__reint_233, __p3_233)}; \ __ret_233 = vcmla_rot90_f16(__s0_233, __s1_233, *(float16x4_t *) &__reint1_233); \ __ret_233; \ }) #else #define vcmla_rot90_lane_f16(__p0_234, __p1_234, __p2_234, __p3_234) __extension__ ({ \ float16x4_t __ret_234; \ float16x4_t __s0_234 = __p0_234; \ float16x4_t __s1_234 = __p1_234; \ float16x4_t __s2_234 = __p2_234; \ float16x4_t __rev0_234; __rev0_234 = __builtin_shufflevector(__s0_234, __s0_234, 3, 2, 1, 0); \ float16x4_t __rev1_234; __rev1_234 = __builtin_shufflevector(__s1_234, __s1_234, 3, 2, 1, 0); \ float16x4_t __rev2_234; __rev2_234 = __builtin_shufflevector(__s2_234, __s2_234, 3, 2, 1, 0); \ float16x4_t __reint_234 = __rev2_234; \ uint32x2_t __reint1_234 = (uint32x2_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_234, __p3_234), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_234, __p3_234)}; \ __ret_234 = __noswap_vcmla_rot90_f16(__rev0_234, __rev1_234, *(float16x4_t *) &__reint1_234); \ __ret_234 = __builtin_shufflevector(__ret_234, __ret_234, 3, 2, 1, 0); \ __ret_234; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmlaq_rot90_lane_f16(__p0_235, __p1_235, __p2_235, __p3_235) __extension__ ({ \ float16x8_t __ret_235; \ float16x8_t __s0_235 = __p0_235; \ float16x8_t __s1_235 = __p1_235; \ float16x4_t __s2_235 = __p2_235; \ float16x4_t __reint_235 = __s2_235; \ uint32x4_t __reint1_235 = (uint32x4_t) {vget_lane_u32(*(uint32x2_t *) &__reint_235, 
__p3_235), vget_lane_u32(*(uint32x2_t *) &__reint_235, __p3_235), vget_lane_u32(*(uint32x2_t *) &__reint_235, __p3_235), vget_lane_u32(*(uint32x2_t *) &__reint_235, __p3_235)}; \ __ret_235 = vcmlaq_rot90_f16(__s0_235, __s1_235, *(float16x8_t *) &__reint1_235); \ __ret_235; \ }) #else #define vcmlaq_rot90_lane_f16(__p0_236, __p1_236, __p2_236, __p3_236) __extension__ ({ \ float16x8_t __ret_236; \ float16x8_t __s0_236 = __p0_236; \ float16x8_t __s1_236 = __p1_236; \ float16x4_t __s2_236 = __p2_236; \ float16x8_t __rev0_236; __rev0_236 = __builtin_shufflevector(__s0_236, __s0_236, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1_236; __rev1_236 = __builtin_shufflevector(__s1_236, __s1_236, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x4_t __rev2_236; __rev2_236 = __builtin_shufflevector(__s2_236, __s2_236, 3, 2, 1, 0); \ float16x4_t __reint_236 = __rev2_236; \ uint32x4_t __reint1_236 = (uint32x4_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_236, __p3_236), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_236, __p3_236), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_236, __p3_236), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_236, __p3_236)}; \ __ret_236 = __noswap_vcmlaq_rot90_f16(__rev0_236, __rev1_236, *(float16x8_t *) &__reint1_236); \ __ret_236 = __builtin_shufflevector(__ret_236, __ret_236, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_236; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmla_rot90_laneq_f16(__p0_237, __p1_237, __p2_237, __p3_237) __extension__ ({ \ float16x4_t __ret_237; \ float16x4_t __s0_237 = __p0_237; \ float16x4_t __s1_237 = __p1_237; \ float16x8_t __s2_237 = __p2_237; \ float16x8_t __reint_237 = __s2_237; \ uint32x2_t __reint1_237 = (uint32x2_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_237, __p3_237), vgetq_lane_u32(*(uint32x4_t *) &__reint_237, __p3_237)}; \ __ret_237 = vcmla_rot90_f16(__s0_237, __s1_237, *(float16x4_t *) &__reint1_237); \ __ret_237; \ }) #else #define vcmla_rot90_laneq_f16(__p0_238, __p1_238, __p2_238, __p3_238) __extension__ ({ \ float16x4_t __ret_238; \ float16x4_t __s0_238 = __p0_238; \ float16x4_t __s1_238 = __p1_238; \ float16x8_t __s2_238 = __p2_238; \ float16x4_t __rev0_238; __rev0_238 = __builtin_shufflevector(__s0_238, __s0_238, 3, 2, 1, 0); \ float16x4_t __rev1_238; __rev1_238 = __builtin_shufflevector(__s1_238, __s1_238, 3, 2, 1, 0); \ float16x8_t __rev2_238; __rev2_238 = __builtin_shufflevector(__s2_238, __s2_238, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __reint_238 = __rev2_238; \ uint32x2_t __reint1_238 = (uint32x2_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_238, __p3_238), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_238, __p3_238)}; \ __ret_238 = __noswap_vcmla_rot90_f16(__rev0_238, __rev1_238, *(float16x4_t *) &__reint1_238); \ __ret_238 = __builtin_shufflevector(__ret_238, __ret_238, 3, 2, 1, 0); \ __ret_238; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmlaq_rot90_laneq_f16(__p0_239, __p1_239, __p2_239, __p3_239) __extension__ ({ \ float16x8_t __ret_239; \ float16x8_t __s0_239 = __p0_239; \ float16x8_t __s1_239 = __p1_239; \ float16x8_t __s2_239 = __p2_239; \ float16x8_t __reint_239 = __s2_239; \ uint32x4_t __reint1_239 = (uint32x4_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_239, __p3_239), vgetq_lane_u32(*(uint32x4_t *) &__reint_239, __p3_239), vgetq_lane_u32(*(uint32x4_t *) &__reint_239, __p3_239), vgetq_lane_u32(*(uint32x4_t *) &__reint_239, __p3_239)}; \ __ret_239 = vcmlaq_rot90_f16(__s0_239, __s1_239, *(float16x8_t *) &__reint1_239); \ __ret_239; \ }) #else #define vcmlaq_rot90_laneq_f16(__p0_240, __p1_240, __p2_240, __p3_240) 
__extension__ ({ \ float16x8_t __ret_240; \ float16x8_t __s0_240 = __p0_240; \ float16x8_t __s1_240 = __p1_240; \ float16x8_t __s2_240 = __p2_240; \ float16x8_t __rev0_240; __rev0_240 = __builtin_shufflevector(__s0_240, __s0_240, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1_240; __rev1_240 = __builtin_shufflevector(__s1_240, __s1_240, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev2_240; __rev2_240 = __builtin_shufflevector(__s2_240, __s2_240, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __reint_240 = __rev2_240; \ uint32x4_t __reint1_240 = (uint32x4_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_240, __p3_240), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_240, __p3_240), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_240, __p3_240), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_240, __p3_240)}; \ __ret_240 = __noswap_vcmlaq_rot90_f16(__rev0_240, __rev1_240, *(float16x8_t *) &__reint1_240); \ __ret_240 = __builtin_shufflevector(__ret_240, __ret_240, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_240; \ }) #endif #if !defined(__aarch64__) #ifdef __LITTLE_ENDIAN__ #define vqdmulhq_lane_s32(__p0_241, __p1_241, __p2_241) __extension__ ({ \ int32x4_t __ret_241; \ int32x4_t __s0_241 = __p0_241; \ int32x2_t __s1_241 = __p1_241; \ __ret_241 = vqdmulhq_s32(__s0_241, splatq_lane_s32(__s1_241, __p2_241)); \ __ret_241; \ }) #else #define vqdmulhq_lane_s32(__p0_242, __p1_242, __p2_242) __extension__ ({ \ int32x4_t __ret_242; \ int32x4_t __s0_242 = __p0_242; \ int32x2_t __s1_242 = __p1_242; \ int32x4_t __rev0_242; __rev0_242 = __builtin_shufflevector(__s0_242, __s0_242, 3, 2, 1, 0); \ int32x2_t __rev1_242; __rev1_242 = __builtin_shufflevector(__s1_242, __s1_242, 1, 0); \ __ret_242 = __noswap_vqdmulhq_s32(__rev0_242, __noswap_splatq_lane_s32(__rev1_242, __p2_242)); \ __ret_242 = __builtin_shufflevector(__ret_242, __ret_242, 3, 2, 1, 0); \ __ret_242; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmulhq_lane_s16(__p0_243, __p1_243, __p2_243) __extension__ ({ \ int16x8_t __ret_243; \ int16x8_t __s0_243 = __p0_243; \ int16x4_t __s1_243 = __p1_243; \ __ret_243 = vqdmulhq_s16(__s0_243, splatq_lane_s16(__s1_243, __p2_243)); \ __ret_243; \ }) #else #define vqdmulhq_lane_s16(__p0_244, __p1_244, __p2_244) __extension__ ({ \ int16x8_t __ret_244; \ int16x8_t __s0_244 = __p0_244; \ int16x4_t __s1_244 = __p1_244; \ int16x8_t __rev0_244; __rev0_244 = __builtin_shufflevector(__s0_244, __s0_244, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x4_t __rev1_244; __rev1_244 = __builtin_shufflevector(__s1_244, __s1_244, 3, 2, 1, 0); \ __ret_244 = __noswap_vqdmulhq_s16(__rev0_244, __noswap_splatq_lane_s16(__rev1_244, __p2_244)); \ __ret_244 = __builtin_shufflevector(__ret_244, __ret_244, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_244; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmulh_lane_s32(__p0_245, __p1_245, __p2_245) __extension__ ({ \ int32x2_t __ret_245; \ int32x2_t __s0_245 = __p0_245; \ int32x2_t __s1_245 = __p1_245; \ __ret_245 = vqdmulh_s32(__s0_245, splat_lane_s32(__s1_245, __p2_245)); \ __ret_245; \ }) #else #define vqdmulh_lane_s32(__p0_246, __p1_246, __p2_246) __extension__ ({ \ int32x2_t __ret_246; \ int32x2_t __s0_246 = __p0_246; \ int32x2_t __s1_246 = __p1_246; \ int32x2_t __rev0_246; __rev0_246 = __builtin_shufflevector(__s0_246, __s0_246, 1, 0); \ int32x2_t __rev1_246; __rev1_246 = __builtin_shufflevector(__s1_246, __s1_246, 1, 0); \ __ret_246 = __noswap_vqdmulh_s32(__rev0_246, __noswap_splat_lane_s32(__rev1_246, __p2_246)); \ __ret_246 = __builtin_shufflevector(__ret_246, __ret_246, 1, 0); \ __ret_246; \ }) #endif #ifdef 
__LITTLE_ENDIAN__ #define vqdmulh_lane_s16(__p0_247, __p1_247, __p2_247) __extension__ ({ \ int16x4_t __ret_247; \ int16x4_t __s0_247 = __p0_247; \ int16x4_t __s1_247 = __p1_247; \ __ret_247 = vqdmulh_s16(__s0_247, splat_lane_s16(__s1_247, __p2_247)); \ __ret_247; \ }) #else #define vqdmulh_lane_s16(__p0_248, __p1_248, __p2_248) __extension__ ({ \ int16x4_t __ret_248; \ int16x4_t __s0_248 = __p0_248; \ int16x4_t __s1_248 = __p1_248; \ int16x4_t __rev0_248; __rev0_248 = __builtin_shufflevector(__s0_248, __s0_248, 3, 2, 1, 0); \ int16x4_t __rev1_248; __rev1_248 = __builtin_shufflevector(__s1_248, __s1_248, 3, 2, 1, 0); \ __ret_248 = __noswap_vqdmulh_s16(__rev0_248, __noswap_splat_lane_s16(__rev1_248, __p2_248)); \ __ret_248 = __builtin_shufflevector(__ret_248, __ret_248, 3, 2, 1, 0); \ __ret_248; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmulhq_lane_s32(__p0_249, __p1_249, __p2_249) __extension__ ({ \ int32x4_t __ret_249; \ int32x4_t __s0_249 = __p0_249; \ int32x2_t __s1_249 = __p1_249; \ __ret_249 = vqrdmulhq_s32(__s0_249, splatq_lane_s32(__s1_249, __p2_249)); \ __ret_249; \ }) #else #define vqrdmulhq_lane_s32(__p0_250, __p1_250, __p2_250) __extension__ ({ \ int32x4_t __ret_250; \ int32x4_t __s0_250 = __p0_250; \ int32x2_t __s1_250 = __p1_250; \ int32x4_t __rev0_250; __rev0_250 = __builtin_shufflevector(__s0_250, __s0_250, 3, 2, 1, 0); \ int32x2_t __rev1_250; __rev1_250 = __builtin_shufflevector(__s1_250, __s1_250, 1, 0); \ __ret_250 = __noswap_vqrdmulhq_s32(__rev0_250, __noswap_splatq_lane_s32(__rev1_250, __p2_250)); \ __ret_250 = __builtin_shufflevector(__ret_250, __ret_250, 3, 2, 1, 0); \ __ret_250; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmulhq_lane_s16(__p0_251, __p1_251, __p2_251) __extension__ ({ \ int16x8_t __ret_251; \ int16x8_t __s0_251 = __p0_251; \ int16x4_t __s1_251 = __p1_251; \ __ret_251 = vqrdmulhq_s16(__s0_251, splatq_lane_s16(__s1_251, __p2_251)); \ __ret_251; \ }) #else #define vqrdmulhq_lane_s16(__p0_252, __p1_252, __p2_252) __extension__ ({ \ int16x8_t __ret_252; \ int16x8_t __s0_252 = __p0_252; \ int16x4_t __s1_252 = __p1_252; \ int16x8_t __rev0_252; __rev0_252 = __builtin_shufflevector(__s0_252, __s0_252, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x4_t __rev1_252; __rev1_252 = __builtin_shufflevector(__s1_252, __s1_252, 3, 2, 1, 0); \ __ret_252 = __noswap_vqrdmulhq_s16(__rev0_252, __noswap_splatq_lane_s16(__rev1_252, __p2_252)); \ __ret_252 = __builtin_shufflevector(__ret_252, __ret_252, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_252; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmulh_lane_s32(__p0_253, __p1_253, __p2_253) __extension__ ({ \ int32x2_t __ret_253; \ int32x2_t __s0_253 = __p0_253; \ int32x2_t __s1_253 = __p1_253; \ __ret_253 = vqrdmulh_s32(__s0_253, splat_lane_s32(__s1_253, __p2_253)); \ __ret_253; \ }) #else #define vqrdmulh_lane_s32(__p0_254, __p1_254, __p2_254) __extension__ ({ \ int32x2_t __ret_254; \ int32x2_t __s0_254 = __p0_254; \ int32x2_t __s1_254 = __p1_254; \ int32x2_t __rev0_254; __rev0_254 = __builtin_shufflevector(__s0_254, __s0_254, 1, 0); \ int32x2_t __rev1_254; __rev1_254 = __builtin_shufflevector(__s1_254, __s1_254, 1, 0); \ __ret_254 = __noswap_vqrdmulh_s32(__rev0_254, __noswap_splat_lane_s32(__rev1_254, __p2_254)); \ __ret_254 = __builtin_shufflevector(__ret_254, __ret_254, 1, 0); \ __ret_254; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmulh_lane_s16(__p0_255, __p1_255, __p2_255) __extension__ ({ \ int16x4_t __ret_255; \ int16x4_t __s0_255 = __p0_255; \ int16x4_t __s1_255 = __p1_255; \ __ret_255 = vqrdmulh_s16(__s0_255, 
splat_lane_s16(__s1_255, __p2_255)); \
  __ret_255; \
})
#else
#define vqrdmulh_lane_s16(__p0_256, __p1_256, __p2_256) __extension__ ({ \
  int16x4_t __ret_256; \
  int16x4_t __s0_256 = __p0_256; \
  int16x4_t __s1_256 = __p1_256; \
  int16x4_t __rev0_256; __rev0_256 = __builtin_shufflevector(__s0_256, __s0_256, 3, 2, 1, 0); \
  int16x4_t __rev1_256; __rev1_256 = __builtin_shufflevector(__s1_256, __s1_256, 3, 2, 1, 0); \
  __ret_256 = __noswap_vqrdmulh_s16(__rev0_256, __noswap_splat_lane_s16(__rev1_256, __p2_256)); \
  __ret_256 = __builtin_shufflevector(__ret_256, __ret_256, 3, 2, 1, 0); \
  __ret_256; \
})
#endif
__ai poly8x8_t vreinterpret_p8_p16(poly16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; }
__ai poly8x8_t vreinterpret_p8_u8(uint8x8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; }
__ai poly8x8_t vreinterpret_p8_u32(uint32x2_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; }
__ai poly8x8_t vreinterpret_p8_u64(uint64x1_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; }
__ai poly8x8_t vreinterpret_p8_u16(uint16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; }
__ai poly8x8_t vreinterpret_p8_s8(int8x8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; }
__ai poly8x8_t vreinterpret_p8_f32(float32x2_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; }
__ai poly8x8_t vreinterpret_p8_f16(float16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; }
__ai poly8x8_t vreinterpret_p8_s32(int32x2_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; }
__ai poly8x8_t vreinterpret_p8_s64(int64x1_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; }
__ai poly8x8_t vreinterpret_p8_s16(int16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; }
__ai poly16x4_t vreinterpret_p16_p8(poly8x8_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; }
__ai poly16x4_t vreinterpret_p16_u8(uint8x8_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; }
__ai poly16x4_t vreinterpret_p16_u32(uint32x2_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; }
__ai poly16x4_t vreinterpret_p16_u64(uint64x1_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; }
__ai poly16x4_t vreinterpret_p16_u16(uint16x4_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; }
__ai poly16x4_t vreinterpret_p16_s8(int8x8_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; }
__ai poly16x4_t vreinterpret_p16_f32(float32x2_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; }
__ai poly16x4_t vreinterpret_p16_f16(float16x4_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; }
__ai poly16x4_t vreinterpret_p16_s32(int32x2_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; }
__ai poly16x4_t vreinterpret_p16_s64(int64x1_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; }
__ai poly16x4_t vreinterpret_p16_s16(int16x4_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; }
__ai poly8x16_t vreinterpretq_p8_p16(poly16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; }
__ai poly8x16_t vreinterpretq_p8_u8(uint8x16_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; }
__ai poly8x16_t vreinterpretq_p8_u32(uint32x4_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; }
__ai poly8x16_t vreinterpretq_p8_u64(uint64x2_t __p0) {
poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly8x16_t vreinterpretq_p8_u16(uint16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly8x16_t vreinterpretq_p8_s8(int8x16_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly8x16_t vreinterpretq_p8_f32(float32x4_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly8x16_t vreinterpretq_p8_f16(float16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly8x16_t vreinterpretq_p8_s32(int32x4_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly8x16_t vreinterpretq_p8_s64(int64x2_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly8x16_t vreinterpretq_p8_s16(int16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_p8(poly8x16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_u8(uint8x16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_u32(uint32x4_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_u64(uint64x2_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_u16(uint16x8_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_s8(int8x16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_f32(float32x4_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_f16(float16x8_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_s32(int32x4_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_s64(int64x2_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_s16(int16x8_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_p8(poly8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_p16(poly16x8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_u32(uint32x4_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_u64(uint64x2_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_u16(uint16x8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_s8(int8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_f32(float32x4_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_f16(float16x8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_s32(int32x4_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_s64(int64x2_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_s16(int16x8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_p8(poly8x16_t __p0) { uint32x4_t 
__ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_p16(poly16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_u8(uint8x16_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_u64(uint64x2_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_u16(uint16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_s8(int8x16_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_f16(float16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_s32(int32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_s64(int64x2_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_s16(int16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_p8(poly8x16_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_p16(poly16x8_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_u8(uint8x16_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_u32(uint32x4_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_u16(uint16x8_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_s8(int8x16_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_f32(float32x4_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_f16(float16x8_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_s32(int32x4_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_s64(int64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_s16(int16x8_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_p8(poly8x16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_p16(poly16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_u8(uint8x16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_u32(uint32x4_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_u64(uint64x2_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_s8(int8x16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_f32(float32x4_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_s32(int32x4_t __p0) { 
uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_s64(int64x2_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_s16(int16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_p8(poly8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_p16(poly16x8_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_u8(uint8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_u32(uint32x4_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_u64(uint64x2_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_u16(uint16x8_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_f32(float32x4_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_f16(float16x8_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_s32(int32x4_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_s64(int64x2_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_s16(int16x8_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_p8(poly8x16_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_p16(poly16x8_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_u8(uint8x16_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_u32(uint32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_u64(uint64x2_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_u16(uint16x8_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_s8(int8x16_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_f16(float16x8_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_s32(int32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_s64(int64x2_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_s16(int16x8_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_p8(poly8x16_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_p16(poly16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_u8(uint8x16_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_u32(uint32x4_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_u64(uint64x2_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_u16(uint16x8_t 
__p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_s8(int8x16_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_f32(float32x4_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_s32(int32x4_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_s64(int64x2_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_s16(int16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_p8(poly8x16_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_p16(poly16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_u8(uint8x16_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_u32(uint32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_u64(uint64x2_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_u16(uint16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_s8(int8x16_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_f32(float32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_f16(float16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_s64(int64x2_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_s16(int16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_p8(poly8x16_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_p16(poly16x8_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_u8(uint8x16_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_u32(uint32x4_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_u64(uint64x2_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_u16(uint16x8_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_s8(int8x16_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_f32(float32x4_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_f16(float16x8_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_s32(int32x4_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_s16(int16x8_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_p8(poly8x16_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_p16(poly16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_u8(uint8x16_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); 
return __ret; } __ai int16x8_t vreinterpretq_s16_u32(uint32x4_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_u64(uint64x2_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_u16(uint16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_s8(int8x16_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_f32(float32x4_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_f16(float16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_s32(int32x4_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_s64(int64x2_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_p8(poly8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_p16(poly16x4_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_u32(uint32x2_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_u64(uint64x1_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_u16(uint16x4_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_s8(int8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_f32(float32x2_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_f16(float16x4_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_s32(int32x2_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_s64(int64x1_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_s16(int16x4_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_p8(poly8x8_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_p16(poly16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_u8(uint8x8_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_u64(uint64x1_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_u16(uint16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_s8(int8x8_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_f32(float32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_f16(float16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_s32(int32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_s64(int64x1_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_s16(int16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_p8(poly8x8_t __p0) { uint64x1_t __ret; 
__ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_p16(poly16x4_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_u8(uint8x8_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_u32(uint32x2_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_u16(uint16x4_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_s8(int8x8_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_f32(float32x2_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_f16(float16x4_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_s32(int32x2_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_s64(int64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_s16(int16x4_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_p8(poly8x8_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_p16(poly16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_u8(uint8x8_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_u32(uint32x2_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_u64(uint64x1_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_s8(int8x8_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_f32(float32x2_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_s32(int32x2_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_s64(int64x1_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_s16(int16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_p8(poly8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_p16(poly16x4_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_u8(uint8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_u32(uint32x2_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_u64(uint64x1_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_u16(uint16x4_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_f32(float32x2_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_f16(float16x4_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_s32(int32x2_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_s64(int64x1_t __p0) 
{ int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_s16(int16x4_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_p8(poly8x8_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_p16(poly16x4_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_u8(uint8x8_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_u32(uint32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_u64(uint64x1_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_u16(uint16x4_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_s8(int8x8_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_f16(float16x4_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_s32(int32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_s64(int64x1_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_s16(int16x4_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_p8(poly8x8_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_p16(poly16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_u8(uint8x8_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_u32(uint32x2_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_u64(uint64x1_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_u16(uint16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_s8(int8x8_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_f32(float32x2_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_s32(int32x2_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_s64(int64x1_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_s16(int16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_p8(poly8x8_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_p16(poly16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_u8(uint8x8_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_u32(uint32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_u64(uint64x1_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_u16(uint16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_s8(int8x8_t __p0) { 
int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_f32(float32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_f16(float16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_s64(int64x1_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_s16(int16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_p8(poly8x8_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_p16(poly16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_u8(uint8x8_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_u32(uint32x2_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_u64(uint64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_u16(uint16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_s8(int8x8_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_f32(float32x2_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_f16(float16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_s32(int32x2_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_s16(int16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_p8(poly8x8_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_p16(poly16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_u8(uint8x8_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_u32(uint32x2_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_u64(uint64x1_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_u16(uint16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_s8(int8x8_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_f32(float32x2_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_f16(float16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_s32(int32x2_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_s64(int64x1_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) bfloat16x4_t __a32_vcvt_bf16_f32(float32x4_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t) __builtin_neon___a32_vcvt_bf16_f32((int8x16_t)__p0, 11); return __ret; } #else __ai __attribute__((target("bf16"))) bfloat16x4_t __a32_vcvt_bf16_f32(float32x4_t __p0) { bfloat16x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (bfloat16x4_t) __builtin_neon___a32_vcvt_bf16_f32((int8x16_t)__rev0, 11); __ret = 
__builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t __noswap___a32_vcvt_bf16_f32(float32x4_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t) __builtin_neon___a32_vcvt_bf16_f32((int8x16_t)__p0, 11); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) bfloat16x4_t vcvt_bf16_f32(float32x4_t __p0) { bfloat16x4_t __ret; __ret = __a32_vcvt_bf16_f32(__p0); return __ret; } #else __ai __attribute__((target("bf16"))) bfloat16x4_t vcvt_bf16_f32(float32x4_t __p0) { bfloat16x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __noswap___a32_vcvt_bf16_f32(__rev0); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) bfloat16x8_t vcvtq_high_bf16_f32(bfloat16x8_t __p0, float32x4_t __p1) { bfloat16x8_t __ret; __ret = vcombine_bf16(__a32_vcvt_bf16_f32(__p1), vget_low_bf16(__p0)); return __ret; } #else __ai __attribute__((target("bf16"))) bfloat16x8_t vcvtq_high_bf16_f32(bfloat16x8_t __p0, float32x4_t __p1) { bfloat16x8_t __ret; bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vcombine_bf16(__noswap___a32_vcvt_bf16_f32(__rev1), __noswap_vget_low_bf16(__rev0)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) bfloat16x8_t vcvtq_low_bf16_f32(float32x4_t __p0) { bfloat16x8_t __ret; __ret = vcombine_bf16((bfloat16x4_t)(0ULL), __a32_vcvt_bf16_f32(__p0)); return __ret; } #else __ai __attribute__((target("bf16"))) bfloat16x8_t vcvtq_low_bf16_f32(float32x4_t __p0) { bfloat16x8_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __noswap_vcombine_bf16((bfloat16x4_t)(0ULL), __noswap___a32_vcvt_bf16_f32(__rev0)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif __ai __attribute__((target("bf16"))) poly8x8_t vreinterpret_p8_bf16(bfloat16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) poly64x1_t vreinterpret_p64_bf16(bfloat16x4_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) poly16x4_t vreinterpret_p16_bf16(bfloat16x4_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) poly8x16_t vreinterpretq_p8_bf16(bfloat16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) poly64x2_t vreinterpretq_p64_bf16(bfloat16x8_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) poly16x8_t vreinterpretq_p16_bf16(bfloat16x8_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) uint8x16_t vreinterpretq_u8_bf16(bfloat16x8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) uint32x4_t vreinterpretq_u32_bf16(bfloat16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) uint64x2_t vreinterpretq_u64_bf16(bfloat16x8_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) uint16x8_t 
vreinterpretq_u16_bf16(bfloat16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) int8x16_t vreinterpretq_s8_bf16(bfloat16x8_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) float32x4_t vreinterpretq_f32_bf16(bfloat16x8_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) float16x8_t vreinterpretq_f16_bf16(bfloat16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) int32x4_t vreinterpretq_s32_bf16(bfloat16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) int64x2_t vreinterpretq_s64_bf16(bfloat16x8_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) int16x8_t vreinterpretq_s16_bf16(bfloat16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) uint8x8_t vreinterpret_u8_bf16(bfloat16x4_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) uint32x2_t vreinterpret_u32_bf16(bfloat16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) uint64x1_t vreinterpret_u64_bf16(bfloat16x4_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) uint16x4_t vreinterpret_u16_bf16(bfloat16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) int8x8_t vreinterpret_s8_bf16(bfloat16x4_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) float32x2_t vreinterpret_f32_bf16(bfloat16x4_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) float16x4_t vreinterpret_f16_bf16(bfloat16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) int32x2_t vreinterpret_s32_bf16(bfloat16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) int64x1_t vreinterpret_s64_bf16(bfloat16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) int16x4_t vreinterpret_s16_bf16(bfloat16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_p8(poly8x16_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_p64(poly64x2_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_p16(poly16x8_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_u8(uint8x16_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_u32(uint32x4_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_u64(uint64x2_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t 
vreinterpretq_bf16_u16(uint16x8_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_s8(int8x16_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_f32(float32x4_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_f16(float16x8_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_s32(int32x4_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_s64(int64x2_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_s16(int16x8_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_p8(poly8x8_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_p64(poly64x1_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_p16(poly16x4_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_u8(uint8x8_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_u32(uint32x2_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_u64(uint64x1_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_u16(uint16x4_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_s8(int8x8_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_f32(float32x2_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_f16(float16x4_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_s32(int32x2_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_s64(int64x1_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_s16(int16x4_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } #endif #if (__ARM_FP & 2) #ifdef __LITTLE_ENDIAN__ __ai float16x4_t vcvt_f16_f32(float32x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcvt_f16_f32((int8x16_t)__p0, 41); return __ret; } #else __ai float16x4_t vcvt_f16_f32(float32x4_t __p0) { float16x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vcvt_f16_f32((int8x16_t)__rev0, 41); __ret = 
__builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai float16x4_t __noswap_vcvt_f16_f32(float32x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcvt_f16_f32((int8x16_t)__p0, 41); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vcvt_f32_f16(float16x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcvt_f32_f16((int8x8_t)__p0, 8); return __ret; } #else __ai float32x4_t vcvt_f32_f16(float16x4_t __p0) { float32x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vcvt_f32_f16((int8x8_t)__rev0, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcvt_f32_f16((int8x8_t)__p0, 8); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_f16(__p0) __extension__ ({ \ float16x8_t __ret; \ __ret = (float16x8_t) __builtin_neon_vld1q_v(__p0, 40); \ __ret; \ }) #else #define vld1q_f16(__p0) __extension__ ({ \ float16x8_t __ret; \ __ret = (float16x8_t) __builtin_neon_vld1q_v(__p0, 40); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_f16(__p0) __extension__ ({ \ float16x4_t __ret; \ __ret = (float16x4_t) __builtin_neon_vld1_v(__p0, 8); \ __ret; \ }) #else #define vld1_f16(__p0) __extension__ ({ \ float16x4_t __ret; \ __ret = (float16x4_t) __builtin_neon_vld1_v(__p0, 8); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_f16(__p0) __extension__ ({ \ float16x8_t __ret; \ __ret = (float16x8_t) __builtin_neon_vld1q_dup_v(__p0, 40); \ __ret; \ }) #else #define vld1q_dup_f16(__p0) __extension__ ({ \ float16x8_t __ret; \ __ret = (float16x8_t) __builtin_neon_vld1q_dup_v(__p0, 40); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_dup_f16(__p0) __extension__ ({ \ float16x4_t __ret; \ __ret = (float16x4_t) __builtin_neon_vld1_dup_v(__p0, 8); \ __ret; \ }) #else #define vld1_dup_f16(__p0) __extension__ ({ \ float16x4_t __ret; \ __ret = (float16x4_t) __builtin_neon_vld1_dup_v(__p0, 8); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s1 = __p1; \ __ret = (float16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 40); \ __ret; \ }) #else #define vld1q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s1 = __p1; \ float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (float16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 40); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s1 = __p1; \ __ret = (float16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 8); \ __ret; \ }) #else #define vld1_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s1 = __p1; \ float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (float16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 8); \ __ret = 
__builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_f16_x2(__p0) __extension__ ({ \ float16x8x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 40); \ __ret; \ }) #else #define vld1q_f16_x2(__p0) __extension__ ({ \ float16x8x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 40); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_f16_x2(__p0) __extension__ ({ \ float16x4x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 8); \ __ret; \ }) #else #define vld1_f16_x2(__p0) __extension__ ({ \ float16x4x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 8); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_f16_x3(__p0) __extension__ ({ \ float16x8x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 40); \ __ret; \ }) #else #define vld1q_f16_x3(__p0) __extension__ ({ \ float16x8x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 40); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_f16_x3(__p0) __extension__ ({ \ float16x4x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 8); \ __ret; \ }) #else #define vld1_f16_x3(__p0) __extension__ ({ \ float16x4x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 8); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_f16_x4(__p0) __extension__ ({ \ float16x8x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 40); \ __ret; \ }) #else #define vld1q_f16_x4(__p0) __extension__ ({ \ float16x8x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 40); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1_f16_x4(__p0) __extension__ ({ \ float16x4x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 8); \ __ret; \ }) #else #define vld1_f16_x4(__p0) __extension__ ({ \ float16x4x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 8); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_f16(__p0) __extension__ ({ \ float16x8x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 40); \ 
__ret; \ }) #else #define vld2q_f16(__p0) __extension__ ({ \ float16x8x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 40); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_f16(__p0) __extension__ ({ \ float16x4x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 8); \ __ret; \ }) #else #define vld2_f16(__p0) __extension__ ({ \ float16x4x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 8); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_dup_f16(__p0) __extension__ ({ \ float16x8x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 40); \ __ret; \ }) #else #define vld2q_dup_f16(__p0) __extension__ ({ \ float16x8x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 40); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_dup_f16(__p0) __extension__ ({ \ float16x4x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 8); \ __ret; \ }) #else #define vld2_dup_f16(__p0) __extension__ ({ \ float16x4x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 8); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x2_t __ret; \ float16x8x2_t __s1 = __p1; \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 40); \ __ret; \ }) #else #define vld2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x2_t __ret; \ float16x8x2_t __s1 = __p1; \ float16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 40); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x2_t __ret; \ float16x4x2_t __s1 = __p1; \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 8); \ __ret; \ }) #else #define vld2_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x2_t __ret; \ float16x4x2_t __s1 = __p1; \ float16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 8); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_f16(__p0) __extension__ ({ \ float16x8x3_t __ret; \ 
__builtin_neon_vld3q_v(&__ret, __p0, 40); \ __ret; \ }) #else #define vld3q_f16(__p0) __extension__ ({ \ float16x8x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 40); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_f16(__p0) __extension__ ({ \ float16x4x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 8); \ __ret; \ }) #else #define vld3_f16(__p0) __extension__ ({ \ float16x4x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 8); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_dup_f16(__p0) __extension__ ({ \ float16x8x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 40); \ __ret; \ }) #else #define vld3q_dup_f16(__p0) __extension__ ({ \ float16x8x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 40); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_dup_f16(__p0) __extension__ ({ \ float16x4x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 8); \ __ret; \ }) #else #define vld3_dup_f16(__p0) __extension__ ({ \ float16x4x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 8); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x3_t __ret; \ float16x8x3_t __s1 = __p1; \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 40); \ __ret; \ }) #else #define vld3q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x3_t __ret; \ float16x8x3_t __s1 = __p1; \ float16x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 40); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x3_t __ret; \ float16x4x3_t __s1 = __p1; \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 8); \ __ret; \ }) #else #define 
vld3_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x3_t __ret; \ float16x4x3_t __s1 = __p1; \ float16x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 8); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_f16(__p0) __extension__ ({ \ float16x8x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 40); \ __ret; \ }) #else #define vld4q_f16(__p0) __extension__ ({ \ float16x8x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 40); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_f16(__p0) __extension__ ({ \ float16x4x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 8); \ __ret; \ }) #else #define vld4_f16(__p0) __extension__ ({ \ float16x4x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 8); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_dup_f16(__p0) __extension__ ({ \ float16x8x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 40); \ __ret; \ }) #else #define vld4q_dup_f16(__p0) __extension__ ({ \ float16x8x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 40); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_dup_f16(__p0) __extension__ ({ \ float16x4x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 8); \ __ret; \ }) #else #define vld4_dup_f16(__p0) __extension__ ({ \ float16x4x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 8); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x4_t __ret; \ float16x8x4_t __s1 = __p1; \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], 
(int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 40); \ __ret; \ }) #else #define vld4q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x4_t __ret; \ float16x8x4_t __s1 = __p1; \ float16x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 40); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x4_t __ret; \ float16x4x4_t __s1 = __p1; \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 8); \ __ret; \ }) #else #define vld4_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x4_t __ret; \ float16x4x4_t __s1 = __p1; \ float16x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 8); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f16(__p0, __p1) __extension__ ({ \ float16x8_t __s1 = __p1; \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 40); \ }) #else #define vst1q_f16(__p0, __p1) __extension__ ({ \ float16x8_t __s1 = __p1; \ float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_f16(__p0, __p1) __extension__ ({ \ float16x4_t __s1 = __p1; \ __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 8); \ }) #else #define vst1_f16(__p0, __p1) __extension__ ({ \ float16x4_t __s1 = __p1; \ float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8_t __s1 = __p1; \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 40); \ }) #else #define vst1q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8_t __s1 = __p1; \ float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 
1, 0); \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4_t __s1 = __p1; \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 8); \ }) #else #define vst1_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4_t __s1 = __p1; \ float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f16_x2(__p0, __p1) __extension__ ({ \ float16x8x2_t __s1 = __p1; \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 40); \ }) #else #define vst1q_f16_x2(__p0, __p1) __extension__ ({ \ float16x8x2_t __s1 = __p1; \ float16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_f16_x2(__p0, __p1) __extension__ ({ \ float16x4x2_t __s1 = __p1; \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 8); \ }) #else #define vst1_f16_x2(__p0, __p1) __extension__ ({ \ float16x4x2_t __s1 = __p1; \ float16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f16_x3(__p0, __p1) __extension__ ({ \ float16x8x3_t __s1 = __p1; \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 40); \ }) #else #define vst1q_f16_x3(__p0, __p1) __extension__ ({ \ float16x8x3_t __s1 = __p1; \ float16x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_f16_x3(__p0, __p1) __extension__ ({ \ float16x4x3_t __s1 = __p1; \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 8); \ }) #else #define vst1_f16_x3(__p0, __p1) __extension__ ({ \ float16x4x3_t __s1 = __p1; \ float16x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f16_x4(__p0, __p1) __extension__ ({ \ float16x8x4_t __s1 = __p1; \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 40); \ }) #else #define vst1q_f16_x4(__p0, __p1) __extension__ ({ \ float16x8x4_t __s1 = __p1; \ float16x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = 
__builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_f16_x4(__p0, __p1) __extension__ ({ \ float16x4x4_t __s1 = __p1; \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 8); \ }) #else #define vst1_f16_x4(__p0, __p1) __extension__ ({ \ float16x4x4_t __s1 = __p1; \ float16x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_f16(__p0, __p1) __extension__ ({ \ float16x8x2_t __s1 = __p1; \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 40); \ }) #else #define vst2q_f16(__p0, __p1) __extension__ ({ \ float16x8x2_t __s1 = __p1; \ float16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_f16(__p0, __p1) __extension__ ({ \ float16x4x2_t __s1 = __p1; \ __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 8); \ }) #else #define vst2_f16(__p0, __p1) __extension__ ({ \ float16x4x2_t __s1 = __p1; \ float16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x2_t __s1 = __p1; \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 40); \ }) #else #define vst2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x2_t __s1 = __p1; \ float16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x2_t __s1 = __p1; \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 8); \ }) #else #define vst2_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x2_t __s1 = __p1; \ float16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], 
(int8x8_t)__rev1.val[1], __p2, 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_f16(__p0, __p1) __extension__ ({ \ float16x8x3_t __s1 = __p1; \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 40); \ }) #else #define vst3q_f16(__p0, __p1) __extension__ ({ \ float16x8x3_t __s1 = __p1; \ float16x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_f16(__p0, __p1) __extension__ ({ \ float16x4x3_t __s1 = __p1; \ __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 8); \ }) #else #define vst3_f16(__p0, __p1) __extension__ ({ \ float16x4x3_t __s1 = __p1; \ float16x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x3_t __s1 = __p1; \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 40); \ }) #else #define vst3q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x3_t __s1 = __p1; \ float16x8x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x3_t __s1 = __p1; \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 8); \ }) #else #define vst3_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x3_t __s1 = __p1; \ float16x4x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_f16(__p0, __p1) __extension__ ({ \ float16x8x4_t __s1 = __p1; \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 40); \ }) #else #define vst4q_f16(__p0, __p1) __extension__ ({ \ float16x8x4_t __s1 = __p1; \ float16x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 
1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_f16(__p0, __p1) __extension__ ({ \ float16x4x4_t __s1 = __p1; \ __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 8); \ }) #else #define vst4_f16(__p0, __p1) __extension__ ({ \ float16x4x4_t __s1 = __p1; \ float16x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x4_t __s1 = __p1; \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 40); \ }) #else #define vst4q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x4_t __s1 = __p1; \ float16x8x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x4_t __s1 = __p1; \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 8); \ }) #else #define vst4_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x4_t __s1 = __p1; \ float16x4x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 8); \ }) #endif #endif #if __ARM_ARCH >= 8 #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vcvtaq_s32_f32(float32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vcvtaq_s32_v((int8x16_t)__p0, 34); return __ret; } #else __ai int32x4_t vcvtaq_s32_f32(float32x4_t __p0) { int32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vcvtaq_s32_v((int8x16_t)__rev0, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vcvta_s32_f32(float32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vcvta_s32_v((int8x8_t)__p0, 2); return __ret; } #else __ai int32x2_t vcvta_s32_f32(float32x2_t 
__p0) { int32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int32x2_t) __builtin_neon_vcvta_s32_v((int8x8_t)__rev0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcvtaq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vcvtaq_u32_v((int8x16_t)__p0, 50); return __ret; } #else __ai uint32x4_t vcvtaq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vcvtaq_u32_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcvta_u32_f32(float32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vcvta_u32_v((int8x8_t)__p0, 18); return __ret; } #else __ai uint32x2_t vcvta_u32_f32(float32x2_t __p0) { uint32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vcvta_u32_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vcvtmq_s32_f32(float32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vcvtmq_s32_v((int8x16_t)__p0, 34); return __ret; } #else __ai int32x4_t vcvtmq_s32_f32(float32x4_t __p0) { int32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vcvtmq_s32_v((int8x16_t)__rev0, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vcvtm_s32_f32(float32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vcvtm_s32_v((int8x8_t)__p0, 2); return __ret; } #else __ai int32x2_t vcvtm_s32_f32(float32x2_t __p0) { int32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int32x2_t) __builtin_neon_vcvtm_s32_v((int8x8_t)__rev0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcvtmq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vcvtmq_u32_v((int8x16_t)__p0, 50); return __ret; } #else __ai uint32x4_t vcvtmq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vcvtmq_u32_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcvtm_u32_f32(float32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vcvtm_u32_v((int8x8_t)__p0, 18); return __ret; } #else __ai uint32x2_t vcvtm_u32_f32(float32x2_t __p0) { uint32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vcvtm_u32_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vcvtnq_s32_f32(float32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vcvtnq_s32_v((int8x16_t)__p0, 34); return __ret; } #else __ai int32x4_t vcvtnq_s32_f32(float32x4_t __p0) { int32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vcvtnq_s32_v((int8x16_t)__rev0, 34); __ret = __builtin_shufflevector(__ret, __ret, 
3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vcvtn_s32_f32(float32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vcvtn_s32_v((int8x8_t)__p0, 2); return __ret; } #else __ai int32x2_t vcvtn_s32_f32(float32x2_t __p0) { int32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int32x2_t) __builtin_neon_vcvtn_s32_v((int8x8_t)__rev0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcvtnq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vcvtnq_u32_v((int8x16_t)__p0, 50); return __ret; } #else __ai uint32x4_t vcvtnq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vcvtnq_u32_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcvtn_u32_f32(float32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vcvtn_u32_v((int8x8_t)__p0, 18); return __ret; } #else __ai uint32x2_t vcvtn_u32_f32(float32x2_t __p0) { uint32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vcvtn_u32_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vcvtpq_s32_f32(float32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vcvtpq_s32_v((int8x16_t)__p0, 34); return __ret; } #else __ai int32x4_t vcvtpq_s32_f32(float32x4_t __p0) { int32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vcvtpq_s32_v((int8x16_t)__rev0, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vcvtp_s32_f32(float32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vcvtp_s32_v((int8x8_t)__p0, 2); return __ret; } #else __ai int32x2_t vcvtp_s32_f32(float32x2_t __p0) { int32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int32x2_t) __builtin_neon_vcvtp_s32_v((int8x8_t)__rev0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcvtpq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vcvtpq_u32_v((int8x16_t)__p0, 50); return __ret; } #else __ai uint32x4_t vcvtpq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vcvtpq_u32_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcvtp_u32_f32(float32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vcvtp_u32_v((int8x8_t)__p0, 18); return __ret; } #else __ai uint32x2_t vcvtp_u32_f32(float32x2_t __p0) { uint32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vcvtp_u32_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("aes"))) uint8x16_t vaesdq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) 
__builtin_neon_vaesdq_u8((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai __attribute__((target("aes"))) uint8x16_t vaesdq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vaesdq_u8((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("aes"))) uint8x16_t vaeseq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vaeseq_u8((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai __attribute__((target("aes"))) uint8x16_t vaeseq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vaeseq_u8((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("aes"))) uint8x16_t vaesimcq_u8(uint8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vaesimcq_u8((int8x16_t)__p0, 48); return __ret; } #else __ai __attribute__((target("aes"))) uint8x16_t vaesimcq_u8(uint8x16_t __p0) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vaesimcq_u8((int8x16_t)__rev0, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("aes"))) uint8x16_t vaesmcq_u8(uint8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vaesmcq_u8((int8x16_t)__p0, 48); return __ret; } #else __ai __attribute__((target("aes"))) uint8x16_t vaesmcq_u8(uint8x16_t __p0) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vaesmcq_u8((int8x16_t)__rev0, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha2"))) uint32x4_t vsha1cq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vsha1cq_u32(__p0, __p1, __p2); return __ret; } #else __ai __attribute__((target("sha2"))) uint32x4_t vsha1cq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vsha1cq_u32(__rev0, __p1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif __ai __attribute__((target("sha2"))) uint32_t vsha1h_u32(uint32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vsha1h_u32(__p0); return __ret; } #ifdef 
__LITTLE_ENDIAN__ __ai __attribute__((target("sha2"))) uint32x4_t vsha1mq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vsha1mq_u32(__p0, __p1, __p2); return __ret; } #else __ai __attribute__((target("sha2"))) uint32x4_t vsha1mq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vsha1mq_u32(__rev0, __p1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha2"))) uint32x4_t vsha1pq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vsha1pq_u32(__p0, __p1, __p2); return __ret; } #else __ai __attribute__((target("sha2"))) uint32x4_t vsha1pq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vsha1pq_u32(__rev0, __p1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha2"))) uint32x4_t vsha1su0q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vsha1su0q_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); return __ret; } #else __ai __attribute__((target("sha2"))) uint32x4_t vsha1su0q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vsha1su0q_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha2"))) uint32x4_t vsha1su1q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vsha1su1q_u32((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai __attribute__((target("sha2"))) uint32x4_t vsha1su1q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vsha1su1q_u32((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha2"))) uint32x4_t vsha256hq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vsha256hq_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); return __ret; } #else __ai __attribute__((target("sha2"))) uint32x4_t vsha256hq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = 
(uint32x4_t) __builtin_neon_vsha256hq_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha2"))) uint32x4_t vsha256h2q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vsha256h2q_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); return __ret; } #else __ai __attribute__((target("sha2"))) uint32x4_t vsha256h2q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vsha256h2q_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha2"))) uint32x4_t vsha256su0q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vsha256su0q_u32((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai __attribute__((target("sha2"))) uint32x4_t vsha256su0q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vsha256su0q_u32((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha2"))) uint32x4_t vsha256su1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vsha256su1q_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); return __ret; } #else __ai __attribute__((target("sha2"))) uint32x4_t vsha256su1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vsha256su1q_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #endif #if __ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING) #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vrndq_f32(float32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vrndq_v((int8x16_t)__p0, 41); return __ret; } #else __ai float32x4_t vrndq_f32(float32x4_t __p0) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vrndq_v((int8x16_t)__rev0, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vrnd_f32(float32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vrnd_v((int8x8_t)__p0, 9); return __ret; } #else __ai float32x2_t vrnd_f32(float32x2_t __p0) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32x2_t) __builtin_neon_vrnd_v((int8x8_t)__rev0, 9); __ret = __builtin_shufflevector(__ret, 
__ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vrndaq_f32(float32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vrndaq_v((int8x16_t)__p0, 41); return __ret; } #else __ai float32x4_t vrndaq_f32(float32x4_t __p0) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vrndaq_v((int8x16_t)__rev0, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vrnda_f32(float32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vrnda_v((int8x8_t)__p0, 9); return __ret; } #else __ai float32x2_t vrnda_f32(float32x2_t __p0) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32x2_t) __builtin_neon_vrnda_v((int8x8_t)__rev0, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vrndiq_f32(float32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vrndiq_v((int8x16_t)__p0, 41); return __ret; } #else __ai float32x4_t vrndiq_f32(float32x4_t __p0) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vrndiq_v((int8x16_t)__rev0, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vrndi_f32(float32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vrndi_v((int8x8_t)__p0, 9); return __ret; } #else __ai float32x2_t vrndi_f32(float32x2_t __p0) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32x2_t) __builtin_neon_vrndi_v((int8x8_t)__rev0, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vrndmq_f32(float32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vrndmq_v((int8x16_t)__p0, 41); return __ret; } #else __ai float32x4_t vrndmq_f32(float32x4_t __p0) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vrndmq_v((int8x16_t)__rev0, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vrndm_f32(float32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vrndm_v((int8x8_t)__p0, 9); return __ret; } #else __ai float32x2_t vrndm_f32(float32x2_t __p0) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32x2_t) __builtin_neon_vrndm_v((int8x8_t)__rev0, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vrndnq_f32(float32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vrndnq_v((int8x16_t)__p0, 41); return __ret; } #else __ai float32x4_t vrndnq_f32(float32x4_t __p0) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vrndnq_v((int8x16_t)__rev0, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vrndn_f32(float32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vrndn_v((int8x8_t)__p0, 9); return __ret; } #else __ai float32x2_t vrndn_f32(float32x2_t __p0) { 
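/* Big-endian variant: the __builtin_neon_* intrinsics assume little-endian lane
   numbering, so each vector operand is lane-reversed with __builtin_shufflevector
   before the builtin call and the result is reversed back afterwards. vrndn/vrndnq
   round to nearest with ties to even (FRINTN). */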
float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32x2_t) __builtin_neon_vrndn_v((int8x8_t)__rev0, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float32_t vrndns_f32(float32_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vrndns_f32(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vrndpq_f32(float32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vrndpq_v((int8x16_t)__p0, 41); return __ret; } #else __ai float32x4_t vrndpq_f32(float32x4_t __p0) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vrndpq_v((int8x16_t)__rev0, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vrndp_f32(float32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vrndp_v((int8x8_t)__p0, 9); return __ret; } #else __ai float32x2_t vrndp_f32(float32x2_t __p0) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32x2_t) __builtin_neon_vrndp_v((int8x8_t)__rev0, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vrndxq_f32(float32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vrndxq_v((int8x16_t)__p0, 41); return __ret; } #else __ai float32x4_t vrndxq_f32(float32x4_t __p0) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vrndxq_v((int8x16_t)__rev0, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vrndx_f32(float32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vrndx_v((int8x8_t)__p0, 9); return __ret; } #else __ai float32x2_t vrndx_f32(float32x2_t __p0) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32x2_t) __builtin_neon_vrndx_v((int8x8_t)__rev0, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vrndq_f16(float16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vrndq_f16((int8x16_t)__p0, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vrndq_f16(float16x8_t __p0) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vrndq_f16((int8x16_t)__rev0, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vrnd_f16(float16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vrnd_f16((int8x8_t)__p0, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vrnd_f16(float16x4_t __p0) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vrnd_f16((int8x8_t)__rev0, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vrndaq_f16(float16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vrndaq_f16((int8x16_t)__p0, 
40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vrndaq_f16(float16x8_t __p0) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vrndaq_f16((int8x16_t)__rev0, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vrnda_f16(float16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vrnda_f16((int8x8_t)__p0, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vrnda_f16(float16x4_t __p0) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vrnda_f16((int8x8_t)__rev0, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vrndmq_f16(float16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vrndmq_f16((int8x16_t)__p0, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vrndmq_f16(float16x8_t __p0) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vrndmq_f16((int8x16_t)__rev0, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vrndm_f16(float16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vrndm_f16((int8x8_t)__p0, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vrndm_f16(float16x4_t __p0) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vrndm_f16((int8x8_t)__rev0, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vrndnq_f16(float16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vrndnq_f16((int8x16_t)__p0, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vrndnq_f16(float16x8_t __p0) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vrndnq_f16((int8x16_t)__rev0, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vrndn_f16(float16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vrndn_f16((int8x8_t)__p0, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vrndn_f16(float16x4_t __p0) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vrndn_f16((int8x8_t)__rev0, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vrndpq_f16(float16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vrndpq_f16((int8x16_t)__p0, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vrndpq_f16(float16x8_t __p0) { float16x8_t __ret; float16x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vrndpq_f16((int8x16_t)__rev0, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vrndp_f16(float16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vrndp_f16((int8x8_t)__p0, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vrndp_f16(float16x4_t __p0) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vrndp_f16((int8x8_t)__rev0, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vrndxq_f16(float16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vrndxq_f16((int8x16_t)__p0, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vrndxq_f16(float16x8_t __p0) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vrndxq_f16((int8x16_t)__rev0, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vrndx_f16(float16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vrndx_f16((int8x8_t)__p0, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vrndx_f16(float16x4_t __p0) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vrndx_f16((int8x8_t)__rev0, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #endif #if __ARM_ARCH >= 8 && defined(__ARM_FEATURE_NUMERIC_MAXMIN) #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vmaxnmq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vmaxnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #else __ai float32x4_t vmaxnmq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vmaxnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vmaxnm_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vmaxnm_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } #else __ai float32x2_t vmaxnm_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float32x2_t) __builtin_neon_vmaxnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vminnmq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vminnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #else __ai float32x4_t vminnmq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vminnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vminnm_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vminnm_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } #else __ai float32x2_t vminnm_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float32x2_t) __builtin_neon_vminnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vmaxnmq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vmaxnmq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vmaxnmq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vmaxnmq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vmaxnm_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vmaxnm_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vmaxnm_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vmaxnm_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vminnmq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vminnmq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vminnmq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vminnmq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vminnm_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vminnm_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vminnm_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); 
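/* vminnm/vmaxnm (including the fullfp16 f16 forms) follow IEEE 754-2008
   minNum/maxNum semantics: when exactly one operand is a quiet NaN, the numeric
   operand is returned, unlike vmin/vmax. */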
float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vminnm_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #endif #if defined(__ARM_FEATURE_FMA) #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vfmaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #else __ai float32x4_t vfmaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vfmaq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai float32x4_t __noswap_vfmaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vfma_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #else __ai float32x2_t vfma_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (float32x2_t) __builtin_neon_vfma_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai float32x2_t __noswap_vfma_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vfmaq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { float32x4_t __ret; __ret = vfmaq_f32(__p0, __p1, (float32x4_t) {__p2, __p2, __p2, __p2}); return __ret; } #else __ai float32x4_t vfmaq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vfmaq_f32(__rev0, __rev1, (float32x4_t) {__p2, __p2, __p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vfma_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { float32x2_t __ret; __ret = vfma_f32(__p0, __p1, (float32x2_t) {__p2, __p2}); return __ret; } #else __ai float32x2_t vfma_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __noswap_vfma_f32(__rev0, __rev1, (float32x2_t) {__p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t 
vfmsq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = vfmaq_f32(__p0, -__p1, __p2); return __ret; } #else __ai float32x4_t vfmsq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __noswap_vfmaq_f32(__rev0, -__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vfms_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = vfma_f32(__p0, -__p1, __p2); return __ret; } #else __ai float32x2_t vfms_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __noswap_vfma_f32(__rev0, -__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #endif #if defined(__aarch64__) #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vabdq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); return __ret; } #else __ai float64x2_t vabdq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float64x2_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vabd_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 10); return __ret; } __ai float64_t vabdd_f64(float64_t __p0, float64_t __p1) { float64_t __ret; __ret = (float64_t) __builtin_neon_vabdd_f64(__p0, __p1); return __ret; } __ai float32_t vabds_f32(float32_t __p0, float32_t __p1) { float32_t __ret; __ret = (float32_t) __builtin_neon_vabds_f32(__p0, __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vabsq_f64(float64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 42); return __ret; } #else __ai float64x2_t vabsq_f64(float64x2_t __p0) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64x2_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vabsq_s64(int64x2_t __p0) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 35); return __ret; } #else __ai int64x2_t vabsq_s64(int64x2_t __p0) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int64x2_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vabs_f64(float64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vabs_v((int8x8_t)__p0, 10); return __ret; } __ai int64x1_t vabs_s64(int64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vabs_v((int8x8_t)__p0, 3); return __ret; 
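/*
 * The vfmaq/vfmsq wrappers above map to single fused multiply-add
 * instructions: vfmaq_f32(a, b, c) computes a + b*c with one rounding step,
 * and vfmsq_f32(a, b, c) is the same operation with b negated, i.e. a - b*c
 * (it is built here as vfmaq_f32(a, -b, c)).  A minimal sketch of the usual
 * accumulate idiom (assumes <arm_neon.h> on a target with __ARM_FEATURE_FMA;
 * the helper name is illustrative only):
 *
 *   #include <arm_neon.h>
 *   // acc += x * y, four floats at a time, fused rounding per lane.
 *   static inline float32x4_t axpy4(float32x4_t acc, float32x4_t x, float32x4_t y) {
 *     return vfmaq_f32(acc, x, y);
 *   }
 */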
}
__ai int64_t vabsd_s64(int64_t __p0) {
  int64_t __ret;
  __ret = (int64_t) __builtin_neon_vabsd_s64(__p0);
  return __ret;
}
#ifdef __LITTLE_ENDIAN__
__ai float64x2_t vaddq_f64(float64x2_t __p0, float64x2_t __p1) {
  float64x2_t __ret;
  __ret = __p0 + __p1;
  return __ret;
}
#else
__ai float64x2_t vaddq_f64(float64x2_t __p0, float64x2_t __p1) {
  float64x2_t __ret;
  float64x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
  float64x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
  __ret = __rev0 + __rev1;
  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
  return __ret;
}
#endif
__ai float64x1_t vadd_f64(float64x1_t __p0, float64x1_t __p1) {
  float64x1_t __ret;
  __ret = __p0 + __p1;
  return __ret;
}
__ai uint64_t vaddd_u64(uint64_t __p0, uint64_t __p1) {
  uint64_t __ret;
  __ret = (uint64_t) __builtin_neon_vaddd_u64(__p0, __p1);
  return __ret;
}
__ai int64_t vaddd_s64(int64_t __p0, int64_t __p1) {
  int64_t __ret;
  __ret = (int64_t) __builtin_neon_vaddd_s64(__p0, __p1);
  return __ret;
}
__ai poly128_t vaddq_p128(poly128_t __p0, poly128_t __p1) {
  poly128_t __ret;
  __ret = (poly128_t) __builtin_neon_vaddq_p128(__p0, __p1);
  return __ret;
}
#ifdef __LITTLE_ENDIAN__
__ai uint16x8_t vaddhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
  uint16x8_t __ret;
  __ret = vcombine_u16(__p0, vaddhn_u32(__p1, __p2));
  return __ret;
}
#else
__ai uint16x8_t vaddhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
  uint16x8_t __ret;
  uint16x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
  uint32x4_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
  uint32x4_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
  __ret = __noswap_vcombine_u16(__rev0, __noswap_vaddhn_u32(__rev1, __rev2));
  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
  return __ret;
}
#endif
#ifdef __LITTLE_ENDIAN__
__ai uint32x4_t vaddhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) {
  uint32x4_t __ret;
  __ret = vcombine_u32(__p0, vaddhn_u64(__p1, __p2));
  return __ret;
}
#else
__ai uint32x4_t vaddhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) {
  uint32x4_t __ret;
  uint32x2_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
  uint64x2_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
  uint64x2_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
  __ret = __noswap_vcombine_u32(__rev0, __noswap_vaddhn_u64(__rev1, __rev2));
  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
  return __ret;
}
#endif
#ifdef __LITTLE_ENDIAN__
__ai uint8x16_t vaddhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) {
  uint8x16_t __ret;
  __ret = vcombine_u8(__p0, vaddhn_u16(__p1, __p2));
  return __ret;
}
#else
__ai uint8x16_t vaddhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) {
  uint8x16_t __ret;
  uint8x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
  uint16x8_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
  uint16x8_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
  __ret = __noswap_vcombine_u8(__rev0, __noswap_vaddhn_u16(__rev1, __rev2));
  __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
  return __ret;
}
#endif
#ifdef __LITTLE_ENDIAN__
__ai int16x8_t vaddhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) {
  int16x8_t __ret;
  __ret = vcombine_s16(__p0, vaddhn_s32(__p1, __p2));
  return __ret;
}
#else
__ai int16x8_t vaddhn_high_s32(int16x4_t __p0,
int32x4_t __p1, int32x4_t __p2) { int16x8_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __noswap_vcombine_s16(__rev0, __noswap_vaddhn_s32(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vaddhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int32x4_t __ret; __ret = vcombine_s32(__p0, vaddhn_s64(__p1, __p2)); return __ret; } #else __ai int32x4_t vaddhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int32x4_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __noswap_vcombine_s32(__rev0, __noswap_vaddhn_s64(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vaddhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int8x16_t __ret; __ret = vcombine_s8(__p0, vaddhn_s16(__p1, __p2)); return __ret; } #else __ai int8x16_t vaddhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int8x16_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vcombine_s8(__rev0, __noswap_vaddhn_s16(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16_t vaddlvq_u8(uint8x16_t __p0) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vaddlvq_u8(__p0); return __ret; } #else __ai uint16_t vaddlvq_u8(uint8x16_t __p0) { uint16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16_t) __builtin_neon_vaddlvq_u8(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64_t vaddlvq_u32(uint32x4_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vaddlvq_u32(__p0); return __ret; } #else __ai uint64_t vaddlvq_u32(uint32x4_t __p0) { uint64_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint64_t) __builtin_neon_vaddlvq_u32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32_t vaddlvq_u16(uint16x8_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vaddlvq_u16(__p0); return __ret; } #else __ai uint32_t vaddlvq_u16(uint16x8_t __p0) { uint32_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint32_t) __builtin_neon_vaddlvq_u16(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16_t vaddlvq_s8(int8x16_t __p0) { int16_t __ret; __ret = (int16_t) __builtin_neon_vaddlvq_s8(__p0); return __ret; } #else __ai int16_t vaddlvq_s8(int8x16_t __p0) { int16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16_t) __builtin_neon_vaddlvq_s8(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64_t vaddlvq_s32(int32x4_t __p0) { int64_t __ret; __ret = (int64_t) 
__builtin_neon_vaddlvq_s32(__p0); return __ret; } #else __ai int64_t vaddlvq_s32(int32x4_t __p0) { int64_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int64_t) __builtin_neon_vaddlvq_s32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32_t vaddlvq_s16(int16x8_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vaddlvq_s16(__p0); return __ret; } #else __ai int32_t vaddlvq_s16(int16x8_t __p0) { int32_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int32_t) __builtin_neon_vaddlvq_s16(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16_t vaddlv_u8(uint8x8_t __p0) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vaddlv_u8(__p0); return __ret; } #else __ai uint16_t vaddlv_u8(uint8x8_t __p0) { uint16_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16_t) __builtin_neon_vaddlv_u8(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64_t vaddlv_u32(uint32x2_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vaddlv_u32(__p0); return __ret; } #else __ai uint64_t vaddlv_u32(uint32x2_t __p0) { uint64_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64_t) __builtin_neon_vaddlv_u32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32_t vaddlv_u16(uint16x4_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vaddlv_u16(__p0); return __ret; } #else __ai uint32_t vaddlv_u16(uint16x4_t __p0) { uint32_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32_t) __builtin_neon_vaddlv_u16(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16_t vaddlv_s8(int8x8_t __p0) { int16_t __ret; __ret = (int16_t) __builtin_neon_vaddlv_s8(__p0); return __ret; } #else __ai int16_t vaddlv_s8(int8x8_t __p0) { int16_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16_t) __builtin_neon_vaddlv_s8(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64_t vaddlv_s32(int32x2_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vaddlv_s32(__p0); return __ret; } #else __ai int64_t vaddlv_s32(int32x2_t __p0) { int64_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int64_t) __builtin_neon_vaddlv_s32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32_t vaddlv_s16(int16x4_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vaddlv_s16(__p0); return __ret; } #else __ai int32_t vaddlv_s16(int16x4_t __p0) { int32_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int32_t) __builtin_neon_vaddlv_s16(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8_t vaddvq_u8(uint8x16_t __p0) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vaddvq_u8(__p0); return __ret; } #else __ai uint8_t vaddvq_u8(uint8x16_t __p0) { uint8_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8_t) __builtin_neon_vaddvq_u8(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32_t vaddvq_u32(uint32x4_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vaddvq_u32(__p0); return __ret; } #else __ai uint32_t vaddvq_u32(uint32x4_t __p0) { uint32_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 
0); __ret = (uint32_t) __builtin_neon_vaddvq_u32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64_t vaddvq_u64(uint64x2_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vaddvq_u64(__p0); return __ret; } #else __ai uint64_t vaddvq_u64(uint64x2_t __p0) { uint64_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64_t) __builtin_neon_vaddvq_u64(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16_t vaddvq_u16(uint16x8_t __p0) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vaddvq_u16(__p0); return __ret; } #else __ai uint16_t vaddvq_u16(uint16x8_t __p0) { uint16_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16_t) __builtin_neon_vaddvq_u16(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8_t vaddvq_s8(int8x16_t __p0) { int8_t __ret; __ret = (int8_t) __builtin_neon_vaddvq_s8(__p0); return __ret; } #else __ai int8_t vaddvq_s8(int8x16_t __p0) { int8_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8_t) __builtin_neon_vaddvq_s8(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64_t vaddvq_f64(float64x2_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vaddvq_f64(__p0); return __ret; } #else __ai float64_t vaddvq_f64(float64x2_t __p0) { float64_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64_t) __builtin_neon_vaddvq_f64(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32_t vaddvq_f32(float32x4_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vaddvq_f32(__p0); return __ret; } #else __ai float32_t vaddvq_f32(float32x4_t __p0) { float32_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32_t) __builtin_neon_vaddvq_f32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32_t vaddvq_s32(int32x4_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vaddvq_s32(__p0); return __ret; } #else __ai int32_t vaddvq_s32(int32x4_t __p0) { int32_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int32_t) __builtin_neon_vaddvq_s32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64_t vaddvq_s64(int64x2_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vaddvq_s64(__p0); return __ret; } #else __ai int64_t vaddvq_s64(int64x2_t __p0) { int64_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int64_t) __builtin_neon_vaddvq_s64(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16_t vaddvq_s16(int16x8_t __p0) { int16_t __ret; __ret = (int16_t) __builtin_neon_vaddvq_s16(__p0); return __ret; } #else __ai int16_t vaddvq_s16(int16x8_t __p0) { int16_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16_t) __builtin_neon_vaddvq_s16(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8_t vaddv_u8(uint8x8_t __p0) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vaddv_u8(__p0); return __ret; } #else __ai uint8_t vaddv_u8(uint8x8_t __p0) { uint8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8_t) __builtin_neon_vaddv_u8(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32_t vaddv_u32(uint32x2_t __p0) { uint32_t __ret; __ret = 
(uint32_t) __builtin_neon_vaddv_u32(__p0); return __ret; } #else __ai uint32_t vaddv_u32(uint32x2_t __p0) { uint32_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32_t) __builtin_neon_vaddv_u32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16_t vaddv_u16(uint16x4_t __p0) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vaddv_u16(__p0); return __ret; } #else __ai uint16_t vaddv_u16(uint16x4_t __p0) { uint16_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16_t) __builtin_neon_vaddv_u16(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8_t vaddv_s8(int8x8_t __p0) { int8_t __ret; __ret = (int8_t) __builtin_neon_vaddv_s8(__p0); return __ret; } #else __ai int8_t vaddv_s8(int8x8_t __p0) { int8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8_t) __builtin_neon_vaddv_s8(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32_t vaddv_f32(float32x2_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vaddv_f32(__p0); return __ret; } #else __ai float32_t vaddv_f32(float32x2_t __p0) { float32_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32_t) __builtin_neon_vaddv_f32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32_t vaddv_s32(int32x2_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vaddv_s32(__p0); return __ret; } #else __ai int32_t vaddv_s32(int32x2_t __p0) { int32_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int32_t) __builtin_neon_vaddv_s32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16_t vaddv_s16(int16x4_t __p0) { int16_t __ret; __ret = (int16_t) __builtin_neon_vaddv_s16(__p0); return __ret; } #else __ai int16_t vaddv_s16(int16x4_t __p0) { int16_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int16_t) __builtin_neon_vaddv_s16(__rev0); return __ret; } #endif __ai poly64x1_t vbsl_p64(uint64x1_t __p0, poly64x1_t __p1, poly64x1_t __p2) { poly64x1_t __ret; __ret = (poly64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 6); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vbslq_p64(uint64x2_t __p0, poly64x2_t __p1, poly64x2_t __p2) { poly64x2_t __ret; __ret = (poly64x2_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 38); return __ret; } #else __ai poly64x2_t vbslq_p64(uint64x2_t __p0, poly64x2_t __p1, poly64x2_t __p2) { poly64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); poly64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (poly64x2_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 38); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vbslq_f64(uint64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); return __ret; } #else __ai float64x2_t vbslq_f64(uint64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float64x2_t __rev2; __rev2 = 
__builtin_shufflevector(__p2, __p2, 1, 0); __ret = (float64x2_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vbsl_f64(uint64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcageq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vcageq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else __ai uint64x2_t vcageq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t) __builtin_neon_vcageq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vcage_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcage_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } __ai uint64_t vcaged_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcaged_f64(__p0, __p1); return __ret; } __ai uint32_t vcages_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcages_f32(__p0, __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcagtq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vcagtq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else __ai uint64x2_t vcagtq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t) __builtin_neon_vcagtq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vcagt_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcagt_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } __ai uint64_t vcagtd_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcagtd_f64(__p0, __p1); return __ret; } __ai uint32_t vcagts_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcagts_f32(__p0, __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcaleq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vcaleq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else __ai uint64x2_t vcaleq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t) __builtin_neon_vcaleq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vcale_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcale_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } __ai uint64_t vcaled_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcaled_f64(__p0, __p1); return __ret; } __ai uint32_t 
vcales_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcales_f32(__p0, __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcaltq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vcaltq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else __ai uint64x2_t vcaltq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t) __builtin_neon_vcaltq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vcalt_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcalt_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } __ai uint64_t vcaltd_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcaltd_f64(__p0, __p1); return __ret; } __ai uint32_t vcalts_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcalts_f32(__p0, __p1); return __ret; } __ai uint64x1_t vceq_p64(poly64x1_t __p0, poly64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 == __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vceqq_p64(poly64x2_t __p0, poly64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0 == __p1); return __ret; } #else __ai uint64x2_t vceqq_p64(poly64x2_t __p0, poly64x2_t __p1) { uint64x2_t __ret; poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vceqq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0 == __p1); return __ret; } #else __ai uint64x2_t vceqq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vceqq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0 == __p1); return __ret; } #else __ai uint64x2_t vceqq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vceqq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0 == __p1); return __ret; } #else __ai uint64x2_t vceqq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t)(__rev0 == __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vceq_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 == __p1); return __ret; } __ai uint64x1_t vceq_f64(float64x1_t __p0, float64x1_t __p1) { 
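/*
 * vcageq/vcagtq/vcaleq/vcaltq compare absolute values (|a| >= |b|, |a| > |b|,
 * and so on), and like all NEON comparisons they return a mask vector whose
 * lanes are all ones where the predicate holds and all zeros elsewhere, so
 * the result can be fed straight into vbslq_* to select between two vectors.
 * A minimal sketch (assumes <arm_neon.h> on AArch64; the helper name is
 * illustrative only):
 *
 *   #include <arm_neon.h>
 *   // Per lane: keep a where |a| > |b|, otherwise take b.
 *   static inline float64x2_t keep_larger_magnitude(float64x2_t a, float64x2_t b) {
 *     uint64x2_t m = vcagtq_f64(a, b);   // all-ones lanes where |a| > |b|
 *     return vbslq_f64(m, a, b);         // bitwise select: m ? a : b per bit
 *   }
 */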
uint64x1_t __ret; __ret = (uint64x1_t)(__p0 == __p1); return __ret; } __ai uint64x1_t vceq_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 == __p1); return __ret; } __ai uint64_t vceqd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vceqd_u64(__p0, __p1); return __ret; } __ai uint64_t vceqd_s64(int64_t __p0, int64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vceqd_s64(__p0, __p1); return __ret; } __ai uint64_t vceqd_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vceqd_f64(__p0, __p1); return __ret; } __ai uint32_t vceqs_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vceqs_f32(__p0, __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vceqz_p8(poly8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 16); return __ret; } #else __ai uint8x8_t vceqz_p8(poly8x8_t __p0) { uint8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif __ai uint64x1_t vceqz_p64(poly64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vceqzq_p8(poly8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 48); return __ret; } #else __ai uint8x16_t vceqzq_p8(poly8x16_t __p0) { uint8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vceqzq_p64(poly64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 51); return __ret; } #else __ai uint64x2_t vceqzq_p64(poly64x2_t __p0) { uint64x2_t __ret; poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vceqzq_u8(uint8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 48); return __ret; } #else __ai uint8x16_t vceqzq_u8(uint8x16_t __p0) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vceqzq_u32(uint32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 50); return __ret; } #else __ai uint32x4_t vceqzq_u32(uint32x4_t __p0) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vceqzq_u64(uint64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) 
__builtin_neon_vceqzq_v((int8x16_t)__p0, 51); return __ret; } #else __ai uint64x2_t vceqzq_u64(uint64x2_t __p0) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vceqzq_u16(uint16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 49); return __ret; } #else __ai uint16x8_t vceqzq_u16(uint16x8_t __p0) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vceqzq_s8(int8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 48); return __ret; } #else __ai uint8x16_t vceqzq_s8(int8x16_t __p0) { uint8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vceqzq_f64(float64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 51); return __ret; } #else __ai uint64x2_t vceqzq_f64(float64x2_t __p0) { uint64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vceqzq_f32(float32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 50); return __ret; } #else __ai uint32x4_t vceqzq_f32(float32x4_t __p0) { uint32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vceqzq_s32(int32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 50); return __ret; } #else __ai uint32x4_t vceqzq_s32(int32x4_t __p0) { uint32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vceqzq_s64(int64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 51); return __ret; } #else __ai uint64x2_t vceqzq_s64(int64x2_t __p0) { uint64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vceqzq_s16(int16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 49); return __ret; } #else __ai uint16x8_t vceqzq_s16(int16x8_t __p0) { uint16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 
6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vceqz_u8(uint8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 16); return __ret; } #else __ai uint8x8_t vceqz_u8(uint8x8_t __p0) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vceqz_u32(uint32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 18); return __ret; } #else __ai uint32x2_t vceqz_u32(uint32x2_t __p0) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vceqz_u64(uint64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vceqz_u16(uint16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 17); return __ret; } #else __ai uint16x4_t vceqz_u16(uint16x4_t __p0) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vceqz_s8(int8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 16); return __ret; } #else __ai uint8x8_t vceqz_s8(int8x8_t __p0) { uint8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif __ai uint64x1_t vceqz_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vceqz_f32(float32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 18); return __ret; } #else __ai uint32x2_t vceqz_f32(float32x2_t __p0) { uint32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vceqz_s32(int32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 18); return __ret; } #else __ai uint32x2_t vceqz_s32(int32x2_t __p0) { uint32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vceqz_s64(int64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vceqz_s16(int16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 17); return __ret; 
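/*
 * The vceqz* family is the one-operand form of vceq*: each result lane is all
 * ones if the corresponding input lane equals zero and all zeros otherwise.
 * A minimal sketch of using that mask to substitute a default for zero lanes
 * (assumes <arm_neon.h> on AArch64; the helper name is illustrative only):
 *
 *   #include <arm_neon.h>
 *   // Per lane: if x == 0.0f use fallback, otherwise keep x.
 *   static inline float32x4_t replace_zeros(float32x4_t x, float fallback) {
 *     uint32x4_t zero_mask = vceqzq_f32(x);              // all-ones where x == 0
 *     return vbslq_f32(zero_mask, vdupq_n_f32(fallback), x);
 *   }
 */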
} #else __ai uint16x4_t vceqz_s16(int16x4_t __p0) { uint16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif __ai uint64_t vceqzd_u64(uint64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vceqzd_u64(__p0); return __ret; } __ai uint64_t vceqzd_s64(int64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vceqzd_s64(__p0); return __ret; } __ai uint64_t vceqzd_f64(float64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vceqzd_f64(__p0); return __ret; } __ai uint32_t vceqzs_f32(float32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vceqzs_f32(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcgeq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0 >= __p1); return __ret; } #else __ai uint64x2_t vcgeq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcgeq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0 >= __p1); return __ret; } #else __ai uint64x2_t vcgeq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcgeq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0 >= __p1); return __ret; } #else __ai uint64x2_t vcgeq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t)(__rev0 >= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vcge_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 >= __p1); return __ret; } __ai uint64x1_t vcge_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 >= __p1); return __ret; } __ai uint64x1_t vcge_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 >= __p1); return __ret; } __ai uint64_t vcged_s64(int64_t __p0, int64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcged_s64(__p0, __p1); return __ret; } __ai uint64_t vcged_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcged_u64(__p0, __p1); return __ret; } __ai uint64_t vcged_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcged_f64(__p0, __p1); return __ret; } __ai uint32_t vcges_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcges_f32(__p0, __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vcgezq_s8(int8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 48); return __ret; } #else __ai uint8x16_t vcgezq_s8(int8x16_t __p0) { uint8x16_t __ret; int8x16_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcgezq_f64(float64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 51); return __ret; } #else __ai uint64x2_t vcgezq_f64(float64x2_t __p0) { uint64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64x2_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcgezq_f32(float32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 50); return __ret; } #else __ai uint32x4_t vcgezq_f32(float32x4_t __p0) { uint32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcgezq_s32(int32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 50); return __ret; } #else __ai uint32x4_t vcgezq_s32(int32x4_t __p0) { uint32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcgezq_s64(int64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 51); return __ret; } #else __ai uint64x2_t vcgezq_s64(int64x2_t __p0) { uint64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64x2_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vcgezq_s16(int16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 49); return __ret; } #else __ai uint16x8_t vcgezq_s16(int16x8_t __p0) { uint16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vcgez_s8(int8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 16); return __ret; } #else __ai uint8x8_t vcgez_s8(int8x8_t __p0) { uint8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vcgez_v((int8x8_t)__rev0, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif __ai uint64x1_t vcgez_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 19); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcgez_f32(float32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 18); return __ret; } #else __ai uint32x2_t vcgez_f32(float32x2_t __p0) { uint32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 
1, 0); __ret = (uint32x2_t) __builtin_neon_vcgez_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcgez_s32(int32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 18); return __ret; } #else __ai uint32x2_t vcgez_s32(int32x2_t __p0) { uint32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vcgez_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vcgez_s64(int64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 19); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vcgez_s16(int16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 17); return __ret; } #else __ai uint16x4_t vcgez_s16(int16x4_t __p0) { uint16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vcgez_v((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif __ai uint64_t vcgezd_s64(int64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcgezd_s64(__p0); return __ret; } __ai uint64_t vcgezd_f64(float64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcgezd_f64(__p0); return __ret; } __ai uint32_t vcgezs_f32(float32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcgezs_f32(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcgtq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0 > __p1); return __ret; } #else __ai uint64x2_t vcgtq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcgtq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0 > __p1); return __ret; } #else __ai uint64x2_t vcgtq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcgtq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0 > __p1); return __ret; } #else __ai uint64x2_t vcgtq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t)(__rev0 > __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vcgt_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 > __p1); return __ret; } __ai uint64x1_t vcgt_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 > __p1); return __ret; } __ai uint64x1_t vcgt_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 > __p1); return __ret; } __ai uint64_t vcgtd_s64(int64_t __p0, int64_t __p1) { uint64_t __ret; __ret = 
(uint64_t) __builtin_neon_vcgtd_s64(__p0, __p1); return __ret; } __ai uint64_t vcgtd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcgtd_u64(__p0, __p1); return __ret; } __ai uint64_t vcgtd_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcgtd_f64(__p0, __p1); return __ret; } __ai uint32_t vcgts_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcgts_f32(__p0, __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vcgtzq_s8(int8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 48); return __ret; } #else __ai uint8x16_t vcgtzq_s8(int8x16_t __p0) { uint8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcgtzq_f64(float64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 51); return __ret; } #else __ai uint64x2_t vcgtzq_f64(float64x2_t __p0) { uint64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64x2_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcgtzq_f32(float32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 50); return __ret; } #else __ai uint32x4_t vcgtzq_f32(float32x4_t __p0) { uint32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcgtzq_s32(int32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 50); return __ret; } #else __ai uint32x4_t vcgtzq_s32(int32x4_t __p0) { uint32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcgtzq_s64(int64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 51); return __ret; } #else __ai uint64x2_t vcgtzq_s64(int64x2_t __p0) { uint64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64x2_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vcgtzq_s16(int16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 49); return __ret; } #else __ai uint16x8_t vcgtzq_s16(int16x8_t __p0) { uint16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vcgtz_s8(int8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 16); return 
__ret; } #else __ai uint8x8_t vcgtz_s8(int8x8_t __p0) { uint8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vcgtz_v((int8x8_t)__rev0, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif __ai uint64x1_t vcgtz_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 19); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcgtz_f32(float32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 18); return __ret; } #else __ai uint32x2_t vcgtz_f32(float32x2_t __p0) { uint32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vcgtz_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcgtz_s32(int32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 18); return __ret; } #else __ai uint32x2_t vcgtz_s32(int32x2_t __p0) { uint32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vcgtz_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vcgtz_s64(int64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 19); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vcgtz_s16(int16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 17); return __ret; } #else __ai uint16x4_t vcgtz_s16(int16x4_t __p0) { uint16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vcgtz_v((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif __ai uint64_t vcgtzd_s64(int64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcgtzd_s64(__p0); return __ret; } __ai uint64_t vcgtzd_f64(float64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcgtzd_f64(__p0); return __ret; } __ai uint32_t vcgtzs_f32(float32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcgtzs_f32(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcleq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0 <= __p1); return __ret; } #else __ai uint64x2_t vcleq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t)(__rev0 <= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcleq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0 <= __p1); return __ret; } #else __ai uint64x2_t vcleq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t)(__rev0 <= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcleq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0 <= __p1); return __ret; } #else __ai uint64x2_t 
vcleq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t)(__rev0 <= __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vcle_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 <= __p1); return __ret; } __ai uint64x1_t vcle_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 <= __p1); return __ret; } __ai uint64x1_t vcle_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 <= __p1); return __ret; } __ai uint64_t vcled_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcled_u64(__p0, __p1); return __ret; } __ai uint64_t vcled_s64(int64_t __p0, int64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcled_s64(__p0, __p1); return __ret; } __ai uint64_t vcled_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcled_f64(__p0, __p1); return __ret; } __ai uint32_t vcles_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcles_f32(__p0, __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vclezq_s8(int8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 48); return __ret; } #else __ai uint8x16_t vclezq_s8(int8x16_t __p0) { uint8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vclezq_f64(float64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 51); return __ret; } #else __ai uint64x2_t vclezq_f64(float64x2_t __p0) { uint64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64x2_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vclezq_f32(float32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 50); return __ret; } #else __ai uint32x4_t vclezq_f32(float32x4_t __p0) { uint32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vclezq_s32(int32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 50); return __ret; } #else __ai uint32x4_t vclezq_s32(int32x4_t __p0) { uint32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vclezq_s64(int64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 51); return __ret; } #else __ai uint64x2_t vclezq_s64(int64x2_t __p0) { uint64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); 
__ret = (uint64x2_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vclezq_s16(int16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 49); return __ret; } #else __ai uint16x8_t vclezq_s16(int16x8_t __p0) { uint16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vclez_s8(int8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vclez_v((int8x8_t)__p0, 16); return __ret; } #else __ai uint8x8_t vclez_s8(int8x8_t __p0) { uint8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vclez_v((int8x8_t)__rev0, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif __ai uint64x1_t vclez_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vclez_v((int8x8_t)__p0, 19); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vclez_f32(float32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vclez_v((int8x8_t)__p0, 18); return __ret; } #else __ai uint32x2_t vclez_f32(float32x2_t __p0) { uint32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vclez_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vclez_s32(int32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vclez_v((int8x8_t)__p0, 18); return __ret; } #else __ai uint32x2_t vclez_s32(int32x2_t __p0) { uint32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vclez_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vclez_s64(int64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vclez_v((int8x8_t)__p0, 19); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vclez_s16(int16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vclez_v((int8x8_t)__p0, 17); return __ret; } #else __ai uint16x4_t vclez_s16(int16x4_t __p0) { uint16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vclez_v((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif __ai uint64_t vclezd_s64(int64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vclezd_s64(__p0); return __ret; } __ai uint64_t vclezd_f64(float64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vclezd_f64(__p0); return __ret; } __ai uint32_t vclezs_f32(float32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vclezs_f32(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcltq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0 < __p1); return __ret; } #else __ai uint64x2_t vcltq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t)(__rev0 
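/* [Editorial usage sketch - not part of the original header text.]
   The vcle, vcled, vcles and vclez families are the "less than or equal"
   counterparts of the comparisons above; they likewise produce all-ones or
   all-zero masks per lane. A brief sketch (hypothetical demo function,
   AArch64 assumed):

       #include <arm_neon.h>

       static int demo_vcle(void) {
           uint64_t le = vcled_f64(1.0, 2.0);         // all-ones: 1.0 <= 2.0
           uint32x2_t m = vclez_s32(vdup_n_s32(-5));  // both lanes <= 0
           return (le != 0) && (vget_lane_u32(m, 1) == 0xFFFFFFFFu);
       }
*/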
< __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcltq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0 < __p1); return __ret; } #else __ai uint64x2_t vcltq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t)(__rev0 < __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcltq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0 < __p1); return __ret; } #else __ai uint64x2_t vcltq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t)(__rev0 < __rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vclt_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 < __p1); return __ret; } __ai uint64x1_t vclt_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 < __p1); return __ret; } __ai uint64x1_t vclt_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 < __p1); return __ret; } __ai uint64_t vcltd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcltd_u64(__p0, __p1); return __ret; } __ai uint64_t vcltd_s64(int64_t __p0, int64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcltd_s64(__p0, __p1); return __ret; } __ai uint64_t vcltd_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcltd_f64(__p0, __p1); return __ret; } __ai uint32_t vclts_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vclts_f32(__p0, __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vcltzq_s8(int8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 48); return __ret; } #else __ai uint8x16_t vcltzq_s8(int8x16_t __p0) { uint8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcltzq_f64(float64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 51); return __ret; } #else __ai uint64x2_t vcltzq_f64(float64x2_t __p0) { uint64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64x2_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vcltzq_f32(float32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 50); return __ret; } #else __ai uint32x4_t vcltzq_f32(float32x4_t __p0) { uint32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef 
__LITTLE_ENDIAN__ __ai uint32x4_t vcltzq_s32(int32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 50); return __ret; } #else __ai uint32x4_t vcltzq_s32(int32x4_t __p0) { uint32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcltzq_s64(int64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 51); return __ret; } #else __ai uint64x2_t vcltzq_s64(int64x2_t __p0) { uint64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64x2_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vcltzq_s16(int16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 49); return __ret; } #else __ai uint16x8_t vcltzq_s16(int16x8_t __p0) { uint16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vcltz_s8(int8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 16); return __ret; } #else __ai uint8x8_t vcltz_s8(int8x8_t __p0) { uint8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vcltz_v((int8x8_t)__rev0, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif __ai uint64x1_t vcltz_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 19); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcltz_f32(float32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 18); return __ret; } #else __ai uint32x2_t vcltz_f32(float32x2_t __p0) { uint32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vcltz_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcltz_s32(int32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 18); return __ret; } #else __ai uint32x2_t vcltz_s32(int32x2_t __p0) { uint32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32x2_t) __builtin_neon_vcltz_v((int8x8_t)__rev0, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vcltz_s64(int64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 19); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vcltz_s16(int16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 17); return __ret; } #else __ai uint16x4_t vcltz_s16(int16x4_t __p0) { uint16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16x4_t) __builtin_neon_vcltz_v((int8x8_t)__rev0, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif __ai uint64_t 
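/* [Editorial usage sketch - not part of the original header text.]
   The vclt and vcltz families are the strict "less than" analogues of the
   comparisons above. The floating-point forms are ordered comparisons, so
   a NaN input yields a zero mask. A short sketch (hypothetical demo
   function, AArch64 assumed):

       #include <arm_neon.h>

       static int demo_vclt(void) {
           uint64_t lt = vcltd_f64(-1.0, 0.0);            // all-ones
           uint32x4_t m = vcltzq_f32(vdupq_n_f32(-0.5f)); // every lane < 0
           return (lt != 0) && (vgetq_lane_u32(m, 3) != 0);
       }
*/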
vcltzd_s64(int64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcltzd_s64(__p0); return __ret; } __ai uint64_t vcltzd_f64(float64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcltzd_f64(__p0); return __ret; } __ai uint32_t vcltzs_f32(float32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcltzs_f32(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vcombine_p64(poly64x1_t __p0, poly64x1_t __p1) { poly64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1); return __ret; } #else __ai poly64x2_t vcombine_p64(poly64x1_t __p0, poly64x1_t __p1) { poly64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vcombine_f64(float64x1_t __p0, float64x1_t __p1) { float64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1); return __ret; } #else __ai float64x2_t vcombine_f64(float64x1_t __p0, float64x1_t __p1) { float64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_lane_p8(__p0_257, __p1_257, __p2_257, __p3_257) __extension__ ({ \ poly8x16_t __ret_257; \ poly8x16_t __s0_257 = __p0_257; \ poly8x8_t __s2_257 = __p2_257; \ __ret_257 = vsetq_lane_p8(vget_lane_p8(__s2_257, __p3_257), __s0_257, __p1_257); \ __ret_257; \ }) #else #define vcopyq_lane_p8(__p0_258, __p1_258, __p2_258, __p3_258) __extension__ ({ \ poly8x16_t __ret_258; \ poly8x16_t __s0_258 = __p0_258; \ poly8x8_t __s2_258 = __p2_258; \ poly8x16_t __rev0_258; __rev0_258 = __builtin_shufflevector(__s0_258, __s0_258, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ poly8x8_t __rev2_258; __rev2_258 = __builtin_shufflevector(__s2_258, __s2_258, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_258 = __noswap_vsetq_lane_p8(__noswap_vget_lane_p8(__rev2_258, __p3_258), __rev0_258, __p1_258); \ __ret_258 = __builtin_shufflevector(__ret_258, __ret_258, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_258; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_lane_p16(__p0_259, __p1_259, __p2_259, __p3_259) __extension__ ({ \ poly16x8_t __ret_259; \ poly16x8_t __s0_259 = __p0_259; \ poly16x4_t __s2_259 = __p2_259; \ __ret_259 = vsetq_lane_p16(vget_lane_p16(__s2_259, __p3_259), __s0_259, __p1_259); \ __ret_259; \ }) #else #define vcopyq_lane_p16(__p0_260, __p1_260, __p2_260, __p3_260) __extension__ ({ \ poly16x8_t __ret_260; \ poly16x8_t __s0_260 = __p0_260; \ poly16x4_t __s2_260 = __p2_260; \ poly16x8_t __rev0_260; __rev0_260 = __builtin_shufflevector(__s0_260, __s0_260, 7, 6, 5, 4, 3, 2, 1, 0); \ poly16x4_t __rev2_260; __rev2_260 = __builtin_shufflevector(__s2_260, __s2_260, 3, 2, 1, 0); \ __ret_260 = __noswap_vsetq_lane_p16(__noswap_vget_lane_p16(__rev2_260, __p3_260), __rev0_260, __p1_260); \ __ret_260 = __builtin_shufflevector(__ret_260, __ret_260, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_260; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_lane_u8(__p0_261, __p1_261, __p2_261, __p3_261) __extension__ ({ \ uint8x16_t __ret_261; \ uint8x16_t __s0_261 = __p0_261; \ uint8x8_t __s2_261 = __p2_261; \ __ret_261 = vsetq_lane_u8(vget_lane_u8(__s2_261, __p3_261), __s0_261, __p1_261); \ __ret_261; \ }) #else #define vcopyq_lane_u8(__p0_262, __p1_262, __p2_262, __p3_262) __extension__ ({ \ uint8x16_t __ret_262; \ uint8x16_t __s0_262 = __p0_262; \ uint8x8_t __s2_262 = __p2_262; \ uint8x16_t __rev0_262; __rev0_262 = 
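/* [Editorial usage sketch - not part of the original header text.]
   vcombine_p64 and vcombine_f64 above concatenate two 64-bit (one-lane)
   vectors into one 128-bit vector, with the first argument becoming the
   low half. A sketch (hypothetical function name, AArch64 assumed):

       #include <arm_neon.h>

       static double demo_vcombine(void) {
           float64x1_t lo = vdup_n_f64(1.0);
           float64x1_t hi = vdup_n_f64(2.0);
           float64x2_t v = vcombine_f64(lo, hi);  // lane 0 = 1.0, lane 1 = 2.0
           return vgetq_lane_f64(v, 1);           // 2.0
       }
*/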
__builtin_shufflevector(__s0_262, __s0_262, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __rev2_262; __rev2_262 = __builtin_shufflevector(__s2_262, __s2_262, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_262 = __noswap_vsetq_lane_u8(__noswap_vget_lane_u8(__rev2_262, __p3_262), __rev0_262, __p1_262); \ __ret_262 = __builtin_shufflevector(__ret_262, __ret_262, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_262; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_lane_u32(__p0_263, __p1_263, __p2_263, __p3_263) __extension__ ({ \ uint32x4_t __ret_263; \ uint32x4_t __s0_263 = __p0_263; \ uint32x2_t __s2_263 = __p2_263; \ __ret_263 = vsetq_lane_u32(vget_lane_u32(__s2_263, __p3_263), __s0_263, __p1_263); \ __ret_263; \ }) #else #define vcopyq_lane_u32(__p0_264, __p1_264, __p2_264, __p3_264) __extension__ ({ \ uint32x4_t __ret_264; \ uint32x4_t __s0_264 = __p0_264; \ uint32x2_t __s2_264 = __p2_264; \ uint32x4_t __rev0_264; __rev0_264 = __builtin_shufflevector(__s0_264, __s0_264, 3, 2, 1, 0); \ uint32x2_t __rev2_264; __rev2_264 = __builtin_shufflevector(__s2_264, __s2_264, 1, 0); \ __ret_264 = __noswap_vsetq_lane_u32(__noswap_vget_lane_u32(__rev2_264, __p3_264), __rev0_264, __p1_264); \ __ret_264 = __builtin_shufflevector(__ret_264, __ret_264, 3, 2, 1, 0); \ __ret_264; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_lane_u64(__p0_265, __p1_265, __p2_265, __p3_265) __extension__ ({ \ uint64x2_t __ret_265; \ uint64x2_t __s0_265 = __p0_265; \ uint64x1_t __s2_265 = __p2_265; \ __ret_265 = vsetq_lane_u64(vget_lane_u64(__s2_265, __p3_265), __s0_265, __p1_265); \ __ret_265; \ }) #else #define vcopyq_lane_u64(__p0_266, __p1_266, __p2_266, __p3_266) __extension__ ({ \ uint64x2_t __ret_266; \ uint64x2_t __s0_266 = __p0_266; \ uint64x1_t __s2_266 = __p2_266; \ uint64x2_t __rev0_266; __rev0_266 = __builtin_shufflevector(__s0_266, __s0_266, 1, 0); \ __ret_266 = __noswap_vsetq_lane_u64(vget_lane_u64(__s2_266, __p3_266), __rev0_266, __p1_266); \ __ret_266 = __builtin_shufflevector(__ret_266, __ret_266, 1, 0); \ __ret_266; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_lane_u16(__p0_267, __p1_267, __p2_267, __p3_267) __extension__ ({ \ uint16x8_t __ret_267; \ uint16x8_t __s0_267 = __p0_267; \ uint16x4_t __s2_267 = __p2_267; \ __ret_267 = vsetq_lane_u16(vget_lane_u16(__s2_267, __p3_267), __s0_267, __p1_267); \ __ret_267; \ }) #else #define vcopyq_lane_u16(__p0_268, __p1_268, __p2_268, __p3_268) __extension__ ({ \ uint16x8_t __ret_268; \ uint16x8_t __s0_268 = __p0_268; \ uint16x4_t __s2_268 = __p2_268; \ uint16x8_t __rev0_268; __rev0_268 = __builtin_shufflevector(__s0_268, __s0_268, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x4_t __rev2_268; __rev2_268 = __builtin_shufflevector(__s2_268, __s2_268, 3, 2, 1, 0); \ __ret_268 = __noswap_vsetq_lane_u16(__noswap_vget_lane_u16(__rev2_268, __p3_268), __rev0_268, __p1_268); \ __ret_268 = __builtin_shufflevector(__ret_268, __ret_268, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_268; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_lane_s8(__p0_269, __p1_269, __p2_269, __p3_269) __extension__ ({ \ int8x16_t __ret_269; \ int8x16_t __s0_269 = __p0_269; \ int8x8_t __s2_269 = __p2_269; \ __ret_269 = vsetq_lane_s8(vget_lane_s8(__s2_269, __p3_269), __s0_269, __p1_269); \ __ret_269; \ }) #else #define vcopyq_lane_s8(__p0_270, __p1_270, __p2_270, __p3_270) __extension__ ({ \ int8x16_t __ret_270; \ int8x16_t __s0_270 = __p0_270; \ int8x8_t __s2_270 = __p2_270; \ int8x16_t __rev0_270; __rev0_270 = __builtin_shufflevector(__s0_270, __s0_270, 15, 14, 13, 12, 11, 10, 9, 8, 
7, 6, 5, 4, 3, 2, 1, 0); \ int8x8_t __rev2_270; __rev2_270 = __builtin_shufflevector(__s2_270, __s2_270, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_270 = __noswap_vsetq_lane_s8(__noswap_vget_lane_s8(__rev2_270, __p3_270), __rev0_270, __p1_270); \ __ret_270 = __builtin_shufflevector(__ret_270, __ret_270, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_270; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_lane_f32(__p0_271, __p1_271, __p2_271, __p3_271) __extension__ ({ \ float32x4_t __ret_271; \ float32x4_t __s0_271 = __p0_271; \ float32x2_t __s2_271 = __p2_271; \ __ret_271 = vsetq_lane_f32(vget_lane_f32(__s2_271, __p3_271), __s0_271, __p1_271); \ __ret_271; \ }) #else #define vcopyq_lane_f32(__p0_272, __p1_272, __p2_272, __p3_272) __extension__ ({ \ float32x4_t __ret_272; \ float32x4_t __s0_272 = __p0_272; \ float32x2_t __s2_272 = __p2_272; \ float32x4_t __rev0_272; __rev0_272 = __builtin_shufflevector(__s0_272, __s0_272, 3, 2, 1, 0); \ float32x2_t __rev2_272; __rev2_272 = __builtin_shufflevector(__s2_272, __s2_272, 1, 0); \ __ret_272 = __noswap_vsetq_lane_f32(__noswap_vget_lane_f32(__rev2_272, __p3_272), __rev0_272, __p1_272); \ __ret_272 = __builtin_shufflevector(__ret_272, __ret_272, 3, 2, 1, 0); \ __ret_272; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_lane_s32(__p0_273, __p1_273, __p2_273, __p3_273) __extension__ ({ \ int32x4_t __ret_273; \ int32x4_t __s0_273 = __p0_273; \ int32x2_t __s2_273 = __p2_273; \ __ret_273 = vsetq_lane_s32(vget_lane_s32(__s2_273, __p3_273), __s0_273, __p1_273); \ __ret_273; \ }) #else #define vcopyq_lane_s32(__p0_274, __p1_274, __p2_274, __p3_274) __extension__ ({ \ int32x4_t __ret_274; \ int32x4_t __s0_274 = __p0_274; \ int32x2_t __s2_274 = __p2_274; \ int32x4_t __rev0_274; __rev0_274 = __builtin_shufflevector(__s0_274, __s0_274, 3, 2, 1, 0); \ int32x2_t __rev2_274; __rev2_274 = __builtin_shufflevector(__s2_274, __s2_274, 1, 0); \ __ret_274 = __noswap_vsetq_lane_s32(__noswap_vget_lane_s32(__rev2_274, __p3_274), __rev0_274, __p1_274); \ __ret_274 = __builtin_shufflevector(__ret_274, __ret_274, 3, 2, 1, 0); \ __ret_274; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_lane_s64(__p0_275, __p1_275, __p2_275, __p3_275) __extension__ ({ \ int64x2_t __ret_275; \ int64x2_t __s0_275 = __p0_275; \ int64x1_t __s2_275 = __p2_275; \ __ret_275 = vsetq_lane_s64(vget_lane_s64(__s2_275, __p3_275), __s0_275, __p1_275); \ __ret_275; \ }) #else #define vcopyq_lane_s64(__p0_276, __p1_276, __p2_276, __p3_276) __extension__ ({ \ int64x2_t __ret_276; \ int64x2_t __s0_276 = __p0_276; \ int64x1_t __s2_276 = __p2_276; \ int64x2_t __rev0_276; __rev0_276 = __builtin_shufflevector(__s0_276, __s0_276, 1, 0); \ __ret_276 = __noswap_vsetq_lane_s64(vget_lane_s64(__s2_276, __p3_276), __rev0_276, __p1_276); \ __ret_276 = __builtin_shufflevector(__ret_276, __ret_276, 1, 0); \ __ret_276; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_lane_s16(__p0_277, __p1_277, __p2_277, __p3_277) __extension__ ({ \ int16x8_t __ret_277; \ int16x8_t __s0_277 = __p0_277; \ int16x4_t __s2_277 = __p2_277; \ __ret_277 = vsetq_lane_s16(vget_lane_s16(__s2_277, __p3_277), __s0_277, __p1_277); \ __ret_277; \ }) #else #define vcopyq_lane_s16(__p0_278, __p1_278, __p2_278, __p3_278) __extension__ ({ \ int16x8_t __ret_278; \ int16x8_t __s0_278 = __p0_278; \ int16x4_t __s2_278 = __p2_278; \ int16x8_t __rev0_278; __rev0_278 = __builtin_shufflevector(__s0_278, __s0_278, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x4_t __rev2_278; __rev2_278 = __builtin_shufflevector(__s2_278, __s2_278, 3, 2, 1, 0); \ __ret_278 = 
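/* [Editorial usage sketch - not part of the original header text.]
   The vcopyq_lane macros defined above insert lane __p3 of the 64-bit
   source vector __p2 into lane __p1 of the 128-bit destination __p0 (they
   expand to a vget_lane followed by a vsetq_lane). A sketch (hypothetical
   function name, AArch64 assumed):

       #include <arm_neon.h>

       static uint32x4_t demo_vcopyq_lane(void) {
           uint32x4_t dst = vdupq_n_u32(0u);
           uint32x2_t src = vset_lane_u32(9u, vdup_n_u32(7u), 1);
           return vcopyq_lane_u32(dst, 3, src, 1);  // lane 3 of result is 9
       }
*/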
__noswap_vsetq_lane_s16(__noswap_vget_lane_s16(__rev2_278, __p3_278), __rev0_278, __p1_278); \ __ret_278 = __builtin_shufflevector(__ret_278, __ret_278, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_278; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_lane_p8(__p0_279, __p1_279, __p2_279, __p3_279) __extension__ ({ \ poly8x8_t __ret_279; \ poly8x8_t __s0_279 = __p0_279; \ poly8x8_t __s2_279 = __p2_279; \ __ret_279 = vset_lane_p8(vget_lane_p8(__s2_279, __p3_279), __s0_279, __p1_279); \ __ret_279; \ }) #else #define vcopy_lane_p8(__p0_280, __p1_280, __p2_280, __p3_280) __extension__ ({ \ poly8x8_t __ret_280; \ poly8x8_t __s0_280 = __p0_280; \ poly8x8_t __s2_280 = __p2_280; \ poly8x8_t __rev0_280; __rev0_280 = __builtin_shufflevector(__s0_280, __s0_280, 7, 6, 5, 4, 3, 2, 1, 0); \ poly8x8_t __rev2_280; __rev2_280 = __builtin_shufflevector(__s2_280, __s2_280, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_280 = __noswap_vset_lane_p8(__noswap_vget_lane_p8(__rev2_280, __p3_280), __rev0_280, __p1_280); \ __ret_280 = __builtin_shufflevector(__ret_280, __ret_280, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_280; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_lane_p16(__p0_281, __p1_281, __p2_281, __p3_281) __extension__ ({ \ poly16x4_t __ret_281; \ poly16x4_t __s0_281 = __p0_281; \ poly16x4_t __s2_281 = __p2_281; \ __ret_281 = vset_lane_p16(vget_lane_p16(__s2_281, __p3_281), __s0_281, __p1_281); \ __ret_281; \ }) #else #define vcopy_lane_p16(__p0_282, __p1_282, __p2_282, __p3_282) __extension__ ({ \ poly16x4_t __ret_282; \ poly16x4_t __s0_282 = __p0_282; \ poly16x4_t __s2_282 = __p2_282; \ poly16x4_t __rev0_282; __rev0_282 = __builtin_shufflevector(__s0_282, __s0_282, 3, 2, 1, 0); \ poly16x4_t __rev2_282; __rev2_282 = __builtin_shufflevector(__s2_282, __s2_282, 3, 2, 1, 0); \ __ret_282 = __noswap_vset_lane_p16(__noswap_vget_lane_p16(__rev2_282, __p3_282), __rev0_282, __p1_282); \ __ret_282 = __builtin_shufflevector(__ret_282, __ret_282, 3, 2, 1, 0); \ __ret_282; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_lane_u8(__p0_283, __p1_283, __p2_283, __p3_283) __extension__ ({ \ uint8x8_t __ret_283; \ uint8x8_t __s0_283 = __p0_283; \ uint8x8_t __s2_283 = __p2_283; \ __ret_283 = vset_lane_u8(vget_lane_u8(__s2_283, __p3_283), __s0_283, __p1_283); \ __ret_283; \ }) #else #define vcopy_lane_u8(__p0_284, __p1_284, __p2_284, __p3_284) __extension__ ({ \ uint8x8_t __ret_284; \ uint8x8_t __s0_284 = __p0_284; \ uint8x8_t __s2_284 = __p2_284; \ uint8x8_t __rev0_284; __rev0_284 = __builtin_shufflevector(__s0_284, __s0_284, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __rev2_284; __rev2_284 = __builtin_shufflevector(__s2_284, __s2_284, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_284 = __noswap_vset_lane_u8(__noswap_vget_lane_u8(__rev2_284, __p3_284), __rev0_284, __p1_284); \ __ret_284 = __builtin_shufflevector(__ret_284, __ret_284, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_284; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_lane_u32(__p0_285, __p1_285, __p2_285, __p3_285) __extension__ ({ \ uint32x2_t __ret_285; \ uint32x2_t __s0_285 = __p0_285; \ uint32x2_t __s2_285 = __p2_285; \ __ret_285 = vset_lane_u32(vget_lane_u32(__s2_285, __p3_285), __s0_285, __p1_285); \ __ret_285; \ }) #else #define vcopy_lane_u32(__p0_286, __p1_286, __p2_286, __p3_286) __extension__ ({ \ uint32x2_t __ret_286; \ uint32x2_t __s0_286 = __p0_286; \ uint32x2_t __s2_286 = __p2_286; \ uint32x2_t __rev0_286; __rev0_286 = __builtin_shufflevector(__s0_286, __s0_286, 1, 0); \ uint32x2_t __rev2_286; __rev2_286 = __builtin_shufflevector(__s2_286, __s2_286, 1, 0); \ __ret_286 = 
__noswap_vset_lane_u32(__noswap_vget_lane_u32(__rev2_286, __p3_286), __rev0_286, __p1_286); \ __ret_286 = __builtin_shufflevector(__ret_286, __ret_286, 1, 0); \ __ret_286; \ }) #endif #define vcopy_lane_u64(__p0_287, __p1_287, __p2_287, __p3_287) __extension__ ({ \ uint64x1_t __ret_287; \ uint64x1_t __s0_287 = __p0_287; \ uint64x1_t __s2_287 = __p2_287; \ __ret_287 = vset_lane_u64(vget_lane_u64(__s2_287, __p3_287), __s0_287, __p1_287); \ __ret_287; \ }) #ifdef __LITTLE_ENDIAN__ #define vcopy_lane_u16(__p0_288, __p1_288, __p2_288, __p3_288) __extension__ ({ \ uint16x4_t __ret_288; \ uint16x4_t __s0_288 = __p0_288; \ uint16x4_t __s2_288 = __p2_288; \ __ret_288 = vset_lane_u16(vget_lane_u16(__s2_288, __p3_288), __s0_288, __p1_288); \ __ret_288; \ }) #else #define vcopy_lane_u16(__p0_289, __p1_289, __p2_289, __p3_289) __extension__ ({ \ uint16x4_t __ret_289; \ uint16x4_t __s0_289 = __p0_289; \ uint16x4_t __s2_289 = __p2_289; \ uint16x4_t __rev0_289; __rev0_289 = __builtin_shufflevector(__s0_289, __s0_289, 3, 2, 1, 0); \ uint16x4_t __rev2_289; __rev2_289 = __builtin_shufflevector(__s2_289, __s2_289, 3, 2, 1, 0); \ __ret_289 = __noswap_vset_lane_u16(__noswap_vget_lane_u16(__rev2_289, __p3_289), __rev0_289, __p1_289); \ __ret_289 = __builtin_shufflevector(__ret_289, __ret_289, 3, 2, 1, 0); \ __ret_289; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_lane_s8(__p0_290, __p1_290, __p2_290, __p3_290) __extension__ ({ \ int8x8_t __ret_290; \ int8x8_t __s0_290 = __p0_290; \ int8x8_t __s2_290 = __p2_290; \ __ret_290 = vset_lane_s8(vget_lane_s8(__s2_290, __p3_290), __s0_290, __p1_290); \ __ret_290; \ }) #else #define vcopy_lane_s8(__p0_291, __p1_291, __p2_291, __p3_291) __extension__ ({ \ int8x8_t __ret_291; \ int8x8_t __s0_291 = __p0_291; \ int8x8_t __s2_291 = __p2_291; \ int8x8_t __rev0_291; __rev0_291 = __builtin_shufflevector(__s0_291, __s0_291, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x8_t __rev2_291; __rev2_291 = __builtin_shufflevector(__s2_291, __s2_291, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_291 = __noswap_vset_lane_s8(__noswap_vget_lane_s8(__rev2_291, __p3_291), __rev0_291, __p1_291); \ __ret_291 = __builtin_shufflevector(__ret_291, __ret_291, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_291; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_lane_f32(__p0_292, __p1_292, __p2_292, __p3_292) __extension__ ({ \ float32x2_t __ret_292; \ float32x2_t __s0_292 = __p0_292; \ float32x2_t __s2_292 = __p2_292; \ __ret_292 = vset_lane_f32(vget_lane_f32(__s2_292, __p3_292), __s0_292, __p1_292); \ __ret_292; \ }) #else #define vcopy_lane_f32(__p0_293, __p1_293, __p2_293, __p3_293) __extension__ ({ \ float32x2_t __ret_293; \ float32x2_t __s0_293 = __p0_293; \ float32x2_t __s2_293 = __p2_293; \ float32x2_t __rev0_293; __rev0_293 = __builtin_shufflevector(__s0_293, __s0_293, 1, 0); \ float32x2_t __rev2_293; __rev2_293 = __builtin_shufflevector(__s2_293, __s2_293, 1, 0); \ __ret_293 = __noswap_vset_lane_f32(__noswap_vget_lane_f32(__rev2_293, __p3_293), __rev0_293, __p1_293); \ __ret_293 = __builtin_shufflevector(__ret_293, __ret_293, 1, 0); \ __ret_293; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_lane_s32(__p0_294, __p1_294, __p2_294, __p3_294) __extension__ ({ \ int32x2_t __ret_294; \ int32x2_t __s0_294 = __p0_294; \ int32x2_t __s2_294 = __p2_294; \ __ret_294 = vset_lane_s32(vget_lane_s32(__s2_294, __p3_294), __s0_294, __p1_294); \ __ret_294; \ }) #else #define vcopy_lane_s32(__p0_295, __p1_295, __p2_295, __p3_295) __extension__ ({ \ int32x2_t __ret_295; \ int32x2_t __s0_295 = __p0_295; \ int32x2_t __s2_295 = __p2_295; \ 
int32x2_t __rev0_295; __rev0_295 = __builtin_shufflevector(__s0_295, __s0_295, 1, 0); \ int32x2_t __rev2_295; __rev2_295 = __builtin_shufflevector(__s2_295, __s2_295, 1, 0); \ __ret_295 = __noswap_vset_lane_s32(__noswap_vget_lane_s32(__rev2_295, __p3_295), __rev0_295, __p1_295); \ __ret_295 = __builtin_shufflevector(__ret_295, __ret_295, 1, 0); \ __ret_295; \ }) #endif #define vcopy_lane_s64(__p0_296, __p1_296, __p2_296, __p3_296) __extension__ ({ \ int64x1_t __ret_296; \ int64x1_t __s0_296 = __p0_296; \ int64x1_t __s2_296 = __p2_296; \ __ret_296 = vset_lane_s64(vget_lane_s64(__s2_296, __p3_296), __s0_296, __p1_296); \ __ret_296; \ }) #ifdef __LITTLE_ENDIAN__ #define vcopy_lane_s16(__p0_297, __p1_297, __p2_297, __p3_297) __extension__ ({ \ int16x4_t __ret_297; \ int16x4_t __s0_297 = __p0_297; \ int16x4_t __s2_297 = __p2_297; \ __ret_297 = vset_lane_s16(vget_lane_s16(__s2_297, __p3_297), __s0_297, __p1_297); \ __ret_297; \ }) #else #define vcopy_lane_s16(__p0_298, __p1_298, __p2_298, __p3_298) __extension__ ({ \ int16x4_t __ret_298; \ int16x4_t __s0_298 = __p0_298; \ int16x4_t __s2_298 = __p2_298; \ int16x4_t __rev0_298; __rev0_298 = __builtin_shufflevector(__s0_298, __s0_298, 3, 2, 1, 0); \ int16x4_t __rev2_298; __rev2_298 = __builtin_shufflevector(__s2_298, __s2_298, 3, 2, 1, 0); \ __ret_298 = __noswap_vset_lane_s16(__noswap_vget_lane_s16(__rev2_298, __p3_298), __rev0_298, __p1_298); \ __ret_298 = __builtin_shufflevector(__ret_298, __ret_298, 3, 2, 1, 0); \ __ret_298; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_laneq_p8(__p0_299, __p1_299, __p2_299, __p3_299) __extension__ ({ \ poly8x16_t __ret_299; \ poly8x16_t __s0_299 = __p0_299; \ poly8x16_t __s2_299 = __p2_299; \ __ret_299 = vsetq_lane_p8(vgetq_lane_p8(__s2_299, __p3_299), __s0_299, __p1_299); \ __ret_299; \ }) #else #define vcopyq_laneq_p8(__p0_300, __p1_300, __p2_300, __p3_300) __extension__ ({ \ poly8x16_t __ret_300; \ poly8x16_t __s0_300 = __p0_300; \ poly8x16_t __s2_300 = __p2_300; \ poly8x16_t __rev0_300; __rev0_300 = __builtin_shufflevector(__s0_300, __s0_300, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ poly8x16_t __rev2_300; __rev2_300 = __builtin_shufflevector(__s2_300, __s2_300, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_300 = __noswap_vsetq_lane_p8(__noswap_vgetq_lane_p8(__rev2_300, __p3_300), __rev0_300, __p1_300); \ __ret_300 = __builtin_shufflevector(__ret_300, __ret_300, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_300; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_laneq_p16(__p0_301, __p1_301, __p2_301, __p3_301) __extension__ ({ \ poly16x8_t __ret_301; \ poly16x8_t __s0_301 = __p0_301; \ poly16x8_t __s2_301 = __p2_301; \ __ret_301 = vsetq_lane_p16(vgetq_lane_p16(__s2_301, __p3_301), __s0_301, __p1_301); \ __ret_301; \ }) #else #define vcopyq_laneq_p16(__p0_302, __p1_302, __p2_302, __p3_302) __extension__ ({ \ poly16x8_t __ret_302; \ poly16x8_t __s0_302 = __p0_302; \ poly16x8_t __s2_302 = __p2_302; \ poly16x8_t __rev0_302; __rev0_302 = __builtin_shufflevector(__s0_302, __s0_302, 7, 6, 5, 4, 3, 2, 1, 0); \ poly16x8_t __rev2_302; __rev2_302 = __builtin_shufflevector(__s2_302, __s2_302, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_302 = __noswap_vsetq_lane_p16(__noswap_vgetq_lane_p16(__rev2_302, __p3_302), __rev0_302, __p1_302); \ __ret_302 = __builtin_shufflevector(__ret_302, __ret_302, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_302; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_laneq_u8(__p0_303, __p1_303, __p2_303, __p3_303) __extension__ ({ \ uint8x16_t __ret_303; \ 
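/* [Editorial usage sketch - not part of the original header text.]
   vcopy_lane is the 64-bit-to-64-bit variant: it copies one lane of a
   64-bit source vector into a chosen lane of a 64-bit destination vector.
   A sketch (hypothetical function name, AArch64 assumed):

       #include <arm_neon.h>

       static int16_t demo_vcopy_lane(void) {
           int16x4_t a = vdup_n_s16(1);
           int16x4_t b = vset_lane_s16(42, vdup_n_s16(2), 3);
           int16x4_t r = vcopy_lane_s16(a, 0, b, 3);  // r = {42, 1, 1, 1}
           return vget_lane_s16(r, 0);                // 42
       }
*/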
uint8x16_t __s0_303 = __p0_303; \ uint8x16_t __s2_303 = __p2_303; \ __ret_303 = vsetq_lane_u8(vgetq_lane_u8(__s2_303, __p3_303), __s0_303, __p1_303); \ __ret_303; \ }) #else #define vcopyq_laneq_u8(__p0_304, __p1_304, __p2_304, __p3_304) __extension__ ({ \ uint8x16_t __ret_304; \ uint8x16_t __s0_304 = __p0_304; \ uint8x16_t __s2_304 = __p2_304; \ uint8x16_t __rev0_304; __rev0_304 = __builtin_shufflevector(__s0_304, __s0_304, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __rev2_304; __rev2_304 = __builtin_shufflevector(__s2_304, __s2_304, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_304 = __noswap_vsetq_lane_u8(__noswap_vgetq_lane_u8(__rev2_304, __p3_304), __rev0_304, __p1_304); \ __ret_304 = __builtin_shufflevector(__ret_304, __ret_304, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_304; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_laneq_u32(__p0_305, __p1_305, __p2_305, __p3_305) __extension__ ({ \ uint32x4_t __ret_305; \ uint32x4_t __s0_305 = __p0_305; \ uint32x4_t __s2_305 = __p2_305; \ __ret_305 = vsetq_lane_u32(vgetq_lane_u32(__s2_305, __p3_305), __s0_305, __p1_305); \ __ret_305; \ }) #else #define vcopyq_laneq_u32(__p0_306, __p1_306, __p2_306, __p3_306) __extension__ ({ \ uint32x4_t __ret_306; \ uint32x4_t __s0_306 = __p0_306; \ uint32x4_t __s2_306 = __p2_306; \ uint32x4_t __rev0_306; __rev0_306 = __builtin_shufflevector(__s0_306, __s0_306, 3, 2, 1, 0); \ uint32x4_t __rev2_306; __rev2_306 = __builtin_shufflevector(__s2_306, __s2_306, 3, 2, 1, 0); \ __ret_306 = __noswap_vsetq_lane_u32(__noswap_vgetq_lane_u32(__rev2_306, __p3_306), __rev0_306, __p1_306); \ __ret_306 = __builtin_shufflevector(__ret_306, __ret_306, 3, 2, 1, 0); \ __ret_306; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_laneq_u64(__p0_307, __p1_307, __p2_307, __p3_307) __extension__ ({ \ uint64x2_t __ret_307; \ uint64x2_t __s0_307 = __p0_307; \ uint64x2_t __s2_307 = __p2_307; \ __ret_307 = vsetq_lane_u64(vgetq_lane_u64(__s2_307, __p3_307), __s0_307, __p1_307); \ __ret_307; \ }) #else #define vcopyq_laneq_u64(__p0_308, __p1_308, __p2_308, __p3_308) __extension__ ({ \ uint64x2_t __ret_308; \ uint64x2_t __s0_308 = __p0_308; \ uint64x2_t __s2_308 = __p2_308; \ uint64x2_t __rev0_308; __rev0_308 = __builtin_shufflevector(__s0_308, __s0_308, 1, 0); \ uint64x2_t __rev2_308; __rev2_308 = __builtin_shufflevector(__s2_308, __s2_308, 1, 0); \ __ret_308 = __noswap_vsetq_lane_u64(__noswap_vgetq_lane_u64(__rev2_308, __p3_308), __rev0_308, __p1_308); \ __ret_308 = __builtin_shufflevector(__ret_308, __ret_308, 1, 0); \ __ret_308; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_laneq_u16(__p0_309, __p1_309, __p2_309, __p3_309) __extension__ ({ \ uint16x8_t __ret_309; \ uint16x8_t __s0_309 = __p0_309; \ uint16x8_t __s2_309 = __p2_309; \ __ret_309 = vsetq_lane_u16(vgetq_lane_u16(__s2_309, __p3_309), __s0_309, __p1_309); \ __ret_309; \ }) #else #define vcopyq_laneq_u16(__p0_310, __p1_310, __p2_310, __p3_310) __extension__ ({ \ uint16x8_t __ret_310; \ uint16x8_t __s0_310 = __p0_310; \ uint16x8_t __s2_310 = __p2_310; \ uint16x8_t __rev0_310; __rev0_310 = __builtin_shufflevector(__s0_310, __s0_310, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev2_310; __rev2_310 = __builtin_shufflevector(__s2_310, __s2_310, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_310 = __noswap_vsetq_lane_u16(__noswap_vgetq_lane_u16(__rev2_310, __p3_310), __rev0_310, __p1_310); \ __ret_310 = __builtin_shufflevector(__ret_310, __ret_310, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_310; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define 
vcopyq_laneq_s8(__p0_311, __p1_311, __p2_311, __p3_311) __extension__ ({ \ int8x16_t __ret_311; \ int8x16_t __s0_311 = __p0_311; \ int8x16_t __s2_311 = __p2_311; \ __ret_311 = vsetq_lane_s8(vgetq_lane_s8(__s2_311, __p3_311), __s0_311, __p1_311); \ __ret_311; \ }) #else #define vcopyq_laneq_s8(__p0_312, __p1_312, __p2_312, __p3_312) __extension__ ({ \ int8x16_t __ret_312; \ int8x16_t __s0_312 = __p0_312; \ int8x16_t __s2_312 = __p2_312; \ int8x16_t __rev0_312; __rev0_312 = __builtin_shufflevector(__s0_312, __s0_312, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __rev2_312; __rev2_312 = __builtin_shufflevector(__s2_312, __s2_312, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_312 = __noswap_vsetq_lane_s8(__noswap_vgetq_lane_s8(__rev2_312, __p3_312), __rev0_312, __p1_312); \ __ret_312 = __builtin_shufflevector(__ret_312, __ret_312, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_312; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_laneq_f32(__p0_313, __p1_313, __p2_313, __p3_313) __extension__ ({ \ float32x4_t __ret_313; \ float32x4_t __s0_313 = __p0_313; \ float32x4_t __s2_313 = __p2_313; \ __ret_313 = vsetq_lane_f32(vgetq_lane_f32(__s2_313, __p3_313), __s0_313, __p1_313); \ __ret_313; \ }) #else #define vcopyq_laneq_f32(__p0_314, __p1_314, __p2_314, __p3_314) __extension__ ({ \ float32x4_t __ret_314; \ float32x4_t __s0_314 = __p0_314; \ float32x4_t __s2_314 = __p2_314; \ float32x4_t __rev0_314; __rev0_314 = __builtin_shufflevector(__s0_314, __s0_314, 3, 2, 1, 0); \ float32x4_t __rev2_314; __rev2_314 = __builtin_shufflevector(__s2_314, __s2_314, 3, 2, 1, 0); \ __ret_314 = __noswap_vsetq_lane_f32(__noswap_vgetq_lane_f32(__rev2_314, __p3_314), __rev0_314, __p1_314); \ __ret_314 = __builtin_shufflevector(__ret_314, __ret_314, 3, 2, 1, 0); \ __ret_314; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_laneq_s32(__p0_315, __p1_315, __p2_315, __p3_315) __extension__ ({ \ int32x4_t __ret_315; \ int32x4_t __s0_315 = __p0_315; \ int32x4_t __s2_315 = __p2_315; \ __ret_315 = vsetq_lane_s32(vgetq_lane_s32(__s2_315, __p3_315), __s0_315, __p1_315); \ __ret_315; \ }) #else #define vcopyq_laneq_s32(__p0_316, __p1_316, __p2_316, __p3_316) __extension__ ({ \ int32x4_t __ret_316; \ int32x4_t __s0_316 = __p0_316; \ int32x4_t __s2_316 = __p2_316; \ int32x4_t __rev0_316; __rev0_316 = __builtin_shufflevector(__s0_316, __s0_316, 3, 2, 1, 0); \ int32x4_t __rev2_316; __rev2_316 = __builtin_shufflevector(__s2_316, __s2_316, 3, 2, 1, 0); \ __ret_316 = __noswap_vsetq_lane_s32(__noswap_vgetq_lane_s32(__rev2_316, __p3_316), __rev0_316, __p1_316); \ __ret_316 = __builtin_shufflevector(__ret_316, __ret_316, 3, 2, 1, 0); \ __ret_316; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_laneq_s64(__p0_317, __p1_317, __p2_317, __p3_317) __extension__ ({ \ int64x2_t __ret_317; \ int64x2_t __s0_317 = __p0_317; \ int64x2_t __s2_317 = __p2_317; \ __ret_317 = vsetq_lane_s64(vgetq_lane_s64(__s2_317, __p3_317), __s0_317, __p1_317); \ __ret_317; \ }) #else #define vcopyq_laneq_s64(__p0_318, __p1_318, __p2_318, __p3_318) __extension__ ({ \ int64x2_t __ret_318; \ int64x2_t __s0_318 = __p0_318; \ int64x2_t __s2_318 = __p2_318; \ int64x2_t __rev0_318; __rev0_318 = __builtin_shufflevector(__s0_318, __s0_318, 1, 0); \ int64x2_t __rev2_318; __rev2_318 = __builtin_shufflevector(__s2_318, __s2_318, 1, 0); \ __ret_318 = __noswap_vsetq_lane_s64(__noswap_vgetq_lane_s64(__rev2_318, __p3_318), __rev0_318, __p1_318); \ __ret_318 = __builtin_shufflevector(__ret_318, __ret_318, 1, 0); \ 
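/* [Editorial usage sketch - not part of the original header text.]
   vcopyq_laneq takes both the destination and the lane source as 128-bit
   vectors; otherwise it behaves like vcopyq_lane above. A sketch
   (hypothetical function name, AArch64 assumed):

       #include <arm_neon.h>

       static float demo_vcopyq_laneq(void) {
           float32x4_t a = vdupq_n_f32(0.0f);
           float32x4_t b = vsetq_lane_f32(3.5f, vdupq_n_f32(1.0f), 3);
           float32x4_t r = vcopyq_laneq_f32(a, 0, b, 3);  // lane 0 becomes 3.5f
           return vgetq_lane_f32(r, 0);
       }
*/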
__ret_318; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_laneq_s16(__p0_319, __p1_319, __p2_319, __p3_319) __extension__ ({ \ int16x8_t __ret_319; \ int16x8_t __s0_319 = __p0_319; \ int16x8_t __s2_319 = __p2_319; \ __ret_319 = vsetq_lane_s16(vgetq_lane_s16(__s2_319, __p3_319), __s0_319, __p1_319); \ __ret_319; \ }) #else #define vcopyq_laneq_s16(__p0_320, __p1_320, __p2_320, __p3_320) __extension__ ({ \ int16x8_t __ret_320; \ int16x8_t __s0_320 = __p0_320; \ int16x8_t __s2_320 = __p2_320; \ int16x8_t __rev0_320; __rev0_320 = __builtin_shufflevector(__s0_320, __s0_320, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev2_320; __rev2_320 = __builtin_shufflevector(__s2_320, __s2_320, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_320 = __noswap_vsetq_lane_s16(__noswap_vgetq_lane_s16(__rev2_320, __p3_320), __rev0_320, __p1_320); \ __ret_320 = __builtin_shufflevector(__ret_320, __ret_320, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_320; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_laneq_p8(__p0_321, __p1_321, __p2_321, __p3_321) __extension__ ({ \ poly8x8_t __ret_321; \ poly8x8_t __s0_321 = __p0_321; \ poly8x16_t __s2_321 = __p2_321; \ __ret_321 = vset_lane_p8(vgetq_lane_p8(__s2_321, __p3_321), __s0_321, __p1_321); \ __ret_321; \ }) #else #define vcopy_laneq_p8(__p0_322, __p1_322, __p2_322, __p3_322) __extension__ ({ \ poly8x8_t __ret_322; \ poly8x8_t __s0_322 = __p0_322; \ poly8x16_t __s2_322 = __p2_322; \ poly8x8_t __rev0_322; __rev0_322 = __builtin_shufflevector(__s0_322, __s0_322, 7, 6, 5, 4, 3, 2, 1, 0); \ poly8x16_t __rev2_322; __rev2_322 = __builtin_shufflevector(__s2_322, __s2_322, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_322 = __noswap_vset_lane_p8(__noswap_vgetq_lane_p8(__rev2_322, __p3_322), __rev0_322, __p1_322); \ __ret_322 = __builtin_shufflevector(__ret_322, __ret_322, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_322; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_laneq_p16(__p0_323, __p1_323, __p2_323, __p3_323) __extension__ ({ \ poly16x4_t __ret_323; \ poly16x4_t __s0_323 = __p0_323; \ poly16x8_t __s2_323 = __p2_323; \ __ret_323 = vset_lane_p16(vgetq_lane_p16(__s2_323, __p3_323), __s0_323, __p1_323); \ __ret_323; \ }) #else #define vcopy_laneq_p16(__p0_324, __p1_324, __p2_324, __p3_324) __extension__ ({ \ poly16x4_t __ret_324; \ poly16x4_t __s0_324 = __p0_324; \ poly16x8_t __s2_324 = __p2_324; \ poly16x4_t __rev0_324; __rev0_324 = __builtin_shufflevector(__s0_324, __s0_324, 3, 2, 1, 0); \ poly16x8_t __rev2_324; __rev2_324 = __builtin_shufflevector(__s2_324, __s2_324, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_324 = __noswap_vset_lane_p16(__noswap_vgetq_lane_p16(__rev2_324, __p3_324), __rev0_324, __p1_324); \ __ret_324 = __builtin_shufflevector(__ret_324, __ret_324, 3, 2, 1, 0); \ __ret_324; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_laneq_u8(__p0_325, __p1_325, __p2_325, __p3_325) __extension__ ({ \ uint8x8_t __ret_325; \ uint8x8_t __s0_325 = __p0_325; \ uint8x16_t __s2_325 = __p2_325; \ __ret_325 = vset_lane_u8(vgetq_lane_u8(__s2_325, __p3_325), __s0_325, __p1_325); \ __ret_325; \ }) #else #define vcopy_laneq_u8(__p0_326, __p1_326, __p2_326, __p3_326) __extension__ ({ \ uint8x8_t __ret_326; \ uint8x8_t __s0_326 = __p0_326; \ uint8x16_t __s2_326 = __p2_326; \ uint8x8_t __rev0_326; __rev0_326 = __builtin_shufflevector(__s0_326, __s0_326, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __rev2_326; __rev2_326 = __builtin_shufflevector(__s2_326, __s2_326, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_326 = __noswap_vset_lane_u8(__noswap_vgetq_lane_u8(__rev2_326, __p3_326), 
__rev0_326, __p1_326); \ __ret_326 = __builtin_shufflevector(__ret_326, __ret_326, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_326; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_laneq_u32(__p0_327, __p1_327, __p2_327, __p3_327) __extension__ ({ \ uint32x2_t __ret_327; \ uint32x2_t __s0_327 = __p0_327; \ uint32x4_t __s2_327 = __p2_327; \ __ret_327 = vset_lane_u32(vgetq_lane_u32(__s2_327, __p3_327), __s0_327, __p1_327); \ __ret_327; \ }) #else #define vcopy_laneq_u32(__p0_328, __p1_328, __p2_328, __p3_328) __extension__ ({ \ uint32x2_t __ret_328; \ uint32x2_t __s0_328 = __p0_328; \ uint32x4_t __s2_328 = __p2_328; \ uint32x2_t __rev0_328; __rev0_328 = __builtin_shufflevector(__s0_328, __s0_328, 1, 0); \ uint32x4_t __rev2_328; __rev2_328 = __builtin_shufflevector(__s2_328, __s2_328, 3, 2, 1, 0); \ __ret_328 = __noswap_vset_lane_u32(__noswap_vgetq_lane_u32(__rev2_328, __p3_328), __rev0_328, __p1_328); \ __ret_328 = __builtin_shufflevector(__ret_328, __ret_328, 1, 0); \ __ret_328; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_laneq_u64(__p0_329, __p1_329, __p2_329, __p3_329) __extension__ ({ \ uint64x1_t __ret_329; \ uint64x1_t __s0_329 = __p0_329; \ uint64x2_t __s2_329 = __p2_329; \ __ret_329 = vset_lane_u64(vgetq_lane_u64(__s2_329, __p3_329), __s0_329, __p1_329); \ __ret_329; \ }) #else #define vcopy_laneq_u64(__p0_330, __p1_330, __p2_330, __p3_330) __extension__ ({ \ uint64x1_t __ret_330; \ uint64x1_t __s0_330 = __p0_330; \ uint64x2_t __s2_330 = __p2_330; \ uint64x2_t __rev2_330; __rev2_330 = __builtin_shufflevector(__s2_330, __s2_330, 1, 0); \ __ret_330 = vset_lane_u64(__noswap_vgetq_lane_u64(__rev2_330, __p3_330), __s0_330, __p1_330); \ __ret_330; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_laneq_u16(__p0_331, __p1_331, __p2_331, __p3_331) __extension__ ({ \ uint16x4_t __ret_331; \ uint16x4_t __s0_331 = __p0_331; \ uint16x8_t __s2_331 = __p2_331; \ __ret_331 = vset_lane_u16(vgetq_lane_u16(__s2_331, __p3_331), __s0_331, __p1_331); \ __ret_331; \ }) #else #define vcopy_laneq_u16(__p0_332, __p1_332, __p2_332, __p3_332) __extension__ ({ \ uint16x4_t __ret_332; \ uint16x4_t __s0_332 = __p0_332; \ uint16x8_t __s2_332 = __p2_332; \ uint16x4_t __rev0_332; __rev0_332 = __builtin_shufflevector(__s0_332, __s0_332, 3, 2, 1, 0); \ uint16x8_t __rev2_332; __rev2_332 = __builtin_shufflevector(__s2_332, __s2_332, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_332 = __noswap_vset_lane_u16(__noswap_vgetq_lane_u16(__rev2_332, __p3_332), __rev0_332, __p1_332); \ __ret_332 = __builtin_shufflevector(__ret_332, __ret_332, 3, 2, 1, 0); \ __ret_332; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_laneq_s8(__p0_333, __p1_333, __p2_333, __p3_333) __extension__ ({ \ int8x8_t __ret_333; \ int8x8_t __s0_333 = __p0_333; \ int8x16_t __s2_333 = __p2_333; \ __ret_333 = vset_lane_s8(vgetq_lane_s8(__s2_333, __p3_333), __s0_333, __p1_333); \ __ret_333; \ }) #else #define vcopy_laneq_s8(__p0_334, __p1_334, __p2_334, __p3_334) __extension__ ({ \ int8x8_t __ret_334; \ int8x8_t __s0_334 = __p0_334; \ int8x16_t __s2_334 = __p2_334; \ int8x8_t __rev0_334; __rev0_334 = __builtin_shufflevector(__s0_334, __s0_334, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __rev2_334; __rev2_334 = __builtin_shufflevector(__s2_334, __s2_334, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_334 = __noswap_vset_lane_s8(__noswap_vgetq_lane_s8(__rev2_334, __p3_334), __rev0_334, __p1_334); \ __ret_334 = __builtin_shufflevector(__ret_334, __ret_334, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_334; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_laneq_f32(__p0_335, 
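/* [Editorial usage sketch - not part of the original header text.]
   vcopy_laneq completes the matrix: a lane of a 128-bit source is copied
   into a lane of a 64-bit destination. A sketch (hypothetical function
   name, AArch64 assumed):

       #include <arm_neon.h>

       static uint32x2_t demo_vcopy_laneq(void) {
           uint32x2_t d = vdup_n_u32(0u);
           uint32x4_t s = vsetq_lane_u32(8u, vdupq_n_u32(5u), 3);
           return vcopy_laneq_u32(d, 1, s, 3);  // result = {0, 8}
       }
*/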
__p1_335, __p2_335, __p3_335) __extension__ ({ \ float32x2_t __ret_335; \ float32x2_t __s0_335 = __p0_335; \ float32x4_t __s2_335 = __p2_335; \ __ret_335 = vset_lane_f32(vgetq_lane_f32(__s2_335, __p3_335), __s0_335, __p1_335); \ __ret_335; \ }) #else #define vcopy_laneq_f32(__p0_336, __p1_336, __p2_336, __p3_336) __extension__ ({ \ float32x2_t __ret_336; \ float32x2_t __s0_336 = __p0_336; \ float32x4_t __s2_336 = __p2_336; \ float32x2_t __rev0_336; __rev0_336 = __builtin_shufflevector(__s0_336, __s0_336, 1, 0); \ float32x4_t __rev2_336; __rev2_336 = __builtin_shufflevector(__s2_336, __s2_336, 3, 2, 1, 0); \ __ret_336 = __noswap_vset_lane_f32(__noswap_vgetq_lane_f32(__rev2_336, __p3_336), __rev0_336, __p1_336); \ __ret_336 = __builtin_shufflevector(__ret_336, __ret_336, 1, 0); \ __ret_336; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_laneq_s32(__p0_337, __p1_337, __p2_337, __p3_337) __extension__ ({ \ int32x2_t __ret_337; \ int32x2_t __s0_337 = __p0_337; \ int32x4_t __s2_337 = __p2_337; \ __ret_337 = vset_lane_s32(vgetq_lane_s32(__s2_337, __p3_337), __s0_337, __p1_337); \ __ret_337; \ }) #else #define vcopy_laneq_s32(__p0_338, __p1_338, __p2_338, __p3_338) __extension__ ({ \ int32x2_t __ret_338; \ int32x2_t __s0_338 = __p0_338; \ int32x4_t __s2_338 = __p2_338; \ int32x2_t __rev0_338; __rev0_338 = __builtin_shufflevector(__s0_338, __s0_338, 1, 0); \ int32x4_t __rev2_338; __rev2_338 = __builtin_shufflevector(__s2_338, __s2_338, 3, 2, 1, 0); \ __ret_338 = __noswap_vset_lane_s32(__noswap_vgetq_lane_s32(__rev2_338, __p3_338), __rev0_338, __p1_338); \ __ret_338 = __builtin_shufflevector(__ret_338, __ret_338, 1, 0); \ __ret_338; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_laneq_s64(__p0_339, __p1_339, __p2_339, __p3_339) __extension__ ({ \ int64x1_t __ret_339; \ int64x1_t __s0_339 = __p0_339; \ int64x2_t __s2_339 = __p2_339; \ __ret_339 = vset_lane_s64(vgetq_lane_s64(__s2_339, __p3_339), __s0_339, __p1_339); \ __ret_339; \ }) #else #define vcopy_laneq_s64(__p0_340, __p1_340, __p2_340, __p3_340) __extension__ ({ \ int64x1_t __ret_340; \ int64x1_t __s0_340 = __p0_340; \ int64x2_t __s2_340 = __p2_340; \ int64x2_t __rev2_340; __rev2_340 = __builtin_shufflevector(__s2_340, __s2_340, 1, 0); \ __ret_340 = vset_lane_s64(__noswap_vgetq_lane_s64(__rev2_340, __p3_340), __s0_340, __p1_340); \ __ret_340; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_laneq_s16(__p0_341, __p1_341, __p2_341, __p3_341) __extension__ ({ \ int16x4_t __ret_341; \ int16x4_t __s0_341 = __p0_341; \ int16x8_t __s2_341 = __p2_341; \ __ret_341 = vset_lane_s16(vgetq_lane_s16(__s2_341, __p3_341), __s0_341, __p1_341); \ __ret_341; \ }) #else #define vcopy_laneq_s16(__p0_342, __p1_342, __p2_342, __p3_342) __extension__ ({ \ int16x4_t __ret_342; \ int16x4_t __s0_342 = __p0_342; \ int16x8_t __s2_342 = __p2_342; \ int16x4_t __rev0_342; __rev0_342 = __builtin_shufflevector(__s0_342, __s0_342, 3, 2, 1, 0); \ int16x8_t __rev2_342; __rev2_342 = __builtin_shufflevector(__s2_342, __s2_342, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_342 = __noswap_vset_lane_s16(__noswap_vgetq_lane_s16(__rev2_342, __p3_342), __rev0_342, __p1_342); \ __ret_342 = __builtin_shufflevector(__ret_342, __ret_342, 3, 2, 1, 0); \ __ret_342; \ }) #endif #define vcreate_p64(__p0) __extension__ ({ \ poly64x1_t __ret; \ uint64_t __promote = __p0; \ __ret = (poly64x1_t)(__promote); \ __ret; \ }) #define vcreate_f64(__p0) __extension__ ({ \ float64x1_t __ret; \ uint64_t __promote = __p0; \ __ret = (float64x1_t)(__promote); \ __ret; \ }) __ai float32_t vcvts_f32_s32(int32_t 
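/* [Editorial usage sketch - not part of the original header text.]
   vcreate_p64 and vcreate_f64 above reinterpret a 64-bit integer bit
   pattern as a one-lane vector; no numeric conversion is performed. A
   sketch (hypothetical function name, AArch64 assumed):

       #include <arm_neon.h>

       static double demo_vcreate(void) {
           // 0x3FF0000000000000 is the IEEE-754 bit pattern of 1.0
           float64x1_t one = vcreate_f64(0x3FF0000000000000ULL);
           return vget_lane_f64(one, 0);  // 1.0
       }
*/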
__p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vcvts_f32_s32(__p0); return __ret; } __ai float32_t vcvts_f32_u32(uint32_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vcvts_f32_u32(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vcvt_f32_f64(float64x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcvt_f32_f64((int8x16_t)__p0, 9); return __ret; } #else __ai float32x2_t vcvt_f32_f64(float64x2_t __p0) { float32x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32x2_t) __builtin_neon_vcvt_f32_f64((int8x16_t)__rev0, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai float32x2_t __noswap_vcvt_f32_f64(float64x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcvt_f32_f64((int8x16_t)__p0, 9); return __ret; } #endif __ai float64_t vcvtd_f64_s64(int64_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vcvtd_f64_s64(__p0); return __ret; } __ai float64_t vcvtd_f64_u64(uint64_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vcvtd_f64_u64(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vcvtq_f64_u64(uint64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vcvtq_f64_v((int8x16_t)__p0, 51); return __ret; } #else __ai float64x2_t vcvtq_f64_u64(uint64x2_t __p0) { float64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64x2_t) __builtin_neon_vcvtq_f64_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vcvtq_f64_s64(int64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vcvtq_f64_v((int8x16_t)__p0, 35); return __ret; } #else __ai float64x2_t vcvtq_f64_s64(int64x2_t __p0) { float64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64x2_t) __builtin_neon_vcvtq_f64_v((int8x16_t)__rev0, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vcvt_f64_u64(uint64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vcvt_f64_v((int8x8_t)__p0, 19); return __ret; } __ai float64x1_t vcvt_f64_s64(int64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vcvt_f64_v((int8x8_t)__p0, 3); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vcvt_f64_f32(float32x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vcvt_f64_f32((int8x8_t)__p0, 42); return __ret; } #else __ai float64x2_t vcvt_f64_f32(float32x2_t __p0) { float64x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64x2_t) __builtin_neon_vcvt_f64_f32((int8x8_t)__rev0, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai float64x2_t __noswap_vcvt_f64_f32(float32x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vcvt_f64_f32((int8x8_t)__p0, 42); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float16x8_t vcvt_high_f16_f32(float16x4_t __p0, float32x4_t __p1) { float16x8_t __ret; __ret = vcombine_f16(__p0, vcvt_f16_f32(__p1)); return __ret; } #else __ai float16x8_t vcvt_high_f16_f32(float16x4_t __p0, float32x4_t __p1) { float16x8_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vcombine_f16(__rev0, __noswap_vcvt_f16_f32(__rev1)); __ret = 
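/* [Editorial usage sketch - not part of the original header text.]
   The vcvt conversions above go between integer and floating-point lanes
   (vcvts_/vcvtd_ for scalars, vcvt_/vcvtq_ for vectors) and between f32
   and f64 element widths (vcvt_f32_f64 narrows, vcvt_f64_f32 widens). A
   sketch (hypothetical function name, AArch64 assumed):

       #include <arm_neon.h>

       static float64x2_t demo_vcvt(void) {
           float64_t d = vcvtd_f64_s64(-3);                 // -3.0
           float64x2_t v = vcvtq_f64_s64(vdupq_n_s64(7));   // {7.0, 7.0}
           float64x2_t w = vcvt_f64_f32(vdup_n_f32(1.5f));  // {1.5, 1.5}
           (void)d; (void)v;
           return w;
       }
*/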
__builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vcvt_high_f32_f16(float16x8_t __p0) { float32x4_t __ret; __ret = vcvt_f32_f16(vget_high_f16(__p0)); return __ret; } #else __ai float32x4_t vcvt_high_f32_f16(float16x8_t __p0) { float32x4_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vcvt_f32_f16(__noswap_vget_high_f16(__rev0)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vcvt_high_f32_f64(float32x2_t __p0, float64x2_t __p1) { float32x4_t __ret; __ret = vcombine_f32(__p0, vcvt_f32_f64(__p1)); return __ret; } #else __ai float32x4_t vcvt_high_f32_f64(float32x2_t __p0, float64x2_t __p1) { float32x4_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __noswap_vcombine_f32(__rev0, __noswap_vcvt_f32_f64(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vcvt_high_f64_f32(float32x4_t __p0) { float64x2_t __ret; __ret = vcvt_f64_f32(vget_high_f32(__p0)); return __ret; } #else __ai float64x2_t vcvt_high_f64_f32(float32x4_t __p0) { float64x2_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __noswap_vcvt_f64_f32(__noswap_vget_high_f32(__rev0)); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #define vcvts_n_f32_u32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ uint32_t __s0 = __p0; \ __ret = (float32_t) __builtin_neon_vcvts_n_f32_u32(__s0, __p1); \ __ret; \ }) #define vcvts_n_f32_s32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ int32_t __s0 = __p0; \ __ret = (float32_t) __builtin_neon_vcvts_n_f32_s32(__s0, __p1); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vcvtq_n_f64_u64(__p0, __p1) __extension__ ({ \ float64x2_t __ret; \ uint64x2_t __s0 = __p0; \ __ret = (float64x2_t) __builtin_neon_vcvtq_n_f64_v((int8x16_t)__s0, __p1, 51); \ __ret; \ }) #else #define vcvtq_n_f64_u64(__p0, __p1) __extension__ ({ \ float64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (float64x2_t) __builtin_neon_vcvtq_n_f64_v((int8x16_t)__rev0, __p1, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcvtq_n_f64_s64(__p0, __p1) __extension__ ({ \ float64x2_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (float64x2_t) __builtin_neon_vcvtq_n_f64_v((int8x16_t)__s0, __p1, 35); \ __ret; \ }) #else #define vcvtq_n_f64_s64(__p0, __p1) __extension__ ({ \ float64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (float64x2_t) __builtin_neon_vcvtq_n_f64_v((int8x16_t)__rev0, __p1, 35); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vcvt_n_f64_u64(__p0, __p1) __extension__ ({ \ float64x1_t __ret; \ uint64x1_t __s0 = __p0; \ __ret = (float64x1_t) __builtin_neon_vcvt_n_f64_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) #define vcvt_n_f64_s64(__p0, __p1) __extension__ ({ \ float64x1_t __ret; \ int64x1_t __s0 = __p0; \ __ret = (float64x1_t) __builtin_neon_vcvt_n_f64_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) #define vcvtd_n_f64_u64(__p0, __p1) __extension__ ({ \ float64_t __ret; \ uint64_t __s0 = __p0; \ __ret = (float64_t) 
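/* [Editorial usage sketch - not part of the original header text.]
   The vcvt_high forms above work on the upper half of a 128-bit vector:
   vcvt_high_f32_f64 narrows two f64 lanes and appends them after an
   existing f32x2 low half, while vcvt_high_f64_f32 widens the upper two
   f32 lanes of an f32x4. A sketch (hypothetical function name, AArch64
   assumed):

       #include <arm_neon.h>

       static float64x2_t demo_vcvt_high(void) {
           float32x2_t lo = vdup_n_f32(1.0f);
           float64x2_t hi = vdupq_n_f64(2.0);
           float32x4_t packed = vcvt_high_f32_f64(lo, hi);  // {1, 1, 2, 2}
           return vcvt_high_f64_f32(packed);                // {2.0, 2.0}
       }
*/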
__builtin_neon_vcvtd_n_f64_u64(__s0, __p1); \ __ret; \ }) #define vcvtd_n_f64_s64(__p0, __p1) __extension__ ({ \ float64_t __ret; \ int64_t __s0 = __p0; \ __ret = (float64_t) __builtin_neon_vcvtd_n_f64_s64(__s0, __p1); \ __ret; \ }) #define vcvts_n_s32_f32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ float32_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vcvts_n_s32_f32(__s0, __p1); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vcvtq_n_s64_f64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ float64x2_t __s0 = __p0; \ __ret = (int64x2_t) __builtin_neon_vcvtq_n_s64_v((int8x16_t)__s0, __p1, 35); \ __ret; \ }) #else #define vcvtq_n_s64_f64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ float64x2_t __s0 = __p0; \ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int64x2_t) __builtin_neon_vcvtq_n_s64_v((int8x16_t)__rev0, __p1, 35); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vcvt_n_s64_f64(__p0, __p1) __extension__ ({ \ int64x1_t __ret; \ float64x1_t __s0 = __p0; \ __ret = (int64x1_t) __builtin_neon_vcvt_n_s64_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) #define vcvtd_n_s64_f64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ float64_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vcvtd_n_s64_f64(__s0, __p1); \ __ret; \ }) #define vcvts_n_u32_f32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ float32_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vcvts_n_u32_f32(__s0, __p1); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vcvtq_n_u64_f64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ float64x2_t __s0 = __p0; \ __ret = (uint64x2_t) __builtin_neon_vcvtq_n_u64_v((int8x16_t)__s0, __p1, 51); \ __ret; \ }) #else #define vcvtq_n_u64_f64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ float64x2_t __s0 = __p0; \ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint64x2_t) __builtin_neon_vcvtq_n_u64_v((int8x16_t)__rev0, __p1, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vcvt_n_u64_f64(__p0, __p1) __extension__ ({ \ uint64x1_t __ret; \ float64x1_t __s0 = __p0; \ __ret = (uint64x1_t) __builtin_neon_vcvt_n_u64_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) #define vcvtd_n_u64_f64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ float64_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vcvtd_n_u64_f64(__s0, __p1); \ __ret; \ }) __ai int32_t vcvts_s32_f32(float32_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vcvts_s32_f32(__p0); return __ret; } __ai int64_t vcvtd_s64_f64(float64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vcvtd_s64_f64(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vcvtq_s64_f64(float64x2_t __p0) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vcvtq_s64_v((int8x16_t)__p0, 35); return __ret; } #else __ai int64x2_t vcvtq_s64_f64(float64x2_t __p0) { int64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int64x2_t) __builtin_neon_vcvtq_s64_v((int8x16_t)__rev0, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vcvt_s64_f64(float64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vcvt_s64_v((int8x8_t)__p0, 3); return __ret; } __ai uint32_t vcvts_u32_f32(float32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcvts_u32_f32(__p0); return __ret; } __ai uint64_t vcvtd_u64_f64(float64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcvtd_u64_f64(__p0); return 
__ret; } #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcvtq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vcvtq_u64_v((int8x16_t)__p0, 51); return __ret; } #else __ai uint64x2_t vcvtq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64x2_t) __builtin_neon_vcvtq_u64_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vcvt_u64_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcvt_u64_v((int8x8_t)__p0, 19); return __ret; } __ai int32_t vcvtas_s32_f32(float32_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vcvtas_s32_f32(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vcvtaq_s64_f64(float64x2_t __p0) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vcvtaq_s64_v((int8x16_t)__p0, 35); return __ret; } #else __ai int64x2_t vcvtaq_s64_f64(float64x2_t __p0) { int64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int64x2_t) __builtin_neon_vcvtaq_s64_v((int8x16_t)__rev0, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vcvta_s64_f64(float64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vcvta_s64_v((int8x8_t)__p0, 3); return __ret; } __ai int64_t vcvtad_s64_f64(float64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vcvtad_s64_f64(__p0); return __ret; } __ai uint32_t vcvtas_u32_f32(float32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcvtas_u32_f32(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcvtaq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vcvtaq_u64_v((int8x16_t)__p0, 51); return __ret; } #else __ai uint64x2_t vcvtaq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64x2_t) __builtin_neon_vcvtaq_u64_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vcvta_u64_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcvta_u64_v((int8x8_t)__p0, 19); return __ret; } __ai uint64_t vcvtad_u64_f64(float64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcvtad_u64_f64(__p0); return __ret; } __ai int32_t vcvtms_s32_f32(float32_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vcvtms_s32_f32(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vcvtmq_s64_f64(float64x2_t __p0) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vcvtmq_s64_v((int8x16_t)__p0, 35); return __ret; } #else __ai int64x2_t vcvtmq_s64_f64(float64x2_t __p0) { int64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int64x2_t) __builtin_neon_vcvtmq_s64_v((int8x16_t)__rev0, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vcvtm_s64_f64(float64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vcvtm_s64_v((int8x8_t)__p0, 3); return __ret; } __ai int64_t vcvtmd_s64_f64(float64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vcvtmd_s64_f64(__p0); return __ret; } __ai uint32_t vcvtms_u32_f32(float32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcvtms_u32_f32(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcvtmq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) 
__builtin_neon_vcvtmq_u64_v((int8x16_t)__p0, 51); return __ret; } #else __ai uint64x2_t vcvtmq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64x2_t) __builtin_neon_vcvtmq_u64_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vcvtm_u64_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcvtm_u64_v((int8x8_t)__p0, 19); return __ret; } __ai uint64_t vcvtmd_u64_f64(float64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcvtmd_u64_f64(__p0); return __ret; } __ai int32_t vcvtns_s32_f32(float32_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vcvtns_s32_f32(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vcvtnq_s64_f64(float64x2_t __p0) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vcvtnq_s64_v((int8x16_t)__p0, 35); return __ret; } #else __ai int64x2_t vcvtnq_s64_f64(float64x2_t __p0) { int64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int64x2_t) __builtin_neon_vcvtnq_s64_v((int8x16_t)__rev0, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vcvtn_s64_f64(float64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vcvtn_s64_v((int8x8_t)__p0, 3); return __ret; } __ai int64_t vcvtnd_s64_f64(float64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vcvtnd_s64_f64(__p0); return __ret; } __ai uint32_t vcvtns_u32_f32(float32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcvtns_u32_f32(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcvtnq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vcvtnq_u64_v((int8x16_t)__p0, 51); return __ret; } #else __ai uint64x2_t vcvtnq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64x2_t) __builtin_neon_vcvtnq_u64_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vcvtn_u64_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcvtn_u64_v((int8x8_t)__p0, 19); return __ret; } __ai uint64_t vcvtnd_u64_f64(float64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcvtnd_u64_f64(__p0); return __ret; } __ai int32_t vcvtps_s32_f32(float32_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vcvtps_s32_f32(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vcvtpq_s64_f64(float64x2_t __p0) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vcvtpq_s64_v((int8x16_t)__p0, 35); return __ret; } #else __ai int64x2_t vcvtpq_s64_f64(float64x2_t __p0) { int64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int64x2_t) __builtin_neon_vcvtpq_s64_v((int8x16_t)__rev0, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vcvtp_s64_f64(float64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vcvtp_s64_v((int8x8_t)__p0, 3); return __ret; } __ai int64_t vcvtpd_s64_f64(float64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vcvtpd_s64_f64(__p0); return __ret; } __ai uint32_t vcvtps_u32_f32(float32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcvtps_u32_f32(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcvtpq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; __ret = 
(uint64x2_t) __builtin_neon_vcvtpq_u64_v((int8x16_t)__p0, 51); return __ret; } #else __ai uint64x2_t vcvtpq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64x2_t) __builtin_neon_vcvtpq_u64_v((int8x16_t)__rev0, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vcvtp_u64_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcvtp_u64_v((int8x8_t)__p0, 19); return __ret; } __ai uint64_t vcvtpd_u64_f64(float64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcvtpd_u64_f64(__p0); return __ret; } __ai float32_t vcvtxd_f32_f64(float64_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vcvtxd_f32_f64(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vcvtx_f32_f64(float64x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcvtx_f32_v((int8x16_t)__p0, 42); return __ret; } #else __ai float32x2_t vcvtx_f32_f64(float64x2_t __p0) { float32x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32x2_t) __builtin_neon_vcvtx_f32_v((int8x16_t)__rev0, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai float32x2_t __noswap_vcvtx_f32_f64(float64x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcvtx_f32_v((int8x16_t)__p0, 42); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vcvtx_high_f32_f64(float32x2_t __p0, float64x2_t __p1) { float32x4_t __ret; __ret = vcombine_f32(__p0, vcvtx_f32_f64(__p1)); return __ret; } #else __ai float32x4_t vcvtx_high_f32_f64(float32x2_t __p0, float64x2_t __p1) { float32x4_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __noswap_vcombine_f32(__rev0, __noswap_vcvtx_f32_f64(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vdivq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = __p0 / __p1; return __ret; } #else __ai float64x2_t vdivq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 / __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vdivq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = __p0 / __p1; return __ret; } #else __ai float32x4_t vdivq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 / __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif __ai float64x1_t vdiv_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; __ret = __p0 / __p1; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = __p0 / __p1; return __ret; } #else __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 / __rev1; __ret = 
__builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vdupb_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x8_t __s0 = __p0; \ __ret = (poly8_t) __builtin_neon_vdupb_lane_i8((poly8x8_t)__s0, __p1); \ __ret; \ }) #else #define vdupb_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x8_t __s0 = __p0; \ poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly8_t) __builtin_neon_vdupb_lane_i8((poly8x8_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vduph_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x4_t __s0 = __p0; \ __ret = (poly16_t) __builtin_neon_vduph_lane_i16((poly16x4_t)__s0, __p1); \ __ret; \ }) #else #define vduph_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x4_t __s0 = __p0; \ poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (poly16_t) __builtin_neon_vduph_lane_i16((poly16x4_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupb_lane_u8(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint8x8_t __s0 = __p0; \ __ret = (uint8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__s0, __p1); \ __ret; \ }) #else #define vdupb_lane_u8(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdups_lane_u32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint32x2_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vdups_lane_i32((int32x2_t)__s0, __p1); \ __ret; \ }) #else #define vdups_lane_u32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint32_t) __builtin_neon_vdups_lane_i32((int32x2_t)__rev0, __p1); \ __ret; \ }) #endif #define vdupd_lane_u64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ uint64x1_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vdupd_lane_i64((int64x1_t)__s0, __p1); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vduph_lane_u16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint16x4_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vduph_lane_i16((int16x4_t)__s0, __p1); \ __ret; \ }) #else #define vduph_lane_u16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint16_t) __builtin_neon_vduph_lane_i16((int16x4_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupb_lane_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8x8_t __s0 = __p0; \ __ret = (int8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__s0, __p1); \ __ret; \ }) #else #define vdupb_lane_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__rev0, __p1); \ __ret; \ }) #endif #define vdupd_lane_f64(__p0, __p1) __extension__ ({ \ float64_t __ret; \ float64x1_t __s0 = __p0; \ __ret = (float64_t) __builtin_neon_vdupd_lane_f64((float64x1_t)__s0, __p1); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vdups_lane_f32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ float32x2_t __s0 = __p0; \ __ret = (float32_t) 
__builtin_neon_vdups_lane_f32((float32x2_t)__s0, __p1); \ __ret; \ }) #else #define vdups_lane_f32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ float32x2_t __s0 = __p0; \ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (float32_t) __builtin_neon_vdups_lane_f32((float32x2_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdups_lane_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32x2_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vdups_lane_i32((int32x2_t)__s0, __p1); \ __ret; \ }) #else #define vdups_lane_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int32_t) __builtin_neon_vdups_lane_i32((int32x2_t)__rev0, __p1); \ __ret; \ }) #endif #define vdupd_lane_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64x1_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vdupd_lane_i64((int64x1_t)__s0, __p1); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vduph_lane_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16x4_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vduph_lane_i16((int16x4_t)__s0, __p1); \ __ret; \ }) #else #define vduph_lane_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int16_t) __builtin_neon_vduph_lane_i16((int16x4_t)__rev0, __p1); \ __ret; \ }) #endif #define vdup_lane_p64(__p0_343, __p1_343) __extension__ ({ \ poly64x1_t __ret_343; \ poly64x1_t __s0_343 = __p0_343; \ __ret_343 = splat_lane_p64(__s0_343, __p1_343); \ __ret_343; \ }) #ifdef __LITTLE_ENDIAN__ #define vdupq_lane_p64(__p0_344, __p1_344) __extension__ ({ \ poly64x2_t __ret_344; \ poly64x1_t __s0_344 = __p0_344; \ __ret_344 = splatq_lane_p64(__s0_344, __p1_344); \ __ret_344; \ }) #else #define vdupq_lane_p64(__p0_345, __p1_345) __extension__ ({ \ poly64x2_t __ret_345; \ poly64x1_t __s0_345 = __p0_345; \ __ret_345 = __noswap_splatq_lane_p64(__s0_345, __p1_345); \ __ret_345 = __builtin_shufflevector(__ret_345, __ret_345, 1, 0); \ __ret_345; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_lane_f64(__p0_346, __p1_346) __extension__ ({ \ float64x2_t __ret_346; \ float64x1_t __s0_346 = __p0_346; \ __ret_346 = splatq_lane_f64(__s0_346, __p1_346); \ __ret_346; \ }) #else #define vdupq_lane_f64(__p0_347, __p1_347) __extension__ ({ \ float64x2_t __ret_347; \ float64x1_t __s0_347 = __p0_347; \ __ret_347 = __noswap_splatq_lane_f64(__s0_347, __p1_347); \ __ret_347 = __builtin_shufflevector(__ret_347, __ret_347, 1, 0); \ __ret_347; \ }) #endif #define vdup_lane_f64(__p0_348, __p1_348) __extension__ ({ \ float64x1_t __ret_348; \ float64x1_t __s0_348 = __p0_348; \ __ret_348 = splat_lane_f64(__s0_348, __p1_348); \ __ret_348; \ }) #ifdef __LITTLE_ENDIAN__ #define vdupb_laneq_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x16_t __s0 = __p0; \ __ret = (poly8_t) __builtin_neon_vdupb_laneq_i8((poly8x16_t)__s0, __p1); \ __ret; \ }) #else #define vdupb_laneq_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x16_t __s0 = __p0; \ poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly8_t) __builtin_neon_vdupb_laneq_i8((poly8x16_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vduph_laneq_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x8_t __s0 = __p0; \ __ret = (poly16_t) 
__builtin_neon_vduph_laneq_i16((poly16x8_t)__s0, __p1); \ __ret; \ }) #else #define vduph_laneq_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x8_t __s0 = __p0; \ poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (poly16_t) __builtin_neon_vduph_laneq_i16((poly16x8_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupb_laneq_u8(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint8x16_t __s0 = __p0; \ __ret = (uint8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__s0, __p1); \ __ret; \ }) #else #define vdupb_laneq_u8(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdups_laneq_u32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint32x4_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vdups_laneq_i32((int32x4_t)__s0, __p1); \ __ret; \ }) #else #define vdups_laneq_u32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (uint32_t) __builtin_neon_vdups_laneq_i32((int32x4_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupd_laneq_u64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ uint64x2_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vdupd_laneq_i64((int64x2_t)__s0, __p1); \ __ret; \ }) #else #define vdupd_laneq_u64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (uint64_t) __builtin_neon_vdupd_laneq_i64((int64x2_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vduph_laneq_u16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint16x8_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vduph_laneq_i16((int16x8_t)__s0, __p1); \ __ret; \ }) #else #define vduph_laneq_u16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (uint16_t) __builtin_neon_vduph_laneq_i16((int16x8_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupb_laneq_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8x16_t __s0 = __p0; \ __ret = (int8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__s0, __p1); \ __ret; \ }) #else #define vdupb_laneq_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupd_laneq_f64(__p0, __p1) __extension__ ({ \ float64_t __ret; \ float64x2_t __s0 = __p0; \ __ret = (float64_t) __builtin_neon_vdupd_laneq_f64((float64x2_t)__s0, __p1); \ __ret; \ }) #else #define vdupd_laneq_f64(__p0, __p1) __extension__ ({ \ float64_t __ret; \ float64x2_t __s0 = __p0; \ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (float64_t) __builtin_neon_vdupd_laneq_f64((float64x2_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdups_laneq_f32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ float32x4_t __s0 = __p0; \ __ret = (float32_t) 
__builtin_neon_vdups_laneq_f32((float32x4_t)__s0, __p1); \ __ret; \ }) #else #define vdups_laneq_f32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ float32x4_t __s0 = __p0; \ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (float32_t) __builtin_neon_vdups_laneq_f32((float32x4_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdups_laneq_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32x4_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vdups_laneq_i32((int32x4_t)__s0, __p1); \ __ret; \ }) #else #define vdups_laneq_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (int32_t) __builtin_neon_vdups_laneq_i32((int32x4_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupd_laneq_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64x2_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vdupd_laneq_i64((int64x2_t)__s0, __p1); \ __ret; \ }) #else #define vdupd_laneq_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (int64_t) __builtin_neon_vdupd_laneq_i64((int64x2_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vduph_laneq_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16x8_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vduph_laneq_i16((int16x8_t)__s0, __p1); \ __ret; \ }) #else #define vduph_laneq_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16_t) __builtin_neon_vduph_laneq_i16((int16x8_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_laneq_p8(__p0_349, __p1_349) __extension__ ({ \ poly8x8_t __ret_349; \ poly8x16_t __s0_349 = __p0_349; \ __ret_349 = splat_laneq_p8(__s0_349, __p1_349); \ __ret_349; \ }) #else #define vdup_laneq_p8(__p0_350, __p1_350) __extension__ ({ \ poly8x8_t __ret_350; \ poly8x16_t __s0_350 = __p0_350; \ poly8x16_t __rev0_350; __rev0_350 = __builtin_shufflevector(__s0_350, __s0_350, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_350 = __noswap_splat_laneq_p8(__rev0_350, __p1_350); \ __ret_350 = __builtin_shufflevector(__ret_350, __ret_350, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_350; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_laneq_p64(__p0_351, __p1_351) __extension__ ({ \ poly64x1_t __ret_351; \ poly64x2_t __s0_351 = __p0_351; \ __ret_351 = splat_laneq_p64(__s0_351, __p1_351); \ __ret_351; \ }) #else #define vdup_laneq_p64(__p0_352, __p1_352) __extension__ ({ \ poly64x1_t __ret_352; \ poly64x2_t __s0_352 = __p0_352; \ poly64x2_t __rev0_352; __rev0_352 = __builtin_shufflevector(__s0_352, __s0_352, 1, 0); \ __ret_352 = __noswap_splat_laneq_p64(__rev0_352, __p1_352); \ __ret_352; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_laneq_p16(__p0_353, __p1_353) __extension__ ({ \ poly16x4_t __ret_353; \ poly16x8_t __s0_353 = __p0_353; \ __ret_353 = splat_laneq_p16(__s0_353, __p1_353); \ __ret_353; \ }) #else #define vdup_laneq_p16(__p0_354, __p1_354) __extension__ ({ \ poly16x4_t __ret_354; \ poly16x8_t __s0_354 = __p0_354; \ poly16x8_t __rev0_354; __rev0_354 = __builtin_shufflevector(__s0_354, __s0_354, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_354 = __noswap_splat_laneq_p16(__rev0_354, __p1_354); \ __ret_354 = __builtin_shufflevector(__ret_354, __ret_354, 3, 2, 1, 0); \ 
__ret_354; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_laneq_p8(__p0_355, __p1_355) __extension__ ({ \ poly8x16_t __ret_355; \ poly8x16_t __s0_355 = __p0_355; \ __ret_355 = splatq_laneq_p8(__s0_355, __p1_355); \ __ret_355; \ }) #else #define vdupq_laneq_p8(__p0_356, __p1_356) __extension__ ({ \ poly8x16_t __ret_356; \ poly8x16_t __s0_356 = __p0_356; \ poly8x16_t __rev0_356; __rev0_356 = __builtin_shufflevector(__s0_356, __s0_356, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_356 = __noswap_splatq_laneq_p8(__rev0_356, __p1_356); \ __ret_356 = __builtin_shufflevector(__ret_356, __ret_356, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_356; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_laneq_p64(__p0_357, __p1_357) __extension__ ({ \ poly64x2_t __ret_357; \ poly64x2_t __s0_357 = __p0_357; \ __ret_357 = splatq_laneq_p64(__s0_357, __p1_357); \ __ret_357; \ }) #else #define vdupq_laneq_p64(__p0_358, __p1_358) __extension__ ({ \ poly64x2_t __ret_358; \ poly64x2_t __s0_358 = __p0_358; \ poly64x2_t __rev0_358; __rev0_358 = __builtin_shufflevector(__s0_358, __s0_358, 1, 0); \ __ret_358 = __noswap_splatq_laneq_p64(__rev0_358, __p1_358); \ __ret_358 = __builtin_shufflevector(__ret_358, __ret_358, 1, 0); \ __ret_358; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_laneq_p16(__p0_359, __p1_359) __extension__ ({ \ poly16x8_t __ret_359; \ poly16x8_t __s0_359 = __p0_359; \ __ret_359 = splatq_laneq_p16(__s0_359, __p1_359); \ __ret_359; \ }) #else #define vdupq_laneq_p16(__p0_360, __p1_360) __extension__ ({ \ poly16x8_t __ret_360; \ poly16x8_t __s0_360 = __p0_360; \ poly16x8_t __rev0_360; __rev0_360 = __builtin_shufflevector(__s0_360, __s0_360, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_360 = __noswap_splatq_laneq_p16(__rev0_360, __p1_360); \ __ret_360 = __builtin_shufflevector(__ret_360, __ret_360, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_360; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_laneq_u8(__p0_361, __p1_361) __extension__ ({ \ uint8x16_t __ret_361; \ uint8x16_t __s0_361 = __p0_361; \ __ret_361 = splatq_laneq_u8(__s0_361, __p1_361); \ __ret_361; \ }) #else #define vdupq_laneq_u8(__p0_362, __p1_362) __extension__ ({ \ uint8x16_t __ret_362; \ uint8x16_t __s0_362 = __p0_362; \ uint8x16_t __rev0_362; __rev0_362 = __builtin_shufflevector(__s0_362, __s0_362, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_362 = __noswap_splatq_laneq_u8(__rev0_362, __p1_362); \ __ret_362 = __builtin_shufflevector(__ret_362, __ret_362, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_362; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_laneq_u32(__p0_363, __p1_363) __extension__ ({ \ uint32x4_t __ret_363; \ uint32x4_t __s0_363 = __p0_363; \ __ret_363 = splatq_laneq_u32(__s0_363, __p1_363); \ __ret_363; \ }) #else #define vdupq_laneq_u32(__p0_364, __p1_364) __extension__ ({ \ uint32x4_t __ret_364; \ uint32x4_t __s0_364 = __p0_364; \ uint32x4_t __rev0_364; __rev0_364 = __builtin_shufflevector(__s0_364, __s0_364, 3, 2, 1, 0); \ __ret_364 = __noswap_splatq_laneq_u32(__rev0_364, __p1_364); \ __ret_364 = __builtin_shufflevector(__ret_364, __ret_364, 3, 2, 1, 0); \ __ret_364; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_laneq_u64(__p0_365, __p1_365) __extension__ ({ \ uint64x2_t __ret_365; \ uint64x2_t __s0_365 = __p0_365; \ __ret_365 = splatq_laneq_u64(__s0_365, __p1_365); \ __ret_365; \ }) #else #define vdupq_laneq_u64(__p0_366, __p1_366) __extension__ ({ \ uint64x2_t __ret_366; \ uint64x2_t __s0_366 = __p0_366; \ uint64x2_t __rev0_366; __rev0_366 = 
__builtin_shufflevector(__s0_366, __s0_366, 1, 0); \ __ret_366 = __noswap_splatq_laneq_u64(__rev0_366, __p1_366); \ __ret_366 = __builtin_shufflevector(__ret_366, __ret_366, 1, 0); \ __ret_366; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_laneq_u16(__p0_367, __p1_367) __extension__ ({ \ uint16x8_t __ret_367; \ uint16x8_t __s0_367 = __p0_367; \ __ret_367 = splatq_laneq_u16(__s0_367, __p1_367); \ __ret_367; \ }) #else #define vdupq_laneq_u16(__p0_368, __p1_368) __extension__ ({ \ uint16x8_t __ret_368; \ uint16x8_t __s0_368 = __p0_368; \ uint16x8_t __rev0_368; __rev0_368 = __builtin_shufflevector(__s0_368, __s0_368, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_368 = __noswap_splatq_laneq_u16(__rev0_368, __p1_368); \ __ret_368 = __builtin_shufflevector(__ret_368, __ret_368, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_368; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_laneq_s8(__p0_369, __p1_369) __extension__ ({ \ int8x16_t __ret_369; \ int8x16_t __s0_369 = __p0_369; \ __ret_369 = splatq_laneq_s8(__s0_369, __p1_369); \ __ret_369; \ }) #else #define vdupq_laneq_s8(__p0_370, __p1_370) __extension__ ({ \ int8x16_t __ret_370; \ int8x16_t __s0_370 = __p0_370; \ int8x16_t __rev0_370; __rev0_370 = __builtin_shufflevector(__s0_370, __s0_370, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_370 = __noswap_splatq_laneq_s8(__rev0_370, __p1_370); \ __ret_370 = __builtin_shufflevector(__ret_370, __ret_370, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_370; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_laneq_f64(__p0_371, __p1_371) __extension__ ({ \ float64x2_t __ret_371; \ float64x2_t __s0_371 = __p0_371; \ __ret_371 = splatq_laneq_f64(__s0_371, __p1_371); \ __ret_371; \ }) #else #define vdupq_laneq_f64(__p0_372, __p1_372) __extension__ ({ \ float64x2_t __ret_372; \ float64x2_t __s0_372 = __p0_372; \ float64x2_t __rev0_372; __rev0_372 = __builtin_shufflevector(__s0_372, __s0_372, 1, 0); \ __ret_372 = __noswap_splatq_laneq_f64(__rev0_372, __p1_372); \ __ret_372 = __builtin_shufflevector(__ret_372, __ret_372, 1, 0); \ __ret_372; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_laneq_f32(__p0_373, __p1_373) __extension__ ({ \ float32x4_t __ret_373; \ float32x4_t __s0_373 = __p0_373; \ __ret_373 = splatq_laneq_f32(__s0_373, __p1_373); \ __ret_373; \ }) #else #define vdupq_laneq_f32(__p0_374, __p1_374) __extension__ ({ \ float32x4_t __ret_374; \ float32x4_t __s0_374 = __p0_374; \ float32x4_t __rev0_374; __rev0_374 = __builtin_shufflevector(__s0_374, __s0_374, 3, 2, 1, 0); \ __ret_374 = __noswap_splatq_laneq_f32(__rev0_374, __p1_374); \ __ret_374 = __builtin_shufflevector(__ret_374, __ret_374, 3, 2, 1, 0); \ __ret_374; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_laneq_f16(__p0_375, __p1_375) __extension__ ({ \ float16x8_t __ret_375; \ float16x8_t __s0_375 = __p0_375; \ __ret_375 = splatq_laneq_f16(__s0_375, __p1_375); \ __ret_375; \ }) #else #define vdupq_laneq_f16(__p0_376, __p1_376) __extension__ ({ \ float16x8_t __ret_376; \ float16x8_t __s0_376 = __p0_376; \ float16x8_t __rev0_376; __rev0_376 = __builtin_shufflevector(__s0_376, __s0_376, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_376 = __noswap_splatq_laneq_f16(__rev0_376, __p1_376); \ __ret_376 = __builtin_shufflevector(__ret_376, __ret_376, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_376; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_laneq_s32(__p0_377, __p1_377) __extension__ ({ \ int32x4_t __ret_377; \ int32x4_t __s0_377 = __p0_377; \ __ret_377 = splatq_laneq_s32(__s0_377, __p1_377); \ __ret_377; \ }) #else #define 
vdupq_laneq_s32(__p0_378, __p1_378) __extension__ ({ \ int32x4_t __ret_378; \ int32x4_t __s0_378 = __p0_378; \ int32x4_t __rev0_378; __rev0_378 = __builtin_shufflevector(__s0_378, __s0_378, 3, 2, 1, 0); \ __ret_378 = __noswap_splatq_laneq_s32(__rev0_378, __p1_378); \ __ret_378 = __builtin_shufflevector(__ret_378, __ret_378, 3, 2, 1, 0); \ __ret_378; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_laneq_s64(__p0_379, __p1_379) __extension__ ({ \ int64x2_t __ret_379; \ int64x2_t __s0_379 = __p0_379; \ __ret_379 = splatq_laneq_s64(__s0_379, __p1_379); \ __ret_379; \ }) #else #define vdupq_laneq_s64(__p0_380, __p1_380) __extension__ ({ \ int64x2_t __ret_380; \ int64x2_t __s0_380 = __p0_380; \ int64x2_t __rev0_380; __rev0_380 = __builtin_shufflevector(__s0_380, __s0_380, 1, 0); \ __ret_380 = __noswap_splatq_laneq_s64(__rev0_380, __p1_380); \ __ret_380 = __builtin_shufflevector(__ret_380, __ret_380, 1, 0); \ __ret_380; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdupq_laneq_s16(__p0_381, __p1_381) __extension__ ({ \ int16x8_t __ret_381; \ int16x8_t __s0_381 = __p0_381; \ __ret_381 = splatq_laneq_s16(__s0_381, __p1_381); \ __ret_381; \ }) #else #define vdupq_laneq_s16(__p0_382, __p1_382) __extension__ ({ \ int16x8_t __ret_382; \ int16x8_t __s0_382 = __p0_382; \ int16x8_t __rev0_382; __rev0_382 = __builtin_shufflevector(__s0_382, __s0_382, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_382 = __noswap_splatq_laneq_s16(__rev0_382, __p1_382); \ __ret_382 = __builtin_shufflevector(__ret_382, __ret_382, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_382; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_laneq_u8(__p0_383, __p1_383) __extension__ ({ \ uint8x8_t __ret_383; \ uint8x16_t __s0_383 = __p0_383; \ __ret_383 = splat_laneq_u8(__s0_383, __p1_383); \ __ret_383; \ }) #else #define vdup_laneq_u8(__p0_384, __p1_384) __extension__ ({ \ uint8x8_t __ret_384; \ uint8x16_t __s0_384 = __p0_384; \ uint8x16_t __rev0_384; __rev0_384 = __builtin_shufflevector(__s0_384, __s0_384, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_384 = __noswap_splat_laneq_u8(__rev0_384, __p1_384); \ __ret_384 = __builtin_shufflevector(__ret_384, __ret_384, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_384; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_laneq_u32(__p0_385, __p1_385) __extension__ ({ \ uint32x2_t __ret_385; \ uint32x4_t __s0_385 = __p0_385; \ __ret_385 = splat_laneq_u32(__s0_385, __p1_385); \ __ret_385; \ }) #else #define vdup_laneq_u32(__p0_386, __p1_386) __extension__ ({ \ uint32x2_t __ret_386; \ uint32x4_t __s0_386 = __p0_386; \ uint32x4_t __rev0_386; __rev0_386 = __builtin_shufflevector(__s0_386, __s0_386, 3, 2, 1, 0); \ __ret_386 = __noswap_splat_laneq_u32(__rev0_386, __p1_386); \ __ret_386 = __builtin_shufflevector(__ret_386, __ret_386, 1, 0); \ __ret_386; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_laneq_u64(__p0_387, __p1_387) __extension__ ({ \ uint64x1_t __ret_387; \ uint64x2_t __s0_387 = __p0_387; \ __ret_387 = splat_laneq_u64(__s0_387, __p1_387); \ __ret_387; \ }) #else #define vdup_laneq_u64(__p0_388, __p1_388) __extension__ ({ \ uint64x1_t __ret_388; \ uint64x2_t __s0_388 = __p0_388; \ uint64x2_t __rev0_388; __rev0_388 = __builtin_shufflevector(__s0_388, __s0_388, 1, 0); \ __ret_388 = __noswap_splat_laneq_u64(__rev0_388, __p1_388); \ __ret_388; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_laneq_u16(__p0_389, __p1_389) __extension__ ({ \ uint16x4_t __ret_389; \ uint16x8_t __s0_389 = __p0_389; \ __ret_389 = splat_laneq_u16(__s0_389, __p1_389); \ __ret_389; \ }) #else #define vdup_laneq_u16(__p0_390, __p1_390) 
__extension__ ({ \ uint16x4_t __ret_390; \ uint16x8_t __s0_390 = __p0_390; \ uint16x8_t __rev0_390; __rev0_390 = __builtin_shufflevector(__s0_390, __s0_390, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_390 = __noswap_splat_laneq_u16(__rev0_390, __p1_390); \ __ret_390 = __builtin_shufflevector(__ret_390, __ret_390, 3, 2, 1, 0); \ __ret_390; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_laneq_s8(__p0_391, __p1_391) __extension__ ({ \ int8x8_t __ret_391; \ int8x16_t __s0_391 = __p0_391; \ __ret_391 = splat_laneq_s8(__s0_391, __p1_391); \ __ret_391; \ }) #else #define vdup_laneq_s8(__p0_392, __p1_392) __extension__ ({ \ int8x8_t __ret_392; \ int8x16_t __s0_392 = __p0_392; \ int8x16_t __rev0_392; __rev0_392 = __builtin_shufflevector(__s0_392, __s0_392, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_392 = __noswap_splat_laneq_s8(__rev0_392, __p1_392); \ __ret_392 = __builtin_shufflevector(__ret_392, __ret_392, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_392; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_laneq_f64(__p0_393, __p1_393) __extension__ ({ \ float64x1_t __ret_393; \ float64x2_t __s0_393 = __p0_393; \ __ret_393 = splat_laneq_f64(__s0_393, __p1_393); \ __ret_393; \ }) #else #define vdup_laneq_f64(__p0_394, __p1_394) __extension__ ({ \ float64x1_t __ret_394; \ float64x2_t __s0_394 = __p0_394; \ float64x2_t __rev0_394; __rev0_394 = __builtin_shufflevector(__s0_394, __s0_394, 1, 0); \ __ret_394 = __noswap_splat_laneq_f64(__rev0_394, __p1_394); \ __ret_394; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_laneq_f32(__p0_395, __p1_395) __extension__ ({ \ float32x2_t __ret_395; \ float32x4_t __s0_395 = __p0_395; \ __ret_395 = splat_laneq_f32(__s0_395, __p1_395); \ __ret_395; \ }) #else #define vdup_laneq_f32(__p0_396, __p1_396) __extension__ ({ \ float32x2_t __ret_396; \ float32x4_t __s0_396 = __p0_396; \ float32x4_t __rev0_396; __rev0_396 = __builtin_shufflevector(__s0_396, __s0_396, 3, 2, 1, 0); \ __ret_396 = __noswap_splat_laneq_f32(__rev0_396, __p1_396); \ __ret_396 = __builtin_shufflevector(__ret_396, __ret_396, 1, 0); \ __ret_396; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_laneq_f16(__p0_397, __p1_397) __extension__ ({ \ float16x4_t __ret_397; \ float16x8_t __s0_397 = __p0_397; \ __ret_397 = splat_laneq_f16(__s0_397, __p1_397); \ __ret_397; \ }) #else #define vdup_laneq_f16(__p0_398, __p1_398) __extension__ ({ \ float16x4_t __ret_398; \ float16x8_t __s0_398 = __p0_398; \ float16x8_t __rev0_398; __rev0_398 = __builtin_shufflevector(__s0_398, __s0_398, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_398 = __noswap_splat_laneq_f16(__rev0_398, __p1_398); \ __ret_398 = __builtin_shufflevector(__ret_398, __ret_398, 3, 2, 1, 0); \ __ret_398; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_laneq_s32(__p0_399, __p1_399) __extension__ ({ \ int32x2_t __ret_399; \ int32x4_t __s0_399 = __p0_399; \ __ret_399 = splat_laneq_s32(__s0_399, __p1_399); \ __ret_399; \ }) #else #define vdup_laneq_s32(__p0_400, __p1_400) __extension__ ({ \ int32x2_t __ret_400; \ int32x4_t __s0_400 = __p0_400; \ int32x4_t __rev0_400; __rev0_400 = __builtin_shufflevector(__s0_400, __s0_400, 3, 2, 1, 0); \ __ret_400 = __noswap_splat_laneq_s32(__rev0_400, __p1_400); \ __ret_400 = __builtin_shufflevector(__ret_400, __ret_400, 1, 0); \ __ret_400; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_laneq_s64(__p0_401, __p1_401) __extension__ ({ \ int64x1_t __ret_401; \ int64x2_t __s0_401 = __p0_401; \ __ret_401 = splat_laneq_s64(__s0_401, __p1_401); \ __ret_401; \ }) #else #define vdup_laneq_s64(__p0_402, __p1_402) __extension__ ({ \ 
int64x1_t __ret_402; \ int64x2_t __s0_402 = __p0_402; \ int64x2_t __rev0_402; __rev0_402 = __builtin_shufflevector(__s0_402, __s0_402, 1, 0); \ __ret_402 = __noswap_splat_laneq_s64(__rev0_402, __p1_402); \ __ret_402; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdup_laneq_s16(__p0_403, __p1_403) __extension__ ({ \ int16x4_t __ret_403; \ int16x8_t __s0_403 = __p0_403; \ __ret_403 = splat_laneq_s16(__s0_403, __p1_403); \ __ret_403; \ }) #else #define vdup_laneq_s16(__p0_404, __p1_404) __extension__ ({ \ int16x4_t __ret_404; \ int16x8_t __s0_404 = __p0_404; \ int16x8_t __rev0_404; __rev0_404 = __builtin_shufflevector(__s0_404, __s0_404, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_404 = __noswap_splat_laneq_s16(__rev0_404, __p1_404); \ __ret_404 = __builtin_shufflevector(__ret_404, __ret_404, 3, 2, 1, 0); \ __ret_404; \ }) #endif __ai poly64x1_t vdup_n_p64(poly64_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t) {__p0}; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vdupq_n_p64(poly64_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t) {__p0, __p0}; return __ret; } #else __ai poly64x2_t vdupq_n_p64(poly64_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t) {__p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vdupq_n_f64(float64_t __p0) { float64x2_t __ret; __ret = (float64x2_t) {__p0, __p0}; return __ret; } #else __ai float64x2_t vdupq_n_f64(float64_t __p0) { float64x2_t __ret; __ret = (float64x2_t) {__p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vdup_n_f64(float64_t __p0) { float64x1_t __ret; __ret = (float64x1_t) {__p0}; return __ret; } #define vext_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1_t __ret; \ poly64x1_t __s0 = __p0; \ poly64x1_t __s1 = __p1; \ __ret = (poly64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vextq_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __ret; \ poly64x2_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ __ret = (poly64x2_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 38); \ __ret; \ }) #else #define vextq_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __ret; \ poly64x2_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (poly64x2_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 38); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vextq_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __ret; \ float64x2_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ __ret = (float64x2_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 42); \ __ret; \ }) #else #define vextq_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __ret; \ float64x2_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (float64x2_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 42); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vext_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1_t __ret; \ float64x1_t __s0 = __p0; \ float64x1_t __s1 = __p1; \ __ret = (float64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 10); \ 
__ret; \ }) #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vfmaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); return __ret; } #else __ai float64x2_t vfmaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (float64x2_t) __builtin_neon_vfmaq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai float64x2_t __noswap_vfmaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); return __ret; } #endif __ai float64x1_t vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); return __ret; } #define vfmad_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ float64_t __ret; \ float64_t __s0 = __p0; \ float64_t __s1 = __p1; \ float64x1_t __s2 = __p2; \ __ret = (float64_t) __builtin_neon_vfmad_lane_f64(__s0, __s1, (float64x1_t)__s2, __p3); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vfmas_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32_t __ret; \ float32_t __s0 = __p0; \ float32_t __s1 = __p1; \ float32x2_t __s2 = __p2; \ __ret = (float32_t) __builtin_neon_vfmas_lane_f32(__s0, __s1, (float32x2_t)__s2, __p3); \ __ret; \ }) #else #define vfmas_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32_t __ret; \ float32_t __s0 = __p0; \ float32_t __s1 = __p1; \ float32x2_t __s2 = __p2; \ float32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ __ret = (float32_t) __builtin_neon_vfmas_lane_f32(__s0, __s1, (float32x2_t)__rev2, __p3); \ __ret; \ }) #define __noswap_vfmas_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32_t __ret; \ float32_t __s0 = __p0; \ float32_t __s1 = __p1; \ float32x2_t __s2 = __p2; \ __ret = (float32_t) __builtin_neon_vfmas_lane_f32(__s0, __s1, (float32x2_t)__s2, __p3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmaq_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ float64x2_t __ret; \ float64x2_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ float64x1_t __s2 = __p2; \ __ret = (float64x2_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 42); \ __ret; \ }) #else #define vfmaq_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ float64x2_t __ret; \ float64x2_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ float64x1_t __s2 = __p2; \ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (float64x2_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__s2, __p3, 42); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vfmaq_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ float64x2_t __ret; \ float64x2_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ float64x1_t __s2 = __p2; \ __ret = (float64x2_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 42); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define 
vfmaq_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x4_t __ret; \ float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x2_t __s2 = __p2; \ __ret = (float32x4_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 41); \ __ret; \ }) #else #define vfmaq_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x4_t __ret; \ float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x2_t __s2 = __p2; \ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ float32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ __ret = (float32x4_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, __p3, 41); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vfmaq_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x4_t __ret; \ float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x2_t __s2 = __p2; \ __ret = (float32x4_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 41); \ __ret; \ }) #endif #define vfma_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ float64x1_t __ret; \ float64x1_t __s0 = __p0; \ float64x1_t __s1 = __p1; \ float64x1_t __s2 = __p2; \ __ret = (float64x1_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 10); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vfma_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x2_t __ret; \ float32x2_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ float32x2_t __s2 = __p2; \ __ret = (float32x2_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 9); \ __ret; \ }) #else #define vfma_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x2_t __ret; \ float32x2_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ float32x2_t __s2 = __p2; \ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ float32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ __ret = (float32x2_t) __builtin_neon_vfma_lane_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, __p3, 9); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vfma_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x2_t __ret; \ float32x2_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ float32x2_t __s2 = __p2; \ __ret = (float32x2_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 9); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmad_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ float64_t __ret; \ float64_t __s0 = __p0; \ float64_t __s1 = __p1; \ float64x2_t __s2 = __p2; \ __ret = (float64_t) __builtin_neon_vfmad_laneq_f64(__s0, __s1, (float64x2_t)__s2, __p3); \ __ret; \ }) #else #define vfmad_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ float64_t __ret; \ float64_t __s0 = __p0; \ float64_t __s1 = __p1; \ float64x2_t __s2 = __p2; \ float64x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ __ret = (float64_t) __builtin_neon_vfmad_laneq_f64(__s0, __s1, (float64x2_t)__rev2, __p3); \ __ret; \ }) #define __noswap_vfmad_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ float64_t __ret; \ float64_t __s0 = __p0; \ float64_t __s1 = __p1; \ float64x2_t __s2 = __p2; \ __ret = (float64_t) __builtin_neon_vfmad_laneq_f64(__s0, __s1, 
(float64x2_t)__s2, __p3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmas_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32_t __ret; \ float32_t __s0 = __p0; \ float32_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ __ret = (float32_t) __builtin_neon_vfmas_laneq_f32(__s0, __s1, (float32x4_t)__s2, __p3); \ __ret; \ }) #else #define vfmas_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32_t __ret; \ float32_t __s0 = __p0; \ float32_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ __ret = (float32_t) __builtin_neon_vfmas_laneq_f32(__s0, __s1, (float32x4_t)__rev2, __p3); \ __ret; \ }) #define __noswap_vfmas_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32_t __ret; \ float32_t __s0 = __p0; \ float32_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ __ret = (float32_t) __builtin_neon_vfmas_laneq_f32(__s0, __s1, (float32x4_t)__s2, __p3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmaq_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ float64x2_t __ret; \ float64x2_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ float64x2_t __s2 = __p2; \ __ret = (float64x2_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 42); \ __ret; \ }) #else #define vfmaq_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ float64x2_t __ret; \ float64x2_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ float64x2_t __s2 = __p2; \ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ float64x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ __ret = (float64x2_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 42); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vfmaq_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ float64x2_t __ret; \ float64x2_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ float64x2_t __s2 = __p2; \ __ret = (float64x2_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 42); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmaq_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x4_t __ret; \ float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ __ret = (float32x4_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 41); \ __ret; \ }) #else #define vfmaq_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x4_t __ret; \ float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ __ret = (float32x4_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 41); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vfmaq_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x4_t __ret; \ float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ __ret = (float32x4_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 41); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfma_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ float64x1_t __ret; \ 
float64x1_t __s0 = __p0; \ float64x1_t __s1 = __p1; \ float64x2_t __s2 = __p2; \ __ret = (float64x1_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 10); \ __ret; \ }) #else #define vfma_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ float64x1_t __ret; \ float64x1_t __s0 = __p0; \ float64x1_t __s1 = __p1; \ float64x2_t __s2 = __p2; \ float64x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ __ret = (float64x1_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__rev2, __p3, 10); \ __ret; \ }) #define __noswap_vfma_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ float64x1_t __ret; \ float64x1_t __s0 = __p0; \ float64x1_t __s1 = __p1; \ float64x2_t __s2 = __p2; \ __ret = (float64x1_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 10); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfma_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x2_t __ret; \ float32x2_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ __ret = (float32x2_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 9); \ __ret; \ }) #else #define vfma_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x2_t __ret; \ float32x2_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ __ret = (float32x2_t) __builtin_neon_vfma_laneq_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x16_t)__rev2, __p3, 9); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vfma_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x2_t __ret; \ float32x2_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ __ret = (float32x2_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 9); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vfmaq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { float64x2_t __ret; __ret = vfmaq_f64(__p0, __p1, (float64x2_t) {__p2, __p2}); return __ret; } #else __ai float64x2_t vfmaq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __noswap_vfmaq_f64(__rev0, __rev1, (float64x2_t) {__p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vfma_n_f64(float64x1_t __p0, float64x1_t __p1, float64_t __p2) { float64x1_t __ret; __ret = vfma_f64(__p0, __p1, (float64x1_t) {__p2}); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vfmsq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; __ret = vfmaq_f64(__p0, -__p1, __p2); return __ret; } #else __ai float64x2_t vfmsq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __noswap_vfmaq_f64(__rev0, -__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vfms_f64(float64x1_t __p0, 
float64x1_t __p1, float64x1_t __p2) { float64x1_t __ret; __ret = vfma_f64(__p0, -__p1, __p2); return __ret; } #define vfmsd_lane_f64(__p0_405, __p1_405, __p2_405, __p3_405) __extension__ ({ \ float64_t __ret_405; \ float64_t __s0_405 = __p0_405; \ float64_t __s1_405 = __p1_405; \ float64x1_t __s2_405 = __p2_405; \ __ret_405 = vfmad_lane_f64(__s0_405, -__s1_405, __s2_405, __p3_405); \ __ret_405; \ }) #ifdef __LITTLE_ENDIAN__ #define vfmss_lane_f32(__p0_406, __p1_406, __p2_406, __p3_406) __extension__ ({ \ float32_t __ret_406; \ float32_t __s0_406 = __p0_406; \ float32_t __s1_406 = __p1_406; \ float32x2_t __s2_406 = __p2_406; \ __ret_406 = vfmas_lane_f32(__s0_406, -__s1_406, __s2_406, __p3_406); \ __ret_406; \ }) #else #define vfmss_lane_f32(__p0_407, __p1_407, __p2_407, __p3_407) __extension__ ({ \ float32_t __ret_407; \ float32_t __s0_407 = __p0_407; \ float32_t __s1_407 = __p1_407; \ float32x2_t __s2_407 = __p2_407; \ float32x2_t __rev2_407; __rev2_407 = __builtin_shufflevector(__s2_407, __s2_407, 1, 0); \ __ret_407 = __noswap_vfmas_lane_f32(__s0_407, -__s1_407, __rev2_407, __p3_407); \ __ret_407; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmsq_lane_f64(__p0_408, __p1_408, __p2_408, __p3_408) __extension__ ({ \ float64x2_t __ret_408; \ float64x2_t __s0_408 = __p0_408; \ float64x2_t __s1_408 = __p1_408; \ float64x1_t __s2_408 = __p2_408; \ __ret_408 = vfmaq_lane_f64(__s0_408, -__s1_408, __s2_408, __p3_408); \ __ret_408; \ }) #else #define vfmsq_lane_f64(__p0_409, __p1_409, __p2_409, __p3_409) __extension__ ({ \ float64x2_t __ret_409; \ float64x2_t __s0_409 = __p0_409; \ float64x2_t __s1_409 = __p1_409; \ float64x1_t __s2_409 = __p2_409; \ float64x2_t __rev0_409; __rev0_409 = __builtin_shufflevector(__s0_409, __s0_409, 1, 0); \ float64x2_t __rev1_409; __rev1_409 = __builtin_shufflevector(__s1_409, __s1_409, 1, 0); \ __ret_409 = __noswap_vfmaq_lane_f64(__rev0_409, -__rev1_409, __s2_409, __p3_409); \ __ret_409 = __builtin_shufflevector(__ret_409, __ret_409, 1, 0); \ __ret_409; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmsq_lane_f32(__p0_410, __p1_410, __p2_410, __p3_410) __extension__ ({ \ float32x4_t __ret_410; \ float32x4_t __s0_410 = __p0_410; \ float32x4_t __s1_410 = __p1_410; \ float32x2_t __s2_410 = __p2_410; \ __ret_410 = vfmaq_lane_f32(__s0_410, -__s1_410, __s2_410, __p3_410); \ __ret_410; \ }) #else #define vfmsq_lane_f32(__p0_411, __p1_411, __p2_411, __p3_411) __extension__ ({ \ float32x4_t __ret_411; \ float32x4_t __s0_411 = __p0_411; \ float32x4_t __s1_411 = __p1_411; \ float32x2_t __s2_411 = __p2_411; \ float32x4_t __rev0_411; __rev0_411 = __builtin_shufflevector(__s0_411, __s0_411, 3, 2, 1, 0); \ float32x4_t __rev1_411; __rev1_411 = __builtin_shufflevector(__s1_411, __s1_411, 3, 2, 1, 0); \ float32x2_t __rev2_411; __rev2_411 = __builtin_shufflevector(__s2_411, __s2_411, 1, 0); \ __ret_411 = __noswap_vfmaq_lane_f32(__rev0_411, -__rev1_411, __rev2_411, __p3_411); \ __ret_411 = __builtin_shufflevector(__ret_411, __ret_411, 3, 2, 1, 0); \ __ret_411; \ }) #endif #define vfms_lane_f64(__p0_412, __p1_412, __p2_412, __p3_412) __extension__ ({ \ float64x1_t __ret_412; \ float64x1_t __s0_412 = __p0_412; \ float64x1_t __s1_412 = __p1_412; \ float64x1_t __s2_412 = __p2_412; \ __ret_412 = vfma_lane_f64(__s0_412, -__s1_412, __s2_412, __p3_412); \ __ret_412; \ }) #ifdef __LITTLE_ENDIAN__ #define vfms_lane_f32(__p0_413, __p1_413, __p2_413, __p3_413) __extension__ ({ \ float32x2_t __ret_413; \ float32x2_t __s0_413 = __p0_413; \ float32x2_t __s1_413 = __p1_413; \ float32x2_t __s2_413 = 
__p2_413; \ __ret_413 = vfma_lane_f32(__s0_413, -__s1_413, __s2_413, __p3_413); \ __ret_413; \ }) #else #define vfms_lane_f32(__p0_414, __p1_414, __p2_414, __p3_414) __extension__ ({ \ float32x2_t __ret_414; \ float32x2_t __s0_414 = __p0_414; \ float32x2_t __s1_414 = __p1_414; \ float32x2_t __s2_414 = __p2_414; \ float32x2_t __rev0_414; __rev0_414 = __builtin_shufflevector(__s0_414, __s0_414, 1, 0); \ float32x2_t __rev1_414; __rev1_414 = __builtin_shufflevector(__s1_414, __s1_414, 1, 0); \ float32x2_t __rev2_414; __rev2_414 = __builtin_shufflevector(__s2_414, __s2_414, 1, 0); \ __ret_414 = __noswap_vfma_lane_f32(__rev0_414, -__rev1_414, __rev2_414, __p3_414); \ __ret_414 = __builtin_shufflevector(__ret_414, __ret_414, 1, 0); \ __ret_414; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmsd_laneq_f64(__p0_415, __p1_415, __p2_415, __p3_415) __extension__ ({ \ float64_t __ret_415; \ float64_t __s0_415 = __p0_415; \ float64_t __s1_415 = __p1_415; \ float64x2_t __s2_415 = __p2_415; \ __ret_415 = vfmad_laneq_f64(__s0_415, -__s1_415, __s2_415, __p3_415); \ __ret_415; \ }) #else #define vfmsd_laneq_f64(__p0_416, __p1_416, __p2_416, __p3_416) __extension__ ({ \ float64_t __ret_416; \ float64_t __s0_416 = __p0_416; \ float64_t __s1_416 = __p1_416; \ float64x2_t __s2_416 = __p2_416; \ float64x2_t __rev2_416; __rev2_416 = __builtin_shufflevector(__s2_416, __s2_416, 1, 0); \ __ret_416 = __noswap_vfmad_laneq_f64(__s0_416, -__s1_416, __rev2_416, __p3_416); \ __ret_416; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmss_laneq_f32(__p0_417, __p1_417, __p2_417, __p3_417) __extension__ ({ \ float32_t __ret_417; \ float32_t __s0_417 = __p0_417; \ float32_t __s1_417 = __p1_417; \ float32x4_t __s2_417 = __p2_417; \ __ret_417 = vfmas_laneq_f32(__s0_417, -__s1_417, __s2_417, __p3_417); \ __ret_417; \ }) #else #define vfmss_laneq_f32(__p0_418, __p1_418, __p2_418, __p3_418) __extension__ ({ \ float32_t __ret_418; \ float32_t __s0_418 = __p0_418; \ float32_t __s1_418 = __p1_418; \ float32x4_t __s2_418 = __p2_418; \ float32x4_t __rev2_418; __rev2_418 = __builtin_shufflevector(__s2_418, __s2_418, 3, 2, 1, 0); \ __ret_418 = __noswap_vfmas_laneq_f32(__s0_418, -__s1_418, __rev2_418, __p3_418); \ __ret_418; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmsq_laneq_f64(__p0_419, __p1_419, __p2_419, __p3_419) __extension__ ({ \ float64x2_t __ret_419; \ float64x2_t __s0_419 = __p0_419; \ float64x2_t __s1_419 = __p1_419; \ float64x2_t __s2_419 = __p2_419; \ __ret_419 = vfmaq_laneq_f64(__s0_419, -__s1_419, __s2_419, __p3_419); \ __ret_419; \ }) #else #define vfmsq_laneq_f64(__p0_420, __p1_420, __p2_420, __p3_420) __extension__ ({ \ float64x2_t __ret_420; \ float64x2_t __s0_420 = __p0_420; \ float64x2_t __s1_420 = __p1_420; \ float64x2_t __s2_420 = __p2_420; \ float64x2_t __rev0_420; __rev0_420 = __builtin_shufflevector(__s0_420, __s0_420, 1, 0); \ float64x2_t __rev1_420; __rev1_420 = __builtin_shufflevector(__s1_420, __s1_420, 1, 0); \ float64x2_t __rev2_420; __rev2_420 = __builtin_shufflevector(__s2_420, __s2_420, 1, 0); \ __ret_420 = __noswap_vfmaq_laneq_f64(__rev0_420, -__rev1_420, __rev2_420, __p3_420); \ __ret_420 = __builtin_shufflevector(__ret_420, __ret_420, 1, 0); \ __ret_420; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmsq_laneq_f32(__p0_421, __p1_421, __p2_421, __p3_421) __extension__ ({ \ float32x4_t __ret_421; \ float32x4_t __s0_421 = __p0_421; \ float32x4_t __s1_421 = __p1_421; \ float32x4_t __s2_421 = __p2_421; \ __ret_421 = vfmaq_laneq_f32(__s0_421, -__s1_421, __s2_421, __p3_421); \ __ret_421; \ }) #else 
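/* Illustrative usage sketch, not part of the header: the vfms forms in this
   region are wrappers that negate the second multiplicand and forward to the
   matching vfma intrinsic, so vfmsq_laneq_f32(a, b, v, k) evaluates
   a - b * v[k] with a single rounding. The hypothetical helper below uses the
   plain vector form vfmsq_f32 (defined elsewhere in this header, same
   a - b * c convention) for one Newton-Raphson refinement of a reciprocal
   estimate; it assumes <arm_neon.h> is included on an AArch64 target. */
#include <arm_neon.h>

/* One refinement step of 1/d: r1 = r0 * (2 - d * r0). */
static inline float32x4_t recip_step_f32(float32x4_t d, float32x4_t r0) {
    float32x4_t t = vfmsq_f32(vdupq_n_f32(2.0f), d, r0);  /* 2 - d*r0, fused */
    return vmulq_f32(r0, t);
}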
#define vfmsq_laneq_f32(__p0_422, __p1_422, __p2_422, __p3_422) __extension__ ({ \ float32x4_t __ret_422; \ float32x4_t __s0_422 = __p0_422; \ float32x4_t __s1_422 = __p1_422; \ float32x4_t __s2_422 = __p2_422; \ float32x4_t __rev0_422; __rev0_422 = __builtin_shufflevector(__s0_422, __s0_422, 3, 2, 1, 0); \ float32x4_t __rev1_422; __rev1_422 = __builtin_shufflevector(__s1_422, __s1_422, 3, 2, 1, 0); \ float32x4_t __rev2_422; __rev2_422 = __builtin_shufflevector(__s2_422, __s2_422, 3, 2, 1, 0); \ __ret_422 = __noswap_vfmaq_laneq_f32(__rev0_422, -__rev1_422, __rev2_422, __p3_422); \ __ret_422 = __builtin_shufflevector(__ret_422, __ret_422, 3, 2, 1, 0); \ __ret_422; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfms_laneq_f64(__p0_423, __p1_423, __p2_423, __p3_423) __extension__ ({ \ float64x1_t __ret_423; \ float64x1_t __s0_423 = __p0_423; \ float64x1_t __s1_423 = __p1_423; \ float64x2_t __s2_423 = __p2_423; \ __ret_423 = vfma_laneq_f64(__s0_423, -__s1_423, __s2_423, __p3_423); \ __ret_423; \ }) #else #define vfms_laneq_f64(__p0_424, __p1_424, __p2_424, __p3_424) __extension__ ({ \ float64x1_t __ret_424; \ float64x1_t __s0_424 = __p0_424; \ float64x1_t __s1_424 = __p1_424; \ float64x2_t __s2_424 = __p2_424; \ float64x2_t __rev2_424; __rev2_424 = __builtin_shufflevector(__s2_424, __s2_424, 1, 0); \ __ret_424 = __noswap_vfma_laneq_f64(__s0_424, -__s1_424, __rev2_424, __p3_424); \ __ret_424; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfms_laneq_f32(__p0_425, __p1_425, __p2_425, __p3_425) __extension__ ({ \ float32x2_t __ret_425; \ float32x2_t __s0_425 = __p0_425; \ float32x2_t __s1_425 = __p1_425; \ float32x4_t __s2_425 = __p2_425; \ __ret_425 = vfma_laneq_f32(__s0_425, -__s1_425, __s2_425, __p3_425); \ __ret_425; \ }) #else #define vfms_laneq_f32(__p0_426, __p1_426, __p2_426, __p3_426) __extension__ ({ \ float32x2_t __ret_426; \ float32x2_t __s0_426 = __p0_426; \ float32x2_t __s1_426 = __p1_426; \ float32x4_t __s2_426 = __p2_426; \ float32x2_t __rev0_426; __rev0_426 = __builtin_shufflevector(__s0_426, __s0_426, 1, 0); \ float32x2_t __rev1_426; __rev1_426 = __builtin_shufflevector(__s1_426, __s1_426, 1, 0); \ float32x4_t __rev2_426; __rev2_426 = __builtin_shufflevector(__s2_426, __s2_426, 3, 2, 1, 0); \ __ret_426 = __noswap_vfma_laneq_f32(__rev0_426, -__rev1_426, __rev2_426, __p3_426); \ __ret_426 = __builtin_shufflevector(__ret_426, __ret_426, 1, 0); \ __ret_426; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vfmsq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { float64x2_t __ret; __ret = vfmaq_f64(__p0, -__p1, (float64x2_t) {__p2, __p2}); return __ret; } #else __ai float64x2_t vfmsq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __noswap_vfmaq_f64(__rev0, -__rev1, (float64x2_t) {__p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vfmsq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { float32x4_t __ret; __ret = vfmaq_f32(__p0, -__p1, (float32x4_t) {__p2, __p2, __p2, __p2}); return __ret; } #else __ai float32x4_t vfmsq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vfmaq_f32(__rev0, -__rev1, 
(float32x4_t) {__p2, __p2, __p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif __ai float64x1_t vfms_n_f64(float64x1_t __p0, float64x1_t __p1, float64_t __p2) { float64x1_t __ret; __ret = vfma_f64(__p0, -__p1, (float64x1_t) {__p2}); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vfms_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { float32x2_t __ret; __ret = vfma_f32(__p0, -__p1, (float32x2_t) {__p2, __p2}); return __ret; } #else __ai float32x2_t vfms_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __noswap_vfma_f32(__rev0, -__rev1, (float32x2_t) {__p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly64x1_t vget_high_p64(poly64x2_t __p0) { poly64x1_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1); return __ret; } #else __ai poly64x1_t vget_high_p64(poly64x2_t __p0) { poly64x1_t __ret; poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1); return __ret; } __ai poly64x1_t __noswap_vget_high_p64(poly64x2_t __p0) { poly64x1_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x1_t vget_high_f64(float64x2_t __p0) { float64x1_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1); return __ret; } #else __ai float64x1_t vget_high_f64(float64x2_t __p0) { float64x1_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 1); return __ret; } #endif #define vget_lane_p64(__p0, __p1) __extension__ ({ \ poly64_t __ret; \ poly64x1_t __s0 = __p0; \ __ret = (poly64_t) __builtin_neon_vget_lane_i64((poly64x1_t)__s0, __p1); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vgetq_lane_p64(__p0, __p1) __extension__ ({ \ poly64_t __ret; \ poly64x2_t __s0 = __p0; \ __ret = (poly64_t) __builtin_neon_vgetq_lane_i64((poly64x2_t)__s0, __p1); \ __ret; \ }) #else #define vgetq_lane_p64(__p0, __p1) __extension__ ({ \ poly64_t __ret; \ poly64x2_t __s0 = __p0; \ poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (poly64_t) __builtin_neon_vgetq_lane_i64((poly64x2_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vgetq_lane_p64(__p0, __p1) __extension__ ({ \ poly64_t __ret; \ poly64x2_t __s0 = __p0; \ __ret = (poly64_t) __builtin_neon_vgetq_lane_i64((poly64x2_t)__s0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vgetq_lane_f64(__p0, __p1) __extension__ ({ \ float64_t __ret; \ float64x2_t __s0 = __p0; \ __ret = (float64_t) __builtin_neon_vgetq_lane_f64((float64x2_t)__s0, __p1); \ __ret; \ }) #else #define vgetq_lane_f64(__p0, __p1) __extension__ ({ \ float64_t __ret; \ float64x2_t __s0 = __p0; \ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ __ret = (float64_t) __builtin_neon_vgetq_lane_f64((float64x2_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vgetq_lane_f64(__p0, __p1) __extension__ ({ \ float64_t __ret; \ float64x2_t __s0 = __p0; \ __ret = (float64_t) __builtin_neon_vgetq_lane_f64((float64x2_t)__s0, __p1); \ __ret; \ }) #endif #define vget_lane_f64(__p0, __p1) __extension__ ({ \ float64_t __ret; \ float64x1_t __s0 = __p0; \ __ret = (float64_t) __builtin_neon_vget_lane_f64((float64x1_t)__s0, __p1); \ __ret; \ }) #ifdef 
__LITTLE_ENDIAN__ __ai poly64x1_t vget_low_p64(poly64x2_t __p0) { poly64x1_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0); return __ret; } #else __ai poly64x1_t vget_low_p64(poly64x2_t __p0) { poly64x1_t __ret; poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x1_t vget_low_f64(float64x2_t __p0) { float64x1_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0); return __ret; } #else __ai float64x1_t vget_low_f64(float64x2_t __p0) { float64x1_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev0, 0); return __ret; } #endif #define vld1_p64(__p0) __extension__ ({ \ poly64x1_t __ret; \ __ret = (poly64x1_t) __builtin_neon_vld1_v(__p0, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1q_p64(__p0) __extension__ ({ \ poly64x2_t __ret; \ __ret = (poly64x2_t) __builtin_neon_vld1q_v(__p0, 38); \ __ret; \ }) #else #define vld1q_p64(__p0) __extension__ ({ \ poly64x2_t __ret; \ __ret = (poly64x2_t) __builtin_neon_vld1q_v(__p0, 38); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_f64(__p0) __extension__ ({ \ float64x2_t __ret; \ __ret = (float64x2_t) __builtin_neon_vld1q_v(__p0, 42); \ __ret; \ }) #else #define vld1q_f64(__p0) __extension__ ({ \ float64x2_t __ret; \ __ret = (float64x2_t) __builtin_neon_vld1q_v(__p0, 42); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vld1_f64(__p0) __extension__ ({ \ float64x1_t __ret; \ __ret = (float64x1_t) __builtin_neon_vld1_v(__p0, 10); \ __ret; \ }) #define vld1_dup_p64(__p0) __extension__ ({ \ poly64x1_t __ret; \ __ret = (poly64x1_t) __builtin_neon_vld1_dup_v(__p0, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_p64(__p0) __extension__ ({ \ poly64x2_t __ret; \ __ret = (poly64x2_t) __builtin_neon_vld1q_dup_v(__p0, 38); \ __ret; \ }) #else #define vld1q_dup_p64(__p0) __extension__ ({ \ poly64x2_t __ret; \ __ret = (poly64x2_t) __builtin_neon_vld1q_dup_v(__p0, 38); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_f64(__p0) __extension__ ({ \ float64x2_t __ret; \ __ret = (float64x2_t) __builtin_neon_vld1q_dup_v(__p0, 42); \ __ret; \ }) #else #define vld1q_dup_f64(__p0) __extension__ ({ \ float64x2_t __ret; \ __ret = (float64x2_t) __builtin_neon_vld1q_dup_v(__p0, 42); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vld1_dup_f64(__p0) __extension__ ({ \ float64x1_t __ret; \ __ret = (float64x1_t) __builtin_neon_vld1_dup_v(__p0, 10); \ __ret; \ }) #define vld1_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1_t __ret; \ poly64x1_t __s1 = __p1; \ __ret = (poly64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __ret; \ poly64x2_t __s1 = __p1; \ __ret = (poly64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 38); \ __ret; \ }) #else #define vld1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __ret; \ poly64x2_t __s1 = __p1; \ poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (poly64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 38); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ 
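/* Illustrative usage sketch, not part of the header: vget_low_f64 and
   vget_high_f64 above split a 128-bit float64x2_t into its two 64-bit halves,
   and vgetq_lane_f64 extracts a single scalar lane (the lane index must be a
   constant expression). The hypothetical helpers below sum the two lanes both
   ways; they assume <arm_neon.h> is included on an AArch64 target. */
#include <arm_neon.h>

static inline float64_t hsum_f64x2(float64x2_t v) {
    /* Scalar route: extract each lane and add. */
    return vgetq_lane_f64(v, 0) + vgetq_lane_f64(v, 1);
}

static inline float64x1_t hsum_f64x2_vec(float64x2_t v) {
    /* Vector route: add the low and high halves as float64x1_t values. */
    return vadd_f64(vget_low_f64(v), vget_high_f64(v));
}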
#define vld1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __ret; \ float64x2_t __s1 = __p1; \ __ret = (float64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 42); \ __ret; \ }) #else #define vld1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __ret; \ float64x2_t __s1 = __p1; \ float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (float64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 42); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vld1_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1_t __ret; \ float64x1_t __s1 = __p1; \ __ret = (float64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 10); \ __ret; \ }) #define vld1_p64_x2(__p0) __extension__ ({ \ poly64x1x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1q_p64_x2(__p0) __extension__ ({ \ poly64x2x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 38); \ __ret; \ }) #else #define vld1q_p64_x2(__p0) __extension__ ({ \ poly64x2x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 38); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_f64_x2(__p0) __extension__ ({ \ float64x2x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 42); \ __ret; \ }) #else #define vld1q_f64_x2(__p0) __extension__ ({ \ float64x2x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 42); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #define vld1_f64_x2(__p0) __extension__ ({ \ float64x1x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 10); \ __ret; \ }) #define vld1_p64_x3(__p0) __extension__ ({ \ poly64x1x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1q_p64_x3(__p0) __extension__ ({ \ poly64x2x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 38); \ __ret; \ }) #else #define vld1q_p64_x3(__p0) __extension__ ({ \ poly64x2x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 38); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_f64_x3(__p0) __extension__ ({ \ float64x2x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 42); \ __ret; \ }) #else #define vld1q_f64_x3(__p0) __extension__ ({ \ float64x2x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 42); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #define vld1_f64_x3(__p0) __extension__ ({ \ float64x1x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 10); \ __ret; \ }) #define vld1_p64_x4(__p0) __extension__ ({ \ poly64x1x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1q_p64_x4(__p0) __extension__ ({ \ poly64x2x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 38); \ __ret; \ }) #else #define vld1q_p64_x4(__p0) __extension__ ({ \ poly64x2x4_t __ret; \ 
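/* Illustrative usage sketch, not part of the header: unlike the vld2/vld3/vld4
   families further below, the vld1*_x2, _x3 and _x4 forms load several vectors
   from consecutive memory without de-interleaving. The hypothetical helper
   below sums four doubles with a single vld1q_f64_x2 load; it assumes
   <arm_neon.h> is included on an AArch64 target. */
#include <arm_neon.h>

static inline float64_t sum4_f64(const float64_t *p) {
    float64x2x2_t v = vld1q_f64_x2(p);              /* {p[0],p[1]} and {p[2],p[3]} */
    float64x2_t s = vaddq_f64(v.val[0], v.val[1]);  /* lane-wise add               */
    return vgetq_lane_f64(s, 0) + vgetq_lane_f64(s, 1);
}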
__builtin_neon_vld1q_x4_v(&__ret, __p0, 38); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld1q_f64_x4(__p0) __extension__ ({ \ float64x2x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 42); \ __ret; \ }) #else #define vld1q_f64_x4(__p0) __extension__ ({ \ float64x2x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 42); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #define vld1_f64_x4(__p0) __extension__ ({ \ float64x1x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 10); \ __ret; \ }) #define vld2_p64(__p0) __extension__ ({ \ poly64x1x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld2q_p64(__p0) __extension__ ({ \ poly64x2x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 38); \ __ret; \ }) #else #define vld2q_p64(__p0) __extension__ ({ \ poly64x2x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 38); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_u64(__p0) __extension__ ({ \ uint64x2x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 51); \ __ret; \ }) #else #define vld2q_u64(__p0) __extension__ ({ \ uint64x2x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 51); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_f64(__p0) __extension__ ({ \ float64x2x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 42); \ __ret; \ }) #else #define vld2q_f64(__p0) __extension__ ({ \ float64x2x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 42); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_s64(__p0) __extension__ ({ \ int64x2x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 35); \ __ret; \ }) #else #define vld2q_s64(__p0) __extension__ ({ \ int64x2x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 35); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #define vld2_f64(__p0) __extension__ ({ \ float64x1x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 10); \ __ret; \ }) #define vld2_dup_p64(__p0) __extension__ ({ \ poly64x1x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld2q_dup_p64(__p0) __extension__ ({ \ poly64x2x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 38); \ __ret; \ }) #else #define vld2q_dup_p64(__p0) __extension__ ({ \ poly64x2x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 38); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ 
__ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_dup_f64(__p0) __extension__ ({ \ float64x2x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 42); \ __ret; \ }) #else #define vld2q_dup_f64(__p0) __extension__ ({ \ float64x2x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 42); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #define vld2_dup_f64(__p0) __extension__ ({ \ float64x1x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 10); \ __ret; \ }) #define vld2_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1x2_t __ret; \ poly64x1x2_t __s1 = __p1; \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld2q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x2_t __ret; \ poly8x16x2_t __s1 = __p1; \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 36); \ __ret; \ }) #else #define vld2q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x2_t __ret; \ poly8x16x2_t __s1 = __p1; \ poly8x16x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 36); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x2_t __ret; \ poly64x2x2_t __s1 = __p1; \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 38); \ __ret; \ }) #else #define vld2q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x2_t __ret; \ poly64x2x2_t __s1 = __p1; \ poly64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 38); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x2_t __ret; \ uint8x16x2_t __s1 = __p1; \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 48); \ __ret; \ }) #else #define vld2q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x2_t __ret; \ uint8x16x2_t __s1 = __p1; \ uint8x16x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 48); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 
10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2x2_t __ret; \ uint64x2x2_t __s1 = __p1; \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 51); \ __ret; \ }) #else #define vld2q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2x2_t __ret; \ uint64x2x2_t __s1 = __p1; \ uint64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 51); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x2_t __ret; \ int8x16x2_t __s1 = __p1; \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 32); \ __ret; \ }) #else #define vld2q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x2_t __ret; \ int8x16x2_t __s1 = __p1; \ int8x16x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 32); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x2_t __ret; \ float64x2x2_t __s1 = __p1; \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 42); \ __ret; \ }) #else #define vld2q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x2_t __ret; \ float64x2x2_t __s1 = __p1; \ float64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 42); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld2q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x2_t __ret; \ int64x2x2_t __s1 = __p1; \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 35); \ __ret; \ }) #else #define vld2q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x2_t __ret; \ int64x2x2_t __s1 = __p1; \ int64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 35); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ 
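/* Illustrative usage sketch, not part of the header: the vld2 forms above are
   de-interleaving loads; vld2q_f64 reads four consecutive doubles laid out as
   {re0, im0, re1, im1} and returns val[0] = {re0, re1}, val[1] = {im0, im1}.
   The hypothetical helper below uses that layout to compute |z|^2 for two
   complex doubles at once; it assumes <arm_neon.h> is included on an AArch64
   target. */
#include <arm_neon.h>

static inline float64x2_t cnorm2_pair(const float64_t *z) {  /* z = re,im,re,im */
    float64x2x2_t ri = vld2q_f64(z);                  /* split re and im lanes */
    float64x2_t n = vmulq_f64(ri.val[0], ri.val[0]);  /* re*re                 */
    return vfmaq_f64(n, ri.val[1], ri.val[1]);        /* + im*im, fused        */
}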
__ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret; \ }) #endif #define vld2_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1x2_t __ret; \ uint64x1x2_t __s1 = __p1; \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 19); \ __ret; \ }) #define vld2_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1x2_t __ret; \ float64x1x2_t __s1 = __p1; \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 10); \ __ret; \ }) #define vld2_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1x2_t __ret; \ int64x1x2_t __s1 = __p1; \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 3); \ __ret; \ }) #define vld3_p64(__p0) __extension__ ({ \ poly64x1x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld3q_p64(__p0) __extension__ ({ \ poly64x2x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 38); \ __ret; \ }) #else #define vld3q_p64(__p0) __extension__ ({ \ poly64x2x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 38); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_u64(__p0) __extension__ ({ \ uint64x2x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 51); \ __ret; \ }) #else #define vld3q_u64(__p0) __extension__ ({ \ uint64x2x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 51); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_f64(__p0) __extension__ ({ \ float64x2x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 42); \ __ret; \ }) #else #define vld3q_f64(__p0) __extension__ ({ \ float64x2x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 42); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_s64(__p0) __extension__ ({ \ int64x2x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 35); \ __ret; \ }) #else #define vld3q_s64(__p0) __extension__ ({ \ int64x2x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 35); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #define vld3_f64(__p0) __extension__ ({ \ float64x1x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 10); \ __ret; \ }) #define vld3_dup_p64(__p0) __extension__ ({ \ poly64x1x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld3q_dup_p64(__p0) __extension__ ({ \ poly64x2x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 38); \ __ret; \ }) #else #define vld3q_dup_p64(__p0) __extension__ ({ \ poly64x2x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 38); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = 
__builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_dup_f64(__p0) __extension__ ({ \ float64x2x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 42); \ __ret; \ }) #else #define vld3q_dup_f64(__p0) __extension__ ({ \ float64x2x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 42); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #define vld3_dup_f64(__p0) __extension__ ({ \ float64x1x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 10); \ __ret; \ }) #define vld3_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1x3_t __ret; \ poly64x1x3_t __s1 = __p1; \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld3q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x3_t __ret; \ poly8x16x3_t __s1 = __p1; \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 36); \ __ret; \ }) #else #define vld3q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x3_t __ret; \ poly8x16x3_t __s1 = __p1; \ poly8x16x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 36); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x3_t __ret; \ poly64x2x3_t __s1 = __p1; \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 38); \ __ret; \ }) #else #define vld3q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x3_t __ret; \ poly64x2x3_t __s1 = __p1; \ poly64x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 38); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x3_t __ret; \ uint8x16x3_t __s1 = __p1; \ __builtin_neon_vld3q_lane_v(&__ret, 
__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 48); \ __ret; \ }) #else #define vld3q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x3_t __ret; \ uint8x16x3_t __s1 = __p1; \ uint8x16x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 48); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2x3_t __ret; \ uint64x2x3_t __s1 = __p1; \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 51); \ __ret; \ }) #else #define vld3q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2x3_t __ret; \ uint64x2x3_t __s1 = __p1; \ uint64x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 51); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x3_t __ret; \ int8x16x3_t __s1 = __p1; \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 32); \ __ret; \ }) #else #define vld3q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x3_t __ret; \ int8x16x3_t __s1 = __p1; \ int8x16x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 32); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ 
float64x2x3_t __ret; \ float64x2x3_t __s1 = __p1; \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 42); \ __ret; \ }) #else #define vld3q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x3_t __ret; \ float64x2x3_t __s1 = __p1; \ float64x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 42); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld3q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x3_t __ret; \ int64x2x3_t __s1 = __p1; \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 35); \ __ret; \ }) #else #define vld3q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x3_t __ret; \ int64x2x3_t __s1 = __p1; \ int64x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 35); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret; \ }) #endif #define vld3_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1x3_t __ret; \ uint64x1x3_t __s1 = __p1; \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 19); \ __ret; \ }) #define vld3_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1x3_t __ret; \ float64x1x3_t __s1 = __p1; \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 10); \ __ret; \ }) #define vld3_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1x3_t __ret; \ int64x1x3_t __s1 = __p1; \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 3); \ __ret; \ }) #define vld4_p64(__p0) __extension__ ({ \ poly64x1x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld4q_p64(__p0) __extension__ ({ \ poly64x2x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 38); \ __ret; \ }) #else #define vld4q_p64(__p0) __extension__ ({ \ poly64x2x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 38); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_u64(__p0) __extension__ ({ \ uint64x2x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 51); \ __ret; \ }) #else #define 
vld4q_u64(__p0) __extension__ ({ \ uint64x2x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 51); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_f64(__p0) __extension__ ({ \ float64x2x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 42); \ __ret; \ }) #else #define vld4q_f64(__p0) __extension__ ({ \ float64x2x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 42); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_s64(__p0) __extension__ ({ \ int64x2x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 35); \ __ret; \ }) #else #define vld4q_s64(__p0) __extension__ ({ \ int64x2x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 35); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #define vld4_f64(__p0) __extension__ ({ \ float64x1x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 10); \ __ret; \ }) #define vld4_dup_p64(__p0) __extension__ ({ \ poly64x1x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld4q_dup_p64(__p0) __extension__ ({ \ poly64x2x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 38); \ __ret; \ }) #else #define vld4q_dup_p64(__p0) __extension__ ({ \ poly64x2x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 38); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_dup_f64(__p0) __extension__ ({ \ float64x2x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 42); \ __ret; \ }) #else #define vld4q_dup_f64(__p0) __extension__ ({ \ float64x2x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 42); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #define vld4_dup_f64(__p0) __extension__ ({ \ float64x1x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 10); \ __ret; \ }) #define vld4_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1x4_t __ret; \ poly64x1x4_t __s1 = __p1; \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld4q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x4_t __ret; \ 
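/* Illustrative usage sketch, not part of the header: vld4q_f64 above loads
   eight consecutive doubles stored as two interleaved 4-tuples
   {x0,y0,z0,w0, x1,y1,z1,w1} and de-interleaves them into val[0]..val[3].
   The hypothetical helper below computes the squared norms of two such
   4-tuples (e.g. quaternions) at once; it assumes <arm_neon.h> is included on
   an AArch64 target. */
#include <arm_neon.h>

static inline float64x2_t quat_norm2_pair(const float64_t *q) {
    float64x2x4_t v = vld4q_f64(q);                 /* x, y, z, w planes */
    float64x2_t n = vmulq_f64(v.val[0], v.val[0]);  /* x*x               */
    n = vfmaq_f64(n, v.val[1], v.val[1]);           /* + y*y, fused      */
    n = vfmaq_f64(n, v.val[2], v.val[2]);           /* + z*z, fused      */
    n = vfmaq_f64(n, v.val[3], v.val[3]);           /* + w*w, fused      */
    return n;
}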
poly8x16x4_t __s1 = __p1; \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 36); \ __ret; \ }) #else #define vld4q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x4_t __ret; \ poly8x16x4_t __s1 = __p1; \ poly8x16x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 36); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x4_t __ret; \ poly64x2x4_t __s1 = __p1; \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 38); \ __ret; \ }) #else #define vld4q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x4_t __ret; \ poly64x2x4_t __s1 = __p1; \ poly64x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 38); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x4_t __ret; \ uint8x16x4_t __s1 = __p1; \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 48); \ __ret; \ }) #else #define vld4q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x4_t __ret; \ uint8x16x4_t __s1 = __p1; \ uint8x16x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = 
__builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 48); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2x4_t __ret; \ uint64x2x4_t __s1 = __p1; \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 51); \ __ret; \ }) #else #define vld4q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2x4_t __ret; \ uint64x2x4_t __s1 = __p1; \ uint64x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 51); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x4_t __ret; \ int8x16x4_t __s1 = __p1; \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 32); \ __ret; \ }) #else #define vld4q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x4_t __ret; \ int8x16x4_t __s1 = __p1; \ int8x16x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 32); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 
8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x4_t __ret; \ float64x2x4_t __s1 = __p1; \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 42); \ __ret; \ }) #else #define vld4q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x4_t __ret; \ float64x2x4_t __s1 = __p1; \ float64x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 42); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vld4q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x4_t __ret; \ int64x2x4_t __s1 = __p1; \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 35); \ __ret; \ }) #else #define vld4q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x4_t __ret; \ int64x2x4_t __s1 = __p1; \ int64x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 35); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ __ret; \ }) #endif #define vld4_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1x4_t __ret; \ uint64x1x4_t __s1 = __p1; \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 19); \ __ret; \ }) #define vld4_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1x4_t __ret; \ float64x1x4_t __s1 = __p1; \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 10); \ __ret; \ }) #define vld4_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1x4_t __ret; \ int64x1x4_t __s1 = __p1; \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 3); \ __ret; \ }) #define vldrq_p128(__p0) __extension__ ({ \ poly128_t __ret; \ __ret = (poly128_t) __builtin_neon_vldrq_p128(__p0); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vmaxq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = (float64x2_t) 
__builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); return __ret; } #else __ai float64x2_t vmaxq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float64x2_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vmax_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 10); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float64_t vmaxnmvq_f64(float64x2_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vmaxnmvq_f64(__p0); return __ret; } #else __ai float64_t vmaxnmvq_f64(float64x2_t __p0) { float64_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64_t) __builtin_neon_vmaxnmvq_f64(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32_t vmaxnmvq_f32(float32x4_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vmaxnmvq_f32(__p0); return __ret; } #else __ai float32_t vmaxnmvq_f32(float32x4_t __p0) { float32_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32_t) __builtin_neon_vmaxnmvq_f32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32_t vmaxnmv_f32(float32x2_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vmaxnmv_f32(__p0); return __ret; } #else __ai float32_t vmaxnmv_f32(float32x2_t __p0) { float32_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32_t) __builtin_neon_vmaxnmv_f32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8_t vmaxvq_u8(uint8x16_t __p0) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vmaxvq_u8(__p0); return __ret; } #else __ai uint8_t vmaxvq_u8(uint8x16_t __p0) { uint8_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8_t) __builtin_neon_vmaxvq_u8(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32_t vmaxvq_u32(uint32x4_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vmaxvq_u32(__p0); return __ret; } #else __ai uint32_t vmaxvq_u32(uint32x4_t __p0) { uint32_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32_t) __builtin_neon_vmaxvq_u32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16_t vmaxvq_u16(uint16x8_t __p0) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vmaxvq_u16(__p0); return __ret; } #else __ai uint16_t vmaxvq_u16(uint16x8_t __p0) { uint16_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16_t) __builtin_neon_vmaxvq_u16(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8_t vmaxvq_s8(int8x16_t __p0) { int8_t __ret; __ret = (int8_t) __builtin_neon_vmaxvq_s8(__p0); return __ret; } #else __ai int8_t vmaxvq_s8(int8x16_t __p0) { int8_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8_t) __builtin_neon_vmaxvq_s8(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64_t vmaxvq_f64(float64x2_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vmaxvq_f64(__p0); return __ret; } #else __ai float64_t 
vmaxvq_f64(float64x2_t __p0) { float64_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64_t) __builtin_neon_vmaxvq_f64(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32_t vmaxvq_f32(float32x4_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vmaxvq_f32(__p0); return __ret; } #else __ai float32_t vmaxvq_f32(float32x4_t __p0) { float32_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32_t) __builtin_neon_vmaxvq_f32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32_t vmaxvq_s32(int32x4_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vmaxvq_s32(__p0); return __ret; } #else __ai int32_t vmaxvq_s32(int32x4_t __p0) { int32_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int32_t) __builtin_neon_vmaxvq_s32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16_t vmaxvq_s16(int16x8_t __p0) { int16_t __ret; __ret = (int16_t) __builtin_neon_vmaxvq_s16(__p0); return __ret; } #else __ai int16_t vmaxvq_s16(int16x8_t __p0) { int16_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16_t) __builtin_neon_vmaxvq_s16(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8_t vmaxv_u8(uint8x8_t __p0) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vmaxv_u8(__p0); return __ret; } #else __ai uint8_t vmaxv_u8(uint8x8_t __p0) { uint8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8_t) __builtin_neon_vmaxv_u8(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32_t vmaxv_u32(uint32x2_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vmaxv_u32(__p0); return __ret; } #else __ai uint32_t vmaxv_u32(uint32x2_t __p0) { uint32_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32_t) __builtin_neon_vmaxv_u32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16_t vmaxv_u16(uint16x4_t __p0) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vmaxv_u16(__p0); return __ret; } #else __ai uint16_t vmaxv_u16(uint16x4_t __p0) { uint16_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16_t) __builtin_neon_vmaxv_u16(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8_t vmaxv_s8(int8x8_t __p0) { int8_t __ret; __ret = (int8_t) __builtin_neon_vmaxv_s8(__p0); return __ret; } #else __ai int8_t vmaxv_s8(int8x8_t __p0) { int8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8_t) __builtin_neon_vmaxv_s8(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32_t vmaxv_f32(float32x2_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vmaxv_f32(__p0); return __ret; } #else __ai float32_t vmaxv_f32(float32x2_t __p0) { float32_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32_t) __builtin_neon_vmaxv_f32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32_t vmaxv_s32(int32x2_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vmaxv_s32(__p0); return __ret; } #else __ai int32_t vmaxv_s32(int32x2_t __p0) { int32_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int32_t) __builtin_neon_vmaxv_s32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16_t 
vmaxv_s16(int16x4_t __p0) { int16_t __ret; __ret = (int16_t) __builtin_neon_vmaxv_s16(__p0); return __ret; } #else __ai int16_t vmaxv_s16(int16x4_t __p0) { int16_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int16_t) __builtin_neon_vmaxv_s16(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vminq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); return __ret; } #else __ai float64x2_t vminq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float64x2_t) __builtin_neon_vminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vmin_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 10); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float64_t vminnmvq_f64(float64x2_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vminnmvq_f64(__p0); return __ret; } #else __ai float64_t vminnmvq_f64(float64x2_t __p0) { float64_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64_t) __builtin_neon_vminnmvq_f64(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32_t vminnmvq_f32(float32x4_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vminnmvq_f32(__p0); return __ret; } #else __ai float32_t vminnmvq_f32(float32x4_t __p0) { float32_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32_t) __builtin_neon_vminnmvq_f32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32_t vminnmv_f32(float32x2_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vminnmv_f32(__p0); return __ret; } #else __ai float32_t vminnmv_f32(float32x2_t __p0) { float32_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32_t) __builtin_neon_vminnmv_f32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8_t vminvq_u8(uint8x16_t __p0) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vminvq_u8(__p0); return __ret; } #else __ai uint8_t vminvq_u8(uint8x16_t __p0) { uint8_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8_t) __builtin_neon_vminvq_u8(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32_t vminvq_u32(uint32x4_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vminvq_u32(__p0); return __ret; } #else __ai uint32_t vminvq_u32(uint32x4_t __p0) { uint32_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint32_t) __builtin_neon_vminvq_u32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16_t vminvq_u16(uint16x8_t __p0) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vminvq_u16(__p0); return __ret; } #else __ai uint16_t vminvq_u16(uint16x8_t __p0) { uint16_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16_t) __builtin_neon_vminvq_u16(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8_t vminvq_s8(int8x16_t __p0) { int8_t __ret; __ret = (int8_t) __builtin_neon_vminvq_s8(__p0); 
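/* Across-vector reduction: __ret holds the minimum of all 16 signed 8-bit
 * lanes of __p0. The #else branch below is the big-endian variant, which
 * reverses lane order with __builtin_shufflevector before invoking the
 * builtin so lane numbering matches the little-endian convention it assumes. */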
return __ret; } #else __ai int8_t vminvq_s8(int8x16_t __p0) { int8_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8_t) __builtin_neon_vminvq_s8(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64_t vminvq_f64(float64x2_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vminvq_f64(__p0); return __ret; } #else __ai float64_t vminvq_f64(float64x2_t __p0) { float64_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64_t) __builtin_neon_vminvq_f64(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32_t vminvq_f32(float32x4_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vminvq_f32(__p0); return __ret; } #else __ai float32_t vminvq_f32(float32x4_t __p0) { float32_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32_t) __builtin_neon_vminvq_f32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32_t vminvq_s32(int32x4_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vminvq_s32(__p0); return __ret; } #else __ai int32_t vminvq_s32(int32x4_t __p0) { int32_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int32_t) __builtin_neon_vminvq_s32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16_t vminvq_s16(int16x8_t __p0) { int16_t __ret; __ret = (int16_t) __builtin_neon_vminvq_s16(__p0); return __ret; } #else __ai int16_t vminvq_s16(int16x8_t __p0) { int16_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16_t) __builtin_neon_vminvq_s16(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8_t vminv_u8(uint8x8_t __p0) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vminv_u8(__p0); return __ret; } #else __ai uint8_t vminv_u8(uint8x8_t __p0) { uint8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8_t) __builtin_neon_vminv_u8(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32_t vminv_u32(uint32x2_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vminv_u32(__p0); return __ret; } #else __ai uint32_t vminv_u32(uint32x2_t __p0) { uint32_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint32_t) __builtin_neon_vminv_u32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16_t vminv_u16(uint16x4_t __p0) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vminv_u16(__p0); return __ret; } #else __ai uint16_t vminv_u16(uint16x4_t __p0) { uint16_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (uint16_t) __builtin_neon_vminv_u16(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8_t vminv_s8(int8x8_t __p0) { int8_t __ret; __ret = (int8_t) __builtin_neon_vminv_s8(__p0); return __ret; } #else __ai int8_t vminv_s8(int8x8_t __p0) { int8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8_t) __builtin_neon_vminv_s8(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32_t vminv_f32(float32x2_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vminv_f32(__p0); return __ret; } #else __ai float32_t vminv_f32(float32x2_t __p0) { float32_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32_t) 
__builtin_neon_vminv_f32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32_t vminv_s32(int32x2_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vminv_s32(__p0); return __ret; } #else __ai int32_t vminv_s32(int32x2_t __p0) { int32_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int32_t) __builtin_neon_vminv_s32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16_t vminv_s16(int16x4_t __p0) { int16_t __ret; __ret = (int16_t) __builtin_neon_vminv_s16(__p0); return __ret; } #else __ai int16_t vminv_s16(int16x4_t __p0) { int16_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (int16_t) __builtin_neon_vminv_s16(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vmlaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } #else __ai float64x2_t vmlaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __rev0 + __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vmla_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { float64x1_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } #ifdef __LITTLE_ENDIAN__ #define vmlaq_laneq_u32(__p0_427, __p1_427, __p2_427, __p3_427) __extension__ ({ \ uint32x4_t __ret_427; \ uint32x4_t __s0_427 = __p0_427; \ uint32x4_t __s1_427 = __p1_427; \ uint32x4_t __s2_427 = __p2_427; \ __ret_427 = __s0_427 + __s1_427 * splatq_laneq_u32(__s2_427, __p3_427); \ __ret_427; \ }) #else #define vmlaq_laneq_u32(__p0_428, __p1_428, __p2_428, __p3_428) __extension__ ({ \ uint32x4_t __ret_428; \ uint32x4_t __s0_428 = __p0_428; \ uint32x4_t __s1_428 = __p1_428; \ uint32x4_t __s2_428 = __p2_428; \ uint32x4_t __rev0_428; __rev0_428 = __builtin_shufflevector(__s0_428, __s0_428, 3, 2, 1, 0); \ uint32x4_t __rev1_428; __rev1_428 = __builtin_shufflevector(__s1_428, __s1_428, 3, 2, 1, 0); \ uint32x4_t __rev2_428; __rev2_428 = __builtin_shufflevector(__s2_428, __s2_428, 3, 2, 1, 0); \ __ret_428 = __rev0_428 + __rev1_428 * __noswap_splatq_laneq_u32(__rev2_428, __p3_428); \ __ret_428 = __builtin_shufflevector(__ret_428, __ret_428, 3, 2, 1, 0); \ __ret_428; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlaq_laneq_u16(__p0_429, __p1_429, __p2_429, __p3_429) __extension__ ({ \ uint16x8_t __ret_429; \ uint16x8_t __s0_429 = __p0_429; \ uint16x8_t __s1_429 = __p1_429; \ uint16x8_t __s2_429 = __p2_429; \ __ret_429 = __s0_429 + __s1_429 * splatq_laneq_u16(__s2_429, __p3_429); \ __ret_429; \ }) #else #define vmlaq_laneq_u16(__p0_430, __p1_430, __p2_430, __p3_430) __extension__ ({ \ uint16x8_t __ret_430; \ uint16x8_t __s0_430 = __p0_430; \ uint16x8_t __s1_430 = __p1_430; \ uint16x8_t __s2_430 = __p2_430; \ uint16x8_t __rev0_430; __rev0_430 = __builtin_shufflevector(__s0_430, __s0_430, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev1_430; __rev1_430 = __builtin_shufflevector(__s1_430, __s1_430, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev2_430; __rev2_430 = __builtin_shufflevector(__s2_430, __s2_430, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_430 = __rev0_430 + __rev1_430 * __noswap_splatq_laneq_u16(__rev2_430, __p3_430); \ __ret_430 = __builtin_shufflevector(__ret_430, __ret_430, 7, 6, 5, 4, 3, 2, 
1, 0); \ __ret_430; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlaq_laneq_f32(__p0_431, __p1_431, __p2_431, __p3_431) __extension__ ({ \ float32x4_t __ret_431; \ float32x4_t __s0_431 = __p0_431; \ float32x4_t __s1_431 = __p1_431; \ float32x4_t __s2_431 = __p2_431; \ __ret_431 = __s0_431 + __s1_431 * splatq_laneq_f32(__s2_431, __p3_431); \ __ret_431; \ }) #else #define vmlaq_laneq_f32(__p0_432, __p1_432, __p2_432, __p3_432) __extension__ ({ \ float32x4_t __ret_432; \ float32x4_t __s0_432 = __p0_432; \ float32x4_t __s1_432 = __p1_432; \ float32x4_t __s2_432 = __p2_432; \ float32x4_t __rev0_432; __rev0_432 = __builtin_shufflevector(__s0_432, __s0_432, 3, 2, 1, 0); \ float32x4_t __rev1_432; __rev1_432 = __builtin_shufflevector(__s1_432, __s1_432, 3, 2, 1, 0); \ float32x4_t __rev2_432; __rev2_432 = __builtin_shufflevector(__s2_432, __s2_432, 3, 2, 1, 0); \ __ret_432 = __rev0_432 + __rev1_432 * __noswap_splatq_laneq_f32(__rev2_432, __p3_432); \ __ret_432 = __builtin_shufflevector(__ret_432, __ret_432, 3, 2, 1, 0); \ __ret_432; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlaq_laneq_s32(__p0_433, __p1_433, __p2_433, __p3_433) __extension__ ({ \ int32x4_t __ret_433; \ int32x4_t __s0_433 = __p0_433; \ int32x4_t __s1_433 = __p1_433; \ int32x4_t __s2_433 = __p2_433; \ __ret_433 = __s0_433 + __s1_433 * splatq_laneq_s32(__s2_433, __p3_433); \ __ret_433; \ }) #else #define vmlaq_laneq_s32(__p0_434, __p1_434, __p2_434, __p3_434) __extension__ ({ \ int32x4_t __ret_434; \ int32x4_t __s0_434 = __p0_434; \ int32x4_t __s1_434 = __p1_434; \ int32x4_t __s2_434 = __p2_434; \ int32x4_t __rev0_434; __rev0_434 = __builtin_shufflevector(__s0_434, __s0_434, 3, 2, 1, 0); \ int32x4_t __rev1_434; __rev1_434 = __builtin_shufflevector(__s1_434, __s1_434, 3, 2, 1, 0); \ int32x4_t __rev2_434; __rev2_434 = __builtin_shufflevector(__s2_434, __s2_434, 3, 2, 1, 0); \ __ret_434 = __rev0_434 + __rev1_434 * __noswap_splatq_laneq_s32(__rev2_434, __p3_434); \ __ret_434 = __builtin_shufflevector(__ret_434, __ret_434, 3, 2, 1, 0); \ __ret_434; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlaq_laneq_s16(__p0_435, __p1_435, __p2_435, __p3_435) __extension__ ({ \ int16x8_t __ret_435; \ int16x8_t __s0_435 = __p0_435; \ int16x8_t __s1_435 = __p1_435; \ int16x8_t __s2_435 = __p2_435; \ __ret_435 = __s0_435 + __s1_435 * splatq_laneq_s16(__s2_435, __p3_435); \ __ret_435; \ }) #else #define vmlaq_laneq_s16(__p0_436, __p1_436, __p2_436, __p3_436) __extension__ ({ \ int16x8_t __ret_436; \ int16x8_t __s0_436 = __p0_436; \ int16x8_t __s1_436 = __p1_436; \ int16x8_t __s2_436 = __p2_436; \ int16x8_t __rev0_436; __rev0_436 = __builtin_shufflevector(__s0_436, __s0_436, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1_436; __rev1_436 = __builtin_shufflevector(__s1_436, __s1_436, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev2_436; __rev2_436 = __builtin_shufflevector(__s2_436, __s2_436, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_436 = __rev0_436 + __rev1_436 * __noswap_splatq_laneq_s16(__rev2_436, __p3_436); \ __ret_436 = __builtin_shufflevector(__ret_436, __ret_436, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_436; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmla_laneq_u32(__p0_437, __p1_437, __p2_437, __p3_437) __extension__ ({ \ uint32x2_t __ret_437; \ uint32x2_t __s0_437 = __p0_437; \ uint32x2_t __s1_437 = __p1_437; \ uint32x4_t __s2_437 = __p2_437; \ __ret_437 = __s0_437 + __s1_437 * splat_laneq_u32(__s2_437, __p3_437); \ __ret_437; \ }) #else #define vmla_laneq_u32(__p0_438, __p1_438, __p2_438, __p3_438) __extension__ ({ \ uint32x2_t __ret_438; \ uint32x2_t __s0_438 = 
__p0_438; \ uint32x2_t __s1_438 = __p1_438; \ uint32x4_t __s2_438 = __p2_438; \ uint32x2_t __rev0_438; __rev0_438 = __builtin_shufflevector(__s0_438, __s0_438, 1, 0); \ uint32x2_t __rev1_438; __rev1_438 = __builtin_shufflevector(__s1_438, __s1_438, 1, 0); \ uint32x4_t __rev2_438; __rev2_438 = __builtin_shufflevector(__s2_438, __s2_438, 3, 2, 1, 0); \ __ret_438 = __rev0_438 + __rev1_438 * __noswap_splat_laneq_u32(__rev2_438, __p3_438); \ __ret_438 = __builtin_shufflevector(__ret_438, __ret_438, 1, 0); \ __ret_438; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmla_laneq_u16(__p0_439, __p1_439, __p2_439, __p3_439) __extension__ ({ \ uint16x4_t __ret_439; \ uint16x4_t __s0_439 = __p0_439; \ uint16x4_t __s1_439 = __p1_439; \ uint16x8_t __s2_439 = __p2_439; \ __ret_439 = __s0_439 + __s1_439 * splat_laneq_u16(__s2_439, __p3_439); \ __ret_439; \ }) #else #define vmla_laneq_u16(__p0_440, __p1_440, __p2_440, __p3_440) __extension__ ({ \ uint16x4_t __ret_440; \ uint16x4_t __s0_440 = __p0_440; \ uint16x4_t __s1_440 = __p1_440; \ uint16x8_t __s2_440 = __p2_440; \ uint16x4_t __rev0_440; __rev0_440 = __builtin_shufflevector(__s0_440, __s0_440, 3, 2, 1, 0); \ uint16x4_t __rev1_440; __rev1_440 = __builtin_shufflevector(__s1_440, __s1_440, 3, 2, 1, 0); \ uint16x8_t __rev2_440; __rev2_440 = __builtin_shufflevector(__s2_440, __s2_440, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_440 = __rev0_440 + __rev1_440 * __noswap_splat_laneq_u16(__rev2_440, __p3_440); \ __ret_440 = __builtin_shufflevector(__ret_440, __ret_440, 3, 2, 1, 0); \ __ret_440; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmla_laneq_f32(__p0_441, __p1_441, __p2_441, __p3_441) __extension__ ({ \ float32x2_t __ret_441; \ float32x2_t __s0_441 = __p0_441; \ float32x2_t __s1_441 = __p1_441; \ float32x4_t __s2_441 = __p2_441; \ __ret_441 = __s0_441 + __s1_441 * splat_laneq_f32(__s2_441, __p3_441); \ __ret_441; \ }) #else #define vmla_laneq_f32(__p0_442, __p1_442, __p2_442, __p3_442) __extension__ ({ \ float32x2_t __ret_442; \ float32x2_t __s0_442 = __p0_442; \ float32x2_t __s1_442 = __p1_442; \ float32x4_t __s2_442 = __p2_442; \ float32x2_t __rev0_442; __rev0_442 = __builtin_shufflevector(__s0_442, __s0_442, 1, 0); \ float32x2_t __rev1_442; __rev1_442 = __builtin_shufflevector(__s1_442, __s1_442, 1, 0); \ float32x4_t __rev2_442; __rev2_442 = __builtin_shufflevector(__s2_442, __s2_442, 3, 2, 1, 0); \ __ret_442 = __rev0_442 + __rev1_442 * __noswap_splat_laneq_f32(__rev2_442, __p3_442); \ __ret_442 = __builtin_shufflevector(__ret_442, __ret_442, 1, 0); \ __ret_442; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmla_laneq_s32(__p0_443, __p1_443, __p2_443, __p3_443) __extension__ ({ \ int32x2_t __ret_443; \ int32x2_t __s0_443 = __p0_443; \ int32x2_t __s1_443 = __p1_443; \ int32x4_t __s2_443 = __p2_443; \ __ret_443 = __s0_443 + __s1_443 * splat_laneq_s32(__s2_443, __p3_443); \ __ret_443; \ }) #else #define vmla_laneq_s32(__p0_444, __p1_444, __p2_444, __p3_444) __extension__ ({ \ int32x2_t __ret_444; \ int32x2_t __s0_444 = __p0_444; \ int32x2_t __s1_444 = __p1_444; \ int32x4_t __s2_444 = __p2_444; \ int32x2_t __rev0_444; __rev0_444 = __builtin_shufflevector(__s0_444, __s0_444, 1, 0); \ int32x2_t __rev1_444; __rev1_444 = __builtin_shufflevector(__s1_444, __s1_444, 1, 0); \ int32x4_t __rev2_444; __rev2_444 = __builtin_shufflevector(__s2_444, __s2_444, 3, 2, 1, 0); \ __ret_444 = __rev0_444 + __rev1_444 * __noswap_splat_laneq_s32(__rev2_444, __p3_444); \ __ret_444 = __builtin_shufflevector(__ret_444, __ret_444, 1, 0); \ __ret_444; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define 
vmla_laneq_s16(__p0_445, __p1_445, __p2_445, __p3_445) __extension__ ({ \ int16x4_t __ret_445; \ int16x4_t __s0_445 = __p0_445; \ int16x4_t __s1_445 = __p1_445; \ int16x8_t __s2_445 = __p2_445; \ __ret_445 = __s0_445 + __s1_445 * splat_laneq_s16(__s2_445, __p3_445); \ __ret_445; \ }) #else #define vmla_laneq_s16(__p0_446, __p1_446, __p2_446, __p3_446) __extension__ ({ \ int16x4_t __ret_446; \ int16x4_t __s0_446 = __p0_446; \ int16x4_t __s1_446 = __p1_446; \ int16x8_t __s2_446 = __p2_446; \ int16x4_t __rev0_446; __rev0_446 = __builtin_shufflevector(__s0_446, __s0_446, 3, 2, 1, 0); \ int16x4_t __rev1_446; __rev1_446 = __builtin_shufflevector(__s1_446, __s1_446, 3, 2, 1, 0); \ int16x8_t __rev2_446; __rev2_446 = __builtin_shufflevector(__s2_446, __s2_446, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_446 = __rev0_446 + __rev1_446 * __noswap_splat_laneq_s16(__rev2_446, __p3_446); \ __ret_446 = __builtin_shufflevector(__ret_446, __ret_446, 3, 2, 1, 0); \ __ret_446; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlal_high_lane_u32(__p0_447, __p1_447, __p2_447, __p3_447) __extension__ ({ \ uint64x2_t __ret_447; \ uint64x2_t __s0_447 = __p0_447; \ uint32x4_t __s1_447 = __p1_447; \ uint32x2_t __s2_447 = __p2_447; \ __ret_447 = __s0_447 + vmull_u32(vget_high_u32(__s1_447), splat_lane_u32(__s2_447, __p3_447)); \ __ret_447; \ }) #else #define vmlal_high_lane_u32(__p0_448, __p1_448, __p2_448, __p3_448) __extension__ ({ \ uint64x2_t __ret_448; \ uint64x2_t __s0_448 = __p0_448; \ uint32x4_t __s1_448 = __p1_448; \ uint32x2_t __s2_448 = __p2_448; \ uint64x2_t __rev0_448; __rev0_448 = __builtin_shufflevector(__s0_448, __s0_448, 1, 0); \ uint32x4_t __rev1_448; __rev1_448 = __builtin_shufflevector(__s1_448, __s1_448, 3, 2, 1, 0); \ uint32x2_t __rev2_448; __rev2_448 = __builtin_shufflevector(__s2_448, __s2_448, 1, 0); \ __ret_448 = __rev0_448 + __noswap_vmull_u32(__noswap_vget_high_u32(__rev1_448), __noswap_splat_lane_u32(__rev2_448, __p3_448)); \ __ret_448 = __builtin_shufflevector(__ret_448, __ret_448, 1, 0); \ __ret_448; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlal_high_lane_u16(__p0_449, __p1_449, __p2_449, __p3_449) __extension__ ({ \ uint32x4_t __ret_449; \ uint32x4_t __s0_449 = __p0_449; \ uint16x8_t __s1_449 = __p1_449; \ uint16x4_t __s2_449 = __p2_449; \ __ret_449 = __s0_449 + vmull_u16(vget_high_u16(__s1_449), splat_lane_u16(__s2_449, __p3_449)); \ __ret_449; \ }) #else #define vmlal_high_lane_u16(__p0_450, __p1_450, __p2_450, __p3_450) __extension__ ({ \ uint32x4_t __ret_450; \ uint32x4_t __s0_450 = __p0_450; \ uint16x8_t __s1_450 = __p1_450; \ uint16x4_t __s2_450 = __p2_450; \ uint32x4_t __rev0_450; __rev0_450 = __builtin_shufflevector(__s0_450, __s0_450, 3, 2, 1, 0); \ uint16x8_t __rev1_450; __rev1_450 = __builtin_shufflevector(__s1_450, __s1_450, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x4_t __rev2_450; __rev2_450 = __builtin_shufflevector(__s2_450, __s2_450, 3, 2, 1, 0); \ __ret_450 = __rev0_450 + __noswap_vmull_u16(__noswap_vget_high_u16(__rev1_450), __noswap_splat_lane_u16(__rev2_450, __p3_450)); \ __ret_450 = __builtin_shufflevector(__ret_450, __ret_450, 3, 2, 1, 0); \ __ret_450; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlal_high_lane_s32(__p0_451, __p1_451, __p2_451, __p3_451) __extension__ ({ \ int64x2_t __ret_451; \ int64x2_t __s0_451 = __p0_451; \ int32x4_t __s1_451 = __p1_451; \ int32x2_t __s2_451 = __p2_451; \ __ret_451 = __s0_451 + vmull_s32(vget_high_s32(__s1_451), splat_lane_s32(__s2_451, __p3_451)); \ __ret_451; \ }) #else #define vmlal_high_lane_s32(__p0_452, __p1_452, __p2_452, __p3_452) 
__extension__ ({ \ int64x2_t __ret_452; \ int64x2_t __s0_452 = __p0_452; \ int32x4_t __s1_452 = __p1_452; \ int32x2_t __s2_452 = __p2_452; \ int64x2_t __rev0_452; __rev0_452 = __builtin_shufflevector(__s0_452, __s0_452, 1, 0); \ int32x4_t __rev1_452; __rev1_452 = __builtin_shufflevector(__s1_452, __s1_452, 3, 2, 1, 0); \ int32x2_t __rev2_452; __rev2_452 = __builtin_shufflevector(__s2_452, __s2_452, 1, 0); \ __ret_452 = __rev0_452 + __noswap_vmull_s32(__noswap_vget_high_s32(__rev1_452), __noswap_splat_lane_s32(__rev2_452, __p3_452)); \ __ret_452 = __builtin_shufflevector(__ret_452, __ret_452, 1, 0); \ __ret_452; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlal_high_lane_s16(__p0_453, __p1_453, __p2_453, __p3_453) __extension__ ({ \ int32x4_t __ret_453; \ int32x4_t __s0_453 = __p0_453; \ int16x8_t __s1_453 = __p1_453; \ int16x4_t __s2_453 = __p2_453; \ __ret_453 = __s0_453 + vmull_s16(vget_high_s16(__s1_453), splat_lane_s16(__s2_453, __p3_453)); \ __ret_453; \ }) #else #define vmlal_high_lane_s16(__p0_454, __p1_454, __p2_454, __p3_454) __extension__ ({ \ int32x4_t __ret_454; \ int32x4_t __s0_454 = __p0_454; \ int16x8_t __s1_454 = __p1_454; \ int16x4_t __s2_454 = __p2_454; \ int32x4_t __rev0_454; __rev0_454 = __builtin_shufflevector(__s0_454, __s0_454, 3, 2, 1, 0); \ int16x8_t __rev1_454; __rev1_454 = __builtin_shufflevector(__s1_454, __s1_454, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x4_t __rev2_454; __rev2_454 = __builtin_shufflevector(__s2_454, __s2_454, 3, 2, 1, 0); \ __ret_454 = __rev0_454 + __noswap_vmull_s16(__noswap_vget_high_s16(__rev1_454), __noswap_splat_lane_s16(__rev2_454, __p3_454)); \ __ret_454 = __builtin_shufflevector(__ret_454, __ret_454, 3, 2, 1, 0); \ __ret_454; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlal_high_laneq_u32(__p0_455, __p1_455, __p2_455, __p3_455) __extension__ ({ \ uint64x2_t __ret_455; \ uint64x2_t __s0_455 = __p0_455; \ uint32x4_t __s1_455 = __p1_455; \ uint32x4_t __s2_455 = __p2_455; \ __ret_455 = __s0_455 + vmull_u32(vget_high_u32(__s1_455), splat_laneq_u32(__s2_455, __p3_455)); \ __ret_455; \ }) #else #define vmlal_high_laneq_u32(__p0_456, __p1_456, __p2_456, __p3_456) __extension__ ({ \ uint64x2_t __ret_456; \ uint64x2_t __s0_456 = __p0_456; \ uint32x4_t __s1_456 = __p1_456; \ uint32x4_t __s2_456 = __p2_456; \ uint64x2_t __rev0_456; __rev0_456 = __builtin_shufflevector(__s0_456, __s0_456, 1, 0); \ uint32x4_t __rev1_456; __rev1_456 = __builtin_shufflevector(__s1_456, __s1_456, 3, 2, 1, 0); \ uint32x4_t __rev2_456; __rev2_456 = __builtin_shufflevector(__s2_456, __s2_456, 3, 2, 1, 0); \ __ret_456 = __rev0_456 + __noswap_vmull_u32(__noswap_vget_high_u32(__rev1_456), __noswap_splat_laneq_u32(__rev2_456, __p3_456)); \ __ret_456 = __builtin_shufflevector(__ret_456, __ret_456, 1, 0); \ __ret_456; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlal_high_laneq_u16(__p0_457, __p1_457, __p2_457, __p3_457) __extension__ ({ \ uint32x4_t __ret_457; \ uint32x4_t __s0_457 = __p0_457; \ uint16x8_t __s1_457 = __p1_457; \ uint16x8_t __s2_457 = __p2_457; \ __ret_457 = __s0_457 + vmull_u16(vget_high_u16(__s1_457), splat_laneq_u16(__s2_457, __p3_457)); \ __ret_457; \ }) #else #define vmlal_high_laneq_u16(__p0_458, __p1_458, __p2_458, __p3_458) __extension__ ({ \ uint32x4_t __ret_458; \ uint32x4_t __s0_458 = __p0_458; \ uint16x8_t __s1_458 = __p1_458; \ uint16x8_t __s2_458 = __p2_458; \ uint32x4_t __rev0_458; __rev0_458 = __builtin_shufflevector(__s0_458, __s0_458, 3, 2, 1, 0); \ uint16x8_t __rev1_458; __rev1_458 = __builtin_shufflevector(__s1_458, __s1_458, 7, 6, 5, 4, 3, 2, 1, 
0); \ uint16x8_t __rev2_458; __rev2_458 = __builtin_shufflevector(__s2_458, __s2_458, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_458 = __rev0_458 + __noswap_vmull_u16(__noswap_vget_high_u16(__rev1_458), __noswap_splat_laneq_u16(__rev2_458, __p3_458)); \ __ret_458 = __builtin_shufflevector(__ret_458, __ret_458, 3, 2, 1, 0); \ __ret_458; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlal_high_laneq_s32(__p0_459, __p1_459, __p2_459, __p3_459) __extension__ ({ \ int64x2_t __ret_459; \ int64x2_t __s0_459 = __p0_459; \ int32x4_t __s1_459 = __p1_459; \ int32x4_t __s2_459 = __p2_459; \ __ret_459 = __s0_459 + vmull_s32(vget_high_s32(__s1_459), splat_laneq_s32(__s2_459, __p3_459)); \ __ret_459; \ }) #else #define vmlal_high_laneq_s32(__p0_460, __p1_460, __p2_460, __p3_460) __extension__ ({ \ int64x2_t __ret_460; \ int64x2_t __s0_460 = __p0_460; \ int32x4_t __s1_460 = __p1_460; \ int32x4_t __s2_460 = __p2_460; \ int64x2_t __rev0_460; __rev0_460 = __builtin_shufflevector(__s0_460, __s0_460, 1, 0); \ int32x4_t __rev1_460; __rev1_460 = __builtin_shufflevector(__s1_460, __s1_460, 3, 2, 1, 0); \ int32x4_t __rev2_460; __rev2_460 = __builtin_shufflevector(__s2_460, __s2_460, 3, 2, 1, 0); \ __ret_460 = __rev0_460 + __noswap_vmull_s32(__noswap_vget_high_s32(__rev1_460), __noswap_splat_laneq_s32(__rev2_460, __p3_460)); \ __ret_460 = __builtin_shufflevector(__ret_460, __ret_460, 1, 0); \ __ret_460; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlal_high_laneq_s16(__p0_461, __p1_461, __p2_461, __p3_461) __extension__ ({ \ int32x4_t __ret_461; \ int32x4_t __s0_461 = __p0_461; \ int16x8_t __s1_461 = __p1_461; \ int16x8_t __s2_461 = __p2_461; \ __ret_461 = __s0_461 + vmull_s16(vget_high_s16(__s1_461), splat_laneq_s16(__s2_461, __p3_461)); \ __ret_461; \ }) #else #define vmlal_high_laneq_s16(__p0_462, __p1_462, __p2_462, __p3_462) __extension__ ({ \ int32x4_t __ret_462; \ int32x4_t __s0_462 = __p0_462; \ int16x8_t __s1_462 = __p1_462; \ int16x8_t __s2_462 = __p2_462; \ int32x4_t __rev0_462; __rev0_462 = __builtin_shufflevector(__s0_462, __s0_462, 3, 2, 1, 0); \ int16x8_t __rev1_462; __rev1_462 = __builtin_shufflevector(__s1_462, __s1_462, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev2_462; __rev2_462 = __builtin_shufflevector(__s2_462, __s2_462, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_462 = __rev0_462 + __noswap_vmull_s16(__noswap_vget_high_s16(__rev1_462), __noswap_splat_laneq_s16(__rev2_462, __p3_462)); \ __ret_462 = __builtin_shufflevector(__ret_462, __ret_462, 3, 2, 1, 0); \ __ret_462; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlal_laneq_u32(__p0_463, __p1_463, __p2_463, __p3_463) __extension__ ({ \ uint64x2_t __ret_463; \ uint64x2_t __s0_463 = __p0_463; \ uint32x2_t __s1_463 = __p1_463; \ uint32x4_t __s2_463 = __p2_463; \ __ret_463 = __s0_463 + vmull_u32(__s1_463, splat_laneq_u32(__s2_463, __p3_463)); \ __ret_463; \ }) #else #define vmlal_laneq_u32(__p0_464, __p1_464, __p2_464, __p3_464) __extension__ ({ \ uint64x2_t __ret_464; \ uint64x2_t __s0_464 = __p0_464; \ uint32x2_t __s1_464 = __p1_464; \ uint32x4_t __s2_464 = __p2_464; \ uint64x2_t __rev0_464; __rev0_464 = __builtin_shufflevector(__s0_464, __s0_464, 1, 0); \ uint32x2_t __rev1_464; __rev1_464 = __builtin_shufflevector(__s1_464, __s1_464, 1, 0); \ uint32x4_t __rev2_464; __rev2_464 = __builtin_shufflevector(__s2_464, __s2_464, 3, 2, 1, 0); \ __ret_464 = __rev0_464 + __noswap_vmull_u32(__rev1_464, __noswap_splat_laneq_u32(__rev2_464, __p3_464)); \ __ret_464 = __builtin_shufflevector(__ret_464, __ret_464, 1, 0); \ __ret_464; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define 
vmlal_laneq_u16(__p0_465, __p1_465, __p2_465, __p3_465) __extension__ ({ \ uint32x4_t __ret_465; \ uint32x4_t __s0_465 = __p0_465; \ uint16x4_t __s1_465 = __p1_465; \ uint16x8_t __s2_465 = __p2_465; \ __ret_465 = __s0_465 + vmull_u16(__s1_465, splat_laneq_u16(__s2_465, __p3_465)); \ __ret_465; \ }) #else #define vmlal_laneq_u16(__p0_466, __p1_466, __p2_466, __p3_466) __extension__ ({ \ uint32x4_t __ret_466; \ uint32x4_t __s0_466 = __p0_466; \ uint16x4_t __s1_466 = __p1_466; \ uint16x8_t __s2_466 = __p2_466; \ uint32x4_t __rev0_466; __rev0_466 = __builtin_shufflevector(__s0_466, __s0_466, 3, 2, 1, 0); \ uint16x4_t __rev1_466; __rev1_466 = __builtin_shufflevector(__s1_466, __s1_466, 3, 2, 1, 0); \ uint16x8_t __rev2_466; __rev2_466 = __builtin_shufflevector(__s2_466, __s2_466, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_466 = __rev0_466 + __noswap_vmull_u16(__rev1_466, __noswap_splat_laneq_u16(__rev2_466, __p3_466)); \ __ret_466 = __builtin_shufflevector(__ret_466, __ret_466, 3, 2, 1, 0); \ __ret_466; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlal_laneq_s32(__p0_467, __p1_467, __p2_467, __p3_467) __extension__ ({ \ int64x2_t __ret_467; \ int64x2_t __s0_467 = __p0_467; \ int32x2_t __s1_467 = __p1_467; \ int32x4_t __s2_467 = __p2_467; \ __ret_467 = __s0_467 + vmull_s32(__s1_467, splat_laneq_s32(__s2_467, __p3_467)); \ __ret_467; \ }) #else #define vmlal_laneq_s32(__p0_468, __p1_468, __p2_468, __p3_468) __extension__ ({ \ int64x2_t __ret_468; \ int64x2_t __s0_468 = __p0_468; \ int32x2_t __s1_468 = __p1_468; \ int32x4_t __s2_468 = __p2_468; \ int64x2_t __rev0_468; __rev0_468 = __builtin_shufflevector(__s0_468, __s0_468, 1, 0); \ int32x2_t __rev1_468; __rev1_468 = __builtin_shufflevector(__s1_468, __s1_468, 1, 0); \ int32x4_t __rev2_468; __rev2_468 = __builtin_shufflevector(__s2_468, __s2_468, 3, 2, 1, 0); \ __ret_468 = __rev0_468 + __noswap_vmull_s32(__rev1_468, __noswap_splat_laneq_s32(__rev2_468, __p3_468)); \ __ret_468 = __builtin_shufflevector(__ret_468, __ret_468, 1, 0); \ __ret_468; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlal_laneq_s16(__p0_469, __p1_469, __p2_469, __p3_469) __extension__ ({ \ int32x4_t __ret_469; \ int32x4_t __s0_469 = __p0_469; \ int16x4_t __s1_469 = __p1_469; \ int16x8_t __s2_469 = __p2_469; \ __ret_469 = __s0_469 + vmull_s16(__s1_469, splat_laneq_s16(__s2_469, __p3_469)); \ __ret_469; \ }) #else #define vmlal_laneq_s16(__p0_470, __p1_470, __p2_470, __p3_470) __extension__ ({ \ int32x4_t __ret_470; \ int32x4_t __s0_470 = __p0_470; \ int16x4_t __s1_470 = __p1_470; \ int16x8_t __s2_470 = __p2_470; \ int32x4_t __rev0_470; __rev0_470 = __builtin_shufflevector(__s0_470, __s0_470, 3, 2, 1, 0); \ int16x4_t __rev1_470; __rev1_470 = __builtin_shufflevector(__s1_470, __s1_470, 3, 2, 1, 0); \ int16x8_t __rev2_470; __rev2_470 = __builtin_shufflevector(__s2_470, __s2_470, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_470 = __rev0_470 + __noswap_vmull_s16(__rev1_470, __noswap_splat_laneq_s16(__rev2_470, __p3_470)); \ __ret_470 = __builtin_shufflevector(__ret_470, __ret_470, 3, 2, 1, 0); \ __ret_470; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vmlsq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } #else __ai float64x2_t vmlsq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); 
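/* Big-endian variant: all three operands were lane-reversed above and the
 * result is reversed back below, keeping lane numbering consistent with the
 * little-endian convention the builtins assume. For this purely element-wise
 * multiply-subtract the double reversal leaves the computed values unchanged. */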
__ret = __rev0 - __rev1 * __rev2; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vmls_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { float64x1_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } #ifdef __LITTLE_ENDIAN__ #define vmlsq_laneq_u32(__p0_471, __p1_471, __p2_471, __p3_471) __extension__ ({ \ uint32x4_t __ret_471; \ uint32x4_t __s0_471 = __p0_471; \ uint32x4_t __s1_471 = __p1_471; \ uint32x4_t __s2_471 = __p2_471; \ __ret_471 = __s0_471 - __s1_471 * splatq_laneq_u32(__s2_471, __p3_471); \ __ret_471; \ }) #else #define vmlsq_laneq_u32(__p0_472, __p1_472, __p2_472, __p3_472) __extension__ ({ \ uint32x4_t __ret_472; \ uint32x4_t __s0_472 = __p0_472; \ uint32x4_t __s1_472 = __p1_472; \ uint32x4_t __s2_472 = __p2_472; \ uint32x4_t __rev0_472; __rev0_472 = __builtin_shufflevector(__s0_472, __s0_472, 3, 2, 1, 0); \ uint32x4_t __rev1_472; __rev1_472 = __builtin_shufflevector(__s1_472, __s1_472, 3, 2, 1, 0); \ uint32x4_t __rev2_472; __rev2_472 = __builtin_shufflevector(__s2_472, __s2_472, 3, 2, 1, 0); \ __ret_472 = __rev0_472 - __rev1_472 * __noswap_splatq_laneq_u32(__rev2_472, __p3_472); \ __ret_472 = __builtin_shufflevector(__ret_472, __ret_472, 3, 2, 1, 0); \ __ret_472; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsq_laneq_u16(__p0_473, __p1_473, __p2_473, __p3_473) __extension__ ({ \ uint16x8_t __ret_473; \ uint16x8_t __s0_473 = __p0_473; \ uint16x8_t __s1_473 = __p1_473; \ uint16x8_t __s2_473 = __p2_473; \ __ret_473 = __s0_473 - __s1_473 * splatq_laneq_u16(__s2_473, __p3_473); \ __ret_473; \ }) #else #define vmlsq_laneq_u16(__p0_474, __p1_474, __p2_474, __p3_474) __extension__ ({ \ uint16x8_t __ret_474; \ uint16x8_t __s0_474 = __p0_474; \ uint16x8_t __s1_474 = __p1_474; \ uint16x8_t __s2_474 = __p2_474; \ uint16x8_t __rev0_474; __rev0_474 = __builtin_shufflevector(__s0_474, __s0_474, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev1_474; __rev1_474 = __builtin_shufflevector(__s1_474, __s1_474, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev2_474; __rev2_474 = __builtin_shufflevector(__s2_474, __s2_474, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_474 = __rev0_474 - __rev1_474 * __noswap_splatq_laneq_u16(__rev2_474, __p3_474); \ __ret_474 = __builtin_shufflevector(__ret_474, __ret_474, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_474; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsq_laneq_f32(__p0_475, __p1_475, __p2_475, __p3_475) __extension__ ({ \ float32x4_t __ret_475; \ float32x4_t __s0_475 = __p0_475; \ float32x4_t __s1_475 = __p1_475; \ float32x4_t __s2_475 = __p2_475; \ __ret_475 = __s0_475 - __s1_475 * splatq_laneq_f32(__s2_475, __p3_475); \ __ret_475; \ }) #else #define vmlsq_laneq_f32(__p0_476, __p1_476, __p2_476, __p3_476) __extension__ ({ \ float32x4_t __ret_476; \ float32x4_t __s0_476 = __p0_476; \ float32x4_t __s1_476 = __p1_476; \ float32x4_t __s2_476 = __p2_476; \ float32x4_t __rev0_476; __rev0_476 = __builtin_shufflevector(__s0_476, __s0_476, 3, 2, 1, 0); \ float32x4_t __rev1_476; __rev1_476 = __builtin_shufflevector(__s1_476, __s1_476, 3, 2, 1, 0); \ float32x4_t __rev2_476; __rev2_476 = __builtin_shufflevector(__s2_476, __s2_476, 3, 2, 1, 0); \ __ret_476 = __rev0_476 - __rev1_476 * __noswap_splatq_laneq_f32(__rev2_476, __p3_476); \ __ret_476 = __builtin_shufflevector(__ret_476, __ret_476, 3, 2, 1, 0); \ __ret_476; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsq_laneq_s32(__p0_477, __p1_477, __p2_477, __p3_477) __extension__ ({ \ int32x4_t __ret_477; \ int32x4_t __s0_477 = __p0_477; \ int32x4_t __s1_477 = __p1_477; \ int32x4_t 
__s2_477 = __p2_477; \ __ret_477 = __s0_477 - __s1_477 * splatq_laneq_s32(__s2_477, __p3_477); \ __ret_477; \ }) #else #define vmlsq_laneq_s32(__p0_478, __p1_478, __p2_478, __p3_478) __extension__ ({ \ int32x4_t __ret_478; \ int32x4_t __s0_478 = __p0_478; \ int32x4_t __s1_478 = __p1_478; \ int32x4_t __s2_478 = __p2_478; \ int32x4_t __rev0_478; __rev0_478 = __builtin_shufflevector(__s0_478, __s0_478, 3, 2, 1, 0); \ int32x4_t __rev1_478; __rev1_478 = __builtin_shufflevector(__s1_478, __s1_478, 3, 2, 1, 0); \ int32x4_t __rev2_478; __rev2_478 = __builtin_shufflevector(__s2_478, __s2_478, 3, 2, 1, 0); \ __ret_478 = __rev0_478 - __rev1_478 * __noswap_splatq_laneq_s32(__rev2_478, __p3_478); \ __ret_478 = __builtin_shufflevector(__ret_478, __ret_478, 3, 2, 1, 0); \ __ret_478; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsq_laneq_s16(__p0_479, __p1_479, __p2_479, __p3_479) __extension__ ({ \ int16x8_t __ret_479; \ int16x8_t __s0_479 = __p0_479; \ int16x8_t __s1_479 = __p1_479; \ int16x8_t __s2_479 = __p2_479; \ __ret_479 = __s0_479 - __s1_479 * splatq_laneq_s16(__s2_479, __p3_479); \ __ret_479; \ }) #else #define vmlsq_laneq_s16(__p0_480, __p1_480, __p2_480, __p3_480) __extension__ ({ \ int16x8_t __ret_480; \ int16x8_t __s0_480 = __p0_480; \ int16x8_t __s1_480 = __p1_480; \ int16x8_t __s2_480 = __p2_480; \ int16x8_t __rev0_480; __rev0_480 = __builtin_shufflevector(__s0_480, __s0_480, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1_480; __rev1_480 = __builtin_shufflevector(__s1_480, __s1_480, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev2_480; __rev2_480 = __builtin_shufflevector(__s2_480, __s2_480, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_480 = __rev0_480 - __rev1_480 * __noswap_splatq_laneq_s16(__rev2_480, __p3_480); \ __ret_480 = __builtin_shufflevector(__ret_480, __ret_480, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_480; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmls_laneq_u32(__p0_481, __p1_481, __p2_481, __p3_481) __extension__ ({ \ uint32x2_t __ret_481; \ uint32x2_t __s0_481 = __p0_481; \ uint32x2_t __s1_481 = __p1_481; \ uint32x4_t __s2_481 = __p2_481; \ __ret_481 = __s0_481 - __s1_481 * splat_laneq_u32(__s2_481, __p3_481); \ __ret_481; \ }) #else #define vmls_laneq_u32(__p0_482, __p1_482, __p2_482, __p3_482) __extension__ ({ \ uint32x2_t __ret_482; \ uint32x2_t __s0_482 = __p0_482; \ uint32x2_t __s1_482 = __p1_482; \ uint32x4_t __s2_482 = __p2_482; \ uint32x2_t __rev0_482; __rev0_482 = __builtin_shufflevector(__s0_482, __s0_482, 1, 0); \ uint32x2_t __rev1_482; __rev1_482 = __builtin_shufflevector(__s1_482, __s1_482, 1, 0); \ uint32x4_t __rev2_482; __rev2_482 = __builtin_shufflevector(__s2_482, __s2_482, 3, 2, 1, 0); \ __ret_482 = __rev0_482 - __rev1_482 * __noswap_splat_laneq_u32(__rev2_482, __p3_482); \ __ret_482 = __builtin_shufflevector(__ret_482, __ret_482, 1, 0); \ __ret_482; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmls_laneq_u16(__p0_483, __p1_483, __p2_483, __p3_483) __extension__ ({ \ uint16x4_t __ret_483; \ uint16x4_t __s0_483 = __p0_483; \ uint16x4_t __s1_483 = __p1_483; \ uint16x8_t __s2_483 = __p2_483; \ __ret_483 = __s0_483 - __s1_483 * splat_laneq_u16(__s2_483, __p3_483); \ __ret_483; \ }) #else #define vmls_laneq_u16(__p0_484, __p1_484, __p2_484, __p3_484) __extension__ ({ \ uint16x4_t __ret_484; \ uint16x4_t __s0_484 = __p0_484; \ uint16x4_t __s1_484 = __p1_484; \ uint16x8_t __s2_484 = __p2_484; \ uint16x4_t __rev0_484; __rev0_484 = __builtin_shufflevector(__s0_484, __s0_484, 3, 2, 1, 0); \ uint16x4_t __rev1_484; __rev1_484 = __builtin_shufflevector(__s1_484, __s1_484, 3, 2, 1, 0); \ 
uint16x8_t __rev2_484; __rev2_484 = __builtin_shufflevector(__s2_484, __s2_484, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_484 = __rev0_484 - __rev1_484 * __noswap_splat_laneq_u16(__rev2_484, __p3_484); \ __ret_484 = __builtin_shufflevector(__ret_484, __ret_484, 3, 2, 1, 0); \ __ret_484; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmls_laneq_f32(__p0_485, __p1_485, __p2_485, __p3_485) __extension__ ({ \ float32x2_t __ret_485; \ float32x2_t __s0_485 = __p0_485; \ float32x2_t __s1_485 = __p1_485; \ float32x4_t __s2_485 = __p2_485; \ __ret_485 = __s0_485 - __s1_485 * splat_laneq_f32(__s2_485, __p3_485); \ __ret_485; \ }) #else #define vmls_laneq_f32(__p0_486, __p1_486, __p2_486, __p3_486) __extension__ ({ \ float32x2_t __ret_486; \ float32x2_t __s0_486 = __p0_486; \ float32x2_t __s1_486 = __p1_486; \ float32x4_t __s2_486 = __p2_486; \ float32x2_t __rev0_486; __rev0_486 = __builtin_shufflevector(__s0_486, __s0_486, 1, 0); \ float32x2_t __rev1_486; __rev1_486 = __builtin_shufflevector(__s1_486, __s1_486, 1, 0); \ float32x4_t __rev2_486; __rev2_486 = __builtin_shufflevector(__s2_486, __s2_486, 3, 2, 1, 0); \ __ret_486 = __rev0_486 - __rev1_486 * __noswap_splat_laneq_f32(__rev2_486, __p3_486); \ __ret_486 = __builtin_shufflevector(__ret_486, __ret_486, 1, 0); \ __ret_486; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmls_laneq_s32(__p0_487, __p1_487, __p2_487, __p3_487) __extension__ ({ \ int32x2_t __ret_487; \ int32x2_t __s0_487 = __p0_487; \ int32x2_t __s1_487 = __p1_487; \ int32x4_t __s2_487 = __p2_487; \ __ret_487 = __s0_487 - __s1_487 * splat_laneq_s32(__s2_487, __p3_487); \ __ret_487; \ }) #else #define vmls_laneq_s32(__p0_488, __p1_488, __p2_488, __p3_488) __extension__ ({ \ int32x2_t __ret_488; \ int32x2_t __s0_488 = __p0_488; \ int32x2_t __s1_488 = __p1_488; \ int32x4_t __s2_488 = __p2_488; \ int32x2_t __rev0_488; __rev0_488 = __builtin_shufflevector(__s0_488, __s0_488, 1, 0); \ int32x2_t __rev1_488; __rev1_488 = __builtin_shufflevector(__s1_488, __s1_488, 1, 0); \ int32x4_t __rev2_488; __rev2_488 = __builtin_shufflevector(__s2_488, __s2_488, 3, 2, 1, 0); \ __ret_488 = __rev0_488 - __rev1_488 * __noswap_splat_laneq_s32(__rev2_488, __p3_488); \ __ret_488 = __builtin_shufflevector(__ret_488, __ret_488, 1, 0); \ __ret_488; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmls_laneq_s16(__p0_489, __p1_489, __p2_489, __p3_489) __extension__ ({ \ int16x4_t __ret_489; \ int16x4_t __s0_489 = __p0_489; \ int16x4_t __s1_489 = __p1_489; \ int16x8_t __s2_489 = __p2_489; \ __ret_489 = __s0_489 - __s1_489 * splat_laneq_s16(__s2_489, __p3_489); \ __ret_489; \ }) #else #define vmls_laneq_s16(__p0_490, __p1_490, __p2_490, __p3_490) __extension__ ({ \ int16x4_t __ret_490; \ int16x4_t __s0_490 = __p0_490; \ int16x4_t __s1_490 = __p1_490; \ int16x8_t __s2_490 = __p2_490; \ int16x4_t __rev0_490; __rev0_490 = __builtin_shufflevector(__s0_490, __s0_490, 3, 2, 1, 0); \ int16x4_t __rev1_490; __rev1_490 = __builtin_shufflevector(__s1_490, __s1_490, 3, 2, 1, 0); \ int16x8_t __rev2_490; __rev2_490 = __builtin_shufflevector(__s2_490, __s2_490, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_490 = __rev0_490 - __rev1_490 * __noswap_splat_laneq_s16(__rev2_490, __p3_490); \ __ret_490 = __builtin_shufflevector(__ret_490, __ret_490, 3, 2, 1, 0); \ __ret_490; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsl_high_lane_u32(__p0_491, __p1_491, __p2_491, __p3_491) __extension__ ({ \ uint64x2_t __ret_491; \ uint64x2_t __s0_491 = __p0_491; \ uint32x4_t __s1_491 = __p1_491; \ uint32x2_t __s2_491 = __p2_491; \ __ret_491 = __s0_491 - 
vmull_u32(vget_high_u32(__s1_491), splat_lane_u32(__s2_491, __p3_491)); \ __ret_491; \ }) #else #define vmlsl_high_lane_u32(__p0_492, __p1_492, __p2_492, __p3_492) __extension__ ({ \ uint64x2_t __ret_492; \ uint64x2_t __s0_492 = __p0_492; \ uint32x4_t __s1_492 = __p1_492; \ uint32x2_t __s2_492 = __p2_492; \ uint64x2_t __rev0_492; __rev0_492 = __builtin_shufflevector(__s0_492, __s0_492, 1, 0); \ uint32x4_t __rev1_492; __rev1_492 = __builtin_shufflevector(__s1_492, __s1_492, 3, 2, 1, 0); \ uint32x2_t __rev2_492; __rev2_492 = __builtin_shufflevector(__s2_492, __s2_492, 1, 0); \ __ret_492 = __rev0_492 - __noswap_vmull_u32(__noswap_vget_high_u32(__rev1_492), __noswap_splat_lane_u32(__rev2_492, __p3_492)); \ __ret_492 = __builtin_shufflevector(__ret_492, __ret_492, 1, 0); \ __ret_492; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsl_high_lane_u16(__p0_493, __p1_493, __p2_493, __p3_493) __extension__ ({ \ uint32x4_t __ret_493; \ uint32x4_t __s0_493 = __p0_493; \ uint16x8_t __s1_493 = __p1_493; \ uint16x4_t __s2_493 = __p2_493; \ __ret_493 = __s0_493 - vmull_u16(vget_high_u16(__s1_493), splat_lane_u16(__s2_493, __p3_493)); \ __ret_493; \ }) #else #define vmlsl_high_lane_u16(__p0_494, __p1_494, __p2_494, __p3_494) __extension__ ({ \ uint32x4_t __ret_494; \ uint32x4_t __s0_494 = __p0_494; \ uint16x8_t __s1_494 = __p1_494; \ uint16x4_t __s2_494 = __p2_494; \ uint32x4_t __rev0_494; __rev0_494 = __builtin_shufflevector(__s0_494, __s0_494, 3, 2, 1, 0); \ uint16x8_t __rev1_494; __rev1_494 = __builtin_shufflevector(__s1_494, __s1_494, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x4_t __rev2_494; __rev2_494 = __builtin_shufflevector(__s2_494, __s2_494, 3, 2, 1, 0); \ __ret_494 = __rev0_494 - __noswap_vmull_u16(__noswap_vget_high_u16(__rev1_494), __noswap_splat_lane_u16(__rev2_494, __p3_494)); \ __ret_494 = __builtin_shufflevector(__ret_494, __ret_494, 3, 2, 1, 0); \ __ret_494; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsl_high_lane_s32(__p0_495, __p1_495, __p2_495, __p3_495) __extension__ ({ \ int64x2_t __ret_495; \ int64x2_t __s0_495 = __p0_495; \ int32x4_t __s1_495 = __p1_495; \ int32x2_t __s2_495 = __p2_495; \ __ret_495 = __s0_495 - vmull_s32(vget_high_s32(__s1_495), splat_lane_s32(__s2_495, __p3_495)); \ __ret_495; \ }) #else #define vmlsl_high_lane_s32(__p0_496, __p1_496, __p2_496, __p3_496) __extension__ ({ \ int64x2_t __ret_496; \ int64x2_t __s0_496 = __p0_496; \ int32x4_t __s1_496 = __p1_496; \ int32x2_t __s2_496 = __p2_496; \ int64x2_t __rev0_496; __rev0_496 = __builtin_shufflevector(__s0_496, __s0_496, 1, 0); \ int32x4_t __rev1_496; __rev1_496 = __builtin_shufflevector(__s1_496, __s1_496, 3, 2, 1, 0); \ int32x2_t __rev2_496; __rev2_496 = __builtin_shufflevector(__s2_496, __s2_496, 1, 0); \ __ret_496 = __rev0_496 - __noswap_vmull_s32(__noswap_vget_high_s32(__rev1_496), __noswap_splat_lane_s32(__rev2_496, __p3_496)); \ __ret_496 = __builtin_shufflevector(__ret_496, __ret_496, 1, 0); \ __ret_496; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsl_high_lane_s16(__p0_497, __p1_497, __p2_497, __p3_497) __extension__ ({ \ int32x4_t __ret_497; \ int32x4_t __s0_497 = __p0_497; \ int16x8_t __s1_497 = __p1_497; \ int16x4_t __s2_497 = __p2_497; \ __ret_497 = __s0_497 - vmull_s16(vget_high_s16(__s1_497), splat_lane_s16(__s2_497, __p3_497)); \ __ret_497; \ }) #else #define vmlsl_high_lane_s16(__p0_498, __p1_498, __p2_498, __p3_498) __extension__ ({ \ int32x4_t __ret_498; \ int32x4_t __s0_498 = __p0_498; \ int16x8_t __s1_498 = __p1_498; \ int16x4_t __s2_498 = __p2_498; \ int32x4_t __rev0_498; __rev0_498 = 
__builtin_shufflevector(__s0_498, __s0_498, 3, 2, 1, 0); \ int16x8_t __rev1_498; __rev1_498 = __builtin_shufflevector(__s1_498, __s1_498, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x4_t __rev2_498; __rev2_498 = __builtin_shufflevector(__s2_498, __s2_498, 3, 2, 1, 0); \ __ret_498 = __rev0_498 - __noswap_vmull_s16(__noswap_vget_high_s16(__rev1_498), __noswap_splat_lane_s16(__rev2_498, __p3_498)); \ __ret_498 = __builtin_shufflevector(__ret_498, __ret_498, 3, 2, 1, 0); \ __ret_498; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsl_high_laneq_u32(__p0_499, __p1_499, __p2_499, __p3_499) __extension__ ({ \ uint64x2_t __ret_499; \ uint64x2_t __s0_499 = __p0_499; \ uint32x4_t __s1_499 = __p1_499; \ uint32x4_t __s2_499 = __p2_499; \ __ret_499 = __s0_499 - vmull_u32(vget_high_u32(__s1_499), splat_laneq_u32(__s2_499, __p3_499)); \ __ret_499; \ }) #else #define vmlsl_high_laneq_u32(__p0_500, __p1_500, __p2_500, __p3_500) __extension__ ({ \ uint64x2_t __ret_500; \ uint64x2_t __s0_500 = __p0_500; \ uint32x4_t __s1_500 = __p1_500; \ uint32x4_t __s2_500 = __p2_500; \ uint64x2_t __rev0_500; __rev0_500 = __builtin_shufflevector(__s0_500, __s0_500, 1, 0); \ uint32x4_t __rev1_500; __rev1_500 = __builtin_shufflevector(__s1_500, __s1_500, 3, 2, 1, 0); \ uint32x4_t __rev2_500; __rev2_500 = __builtin_shufflevector(__s2_500, __s2_500, 3, 2, 1, 0); \ __ret_500 = __rev0_500 - __noswap_vmull_u32(__noswap_vget_high_u32(__rev1_500), __noswap_splat_laneq_u32(__rev2_500, __p3_500)); \ __ret_500 = __builtin_shufflevector(__ret_500, __ret_500, 1, 0); \ __ret_500; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsl_high_laneq_u16(__p0_501, __p1_501, __p2_501, __p3_501) __extension__ ({ \ uint32x4_t __ret_501; \ uint32x4_t __s0_501 = __p0_501; \ uint16x8_t __s1_501 = __p1_501; \ uint16x8_t __s2_501 = __p2_501; \ __ret_501 = __s0_501 - vmull_u16(vget_high_u16(__s1_501), splat_laneq_u16(__s2_501, __p3_501)); \ __ret_501; \ }) #else #define vmlsl_high_laneq_u16(__p0_502, __p1_502, __p2_502, __p3_502) __extension__ ({ \ uint32x4_t __ret_502; \ uint32x4_t __s0_502 = __p0_502; \ uint16x8_t __s1_502 = __p1_502; \ uint16x8_t __s2_502 = __p2_502; \ uint32x4_t __rev0_502; __rev0_502 = __builtin_shufflevector(__s0_502, __s0_502, 3, 2, 1, 0); \ uint16x8_t __rev1_502; __rev1_502 = __builtin_shufflevector(__s1_502, __s1_502, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev2_502; __rev2_502 = __builtin_shufflevector(__s2_502, __s2_502, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_502 = __rev0_502 - __noswap_vmull_u16(__noswap_vget_high_u16(__rev1_502), __noswap_splat_laneq_u16(__rev2_502, __p3_502)); \ __ret_502 = __builtin_shufflevector(__ret_502, __ret_502, 3, 2, 1, 0); \ __ret_502; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsl_high_laneq_s32(__p0_503, __p1_503, __p2_503, __p3_503) __extension__ ({ \ int64x2_t __ret_503; \ int64x2_t __s0_503 = __p0_503; \ int32x4_t __s1_503 = __p1_503; \ int32x4_t __s2_503 = __p2_503; \ __ret_503 = __s0_503 - vmull_s32(vget_high_s32(__s1_503), splat_laneq_s32(__s2_503, __p3_503)); \ __ret_503; \ }) #else #define vmlsl_high_laneq_s32(__p0_504, __p1_504, __p2_504, __p3_504) __extension__ ({ \ int64x2_t __ret_504; \ int64x2_t __s0_504 = __p0_504; \ int32x4_t __s1_504 = __p1_504; \ int32x4_t __s2_504 = __p2_504; \ int64x2_t __rev0_504; __rev0_504 = __builtin_shufflevector(__s0_504, __s0_504, 1, 0); \ int32x4_t __rev1_504; __rev1_504 = __builtin_shufflevector(__s1_504, __s1_504, 3, 2, 1, 0); \ int32x4_t __rev2_504; __rev2_504 = __builtin_shufflevector(__s2_504, __s2_504, 3, 2, 1, 0); \ __ret_504 = __rev0_504 - 
__noswap_vmull_s32(__noswap_vget_high_s32(__rev1_504), __noswap_splat_laneq_s32(__rev2_504, __p3_504)); \ __ret_504 = __builtin_shufflevector(__ret_504, __ret_504, 1, 0); \ __ret_504; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsl_high_laneq_s16(__p0_505, __p1_505, __p2_505, __p3_505) __extension__ ({ \ int32x4_t __ret_505; \ int32x4_t __s0_505 = __p0_505; \ int16x8_t __s1_505 = __p1_505; \ int16x8_t __s2_505 = __p2_505; \ __ret_505 = __s0_505 - vmull_s16(vget_high_s16(__s1_505), splat_laneq_s16(__s2_505, __p3_505)); \ __ret_505; \ }) #else #define vmlsl_high_laneq_s16(__p0_506, __p1_506, __p2_506, __p3_506) __extension__ ({ \ int32x4_t __ret_506; \ int32x4_t __s0_506 = __p0_506; \ int16x8_t __s1_506 = __p1_506; \ int16x8_t __s2_506 = __p2_506; \ int32x4_t __rev0_506; __rev0_506 = __builtin_shufflevector(__s0_506, __s0_506, 3, 2, 1, 0); \ int16x8_t __rev1_506; __rev1_506 = __builtin_shufflevector(__s1_506, __s1_506, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev2_506; __rev2_506 = __builtin_shufflevector(__s2_506, __s2_506, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_506 = __rev0_506 - __noswap_vmull_s16(__noswap_vget_high_s16(__rev1_506), __noswap_splat_laneq_s16(__rev2_506, __p3_506)); \ __ret_506 = __builtin_shufflevector(__ret_506, __ret_506, 3, 2, 1, 0); \ __ret_506; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsl_laneq_u32(__p0_507, __p1_507, __p2_507, __p3_507) __extension__ ({ \ uint64x2_t __ret_507; \ uint64x2_t __s0_507 = __p0_507; \ uint32x2_t __s1_507 = __p1_507; \ uint32x4_t __s2_507 = __p2_507; \ __ret_507 = __s0_507 - vmull_u32(__s1_507, splat_laneq_u32(__s2_507, __p3_507)); \ __ret_507; \ }) #else #define vmlsl_laneq_u32(__p0_508, __p1_508, __p2_508, __p3_508) __extension__ ({ \ uint64x2_t __ret_508; \ uint64x2_t __s0_508 = __p0_508; \ uint32x2_t __s1_508 = __p1_508; \ uint32x4_t __s2_508 = __p2_508; \ uint64x2_t __rev0_508; __rev0_508 = __builtin_shufflevector(__s0_508, __s0_508, 1, 0); \ uint32x2_t __rev1_508; __rev1_508 = __builtin_shufflevector(__s1_508, __s1_508, 1, 0); \ uint32x4_t __rev2_508; __rev2_508 = __builtin_shufflevector(__s2_508, __s2_508, 3, 2, 1, 0); \ __ret_508 = __rev0_508 - __noswap_vmull_u32(__rev1_508, __noswap_splat_laneq_u32(__rev2_508, __p3_508)); \ __ret_508 = __builtin_shufflevector(__ret_508, __ret_508, 1, 0); \ __ret_508; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsl_laneq_u16(__p0_509, __p1_509, __p2_509, __p3_509) __extension__ ({ \ uint32x4_t __ret_509; \ uint32x4_t __s0_509 = __p0_509; \ uint16x4_t __s1_509 = __p1_509; \ uint16x8_t __s2_509 = __p2_509; \ __ret_509 = __s0_509 - vmull_u16(__s1_509, splat_laneq_u16(__s2_509, __p3_509)); \ __ret_509; \ }) #else #define vmlsl_laneq_u16(__p0_510, __p1_510, __p2_510, __p3_510) __extension__ ({ \ uint32x4_t __ret_510; \ uint32x4_t __s0_510 = __p0_510; \ uint16x4_t __s1_510 = __p1_510; \ uint16x8_t __s2_510 = __p2_510; \ uint32x4_t __rev0_510; __rev0_510 = __builtin_shufflevector(__s0_510, __s0_510, 3, 2, 1, 0); \ uint16x4_t __rev1_510; __rev1_510 = __builtin_shufflevector(__s1_510, __s1_510, 3, 2, 1, 0); \ uint16x8_t __rev2_510; __rev2_510 = __builtin_shufflevector(__s2_510, __s2_510, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_510 = __rev0_510 - __noswap_vmull_u16(__rev1_510, __noswap_splat_laneq_u16(__rev2_510, __p3_510)); \ __ret_510 = __builtin_shufflevector(__ret_510, __ret_510, 3, 2, 1, 0); \ __ret_510; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsl_laneq_s32(__p0_511, __p1_511, __p2_511, __p3_511) __extension__ ({ \ int64x2_t __ret_511; \ int64x2_t __s0_511 = __p0_511; \ int32x2_t __s1_511 = __p1_511; 
\ int32x4_t __s2_511 = __p2_511; \ __ret_511 = __s0_511 - vmull_s32(__s1_511, splat_laneq_s32(__s2_511, __p3_511)); \ __ret_511; \ }) #else #define vmlsl_laneq_s32(__p0_512, __p1_512, __p2_512, __p3_512) __extension__ ({ \ int64x2_t __ret_512; \ int64x2_t __s0_512 = __p0_512; \ int32x2_t __s1_512 = __p1_512; \ int32x4_t __s2_512 = __p2_512; \ int64x2_t __rev0_512; __rev0_512 = __builtin_shufflevector(__s0_512, __s0_512, 1, 0); \ int32x2_t __rev1_512; __rev1_512 = __builtin_shufflevector(__s1_512, __s1_512, 1, 0); \ int32x4_t __rev2_512; __rev2_512 = __builtin_shufflevector(__s2_512, __s2_512, 3, 2, 1, 0); \ __ret_512 = __rev0_512 - __noswap_vmull_s32(__rev1_512, __noswap_splat_laneq_s32(__rev2_512, __p3_512)); \ __ret_512 = __builtin_shufflevector(__ret_512, __ret_512, 1, 0); \ __ret_512; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsl_laneq_s16(__p0_513, __p1_513, __p2_513, __p3_513) __extension__ ({ \ int32x4_t __ret_513; \ int32x4_t __s0_513 = __p0_513; \ int16x4_t __s1_513 = __p1_513; \ int16x8_t __s2_513 = __p2_513; \ __ret_513 = __s0_513 - vmull_s16(__s1_513, splat_laneq_s16(__s2_513, __p3_513)); \ __ret_513; \ }) #else #define vmlsl_laneq_s16(__p0_514, __p1_514, __p2_514, __p3_514) __extension__ ({ \ int32x4_t __ret_514; \ int32x4_t __s0_514 = __p0_514; \ int16x4_t __s1_514 = __p1_514; \ int16x8_t __s2_514 = __p2_514; \ int32x4_t __rev0_514; __rev0_514 = __builtin_shufflevector(__s0_514, __s0_514, 3, 2, 1, 0); \ int16x4_t __rev1_514; __rev1_514 = __builtin_shufflevector(__s1_514, __s1_514, 3, 2, 1, 0); \ int16x8_t __rev2_514; __rev2_514 = __builtin_shufflevector(__s2_514, __s2_514, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_514 = __rev0_514 - __noswap_vmull_s16(__rev1_514, __noswap_splat_laneq_s16(__rev2_514, __p3_514)); \ __ret_514 = __builtin_shufflevector(__ret_514, __ret_514, 3, 2, 1, 0); \ __ret_514; \ }) #endif __ai poly64x1_t vmov_n_p64(poly64_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t) {__p0}; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vmovq_n_p64(poly64_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t) {__p0, __p0}; return __ret; } #else __ai poly64x2_t vmovq_n_p64(poly64_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t) {__p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vmovq_n_f64(float64_t __p0) { float64x2_t __ret; __ret = (float64x2_t) {__p0, __p0}; return __ret; } #else __ai float64x2_t vmovq_n_f64(float64_t __p0) { float64x2_t __ret; __ret = (float64x2_t) {__p0, __p0}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vmov_n_f64(float64_t __p0) { float64x1_t __ret; __ret = (float64x1_t) {__p0}; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vmovl_high_u8(uint8x16_t __p0_515) { uint16x8_t __ret_515; uint8x8_t __a1_515 = vget_high_u8(__p0_515); __ret_515 = (uint16x8_t)(vshll_n_u8(__a1_515, 0)); return __ret_515; } #else __ai uint16x8_t vmovl_high_u8(uint8x16_t __p0_516) { uint16x8_t __ret_516; uint8x16_t __rev0_516; __rev0_516 = __builtin_shufflevector(__p0_516, __p0_516, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __a1_516 = __noswap_vget_high_u8(__rev0_516); __ret_516 = (uint16x8_t)(__noswap_vshll_n_u8(__a1_516, 0)); __ret_516 = __builtin_shufflevector(__ret_516, __ret_516, 7, 6, 5, 4, 3, 2, 1, 0); return __ret_516; } __ai uint16x8_t __noswap_vmovl_high_u8(uint8x16_t __p0_517) { uint16x8_t __ret_517; uint8x8_t __a1_517 = __noswap_vget_high_u8(__p0_517); __ret_517 = 
(uint16x8_t)(__noswap_vshll_n_u8(__a1_517, 0)); return __ret_517; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vmovl_high_u32(uint32x4_t __p0_518) { uint64x2_t __ret_518; uint32x2_t __a1_518 = vget_high_u32(__p0_518); __ret_518 = (uint64x2_t)(vshll_n_u32(__a1_518, 0)); return __ret_518; } #else __ai uint64x2_t vmovl_high_u32(uint32x4_t __p0_519) { uint64x2_t __ret_519; uint32x4_t __rev0_519; __rev0_519 = __builtin_shufflevector(__p0_519, __p0_519, 3, 2, 1, 0); uint32x2_t __a1_519 = __noswap_vget_high_u32(__rev0_519); __ret_519 = (uint64x2_t)(__noswap_vshll_n_u32(__a1_519, 0)); __ret_519 = __builtin_shufflevector(__ret_519, __ret_519, 1, 0); return __ret_519; } __ai uint64x2_t __noswap_vmovl_high_u32(uint32x4_t __p0_520) { uint64x2_t __ret_520; uint32x2_t __a1_520 = __noswap_vget_high_u32(__p0_520); __ret_520 = (uint64x2_t)(__noswap_vshll_n_u32(__a1_520, 0)); return __ret_520; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmovl_high_u16(uint16x8_t __p0_521) { uint32x4_t __ret_521; uint16x4_t __a1_521 = vget_high_u16(__p0_521); __ret_521 = (uint32x4_t)(vshll_n_u16(__a1_521, 0)); return __ret_521; } #else __ai uint32x4_t vmovl_high_u16(uint16x8_t __p0_522) { uint32x4_t __ret_522; uint16x8_t __rev0_522; __rev0_522 = __builtin_shufflevector(__p0_522, __p0_522, 7, 6, 5, 4, 3, 2, 1, 0); uint16x4_t __a1_522 = __noswap_vget_high_u16(__rev0_522); __ret_522 = (uint32x4_t)(__noswap_vshll_n_u16(__a1_522, 0)); __ret_522 = __builtin_shufflevector(__ret_522, __ret_522, 3, 2, 1, 0); return __ret_522; } __ai uint32x4_t __noswap_vmovl_high_u16(uint16x8_t __p0_523) { uint32x4_t __ret_523; uint16x4_t __a1_523 = __noswap_vget_high_u16(__p0_523); __ret_523 = (uint32x4_t)(__noswap_vshll_n_u16(__a1_523, 0)); return __ret_523; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vmovl_high_s8(int8x16_t __p0_524) { int16x8_t __ret_524; int8x8_t __a1_524 = vget_high_s8(__p0_524); __ret_524 = (int16x8_t)(vshll_n_s8(__a1_524, 0)); return __ret_524; } #else __ai int16x8_t vmovl_high_s8(int8x16_t __p0_525) { int16x8_t __ret_525; int8x16_t __rev0_525; __rev0_525 = __builtin_shufflevector(__p0_525, __p0_525, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __a1_525 = __noswap_vget_high_s8(__rev0_525); __ret_525 = (int16x8_t)(__noswap_vshll_n_s8(__a1_525, 0)); __ret_525 = __builtin_shufflevector(__ret_525, __ret_525, 7, 6, 5, 4, 3, 2, 1, 0); return __ret_525; } __ai int16x8_t __noswap_vmovl_high_s8(int8x16_t __p0_526) { int16x8_t __ret_526; int8x8_t __a1_526 = __noswap_vget_high_s8(__p0_526); __ret_526 = (int16x8_t)(__noswap_vshll_n_s8(__a1_526, 0)); return __ret_526; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vmovl_high_s32(int32x4_t __p0_527) { int64x2_t __ret_527; int32x2_t __a1_527 = vget_high_s32(__p0_527); __ret_527 = (int64x2_t)(vshll_n_s32(__a1_527, 0)); return __ret_527; } #else __ai int64x2_t vmovl_high_s32(int32x4_t __p0_528) { int64x2_t __ret_528; int32x4_t __rev0_528; __rev0_528 = __builtin_shufflevector(__p0_528, __p0_528, 3, 2, 1, 0); int32x2_t __a1_528 = __noswap_vget_high_s32(__rev0_528); __ret_528 = (int64x2_t)(__noswap_vshll_n_s32(__a1_528, 0)); __ret_528 = __builtin_shufflevector(__ret_528, __ret_528, 1, 0); return __ret_528; } __ai int64x2_t __noswap_vmovl_high_s32(int32x4_t __p0_529) { int64x2_t __ret_529; int32x2_t __a1_529 = __noswap_vget_high_s32(__p0_529); __ret_529 = (int64x2_t)(__noswap_vshll_n_s32(__a1_529, 0)); return __ret_529; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmovl_high_s16(int16x8_t __p0_530) { int32x4_t __ret_530; int16x4_t __a1_530 = 
vget_high_s16(__p0_530); __ret_530 = (int32x4_t)(vshll_n_s16(__a1_530, 0)); return __ret_530; } #else __ai int32x4_t vmovl_high_s16(int16x8_t __p0_531) { int32x4_t __ret_531; int16x8_t __rev0_531; __rev0_531 = __builtin_shufflevector(__p0_531, __p0_531, 7, 6, 5, 4, 3, 2, 1, 0); int16x4_t __a1_531 = __noswap_vget_high_s16(__rev0_531); __ret_531 = (int32x4_t)(__noswap_vshll_n_s16(__a1_531, 0)); __ret_531 = __builtin_shufflevector(__ret_531, __ret_531, 3, 2, 1, 0); return __ret_531; } __ai int32x4_t __noswap_vmovl_high_s16(int16x8_t __p0_532) { int32x4_t __ret_532; int16x4_t __a1_532 = __noswap_vget_high_s16(__p0_532); __ret_532 = (int32x4_t)(__noswap_vshll_n_s16(__a1_532, 0)); return __ret_532; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vmovn_high_u32(uint16x4_t __p0, uint32x4_t __p1) { uint16x8_t __ret; __ret = vcombine_u16(__p0, vmovn_u32(__p1)); return __ret; } #else __ai uint16x8_t vmovn_high_u32(uint16x4_t __p0, uint32x4_t __p1) { uint16x8_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vcombine_u16(__rev0, __noswap_vmovn_u32(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmovn_high_u64(uint32x2_t __p0, uint64x2_t __p1) { uint32x4_t __ret; __ret = vcombine_u32(__p0, vmovn_u64(__p1)); return __ret; } #else __ai uint32x4_t vmovn_high_u64(uint32x2_t __p0, uint64x2_t __p1) { uint32x4_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __noswap_vcombine_u32(__rev0, __noswap_vmovn_u64(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vmovn_high_u16(uint8x8_t __p0, uint16x8_t __p1) { uint8x16_t __ret; __ret = vcombine_u8(__p0, vmovn_u16(__p1)); return __ret; } #else __ai uint8x16_t vmovn_high_u16(uint8x8_t __p0, uint16x8_t __p1) { uint8x16_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vcombine_u8(__rev0, __noswap_vmovn_u16(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vmovn_high_s32(int16x4_t __p0, int32x4_t __p1) { int16x8_t __ret; __ret = vcombine_s16(__p0, vmovn_s32(__p1)); return __ret; } #else __ai int16x8_t vmovn_high_s32(int16x4_t __p0, int32x4_t __p1) { int16x8_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vcombine_s16(__rev0, __noswap_vmovn_s32(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmovn_high_s64(int32x2_t __p0, int64x2_t __p1) { int32x4_t __ret; __ret = vcombine_s32(__p0, vmovn_s64(__p1)); return __ret; } #else __ai int32x4_t vmovn_high_s64(int32x2_t __p0, int64x2_t __p1) { int32x4_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __noswap_vcombine_s32(__rev0, __noswap_vmovn_s64(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); 
return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vmovn_high_s16(int8x8_t __p0, int16x8_t __p1) { int8x16_t __ret; __ret = vcombine_s8(__p0, vmovn_s16(__p1)); return __ret; } #else __ai int8x16_t vmovn_high_s16(int8x8_t __p0, int16x8_t __p1) { int8x16_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vcombine_s8(__rev0, __noswap_vmovn_s16(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vmulq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = __p0 * __p1; return __ret; } #else __ai float64x2_t vmulq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 * __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vmul_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; __ret = __p0 * __p1; return __ret; } #define vmuld_lane_f64(__p0_533, __p1_533, __p2_533) __extension__ ({ \ float64_t __ret_533; \ float64_t __s0_533 = __p0_533; \ float64x1_t __s1_533 = __p1_533; \ __ret_533 = __s0_533 * vget_lane_f64(__s1_533, __p2_533); \ __ret_533; \ }) #ifdef __LITTLE_ENDIAN__ #define vmuls_lane_f32(__p0_534, __p1_534, __p2_534) __extension__ ({ \ float32_t __ret_534; \ float32_t __s0_534 = __p0_534; \ float32x2_t __s1_534 = __p1_534; \ __ret_534 = __s0_534 * vget_lane_f32(__s1_534, __p2_534); \ __ret_534; \ }) #else #define vmuls_lane_f32(__p0_535, __p1_535, __p2_535) __extension__ ({ \ float32_t __ret_535; \ float32_t __s0_535 = __p0_535; \ float32x2_t __s1_535 = __p1_535; \ float32x2_t __rev1_535; __rev1_535 = __builtin_shufflevector(__s1_535, __s1_535, 1, 0); \ __ret_535 = __s0_535 * __noswap_vget_lane_f32(__rev1_535, __p2_535); \ __ret_535; \ }) #endif #define vmul_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1_t __ret; \ float64x1_t __s0 = __p0; \ float64x1_t __s1 = __p1; \ __ret = (float64x1_t) __builtin_neon_vmul_lane_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 10); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vmulq_lane_f64(__p0_536, __p1_536, __p2_536) __extension__ ({ \ float64x2_t __ret_536; \ float64x2_t __s0_536 = __p0_536; \ float64x1_t __s1_536 = __p1_536; \ __ret_536 = __s0_536 * splatq_lane_f64(__s1_536, __p2_536); \ __ret_536; \ }) #else #define vmulq_lane_f64(__p0_537, __p1_537, __p2_537) __extension__ ({ \ float64x2_t __ret_537; \ float64x2_t __s0_537 = __p0_537; \ float64x1_t __s1_537 = __p1_537; \ float64x2_t __rev0_537; __rev0_537 = __builtin_shufflevector(__s0_537, __s0_537, 1, 0); \ __ret_537 = __rev0_537 * __noswap_splatq_lane_f64(__s1_537, __p2_537); \ __ret_537 = __builtin_shufflevector(__ret_537, __ret_537, 1, 0); \ __ret_537; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmuld_laneq_f64(__p0_538, __p1_538, __p2_538) __extension__ ({ \ float64_t __ret_538; \ float64_t __s0_538 = __p0_538; \ float64x2_t __s1_538 = __p1_538; \ __ret_538 = __s0_538 * vgetq_lane_f64(__s1_538, __p2_538); \ __ret_538; \ }) #else #define vmuld_laneq_f64(__p0_539, __p1_539, __p2_539) __extension__ ({ \ float64_t __ret_539; \ float64_t __s0_539 = __p0_539; \ float64x2_t __s1_539 = __p1_539; \ float64x2_t __rev1_539; __rev1_539 = __builtin_shufflevector(__s1_539, __s1_539, 1, 
0); \ __ret_539 = __s0_539 * __noswap_vgetq_lane_f64(__rev1_539, __p2_539); \ __ret_539; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmuls_laneq_f32(__p0_540, __p1_540, __p2_540) __extension__ ({ \ float32_t __ret_540; \ float32_t __s0_540 = __p0_540; \ float32x4_t __s1_540 = __p1_540; \ __ret_540 = __s0_540 * vgetq_lane_f32(__s1_540, __p2_540); \ __ret_540; \ }) #else #define vmuls_laneq_f32(__p0_541, __p1_541, __p2_541) __extension__ ({ \ float32_t __ret_541; \ float32_t __s0_541 = __p0_541; \ float32x4_t __s1_541 = __p1_541; \ float32x4_t __rev1_541; __rev1_541 = __builtin_shufflevector(__s1_541, __s1_541, 3, 2, 1, 0); \ __ret_541 = __s0_541 * __noswap_vgetq_lane_f32(__rev1_541, __p2_541); \ __ret_541; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmul_laneq_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1_t __ret; \ float64x1_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ __ret = (float64x1_t) __builtin_neon_vmul_laneq_v((int8x8_t)__s0, (int8x16_t)__s1, __p2, 10); \ __ret; \ }) #else #define vmul_laneq_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1_t __ret; \ float64x1_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (float64x1_t) __builtin_neon_vmul_laneq_v((int8x8_t)__s0, (int8x16_t)__rev1, __p2, 10); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulq_laneq_u32(__p0_542, __p1_542, __p2_542) __extension__ ({ \ uint32x4_t __ret_542; \ uint32x4_t __s0_542 = __p0_542; \ uint32x4_t __s1_542 = __p1_542; \ __ret_542 = __s0_542 * splatq_laneq_u32(__s1_542, __p2_542); \ __ret_542; \ }) #else #define vmulq_laneq_u32(__p0_543, __p1_543, __p2_543) __extension__ ({ \ uint32x4_t __ret_543; \ uint32x4_t __s0_543 = __p0_543; \ uint32x4_t __s1_543 = __p1_543; \ uint32x4_t __rev0_543; __rev0_543 = __builtin_shufflevector(__s0_543, __s0_543, 3, 2, 1, 0); \ uint32x4_t __rev1_543; __rev1_543 = __builtin_shufflevector(__s1_543, __s1_543, 3, 2, 1, 0); \ __ret_543 = __rev0_543 * __noswap_splatq_laneq_u32(__rev1_543, __p2_543); \ __ret_543 = __builtin_shufflevector(__ret_543, __ret_543, 3, 2, 1, 0); \ __ret_543; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulq_laneq_u16(__p0_544, __p1_544, __p2_544) __extension__ ({ \ uint16x8_t __ret_544; \ uint16x8_t __s0_544 = __p0_544; \ uint16x8_t __s1_544 = __p1_544; \ __ret_544 = __s0_544 * splatq_laneq_u16(__s1_544, __p2_544); \ __ret_544; \ }) #else #define vmulq_laneq_u16(__p0_545, __p1_545, __p2_545) __extension__ ({ \ uint16x8_t __ret_545; \ uint16x8_t __s0_545 = __p0_545; \ uint16x8_t __s1_545 = __p1_545; \ uint16x8_t __rev0_545; __rev0_545 = __builtin_shufflevector(__s0_545, __s0_545, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev1_545; __rev1_545 = __builtin_shufflevector(__s1_545, __s1_545, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_545 = __rev0_545 * __noswap_splatq_laneq_u16(__rev1_545, __p2_545); \ __ret_545 = __builtin_shufflevector(__ret_545, __ret_545, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_545; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulq_laneq_f64(__p0_546, __p1_546, __p2_546) __extension__ ({ \ float64x2_t __ret_546; \ float64x2_t __s0_546 = __p0_546; \ float64x2_t __s1_546 = __p1_546; \ __ret_546 = __s0_546 * splatq_laneq_f64(__s1_546, __p2_546); \ __ret_546; \ }) #else #define vmulq_laneq_f64(__p0_547, __p1_547, __p2_547) __extension__ ({ \ float64x2_t __ret_547; \ float64x2_t __s0_547 = __p0_547; \ float64x2_t __s1_547 = __p1_547; \ float64x2_t __rev0_547; __rev0_547 = __builtin_shufflevector(__s0_547, __s0_547, 1, 0); \ float64x2_t __rev1_547; __rev1_547 = 
__builtin_shufflevector(__s1_547, __s1_547, 1, 0); \ __ret_547 = __rev0_547 * __noswap_splatq_laneq_f64(__rev1_547, __p2_547); \ __ret_547 = __builtin_shufflevector(__ret_547, __ret_547, 1, 0); \ __ret_547; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulq_laneq_f32(__p0_548, __p1_548, __p2_548) __extension__ ({ \ float32x4_t __ret_548; \ float32x4_t __s0_548 = __p0_548; \ float32x4_t __s1_548 = __p1_548; \ __ret_548 = __s0_548 * splatq_laneq_f32(__s1_548, __p2_548); \ __ret_548; \ }) #else #define vmulq_laneq_f32(__p0_549, __p1_549, __p2_549) __extension__ ({ \ float32x4_t __ret_549; \ float32x4_t __s0_549 = __p0_549; \ float32x4_t __s1_549 = __p1_549; \ float32x4_t __rev0_549; __rev0_549 = __builtin_shufflevector(__s0_549, __s0_549, 3, 2, 1, 0); \ float32x4_t __rev1_549; __rev1_549 = __builtin_shufflevector(__s1_549, __s1_549, 3, 2, 1, 0); \ __ret_549 = __rev0_549 * __noswap_splatq_laneq_f32(__rev1_549, __p2_549); \ __ret_549 = __builtin_shufflevector(__ret_549, __ret_549, 3, 2, 1, 0); \ __ret_549; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulq_laneq_s32(__p0_550, __p1_550, __p2_550) __extension__ ({ \ int32x4_t __ret_550; \ int32x4_t __s0_550 = __p0_550; \ int32x4_t __s1_550 = __p1_550; \ __ret_550 = __s0_550 * splatq_laneq_s32(__s1_550, __p2_550); \ __ret_550; \ }) #else #define vmulq_laneq_s32(__p0_551, __p1_551, __p2_551) __extension__ ({ \ int32x4_t __ret_551; \ int32x4_t __s0_551 = __p0_551; \ int32x4_t __s1_551 = __p1_551; \ int32x4_t __rev0_551; __rev0_551 = __builtin_shufflevector(__s0_551, __s0_551, 3, 2, 1, 0); \ int32x4_t __rev1_551; __rev1_551 = __builtin_shufflevector(__s1_551, __s1_551, 3, 2, 1, 0); \ __ret_551 = __rev0_551 * __noswap_splatq_laneq_s32(__rev1_551, __p2_551); \ __ret_551 = __builtin_shufflevector(__ret_551, __ret_551, 3, 2, 1, 0); \ __ret_551; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulq_laneq_s16(__p0_552, __p1_552, __p2_552) __extension__ ({ \ int16x8_t __ret_552; \ int16x8_t __s0_552 = __p0_552; \ int16x8_t __s1_552 = __p1_552; \ __ret_552 = __s0_552 * splatq_laneq_s16(__s1_552, __p2_552); \ __ret_552; \ }) #else #define vmulq_laneq_s16(__p0_553, __p1_553, __p2_553) __extension__ ({ \ int16x8_t __ret_553; \ int16x8_t __s0_553 = __p0_553; \ int16x8_t __s1_553 = __p1_553; \ int16x8_t __rev0_553; __rev0_553 = __builtin_shufflevector(__s0_553, __s0_553, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1_553; __rev1_553 = __builtin_shufflevector(__s1_553, __s1_553, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_553 = __rev0_553 * __noswap_splatq_laneq_s16(__rev1_553, __p2_553); \ __ret_553 = __builtin_shufflevector(__ret_553, __ret_553, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_553; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmul_laneq_u32(__p0_554, __p1_554, __p2_554) __extension__ ({ \ uint32x2_t __ret_554; \ uint32x2_t __s0_554 = __p0_554; \ uint32x4_t __s1_554 = __p1_554; \ __ret_554 = __s0_554 * splat_laneq_u32(__s1_554, __p2_554); \ __ret_554; \ }) #else #define vmul_laneq_u32(__p0_555, __p1_555, __p2_555) __extension__ ({ \ uint32x2_t __ret_555; \ uint32x2_t __s0_555 = __p0_555; \ uint32x4_t __s1_555 = __p1_555; \ uint32x2_t __rev0_555; __rev0_555 = __builtin_shufflevector(__s0_555, __s0_555, 1, 0); \ uint32x4_t __rev1_555; __rev1_555 = __builtin_shufflevector(__s1_555, __s1_555, 3, 2, 1, 0); \ __ret_555 = __rev0_555 * __noswap_splat_laneq_u32(__rev1_555, __p2_555); \ __ret_555 = __builtin_shufflevector(__ret_555, __ret_555, 1, 0); \ __ret_555; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmul_laneq_u16(__p0_556, __p1_556, __p2_556) __extension__ ({ \ uint16x4_t 
__ret_556; \ uint16x4_t __s0_556 = __p0_556; \ uint16x8_t __s1_556 = __p1_556; \ __ret_556 = __s0_556 * splat_laneq_u16(__s1_556, __p2_556); \ __ret_556; \ }) #else #define vmul_laneq_u16(__p0_557, __p1_557, __p2_557) __extension__ ({ \ uint16x4_t __ret_557; \ uint16x4_t __s0_557 = __p0_557; \ uint16x8_t __s1_557 = __p1_557; \ uint16x4_t __rev0_557; __rev0_557 = __builtin_shufflevector(__s0_557, __s0_557, 3, 2, 1, 0); \ uint16x8_t __rev1_557; __rev1_557 = __builtin_shufflevector(__s1_557, __s1_557, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_557 = __rev0_557 * __noswap_splat_laneq_u16(__rev1_557, __p2_557); \ __ret_557 = __builtin_shufflevector(__ret_557, __ret_557, 3, 2, 1, 0); \ __ret_557; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmul_laneq_f32(__p0_558, __p1_558, __p2_558) __extension__ ({ \ float32x2_t __ret_558; \ float32x2_t __s0_558 = __p0_558; \ float32x4_t __s1_558 = __p1_558; \ __ret_558 = __s0_558 * splat_laneq_f32(__s1_558, __p2_558); \ __ret_558; \ }) #else #define vmul_laneq_f32(__p0_559, __p1_559, __p2_559) __extension__ ({ \ float32x2_t __ret_559; \ float32x2_t __s0_559 = __p0_559; \ float32x4_t __s1_559 = __p1_559; \ float32x2_t __rev0_559; __rev0_559 = __builtin_shufflevector(__s0_559, __s0_559, 1, 0); \ float32x4_t __rev1_559; __rev1_559 = __builtin_shufflevector(__s1_559, __s1_559, 3, 2, 1, 0); \ __ret_559 = __rev0_559 * __noswap_splat_laneq_f32(__rev1_559, __p2_559); \ __ret_559 = __builtin_shufflevector(__ret_559, __ret_559, 1, 0); \ __ret_559; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmul_laneq_s32(__p0_560, __p1_560, __p2_560) __extension__ ({ \ int32x2_t __ret_560; \ int32x2_t __s0_560 = __p0_560; \ int32x4_t __s1_560 = __p1_560; \ __ret_560 = __s0_560 * splat_laneq_s32(__s1_560, __p2_560); \ __ret_560; \ }) #else #define vmul_laneq_s32(__p0_561, __p1_561, __p2_561) __extension__ ({ \ int32x2_t __ret_561; \ int32x2_t __s0_561 = __p0_561; \ int32x4_t __s1_561 = __p1_561; \ int32x2_t __rev0_561; __rev0_561 = __builtin_shufflevector(__s0_561, __s0_561, 1, 0); \ int32x4_t __rev1_561; __rev1_561 = __builtin_shufflevector(__s1_561, __s1_561, 3, 2, 1, 0); \ __ret_561 = __rev0_561 * __noswap_splat_laneq_s32(__rev1_561, __p2_561); \ __ret_561 = __builtin_shufflevector(__ret_561, __ret_561, 1, 0); \ __ret_561; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmul_laneq_s16(__p0_562, __p1_562, __p2_562) __extension__ ({ \ int16x4_t __ret_562; \ int16x4_t __s0_562 = __p0_562; \ int16x8_t __s1_562 = __p1_562; \ __ret_562 = __s0_562 * splat_laneq_s16(__s1_562, __p2_562); \ __ret_562; \ }) #else #define vmul_laneq_s16(__p0_563, __p1_563, __p2_563) __extension__ ({ \ int16x4_t __ret_563; \ int16x4_t __s0_563 = __p0_563; \ int16x8_t __s1_563 = __p1_563; \ int16x4_t __rev0_563; __rev0_563 = __builtin_shufflevector(__s0_563, __s0_563, 3, 2, 1, 0); \ int16x8_t __rev1_563; __rev1_563 = __builtin_shufflevector(__s1_563, __s1_563, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_563 = __rev0_563 * __noswap_splat_laneq_s16(__rev1_563, __p2_563); \ __ret_563 = __builtin_shufflevector(__ret_563, __ret_563, 3, 2, 1, 0); \ __ret_563; \ }) #endif __ai float64x1_t vmul_n_f64(float64x1_t __p0, float64_t __p1) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vmul_n_f64((float64x1_t)__p0, __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vmulq_n_f64(float64x2_t __p0, float64_t __p1) { float64x2_t __ret; __ret = __p0 * (float64x2_t) {__p1, __p1}; return __ret; } #else __ai float64x2_t vmulq_n_f64(float64x2_t __p0, float64_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 1, 0); __ret = __rev0 * (float64x2_t) {__p1, __p1}; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vmull_high_p8(poly8x16_t __p0, poly8x16_t __p1) { poly16x8_t __ret; __ret = vmull_p8(vget_high_p8(__p0), vget_high_p8(__p1)); return __ret; } #else __ai poly16x8_t vmull_high_p8(poly8x16_t __p0, poly8x16_t __p1) { poly16x8_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmull_p8(__noswap_vget_high_p8(__rev0), __noswap_vget_high_p8(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vmull_high_u8(uint8x16_t __p0, uint8x16_t __p1) { uint16x8_t __ret; __ret = vmull_u8(vget_high_u8(__p0), vget_high_u8(__p1)); return __ret; } #else __ai uint16x8_t vmull_high_u8(uint8x16_t __p0, uint8x16_t __p1) { uint16x8_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmull_u8(__noswap_vget_high_u8(__rev0), __noswap_vget_high_u8(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vmull_high_u32(uint32x4_t __p0, uint32x4_t __p1) { uint64x2_t __ret; __ret = vmull_u32(vget_high_u32(__p0), vget_high_u32(__p1)); return __ret; } #else __ai uint64x2_t vmull_high_u32(uint32x4_t __p0, uint32x4_t __p1) { uint64x2_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vmull_u32(__noswap_vget_high_u32(__rev0), __noswap_vget_high_u32(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmull_high_u16(uint16x8_t __p0, uint16x8_t __p1) { uint32x4_t __ret; __ret = vmull_u16(vget_high_u16(__p0), vget_high_u16(__p1)); return __ret; } #else __ai uint32x4_t vmull_high_u16(uint16x8_t __p0, uint16x8_t __p1) { uint32x4_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmull_u16(__noswap_vget_high_u16(__rev0), __noswap_vget_high_u16(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vmull_high_s8(int8x16_t __p0, int8x16_t __p1) { int16x8_t __ret; __ret = vmull_s8(vget_high_s8(__p0), vget_high_s8(__p1)); return __ret; } #else __ai int16x8_t vmull_high_s8(int8x16_t __p0, int8x16_t __p1) { int16x8_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmull_s8(__noswap_vget_high_s8(__rev0), __noswap_vget_high_s8(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vmull_high_s32(int32x4_t __p0, int32x4_t __p1) { 
int64x2_t __ret; __ret = vmull_s32(vget_high_s32(__p0), vget_high_s32(__p1)); return __ret; } #else __ai int64x2_t vmull_high_s32(int32x4_t __p0, int32x4_t __p1) { int64x2_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vmull_s32(__noswap_vget_high_s32(__rev0), __noswap_vget_high_s32(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmull_high_s16(int16x8_t __p0, int16x8_t __p1) { int32x4_t __ret; __ret = vmull_s16(vget_high_s16(__p0), vget_high_s16(__p1)); return __ret; } #else __ai int32x4_t vmull_high_s16(int16x8_t __p0, int16x8_t __p1) { int32x4_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmull_s16(__noswap_vget_high_s16(__rev0), __noswap_vget_high_s16(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vmull_high_lane_u32(__p0_564, __p1_564, __p2_564) __extension__ ({ \ uint64x2_t __ret_564; \ uint32x4_t __s0_564 = __p0_564; \ uint32x2_t __s1_564 = __p1_564; \ __ret_564 = vmull_u32(vget_high_u32(__s0_564), splat_lane_u32(__s1_564, __p2_564)); \ __ret_564; \ }) #else #define vmull_high_lane_u32(__p0_565, __p1_565, __p2_565) __extension__ ({ \ uint64x2_t __ret_565; \ uint32x4_t __s0_565 = __p0_565; \ uint32x2_t __s1_565 = __p1_565; \ uint32x4_t __rev0_565; __rev0_565 = __builtin_shufflevector(__s0_565, __s0_565, 3, 2, 1, 0); \ uint32x2_t __rev1_565; __rev1_565 = __builtin_shufflevector(__s1_565, __s1_565, 1, 0); \ __ret_565 = __noswap_vmull_u32(__noswap_vget_high_u32(__rev0_565), __noswap_splat_lane_u32(__rev1_565, __p2_565)); \ __ret_565 = __builtin_shufflevector(__ret_565, __ret_565, 1, 0); \ __ret_565; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmull_high_lane_u16(__p0_566, __p1_566, __p2_566) __extension__ ({ \ uint32x4_t __ret_566; \ uint16x8_t __s0_566 = __p0_566; \ uint16x4_t __s1_566 = __p1_566; \ __ret_566 = vmull_u16(vget_high_u16(__s0_566), splat_lane_u16(__s1_566, __p2_566)); \ __ret_566; \ }) #else #define vmull_high_lane_u16(__p0_567, __p1_567, __p2_567) __extension__ ({ \ uint32x4_t __ret_567; \ uint16x8_t __s0_567 = __p0_567; \ uint16x4_t __s1_567 = __p1_567; \ uint16x8_t __rev0_567; __rev0_567 = __builtin_shufflevector(__s0_567, __s0_567, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x4_t __rev1_567; __rev1_567 = __builtin_shufflevector(__s1_567, __s1_567, 3, 2, 1, 0); \ __ret_567 = __noswap_vmull_u16(__noswap_vget_high_u16(__rev0_567), __noswap_splat_lane_u16(__rev1_567, __p2_567)); \ __ret_567 = __builtin_shufflevector(__ret_567, __ret_567, 3, 2, 1, 0); \ __ret_567; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmull_high_lane_s32(__p0_568, __p1_568, __p2_568) __extension__ ({ \ int64x2_t __ret_568; \ int32x4_t __s0_568 = __p0_568; \ int32x2_t __s1_568 = __p1_568; \ __ret_568 = vmull_s32(vget_high_s32(__s0_568), splat_lane_s32(__s1_568, __p2_568)); \ __ret_568; \ }) #else #define vmull_high_lane_s32(__p0_569, __p1_569, __p2_569) __extension__ ({ \ int64x2_t __ret_569; \ int32x4_t __s0_569 = __p0_569; \ int32x2_t __s1_569 = __p1_569; \ int32x4_t __rev0_569; __rev0_569 = __builtin_shufflevector(__s0_569, __s0_569, 3, 2, 1, 0); \ int32x2_t __rev1_569; __rev1_569 = __builtin_shufflevector(__s1_569, __s1_569, 1, 0); \ __ret_569 = 
__noswap_vmull_s32(__noswap_vget_high_s32(__rev0_569), __noswap_splat_lane_s32(__rev1_569, __p2_569)); \ __ret_569 = __builtin_shufflevector(__ret_569, __ret_569, 1, 0); \ __ret_569; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmull_high_lane_s16(__p0_570, __p1_570, __p2_570) __extension__ ({ \ int32x4_t __ret_570; \ int16x8_t __s0_570 = __p0_570; \ int16x4_t __s1_570 = __p1_570; \ __ret_570 = vmull_s16(vget_high_s16(__s0_570), splat_lane_s16(__s1_570, __p2_570)); \ __ret_570; \ }) #else #define vmull_high_lane_s16(__p0_571, __p1_571, __p2_571) __extension__ ({ \ int32x4_t __ret_571; \ int16x8_t __s0_571 = __p0_571; \ int16x4_t __s1_571 = __p1_571; \ int16x8_t __rev0_571; __rev0_571 = __builtin_shufflevector(__s0_571, __s0_571, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x4_t __rev1_571; __rev1_571 = __builtin_shufflevector(__s1_571, __s1_571, 3, 2, 1, 0); \ __ret_571 = __noswap_vmull_s16(__noswap_vget_high_s16(__rev0_571), __noswap_splat_lane_s16(__rev1_571, __p2_571)); \ __ret_571 = __builtin_shufflevector(__ret_571, __ret_571, 3, 2, 1, 0); \ __ret_571; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmull_high_laneq_u32(__p0_572, __p1_572, __p2_572) __extension__ ({ \ uint64x2_t __ret_572; \ uint32x4_t __s0_572 = __p0_572; \ uint32x4_t __s1_572 = __p1_572; \ __ret_572 = vmull_u32(vget_high_u32(__s0_572), splat_laneq_u32(__s1_572, __p2_572)); \ __ret_572; \ }) #else #define vmull_high_laneq_u32(__p0_573, __p1_573, __p2_573) __extension__ ({ \ uint64x2_t __ret_573; \ uint32x4_t __s0_573 = __p0_573; \ uint32x4_t __s1_573 = __p1_573; \ uint32x4_t __rev0_573; __rev0_573 = __builtin_shufflevector(__s0_573, __s0_573, 3, 2, 1, 0); \ uint32x4_t __rev1_573; __rev1_573 = __builtin_shufflevector(__s1_573, __s1_573, 3, 2, 1, 0); \ __ret_573 = __noswap_vmull_u32(__noswap_vget_high_u32(__rev0_573), __noswap_splat_laneq_u32(__rev1_573, __p2_573)); \ __ret_573 = __builtin_shufflevector(__ret_573, __ret_573, 1, 0); \ __ret_573; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmull_high_laneq_u16(__p0_574, __p1_574, __p2_574) __extension__ ({ \ uint32x4_t __ret_574; \ uint16x8_t __s0_574 = __p0_574; \ uint16x8_t __s1_574 = __p1_574; \ __ret_574 = vmull_u16(vget_high_u16(__s0_574), splat_laneq_u16(__s1_574, __p2_574)); \ __ret_574; \ }) #else #define vmull_high_laneq_u16(__p0_575, __p1_575, __p2_575) __extension__ ({ \ uint32x4_t __ret_575; \ uint16x8_t __s0_575 = __p0_575; \ uint16x8_t __s1_575 = __p1_575; \ uint16x8_t __rev0_575; __rev0_575 = __builtin_shufflevector(__s0_575, __s0_575, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev1_575; __rev1_575 = __builtin_shufflevector(__s1_575, __s1_575, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_575 = __noswap_vmull_u16(__noswap_vget_high_u16(__rev0_575), __noswap_splat_laneq_u16(__rev1_575, __p2_575)); \ __ret_575 = __builtin_shufflevector(__ret_575, __ret_575, 3, 2, 1, 0); \ __ret_575; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmull_high_laneq_s32(__p0_576, __p1_576, __p2_576) __extension__ ({ \ int64x2_t __ret_576; \ int32x4_t __s0_576 = __p0_576; \ int32x4_t __s1_576 = __p1_576; \ __ret_576 = vmull_s32(vget_high_s32(__s0_576), splat_laneq_s32(__s1_576, __p2_576)); \ __ret_576; \ }) #else #define vmull_high_laneq_s32(__p0_577, __p1_577, __p2_577) __extension__ ({ \ int64x2_t __ret_577; \ int32x4_t __s0_577 = __p0_577; \ int32x4_t __s1_577 = __p1_577; \ int32x4_t __rev0_577; __rev0_577 = __builtin_shufflevector(__s0_577, __s0_577, 3, 2, 1, 0); \ int32x4_t __rev1_577; __rev1_577 = __builtin_shufflevector(__s1_577, __s1_577, 3, 2, 1, 0); \ __ret_577 = 
__noswap_vmull_s32(__noswap_vget_high_s32(__rev0_577), __noswap_splat_laneq_s32(__rev1_577, __p2_577)); \ __ret_577 = __builtin_shufflevector(__ret_577, __ret_577, 1, 0); \ __ret_577; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmull_high_laneq_s16(__p0_578, __p1_578, __p2_578) __extension__ ({ \ int32x4_t __ret_578; \ int16x8_t __s0_578 = __p0_578; \ int16x8_t __s1_578 = __p1_578; \ __ret_578 = vmull_s16(vget_high_s16(__s0_578), splat_laneq_s16(__s1_578, __p2_578)); \ __ret_578; \ }) #else #define vmull_high_laneq_s16(__p0_579, __p1_579, __p2_579) __extension__ ({ \ int32x4_t __ret_579; \ int16x8_t __s0_579 = __p0_579; \ int16x8_t __s1_579 = __p1_579; \ int16x8_t __rev0_579; __rev0_579 = __builtin_shufflevector(__s0_579, __s0_579, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1_579; __rev1_579 = __builtin_shufflevector(__s1_579, __s1_579, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_579 = __noswap_vmull_s16(__noswap_vget_high_s16(__rev0_579), __noswap_splat_laneq_s16(__rev1_579, __p2_579)); \ __ret_579 = __builtin_shufflevector(__ret_579, __ret_579, 3, 2, 1, 0); \ __ret_579; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vmull_high_n_u32(uint32x4_t __p0, uint32_t __p1) { uint64x2_t __ret; __ret = vmull_n_u32(vget_high_u32(__p0), __p1); return __ret; } #else __ai uint64x2_t vmull_high_n_u32(uint32x4_t __p0, uint32_t __p1) { uint64x2_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __noswap_vmull_n_u32(__noswap_vget_high_u32(__rev0), __p1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmull_high_n_u16(uint16x8_t __p0, uint16_t __p1) { uint32x4_t __ret; __ret = vmull_n_u16(vget_high_u16(__p0), __p1); return __ret; } #else __ai uint32x4_t vmull_high_n_u16(uint16x8_t __p0, uint16_t __p1) { uint32x4_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmull_n_u16(__noswap_vget_high_u16(__rev0), __p1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vmull_high_n_s32(int32x4_t __p0, int32_t __p1) { int64x2_t __ret; __ret = vmull_n_s32(vget_high_s32(__p0), __p1); return __ret; } #else __ai int64x2_t vmull_high_n_s32(int32x4_t __p0, int32_t __p1) { int64x2_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __noswap_vmull_n_s32(__noswap_vget_high_s32(__rev0), __p1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmull_high_n_s16(int16x8_t __p0, int16_t __p1) { int32x4_t __ret; __ret = vmull_n_s16(vget_high_s16(__p0), __p1); return __ret; } #else __ai int32x4_t vmull_high_n_s16(int16x8_t __p0, int16_t __p1) { int32x4_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmull_n_s16(__noswap_vget_high_s16(__rev0), __p1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vmull_laneq_u32(__p0_580, __p1_580, __p2_580) __extension__ ({ \ uint64x2_t __ret_580; \ uint32x2_t __s0_580 = __p0_580; \ uint32x4_t __s1_580 = __p1_580; \ __ret_580 = vmull_u32(__s0_580, splat_laneq_u32(__s1_580, __p2_580)); \ __ret_580; \ }) #else #define vmull_laneq_u32(__p0_581, __p1_581, __p2_581) __extension__ ({ \ uint64x2_t __ret_581; \ uint32x2_t __s0_581 = __p0_581; \ uint32x4_t __s1_581 = __p1_581; \ uint32x2_t __rev0_581; __rev0_581 = 
__builtin_shufflevector(__s0_581, __s0_581, 1, 0); \ uint32x4_t __rev1_581; __rev1_581 = __builtin_shufflevector(__s1_581, __s1_581, 3, 2, 1, 0); \ __ret_581 = __noswap_vmull_u32(__rev0_581, __noswap_splat_laneq_u32(__rev1_581, __p2_581)); \ __ret_581 = __builtin_shufflevector(__ret_581, __ret_581, 1, 0); \ __ret_581; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmull_laneq_u16(__p0_582, __p1_582, __p2_582) __extension__ ({ \ uint32x4_t __ret_582; \ uint16x4_t __s0_582 = __p0_582; \ uint16x8_t __s1_582 = __p1_582; \ __ret_582 = vmull_u16(__s0_582, splat_laneq_u16(__s1_582, __p2_582)); \ __ret_582; \ }) #else #define vmull_laneq_u16(__p0_583, __p1_583, __p2_583) __extension__ ({ \ uint32x4_t __ret_583; \ uint16x4_t __s0_583 = __p0_583; \ uint16x8_t __s1_583 = __p1_583; \ uint16x4_t __rev0_583; __rev0_583 = __builtin_shufflevector(__s0_583, __s0_583, 3, 2, 1, 0); \ uint16x8_t __rev1_583; __rev1_583 = __builtin_shufflevector(__s1_583, __s1_583, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_583 = __noswap_vmull_u16(__rev0_583, __noswap_splat_laneq_u16(__rev1_583, __p2_583)); \ __ret_583 = __builtin_shufflevector(__ret_583, __ret_583, 3, 2, 1, 0); \ __ret_583; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmull_laneq_s32(__p0_584, __p1_584, __p2_584) __extension__ ({ \ int64x2_t __ret_584; \ int32x2_t __s0_584 = __p0_584; \ int32x4_t __s1_584 = __p1_584; \ __ret_584 = vmull_s32(__s0_584, splat_laneq_s32(__s1_584, __p2_584)); \ __ret_584; \ }) #else #define vmull_laneq_s32(__p0_585, __p1_585, __p2_585) __extension__ ({ \ int64x2_t __ret_585; \ int32x2_t __s0_585 = __p0_585; \ int32x4_t __s1_585 = __p1_585; \ int32x2_t __rev0_585; __rev0_585 = __builtin_shufflevector(__s0_585, __s0_585, 1, 0); \ int32x4_t __rev1_585; __rev1_585 = __builtin_shufflevector(__s1_585, __s1_585, 3, 2, 1, 0); \ __ret_585 = __noswap_vmull_s32(__rev0_585, __noswap_splat_laneq_s32(__rev1_585, __p2_585)); \ __ret_585 = __builtin_shufflevector(__ret_585, __ret_585, 1, 0); \ __ret_585; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmull_laneq_s16(__p0_586, __p1_586, __p2_586) __extension__ ({ \ int32x4_t __ret_586; \ int16x4_t __s0_586 = __p0_586; \ int16x8_t __s1_586 = __p1_586; \ __ret_586 = vmull_s16(__s0_586, splat_laneq_s16(__s1_586, __p2_586)); \ __ret_586; \ }) #else #define vmull_laneq_s16(__p0_587, __p1_587, __p2_587) __extension__ ({ \ int32x4_t __ret_587; \ int16x4_t __s0_587 = __p0_587; \ int16x8_t __s1_587 = __p1_587; \ int16x4_t __rev0_587; __rev0_587 = __builtin_shufflevector(__s0_587, __s0_587, 3, 2, 1, 0); \ int16x8_t __rev1_587; __rev1_587 = __builtin_shufflevector(__s1_587, __s1_587, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_587 = __noswap_vmull_s16(__rev0_587, __noswap_splat_laneq_s16(__rev1_587, __p2_587)); \ __ret_587 = __builtin_shufflevector(__ret_587, __ret_587, 3, 2, 1, 0); \ __ret_587; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vmulxq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vmulxq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); return __ret; } #else __ai float64x2_t vmulxq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float64x2_t) __builtin_neon_vmulxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai float64x2_t __noswap_vmulxq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = (float64x2_t) 
__builtin_neon_vmulxq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vmulxq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vmulxq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #else __ai float32x4_t vmulxq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vmulxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai float32x4_t __noswap_vmulxq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vmulxq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #endif __ai float64x1_t vmulx_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vmulx_v((int8x8_t)__p0, (int8x8_t)__p1, 10); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vmulx_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vmulx_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } #else __ai float32x2_t vmulx_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float32x2_t) __builtin_neon_vmulx_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai float32x2_t __noswap_vmulx_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vmulx_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } #endif __ai float64_t vmulxd_f64(float64_t __p0, float64_t __p1) { float64_t __ret; __ret = (float64_t) __builtin_neon_vmulxd_f64(__p0, __p1); return __ret; } __ai float32_t vmulxs_f32(float32_t __p0, float32_t __p1) { float32_t __ret; __ret = (float32_t) __builtin_neon_vmulxs_f32(__p0, __p1); return __ret; } #define vmulxd_lane_f64(__p0_588, __p1_588, __p2_588) __extension__ ({ \ float64_t __ret_588; \ float64_t __s0_588 = __p0_588; \ float64x1_t __s1_588 = __p1_588; \ __ret_588 = vmulxd_f64(__s0_588, vget_lane_f64(__s1_588, __p2_588)); \ __ret_588; \ }) #ifdef __LITTLE_ENDIAN__ #define vmulxs_lane_f32(__p0_589, __p1_589, __p2_589) __extension__ ({ \ float32_t __ret_589; \ float32_t __s0_589 = __p0_589; \ float32x2_t __s1_589 = __p1_589; \ __ret_589 = vmulxs_f32(__s0_589, vget_lane_f32(__s1_589, __p2_589)); \ __ret_589; \ }) #else #define vmulxs_lane_f32(__p0_590, __p1_590, __p2_590) __extension__ ({ \ float32_t __ret_590; \ float32_t __s0_590 = __p0_590; \ float32x2_t __s1_590 = __p1_590; \ float32x2_t __rev1_590; __rev1_590 = __builtin_shufflevector(__s1_590, __s1_590, 1, 0); \ __ret_590 = vmulxs_f32(__s0_590, __noswap_vget_lane_f32(__rev1_590, __p2_590)); \ __ret_590; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulxq_lane_f64(__p0_591, __p1_591, __p2_591) __extension__ ({ \ float64x2_t __ret_591; \ float64x2_t __s0_591 = __p0_591; \ float64x1_t __s1_591 = __p1_591; \ __ret_591 = vmulxq_f64(__s0_591, splatq_lane_f64(__s1_591, __p2_591)); \ __ret_591; \ }) #else #define vmulxq_lane_f64(__p0_592, __p1_592, __p2_592) __extension__ ({ \ float64x2_t __ret_592; \ float64x2_t __s0_592 = __p0_592; \ float64x1_t __s1_592 = __p1_592; \ float64x2_t 
__rev0_592; __rev0_592 = __builtin_shufflevector(__s0_592, __s0_592, 1, 0); \ __ret_592 = __noswap_vmulxq_f64(__rev0_592, __noswap_splatq_lane_f64(__s1_592, __p2_592)); \ __ret_592 = __builtin_shufflevector(__ret_592, __ret_592, 1, 0); \ __ret_592; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulxq_lane_f32(__p0_593, __p1_593, __p2_593) __extension__ ({ \ float32x4_t __ret_593; \ float32x4_t __s0_593 = __p0_593; \ float32x2_t __s1_593 = __p1_593; \ __ret_593 = vmulxq_f32(__s0_593, splatq_lane_f32(__s1_593, __p2_593)); \ __ret_593; \ }) #else #define vmulxq_lane_f32(__p0_594, __p1_594, __p2_594) __extension__ ({ \ float32x4_t __ret_594; \ float32x4_t __s0_594 = __p0_594; \ float32x2_t __s1_594 = __p1_594; \ float32x4_t __rev0_594; __rev0_594 = __builtin_shufflevector(__s0_594, __s0_594, 3, 2, 1, 0); \ float32x2_t __rev1_594; __rev1_594 = __builtin_shufflevector(__s1_594, __s1_594, 1, 0); \ __ret_594 = __noswap_vmulxq_f32(__rev0_594, __noswap_splatq_lane_f32(__rev1_594, __p2_594)); \ __ret_594 = __builtin_shufflevector(__ret_594, __ret_594, 3, 2, 1, 0); \ __ret_594; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulx_lane_f32(__p0_595, __p1_595, __p2_595) __extension__ ({ \ float32x2_t __ret_595; \ float32x2_t __s0_595 = __p0_595; \ float32x2_t __s1_595 = __p1_595; \ __ret_595 = vmulx_f32(__s0_595, splat_lane_f32(__s1_595, __p2_595)); \ __ret_595; \ }) #else #define vmulx_lane_f32(__p0_596, __p1_596, __p2_596) __extension__ ({ \ float32x2_t __ret_596; \ float32x2_t __s0_596 = __p0_596; \ float32x2_t __s1_596 = __p1_596; \ float32x2_t __rev0_596; __rev0_596 = __builtin_shufflevector(__s0_596, __s0_596, 1, 0); \ float32x2_t __rev1_596; __rev1_596 = __builtin_shufflevector(__s1_596, __s1_596, 1, 0); \ __ret_596 = __noswap_vmulx_f32(__rev0_596, __noswap_splat_lane_f32(__rev1_596, __p2_596)); \ __ret_596 = __builtin_shufflevector(__ret_596, __ret_596, 1, 0); \ __ret_596; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulxd_laneq_f64(__p0_597, __p1_597, __p2_597) __extension__ ({ \ float64_t __ret_597; \ float64_t __s0_597 = __p0_597; \ float64x2_t __s1_597 = __p1_597; \ __ret_597 = vmulxd_f64(__s0_597, vgetq_lane_f64(__s1_597, __p2_597)); \ __ret_597; \ }) #else #define vmulxd_laneq_f64(__p0_598, __p1_598, __p2_598) __extension__ ({ \ float64_t __ret_598; \ float64_t __s0_598 = __p0_598; \ float64x2_t __s1_598 = __p1_598; \ float64x2_t __rev1_598; __rev1_598 = __builtin_shufflevector(__s1_598, __s1_598, 1, 0); \ __ret_598 = vmulxd_f64(__s0_598, __noswap_vgetq_lane_f64(__rev1_598, __p2_598)); \ __ret_598; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulxs_laneq_f32(__p0_599, __p1_599, __p2_599) __extension__ ({ \ float32_t __ret_599; \ float32_t __s0_599 = __p0_599; \ float32x4_t __s1_599 = __p1_599; \ __ret_599 = vmulxs_f32(__s0_599, vgetq_lane_f32(__s1_599, __p2_599)); \ __ret_599; \ }) #else #define vmulxs_laneq_f32(__p0_600, __p1_600, __p2_600) __extension__ ({ \ float32_t __ret_600; \ float32_t __s0_600 = __p0_600; \ float32x4_t __s1_600 = __p1_600; \ float32x4_t __rev1_600; __rev1_600 = __builtin_shufflevector(__s1_600, __s1_600, 3, 2, 1, 0); \ __ret_600 = vmulxs_f32(__s0_600, __noswap_vgetq_lane_f32(__rev1_600, __p2_600)); \ __ret_600; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulxq_laneq_f64(__p0_601, __p1_601, __p2_601) __extension__ ({ \ float64x2_t __ret_601; \ float64x2_t __s0_601 = __p0_601; \ float64x2_t __s1_601 = __p1_601; \ __ret_601 = vmulxq_f64(__s0_601, splatq_laneq_f64(__s1_601, __p2_601)); \ __ret_601; \ }) #else #define vmulxq_laneq_f64(__p0_602, __p1_602, __p2_602) 
__extension__ ({ \ float64x2_t __ret_602; \ float64x2_t __s0_602 = __p0_602; \ float64x2_t __s1_602 = __p1_602; \ float64x2_t __rev0_602; __rev0_602 = __builtin_shufflevector(__s0_602, __s0_602, 1, 0); \ float64x2_t __rev1_602; __rev1_602 = __builtin_shufflevector(__s1_602, __s1_602, 1, 0); \ __ret_602 = __noswap_vmulxq_f64(__rev0_602, __noswap_splatq_laneq_f64(__rev1_602, __p2_602)); \ __ret_602 = __builtin_shufflevector(__ret_602, __ret_602, 1, 0); \ __ret_602; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulxq_laneq_f32(__p0_603, __p1_603, __p2_603) __extension__ ({ \ float32x4_t __ret_603; \ float32x4_t __s0_603 = __p0_603; \ float32x4_t __s1_603 = __p1_603; \ __ret_603 = vmulxq_f32(__s0_603, splatq_laneq_f32(__s1_603, __p2_603)); \ __ret_603; \ }) #else #define vmulxq_laneq_f32(__p0_604, __p1_604, __p2_604) __extension__ ({ \ float32x4_t __ret_604; \ float32x4_t __s0_604 = __p0_604; \ float32x4_t __s1_604 = __p1_604; \ float32x4_t __rev0_604; __rev0_604 = __builtin_shufflevector(__s0_604, __s0_604, 3, 2, 1, 0); \ float32x4_t __rev1_604; __rev1_604 = __builtin_shufflevector(__s1_604, __s1_604, 3, 2, 1, 0); \ __ret_604 = __noswap_vmulxq_f32(__rev0_604, __noswap_splatq_laneq_f32(__rev1_604, __p2_604)); \ __ret_604 = __builtin_shufflevector(__ret_604, __ret_604, 3, 2, 1, 0); \ __ret_604; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulx_laneq_f32(__p0_605, __p1_605, __p2_605) __extension__ ({ \ float32x2_t __ret_605; \ float32x2_t __s0_605 = __p0_605; \ float32x4_t __s1_605 = __p1_605; \ __ret_605 = vmulx_f32(__s0_605, splat_laneq_f32(__s1_605, __p2_605)); \ __ret_605; \ }) #else #define vmulx_laneq_f32(__p0_606, __p1_606, __p2_606) __extension__ ({ \ float32x2_t __ret_606; \ float32x2_t __s0_606 = __p0_606; \ float32x4_t __s1_606 = __p1_606; \ float32x2_t __rev0_606; __rev0_606 = __builtin_shufflevector(__s0_606, __s0_606, 1, 0); \ float32x4_t __rev1_606; __rev1_606 = __builtin_shufflevector(__s1_606, __s1_606, 3, 2, 1, 0); \ __ret_606 = __noswap_vmulx_f32(__rev0_606, __noswap_splat_laneq_f32(__rev1_606, __p2_606)); \ __ret_606 = __builtin_shufflevector(__ret_606, __ret_606, 1, 0); \ __ret_606; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vnegq_f64(float64x2_t __p0) { float64x2_t __ret; __ret = -__p0; return __ret; } #else __ai float64x2_t vnegq_f64(float64x2_t __p0) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = -__rev0; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vnegq_s64(int64x2_t __p0) { int64x2_t __ret; __ret = -__p0; return __ret; } #else __ai int64x2_t vnegq_s64(int64x2_t __p0) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = -__rev0; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vneg_f64(float64x1_t __p0) { float64x1_t __ret; __ret = -__p0; return __ret; } __ai int64x1_t vneg_s64(int64x1_t __p0) { int64x1_t __ret; __ret = -__p0; return __ret; } __ai int64_t vnegd_s64(int64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vnegd_s64(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vpaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vpaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 
3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vpaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vpaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vpaddq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else __ai uint64x2_t vpaddq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vpaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8_t vpaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vpaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16_t vpaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vpaddq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); return __ret; } #else __ai float64x2_t vpaddq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif 
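/* Illustrative usage sketch, not part of the original header: the vpaddq_* pairwise-add
 * intrinsics defined above concatenate the pairwise sums of their two operands, so
 * vpaddq_f64(a, b) is expected to yield {a[0]+a[1], b[0]+b[1]} (AArch64 FADDP). The helper
 * name sum2_f64 below is hypothetical and exists only to show one common use: a horizontal
 * sum of a two-lane vector. It reuses the header's __ai (static always_inline) shorthand
 * and the vgetq_lane_f64 accessor defined elsewhere in this header. */
__ai float64_t sum2_f64(float64x2_t __a) {
  /* Pair the vector with itself; both result lanes then hold __a[0] + __a[1]. */
  float64x2_t __sums = vpaddq_f64(__a, __a);
  /* Extract lane 0 as the scalar horizontal sum. */
  return vgetq_lane_f64(__sums, 0);
}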
#ifdef __LITTLE_ENDIAN__ __ai float32x4_t vpaddq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #else __ai float32x4_t vpaddq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vpaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4_t vpaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vpaddq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); return __ret; } #else __ai int64x2_t vpaddq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vpaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8_t vpaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64_t vpaddd_u64(uint64x2_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vpaddd_u64(__p0); return __ret; } #else __ai uint64_t vpaddd_u64(uint64x2_t __p0) { uint64_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (uint64_t) __builtin_neon_vpaddd_u64(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64_t vpaddd_f64(float64x2_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vpaddd_f64(__p0); return __ret; } #else __ai float64_t vpaddd_f64(float64x2_t __p0) { float64_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64_t) __builtin_neon_vpaddd_f64(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64_t vpaddd_s64(int64x2_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vpaddd_s64(__p0); return __ret; } #else __ai int64_t vpaddd_s64(int64x2_t __p0) { int64_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int64_t) __builtin_neon_vpaddd_s64(__rev0); return __ret; 
} #endif #ifdef __LITTLE_ENDIAN__ __ai float32_t vpadds_f32(float32x2_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vpadds_f32(__p0); return __ret; } #else __ai float32_t vpadds_f32(float32x2_t __p0) { float32_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32_t) __builtin_neon_vpadds_f32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vpmaxq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vpmaxq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vpmaxq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vpmaxq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vpmaxq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8_t vpmaxq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vpmaxq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16_t vpmaxq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vpmaxq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); return __ret; } #else __ai float64x2_t vpmaxq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float64x2_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vpmaxq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #else __ai float32x4_t vpmaxq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vpmaxq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4_t vpmaxq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vpmaxq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8_t vpmaxq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64_t vpmaxqd_f64(float64x2_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vpmaxqd_f64(__p0); return __ret; } #else __ai float64_t vpmaxqd_f64(float64x2_t __p0) { float64_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64_t) __builtin_neon_vpmaxqd_f64(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32_t vpmaxs_f32(float32x2_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vpmaxs_f32(__p0); return __ret; } #else __ai float32_t vpmaxs_f32(float32x2_t __p0) { float32_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32_t) __builtin_neon_vpmaxs_f32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vpmaxnmq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vpmaxnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); return __ret; } #else __ai float64x2_t vpmaxnmq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float64x2_t) __builtin_neon_vpmaxnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vpmaxnmq_f32(float32x4_t __p0, float32x4_t __p1) { 
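/*
 * The vpmaxq family above, and the vpminq family further below, take the
 * pairwise maximum or minimum of adjacent lanes, with scalar reductions
 * provided by vpmaxqd_f64, vpmaxs_f32, vpminqd_f64 and vpmins_f32. The "nm"
 * variants (vpmaxnm, vpminnm and their q and scalar forms) map to the
 * FMAXNMP and FMINNMP instructions, which follow IEEE 754 maxNum and minNum
 * semantics: a quiet NaN operand loses to a numeric operand.
 * A minimal usage sketch (hypothetical user code, not part of this header):
 *
 *   float max_of_four(float32x4_t v) {
 *       float32x4_t m = vpmaxq_f32(v, v);   // {max(v0,v1), max(v2,v3), ...}
 *       m = vpmaxq_f32(m, m);               // every lane now holds the overall max
 *       return vgetq_lane_f32(m, 0);
 *   }
 */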
float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vpmaxnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #else __ai float32x4_t vpmaxnmq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vpmaxnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vpmaxnm_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vpmaxnm_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } #else __ai float32x2_t vpmaxnm_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float32x2_t) __builtin_neon_vpmaxnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64_t vpmaxnmqd_f64(float64x2_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vpmaxnmqd_f64(__p0); return __ret; } #else __ai float64_t vpmaxnmqd_f64(float64x2_t __p0) { float64_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64_t) __builtin_neon_vpmaxnmqd_f64(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32_t vpmaxnms_f32(float32x2_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vpmaxnms_f32(__p0); return __ret; } #else __ai float32_t vpmaxnms_f32(float32x2_t __p0) { float32_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32_t) __builtin_neon_vpmaxnms_f32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vpminq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vpminq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vpminq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vpminq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vpminq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8_t vpminq_u16(uint16x8_t __p0, uint16x8_t 
__p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vpminq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16_t vpminq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vpminq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); return __ret; } #else __ai float64x2_t vpminq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float64x2_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vpminq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #else __ai float32x4_t vpminq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vpminq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4_t vpminq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vpminq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8_t vpminq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 
7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64_t vpminqd_f64(float64x2_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vpminqd_f64(__p0); return __ret; } #else __ai float64_t vpminqd_f64(float64x2_t __p0) { float64_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64_t) __builtin_neon_vpminqd_f64(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32_t vpmins_f32(float32x2_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vpmins_f32(__p0); return __ret; } #else __ai float32_t vpmins_f32(float32x2_t __p0) { float32_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32_t) __builtin_neon_vpmins_f32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vpminnmq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vpminnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); return __ret; } #else __ai float64x2_t vpminnmq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float64x2_t) __builtin_neon_vpminnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vpminnmq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vpminnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #else __ai float32x4_t vpminnmq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vpminnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vpminnm_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vpminnm_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } #else __ai float32x2_t vpminnm_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float32x2_t) __builtin_neon_vpminnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64_t vpminnmqd_f64(float64x2_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vpminnmqd_f64(__p0); return __ret; } #else __ai float64_t vpminnmqd_f64(float64x2_t __p0) { float64_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64_t) __builtin_neon_vpminnmqd_f64(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32_t vpminnms_f32(float32x2_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vpminnms_f32(__p0); return __ret; } #else __ai float32_t vpminnms_f32(float32x2_t __p0) { float32_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32_t) __builtin_neon_vpminnms_f32(__rev0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqabsq_s64(int64x2_t __p0) { int64x2_t __ret; __ret = (int64x2_t) 
__builtin_neon_vqabsq_v((int8x16_t)__p0, 35); return __ret; } #else __ai int64x2_t vqabsq_s64(int64x2_t __p0) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int64x2_t) __builtin_neon_vqabsq_v((int8x16_t)__rev0, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vqabs_s64(int64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vqabs_v((int8x8_t)__p0, 3); return __ret; } __ai int8_t vqabsb_s8(int8_t __p0) { int8_t __ret; __ret = (int8_t) __builtin_neon_vqabsb_s8(__p0); return __ret; } __ai int32_t vqabss_s32(int32_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqabss_s32(__p0); return __ret; } __ai int64_t vqabsd_s64(int64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vqabsd_s64(__p0); return __ret; } __ai int16_t vqabsh_s16(int16_t __p0) { int16_t __ret; __ret = (int16_t) __builtin_neon_vqabsh_s16(__p0); return __ret; } __ai uint8_t vqaddb_u8(uint8_t __p0, uint8_t __p1) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vqaddb_u8(__p0, __p1); return __ret; } __ai uint32_t vqadds_u32(uint32_t __p0, uint32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vqadds_u32(__p0, __p1); return __ret; } __ai uint64_t vqaddd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vqaddd_u64(__p0, __p1); return __ret; } __ai uint16_t vqaddh_u16(uint16_t __p0, uint16_t __p1) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vqaddh_u16(__p0, __p1); return __ret; } __ai int8_t vqaddb_s8(int8_t __p0, int8_t __p1) { int8_t __ret; __ret = (int8_t) __builtin_neon_vqaddb_s8(__p0, __p1); return __ret; } __ai int32_t vqadds_s32(int32_t __p0, int32_t __p1) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqadds_s32(__p0, __p1); return __ret; } __ai int64_t vqaddd_s64(int64_t __p0, int64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vqaddd_s64(__p0, __p1); return __ret; } __ai int16_t vqaddh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; __ret = (int16_t) __builtin_neon_vqaddh_s16(__p0, __p1); return __ret; } __ai int64_t vqdmlals_s32(int64_t __p0, int32_t __p1, int32_t __p2) { int64_t __ret; __ret = (int64_t) __builtin_neon_vqdmlals_s32(__p0, __p1, __p2); return __ret; } __ai int32_t vqdmlalh_s16(int32_t __p0, int16_t __p1, int16_t __p2) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqdmlalh_s16(__p0, __p1, __p2); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqdmlal_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { int64x2_t __ret; __ret = vqdmlal_s32(__p0, vget_high_s32(__p1), vget_high_s32(__p2)); return __ret; } #else __ai int64x2_t vqdmlal_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __noswap_vqdmlal_s32(__rev0, __noswap_vget_high_s32(__rev1), __noswap_vget_high_s32(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqdmlal_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) { int32x4_t __ret; __ret = vqdmlal_s16(__p0, vget_high_s16(__p1), vget_high_s16(__p2)); return __ret; } #else __ai int32x4_t vqdmlal_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x8_t 
__rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vqdmlal_s16(__rev0, __noswap_vget_high_s16(__rev1), __noswap_vget_high_s16(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlal_high_lane_s32(__p0_607, __p1_607, __p2_607, __p3_607) __extension__ ({ \ int64x2_t __ret_607; \ int64x2_t __s0_607 = __p0_607; \ int32x4_t __s1_607 = __p1_607; \ int32x2_t __s2_607 = __p2_607; \ __ret_607 = vqdmlal_s32(__s0_607, vget_high_s32(__s1_607), splat_lane_s32(__s2_607, __p3_607)); \ __ret_607; \ }) #else #define vqdmlal_high_lane_s32(__p0_608, __p1_608, __p2_608, __p3_608) __extension__ ({ \ int64x2_t __ret_608; \ int64x2_t __s0_608 = __p0_608; \ int32x4_t __s1_608 = __p1_608; \ int32x2_t __s2_608 = __p2_608; \ int64x2_t __rev0_608; __rev0_608 = __builtin_shufflevector(__s0_608, __s0_608, 1, 0); \ int32x4_t __rev1_608; __rev1_608 = __builtin_shufflevector(__s1_608, __s1_608, 3, 2, 1, 0); \ int32x2_t __rev2_608; __rev2_608 = __builtin_shufflevector(__s2_608, __s2_608, 1, 0); \ __ret_608 = __noswap_vqdmlal_s32(__rev0_608, __noswap_vget_high_s32(__rev1_608), __noswap_splat_lane_s32(__rev2_608, __p3_608)); \ __ret_608 = __builtin_shufflevector(__ret_608, __ret_608, 1, 0); \ __ret_608; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlal_high_lane_s16(__p0_609, __p1_609, __p2_609, __p3_609) __extension__ ({ \ int32x4_t __ret_609; \ int32x4_t __s0_609 = __p0_609; \ int16x8_t __s1_609 = __p1_609; \ int16x4_t __s2_609 = __p2_609; \ __ret_609 = vqdmlal_s16(__s0_609, vget_high_s16(__s1_609), splat_lane_s16(__s2_609, __p3_609)); \ __ret_609; \ }) #else #define vqdmlal_high_lane_s16(__p0_610, __p1_610, __p2_610, __p3_610) __extension__ ({ \ int32x4_t __ret_610; \ int32x4_t __s0_610 = __p0_610; \ int16x8_t __s1_610 = __p1_610; \ int16x4_t __s2_610 = __p2_610; \ int32x4_t __rev0_610; __rev0_610 = __builtin_shufflevector(__s0_610, __s0_610, 3, 2, 1, 0); \ int16x8_t __rev1_610; __rev1_610 = __builtin_shufflevector(__s1_610, __s1_610, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x4_t __rev2_610; __rev2_610 = __builtin_shufflevector(__s2_610, __s2_610, 3, 2, 1, 0); \ __ret_610 = __noswap_vqdmlal_s16(__rev0_610, __noswap_vget_high_s16(__rev1_610), __noswap_splat_lane_s16(__rev2_610, __p3_610)); \ __ret_610 = __builtin_shufflevector(__ret_610, __ret_610, 3, 2, 1, 0); \ __ret_610; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlal_high_laneq_s32(__p0_611, __p1_611, __p2_611, __p3_611) __extension__ ({ \ int64x2_t __ret_611; \ int64x2_t __s0_611 = __p0_611; \ int32x4_t __s1_611 = __p1_611; \ int32x4_t __s2_611 = __p2_611; \ __ret_611 = vqdmlal_s32(__s0_611, vget_high_s32(__s1_611), splat_laneq_s32(__s2_611, __p3_611)); \ __ret_611; \ }) #else #define vqdmlal_high_laneq_s32(__p0_612, __p1_612, __p2_612, __p3_612) __extension__ ({ \ int64x2_t __ret_612; \ int64x2_t __s0_612 = __p0_612; \ int32x4_t __s1_612 = __p1_612; \ int32x4_t __s2_612 = __p2_612; \ int64x2_t __rev0_612; __rev0_612 = __builtin_shufflevector(__s0_612, __s0_612, 1, 0); \ int32x4_t __rev1_612; __rev1_612 = __builtin_shufflevector(__s1_612, __s1_612, 3, 2, 1, 0); \ int32x4_t __rev2_612; __rev2_612 = __builtin_shufflevector(__s2_612, __s2_612, 3, 2, 1, 0); \ __ret_612 = __noswap_vqdmlal_s32(__rev0_612, __noswap_vget_high_s32(__rev1_612), __noswap_splat_laneq_s32(__rev2_612, __p3_612)); \ __ret_612 = __builtin_shufflevector(__ret_612, __ret_612, 1, 0); \ __ret_612; \ 
}) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlal_high_laneq_s16(__p0_613, __p1_613, __p2_613, __p3_613) __extension__ ({ \ int32x4_t __ret_613; \ int32x4_t __s0_613 = __p0_613; \ int16x8_t __s1_613 = __p1_613; \ int16x8_t __s2_613 = __p2_613; \ __ret_613 = vqdmlal_s16(__s0_613, vget_high_s16(__s1_613), splat_laneq_s16(__s2_613, __p3_613)); \ __ret_613; \ }) #else #define vqdmlal_high_laneq_s16(__p0_614, __p1_614, __p2_614, __p3_614) __extension__ ({ \ int32x4_t __ret_614; \ int32x4_t __s0_614 = __p0_614; \ int16x8_t __s1_614 = __p1_614; \ int16x8_t __s2_614 = __p2_614; \ int32x4_t __rev0_614; __rev0_614 = __builtin_shufflevector(__s0_614, __s0_614, 3, 2, 1, 0); \ int16x8_t __rev1_614; __rev1_614 = __builtin_shufflevector(__s1_614, __s1_614, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev2_614; __rev2_614 = __builtin_shufflevector(__s2_614, __s2_614, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_614 = __noswap_vqdmlal_s16(__rev0_614, __noswap_vget_high_s16(__rev1_614), __noswap_splat_laneq_s16(__rev2_614, __p3_614)); \ __ret_614 = __builtin_shufflevector(__ret_614, __ret_614, 3, 2, 1, 0); \ __ret_614; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqdmlal_high_n_s32(int64x2_t __p0, int32x4_t __p1, int32_t __p2) { int64x2_t __ret; __ret = vqdmlal_n_s32(__p0, vget_high_s32(__p1), __p2); return __ret; } #else __ai int64x2_t vqdmlal_high_n_s32(int64x2_t __p0, int32x4_t __p1, int32_t __p2) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vqdmlal_n_s32(__rev0, __noswap_vget_high_s32(__rev1), __p2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqdmlal_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) { int32x4_t __ret; __ret = vqdmlal_n_s16(__p0, vget_high_s16(__p1), __p2); return __ret; } #else __ai int32x4_t vqdmlal_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vqdmlal_n_s16(__rev0, __noswap_vget_high_s16(__rev1), __p2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlals_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ int32_t __s1 = __p1; \ int32x2_t __s2 = __p2; \ __ret = (int64_t) __builtin_neon_vqdmlals_lane_s32(__s0, __s1, __s2, __p3); \ __ret; \ }) #else #define vqdmlals_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ int32_t __s1 = __p1; \ int32x2_t __s2 = __p2; \ int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ __ret = (int64_t) __builtin_neon_vqdmlals_lane_s32(__s0, __s1, __rev2, __p3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlalh_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ int32_t __ret; \ int32_t __s0 = __p0; \ int16_t __s1 = __p1; \ int16x4_t __s2 = __p2; \ __ret = (int32_t) __builtin_neon_vqdmlalh_lane_s16(__s0, __s1, __s2, __p3); \ __ret; \ }) #else #define vqdmlalh_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ int32_t __ret; \ int32_t __s0 = __p0; \ int16_t __s1 = __p1; \ int16x4_t __s2 = __p2; \ int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ __ret = (int32_t) __builtin_neon_vqdmlalh_lane_s16(__s0, __s1, __rev2, __p3); \ __ret; \ }) 
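/*
 * This part of the file covers the saturating scalar operations (vqabs,
 * vqadd and their width-suffixed forms) and the saturating doubling
 * multiply-accumulate long family: vqdmlal computes acc + sat(2 * a * b)
 * with the product widened to twice the element width, and the vqdmlsl
 * forms further below subtract the doubled product instead. The _high
 * variants operate on the upper halves of 128-bit inputs, while the _lane
 * and _laneq variants select one multiplier lane from a 64-bit or 128-bit
 * vector respectively.
 * A minimal usage sketch (hypothetical user code, not part of this header):
 *
 *   int64x2_t mac_upper_halves(int64x2_t acc, int32x4_t a, int32x4_t b) {
 *       // per lane: sat(acc + 2 * (int64_t)a_hi * (int64_t)b_hi)
 *       return vqdmlal_high_s32(acc, a, b);
 *   }
 */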
#endif #ifdef __LITTLE_ENDIAN__ #define vqdmlals_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ int32_t __s1 = __p1; \ int32x4_t __s2 = __p2; \ __ret = (int64_t) __builtin_neon_vqdmlals_laneq_s32(__s0, __s1, __s2, __p3); \ __ret; \ }) #else #define vqdmlals_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ int32_t __s1 = __p1; \ int32x4_t __s2 = __p2; \ int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ __ret = (int64_t) __builtin_neon_vqdmlals_laneq_s32(__s0, __s1, __rev2, __p3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlalh_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ int32_t __ret; \ int32_t __s0 = __p0; \ int16_t __s1 = __p1; \ int16x8_t __s2 = __p2; \ __ret = (int32_t) __builtin_neon_vqdmlalh_laneq_s16(__s0, __s1, __s2, __p3); \ __ret; \ }) #else #define vqdmlalh_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ int32_t __ret; \ int32_t __s0 = __p0; \ int16_t __s1 = __p1; \ int16x8_t __s2 = __p2; \ int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int32_t) __builtin_neon_vqdmlalh_laneq_s16(__s0, __s1, __rev2, __p3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlal_laneq_s32(__p0_615, __p1_615, __p2_615, __p3_615) __extension__ ({ \ int64x2_t __ret_615; \ int64x2_t __s0_615 = __p0_615; \ int32x2_t __s1_615 = __p1_615; \ int32x4_t __s2_615 = __p2_615; \ __ret_615 = vqdmlal_s32(__s0_615, __s1_615, splat_laneq_s32(__s2_615, __p3_615)); \ __ret_615; \ }) #else #define vqdmlal_laneq_s32(__p0_616, __p1_616, __p2_616, __p3_616) __extension__ ({ \ int64x2_t __ret_616; \ int64x2_t __s0_616 = __p0_616; \ int32x2_t __s1_616 = __p1_616; \ int32x4_t __s2_616 = __p2_616; \ int64x2_t __rev0_616; __rev0_616 = __builtin_shufflevector(__s0_616, __s0_616, 1, 0); \ int32x2_t __rev1_616; __rev1_616 = __builtin_shufflevector(__s1_616, __s1_616, 1, 0); \ int32x4_t __rev2_616; __rev2_616 = __builtin_shufflevector(__s2_616, __s2_616, 3, 2, 1, 0); \ __ret_616 = __noswap_vqdmlal_s32(__rev0_616, __rev1_616, __noswap_splat_laneq_s32(__rev2_616, __p3_616)); \ __ret_616 = __builtin_shufflevector(__ret_616, __ret_616, 1, 0); \ __ret_616; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlal_laneq_s16(__p0_617, __p1_617, __p2_617, __p3_617) __extension__ ({ \ int32x4_t __ret_617; \ int32x4_t __s0_617 = __p0_617; \ int16x4_t __s1_617 = __p1_617; \ int16x8_t __s2_617 = __p2_617; \ __ret_617 = vqdmlal_s16(__s0_617, __s1_617, splat_laneq_s16(__s2_617, __p3_617)); \ __ret_617; \ }) #else #define vqdmlal_laneq_s16(__p0_618, __p1_618, __p2_618, __p3_618) __extension__ ({ \ int32x4_t __ret_618; \ int32x4_t __s0_618 = __p0_618; \ int16x4_t __s1_618 = __p1_618; \ int16x8_t __s2_618 = __p2_618; \ int32x4_t __rev0_618; __rev0_618 = __builtin_shufflevector(__s0_618, __s0_618, 3, 2, 1, 0); \ int16x4_t __rev1_618; __rev1_618 = __builtin_shufflevector(__s1_618, __s1_618, 3, 2, 1, 0); \ int16x8_t __rev2_618; __rev2_618 = __builtin_shufflevector(__s2_618, __s2_618, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_618 = __noswap_vqdmlal_s16(__rev0_618, __rev1_618, __noswap_splat_laneq_s16(__rev2_618, __p3_618)); \ __ret_618 = __builtin_shufflevector(__ret_618, __ret_618, 3, 2, 1, 0); \ __ret_618; \ }) #endif __ai int64_t vqdmlsls_s32(int64_t __p0, int32_t __p1, int32_t __p2) { int64_t __ret; __ret = (int64_t) __builtin_neon_vqdmlsls_s32(__p0, __p1, __p2); return __ret; } __ai int32_t vqdmlslh_s16(int32_t __p0, int16_t __p1, int16_t __p2) { int32_t 
__ret; __ret = (int32_t) __builtin_neon_vqdmlslh_s16(__p0, __p1, __p2); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqdmlsl_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { int64x2_t __ret; __ret = vqdmlsl_s32(__p0, vget_high_s32(__p1), vget_high_s32(__p2)); return __ret; } #else __ai int64x2_t vqdmlsl_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __noswap_vqdmlsl_s32(__rev0, __noswap_vget_high_s32(__rev1), __noswap_vget_high_s32(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqdmlsl_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) { int32x4_t __ret; __ret = vqdmlsl_s16(__p0, vget_high_s16(__p1), vget_high_s16(__p2)); return __ret; } #else __ai int32x4_t vqdmlsl_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vqdmlsl_s16(__rev0, __noswap_vget_high_s16(__rev1), __noswap_vget_high_s16(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlsl_high_lane_s32(__p0_619, __p1_619, __p2_619, __p3_619) __extension__ ({ \ int64x2_t __ret_619; \ int64x2_t __s0_619 = __p0_619; \ int32x4_t __s1_619 = __p1_619; \ int32x2_t __s2_619 = __p2_619; \ __ret_619 = vqdmlsl_s32(__s0_619, vget_high_s32(__s1_619), splat_lane_s32(__s2_619, __p3_619)); \ __ret_619; \ }) #else #define vqdmlsl_high_lane_s32(__p0_620, __p1_620, __p2_620, __p3_620) __extension__ ({ \ int64x2_t __ret_620; \ int64x2_t __s0_620 = __p0_620; \ int32x4_t __s1_620 = __p1_620; \ int32x2_t __s2_620 = __p2_620; \ int64x2_t __rev0_620; __rev0_620 = __builtin_shufflevector(__s0_620, __s0_620, 1, 0); \ int32x4_t __rev1_620; __rev1_620 = __builtin_shufflevector(__s1_620, __s1_620, 3, 2, 1, 0); \ int32x2_t __rev2_620; __rev2_620 = __builtin_shufflevector(__s2_620, __s2_620, 1, 0); \ __ret_620 = __noswap_vqdmlsl_s32(__rev0_620, __noswap_vget_high_s32(__rev1_620), __noswap_splat_lane_s32(__rev2_620, __p3_620)); \ __ret_620 = __builtin_shufflevector(__ret_620, __ret_620, 1, 0); \ __ret_620; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlsl_high_lane_s16(__p0_621, __p1_621, __p2_621, __p3_621) __extension__ ({ \ int32x4_t __ret_621; \ int32x4_t __s0_621 = __p0_621; \ int16x8_t __s1_621 = __p1_621; \ int16x4_t __s2_621 = __p2_621; \ __ret_621 = vqdmlsl_s16(__s0_621, vget_high_s16(__s1_621), splat_lane_s16(__s2_621, __p3_621)); \ __ret_621; \ }) #else #define vqdmlsl_high_lane_s16(__p0_622, __p1_622, __p2_622, __p3_622) __extension__ ({ \ int32x4_t __ret_622; \ int32x4_t __s0_622 = __p0_622; \ int16x8_t __s1_622 = __p1_622; \ int16x4_t __s2_622 = __p2_622; \ int32x4_t __rev0_622; __rev0_622 = __builtin_shufflevector(__s0_622, __s0_622, 3, 2, 1, 0); \ int16x8_t __rev1_622; __rev1_622 = __builtin_shufflevector(__s1_622, __s1_622, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x4_t __rev2_622; __rev2_622 = __builtin_shufflevector(__s2_622, __s2_622, 3, 2, 1, 0); \ __ret_622 = __noswap_vqdmlsl_s16(__rev0_622, __noswap_vget_high_s16(__rev1_622), 
__noswap_splat_lane_s16(__rev2_622, __p3_622)); \ __ret_622 = __builtin_shufflevector(__ret_622, __ret_622, 3, 2, 1, 0); \ __ret_622; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlsl_high_laneq_s32(__p0_623, __p1_623, __p2_623, __p3_623) __extension__ ({ \ int64x2_t __ret_623; \ int64x2_t __s0_623 = __p0_623; \ int32x4_t __s1_623 = __p1_623; \ int32x4_t __s2_623 = __p2_623; \ __ret_623 = vqdmlsl_s32(__s0_623, vget_high_s32(__s1_623), splat_laneq_s32(__s2_623, __p3_623)); \ __ret_623; \ }) #else #define vqdmlsl_high_laneq_s32(__p0_624, __p1_624, __p2_624, __p3_624) __extension__ ({ \ int64x2_t __ret_624; \ int64x2_t __s0_624 = __p0_624; \ int32x4_t __s1_624 = __p1_624; \ int32x4_t __s2_624 = __p2_624; \ int64x2_t __rev0_624; __rev0_624 = __builtin_shufflevector(__s0_624, __s0_624, 1, 0); \ int32x4_t __rev1_624; __rev1_624 = __builtin_shufflevector(__s1_624, __s1_624, 3, 2, 1, 0); \ int32x4_t __rev2_624; __rev2_624 = __builtin_shufflevector(__s2_624, __s2_624, 3, 2, 1, 0); \ __ret_624 = __noswap_vqdmlsl_s32(__rev0_624, __noswap_vget_high_s32(__rev1_624), __noswap_splat_laneq_s32(__rev2_624, __p3_624)); \ __ret_624 = __builtin_shufflevector(__ret_624, __ret_624, 1, 0); \ __ret_624; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlsl_high_laneq_s16(__p0_625, __p1_625, __p2_625, __p3_625) __extension__ ({ \ int32x4_t __ret_625; \ int32x4_t __s0_625 = __p0_625; \ int16x8_t __s1_625 = __p1_625; \ int16x8_t __s2_625 = __p2_625; \ __ret_625 = vqdmlsl_s16(__s0_625, vget_high_s16(__s1_625), splat_laneq_s16(__s2_625, __p3_625)); \ __ret_625; \ }) #else #define vqdmlsl_high_laneq_s16(__p0_626, __p1_626, __p2_626, __p3_626) __extension__ ({ \ int32x4_t __ret_626; \ int32x4_t __s0_626 = __p0_626; \ int16x8_t __s1_626 = __p1_626; \ int16x8_t __s2_626 = __p2_626; \ int32x4_t __rev0_626; __rev0_626 = __builtin_shufflevector(__s0_626, __s0_626, 3, 2, 1, 0); \ int16x8_t __rev1_626; __rev1_626 = __builtin_shufflevector(__s1_626, __s1_626, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev2_626; __rev2_626 = __builtin_shufflevector(__s2_626, __s2_626, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_626 = __noswap_vqdmlsl_s16(__rev0_626, __noswap_vget_high_s16(__rev1_626), __noswap_splat_laneq_s16(__rev2_626, __p3_626)); \ __ret_626 = __builtin_shufflevector(__ret_626, __ret_626, 3, 2, 1, 0); \ __ret_626; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqdmlsl_high_n_s32(int64x2_t __p0, int32x4_t __p1, int32_t __p2) { int64x2_t __ret; __ret = vqdmlsl_n_s32(__p0, vget_high_s32(__p1), __p2); return __ret; } #else __ai int64x2_t vqdmlsl_high_n_s32(int64x2_t __p0, int32x4_t __p1, int32_t __p2) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vqdmlsl_n_s32(__rev0, __noswap_vget_high_s32(__rev1), __p2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) { int32x4_t __ret; __ret = vqdmlsl_n_s16(__p0, vget_high_s16(__p1), __p2); return __ret; } #else __ai int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vqdmlsl_n_s16(__rev0, __noswap_vget_high_s16(__rev1), __p2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef 
__LITTLE_ENDIAN__ #define vqdmlsls_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ int32_t __s1 = __p1; \ int32x2_t __s2 = __p2; \ __ret = (int64_t) __builtin_neon_vqdmlsls_lane_s32(__s0, __s1, __s2, __p3); \ __ret; \ }) #else #define vqdmlsls_lane_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ int32_t __s1 = __p1; \ int32x2_t __s2 = __p2; \ int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ __ret = (int64_t) __builtin_neon_vqdmlsls_lane_s32(__s0, __s1, __rev2, __p3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlslh_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ int32_t __ret; \ int32_t __s0 = __p0; \ int16_t __s1 = __p1; \ int16x4_t __s2 = __p2; \ __ret = (int32_t) __builtin_neon_vqdmlslh_lane_s16(__s0, __s1, __s2, __p3); \ __ret; \ }) #else #define vqdmlslh_lane_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ int32_t __ret; \ int32_t __s0 = __p0; \ int16_t __s1 = __p1; \ int16x4_t __s2 = __p2; \ int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ __ret = (int32_t) __builtin_neon_vqdmlslh_lane_s16(__s0, __s1, __rev2, __p3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlsls_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ int32_t __s1 = __p1; \ int32x4_t __s2 = __p2; \ __ret = (int64_t) __builtin_neon_vqdmlsls_laneq_s32(__s0, __s1, __s2, __p3); \ __ret; \ }) #else #define vqdmlsls_laneq_s32(__p0, __p1, __p2, __p3) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ int32_t __s1 = __p1; \ int32x4_t __s2 = __p2; \ int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ __ret = (int64_t) __builtin_neon_vqdmlsls_laneq_s32(__s0, __s1, __rev2, __p3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlslh_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ int32_t __ret; \ int32_t __s0 = __p0; \ int16_t __s1 = __p1; \ int16x8_t __s2 = __p2; \ __ret = (int32_t) __builtin_neon_vqdmlslh_laneq_s16(__s0, __s1, __s2, __p3); \ __ret; \ }) #else #define vqdmlslh_laneq_s16(__p0, __p1, __p2, __p3) __extension__ ({ \ int32_t __ret; \ int32_t __s0 = __p0; \ int16_t __s1 = __p1; \ int16x8_t __s2 = __p2; \ int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int32_t) __builtin_neon_vqdmlslh_laneq_s16(__s0, __s1, __rev2, __p3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlsl_laneq_s32(__p0_627, __p1_627, __p2_627, __p3_627) __extension__ ({ \ int64x2_t __ret_627; \ int64x2_t __s0_627 = __p0_627; \ int32x2_t __s1_627 = __p1_627; \ int32x4_t __s2_627 = __p2_627; \ __ret_627 = vqdmlsl_s32(__s0_627, __s1_627, splat_laneq_s32(__s2_627, __p3_627)); \ __ret_627; \ }) #else #define vqdmlsl_laneq_s32(__p0_628, __p1_628, __p2_628, __p3_628) __extension__ ({ \ int64x2_t __ret_628; \ int64x2_t __s0_628 = __p0_628; \ int32x2_t __s1_628 = __p1_628; \ int32x4_t __s2_628 = __p2_628; \ int64x2_t __rev0_628; __rev0_628 = __builtin_shufflevector(__s0_628, __s0_628, 1, 0); \ int32x2_t __rev1_628; __rev1_628 = __builtin_shufflevector(__s1_628, __s1_628, 1, 0); \ int32x4_t __rev2_628; __rev2_628 = __builtin_shufflevector(__s2_628, __s2_628, 3, 2, 1, 0); \ __ret_628 = __noswap_vqdmlsl_s32(__rev0_628, __rev1_628, __noswap_splat_laneq_s32(__rev2_628, __p3_628)); \ __ret_628 = __builtin_shufflevector(__ret_628, __ret_628, 1, 0); \ __ret_628; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmlsl_laneq_s16(__p0_629, __p1_629, __p2_629, 
__p3_629) __extension__ ({ \ int32x4_t __ret_629; \ int32x4_t __s0_629 = __p0_629; \ int16x4_t __s1_629 = __p1_629; \ int16x8_t __s2_629 = __p2_629; \ __ret_629 = vqdmlsl_s16(__s0_629, __s1_629, splat_laneq_s16(__s2_629, __p3_629)); \ __ret_629; \ }) #else #define vqdmlsl_laneq_s16(__p0_630, __p1_630, __p2_630, __p3_630) __extension__ ({ \ int32x4_t __ret_630; \ int32x4_t __s0_630 = __p0_630; \ int16x4_t __s1_630 = __p1_630; \ int16x8_t __s2_630 = __p2_630; \ int32x4_t __rev0_630; __rev0_630 = __builtin_shufflevector(__s0_630, __s0_630, 3, 2, 1, 0); \ int16x4_t __rev1_630; __rev1_630 = __builtin_shufflevector(__s1_630, __s1_630, 3, 2, 1, 0); \ int16x8_t __rev2_630; __rev2_630 = __builtin_shufflevector(__s2_630, __s2_630, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_630 = __noswap_vqdmlsl_s16(__rev0_630, __rev1_630, __noswap_splat_laneq_s16(__rev2_630, __p3_630)); \ __ret_630 = __builtin_shufflevector(__ret_630, __ret_630, 3, 2, 1, 0); \ __ret_630; \ }) #endif __ai int32_t vqdmulhs_s32(int32_t __p0, int32_t __p1) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqdmulhs_s32(__p0, __p1); return __ret; } __ai int16_t vqdmulhh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; __ret = (int16_t) __builtin_neon_vqdmulhh_s16(__p0, __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ #define vqdmulhq_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ __ret = (int32x4_t) __builtin_neon_vqdmulhq_lane_v((int8x16_t)__s0, (int8x8_t)__s1, __p2, 2); \ __ret; \ }) #else #define vqdmulhq_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (int32x4_t) __builtin_neon_vqdmulhq_lane_v((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmulhq_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ __ret = (int16x8_t) __builtin_neon_vqdmulhq_lane_v((int8x16_t)__s0, (int8x8_t)__s1, __p2, 1); \ __ret; \ }) #else #define vqdmulhq_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int16x8_t) __builtin_neon_vqdmulhq_lane_v((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmulh_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ __ret = (int32x2_t) __builtin_neon_vqdmulh_lane_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \ __ret; \ }) #else #define vqdmulh_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (int32x2_t) __builtin_neon_vqdmulh_lane_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmulh_lane_s16(__p0, __p1, __p2) __extension__ ({ \ 
int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ __ret = (int16x4_t) __builtin_neon_vqdmulh_lane_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \ __ret; \ }) #else #define vqdmulh_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_vqdmulh_lane_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmulhs_lane_s32(__p0_631, __p1_631, __p2_631) __extension__ ({ \ int32_t __ret_631; \ int32_t __s0_631 = __p0_631; \ int32x2_t __s1_631 = __p1_631; \ __ret_631 = vqdmulhs_s32(__s0_631, vget_lane_s32(__s1_631, __p2_631)); \ __ret_631; \ }) #else #define vqdmulhs_lane_s32(__p0_632, __p1_632, __p2_632) __extension__ ({ \ int32_t __ret_632; \ int32_t __s0_632 = __p0_632; \ int32x2_t __s1_632 = __p1_632; \ int32x2_t __rev1_632; __rev1_632 = __builtin_shufflevector(__s1_632, __s1_632, 1, 0); \ __ret_632 = vqdmulhs_s32(__s0_632, __noswap_vget_lane_s32(__rev1_632, __p2_632)); \ __ret_632; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmulhh_lane_s16(__p0_633, __p1_633, __p2_633) __extension__ ({ \ int16_t __ret_633; \ int16_t __s0_633 = __p0_633; \ int16x4_t __s1_633 = __p1_633; \ __ret_633 = vqdmulhh_s16(__s0_633, vget_lane_s16(__s1_633, __p2_633)); \ __ret_633; \ }) #else #define vqdmulhh_lane_s16(__p0_634, __p1_634, __p2_634) __extension__ ({ \ int16_t __ret_634; \ int16_t __s0_634 = __p0_634; \ int16x4_t __s1_634 = __p1_634; \ int16x4_t __rev1_634; __rev1_634 = __builtin_shufflevector(__s1_634, __s1_634, 3, 2, 1, 0); \ __ret_634 = vqdmulhh_s16(__s0_634, __noswap_vget_lane_s16(__rev1_634, __p2_634)); \ __ret_634; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmulhs_laneq_s32(__p0_635, __p1_635, __p2_635) __extension__ ({ \ int32_t __ret_635; \ int32_t __s0_635 = __p0_635; \ int32x4_t __s1_635 = __p1_635; \ __ret_635 = vqdmulhs_s32(__s0_635, vgetq_lane_s32(__s1_635, __p2_635)); \ __ret_635; \ }) #else #define vqdmulhs_laneq_s32(__p0_636, __p1_636, __p2_636) __extension__ ({ \ int32_t __ret_636; \ int32_t __s0_636 = __p0_636; \ int32x4_t __s1_636 = __p1_636; \ int32x4_t __rev1_636; __rev1_636 = __builtin_shufflevector(__s1_636, __s1_636, 3, 2, 1, 0); \ __ret_636 = vqdmulhs_s32(__s0_636, __noswap_vgetq_lane_s32(__rev1_636, __p2_636)); \ __ret_636; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmulhh_laneq_s16(__p0_637, __p1_637, __p2_637) __extension__ ({ \ int16_t __ret_637; \ int16_t __s0_637 = __p0_637; \ int16x8_t __s1_637 = __p1_637; \ __ret_637 = vqdmulhh_s16(__s0_637, vgetq_lane_s16(__s1_637, __p2_637)); \ __ret_637; \ }) #else #define vqdmulhh_laneq_s16(__p0_638, __p1_638, __p2_638) __extension__ ({ \ int16_t __ret_638; \ int16_t __s0_638 = __p0_638; \ int16x8_t __s1_638 = __p1_638; \ int16x8_t __rev1_638; __rev1_638 = __builtin_shufflevector(__s1_638, __s1_638, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_638 = vqdmulhh_s16(__s0_638, __noswap_vgetq_lane_s16(__rev1_638, __p2_638)); \ __ret_638; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmulhq_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ __ret = (int32x4_t) __builtin_neon_vqdmulhq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \ __ret; \ }) #else #define vqdmulhq_laneq_s32(__p0, __p1, __p2) 
__extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int32x4_t) __builtin_neon_vqdmulhq_laneq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmulhq_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ __ret = (int16x8_t) __builtin_neon_vqdmulhq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ __ret; \ }) #else #define vqdmulhq_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16x8_t) __builtin_neon_vqdmulhq_laneq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmulh_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ __ret = (int32x2_t) __builtin_neon_vqdmulh_laneq_v((int8x8_t)__s0, (int8x16_t)__s1, __p2, 2); \ __ret; \ }) #else #define vqdmulh_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int32x2_t) __builtin_neon_vqdmulh_laneq_v((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmulh_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ __ret = (int16x4_t) __builtin_neon_vqdmulh_laneq_v((int8x8_t)__s0, (int8x16_t)__s1, __p2, 1); \ __ret; \ }) #else #define vqdmulh_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_vqdmulh_laneq_v((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif __ai int64_t vqdmulls_s32(int32_t __p0, int32_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vqdmulls_s32(__p0, __p1); return __ret; } __ai int32_t vqdmullh_s16(int16_t __p0, int16_t __p1) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqdmullh_s16(__p0, __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqdmull_high_s32(int32x4_t __p0, int32x4_t __p1) { int64x2_t __ret; __ret = vqdmull_s32(vget_high_s32(__p0), vget_high_s32(__p1)); return __ret; } #else __ai int64x2_t vqdmull_high_s32(int32x4_t __p0, int32x4_t __p1) { int64x2_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vqdmull_s32(__noswap_vget_high_s32(__rev0), __noswap_vget_high_s32(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); 
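/*
 * The vqdmulh scalar and lane forms above return only the high half of the
 * saturating doubled product (the usual Q31/Q15 fixed-point multiply),
 * whereas vqdmulls_s32, vqdmullh_s16 and the vqdmull_high forms return the
 * full doubled product widened to twice the element width.
 * A minimal usage sketch of a Q15 fixed-point multiply (hypothetical user
 * code, not part of this header):
 *
 *   int16x8_t q15_mul(int16x8_t a, int16x8_t b) {
 *       // per lane: sat((2 * a * b) >> 16), i.e. Q15 x Q15 -> Q15
 *       return vqdmulhq_s16(a, b);
 *   }
 */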
return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqdmull_high_s16(int16x8_t __p0, int16x8_t __p1) { int32x4_t __ret; __ret = vqdmull_s16(vget_high_s16(__p0), vget_high_s16(__p1)); return __ret; } #else __ai int32x4_t vqdmull_high_s16(int16x8_t __p0, int16x8_t __p1) { int32x4_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vqdmull_s16(__noswap_vget_high_s16(__rev0), __noswap_vget_high_s16(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vqdmull_high_lane_s32(__p0_639, __p1_639, __p2_639) __extension__ ({ \ int64x2_t __ret_639; \ int32x4_t __s0_639 = __p0_639; \ int32x2_t __s1_639 = __p1_639; \ __ret_639 = vqdmull_s32(vget_high_s32(__s0_639), splat_lane_s32(__s1_639, __p2_639)); \ __ret_639; \ }) #else #define vqdmull_high_lane_s32(__p0_640, __p1_640, __p2_640) __extension__ ({ \ int64x2_t __ret_640; \ int32x4_t __s0_640 = __p0_640; \ int32x2_t __s1_640 = __p1_640; \ int32x4_t __rev0_640; __rev0_640 = __builtin_shufflevector(__s0_640, __s0_640, 3, 2, 1, 0); \ int32x2_t __rev1_640; __rev1_640 = __builtin_shufflevector(__s1_640, __s1_640, 1, 0); \ __ret_640 = __noswap_vqdmull_s32(__noswap_vget_high_s32(__rev0_640), __noswap_splat_lane_s32(__rev1_640, __p2_640)); \ __ret_640 = __builtin_shufflevector(__ret_640, __ret_640, 1, 0); \ __ret_640; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmull_high_lane_s16(__p0_641, __p1_641, __p2_641) __extension__ ({ \ int32x4_t __ret_641; \ int16x8_t __s0_641 = __p0_641; \ int16x4_t __s1_641 = __p1_641; \ __ret_641 = vqdmull_s16(vget_high_s16(__s0_641), splat_lane_s16(__s1_641, __p2_641)); \ __ret_641; \ }) #else #define vqdmull_high_lane_s16(__p0_642, __p1_642, __p2_642) __extension__ ({ \ int32x4_t __ret_642; \ int16x8_t __s0_642 = __p0_642; \ int16x4_t __s1_642 = __p1_642; \ int16x8_t __rev0_642; __rev0_642 = __builtin_shufflevector(__s0_642, __s0_642, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x4_t __rev1_642; __rev1_642 = __builtin_shufflevector(__s1_642, __s1_642, 3, 2, 1, 0); \ __ret_642 = __noswap_vqdmull_s16(__noswap_vget_high_s16(__rev0_642), __noswap_splat_lane_s16(__rev1_642, __p2_642)); \ __ret_642 = __builtin_shufflevector(__ret_642, __ret_642, 3, 2, 1, 0); \ __ret_642; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmull_high_laneq_s32(__p0_643, __p1_643, __p2_643) __extension__ ({ \ int64x2_t __ret_643; \ int32x4_t __s0_643 = __p0_643; \ int32x4_t __s1_643 = __p1_643; \ __ret_643 = vqdmull_s32(vget_high_s32(__s0_643), splat_laneq_s32(__s1_643, __p2_643)); \ __ret_643; \ }) #else #define vqdmull_high_laneq_s32(__p0_644, __p1_644, __p2_644) __extension__ ({ \ int64x2_t __ret_644; \ int32x4_t __s0_644 = __p0_644; \ int32x4_t __s1_644 = __p1_644; \ int32x4_t __rev0_644; __rev0_644 = __builtin_shufflevector(__s0_644, __s0_644, 3, 2, 1, 0); \ int32x4_t __rev1_644; __rev1_644 = __builtin_shufflevector(__s1_644, __s1_644, 3, 2, 1, 0); \ __ret_644 = __noswap_vqdmull_s32(__noswap_vget_high_s32(__rev0_644), __noswap_splat_laneq_s32(__rev1_644, __p2_644)); \ __ret_644 = __builtin_shufflevector(__ret_644, __ret_644, 1, 0); \ __ret_644; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmull_high_laneq_s16(__p0_645, __p1_645, __p2_645) __extension__ ({ \ int32x4_t __ret_645; \ int16x8_t __s0_645 = __p0_645; \ int16x8_t __s1_645 = __p1_645; \ __ret_645 = vqdmull_s16(vget_high_s16(__s0_645), splat_laneq_s16(__s1_645, __p2_645)); \ 
__ret_645; \ }) #else #define vqdmull_high_laneq_s16(__p0_646, __p1_646, __p2_646) __extension__ ({ \ int32x4_t __ret_646; \ int16x8_t __s0_646 = __p0_646; \ int16x8_t __s1_646 = __p1_646; \ int16x8_t __rev0_646; __rev0_646 = __builtin_shufflevector(__s0_646, __s0_646, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1_646; __rev1_646 = __builtin_shufflevector(__s1_646, __s1_646, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_646 = __noswap_vqdmull_s16(__noswap_vget_high_s16(__rev0_646), __noswap_splat_laneq_s16(__rev1_646, __p2_646)); \ __ret_646 = __builtin_shufflevector(__ret_646, __ret_646, 3, 2, 1, 0); \ __ret_646; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqdmull_high_n_s32(int32x4_t __p0, int32_t __p1) { int64x2_t __ret; __ret = vqdmull_n_s32(vget_high_s32(__p0), __p1); return __ret; } #else __ai int64x2_t vqdmull_high_n_s32(int32x4_t __p0, int32_t __p1) { int64x2_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __noswap_vqdmull_n_s32(__noswap_vget_high_s32(__rev0), __p1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqdmull_high_n_s16(int16x8_t __p0, int16_t __p1) { int32x4_t __ret; __ret = vqdmull_n_s16(vget_high_s16(__p0), __p1); return __ret; } #else __ai int32x4_t vqdmull_high_n_s16(int16x8_t __p0, int16_t __p1) { int32x4_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vqdmull_n_s16(__noswap_vget_high_s16(__rev0), __p1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vqdmulls_lane_s32(__p0_647, __p1_647, __p2_647) __extension__ ({ \ int64_t __ret_647; \ int32_t __s0_647 = __p0_647; \ int32x2_t __s1_647 = __p1_647; \ __ret_647 = vqdmulls_s32(__s0_647, vget_lane_s32(__s1_647, __p2_647)); \ __ret_647; \ }) #else #define vqdmulls_lane_s32(__p0_648, __p1_648, __p2_648) __extension__ ({ \ int64_t __ret_648; \ int32_t __s0_648 = __p0_648; \ int32x2_t __s1_648 = __p1_648; \ int32x2_t __rev1_648; __rev1_648 = __builtin_shufflevector(__s1_648, __s1_648, 1, 0); \ __ret_648 = vqdmulls_s32(__s0_648, __noswap_vget_lane_s32(__rev1_648, __p2_648)); \ __ret_648; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmullh_lane_s16(__p0_649, __p1_649, __p2_649) __extension__ ({ \ int32_t __ret_649; \ int16_t __s0_649 = __p0_649; \ int16x4_t __s1_649 = __p1_649; \ __ret_649 = vqdmullh_s16(__s0_649, vget_lane_s16(__s1_649, __p2_649)); \ __ret_649; \ }) #else #define vqdmullh_lane_s16(__p0_650, __p1_650, __p2_650) __extension__ ({ \ int32_t __ret_650; \ int16_t __s0_650 = __p0_650; \ int16x4_t __s1_650 = __p1_650; \ int16x4_t __rev1_650; __rev1_650 = __builtin_shufflevector(__s1_650, __s1_650, 3, 2, 1, 0); \ __ret_650 = vqdmullh_s16(__s0_650, __noswap_vget_lane_s16(__rev1_650, __p2_650)); \ __ret_650; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmulls_laneq_s32(__p0_651, __p1_651, __p2_651) __extension__ ({ \ int64_t __ret_651; \ int32_t __s0_651 = __p0_651; \ int32x4_t __s1_651 = __p1_651; \ __ret_651 = vqdmulls_s32(__s0_651, vgetq_lane_s32(__s1_651, __p2_651)); \ __ret_651; \ }) #else #define vqdmulls_laneq_s32(__p0_652, __p1_652, __p2_652) __extension__ ({ \ int64_t __ret_652; \ int32_t __s0_652 = __p0_652; \ int32x4_t __s1_652 = __p1_652; \ int32x4_t __rev1_652; __rev1_652 = __builtin_shufflevector(__s1_652, __s1_652, 3, 2, 1, 0); \ __ret_652 = vqdmulls_s32(__s0_652, __noswap_vgetq_lane_s32(__rev1_652, __p2_652)); \ __ret_652; \ }) #endif #ifdef __LITTLE_ENDIAN__ 
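/*
 * The remaining vqdmull lane and laneq macros multiply a scalar by a
 * selected vector lane with saturating doubling. They are followed by the
 * saturating narrowing intrinsics: vqmovn narrows each lane to half width
 * with saturation, vqmovun narrows a signed input to an unsigned result,
 * and the _high forms place the narrowed lanes in the upper half of a wider
 * vector supplied as the first argument.
 * A minimal usage sketch (hypothetical user code, not part of this header):
 *
 *   uint8x8_t clamp_to_u8(int16x8_t v) {
 *       return vqmovun_s16(v);   // per lane: clamp to [0, 255], narrow to u8
 *   }
 */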
#define vqdmullh_laneq_s16(__p0_653, __p1_653, __p2_653) __extension__ ({ \ int32_t __ret_653; \ int16_t __s0_653 = __p0_653; \ int16x8_t __s1_653 = __p1_653; \ __ret_653 = vqdmullh_s16(__s0_653, vgetq_lane_s16(__s1_653, __p2_653)); \ __ret_653; \ }) #else #define vqdmullh_laneq_s16(__p0_654, __p1_654, __p2_654) __extension__ ({ \ int32_t __ret_654; \ int16_t __s0_654 = __p0_654; \ int16x8_t __s1_654 = __p1_654; \ int16x8_t __rev1_654; __rev1_654 = __builtin_shufflevector(__s1_654, __s1_654, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_654 = vqdmullh_s16(__s0_654, __noswap_vgetq_lane_s16(__rev1_654, __p2_654)); \ __ret_654; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmull_laneq_s32(__p0_655, __p1_655, __p2_655) __extension__ ({ \ int64x2_t __ret_655; \ int32x2_t __s0_655 = __p0_655; \ int32x4_t __s1_655 = __p1_655; \ __ret_655 = vqdmull_s32(__s0_655, splat_laneq_s32(__s1_655, __p2_655)); \ __ret_655; \ }) #else #define vqdmull_laneq_s32(__p0_656, __p1_656, __p2_656) __extension__ ({ \ int64x2_t __ret_656; \ int32x2_t __s0_656 = __p0_656; \ int32x4_t __s1_656 = __p1_656; \ int32x2_t __rev0_656; __rev0_656 = __builtin_shufflevector(__s0_656, __s0_656, 1, 0); \ int32x4_t __rev1_656; __rev1_656 = __builtin_shufflevector(__s1_656, __s1_656, 3, 2, 1, 0); \ __ret_656 = __noswap_vqdmull_s32(__rev0_656, __noswap_splat_laneq_s32(__rev1_656, __p2_656)); \ __ret_656 = __builtin_shufflevector(__ret_656, __ret_656, 1, 0); \ __ret_656; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqdmull_laneq_s16(__p0_657, __p1_657, __p2_657) __extension__ ({ \ int32x4_t __ret_657; \ int16x4_t __s0_657 = __p0_657; \ int16x8_t __s1_657 = __p1_657; \ __ret_657 = vqdmull_s16(__s0_657, splat_laneq_s16(__s1_657, __p2_657)); \ __ret_657; \ }) #else #define vqdmull_laneq_s16(__p0_658, __p1_658, __p2_658) __extension__ ({ \ int32x4_t __ret_658; \ int16x4_t __s0_658 = __p0_658; \ int16x8_t __s1_658 = __p1_658; \ int16x4_t __rev0_658; __rev0_658 = __builtin_shufflevector(__s0_658, __s0_658, 3, 2, 1, 0); \ int16x8_t __rev1_658; __rev1_658 = __builtin_shufflevector(__s1_658, __s1_658, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_658 = __noswap_vqdmull_s16(__rev0_658, __noswap_splat_laneq_s16(__rev1_658, __p2_658)); \ __ret_658 = __builtin_shufflevector(__ret_658, __ret_658, 3, 2, 1, 0); \ __ret_658; \ }) #endif __ai int16_t vqmovns_s32(int32_t __p0) { int16_t __ret; __ret = (int16_t) __builtin_neon_vqmovns_s32(__p0); return __ret; } __ai int32_t vqmovnd_s64(int64_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqmovnd_s64(__p0); return __ret; } __ai int8_t vqmovnh_s16(int16_t __p0) { int8_t __ret; __ret = (int8_t) __builtin_neon_vqmovnh_s16(__p0); return __ret; } __ai uint16_t vqmovns_u32(uint32_t __p0) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vqmovns_u32(__p0); return __ret; } __ai uint32_t vqmovnd_u64(uint64_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vqmovnd_u64(__p0); return __ret; } __ai uint8_t vqmovnh_u16(uint16_t __p0) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vqmovnh_u16(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vqmovn_high_u32(uint16x4_t __p0, uint32x4_t __p1) { uint16x8_t __ret; __ret = vcombine_u16(__p0, vqmovn_u32(__p1)); return __ret; } #else __ai uint16x8_t vqmovn_high_u32(uint16x4_t __p0, uint32x4_t __p1) { uint16x8_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vcombine_u16(__rev0, __noswap_vqmovn_u32(__rev1)); __ret = 
__builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vqmovn_high_u64(uint32x2_t __p0, uint64x2_t __p1) { uint32x4_t __ret; __ret = vcombine_u32(__p0, vqmovn_u64(__p1)); return __ret; } #else __ai uint32x4_t vqmovn_high_u64(uint32x2_t __p0, uint64x2_t __p1) { uint32x4_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __noswap_vcombine_u32(__rev0, __noswap_vqmovn_u64(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vqmovn_high_u16(uint8x8_t __p0, uint16x8_t __p1) { uint8x16_t __ret; __ret = vcombine_u8(__p0, vqmovn_u16(__p1)); return __ret; } #else __ai uint8x16_t vqmovn_high_u16(uint8x8_t __p0, uint16x8_t __p1) { uint8x16_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vcombine_u8(__rev0, __noswap_vqmovn_u16(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vqmovn_high_s32(int16x4_t __p0, int32x4_t __p1) { int16x8_t __ret; __ret = vcombine_s16(__p0, vqmovn_s32(__p1)); return __ret; } #else __ai int16x8_t vqmovn_high_s32(int16x4_t __p0, int32x4_t __p1) { int16x8_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vcombine_s16(__rev0, __noswap_vqmovn_s32(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqmovn_high_s64(int32x2_t __p0, int64x2_t __p1) { int32x4_t __ret; __ret = vcombine_s32(__p0, vqmovn_s64(__p1)); return __ret; } #else __ai int32x4_t vqmovn_high_s64(int32x2_t __p0, int64x2_t __p1) { int32x4_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __noswap_vcombine_s32(__rev0, __noswap_vqmovn_s64(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vqmovn_high_s16(int8x8_t __p0, int16x8_t __p1) { int8x16_t __ret; __ret = vcombine_s8(__p0, vqmovn_s16(__p1)); return __ret; } #else __ai int8x16_t vqmovn_high_s16(int8x8_t __p0, int16x8_t __p1) { int8x16_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vcombine_s8(__rev0, __noswap_vqmovn_s16(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif __ai uint16_t vqmovuns_s32(int32_t __p0) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vqmovuns_s32(__p0); return __ret; } __ai uint32_t vqmovund_s64(int64_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vqmovund_s64(__p0); return __ret; } __ai uint8_t vqmovunh_s16(int16_t __p0) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vqmovunh_s16(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vqmovun_high_s32(uint16x4_t __p0, int32x4_t __p1) { uint16x8_t __ret; __ret = vcombine_u16((uint16x4_t)(__p0), 
vqmovun_s32(__p1)); return __ret; } #else __ai uint16x8_t vqmovun_high_s32(uint16x4_t __p0, int32x4_t __p1) { uint16x8_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vcombine_u16((uint16x4_t)(__rev0), __noswap_vqmovun_s32(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vqmovun_high_s64(uint32x2_t __p0, int64x2_t __p1) { uint32x4_t __ret; __ret = vcombine_u32((uint32x2_t)(__p0), vqmovun_s64(__p1)); return __ret; } #else __ai uint32x4_t vqmovun_high_s64(uint32x2_t __p0, int64x2_t __p1) { uint32x4_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __noswap_vcombine_u32((uint32x2_t)(__rev0), __noswap_vqmovun_s64(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vqmovun_high_s16(uint8x8_t __p0, int16x8_t __p1) { uint8x16_t __ret; __ret = vcombine_u8((uint8x8_t)(__p0), vqmovun_s16(__p1)); return __ret; } #else __ai uint8x16_t vqmovun_high_s16(uint8x8_t __p0, int16x8_t __p1) { uint8x16_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vcombine_u8((uint8x8_t)(__rev0), __noswap_vqmovun_s16(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqnegq_s64(int64x2_t __p0) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vqnegq_v((int8x16_t)__p0, 35); return __ret; } #else __ai int64x2_t vqnegq_s64(int64x2_t __p0) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (int64x2_t) __builtin_neon_vqnegq_v((int8x16_t)__rev0, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vqneg_s64(int64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vqneg_v((int8x8_t)__p0, 3); return __ret; } __ai int8_t vqnegb_s8(int8_t __p0) { int8_t __ret; __ret = (int8_t) __builtin_neon_vqnegb_s8(__p0); return __ret; } __ai int32_t vqnegs_s32(int32_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqnegs_s32(__p0); return __ret; } __ai int64_t vqnegd_s64(int64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vqnegd_s64(__p0); return __ret; } __ai int16_t vqnegh_s16(int16_t __p0) { int16_t __ret; __ret = (int16_t) __builtin_neon_vqnegh_s16(__p0); return __ret; } __ai int32_t vqrdmulhs_s32(int32_t __p0, int32_t __p1) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqrdmulhs_s32(__p0, __p1); return __ret; } __ai int16_t vqrdmulhh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; __ret = (int16_t) __builtin_neon_vqrdmulhh_s16(__p0, __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ #define vqrdmulhq_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ __ret = (int32x4_t) __builtin_neon_vqrdmulhq_lane_v((int8x16_t)__s0, (int8x8_t)__s1, __p2, 2); \ __ret; \ }) #else #define vqrdmulhq_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ 
int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (int32x4_t) __builtin_neon_vqrdmulhq_lane_v((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmulhq_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ __ret = (int16x8_t) __builtin_neon_vqrdmulhq_lane_v((int8x16_t)__s0, (int8x8_t)__s1, __p2, 1); \ __ret; \ }) #else #define vqrdmulhq_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int16x8_t) __builtin_neon_vqrdmulhq_lane_v((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmulh_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ __ret = (int32x2_t) __builtin_neon_vqrdmulh_lane_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \ __ret; \ }) #else #define vqrdmulh_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (int32x2_t) __builtin_neon_vqrdmulh_lane_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmulh_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ __ret = (int16x4_t) __builtin_neon_vqrdmulh_lane_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \ __ret; \ }) #else #define vqrdmulh_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_vqrdmulh_lane_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmulhs_lane_s32(__p0_659, __p1_659, __p2_659) __extension__ ({ \ int32_t __ret_659; \ int32_t __s0_659 = __p0_659; \ int32x2_t __s1_659 = __p1_659; \ __ret_659 = vqrdmulhs_s32(__s0_659, vget_lane_s32(__s1_659, __p2_659)); \ __ret_659; \ }) #else #define vqrdmulhs_lane_s32(__p0_660, __p1_660, __p2_660) __extension__ ({ \ int32_t __ret_660; \ int32_t __s0_660 = __p0_660; \ int32x2_t __s1_660 = __p1_660; \ int32x2_t __rev1_660; __rev1_660 = __builtin_shufflevector(__s1_660, __s1_660, 1, 0); \ __ret_660 = vqrdmulhs_s32(__s0_660, __noswap_vget_lane_s32(__rev1_660, __p2_660)); \ __ret_660; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmulhh_lane_s16(__p0_661, __p1_661, __p2_661) __extension__ ({ \ int16_t __ret_661; \ int16_t __s0_661 = __p0_661; \ int16x4_t __s1_661 = __p1_661; \ __ret_661 = vqrdmulhh_s16(__s0_661, vget_lane_s16(__s1_661, __p2_661)); \ __ret_661; \ }) #else #define vqrdmulhh_lane_s16(__p0_662, __p1_662, __p2_662) __extension__ ({ \ int16_t __ret_662; \ int16_t __s0_662 = __p0_662; \ int16x4_t __s1_662 = 
__p1_662; \ int16x4_t __rev1_662; __rev1_662 = __builtin_shufflevector(__s1_662, __s1_662, 3, 2, 1, 0); \ __ret_662 = vqrdmulhh_s16(__s0_662, __noswap_vget_lane_s16(__rev1_662, __p2_662)); \ __ret_662; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmulhs_laneq_s32(__p0_663, __p1_663, __p2_663) __extension__ ({ \ int32_t __ret_663; \ int32_t __s0_663 = __p0_663; \ int32x4_t __s1_663 = __p1_663; \ __ret_663 = vqrdmulhs_s32(__s0_663, vgetq_lane_s32(__s1_663, __p2_663)); \ __ret_663; \ }) #else #define vqrdmulhs_laneq_s32(__p0_664, __p1_664, __p2_664) __extension__ ({ \ int32_t __ret_664; \ int32_t __s0_664 = __p0_664; \ int32x4_t __s1_664 = __p1_664; \ int32x4_t __rev1_664; __rev1_664 = __builtin_shufflevector(__s1_664, __s1_664, 3, 2, 1, 0); \ __ret_664 = vqrdmulhs_s32(__s0_664, __noswap_vgetq_lane_s32(__rev1_664, __p2_664)); \ __ret_664; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmulhh_laneq_s16(__p0_665, __p1_665, __p2_665) __extension__ ({ \ int16_t __ret_665; \ int16_t __s0_665 = __p0_665; \ int16x8_t __s1_665 = __p1_665; \ __ret_665 = vqrdmulhh_s16(__s0_665, vgetq_lane_s16(__s1_665, __p2_665)); \ __ret_665; \ }) #else #define vqrdmulhh_laneq_s16(__p0_666, __p1_666, __p2_666) __extension__ ({ \ int16_t __ret_666; \ int16_t __s0_666 = __p0_666; \ int16x8_t __s1_666 = __p1_666; \ int16x8_t __rev1_666; __rev1_666 = __builtin_shufflevector(__s1_666, __s1_666, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_666 = vqrdmulhh_s16(__s0_666, __noswap_vgetq_lane_s16(__rev1_666, __p2_666)); \ __ret_666; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmulhq_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ __ret = (int32x4_t) __builtin_neon_vqrdmulhq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \ __ret; \ }) #else #define vqrdmulhq_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int32x4_t) __builtin_neon_vqrdmulhq_laneq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmulhq_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ __ret = (int16x8_t) __builtin_neon_vqrdmulhq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ __ret; \ }) #else #define vqrdmulhq_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16x8_t) __builtin_neon_vqrdmulhq_laneq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmulh_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ __ret = (int32x2_t) __builtin_neon_vqrdmulh_laneq_v((int8x8_t)__s0, (int8x16_t)__s1, __p2, 2); \ __ret; \ }) #else #define vqrdmulh_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int32x4_t 
__rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (int32x2_t) __builtin_neon_vqrdmulh_laneq_v((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmulh_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ __ret = (int16x4_t) __builtin_neon_vqrdmulh_laneq_v((int8x8_t)__s0, (int8x16_t)__s1, __p2, 1); \ __ret; \ }) #else #define vqrdmulh_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (int16x4_t) __builtin_neon_vqrdmulh_laneq_v((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 1); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif __ai uint8_t vqrshlb_u8(uint8_t __p0, int8_t __p1) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vqrshlb_u8(__p0, __p1); return __ret; } __ai uint32_t vqrshls_u32(uint32_t __p0, int32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vqrshls_u32(__p0, __p1); return __ret; } __ai uint64_t vqrshld_u64(uint64_t __p0, int64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vqrshld_u64(__p0, __p1); return __ret; } __ai uint16_t vqrshlh_u16(uint16_t __p0, int16_t __p1) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vqrshlh_u16(__p0, __p1); return __ret; } __ai int8_t vqrshlb_s8(int8_t __p0, int8_t __p1) { int8_t __ret; __ret = (int8_t) __builtin_neon_vqrshlb_s8(__p0, __p1); return __ret; } __ai int32_t vqrshls_s32(int32_t __p0, int32_t __p1) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqrshls_s32(__p0, __p1); return __ret; } __ai int64_t vqrshld_s64(int64_t __p0, int64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vqrshld_s64(__p0, __p1); return __ret; } __ai int16_t vqrshlh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; __ret = (int16_t) __builtin_neon_vqrshlh_s16(__p0, __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ #define vqrshrn_high_n_u32(__p0_667, __p1_667, __p2_667) __extension__ ({ \ uint16x8_t __ret_667; \ uint16x4_t __s0_667 = __p0_667; \ uint32x4_t __s1_667 = __p1_667; \ __ret_667 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_667), (uint16x4_t)(vqrshrn_n_u32(__s1_667, __p2_667)))); \ __ret_667; \ }) #else #define vqrshrn_high_n_u32(__p0_668, __p1_668, __p2_668) __extension__ ({ \ uint16x8_t __ret_668; \ uint16x4_t __s0_668 = __p0_668; \ uint32x4_t __s1_668 = __p1_668; \ uint16x4_t __rev0_668; __rev0_668 = __builtin_shufflevector(__s0_668, __s0_668, 3, 2, 1, 0); \ uint32x4_t __rev1_668; __rev1_668 = __builtin_shufflevector(__s1_668, __s1_668, 3, 2, 1, 0); \ __ret_668 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_668), (uint16x4_t)(__noswap_vqrshrn_n_u32(__rev1_668, __p2_668)))); \ __ret_668 = __builtin_shufflevector(__ret_668, __ret_668, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_668; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrshrn_high_n_u64(__p0_669, __p1_669, __p2_669) __extension__ ({ \ uint32x4_t __ret_669; \ uint32x2_t __s0_669 = __p0_669; \ uint64x2_t __s1_669 = __p1_669; \ __ret_669 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_669), (uint32x2_t)(vqrshrn_n_u64(__s1_669, __p2_669)))); \ __ret_669; \ }) #else #define vqrshrn_high_n_u64(__p0_670, __p1_670, __p2_670) __extension__ ({ \ uint32x4_t __ret_670; \ uint32x2_t __s0_670 = __p0_670; \ 
uint64x2_t __s1_670 = __p1_670; \ uint32x2_t __rev0_670; __rev0_670 = __builtin_shufflevector(__s0_670, __s0_670, 1, 0); \ uint64x2_t __rev1_670; __rev1_670 = __builtin_shufflevector(__s1_670, __s1_670, 1, 0); \ __ret_670 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_670), (uint32x2_t)(__noswap_vqrshrn_n_u64(__rev1_670, __p2_670)))); \ __ret_670 = __builtin_shufflevector(__ret_670, __ret_670, 3, 2, 1, 0); \ __ret_670; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrshrn_high_n_u16(__p0_671, __p1_671, __p2_671) __extension__ ({ \ uint8x16_t __ret_671; \ uint8x8_t __s0_671 = __p0_671; \ uint16x8_t __s1_671 = __p1_671; \ __ret_671 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_671), (uint8x8_t)(vqrshrn_n_u16(__s1_671, __p2_671)))); \ __ret_671; \ }) #else #define vqrshrn_high_n_u16(__p0_672, __p1_672, __p2_672) __extension__ ({ \ uint8x16_t __ret_672; \ uint8x8_t __s0_672 = __p0_672; \ uint16x8_t __s1_672 = __p1_672; \ uint8x8_t __rev0_672; __rev0_672 = __builtin_shufflevector(__s0_672, __s0_672, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev1_672; __rev1_672 = __builtin_shufflevector(__s1_672, __s1_672, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_672 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_672), (uint8x8_t)(__noswap_vqrshrn_n_u16(__rev1_672, __p2_672)))); \ __ret_672 = __builtin_shufflevector(__ret_672, __ret_672, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_672; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrshrn_high_n_s32(__p0_673, __p1_673, __p2_673) __extension__ ({ \ int16x8_t __ret_673; \ int16x4_t __s0_673 = __p0_673; \ int32x4_t __s1_673 = __p1_673; \ __ret_673 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_673), (int16x4_t)(vqrshrn_n_s32(__s1_673, __p2_673)))); \ __ret_673; \ }) #else #define vqrshrn_high_n_s32(__p0_674, __p1_674, __p2_674) __extension__ ({ \ int16x8_t __ret_674; \ int16x4_t __s0_674 = __p0_674; \ int32x4_t __s1_674 = __p1_674; \ int16x4_t __rev0_674; __rev0_674 = __builtin_shufflevector(__s0_674, __s0_674, 3, 2, 1, 0); \ int32x4_t __rev1_674; __rev1_674 = __builtin_shufflevector(__s1_674, __s1_674, 3, 2, 1, 0); \ __ret_674 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_674), (int16x4_t)(__noswap_vqrshrn_n_s32(__rev1_674, __p2_674)))); \ __ret_674 = __builtin_shufflevector(__ret_674, __ret_674, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_674; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrshrn_high_n_s64(__p0_675, __p1_675, __p2_675) __extension__ ({ \ int32x4_t __ret_675; \ int32x2_t __s0_675 = __p0_675; \ int64x2_t __s1_675 = __p1_675; \ __ret_675 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_675), (int32x2_t)(vqrshrn_n_s64(__s1_675, __p2_675)))); \ __ret_675; \ }) #else #define vqrshrn_high_n_s64(__p0_676, __p1_676, __p2_676) __extension__ ({ \ int32x4_t __ret_676; \ int32x2_t __s0_676 = __p0_676; \ int64x2_t __s1_676 = __p1_676; \ int32x2_t __rev0_676; __rev0_676 = __builtin_shufflevector(__s0_676, __s0_676, 1, 0); \ int64x2_t __rev1_676; __rev1_676 = __builtin_shufflevector(__s1_676, __s1_676, 1, 0); \ __ret_676 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_676), (int32x2_t)(__noswap_vqrshrn_n_s64(__rev1_676, __p2_676)))); \ __ret_676 = __builtin_shufflevector(__ret_676, __ret_676, 3, 2, 1, 0); \ __ret_676; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrshrn_high_n_s16(__p0_677, __p1_677, __p2_677) __extension__ ({ \ int8x16_t __ret_677; \ int8x8_t __s0_677 = __p0_677; \ int16x8_t __s1_677 = __p1_677; \ __ret_677 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_677), (int8x8_t)(vqrshrn_n_s16(__s1_677, __p2_677)))); \ __ret_677; \ }) 
#else #define vqrshrn_high_n_s16(__p0_678, __p1_678, __p2_678) __extension__ ({ \ int8x16_t __ret_678; \ int8x8_t __s0_678 = __p0_678; \ int16x8_t __s1_678 = __p1_678; \ int8x8_t __rev0_678; __rev0_678 = __builtin_shufflevector(__s0_678, __s0_678, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1_678; __rev1_678 = __builtin_shufflevector(__s1_678, __s1_678, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_678 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_678), (int8x8_t)(__noswap_vqrshrn_n_s16(__rev1_678, __p2_678)))); \ __ret_678 = __builtin_shufflevector(__ret_678, __ret_678, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_678; \ }) #endif #define vqrshrns_n_u32(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint32_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vqrshrns_n_u32(__s0, __p1); \ __ret; \ }) #define vqrshrnd_n_u64(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint64_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vqrshrnd_n_u64(__s0, __p1); \ __ret; \ }) #define vqrshrnh_n_u16(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint16_t __s0 = __p0; \ __ret = (uint8_t) __builtin_neon_vqrshrnh_n_u16(__s0, __p1); \ __ret; \ }) #define vqrshrns_n_s32(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int32_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vqrshrns_n_s32(__s0, __p1); \ __ret; \ }) #define vqrshrnd_n_s64(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int64_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vqrshrnd_n_s64(__s0, __p1); \ __ret; \ }) #define vqrshrnh_n_s16(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int16_t __s0 = __p0; \ __ret = (int8_t) __builtin_neon_vqrshrnh_n_s16(__s0, __p1); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vqrshrun_high_n_s32(__p0_679, __p1_679, __p2_679) __extension__ ({ \ int16x8_t __ret_679; \ int16x4_t __s0_679 = __p0_679; \ int32x4_t __s1_679 = __p1_679; \ __ret_679 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_679), (int16x4_t)(vqrshrun_n_s32(__s1_679, __p2_679)))); \ __ret_679; \ }) #else #define vqrshrun_high_n_s32(__p0_680, __p1_680, __p2_680) __extension__ ({ \ int16x8_t __ret_680; \ int16x4_t __s0_680 = __p0_680; \ int32x4_t __s1_680 = __p1_680; \ int16x4_t __rev0_680; __rev0_680 = __builtin_shufflevector(__s0_680, __s0_680, 3, 2, 1, 0); \ int32x4_t __rev1_680; __rev1_680 = __builtin_shufflevector(__s1_680, __s1_680, 3, 2, 1, 0); \ __ret_680 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_680), (int16x4_t)(__noswap_vqrshrun_n_s32(__rev1_680, __p2_680)))); \ __ret_680 = __builtin_shufflevector(__ret_680, __ret_680, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_680; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrshrun_high_n_s64(__p0_681, __p1_681, __p2_681) __extension__ ({ \ int32x4_t __ret_681; \ int32x2_t __s0_681 = __p0_681; \ int64x2_t __s1_681 = __p1_681; \ __ret_681 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_681), (int32x2_t)(vqrshrun_n_s64(__s1_681, __p2_681)))); \ __ret_681; \ }) #else #define vqrshrun_high_n_s64(__p0_682, __p1_682, __p2_682) __extension__ ({ \ int32x4_t __ret_682; \ int32x2_t __s0_682 = __p0_682; \ int64x2_t __s1_682 = __p1_682; \ int32x2_t __rev0_682; __rev0_682 = __builtin_shufflevector(__s0_682, __s0_682, 1, 0); \ int64x2_t __rev1_682; __rev1_682 = __builtin_shufflevector(__s1_682, __s1_682, 1, 0); \ __ret_682 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_682), (int32x2_t)(__noswap_vqrshrun_n_s64(__rev1_682, __p2_682)))); \ __ret_682 = __builtin_shufflevector(__ret_682, __ret_682, 3, 2, 1, 0); \ __ret_682; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrshrun_high_n_s16(__p0_683, 
__p1_683, __p2_683) __extension__ ({ \ int8x16_t __ret_683; \ int8x8_t __s0_683 = __p0_683; \ int16x8_t __s1_683 = __p1_683; \ __ret_683 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_683), (int8x8_t)(vqrshrun_n_s16(__s1_683, __p2_683)))); \ __ret_683; \ }) #else #define vqrshrun_high_n_s16(__p0_684, __p1_684, __p2_684) __extension__ ({ \ int8x16_t __ret_684; \ int8x8_t __s0_684 = __p0_684; \ int16x8_t __s1_684 = __p1_684; \ int8x8_t __rev0_684; __rev0_684 = __builtin_shufflevector(__s0_684, __s0_684, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1_684; __rev1_684 = __builtin_shufflevector(__s1_684, __s1_684, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_684 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_684), (int8x8_t)(__noswap_vqrshrun_n_s16(__rev1_684, __p2_684)))); \ __ret_684 = __builtin_shufflevector(__ret_684, __ret_684, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_684; \ }) #endif #define vqrshruns_n_s32(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ int32_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vqrshruns_n_s32(__s0, __p1); \ __ret; \ }) #define vqrshrund_n_s64(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ int64_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vqrshrund_n_s64(__s0, __p1); \ __ret; \ }) #define vqrshrunh_n_s16(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ int16_t __s0 = __p0; \ __ret = (uint8_t) __builtin_neon_vqrshrunh_n_s16(__s0, __p1); \ __ret; \ }) __ai uint8_t vqshlb_u8(uint8_t __p0, int8_t __p1) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vqshlb_u8(__p0, __p1); return __ret; } __ai uint32_t vqshls_u32(uint32_t __p0, int32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vqshls_u32(__p0, __p1); return __ret; } __ai uint64_t vqshld_u64(uint64_t __p0, int64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vqshld_u64(__p0, __p1); return __ret; } __ai uint16_t vqshlh_u16(uint16_t __p0, int16_t __p1) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vqshlh_u16(__p0, __p1); return __ret; } __ai int8_t vqshlb_s8(int8_t __p0, int8_t __p1) { int8_t __ret; __ret = (int8_t) __builtin_neon_vqshlb_s8(__p0, __p1); return __ret; } __ai int32_t vqshls_s32(int32_t __p0, int32_t __p1) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqshls_s32(__p0, __p1); return __ret; } __ai int64_t vqshld_s64(int64_t __p0, int64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vqshld_s64(__p0, __p1); return __ret; } __ai int16_t vqshlh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; __ret = (int16_t) __builtin_neon_vqshlh_s16(__p0, __p1); return __ret; } #define vqshlb_n_u8(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint8_t __s0 = __p0; \ __ret = (uint8_t) __builtin_neon_vqshlb_n_u8(__s0, __p1); \ __ret; \ }) #define vqshls_n_u32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint32_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vqshls_n_u32(__s0, __p1); \ __ret; \ }) #define vqshld_n_u64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ uint64_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vqshld_n_u64(__s0, __p1); \ __ret; \ }) #define vqshlh_n_u16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vqshlh_n_u16(__s0, __p1); \ __ret; \ }) #define vqshlb_n_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8_t __s0 = __p0; \ __ret = (int8_t) __builtin_neon_vqshlb_n_s8(__s0, __p1); \ __ret; \ }) #define vqshls_n_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vqshls_n_s32(__s0, __p1); \ __ret; \ }) 
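/*
 * Illustrative usage sketch, not part of the original header: the scalar
 * saturating-shift intrinsics defined above (e.g. vqshls_s32) clamp the
 * result to the representable range instead of wrapping on overflow, which
 * is the usual building block for fixed-point (Q31-style) gain scaling.
 * The helper name below is hypothetical and assumes an AArch64 target on
 * which this header is in effect.
 */
__ai int32_t example_q31_scale(int32_t sample, int32_t shift) {
  /* Saturating variable left shift: a result that would exceed INT32_MAX
   * (or fall below INT32_MIN) is clamped rather than wrapped. */
  return vqshls_s32(sample, shift);
}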
#define vqshld_n_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vqshld_n_s64(__s0, __p1); \ __ret; \ }) #define vqshlh_n_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vqshlh_n_s16(__s0, __p1); \ __ret; \ }) #define vqshlub_n_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8_t __s0 = __p0; \ __ret = (int8_t) __builtin_neon_vqshlub_n_s8(__s0, __p1); \ __ret; \ }) #define vqshlus_n_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vqshlus_n_s32(__s0, __p1); \ __ret; \ }) #define vqshlud_n_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vqshlud_n_s64(__s0, __p1); \ __ret; \ }) #define vqshluh_n_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vqshluh_n_s16(__s0, __p1); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vqshrn_high_n_u32(__p0_685, __p1_685, __p2_685) __extension__ ({ \ uint16x8_t __ret_685; \ uint16x4_t __s0_685 = __p0_685; \ uint32x4_t __s1_685 = __p1_685; \ __ret_685 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_685), (uint16x4_t)(vqshrn_n_u32(__s1_685, __p2_685)))); \ __ret_685; \ }) #else #define vqshrn_high_n_u32(__p0_686, __p1_686, __p2_686) __extension__ ({ \ uint16x8_t __ret_686; \ uint16x4_t __s0_686 = __p0_686; \ uint32x4_t __s1_686 = __p1_686; \ uint16x4_t __rev0_686; __rev0_686 = __builtin_shufflevector(__s0_686, __s0_686, 3, 2, 1, 0); \ uint32x4_t __rev1_686; __rev1_686 = __builtin_shufflevector(__s1_686, __s1_686, 3, 2, 1, 0); \ __ret_686 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_686), (uint16x4_t)(__noswap_vqshrn_n_u32(__rev1_686, __p2_686)))); \ __ret_686 = __builtin_shufflevector(__ret_686, __ret_686, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_686; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshrn_high_n_u64(__p0_687, __p1_687, __p2_687) __extension__ ({ \ uint32x4_t __ret_687; \ uint32x2_t __s0_687 = __p0_687; \ uint64x2_t __s1_687 = __p1_687; \ __ret_687 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_687), (uint32x2_t)(vqshrn_n_u64(__s1_687, __p2_687)))); \ __ret_687; \ }) #else #define vqshrn_high_n_u64(__p0_688, __p1_688, __p2_688) __extension__ ({ \ uint32x4_t __ret_688; \ uint32x2_t __s0_688 = __p0_688; \ uint64x2_t __s1_688 = __p1_688; \ uint32x2_t __rev0_688; __rev0_688 = __builtin_shufflevector(__s0_688, __s0_688, 1, 0); \ uint64x2_t __rev1_688; __rev1_688 = __builtin_shufflevector(__s1_688, __s1_688, 1, 0); \ __ret_688 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_688), (uint32x2_t)(__noswap_vqshrn_n_u64(__rev1_688, __p2_688)))); \ __ret_688 = __builtin_shufflevector(__ret_688, __ret_688, 3, 2, 1, 0); \ __ret_688; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshrn_high_n_u16(__p0_689, __p1_689, __p2_689) __extension__ ({ \ uint8x16_t __ret_689; \ uint8x8_t __s0_689 = __p0_689; \ uint16x8_t __s1_689 = __p1_689; \ __ret_689 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_689), (uint8x8_t)(vqshrn_n_u16(__s1_689, __p2_689)))); \ __ret_689; \ }) #else #define vqshrn_high_n_u16(__p0_690, __p1_690, __p2_690) __extension__ ({ \ uint8x16_t __ret_690; \ uint8x8_t __s0_690 = __p0_690; \ uint16x8_t __s1_690 = __p1_690; \ uint8x8_t __rev0_690; __rev0_690 = __builtin_shufflevector(__s0_690, __s0_690, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev1_690; __rev1_690 = __builtin_shufflevector(__s1_690, __s1_690, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_690 = 
(uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_690), (uint8x8_t)(__noswap_vqshrn_n_u16(__rev1_690, __p2_690)))); \ __ret_690 = __builtin_shufflevector(__ret_690, __ret_690, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_690; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshrn_high_n_s32(__p0_691, __p1_691, __p2_691) __extension__ ({ \ int16x8_t __ret_691; \ int16x4_t __s0_691 = __p0_691; \ int32x4_t __s1_691 = __p1_691; \ __ret_691 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_691), (int16x4_t)(vqshrn_n_s32(__s1_691, __p2_691)))); \ __ret_691; \ }) #else #define vqshrn_high_n_s32(__p0_692, __p1_692, __p2_692) __extension__ ({ \ int16x8_t __ret_692; \ int16x4_t __s0_692 = __p0_692; \ int32x4_t __s1_692 = __p1_692; \ int16x4_t __rev0_692; __rev0_692 = __builtin_shufflevector(__s0_692, __s0_692, 3, 2, 1, 0); \ int32x4_t __rev1_692; __rev1_692 = __builtin_shufflevector(__s1_692, __s1_692, 3, 2, 1, 0); \ __ret_692 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_692), (int16x4_t)(__noswap_vqshrn_n_s32(__rev1_692, __p2_692)))); \ __ret_692 = __builtin_shufflevector(__ret_692, __ret_692, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_692; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshrn_high_n_s64(__p0_693, __p1_693, __p2_693) __extension__ ({ \ int32x4_t __ret_693; \ int32x2_t __s0_693 = __p0_693; \ int64x2_t __s1_693 = __p1_693; \ __ret_693 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_693), (int32x2_t)(vqshrn_n_s64(__s1_693, __p2_693)))); \ __ret_693; \ }) #else #define vqshrn_high_n_s64(__p0_694, __p1_694, __p2_694) __extension__ ({ \ int32x4_t __ret_694; \ int32x2_t __s0_694 = __p0_694; \ int64x2_t __s1_694 = __p1_694; \ int32x2_t __rev0_694; __rev0_694 = __builtin_shufflevector(__s0_694, __s0_694, 1, 0); \ int64x2_t __rev1_694; __rev1_694 = __builtin_shufflevector(__s1_694, __s1_694, 1, 0); \ __ret_694 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_694), (int32x2_t)(__noswap_vqshrn_n_s64(__rev1_694, __p2_694)))); \ __ret_694 = __builtin_shufflevector(__ret_694, __ret_694, 3, 2, 1, 0); \ __ret_694; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshrn_high_n_s16(__p0_695, __p1_695, __p2_695) __extension__ ({ \ int8x16_t __ret_695; \ int8x8_t __s0_695 = __p0_695; \ int16x8_t __s1_695 = __p1_695; \ __ret_695 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_695), (int8x8_t)(vqshrn_n_s16(__s1_695, __p2_695)))); \ __ret_695; \ }) #else #define vqshrn_high_n_s16(__p0_696, __p1_696, __p2_696) __extension__ ({ \ int8x16_t __ret_696; \ int8x8_t __s0_696 = __p0_696; \ int16x8_t __s1_696 = __p1_696; \ int8x8_t __rev0_696; __rev0_696 = __builtin_shufflevector(__s0_696, __s0_696, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1_696; __rev1_696 = __builtin_shufflevector(__s1_696, __s1_696, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_696 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_696), (int8x8_t)(__noswap_vqshrn_n_s16(__rev1_696, __p2_696)))); \ __ret_696 = __builtin_shufflevector(__ret_696, __ret_696, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_696; \ }) #endif #define vqshrns_n_u32(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint32_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vqshrns_n_u32(__s0, __p1); \ __ret; \ }) #define vqshrnd_n_u64(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint64_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vqshrnd_n_u64(__s0, __p1); \ __ret; \ }) #define vqshrnh_n_u16(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint16_t __s0 = __p0; \ __ret = (uint8_t) __builtin_neon_vqshrnh_n_u16(__s0, __p1); \ __ret; \ }) #define 
vqshrns_n_s32(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int32_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vqshrns_n_s32(__s0, __p1); \ __ret; \ }) #define vqshrnd_n_s64(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int64_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vqshrnd_n_s64(__s0, __p1); \ __ret; \ }) #define vqshrnh_n_s16(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int16_t __s0 = __p0; \ __ret = (int8_t) __builtin_neon_vqshrnh_n_s16(__s0, __p1); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vqshrun_high_n_s32(__p0_697, __p1_697, __p2_697) __extension__ ({ \ int16x8_t __ret_697; \ int16x4_t __s0_697 = __p0_697; \ int32x4_t __s1_697 = __p1_697; \ __ret_697 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_697), (int16x4_t)(vqshrun_n_s32(__s1_697, __p2_697)))); \ __ret_697; \ }) #else #define vqshrun_high_n_s32(__p0_698, __p1_698, __p2_698) __extension__ ({ \ int16x8_t __ret_698; \ int16x4_t __s0_698 = __p0_698; \ int32x4_t __s1_698 = __p1_698; \ int16x4_t __rev0_698; __rev0_698 = __builtin_shufflevector(__s0_698, __s0_698, 3, 2, 1, 0); \ int32x4_t __rev1_698; __rev1_698 = __builtin_shufflevector(__s1_698, __s1_698, 3, 2, 1, 0); \ __ret_698 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_698), (int16x4_t)(__noswap_vqshrun_n_s32(__rev1_698, __p2_698)))); \ __ret_698 = __builtin_shufflevector(__ret_698, __ret_698, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_698; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshrun_high_n_s64(__p0_699, __p1_699, __p2_699) __extension__ ({ \ int32x4_t __ret_699; \ int32x2_t __s0_699 = __p0_699; \ int64x2_t __s1_699 = __p1_699; \ __ret_699 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_699), (int32x2_t)(vqshrun_n_s64(__s1_699, __p2_699)))); \ __ret_699; \ }) #else #define vqshrun_high_n_s64(__p0_700, __p1_700, __p2_700) __extension__ ({ \ int32x4_t __ret_700; \ int32x2_t __s0_700 = __p0_700; \ int64x2_t __s1_700 = __p1_700; \ int32x2_t __rev0_700; __rev0_700 = __builtin_shufflevector(__s0_700, __s0_700, 1, 0); \ int64x2_t __rev1_700; __rev1_700 = __builtin_shufflevector(__s1_700, __s1_700, 1, 0); \ __ret_700 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_700), (int32x2_t)(__noswap_vqshrun_n_s64(__rev1_700, __p2_700)))); \ __ret_700 = __builtin_shufflevector(__ret_700, __ret_700, 3, 2, 1, 0); \ __ret_700; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqshrun_high_n_s16(__p0_701, __p1_701, __p2_701) __extension__ ({ \ int8x16_t __ret_701; \ int8x8_t __s0_701 = __p0_701; \ int16x8_t __s1_701 = __p1_701; \ __ret_701 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_701), (int8x8_t)(vqshrun_n_s16(__s1_701, __p2_701)))); \ __ret_701; \ }) #else #define vqshrun_high_n_s16(__p0_702, __p1_702, __p2_702) __extension__ ({ \ int8x16_t __ret_702; \ int8x8_t __s0_702 = __p0_702; \ int16x8_t __s1_702 = __p1_702; \ int8x8_t __rev0_702; __rev0_702 = __builtin_shufflevector(__s0_702, __s0_702, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1_702; __rev1_702 = __builtin_shufflevector(__s1_702, __s1_702, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_702 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_702), (int8x8_t)(__noswap_vqshrun_n_s16(__rev1_702, __p2_702)))); \ __ret_702 = __builtin_shufflevector(__ret_702, __ret_702, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_702; \ }) #endif #define vqshruns_n_s32(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ int32_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vqshruns_n_s32(__s0, __p1); \ __ret; \ }) #define vqshrund_n_s64(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ int64_t __s0 = __p0; \ __ret = (uint32_t) 
__builtin_neon_vqshrund_n_s64(__s0, __p1); \ __ret; \ }) #define vqshrunh_n_s16(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ int16_t __s0 = __p0; \ __ret = (uint8_t) __builtin_neon_vqshrunh_n_s16(__s0, __p1); \ __ret; \ }) __ai uint8_t vqsubb_u8(uint8_t __p0, uint8_t __p1) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vqsubb_u8(__p0, __p1); return __ret; } __ai uint32_t vqsubs_u32(uint32_t __p0, uint32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vqsubs_u32(__p0, __p1); return __ret; } __ai uint64_t vqsubd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vqsubd_u64(__p0, __p1); return __ret; } __ai uint16_t vqsubh_u16(uint16_t __p0, uint16_t __p1) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vqsubh_u16(__p0, __p1); return __ret; } __ai int8_t vqsubb_s8(int8_t __p0, int8_t __p1) { int8_t __ret; __ret = (int8_t) __builtin_neon_vqsubb_s8(__p0, __p1); return __ret; } __ai int32_t vqsubs_s32(int32_t __p0, int32_t __p1) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqsubs_s32(__p0, __p1); return __ret; } __ai int64_t vqsubd_s64(int64_t __p0, int64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vqsubd_s64(__p0, __p1); return __ret; } __ai int16_t vqsubh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; __ret = (int16_t) __builtin_neon_vqsubh_s16(__p0, __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vqtbl1_p8(poly8x16_t __p0, uint8x8_t __p1) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__p0, (int8x8_t)__p1, 4); return __ret; } #else __ai poly8x8_t vqtbl1_p8(poly8x16_t __p0, uint8x8_t __p1) { poly8x8_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__rev0, (int8x8_t)__rev1, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vqtbl1q_p8(poly8x16_t __p0, uint8x16_t __p1) { poly8x16_t __ret; __ret = (poly8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__p0, (int8x16_t)__p1, 36); return __ret; } #else __ai poly8x16_t vqtbl1q_p8(poly8x16_t __p0, uint8x16_t __p1) { poly8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 36); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vqtbl1q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vqtbl1q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ 
__ai int8x16_t vqtbl1q_s8(int8x16_t __p0, uint8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16_t vqtbl1q_s8(int8x16_t __p0, uint8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vqtbl1_u8(uint8x16_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vqtbl1_u8(uint8x16_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vqtbl1_s8(int8x16_t __p0, uint8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vqtbl1_s8(int8x16_t __p0, uint8x8_t __p1) { int8x8_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vqtbl2_p8(poly8x16x2_t __p0, uint8x8_t __p1) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x8_t)__p1, 4); return __ret; } #else __ai poly8x8_t vqtbl2_p8(poly8x16x2_t __p0, uint8x8_t __p1) { poly8x8_t __ret; poly8x16x2_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vqtbl2q_p8(poly8x16x2_t __p0, uint8x16_t __p1) { poly8x16_t __ret; __ret = (poly8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p1, 36); return __ret; } #else __ai poly8x16_t vqtbl2q_p8(poly8x16x2_t __p0, uint8x16_t __p1) { poly8x16_t __ret; poly8x16x2_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, 36); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vqtbl2q_u8(uint8x16x2_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vqtbl2q_u8(uint8x16x2_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16x2_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vqtbl2q_s8(int8x16x2_t __p0, uint8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16_t vqtbl2q_s8(int8x16x2_t __p0, uint8x16_t __p1) { int8x16_t __ret; int8x16x2_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vqtbl2_u8(uint8x16x2_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vqtbl2_u8(uint8x16x2_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x16x2_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vqtbl2_s8(int8x16x2_t __p0, uint8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vqtbl2_s8(int8x16x2_t __p0, uint8x8_t __p1) { int8x8_t __ret; int8x16x2_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 
6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vqtbl3_p8(poly8x16x3_t __p0, uint8x8_t __p1) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x8_t)__p1, 4); return __ret; } #else __ai poly8x8_t vqtbl3_p8(poly8x16x3_t __p0, uint8x8_t __p1) { poly8x8_t __ret; poly8x16x3_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x8_t)__rev1, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vqtbl3q_p8(poly8x16x3_t __p0, uint8x16_t __p1) { poly8x16_t __ret; __ret = (poly8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p1, 36); return __ret; } #else __ai poly8x16_t vqtbl3q_p8(poly8x16x3_t __p0, uint8x16_t __p1) { poly8x16_t __ret; poly8x16x3_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev1, 36); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vqtbl3q_u8(uint8x16x3_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vqtbl3q_u8(uint8x16x3_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16x3_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__rev0.val[0], 
(int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vqtbl3q_s8(int8x16x3_t __p0, uint8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16_t vqtbl3q_s8(int8x16x3_t __p0, uint8x16_t __p1) { int8x16_t __ret; int8x16x3_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vqtbl3_u8(uint8x16x3_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vqtbl3_u8(uint8x16x3_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x16x3_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vqtbl3_s8(int8x16x3_t __p0, uint8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vqtbl3_s8(int8x16x3_t __p0, uint8x8_t __p1) { int8x8_t __ret; int8x16x3_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vqtbl4_p8(poly8x16x4_t __p0, uint8x8_t __p1) { poly8x8_t __ret; __ret = (poly8x8_t) 
__builtin_neon_vqtbl4_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x8_t)__p1, 4); return __ret; } #else __ai poly8x8_t vqtbl4_p8(poly8x16x4_t __p0, uint8x8_t __p1) { poly8x8_t __ret; poly8x16x4_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x8_t)__rev1, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vqtbl4q_p8(poly8x16x4_t __p0, uint8x16_t __p1) { poly8x16_t __ret; __ret = (poly8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x16_t)__p1, 36); return __ret; } #else __ai poly8x16_t vqtbl4q_p8(poly8x16x4_t __p0, uint8x16_t __p1) { poly8x16_t __ret; poly8x16x4_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x16_t)__rev1, 36); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vqtbl4q_u8(uint8x16x4_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vqtbl4q_u8(uint8x16x4_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16x4_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], 
(int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vqtbl4q_s8(int8x16x4_t __p0, uint8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16_t vqtbl4q_s8(int8x16x4_t __p0, uint8x16_t __p1) { int8x16_t __ret; int8x16x4_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vqtbl4_u8(uint8x16x4_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x8_t)__p1, 16); return __ret; } #else __ai uint8x8_t vqtbl4_u8(uint8x16x4_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x16x4_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vqtbl4_s8(int8x16x4_t __p0, uint8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vqtbl4_s8(int8x16x4_t __p0, uint8x8_t __p1) { int8x8_t __ret; int8x16x4_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = 
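/*
 * The vqtbl2/vqtbl3/vqtbl4 intrinsics in this region perform a byte-wise
 * table lookup: the .val[] vectors of the first argument form one
 * concatenated table of 32, 48 or 64 bytes, and each byte of the index
 * vector selects one byte from that table; an index past the end of the
 * table yields 0 in that lane (TBL semantics). Illustrative sketch with
 * placeholder names, assuming <arm_neon.h> on AArch64:
 *
 *   uint8x16x4_t lut;                        // 64-byte table, val[0]..val[3]
 *   uint8x16_t   idx;                        // 16 byte indices, each 0..63
 *   uint8x16_t   out = vqtbl4q_u8(lut, idx); // out-of-range lanes become 0
 */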
__builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vqtbx1_p8(poly8x8_t __p0, poly8x16_t __p1, uint8x8_t __p2) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__p0, (int8x16_t)__p1, (int8x8_t)__p2, 4); return __ret; } #else __ai poly8x8_t vqtbx1_p8(poly8x8_t __p0, poly8x16_t __p1, uint8x8_t __p2) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vqtbx1q_p8(poly8x16_t __p0, poly8x16_t __p1, uint8x16_t __p2) { poly8x16_t __ret; __ret = (poly8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 36); return __ret; } #else __ai poly8x16_t vqtbx1q_p8(poly8x16_t __p0, poly8x16_t __p1, uint8x16_t __p2) { poly8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 36); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vqtbx1q_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 48); return __ret; } #else __ai uint8x16_t vqtbx1q_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vqtbx1q_s8(int8x16_t __p0, int8x16_t __p1, uint8x16_t __p2) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 32); return __ret; } #else __ai int8x16_t vqtbx1q_s8(int8x16_t __p0, int8x16_t __p1, uint8x16_t __p2) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 
9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vqtbx1_u8(uint8x8_t __p0, uint8x16_t __p1, uint8x8_t __p2) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__p0, (int8x16_t)__p1, (int8x8_t)__p2, 16); return __ret; } #else __ai uint8x8_t vqtbx1_u8(uint8x8_t __p0, uint8x16_t __p1, uint8x8_t __p2) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vqtbx1_s8(int8x8_t __p0, int8x16_t __p1, uint8x8_t __p2) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__p0, (int8x16_t)__p1, (int8x8_t)__p2, 0); return __ret; } #else __ai int8x8_t vqtbx1_s8(int8x8_t __p0, int8x16_t __p1, uint8x8_t __p2) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vqtbx2_p8(poly8x8_t __p0, poly8x16x2_t __p1, uint8x8_t __p2) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x8_t)__p2, 4); return __ret; } #else __ai poly8x8_t vqtbx2_p8(poly8x8_t __p0, poly8x16x2_t __p1, uint8x8_t __p2) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16x2_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x8_t)__rev2, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vqtbx2q_p8(poly8x16_t __p0, poly8x16x2_t __p1, uint8x16_t __p2) { poly8x16_t __ret; __ret = (poly8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p2, 36); return __ret; } #else __ai poly8x16_t vqtbx2q_p8(poly8x16_t __p0, poly8x16x2_t __p1, uint8x16_t __p2) { poly8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); 
poly8x16x2_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev2, 36); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vqtbx2q_u8(uint8x16_t __p0, uint8x16x2_t __p1, uint8x16_t __p2) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p2, 48); return __ret; } #else __ai uint8x16_t vqtbx2q_u8(uint8x16_t __p0, uint8x16x2_t __p1, uint8x16_t __p2) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16x2_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev2, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vqtbx2q_s8(int8x16_t __p0, int8x16x2_t __p1, uint8x16_t __p2) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p2, 32); return __ret; } #else __ai int8x16_t vqtbx2q_s8(int8x16_t __p0, int8x16x2_t __p1, uint8x16_t __p2) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16x2_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev2, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vqtbx2_u8(uint8x8_t __p0, uint8x16x2_t __p1, uint8x8_t __p2) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x8_t)__p2, 16); return __ret; } #else __ai uint8x8_t vqtbx2_u8(uint8x8_t __p0, uint8x16x2_t __p1, uint8x8_t __p2) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16x2_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = 
__builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x8_t)__rev2, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vqtbx2_s8(int8x8_t __p0, int8x16x2_t __p1, uint8x8_t __p2) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x8_t)__p2, 0); return __ret; } #else __ai int8x8_t vqtbx2_s8(int8x8_t __p0, int8x16x2_t __p1, uint8x8_t __p2) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x16x2_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x8_t)__rev2, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vqtbx3_p8(poly8x8_t __p0, poly8x16x3_t __p1, uint8x8_t __p2) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x8_t)__p2, 4); return __ret; } #else __ai poly8x8_t vqtbx3_p8(poly8x8_t __p0, poly8x16x3_t __p1, uint8x8_t __p2) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16x3_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x8_t)__rev2, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vqtbx3q_p8(poly8x16_t __p0, poly8x16x3_t __p1, uint8x16_t __p2) { poly8x16_t __ret; __ret = (poly8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p2, 36); return __ret; } #else __ai poly8x16_t vqtbx3q_p8(poly8x16_t __p0, poly8x16x3_t __p1, uint8x16_t __p2) { poly8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16x3_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 
0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev2, 36); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vqtbx3q_u8(uint8x16_t __p0, uint8x16x3_t __p1, uint8x16_t __p2) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p2, 48); return __ret; } #else __ai uint8x16_t vqtbx3q_u8(uint8x16_t __p0, uint8x16x3_t __p1, uint8x16_t __p2) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16x3_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev2, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vqtbx3q_s8(int8x16_t __p0, int8x16x3_t __p1, uint8x16_t __p2) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p2, 32); return __ret; } #else __ai int8x16_t vqtbx3q_s8(int8x16_t __p0, int8x16x3_t __p1, uint8x16_t __p2) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16x3_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev2, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vqtbx3_u8(uint8x8_t __p0, uint8x16x3_t __p1, uint8x8_t __p2) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x8_t)__p2, 16); return __ret; } #else __ai uint8x8_t vqtbx3_u8(uint8x8_t __p0, uint8x16x3_t __p1, uint8x8_t __p2) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16x3_t __rev1; __rev1.val[0] = 
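/*
 * The vqtbx1..vqtbx4 intrinsics (continued below) are the table-lookup
 * extension counterparts of vqtblN: they take an extra first operand, and a
 * lane whose index is out of range keeps the corresponding byte of that
 * operand instead of being zeroed (TBX semantics). Illustrative sketch with
 * placeholder names:
 *
 *   uint8x16_t fallback;                     // bytes to keep on a miss
 *   uint8x16_t table;                        // 16-byte lookup table
 *   uint8x16_t idx;                          // indices, some may be >= 16
 *   uint8x16_t out = vqtbx1q_u8(fallback, table, idx);
 */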
__builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x8_t)__rev2, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vqtbx3_s8(int8x8_t __p0, int8x16x3_t __p1, uint8x8_t __p2) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x8_t)__p2, 0); return __ret; } #else __ai int8x8_t vqtbx3_s8(int8x8_t __p0, int8x16x3_t __p1, uint8x8_t __p2) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x16x3_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x8_t)__rev2, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vqtbx4_p8(poly8x8_t __p0, poly8x16x4_t __p1, uint8x8_t __p2) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x8_t)__p2, 4); return __ret; } #else __ai poly8x8_t vqtbx4_p8(poly8x8_t __p0, poly8x16x4_t __p1, uint8x8_t __p2) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16x4_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x8_t)__rev2, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vqtbx4q_p8(poly8x16_t __p0, poly8x16x4_t __p1, uint8x16_t __p2) { poly8x16_t __ret; __ret = (poly8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], 
(int8x16_t)__p2, 36); return __ret; } #else __ai poly8x16_t vqtbx4q_p8(poly8x16_t __p0, poly8x16x4_t __p1, uint8x16_t __p2) { poly8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16x4_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x16_t)__rev2, 36); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vqtbx4q_u8(uint8x16_t __p0, uint8x16x4_t __p1, uint8x16_t __p2) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x16_t)__p2, 48); return __ret; } #else __ai uint8x16_t vqtbx4q_u8(uint8x16_t __p0, uint8x16x4_t __p1, uint8x16_t __p2) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16x4_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x16_t)__rev2, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vqtbx4q_s8(int8x16_t __p0, int8x16x4_t __p1, uint8x16_t __p2) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x16_t)__p2, 32); return __ret; } #else __ai int8x16_t vqtbx4q_s8(int8x16_t __p0, int8x16x4_t __p1, uint8x16_t __p2) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16x4_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], 
__p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x16_t)__rev2, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vqtbx4_u8(uint8x8_t __p0, uint8x16x4_t __p1, uint8x8_t __p2) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x8_t)__p2, 16); return __ret; } #else __ai uint8x8_t vqtbx4_u8(uint8x8_t __p0, uint8x16x4_t __p1, uint8x8_t __p2) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16x4_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x8_t)__rev2, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vqtbx4_s8(int8x8_t __p0, int8x16x4_t __p1, uint8x8_t __p2) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x8_t)__p2, 0); return __ret; } #else __ai int8x8_t vqtbx4_s8(int8x8_t __p0, int8x16x4_t __p1, uint8x8_t __p2) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x16x4_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x8_t)__rev2, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vraddhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint16x8_t __ret; __ret = vcombine_u16(__p0, vraddhn_u32(__p1, __p2)); return 
__ret; } #else __ai uint16x8_t vraddhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint16x8_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __noswap_vcombine_u16(__rev0, __noswap_vraddhn_u32(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vraddhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint32x4_t __ret; __ret = vcombine_u32(__p0, vraddhn_u64(__p1, __p2)); return __ret; } #else __ai uint32x4_t vraddhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint32x4_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __noswap_vcombine_u32(__rev0, __noswap_vraddhn_u64(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vraddhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint8x16_t __ret; __ret = vcombine_u8(__p0, vraddhn_u16(__p1, __p2)); return __ret; } #else __ai uint8x16_t vraddhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint8x16_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vcombine_u8(__rev0, __noswap_vraddhn_u16(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vraddhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int16x8_t __ret; __ret = vcombine_s16(__p0, vraddhn_s32(__p1, __p2)); return __ret; } #else __ai int16x8_t vraddhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int16x8_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __noswap_vcombine_s16(__rev0, __noswap_vraddhn_s32(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vraddhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int32x4_t __ret; __ret = vcombine_s32(__p0, vraddhn_s64(__p1, __p2)); return __ret; } #else __ai int32x4_t vraddhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int32x4_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __noswap_vcombine_s32(__rev0, __noswap_vraddhn_s64(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vraddhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int8x16_t __ret; __ret = vcombine_s8(__p0, vraddhn_s16(__p1, __p2)); return __ret; } #else __ai int8x16_t 
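/*
 * The vraddhn_high_* intrinsics in this region fuse a rounding add-and-narrow
 * with a vector concatenation: vraddhn_high_u32(r, a, b) takes the rounded
 * upper 16 bits of each 32-bit sum a + b and returns them in the upper half
 * of the result, with the lower half taken from r; as written above this is
 * equivalent to vcombine_u16(r, vraddhn_u32(a, b)). Illustrative sketch with
 * placeholder names:
 *
 *   uint16x4_t lo;                           // previously narrowed low half
 *   uint32x4_t a, b;
 *   uint16x8_t both = vraddhn_high_u32(lo, a, b);
 */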
vraddhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int8x16_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vcombine_s8(__rev0, __noswap_vraddhn_s16(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vrbit_p8(poly8x8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vrbit_v((int8x8_t)__p0, 4); return __ret; } #else __ai poly8x8_t vrbit_p8(poly8x8_t __p0) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x8_t) __builtin_neon_vrbit_v((int8x8_t)__rev0, 4); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vrbitq_p8(poly8x16_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__p0, 36); return __ret; } #else __ai poly8x16_t vrbitq_p8(poly8x16_t __p0) { poly8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (poly8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__rev0, 36); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vrbitq_u8(uint8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__p0, 48); return __ret; } #else __ai uint8x16_t vrbitq_u8(uint8x16_t __p0) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__rev0, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vrbitq_s8(int8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__p0, 32); return __ret; } #else __ai int8x16_t vrbitq_s8(int8x16_t __p0) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__rev0, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vrbit_u8(uint8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vrbit_v((int8x8_t)__p0, 16); return __ret; } #else __ai uint8x8_t vrbit_u8(uint8x8_t __p0) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x8_t) __builtin_neon_vrbit_v((int8x8_t)__rev0, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vrbit_s8(int8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vrbit_v((int8x8_t)__p0, 0); return __ret; } #else __ai int8x8_t vrbit_s8(int8x8_t __p0) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vrbit_v((int8x8_t)__rev0, 0); __ret = 
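/*
 * The vrbit_* / vrbitq_* intrinsics above reverse the order of the bits
 * inside each byte lane (per-element RBIT); the byte order of the vector
 * itself is unchanged. Illustrative sketch with placeholder values:
 *
 *   uint8x8_t v   = vdup_n_u8(0x01);
 *   uint8x8_t rev = vrbit_u8(v);             // every lane becomes 0x80
 */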
__builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vrecpeq_f64(float64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vrecpeq_v((int8x16_t)__p0, 42); return __ret; } #else __ai float64x2_t vrecpeq_f64(float64x2_t __p0) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64x2_t) __builtin_neon_vrecpeq_v((int8x16_t)__rev0, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vrecpe_f64(float64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vrecpe_v((int8x8_t)__p0, 10); return __ret; } __ai float64_t vrecped_f64(float64_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vrecped_f64(__p0); return __ret; } __ai float32_t vrecpes_f32(float32_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vrecpes_f32(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vrecpsq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vrecpsq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); return __ret; } #else __ai float64x2_t vrecpsq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float64x2_t) __builtin_neon_vrecpsq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vrecps_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vrecps_v((int8x8_t)__p0, (int8x8_t)__p1, 10); return __ret; } __ai float64_t vrecpsd_f64(float64_t __p0, float64_t __p1) { float64_t __ret; __ret = (float64_t) __builtin_neon_vrecpsd_f64(__p0, __p1); return __ret; } __ai float32_t vrecpss_f32(float32_t __p0, float32_t __p1) { float32_t __ret; __ret = (float32_t) __builtin_neon_vrecpss_f32(__p0, __p1); return __ret; } __ai float64_t vrecpxd_f64(float64_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vrecpxd_f64(__p0); return __ret; } __ai float32_t vrecpxs_f32(float32_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vrecpxs_f32(__p0); return __ret; } __ai poly8x8_t vreinterpret_p8_p64(poly64x1_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } __ai poly8x8_t vreinterpret_p8_p16(poly16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } __ai poly8x8_t vreinterpret_p8_u8(uint8x8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } __ai poly8x8_t vreinterpret_p8_u32(uint32x2_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } __ai poly8x8_t vreinterpret_p8_u64(uint64x1_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } __ai poly8x8_t vreinterpret_p8_u16(uint16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } __ai poly8x8_t vreinterpret_p8_s8(int8x8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } __ai poly8x8_t vreinterpret_p8_f64(float64x1_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } __ai poly8x8_t vreinterpret_p8_f32(float32x2_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } __ai poly8x8_t vreinterpret_p8_f16(float16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } __ai poly8x8_t vreinterpret_p8_s32(int32x2_t __p0) { poly8x8_t __ret; 
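/*
 * The vrecpe* intrinsics above return a low-precision reciprocal estimate and
 * the vrecps* intrinsics perform one Newton-Raphson refinement step:
 * vrecps(d, x) computes 2 - d*x, so x * vrecps(d, x) moves x closer to 1/d.
 * Illustrative refinement loop with placeholder names (two steps shown; how
 * many are needed depends on the precision required):
 *
 *   float64x2_t d = { 3.0, 7.0 };            // illustrative inputs
 *   float64x2_t x = vrecpeq_f64(d);          // rough estimate of 1/d
 *   x = vmulq_f64(x, vrecpsq_f64(d, x));     // refinement step 1
 *   x = vmulq_f64(x, vrecpsq_f64(d, x));     // refinement step 2
 */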
__ret = (poly8x8_t)(__p0); return __ret; } __ai poly8x8_t vreinterpret_p8_s64(int64x1_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } __ai poly8x8_t vreinterpret_p8_s16(int16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } __ai poly64x1_t vreinterpret_p64_p8(poly8x8_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } __ai poly64x1_t vreinterpret_p64_p16(poly16x4_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } __ai poly64x1_t vreinterpret_p64_u8(uint8x8_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } __ai poly64x1_t vreinterpret_p64_u32(uint32x2_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } __ai poly64x1_t vreinterpret_p64_u64(uint64x1_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } __ai poly64x1_t vreinterpret_p64_u16(uint16x4_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } __ai poly64x1_t vreinterpret_p64_s8(int8x8_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } __ai poly64x1_t vreinterpret_p64_f64(float64x1_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } __ai poly64x1_t vreinterpret_p64_f32(float32x2_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } __ai poly64x1_t vreinterpret_p64_f16(float16x4_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } __ai poly64x1_t vreinterpret_p64_s32(int32x2_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } __ai poly64x1_t vreinterpret_p64_s64(int64x1_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } __ai poly64x1_t vreinterpret_p64_s16(int16x4_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } __ai poly16x4_t vreinterpret_p16_p8(poly8x8_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } __ai poly16x4_t vreinterpret_p16_p64(poly64x1_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } __ai poly16x4_t vreinterpret_p16_u8(uint8x8_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } __ai poly16x4_t vreinterpret_p16_u32(uint32x2_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } __ai poly16x4_t vreinterpret_p16_u64(uint64x1_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } __ai poly16x4_t vreinterpret_p16_u16(uint16x4_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } __ai poly16x4_t vreinterpret_p16_s8(int8x8_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } __ai poly16x4_t vreinterpret_p16_f64(float64x1_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } __ai poly16x4_t vreinterpret_p16_f32(float32x2_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } __ai poly16x4_t vreinterpret_p16_f16(float16x4_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } __ai poly16x4_t vreinterpret_p16_s32(int32x2_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } __ai poly16x4_t vreinterpret_p16_s64(int64x1_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } __ai poly16x4_t vreinterpret_p16_s16(int16x4_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } __ai poly8x16_t vreinterpretq_p8_p128(poly128_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly8x16_t vreinterpretq_p8_p64(poly64x2_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return 
__ret; } __ai poly8x16_t vreinterpretq_p8_p16(poly16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly8x16_t vreinterpretq_p8_u8(uint8x16_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly8x16_t vreinterpretq_p8_u32(uint32x4_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly8x16_t vreinterpretq_p8_u64(uint64x2_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly8x16_t vreinterpretq_p8_u16(uint16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly8x16_t vreinterpretq_p8_s8(int8x16_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly8x16_t vreinterpretq_p8_f64(float64x2_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly8x16_t vreinterpretq_p8_f32(float32x4_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly8x16_t vreinterpretq_p8_f16(float16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly8x16_t vreinterpretq_p8_s32(int32x4_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly8x16_t vreinterpretq_p8_s64(int64x2_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly8x16_t vreinterpretq_p8_s16(int16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai poly128_t vreinterpretq_p128_p8(poly8x16_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } __ai poly128_t vreinterpretq_p128_p64(poly64x2_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } __ai poly128_t vreinterpretq_p128_p16(poly16x8_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } __ai poly128_t vreinterpretq_p128_u8(uint8x16_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } __ai poly128_t vreinterpretq_p128_u32(uint32x4_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } __ai poly128_t vreinterpretq_p128_u64(uint64x2_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } __ai poly128_t vreinterpretq_p128_u16(uint16x8_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } __ai poly128_t vreinterpretq_p128_s8(int8x16_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } __ai poly128_t vreinterpretq_p128_f64(float64x2_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } __ai poly128_t vreinterpretq_p128_f32(float32x4_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } __ai poly128_t vreinterpretq_p128_f16(float16x8_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } __ai poly128_t vreinterpretq_p128_s32(int32x4_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } __ai poly128_t vreinterpretq_p128_s64(int64x2_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } __ai poly128_t vreinterpretq_p128_s16(int16x8_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } __ai poly64x2_t vreinterpretq_p64_p8(poly8x16_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } __ai poly64x2_t vreinterpretq_p64_p128(poly128_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } __ai poly64x2_t vreinterpretq_p64_p16(poly16x8_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } __ai poly64x2_t vreinterpretq_p64_u8(uint8x16_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } __ai poly64x2_t 
vreinterpretq_p64_u32(uint32x4_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } __ai poly64x2_t vreinterpretq_p64_u64(uint64x2_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } __ai poly64x2_t vreinterpretq_p64_u16(uint16x8_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } __ai poly64x2_t vreinterpretq_p64_s8(int8x16_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } __ai poly64x2_t vreinterpretq_p64_f64(float64x2_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } __ai poly64x2_t vreinterpretq_p64_f32(float32x4_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } __ai poly64x2_t vreinterpretq_p64_f16(float16x8_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } __ai poly64x2_t vreinterpretq_p64_s32(int32x4_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } __ai poly64x2_t vreinterpretq_p64_s64(int64x2_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } __ai poly64x2_t vreinterpretq_p64_s16(int16x8_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_p8(poly8x16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_p128(poly128_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_p64(poly64x2_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_u8(uint8x16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_u32(uint32x4_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_u64(uint64x2_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_u16(uint16x8_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_s8(int8x16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_f64(float64x2_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_f32(float32x4_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_f16(float16x8_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_s32(int32x4_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_s64(int64x2_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai poly16x8_t vreinterpretq_p16_s16(int16x8_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_p8(poly8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_p128(poly128_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_p64(poly64x2_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_p16(poly16x8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_u32(uint32x4_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_u64(uint64x2_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai 
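/*
 * The vreinterpret* / vreinterpretq* family in this region performs no data
 * conversion: each function is a plain cast that relabels the same 64-bit or
 * 128-bit register contents with a different element type, preserving the
 * bit pattern exactly. Illustrative sketch with placeholder names:
 *
 *   float32x4_t f = vdupq_n_f32(1.0f);
 *   uint32x4_t  u = vreinterpretq_u32_f32(f);  // each lane is 0x3F800000
 */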
uint8x16_t vreinterpretq_u8_u16(uint16x8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_s8(int8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_f64(float64x2_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_f32(float32x4_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_f16(float16x8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_s32(int32x4_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_s64(int64x2_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint8x16_t vreinterpretq_u8_s16(int16x8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_p8(poly8x16_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_p128(poly128_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_p64(poly64x2_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_p16(poly16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_u8(uint8x16_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_u64(uint64x2_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_u16(uint16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_s8(int8x16_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_f64(float64x2_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_f16(float16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_s32(int32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_s64(int64x2_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint32x4_t vreinterpretq_u32_s16(int16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_p8(poly8x16_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_p128(poly128_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_p64(poly64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_p16(poly16x8_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_u8(uint8x16_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_u32(uint32x4_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_u16(uint16x8_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_s8(int8x16_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai 
uint64x2_t vreinterpretq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_f32(float32x4_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_f16(float16x8_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_s32(int32x4_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_s64(int64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint64x2_t vreinterpretq_u64_s16(int16x8_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_p8(poly8x16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_p128(poly128_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_p64(poly64x2_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_p16(poly16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_u8(uint8x16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_u32(uint32x4_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_u64(uint64x2_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_s8(int8x16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_f64(float64x2_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_f32(float32x4_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_s32(int32x4_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_s64(int64x2_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai uint16x8_t vreinterpretq_u16_s16(int16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_p8(poly8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_p128(poly128_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_p64(poly64x2_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_p16(poly16x8_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_u8(uint8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_u32(uint32x4_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_u64(uint64x2_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_u16(uint16x8_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_f64(float64x2_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_f32(float32x4_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t 
vreinterpretq_s8_f16(float16x8_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_s32(int32x4_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_s64(int64x2_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai int8x16_t vreinterpretq_s8_s16(int16x8_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai float64x2_t vreinterpretq_f64_p8(poly8x16_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } __ai float64x2_t vreinterpretq_f64_p128(poly128_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } __ai float64x2_t vreinterpretq_f64_p64(poly64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } __ai float64x2_t vreinterpretq_f64_p16(poly16x8_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } __ai float64x2_t vreinterpretq_f64_u8(uint8x16_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } __ai float64x2_t vreinterpretq_f64_u32(uint32x4_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } __ai float64x2_t vreinterpretq_f64_u64(uint64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } __ai float64x2_t vreinterpretq_f64_u16(uint16x8_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } __ai float64x2_t vreinterpretq_f64_s8(int8x16_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } __ai float64x2_t vreinterpretq_f64_f32(float32x4_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } __ai float64x2_t vreinterpretq_f64_f16(float16x8_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } __ai float64x2_t vreinterpretq_f64_s32(int32x4_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } __ai float64x2_t vreinterpretq_f64_s64(int64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } __ai float64x2_t vreinterpretq_f64_s16(int16x8_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_p8(poly8x16_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_p128(poly128_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_p64(poly64x2_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_p16(poly16x8_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_u8(uint8x16_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_u32(uint32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_u64(uint64x2_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_u16(uint16x8_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_s8(int8x16_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_f64(float64x2_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_f16(float16x8_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_s32(int32x4_t __p0) { float32x4_t 
__ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_s64(int64x2_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float32x4_t vreinterpretq_f32_s16(int16x8_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_p8(poly8x16_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_p128(poly128_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_p64(poly64x2_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_p16(poly16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_u8(uint8x16_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_u32(uint32x4_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_u64(uint64x2_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_u16(uint16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_s8(int8x16_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_f64(float64x2_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_f32(float32x4_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_s32(int32x4_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_s64(int64x2_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai float16x8_t vreinterpretq_f16_s16(int16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_p8(poly8x16_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_p128(poly128_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_p64(poly64x2_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_p16(poly16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_u8(uint8x16_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_u32(uint32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_u64(uint64x2_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_u16(uint16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_s8(int8x16_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_f64(float64x2_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_f32(float32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_f16(float16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_s64(int64x2_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int32x4_t vreinterpretq_s32_s16(int16x8_t 
__p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_p8(poly8x16_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_p128(poly128_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_p64(poly64x2_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_p16(poly16x8_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_u8(uint8x16_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_u32(uint32x4_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_u64(uint64x2_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_u16(uint16x8_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_s8(int8x16_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_f64(float64x2_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_f32(float32x4_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_f16(float16x8_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_s32(int32x4_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int64x2_t vreinterpretq_s64_s16(int16x8_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_p8(poly8x16_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_p128(poly128_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_p64(poly64x2_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_p16(poly16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_u8(uint8x16_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_u32(uint32x4_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_u64(uint64x2_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_u16(uint16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_s8(int8x16_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_f64(float64x2_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_f32(float32x4_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_f16(float16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_s32(int32x4_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai int16x8_t vreinterpretq_s16_s64(int64x2_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_p8(poly8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_p64(poly64x1_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t 
vreinterpret_u8_p16(poly16x4_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_u32(uint32x2_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_u64(uint64x1_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_u16(uint16x4_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_s8(int8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_f64(float64x1_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_f32(float32x2_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_f16(float16x4_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_s32(int32x2_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_s64(int64x1_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint8x8_t vreinterpret_u8_s16(int16x4_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_p8(poly8x8_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_p64(poly64x1_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_p16(poly16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_u8(uint8x8_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_u64(uint64x1_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_u16(uint16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_s8(int8x8_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_f64(float64x1_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_f32(float32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_f16(float16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_s32(int32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_s64(int64x1_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint32x2_t vreinterpret_u32_s16(int16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_p8(poly8x8_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_p64(poly64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_p16(poly16x4_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_u8(uint8x8_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_u32(uint32x2_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_u16(uint16x4_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_s8(int8x8_t __p0) { uint64x1_t __ret; __ret = 
(uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_f32(float32x2_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_f16(float16x4_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_s32(int32x2_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_s64(int64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint64x1_t vreinterpret_u64_s16(int16x4_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_p8(poly8x8_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_p64(poly64x1_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_p16(poly16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_u8(uint8x8_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_u32(uint32x2_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_u64(uint64x1_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_s8(int8x8_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_f64(float64x1_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_f32(float32x2_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_s32(int32x2_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_s64(int64x1_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai uint16x4_t vreinterpret_u16_s16(int16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_p8(poly8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_p64(poly64x1_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_p16(poly16x4_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_u8(uint8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_u32(uint32x2_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_u64(uint64x1_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_u16(uint16x4_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_f64(float64x1_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_f32(float32x2_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_f16(float16x4_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_s32(int32x2_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_s64(int64x1_t __p0) { int8x8_t 
__ret; __ret = (int8x8_t)(__p0); return __ret; } __ai int8x8_t vreinterpret_s8_s16(int16x4_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai float64x1_t vreinterpret_f64_p8(poly8x8_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } __ai float64x1_t vreinterpret_f64_p64(poly64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } __ai float64x1_t vreinterpret_f64_p16(poly16x4_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } __ai float64x1_t vreinterpret_f64_u8(uint8x8_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } __ai float64x1_t vreinterpret_f64_u32(uint32x2_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } __ai float64x1_t vreinterpret_f64_u64(uint64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } __ai float64x1_t vreinterpret_f64_u16(uint16x4_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } __ai float64x1_t vreinterpret_f64_s8(int8x8_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } __ai float64x1_t vreinterpret_f64_f32(float32x2_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } __ai float64x1_t vreinterpret_f64_f16(float16x4_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } __ai float64x1_t vreinterpret_f64_s32(int32x2_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } __ai float64x1_t vreinterpret_f64_s64(int64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } __ai float64x1_t vreinterpret_f64_s16(int16x4_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_p8(poly8x8_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_p64(poly64x1_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_p16(poly16x4_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_u8(uint8x8_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_u32(uint32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_u64(uint64x1_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_u16(uint16x4_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_s8(int8x8_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_f64(float64x1_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_f16(float16x4_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_s32(int32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_s64(int64x1_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float32x2_t vreinterpret_f32_s16(int16x4_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_p8(poly8x8_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_p64(poly64x1_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t 
vreinterpret_f16_p16(poly16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_u8(uint8x8_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_u32(uint32x2_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_u64(uint64x1_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_u16(uint16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_s8(int8x8_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_f64(float64x1_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_f32(float32x2_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_s32(int32x2_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_s64(int64x1_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai float16x4_t vreinterpret_f16_s16(int16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_p8(poly8x8_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_p64(poly64x1_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_p16(poly16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_u8(uint8x8_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_u32(uint32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_u64(uint64x1_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_u16(uint16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_s8(int8x8_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_f64(float64x1_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_f32(float32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_f16(float16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_s64(int64x1_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int32x2_t vreinterpret_s32_s16(int16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_p8(poly8x8_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_p64(poly64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_p16(poly16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_u8(uint8x8_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_u32(uint32x2_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_u64(uint64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_u16(uint16x4_t __p0) { int64x1_t __ret; 
__ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_s8(int8x8_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_f64(float64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_f32(float32x2_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_f16(float16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_s32(int32x2_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int64x1_t vreinterpret_s64_s16(int16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_p8(poly8x8_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_p64(poly64x1_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_p16(poly16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_u8(uint8x8_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_u32(uint32x2_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_u64(uint64x1_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_u16(uint16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_s8(int8x8_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_f64(float64x1_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_f32(float32x2_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_f16(float16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_s32(int32x2_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai int16x4_t vreinterpret_s16_s64(int64x1_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai uint64_t vrshld_u64(uint64_t __p0, int64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vrshld_u64(__p0, __p1); return __ret; } __ai int64_t vrshld_s64(int64_t __p0, int64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vrshld_s64(__p0, __p1); return __ret; } #define vrshrd_n_u64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ uint64_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vrshrd_n_u64(__s0, __p1); \ __ret; \ }) #define vrshrd_n_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vrshrd_n_s64(__s0, __p1); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vrshrn_high_n_u32(__p0_703, __p1_703, __p2_703) __extension__ ({ \ uint16x8_t __ret_703; \ uint16x4_t __s0_703 = __p0_703; \ uint32x4_t __s1_703 = __p1_703; \ __ret_703 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_703), (uint16x4_t)(vrshrn_n_u32(__s1_703, __p2_703)))); \ __ret_703; \ }) #else #define vrshrn_high_n_u32(__p0_704, __p1_704, __p2_704) __extension__ ({ \ uint16x8_t __ret_704; \ uint16x4_t __s0_704 = __p0_704; \ uint32x4_t __s1_704 = __p1_704; \ uint16x4_t __rev0_704; __rev0_704 = __builtin_shufflevector(__s0_704, __s0_704, 3, 2, 1, 0); \ uint32x4_t __rev1_704; __rev1_704 = __builtin_shufflevector(__s1_704, __s1_704, 3, 2, 1, 0); \ __ret_704 = 
(uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_704), (uint16x4_t)(__noswap_vrshrn_n_u32(__rev1_704, __p2_704)))); \ __ret_704 = __builtin_shufflevector(__ret_704, __ret_704, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_704; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshrn_high_n_u64(__p0_705, __p1_705, __p2_705) __extension__ ({ \ uint32x4_t __ret_705; \ uint32x2_t __s0_705 = __p0_705; \ uint64x2_t __s1_705 = __p1_705; \ __ret_705 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_705), (uint32x2_t)(vrshrn_n_u64(__s1_705, __p2_705)))); \ __ret_705; \ }) #else #define vrshrn_high_n_u64(__p0_706, __p1_706, __p2_706) __extension__ ({ \ uint32x4_t __ret_706; \ uint32x2_t __s0_706 = __p0_706; \ uint64x2_t __s1_706 = __p1_706; \ uint32x2_t __rev0_706; __rev0_706 = __builtin_shufflevector(__s0_706, __s0_706, 1, 0); \ uint64x2_t __rev1_706; __rev1_706 = __builtin_shufflevector(__s1_706, __s1_706, 1, 0); \ __ret_706 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_706), (uint32x2_t)(__noswap_vrshrn_n_u64(__rev1_706, __p2_706)))); \ __ret_706 = __builtin_shufflevector(__ret_706, __ret_706, 3, 2, 1, 0); \ __ret_706; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshrn_high_n_u16(__p0_707, __p1_707, __p2_707) __extension__ ({ \ uint8x16_t __ret_707; \ uint8x8_t __s0_707 = __p0_707; \ uint16x8_t __s1_707 = __p1_707; \ __ret_707 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_707), (uint8x8_t)(vrshrn_n_u16(__s1_707, __p2_707)))); \ __ret_707; \ }) #else #define vrshrn_high_n_u16(__p0_708, __p1_708, __p2_708) __extension__ ({ \ uint8x16_t __ret_708; \ uint8x8_t __s0_708 = __p0_708; \ uint16x8_t __s1_708 = __p1_708; \ uint8x8_t __rev0_708; __rev0_708 = __builtin_shufflevector(__s0_708, __s0_708, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev1_708; __rev1_708 = __builtin_shufflevector(__s1_708, __s1_708, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_708 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_708), (uint8x8_t)(__noswap_vrshrn_n_u16(__rev1_708, __p2_708)))); \ __ret_708 = __builtin_shufflevector(__ret_708, __ret_708, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_708; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshrn_high_n_s32(__p0_709, __p1_709, __p2_709) __extension__ ({ \ int16x8_t __ret_709; \ int16x4_t __s0_709 = __p0_709; \ int32x4_t __s1_709 = __p1_709; \ __ret_709 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_709), (int16x4_t)(vrshrn_n_s32(__s1_709, __p2_709)))); \ __ret_709; \ }) #else #define vrshrn_high_n_s32(__p0_710, __p1_710, __p2_710) __extension__ ({ \ int16x8_t __ret_710; \ int16x4_t __s0_710 = __p0_710; \ int32x4_t __s1_710 = __p1_710; \ int16x4_t __rev0_710; __rev0_710 = __builtin_shufflevector(__s0_710, __s0_710, 3, 2, 1, 0); \ int32x4_t __rev1_710; __rev1_710 = __builtin_shufflevector(__s1_710, __s1_710, 3, 2, 1, 0); \ __ret_710 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_710), (int16x4_t)(__noswap_vrshrn_n_s32(__rev1_710, __p2_710)))); \ __ret_710 = __builtin_shufflevector(__ret_710, __ret_710, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_710; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshrn_high_n_s64(__p0_711, __p1_711, __p2_711) __extension__ ({ \ int32x4_t __ret_711; \ int32x2_t __s0_711 = __p0_711; \ int64x2_t __s1_711 = __p1_711; \ __ret_711 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_711), (int32x2_t)(vrshrn_n_s64(__s1_711, __p2_711)))); \ __ret_711; \ }) #else #define vrshrn_high_n_s64(__p0_712, __p1_712, __p2_712) __extension__ ({ \ int32x4_t __ret_712; \ int32x2_t __s0_712 = __p0_712; \ int64x2_t __s1_712 = __p1_712; \ int32x2_t __rev0_712; __rev0_712 = 
__builtin_shufflevector(__s0_712, __s0_712, 1, 0); \ int64x2_t __rev1_712; __rev1_712 = __builtin_shufflevector(__s1_712, __s1_712, 1, 0); \ __ret_712 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_712), (int32x2_t)(__noswap_vrshrn_n_s64(__rev1_712, __p2_712)))); \ __ret_712 = __builtin_shufflevector(__ret_712, __ret_712, 3, 2, 1, 0); \ __ret_712; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vrshrn_high_n_s16(__p0_713, __p1_713, __p2_713) __extension__ ({ \ int8x16_t __ret_713; \ int8x8_t __s0_713 = __p0_713; \ int16x8_t __s1_713 = __p1_713; \ __ret_713 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_713), (int8x8_t)(vrshrn_n_s16(__s1_713, __p2_713)))); \ __ret_713; \ }) #else #define vrshrn_high_n_s16(__p0_714, __p1_714, __p2_714) __extension__ ({ \ int8x16_t __ret_714; \ int8x8_t __s0_714 = __p0_714; \ int16x8_t __s1_714 = __p1_714; \ int8x8_t __rev0_714; __rev0_714 = __builtin_shufflevector(__s0_714, __s0_714, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1_714; __rev1_714 = __builtin_shufflevector(__s1_714, __s1_714, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_714 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_714), (int8x8_t)(__noswap_vrshrn_n_s16(__rev1_714, __p2_714)))); \ __ret_714 = __builtin_shufflevector(__ret_714, __ret_714, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_714; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vrsqrteq_f64(float64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vrsqrteq_v((int8x16_t)__p0, 42); return __ret; } #else __ai float64x2_t vrsqrteq_f64(float64x2_t __p0) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64x2_t) __builtin_neon_vrsqrteq_v((int8x16_t)__rev0, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vrsqrte_f64(float64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vrsqrte_v((int8x8_t)__p0, 10); return __ret; } __ai float64_t vrsqrted_f64(float64_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vrsqrted_f64(__p0); return __ret; } __ai float32_t vrsqrtes_f32(float32_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vrsqrtes_f32(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vrsqrtsq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); return __ret; } #else __ai float64x2_t vrsqrtsq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float64x2_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vrsqrts_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vrsqrts_v((int8x8_t)__p0, (int8x8_t)__p1, 10); return __ret; } __ai float64_t vrsqrtsd_f64(float64_t __p0, float64_t __p1) { float64_t __ret; __ret = (float64_t) __builtin_neon_vrsqrtsd_f64(__p0, __p1); return __ret; } __ai float32_t vrsqrtss_f32(float32_t __p0, float32_t __p1) { float32_t __ret; __ret = (float32_t) __builtin_neon_vrsqrtss_f32(__p0, __p1); return __ret; } #define vrsrad_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64_t __ret; \ uint64_t __s0 = __p0; \ uint64_t __s1 = __p1; \ __ret = (uint64_t) __builtin_neon_vrsrad_n_u64(__s0, __s1, __p2); \ __ret; \ }) #define 
vrsrad_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ int64_t __s1 = __p1; \ __ret = (int64_t) __builtin_neon_vrsrad_n_s64(__s0, __s1, __p2); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vrsubhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint16x8_t __ret; __ret = vcombine_u16(__p0, vrsubhn_u32(__p1, __p2)); return __ret; } #else __ai uint16x8_t vrsubhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint16x8_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __noswap_vcombine_u16(__rev0, __noswap_vrsubhn_u32(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vrsubhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint32x4_t __ret; __ret = vcombine_u32(__p0, vrsubhn_u64(__p1, __p2)); return __ret; } #else __ai uint32x4_t vrsubhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint32x4_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __noswap_vcombine_u32(__rev0, __noswap_vrsubhn_u64(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vrsubhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint8x16_t __ret; __ret = vcombine_u8(__p0, vrsubhn_u16(__p1, __p2)); return __ret; } #else __ai uint8x16_t vrsubhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint8x16_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vcombine_u8(__rev0, __noswap_vrsubhn_u16(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vrsubhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int16x8_t __ret; __ret = vcombine_s16(__p0, vrsubhn_s32(__p1, __p2)); return __ret; } #else __ai int16x8_t vrsubhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int16x8_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __noswap_vcombine_s16(__rev0, __noswap_vrsubhn_s32(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vrsubhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int32x4_t __ret; __ret = vcombine_s32(__p0, vrsubhn_s64(__p1, __p2)); return __ret; } #else __ai int32x4_t vrsubhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int32x4_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 
0); __ret = __noswap_vcombine_s32(__rev0, __noswap_vrsubhn_s64(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vrsubhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int8x16_t __ret; __ret = vcombine_s8(__p0, vrsubhn_s16(__p1, __p2)); return __ret; } #else __ai int8x16_t vrsubhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int8x16_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vcombine_s8(__rev0, __noswap_vrsubhn_s16(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #define vset_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1_t __ret; \ poly64_t __s0 = __p0; \ poly64x1_t __s1 = __p1; \ __ret = (poly64x1_t) __builtin_neon_vset_lane_i64(__s0, (poly64x1_t)__s1, __p2); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vsetq_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __ret; \ poly64_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ __ret = (poly64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (poly64x2_t)__s1, __p2); \ __ret; \ }) #else #define vsetq_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __ret; \ poly64_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (poly64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (poly64x2_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vsetq_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __ret; \ poly64_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ __ret = (poly64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (poly64x2_t)__s1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsetq_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __ret; \ float64_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ __ret = (float64x2_t) __builtin_neon_vsetq_lane_f64(__s0, (float64x2_t)__s1, __p2); \ __ret; \ }) #else #define vsetq_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __ret; \ float64_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (float64x2_t) __builtin_neon_vsetq_lane_f64(__s0, (float64x2_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #define __noswap_vsetq_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __ret; \ float64_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ __ret = (float64x2_t) __builtin_neon_vsetq_lane_f64(__s0, (float64x2_t)__s1, __p2); \ __ret; \ }) #endif #define vset_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1_t __ret; \ float64_t __s0 = __p0; \ float64x1_t __s1 = __p1; \ __ret = (float64x1_t) __builtin_neon_vset_lane_f64(__s0, (float64x1_t)__s1, __p2); \ __ret; \ }) __ai uint64_t vshld_u64(uint64_t __p0, int64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vshld_u64(__p0, __p1); return __ret; } __ai int64_t vshld_s64(int64_t __p0, int64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vshld_s64(__p0, __p1); return __ret; } #define vshld_n_u64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ uint64_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vshld_n_u64(__s0, __p1); \ __ret; \ }) 
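/* Editor's note: illustrative sketch only, not part of the generated arm_neon.h
 * text above or below. The vreinterpretq_* functions defined earlier are
 * zero-cost bit-pattern casts between 128-bit vector types, and the
 * vrshrn_high_n_* macros perform a rounding narrowing shift whose result lands
 * in the high half of a wider vector. The helper below (the name
 * example_pack_u32_to_u16 is hypothetical) shows one way these could be
 * combined; it calls only intrinsics already defined above this point. */
__ai uint16x8_t example_pack_u32_to_u16(uint16x4_t __low, uint32x4_t __wide) {
  /* Rounding right shift of each 32-bit lane by 8, narrowed to 16 bits and
     placed in the high half of the result, with __low as the low half. */
  uint16x8_t __packed = vrshrn_high_n_u32(__low, __wide, 8);
  /* Reinterpret casts change only the element view, never the bits; this
     round trip is therefore a no-op and compiles away. */
  uint8x16_t __bytes = vreinterpretq_u8_u16(__packed);
  return vreinterpretq_u16_u8(__bytes);
}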
#define vshld_n_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vshld_n_s64(__s0, __p1); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vshll_high_n_u8(__p0_715, __p1_715) __extension__ ({ \ uint16x8_t __ret_715; \ uint8x16_t __s0_715 = __p0_715; \ __ret_715 = (uint16x8_t)(vshll_n_u8(vget_high_u8(__s0_715), __p1_715)); \ __ret_715; \ }) #else #define vshll_high_n_u8(__p0_716, __p1_716) __extension__ ({ \ uint16x8_t __ret_716; \ uint8x16_t __s0_716 = __p0_716; \ uint8x16_t __rev0_716; __rev0_716 = __builtin_shufflevector(__s0_716, __s0_716, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_716 = (uint16x8_t)(__noswap_vshll_n_u8(__noswap_vget_high_u8(__rev0_716), __p1_716)); \ __ret_716 = __builtin_shufflevector(__ret_716, __ret_716, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_716; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshll_high_n_u32(__p0_717, __p1_717) __extension__ ({ \ uint64x2_t __ret_717; \ uint32x4_t __s0_717 = __p0_717; \ __ret_717 = (uint64x2_t)(vshll_n_u32(vget_high_u32(__s0_717), __p1_717)); \ __ret_717; \ }) #else #define vshll_high_n_u32(__p0_718, __p1_718) __extension__ ({ \ uint64x2_t __ret_718; \ uint32x4_t __s0_718 = __p0_718; \ uint32x4_t __rev0_718; __rev0_718 = __builtin_shufflevector(__s0_718, __s0_718, 3, 2, 1, 0); \ __ret_718 = (uint64x2_t)(__noswap_vshll_n_u32(__noswap_vget_high_u32(__rev0_718), __p1_718)); \ __ret_718 = __builtin_shufflevector(__ret_718, __ret_718, 1, 0); \ __ret_718; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshll_high_n_u16(__p0_719, __p1_719) __extension__ ({ \ uint32x4_t __ret_719; \ uint16x8_t __s0_719 = __p0_719; \ __ret_719 = (uint32x4_t)(vshll_n_u16(vget_high_u16(__s0_719), __p1_719)); \ __ret_719; \ }) #else #define vshll_high_n_u16(__p0_720, __p1_720) __extension__ ({ \ uint32x4_t __ret_720; \ uint16x8_t __s0_720 = __p0_720; \ uint16x8_t __rev0_720; __rev0_720 = __builtin_shufflevector(__s0_720, __s0_720, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_720 = (uint32x4_t)(__noswap_vshll_n_u16(__noswap_vget_high_u16(__rev0_720), __p1_720)); \ __ret_720 = __builtin_shufflevector(__ret_720, __ret_720, 3, 2, 1, 0); \ __ret_720; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshll_high_n_s8(__p0_721, __p1_721) __extension__ ({ \ int16x8_t __ret_721; \ int8x16_t __s0_721 = __p0_721; \ __ret_721 = (int16x8_t)(vshll_n_s8(vget_high_s8(__s0_721), __p1_721)); \ __ret_721; \ }) #else #define vshll_high_n_s8(__p0_722, __p1_722) __extension__ ({ \ int16x8_t __ret_722; \ int8x16_t __s0_722 = __p0_722; \ int8x16_t __rev0_722; __rev0_722 = __builtin_shufflevector(__s0_722, __s0_722, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_722 = (int16x8_t)(__noswap_vshll_n_s8(__noswap_vget_high_s8(__rev0_722), __p1_722)); \ __ret_722 = __builtin_shufflevector(__ret_722, __ret_722, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_722; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshll_high_n_s32(__p0_723, __p1_723) __extension__ ({ \ int64x2_t __ret_723; \ int32x4_t __s0_723 = __p0_723; \ __ret_723 = (int64x2_t)(vshll_n_s32(vget_high_s32(__s0_723), __p1_723)); \ __ret_723; \ }) #else #define vshll_high_n_s32(__p0_724, __p1_724) __extension__ ({ \ int64x2_t __ret_724; \ int32x4_t __s0_724 = __p0_724; \ int32x4_t __rev0_724; __rev0_724 = __builtin_shufflevector(__s0_724, __s0_724, 3, 2, 1, 0); \ __ret_724 = (int64x2_t)(__noswap_vshll_n_s32(__noswap_vget_high_s32(__rev0_724), __p1_724)); \ __ret_724 = __builtin_shufflevector(__ret_724, __ret_724, 1, 0); \ __ret_724; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define 
vshll_high_n_s16(__p0_725, __p1_725) __extension__ ({ \ int32x4_t __ret_725; \ int16x8_t __s0_725 = __p0_725; \ __ret_725 = (int32x4_t)(vshll_n_s16(vget_high_s16(__s0_725), __p1_725)); \ __ret_725; \ }) #else #define vshll_high_n_s16(__p0_726, __p1_726) __extension__ ({ \ int32x4_t __ret_726; \ int16x8_t __s0_726 = __p0_726; \ int16x8_t __rev0_726; __rev0_726 = __builtin_shufflevector(__s0_726, __s0_726, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_726 = (int32x4_t)(__noswap_vshll_n_s16(__noswap_vget_high_s16(__rev0_726), __p1_726)); \ __ret_726 = __builtin_shufflevector(__ret_726, __ret_726, 3, 2, 1, 0); \ __ret_726; \ }) #endif #define vshrd_n_u64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ uint64_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vshrd_n_u64(__s0, __p1); \ __ret; \ }) #define vshrd_n_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vshrd_n_s64(__s0, __p1); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vshrn_high_n_u32(__p0_727, __p1_727, __p2_727) __extension__ ({ \ uint16x8_t __ret_727; \ uint16x4_t __s0_727 = __p0_727; \ uint32x4_t __s1_727 = __p1_727; \ __ret_727 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_727), (uint16x4_t)(vshrn_n_u32(__s1_727, __p2_727)))); \ __ret_727; \ }) #else #define vshrn_high_n_u32(__p0_728, __p1_728, __p2_728) __extension__ ({ \ uint16x8_t __ret_728; \ uint16x4_t __s0_728 = __p0_728; \ uint32x4_t __s1_728 = __p1_728; \ uint16x4_t __rev0_728; __rev0_728 = __builtin_shufflevector(__s0_728, __s0_728, 3, 2, 1, 0); \ uint32x4_t __rev1_728; __rev1_728 = __builtin_shufflevector(__s1_728, __s1_728, 3, 2, 1, 0); \ __ret_728 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_728), (uint16x4_t)(__noswap_vshrn_n_u32(__rev1_728, __p2_728)))); \ __ret_728 = __builtin_shufflevector(__ret_728, __ret_728, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_728; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshrn_high_n_u64(__p0_729, __p1_729, __p2_729) __extension__ ({ \ uint32x4_t __ret_729; \ uint32x2_t __s0_729 = __p0_729; \ uint64x2_t __s1_729 = __p1_729; \ __ret_729 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_729), (uint32x2_t)(vshrn_n_u64(__s1_729, __p2_729)))); \ __ret_729; \ }) #else #define vshrn_high_n_u64(__p0_730, __p1_730, __p2_730) __extension__ ({ \ uint32x4_t __ret_730; \ uint32x2_t __s0_730 = __p0_730; \ uint64x2_t __s1_730 = __p1_730; \ uint32x2_t __rev0_730; __rev0_730 = __builtin_shufflevector(__s0_730, __s0_730, 1, 0); \ uint64x2_t __rev1_730; __rev1_730 = __builtin_shufflevector(__s1_730, __s1_730, 1, 0); \ __ret_730 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_730), (uint32x2_t)(__noswap_vshrn_n_u64(__rev1_730, __p2_730)))); \ __ret_730 = __builtin_shufflevector(__ret_730, __ret_730, 3, 2, 1, 0); \ __ret_730; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshrn_high_n_u16(__p0_731, __p1_731, __p2_731) __extension__ ({ \ uint8x16_t __ret_731; \ uint8x8_t __s0_731 = __p0_731; \ uint16x8_t __s1_731 = __p1_731; \ __ret_731 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_731), (uint8x8_t)(vshrn_n_u16(__s1_731, __p2_731)))); \ __ret_731; \ }) #else #define vshrn_high_n_u16(__p0_732, __p1_732, __p2_732) __extension__ ({ \ uint8x16_t __ret_732; \ uint8x8_t __s0_732 = __p0_732; \ uint16x8_t __s1_732 = __p1_732; \ uint8x8_t __rev0_732; __rev0_732 = __builtin_shufflevector(__s0_732, __s0_732, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __rev1_732; __rev1_732 = __builtin_shufflevector(__s1_732, __s1_732, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_732 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_732), 
(uint8x8_t)(__noswap_vshrn_n_u16(__rev1_732, __p2_732)))); \ __ret_732 = __builtin_shufflevector(__ret_732, __ret_732, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_732; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshrn_high_n_s32(__p0_733, __p1_733, __p2_733) __extension__ ({ \ int16x8_t __ret_733; \ int16x4_t __s0_733 = __p0_733; \ int32x4_t __s1_733 = __p1_733; \ __ret_733 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_733), (int16x4_t)(vshrn_n_s32(__s1_733, __p2_733)))); \ __ret_733; \ }) #else #define vshrn_high_n_s32(__p0_734, __p1_734, __p2_734) __extension__ ({ \ int16x8_t __ret_734; \ int16x4_t __s0_734 = __p0_734; \ int32x4_t __s1_734 = __p1_734; \ int16x4_t __rev0_734; __rev0_734 = __builtin_shufflevector(__s0_734, __s0_734, 3, 2, 1, 0); \ int32x4_t __rev1_734; __rev1_734 = __builtin_shufflevector(__s1_734, __s1_734, 3, 2, 1, 0); \ __ret_734 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_734), (int16x4_t)(__noswap_vshrn_n_s32(__rev1_734, __p2_734)))); \ __ret_734 = __builtin_shufflevector(__ret_734, __ret_734, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_734; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshrn_high_n_s64(__p0_735, __p1_735, __p2_735) __extension__ ({ \ int32x4_t __ret_735; \ int32x2_t __s0_735 = __p0_735; \ int64x2_t __s1_735 = __p1_735; \ __ret_735 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_735), (int32x2_t)(vshrn_n_s64(__s1_735, __p2_735)))); \ __ret_735; \ }) #else #define vshrn_high_n_s64(__p0_736, __p1_736, __p2_736) __extension__ ({ \ int32x4_t __ret_736; \ int32x2_t __s0_736 = __p0_736; \ int64x2_t __s1_736 = __p1_736; \ int32x2_t __rev0_736; __rev0_736 = __builtin_shufflevector(__s0_736, __s0_736, 1, 0); \ int64x2_t __rev1_736; __rev1_736 = __builtin_shufflevector(__s1_736, __s1_736, 1, 0); \ __ret_736 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_736), (int32x2_t)(__noswap_vshrn_n_s64(__rev1_736, __p2_736)))); \ __ret_736 = __builtin_shufflevector(__ret_736, __ret_736, 3, 2, 1, 0); \ __ret_736; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vshrn_high_n_s16(__p0_737, __p1_737, __p2_737) __extension__ ({ \ int8x16_t __ret_737; \ int8x8_t __s0_737 = __p0_737; \ int16x8_t __s1_737 = __p1_737; \ __ret_737 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_737), (int8x8_t)(vshrn_n_s16(__s1_737, __p2_737)))); \ __ret_737; \ }) #else #define vshrn_high_n_s16(__p0_738, __p1_738, __p2_738) __extension__ ({ \ int8x16_t __ret_738; \ int8x8_t __s0_738 = __p0_738; \ int16x8_t __s1_738 = __p1_738; \ int8x8_t __rev0_738; __rev0_738 = __builtin_shufflevector(__s0_738, __s0_738, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1_738; __rev1_738 = __builtin_shufflevector(__s1_738, __s1_738, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_738 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_738), (int8x8_t)(__noswap_vshrn_n_s16(__rev1_738, __p2_738)))); \ __ret_738 = __builtin_shufflevector(__ret_738, __ret_738, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_738; \ }) #endif #define vslid_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64_t __ret; \ uint64_t __s0 = __p0; \ uint64_t __s1 = __p1; \ __ret = (uint64_t) __builtin_neon_vslid_n_u64(__s0, __s1, __p2); \ __ret; \ }) #define vslid_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ int64_t __s1 = __p1; \ __ret = (int64_t) __builtin_neon_vslid_n_s64(__s0, __s1, __p2); \ __ret; \ }) #define vsli_n_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1_t __ret; \ poly64x1_t __s0 = __p0; \ poly64x1_t __s1 = __p1; \ __ret = (poly64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, 
(int8x8_t)__s1, __p2, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vsliq_n_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __ret; \ poly64x2_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ __ret = (poly64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 38); \ __ret; \ }) #else #define vsliq_n_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __ret; \ poly64x2_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (poly64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 38); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif __ai uint8_t vsqaddb_u8(uint8_t __p0, int8_t __p1) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vsqaddb_u8(__p0, __p1); return __ret; } __ai uint32_t vsqadds_u32(uint32_t __p0, int32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vsqadds_u32(__p0, __p1); return __ret; } __ai uint64_t vsqaddd_u64(uint64_t __p0, int64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vsqaddd_u64(__p0, __p1); return __ret; } __ai uint16_t vsqaddh_u16(uint16_t __p0, int16_t __p1) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vsqaddh_u16(__p0, __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vsqaddq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vsqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else __ai uint8x16_t vsqaddq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vsqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vsqaddq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vsqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai uint32x4_t vsqaddq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vsqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vsqaddq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vsqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else __ai uint64x2_t vsqaddq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t) __builtin_neon_vsqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vsqaddq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vsqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else __ai uint16x8_t vsqaddq_u16(uint16x8_t __p0, 
int16x8_t __p1) {
  uint16x8_t __ret;
  uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
  int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
  __ret = (uint16x8_t) __builtin_neon_vsqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49);
  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
  return __ret;
}
#endif
#ifdef __LITTLE_ENDIAN__
__ai uint8x8_t vsqadd_u8(uint8x8_t __p0, int8x8_t __p1) {
  uint8x8_t __ret;
  __ret = (uint8x8_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 16);
  return __ret;
}
#else
__ai uint8x8_t vsqadd_u8(uint8x8_t __p0, int8x8_t __p1) {
  uint8x8_t __ret;
  uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
  int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
  __ret = (uint8x8_t) __builtin_neon_vsqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16);
  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
  return __ret;
}
#endif
#ifdef __LITTLE_ENDIAN__
__ai uint32x2_t vsqadd_u32(uint32x2_t __p0, int32x2_t __p1) {
  uint32x2_t __ret;
  __ret = (uint32x2_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 18);
  return __ret;
}
#else
__ai uint32x2_t vsqadd_u32(uint32x2_t __p0, int32x2_t __p1) {
  uint32x2_t __ret;
  uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
  int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
  __ret = (uint32x2_t) __builtin_neon_vsqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18);
  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
  return __ret;
}
#endif
__ai uint64x1_t vsqadd_u64(uint64x1_t __p0, int64x1_t __p1) {
  uint64x1_t __ret;
  __ret = (uint64x1_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 19);
  return __ret;
}
#ifdef __LITTLE_ENDIAN__
__ai uint16x4_t vsqadd_u16(uint16x4_t __p0, int16x4_t __p1) {
  uint16x4_t __ret;
  __ret = (uint16x4_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 17);
  return __ret;
}
#else
__ai uint16x4_t vsqadd_u16(uint16x4_t __p0, int16x4_t __p1) {
  uint16x4_t __ret;
  uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
  int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
  __ret = (uint16x4_t) __builtin_neon_vsqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17);
  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
  return __ret;
}
#endif
#ifdef __LITTLE_ENDIAN__
__ai float64x2_t vsqrtq_f64(float64x2_t __p0) {
  float64x2_t __ret;
  __ret = (float64x2_t) __builtin_neon_vsqrtq_v((int8x16_t)__p0, 42);
  return __ret;
}
#else
__ai float64x2_t vsqrtq_f64(float64x2_t __p0) {
  float64x2_t __ret;
  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
  __ret = (float64x2_t) __builtin_neon_vsqrtq_v((int8x16_t)__rev0, 42);
  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
  return __ret;
}
#endif
#ifdef __LITTLE_ENDIAN__
__ai float32x4_t vsqrtq_f32(float32x4_t __p0) {
  float32x4_t __ret;
  __ret = (float32x4_t) __builtin_neon_vsqrtq_v((int8x16_t)__p0, 41);
  return __ret;
}
#else
__ai float32x4_t vsqrtq_f32(float32x4_t __p0) {
  float32x4_t __ret;
  float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
  __ret = (float32x4_t) __builtin_neon_vsqrtq_v((int8x16_t)__rev0, 41);
  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
  return __ret;
}
#endif
__ai float64x1_t vsqrt_f64(float64x1_t __p0) {
  float64x1_t __ret;
  __ret = (float64x1_t) __builtin_neon_vsqrt_v((int8x8_t)__p0, 10);
  return __ret;
}
#ifdef __LITTLE_ENDIAN__
__ai float32x2_t
vsqrt_f32(float32x2_t __p0) {
  float32x2_t __ret;
  __ret = (float32x2_t) __builtin_neon_vsqrt_v((int8x8_t)__p0, 9);
  return __ret;
}
#else
__ai float32x2_t vsqrt_f32(float32x2_t __p0) {
  float32x2_t __ret;
  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
  __ret = (float32x2_t) __builtin_neon_vsqrt_v((int8x8_t)__rev0, 9);
  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
  return __ret;
}
#endif
#define vsrad_n_u64(__p0, __p1, __p2) __extension__ ({ \
  uint64_t __ret; \
  uint64_t __s0 = __p0; \
  uint64_t __s1 = __p1; \
  __ret = (uint64_t) __builtin_neon_vsrad_n_u64(__s0, __s1, __p2); \
  __ret; \
})
#define vsrad_n_s64(__p0, __p1, __p2) __extension__ ({ \
  int64_t __ret; \
  int64_t __s0 = __p0; \
  int64_t __s1 = __p1; \
  __ret = (int64_t) __builtin_neon_vsrad_n_s64(__s0, __s1, __p2); \
  __ret; \
})
#define vsrid_n_u64(__p0, __p1, __p2) __extension__ ({ \
  uint64_t __ret; \
  uint64_t __s0 = __p0; \
  uint64_t __s1 = __p1; \
  __ret = (uint64_t) __builtin_neon_vsrid_n_u64(__s0, __s1, __p2); \
  __ret; \
})
#define vsrid_n_s64(__p0, __p1, __p2) __extension__ ({ \
  int64_t __ret; \
  int64_t __s0 = __p0; \
  int64_t __s1 = __p1; \
  __ret = (int64_t) __builtin_neon_vsrid_n_s64(__s0, __s1, __p2); \
  __ret; \
})
#define vsri_n_p64(__p0, __p1, __p2) __extension__ ({ \
  poly64x1_t __ret; \
  poly64x1_t __s0 = __p0; \
  poly64x1_t __s1 = __p1; \
  __ret = (poly64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \
  __ret; \
})
#ifdef __LITTLE_ENDIAN__
#define vsriq_n_p64(__p0, __p1, __p2) __extension__ ({ \
  poly64x2_t __ret; \
  poly64x2_t __s0 = __p0; \
  poly64x2_t __s1 = __p1; \
  __ret = (poly64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 38); \
  __ret; \
})
#else
#define vsriq_n_p64(__p0, __p1, __p2) __extension__ ({ \
  poly64x2_t __ret; \
  poly64x2_t __s0 = __p0; \
  poly64x2_t __s1 = __p1; \
  poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
  poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
  __ret = (poly64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 38); \
  __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
  __ret; \
})
#endif
#define vst1_p64(__p0, __p1) __extension__ ({ \
  poly64x1_t __s1 = __p1; \
  __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 6); \
})
#ifdef __LITTLE_ENDIAN__
#define vst1q_p64(__p0, __p1) __extension__ ({ \
  poly64x2_t __s1 = __p1; \
  __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 38); \
})
#else
#define vst1q_p64(__p0, __p1) __extension__ ({ \
  poly64x2_t __s1 = __p1; \
  poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
  __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 38); \
})
#endif
#ifdef __LITTLE_ENDIAN__
#define vst1q_f64(__p0, __p1) __extension__ ({ \
  float64x2_t __s1 = __p1; \
  __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 42); \
})
#else
#define vst1q_f64(__p0, __p1) __extension__ ({ \
  float64x2_t __s1 = __p1; \
  float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
  __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 42); \
})
#endif
#define vst1_f64(__p0, __p1) __extension__ ({ \
  float64x1_t __s1 = __p1; \
  __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 10); \
})
#define vst1_lane_p64(__p0, __p1, __p2) __extension__ ({ \
  poly64x1_t __s1 = __p1; \
  __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 6); \
})
#ifdef __LITTLE_ENDIAN__
#define vst1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \
  poly64x2_t __s1 = __p1; \
  __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 38); \
})
#else
#define vst1q_lane_p64(__p0, __p1, __p2) __extension__
({ \ poly64x2_t __s1 = __p1; \ poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __s1 = __p1; \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 42); \ }) #else #define vst1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __s1 = __p1; \ float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 42); \ }) #endif #define vst1_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1_t __s1 = __p1; \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 10); \ }) #define vst1_p64_x2(__p0, __p1) __extension__ ({ \ poly64x1x2_t __s1 = __p1; \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1q_p64_x2(__p0, __p1) __extension__ ({ \ poly64x2x2_t __s1 = __p1; \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 38); \ }) #else #define vst1q_p64_x2(__p0, __p1) __extension__ ({ \ poly64x2x2_t __s1 = __p1; \ poly64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f64_x2(__p0, __p1) __extension__ ({ \ float64x2x2_t __s1 = __p1; \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 42); \ }) #else #define vst1q_f64_x2(__p0, __p1) __extension__ ({ \ float64x2x2_t __s1 = __p1; \ float64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 42); \ }) #endif #define vst1_f64_x2(__p0, __p1) __extension__ ({ \ float64x1x2_t __s1 = __p1; \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 10); \ }) #define vst1_p64_x3(__p0, __p1) __extension__ ({ \ poly64x1x3_t __s1 = __p1; \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1q_p64_x3(__p0, __p1) __extension__ ({ \ poly64x2x3_t __s1 = __p1; \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 38); \ }) #else #define vst1q_p64_x3(__p0, __p1) __extension__ ({ \ poly64x2x3_t __s1 = __p1; \ poly64x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f64_x3(__p0, __p1) __extension__ ({ \ float64x2x3_t __s1 = __p1; \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 42); \ }) #else #define vst1q_f64_x3(__p0, __p1) __extension__ ({ \ float64x2x3_t __s1 = __p1; \ float64x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ 
__rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 42); \ }) #endif #define vst1_f64_x3(__p0, __p1) __extension__ ({ \ float64x1x3_t __s1 = __p1; \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 10); \ }) #define vst1_p64_x4(__p0, __p1) __extension__ ({ \ poly64x1x4_t __s1 = __p1; \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1q_p64_x4(__p0, __p1) __extension__ ({ \ poly64x2x4_t __s1 = __p1; \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 38); \ }) #else #define vst1q_p64_x4(__p0, __p1) __extension__ ({ \ poly64x2x4_t __s1 = __p1; \ poly64x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f64_x4(__p0, __p1) __extension__ ({ \ float64x2x4_t __s1 = __p1; \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 42); \ }) #else #define vst1q_f64_x4(__p0, __p1) __extension__ ({ \ float64x2x4_t __s1 = __p1; \ float64x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 42); \ }) #endif #define vst1_f64_x4(__p0, __p1) __extension__ ({ \ float64x1x4_t __s1 = __p1; \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 10); \ }) #define vst2_p64(__p0, __p1) __extension__ ({ \ poly64x1x2_t __s1 = __p1; \ __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vst2q_p64(__p0, __p1) __extension__ ({ \ poly64x2x2_t __s1 = __p1; \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 38); \ }) #else #define vst2q_p64(__p0, __p1) __extension__ ({ \ poly64x2x2_t __s1 = __p1; \ poly64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_u64(__p0, __p1) __extension__ ({ \ uint64x2x2_t __s1 = __p1; \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 51); \ }) #else #define vst2q_u64(__p0, __p1) __extension__ ({ \ uint64x2x2_t __s1 = __p1; \ uint64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], 
__s1.val[1], 1, 0); \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_f64(__p0, __p1) __extension__ ({ \ float64x2x2_t __s1 = __p1; \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 42); \ }) #else #define vst2q_f64(__p0, __p1) __extension__ ({ \ float64x2x2_t __s1 = __p1; \ float64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 42); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_s64(__p0, __p1) __extension__ ({ \ int64x2x2_t __s1 = __p1; \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 35); \ }) #else #define vst2q_s64(__p0, __p1) __extension__ ({ \ int64x2x2_t __s1 = __p1; \ int64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 35); \ }) #endif #define vst2_f64(__p0, __p1) __extension__ ({ \ float64x1x2_t __s1 = __p1; \ __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 10); \ }) #define vst2_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1x2_t __s1 = __p1; \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x2_t __s1 = __p1; \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 36); \ }) #else #define vst2q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x2_t __s1 = __p1; \ poly8x16x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x2_t __s1 = __p1; \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 38); \ }) #else #define vst2q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x2_t __s1 = __p1; \ poly64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x2_t __s1 = __p1; \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 48); \ }) #else #define vst2q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x2_t __s1 = __p1; \ uint8x16x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_u64(__p0, 
__p1, __p2) __extension__ ({ \ uint64x2x2_t __s1 = __p1; \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 51); \ }) #else #define vst2q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2x2_t __s1 = __p1; \ uint64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x2_t __s1 = __p1; \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 32); \ }) #else #define vst2q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x2_t __s1 = __p1; \ int8x16x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x2_t __s1 = __p1; \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 42); \ }) #else #define vst2q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x2_t __s1 = __p1; \ float64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 42); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x2_t __s1 = __p1; \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 35); \ }) #else #define vst2q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x2_t __s1 = __p1; \ int64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 35); \ }) #endif #define vst2_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1x2_t __s1 = __p1; \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 19); \ }) #define vst2_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1x2_t __s1 = __p1; \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 10); \ }) #define vst2_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1x2_t __s1 = __p1; \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 3); \ }) #define vst3_p64(__p0, __p1) __extension__ ({ \ poly64x1x3_t __s1 = __p1; \ __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vst3q_p64(__p0, __p1) __extension__ ({ \ poly64x2x3_t __s1 = __p1; \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 38); \ }) #else #define vst3q_p64(__p0, __p1) __extension__ ({ \ poly64x2x3_t __s1 = __p1; \ poly64x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = 
__builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_u64(__p0, __p1) __extension__ ({ \ uint64x2x3_t __s1 = __p1; \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 51); \ }) #else #define vst3q_u64(__p0, __p1) __extension__ ({ \ uint64x2x3_t __s1 = __p1; \ uint64x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_f64(__p0, __p1) __extension__ ({ \ float64x2x3_t __s1 = __p1; \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 42); \ }) #else #define vst3q_f64(__p0, __p1) __extension__ ({ \ float64x2x3_t __s1 = __p1; \ float64x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 42); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_s64(__p0, __p1) __extension__ ({ \ int64x2x3_t __s1 = __p1; \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 35); \ }) #else #define vst3q_s64(__p0, __p1) __extension__ ({ \ int64x2x3_t __s1 = __p1; \ int64x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 35); \ }) #endif #define vst3_f64(__p0, __p1) __extension__ ({ \ float64x1x3_t __s1 = __p1; \ __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 10); \ }) #define vst3_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1x3_t __s1 = __p1; \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x3_t __s1 = __p1; \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 36); \ }) #else #define vst3q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x3_t __s1 = __p1; \ poly8x16x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define 
vst3q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x3_t __s1 = __p1; \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 38); \ }) #else #define vst3q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x3_t __s1 = __p1; \ poly64x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x3_t __s1 = __p1; \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 48); \ }) #else #define vst3q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x3_t __s1 = __p1; \ uint8x16x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2x3_t __s1 = __p1; \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 51); \ }) #else #define vst3q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2x3_t __s1 = __p1; \ uint64x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x3_t __s1 = __p1; \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 32); \ }) #else #define vst3q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x3_t __s1 = __p1; \ int8x16x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x3_t __s1 = __p1; \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 42); \ }) #else #define vst3q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x3_t __s1 = __p1; \ float64x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = 
__builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 42); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x3_t __s1 = __p1; \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 35); \ }) #else #define vst3q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x3_t __s1 = __p1; \ int64x2x3_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 35); \ }) #endif #define vst3_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1x3_t __s1 = __p1; \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 19); \ }) #define vst3_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1x3_t __s1 = __p1; \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 10); \ }) #define vst3_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1x3_t __s1 = __p1; \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 3); \ }) #define vst4_p64(__p0, __p1) __extension__ ({ \ poly64x1x4_t __s1 = __p1; \ __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vst4q_p64(__p0, __p1) __extension__ ({ \ poly64x2x4_t __s1 = __p1; \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 38); \ }) #else #define vst4q_p64(__p0, __p1) __extension__ ({ \ poly64x2x4_t __s1 = __p1; \ poly64x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_u64(__p0, __p1) __extension__ ({ \ uint64x2x4_t __s1 = __p1; \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 51); \ }) #else #define vst4q_u64(__p0, __p1) __extension__ ({ \ uint64x2x4_t __s1 = __p1; \ uint64x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_f64(__p0, __p1) __extension__ ({ \ float64x2x4_t __s1 = __p1; \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], 
(int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 42); \ }) #else #define vst4q_f64(__p0, __p1) __extension__ ({ \ float64x2x4_t __s1 = __p1; \ float64x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 42); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_s64(__p0, __p1) __extension__ ({ \ int64x2x4_t __s1 = __p1; \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 35); \ }) #else #define vst4q_s64(__p0, __p1) __extension__ ({ \ int64x2x4_t __s1 = __p1; \ int64x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 35); \ }) #endif #define vst4_f64(__p0, __p1) __extension__ ({ \ float64x1x4_t __s1 = __p1; \ __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 10); \ }) #define vst4_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1x4_t __s1 = __p1; \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x4_t __s1 = __p1; \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 36); \ }) #else #define vst4q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x4_t __s1 = __p1; \ poly8x16x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x4_t __s1 = __p1; \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 38); \ }) #else #define vst4q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x4_t __s1 = __p1; \ poly64x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], 
__s1.val[3], 1, 0); \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x4_t __s1 = __p1; \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 48); \ }) #else #define vst4q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x4_t __s1 = __p1; \ uint8x16x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2x4_t __s1 = __p1; \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 51); \ }) #else #define vst4q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2x4_t __s1 = __p1; \ uint64x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x4_t __s1 = __p1; \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 32); \ }) #else #define vst4q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x4_t __s1 = __p1; \ int8x16x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x4_t __s1 = __p1; \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 42); \ }) #else #define vst4q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x4_t __s1 = __p1; \ float64x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = 
__builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 42); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x4_t __s1 = __p1; \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 35); \ }) #else #define vst4q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x4_t __s1 = __p1; \ int64x2x4_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 35); \ }) #endif #define vst4_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1x4_t __s1 = __p1; \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 19); \ }) #define vst4_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1x4_t __s1 = __p1; \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 10); \ }) #define vst4_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1x4_t __s1 = __p1; \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 3); \ }) #define vstrq_p128(__p0, __p1) __extension__ ({ \ poly128_t __s1 = __p1; \ __builtin_neon_vstrq_p128(__p0, __s1); \ }) __ai uint64_t vsubd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vsubd_u64(__p0, __p1); return __ret; } __ai int64_t vsubd_s64(int64_t __p0, int64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vsubd_s64(__p0, __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vsubq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = __p0 - __p1; return __ret; } #else __ai float64x2_t vsubq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 - __rev1; __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vsub_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; __ret = __p0 - __p1; return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vsubhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint16x8_t __ret; __ret = vcombine_u16(__p0, vsubhn_u32(__p1, __p2)); return __ret; } #else __ai uint16x8_t vsubhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint16x8_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __noswap_vcombine_u16(__rev0, __noswap_vsubhn_u32(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 7, 
6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vsubhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint32x4_t __ret; __ret = vcombine_u32(__p0, vsubhn_u64(__p1, __p2)); return __ret; } #else __ai uint32x4_t vsubhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint32x4_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __noswap_vcombine_u32(__rev0, __noswap_vsubhn_u64(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vsubhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint8x16_t __ret; __ret = vcombine_u8(__p0, vsubhn_u16(__p1, __p2)); return __ret; } #else __ai uint8x16_t vsubhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint8x16_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vcombine_u8(__rev0, __noswap_vsubhn_u16(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vsubhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int16x8_t __ret; __ret = vcombine_s16(__p0, vsubhn_s32(__p1, __p2)); return __ret; } #else __ai int16x8_t vsubhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int16x8_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __noswap_vcombine_s16(__rev0, __noswap_vsubhn_s32(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vsubhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int32x4_t __ret; __ret = vcombine_s32(__p0, vsubhn_s64(__p1, __p2)); return __ret; } #else __ai int32x4_t vsubhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int32x4_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __noswap_vcombine_s32(__rev0, __noswap_vsubhn_s64(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vsubhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int8x16_t __ret; __ret = vcombine_s8(__p0, vsubhn_s16(__p1, __p2)); return __ret; } #else __ai int8x16_t vsubhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int8x16_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vcombine_s8(__rev0, __noswap_vsubhn_s16(__rev1, __rev2)); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 
6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vsubl_high_u8(uint8x16_t __p0, uint8x16_t __p1) { uint16x8_t __ret; __ret = vmovl_high_u8(__p0) - vmovl_high_u8(__p1); return __ret; } #else __ai uint16x8_t vsubl_high_u8(uint8x16_t __p0, uint8x16_t __p1) { uint16x8_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmovl_high_u8(__rev0) - __noswap_vmovl_high_u8(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vsubl_high_u32(uint32x4_t __p0, uint32x4_t __p1) { uint64x2_t __ret; __ret = vmovl_high_u32(__p0) - vmovl_high_u32(__p1); return __ret; } #else __ai uint64x2_t vsubl_high_u32(uint32x4_t __p0, uint32x4_t __p1) { uint64x2_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vmovl_high_u32(__rev0) - __noswap_vmovl_high_u32(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vsubl_high_u16(uint16x8_t __p0, uint16x8_t __p1) { uint32x4_t __ret; __ret = vmovl_high_u16(__p0) - vmovl_high_u16(__p1); return __ret; } #else __ai uint32x4_t vsubl_high_u16(uint16x8_t __p0, uint16x8_t __p1) { uint32x4_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmovl_high_u16(__rev0) - __noswap_vmovl_high_u16(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vsubl_high_s8(int8x16_t __p0, int8x16_t __p1) { int16x8_t __ret; __ret = vmovl_high_s8(__p0) - vmovl_high_s8(__p1); return __ret; } #else __ai int16x8_t vsubl_high_s8(int8x16_t __p0, int8x16_t __p1) { int16x8_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmovl_high_s8(__rev0) - __noswap_vmovl_high_s8(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vsubl_high_s32(int32x4_t __p0, int32x4_t __p1) { int64x2_t __ret; __ret = vmovl_high_s32(__p0) - vmovl_high_s32(__p1); return __ret; } #else __ai int64x2_t vsubl_high_s32(int32x4_t __p0, int32x4_t __p1) { int64x2_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vmovl_high_s32(__rev0) - __noswap_vmovl_high_s32(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vsubl_high_s16(int16x8_t __p0, int16x8_t __p1) { int32x4_t __ret; __ret = vmovl_high_s16(__p0) - vmovl_high_s16(__p1); return __ret; } #else __ai int32x4_t vsubl_high_s16(int16x8_t __p0, int16x8_t __p1) { int32x4_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 
1, 0); __ret = __noswap_vmovl_high_s16(__rev0) - __noswap_vmovl_high_s16(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vsubw_high_u8(uint16x8_t __p0, uint8x16_t __p1) { uint16x8_t __ret; __ret = __p0 - vmovl_high_u8(__p1); return __ret; } #else __ai uint16x8_t vsubw_high_u8(uint16x8_t __p0, uint8x16_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __noswap_vmovl_high_u8(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vsubw_high_u32(uint64x2_t __p0, uint32x4_t __p1) { uint64x2_t __ret; __ret = __p0 - vmovl_high_u32(__p1); return __ret; } #else __ai uint64x2_t vsubw_high_u32(uint64x2_t __p0, uint32x4_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 - __noswap_vmovl_high_u32(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vsubw_high_u16(uint32x4_t __p0, uint16x8_t __p1) { uint32x4_t __ret; __ret = __p0 - vmovl_high_u16(__p1); return __ret; } #else __ai uint32x4_t vsubw_high_u16(uint32x4_t __p0, uint16x8_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __noswap_vmovl_high_u16(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vsubw_high_s8(int16x8_t __p0, int8x16_t __p1) { int16x8_t __ret; __ret = __p0 - vmovl_high_s8(__p1); return __ret; } #else __ai int16x8_t vsubw_high_s8(int16x8_t __p0, int8x16_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __noswap_vmovl_high_s8(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vsubw_high_s32(int64x2_t __p0, int32x4_t __p1) { int64x2_t __ret; __ret = __p0 - vmovl_high_s32(__p1); return __ret; } #else __ai int64x2_t vsubw_high_s32(int64x2_t __p0, int32x4_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 - __noswap_vmovl_high_s32(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vsubw_high_s16(int32x4_t __p0, int16x8_t __p1) { int32x4_t __ret; __ret = __p0 - vmovl_high_s16(__p1); return __ret; } #else __ai int32x4_t vsubw_high_s16(int32x4_t __p0, int16x8_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __noswap_vmovl_high_s16(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t 
vtrn1_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 8, 2, 10, 4, 12, 6, 14); return __ret; } #else __ai poly8x8_t vtrn1_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 2, 10, 4, 12, 6, 14); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vtrn1_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 4, 2, 6); return __ret; } #else __ai poly16x4_t vtrn1_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 2, 6); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vtrn1q_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30); return __ret; } #else __ai poly8x16_t vtrn1q_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vtrn1q_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai poly64x2_t vtrn1q_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vtrn1q_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 8, 2, 10, 4, 12, 6, 14); return __ret; } #else __ai poly16x8_t vtrn1q_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 2, 10, 4, 12, 6, 14); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vtrn1q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30); return __ret; } #else __ai uint8x16_t vtrn1q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vtrn1q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 4, 2, 6); return __ret; } #else __ai uint32x4_t vtrn1q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 2, 6); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vtrn1q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai uint64x2_t vtrn1q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vtrn1q_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 8, 2, 10, 4, 12, 6, 14); return __ret; } #else __ai uint16x8_t vtrn1q_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 2, 10, 4, 12, 6, 14); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vtrn1q_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30); return __ret; } #else __ai int8x16_t vtrn1q_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vtrn1q_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai float64x2_t vtrn1q_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vtrn1q_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 4, 2, 6); return __ret; } #else 
__ai float32x4_t vtrn1q_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 2, 6); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vtrn1q_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 4, 2, 6); return __ret; } #else __ai int32x4_t vtrn1q_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 2, 6); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vtrn1q_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai int64x2_t vtrn1q_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vtrn1q_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 8, 2, 10, 4, 12, 6, 14); return __ret; } #else __ai int16x8_t vtrn1q_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 2, 10, 4, 12, 6, 14); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vtrn1_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 8, 2, 10, 4, 12, 6, 14); return __ret; } #else __ai uint8x8_t vtrn1_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 2, 10, 4, 12, 6, 14); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vtrn1_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai uint32x2_t vtrn1_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vtrn1_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 4, 2, 6); return __ret; } #else __ai uint16x4_t vtrn1_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 
3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 2, 6); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vtrn1_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 8, 2, 10, 4, 12, 6, 14); return __ret; } #else __ai int8x8_t vtrn1_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 2, 10, 4, 12, 6, 14); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vtrn1_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai float32x2_t vtrn1_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vtrn1_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai int32x2_t vtrn1_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vtrn1_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 4, 2, 6); return __ret; } #else __ai int16x4_t vtrn1_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 2, 6); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vtrn2_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 9, 3, 11, 5, 13, 7, 15); return __ret; } #else __ai poly8x8_t vtrn2_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 9, 3, 11, 5, 13, 7, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vtrn2_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 5, 3, 7); return __ret; } #else __ai poly16x4_t vtrn2_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 5, 3, 7); __ret = 
__builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vtrn2q_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31); return __ret; } #else __ai poly8x16_t vtrn2q_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vtrn2q_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai poly64x2_t vtrn2q_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vtrn2q_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 9, 3, 11, 5, 13, 7, 15); return __ret; } #else __ai poly16x8_t vtrn2q_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 9, 3, 11, 5, 13, 7, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vtrn2q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31); return __ret; } #else __ai uint8x16_t vtrn2q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vtrn2q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 5, 3, 7); return __ret; } #else __ai uint32x4_t vtrn2q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 5, 3, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vtrn2q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai 
uint64x2_t vtrn2q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vtrn2q_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 9, 3, 11, 5, 13, 7, 15); return __ret; } #else __ai uint16x8_t vtrn2q_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 9, 3, 11, 5, 13, 7, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vtrn2q_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31); return __ret; } #else __ai int8x16_t vtrn2q_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vtrn2q_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai float64x2_t vtrn2q_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vtrn2q_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 5, 3, 7); return __ret; } #else __ai float32x4_t vtrn2q_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 5, 3, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vtrn2q_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 5, 3, 7); return __ret; } #else __ai int32x4_t vtrn2q_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 5, 3, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vtrn2q_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = 
__builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai int64x2_t vtrn2q_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vtrn2q_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 9, 3, 11, 5, 13, 7, 15); return __ret; } #else __ai int16x8_t vtrn2q_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 9, 3, 11, 5, 13, 7, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vtrn2_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 9, 3, 11, 5, 13, 7, 15); return __ret; } #else __ai uint8x8_t vtrn2_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 9, 3, 11, 5, 13, 7, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vtrn2_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai uint32x2_t vtrn2_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vtrn2_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 5, 3, 7); return __ret; } #else __ai uint16x4_t vtrn2_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 5, 3, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vtrn2_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 9, 3, 11, 5, 13, 7, 15); return __ret; } #else __ai int8x8_t vtrn2_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 9, 3, 11, 5, 13, 7, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vtrn2_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai float32x2_t 
vtrn2_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vtrn2_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai int32x2_t vtrn2_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vtrn2_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 5, 3, 7); return __ret; } #else __ai int16x4_t vtrn2_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 5, 3, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif __ai uint64x1_t vtst_p64(poly64x1_t __p0, poly64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vtstq_p64(poly64x2_t __p0, poly64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else __ai uint64x2_t vtstq_p64(poly64x2_t __p0, poly64x2_t __p1) { uint64x2_t __ret; poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vtstq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else __ai uint64x2_t vtstq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vtstq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else __ai uint64x2_t vtstq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai uint64x1_t vtst_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } __ai 
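/* vtst/vtstq/vtstd: per-lane bit test. Each result lane is all ones when
 * (a & b) is non-zero for that lane and zero otherwise; the "d"-suffixed
 * forms operate on a single 64-bit scalar. The vuqadd* family that follows
 * (SUQADD) adds an unsigned operand to a signed accumulator with signed
 * saturation, e.g. vuqaddb_s8(127, 10) saturates to 127. */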
uint64x1_t vtst_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } __ai uint64_t vtstd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vtstd_u64(__p0, __p1); return __ret; } __ai uint64_t vtstd_s64(int64_t __p0, int64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vtstd_s64(__p0, __p1); return __ret; } __ai int8_t vuqaddb_s8(int8_t __p0, uint8_t __p1) { int8_t __ret; __ret = (int8_t) __builtin_neon_vuqaddb_s8(__p0, __p1); return __ret; } __ai int32_t vuqadds_s32(int32_t __p0, uint32_t __p1) { int32_t __ret; __ret = (int32_t) __builtin_neon_vuqadds_s32(__p0, __p1); return __ret; } __ai int64_t vuqaddd_s64(int64_t __p0, uint64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vuqaddd_s64(__p0, __p1); return __ret; } __ai int16_t vuqaddh_s16(int16_t __p0, uint16_t __p1) { int16_t __ret; __ret = (int16_t) __builtin_neon_vuqaddh_s16(__p0, __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vuqaddq_s8(int8x16_t __p0, uint8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vuqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else __ai int8x16_t vuqaddq_s8(int8x16_t __p0, uint8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vuqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vuqaddq_s32(int32x4_t __p0, uint32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vuqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else __ai int32x4_t vuqaddq_s32(int32x4_t __p0, uint32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vuqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vuqaddq_s64(int64x2_t __p0, uint64x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vuqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); return __ret; } #else __ai int64x2_t vuqaddq_s64(int64x2_t __p0, uint64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int64x2_t) __builtin_neon_vuqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vuqaddq_s16(int16x8_t __p0, uint16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vuqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else __ai int16x8_t vuqaddq_s16(int16x8_t __p0, uint16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vuqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = 
__builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vuqadd_s8(int8x8_t __p0, uint8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vuqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else __ai int8x8_t vuqadd_s8(int8x8_t __p0, uint8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x8_t) __builtin_neon_vuqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vuqadd_s32(int32x2_t __p0, uint32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vuqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else __ai int32x2_t vuqadd_s32(int32x2_t __p0, uint32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int32x2_t) __builtin_neon_vuqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai int64x1_t vuqadd_s64(int64x1_t __p0, uint64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vuqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vuqadd_s16(int16x4_t __p0, uint16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vuqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else __ai int16x4_t vuqadd_s16(int16x4_t __p0, uint16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int16x4_t) __builtin_neon_vuqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vuzp1_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6, 8, 10, 12, 14); return __ret; } #else __ai poly8x8_t vuzp1_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vuzp1_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6); return __ret; } #else __ai poly16x4_t vuzp1_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vuzp1q_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); return __ret; } #else __ai poly8x16_t vuzp1q_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t 
__ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vuzp1q_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai poly64x2_t vuzp1q_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vuzp1q_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6, 8, 10, 12, 14); return __ret; } #else __ai poly16x8_t vuzp1q_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vuzp1q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); return __ret; } #else __ai uint8x16_t vuzp1q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vuzp1q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6); return __ret; } #else __ai uint32x4_t vuzp1q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vuzp1q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai uint64x2_t vuzp1q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai 
uint16x8_t vuzp1q_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6, 8, 10, 12, 14); return __ret; } #else __ai uint16x8_t vuzp1q_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vuzp1q_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); return __ret; } #else __ai int8x16_t vuzp1q_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vuzp1q_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai float64x2_t vuzp1q_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vuzp1q_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6); return __ret; } #else __ai float32x4_t vuzp1q_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vuzp1q_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6); return __ret; } #else __ai int32x4_t vuzp1q_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vuzp1q_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai int64x2_t vuzp1q_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } 
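/* vuzp1/vuzp2: de-interleave (unzip) two vectors. vuzp1 gathers the
 * even-indexed lanes of both operands and vuzp2 the odd-indexed lanes.
 * For example, with a = {0,1,2,3} and b = {4,5,6,7},
 * vuzp1q_s32(a, b) = {0,2,4,6} and vuzp2q_s32(a, b) = {1,3,5,7}. */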
#endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vuzp1q_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6, 8, 10, 12, 14); return __ret; } #else __ai int16x8_t vuzp1q_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vuzp1_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6, 8, 10, 12, 14); return __ret; } #else __ai uint8x8_t vuzp1_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vuzp1_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai uint32x2_t vuzp1_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vuzp1_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6); return __ret; } #else __ai uint16x4_t vuzp1_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vuzp1_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6, 8, 10, 12, 14); return __ret; } #else __ai int8x8_t vuzp1_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vuzp1_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai float32x2_t vuzp1_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vuzp1_s32(int32x2_t __p0, int32x2_t 
__p1) { int32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai int32x2_t vuzp1_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vuzp1_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6); return __ret; } #else __ai int16x4_t vuzp1_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vuzp2_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7, 9, 11, 13, 15); return __ret; } #else __ai poly8x8_t vuzp2_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vuzp2_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7); return __ret; } #else __ai poly16x4_t vuzp2_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vuzp2q_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); return __ret; } #else __ai poly8x16_t vuzp2q_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vuzp2q_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai poly64x2_t vuzp2q_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vuzp2q_p16(poly16x8_t __p0, 
poly16x8_t __p1) { poly16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7, 9, 11, 13, 15); return __ret; } #else __ai poly16x8_t vuzp2q_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vuzp2q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); return __ret; } #else __ai uint8x16_t vuzp2q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vuzp2q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7); return __ret; } #else __ai uint32x4_t vuzp2q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vuzp2q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai uint64x2_t vuzp2q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vuzp2q_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7, 9, 11, 13, 15); return __ret; } #else __ai uint16x8_t vuzp2q_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vuzp2q_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); return __ret; } #else __ai int8x16_t vuzp2q_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 
14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vuzp2q_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai float64x2_t vuzp2q_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vuzp2q_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7); return __ret; } #else __ai float32x4_t vuzp2q_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vuzp2q_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7); return __ret; } #else __ai int32x4_t vuzp2q_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vuzp2q_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai int64x2_t vuzp2q_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vuzp2q_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7, 9, 11, 13, 15); return __ret; } #else __ai int16x8_t vuzp2q_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vuzp2_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7, 9, 11, 13, 15); return __ret; } #else __ai uint8x8_t vuzp2_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = 
__builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vuzp2_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai uint32x2_t vuzp2_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vuzp2_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7); return __ret; } #else __ai uint16x4_t vuzp2_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vuzp2_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7, 9, 11, 13, 15); return __ret; } #else __ai int8x8_t vuzp2_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vuzp2_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai float32x2_t vuzp2_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vuzp2_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai int32x2_t vuzp2_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vuzp2_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7); return __ret; } #else __ai int16x4_t vuzp2_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vzip1_p8(poly8x8_t __p0, poly8x8_t __p1) { 
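/* vzip1: interleave (zip) the low halves of two vectors; vzip2, defined
 * later in the file, interleaves the high halves. For example, with
 * a = {0,1,2,3} and b = {4,5,6,7}, vzip1q_s32(a, b) = {0,4,1,5}. */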
poly8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 8, 1, 9, 2, 10, 3, 11); return __ret; } #else __ai poly8x8_t vzip1_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 1, 9, 2, 10, 3, 11); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vzip1_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 4, 1, 5); return __ret; } #else __ai poly16x4_t vzip1_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 1, 5); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vzip1q_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); return __ret; } #else __ai poly8x16_t vzip1q_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vzip1q_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai poly64x2_t vzip1q_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vzip1q_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 8, 1, 9, 2, 10, 3, 11); return __ret; } #else __ai poly16x8_t vzip1q_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 1, 9, 2, 10, 3, 11); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vzip1q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); return __ret; } #else __ai uint8x16_t vzip1q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 
5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vzip1q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 4, 1, 5); return __ret; } #else __ai uint32x4_t vzip1q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 1, 5); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vzip1q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai uint64x2_t vzip1q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vzip1q_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 8, 1, 9, 2, 10, 3, 11); return __ret; } #else __ai uint16x8_t vzip1q_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 1, 9, 2, 10, 3, 11); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vzip1q_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); return __ret; } #else __ai int8x16_t vzip1q_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vzip1q_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai float64x2_t vzip1q_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vzip1q_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 4, 1, 5); return __ret; } #else __ai float32x4_t vzip1q_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; 
float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 1, 5); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vzip1q_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 4, 1, 5); return __ret; } #else __ai int32x4_t vzip1q_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 1, 5); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vzip1q_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai int64x2_t vzip1q_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vzip1q_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 8, 1, 9, 2, 10, 3, 11); return __ret; } #else __ai int16x8_t vzip1q_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 1, 9, 2, 10, 3, 11); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vzip1_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 8, 1, 9, 2, 10, 3, 11); return __ret; } #else __ai uint8x8_t vzip1_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 1, 9, 2, 10, 3, 11); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vzip1_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai uint32x2_t vzip1_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vzip1_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 4, 1, 5); return __ret; } #else __ai uint16x4_t vzip1_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); 
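/* __rev0 and __rev1 now hold the operands in little-endian lane order; the next
 * two statements interleave their low halves using the little-endian indices
 * 0, 4, 1, 5 and then restore big-endian lane order in the result. */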
__ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 1, 5); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vzip1_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 8, 1, 9, 2, 10, 3, 11); return __ret; } #else __ai int8x8_t vzip1_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 1, 9, 2, 10, 3, 11); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vzip1_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai float32x2_t vzip1_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vzip1_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2); return __ret; } #else __ai int32x2_t vzip1_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vzip1_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 4, 1, 5); return __ret; } #else __ai int16x4_t vzip1_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 1, 5); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vzip2_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 4, 12, 5, 13, 6, 14, 7, 15); return __ret; } #else __ai poly8x8_t vzip2_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 4, 12, 5, 13, 6, 14, 7, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vzip2_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 2, 6, 3, 7); return __ret; } #else __ai poly16x4_t vzip2_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 2, 6, 3, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef 
__LITTLE_ENDIAN__ __ai poly8x16_t vzip2q_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); return __ret; } #else __ai poly8x16_t vzip2q_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vzip2q_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai poly64x2_t vzip2q_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vzip2q_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 4, 12, 5, 13, 6, 14, 7, 15); return __ret; } #else __ai poly16x8_t vzip2q_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 4, 12, 5, 13, 6, 14, 7, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vzip2q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); return __ret; } #else __ai uint8x16_t vzip2q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vzip2q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 2, 6, 3, 7); return __ret; } #else __ai uint32x4_t vzip2q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 2, 6, 3, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vzip2q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai uint64x2_t vzip2q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; 
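/* vzip2 selects the high halves of its operands; for two-lane vectors such as
 * uint64x2_t this is simply { a[1], b[1] } (indices 1 and 3 of the combined
 * shuffle). The reverse/shuffle/reverse sequence below keeps those lane
 * semantics identical on big-endian targets. */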
uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vzip2q_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 4, 12, 5, 13, 6, 14, 7, 15); return __ret; } #else __ai uint16x8_t vzip2q_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 4, 12, 5, 13, 6, 14, 7, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vzip2q_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); return __ret; } #else __ai int8x16_t vzip2q_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vzip2q_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai float64x2_t vzip2q_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vzip2q_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 2, 6, 3, 7); return __ret; } #else __ai float32x4_t vzip2q_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 2, 6, 3, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vzip2q_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 2, 6, 3, 7); return __ret; } #else __ai int32x4_t vzip2q_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 2, 6, 3, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vzip2q_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai int64x2_t 
vzip2q_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vzip2q_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 4, 12, 5, 13, 6, 14, 7, 15); return __ret; } #else __ai int16x8_t vzip2q_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 4, 12, 5, 13, 6, 14, 7, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vzip2_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 4, 12, 5, 13, 6, 14, 7, 15); return __ret; } #else __ai uint8x8_t vzip2_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 4, 12, 5, 13, 6, 14, 7, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vzip2_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai uint32x2_t vzip2_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vzip2_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 2, 6, 3, 7); return __ret; } #else __ai uint16x4_t vzip2_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 2, 6, 3, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vzip2_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 4, 12, 5, 13, 6, 14, 7, 15); return __ret; } #else __ai int8x8_t vzip2_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 4, 12, 5, 13, 6, 14, 7, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vzip2_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai float32x2_t vzip2_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; float32x2_t 
__rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vzip2_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3); return __ret; } #else __ai int32x2_t vzip2_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vzip2_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 2, 6, 3, 7); return __ret; } #else __ai int16x4_t vzip2_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 2, 6, 3, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif __ai __attribute__((target("aes"))) poly128_t vmull_p64(poly64_t __p0, poly64_t __p1) { poly128_t __ret; __ret = (poly128_t) __builtin_neon_vmull_p64(__p0, __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("aes"))) poly128_t vmull_high_p64(poly64x2_t __p0, poly64x2_t __p1) { poly128_t __ret; __ret = vmull_p64((poly64_t)(vget_high_p64(__p0)), (poly64_t)(vget_high_p64(__p1))); return __ret; } #else __ai __attribute__((target("aes"))) poly128_t vmull_high_p64(poly64x2_t __p0, poly64x2_t __p1) { poly128_t __ret; poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = vmull_p64((poly64_t)(__noswap_vget_high_p64(__rev0)), (poly64_t)(__noswap_vget_high_p64(__rev1))); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) bfloat16x8_t __a64_vcvtq_low_bf16_f32(float32x4_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t) __builtin_neon___a64_vcvtq_low_bf16_f32((int8x16_t)__p0, 43); return __ret; } #else __ai __attribute__((target("bf16"))) bfloat16x8_t __a64_vcvtq_low_bf16_f32(float32x4_t __p0) { bfloat16x8_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (bfloat16x8_t) __builtin_neon___a64_vcvtq_low_bf16_f32((int8x16_t)__rev0, 43); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t __noswap___a64_vcvtq_low_bf16_f32(float32x4_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t) __builtin_neon___a64_vcvtq_low_bf16_f32((int8x16_t)__p0, 43); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_lane_bf16(__p0_739, __p1_739, __p2_739, __p3_739) __extension__ ({ \ bfloat16x8_t __ret_739; \ bfloat16x8_t __s0_739 = __p0_739; \ bfloat16x4_t __s2_739 = __p2_739; \ __ret_739 = vsetq_lane_bf16(vget_lane_bf16(__s2_739, __p3_739), __s0_739, __p1_739); \ __ret_739; \ }) #else #define vcopyq_lane_bf16(__p0_740, __p1_740, __p2_740, __p3_740) __extension__ ({ \ bfloat16x8_t __ret_740; \ bfloat16x8_t __s0_740 = __p0_740; \ bfloat16x4_t __s2_740 = __p2_740; \ bfloat16x8_t __rev0_740; __rev0_740 = 
__builtin_shufflevector(__s0_740, __s0_740, 7, 6, 5, 4, 3, 2, 1, 0); \ bfloat16x4_t __rev2_740; __rev2_740 = __builtin_shufflevector(__s2_740, __s2_740, 3, 2, 1, 0); \ __ret_740 = __noswap_vsetq_lane_bf16(__noswap_vget_lane_bf16(__rev2_740, __p3_740), __rev0_740, __p1_740); \ __ret_740 = __builtin_shufflevector(__ret_740, __ret_740, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_740; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_lane_bf16(__p0_741, __p1_741, __p2_741, __p3_741) __extension__ ({ \ bfloat16x4_t __ret_741; \ bfloat16x4_t __s0_741 = __p0_741; \ bfloat16x4_t __s2_741 = __p2_741; \ __ret_741 = vset_lane_bf16(vget_lane_bf16(__s2_741, __p3_741), __s0_741, __p1_741); \ __ret_741; \ }) #else #define vcopy_lane_bf16(__p0_742, __p1_742, __p2_742, __p3_742) __extension__ ({ \ bfloat16x4_t __ret_742; \ bfloat16x4_t __s0_742 = __p0_742; \ bfloat16x4_t __s2_742 = __p2_742; \ bfloat16x4_t __rev0_742; __rev0_742 = __builtin_shufflevector(__s0_742, __s0_742, 3, 2, 1, 0); \ bfloat16x4_t __rev2_742; __rev2_742 = __builtin_shufflevector(__s2_742, __s2_742, 3, 2, 1, 0); \ __ret_742 = __noswap_vset_lane_bf16(__noswap_vget_lane_bf16(__rev2_742, __p3_742), __rev0_742, __p1_742); \ __ret_742 = __builtin_shufflevector(__ret_742, __ret_742, 3, 2, 1, 0); \ __ret_742; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_laneq_bf16(__p0_743, __p1_743, __p2_743, __p3_743) __extension__ ({ \ bfloat16x8_t __ret_743; \ bfloat16x8_t __s0_743 = __p0_743; \ bfloat16x8_t __s2_743 = __p2_743; \ __ret_743 = vsetq_lane_bf16(vgetq_lane_bf16(__s2_743, __p3_743), __s0_743, __p1_743); \ __ret_743; \ }) #else #define vcopyq_laneq_bf16(__p0_744, __p1_744, __p2_744, __p3_744) __extension__ ({ \ bfloat16x8_t __ret_744; \ bfloat16x8_t __s0_744 = __p0_744; \ bfloat16x8_t __s2_744 = __p2_744; \ bfloat16x8_t __rev0_744; __rev0_744 = __builtin_shufflevector(__s0_744, __s0_744, 7, 6, 5, 4, 3, 2, 1, 0); \ bfloat16x8_t __rev2_744; __rev2_744 = __builtin_shufflevector(__s2_744, __s2_744, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_744 = __noswap_vsetq_lane_bf16(__noswap_vgetq_lane_bf16(__rev2_744, __p3_744), __rev0_744, __p1_744); \ __ret_744 = __builtin_shufflevector(__ret_744, __ret_744, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_744; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_laneq_bf16(__p0_745, __p1_745, __p2_745, __p3_745) __extension__ ({ \ bfloat16x4_t __ret_745; \ bfloat16x4_t __s0_745 = __p0_745; \ bfloat16x8_t __s2_745 = __p2_745; \ __ret_745 = vset_lane_bf16(vgetq_lane_bf16(__s2_745, __p3_745), __s0_745, __p1_745); \ __ret_745; \ }) #else #define vcopy_laneq_bf16(__p0_746, __p1_746, __p2_746, __p3_746) __extension__ ({ \ bfloat16x4_t __ret_746; \ bfloat16x4_t __s0_746 = __p0_746; \ bfloat16x8_t __s2_746 = __p2_746; \ bfloat16x4_t __rev0_746; __rev0_746 = __builtin_shufflevector(__s0_746, __s0_746, 3, 2, 1, 0); \ bfloat16x8_t __rev2_746; __rev2_746 = __builtin_shufflevector(__s2_746, __s2_746, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_746 = __noswap_vset_lane_bf16(__noswap_vgetq_lane_bf16(__rev2_746, __p3_746), __rev0_746, __p1_746); \ __ret_746 = __builtin_shufflevector(__ret_746, __ret_746, 3, 2, 1, 0); \ __ret_746; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) bfloat16x4_t vcvt_bf16_f32(float32x4_t __p0) { bfloat16x4_t __ret; __ret = vget_low_bf16(__a64_vcvtq_low_bf16_f32(__p0)); return __ret; } #else __ai __attribute__((target("bf16"))) bfloat16x4_t vcvt_bf16_f32(float32x4_t __p0) { bfloat16x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = 
__noswap_vget_low_bf16(__noswap___a64_vcvtq_low_bf16_f32(__rev0)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) bfloat16x8_t vcvtq_high_bf16_f32(bfloat16x8_t __p0, float32x4_t __p1) { bfloat16x8_t __ret; __ret = (bfloat16x8_t) __builtin_neon_vcvtq_high_bf16_f32((int8x16_t)__p0, (int8x16_t)__p1, 43); return __ret; } #else __ai __attribute__((target("bf16"))) bfloat16x8_t vcvtq_high_bf16_f32(bfloat16x8_t __p0, float32x4_t __p1) { bfloat16x8_t __ret; bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (bfloat16x8_t) __builtin_neon_vcvtq_high_bf16_f32((int8x16_t)__rev0, (int8x16_t)__rev1, 43); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) bfloat16x8_t vcvtq_low_bf16_f32(float32x4_t __p0) { bfloat16x8_t __ret; __ret = __a64_vcvtq_low_bf16_f32(__p0); return __ret; } #else __ai __attribute__((target("bf16"))) bfloat16x8_t vcvtq_low_bf16_f32(float32x4_t __p0) { bfloat16x8_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = __noswap___a64_vcvtq_low_bf16_f32(__rev0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif __ai __attribute__((target("bf16"))) poly8x8_t vreinterpret_p8_bf16(bfloat16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) poly64x1_t vreinterpret_p64_bf16(bfloat16x4_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) poly16x4_t vreinterpret_p16_bf16(bfloat16x4_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) poly8x16_t vreinterpretq_p8_bf16(bfloat16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) poly128_t vreinterpretq_p128_bf16(bfloat16x8_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) poly64x2_t vreinterpretq_p64_bf16(bfloat16x8_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) poly16x8_t vreinterpretq_p16_bf16(bfloat16x8_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) uint8x16_t vreinterpretq_u8_bf16(bfloat16x8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) uint32x4_t vreinterpretq_u32_bf16(bfloat16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) uint64x2_t vreinterpretq_u64_bf16(bfloat16x8_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) uint16x8_t vreinterpretq_u16_bf16(bfloat16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) int8x16_t vreinterpretq_s8_bf16(bfloat16x8_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) float64x2_t vreinterpretq_f64_bf16(bfloat16x8_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) float32x4_t vreinterpretq_f32_bf16(bfloat16x8_t __p0) { float32x4_t __ret; __ret = 
(float32x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) float16x8_t vreinterpretq_f16_bf16(bfloat16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) int32x4_t vreinterpretq_s32_bf16(bfloat16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) int64x2_t vreinterpretq_s64_bf16(bfloat16x8_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) int16x8_t vreinterpretq_s16_bf16(bfloat16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) uint8x8_t vreinterpret_u8_bf16(bfloat16x4_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) uint32x2_t vreinterpret_u32_bf16(bfloat16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) uint64x1_t vreinterpret_u64_bf16(bfloat16x4_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) uint16x4_t vreinterpret_u16_bf16(bfloat16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) int8x8_t vreinterpret_s8_bf16(bfloat16x4_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) float64x1_t vreinterpret_f64_bf16(bfloat16x4_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) float32x2_t vreinterpret_f32_bf16(bfloat16x4_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) float16x4_t vreinterpret_f16_bf16(bfloat16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) int32x2_t vreinterpret_s32_bf16(bfloat16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) int64x1_t vreinterpret_s64_bf16(bfloat16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) int16x4_t vreinterpret_s16_bf16(bfloat16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_p8(poly8x16_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_p128(poly128_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_p64(poly64x2_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_p16(poly16x8_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_u8(uint8x16_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_u32(uint32x4_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_u64(uint64x2_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_u16(uint16x8_t __p0) { bfloat16x8_t __ret; __ret = 
(bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_s8(int8x16_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_f64(float64x2_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_f32(float32x4_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_f16(float16x8_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_s32(int32x4_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_s64(int64x2_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_s16(int16x8_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_p8(poly8x8_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_p64(poly64x1_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_p16(poly16x4_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_u8(uint8x8_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_u32(uint32x2_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_u64(uint64x1_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_u16(uint16x4_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_s8(int8x8_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_f64(float64x1_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_f32(float32x2_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_f16(float16x4_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_s32(int32x2_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_s64(int64x1_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } __ai __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_s16(int16x4_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ #define vdotq_laneq_u32(__p0_747, __p1_747, __p2_747, __p3_747) __extension__ ({ \ uint32x4_t __ret_747; \ uint32x4_t __s0_747 = __p0_747; \ uint8x16_t __s1_747 = __p1_747; \ uint8x16_t __s2_747 = 
__p2_747; \ uint8x16_t __reint_747 = __s2_747; \ uint32x4_t __reint1_747 = splatq_laneq_u32(*(uint32x4_t *) &__reint_747, __p3_747); \ __ret_747 = vdotq_u32(__s0_747, __s1_747, *(uint8x16_t *) &__reint1_747); \ __ret_747; \ }) #else #define vdotq_laneq_u32(__p0_748, __p1_748, __p2_748, __p3_748) __extension__ ({ \ uint32x4_t __ret_748; \ uint32x4_t __s0_748 = __p0_748; \ uint8x16_t __s1_748 = __p1_748; \ uint8x16_t __s2_748 = __p2_748; \ uint32x4_t __rev0_748; __rev0_748 = __builtin_shufflevector(__s0_748, __s0_748, 3, 2, 1, 0); \ uint8x16_t __rev1_748; __rev1_748 = __builtin_shufflevector(__s1_748, __s1_748, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __rev2_748; __rev2_748 = __builtin_shufflevector(__s2_748, __s2_748, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __reint_748 = __rev2_748; \ uint32x4_t __reint1_748 = __noswap_splatq_laneq_u32(*(uint32x4_t *) &__reint_748, __p3_748); \ __ret_748 = __noswap_vdotq_u32(__rev0_748, __rev1_748, *(uint8x16_t *) &__reint1_748); \ __ret_748 = __builtin_shufflevector(__ret_748, __ret_748, 3, 2, 1, 0); \ __ret_748; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdotq_laneq_s32(__p0_749, __p1_749, __p2_749, __p3_749) __extension__ ({ \ int32x4_t __ret_749; \ int32x4_t __s0_749 = __p0_749; \ int8x16_t __s1_749 = __p1_749; \ int8x16_t __s2_749 = __p2_749; \ int8x16_t __reint_749 = __s2_749; \ int32x4_t __reint1_749 = splatq_laneq_s32(*(int32x4_t *) &__reint_749, __p3_749); \ __ret_749 = vdotq_s32(__s0_749, __s1_749, *(int8x16_t *) &__reint1_749); \ __ret_749; \ }) #else #define vdotq_laneq_s32(__p0_750, __p1_750, __p2_750, __p3_750) __extension__ ({ \ int32x4_t __ret_750; \ int32x4_t __s0_750 = __p0_750; \ int8x16_t __s1_750 = __p1_750; \ int8x16_t __s2_750 = __p2_750; \ int32x4_t __rev0_750; __rev0_750 = __builtin_shufflevector(__s0_750, __s0_750, 3, 2, 1, 0); \ int8x16_t __rev1_750; __rev1_750 = __builtin_shufflevector(__s1_750, __s1_750, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __rev2_750; __rev2_750 = __builtin_shufflevector(__s2_750, __s2_750, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __reint_750 = __rev2_750; \ int32x4_t __reint1_750 = __noswap_splatq_laneq_s32(*(int32x4_t *) &__reint_750, __p3_750); \ __ret_750 = __noswap_vdotq_s32(__rev0_750, __rev1_750, *(int8x16_t *) &__reint1_750); \ __ret_750 = __builtin_shufflevector(__ret_750, __ret_750, 3, 2, 1, 0); \ __ret_750; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdot_laneq_u32(__p0_751, __p1_751, __p2_751, __p3_751) __extension__ ({ \ uint32x2_t __ret_751; \ uint32x2_t __s0_751 = __p0_751; \ uint8x8_t __s1_751 = __p1_751; \ uint8x16_t __s2_751 = __p2_751; \ uint8x16_t __reint_751 = __s2_751; \ uint32x2_t __reint1_751 = splat_laneq_u32(*(uint32x4_t *) &__reint_751, __p3_751); \ __ret_751 = vdot_u32(__s0_751, __s1_751, *(uint8x8_t *) &__reint1_751); \ __ret_751; \ }) #else #define vdot_laneq_u32(__p0_752, __p1_752, __p2_752, __p3_752) __extension__ ({ \ uint32x2_t __ret_752; \ uint32x2_t __s0_752 = __p0_752; \ uint8x8_t __s1_752 = __p1_752; \ uint8x16_t __s2_752 = __p2_752; \ uint32x2_t __rev0_752; __rev0_752 = __builtin_shufflevector(__s0_752, __s0_752, 1, 0); \ uint8x8_t __rev1_752; __rev1_752 = __builtin_shufflevector(__s1_752, __s1_752, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __rev2_752; __rev2_752 = __builtin_shufflevector(__s2_752, __s2_752, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __reint_752 = __rev2_752; \ uint32x2_t __reint1_752 = __noswap_splat_laneq_u32(*(uint32x4_t 
*) &__reint_752, __p3_752); \ __ret_752 = __noswap_vdot_u32(__rev0_752, __rev1_752, *(uint8x8_t *) &__reint1_752); \ __ret_752 = __builtin_shufflevector(__ret_752, __ret_752, 1, 0); \ __ret_752; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vdot_laneq_s32(__p0_753, __p1_753, __p2_753, __p3_753) __extension__ ({ \ int32x2_t __ret_753; \ int32x2_t __s0_753 = __p0_753; \ int8x8_t __s1_753 = __p1_753; \ int8x16_t __s2_753 = __p2_753; \ int8x16_t __reint_753 = __s2_753; \ int32x2_t __reint1_753 = splat_laneq_s32(*(int32x4_t *) &__reint_753, __p3_753); \ __ret_753 = vdot_s32(__s0_753, __s1_753, *(int8x8_t *) &__reint1_753); \ __ret_753; \ }) #else #define vdot_laneq_s32(__p0_754, __p1_754, __p2_754, __p3_754) __extension__ ({ \ int32x2_t __ret_754; \ int32x2_t __s0_754 = __p0_754; \ int8x8_t __s1_754 = __p1_754; \ int8x16_t __s2_754 = __p2_754; \ int32x2_t __rev0_754; __rev0_754 = __builtin_shufflevector(__s0_754, __s0_754, 1, 0); \ int8x8_t __rev1_754; __rev1_754 = __builtin_shufflevector(__s1_754, __s1_754, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __rev2_754; __rev2_754 = __builtin_shufflevector(__s2_754, __s2_754, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __reint_754 = __rev2_754; \ int32x2_t __reint1_754 = __noswap_splat_laneq_s32(*(int32x4_t *) &__reint_754, __p3_754); \ __ret_754 = __noswap_vdot_s32(__rev0_754, __rev1_754, *(int8x8_t *) &__reint1_754); \ __ret_754 = __builtin_shufflevector(__ret_754, __ret_754, 1, 0); \ __ret_754; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp16fml"))) float32x4_t vfmlalq_high_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vfmlalq_high_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #else __ai __attribute__((target("fp16fml"))) float32x4_t vfmlalq_high_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vfmlalq_high_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("fp16fml"))) float32x4_t __noswap_vfmlalq_high_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vfmlalq_high_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp16fml"))) float32x2_t vfmlal_high_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vfmlal_high_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #else __ai __attribute__((target("fp16fml"))) float32x2_t vfmlal_high_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (float32x2_t) __builtin_neon_vfmlal_high_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai __attribute__((target("fp16fml"))) 
float32x2_t __noswap_vfmlal_high_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vfmlal_high_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp16fml"))) float32x4_t vfmlalq_low_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vfmlalq_low_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #else __ai __attribute__((target("fp16fml"))) float32x4_t vfmlalq_low_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vfmlalq_low_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("fp16fml"))) float32x4_t __noswap_vfmlalq_low_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vfmlalq_low_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp16fml"))) float32x2_t vfmlal_low_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vfmlal_low_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #else __ai __attribute__((target("fp16fml"))) float32x2_t vfmlal_low_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (float32x2_t) __builtin_neon_vfmlal_low_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai __attribute__((target("fp16fml"))) float32x2_t __noswap_vfmlal_low_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vfmlal_low_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp16fml"))) float32x4_t vfmlslq_high_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vfmlslq_high_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #else __ai __attribute__((target("fp16fml"))) float32x4_t vfmlslq_high_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vfmlslq_high_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("fp16fml"))) float32x4_t __noswap_vfmlslq_high_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t 
__p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vfmlslq_high_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp16fml"))) float32x2_t vfmlsl_high_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vfmlsl_high_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #else __ai __attribute__((target("fp16fml"))) float32x2_t vfmlsl_high_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (float32x2_t) __builtin_neon_vfmlsl_high_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai __attribute__((target("fp16fml"))) float32x2_t __noswap_vfmlsl_high_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vfmlsl_high_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp16fml"))) float32x4_t vfmlslq_low_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vfmlslq_low_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #else __ai __attribute__((target("fp16fml"))) float32x4_t vfmlslq_low_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vfmlslq_low_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("fp16fml"))) float32x4_t __noswap_vfmlslq_low_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vfmlslq_low_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp16fml"))) float32x2_t vfmlsl_low_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vfmlsl_low_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #else __ai __attribute__((target("fp16fml"))) float32x2_t vfmlsl_low_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (float32x2_t) __builtin_neon_vfmlsl_low_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai __attribute__((target("fp16fml"))) float32x2_t __noswap_vfmlsl_low_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vfmlsl_low_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 
9); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vdivq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = __p0 / __p1; return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vdivq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 / __rev1; __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vdiv_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = __p0 / __p1; return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vdiv_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 / __rev1; __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vduph_lane_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16x4_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vduph_lane_f16((float16x4_t)__s0, __p1); \ __ret; \ }) #else #define vduph_lane_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (float16_t) __builtin_neon_vduph_lane_f16((float16x4_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vduph_laneq_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16x8_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vduph_laneq_f16((float16x8_t)__s0, __p1); \ __ret; \ }) #else #define vduph_laneq_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (float16_t) __builtin_neon_vduph_laneq_f16((float16x8_t)__rev0, __p1); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmah_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16x4_t __s2 = __p2; \ __ret = (float16_t) __builtin_neon_vfmah_lane_f16(__s0, __s1, (float16x4_t)__s2, __p3); \ __ret; \ }) #else #define vfmah_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16x4_t __s2 = __p2; \ float16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ __ret = (float16_t) __builtin_neon_vfmah_lane_f16(__s0, __s1, (float16x4_t)__rev2, __p3); \ __ret; \ }) #define __noswap_vfmah_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16x4_t __s2 = __p2; \ __ret = (float16_t) __builtin_neon_vfmah_lane_f16(__s0, __s1, (float16x4_t)__s2, __p3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmaq_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ float16x4_t __s2 = __p2; \ __ret = (float16x8_t) __builtin_neon_vfmaq_lane_f16((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 40); \ __ret; \ }) #else #define vfmaq_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ float16x8_t __ret; \ 
float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ float16x4_t __s2 = __p2; \ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ __ret = (float16x8_t) __builtin_neon_vfmaq_lane_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, __p3, 40); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vfmaq_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ float16x4_t __s2 = __p2; \ __ret = (float16x8_t) __builtin_neon_vfmaq_lane_f16((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 40); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfma_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ float16x4_t __s2 = __p2; \ __ret = (float16x4_t) __builtin_neon_vfma_lane_f16((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 8); \ __ret; \ }) #else #define vfma_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ float16x4_t __s2 = __p2; \ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ float16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ __ret = (float16x4_t) __builtin_neon_vfma_lane_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, __p3, 8); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vfma_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ float16x4_t __s2 = __p2; \ __ret = (float16x4_t) __builtin_neon_vfma_lane_f16((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 8); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmah_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16x8_t __s2 = __p2; \ __ret = (float16_t) __builtin_neon_vfmah_laneq_f16(__s0, __s1, (float16x8_t)__s2, __p3); \ __ret; \ }) #else #define vfmah_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16x8_t __s2 = __p2; \ float16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (float16_t) __builtin_neon_vfmah_laneq_f16(__s0, __s1, (float16x8_t)__rev2, __p3); \ __ret; \ }) #define __noswap_vfmah_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16x8_t __s2 = __p2; \ __ret = (float16_t) __builtin_neon_vfmah_laneq_f16(__s0, __s1, (float16x8_t)__s2, __p3); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmaq_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ float16x8_t __s2 = __p2; \ __ret = (float16x8_t) __builtin_neon_vfmaq_laneq_f16((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 40); \ __ret; \ }) #else #define vfmaq_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ float16x8_t __s2 = __p2; \ float16x8_t __rev0; __rev0 = 
__builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (float16x8_t) __builtin_neon_vfmaq_laneq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 40); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vfmaq_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ float16x8_t __s2 = __p2; \ __ret = (float16x8_t) __builtin_neon_vfmaq_laneq_f16((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 40); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfma_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ float16x8_t __s2 = __p2; \ __ret = (float16x4_t) __builtin_neon_vfma_laneq_f16((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 8); \ __ret; \ }) #else #define vfma_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ float16x8_t __s2 = __p2; \ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ float16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (float16x4_t) __builtin_neon_vfma_laneq_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x16_t)__rev2, __p3, 8); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #define __noswap_vfma_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ float16x8_t __s2 = __p2; \ __ret = (float16x4_t) __builtin_neon_vfma_laneq_f16((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 8); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmaq_n_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ float16_t __s2 = __p2; \ __ret = vfmaq_f16(__s0, __s1, (float16x8_t) {__s2, __s2, __s2, __s2, __s2, __s2, __s2, __s2}); \ __ret; \ }) #else #define vfmaq_n_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ float16_t __s2 = __p2; \ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = __noswap_vfmaq_f16(__rev0, __rev1, (float16x8_t) {__s2, __s2, __s2, __s2, __s2, __s2, __s2, __s2}); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfma_n_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ float16_t __s2 = __p2; \ __ret = vfma_f16(__s0, __s1, (float16x4_t) {__s2, __s2, __s2, __s2}); \ __ret; \ }) #else #define vfma_n_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ float16_t __s2 = __p2; \ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = __noswap_vfma_f16(__rev0, __rev1, (float16x4_t) {__s2, __s2, __s2, __s2}); \ __ret = 
__builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmsh_lane_f16(__p0_755, __p1_755, __p2_755, __p3_755) __extension__ ({ \ float16_t __ret_755; \ float16_t __s0_755 = __p0_755; \ float16_t __s1_755 = __p1_755; \ float16x4_t __s2_755 = __p2_755; \ __ret_755 = vfmah_lane_f16(__s0_755, -__s1_755, __s2_755, __p3_755); \ __ret_755; \ }) #else #define vfmsh_lane_f16(__p0_756, __p1_756, __p2_756, __p3_756) __extension__ ({ \ float16_t __ret_756; \ float16_t __s0_756 = __p0_756; \ float16_t __s1_756 = __p1_756; \ float16x4_t __s2_756 = __p2_756; \ float16x4_t __rev2_756; __rev2_756 = __builtin_shufflevector(__s2_756, __s2_756, 3, 2, 1, 0); \ __ret_756 = __noswap_vfmah_lane_f16(__s0_756, -__s1_756, __rev2_756, __p3_756); \ __ret_756; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmsq_lane_f16(__p0_757, __p1_757, __p2_757, __p3_757) __extension__ ({ \ float16x8_t __ret_757; \ float16x8_t __s0_757 = __p0_757; \ float16x8_t __s1_757 = __p1_757; \ float16x4_t __s2_757 = __p2_757; \ __ret_757 = vfmaq_lane_f16(__s0_757, -__s1_757, __s2_757, __p3_757); \ __ret_757; \ }) #else #define vfmsq_lane_f16(__p0_758, __p1_758, __p2_758, __p3_758) __extension__ ({ \ float16x8_t __ret_758; \ float16x8_t __s0_758 = __p0_758; \ float16x8_t __s1_758 = __p1_758; \ float16x4_t __s2_758 = __p2_758; \ float16x8_t __rev0_758; __rev0_758 = __builtin_shufflevector(__s0_758, __s0_758, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1_758; __rev1_758 = __builtin_shufflevector(__s1_758, __s1_758, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x4_t __rev2_758; __rev2_758 = __builtin_shufflevector(__s2_758, __s2_758, 3, 2, 1, 0); \ __ret_758 = __noswap_vfmaq_lane_f16(__rev0_758, -__rev1_758, __rev2_758, __p3_758); \ __ret_758 = __builtin_shufflevector(__ret_758, __ret_758, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_758; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfms_lane_f16(__p0_759, __p1_759, __p2_759, __p3_759) __extension__ ({ \ float16x4_t __ret_759; \ float16x4_t __s0_759 = __p0_759; \ float16x4_t __s1_759 = __p1_759; \ float16x4_t __s2_759 = __p2_759; \ __ret_759 = vfma_lane_f16(__s0_759, -__s1_759, __s2_759, __p3_759); \ __ret_759; \ }) #else #define vfms_lane_f16(__p0_760, __p1_760, __p2_760, __p3_760) __extension__ ({ \ float16x4_t __ret_760; \ float16x4_t __s0_760 = __p0_760; \ float16x4_t __s1_760 = __p1_760; \ float16x4_t __s2_760 = __p2_760; \ float16x4_t __rev0_760; __rev0_760 = __builtin_shufflevector(__s0_760, __s0_760, 3, 2, 1, 0); \ float16x4_t __rev1_760; __rev1_760 = __builtin_shufflevector(__s1_760, __s1_760, 3, 2, 1, 0); \ float16x4_t __rev2_760; __rev2_760 = __builtin_shufflevector(__s2_760, __s2_760, 3, 2, 1, 0); \ __ret_760 = __noswap_vfma_lane_f16(__rev0_760, -__rev1_760, __rev2_760, __p3_760); \ __ret_760 = __builtin_shufflevector(__ret_760, __ret_760, 3, 2, 1, 0); \ __ret_760; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmsh_laneq_f16(__p0_761, __p1_761, __p2_761, __p3_761) __extension__ ({ \ float16_t __ret_761; \ float16_t __s0_761 = __p0_761; \ float16_t __s1_761 = __p1_761; \ float16x8_t __s2_761 = __p2_761; \ __ret_761 = vfmah_laneq_f16(__s0_761, -__s1_761, __s2_761, __p3_761); \ __ret_761; \ }) #else #define vfmsh_laneq_f16(__p0_762, __p1_762, __p2_762, __p3_762) __extension__ ({ \ float16_t __ret_762; \ float16_t __s0_762 = __p0_762; \ float16_t __s1_762 = __p1_762; \ float16x8_t __s2_762 = __p2_762; \ float16x8_t __rev2_762; __rev2_762 = __builtin_shufflevector(__s2_762, __s2_762, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_762 = __noswap_vfmah_laneq_f16(__s0_762, 
-__s1_762, __rev2_762, __p3_762); \ __ret_762; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmsq_laneq_f16(__p0_763, __p1_763, __p2_763, __p3_763) __extension__ ({ \ float16x8_t __ret_763; \ float16x8_t __s0_763 = __p0_763; \ float16x8_t __s1_763 = __p1_763; \ float16x8_t __s2_763 = __p2_763; \ __ret_763 = vfmaq_laneq_f16(__s0_763, -__s1_763, __s2_763, __p3_763); \ __ret_763; \ }) #else #define vfmsq_laneq_f16(__p0_764, __p1_764, __p2_764, __p3_764) __extension__ ({ \ float16x8_t __ret_764; \ float16x8_t __s0_764 = __p0_764; \ float16x8_t __s1_764 = __p1_764; \ float16x8_t __s2_764 = __p2_764; \ float16x8_t __rev0_764; __rev0_764 = __builtin_shufflevector(__s0_764, __s0_764, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1_764; __rev1_764 = __builtin_shufflevector(__s1_764, __s1_764, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev2_764; __rev2_764 = __builtin_shufflevector(__s2_764, __s2_764, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_764 = __noswap_vfmaq_laneq_f16(__rev0_764, -__rev1_764, __rev2_764, __p3_764); \ __ret_764 = __builtin_shufflevector(__ret_764, __ret_764, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_764; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfms_laneq_f16(__p0_765, __p1_765, __p2_765, __p3_765) __extension__ ({ \ float16x4_t __ret_765; \ float16x4_t __s0_765 = __p0_765; \ float16x4_t __s1_765 = __p1_765; \ float16x8_t __s2_765 = __p2_765; \ __ret_765 = vfma_laneq_f16(__s0_765, -__s1_765, __s2_765, __p3_765); \ __ret_765; \ }) #else #define vfms_laneq_f16(__p0_766, __p1_766, __p2_766, __p3_766) __extension__ ({ \ float16x4_t __ret_766; \ float16x4_t __s0_766 = __p0_766; \ float16x4_t __s1_766 = __p1_766; \ float16x8_t __s2_766 = __p2_766; \ float16x4_t __rev0_766; __rev0_766 = __builtin_shufflevector(__s0_766, __s0_766, 3, 2, 1, 0); \ float16x4_t __rev1_766; __rev1_766 = __builtin_shufflevector(__s1_766, __s1_766, 3, 2, 1, 0); \ float16x8_t __rev2_766; __rev2_766 = __builtin_shufflevector(__s2_766, __s2_766, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_766 = __noswap_vfma_laneq_f16(__rev0_766, -__rev1_766, __rev2_766, __p3_766); \ __ret_766 = __builtin_shufflevector(__ret_766, __ret_766, 3, 2, 1, 0); \ __ret_766; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmsq_n_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ float16_t __s2 = __p2; \ __ret = vfmaq_f16(__s0, -__s1, (float16x8_t) {__s2, __s2, __s2, __s2, __s2, __s2, __s2, __s2}); \ __ret; \ }) #else #define vfmsq_n_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ float16_t __s2 = __p2; \ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = __noswap_vfmaq_f16(__rev0, -__rev1, (float16x8_t) {__s2, __s2, __s2, __s2, __s2, __s2, __s2, __s2}); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfms_n_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ float16_t __s2 = __p2; \ __ret = vfma_f16(__s0, -__s1, (float16x4_t) {__s2, __s2, __s2, __s2}); \ __ret; \ }) #else #define vfms_n_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ float16_t __s2 = __p2; \ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ float16x4_t __rev1; __rev1 = 
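/*
 * As the expansions above show, the vfms* forms are not separate builtins:
 * they negate the second multiplicand and reuse the matching vfma* form.
 * A minimal scalar sketch (hypothetical name, plain float instead of __fp16)
 * of vfms_lane_f16(a, b, v, lane):
 */
static inline void ref_vfms_lane_f16(float r[4], const float a[4],
                                     const float b[4], const float v[4],
                                     int lane) {
    for (int i = 0; i < 4; ++i)
        r[i] = a[i] + (-b[i]) * v[lane];   /* equivalent to a[i] - b[i] * v[lane] */
}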
__builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = __noswap_vfma_f16(__rev0, -__rev1, (float16x4_t) {__s2, __s2, __s2, __s2}); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmaxnmvq_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x8_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vmaxnmvq_f16((int8x16_t)__s0); \ __ret; \ }) #else #define vmaxnmvq_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (float16_t) __builtin_neon_vmaxnmvq_f16((int8x16_t)__rev0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmaxnmv_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x4_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vmaxnmv_f16((int8x8_t)__s0); \ __ret; \ }) #else #define vmaxnmv_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (float16_t) __builtin_neon_vmaxnmv_f16((int8x8_t)__rev0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmaxvq_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x8_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vmaxvq_f16((int8x16_t)__s0); \ __ret; \ }) #else #define vmaxvq_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (float16_t) __builtin_neon_vmaxvq_f16((int8x16_t)__rev0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmaxv_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x4_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vmaxv_f16((int8x8_t)__s0); \ __ret; \ }) #else #define vmaxv_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (float16_t) __builtin_neon_vmaxv_f16((int8x8_t)__rev0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vminnmvq_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x8_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vminnmvq_f16((int8x16_t)__s0); \ __ret; \ }) #else #define vminnmvq_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (float16_t) __builtin_neon_vminnmvq_f16((int8x16_t)__rev0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vminnmv_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x4_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vminnmv_f16((int8x8_t)__s0); \ __ret; \ }) #else #define vminnmv_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (float16_t) __builtin_neon_vminnmv_f16((int8x8_t)__rev0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vminvq_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x8_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vminvq_f16((int8x16_t)__s0); \ __ret; \ }) #else #define vminvq_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (float16_t) __builtin_neon_vminvq_f16((int8x16_t)__rev0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vminv_f16(__p0) __extension__ ({ \ float16_t 
__ret; \ float16x4_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vminv_f16((int8x8_t)__s0); \ __ret; \ }) #else #define vminv_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = (float16_t) __builtin_neon_vminv_f16((int8x8_t)__rev0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulq_laneq_f16(__p0_767, __p1_767, __p2_767) __extension__ ({ \ float16x8_t __ret_767; \ float16x8_t __s0_767 = __p0_767; \ float16x8_t __s1_767 = __p1_767; \ __ret_767 = __s0_767 * splatq_laneq_f16(__s1_767, __p2_767); \ __ret_767; \ }) #else #define vmulq_laneq_f16(__p0_768, __p1_768, __p2_768) __extension__ ({ \ float16x8_t __ret_768; \ float16x8_t __s0_768 = __p0_768; \ float16x8_t __s1_768 = __p1_768; \ float16x8_t __rev0_768; __rev0_768 = __builtin_shufflevector(__s0_768, __s0_768, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1_768; __rev1_768 = __builtin_shufflevector(__s1_768, __s1_768, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_768 = __rev0_768 * __noswap_splatq_laneq_f16(__rev1_768, __p2_768); \ __ret_768 = __builtin_shufflevector(__ret_768, __ret_768, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_768; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmul_laneq_f16(__p0_769, __p1_769, __p2_769) __extension__ ({ \ float16x4_t __ret_769; \ float16x4_t __s0_769 = __p0_769; \ float16x8_t __s1_769 = __p1_769; \ __ret_769 = __s0_769 * splat_laneq_f16(__s1_769, __p2_769); \ __ret_769; \ }) #else #define vmul_laneq_f16(__p0_770, __p1_770, __p2_770) __extension__ ({ \ float16x4_t __ret_770; \ float16x4_t __s0_770 = __p0_770; \ float16x8_t __s1_770 = __p1_770; \ float16x4_t __rev0_770; __rev0_770 = __builtin_shufflevector(__s0_770, __s0_770, 3, 2, 1, 0); \ float16x8_t __rev1_770; __rev1_770 = __builtin_shufflevector(__s1_770, __s1_770, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_770 = __rev0_770 * __noswap_splat_laneq_f16(__rev1_770, __p2_770); \ __ret_770 = __builtin_shufflevector(__ret_770, __ret_770, 3, 2, 1, 0); \ __ret_770; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vmulxq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vmulxq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vmulxq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vmulxq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("fullfp16"))) float16x8_t __noswap_vmulxq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vmulxq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vmulx_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vmulx_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vmulx_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = 
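/*
 * vmaxvq/vmaxv, vminvq/vminv and their nm variants above are across-vector
 * reductions that return a single float16_t. The sketch below models the
 * 8-lane maximum with a hypothetical helper in plain float; NaN handling is
 * deliberately simplified. As a hedged note, the nm forms are the
 * maxNum/minNum style reductions, which prefer a number over a quiet NaN,
 * while the plain forms keep the ordinary FMAXV/FMINV behaviour.
 */
static inline float ref_vmaxvq_f16(const float v[8]) {
    float m = v[0];
    for (int i = 1; i < 8; ++i)
        if (v[i] > m) m = v[i];   /* NaN lanes are ignored by this comparison */
    return m;
}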
(float16x4_t) __builtin_neon_vmulx_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai __attribute__((target("fullfp16"))) float16x4_t __noswap_vmulx_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vmulx_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vmulxh_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vmulxh_lane_f16(__s0, (float16x4_t)__s1, __p2); \ __ret; \ }) #else #define vmulxh_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ __ret = (float16_t) __builtin_neon_vmulxh_lane_f16(__s0, (float16x4_t)__rev1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulxq_lane_f16(__p0_771, __p1_771, __p2_771) __extension__ ({ \ float16x8_t __ret_771; \ float16x8_t __s0_771 = __p0_771; \ float16x4_t __s1_771 = __p1_771; \ __ret_771 = vmulxq_f16(__s0_771, splatq_lane_f16(__s1_771, __p2_771)); \ __ret_771; \ }) #else #define vmulxq_lane_f16(__p0_772, __p1_772, __p2_772) __extension__ ({ \ float16x8_t __ret_772; \ float16x8_t __s0_772 = __p0_772; \ float16x4_t __s1_772 = __p1_772; \ float16x8_t __rev0_772; __rev0_772 = __builtin_shufflevector(__s0_772, __s0_772, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x4_t __rev1_772; __rev1_772 = __builtin_shufflevector(__s1_772, __s1_772, 3, 2, 1, 0); \ __ret_772 = __noswap_vmulxq_f16(__rev0_772, __noswap_splatq_lane_f16(__rev1_772, __p2_772)); \ __ret_772 = __builtin_shufflevector(__ret_772, __ret_772, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_772; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulx_lane_f16(__p0_773, __p1_773, __p2_773) __extension__ ({ \ float16x4_t __ret_773; \ float16x4_t __s0_773 = __p0_773; \ float16x4_t __s1_773 = __p1_773; \ __ret_773 = vmulx_f16(__s0_773, splat_lane_f16(__s1_773, __p2_773)); \ __ret_773; \ }) #else #define vmulx_lane_f16(__p0_774, __p1_774, __p2_774) __extension__ ({ \ float16x4_t __ret_774; \ float16x4_t __s0_774 = __p0_774; \ float16x4_t __s1_774 = __p1_774; \ float16x4_t __rev0_774; __rev0_774 = __builtin_shufflevector(__s0_774, __s0_774, 3, 2, 1, 0); \ float16x4_t __rev1_774; __rev1_774 = __builtin_shufflevector(__s1_774, __s1_774, 3, 2, 1, 0); \ __ret_774 = __noswap_vmulx_f16(__rev0_774, __noswap_splat_lane_f16(__rev1_774, __p2_774)); \ __ret_774 = __builtin_shufflevector(__ret_774, __ret_774, 3, 2, 1, 0); \ __ret_774; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulxh_laneq_f16(__p0, __p1, __p2) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vmulxh_laneq_f16(__s0, (float16x8_t)__s1, __p2); \ __ret; \ }) #else #define vmulxh_laneq_f16(__p0, __p1, __p2) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = (float16_t) __builtin_neon_vmulxh_laneq_f16(__s0, (float16x8_t)__rev1, __p2); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulxq_laneq_f16(__p0_775, __p1_775, __p2_775) __extension__ ({ \ float16x8_t __ret_775; \ float16x8_t __s0_775 = __p0_775; \ float16x8_t __s1_775 = __p1_775; \ __ret_775 = vmulxq_f16(__s0_775, splatq_laneq_f16(__s1_775, __p2_775)); \ __ret_775; \ }) #else #define 
vmulxq_laneq_f16(__p0_776, __p1_776, __p2_776) __extension__ ({ \ float16x8_t __ret_776; \ float16x8_t __s0_776 = __p0_776; \ float16x8_t __s1_776 = __p1_776; \ float16x8_t __rev0_776; __rev0_776 = __builtin_shufflevector(__s0_776, __s0_776, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1_776; __rev1_776 = __builtin_shufflevector(__s1_776, __s1_776, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_776 = __noswap_vmulxq_f16(__rev0_776, __noswap_splatq_laneq_f16(__rev1_776, __p2_776)); \ __ret_776 = __builtin_shufflevector(__ret_776, __ret_776, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_776; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulx_laneq_f16(__p0_777, __p1_777, __p2_777) __extension__ ({ \ float16x4_t __ret_777; \ float16x4_t __s0_777 = __p0_777; \ float16x8_t __s1_777 = __p1_777; \ __ret_777 = vmulx_f16(__s0_777, splat_laneq_f16(__s1_777, __p2_777)); \ __ret_777; \ }) #else #define vmulx_laneq_f16(__p0_778, __p1_778, __p2_778) __extension__ ({ \ float16x4_t __ret_778; \ float16x4_t __s0_778 = __p0_778; \ float16x8_t __s1_778 = __p1_778; \ float16x4_t __rev0_778; __rev0_778 = __builtin_shufflevector(__s0_778, __s0_778, 3, 2, 1, 0); \ float16x8_t __rev1_778; __rev1_778 = __builtin_shufflevector(__s1_778, __s1_778, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_778 = __noswap_vmulx_f16(__rev0_778, __noswap_splat_laneq_f16(__rev1_778, __p2_778)); \ __ret_778 = __builtin_shufflevector(__ret_778, __ret_778, 3, 2, 1, 0); \ __ret_778; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulxq_n_f16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = vmulxq_f16(__s0, (float16x8_t) {__s1, __s1, __s1, __s1, __s1, __s1, __s1, __s1}); \ __ret; \ }) #else #define vmulxq_n_f16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret = __noswap_vmulxq_f16(__rev0, (float16x8_t) {__s1, __s1, __s1, __s1, __s1, __s1, __s1, __s1}); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulx_n_f16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = vmulx_f16(__s0, (float16x4_t) {__s1, __s1, __s1, __s1}); \ __ret; \ }) #else #define vmulx_n_f16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ __ret = __noswap_vmulx_f16(__rev0, (float16x4_t) {__s1, __s1, __s1, __s1}); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vpaddq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vpaddq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vpaddq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vpaddq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vpmaxq_f16(float16x8_t __p0, 
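/*
 * The vmulx* family above maps to FMULX: identical to an ordinary multiply
 * except that zero times infinity yields 2.0 with a sign derived from the two
 * operands, instead of NaN, which is what makes it usable in reciprocal and
 * reciprocal-square-root refinement steps. The lane and _n forms are built by
 * splatting one lane (or the scalar) and calling vmulxq_f16/vmulx_f16. A
 * scalar sketch of the special case, using a hypothetical helper name:
 */
#include <math.h>
static inline float ref_mulx(float a, float b) {
    if ((a == 0.0f && isinf(b)) || (isinf(a) && b == 0.0f))
        return ((signbit(a) != 0) != (signbit(b) != 0)) ? -2.0f : 2.0f;
    return a * b;   /* all other inputs behave like a plain multiply */
}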
float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vpmaxq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vpmaxq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vpmaxq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vpmaxnmq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vpmaxnmq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vpmaxnmq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vpmaxnmq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vpmaxnm_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vpmaxnm_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vpmaxnm_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vpmaxnm_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vpminq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vpminq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vpminq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vpminq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vpminnmq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vpminnmq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vpminnmq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vpminnmq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); 
return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vpminnm_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vpminnm_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vpminnm_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vpminnm_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vrndiq_f16(float16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vrndiq_f16((int8x16_t)__p0, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vrndiq_f16(float16x8_t __p0) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vrndiq_f16((int8x16_t)__rev0, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vrndi_f16(float16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vrndi_f16((int8x8_t)__p0, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vrndi_f16(float16x4_t __p0) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vrndi_f16((int8x8_t)__rev0, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vsqrtq_f16(float16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vsqrtq_f16((int8x16_t)__p0, 40); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vsqrtq_f16(float16x8_t __p0) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (float16x8_t) __builtin_neon_vsqrtq_f16((int8x16_t)__rev0, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vsqrt_f16(float16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vsqrt_f16((int8x8_t)__p0, 8); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vsqrt_f16(float16x4_t __p0) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float16x4_t) __builtin_neon_vsqrt_f16((int8x8_t)__rev0, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vtrn1q_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 8, 2, 10, 4, 12, 6, 14); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vtrn1q_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = 
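/*
 * vpaddq_f16, vpmaxq_f16, vpminq_f16 and the nm pairwise variants above work
 * on adjacent element pairs: lanes 0..3 of the result come from pairs of the
 * first operand and lanes 4..7 from pairs of the second. A scalar sketch of
 * vpaddq_f16 with a hypothetical helper name and plain float:
 */
static inline void ref_vpaddq_f16(float r[8], const float a[8], const float b[8]) {
    for (int i = 0; i < 4; ++i) {
        r[i]     = a[2 * i] + a[2 * i + 1];   /* pairs of a fill the low half  */
        r[i + 4] = b[2 * i] + b[2 * i + 1];   /* pairs of b fill the high half */
    }
}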
__builtin_shufflevector(__rev0, __rev1, 0, 8, 2, 10, 4, 12, 6, 14); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vtrn1_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 4, 2, 6); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vtrn1_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 2, 6); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vtrn2q_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 9, 3, 11, 5, 13, 7, 15); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vtrn2q_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 9, 3, 11, 5, 13, 7, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vtrn2_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 5, 3, 7); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vtrn2_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 5, 3, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vuzp1q_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6, 8, 10, 12, 14); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vuzp1q_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vuzp1_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vuzp1_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai 
__attribute__((target("fullfp16"))) float16x8_t vuzp2q_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7, 9, 11, 13, 15); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vuzp2q_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vuzp2_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vuzp2_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vzip1q_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 8, 1, 9, 2, 10, 3, 11); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vzip1q_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 1, 9, 2, 10, 3, 11); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vzip1_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 4, 1, 5); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vzip1_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 1, 5); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x8_t vzip2q_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 4, 12, 5, 13, 6, 14, 7, 15); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x8_t vzip2q_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 4, 12, 5, 13, 6, 14, 7, 15); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16"))) float16x4_t vzip2_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = 
__builtin_shufflevector(__p0, __p1, 2, 6, 3, 7); return __ret; } #else __ai __attribute__((target("fullfp16"))) float16x4_t vzip2_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __builtin_shufflevector(__rev0, __rev1, 2, 6, 3, 7); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vsudotq_laneq_s32(__p0_779, __p1_779, __p2_779, __p3_779) __extension__ ({ \ int32x4_t __ret_779; \ int32x4_t __s0_779 = __p0_779; \ int8x16_t __s1_779 = __p1_779; \ uint8x16_t __s2_779 = __p2_779; \ uint8x16_t __reint_779 = __s2_779; \ __ret_779 = vusdotq_s32(__s0_779, (uint8x16_t)(splatq_laneq_s32(*(int32x4_t *) &__reint_779, __p3_779)), __s1_779); \ __ret_779; \ }) #else #define vsudotq_laneq_s32(__p0_780, __p1_780, __p2_780, __p3_780) __extension__ ({ \ int32x4_t __ret_780; \ int32x4_t __s0_780 = __p0_780; \ int8x16_t __s1_780 = __p1_780; \ uint8x16_t __s2_780 = __p2_780; \ int32x4_t __rev0_780; __rev0_780 = __builtin_shufflevector(__s0_780, __s0_780, 3, 2, 1, 0); \ int8x16_t __rev1_780; __rev1_780 = __builtin_shufflevector(__s1_780, __s1_780, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __rev2_780; __rev2_780 = __builtin_shufflevector(__s2_780, __s2_780, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __reint_780 = __rev2_780; \ __ret_780 = __noswap_vusdotq_s32(__rev0_780, (uint8x16_t)(__noswap_splatq_laneq_s32(*(int32x4_t *) &__reint_780, __p3_780)), __rev1_780); \ __ret_780 = __builtin_shufflevector(__ret_780, __ret_780, 3, 2, 1, 0); \ __ret_780; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsudot_laneq_s32(__p0_781, __p1_781, __p2_781, __p3_781) __extension__ ({ \ int32x2_t __ret_781; \ int32x2_t __s0_781 = __p0_781; \ int8x8_t __s1_781 = __p1_781; \ uint8x16_t __s2_781 = __p2_781; \ uint8x16_t __reint_781 = __s2_781; \ __ret_781 = vusdot_s32(__s0_781, (uint8x8_t)(splat_laneq_s32(*(int32x4_t *) &__reint_781, __p3_781)), __s1_781); \ __ret_781; \ }) #else #define vsudot_laneq_s32(__p0_782, __p1_782, __p2_782, __p3_782) __extension__ ({ \ int32x2_t __ret_782; \ int32x2_t __s0_782 = __p0_782; \ int8x8_t __s1_782 = __p1_782; \ uint8x16_t __s2_782 = __p2_782; \ int32x2_t __rev0_782; __rev0_782 = __builtin_shufflevector(__s0_782, __s0_782, 1, 0); \ int8x8_t __rev1_782; __rev1_782 = __builtin_shufflevector(__s1_782, __s1_782, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __rev2_782; __rev2_782 = __builtin_shufflevector(__s2_782, __s2_782, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __reint_782 = __rev2_782; \ __ret_782 = __noswap_vusdot_s32(__rev0_782, (uint8x8_t)(__noswap_splat_laneq_s32(*(int32x4_t *) &__reint_782, __p3_782)), __rev1_782); \ __ret_782 = __builtin_shufflevector(__ret_782, __ret_782, 1, 0); \ __ret_782; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vusdotq_laneq_s32(__p0_783, __p1_783, __p2_783, __p3_783) __extension__ ({ \ int32x4_t __ret_783; \ int32x4_t __s0_783 = __p0_783; \ uint8x16_t __s1_783 = __p1_783; \ int8x16_t __s2_783 = __p2_783; \ int8x16_t __reint_783 = __s2_783; \ __ret_783 = vusdotq_s32(__s0_783, __s1_783, (int8x16_t)(splatq_laneq_s32(*(int32x4_t *) &__reint_783, __p3_783))); \ __ret_783; \ }) #else #define vusdotq_laneq_s32(__p0_784, __p1_784, __p2_784, __p3_784) __extension__ ({ \ int32x4_t __ret_784; \ int32x4_t __s0_784 = __p0_784; \ uint8x16_t __s1_784 = __p1_784; \ int8x16_t 
__s2_784 = __p2_784; \ int32x4_t __rev0_784; __rev0_784 = __builtin_shufflevector(__s0_784, __s0_784, 3, 2, 1, 0); \ uint8x16_t __rev1_784; __rev1_784 = __builtin_shufflevector(__s1_784, __s1_784, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __rev2_784; __rev2_784 = __builtin_shufflevector(__s2_784, __s2_784, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __reint_784 = __rev2_784; \ __ret_784 = __noswap_vusdotq_s32(__rev0_784, __rev1_784, (int8x16_t)(__noswap_splatq_laneq_s32(*(int32x4_t *) &__reint_784, __p3_784))); \ __ret_784 = __builtin_shufflevector(__ret_784, __ret_784, 3, 2, 1, 0); \ __ret_784; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vusdot_laneq_s32(__p0_785, __p1_785, __p2_785, __p3_785) __extension__ ({ \ int32x2_t __ret_785; \ int32x2_t __s0_785 = __p0_785; \ uint8x8_t __s1_785 = __p1_785; \ int8x16_t __s2_785 = __p2_785; \ int8x16_t __reint_785 = __s2_785; \ __ret_785 = vusdot_s32(__s0_785, __s1_785, (int8x8_t)(splat_laneq_s32(*(int32x4_t *) &__reint_785, __p3_785))); \ __ret_785; \ }) #else #define vusdot_laneq_s32(__p0_786, __p1_786, __p2_786, __p3_786) __extension__ ({ \ int32x2_t __ret_786; \ int32x2_t __s0_786 = __p0_786; \ uint8x8_t __s1_786 = __p1_786; \ int8x16_t __s2_786 = __p2_786; \ int32x2_t __rev0_786; __rev0_786 = __builtin_shufflevector(__s0_786, __s0_786, 1, 0); \ uint8x8_t __rev1_786; __rev1_786 = __builtin_shufflevector(__s1_786, __s1_786, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __rev2_786; __rev2_786 = __builtin_shufflevector(__s2_786, __s2_786, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __reint_786 = __rev2_786; \ __ret_786 = __noswap_vusdot_s32(__rev0_786, __rev1_786, (int8x8_t)(__noswap_splat_laneq_s32(*(int32x4_t *) &__reint_786, __p3_786))); \ __ret_786 = __builtin_shufflevector(__ret_786, __ret_786, 1, 0); \ __ret_786; \ }) #endif #define vldap1_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1_t __ret; \ poly64x1_t __s1 = __p1; \ __ret = (poly64x1_t) __builtin_neon_vldap1_lane_p64(__p0, (int8x8_t)__s1, __p2, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vldap1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __ret; \ poly64x2_t __s1 = __p1; \ __ret = (poly64x2_t) __builtin_neon_vldap1q_lane_p64(__p0, (int8x16_t)__s1, __p2, 38); \ __ret; \ }) #else #define vldap1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __ret; \ poly64x2_t __s1 = __p1; \ poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (poly64x2_t) __builtin_neon_vldap1q_lane_p64(__p0, (int8x16_t)__rev1, __p2, 38); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vldap1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s1 = __p1; \ __ret = (uint64x2_t) __builtin_neon_vldap1q_lane_u64(__p0, (int8x16_t)__s1, __p2, 51); \ __ret; \ }) #else #define vldap1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s1 = __p1; \ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (uint64x2_t) __builtin_neon_vldap1q_lane_u64(__p0, (int8x16_t)__rev1, __p2, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vldap1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __ret; \ float64x2_t __s1 = __p1; \ __ret = (float64x2_t) __builtin_neon_vldap1q_lane_f64(__p0, (int8x16_t)__s1, __p2, 42); \ __ret; \ }) #else #define vldap1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ 
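/*
 * The vsudot*_laneq_s32 and vusdot*_laneq_s32 macros above are the mixed-sign
 * 8-bit dot products: each 32-bit accumulator lane gains the sum of four
 * products of a signed byte with an unsigned byte, and the _laneq forms
 * reinterpret the 16-byte operand as four 32-bit groups and splat the selected
 * group before the dot product. The sketch below models a single accumulator
 * lane with a hypothetical helper; which operand supplies the splatted group
 * differs between the su and us forms, so check the ACLE text for the exact
 * roles.
 */
#include <stdint.h>
static inline int32_t ref_usdot_lane(int32_t acc, const uint8_t u[4], const int8_t s[4]) {
    for (int i = 0; i < 4; ++i)
        acc += (int32_t)u[i] * (int32_t)s[i];   /* widen, multiply, accumulate */
    return acc;
}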
float64x2_t __ret; \ float64x2_t __s1 = __p1; \ float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (float64x2_t) __builtin_neon_vldap1q_lane_f64(__p0, (int8x16_t)__rev1, __p2, 42); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vldap1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s1 = __p1; \ __ret = (int64x2_t) __builtin_neon_vldap1q_lane_s64(__p0, (int8x16_t)__s1, __p2, 35); \ __ret; \ }) #else #define vldap1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s1 = __p1; \ int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (int64x2_t) __builtin_neon_vldap1q_lane_s64(__p0, (int8x16_t)__rev1, __p2, 35); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #define vldap1_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __ret; \ uint64x1_t __s1 = __p1; \ __ret = (uint64x1_t) __builtin_neon_vldap1_lane_u64(__p0, (int8x8_t)__s1, __p2, 19); \ __ret; \ }) #define vldap1_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1_t __ret; \ float64x1_t __s1 = __p1; \ __ret = (float64x1_t) __builtin_neon_vldap1_lane_f64(__p0, (int8x8_t)__s1, __p2, 10); \ __ret; \ }) #define vldap1_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1_t __ret; \ int64x1_t __s1 = __p1; \ __ret = (int64x1_t) __builtin_neon_vldap1_lane_s64(__p0, (int8x8_t)__s1, __p2, 3); \ __ret; \ }) #define vstl1_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1_t __s1 = __p1; \ __builtin_neon_vstl1_lane_p64(__p0, (int8x8_t)__s1, __p2, 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vstl1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __s1 = __p1; \ __builtin_neon_vstl1q_lane_p64(__p0, (int8x16_t)__s1, __p2, 38); \ }) #else #define vstl1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __s1 = __p1; \ poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __builtin_neon_vstl1q_lane_p64(__p0, (int8x16_t)__rev1, __p2, 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vstl1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __s1 = __p1; \ __builtin_neon_vstl1q_lane_u64(__p0, (int8x16_t)__s1, __p2, 51); \ }) #else #define vstl1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __s1 = __p1; \ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __builtin_neon_vstl1q_lane_u64(__p0, (int8x16_t)__rev1, __p2, 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vstl1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __s1 = __p1; \ __builtin_neon_vstl1q_lane_f64(__p0, (int8x16_t)__s1, __p2, 42); \ }) #else #define vstl1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __s1 = __p1; \ float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __builtin_neon_vstl1q_lane_f64(__p0, (int8x16_t)__rev1, __p2, 42); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vstl1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __s1 = __p1; \ __builtin_neon_vstl1q_lane_s64(__p0, (int8x16_t)__s1, __p2, 35); \ }) #else #define vstl1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __s1 = __p1; \ int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __builtin_neon_vstl1q_lane_s64(__p0, (int8x16_t)__rev1, __p2, 35); \ }) #endif #define vstl1_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __s1 = __p1; \ __builtin_neon_vstl1_lane_u64(__p0, (int8x8_t)__s1, __p2, 19); \ }) #define vstl1_lane_f64(__p0, __p1, __p2) __extension__ 
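/*
 * vldap1*_lane_* and vstl1*_lane_* above are the single-lane load-acquire and
 * store-release vector accesses; unlike vld1_lane/vst1_lane they carry
 * acquire/release memory ordering (they correspond to the LDAP1/STL1
 * instructions of the later RCpc extensions; treat the exact feature name as a
 * hedged note). A usage sketch, assuming a 64-bit value published by another
 * thread and a target that enables these intrinsics:
 */
#include <arm_neon.h>
#include <stdint.h>
static inline uint64x2_t load_published_lane0(const uint64_t *flag, uint64x2_t v) {
    return vldap1q_lane_u64(flag, v, 0);   /* lane 0 reloaded with acquire ordering */
}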
({ \ float64x1_t __s1 = __p1; \ __builtin_neon_vstl1_lane_f64(__p0, (int8x8_t)__s1, __p2, 10); \ }) #define vstl1_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1_t __s1 = __p1; \ __builtin_neon_vstl1_lane_s64(__p0, (int8x8_t)__s1, __p2, 3); \ }) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) uint8x16_t vbcaxq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vbcaxq_u8((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 48); return __ret; } #else __ai __attribute__((target("sha3"))) uint8x16_t vbcaxq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_vbcaxq_u8((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) uint32x4_t vbcaxq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vbcaxq_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); return __ret; } #else __ai __attribute__((target("sha3"))) uint32x4_t vbcaxq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vbcaxq_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) uint64x2_t vbcaxq_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vbcaxq_u64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 51); return __ret; } #else __ai __attribute__((target("sha3"))) uint64x2_t vbcaxq_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (uint64x2_t) __builtin_neon_vbcaxq_u64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) uint16x8_t vbcaxq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vbcaxq_u16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 49); return __ret; } #else __ai __attribute__((target("sha3"))) uint16x8_t vbcaxq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 
2, 1, 0); __ret = (uint16x8_t) __builtin_neon_vbcaxq_u16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) int8x16_t vbcaxq_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vbcaxq_s8((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 32); return __ret; } #else __ai __attribute__((target("sha3"))) int8x16_t vbcaxq_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_vbcaxq_s8((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) int32x4_t vbcaxq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vbcaxq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); return __ret; } #else __ai __attribute__((target("sha3"))) int32x4_t vbcaxq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_vbcaxq_s32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) int64x2_t vbcaxq_s64(int64x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vbcaxq_s64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 35); return __ret; } #else __ai __attribute__((target("sha3"))) int64x2_t vbcaxq_s64(int64x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (int64x2_t) __builtin_neon_vbcaxq_s64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) int16x8_t vbcaxq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vbcaxq_s16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 33); return __ret; } #else __ai __attribute__((target("sha3"))) int16x8_t vbcaxq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_vbcaxq_s16((int8x16_t)__rev0, (int8x16_t)__rev1, 
(int8x16_t)__rev2, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) uint8x16_t veor3q_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_veor3q_u8((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 48); return __ret; } #else __ai __attribute__((target("sha3"))) uint8x16_t veor3q_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint8x16_t) __builtin_neon_veor3q_u8((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) uint32x4_t veor3q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_veor3q_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); return __ret; } #else __ai __attribute__((target("sha3"))) uint32x4_t veor3q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_veor3q_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) uint64x2_t veor3q_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_veor3q_u64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 51); return __ret; } #else __ai __attribute__((target("sha3"))) uint64x2_t veor3q_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (uint64x2_t) __builtin_neon_veor3q_u64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) uint16x8_t veor3q_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_veor3q_u16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 49); return __ret; } #else __ai __attribute__((target("sha3"))) uint16x8_t veor3q_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t) __builtin_neon_veor3q_u16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 49); __ret = 
__builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) int8x16_t veor3q_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_veor3q_s8((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 32); return __ret; } #else __ai __attribute__((target("sha3"))) int8x16_t veor3q_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int8x16_t) __builtin_neon_veor3q_s8((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) int32x4_t veor3q_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_veor3q_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); return __ret; } #else __ai __attribute__((target("sha3"))) int32x4_t veor3q_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (int32x4_t) __builtin_neon_veor3q_s32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) int64x2_t veor3q_s64(int64x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_veor3q_s64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 35); return __ret; } #else __ai __attribute__((target("sha3"))) int64x2_t veor3q_s64(int64x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (int64x2_t) __builtin_neon_veor3q_s64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) int16x8_t veor3q_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_veor3q_s16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 33); return __ret; } #else __ai __attribute__((target("sha3"))) int16x8_t veor3q_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t) __builtin_neon_veor3q_s16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef 
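/*
 * The target("sha3") helpers above are the Armv8 SHA-3 bit-manipulation
 * operations. veor3q_*(a, b, c) is a three-way XOR, and vbcaxq_*(a, b, c) is
 * "bit clear and XOR", read here as a ^ (b & ~c); the operand order is easy to
 * get backwards, so treat that reading as a hedged note. Scalar sketches with
 * hypothetical names:
 */
#include <stdint.h>
static inline uint64_t ref_eor3(uint64_t a, uint64_t b, uint64_t c) {
    return a ^ b ^ c;        /* EOR3: XOR of three inputs, lane by lane */
}
static inline uint64_t ref_bcax(uint64_t a, uint64_t b, uint64_t c) {
    return a ^ (b & ~c);     /* BCAX: clear the bits of b selected by c, then XOR */
}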
__LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) uint64x2_t vrax1q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vrax1q_u64((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else __ai __attribute__((target("sha3"))) uint64x2_t vrax1q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t) __builtin_neon_vrax1q_u64((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) uint64x2_t vsha512hq_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vsha512hq_u64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 51); return __ret; } #else __ai __attribute__((target("sha3"))) uint64x2_t vsha512hq_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (uint64x2_t) __builtin_neon_vsha512hq_u64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) uint64x2_t vsha512h2q_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vsha512h2q_u64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 51); return __ret; } #else __ai __attribute__((target("sha3"))) uint64x2_t vsha512h2q_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (uint64x2_t) __builtin_neon_vsha512h2q_u64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) uint64x2_t vsha512su0q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vsha512su0q_u64((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else __ai __attribute__((target("sha3"))) uint64x2_t vsha512su0q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t) __builtin_neon_vsha512su0q_u64((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3"))) uint64x2_t vsha512su1q_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vsha512su1q_u64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 51); return __ret; } #else __ai __attribute__((target("sha3"))) uint64x2_t vsha512su1q_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 
0); uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (uint64x2_t) __builtin_neon_vsha512su1q_u64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vxarq_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ __ret = (uint64x2_t) __builtin_neon_vxarq_u64((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \ __ret; \ }) #else #define vxarq_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ __ret = (uint64x2_t) __builtin_neon_vxarq_u64((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sm4"))) uint32x4_t vsm3partw1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vsm3partw1q_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); return __ret; } #else __ai __attribute__((target("sm4"))) uint32x4_t vsm3partw1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vsm3partw1q_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sm4"))) uint32x4_t vsm3partw2q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vsm3partw2q_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); return __ret; } #else __ai __attribute__((target("sm4"))) uint32x4_t vsm3partw2q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vsm3partw2q_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sm4"))) uint32x4_t vsm3ss1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vsm3ss1q_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); return __ret; } #else __ai __attribute__((target("sm4"))) uint32x4_t vsm3ss1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vsm3ss1q_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define 
vsm3tt1aq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ __ret = (uint32x4_t) __builtin_neon_vsm3tt1aq_u32((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 50); \ __ret; \ }) #else #define vsm3tt1aq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ __ret = (uint32x4_t) __builtin_neon_vsm3tt1aq_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsm3tt1bq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ __ret = (uint32x4_t) __builtin_neon_vsm3tt1bq_u32((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 50); \ __ret; \ }) #else #define vsm3tt1bq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ __ret = (uint32x4_t) __builtin_neon_vsm3tt1bq_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsm3tt2aq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ __ret = (uint32x4_t) __builtin_neon_vsm3tt2aq_u32((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 50); \ __ret; \ }) #else #define vsm3tt2aq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ __ret = (uint32x4_t) __builtin_neon_vsm3tt2aq_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsm3tt2bq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ __ret = (uint32x4_t) __builtin_neon_vsm3tt2bq_u32((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 50); \ __ret; \ }) #else #define vsm3tt2bq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ __ret = (uint32x4_t) __builtin_neon_vsm3tt2bq_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 
__p3, 50); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sm4"))) uint32x4_t vsm4eq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vsm4eq_u32((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai __attribute__((target("sm4"))) uint32x4_t vsm4eq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vsm4eq_u32((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sm4"))) uint32x4_t vsm4ekeyq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vsm4ekeyq_u32((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else __ai __attribute__((target("sm4"))) uint32x4_t vsm4ekeyq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t) __builtin_neon_vsm4ekeyq_u32((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif __ai __attribute__((target("v8.1a"))) int32_t vqrdmlahs_s32(int32_t __p0, int32_t __p1, int32_t __p2) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqrdmlahs_s32(__p0, __p1, __p2); return __ret; } __ai __attribute__((target("v8.1a"))) int16_t vqrdmlahh_s16(int16_t __p0, int16_t __p1, int16_t __p2) { int16_t __ret; __ret = (int16_t) __builtin_neon_vqrdmlahh_s16(__p0, __p1, __p2); return __ret; } #ifdef __LITTLE_ENDIAN__ #define vqrdmlahs_lane_s32(__p0_787, __p1_787, __p2_787, __p3_787) __extension__ ({ \ int32_t __ret_787; \ int32_t __s0_787 = __p0_787; \ int32_t __s1_787 = __p1_787; \ int32x2_t __s2_787 = __p2_787; \ __ret_787 = vqrdmlahs_s32(__s0_787, __s1_787, vget_lane_s32(__s2_787, __p3_787)); \ __ret_787; \ }) #else #define vqrdmlahs_lane_s32(__p0_788, __p1_788, __p2_788, __p3_788) __extension__ ({ \ int32_t __ret_788; \ int32_t __s0_788 = __p0_788; \ int32_t __s1_788 = __p1_788; \ int32x2_t __s2_788 = __p2_788; \ int32x2_t __rev2_788; __rev2_788 = __builtin_shufflevector(__s2_788, __s2_788, 1, 0); \ __ret_788 = vqrdmlahs_s32(__s0_788, __s1_788, __noswap_vget_lane_s32(__rev2_788, __p3_788)); \ __ret_788; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlahh_lane_s16(__p0_789, __p1_789, __p2_789, __p3_789) __extension__ ({ \ int16_t __ret_789; \ int16_t __s0_789 = __p0_789; \ int16_t __s1_789 = __p1_789; \ int16x4_t __s2_789 = __p2_789; \ __ret_789 = vqrdmlahh_s16(__s0_789, __s1_789, vget_lane_s16(__s2_789, __p3_789)); \ __ret_789; \ }) #else #define vqrdmlahh_lane_s16(__p0_790, __p1_790, __p2_790, __p3_790) __extension__ ({ \ int16_t __ret_790; \ int16_t __s0_790 = __p0_790; \ int16_t __s1_790 = __p1_790; \ int16x4_t __s2_790 = __p2_790; \ int16x4_t __rev2_790; __rev2_790 = __builtin_shufflevector(__s2_790, __s2_790, 3, 2, 1, 0); \ __ret_790 = vqrdmlahh_s16(__s0_790, __s1_790, __noswap_vget_lane_s16(__rev2_790, __p3_790)); \ __ret_790; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlahs_laneq_s32(__p0_791, __p1_791, __p2_791, __p3_791) __extension__ ({ \ int32_t __ret_791; \ int32_t __s0_791 = __p0_791; \ int32_t __s1_791 = __p1_791; 
\ int32x4_t __s2_791 = __p2_791; \ __ret_791 = vqrdmlahs_s32(__s0_791, __s1_791, vgetq_lane_s32(__s2_791, __p3_791)); \ __ret_791; \ }) #else #define vqrdmlahs_laneq_s32(__p0_792, __p1_792, __p2_792, __p3_792) __extension__ ({ \ int32_t __ret_792; \ int32_t __s0_792 = __p0_792; \ int32_t __s1_792 = __p1_792; \ int32x4_t __s2_792 = __p2_792; \ int32x4_t __rev2_792; __rev2_792 = __builtin_shufflevector(__s2_792, __s2_792, 3, 2, 1, 0); \ __ret_792 = vqrdmlahs_s32(__s0_792, __s1_792, __noswap_vgetq_lane_s32(__rev2_792, __p3_792)); \ __ret_792; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlahh_laneq_s16(__p0_793, __p1_793, __p2_793, __p3_793) __extension__ ({ \ int16_t __ret_793; \ int16_t __s0_793 = __p0_793; \ int16_t __s1_793 = __p1_793; \ int16x8_t __s2_793 = __p2_793; \ __ret_793 = vqrdmlahh_s16(__s0_793, __s1_793, vgetq_lane_s16(__s2_793, __p3_793)); \ __ret_793; \ }) #else #define vqrdmlahh_laneq_s16(__p0_794, __p1_794, __p2_794, __p3_794) __extension__ ({ \ int16_t __ret_794; \ int16_t __s0_794 = __p0_794; \ int16_t __s1_794 = __p1_794; \ int16x8_t __s2_794 = __p2_794; \ int16x8_t __rev2_794; __rev2_794 = __builtin_shufflevector(__s2_794, __s2_794, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_794 = vqrdmlahh_s16(__s0_794, __s1_794, __noswap_vgetq_lane_s16(__rev2_794, __p3_794)); \ __ret_794; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlahq_laneq_s32(__p0_795, __p1_795, __p2_795, __p3_795) __extension__ ({ \ int32x4_t __ret_795; \ int32x4_t __s0_795 = __p0_795; \ int32x4_t __s1_795 = __p1_795; \ int32x4_t __s2_795 = __p2_795; \ __ret_795 = vqrdmlahq_s32(__s0_795, __s1_795, splatq_laneq_s32(__s2_795, __p3_795)); \ __ret_795; \ }) #else #define vqrdmlahq_laneq_s32(__p0_796, __p1_796, __p2_796, __p3_796) __extension__ ({ \ int32x4_t __ret_796; \ int32x4_t __s0_796 = __p0_796; \ int32x4_t __s1_796 = __p1_796; \ int32x4_t __s2_796 = __p2_796; \ int32x4_t __rev0_796; __rev0_796 = __builtin_shufflevector(__s0_796, __s0_796, 3, 2, 1, 0); \ int32x4_t __rev1_796; __rev1_796 = __builtin_shufflevector(__s1_796, __s1_796, 3, 2, 1, 0); \ int32x4_t __rev2_796; __rev2_796 = __builtin_shufflevector(__s2_796, __s2_796, 3, 2, 1, 0); \ __ret_796 = __noswap_vqrdmlahq_s32(__rev0_796, __rev1_796, __noswap_splatq_laneq_s32(__rev2_796, __p3_796)); \ __ret_796 = __builtin_shufflevector(__ret_796, __ret_796, 3, 2, 1, 0); \ __ret_796; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlahq_laneq_s16(__p0_797, __p1_797, __p2_797, __p3_797) __extension__ ({ \ int16x8_t __ret_797; \ int16x8_t __s0_797 = __p0_797; \ int16x8_t __s1_797 = __p1_797; \ int16x8_t __s2_797 = __p2_797; \ __ret_797 = vqrdmlahq_s16(__s0_797, __s1_797, splatq_laneq_s16(__s2_797, __p3_797)); \ __ret_797; \ }) #else #define vqrdmlahq_laneq_s16(__p0_798, __p1_798, __p2_798, __p3_798) __extension__ ({ \ int16x8_t __ret_798; \ int16x8_t __s0_798 = __p0_798; \ int16x8_t __s1_798 = __p1_798; \ int16x8_t __s2_798 = __p2_798; \ int16x8_t __rev0_798; __rev0_798 = __builtin_shufflevector(__s0_798, __s0_798, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1_798; __rev1_798 = __builtin_shufflevector(__s1_798, __s1_798, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev2_798; __rev2_798 = __builtin_shufflevector(__s2_798, __s2_798, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_798 = __noswap_vqrdmlahq_s16(__rev0_798, __rev1_798, __noswap_splatq_laneq_s16(__rev2_798, __p3_798)); \ __ret_798 = __builtin_shufflevector(__ret_798, __ret_798, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_798; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlah_laneq_s32(__p0_799, __p1_799, __p2_799, __p3_799) 
__extension__ ({ \ int32x2_t __ret_799; \ int32x2_t __s0_799 = __p0_799; \ int32x2_t __s1_799 = __p1_799; \ int32x4_t __s2_799 = __p2_799; \ __ret_799 = vqrdmlah_s32(__s0_799, __s1_799, splat_laneq_s32(__s2_799, __p3_799)); \ __ret_799; \ }) #else #define vqrdmlah_laneq_s32(__p0_800, __p1_800, __p2_800, __p3_800) __extension__ ({ \ int32x2_t __ret_800; \ int32x2_t __s0_800 = __p0_800; \ int32x2_t __s1_800 = __p1_800; \ int32x4_t __s2_800 = __p2_800; \ int32x2_t __rev0_800; __rev0_800 = __builtin_shufflevector(__s0_800, __s0_800, 1, 0); \ int32x2_t __rev1_800; __rev1_800 = __builtin_shufflevector(__s1_800, __s1_800, 1, 0); \ int32x4_t __rev2_800; __rev2_800 = __builtin_shufflevector(__s2_800, __s2_800, 3, 2, 1, 0); \ __ret_800 = __noswap_vqrdmlah_s32(__rev0_800, __rev1_800, __noswap_splat_laneq_s32(__rev2_800, __p3_800)); \ __ret_800 = __builtin_shufflevector(__ret_800, __ret_800, 1, 0); \ __ret_800; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlah_laneq_s16(__p0_801, __p1_801, __p2_801, __p3_801) __extension__ ({ \ int16x4_t __ret_801; \ int16x4_t __s0_801 = __p0_801; \ int16x4_t __s1_801 = __p1_801; \ int16x8_t __s2_801 = __p2_801; \ __ret_801 = vqrdmlah_s16(__s0_801, __s1_801, splat_laneq_s16(__s2_801, __p3_801)); \ __ret_801; \ }) #else #define vqrdmlah_laneq_s16(__p0_802, __p1_802, __p2_802, __p3_802) __extension__ ({ \ int16x4_t __ret_802; \ int16x4_t __s0_802 = __p0_802; \ int16x4_t __s1_802 = __p1_802; \ int16x8_t __s2_802 = __p2_802; \ int16x4_t __rev0_802; __rev0_802 = __builtin_shufflevector(__s0_802, __s0_802, 3, 2, 1, 0); \ int16x4_t __rev1_802; __rev1_802 = __builtin_shufflevector(__s1_802, __s1_802, 3, 2, 1, 0); \ int16x8_t __rev2_802; __rev2_802 = __builtin_shufflevector(__s2_802, __s2_802, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_802 = __noswap_vqrdmlah_s16(__rev0_802, __rev1_802, __noswap_splat_laneq_s16(__rev2_802, __p3_802)); \ __ret_802 = __builtin_shufflevector(__ret_802, __ret_802, 3, 2, 1, 0); \ __ret_802; \ }) #endif __ai __attribute__((target("v8.1a"))) int32_t vqrdmlshs_s32(int32_t __p0, int32_t __p1, int32_t __p2) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqrdmlshs_s32(__p0, __p1, __p2); return __ret; } __ai __attribute__((target("v8.1a"))) int16_t vqrdmlshh_s16(int16_t __p0, int16_t __p1, int16_t __p2) { int16_t __ret; __ret = (int16_t) __builtin_neon_vqrdmlshh_s16(__p0, __p1, __p2); return __ret; } #ifdef __LITTLE_ENDIAN__ #define vqrdmlshs_lane_s32(__p0_803, __p1_803, __p2_803, __p3_803) __extension__ ({ \ int32_t __ret_803; \ int32_t __s0_803 = __p0_803; \ int32_t __s1_803 = __p1_803; \ int32x2_t __s2_803 = __p2_803; \ __ret_803 = vqrdmlshs_s32(__s0_803, __s1_803, vget_lane_s32(__s2_803, __p3_803)); \ __ret_803; \ }) #else #define vqrdmlshs_lane_s32(__p0_804, __p1_804, __p2_804, __p3_804) __extension__ ({ \ int32_t __ret_804; \ int32_t __s0_804 = __p0_804; \ int32_t __s1_804 = __p1_804; \ int32x2_t __s2_804 = __p2_804; \ int32x2_t __rev2_804; __rev2_804 = __builtin_shufflevector(__s2_804, __s2_804, 1, 0); \ __ret_804 = vqrdmlshs_s32(__s0_804, __s1_804, __noswap_vget_lane_s32(__rev2_804, __p3_804)); \ __ret_804; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlshh_lane_s16(__p0_805, __p1_805, __p2_805, __p3_805) __extension__ ({ \ int16_t __ret_805; \ int16_t __s0_805 = __p0_805; \ int16_t __s1_805 = __p1_805; \ int16x4_t __s2_805 = __p2_805; \ __ret_805 = vqrdmlshh_s16(__s0_805, __s1_805, vget_lane_s16(__s2_805, __p3_805)); \ __ret_805; \ }) #else #define vqrdmlshh_lane_s16(__p0_806, __p1_806, __p2_806, __p3_806) __extension__ ({ \ int16_t __ret_806; \ 
int16_t __s0_806 = __p0_806; \ int16_t __s1_806 = __p1_806; \ int16x4_t __s2_806 = __p2_806; \ int16x4_t __rev2_806; __rev2_806 = __builtin_shufflevector(__s2_806, __s2_806, 3, 2, 1, 0); \ __ret_806 = vqrdmlshh_s16(__s0_806, __s1_806, __noswap_vget_lane_s16(__rev2_806, __p3_806)); \ __ret_806; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlshs_laneq_s32(__p0_807, __p1_807, __p2_807, __p3_807) __extension__ ({ \ int32_t __ret_807; \ int32_t __s0_807 = __p0_807; \ int32_t __s1_807 = __p1_807; \ int32x4_t __s2_807 = __p2_807; \ __ret_807 = vqrdmlshs_s32(__s0_807, __s1_807, vgetq_lane_s32(__s2_807, __p3_807)); \ __ret_807; \ }) #else #define vqrdmlshs_laneq_s32(__p0_808, __p1_808, __p2_808, __p3_808) __extension__ ({ \ int32_t __ret_808; \ int32_t __s0_808 = __p0_808; \ int32_t __s1_808 = __p1_808; \ int32x4_t __s2_808 = __p2_808; \ int32x4_t __rev2_808; __rev2_808 = __builtin_shufflevector(__s2_808, __s2_808, 3, 2, 1, 0); \ __ret_808 = vqrdmlshs_s32(__s0_808, __s1_808, __noswap_vgetq_lane_s32(__rev2_808, __p3_808)); \ __ret_808; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlshh_laneq_s16(__p0_809, __p1_809, __p2_809, __p3_809) __extension__ ({ \ int16_t __ret_809; \ int16_t __s0_809 = __p0_809; \ int16_t __s1_809 = __p1_809; \ int16x8_t __s2_809 = __p2_809; \ __ret_809 = vqrdmlshh_s16(__s0_809, __s1_809, vgetq_lane_s16(__s2_809, __p3_809)); \ __ret_809; \ }) #else #define vqrdmlshh_laneq_s16(__p0_810, __p1_810, __p2_810, __p3_810) __extension__ ({ \ int16_t __ret_810; \ int16_t __s0_810 = __p0_810; \ int16_t __s1_810 = __p1_810; \ int16x8_t __s2_810 = __p2_810; \ int16x8_t __rev2_810; __rev2_810 = __builtin_shufflevector(__s2_810, __s2_810, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_810 = vqrdmlshh_s16(__s0_810, __s1_810, __noswap_vgetq_lane_s16(__rev2_810, __p3_810)); \ __ret_810; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlshq_laneq_s32(__p0_811, __p1_811, __p2_811, __p3_811) __extension__ ({ \ int32x4_t __ret_811; \ int32x4_t __s0_811 = __p0_811; \ int32x4_t __s1_811 = __p1_811; \ int32x4_t __s2_811 = __p2_811; \ __ret_811 = vqrdmlshq_s32(__s0_811, __s1_811, splatq_laneq_s32(__s2_811, __p3_811)); \ __ret_811; \ }) #else #define vqrdmlshq_laneq_s32(__p0_812, __p1_812, __p2_812, __p3_812) __extension__ ({ \ int32x4_t __ret_812; \ int32x4_t __s0_812 = __p0_812; \ int32x4_t __s1_812 = __p1_812; \ int32x4_t __s2_812 = __p2_812; \ int32x4_t __rev0_812; __rev0_812 = __builtin_shufflevector(__s0_812, __s0_812, 3, 2, 1, 0); \ int32x4_t __rev1_812; __rev1_812 = __builtin_shufflevector(__s1_812, __s1_812, 3, 2, 1, 0); \ int32x4_t __rev2_812; __rev2_812 = __builtin_shufflevector(__s2_812, __s2_812, 3, 2, 1, 0); \ __ret_812 = __noswap_vqrdmlshq_s32(__rev0_812, __rev1_812, __noswap_splatq_laneq_s32(__rev2_812, __p3_812)); \ __ret_812 = __builtin_shufflevector(__ret_812, __ret_812, 3, 2, 1, 0); \ __ret_812; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlshq_laneq_s16(__p0_813, __p1_813, __p2_813, __p3_813) __extension__ ({ \ int16x8_t __ret_813; \ int16x8_t __s0_813 = __p0_813; \ int16x8_t __s1_813 = __p1_813; \ int16x8_t __s2_813 = __p2_813; \ __ret_813 = vqrdmlshq_s16(__s0_813, __s1_813, splatq_laneq_s16(__s2_813, __p3_813)); \ __ret_813; \ }) #else #define vqrdmlshq_laneq_s16(__p0_814, __p1_814, __p2_814, __p3_814) __extension__ ({ \ int16x8_t __ret_814; \ int16x8_t __s0_814 = __p0_814; \ int16x8_t __s1_814 = __p1_814; \ int16x8_t __s2_814 = __p2_814; \ int16x8_t __rev0_814; __rev0_814 = __builtin_shufflevector(__s0_814, __s0_814, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1_814; __rev1_814 = 
__builtin_shufflevector(__s1_814, __s1_814, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev2_814; __rev2_814 = __builtin_shufflevector(__s2_814, __s2_814, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_814 = __noswap_vqrdmlshq_s16(__rev0_814, __rev1_814, __noswap_splatq_laneq_s16(__rev2_814, __p3_814)); \ __ret_814 = __builtin_shufflevector(__ret_814, __ret_814, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_814; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlsh_laneq_s32(__p0_815, __p1_815, __p2_815, __p3_815) __extension__ ({ \ int32x2_t __ret_815; \ int32x2_t __s0_815 = __p0_815; \ int32x2_t __s1_815 = __p1_815; \ int32x4_t __s2_815 = __p2_815; \ __ret_815 = vqrdmlsh_s32(__s0_815, __s1_815, splat_laneq_s32(__s2_815, __p3_815)); \ __ret_815; \ }) #else #define vqrdmlsh_laneq_s32(__p0_816, __p1_816, __p2_816, __p3_816) __extension__ ({ \ int32x2_t __ret_816; \ int32x2_t __s0_816 = __p0_816; \ int32x2_t __s1_816 = __p1_816; \ int32x4_t __s2_816 = __p2_816; \ int32x2_t __rev0_816; __rev0_816 = __builtin_shufflevector(__s0_816, __s0_816, 1, 0); \ int32x2_t __rev1_816; __rev1_816 = __builtin_shufflevector(__s1_816, __s1_816, 1, 0); \ int32x4_t __rev2_816; __rev2_816 = __builtin_shufflevector(__s2_816, __s2_816, 3, 2, 1, 0); \ __ret_816 = __noswap_vqrdmlsh_s32(__rev0_816, __rev1_816, __noswap_splat_laneq_s32(__rev2_816, __p3_816)); \ __ret_816 = __builtin_shufflevector(__ret_816, __ret_816, 1, 0); \ __ret_816; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vqrdmlsh_laneq_s16(__p0_817, __p1_817, __p2_817, __p3_817) __extension__ ({ \ int16x4_t __ret_817; \ int16x4_t __s0_817 = __p0_817; \ int16x4_t __s1_817 = __p1_817; \ int16x8_t __s2_817 = __p2_817; \ __ret_817 = vqrdmlsh_s16(__s0_817, __s1_817, splat_laneq_s16(__s2_817, __p3_817)); \ __ret_817; \ }) #else #define vqrdmlsh_laneq_s16(__p0_818, __p1_818, __p2_818, __p3_818) __extension__ ({ \ int16x4_t __ret_818; \ int16x4_t __s0_818 = __p0_818; \ int16x4_t __s1_818 = __p1_818; \ int16x8_t __s2_818 = __p2_818; \ int16x4_t __rev0_818; __rev0_818 = __builtin_shufflevector(__s0_818, __s0_818, 3, 2, 1, 0); \ int16x4_t __rev1_818; __rev1_818 = __builtin_shufflevector(__s1_818, __s1_818, 3, 2, 1, 0); \ int16x8_t __rev2_818; __rev2_818 = __builtin_shufflevector(__s2_818, __s2_818, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_818 = __noswap_vqrdmlsh_s16(__rev0_818, __rev1_818, __noswap_splat_laneq_s16(__rev2_818, __p3_818)); \ __ret_818 = __builtin_shufflevector(__ret_818, __ret_818, 3, 2, 1, 0); \ __ret_818; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a"))) float64x2_t vcaddq_rot270_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vcaddq_rot270_f64((int8x16_t)__p0, (int8x16_t)__p1, 42); return __ret; } #else __ai __attribute__((target("v8.3a"))) float64x2_t vcaddq_rot270_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float64x2_t) __builtin_neon_vcaddq_rot270_f64((int8x16_t)__rev0, (int8x16_t)__rev1, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a"))) float64x2_t vcaddq_rot90_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vcaddq_rot90_f64((int8x16_t)__p0, (int8x16_t)__p1, 42); return __ret; } #else __ai __attribute__((target("v8.3a"))) float64x2_t vcaddq_rot90_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t 
__ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float64x2_t) __builtin_neon_vcaddq_rot90_f64((int8x16_t)__rev0, (int8x16_t)__rev1, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a"))) float64x2_t vcmlaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vcmlaq_f64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); return __ret; } #else __ai __attribute__((target("v8.3a"))) float64x2_t vcmlaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (float64x2_t) __builtin_neon_vcmlaq_f64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai __attribute__((target("v8.3a"))) float64x2_t __noswap_vcmlaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vcmlaq_f64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); return __ret; } #endif __ai __attribute__((target("v8.3a"))) float64x1_t vcmla_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vcmla_f64((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); return __ret; } #define vcmla_lane_f64(__p0_819, __p1_819, __p2_819, __p3_819) __extension__ ({ \ float64x1_t __ret_819; \ float64x1_t __s0_819 = __p0_819; \ float64x1_t __s1_819 = __p1_819; \ float64x1_t __s2_819 = __p2_819; \ float64x1_t __reint_819 = __s2_819; \ uint64x2_t __reint1_819 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_819, __p3_819), vgetq_lane_u64(*(uint64x2_t *) &__reint_819, __p3_819)}; \ __ret_819 = vcmla_f64(__s0_819, __s1_819, *(float64x1_t *) &__reint1_819); \ __ret_819; \ }) #ifdef __LITTLE_ENDIAN__ #define vcmlaq_lane_f64(__p0_820, __p1_820, __p2_820, __p3_820) __extension__ ({ \ float64x2_t __ret_820; \ float64x2_t __s0_820 = __p0_820; \ float64x2_t __s1_820 = __p1_820; \ float64x1_t __s2_820 = __p2_820; \ float64x1_t __reint_820 = __s2_820; \ uint64x2_t __reint1_820 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_820, __p3_820), vgetq_lane_u64(*(uint64x2_t *) &__reint_820, __p3_820)}; \ __ret_820 = vcmlaq_f64(__s0_820, __s1_820, *(float64x2_t *) &__reint1_820); \ __ret_820; \ }) #else #define vcmlaq_lane_f64(__p0_821, __p1_821, __p2_821, __p3_821) __extension__ ({ \ float64x2_t __ret_821; \ float64x2_t __s0_821 = __p0_821; \ float64x2_t __s1_821 = __p1_821; \ float64x1_t __s2_821 = __p2_821; \ float64x2_t __rev0_821; __rev0_821 = __builtin_shufflevector(__s0_821, __s0_821, 1, 0); \ float64x2_t __rev1_821; __rev1_821 = __builtin_shufflevector(__s1_821, __s1_821, 1, 0); \ float64x1_t __reint_821 = __s2_821; \ uint64x2_t __reint1_821 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_821, __p3_821), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_821, __p3_821)}; \ __ret_821 = __noswap_vcmlaq_f64(__rev0_821, __rev1_821, *(float64x2_t *) &__reint1_821); \ __ret_821 = __builtin_shufflevector(__ret_821, __ret_821, 1, 0); \ __ret_821; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmla_laneq_f64(__p0_822, __p1_822, 
__p2_822, __p3_822) __extension__ ({ \ float64x1_t __ret_822; \ float64x1_t __s0_822 = __p0_822; \ float64x1_t __s1_822 = __p1_822; \ float64x2_t __s2_822 = __p2_822; \ float64x2_t __reint_822 = __s2_822; \ uint64x2_t __reint1_822 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_822, __p3_822), vgetq_lane_u64(*(uint64x2_t *) &__reint_822, __p3_822)}; \ __ret_822 = vcmla_f64(__s0_822, __s1_822, *(float64x1_t *) &__reint1_822); \ __ret_822; \ }) #else #define vcmla_laneq_f64(__p0_823, __p1_823, __p2_823, __p3_823) __extension__ ({ \ float64x1_t __ret_823; \ float64x1_t __s0_823 = __p0_823; \ float64x1_t __s1_823 = __p1_823; \ float64x2_t __s2_823 = __p2_823; \ float64x2_t __rev2_823; __rev2_823 = __builtin_shufflevector(__s2_823, __s2_823, 1, 0); \ float64x2_t __reint_823 = __rev2_823; \ uint64x2_t __reint1_823 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_823, __p3_823), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_823, __p3_823)}; \ __ret_823 = vcmla_f64(__s0_823, __s1_823, *(float64x1_t *) &__reint1_823); \ __ret_823; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmlaq_laneq_f64(__p0_824, __p1_824, __p2_824, __p3_824) __extension__ ({ \ float64x2_t __ret_824; \ float64x2_t __s0_824 = __p0_824; \ float64x2_t __s1_824 = __p1_824; \ float64x2_t __s2_824 = __p2_824; \ float64x2_t __reint_824 = __s2_824; \ uint64x2_t __reint1_824 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_824, __p3_824), vgetq_lane_u64(*(uint64x2_t *) &__reint_824, __p3_824)}; \ __ret_824 = vcmlaq_f64(__s0_824, __s1_824, *(float64x2_t *) &__reint1_824); \ __ret_824; \ }) #else #define vcmlaq_laneq_f64(__p0_825, __p1_825, __p2_825, __p3_825) __extension__ ({ \ float64x2_t __ret_825; \ float64x2_t __s0_825 = __p0_825; \ float64x2_t __s1_825 = __p1_825; \ float64x2_t __s2_825 = __p2_825; \ float64x2_t __rev0_825; __rev0_825 = __builtin_shufflevector(__s0_825, __s0_825, 1, 0); \ float64x2_t __rev1_825; __rev1_825 = __builtin_shufflevector(__s1_825, __s1_825, 1, 0); \ float64x2_t __rev2_825; __rev2_825 = __builtin_shufflevector(__s2_825, __s2_825, 1, 0); \ float64x2_t __reint_825 = __rev2_825; \ uint64x2_t __reint1_825 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_825, __p3_825), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_825, __p3_825)}; \ __ret_825 = __noswap_vcmlaq_f64(__rev0_825, __rev1_825, *(float64x2_t *) &__reint1_825); \ __ret_825 = __builtin_shufflevector(__ret_825, __ret_825, 1, 0); \ __ret_825; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a"))) float64x2_t vcmlaq_rot180_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vcmlaq_rot180_f64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); return __ret; } #else __ai __attribute__((target("v8.3a"))) float64x2_t vcmlaq_rot180_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (float64x2_t) __builtin_neon_vcmlaq_rot180_f64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai __attribute__((target("v8.3a"))) float64x2_t __noswap_vcmlaq_rot180_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; __ret = (float64x2_t) 
__builtin_neon_vcmlaq_rot180_f64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); return __ret; } #endif __ai __attribute__((target("v8.3a"))) float64x1_t vcmla_rot180_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vcmla_rot180_f64((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); return __ret; } #define vcmla_rot180_lane_f64(__p0_826, __p1_826, __p2_826, __p3_826) __extension__ ({ \ float64x1_t __ret_826; \ float64x1_t __s0_826 = __p0_826; \ float64x1_t __s1_826 = __p1_826; \ float64x1_t __s2_826 = __p2_826; \ float64x1_t __reint_826 = __s2_826; \ uint64x2_t __reint1_826 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_826, __p3_826), vgetq_lane_u64(*(uint64x2_t *) &__reint_826, __p3_826)}; \ __ret_826 = vcmla_rot180_f64(__s0_826, __s1_826, *(float64x1_t *) &__reint1_826); \ __ret_826; \ }) #ifdef __LITTLE_ENDIAN__ #define vcmlaq_rot180_lane_f64(__p0_827, __p1_827, __p2_827, __p3_827) __extension__ ({ \ float64x2_t __ret_827; \ float64x2_t __s0_827 = __p0_827; \ float64x2_t __s1_827 = __p1_827; \ float64x1_t __s2_827 = __p2_827; \ float64x1_t __reint_827 = __s2_827; \ uint64x2_t __reint1_827 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_827, __p3_827), vgetq_lane_u64(*(uint64x2_t *) &__reint_827, __p3_827)}; \ __ret_827 = vcmlaq_rot180_f64(__s0_827, __s1_827, *(float64x2_t *) &__reint1_827); \ __ret_827; \ }) #else #define vcmlaq_rot180_lane_f64(__p0_828, __p1_828, __p2_828, __p3_828) __extension__ ({ \ float64x2_t __ret_828; \ float64x2_t __s0_828 = __p0_828; \ float64x2_t __s1_828 = __p1_828; \ float64x1_t __s2_828 = __p2_828; \ float64x2_t __rev0_828; __rev0_828 = __builtin_shufflevector(__s0_828, __s0_828, 1, 0); \ float64x2_t __rev1_828; __rev1_828 = __builtin_shufflevector(__s1_828, __s1_828, 1, 0); \ float64x1_t __reint_828 = __s2_828; \ uint64x2_t __reint1_828 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_828, __p3_828), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_828, __p3_828)}; \ __ret_828 = __noswap_vcmlaq_rot180_f64(__rev0_828, __rev1_828, *(float64x2_t *) &__reint1_828); \ __ret_828 = __builtin_shufflevector(__ret_828, __ret_828, 1, 0); \ __ret_828; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmla_rot180_laneq_f64(__p0_829, __p1_829, __p2_829, __p3_829) __extension__ ({ \ float64x1_t __ret_829; \ float64x1_t __s0_829 = __p0_829; \ float64x1_t __s1_829 = __p1_829; \ float64x2_t __s2_829 = __p2_829; \ float64x2_t __reint_829 = __s2_829; \ uint64x2_t __reint1_829 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_829, __p3_829), vgetq_lane_u64(*(uint64x2_t *) &__reint_829, __p3_829)}; \ __ret_829 = vcmla_rot180_f64(__s0_829, __s1_829, *(float64x1_t *) &__reint1_829); \ __ret_829; \ }) #else #define vcmla_rot180_laneq_f64(__p0_830, __p1_830, __p2_830, __p3_830) __extension__ ({ \ float64x1_t __ret_830; \ float64x1_t __s0_830 = __p0_830; \ float64x1_t __s1_830 = __p1_830; \ float64x2_t __s2_830 = __p2_830; \ float64x2_t __rev2_830; __rev2_830 = __builtin_shufflevector(__s2_830, __s2_830, 1, 0); \ float64x2_t __reint_830 = __rev2_830; \ uint64x2_t __reint1_830 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_830, __p3_830), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_830, __p3_830)}; \ __ret_830 = vcmla_rot180_f64(__s0_830, __s1_830, *(float64x1_t *) &__reint1_830); \ __ret_830; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmlaq_rot180_laneq_f64(__p0_831, __p1_831, __p2_831, __p3_831) __extension__ ({ \ float64x2_t __ret_831; \ 
float64x2_t __s0_831 = __p0_831; \ float64x2_t __s1_831 = __p1_831; \ float64x2_t __s2_831 = __p2_831; \ float64x2_t __reint_831 = __s2_831; \ uint64x2_t __reint1_831 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_831, __p3_831), vgetq_lane_u64(*(uint64x2_t *) &__reint_831, __p3_831)}; \ __ret_831 = vcmlaq_rot180_f64(__s0_831, __s1_831, *(float64x2_t *) &__reint1_831); \ __ret_831; \ }) #else #define vcmlaq_rot180_laneq_f64(__p0_832, __p1_832, __p2_832, __p3_832) __extension__ ({ \ float64x2_t __ret_832; \ float64x2_t __s0_832 = __p0_832; \ float64x2_t __s1_832 = __p1_832; \ float64x2_t __s2_832 = __p2_832; \ float64x2_t __rev0_832; __rev0_832 = __builtin_shufflevector(__s0_832, __s0_832, 1, 0); \ float64x2_t __rev1_832; __rev1_832 = __builtin_shufflevector(__s1_832, __s1_832, 1, 0); \ float64x2_t __rev2_832; __rev2_832 = __builtin_shufflevector(__s2_832, __s2_832, 1, 0); \ float64x2_t __reint_832 = __rev2_832; \ uint64x2_t __reint1_832 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_832, __p3_832), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_832, __p3_832)}; \ __ret_832 = __noswap_vcmlaq_rot180_f64(__rev0_832, __rev1_832, *(float64x2_t *) &__reint1_832); \ __ret_832 = __builtin_shufflevector(__ret_832, __ret_832, 1, 0); \ __ret_832; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a"))) float64x2_t vcmlaq_rot270_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vcmlaq_rot270_f64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); return __ret; } #else __ai __attribute__((target("v8.3a"))) float64x2_t vcmlaq_rot270_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (float64x2_t) __builtin_neon_vcmlaq_rot270_f64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai __attribute__((target("v8.3a"))) float64x2_t __noswap_vcmlaq_rot270_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vcmlaq_rot270_f64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); return __ret; } #endif __ai __attribute__((target("v8.3a"))) float64x1_t vcmla_rot270_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vcmla_rot270_f64((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); return __ret; } #define vcmla_rot270_lane_f64(__p0_833, __p1_833, __p2_833, __p3_833) __extension__ ({ \ float64x1_t __ret_833; \ float64x1_t __s0_833 = __p0_833; \ float64x1_t __s1_833 = __p1_833; \ float64x1_t __s2_833 = __p2_833; \ float64x1_t __reint_833 = __s2_833; \ uint64x2_t __reint1_833 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_833, __p3_833), vgetq_lane_u64(*(uint64x2_t *) &__reint_833, __p3_833)}; \ __ret_833 = vcmla_rot270_f64(__s0_833, __s1_833, *(float64x1_t *) &__reint1_833); \ __ret_833; \ }) #ifdef __LITTLE_ENDIAN__ #define vcmlaq_rot270_lane_f64(__p0_834, __p1_834, __p2_834, __p3_834) __extension__ ({ \ float64x2_t __ret_834; \ float64x2_t __s0_834 = __p0_834; \ float64x2_t __s1_834 = __p1_834; \ float64x1_t __s2_834 = __p2_834; \ float64x1_t __reint_834 = __s2_834; \ uint64x2_t __reint1_834 = (uint64x2_t) 
{vgetq_lane_u64(*(uint64x2_t *) &__reint_834, __p3_834), vgetq_lane_u64(*(uint64x2_t *) &__reint_834, __p3_834)}; \ __ret_834 = vcmlaq_rot270_f64(__s0_834, __s1_834, *(float64x2_t *) &__reint1_834); \ __ret_834; \ }) #else #define vcmlaq_rot270_lane_f64(__p0_835, __p1_835, __p2_835, __p3_835) __extension__ ({ \ float64x2_t __ret_835; \ float64x2_t __s0_835 = __p0_835; \ float64x2_t __s1_835 = __p1_835; \ float64x1_t __s2_835 = __p2_835; \ float64x2_t __rev0_835; __rev0_835 = __builtin_shufflevector(__s0_835, __s0_835, 1, 0); \ float64x2_t __rev1_835; __rev1_835 = __builtin_shufflevector(__s1_835, __s1_835, 1, 0); \ float64x1_t __reint_835 = __s2_835; \ uint64x2_t __reint1_835 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_835, __p3_835), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_835, __p3_835)}; \ __ret_835 = __noswap_vcmlaq_rot270_f64(__rev0_835, __rev1_835, *(float64x2_t *) &__reint1_835); \ __ret_835 = __builtin_shufflevector(__ret_835, __ret_835, 1, 0); \ __ret_835; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmla_rot270_laneq_f64(__p0_836, __p1_836, __p2_836, __p3_836) __extension__ ({ \ float64x1_t __ret_836; \ float64x1_t __s0_836 = __p0_836; \ float64x1_t __s1_836 = __p1_836; \ float64x2_t __s2_836 = __p2_836; \ float64x2_t __reint_836 = __s2_836; \ uint64x2_t __reint1_836 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_836, __p3_836), vgetq_lane_u64(*(uint64x2_t *) &__reint_836, __p3_836)}; \ __ret_836 = vcmla_rot270_f64(__s0_836, __s1_836, *(float64x1_t *) &__reint1_836); \ __ret_836; \ }) #else #define vcmla_rot270_laneq_f64(__p0_837, __p1_837, __p2_837, __p3_837) __extension__ ({ \ float64x1_t __ret_837; \ float64x1_t __s0_837 = __p0_837; \ float64x1_t __s1_837 = __p1_837; \ float64x2_t __s2_837 = __p2_837; \ float64x2_t __rev2_837; __rev2_837 = __builtin_shufflevector(__s2_837, __s2_837, 1, 0); \ float64x2_t __reint_837 = __rev2_837; \ uint64x2_t __reint1_837 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_837, __p3_837), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_837, __p3_837)}; \ __ret_837 = vcmla_rot270_f64(__s0_837, __s1_837, *(float64x1_t *) &__reint1_837); \ __ret_837; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmlaq_rot270_laneq_f64(__p0_838, __p1_838, __p2_838, __p3_838) __extension__ ({ \ float64x2_t __ret_838; \ float64x2_t __s0_838 = __p0_838; \ float64x2_t __s1_838 = __p1_838; \ float64x2_t __s2_838 = __p2_838; \ float64x2_t __reint_838 = __s2_838; \ uint64x2_t __reint1_838 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_838, __p3_838), vgetq_lane_u64(*(uint64x2_t *) &__reint_838, __p3_838)}; \ __ret_838 = vcmlaq_rot270_f64(__s0_838, __s1_838, *(float64x2_t *) &__reint1_838); \ __ret_838; \ }) #else #define vcmlaq_rot270_laneq_f64(__p0_839, __p1_839, __p2_839, __p3_839) __extension__ ({ \ float64x2_t __ret_839; \ float64x2_t __s0_839 = __p0_839; \ float64x2_t __s1_839 = __p1_839; \ float64x2_t __s2_839 = __p2_839; \ float64x2_t __rev0_839; __rev0_839 = __builtin_shufflevector(__s0_839, __s0_839, 1, 0); \ float64x2_t __rev1_839; __rev1_839 = __builtin_shufflevector(__s1_839, __s1_839, 1, 0); \ float64x2_t __rev2_839; __rev2_839 = __builtin_shufflevector(__s2_839, __s2_839, 1, 0); \ float64x2_t __reint_839 = __rev2_839; \ uint64x2_t __reint1_839 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_839, __p3_839), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_839, __p3_839)}; \ __ret_839 = __noswap_vcmlaq_rot270_f64(__rev0_839, __rev1_839, *(float64x2_t *) &__reint1_839); \ __ret_839 
= __builtin_shufflevector(__ret_839, __ret_839, 1, 0); \ __ret_839; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a"))) float64x2_t vcmlaq_rot90_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vcmlaq_rot90_f64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); return __ret; } #else __ai __attribute__((target("v8.3a"))) float64x2_t vcmlaq_rot90_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = (float64x2_t) __builtin_neon_vcmlaq_rot90_f64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai __attribute__((target("v8.3a"))) float64x2_t __noswap_vcmlaq_rot90_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vcmlaq_rot90_f64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); return __ret; } #endif __ai __attribute__((target("v8.3a"))) float64x1_t vcmla_rot90_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vcmla_rot90_f64((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); return __ret; } #define vcmla_rot90_lane_f64(__p0_840, __p1_840, __p2_840, __p3_840) __extension__ ({ \ float64x1_t __ret_840; \ float64x1_t __s0_840 = __p0_840; \ float64x1_t __s1_840 = __p1_840; \ float64x1_t __s2_840 = __p2_840; \ float64x1_t __reint_840 = __s2_840; \ uint64x2_t __reint1_840 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_840, __p3_840), vgetq_lane_u64(*(uint64x2_t *) &__reint_840, __p3_840)}; \ __ret_840 = vcmla_rot90_f64(__s0_840, __s1_840, *(float64x1_t *) &__reint1_840); \ __ret_840; \ }) #ifdef __LITTLE_ENDIAN__ #define vcmlaq_rot90_lane_f64(__p0_841, __p1_841, __p2_841, __p3_841) __extension__ ({ \ float64x2_t __ret_841; \ float64x2_t __s0_841 = __p0_841; \ float64x2_t __s1_841 = __p1_841; \ float64x1_t __s2_841 = __p2_841; \ float64x1_t __reint_841 = __s2_841; \ uint64x2_t __reint1_841 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_841, __p3_841), vgetq_lane_u64(*(uint64x2_t *) &__reint_841, __p3_841)}; \ __ret_841 = vcmlaq_rot90_f64(__s0_841, __s1_841, *(float64x2_t *) &__reint1_841); \ __ret_841; \ }) #else #define vcmlaq_rot90_lane_f64(__p0_842, __p1_842, __p2_842, __p3_842) __extension__ ({ \ float64x2_t __ret_842; \ float64x2_t __s0_842 = __p0_842; \ float64x2_t __s1_842 = __p1_842; \ float64x1_t __s2_842 = __p2_842; \ float64x2_t __rev0_842; __rev0_842 = __builtin_shufflevector(__s0_842, __s0_842, 1, 0); \ float64x2_t __rev1_842; __rev1_842 = __builtin_shufflevector(__s1_842, __s1_842, 1, 0); \ float64x1_t __reint_842 = __s2_842; \ uint64x2_t __reint1_842 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_842, __p3_842), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_842, __p3_842)}; \ __ret_842 = __noswap_vcmlaq_rot90_f64(__rev0_842, __rev1_842, *(float64x2_t *) &__reint1_842); \ __ret_842 = __builtin_shufflevector(__ret_842, __ret_842, 1, 0); \ __ret_842; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmla_rot90_laneq_f64(__p0_843, __p1_843, __p2_843, __p3_843) __extension__ ({ \ float64x1_t __ret_843; \ float64x1_t __s0_843 = __p0_843; \ float64x1_t __s1_843 = __p1_843; \ float64x2_t __s2_843 = 
__p2_843; \ float64x2_t __reint_843 = __s2_843; \ uint64x2_t __reint1_843 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_843, __p3_843), vgetq_lane_u64(*(uint64x2_t *) &__reint_843, __p3_843)}; \ __ret_843 = vcmla_rot90_f64(__s0_843, __s1_843, *(float64x1_t *) &__reint1_843); \ __ret_843; \ }) #else #define vcmla_rot90_laneq_f64(__p0_844, __p1_844, __p2_844, __p3_844) __extension__ ({ \ float64x1_t __ret_844; \ float64x1_t __s0_844 = __p0_844; \ float64x1_t __s1_844 = __p1_844; \ float64x2_t __s2_844 = __p2_844; \ float64x2_t __rev2_844; __rev2_844 = __builtin_shufflevector(__s2_844, __s2_844, 1, 0); \ float64x2_t __reint_844 = __rev2_844; \ uint64x2_t __reint1_844 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_844, __p3_844), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_844, __p3_844)}; \ __ret_844 = vcmla_rot90_f64(__s0_844, __s1_844, *(float64x1_t *) &__reint1_844); \ __ret_844; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcmlaq_rot90_laneq_f64(__p0_845, __p1_845, __p2_845, __p3_845) __extension__ ({ \ float64x2_t __ret_845; \ float64x2_t __s0_845 = __p0_845; \ float64x2_t __s1_845 = __p1_845; \ float64x2_t __s2_845 = __p2_845; \ float64x2_t __reint_845 = __s2_845; \ uint64x2_t __reint1_845 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_845, __p3_845), vgetq_lane_u64(*(uint64x2_t *) &__reint_845, __p3_845)}; \ __ret_845 = vcmlaq_rot90_f64(__s0_845, __s1_845, *(float64x2_t *) &__reint1_845); \ __ret_845; \ }) #else #define vcmlaq_rot90_laneq_f64(__p0_846, __p1_846, __p2_846, __p3_846) __extension__ ({ \ float64x2_t __ret_846; \ float64x2_t __s0_846 = __p0_846; \ float64x2_t __s1_846 = __p1_846; \ float64x2_t __s2_846 = __p2_846; \ float64x2_t __rev0_846; __rev0_846 = __builtin_shufflevector(__s0_846, __s0_846, 1, 0); \ float64x2_t __rev1_846; __rev1_846 = __builtin_shufflevector(__s1_846, __s1_846, 1, 0); \ float64x2_t __rev2_846; __rev2_846 = __builtin_shufflevector(__s2_846, __s2_846, 1, 0); \ float64x2_t __reint_846 = __rev2_846; \ uint64x2_t __reint1_846 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_846, __p3_846), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_846, __p3_846)}; \ __ret_846 = __noswap_vcmlaq_rot90_f64(__rev0_846, __rev1_846, *(float64x2_t *) &__reint1_846); \ __ret_846 = __builtin_shufflevector(__ret_846, __ret_846, 1, 0); \ __ret_846; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a"))) float32x4_t vrnd32xq_f32(float32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vrnd32xq_f32((int8x16_t)__p0, 41); return __ret; } #else __ai __attribute__((target("v8.5a"))) float32x4_t vrnd32xq_f32(float32x4_t __p0) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vrnd32xq_f32((int8x16_t)__rev0, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd32x_f32(float32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vrnd32x_f32((int8x8_t)__p0, 9); return __ret; } #else __ai __attribute__((target("v8.5a"))) float32x2_t vrnd32x_f32(float32x2_t __p0) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32x2_t) __builtin_neon_vrnd32x_f32((int8x8_t)__rev0, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a"))) float64x2_t 
vrnd32xq_f64(float64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vrnd32xq_f64((int8x16_t)__p0, 42); return __ret; } #else __ai __attribute__((target("v8.5a"))) float64x2_t vrnd32xq_f64(float64x2_t __p0) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64x2_t) __builtin_neon_vrnd32xq_f64((int8x16_t)__rev0, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai __attribute__((target("v8.5a"))) float64x1_t vrnd32x_f64(float64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vrnd32x_f64((int8x8_t)__p0, 10); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a"))) float32x4_t vrnd32zq_f32(float32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vrnd32zq_f32((int8x16_t)__p0, 41); return __ret; } #else __ai __attribute__((target("v8.5a"))) float32x4_t vrnd32zq_f32(float32x4_t __p0) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vrnd32zq_f32((int8x16_t)__rev0, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd32z_f32(float32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vrnd32z_f32((int8x8_t)__p0, 9); return __ret; } #else __ai __attribute__((target("v8.5a"))) float32x2_t vrnd32z_f32(float32x2_t __p0) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32x2_t) __builtin_neon_vrnd32z_f32((int8x8_t)__rev0, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a"))) float64x2_t vrnd32zq_f64(float64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vrnd32zq_f64((int8x16_t)__p0, 42); return __ret; } #else __ai __attribute__((target("v8.5a"))) float64x2_t vrnd32zq_f64(float64x2_t __p0) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64x2_t) __builtin_neon_vrnd32zq_f64((int8x16_t)__rev0, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai __attribute__((target("v8.5a"))) float64x1_t vrnd32z_f64(float64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vrnd32z_f64((int8x8_t)__p0, 10); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a"))) float32x4_t vrnd64xq_f32(float32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vrnd64xq_f32((int8x16_t)__p0, 41); return __ret; } #else __ai __attribute__((target("v8.5a"))) float32x4_t vrnd64xq_f32(float32x4_t __p0) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vrnd64xq_f32((int8x16_t)__rev0, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd64x_f32(float32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vrnd64x_f32((int8x8_t)__p0, 9); return __ret; } #else __ai __attribute__((target("v8.5a"))) float32x2_t vrnd64x_f32(float32x2_t __p0) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32x2_t) __builtin_neon_vrnd64x_f32((int8x8_t)__rev0, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); 
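/* Big-endian variant: the input lanes were reversed with __builtin_shufflevector,
   the __builtin_neon_* builtin ran on the reversed vector, and the statement above
   restores the original lane order before the result is returned. Every #else
   branch in this header follows the same reverse/compute/reverse pattern. */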
return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a"))) float64x2_t vrnd64xq_f64(float64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vrnd64xq_f64((int8x16_t)__p0, 42); return __ret; } #else __ai __attribute__((target("v8.5a"))) float64x2_t vrnd64xq_f64(float64x2_t __p0) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64x2_t) __builtin_neon_vrnd64xq_f64((int8x16_t)__rev0, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai __attribute__((target("v8.5a"))) float64x1_t vrnd64x_f64(float64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vrnd64x_f64((int8x8_t)__p0, 10); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a"))) float32x4_t vrnd64zq_f32(float32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vrnd64zq_f32((int8x16_t)__p0, 41); return __ret; } #else __ai __attribute__((target("v8.5a"))) float32x4_t vrnd64zq_f32(float32x4_t __p0) { float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); __ret = (float32x4_t) __builtin_neon_vrnd64zq_f32((int8x16_t)__rev0, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd64z_f32(float32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vrnd64z_f32((int8x8_t)__p0, 9); return __ret; } #else __ai __attribute__((target("v8.5a"))) float32x2_t vrnd64z_f32(float32x2_t __p0) { float32x2_t __ret; float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float32x2_t) __builtin_neon_vrnd64z_f32((int8x8_t)__rev0, 9); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a"))) float64x2_t vrnd64zq_f64(float64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vrnd64zq_f64((int8x16_t)__p0, 42); return __ret; } #else __ai __attribute__((target("v8.5a"))) float64x2_t vrnd64zq_f64(float64x2_t __p0) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64x2_t) __builtin_neon_vrnd64zq_f64((int8x16_t)__rev0, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai __attribute__((target("v8.5a"))) float64x1_t vrnd64z_f64(float64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vrnd64z_f64((int8x8_t)__p0, 10); return __ret; } #endif #if defined(__aarch64__) && defined(__ARM_FEATURE_DIRECTED_ROUNDING) #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vrndq_f64(float64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vrndq_v((int8x16_t)__p0, 42); return __ret; } #else __ai float64x2_t vrndq_f64(float64x2_t __p0) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64x2_t) __builtin_neon_vrndq_v((int8x16_t)__rev0, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vrnd_f64(float64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vrnd_v((int8x8_t)__p0, 10); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vrndaq_f64(float64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vrndaq_v((int8x16_t)__p0, 42); return __ret; } #else __ai float64x2_t vrndaq_f64(float64x2_t __p0) { float64x2_t __ret; float64x2_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64x2_t) __builtin_neon_vrndaq_v((int8x16_t)__rev0, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vrnda_f64(float64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vrnda_v((int8x8_t)__p0, 10); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vrndiq_f64(float64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vrndiq_v((int8x16_t)__p0, 42); return __ret; } #else __ai float64x2_t vrndiq_f64(float64x2_t __p0) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64x2_t) __builtin_neon_vrndiq_v((int8x16_t)__rev0, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vrndi_f64(float64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vrndi_v((int8x8_t)__p0, 10); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vrndmq_f64(float64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vrndmq_v((int8x16_t)__p0, 42); return __ret; } #else __ai float64x2_t vrndmq_f64(float64x2_t __p0) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64x2_t) __builtin_neon_vrndmq_v((int8x16_t)__rev0, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vrndm_f64(float64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vrndm_v((int8x8_t)__p0, 10); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vrndnq_f64(float64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vrndnq_v((int8x16_t)__p0, 42); return __ret; } #else __ai float64x2_t vrndnq_f64(float64x2_t __p0) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64x2_t) __builtin_neon_vrndnq_v((int8x16_t)__rev0, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vrndn_f64(float64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vrndn_v((int8x8_t)__p0, 10); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vrndpq_f64(float64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vrndpq_v((int8x16_t)__p0, 42); return __ret; } #else __ai float64x2_t vrndpq_f64(float64x2_t __p0) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64x2_t) __builtin_neon_vrndpq_v((int8x16_t)__rev0, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vrndp_f64(float64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vrndp_v((int8x8_t)__p0, 10); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vrndxq_f64(float64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vrndxq_v((int8x16_t)__p0, 42); return __ret; } #else __ai float64x2_t vrndxq_f64(float64x2_t __p0) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); __ret = (float64x2_t) __builtin_neon_vrndxq_v((int8x16_t)__rev0, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vrndx_f64(float64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vrndx_v((int8x8_t)__p0, 10); return __ret; } #endif #if defined(__aarch64__) && defined(__ARM_FEATURE_NUMERIC_MAXMIN) #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vmaxnmq_f64(float64x2_t 
__p0, float64x2_t __p1) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vmaxnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); return __ret; } #else __ai float64x2_t vmaxnmq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float64x2_t) __builtin_neon_vmaxnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vmaxnm_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vmaxnm_v((int8x8_t)__p0, (int8x8_t)__p1, 10); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vminnmq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; __ret = (float64x2_t) __builtin_neon_vminnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); return __ret; } #else __ai float64x2_t vminnmq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (float64x2_t) __builtin_neon_vminnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif __ai float64x1_t vminnm_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vminnm_v((int8x8_t)__p0, (int8x8_t)__p1, 10); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vabaq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; __ret = __p0 + vabdq_u8(__p1, __p2); return __ret; } #else __ai uint8x16_t vabaq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __noswap_vabdq_u8(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vabaq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; __ret = __p0 + vabdq_u32(__p1, __p2); return __ret; } #else __ai uint32x4_t vabaq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __rev0 + __noswap_vabdq_u32(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vabaq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; __ret = __p0 + vabdq_u16(__p1, __p2); return __ret; } #else __ai uint16x8_t vabaq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 
+ __noswap_vabdq_u16(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x16_t vabaq_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; __ret = __p0 + vabdq_s8(__p1, __p2); return __ret; } #else __ai int8x16_t vabaq_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __noswap_vabdq_s8(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vabaq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; __ret = __p0 + vabdq_s32(__p1, __p2); return __ret; } #else __ai int32x4_t vabaq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __rev0 + __noswap_vabdq_s32(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vabaq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; __ret = __p0 + vabdq_s16(__p1, __p2); return __ret; } #else __ai int16x8_t vabaq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __noswap_vabdq_s16(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vaba_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; __ret = __p0 + vabd_u8(__p1, __p2); return __ret; } #else __ai uint8x8_t vaba_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __noswap_vabd_u8(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vaba_u32(uint32x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint32x2_t __ret; __ret = __p0 + vabd_u32(__p1, __p2); return __ret; } #else __ai uint32x2_t vaba_u32(uint32x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint32x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __rev0 + __noswap_vabd_u32(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ 
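/* Descriptive note: the doubleword vaba_* (absolute-difference-and-accumulate)
   intrinsics below follow the same pattern as the quadword vabaq_* variants above.
   The little-endian definition simply computes __p0 + vabd_*(__p1, __p2); the
   big-endian definition reverses the lane order of every operand with
   __builtin_shufflevector, calls the __noswap_ helper on the reversed operands,
   and then reverses the result back to architectural order. */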
__ai uint16x4_t vaba_u16(uint16x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint16x4_t __ret; __ret = __p0 + vabd_u16(__p1, __p2); return __ret; } #else __ai uint16x4_t vaba_u16(uint16x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint16x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __rev0 + __noswap_vabd_u16(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int8x8_t vaba_s8(int8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; __ret = __p0 + vabd_s8(__p1, __p2); return __ret; } #else __ai int8x8_t vaba_s8(int8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __noswap_vabd_s8(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vaba_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; __ret = __p0 + vabd_s32(__p1, __p2); return __ret; } #else __ai int32x2_t vaba_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __rev0 + __noswap_vabd_s32(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vaba_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; __ret = __p0 + vabd_s16(__p1, __p2); return __ret; } #else __ai int16x4_t vaba_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __rev0 + __noswap_vabd_s16(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vabdl_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(vmovl_u8((uint8x8_t)(vabd_u8(__p0, __p1)))); return __ret; } #else __ai uint16x8_t vabdl_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (uint16x8_t)(__noswap_vmovl_u8((uint8x8_t)(__noswap_vabd_u8(__rev0, __rev1)))); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai uint16x8_t __noswap_vabdl_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__noswap_vmovl_u8((uint8x8_t)(__noswap_vabd_u8(__p0, __p1)))); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vabdl_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t)(vmovl_u32((uint32x2_t)(vabd_u32(__p0, __p1)))); return __ret; } #else __ai uint64x2_t vabdl_u32(uint32x2_t 
__p0, uint32x2_t __p1) { uint64x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (uint64x2_t)(__noswap_vmovl_u32((uint32x2_t)(__noswap_vabd_u32(__rev0, __rev1)))); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai uint64x2_t __noswap_vabdl_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t)(__noswap_vmovl_u32((uint32x2_t)(__noswap_vabd_u32(__p0, __p1)))); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vabdl_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(vmovl_u16((uint16x4_t)(vabd_u16(__p0, __p1)))); return __ret; } #else __ai uint32x4_t vabdl_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (uint32x4_t)(__noswap_vmovl_u16((uint16x4_t)(__noswap_vabd_u16(__rev0, __rev1)))); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai uint32x4_t __noswap_vabdl_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__noswap_vmovl_u16((uint16x4_t)(__noswap_vabd_u16(__p0, __p1)))); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vabdl_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t)(vmovl_u8((uint8x8_t)(vabd_s8(__p0, __p1)))); return __ret; } #else __ai int16x8_t vabdl_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = (int16x8_t)(__noswap_vmovl_u8((uint8x8_t)(__noswap_vabd_s8(__rev0, __rev1)))); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai int16x8_t __noswap_vabdl_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t)(__noswap_vmovl_u8((uint8x8_t)(__noswap_vabd_s8(__p0, __p1)))); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vabdl_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t)(vmovl_u32((uint32x2_t)(vabd_s32(__p0, __p1)))); return __ret; } #else __ai int64x2_t vabdl_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = (int64x2_t)(__noswap_vmovl_u32((uint32x2_t)(__noswap_vabd_s32(__rev0, __rev1)))); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int64x2_t __noswap_vabdl_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t)(__noswap_vmovl_u32((uint32x2_t)(__noswap_vabd_s32(__p0, __p1)))); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vabdl_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t)(vmovl_u16((uint16x4_t)(vabd_s16(__p0, __p1)))); return __ret; } #else __ai int32x4_t vabdl_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = (int32x4_t)(__noswap_vmovl_u16((uint16x4_t)(__noswap_vabd_s16(__rev0, __rev1)))); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vabdl_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t 
__ret; __ret = (int32x4_t)(__noswap_vmovl_u16((uint16x4_t)(__noswap_vabd_s16(__p0, __p1)))); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vaddl_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; __ret = vmovl_u8(__p0) + vmovl_u8(__p1); return __ret; } #else __ai uint16x8_t vaddl_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmovl_u8(__rev0) + __noswap_vmovl_u8(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vaddl_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; __ret = vmovl_u32(__p0) + vmovl_u32(__p1); return __ret; } #else __ai uint64x2_t vaddl_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __noswap_vmovl_u32(__rev0) + __noswap_vmovl_u32(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vaddl_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; __ret = vmovl_u16(__p0) + vmovl_u16(__p1); return __ret; } #else __ai uint32x4_t vaddl_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vmovl_u16(__rev0) + __noswap_vmovl_u16(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vaddl_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; __ret = vmovl_s8(__p0) + vmovl_s8(__p1); return __ret; } #else __ai int16x8_t vaddl_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmovl_s8(__rev0) + __noswap_vmovl_s8(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vaddl_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; __ret = vmovl_s32(__p0) + vmovl_s32(__p1); return __ret; } #else __ai int64x2_t vaddl_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __noswap_vmovl_s32(__rev0) + __noswap_vmovl_s32(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vaddl_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; __ret = vmovl_s16(__p0) + vmovl_s16(__p1); return __ret; } #else __ai int32x4_t vaddl_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vmovl_s16(__rev0) + __noswap_vmovl_s16(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vaddw_u8(uint16x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; __ret = __p0 + vmovl_u8(__p1); return __ret; } 
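/* The #else branch that follows is the big-endian form of vaddw_u8: both operands
   are lane-reversed, the widening add goes through __noswap_vmovl_u8, and the
   result is reversed back before it is returned. */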
#else __ai uint16x8_t vaddw_u8(uint16x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __noswap_vmovl_u8(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vaddw_u32(uint64x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; __ret = __p0 + vmovl_u32(__p1); return __ret; } #else __ai uint64x2_t vaddw_u32(uint64x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 + __noswap_vmovl_u32(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vaddw_u16(uint32x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; __ret = __p0 + vmovl_u16(__p1); return __ret; } #else __ai uint32x4_t vaddw_u16(uint32x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 + __noswap_vmovl_u16(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vaddw_s8(int16x8_t __p0, int8x8_t __p1) { int16x8_t __ret; __ret = __p0 + vmovl_s8(__p1); return __ret; } #else __ai int16x8_t vaddw_s8(int16x8_t __p0, int8x8_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __noswap_vmovl_s8(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vaddw_s32(int64x2_t __p0, int32x2_t __p1) { int64x2_t __ret; __ret = __p0 + vmovl_s32(__p1); return __ret; } #else __ai int64x2_t vaddw_s32(int64x2_t __p0, int32x2_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 + __noswap_vmovl_s32(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vaddw_s16(int32x4_t __p0, int16x4_t __p1) { int32x4_t __ret; __ret = __p0 + vmovl_s16(__p1); return __ret; } #else __ai int32x4_t vaddw_s16(int32x4_t __p0, int16x4_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 + __noswap_vmovl_s16(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vget_lane_f16(__p0_847, __p1_847) __extension__ ({ \ float16_t __ret_847; \ float16x4_t __s0_847 = __p0_847; \ float16x4_t __reint_847 = __s0_847; \ int16_t __reint1_847 = vget_lane_s16(*(int16x4_t *) &__reint_847, __p1_847); \ __ret_847 = *(float16_t *) &__reint1_847; \ __ret_847; \ }) #else #define vget_lane_f16(__p0_848, __p1_848) __extension__ ({ \ float16_t __ret_848; \ float16x4_t __s0_848 = __p0_848; \ float16x4_t __rev0_848; __rev0_848 = __builtin_shufflevector(__s0_848, __s0_848, 3, 2, 1, 0); \ float16x4_t __reint_848 = __rev0_848; \ int16_t 
__reint1_848 = __noswap_vget_lane_s16(*(int16x4_t *) &__reint_848, __p1_848); \ __ret_848 = *(float16_t *) &__reint1_848; \ __ret_848; \ }) #define __noswap_vget_lane_f16(__p0_849, __p1_849) __extension__ ({ \ float16_t __ret_849; \ float16x4_t __s0_849 = __p0_849; \ float16x4_t __reint_849 = __s0_849; \ int16_t __reint1_849 = __noswap_vget_lane_s16(*(int16x4_t *) &__reint_849, __p1_849); \ __ret_849 = *(float16_t *) &__reint1_849; \ __ret_849; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vgetq_lane_f16(__p0_850, __p1_850) __extension__ ({ \ float16_t __ret_850; \ float16x8_t __s0_850 = __p0_850; \ float16x8_t __reint_850 = __s0_850; \ int16_t __reint1_850 = vgetq_lane_s16(*(int16x8_t *) &__reint_850, __p1_850); \ __ret_850 = *(float16_t *) &__reint1_850; \ __ret_850; \ }) #else #define vgetq_lane_f16(__p0_851, __p1_851) __extension__ ({ \ float16_t __ret_851; \ float16x8_t __s0_851 = __p0_851; \ float16x8_t __rev0_851; __rev0_851 = __builtin_shufflevector(__s0_851, __s0_851, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __reint_851 = __rev0_851; \ int16_t __reint1_851 = __noswap_vgetq_lane_s16(*(int16x8_t *) &__reint_851, __p1_851); \ __ret_851 = *(float16_t *) &__reint1_851; \ __ret_851; \ }) #define __noswap_vgetq_lane_f16(__p0_852, __p1_852) __extension__ ({ \ float16_t __ret_852; \ float16x8_t __s0_852 = __p0_852; \ float16x8_t __reint_852 = __s0_852; \ int16_t __reint1_852 = __noswap_vgetq_lane_s16(*(int16x8_t *) &__reint_852, __p1_852); \ __ret_852 = *(float16_t *) &__reint1_852; \ __ret_852; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vmlal_u8(uint16x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint16x8_t __ret; __ret = __p0 + vmull_u8(__p1, __p2); return __ret; } #else __ai uint16x8_t vmlal_u8(uint16x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __noswap_vmull_u8(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai uint16x8_t __noswap_vmlal_u8(uint16x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint16x8_t __ret; __ret = __p0 + __noswap_vmull_u8(__p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vmlal_u32(uint64x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint64x2_t __ret; __ret = __p0 + vmull_u32(__p1, __p2); return __ret; } #else __ai uint64x2_t vmlal_u32(uint64x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __rev0 + __noswap_vmull_u32(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai uint64x2_t __noswap_vmlal_u32(uint64x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint64x2_t __ret; __ret = __p0 + __noswap_vmull_u32(__p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmlal_u16(uint32x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint32x4_t __ret; __ret = __p0 + vmull_u16(__p1, __p2); return __ret; } #else __ai uint32x4_t vmlal_u16(uint32x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __rev0 + __noswap_vmull_u16(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai uint32x4_t __noswap_vmlal_u16(uint32x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint32x4_t __ret; __ret = __p0 + __noswap_vmull_u16(__p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vmlal_s8(int16x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int16x8_t __ret; __ret = __p0 + vmull_s8(__p1, __p2); return __ret; } #else __ai int16x8_t vmlal_s8(int16x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __noswap_vmull_s8(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai int16x8_t __noswap_vmlal_s8(int16x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int16x8_t __ret; __ret = __p0 + __noswap_vmull_s8(__p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vmlal_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; __ret = __p0 + vmull_s32(__p1, __p2); return __ret; } #else __ai int64x2_t vmlal_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __rev0 + __noswap_vmull_s32(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int64x2_t __noswap_vmlal_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; __ret = __p0 + __noswap_vmull_s32(__p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmlal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; __ret = __p0 + vmull_s16(__p1, __p2); return __ret; } #else __ai int32x4_t vmlal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __rev0 + __noswap_vmull_s16(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vmlal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; __ret = __p0 + __noswap_vmull_s16(__p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vmlal_lane_u32(__p0_853, __p1_853, __p2_853, __p3_853) __extension__ ({ \ uint64x2_t __ret_853; \ uint64x2_t __s0_853 = __p0_853; \ uint32x2_t __s1_853 = __p1_853; \ uint32x2_t __s2_853 = __p2_853; \ __ret_853 = __s0_853 + vmull_u32(__s1_853, splat_lane_u32(__s2_853, __p3_853)); \ __ret_853; \ }) #else #define vmlal_lane_u32(__p0_854, __p1_854, __p2_854, __p3_854) __extension__ ({ \ uint64x2_t __ret_854; \ uint64x2_t __s0_854 = __p0_854; \ uint32x2_t __s1_854 = __p1_854; \ uint32x2_t __s2_854 = __p2_854; \ uint64x2_t __rev0_854; __rev0_854 = __builtin_shufflevector(__s0_854, __s0_854, 1, 0); \ uint32x2_t __rev1_854; __rev1_854 = __builtin_shufflevector(__s1_854, __s1_854, 1, 0); \ 
uint32x2_t __rev2_854; __rev2_854 = __builtin_shufflevector(__s2_854, __s2_854, 1, 0); \ __ret_854 = __rev0_854 + __noswap_vmull_u32(__rev1_854, __noswap_splat_lane_u32(__rev2_854, __p3_854)); \ __ret_854 = __builtin_shufflevector(__ret_854, __ret_854, 1, 0); \ __ret_854; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlal_lane_u16(__p0_855, __p1_855, __p2_855, __p3_855) __extension__ ({ \ uint32x4_t __ret_855; \ uint32x4_t __s0_855 = __p0_855; \ uint16x4_t __s1_855 = __p1_855; \ uint16x4_t __s2_855 = __p2_855; \ __ret_855 = __s0_855 + vmull_u16(__s1_855, splat_lane_u16(__s2_855, __p3_855)); \ __ret_855; \ }) #else #define vmlal_lane_u16(__p0_856, __p1_856, __p2_856, __p3_856) __extension__ ({ \ uint32x4_t __ret_856; \ uint32x4_t __s0_856 = __p0_856; \ uint16x4_t __s1_856 = __p1_856; \ uint16x4_t __s2_856 = __p2_856; \ uint32x4_t __rev0_856; __rev0_856 = __builtin_shufflevector(__s0_856, __s0_856, 3, 2, 1, 0); \ uint16x4_t __rev1_856; __rev1_856 = __builtin_shufflevector(__s1_856, __s1_856, 3, 2, 1, 0); \ uint16x4_t __rev2_856; __rev2_856 = __builtin_shufflevector(__s2_856, __s2_856, 3, 2, 1, 0); \ __ret_856 = __rev0_856 + __noswap_vmull_u16(__rev1_856, __noswap_splat_lane_u16(__rev2_856, __p3_856)); \ __ret_856 = __builtin_shufflevector(__ret_856, __ret_856, 3, 2, 1, 0); \ __ret_856; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlal_lane_s32(__p0_857, __p1_857, __p2_857, __p3_857) __extension__ ({ \ int64x2_t __ret_857; \ int64x2_t __s0_857 = __p0_857; \ int32x2_t __s1_857 = __p1_857; \ int32x2_t __s2_857 = __p2_857; \ __ret_857 = __s0_857 + vmull_s32(__s1_857, splat_lane_s32(__s2_857, __p3_857)); \ __ret_857; \ }) #else #define vmlal_lane_s32(__p0_858, __p1_858, __p2_858, __p3_858) __extension__ ({ \ int64x2_t __ret_858; \ int64x2_t __s0_858 = __p0_858; \ int32x2_t __s1_858 = __p1_858; \ int32x2_t __s2_858 = __p2_858; \ int64x2_t __rev0_858; __rev0_858 = __builtin_shufflevector(__s0_858, __s0_858, 1, 0); \ int32x2_t __rev1_858; __rev1_858 = __builtin_shufflevector(__s1_858, __s1_858, 1, 0); \ int32x2_t __rev2_858; __rev2_858 = __builtin_shufflevector(__s2_858, __s2_858, 1, 0); \ __ret_858 = __rev0_858 + __noswap_vmull_s32(__rev1_858, __noswap_splat_lane_s32(__rev2_858, __p3_858)); \ __ret_858 = __builtin_shufflevector(__ret_858, __ret_858, 1, 0); \ __ret_858; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlal_lane_s16(__p0_859, __p1_859, __p2_859, __p3_859) __extension__ ({ \ int32x4_t __ret_859; \ int32x4_t __s0_859 = __p0_859; \ int16x4_t __s1_859 = __p1_859; \ int16x4_t __s2_859 = __p2_859; \ __ret_859 = __s0_859 + vmull_s16(__s1_859, splat_lane_s16(__s2_859, __p3_859)); \ __ret_859; \ }) #else #define vmlal_lane_s16(__p0_860, __p1_860, __p2_860, __p3_860) __extension__ ({ \ int32x4_t __ret_860; \ int32x4_t __s0_860 = __p0_860; \ int16x4_t __s1_860 = __p1_860; \ int16x4_t __s2_860 = __p2_860; \ int32x4_t __rev0_860; __rev0_860 = __builtin_shufflevector(__s0_860, __s0_860, 3, 2, 1, 0); \ int16x4_t __rev1_860; __rev1_860 = __builtin_shufflevector(__s1_860, __s1_860, 3, 2, 1, 0); \ int16x4_t __rev2_860; __rev2_860 = __builtin_shufflevector(__s2_860, __s2_860, 3, 2, 1, 0); \ __ret_860 = __rev0_860 + __noswap_vmull_s16(__rev1_860, __noswap_splat_lane_s16(__rev2_860, __p3_860)); \ __ret_860 = __builtin_shufflevector(__ret_860, __ret_860, 3, 2, 1, 0); \ __ret_860; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vmlal_n_u32(uint64x2_t __p0, uint32x2_t __p1, uint32_t __p2) { uint64x2_t __ret; __ret = __p0 + vmull_u32(__p1, (uint32x2_t) {__p2, __p2}); return __ret; } #else __ai uint64x2_t 
vmlal_n_u32(uint64x2_t __p0, uint32x2_t __p1, uint32_t __p2) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 + __noswap_vmull_u32(__rev1, (uint32x2_t) {__p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai uint64x2_t __noswap_vmlal_n_u32(uint64x2_t __p0, uint32x2_t __p1, uint32_t __p2) { uint64x2_t __ret; __ret = __p0 + __noswap_vmull_u32(__p1, (uint32x2_t) {__p2, __p2}); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmlal_n_u16(uint32x4_t __p0, uint16x4_t __p1, uint16_t __p2) { uint32x4_t __ret; __ret = __p0 + vmull_u16(__p1, (uint16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } #else __ai uint32x4_t vmlal_n_u16(uint32x4_t __p0, uint16x4_t __p1, uint16_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 + __noswap_vmull_u16(__rev1, (uint16x4_t) {__p2, __p2, __p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai uint32x4_t __noswap_vmlal_n_u16(uint32x4_t __p0, uint16x4_t __p1, uint16_t __p2) { uint32x4_t __ret; __ret = __p0 + __noswap_vmull_u16(__p1, (uint16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vmlal_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; __ret = __p0 + vmull_s32(__p1, (int32x2_t) {__p2, __p2}); return __ret; } #else __ai int64x2_t vmlal_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 + __noswap_vmull_s32(__rev1, (int32x2_t) {__p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int64x2_t __noswap_vmlal_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; __ret = __p0 + __noswap_vmull_s32(__p1, (int32x2_t) {__p2, __p2}); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmlal_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; __ret = __p0 + vmull_s16(__p1, (int16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } #else __ai int32x4_t vmlal_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 + __noswap_vmull_s16(__rev1, (int16x4_t) {__p2, __p2, __p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vmlal_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; __ret = __p0 + __noswap_vmull_s16(__p1, (int16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vmlsl_u8(uint16x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint16x8_t __ret; __ret = __p0 - vmull_u8(__p1, __p2); return __ret; } #else __ai uint16x8_t vmlsl_u8(uint16x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __noswap_vmull_u8(__rev1, 
__rev2); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai uint16x8_t __noswap_vmlsl_u8(uint16x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint16x8_t __ret; __ret = __p0 - __noswap_vmull_u8(__p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vmlsl_u32(uint64x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint64x2_t __ret; __ret = __p0 - vmull_u32(__p1, __p2); return __ret; } #else __ai uint64x2_t vmlsl_u32(uint64x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __rev0 - __noswap_vmull_u32(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai uint64x2_t __noswap_vmlsl_u32(uint64x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint64x2_t __ret; __ret = __p0 - __noswap_vmull_u32(__p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmlsl_u16(uint32x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint32x4_t __ret; __ret = __p0 - vmull_u16(__p1, __p2); return __ret; } #else __ai uint32x4_t vmlsl_u16(uint32x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __rev0 - __noswap_vmull_u16(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai uint32x4_t __noswap_vmlsl_u16(uint32x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint32x4_t __ret; __ret = __p0 - __noswap_vmull_u16(__p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vmlsl_s8(int16x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int16x8_t __ret; __ret = __p0 - vmull_s8(__p1, __p2); return __ret; } #else __ai int16x8_t vmlsl_s8(int16x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 - __noswap_vmull_s8(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai int16x8_t __noswap_vmlsl_s8(int16x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int16x8_t __ret; __ret = __p0 - __noswap_vmull_s8(__p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vmlsl_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; __ret = __p0 - vmull_s32(__p1, __p2); return __ret; } #else __ai int64x2_t vmlsl_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __rev0 - __noswap_vmull_s32(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int64x2_t __noswap_vmlsl_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; __ret = __p0 - __noswap_vmull_s32(__p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmlsl_s16(int32x4_t __p0, int16x4_t 
__p1, int16x4_t __p2) { int32x4_t __ret; __ret = __p0 - vmull_s16(__p1, __p2); return __ret; } #else __ai int32x4_t vmlsl_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __rev0 - __noswap_vmull_s16(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vmlsl_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; __ret = __p0 - __noswap_vmull_s16(__p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vmlsl_lane_u32(__p0_861, __p1_861, __p2_861, __p3_861) __extension__ ({ \ uint64x2_t __ret_861; \ uint64x2_t __s0_861 = __p0_861; \ uint32x2_t __s1_861 = __p1_861; \ uint32x2_t __s2_861 = __p2_861; \ __ret_861 = __s0_861 - vmull_u32(__s1_861, splat_lane_u32(__s2_861, __p3_861)); \ __ret_861; \ }) #else #define vmlsl_lane_u32(__p0_862, __p1_862, __p2_862, __p3_862) __extension__ ({ \ uint64x2_t __ret_862; \ uint64x2_t __s0_862 = __p0_862; \ uint32x2_t __s1_862 = __p1_862; \ uint32x2_t __s2_862 = __p2_862; \ uint64x2_t __rev0_862; __rev0_862 = __builtin_shufflevector(__s0_862, __s0_862, 1, 0); \ uint32x2_t __rev1_862; __rev1_862 = __builtin_shufflevector(__s1_862, __s1_862, 1, 0); \ uint32x2_t __rev2_862; __rev2_862 = __builtin_shufflevector(__s2_862, __s2_862, 1, 0); \ __ret_862 = __rev0_862 - __noswap_vmull_u32(__rev1_862, __noswap_splat_lane_u32(__rev2_862, __p3_862)); \ __ret_862 = __builtin_shufflevector(__ret_862, __ret_862, 1, 0); \ __ret_862; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsl_lane_u16(__p0_863, __p1_863, __p2_863, __p3_863) __extension__ ({ \ uint32x4_t __ret_863; \ uint32x4_t __s0_863 = __p0_863; \ uint16x4_t __s1_863 = __p1_863; \ uint16x4_t __s2_863 = __p2_863; \ __ret_863 = __s0_863 - vmull_u16(__s1_863, splat_lane_u16(__s2_863, __p3_863)); \ __ret_863; \ }) #else #define vmlsl_lane_u16(__p0_864, __p1_864, __p2_864, __p3_864) __extension__ ({ \ uint32x4_t __ret_864; \ uint32x4_t __s0_864 = __p0_864; \ uint16x4_t __s1_864 = __p1_864; \ uint16x4_t __s2_864 = __p2_864; \ uint32x4_t __rev0_864; __rev0_864 = __builtin_shufflevector(__s0_864, __s0_864, 3, 2, 1, 0); \ uint16x4_t __rev1_864; __rev1_864 = __builtin_shufflevector(__s1_864, __s1_864, 3, 2, 1, 0); \ uint16x4_t __rev2_864; __rev2_864 = __builtin_shufflevector(__s2_864, __s2_864, 3, 2, 1, 0); \ __ret_864 = __rev0_864 - __noswap_vmull_u16(__rev1_864, __noswap_splat_lane_u16(__rev2_864, __p3_864)); \ __ret_864 = __builtin_shufflevector(__ret_864, __ret_864, 3, 2, 1, 0); \ __ret_864; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsl_lane_s32(__p0_865, __p1_865, __p2_865, __p3_865) __extension__ ({ \ int64x2_t __ret_865; \ int64x2_t __s0_865 = __p0_865; \ int32x2_t __s1_865 = __p1_865; \ int32x2_t __s2_865 = __p2_865; \ __ret_865 = __s0_865 - vmull_s32(__s1_865, splat_lane_s32(__s2_865, __p3_865)); \ __ret_865; \ }) #else #define vmlsl_lane_s32(__p0_866, __p1_866, __p2_866, __p3_866) __extension__ ({ \ int64x2_t __ret_866; \ int64x2_t __s0_866 = __p0_866; \ int32x2_t __s1_866 = __p1_866; \ int32x2_t __s2_866 = __p2_866; \ int64x2_t __rev0_866; __rev0_866 = __builtin_shufflevector(__s0_866, __s0_866, 1, 0); \ int32x2_t __rev1_866; __rev1_866 = __builtin_shufflevector(__s1_866, __s1_866, 1, 0); \ int32x2_t __rev2_866; __rev2_866 = __builtin_shufflevector(__s2_866, __s2_866, 1, 
0); \ __ret_866 = __rev0_866 - __noswap_vmull_s32(__rev1_866, __noswap_splat_lane_s32(__rev2_866, __p3_866)); \ __ret_866 = __builtin_shufflevector(__ret_866, __ret_866, 1, 0); \ __ret_866; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmlsl_lane_s16(__p0_867, __p1_867, __p2_867, __p3_867) __extension__ ({ \ int32x4_t __ret_867; \ int32x4_t __s0_867 = __p0_867; \ int16x4_t __s1_867 = __p1_867; \ int16x4_t __s2_867 = __p2_867; \ __ret_867 = __s0_867 - vmull_s16(__s1_867, splat_lane_s16(__s2_867, __p3_867)); \ __ret_867; \ }) #else #define vmlsl_lane_s16(__p0_868, __p1_868, __p2_868, __p3_868) __extension__ ({ \ int32x4_t __ret_868; \ int32x4_t __s0_868 = __p0_868; \ int16x4_t __s1_868 = __p1_868; \ int16x4_t __s2_868 = __p2_868; \ int32x4_t __rev0_868; __rev0_868 = __builtin_shufflevector(__s0_868, __s0_868, 3, 2, 1, 0); \ int16x4_t __rev1_868; __rev1_868 = __builtin_shufflevector(__s1_868, __s1_868, 3, 2, 1, 0); \ int16x4_t __rev2_868; __rev2_868 = __builtin_shufflevector(__s2_868, __s2_868, 3, 2, 1, 0); \ __ret_868 = __rev0_868 - __noswap_vmull_s16(__rev1_868, __noswap_splat_lane_s16(__rev2_868, __p3_868)); \ __ret_868 = __builtin_shufflevector(__ret_868, __ret_868, 3, 2, 1, 0); \ __ret_868; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vmlsl_n_u32(uint64x2_t __p0, uint32x2_t __p1, uint32_t __p2) { uint64x2_t __ret; __ret = __p0 - vmull_u32(__p1, (uint32x2_t) {__p2, __p2}); return __ret; } #else __ai uint64x2_t vmlsl_n_u32(uint64x2_t __p0, uint32x2_t __p1, uint32_t __p2) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 - __noswap_vmull_u32(__rev1, (uint32x2_t) {__p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai uint64x2_t __noswap_vmlsl_n_u32(uint64x2_t __p0, uint32x2_t __p1, uint32_t __p2) { uint64x2_t __ret; __ret = __p0 - __noswap_vmull_u32(__p1, (uint32x2_t) {__p2, __p2}); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmlsl_n_u16(uint32x4_t __p0, uint16x4_t __p1, uint16_t __p2) { uint32x4_t __ret; __ret = __p0 - vmull_u16(__p1, (uint16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } #else __ai uint32x4_t vmlsl_n_u16(uint32x4_t __p0, uint16x4_t __p1, uint16_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 - __noswap_vmull_u16(__rev1, (uint16x4_t) {__p2, __p2, __p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai uint32x4_t __noswap_vmlsl_n_u16(uint32x4_t __p0, uint16x4_t __p1, uint16_t __p2) { uint32x4_t __ret; __ret = __p0 - __noswap_vmull_u16(__p1, (uint16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vmlsl_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; __ret = __p0 - vmull_s32(__p1, (int32x2_t) {__p2, __p2}); return __ret; } #else __ai int64x2_t vmlsl_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); __ret = __rev0 - __noswap_vmull_s32(__rev1, (int32x2_t) {__p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int64x2_t __noswap_vmlsl_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; __ret = __p0 - __noswap_vmull_s32(__p1, 
(int32x2_t) {__p2, __p2}); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmlsl_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; __ret = __p0 - vmull_s16(__p1, (int16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } #else __ai int32x4_t vmlsl_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 - __noswap_vmull_s16(__rev1, (int16x4_t) {__p2, __p2, __p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vmlsl_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; __ret = __p0 - __noswap_vmull_s16(__p1, (int16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vset_lane_f16(__p0_869, __p1_869, __p2_869) __extension__ ({ \ float16x4_t __ret_869; \ float16_t __s0_869 = __p0_869; \ float16x4_t __s1_869 = __p1_869; \ float16_t __reint_869 = __s0_869; \ float16x4_t __reint1_869 = __s1_869; \ int16x4_t __reint2_869 = vset_lane_s16(*(int16_t *) &__reint_869, *(int16x4_t *) &__reint1_869, __p2_869); \ __ret_869 = *(float16x4_t *) &__reint2_869; \ __ret_869; \ }) #else #define vset_lane_f16(__p0_870, __p1_870, __p2_870) __extension__ ({ \ float16x4_t __ret_870; \ float16_t __s0_870 = __p0_870; \ float16x4_t __s1_870 = __p1_870; \ float16x4_t __rev1_870; __rev1_870 = __builtin_shufflevector(__s1_870, __s1_870, 3, 2, 1, 0); \ float16_t __reint_870 = __s0_870; \ float16x4_t __reint1_870 = __rev1_870; \ int16x4_t __reint2_870 = __noswap_vset_lane_s16(*(int16_t *) &__reint_870, *(int16x4_t *) &__reint1_870, __p2_870); \ __ret_870 = *(float16x4_t *) &__reint2_870; \ __ret_870 = __builtin_shufflevector(__ret_870, __ret_870, 3, 2, 1, 0); \ __ret_870; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsetq_lane_f16(__p0_871, __p1_871, __p2_871) __extension__ ({ \ float16x8_t __ret_871; \ float16_t __s0_871 = __p0_871; \ float16x8_t __s1_871 = __p1_871; \ float16_t __reint_871 = __s0_871; \ float16x8_t __reint1_871 = __s1_871; \ int16x8_t __reint2_871 = vsetq_lane_s16(*(int16_t *) &__reint_871, *(int16x8_t *) &__reint1_871, __p2_871); \ __ret_871 = *(float16x8_t *) &__reint2_871; \ __ret_871; \ }) #else #define vsetq_lane_f16(__p0_872, __p1_872, __p2_872) __extension__ ({ \ float16x8_t __ret_872; \ float16_t __s0_872 = __p0_872; \ float16x8_t __s1_872 = __p1_872; \ float16x8_t __rev1_872; __rev1_872 = __builtin_shufflevector(__s1_872, __s1_872, 7, 6, 5, 4, 3, 2, 1, 0); \ float16_t __reint_872 = __s0_872; \ float16x8_t __reint1_872 = __rev1_872; \ int16x8_t __reint2_872 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_872, *(int16x8_t *) &__reint1_872, __p2_872); \ __ret_872 = *(float16x8_t *) &__reint2_872; \ __ret_872 = __builtin_shufflevector(__ret_872, __ret_872, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_872; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vbfmlalbq_lane_f32(__p0_873, __p1_873, __p2_873, __p3_873) __extension__ ({ \ float32x4_t __ret_873; \ float32x4_t __s0_873 = __p0_873; \ bfloat16x8_t __s1_873 = __p1_873; \ bfloat16x4_t __s2_873 = __p2_873; \ __ret_873 = vbfmlalbq_f32(__s0_873, __s1_873, (bfloat16x8_t) {vget_lane_bf16(__s2_873, __p3_873), vget_lane_bf16(__s2_873, __p3_873), vget_lane_bf16(__s2_873, __p3_873), vget_lane_bf16(__s2_873, __p3_873), vget_lane_bf16(__s2_873, __p3_873), vget_lane_bf16(__s2_873, __p3_873), vget_lane_bf16(__s2_873, __p3_873), vget_lane_bf16(__s2_873, 
__p3_873)}); \ __ret_873; \ }) #else #define vbfmlalbq_lane_f32(__p0_874, __p1_874, __p2_874, __p3_874) __extension__ ({ \ float32x4_t __ret_874; \ float32x4_t __s0_874 = __p0_874; \ bfloat16x8_t __s1_874 = __p1_874; \ bfloat16x4_t __s2_874 = __p2_874; \ float32x4_t __rev0_874; __rev0_874 = __builtin_shufflevector(__s0_874, __s0_874, 3, 2, 1, 0); \ bfloat16x8_t __rev1_874; __rev1_874 = __builtin_shufflevector(__s1_874, __s1_874, 7, 6, 5, 4, 3, 2, 1, 0); \ bfloat16x4_t __rev2_874; __rev2_874 = __builtin_shufflevector(__s2_874, __s2_874, 3, 2, 1, 0); \ __ret_874 = __noswap_vbfmlalbq_f32(__rev0_874, __rev1_874, (bfloat16x8_t) {__noswap_vget_lane_bf16(__rev2_874, __p3_874), __noswap_vget_lane_bf16(__rev2_874, __p3_874), __noswap_vget_lane_bf16(__rev2_874, __p3_874), __noswap_vget_lane_bf16(__rev2_874, __p3_874), __noswap_vget_lane_bf16(__rev2_874, __p3_874), __noswap_vget_lane_bf16(__rev2_874, __p3_874), __noswap_vget_lane_bf16(__rev2_874, __p3_874), __noswap_vget_lane_bf16(__rev2_874, __p3_874)}); \ __ret_874 = __builtin_shufflevector(__ret_874, __ret_874, 3, 2, 1, 0); \ __ret_874; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vbfmlalbq_laneq_f32(__p0_875, __p1_875, __p2_875, __p3_875) __extension__ ({ \ float32x4_t __ret_875; \ float32x4_t __s0_875 = __p0_875; \ bfloat16x8_t __s1_875 = __p1_875; \ bfloat16x8_t __s2_875 = __p2_875; \ __ret_875 = vbfmlalbq_f32(__s0_875, __s1_875, (bfloat16x8_t) {vgetq_lane_bf16(__s2_875, __p3_875), vgetq_lane_bf16(__s2_875, __p3_875), vgetq_lane_bf16(__s2_875, __p3_875), vgetq_lane_bf16(__s2_875, __p3_875), vgetq_lane_bf16(__s2_875, __p3_875), vgetq_lane_bf16(__s2_875, __p3_875), vgetq_lane_bf16(__s2_875, __p3_875), vgetq_lane_bf16(__s2_875, __p3_875)}); \ __ret_875; \ }) #else #define vbfmlalbq_laneq_f32(__p0_876, __p1_876, __p2_876, __p3_876) __extension__ ({ \ float32x4_t __ret_876; \ float32x4_t __s0_876 = __p0_876; \ bfloat16x8_t __s1_876 = __p1_876; \ bfloat16x8_t __s2_876 = __p2_876; \ float32x4_t __rev0_876; __rev0_876 = __builtin_shufflevector(__s0_876, __s0_876, 3, 2, 1, 0); \ bfloat16x8_t __rev1_876; __rev1_876 = __builtin_shufflevector(__s1_876, __s1_876, 7, 6, 5, 4, 3, 2, 1, 0); \ bfloat16x8_t __rev2_876; __rev2_876 = __builtin_shufflevector(__s2_876, __s2_876, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_876 = __noswap_vbfmlalbq_f32(__rev0_876, __rev1_876, (bfloat16x8_t) {__noswap_vgetq_lane_bf16(__rev2_876, __p3_876), __noswap_vgetq_lane_bf16(__rev2_876, __p3_876), __noswap_vgetq_lane_bf16(__rev2_876, __p3_876), __noswap_vgetq_lane_bf16(__rev2_876, __p3_876), __noswap_vgetq_lane_bf16(__rev2_876, __p3_876), __noswap_vgetq_lane_bf16(__rev2_876, __p3_876), __noswap_vgetq_lane_bf16(__rev2_876, __p3_876), __noswap_vgetq_lane_bf16(__rev2_876, __p3_876)}); \ __ret_876 = __builtin_shufflevector(__ret_876, __ret_876, 3, 2, 1, 0); \ __ret_876; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vbfmlaltq_lane_f32(__p0_877, __p1_877, __p2_877, __p3_877) __extension__ ({ \ float32x4_t __ret_877; \ float32x4_t __s0_877 = __p0_877; \ bfloat16x8_t __s1_877 = __p1_877; \ bfloat16x4_t __s2_877 = __p2_877; \ __ret_877 = vbfmlaltq_f32(__s0_877, __s1_877, (bfloat16x8_t) {vget_lane_bf16(__s2_877, __p3_877), vget_lane_bf16(__s2_877, __p3_877), vget_lane_bf16(__s2_877, __p3_877), vget_lane_bf16(__s2_877, __p3_877), vget_lane_bf16(__s2_877, __p3_877), vget_lane_bf16(__s2_877, __p3_877), vget_lane_bf16(__s2_877, __p3_877), vget_lane_bf16(__s2_877, __p3_877)}); \ __ret_877; \ }) #else #define vbfmlaltq_lane_f32(__p0_878, __p1_878, __p2_878, __p3_878) __extension__ ({ \ float32x4_t 
__ret_878; \ float32x4_t __s0_878 = __p0_878; \ bfloat16x8_t __s1_878 = __p1_878; \ bfloat16x4_t __s2_878 = __p2_878; \ float32x4_t __rev0_878; __rev0_878 = __builtin_shufflevector(__s0_878, __s0_878, 3, 2, 1, 0); \ bfloat16x8_t __rev1_878; __rev1_878 = __builtin_shufflevector(__s1_878, __s1_878, 7, 6, 5, 4, 3, 2, 1, 0); \ bfloat16x4_t __rev2_878; __rev2_878 = __builtin_shufflevector(__s2_878, __s2_878, 3, 2, 1, 0); \ __ret_878 = __noswap_vbfmlaltq_f32(__rev0_878, __rev1_878, (bfloat16x8_t) {__noswap_vget_lane_bf16(__rev2_878, __p3_878), __noswap_vget_lane_bf16(__rev2_878, __p3_878), __noswap_vget_lane_bf16(__rev2_878, __p3_878), __noswap_vget_lane_bf16(__rev2_878, __p3_878), __noswap_vget_lane_bf16(__rev2_878, __p3_878), __noswap_vget_lane_bf16(__rev2_878, __p3_878), __noswap_vget_lane_bf16(__rev2_878, __p3_878), __noswap_vget_lane_bf16(__rev2_878, __p3_878)}); \ __ret_878 = __builtin_shufflevector(__ret_878, __ret_878, 3, 2, 1, 0); \ __ret_878; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vbfmlaltq_laneq_f32(__p0_879, __p1_879, __p2_879, __p3_879) __extension__ ({ \ float32x4_t __ret_879; \ float32x4_t __s0_879 = __p0_879; \ bfloat16x8_t __s1_879 = __p1_879; \ bfloat16x8_t __s2_879 = __p2_879; \ __ret_879 = vbfmlaltq_f32(__s0_879, __s1_879, (bfloat16x8_t) {vgetq_lane_bf16(__s2_879, __p3_879), vgetq_lane_bf16(__s2_879, __p3_879), vgetq_lane_bf16(__s2_879, __p3_879), vgetq_lane_bf16(__s2_879, __p3_879), vgetq_lane_bf16(__s2_879, __p3_879), vgetq_lane_bf16(__s2_879, __p3_879), vgetq_lane_bf16(__s2_879, __p3_879), vgetq_lane_bf16(__s2_879, __p3_879)}); \ __ret_879; \ }) #else #define vbfmlaltq_laneq_f32(__p0_880, __p1_880, __p2_880, __p3_880) __extension__ ({ \ float32x4_t __ret_880; \ float32x4_t __s0_880 = __p0_880; \ bfloat16x8_t __s1_880 = __p1_880; \ bfloat16x8_t __s2_880 = __p2_880; \ float32x4_t __rev0_880; __rev0_880 = __builtin_shufflevector(__s0_880, __s0_880, 3, 2, 1, 0); \ bfloat16x8_t __rev1_880; __rev1_880 = __builtin_shufflevector(__s1_880, __s1_880, 7, 6, 5, 4, 3, 2, 1, 0); \ bfloat16x8_t __rev2_880; __rev2_880 = __builtin_shufflevector(__s2_880, __s2_880, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_880 = __noswap_vbfmlaltq_f32(__rev0_880, __rev1_880, (bfloat16x8_t) {__noswap_vgetq_lane_bf16(__rev2_880, __p3_880), __noswap_vgetq_lane_bf16(__rev2_880, __p3_880), __noswap_vgetq_lane_bf16(__rev2_880, __p3_880), __noswap_vgetq_lane_bf16(__rev2_880, __p3_880), __noswap_vgetq_lane_bf16(__rev2_880, __p3_880), __noswap_vgetq_lane_bf16(__rev2_880, __p3_880), __noswap_vgetq_lane_bf16(__rev2_880, __p3_880), __noswap_vgetq_lane_bf16(__rev2_880, __p3_880)}); \ __ret_880 = __builtin_shufflevector(__ret_880, __ret_880, 3, 2, 1, 0); \ __ret_880; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) float32x4_t vcvtq_high_f32_bf16(bfloat16x8_t __p0) { float32x4_t __ret; __ret = vcvt_f32_bf16(vget_high_bf16(__p0)); return __ret; } #else __ai __attribute__((target("bf16"))) float32x4_t vcvtq_high_f32_bf16(bfloat16x8_t __p0) { float32x4_t __ret; bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vcvt_f32_bf16(__noswap_vget_high_bf16(__rev0)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16"))) float32x4_t vcvtq_low_f32_bf16(bfloat16x8_t __p0) { float32x4_t __ret; __ret = vcvt_f32_bf16(vget_low_bf16(__p0)); return __ret; } #else __ai __attribute__((target("bf16"))) float32x4_t vcvtq_low_f32_bf16(bfloat16x8_t __p0) { float32x4_t __ret; 
bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vcvt_f32_bf16(__noswap_vget_low_bf16(__rev0)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vsudotq_lane_s32(__p0_881, __p1_881, __p2_881, __p3_881) __extension__ ({ \ int32x4_t __ret_881; \ int32x4_t __s0_881 = __p0_881; \ int8x16_t __s1_881 = __p1_881; \ uint8x8_t __s2_881 = __p2_881; \ uint8x8_t __reint_881 = __s2_881; \ __ret_881 = vusdotq_s32(__s0_881, (uint8x16_t)(splatq_lane_s32(*(int32x2_t *) &__reint_881, __p3_881)), __s1_881); \ __ret_881; \ }) #else #define vsudotq_lane_s32(__p0_882, __p1_882, __p2_882, __p3_882) __extension__ ({ \ int32x4_t __ret_882; \ int32x4_t __s0_882 = __p0_882; \ int8x16_t __s1_882 = __p1_882; \ uint8x8_t __s2_882 = __p2_882; \ int32x4_t __rev0_882; __rev0_882 = __builtin_shufflevector(__s0_882, __s0_882, 3, 2, 1, 0); \ int8x16_t __rev1_882; __rev1_882 = __builtin_shufflevector(__s1_882, __s1_882, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __rev2_882; __rev2_882 = __builtin_shufflevector(__s2_882, __s2_882, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __reint_882 = __rev2_882; \ __ret_882 = __noswap_vusdotq_s32(__rev0_882, (uint8x16_t)(__noswap_splatq_lane_s32(*(int32x2_t *) &__reint_882, __p3_882)), __rev1_882); \ __ret_882 = __builtin_shufflevector(__ret_882, __ret_882, 3, 2, 1, 0); \ __ret_882; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vsudot_lane_s32(__p0_883, __p1_883, __p2_883, __p3_883) __extension__ ({ \ int32x2_t __ret_883; \ int32x2_t __s0_883 = __p0_883; \ int8x8_t __s1_883 = __p1_883; \ uint8x8_t __s2_883 = __p2_883; \ uint8x8_t __reint_883 = __s2_883; \ __ret_883 = vusdot_s32(__s0_883, (uint8x8_t)(splat_lane_s32(*(int32x2_t *) &__reint_883, __p3_883)), __s1_883); \ __ret_883; \ }) #else #define vsudot_lane_s32(__p0_884, __p1_884, __p2_884, __p3_884) __extension__ ({ \ int32x2_t __ret_884; \ int32x2_t __s0_884 = __p0_884; \ int8x8_t __s1_884 = __p1_884; \ uint8x8_t __s2_884 = __p2_884; \ int32x2_t __rev0_884; __rev0_884 = __builtin_shufflevector(__s0_884, __s0_884, 1, 0); \ int8x8_t __rev1_884; __rev1_884 = __builtin_shufflevector(__s1_884, __s1_884, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __rev2_884; __rev2_884 = __builtin_shufflevector(__s2_884, __s2_884, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x8_t __reint_884 = __rev2_884; \ __ret_884 = __noswap_vusdot_s32(__rev0_884, (uint8x8_t)(__noswap_splat_lane_s32(*(int32x2_t *) &__reint_884, __p3_884)), __rev1_884); \ __ret_884 = __builtin_shufflevector(__ret_884, __ret_884, 1, 0); \ __ret_884; \ }) #endif #if defined(__aarch64__) #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vabdl_high_u8(uint8x16_t __p0, uint8x16_t __p1) { uint16x8_t __ret; __ret = vabdl_u8(vget_high_u8(__p0), vget_high_u8(__p1)); return __ret; } #else __ai uint16x8_t vabdl_high_u8(uint8x16_t __p0, uint8x16_t __p1) { uint16x8_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vabdl_u8(__noswap_vget_high_u8(__rev0), __noswap_vget_high_u8(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vabdl_high_u32(uint32x4_t __p0, uint32x4_t __p1) { uint64x2_t __ret; __ret = vabdl_u32(vget_high_u32(__p0), vget_high_u32(__p1)); return __ret; } #else __ai uint64x2_t 
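/* Illustrative usage sketch (not part of the generated header): widening a
 * full bfloat16x8_t to two float32x4_t halves with the vcvtq_{low,high}
 * helpers defined above. Name is hypothetical; assumes bf16 support
 * (e.g. -march=armv8.2-a+bf16).
 *
 *   static inline void
 *   example_widen_bf16x8(bfloat16x8_t v, float32x4_t *lo, float32x4_t *hi)
 *   {
 *       *lo = vcvtq_low_f32_bf16(v);   // lanes 0..3 as f32
 *       *hi = vcvtq_high_f32_bf16(v);  // lanes 4..7 as f32
 *   }
 */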
vabdl_high_u32(uint32x4_t __p0, uint32x4_t __p1) { uint64x2_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vabdl_u32(__noswap_vget_high_u32(__rev0), __noswap_vget_high_u32(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vabdl_high_u16(uint16x8_t __p0, uint16x8_t __p1) { uint32x4_t __ret; __ret = vabdl_u16(vget_high_u16(__p0), vget_high_u16(__p1)); return __ret; } #else __ai uint32x4_t vabdl_high_u16(uint16x8_t __p0, uint16x8_t __p1) { uint32x4_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vabdl_u16(__noswap_vget_high_u16(__rev0), __noswap_vget_high_u16(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vabdl_high_s8(int8x16_t __p0, int8x16_t __p1) { int16x8_t __ret; __ret = vabdl_s8(vget_high_s8(__p0), vget_high_s8(__p1)); return __ret; } #else __ai int16x8_t vabdl_high_s8(int8x16_t __p0, int8x16_t __p1) { int16x8_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vabdl_s8(__noswap_vget_high_s8(__rev0), __noswap_vget_high_s8(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vabdl_high_s32(int32x4_t __p0, int32x4_t __p1) { int64x2_t __ret; __ret = vabdl_s32(vget_high_s32(__p0), vget_high_s32(__p1)); return __ret; } #else __ai int64x2_t vabdl_high_s32(int32x4_t __p0, int32x4_t __p1) { int64x2_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vabdl_s32(__noswap_vget_high_s32(__rev0), __noswap_vget_high_s32(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vabdl_high_s16(int16x8_t __p0, int16x8_t __p1) { int32x4_t __ret; __ret = vabdl_s16(vget_high_s16(__p0), vget_high_s16(__p1)); return __ret; } #else __ai int32x4_t vabdl_high_s16(int16x8_t __p0, int16x8_t __p1) { int32x4_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vabdl_s16(__noswap_vget_high_s16(__rev0), __noswap_vget_high_s16(__rev1)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vaddl_high_u8(uint8x16_t __p0, uint8x16_t __p1) { uint16x8_t __ret; __ret = vmovl_high_u8(__p0) + vmovl_high_u8(__p1); return __ret; } #else __ai uint16x8_t vaddl_high_u8(uint8x16_t __p0, uint8x16_t __p1) { uint16x8_t __ret; uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmovl_high_u8(__rev0) + __noswap_vmovl_high_u8(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 
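/* Illustrative usage sketch (not part of the generated header): the
 * vabdl_high_* helpers are equivalent to calling the 64-bit vabdl_* form on
 * the upper halves, i.e. vabdl_high_u8(a, b) == vabdl_u8(vget_high_u8(a),
 * vget_high_u8(b)), producing |a[i+8] - b[i+8]| widened to 16 bits.
 * Hypothetical example covering both halves of a 128-bit input:
 *
 *   static inline void
 *   example_abd_both_halves(uint8x16_t a, uint8x16_t b,
 *                           uint16x8_t *lo, uint16x8_t *hi)
 *   {
 *       *lo = vabdl_u8(vget_low_u8(a), vget_low_u8(b));
 *       *hi = vabdl_high_u8(a, b);
 *   }
 */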
2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vaddl_high_u32(uint32x4_t __p0, uint32x4_t __p1) { uint64x2_t __ret; __ret = vmovl_high_u32(__p0) + vmovl_high_u32(__p1); return __ret; } #else __ai uint64x2_t vaddl_high_u32(uint32x4_t __p0, uint32x4_t __p1) { uint64x2_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vmovl_high_u32(__rev0) + __noswap_vmovl_high_u32(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vaddl_high_u16(uint16x8_t __p0, uint16x8_t __p1) { uint32x4_t __ret; __ret = vmovl_high_u16(__p0) + vmovl_high_u16(__p1); return __ret; } #else __ai uint32x4_t vaddl_high_u16(uint16x8_t __p0, uint16x8_t __p1) { uint32x4_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmovl_high_u16(__rev0) + __noswap_vmovl_high_u16(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vaddl_high_s8(int8x16_t __p0, int8x16_t __p1) { int16x8_t __ret; __ret = vmovl_high_s8(__p0) + vmovl_high_s8(__p1); return __ret; } #else __ai int16x8_t vaddl_high_s8(int8x16_t __p0, int8x16_t __p1) { int16x8_t __ret; int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmovl_high_s8(__rev0) + __noswap_vmovl_high_s8(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vaddl_high_s32(int32x4_t __p0, int32x4_t __p1) { int64x2_t __ret; __ret = vmovl_high_s32(__p0) + vmovl_high_s32(__p1); return __ret; } #else __ai int64x2_t vaddl_high_s32(int32x4_t __p0, int32x4_t __p1) { int64x2_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vmovl_high_s32(__rev0) + __noswap_vmovl_high_s32(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vaddl_high_s16(int16x8_t __p0, int16x8_t __p1) { int32x4_t __ret; __ret = vmovl_high_s16(__p0) + vmovl_high_s16(__p1); return __ret; } #else __ai int32x4_t vaddl_high_s16(int16x8_t __p0, int16x8_t __p1) { int32x4_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmovl_high_s16(__rev0) + __noswap_vmovl_high_s16(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vaddw_high_u8(uint16x8_t __p0, uint8x16_t __p1) { uint16x8_t __ret; __ret = __p0 + vmovl_high_u8(__p1); return __ret; } #else __ai uint16x8_t vaddw_high_u8(uint16x8_t __p0, uint8x16_t __p1) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __noswap_vmovl_high_u8(__rev1); __ret = 
__builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vaddw_high_u32(uint64x2_t __p0, uint32x4_t __p1) { uint64x2_t __ret; __ret = __p0 + vmovl_high_u32(__p1); return __ret; } #else __ai uint64x2_t vaddw_high_u32(uint64x2_t __p0, uint32x4_t __p1) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 + __noswap_vmovl_high_u32(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vaddw_high_u16(uint32x4_t __p0, uint16x8_t __p1) { uint32x4_t __ret; __ret = __p0 + vmovl_high_u16(__p1); return __ret; } #else __ai uint32x4_t vaddw_high_u16(uint32x4_t __p0, uint16x8_t __p1) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __noswap_vmovl_high_u16(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vaddw_high_s8(int16x8_t __p0, int8x16_t __p1) { int16x8_t __ret; __ret = __p0 + vmovl_high_s8(__p1); return __ret; } #else __ai int16x8_t vaddw_high_s8(int16x8_t __p0, int8x16_t __p1) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __noswap_vmovl_high_s8(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vaddw_high_s32(int64x2_t __p0, int32x4_t __p1) { int64x2_t __ret; __ret = __p0 + vmovl_high_s32(__p1); return __ret; } #else __ai int64x2_t vaddw_high_s32(int64x2_t __p0, int32x4_t __p1) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __rev0 + __noswap_vmovl_high_s32(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vaddw_high_s16(int32x4_t __p0, int16x8_t __p1) { int32x4_t __ret; __ret = __p0 + vmovl_high_s16(__p1); return __ret; } #else __ai int32x4_t vaddw_high_s16(int32x4_t __p0, int16x8_t __p1) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __noswap_vmovl_high_s16(__rev1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_lane_p64(__p0_885, __p1_885, __p2_885, __p3_885) __extension__ ({ \ poly64x2_t __ret_885; \ poly64x2_t __s0_885 = __p0_885; \ poly64x1_t __s2_885 = __p2_885; \ __ret_885 = vsetq_lane_p64(vget_lane_p64(__s2_885, __p3_885), __s0_885, __p1_885); \ __ret_885; \ }) #else #define vcopyq_lane_p64(__p0_886, __p1_886, __p2_886, __p3_886) __extension__ ({ \ poly64x2_t __ret_886; \ poly64x2_t __s0_886 = __p0_886; \ poly64x1_t __s2_886 = __p2_886; \ poly64x2_t __rev0_886; __rev0_886 = __builtin_shufflevector(__s0_886, __s0_886, 1, 0); \ __ret_886 = __noswap_vsetq_lane_p64(vget_lane_p64(__s2_886, __p3_886), __rev0_886, __p1_886); \ __ret_886 = __builtin_shufflevector(__ret_886, __ret_886, 1, 0); \ 
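/* Illustrative usage sketch (not part of the generated header):
 * vaddl_high_* widens the upper halves of both operands before adding
 * (vaddl_high_u8(a, b) == vmovl_high_u8(a) + vmovl_high_u8(b)), while
 * vaddw_high_* widens only the second operand and adds it to an already-wide
 * accumulator. Hypothetical example: accumulating all 16 bytes of a vector
 * into 16-bit lanes without per-step overflow.
 *
 *   static inline uint16x8_t
 *   example_accumulate_bytes(uint16x8_t acc, uint8x16_t v)
 *   {
 *       acc = vaddw_u8(acc, vget_low_u8(v));   // add widened low half
 *       acc = vaddw_high_u8(acc, v);           // add widened high half
 *       return acc;
 *   }
 */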
__ret_886; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_lane_f64(__p0_887, __p1_887, __p2_887, __p3_887) __extension__ ({ \ float64x2_t __ret_887; \ float64x2_t __s0_887 = __p0_887; \ float64x1_t __s2_887 = __p2_887; \ __ret_887 = vsetq_lane_f64(vget_lane_f64(__s2_887, __p3_887), __s0_887, __p1_887); \ __ret_887; \ }) #else #define vcopyq_lane_f64(__p0_888, __p1_888, __p2_888, __p3_888) __extension__ ({ \ float64x2_t __ret_888; \ float64x2_t __s0_888 = __p0_888; \ float64x1_t __s2_888 = __p2_888; \ float64x2_t __rev0_888; __rev0_888 = __builtin_shufflevector(__s0_888, __s0_888, 1, 0); \ __ret_888 = __noswap_vsetq_lane_f64(vget_lane_f64(__s2_888, __p3_888), __rev0_888, __p1_888); \ __ret_888 = __builtin_shufflevector(__ret_888, __ret_888, 1, 0); \ __ret_888; \ }) #endif #define vcopy_lane_p64(__p0_889, __p1_889, __p2_889, __p3_889) __extension__ ({ \ poly64x1_t __ret_889; \ poly64x1_t __s0_889 = __p0_889; \ poly64x1_t __s2_889 = __p2_889; \ __ret_889 = vset_lane_p64(vget_lane_p64(__s2_889, __p3_889), __s0_889, __p1_889); \ __ret_889; \ }) #define vcopy_lane_f64(__p0_890, __p1_890, __p2_890, __p3_890) __extension__ ({ \ float64x1_t __ret_890; \ float64x1_t __s0_890 = __p0_890; \ float64x1_t __s2_890 = __p2_890; \ __ret_890 = vset_lane_f64(vget_lane_f64(__s2_890, __p3_890), __s0_890, __p1_890); \ __ret_890; \ }) #ifdef __LITTLE_ENDIAN__ #define vcopyq_laneq_p64(__p0_891, __p1_891, __p2_891, __p3_891) __extension__ ({ \ poly64x2_t __ret_891; \ poly64x2_t __s0_891 = __p0_891; \ poly64x2_t __s2_891 = __p2_891; \ __ret_891 = vsetq_lane_p64(vgetq_lane_p64(__s2_891, __p3_891), __s0_891, __p1_891); \ __ret_891; \ }) #else #define vcopyq_laneq_p64(__p0_892, __p1_892, __p2_892, __p3_892) __extension__ ({ \ poly64x2_t __ret_892; \ poly64x2_t __s0_892 = __p0_892; \ poly64x2_t __s2_892 = __p2_892; \ poly64x2_t __rev0_892; __rev0_892 = __builtin_shufflevector(__s0_892, __s0_892, 1, 0); \ poly64x2_t __rev2_892; __rev2_892 = __builtin_shufflevector(__s2_892, __s2_892, 1, 0); \ __ret_892 = __noswap_vsetq_lane_p64(__noswap_vgetq_lane_p64(__rev2_892, __p3_892), __rev0_892, __p1_892); \ __ret_892 = __builtin_shufflevector(__ret_892, __ret_892, 1, 0); \ __ret_892; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopyq_laneq_f64(__p0_893, __p1_893, __p2_893, __p3_893) __extension__ ({ \ float64x2_t __ret_893; \ float64x2_t __s0_893 = __p0_893; \ float64x2_t __s2_893 = __p2_893; \ __ret_893 = vsetq_lane_f64(vgetq_lane_f64(__s2_893, __p3_893), __s0_893, __p1_893); \ __ret_893; \ }) #else #define vcopyq_laneq_f64(__p0_894, __p1_894, __p2_894, __p3_894) __extension__ ({ \ float64x2_t __ret_894; \ float64x2_t __s0_894 = __p0_894; \ float64x2_t __s2_894 = __p2_894; \ float64x2_t __rev0_894; __rev0_894 = __builtin_shufflevector(__s0_894, __s0_894, 1, 0); \ float64x2_t __rev2_894; __rev2_894 = __builtin_shufflevector(__s2_894, __s2_894, 1, 0); \ __ret_894 = __noswap_vsetq_lane_f64(__noswap_vgetq_lane_f64(__rev2_894, __p3_894), __rev0_894, __p1_894); \ __ret_894 = __builtin_shufflevector(__ret_894, __ret_894, 1, 0); \ __ret_894; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_laneq_p64(__p0_895, __p1_895, __p2_895, __p3_895) __extension__ ({ \ poly64x1_t __ret_895; \ poly64x1_t __s0_895 = __p0_895; \ poly64x2_t __s2_895 = __p2_895; \ __ret_895 = vset_lane_p64(vgetq_lane_p64(__s2_895, __p3_895), __s0_895, __p1_895); \ __ret_895; \ }) #else #define vcopy_laneq_p64(__p0_896, __p1_896, __p2_896, __p3_896) __extension__ ({ \ poly64x1_t __ret_896; \ poly64x1_t __s0_896 = __p0_896; \ poly64x2_t __s2_896 = __p2_896; \ 
poly64x2_t __rev2_896; __rev2_896 = __builtin_shufflevector(__s2_896, __s2_896, 1, 0); \ __ret_896 = vset_lane_p64(__noswap_vgetq_lane_p64(__rev2_896, __p3_896), __s0_896, __p1_896); \ __ret_896; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vcopy_laneq_f64(__p0_897, __p1_897, __p2_897, __p3_897) __extension__ ({ \ float64x1_t __ret_897; \ float64x1_t __s0_897 = __p0_897; \ float64x2_t __s2_897 = __p2_897; \ __ret_897 = vset_lane_f64(vgetq_lane_f64(__s2_897, __p3_897), __s0_897, __p1_897); \ __ret_897; \ }) #else #define vcopy_laneq_f64(__p0_898, __p1_898, __p2_898, __p3_898) __extension__ ({ \ float64x1_t __ret_898; \ float64x1_t __s0_898 = __p0_898; \ float64x2_t __s2_898 = __p2_898; \ float64x2_t __rev2_898; __rev2_898 = __builtin_shufflevector(__s2_898, __s2_898, 1, 0); \ __ret_898 = vset_lane_f64(__noswap_vgetq_lane_f64(__rev2_898, __p3_898), __s0_898, __p1_898); \ __ret_898; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vmlal_high_u8(uint16x8_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint16x8_t __ret; __ret = vmlal_u8(__p0, vget_high_u8(__p1), vget_high_u8(__p2)); return __ret; } #else __ai uint16x8_t vmlal_high_u8(uint16x8_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmlal_u8(__rev0, __noswap_vget_high_u8(__rev1), __noswap_vget_high_u8(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vmlal_high_u32(uint64x2_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint64x2_t __ret; __ret = vmlal_u32(__p0, vget_high_u32(__p1), vget_high_u32(__p2)); return __ret; } #else __ai uint64x2_t vmlal_high_u32(uint64x2_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __noswap_vmlal_u32(__rev0, __noswap_vget_high_u32(__rev1), __noswap_vget_high_u32(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmlal_high_u16(uint32x4_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint32x4_t __ret; __ret = vmlal_u16(__p0, vget_high_u16(__p1), vget_high_u16(__p2)); return __ret; } #else __ai uint32x4_t vmlal_high_u16(uint32x4_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmlal_u16(__rev0, __noswap_vget_high_u16(__rev1), __noswap_vget_high_u16(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vmlal_high_s8(int16x8_t __p0, int8x16_t __p1, int8x16_t __p2) { int16x8_t __ret; __ret = vmlal_s8(__p0, vget_high_s8(__p1), vget_high_s8(__p2)); return __ret; } #else __ai int16x8_t vmlal_high_s8(int16x8_t __p0, int8x16_t __p1, int8x16_t __p2) { int16x8_t __ret; int16x8_t __rev0; __rev0 = 
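/* Illustrative usage sketch (not part of the generated header): the
 * vcopy*_lane*_{p64,f64} macros above insert one lane of the source vector
 * into one lane of the destination, i.e. vcopyq_laneq_f64(d, i, s, j) returns
 * d with lane i replaced by s[j]. Hypothetical example:
 *
 *   static inline float64x2_t
 *   example_copy_hi_to_lo(float64x2_t d, float64x2_t s)
 *   {
 *       // replace lane 0 of d with lane 1 of s
 *       return vcopyq_laneq_f64(d, 0, s, 1);
 *   }
 */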
__builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmlal_s8(__rev0, __noswap_vget_high_s8(__rev1), __noswap_vget_high_s8(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vmlal_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { int64x2_t __ret; __ret = vmlal_s32(__p0, vget_high_s32(__p1), vget_high_s32(__p2)); return __ret; } #else __ai int64x2_t vmlal_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __noswap_vmlal_s32(__rev0, __noswap_vget_high_s32(__rev1), __noswap_vget_high_s32(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmlal_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) { int32x4_t __ret; __ret = vmlal_s16(__p0, vget_high_s16(__p1), vget_high_s16(__p2)); return __ret; } #else __ai int32x4_t vmlal_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmlal_s16(__rev0, __noswap_vget_high_s16(__rev1), __noswap_vget_high_s16(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vmlal_high_n_u32(uint64x2_t __p0, uint32x4_t __p1, uint32_t __p2) { uint64x2_t __ret; __ret = vmlal_n_u32(__p0, vget_high_u32(__p1), __p2); return __ret; } #else __ai uint64x2_t vmlal_high_n_u32(uint64x2_t __p0, uint32x4_t __p1, uint32_t __p2) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vmlal_n_u32(__rev0, __noswap_vget_high_u32(__rev1), __p2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmlal_high_n_u16(uint32x4_t __p0, uint16x8_t __p1, uint16_t __p2) { uint32x4_t __ret; __ret = vmlal_n_u16(__p0, vget_high_u16(__p1), __p2); return __ret; } #else __ai uint32x4_t vmlal_high_n_u16(uint32x4_t __p0, uint16x8_t __p1, uint16_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmlal_n_u16(__rev0, __noswap_vget_high_u16(__rev1), __p2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vmlal_high_n_s32(int64x2_t __p0, int32x4_t __p1, int32_t __p2) { int64x2_t __ret; __ret = vmlal_n_s32(__p0, vget_high_s32(__p1), __p2); return __ret; } #else __ai int64x2_t vmlal_high_n_s32(int64x2_t __p0, int32x4_t __p1, int32_t __p2) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x4_t __rev1; 
__rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vmlal_n_s32(__rev0, __noswap_vget_high_s32(__rev1), __p2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmlal_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) { int32x4_t __ret; __ret = vmlal_n_s16(__p0, vget_high_s16(__p1), __p2); return __ret; } #else __ai int32x4_t vmlal_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmlal_n_s16(__rev0, __noswap_vget_high_s16(__rev1), __p2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vmlsl_high_u8(uint16x8_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint16x8_t __ret; __ret = vmlsl_u8(__p0, vget_high_u8(__p1), vget_high_u8(__p2)); return __ret; } #else __ai uint16x8_t vmlsl_high_u8(uint16x8_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmlsl_u8(__rev0, __noswap_vget_high_u8(__rev1), __noswap_vget_high_u8(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vmlsl_high_u32(uint64x2_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint64x2_t __ret; __ret = vmlsl_u32(__p0, vget_high_u32(__p1), vget_high_u32(__p2)); return __ret; } #else __ai uint64x2_t vmlsl_high_u32(uint64x2_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __noswap_vmlsl_u32(__rev0, __noswap_vget_high_u32(__rev1), __noswap_vget_high_u32(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmlsl_high_u16(uint32x4_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint32x4_t __ret; __ret = vmlsl_u16(__p0, vget_high_u16(__p1), vget_high_u16(__p2)); return __ret; } #else __ai uint32x4_t vmlsl_high_u16(uint32x4_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmlsl_u16(__rev0, __noswap_vget_high_u16(__rev1), __noswap_vget_high_u16(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vmlsl_high_s8(int16x8_t __p0, int8x16_t __p1, int8x16_t __p2) { int16x8_t __ret; __ret = vmlsl_s8(__p0, vget_high_s8(__p1), vget_high_s8(__p2)); return __ret; } #else __ai int16x8_t vmlsl_high_s8(int16x8_t __p0, int8x16_t __p1, int8x16_t __p2) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmlsl_s8(__rev0, __noswap_vget_high_s8(__rev1), __noswap_vget_high_s8(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vmlsl_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { int64x2_t __ret; __ret = vmlsl_s32(__p0, vget_high_s32(__p1), vget_high_s32(__p2)); return __ret; } #else __ai int64x2_t vmlsl_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __noswap_vmlsl_s32(__rev0, __noswap_vget_high_s32(__rev1), __noswap_vget_high_s32(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmlsl_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) { int32x4_t __ret; __ret = vmlsl_s16(__p0, vget_high_s16(__p1), vget_high_s16(__p2)); return __ret; } #else __ai int32x4_t vmlsl_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmlsl_s16(__rev0, __noswap_vget_high_s16(__rev1), __noswap_vget_high_s16(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vmlsl_high_n_u32(uint64x2_t __p0, uint32x4_t __p1, uint32_t __p2) { uint64x2_t __ret; __ret = vmlsl_n_u32(__p0, vget_high_u32(__p1), __p2); return __ret; } #else __ai uint64x2_t vmlsl_high_n_u32(uint64x2_t __p0, uint32x4_t __p1, uint32_t __p2) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = __noswap_vmlsl_n_u32(__rev0, __noswap_vget_high_u32(__rev1), __p2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmlsl_high_n_u16(uint32x4_t __p0, uint16x8_t __p1, uint16_t __p2) { uint32x4_t __ret; __ret = vmlsl_n_u16(__p0, vget_high_u16(__p1), __p2); return __ret; } #else __ai uint32x4_t vmlsl_high_n_u16(uint32x4_t __p0, uint16x8_t __p1, uint16_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmlsl_n_u16(__rev0, __noswap_vget_high_u16(__rev1), __p2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vmlsl_high_n_s32(int64x2_t __p0, int32x4_t __p1, int32_t __p2) { int64x2_t __ret; __ret = vmlsl_n_s32(__p0, vget_high_s32(__p1), __p2); return __ret; } #else __ai int64x2_t vmlsl_high_n_s32(int64x2_t __p0, int32x4_t __p1, int32_t __p2) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); __ret = 
__noswap_vmlsl_n_s32(__rev0, __noswap_vget_high_s32(__rev1), __p2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) { int32x4_t __ret; __ret = vmlsl_n_s16(__p0, vget_high_s16(__p1), __p2); return __ret; } #else __ai int32x4_t vmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vmlsl_n_s16(__rev0, __noswap_vget_high_s16(__rev1), __p2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #define vmulx_lane_f64(__p0_899, __p1_899, __p2_899) __extension__ ({ \ float64x1_t __ret_899; \ float64x1_t __s0_899 = __p0_899; \ float64x1_t __s1_899 = __p1_899; \ float64_t __x_899 = vget_lane_f64(__s0_899, 0); \ float64_t __y_899 = vget_lane_f64(__s1_899, __p2_899); \ float64_t __z_899 = vmulxd_f64(__x_899, __y_899); \ __ret_899 = vset_lane_f64(__z_899, __s0_899, __p2_899); \ __ret_899; \ }) #ifdef __LITTLE_ENDIAN__ #define vmulx_laneq_f64(__p0_900, __p1_900, __p2_900) __extension__ ({ \ float64x1_t __ret_900; \ float64x1_t __s0_900 = __p0_900; \ float64x2_t __s1_900 = __p1_900; \ float64_t __x_900 = vget_lane_f64(__s0_900, 0); \ float64_t __y_900 = vgetq_lane_f64(__s1_900, __p2_900); \ float64_t __z_900 = vmulxd_f64(__x_900, __y_900); \ __ret_900 = vset_lane_f64(__z_900, __s0_900, 0); \ __ret_900; \ }) #else #define vmulx_laneq_f64(__p0_901, __p1_901, __p2_901) __extension__ ({ \ float64x1_t __ret_901; \ float64x1_t __s0_901 = __p0_901; \ float64x2_t __s1_901 = __p1_901; \ float64x2_t __rev1_901; __rev1_901 = __builtin_shufflevector(__s1_901, __s1_901, 1, 0); \ float64_t __x_901 = vget_lane_f64(__s0_901, 0); \ float64_t __y_901 = __noswap_vgetq_lane_f64(__rev1_901, __p2_901); \ float64_t __z_901 = vmulxd_f64(__x_901, __y_901); \ __ret_901 = vset_lane_f64(__z_901, __s0_901, 0); \ __ret_901; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmlalq_lane_high_f16(__p0_902, __p1_902, __p2_902, __p3_902) __extension__ ({ \ float32x4_t __ret_902; \ float32x4_t __s0_902 = __p0_902; \ float16x8_t __s1_902 = __p1_902; \ float16x4_t __s2_902 = __p2_902; \ __ret_902 = vfmlalq_high_f16(__s0_902, __s1_902, (float16x8_t) {vget_lane_f16(__s2_902, __p3_902), vget_lane_f16(__s2_902, __p3_902), vget_lane_f16(__s2_902, __p3_902), vget_lane_f16(__s2_902, __p3_902), vget_lane_f16(__s2_902, __p3_902), vget_lane_f16(__s2_902, __p3_902), vget_lane_f16(__s2_902, __p3_902), vget_lane_f16(__s2_902, __p3_902)}); \ __ret_902; \ }) #else #define vfmlalq_lane_high_f16(__p0_903, __p1_903, __p2_903, __p3_903) __extension__ ({ \ float32x4_t __ret_903; \ float32x4_t __s0_903 = __p0_903; \ float16x8_t __s1_903 = __p1_903; \ float16x4_t __s2_903 = __p2_903; \ float32x4_t __rev0_903; __rev0_903 = __builtin_shufflevector(__s0_903, __s0_903, 3, 2, 1, 0); \ float16x8_t __rev1_903; __rev1_903 = __builtin_shufflevector(__s1_903, __s1_903, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x4_t __rev2_903; __rev2_903 = __builtin_shufflevector(__s2_903, __s2_903, 3, 2, 1, 0); \ __ret_903 = __noswap_vfmlalq_high_f16(__rev0_903, __rev1_903, (float16x8_t) {__noswap_vget_lane_f16(__rev2_903, __p3_903), __noswap_vget_lane_f16(__rev2_903, __p3_903), __noswap_vget_lane_f16(__rev2_903, __p3_903), __noswap_vget_lane_f16(__rev2_903, __p3_903), __noswap_vget_lane_f16(__rev2_903, __p3_903), 
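/* Illustrative usage sketch (not part of the generated header): the
 * vml{a,s}l_high_* family performs widening multiply-accumulate (or
 * -subtract) on the upper halves, and the _n_ variants broadcast a scalar
 * second multiplicand, e.g. vmlal_high_n_s16(acc, a, k) is roughly
 * acc[i] += (int32_t)a[i+4] * k. Hypothetical example:
 *
 *   static inline int32x4_t
 *   example_scale_and_accumulate(int32x4_t acc, int16x8_t a, int16_t k)
 *   {
 *       acc = vmlal_n_s16(acc, vget_low_s16(a), k);  // low half
 *       acc = vmlal_high_n_s16(acc, a, k);           // high half
 *       return acc;
 *   }
 *
 * vmulx_lane{,q}_f64 above follows the same lane-selection pattern for the
 * scalar FMULX operation: vmulxd_f64 is applied to lane __p2 of the second
 * operand.
 */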
__noswap_vget_lane_f16(__rev2_903, __p3_903), __noswap_vget_lane_f16(__rev2_903, __p3_903), __noswap_vget_lane_f16(__rev2_903, __p3_903)}); \ __ret_903 = __builtin_shufflevector(__ret_903, __ret_903, 3, 2, 1, 0); \ __ret_903; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmlal_lane_high_f16(__p0_904, __p1_904, __p2_904, __p3_904) __extension__ ({ \ float32x2_t __ret_904; \ float32x2_t __s0_904 = __p0_904; \ float16x4_t __s1_904 = __p1_904; \ float16x4_t __s2_904 = __p2_904; \ __ret_904 = vfmlal_high_f16(__s0_904, __s1_904, (float16x4_t) {vget_lane_f16(__s2_904, __p3_904), vget_lane_f16(__s2_904, __p3_904), vget_lane_f16(__s2_904, __p3_904), vget_lane_f16(__s2_904, __p3_904)}); \ __ret_904; \ }) #else #define vfmlal_lane_high_f16(__p0_905, __p1_905, __p2_905, __p3_905) __extension__ ({ \ float32x2_t __ret_905; \ float32x2_t __s0_905 = __p0_905; \ float16x4_t __s1_905 = __p1_905; \ float16x4_t __s2_905 = __p2_905; \ float32x2_t __rev0_905; __rev0_905 = __builtin_shufflevector(__s0_905, __s0_905, 1, 0); \ float16x4_t __rev1_905; __rev1_905 = __builtin_shufflevector(__s1_905, __s1_905, 3, 2, 1, 0); \ float16x4_t __rev2_905; __rev2_905 = __builtin_shufflevector(__s2_905, __s2_905, 3, 2, 1, 0); \ __ret_905 = __noswap_vfmlal_high_f16(__rev0_905, __rev1_905, (float16x4_t) {__noswap_vget_lane_f16(__rev2_905, __p3_905), __noswap_vget_lane_f16(__rev2_905, __p3_905), __noswap_vget_lane_f16(__rev2_905, __p3_905), __noswap_vget_lane_f16(__rev2_905, __p3_905)}); \ __ret_905 = __builtin_shufflevector(__ret_905, __ret_905, 1, 0); \ __ret_905; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmlalq_lane_low_f16(__p0_906, __p1_906, __p2_906, __p3_906) __extension__ ({ \ float32x4_t __ret_906; \ float32x4_t __s0_906 = __p0_906; \ float16x8_t __s1_906 = __p1_906; \ float16x4_t __s2_906 = __p2_906; \ __ret_906 = vfmlalq_low_f16(__s0_906, __s1_906, (float16x8_t) {vget_lane_f16(__s2_906, __p3_906), vget_lane_f16(__s2_906, __p3_906), vget_lane_f16(__s2_906, __p3_906), vget_lane_f16(__s2_906, __p3_906), vget_lane_f16(__s2_906, __p3_906), vget_lane_f16(__s2_906, __p3_906), vget_lane_f16(__s2_906, __p3_906), vget_lane_f16(__s2_906, __p3_906)}); \ __ret_906; \ }) #else #define vfmlalq_lane_low_f16(__p0_907, __p1_907, __p2_907, __p3_907) __extension__ ({ \ float32x4_t __ret_907; \ float32x4_t __s0_907 = __p0_907; \ float16x8_t __s1_907 = __p1_907; \ float16x4_t __s2_907 = __p2_907; \ float32x4_t __rev0_907; __rev0_907 = __builtin_shufflevector(__s0_907, __s0_907, 3, 2, 1, 0); \ float16x8_t __rev1_907; __rev1_907 = __builtin_shufflevector(__s1_907, __s1_907, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x4_t __rev2_907; __rev2_907 = __builtin_shufflevector(__s2_907, __s2_907, 3, 2, 1, 0); \ __ret_907 = __noswap_vfmlalq_low_f16(__rev0_907, __rev1_907, (float16x8_t) {__noswap_vget_lane_f16(__rev2_907, __p3_907), __noswap_vget_lane_f16(__rev2_907, __p3_907), __noswap_vget_lane_f16(__rev2_907, __p3_907), __noswap_vget_lane_f16(__rev2_907, __p3_907), __noswap_vget_lane_f16(__rev2_907, __p3_907), __noswap_vget_lane_f16(__rev2_907, __p3_907), __noswap_vget_lane_f16(__rev2_907, __p3_907), __noswap_vget_lane_f16(__rev2_907, __p3_907)}); \ __ret_907 = __builtin_shufflevector(__ret_907, __ret_907, 3, 2, 1, 0); \ __ret_907; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmlal_lane_low_f16(__p0_908, __p1_908, __p2_908, __p3_908) __extension__ ({ \ float32x2_t __ret_908; \ float32x2_t __s0_908 = __p0_908; \ float16x4_t __s1_908 = __p1_908; \ float16x4_t __s2_908 = __p2_908; \ __ret_908 = vfmlal_low_f16(__s0_908, __s1_908, (float16x4_t) 
{vget_lane_f16(__s2_908, __p3_908), vget_lane_f16(__s2_908, __p3_908), vget_lane_f16(__s2_908, __p3_908), vget_lane_f16(__s2_908, __p3_908)}); \ __ret_908; \ }) #else #define vfmlal_lane_low_f16(__p0_909, __p1_909, __p2_909, __p3_909) __extension__ ({ \ float32x2_t __ret_909; \ float32x2_t __s0_909 = __p0_909; \ float16x4_t __s1_909 = __p1_909; \ float16x4_t __s2_909 = __p2_909; \ float32x2_t __rev0_909; __rev0_909 = __builtin_shufflevector(__s0_909, __s0_909, 1, 0); \ float16x4_t __rev1_909; __rev1_909 = __builtin_shufflevector(__s1_909, __s1_909, 3, 2, 1, 0); \ float16x4_t __rev2_909; __rev2_909 = __builtin_shufflevector(__s2_909, __s2_909, 3, 2, 1, 0); \ __ret_909 = __noswap_vfmlal_low_f16(__rev0_909, __rev1_909, (float16x4_t) {__noswap_vget_lane_f16(__rev2_909, __p3_909), __noswap_vget_lane_f16(__rev2_909, __p3_909), __noswap_vget_lane_f16(__rev2_909, __p3_909), __noswap_vget_lane_f16(__rev2_909, __p3_909)}); \ __ret_909 = __builtin_shufflevector(__ret_909, __ret_909, 1, 0); \ __ret_909; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmlalq_laneq_high_f16(__p0_910, __p1_910, __p2_910, __p3_910) __extension__ ({ \ float32x4_t __ret_910; \ float32x4_t __s0_910 = __p0_910; \ float16x8_t __s1_910 = __p1_910; \ float16x8_t __s2_910 = __p2_910; \ __ret_910 = vfmlalq_high_f16(__s0_910, __s1_910, (float16x8_t) {vgetq_lane_f16(__s2_910, __p3_910), vgetq_lane_f16(__s2_910, __p3_910), vgetq_lane_f16(__s2_910, __p3_910), vgetq_lane_f16(__s2_910, __p3_910), vgetq_lane_f16(__s2_910, __p3_910), vgetq_lane_f16(__s2_910, __p3_910), vgetq_lane_f16(__s2_910, __p3_910), vgetq_lane_f16(__s2_910, __p3_910)}); \ __ret_910; \ }) #else #define vfmlalq_laneq_high_f16(__p0_911, __p1_911, __p2_911, __p3_911) __extension__ ({ \ float32x4_t __ret_911; \ float32x4_t __s0_911 = __p0_911; \ float16x8_t __s1_911 = __p1_911; \ float16x8_t __s2_911 = __p2_911; \ float32x4_t __rev0_911; __rev0_911 = __builtin_shufflevector(__s0_911, __s0_911, 3, 2, 1, 0); \ float16x8_t __rev1_911; __rev1_911 = __builtin_shufflevector(__s1_911, __s1_911, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev2_911; __rev2_911 = __builtin_shufflevector(__s2_911, __s2_911, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_911 = __noswap_vfmlalq_high_f16(__rev0_911, __rev1_911, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_911, __p3_911), __noswap_vgetq_lane_f16(__rev2_911, __p3_911), __noswap_vgetq_lane_f16(__rev2_911, __p3_911), __noswap_vgetq_lane_f16(__rev2_911, __p3_911), __noswap_vgetq_lane_f16(__rev2_911, __p3_911), __noswap_vgetq_lane_f16(__rev2_911, __p3_911), __noswap_vgetq_lane_f16(__rev2_911, __p3_911), __noswap_vgetq_lane_f16(__rev2_911, __p3_911)}); \ __ret_911 = __builtin_shufflevector(__ret_911, __ret_911, 3, 2, 1, 0); \ __ret_911; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmlal_laneq_high_f16(__p0_912, __p1_912, __p2_912, __p3_912) __extension__ ({ \ float32x2_t __ret_912; \ float32x2_t __s0_912 = __p0_912; \ float16x4_t __s1_912 = __p1_912; \ float16x8_t __s2_912 = __p2_912; \ __ret_912 = vfmlal_high_f16(__s0_912, __s1_912, (float16x4_t) {vgetq_lane_f16(__s2_912, __p3_912), vgetq_lane_f16(__s2_912, __p3_912), vgetq_lane_f16(__s2_912, __p3_912), vgetq_lane_f16(__s2_912, __p3_912)}); \ __ret_912; \ }) #else #define vfmlal_laneq_high_f16(__p0_913, __p1_913, __p2_913, __p3_913) __extension__ ({ \ float32x2_t __ret_913; \ float32x2_t __s0_913 = __p0_913; \ float16x4_t __s1_913 = __p1_913; \ float16x8_t __s2_913 = __p2_913; \ float32x2_t __rev0_913; __rev0_913 = __builtin_shufflevector(__s0_913, __s0_913, 1, 0); \ float16x4_t __rev1_913; __rev1_913 = 
__builtin_shufflevector(__s1_913, __s1_913, 3, 2, 1, 0); \ float16x8_t __rev2_913; __rev2_913 = __builtin_shufflevector(__s2_913, __s2_913, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_913 = __noswap_vfmlal_high_f16(__rev0_913, __rev1_913, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_913, __p3_913), __noswap_vgetq_lane_f16(__rev2_913, __p3_913), __noswap_vgetq_lane_f16(__rev2_913, __p3_913), __noswap_vgetq_lane_f16(__rev2_913, __p3_913)}); \ __ret_913 = __builtin_shufflevector(__ret_913, __ret_913, 1, 0); \ __ret_913; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmlalq_laneq_low_f16(__p0_914, __p1_914, __p2_914, __p3_914) __extension__ ({ \ float32x4_t __ret_914; \ float32x4_t __s0_914 = __p0_914; \ float16x8_t __s1_914 = __p1_914; \ float16x8_t __s2_914 = __p2_914; \ __ret_914 = vfmlalq_low_f16(__s0_914, __s1_914, (float16x8_t) {vgetq_lane_f16(__s2_914, __p3_914), vgetq_lane_f16(__s2_914, __p3_914), vgetq_lane_f16(__s2_914, __p3_914), vgetq_lane_f16(__s2_914, __p3_914), vgetq_lane_f16(__s2_914, __p3_914), vgetq_lane_f16(__s2_914, __p3_914), vgetq_lane_f16(__s2_914, __p3_914), vgetq_lane_f16(__s2_914, __p3_914)}); \ __ret_914; \ }) #else #define vfmlalq_laneq_low_f16(__p0_915, __p1_915, __p2_915, __p3_915) __extension__ ({ \ float32x4_t __ret_915; \ float32x4_t __s0_915 = __p0_915; \ float16x8_t __s1_915 = __p1_915; \ float16x8_t __s2_915 = __p2_915; \ float32x4_t __rev0_915; __rev0_915 = __builtin_shufflevector(__s0_915, __s0_915, 3, 2, 1, 0); \ float16x8_t __rev1_915; __rev1_915 = __builtin_shufflevector(__s1_915, __s1_915, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev2_915; __rev2_915 = __builtin_shufflevector(__s2_915, __s2_915, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_915 = __noswap_vfmlalq_low_f16(__rev0_915, __rev1_915, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_915, __p3_915), __noswap_vgetq_lane_f16(__rev2_915, __p3_915), __noswap_vgetq_lane_f16(__rev2_915, __p3_915), __noswap_vgetq_lane_f16(__rev2_915, __p3_915), __noswap_vgetq_lane_f16(__rev2_915, __p3_915), __noswap_vgetq_lane_f16(__rev2_915, __p3_915), __noswap_vgetq_lane_f16(__rev2_915, __p3_915), __noswap_vgetq_lane_f16(__rev2_915, __p3_915)}); \ __ret_915 = __builtin_shufflevector(__ret_915, __ret_915, 3, 2, 1, 0); \ __ret_915; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmlal_laneq_low_f16(__p0_916, __p1_916, __p2_916, __p3_916) __extension__ ({ \ float32x2_t __ret_916; \ float32x2_t __s0_916 = __p0_916; \ float16x4_t __s1_916 = __p1_916; \ float16x8_t __s2_916 = __p2_916; \ __ret_916 = vfmlal_low_f16(__s0_916, __s1_916, (float16x4_t) {vgetq_lane_f16(__s2_916, __p3_916), vgetq_lane_f16(__s2_916, __p3_916), vgetq_lane_f16(__s2_916, __p3_916), vgetq_lane_f16(__s2_916, __p3_916)}); \ __ret_916; \ }) #else #define vfmlal_laneq_low_f16(__p0_917, __p1_917, __p2_917, __p3_917) __extension__ ({ \ float32x2_t __ret_917; \ float32x2_t __s0_917 = __p0_917; \ float16x4_t __s1_917 = __p1_917; \ float16x8_t __s2_917 = __p2_917; \ float32x2_t __rev0_917; __rev0_917 = __builtin_shufflevector(__s0_917, __s0_917, 1, 0); \ float16x4_t __rev1_917; __rev1_917 = __builtin_shufflevector(__s1_917, __s1_917, 3, 2, 1, 0); \ float16x8_t __rev2_917; __rev2_917 = __builtin_shufflevector(__s2_917, __s2_917, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_917 = __noswap_vfmlal_low_f16(__rev0_917, __rev1_917, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_917, __p3_917), __noswap_vgetq_lane_f16(__rev2_917, __p3_917), __noswap_vgetq_lane_f16(__rev2_917, __p3_917), __noswap_vgetq_lane_f16(__rev2_917, __p3_917)}); \ __ret_917 = __builtin_shufflevector(__ret_917, __ret_917, 1, 0); \ 
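/* Illustrative usage sketch (not part of the generated header): the
 * vfmlal*_lane*_f16 macros broadcast one f16 lane of the third operand and
 * forward to the plain vfmlal*_{low,high}_f16 intrinsics, which widen f16
 * products into an f32 accumulator: the _low forms use elements 0..3 of the
 * f16 operands, the _high forms use elements 4..7. Assumes the fp16fml
 * extension (e.g. -march=armv8.2-a+fp16fml); the name is hypothetical.
 *
 *   static inline float32x4_t
 *   example_fmlal_by_lane(float32x4_t acc, float16x8_t a, float16x4_t b)
 *   {
 *       acc = vfmlalq_lane_low_f16(acc, a, b, 1);   // acc[i] += a[i]   * b[1]
 *       acc = vfmlalq_lane_high_f16(acc, a, b, 1);  // acc[i] += a[i+4] * b[1]
 *       return acc;
 *   }
 */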
__ret_917; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmlslq_lane_high_f16(__p0_918, __p1_918, __p2_918, __p3_918) __extension__ ({ \ float32x4_t __ret_918; \ float32x4_t __s0_918 = __p0_918; \ float16x8_t __s1_918 = __p1_918; \ float16x4_t __s2_918 = __p2_918; \ __ret_918 = vfmlslq_high_f16(__s0_918, __s1_918, (float16x8_t) {vget_lane_f16(__s2_918, __p3_918), vget_lane_f16(__s2_918, __p3_918), vget_lane_f16(__s2_918, __p3_918), vget_lane_f16(__s2_918, __p3_918), vget_lane_f16(__s2_918, __p3_918), vget_lane_f16(__s2_918, __p3_918), vget_lane_f16(__s2_918, __p3_918), vget_lane_f16(__s2_918, __p3_918)}); \ __ret_918; \ }) #else #define vfmlslq_lane_high_f16(__p0_919, __p1_919, __p2_919, __p3_919) __extension__ ({ \ float32x4_t __ret_919; \ float32x4_t __s0_919 = __p0_919; \ float16x8_t __s1_919 = __p1_919; \ float16x4_t __s2_919 = __p2_919; \ float32x4_t __rev0_919; __rev0_919 = __builtin_shufflevector(__s0_919, __s0_919, 3, 2, 1, 0); \ float16x8_t __rev1_919; __rev1_919 = __builtin_shufflevector(__s1_919, __s1_919, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x4_t __rev2_919; __rev2_919 = __builtin_shufflevector(__s2_919, __s2_919, 3, 2, 1, 0); \ __ret_919 = __noswap_vfmlslq_high_f16(__rev0_919, __rev1_919, (float16x8_t) {__noswap_vget_lane_f16(__rev2_919, __p3_919), __noswap_vget_lane_f16(__rev2_919, __p3_919), __noswap_vget_lane_f16(__rev2_919, __p3_919), __noswap_vget_lane_f16(__rev2_919, __p3_919), __noswap_vget_lane_f16(__rev2_919, __p3_919), __noswap_vget_lane_f16(__rev2_919, __p3_919), __noswap_vget_lane_f16(__rev2_919, __p3_919), __noswap_vget_lane_f16(__rev2_919, __p3_919)}); \ __ret_919 = __builtin_shufflevector(__ret_919, __ret_919, 3, 2, 1, 0); \ __ret_919; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmlsl_lane_high_f16(__p0_920, __p1_920, __p2_920, __p3_920) __extension__ ({ \ float32x2_t __ret_920; \ float32x2_t __s0_920 = __p0_920; \ float16x4_t __s1_920 = __p1_920; \ float16x4_t __s2_920 = __p2_920; \ __ret_920 = vfmlsl_high_f16(__s0_920, __s1_920, (float16x4_t) {vget_lane_f16(__s2_920, __p3_920), vget_lane_f16(__s2_920, __p3_920), vget_lane_f16(__s2_920, __p3_920), vget_lane_f16(__s2_920, __p3_920)}); \ __ret_920; \ }) #else #define vfmlsl_lane_high_f16(__p0_921, __p1_921, __p2_921, __p3_921) __extension__ ({ \ float32x2_t __ret_921; \ float32x2_t __s0_921 = __p0_921; \ float16x4_t __s1_921 = __p1_921; \ float16x4_t __s2_921 = __p2_921; \ float32x2_t __rev0_921; __rev0_921 = __builtin_shufflevector(__s0_921, __s0_921, 1, 0); \ float16x4_t __rev1_921; __rev1_921 = __builtin_shufflevector(__s1_921, __s1_921, 3, 2, 1, 0); \ float16x4_t __rev2_921; __rev2_921 = __builtin_shufflevector(__s2_921, __s2_921, 3, 2, 1, 0); \ __ret_921 = __noswap_vfmlsl_high_f16(__rev0_921, __rev1_921, (float16x4_t) {__noswap_vget_lane_f16(__rev2_921, __p3_921), __noswap_vget_lane_f16(__rev2_921, __p3_921), __noswap_vget_lane_f16(__rev2_921, __p3_921), __noswap_vget_lane_f16(__rev2_921, __p3_921)}); \ __ret_921 = __builtin_shufflevector(__ret_921, __ret_921, 1, 0); \ __ret_921; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmlslq_lane_low_f16(__p0_922, __p1_922, __p2_922, __p3_922) __extension__ ({ \ float32x4_t __ret_922; \ float32x4_t __s0_922 = __p0_922; \ float16x8_t __s1_922 = __p1_922; \ float16x4_t __s2_922 = __p2_922; \ __ret_922 = vfmlslq_low_f16(__s0_922, __s1_922, (float16x8_t) {vget_lane_f16(__s2_922, __p3_922), vget_lane_f16(__s2_922, __p3_922), vget_lane_f16(__s2_922, __p3_922), vget_lane_f16(__s2_922, __p3_922), vget_lane_f16(__s2_922, __p3_922), vget_lane_f16(__s2_922, __p3_922), 
vget_lane_f16(__s2_922, __p3_922), vget_lane_f16(__s2_922, __p3_922)}); \ __ret_922; \ }) #else #define vfmlslq_lane_low_f16(__p0_923, __p1_923, __p2_923, __p3_923) __extension__ ({ \ float32x4_t __ret_923; \ float32x4_t __s0_923 = __p0_923; \ float16x8_t __s1_923 = __p1_923; \ float16x4_t __s2_923 = __p2_923; \ float32x4_t __rev0_923; __rev0_923 = __builtin_shufflevector(__s0_923, __s0_923, 3, 2, 1, 0); \ float16x8_t __rev1_923; __rev1_923 = __builtin_shufflevector(__s1_923, __s1_923, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x4_t __rev2_923; __rev2_923 = __builtin_shufflevector(__s2_923, __s2_923, 3, 2, 1, 0); \ __ret_923 = __noswap_vfmlslq_low_f16(__rev0_923, __rev1_923, (float16x8_t) {__noswap_vget_lane_f16(__rev2_923, __p3_923), __noswap_vget_lane_f16(__rev2_923, __p3_923), __noswap_vget_lane_f16(__rev2_923, __p3_923), __noswap_vget_lane_f16(__rev2_923, __p3_923), __noswap_vget_lane_f16(__rev2_923, __p3_923), __noswap_vget_lane_f16(__rev2_923, __p3_923), __noswap_vget_lane_f16(__rev2_923, __p3_923), __noswap_vget_lane_f16(__rev2_923, __p3_923)}); \ __ret_923 = __builtin_shufflevector(__ret_923, __ret_923, 3, 2, 1, 0); \ __ret_923; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmlsl_lane_low_f16(__p0_924, __p1_924, __p2_924, __p3_924) __extension__ ({ \ float32x2_t __ret_924; \ float32x2_t __s0_924 = __p0_924; \ float16x4_t __s1_924 = __p1_924; \ float16x4_t __s2_924 = __p2_924; \ __ret_924 = vfmlsl_low_f16(__s0_924, __s1_924, (float16x4_t) {vget_lane_f16(__s2_924, __p3_924), vget_lane_f16(__s2_924, __p3_924), vget_lane_f16(__s2_924, __p3_924), vget_lane_f16(__s2_924, __p3_924)}); \ __ret_924; \ }) #else #define vfmlsl_lane_low_f16(__p0_925, __p1_925, __p2_925, __p3_925) __extension__ ({ \ float32x2_t __ret_925; \ float32x2_t __s0_925 = __p0_925; \ float16x4_t __s1_925 = __p1_925; \ float16x4_t __s2_925 = __p2_925; \ float32x2_t __rev0_925; __rev0_925 = __builtin_shufflevector(__s0_925, __s0_925, 1, 0); \ float16x4_t __rev1_925; __rev1_925 = __builtin_shufflevector(__s1_925, __s1_925, 3, 2, 1, 0); \ float16x4_t __rev2_925; __rev2_925 = __builtin_shufflevector(__s2_925, __s2_925, 3, 2, 1, 0); \ __ret_925 = __noswap_vfmlsl_low_f16(__rev0_925, __rev1_925, (float16x4_t) {__noswap_vget_lane_f16(__rev2_925, __p3_925), __noswap_vget_lane_f16(__rev2_925, __p3_925), __noswap_vget_lane_f16(__rev2_925, __p3_925), __noswap_vget_lane_f16(__rev2_925, __p3_925)}); \ __ret_925 = __builtin_shufflevector(__ret_925, __ret_925, 1, 0); \ __ret_925; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmlslq_laneq_high_f16(__p0_926, __p1_926, __p2_926, __p3_926) __extension__ ({ \ float32x4_t __ret_926; \ float32x4_t __s0_926 = __p0_926; \ float16x8_t __s1_926 = __p1_926; \ float16x8_t __s2_926 = __p2_926; \ __ret_926 = vfmlslq_high_f16(__s0_926, __s1_926, (float16x8_t) {vgetq_lane_f16(__s2_926, __p3_926), vgetq_lane_f16(__s2_926, __p3_926), vgetq_lane_f16(__s2_926, __p3_926), vgetq_lane_f16(__s2_926, __p3_926), vgetq_lane_f16(__s2_926, __p3_926), vgetq_lane_f16(__s2_926, __p3_926), vgetq_lane_f16(__s2_926, __p3_926), vgetq_lane_f16(__s2_926, __p3_926)}); \ __ret_926; \ }) #else #define vfmlslq_laneq_high_f16(__p0_927, __p1_927, __p2_927, __p3_927) __extension__ ({ \ float32x4_t __ret_927; \ float32x4_t __s0_927 = __p0_927; \ float16x8_t __s1_927 = __p1_927; \ float16x8_t __s2_927 = __p2_927; \ float32x4_t __rev0_927; __rev0_927 = __builtin_shufflevector(__s0_927, __s0_927, 3, 2, 1, 0); \ float16x8_t __rev1_927; __rev1_927 = __builtin_shufflevector(__s1_927, __s1_927, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev2_927; 
__rev2_927 = __builtin_shufflevector(__s2_927, __s2_927, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_927 = __noswap_vfmlslq_high_f16(__rev0_927, __rev1_927, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_927, __p3_927), __noswap_vgetq_lane_f16(__rev2_927, __p3_927), __noswap_vgetq_lane_f16(__rev2_927, __p3_927), __noswap_vgetq_lane_f16(__rev2_927, __p3_927), __noswap_vgetq_lane_f16(__rev2_927, __p3_927), __noswap_vgetq_lane_f16(__rev2_927, __p3_927), __noswap_vgetq_lane_f16(__rev2_927, __p3_927), __noswap_vgetq_lane_f16(__rev2_927, __p3_927)}); \ __ret_927 = __builtin_shufflevector(__ret_927, __ret_927, 3, 2, 1, 0); \ __ret_927; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmlsl_laneq_high_f16(__p0_928, __p1_928, __p2_928, __p3_928) __extension__ ({ \ float32x2_t __ret_928; \ float32x2_t __s0_928 = __p0_928; \ float16x4_t __s1_928 = __p1_928; \ float16x8_t __s2_928 = __p2_928; \ __ret_928 = vfmlsl_high_f16(__s0_928, __s1_928, (float16x4_t) {vgetq_lane_f16(__s2_928, __p3_928), vgetq_lane_f16(__s2_928, __p3_928), vgetq_lane_f16(__s2_928, __p3_928), vgetq_lane_f16(__s2_928, __p3_928)}); \ __ret_928; \ }) #else #define vfmlsl_laneq_high_f16(__p0_929, __p1_929, __p2_929, __p3_929) __extension__ ({ \ float32x2_t __ret_929; \ float32x2_t __s0_929 = __p0_929; \ float16x4_t __s1_929 = __p1_929; \ float16x8_t __s2_929 = __p2_929; \ float32x2_t __rev0_929; __rev0_929 = __builtin_shufflevector(__s0_929, __s0_929, 1, 0); \ float16x4_t __rev1_929; __rev1_929 = __builtin_shufflevector(__s1_929, __s1_929, 3, 2, 1, 0); \ float16x8_t __rev2_929; __rev2_929 = __builtin_shufflevector(__s2_929, __s2_929, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_929 = __noswap_vfmlsl_high_f16(__rev0_929, __rev1_929, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_929, __p3_929), __noswap_vgetq_lane_f16(__rev2_929, __p3_929), __noswap_vgetq_lane_f16(__rev2_929, __p3_929), __noswap_vgetq_lane_f16(__rev2_929, __p3_929)}); \ __ret_929 = __builtin_shufflevector(__ret_929, __ret_929, 1, 0); \ __ret_929; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmlslq_laneq_low_f16(__p0_930, __p1_930, __p2_930, __p3_930) __extension__ ({ \ float32x4_t __ret_930; \ float32x4_t __s0_930 = __p0_930; \ float16x8_t __s1_930 = __p1_930; \ float16x8_t __s2_930 = __p2_930; \ __ret_930 = vfmlslq_low_f16(__s0_930, __s1_930, (float16x8_t) {vgetq_lane_f16(__s2_930, __p3_930), vgetq_lane_f16(__s2_930, __p3_930), vgetq_lane_f16(__s2_930, __p3_930), vgetq_lane_f16(__s2_930, __p3_930), vgetq_lane_f16(__s2_930, __p3_930), vgetq_lane_f16(__s2_930, __p3_930), vgetq_lane_f16(__s2_930, __p3_930), vgetq_lane_f16(__s2_930, __p3_930)}); \ __ret_930; \ }) #else #define vfmlslq_laneq_low_f16(__p0_931, __p1_931, __p2_931, __p3_931) __extension__ ({ \ float32x4_t __ret_931; \ float32x4_t __s0_931 = __p0_931; \ float16x8_t __s1_931 = __p1_931; \ float16x8_t __s2_931 = __p2_931; \ float32x4_t __rev0_931; __rev0_931 = __builtin_shufflevector(__s0_931, __s0_931, 3, 2, 1, 0); \ float16x8_t __rev1_931; __rev1_931 = __builtin_shufflevector(__s1_931, __s1_931, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev2_931; __rev2_931 = __builtin_shufflevector(__s2_931, __s2_931, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_931 = __noswap_vfmlslq_low_f16(__rev0_931, __rev1_931, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_931, __p3_931), __noswap_vgetq_lane_f16(__rev2_931, __p3_931), __noswap_vgetq_lane_f16(__rev2_931, __p3_931), __noswap_vgetq_lane_f16(__rev2_931, __p3_931), __noswap_vgetq_lane_f16(__rev2_931, __p3_931), __noswap_vgetq_lane_f16(__rev2_931, __p3_931), __noswap_vgetq_lane_f16(__rev2_931, __p3_931), 
__noswap_vgetq_lane_f16(__rev2_931, __p3_931)}); \ __ret_931 = __builtin_shufflevector(__ret_931, __ret_931, 3, 2, 1, 0); \ __ret_931; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vfmlsl_laneq_low_f16(__p0_932, __p1_932, __p2_932, __p3_932) __extension__ ({ \ float32x2_t __ret_932; \ float32x2_t __s0_932 = __p0_932; \ float16x4_t __s1_932 = __p1_932; \ float16x8_t __s2_932 = __p2_932; \ __ret_932 = vfmlsl_low_f16(__s0_932, __s1_932, (float16x4_t) {vgetq_lane_f16(__s2_932, __p3_932), vgetq_lane_f16(__s2_932, __p3_932), vgetq_lane_f16(__s2_932, __p3_932), vgetq_lane_f16(__s2_932, __p3_932)}); \ __ret_932; \ }) #else #define vfmlsl_laneq_low_f16(__p0_933, __p1_933, __p2_933, __p3_933) __extension__ ({ \ float32x2_t __ret_933; \ float32x2_t __s0_933 = __p0_933; \ float16x4_t __s1_933 = __p1_933; \ float16x8_t __s2_933 = __p2_933; \ float32x2_t __rev0_933; __rev0_933 = __builtin_shufflevector(__s0_933, __s0_933, 1, 0); \ float16x4_t __rev1_933; __rev1_933 = __builtin_shufflevector(__s1_933, __s1_933, 3, 2, 1, 0); \ float16x8_t __rev2_933; __rev2_933 = __builtin_shufflevector(__s2_933, __s2_933, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_933 = __noswap_vfmlsl_low_f16(__rev0_933, __rev1_933, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_933, __p3_933), __noswap_vgetq_lane_f16(__rev2_933, __p3_933), __noswap_vgetq_lane_f16(__rev2_933, __p3_933), __noswap_vgetq_lane_f16(__rev2_933, __p3_933)}); \ __ret_933 = __builtin_shufflevector(__ret_933, __ret_933, 1, 0); \ __ret_933; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulh_lane_f16(__p0_934, __p1_934, __p2_934) __extension__ ({ \ float16_t __ret_934; \ float16_t __s0_934 = __p0_934; \ float16x4_t __s1_934 = __p1_934; \ __ret_934 = __s0_934 * vget_lane_f16(__s1_934, __p2_934); \ __ret_934; \ }) #else #define vmulh_lane_f16(__p0_935, __p1_935, __p2_935) __extension__ ({ \ float16_t __ret_935; \ float16_t __s0_935 = __p0_935; \ float16x4_t __s1_935 = __p1_935; \ float16x4_t __rev1_935; __rev1_935 = __builtin_shufflevector(__s1_935, __s1_935, 3, 2, 1, 0); \ __ret_935 = __s0_935 * __noswap_vget_lane_f16(__rev1_935, __p2_935); \ __ret_935; \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vmulh_laneq_f16(__p0_936, __p1_936, __p2_936) __extension__ ({ \ float16_t __ret_936; \ float16_t __s0_936 = __p0_936; \ float16x8_t __s1_936 = __p1_936; \ __ret_936 = __s0_936 * vgetq_lane_f16(__s1_936, __p2_936); \ __ret_936; \ }) #else #define vmulh_laneq_f16(__p0_937, __p1_937, __p2_937) __extension__ ({ \ float16_t __ret_937; \ float16_t __s0_937 = __p0_937; \ float16x8_t __s1_937 = __p1_937; \ float16x8_t __rev1_937; __rev1_937 = __builtin_shufflevector(__s1_937, __s1_937, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_937 = __s0_937 * __noswap_vgetq_lane_f16(__rev1_937, __p2_937); \ __ret_937; \ }) #endif #endif #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vabal_u8(uint16x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint16x8_t __ret; __ret = __p0 + vabdl_u8(__p1, __p2); return __ret; } #else __ai uint16x8_t vabal_u8(uint16x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __noswap_vabdl_u8(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai uint16x8_t __noswap_vabal_u8(uint16x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint16x8_t __ret; __ret = __p0 + 
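/* Illustrative usage sketch (not part of the generated header): the vfmlsl*
 * macros mirror the vfmlal* forms above but subtract the widened f16 products
 * from the f32 accumulator, and vmulh_lane{,q}_f16 multiplies a scalar
 * float16_t by one lane of a vector. Hypothetical example (assumes fp16
 * arithmetic support, e.g. -march=armv8.2-a+fp16):
 *
 *   static inline float16_t
 *   example_scale_by_lane(float16_t x, float16x4_t weights)
 *   {
 *       return vmulh_lane_f16(x, weights, 3);  // x * weights[3]
 *   }
 */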
__noswap_vabdl_u8(__p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vabal_u32(uint64x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint64x2_t __ret; __ret = __p0 + vabdl_u32(__p1, __p2); return __ret; } #else __ai uint64x2_t vabal_u32(uint64x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __rev0 + __noswap_vabdl_u32(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai uint64x2_t __noswap_vabal_u32(uint64x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint64x2_t __ret; __ret = __p0 + __noswap_vabdl_u32(__p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vabal_u16(uint32x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint32x4_t __ret; __ret = __p0 + vabdl_u16(__p1, __p2); return __ret; } #else __ai uint32x4_t vabal_u16(uint32x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __rev0 + __noswap_vabdl_u16(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai uint32x4_t __noswap_vabal_u16(uint32x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint32x4_t __ret; __ret = __p0 + __noswap_vabdl_u16(__p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vabal_s8(int16x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int16x8_t __ret; __ret = __p0 + vabdl_s8(__p1, __p2); return __ret; } #else __ai int16x8_t vabal_s8(int16x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __rev0 + __noswap_vabdl_s8(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } __ai int16x8_t __noswap_vabal_s8(int16x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int16x8_t __ret; __ret = __p0 + __noswap_vabdl_s8(__p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vabal_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; __ret = __p0 + vabdl_s32(__p1, __p2); return __ret; } #else __ai int64x2_t vabal_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); __ret = __rev0 + __noswap_vabdl_s32(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int64x2_t __noswap_vabal_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; __ret = __p0 + __noswap_vabdl_s32(__p1, __p2); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vabal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; __ret = __p0 + vabdl_s16(__p1, __p2); return __ret; } #else __ai int32x4_t vabal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 
= __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __rev0 + __noswap_vabdl_s16(__rev1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vabal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; __ret = __p0 + __noswap_vabdl_s16(__p1, __p2); return __ret; } #endif #if defined(__aarch64__) #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vabal_high_u8(uint16x8_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint16x8_t __ret; __ret = vabal_u8(__p0, vget_high_u8(__p1), vget_high_u8(__p2)); return __ret; } #else __ai uint16x8_t vabal_high_u8(uint16x8_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint16x8_t __ret; uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vabal_u8(__rev0, __noswap_vget_high_u8(__rev1), __noswap_vget_high_u8(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vabal_high_u32(uint64x2_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint64x2_t __ret; __ret = vabal_u32(__p0, vget_high_u32(__p1), vget_high_u32(__p2)); return __ret; } #else __ai uint64x2_t vabal_high_u32(uint64x2_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint64x2_t __ret; uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __noswap_vabal_u32(__rev0, __noswap_vget_high_u32(__rev1), __noswap_vget_high_u32(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vabal_high_u16(uint32x4_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint32x4_t __ret; __ret = vabal_u16(__p0, vget_high_u16(__p1), vget_high_u16(__p2)); return __ret; } #else __ai uint32x4_t vabal_high_u16(uint32x4_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint32x4_t __ret; uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vabal_u16(__rev0, __noswap_vget_high_u16(__rev1), __noswap_vget_high_u16(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vabal_high_s8(int16x8_t __p0, int8x16_t __p1, int8x16_t __p2) { int16x8_t __ret; __ret = vabal_s8(__p0, vget_high_s8(__p1), vget_high_s8(__p2)); return __ret; } #else __ai int16x8_t vabal_high_s8(int16x8_t __p0, int8x16_t __p1, int8x16_t __p2) { int16x8_t __ret; int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vabal_s8(__rev0, __noswap_vget_high_s8(__rev1), 
__noswap_vget_high_s8(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vabal_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { int64x2_t __ret; __ret = vabal_s32(__p0, vget_high_s32(__p1), vget_high_s32(__p2)); return __ret; } #else __ai int64x2_t vabal_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { int64x2_t __ret; int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); __ret = __noswap_vabal_s32(__rev0, __noswap_vget_high_s32(__rev1), __noswap_vget_high_s32(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vabal_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) { int32x4_t __ret; __ret = vabal_s16(__p0, vget_high_s16(__p1), vget_high_s16(__p2)); return __ret; } #else __ai int32x4_t vabal_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) { int32x4_t __ret; int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); __ret = __noswap_vabal_s16(__rev0, __noswap_vget_high_s16(__rev1), __noswap_vget_high_s16(__rev2)); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif #endif #undef __ai #endif /* if !defined(__ARM_NEON) */ #endif /* ifndef __ARM_FP */ #endif /* __ARM_NEON_H */ /*===---- cetintrin.h - CET intrinsic --------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __CETINTRIN_H #define __CETINTRIN_H /* Define the default attributes for the functions in this file. 
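 *
 * Editor's note (usage sketch, not part of the original header): the shadow
 * stack intrinsics defined below only do something useful in code built with
 * -fcf-protection, on hardware and an OS with CET shadow stacks enabled; on
 * other configurations the read instruction is documented to leave its input
 * unchanged, so _get_ssp() simply yields 0. An assumed usage pattern:
 *
 *   if (_get_ssp() != 0) {   // shadow stack appears to be active
 *     _inc_ssp(1);           // advance the shadow-stack pointer by one entry
 *   }
 *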
*/ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("shstk"))) static __inline__ void __DEFAULT_FN_ATTRS _incsspd(int __a) { __builtin_ia32_incsspd((unsigned int)__a); } #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS _incsspq(unsigned long long __a) { __builtin_ia32_incsspq(__a); } #endif /* __x86_64__ */ #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS _inc_ssp(unsigned int __a) { __builtin_ia32_incsspq(__a); } #else /* __x86_64__ */ static __inline__ void __DEFAULT_FN_ATTRS _inc_ssp(unsigned int __a) { __builtin_ia32_incsspd(__a); } #endif /* __x86_64__ */ static __inline__ unsigned int __DEFAULT_FN_ATTRS _rdsspd(unsigned int __a) { return __builtin_ia32_rdsspd(__a); } static __inline__ unsigned int __DEFAULT_FN_ATTRS _rdsspd_i32(void) { #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wuninitialized" unsigned int t; return __builtin_ia32_rdsspd(t); #pragma clang diagnostic pop } #ifdef __x86_64__ static __inline__ unsigned long long __DEFAULT_FN_ATTRS _rdsspq(unsigned long long __a) { return __builtin_ia32_rdsspq(__a); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS _rdsspq_i64(void) { #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wuninitialized" unsigned long long t; return __builtin_ia32_rdsspq(t); #pragma clang diagnostic pop } #endif /* __x86_64__ */ #ifdef __x86_64__ static __inline__ unsigned long long __DEFAULT_FN_ATTRS _get_ssp(void) { return __builtin_ia32_rdsspq(0); } #else /* __x86_64__ */ static __inline__ unsigned int __DEFAULT_FN_ATTRS _get_ssp(void) { return __builtin_ia32_rdsspd(0); } #endif /* __x86_64__ */ static __inline__ void __DEFAULT_FN_ATTRS _saveprevssp(void) { __builtin_ia32_saveprevssp(); } static __inline__ void __DEFAULT_FN_ATTRS _rstorssp(void * __p) { __builtin_ia32_rstorssp(__p); } static __inline__ void __DEFAULT_FN_ATTRS _wrssd(unsigned int __a, void * __p) { __builtin_ia32_wrssd(__a, __p); } #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS _wrssq(unsigned long long __a, void * __p) { __builtin_ia32_wrssq(__a, __p); } #endif /* __x86_64__ */ static __inline__ void __DEFAULT_FN_ATTRS _wrussd(unsigned int __a, void * __p) { __builtin_ia32_wrussd(__a, __p); } #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS _wrussq(unsigned long long __a, void * __p) { __builtin_ia32_wrussq(__a, __p); } #endif /* __x86_64__ */ static __inline__ void __DEFAULT_FN_ATTRS _setssbsy(void) { __builtin_ia32_setssbsy(); } static __inline__ void __DEFAULT_FN_ATTRS _clrssbsy(void * __p) { __builtin_ia32_clrssbsy(__p); } #undef __DEFAULT_FN_ATTRS #endif /* __CETINTRIN_H */ /*===---- cpuid.h - X86 cpu model detection --------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CPUID_H #define __CPUID_H #if !(__x86_64__ || __i386__) #error this header is for x86 only #endif /* Responses identification request with %eax 0 */ /* AMD: "AuthenticAMD" */ #define signature_AMD_ebx 0x68747541 #define signature_AMD_edx 0x69746e65 #define signature_AMD_ecx 0x444d4163 /* CENTAUR: "CentaurHauls" */ #define signature_CENTAUR_ebx 0x746e6543 #define signature_CENTAUR_edx 0x48727561 #define signature_CENTAUR_ecx 0x736c7561 /* CYRIX: "CyrixInstead" */ #define signature_CYRIX_ebx 0x69727943 #define signature_CYRIX_edx 0x736e4978 #define signature_CYRIX_ecx 0x64616574 /* HYGON: "HygonGenuine" */ #define signature_HYGON_ebx 0x6f677948 #define signature_HYGON_edx 0x6e65476e #define signature_HYGON_ecx 0x656e6975 /* INTEL: "GenuineIntel" */ #define signature_INTEL_ebx 0x756e6547 #define signature_INTEL_edx 0x49656e69 #define signature_INTEL_ecx 0x6c65746e /* TM1: "TransmetaCPU" */ #define signature_TM1_ebx 0x6e617254 #define signature_TM1_edx 0x74656d73 #define signature_TM1_ecx 0x55504361 /* TM2: "GenuineTMx86" */ #define signature_TM2_ebx 0x756e6547 #define signature_TM2_edx 0x54656e69 #define signature_TM2_ecx 0x3638784d /* NSC: "Geode by NSC" */ #define signature_NSC_ebx 0x646f6547 #define signature_NSC_edx 0x79622065 #define signature_NSC_ecx 0x43534e20 /* NEXGEN: "NexGenDriven" */ #define signature_NEXGEN_ebx 0x4778654e #define signature_NEXGEN_edx 0x72446e65 #define signature_NEXGEN_ecx 0x6e657669 /* RISE: "RiseRiseRise" */ #define signature_RISE_ebx 0x65736952 #define signature_RISE_edx 0x65736952 #define signature_RISE_ecx 0x65736952 /* SIS: "SiS SiS SiS " */ #define signature_SIS_ebx 0x20536953 #define signature_SIS_edx 0x20536953 #define signature_SIS_ecx 0x20536953 /* UMC: "UMC UMC UMC " */ #define signature_UMC_ebx 0x20434d55 #define signature_UMC_edx 0x20434d55 #define signature_UMC_ecx 0x20434d55 /* VIA: "VIA VIA VIA " */ #define signature_VIA_ebx 0x20414956 #define signature_VIA_edx 0x20414956 #define signature_VIA_ecx 0x20414956 /* VORTEX: "Vortex86 SoC" */ #define signature_VORTEX_ebx 0x74726f56 #define signature_VORTEX_edx 0x36387865 #define signature_VORTEX_ecx 0x436f5320 /* Features in %ecx for leaf 1 */ #define bit_SSE3 0x00000001 #define bit_PCLMULQDQ 0x00000002 #define bit_PCLMUL bit_PCLMULQDQ /* for gcc compat */ #define bit_DTES64 0x00000004 #define bit_MONITOR 0x00000008 #define bit_DSCPL 0x00000010 #define bit_VMX 0x00000020 #define bit_SMX 0x00000040 #define bit_EIST 0x00000080 #define bit_TM2 0x00000100 #define bit_SSSE3 0x00000200 #define bit_CNXTID 0x00000400 #define bit_FMA 0x00001000 #define bit_CMPXCHG16B 0x00002000 #define bit_xTPR 0x00004000 #define bit_PDCM 0x00008000 #define bit_PCID 0x00020000 #define bit_DCA 0x00040000 #define bit_SSE41 0x00080000 #define bit_SSE4_1 bit_SSE41 /* for gcc compat */ #define bit_SSE42 0x00100000 #define bit_SSE4_2 bit_SSE42 /* for gcc compat */ #define bit_x2APIC 0x00200000 #define bit_MOVBE 0x00400000 #define bit_POPCNT 0x00800000 #define bit_TSCDeadline 0x01000000 #define bit_AESNI 0x02000000 #define bit_AES bit_AESNI /* for gcc compat */ #define bit_XSAVE 0x04000000 #define bit_OSXSAVE 0x08000000 #define bit_AVX 0x10000000 #define bit_F16C 0x20000000 #define bit_RDRND 0x40000000 /* Features in %edx for leaf 1 */ #define bit_FPU 0x00000001 #define bit_VME 0x00000002 #define bit_DE 0x00000004 #define bit_PSE 0x00000008 #define bit_TSC 0x00000010 #define bit_MSR 
0x00000020 #define bit_PAE 0x00000040 #define bit_MCE 0x00000080 #define bit_CX8 0x00000100 #define bit_CMPXCHG8B bit_CX8 /* for gcc compat */ #define bit_APIC 0x00000200 #define bit_SEP 0x00000800 #define bit_MTRR 0x00001000 #define bit_PGE 0x00002000 #define bit_MCA 0x00004000 #define bit_CMOV 0x00008000 #define bit_PAT 0x00010000 #define bit_PSE36 0x00020000 #define bit_PSN 0x00040000 #define bit_CLFSH 0x00080000 #define bit_DS 0x00200000 #define bit_ACPI 0x00400000 #define bit_MMX 0x00800000 #define bit_FXSR 0x01000000 #define bit_FXSAVE bit_FXSR /* for gcc compat */ #define bit_SSE 0x02000000 #define bit_SSE2 0x04000000 #define bit_SS 0x08000000 #define bit_HTT 0x10000000 #define bit_TM 0x20000000 #define bit_PBE 0x80000000 /* Features in %ebx for leaf 7 sub-leaf 0 */ #define bit_FSGSBASE 0x00000001 #define bit_SGX 0x00000004 #define bit_BMI 0x00000008 #define bit_HLE 0x00000010 #define bit_AVX2 0x00000020 #define bit_SMEP 0x00000080 #define bit_BMI2 0x00000100 #define bit_ENH_MOVSB 0x00000200 #define bit_INVPCID 0x00000400 #define bit_RTM 0x00000800 #define bit_MPX 0x00004000 #define bit_AVX512F 0x00010000 #define bit_AVX512DQ 0x00020000 #define bit_RDSEED 0x00040000 #define bit_ADX 0x00080000 #define bit_AVX512IFMA 0x00200000 #define bit_CLFLUSHOPT 0x00800000 #define bit_CLWB 0x01000000 #define bit_AVX512PF 0x04000000 #define bit_AVX512ER 0x08000000 #define bit_AVX512CD 0x10000000 #define bit_SHA 0x20000000 #define bit_AVX512BW 0x40000000 #define bit_AVX512VL 0x80000000 /* Features in %ecx for leaf 7 sub-leaf 0 */ #define bit_PREFTCHWT1 0x00000001 #define bit_AVX512VBMI 0x00000002 #define bit_PKU 0x00000004 #define bit_OSPKE 0x00000010 #define bit_WAITPKG 0x00000020 #define bit_AVX512VBMI2 0x00000040 #define bit_SHSTK 0x00000080 #define bit_GFNI 0x00000100 #define bit_VAES 0x00000200 #define bit_VPCLMULQDQ 0x00000400 #define bit_AVX512VNNI 0x00000800 #define bit_AVX512BITALG 0x00001000 #define bit_AVX512VPOPCNTDQ 0x00004000 #define bit_RDPID 0x00400000 #define bit_CLDEMOTE 0x02000000 #define bit_MOVDIRI 0x08000000 #define bit_MOVDIR64B 0x10000000 #define bit_ENQCMD 0x20000000 /* Features in %edx for leaf 7 sub-leaf 0 */ #define bit_AVX5124VNNIW 0x00000004 #define bit_AVX5124FMAPS 0x00000008 #define bit_UINTR 0x00000020 #define bit_SERIALIZE 0x00004000 #define bit_TSXLDTRK 0x00010000 #define bit_PCONFIG 0x00040000 #define bit_IBT 0x00100000 #define bit_AMXBF16 0x00400000 #define bit_AVX512FP16 0x00800000 #define bit_AMXTILE 0x01000000 #define bit_AMXINT8 0x02000000 /* Features in %eax for leaf 7 sub-leaf 1 */ #define bit_SHA512 0x00000001 #define bit_SM3 0x00000002 #define bit_SM4 0x00000004 #define bit_RAOINT 0x00000008 #define bit_AVXVNNI 0x00000010 #define bit_AVX512BF16 0x00000020 #define bit_CMPCCXADD 0x00000080 #define bit_AMXFP16 0x00200000 #define bit_HRESET 0x00400000 #define bit_AVXIFMA 0x00800000 /* Features in %edx for leaf 7 sub-leaf 1 */ #define bit_AVXVNNIINT8 0x00000010 #define bit_AVXNECONVERT 0x00000020 #define bit_AMXCOMPLEX 0x00000100 #define bit_AVXVNNIINT16 0x00000400 #define bit_PREFETCHI 0x00004000 #define bit_USERMSR 0x00008000 #define bit_AVX10 0x00080000 #define bit_APXF 0x00200000 /* Features in %eax for leaf 13 sub-leaf 1 */ #define bit_XSAVEOPT 0x00000001 #define bit_XSAVEC 0x00000002 #define bit_XSAVES 0x00000008 /* Features in %eax for leaf 0x14 sub-leaf 0 */ #define bit_PTWRITE 0x00000010 /* Features in %ecx for leaf 0x80000001 */ #define bit_LAHF_LM 0x00000001 #define bit_ABM 0x00000020 #define bit_LZCNT bit_ABM /* for gcc compat */ #define bit_SSE4a 
0x00000040 #define bit_PRFCHW 0x00000100 #define bit_XOP 0x00000800 #define bit_LWP 0x00008000 #define bit_FMA4 0x00010000 #define bit_TBM 0x00200000 #define bit_MWAITX 0x20000000 /* Features in %edx for leaf 0x80000001 */ #define bit_MMXEXT 0x00400000 #define bit_LM 0x20000000 #define bit_3DNOWP 0x40000000 #define bit_3DNOW 0x80000000 /* Features in %ebx for leaf 0x80000008 */ #define bit_CLZERO 0x00000001 #define bit_RDPRU 0x00000010 #define bit_WBNOINVD 0x00000200 /* Features in %ebx for leaf 0x24 */ #define bit_AVX10_256 0x00020000 #define bit_AVX10_512 0x00040000 #if __i386__ #define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \ __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \ : "0"(__leaf)) #define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \ __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \ : "0"(__leaf), "2"(__count)) #else /* x86-64 uses %rbx as the base register, so preserve it. */ #define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \ __asm(" xchgq %%rbx,%q1\n" \ " cpuid\n" \ " xchgq %%rbx,%q1" \ : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \ : "0"(__leaf)) #define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \ __asm(" xchgq %%rbx,%q1\n" \ " cpuid\n" \ " xchgq %%rbx,%q1" \ : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \ : "0"(__leaf), "2"(__count)) #endif static __inline unsigned int __get_cpuid_max (unsigned int __leaf, unsigned int *__sig) { unsigned int __eax, __ebx, __ecx, __edx; #if __i386__ int __cpuid_supported; __asm(" pushfl\n" " popl %%eax\n" " movl %%eax,%%ecx\n" " xorl $0x00200000,%%eax\n" " pushl %%eax\n" " popfl\n" " pushfl\n" " popl %%eax\n" " movl $0,%0\n" " cmpl %%eax,%%ecx\n" " je 1f\n" " movl $1,%0\n" "1:" : "=r" (__cpuid_supported) : : "eax", "ecx"); if (!__cpuid_supported) return 0; #endif __cpuid(__leaf, __eax, __ebx, __ecx, __edx); if (__sig) *__sig = __ebx; return __eax; } static __inline int __get_cpuid (unsigned int __leaf, unsigned int *__eax, unsigned int *__ebx, unsigned int *__ecx, unsigned int *__edx) { unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, 0); if (__max_leaf == 0 || __max_leaf < __leaf) return 0; __cpuid(__leaf, *__eax, *__ebx, *__ecx, *__edx); return 1; } static __inline int __get_cpuid_count (unsigned int __leaf, unsigned int __subleaf, unsigned int *__eax, unsigned int *__ebx, unsigned int *__ecx, unsigned int *__edx) { unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, 0); if (__max_leaf == 0 || __max_leaf < __leaf) return 0; __cpuid_count(__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx); return 1; } #endif /* __CPUID_H */ float.hfma4intrin.hlsxintrin.h/*===---- pkuintrin.h - PKU intrinsics -------------------------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __PKUINTRIN_H #define __PKUINTRIN_H /* Define the default attributes for the functions in this file. 
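 *
 * Editor's note (usage sketch, not part of the original header): PKRU keeps
 * two bits per protection key, access-disable at bit 2*key and write-disable
 * at bit 2*key+1. Assuming a key obtained from the OS (for example
 * pkey_alloc() on Linux), write access for that key could be revoked with:
 *
 *   unsigned int pkru = _rdpkru_u32();
 *   pkru |= 2u << (2 * key);   // set the write-disable bit for `key`
 *   _wrpkru(pkru);
 *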
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("pku"))) static __inline__ unsigned int __DEFAULT_FN_ATTRS _rdpkru_u32(void) { return __builtin_ia32_rdpkru(); } static __inline__ void __DEFAULT_FN_ATTRS _wrpkru(unsigned int __val) { __builtin_ia32_wrpkru(__val); } #undef __DEFAULT_FN_ATTRS #endif
/*===-- __clang_cuda_libdevice_declares.h - decls for libdevice functions --=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_CUDA_LIBDEVICE_DECLARES_H__ #define __CLANG_CUDA_LIBDEVICE_DECLARES_H__ #if defined(__cplusplus) extern "C" { #endif #if defined(__OPENMP_NVPTX__) #define __DEVICE__ #pragma omp begin assumes ext_spmd_amenable no_openmp #elif defined(__CUDA__) #define __DEVICE__ __device__ #endif __DEVICE__ int __nv_abs(int __a); __DEVICE__ double __nv_acos(double __a); __DEVICE__ float __nv_acosf(float __a); __DEVICE__ double __nv_acosh(double __a); __DEVICE__ float __nv_acoshf(float __a); __DEVICE__ double __nv_asin(double __a); __DEVICE__ float __nv_asinf(float __a); __DEVICE__ double __nv_asinh(double __a); __DEVICE__ float __nv_asinhf(float __a); __DEVICE__ double __nv_atan2(double __a, double __b); __DEVICE__ float __nv_atan2f(float __a, float __b); __DEVICE__ double __nv_atan(double __a); __DEVICE__ float __nv_atanf(float __a); __DEVICE__ double __nv_atanh(double __a); __DEVICE__ float __nv_atanhf(float __a); __DEVICE__ int __nv_brev(int __a); __DEVICE__ long long __nv_brevll(long long __a); __DEVICE__ int __nv_byte_perm(int __a, int __b, int __c); __DEVICE__ double __nv_cbrt(double __a); __DEVICE__ float __nv_cbrtf(float __a); __DEVICE__ double __nv_ceil(double __a); __DEVICE__ float __nv_ceilf(float __a); __DEVICE__ int __nv_clz(int __a); __DEVICE__ int __nv_clzll(long long __a); __DEVICE__ double __nv_copysign(double __a, double __b); __DEVICE__ float __nv_copysignf(float __a, float __b); __DEVICE__ double __nv_cos(double __a); __DEVICE__ float __nv_cosf(float __a); __DEVICE__ double __nv_cosh(double __a); __DEVICE__ float __nv_coshf(float __a); __DEVICE__ double __nv_cospi(double __a); __DEVICE__ float
__nv_cospif(float __a); __DEVICE__ double __nv_cyl_bessel_i0(double __a); __DEVICE__ float __nv_cyl_bessel_i0f(float __a); __DEVICE__ double __nv_cyl_bessel_i1(double __a); __DEVICE__ float __nv_cyl_bessel_i1f(float __a); __DEVICE__ double __nv_dadd_rd(double __a, double __b); __DEVICE__ double __nv_dadd_rn(double __a, double __b); __DEVICE__ double __nv_dadd_ru(double __a, double __b); __DEVICE__ double __nv_dadd_rz(double __a, double __b); __DEVICE__ double __nv_ddiv_rd(double __a, double __b); __DEVICE__ double __nv_ddiv_rn(double __a, double __b); __DEVICE__ double __nv_ddiv_ru(double __a, double __b); __DEVICE__ double __nv_ddiv_rz(double __a, double __b); __DEVICE__ double __nv_dmul_rd(double __a, double __b); __DEVICE__ double __nv_dmul_rn(double __a, double __b); __DEVICE__ double __nv_dmul_ru(double __a, double __b); __DEVICE__ double __nv_dmul_rz(double __a, double __b); __DEVICE__ float __nv_double2float_rd(double __a); __DEVICE__ float __nv_double2float_rn(double __a); __DEVICE__ float __nv_double2float_ru(double __a); __DEVICE__ float __nv_double2float_rz(double __a); __DEVICE__ int __nv_double2hiint(double __a); __DEVICE__ int __nv_double2int_rd(double __a); __DEVICE__ int __nv_double2int_rn(double __a); __DEVICE__ int __nv_double2int_ru(double __a); __DEVICE__ int __nv_double2int_rz(double __a); __DEVICE__ long long __nv_double2ll_rd(double __a); __DEVICE__ long long __nv_double2ll_rn(double __a); __DEVICE__ long long __nv_double2ll_ru(double __a); __DEVICE__ long long __nv_double2ll_rz(double __a); __DEVICE__ int __nv_double2loint(double __a); __DEVICE__ unsigned int __nv_double2uint_rd(double __a); __DEVICE__ unsigned int __nv_double2uint_rn(double __a); __DEVICE__ unsigned int __nv_double2uint_ru(double __a); __DEVICE__ unsigned int __nv_double2uint_rz(double __a); __DEVICE__ unsigned long long __nv_double2ull_rd(double __a); __DEVICE__ unsigned long long __nv_double2ull_rn(double __a); __DEVICE__ unsigned long long __nv_double2ull_ru(double __a); __DEVICE__ unsigned long long __nv_double2ull_rz(double __a); __DEVICE__ unsigned long long __nv_double_as_longlong(double __a); __DEVICE__ double __nv_drcp_rd(double __a); __DEVICE__ double __nv_drcp_rn(double __a); __DEVICE__ double __nv_drcp_ru(double __a); __DEVICE__ double __nv_drcp_rz(double __a); __DEVICE__ double __nv_dsqrt_rd(double __a); __DEVICE__ double __nv_dsqrt_rn(double __a); __DEVICE__ double __nv_dsqrt_ru(double __a); __DEVICE__ double __nv_dsqrt_rz(double __a); __DEVICE__ double __nv_dsub_rd(double __a, double __b); __DEVICE__ double __nv_dsub_rn(double __a, double __b); __DEVICE__ double __nv_dsub_ru(double __a, double __b); __DEVICE__ double __nv_dsub_rz(double __a, double __b); __DEVICE__ double __nv_erfc(double __a); __DEVICE__ float __nv_erfcf(float __a); __DEVICE__ double __nv_erfcinv(double __a); __DEVICE__ float __nv_erfcinvf(float __a); __DEVICE__ double __nv_erfcx(double __a); __DEVICE__ float __nv_erfcxf(float __a); __DEVICE__ double __nv_erf(double __a); __DEVICE__ float __nv_erff(float __a); __DEVICE__ double __nv_erfinv(double __a); __DEVICE__ float __nv_erfinvf(float __a); __DEVICE__ double __nv_exp10(double __a); __DEVICE__ float __nv_exp10f(float __a); __DEVICE__ double __nv_exp2(double __a); __DEVICE__ float __nv_exp2f(float __a); __DEVICE__ double __nv_exp(double __a); __DEVICE__ float __nv_expf(float __a); __DEVICE__ double __nv_expm1(double __a); __DEVICE__ float __nv_expm1f(float __a); __DEVICE__ double __nv_fabs(double __a); __DEVICE__ float __nv_fabsf(float __a); __DEVICE__ float 
__nv_fadd_rd(float __a, float __b); __DEVICE__ float __nv_fadd_rn(float __a, float __b); __DEVICE__ float __nv_fadd_ru(float __a, float __b); __DEVICE__ float __nv_fadd_rz(float __a, float __b); __DEVICE__ float __nv_fast_cosf(float __a); __DEVICE__ float __nv_fast_exp10f(float __a); __DEVICE__ float __nv_fast_expf(float __a); __DEVICE__ float __nv_fast_fdividef(float __a, float __b); __DEVICE__ float __nv_fast_log10f(float __a); __DEVICE__ float __nv_fast_log2f(float __a); __DEVICE__ float __nv_fast_logf(float __a); __DEVICE__ float __nv_fast_powf(float __a, float __b); __DEVICE__ void __nv_fast_sincosf(float __a, float *__s, float *__c); __DEVICE__ float __nv_fast_sinf(float __a); __DEVICE__ float __nv_fast_tanf(float __a); __DEVICE__ double __nv_fdim(double __a, double __b); __DEVICE__ float __nv_fdimf(float __a, float __b); __DEVICE__ float __nv_fdiv_rd(float __a, float __b); __DEVICE__ float __nv_fdiv_rn(float __a, float __b); __DEVICE__ float __nv_fdiv_ru(float __a, float __b); __DEVICE__ float __nv_fdiv_rz(float __a, float __b); __DEVICE__ int __nv_ffs(int __a); __DEVICE__ int __nv_ffsll(long long __a); __DEVICE__ int __nv_finitef(float __a); __DEVICE__ unsigned short __nv_float2half_rn(float __a); __DEVICE__ int __nv_float2int_rd(float __a); __DEVICE__ int __nv_float2int_rn(float __a); __DEVICE__ int __nv_float2int_ru(float __a); __DEVICE__ int __nv_float2int_rz(float __a); __DEVICE__ long long __nv_float2ll_rd(float __a); __DEVICE__ long long __nv_float2ll_rn(float __a); __DEVICE__ long long __nv_float2ll_ru(float __a); __DEVICE__ long long __nv_float2ll_rz(float __a); __DEVICE__ unsigned int __nv_float2uint_rd(float __a); __DEVICE__ unsigned int __nv_float2uint_rn(float __a); __DEVICE__ unsigned int __nv_float2uint_ru(float __a); __DEVICE__ unsigned int __nv_float2uint_rz(float __a); __DEVICE__ unsigned long long __nv_float2ull_rd(float __a); __DEVICE__ unsigned long long __nv_float2ull_rn(float __a); __DEVICE__ unsigned long long __nv_float2ull_ru(float __a); __DEVICE__ unsigned long long __nv_float2ull_rz(float __a); __DEVICE__ int __nv_float_as_int(float __a); __DEVICE__ unsigned int __nv_float_as_uint(float __a); __DEVICE__ double __nv_floor(double __a); __DEVICE__ float __nv_floorf(float __a); __DEVICE__ double __nv_fma(double __a, double __b, double __c); __DEVICE__ float __nv_fmaf(float __a, float __b, float __c); __DEVICE__ float __nv_fmaf_ieee_rd(float __a, float __b, float __c); __DEVICE__ float __nv_fmaf_ieee_rn(float __a, float __b, float __c); __DEVICE__ float __nv_fmaf_ieee_ru(float __a, float __b, float __c); __DEVICE__ float __nv_fmaf_ieee_rz(float __a, float __b, float __c); __DEVICE__ float __nv_fmaf_rd(float __a, float __b, float __c); __DEVICE__ float __nv_fmaf_rn(float __a, float __b, float __c); __DEVICE__ float __nv_fmaf_ru(float __a, float __b, float __c); __DEVICE__ float __nv_fmaf_rz(float __a, float __b, float __c); __DEVICE__ double __nv_fma_rd(double __a, double __b, double __c); __DEVICE__ double __nv_fma_rn(double __a, double __b, double __c); __DEVICE__ double __nv_fma_ru(double __a, double __b, double __c); __DEVICE__ double __nv_fma_rz(double __a, double __b, double __c); __DEVICE__ double __nv_fmax(double __a, double __b); __DEVICE__ float __nv_fmaxf(float __a, float __b); __DEVICE__ double __nv_fmin(double __a, double __b); __DEVICE__ float __nv_fminf(float __a, float __b); __DEVICE__ double __nv_fmod(double __a, double __b); __DEVICE__ float __nv_fmodf(float __a, float __b); __DEVICE__ float __nv_fmul_rd(float __a, float __b); __DEVICE__ float 
__nv_fmul_rn(float __a, float __b); __DEVICE__ float __nv_fmul_ru(float __a, float __b); __DEVICE__ float __nv_fmul_rz(float __a, float __b); __DEVICE__ float __nv_frcp_rd(float __a); __DEVICE__ float __nv_frcp_rn(float __a); __DEVICE__ float __nv_frcp_ru(float __a); __DEVICE__ float __nv_frcp_rz(float __a); __DEVICE__ double __nv_frexp(double __a, int *__b); __DEVICE__ float __nv_frexpf(float __a, int *__b); __DEVICE__ float __nv_frsqrt_rn(float __a); __DEVICE__ float __nv_fsqrt_rd(float __a); __DEVICE__ float __nv_fsqrt_rn(float __a); __DEVICE__ float __nv_fsqrt_ru(float __a); __DEVICE__ float __nv_fsqrt_rz(float __a); __DEVICE__ float __nv_fsub_rd(float __a, float __b); __DEVICE__ float __nv_fsub_rn(float __a, float __b); __DEVICE__ float __nv_fsub_ru(float __a, float __b); __DEVICE__ float __nv_fsub_rz(float __a, float __b); __DEVICE__ int __nv_hadd(int __a, int __b); __DEVICE__ float __nv_half2float(unsigned short __h); __DEVICE__ double __nv_hiloint2double(int __a, int __b); __DEVICE__ double __nv_hypot(double __a, double __b); __DEVICE__ float __nv_hypotf(float __a, float __b); __DEVICE__ int __nv_ilogb(double __a); __DEVICE__ int __nv_ilogbf(float __a); __DEVICE__ double __nv_int2double_rn(int __a); __DEVICE__ float __nv_int2float_rd(int __a); __DEVICE__ float __nv_int2float_rn(int __a); __DEVICE__ float __nv_int2float_ru(int __a); __DEVICE__ float __nv_int2float_rz(int __a); __DEVICE__ float __nv_int_as_float(int __a); __DEVICE__ int __nv_isfinited(double __a); __DEVICE__ int __nv_isinfd(double __a); __DEVICE__ int __nv_isinff(float __a); __DEVICE__ int __nv_isnand(double __a); __DEVICE__ int __nv_isnanf(float __a); __DEVICE__ double __nv_j0(double __a); __DEVICE__ float __nv_j0f(float __a); __DEVICE__ double __nv_j1(double __a); __DEVICE__ float __nv_j1f(float __a); __DEVICE__ float __nv_jnf(int __a, float __b); __DEVICE__ double __nv_jn(int __a, double __b); __DEVICE__ double __nv_ldexp(double __a, int __b); __DEVICE__ float __nv_ldexpf(float __a, int __b); __DEVICE__ double __nv_lgamma(double __a); __DEVICE__ float __nv_lgammaf(float __a); __DEVICE__ double __nv_ll2double_rd(long long __a); __DEVICE__ double __nv_ll2double_rn(long long __a); __DEVICE__ double __nv_ll2double_ru(long long __a); __DEVICE__ double __nv_ll2double_rz(long long __a); __DEVICE__ float __nv_ll2float_rd(long long __a); __DEVICE__ float __nv_ll2float_rn(long long __a); __DEVICE__ float __nv_ll2float_ru(long long __a); __DEVICE__ float __nv_ll2float_rz(long long __a); __DEVICE__ long long __nv_llabs(long long __a); __DEVICE__ long long __nv_llmax(long long __a, long long __b); __DEVICE__ long long __nv_llmin(long long __a, long long __b); __DEVICE__ long long __nv_llrint(double __a); __DEVICE__ long long __nv_llrintf(float __a); __DEVICE__ long long __nv_llround(double __a); __DEVICE__ long long __nv_llroundf(float __a); __DEVICE__ double __nv_log10(double __a); __DEVICE__ float __nv_log10f(float __a); __DEVICE__ double __nv_log1p(double __a); __DEVICE__ float __nv_log1pf(float __a); __DEVICE__ double __nv_log2(double __a); __DEVICE__ float __nv_log2f(float __a); __DEVICE__ double __nv_logb(double __a); __DEVICE__ float __nv_logbf(float __a); __DEVICE__ double __nv_log(double __a); __DEVICE__ float __nv_logf(float __a); __DEVICE__ double __nv_longlong_as_double(long long __a); __DEVICE__ int __nv_max(int __a, int __b); __DEVICE__ int __nv_min(int __a, int __b); __DEVICE__ double __nv_modf(double __a, double *__b); __DEVICE__ float __nv_modff(float __a, float *__b); __DEVICE__ int __nv_mul24(int __a, int 
__b); __DEVICE__ long long __nv_mul64hi(long long __a, long long __b); __DEVICE__ int __nv_mulhi(int __a, int __b); __DEVICE__ double __nv_nan(const signed char *__a); __DEVICE__ float __nv_nanf(const signed char *__a); __DEVICE__ double __nv_nearbyint(double __a); __DEVICE__ float __nv_nearbyintf(float __a); __DEVICE__ double __nv_nextafter(double __a, double __b); __DEVICE__ float __nv_nextafterf(float __a, float __b); __DEVICE__ double __nv_norm3d(double __a, double __b, double __c); __DEVICE__ float __nv_norm3df(float __a, float __b, float __c); __DEVICE__ double __nv_norm4d(double __a, double __b, double __c, double __d); __DEVICE__ float __nv_norm4df(float __a, float __b, float __c, float __d); __DEVICE__ double __nv_normcdf(double __a); __DEVICE__ float __nv_normcdff(float __a); __DEVICE__ double __nv_normcdfinv(double __a); __DEVICE__ float __nv_normcdfinvf(float __a); __DEVICE__ float __nv_normf(int __a, const float *__b); __DEVICE__ double __nv_norm(int __a, const double *__b); __DEVICE__ int __nv_popc(unsigned int __a); __DEVICE__ int __nv_popcll(unsigned long long __a); __DEVICE__ double __nv_pow(double __a, double __b); __DEVICE__ float __nv_powf(float __a, float __b); __DEVICE__ double __nv_powi(double __a, int __b); __DEVICE__ float __nv_powif(float __a, int __b); __DEVICE__ double __nv_rcbrt(double __a); __DEVICE__ float __nv_rcbrtf(float __a); __DEVICE__ double __nv_rcp64h(double __a); __DEVICE__ double __nv_remainder(double __a, double __b); __DEVICE__ float __nv_remainderf(float __a, float __b); __DEVICE__ double __nv_remquo(double __a, double __b, int *__c); __DEVICE__ float __nv_remquof(float __a, float __b, int *__c); __DEVICE__ int __nv_rhadd(int __a, int __b); __DEVICE__ double __nv_rhypot(double __a, double __b); __DEVICE__ float __nv_rhypotf(float __a, float __b); __DEVICE__ double __nv_rint(double __a); __DEVICE__ float __nv_rintf(float __a); __DEVICE__ double __nv_rnorm3d(double __a, double __b, double __c); __DEVICE__ float __nv_rnorm3df(float __a, float __b, float __c); __DEVICE__ double __nv_rnorm4d(double __a, double __b, double __c, double __d); __DEVICE__ float __nv_rnorm4df(float __a, float __b, float __c, float __d); __DEVICE__ float __nv_rnormf(int __a, const float *__b); __DEVICE__ double __nv_rnorm(int __a, const double *__b); __DEVICE__ double __nv_round(double __a); __DEVICE__ float __nv_roundf(float __a); __DEVICE__ double __nv_rsqrt(double __a); __DEVICE__ float __nv_rsqrtf(float __a); __DEVICE__ int __nv_sad(int __a, int __b, int __c); __DEVICE__ float __nv_saturatef(float __a); __DEVICE__ double __nv_scalbn(double __a, int __b); __DEVICE__ float __nv_scalbnf(float __a, int __b); __DEVICE__ int __nv_signbitd(double __a); __DEVICE__ int __nv_signbitf(float __a); __DEVICE__ void __nv_sincos(double __a, double *__b, double *__c); __DEVICE__ void __nv_sincosf(float __a, float *__b, float *__c); __DEVICE__ void __nv_sincospi(double __a, double *__b, double *__c); __DEVICE__ void __nv_sincospif(float __a, float *__b, float *__c); __DEVICE__ double __nv_sin(double __a); __DEVICE__ float __nv_sinf(float __a); __DEVICE__ double __nv_sinh(double __a); __DEVICE__ float __nv_sinhf(float __a); __DEVICE__ double __nv_sinpi(double __a); __DEVICE__ float __nv_sinpif(float __a); __DEVICE__ double __nv_sqrt(double __a); __DEVICE__ float __nv_sqrtf(float __a); __DEVICE__ double __nv_tan(double __a); __DEVICE__ float __nv_tanf(float __a); __DEVICE__ double __nv_tanh(double __a); __DEVICE__ float __nv_tanhf(float __a); __DEVICE__ double __nv_tgamma(double __a); 
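/* Editor's example (hypothetical helper, not part of libdevice): a minimal
   sketch of how the math declarations above are typically combined in device
   code; assumes a CUDA or OpenMP-NVPTX compile, where __DEVICE__ is defined. */
__DEVICE__ static __inline__ float __example_logisticf(float __x) {
  /* 1 / (1 + e^-x), built from the single-precision primitives declared above. */
  return __nv_frcp_rn(1.0f + __nv_expf(-__x));
}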
__DEVICE__ float __nv_tgammaf(float __a); __DEVICE__ double __nv_trunc(double __a); __DEVICE__ float __nv_truncf(float __a); __DEVICE__ int __nv_uhadd(unsigned int __a, unsigned int __b); __DEVICE__ double __nv_uint2double_rn(unsigned int __i); __DEVICE__ float __nv_uint2float_rd(unsigned int __a); __DEVICE__ float __nv_uint2float_rn(unsigned int __a); __DEVICE__ float __nv_uint2float_ru(unsigned int __a); __DEVICE__ float __nv_uint2float_rz(unsigned int __a); __DEVICE__ float __nv_uint_as_float(unsigned int __a); __DEVICE__ double __nv_ull2double_rd(unsigned long long __a); __DEVICE__ double __nv_ull2double_rn(unsigned long long __a); __DEVICE__ double __nv_ull2double_ru(unsigned long long __a); __DEVICE__ double __nv_ull2double_rz(unsigned long long __a); __DEVICE__ float __nv_ull2float_rd(unsigned long long __a); __DEVICE__ float __nv_ull2float_rn(unsigned long long __a); __DEVICE__ float __nv_ull2float_ru(unsigned long long __a); __DEVICE__ float __nv_ull2float_rz(unsigned long long __a); __DEVICE__ unsigned long long __nv_ullmax(unsigned long long __a, unsigned long long __b); __DEVICE__ unsigned long long __nv_ullmin(unsigned long long __a, unsigned long long __b); __DEVICE__ unsigned int __nv_umax(unsigned int __a, unsigned int __b); __DEVICE__ unsigned int __nv_umin(unsigned int __a, unsigned int __b); __DEVICE__ unsigned int __nv_umul24(unsigned int __a, unsigned int __b); __DEVICE__ unsigned long long __nv_umul64hi(unsigned long long __a, unsigned long long __b); __DEVICE__ unsigned int __nv_umulhi(unsigned int __a, unsigned int __b); __DEVICE__ unsigned int __nv_urhadd(unsigned int __a, unsigned int __b); __DEVICE__ unsigned int __nv_usad(unsigned int __a, unsigned int __b, unsigned int __c); #if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020 __DEVICE__ int __nv_vabs2(int __a); __DEVICE__ int __nv_vabs4(int __a); __DEVICE__ int __nv_vabsdiffs2(int __a, int __b); __DEVICE__ int __nv_vabsdiffs4(int __a, int __b); __DEVICE__ int __nv_vabsdiffu2(int __a, int __b); __DEVICE__ int __nv_vabsdiffu4(int __a, int __b); __DEVICE__ int __nv_vabsss2(int __a); __DEVICE__ int __nv_vabsss4(int __a); __DEVICE__ int __nv_vadd2(int __a, int __b); __DEVICE__ int __nv_vadd4(int __a, int __b); __DEVICE__ int __nv_vaddss2(int __a, int __b); __DEVICE__ int __nv_vaddss4(int __a, int __b); __DEVICE__ int __nv_vaddus2(int __a, int __b); __DEVICE__ int __nv_vaddus4(int __a, int __b); __DEVICE__ int __nv_vavgs2(int __a, int __b); __DEVICE__ int __nv_vavgs4(int __a, int __b); __DEVICE__ int __nv_vavgu2(int __a, int __b); __DEVICE__ int __nv_vavgu4(int __a, int __b); __DEVICE__ int __nv_vcmpeq2(int __a, int __b); __DEVICE__ int __nv_vcmpeq4(int __a, int __b); __DEVICE__ int __nv_vcmpges2(int __a, int __b); __DEVICE__ int __nv_vcmpges4(int __a, int __b); __DEVICE__ int __nv_vcmpgeu2(int __a, int __b); __DEVICE__ int __nv_vcmpgeu4(int __a, int __b); __DEVICE__ int __nv_vcmpgts2(int __a, int __b); __DEVICE__ int __nv_vcmpgts4(int __a, int __b); __DEVICE__ int __nv_vcmpgtu2(int __a, int __b); __DEVICE__ int __nv_vcmpgtu4(int __a, int __b); __DEVICE__ int __nv_vcmples2(int __a, int __b); __DEVICE__ int __nv_vcmples4(int __a, int __b); __DEVICE__ int __nv_vcmpleu2(int __a, int __b); __DEVICE__ int __nv_vcmpleu4(int __a, int __b); __DEVICE__ int __nv_vcmplts2(int __a, int __b); __DEVICE__ int __nv_vcmplts4(int __a, int __b); __DEVICE__ int __nv_vcmpltu2(int __a, int __b); __DEVICE__ int __nv_vcmpltu4(int __a, int __b); __DEVICE__ int __nv_vcmpne2(int __a, int __b); __DEVICE__ int __nv_vcmpne4(int __a, int __b); 
__DEVICE__ int __nv_vhaddu2(int __a, int __b); __DEVICE__ int __nv_vhaddu4(int __a, int __b); __DEVICE__ int __nv_vmaxs2(int __a, int __b); __DEVICE__ int __nv_vmaxs4(int __a, int __b); __DEVICE__ int __nv_vmaxu2(int __a, int __b); __DEVICE__ int __nv_vmaxu4(int __a, int __b); __DEVICE__ int __nv_vmins2(int __a, int __b); __DEVICE__ int __nv_vmins4(int __a, int __b); __DEVICE__ int __nv_vminu2(int __a, int __b); __DEVICE__ int __nv_vminu4(int __a, int __b); __DEVICE__ int __nv_vneg2(int __a); __DEVICE__ int __nv_vneg4(int __a); __DEVICE__ int __nv_vnegss2(int __a); __DEVICE__ int __nv_vnegss4(int __a); __DEVICE__ int __nv_vsads2(int __a, int __b); __DEVICE__ int __nv_vsads4(int __a, int __b); __DEVICE__ int __nv_vsadu2(int __a, int __b); __DEVICE__ int __nv_vsadu4(int __a, int __b); __DEVICE__ int __nv_vseteq2(int __a, int __b); __DEVICE__ int __nv_vseteq4(int __a, int __b); __DEVICE__ int __nv_vsetges2(int __a, int __b); __DEVICE__ int __nv_vsetges4(int __a, int __b); __DEVICE__ int __nv_vsetgeu2(int __a, int __b); __DEVICE__ int __nv_vsetgeu4(int __a, int __b); __DEVICE__ int __nv_vsetgts2(int __a, int __b); __DEVICE__ int __nv_vsetgts4(int __a, int __b); __DEVICE__ int __nv_vsetgtu2(int __a, int __b); __DEVICE__ int __nv_vsetgtu4(int __a, int __b); __DEVICE__ int __nv_vsetles2(int __a, int __b); __DEVICE__ int __nv_vsetles4(int __a, int __b); __DEVICE__ int __nv_vsetleu2(int __a, int __b); __DEVICE__ int __nv_vsetleu4(int __a, int __b); __DEVICE__ int __nv_vsetlts2(int __a, int __b); __DEVICE__ int __nv_vsetlts4(int __a, int __b); __DEVICE__ int __nv_vsetltu2(int __a, int __b); __DEVICE__ int __nv_vsetltu4(int __a, int __b); __DEVICE__ int __nv_vsetne2(int __a, int __b); __DEVICE__ int __nv_vsetne4(int __a, int __b); __DEVICE__ int __nv_vsub2(int __a, int __b); __DEVICE__ int __nv_vsub4(int __a, int __b); __DEVICE__ int __nv_vsubss2(int __a, int __b); __DEVICE__ int __nv_vsubss4(int __a, int __b); __DEVICE__ int __nv_vsubus2(int __a, int __b); __DEVICE__ int __nv_vsubus4(int __a, int __b); #endif // CUDA_VERSION __DEVICE__ double __nv_y0(double __a); __DEVICE__ float __nv_y0f(float __a); __DEVICE__ double __nv_y1(double __a); __DEVICE__ float __nv_y1f(float __a); __DEVICE__ float __nv_ynf(int __a, float __b); __DEVICE__ double __nv_yn(int __a, double __b); #if defined(__OPENMP_NVPTX__) #pragma omp end assumes ext_spmd_amenable no_openmp #endif #if defined(__cplusplus) } // extern "C" #endif #endif // __CLANG_CUDA_LIBDEVICE_DECLARES_H__ arm_cmse.h/*===------------- avx512bitalgintrin.h - BITALG intrinsics ------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __AVX512BITALGINTRIN_H #define __AVX512BITALGINTRIN_H /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512bitalg,evex512"), \ __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi16(__m512i __A) { return (__m512i) __builtin_ia32_vpopcntw_512((__v32hi) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) { return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U, (__v32hi) _mm512_popcnt_epi16(__B), (__v32hi) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) { return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_si512(), __U, __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi8(__m512i __A) { return (__m512i) __builtin_ia32_vpopcntb_512((__v64qi) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) { return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U, (__v64qi) _mm512_popcnt_epi8(__B), (__v64qi) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) { return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_si512(), __U, __B); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B) { return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask((__v64qi) __A, (__v64qi) __B, __U); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) { return _mm512_mask_bitshuffle_epi64_mask((__mmask64) -1, __A, __B); } #undef __DEFAULT_FN_ATTRS #endif /*===------------- avx512cdintrin.h - AVX512CD intrinsics ------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __AVX512CDINTRIN_H #define __AVX512CDINTRIN_H /* Define the default attributes for the functions in this file. 
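 *
 * Editor's note (usage sketch, not part of the original header): an assumed
 * use of the conflict-detection helpers defined below, e.g. checking whether
 * any lane of a scatter index vector repeats an earlier lane:
 *
 *   __m512i idx       = _mm512_loadu_si512(indices);
 *   __m512i conflicts = _mm512_conflict_epi32(idx);  // lane i: bit j set if
 *                                                    // idx[i] == idx[j], j < i
 *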
*/ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512cd,evex512"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_conflict_epi64 (__m512i __A) { return (__m512i) __builtin_ia32_vpconflictdi_512 ((__v8di) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_conflict_epi64(__A), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_conflict_epi64(__A), (__v8di)_mm512_setzero_si512 ()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_conflict_epi32 (__m512i __A) { return (__m512i) __builtin_ia32_vpconflictsi_512 ((__v16si) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi32 (__m512i __A) { return (__m512i) __builtin_ia32_vplzcntd_512 ((__v16si) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_lzcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_lzcnt_epi32(__A), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_lzcnt_epi32 (__mmask16 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_lzcnt_epi32(__A), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi64 (__m512i __A) { return (__m512i) __builtin_ia32_vplzcntq_512 ((__v8di) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_lzcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_lzcnt_epi64(__A), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_lzcnt_epi64(__A), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcastmb_epi64 (__mmask8 __A) { return (__m512i) _mm512_set1_epi64((long long) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcastmw_epi32 (__mmask16 __A) { return (__m512i) _mm512_set1_epi32((int) __A); } #undef __DEFAULT_FN_ATTRS #endif /*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __AVX512DQINTRIN_H #define __AVX512DQINTRIN_H /* Define the default attributes for the functions in this file. 
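 *
 * Editor's note (usage sketch, not part of the original header): an assumed
 * example combining the 8-bit mask helpers and the 64-bit lane multiply that
 * this header defines below:
 *
 *   __mmask8 m    = _kand_mask8(m0, m1);                   // intersect two lane masks
 *   __m512i  prod = _mm512_mask_mullo_epi64(src, m, a, b); // low 64 bits per selected lane
 *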
*/ #define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq,evex512"), __min_vector_width__(512))) #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512dq,no-evex512"))) static __inline __mmask8 __DEFAULT_FN_ATTRS _knot_mask8(__mmask8 __M) { return __builtin_ia32_knotqi(__M); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS _kand_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS _kandn_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS _kor_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS _kxnor_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS _kxor_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestc_mask8_u8(__mmask8 __A, __mmask8 __B) { return (unsigned char)__builtin_ia32_kortestcqi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestz_mask8_u8(__mmask8 __A, __mmask8 __B) { return (unsigned char)__builtin_ia32_kortestzqi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) { *__C = (unsigned char)__builtin_ia32_kortestcqi(__A, __B); return (unsigned char)__builtin_ia32_kortestzqi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _ktestc_mask8_u8(__mmask8 __A, __mmask8 __B) { return (unsigned char)__builtin_ia32_ktestcqi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _ktestz_mask8_u8(__mmask8 __A, __mmask8 __B) { return (unsigned char)__builtin_ia32_ktestzqi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _ktest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) { *__C = (unsigned char)__builtin_ia32_ktestcqi(__A, __B); return (unsigned char)__builtin_ia32_ktestzqi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _ktestc_mask16_u8(__mmask16 __A, __mmask16 __B) { return (unsigned char)__builtin_ia32_ktestchi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _ktestz_mask16_u8(__mmask16 __A, __mmask16 __B) { return (unsigned char)__builtin_ia32_ktestzhi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _ktest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) { *__C = (unsigned char)__builtin_ia32_ktestchi(__A, __B); return (unsigned char)__builtin_ia32_ktestzhi(__A, __B); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS _kadd_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kaddqi((__mmask8)__A, (__mmask8)__B); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS _kadd_mask16(__mmask16 __A, __mmask16 __B) { return (__mmask16)__builtin_ia32_kaddhi((__mmask16)__A, (__mmask16)__B); } #define _kshiftli_mask8(A, I) \ ((__mmask8)__builtin_ia32_kshiftliqi((__mmask8)(A), (unsigned int)(I))) #define _kshiftri_mask8(A, I) \ ((__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I))) static __inline__ unsigned int __DEFAULT_FN_ATTRS _cvtmask8_u32(__mmask8 __A) { return (unsigned int)__builtin_ia32_kmovb((__mmask8)__A); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS 
_cvtu32_mask8(unsigned int __A) { return (__mmask8)__builtin_ia32_kmovb((__mmask8)__A); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS _load_mask8(__mmask8 *__A) { return (__mmask8)__builtin_ia32_kmovb(*(__mmask8 *)__A); } static __inline__ void __DEFAULT_FN_ATTRS _store_mask8(__mmask8 *__A, __mmask8 __B) { *(__mmask8 *)__A = __builtin_ia32_kmovb((__mmask8)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mullo_epi64 (__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A * (__v8du) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_mullo_epi64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_mullo_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_xor_pd(__m512d __A, __m512d __B) { return (__m512d)((__v8du)__A ^ (__v8du)__B); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_xor_pd(__A, __B), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_xor_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_xor_ps (__m512 __A, __m512 __B) { return (__m512)((__v16su)__A ^ (__v16su)__B); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_xor_ps(__A, __B), (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_xor_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_or_pd(__m512d __A, __m512d __B) { return (__m512d)((__v8du)__A | (__v8du)__B); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_or_pd(__A, __B), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_or_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_or_ps(__m512 __A, __m512 __B) { return (__m512)((__v16su)__A | (__v16su)__B); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_or_ps(__A, __B), (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_or_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_and_pd(__m512d __A, __m512d __B) { return (__m512d)((__v8du)__A & (__v8du)__B); } static __inline__ __m512d 
__DEFAULT_FN_ATTRS512 _mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_and_pd(__A, __B), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_and_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_and_ps(__m512 __A, __m512 __B) { return (__m512)((__v16su)__A & (__v16su)__B); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_and_ps(__A, __B), (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_and_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_andnot_pd(__m512d __A, __m512d __B) { return (__m512d)(~(__v8du)__A & (__v8du)__B); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_andnot_pd(__A, __B), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_andnot_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_andnot_ps(__m512 __A, __m512 __B) { return (__m512)(~(__v16su)__A & (__v16su)__B); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_andnot_ps(__A, __B), (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_andnot_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi64 (__m512d __A) { return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, (__v8di) _mm512_setzero_si512(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) { return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, (__v8di) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) { return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, (__v8di) _mm512_setzero_si512(), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundpd_epi64(A, R) \ ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) \ ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ (__v8di)(__m512i)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundpd_epi64(U, A, R) \ ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ (__mmask8)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu64 
(__m512d __A) { return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, (__v8di) _mm512_setzero_si512(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) { return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, (__v8di) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) { return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, (__v8di) _mm512_setzero_si512(), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundpd_epu64(A, R) \ ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) \ ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ (__v8di)(__m512i)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundpd_epu64(U, A, R) \ ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ (__mmask8)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi64 (__m256 __A) { return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, (__v8di) _mm512_setzero_si512(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) { return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, (__v8di) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) { return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, (__v8di) _mm512_setzero_si512(), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundps_epi64(A, R) \ ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundps_epi64(W, U, A, R) \ ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ (__v8di)(__m512i)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundps_epi64(U, A, R) \ ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ (__mmask8)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu64 (__m256 __A) { return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, (__v8di) _mm512_setzero_si512(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) { return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, (__v8di) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) { return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, (__v8di) _mm512_setzero_si512(), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundps_epu64(A, R) \ ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundps_epu64(W, U, A, R) \ ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ (__v8di)(__m512i)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundps_epu64(U, A, R) \ ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ 
(__v8di)_mm512_setzero_si512(), \ (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_pd (__m512i __A) { return (__m512d)__builtin_convertvector((__v8di)__A, __v8df); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_cvtepi64_pd(__A), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_cvtepi64_pd(__A), (__v8df)_mm512_setzero_pd()); } #define _mm512_cvt_roundepi64_pd(A, R) \ ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) \ ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ (__v8df)(__m512d)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundepi64_pd(U, A, R) \ ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), (int)(R))) static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_ps (__m512i __A) { return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, (__v8sf) _mm256_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) { return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, (__v8sf) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) { return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, (__v8sf) _mm256_setzero_ps(), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundepi64_ps(A, R) \ ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) \ ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ (__v8sf)(__m256)(W), (__mmask8)(U), \ (int)(R))) #define _mm512_maskz_cvt_roundepi64_ps(U, A, R) \ ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi64 (__m512d __A) { return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, (__v8di) _mm512_setzero_si512(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) { return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, (__v8di) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) { return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, (__v8di) _mm512_setzero_si512(), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvtt_roundpd_epi64(A, R) \ ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) \ ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ (__v8di)(__m512i)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) \ ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ 
(__v8di)_mm512_setzero_si512(), \ (__mmask8)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu64 (__m512d __A) { return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, (__v8di) _mm512_setzero_si512(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) { return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, (__v8di) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) { return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, (__v8di) _mm512_setzero_si512(), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvtt_roundpd_epu64(A, R) \ ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) \ ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ (__v8di)(__m512i)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) \ ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ (__v8di)_mm512_setzero_si512(), \ (__mmask8)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi64 (__m256 __A) { return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, (__v8di) _mm512_setzero_si512(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) { return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, (__v8di) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) { return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, (__v8di) _mm512_setzero_si512(), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvtt_roundps_epi64(A, R) \ ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) \ ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ (__v8di)(__m512i)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvtt_roundps_epi64(U, A, R) \ ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ (__mmask8)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu64 (__m256 __A) { return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, (__v8di) _mm512_setzero_si512(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) { return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, (__v8di) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) { return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, (__v8di) _mm512_setzero_si512(), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvtt_roundps_epu64(A, R) \ ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) \ ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ (__v8di)(__m512i)(W), \ 
(__mmask8)(U), (int)(R))) #define _mm512_maskz_cvtt_roundps_epu64(U, A, R) \ ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ (__v8di)_mm512_setzero_si512(), \ (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu64_pd (__m512i __A) { return (__m512d)__builtin_convertvector((__v8du)__A, __v8df); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_cvtepu64_pd(__A), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_cvtepu64_pd(__A), (__v8df)_mm512_setzero_pd()); } #define _mm512_cvt_roundepu64_pd(A, R) \ ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) \ ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ (__v8df)(__m512d)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundepu64_pd(U, A, R) \ ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), (int)(R))) static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtepu64_ps (__m512i __A) { return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, (__v8sf) _mm256_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) { return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, (__v8sf) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) { return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, (__v8sf) _mm256_setzero_ps(), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundepu64_ps(A, R) \ ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) \ ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ (__v8sf)(__m256)(W), (__mmask8)(U), \ (int)(R))) #define _mm512_maskz_cvt_roundepu64_ps(U, A, R) \ ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U), (int)(R))) #define _mm512_range_pd(A, B, C) \ ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)-1, \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_range_pd(W, U, A, B, C) \ ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ (__v8df)(__m512d)(W), (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_range_pd(U, A, B, C) \ ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_range_round_pd(A, B, C, R) \ ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_range_round_pd(W, U, A, B, C, R) \ ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ (__v8df)(__m512d)(W), (__mmask8)(U), \ (int)(R))) 
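/* Editorial usage sketch (not part of the original avx512dqintrin.h): a minimal example of
 * how the mask helpers and the masked arithmetic defined above are typically combined. It
 * assumes a compiler and CPU with AVX512F and AVX512DQ (e.g. built with -mavx512dq); the
 * function name, the 0x55 lane mask, and the constant 3 are illustrative only.
 *
 *   #include <immintrin.h>
 *
 *   static inline __m512i scale_even_lanes(__m512i v) {
 *     __mmask8 even = _cvtu32_mask8(0x55);            // select lanes 0, 2, 4, 6
 *     __m512i three = _mm512_set1_epi64(3);
 *     // Low 64-bit multiply on the selected lanes; unselected lanes keep v.
 *     return _mm512_mask_mullo_epi64(v, even, v, three);
 *   }
 */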
#define _mm512_maskz_range_round_pd(U, A, B, C, R) \ ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(C), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), (int)(R))) #define _mm512_range_ps(A, B, C) \ ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)-1, \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_range_ps(W, U, A, B, C) \ ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ (__v16sf)(__m512)(W), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_range_ps(U, A, B, C) \ ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_range_round_ps(A, B, C, R) \ ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_range_round_ps(W, U, A, B, C, R) \ ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ (__v16sf)(__m512)(W), (__mmask16)(U), \ (int)(R))) #define _mm512_maskz_range_round_ps(U, A, B, C, R) \ ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(C), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), (int)(R))) #define _mm_range_round_ss(A, B, C, R) \ ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8) -1, (int)(C),\ (int)(R))) #define _mm_range_ss(A ,B , C) _mm_range_round_ss(A, B, C ,_MM_FROUND_CUR_DIRECTION) #define _mm_mask_range_round_ss(W, U, A, B, C, R) \ ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W),\ (__mmask8)(U), (int)(C),\ (int)(R))) #define _mm_mask_range_ss(W , U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C , _MM_FROUND_CUR_DIRECTION) #define _mm_maskz_range_round_ss(U, A, B, C, R) \ ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(C),\ (int)(R))) #define _mm_maskz_range_ss(U, A ,B , C) _mm_maskz_range_round_ss(U, A, B, C ,_MM_FROUND_CUR_DIRECTION) #define _mm_range_round_sd(A, B, C, R) \ ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8) -1, (int)(C),\ (int)(R))) #define _mm_range_sd(A ,B , C) _mm_range_round_sd(A, B, C ,_MM_FROUND_CUR_DIRECTION) #define _mm_mask_range_round_sd(W, U, A, B, C, R) \ ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W),\ (__mmask8)(U), (int)(C),\ (int)(R))) #define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C ,_MM_FROUND_CUR_DIRECTION) #define _mm_maskz_range_round_sd(U, A, B, C, R) \ ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(C),\ (int)(R))) #define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C ,_MM_FROUND_CUR_DIRECTION) #define _mm512_reduce_pd(A, B) \ ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)-1, \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_reduce_pd(W, U, A, B) \ 
((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)(__m512d)(W), \ (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_reduce_pd(U, A, B) \ ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_reduce_ps(A, B) \ ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)-1, \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_reduce_ps(W, U, A, B) \ ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ (__v16sf)(__m512)(W), \ (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_reduce_ps(U, A, B) \ ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_reduce_round_pd(A, B, R) \ ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_reduce_round_pd(W, U, A, B, R) \ ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)(__m512d)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_reduce_round_pd(U, A, B, R) \ ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), (int)(R))) #define _mm512_reduce_round_ps(A, B, R) \ ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_reduce_round_ps(W, U, A, B, R) \ ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ (__v16sf)(__m512)(W), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_reduce_round_ps(U, A, B, R) \ ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), (int)(R))) #define _mm_reduce_ss(A, B, C) \ ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \ (int)(C), _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_reduce_ss(W, U, A, B, C) \ ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ (int)(C), _MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_reduce_ss(U, A, B, C) \ ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(C), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_reduce_round_ss(A, B, C, R) \ ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \ (int)(C), (int)(R))) #define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \ ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ (int)(C), (int)(R))) #define _mm_maskz_reduce_round_ss(U, A, B, C, R) \ ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(C), (int)(R))) #define _mm_reduce_sd(A, B, C) \ ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(C), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_reduce_sd(W, U, A, B, C) \ ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), (__mmask8)(U), \ (int)(C), 
_MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_reduce_sd(U, A, B, C) \ ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(C), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_reduce_round_sd(A, B, C, R) \ ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(C), (int)(R))) #define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \ ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), (__mmask8)(U), \ (int)(C), (int)(R))) #define _mm_maskz_reduce_round_sd(U, A, B, C, R) \ ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(C), (int)(R))) static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_movepi32_mask (__m512i __A) { return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_movm_epi32 (__mmask16 __A) { return (__m512i) __builtin_ia32_cvtmask2d512 (__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_movm_epi64 (__mmask8 __A) { return (__m512i) __builtin_ia32_cvtmask2q512 (__A); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_movepi64_mask (__m512i __A) { return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcast_f32x2 (__m128 __A) { return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x2(__A), (__v16sf)__O); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x2(__A), (__v16sf)_mm512_setzero_ps()); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcast_f32x8(__m256 __A) { return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x8(__A), (__v16sf)__O); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x8(__A), (__v16sf)_mm512_setzero_ps()); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcast_f64x2(__m128d __A) { return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, 0, 1, 0, 1, 0, 1, 0, 1); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, (__v8df)_mm512_broadcast_f64x2(__A), (__v8df)__O); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, (__v8df)_mm512_broadcast_f64x2(__A), (__v8df)_mm512_setzero_pd()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i32x2 (__m128i __A) { return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 0, 1, 0, 
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x2(__A), (__v16si)__O); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x2(__A), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i32x8(__m256i __A) { return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x8(__A), (__v16si)__O); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x8(__A), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i64x2(__m128i __A) { return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, 0, 1, 0, 1, 0, 1, 0, 1); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_broadcast_i64x2(__A), (__v8di)__O); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_broadcast_i64x2(__A), (__v8di)_mm512_setzero_si512()); } #define _mm512_extractf32x8_ps(A, imm) \ ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ (__v8sf)_mm256_undefined_ps(), \ (__mmask8)-1)) #define _mm512_mask_extractf32x8_ps(W, U, A, imm) \ ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ (__v8sf)(__m256)(W), \ (__mmask8)(U))) #define _mm512_maskz_extractf32x8_ps(U, A, imm) \ ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U))) #define _mm512_extractf64x2_pd(A, imm) \ ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ (int)(imm), \ (__v2df)_mm_undefined_pd(), \ (__mmask8)-1)) #define _mm512_mask_extractf64x2_pd(W, U, A, imm) \ ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ (int)(imm), \ (__v2df)(__m128d)(W), \ (__mmask8)(U))) #define _mm512_maskz_extractf64x2_pd(U, A, imm) \ ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ (int)(imm), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U))) #define _mm512_extracti32x8_epi32(A, imm) \ ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ (__v8si)_mm256_undefined_si256(), \ (__mmask8)-1)) #define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \ ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ (__v8si)(__m256i)(W), \ (__mmask8)(U))) #define _mm512_maskz_extracti32x8_epi32(U, A, imm) \ ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ (__v8si)_mm256_setzero_si256(), \ (__mmask8)(U))) #define _mm512_extracti64x2_epi64(A, imm) \ ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ (int)(imm), \ 
(__v2di)_mm_undefined_si128(), \ (__mmask8)-1)) #define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \ ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ (int)(imm), \ (__v2di)(__m128i)(W), \ (__mmask8)(U))) #define _mm512_maskz_extracti64x2_epi64(U, A, imm) \ ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ (int)(imm), \ (__v2di)_mm_setzero_si128(), \ (__mmask8)(U))) #define _mm512_insertf32x8(A, B, imm) \ ((__m512)__builtin_ia32_insertf32x8((__v16sf)(__m512)(A), \ (__v8sf)(__m256)(B), (int)(imm))) #define _mm512_mask_insertf32x8(W, U, A, B, imm) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \ (__v16sf)(__m512)(W))) #define _mm512_maskz_insertf32x8(U, A, B, imm) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \ (__v16sf)_mm512_setzero_ps())) #define _mm512_insertf64x2(A, B, imm) \ ((__m512d)__builtin_ia32_insertf64x2_512((__v8df)(__m512d)(A), \ (__v2df)(__m128d)(B), (int)(imm))) #define _mm512_mask_insertf64x2(W, U, A, B, imm) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_insertf64x2((A), (B), (imm)), \ (__v8df)(__m512d)(W))) #define _mm512_maskz_insertf64x2(U, A, B, imm) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_insertf64x2((A), (B), (imm)), \ (__v8df)_mm512_setzero_pd())) #define _mm512_inserti32x8(A, B, imm) \ ((__m512i)__builtin_ia32_inserti32x8((__v16si)(__m512i)(A), \ (__v8si)(__m256i)(B), (int)(imm))) #define _mm512_mask_inserti32x8(W, U, A, B, imm) \ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_inserti32x8((A), (B), (imm)), \ (__v16si)(__m512i)(W))) #define _mm512_maskz_inserti32x8(U, A, B, imm) \ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_inserti32x8((A), (B), (imm)), \ (__v16si)_mm512_setzero_si512())) #define _mm512_inserti64x2(A, B, imm) \ ((__m512i)__builtin_ia32_inserti64x2_512((__v8di)(__m512i)(A), \ (__v2di)(__m128i)(B), (int)(imm))) #define _mm512_mask_inserti64x2(W, U, A, B, imm) \ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_inserti64x2((A), (B), (imm)), \ (__v8di)(__m512i)(W))) #define _mm512_maskz_inserti64x2(U, A, B, imm) \ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_inserti64x2((A), (B), (imm)), \ (__v8di)_mm512_setzero_si512())) #define _mm512_mask_fpclass_ps_mask(U, A, imm) \ ((__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ (int)(imm), (__mmask16)(U))) #define _mm512_fpclass_ps_mask(A, imm) \ ((__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ (int)(imm), (__mmask16)-1)) #define _mm512_mask_fpclass_pd_mask(U, A, imm) \ ((__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ (__mmask8)(U))) #define _mm512_fpclass_pd_mask(A, imm) \ ((__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ (__mmask8)-1)) #define _mm_fpclass_sd_mask(A, imm) \ ((__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ (__mmask8)-1)) #define _mm_mask_fpclass_sd_mask(U, A, imm) \ ((__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ (__mmask8)(U))) #define _mm_fpclass_ss_mask(A, imm) \ ((__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ (__mmask8)-1)) #define _mm_mask_fpclass_ss_mask(U, A, imm) \ ((__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ (__mmask8)(U))) #undef __DEFAULT_FN_ATTRS512 #undef 
__DEFAULT_FN_ATTRS
#endif
//===----- sifive_vector.h - SiFive Vector definitions --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _SIFIVE_VECTOR_H_
#define _SIFIVE_VECTOR_H_
#include "riscv_vector.h"
#pragma clang riscv intrinsic sifive_vector
#define __riscv_sf_vc_x_se_u8mf4(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 6, vl)
#define __riscv_sf_vc_x_se_u8mf2(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 7, vl)
#define __riscv_sf_vc_x_se_u8m1(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 0, vl)
#define __riscv_sf_vc_x_se_u8m2(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 1, vl)
#define __riscv_sf_vc_x_se_u8m4(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 2, vl)
#define __riscv_sf_vc_x_se_u8m8(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 3, vl)
#define __riscv_sf_vc_x_se_u16mf2(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint16_t)rs1, 16, 7, vl)
#define __riscv_sf_vc_x_se_u16m1(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint16_t)rs1, 16, 0, vl)
#define __riscv_sf_vc_x_se_u16m2(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint16_t)rs1, 16, 1, vl)
#define __riscv_sf_vc_x_se_u16m4(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint16_t)rs1, 16, 2, vl)
#define __riscv_sf_vc_x_se_u16m8(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint16_t)rs1, 16, 3, vl)
#define __riscv_sf_vc_x_se_u32m1(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint32_t)rs1, 32, 0, vl)
#define __riscv_sf_vc_x_se_u32m2(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint32_t)rs1, 32, 1, vl)
#define __riscv_sf_vc_x_se_u32m4(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint32_t)rs1, 32, 2, vl)
#define __riscv_sf_vc_x_se_u32m8(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint32_t)rs1, 32, 3, vl)
#define __riscv_sf_vc_i_se_u8mf4(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 6, vl)
#define __riscv_sf_vc_i_se_u8mf2(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 7, vl)
#define __riscv_sf_vc_i_se_u8m1(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 0, vl)
#define __riscv_sf_vc_i_se_u8m2(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 1, vl)
#define __riscv_sf_vc_i_se_u8m4(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 2, vl)
#define __riscv_sf_vc_i_se_u8m8(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 3, vl)
#define __riscv_sf_vc_i_se_u16mf2(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 16, 7, vl)
#define __riscv_sf_vc_i_se_u16m1(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 16, 0, vl)
#define __riscv_sf_vc_i_se_u16m2(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 16, 1, vl)
#define __riscv_sf_vc_i_se_u16m4(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 16, 2, vl)
#define __riscv_sf_vc_i_se_u16m8(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 16, 3, vl)
#define __riscv_sf_vc_i_se_u32m1(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 32, 0, vl)
#define __riscv_sf_vc_i_se_u32m2(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 32, 1, vl)
#define __riscv_sf_vc_i_se_u32m4(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 32, 2, vl)
#define __riscv_sf_vc_i_se_u32m8(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 32, 3, vl)
#if __riscv_v_elen >= 64
#define __riscv_sf_vc_x_se_u8mf8(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 5, vl)
#define __riscv_sf_vc_x_se_u16mf4(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint16_t)rs1, 16, 6, vl)
#define __riscv_sf_vc_x_se_u32mf2(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint32_t)rs1, 32, 7, vl)
#define __riscv_sf_vc_i_se_u8mf8(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 5, vl)
#define __riscv_sf_vc_i_se_u16mf4(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 16, 6, vl)
#define __riscv_sf_vc_i_se_u32mf2(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 32, 7, vl)
#define __riscv_sf_vc_i_se_u64m1(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 64, 0, vl)
#define __riscv_sf_vc_i_se_u64m2(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 64, 1, vl)
#define __riscv_sf_vc_i_se_u64m4(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 64, 2, vl)
#define __riscv_sf_vc_i_se_u64m8(p27_26, p24_20, p11_7, simm5, vl) \
  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 64, 3, vl)
#if __riscv_xlen >= 64
#define __riscv_sf_vc_x_se_u64m1(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint64_t)rs1, 64, 0, vl)
#define __riscv_sf_vc_x_se_u64m2(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint64_t)rs1, 64, 1, vl)
#define __riscv_sf_vc_x_se_u64m4(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint64_t)rs1, 64, 2, vl)
#define __riscv_sf_vc_x_se_u64m8(p27_26, p24_20, p11_7, rs1, vl) \
  __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint64_t)rs1, 64, 3, vl)
#endif
#endif
#endif //_SIFIVE_VECTOR_H_
/*===---- unwind.h - Stack unwinding ----------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
/* See "Data Definitions for libgcc_s" in the Linux Standard Base.*/
#ifndef __CLANG_UNWIND_H
#define __CLANG_UNWIND_H
#if defined(__APPLE__) && __has_include_next(<unwind.h>)
/* Darwin (from 11.x on) provides an unwind.h. If that's available,
 * use it.
 * libunwind wraps some of its definitions in #ifdef _GNU_SOURCE,
 * so define that around the include.*/
# ifndef _GNU_SOURCE
# define _SHOULD_UNDEFINE_GNU_SOURCE
# define _GNU_SOURCE
# endif
// libunwind's unwind.h reflects the current visibility. However, Mozilla
// builds with -fvisibility=hidden and relies on gcc's unwind.h to reset the
// visibility to default and export its contents. gcc also allows users to
// override its override by #defining HIDE_EXPORTS (but note, this only obeys
// the user's -fvisibility setting; it doesn't hide any exports on its own). We
// imitate gcc's header here:
# ifdef HIDE_EXPORTS
# include_next <unwind.h>
# else
# pragma GCC visibility push(default)
# include_next <unwind.h>
# pragma GCC visibility pop
# endif
# ifdef _SHOULD_UNDEFINE_GNU_SOURCE
# undef _GNU_SOURCE
# undef _SHOULD_UNDEFINE_GNU_SOURCE
# endif
#else
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/* It is a bit strange for a header to play with the visibility of the
   symbols it declares, but this matches gcc's behavior and some programs
   depend on it */
#ifndef HIDE_EXPORTS
#pragma GCC visibility push(default)
#endif
typedef uintptr_t _Unwind_Word __attribute__((__mode__(__unwind_word__)));
typedef intptr_t _Unwind_Sword __attribute__((__mode__(__unwind_word__)));
typedef uintptr_t _Unwind_Ptr;
typedef uintptr_t _Unwind_Internal_Ptr;
typedef uint64_t _Unwind_Exception_Class;
typedef intptr_t _sleb128_t;
typedef uintptr_t _uleb128_t;
struct _Unwind_Context;
#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || \
    defined(__ARM_DWARF_EH__) || defined(__SEH__))
struct _Unwind_Control_Block;
typedef struct _Unwind_Control_Block _Unwind_Control_Block;
#define _Unwind_Exception _Unwind_Control_Block /* Alias */
#else
struct _Unwind_Exception;
typedef struct _Unwind_Exception _Unwind_Exception;
#endif
typedef enum {
  _URC_NO_REASON = 0,
#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \
    !defined(__ARM_DWARF_EH__) && !defined(__SEH__)
  _URC_OK = 0, /* used by ARM EHABI */
#endif
  _URC_FOREIGN_EXCEPTION_CAUGHT = 1,
  _URC_FATAL_PHASE2_ERROR = 2,
  _URC_FATAL_PHASE1_ERROR = 3,
  _URC_NORMAL_STOP = 4,
  _URC_END_OF_STACK = 5,
  _URC_HANDLER_FOUND = 6,
  _URC_INSTALL_CONTEXT = 7,
  _URC_CONTINUE_UNWIND = 8,
#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \
    !defined(__ARM_DWARF_EH__) && !defined(__SEH__)
  _URC_FAILURE = 9 /* used by ARM EHABI */
#endif
} _Unwind_Reason_Code;
typedef enum {
  _UA_SEARCH_PHASE = 1,
  _UA_CLEANUP_PHASE = 2,
  _UA_HANDLER_FRAME = 4,
  _UA_FORCE_UNWIND = 8,
  _UA_END_OF_STACK = 16 /* gcc extension to C++ ABI */
} _Unwind_Action;
typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code,
                                             _Unwind_Exception *);
#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || \
    defined(__ARM_DWARF_EH__) || defined(__SEH__))
typedef struct _Unwind_Control_Block _Unwind_Control_Block;
typedef uint32_t _Unwind_EHT_Header;
struct _Unwind_Control_Block {
  uint64_t exception_class;
  void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block *);
  /* unwinder cache (private fields for the unwinder's use) */
  struct {
    uint32_t reserved1; /* forced unwind stop function, 0 if not forced */
    uint32_t reserved2; /* personality routine */
    uint32_t reserved3; /* callsite */
    uint32_t reserved4; /* forced unwind stop argument */
    uint32_t reserved5;
  } unwinder_cache;
  /* propagation barrier cache (valid after phase 1) */
  struct {
    uint32_t sp;
    uint32_t bitpattern[5];
  } barrier_cache;
  /* cleanup cache (preserved over cleanup) */
  struct {
    uint32_t bitpattern[4];
  } cleanup_cache;
  /* personality cache
(for personality's benefit) */ struct { uint32_t fnstart; /* function start address */ _Unwind_EHT_Header *ehtp; /* pointer to EHT entry header word */ uint32_t additional; /* additional data */ uint32_t reserved1; } pr_cache; long long int : 0; /* force alignment of next item to 8-byte boundary */ } __attribute__((__aligned__(8))); #else struct _Unwind_Exception { _Unwind_Exception_Class exception_class; _Unwind_Exception_Cleanup_Fn exception_cleanup; #if !defined (__USING_SJLJ_EXCEPTIONS__) && defined (__SEH__) _Unwind_Word private_[6]; #else _Unwind_Word private_1; _Unwind_Word private_2; #endif /* The Itanium ABI requires that _Unwind_Exception objects are "double-word * aligned". GCC has interpreted this to mean "use the maximum useful * alignment for the target"; so do we. */ } __attribute__((__aligned__)); #endif typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action, _Unwind_Exception_Class, _Unwind_Exception *, struct _Unwind_Context *, void *); typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(int, _Unwind_Action, _Unwind_Exception_Class, _Unwind_Exception *, struct _Unwind_Context *); typedef _Unwind_Personality_Fn __personality_routine; typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *, void *); #if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || \ defined(__ARM_DWARF_EH__) || defined(__SEH__)) typedef enum { _UVRSC_CORE = 0, /* integer register */ _UVRSC_VFP = 1, /* vfp */ _UVRSC_WMMXD = 3, /* Intel WMMX data register */ _UVRSC_WMMXC = 4, /* Intel WMMX control register */ _UVRSC_PSEUDO = 5 /* Special purpose pseudo register */ } _Unwind_VRS_RegClass; typedef enum { _UVRSD_UINT32 = 0, _UVRSD_VFPX = 1, _UVRSD_UINT64 = 3, _UVRSD_FLOAT = 4, _UVRSD_DOUBLE = 5 } _Unwind_VRS_DataRepresentation; typedef enum { _UVRSR_OK = 0, _UVRSR_NOT_IMPLEMENTED = 1, _UVRSR_FAILED = 2 } _Unwind_VRS_Result; typedef uint32_t _Unwind_State; #define _US_VIRTUAL_UNWIND_FRAME ((_Unwind_State)0) #define _US_UNWIND_FRAME_STARTING ((_Unwind_State)1) #define _US_UNWIND_FRAME_RESUME ((_Unwind_State)2) #define _US_ACTION_MASK ((_Unwind_State)3) #define _US_FORCE_UNWIND ((_Unwind_State)8) _Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context, _Unwind_VRS_RegClass __regclass, uint32_t __regno, _Unwind_VRS_DataRepresentation __representation, void *__valuep); _Unwind_VRS_Result _Unwind_VRS_Set(struct _Unwind_Context *__context, _Unwind_VRS_RegClass __regclass, uint32_t __regno, _Unwind_VRS_DataRepresentation __representation, void *__valuep); static __inline__ _Unwind_Word _Unwind_GetGR(struct _Unwind_Context *__context, int __index) { _Unwind_Word __value; _Unwind_VRS_Get(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value); return __value; } static __inline__ void _Unwind_SetGR(struct _Unwind_Context *__context, int __index, _Unwind_Word __value) { _Unwind_VRS_Set(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value); } static __inline__ _Unwind_Word _Unwind_GetIP(struct _Unwind_Context *__context) { _Unwind_Word __ip = _Unwind_GetGR(__context, 15); return __ip & ~(_Unwind_Word)(0x1); /* Remove thumb mode bit. 
*/ } static __inline__ void _Unwind_SetIP(struct _Unwind_Context *__context, _Unwind_Word __value) { _Unwind_Word __thumb_mode_bit = _Unwind_GetGR(__context, 15) & 0x1; _Unwind_SetGR(__context, 15, __value | __thumb_mode_bit); } #else _Unwind_Word _Unwind_GetGR(struct _Unwind_Context *, int); void _Unwind_SetGR(struct _Unwind_Context *, int, _Unwind_Word); _Unwind_Word _Unwind_GetIP(struct _Unwind_Context *); void _Unwind_SetIP(struct _Unwind_Context *, _Unwind_Word); #endif _Unwind_Word _Unwind_GetIPInfo(struct _Unwind_Context *, int *); _Unwind_Word _Unwind_GetCFA(struct _Unwind_Context *); _Unwind_Word _Unwind_GetBSP(struct _Unwind_Context *); void *_Unwind_GetLanguageSpecificData(struct _Unwind_Context *); _Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *); /* DWARF EH functions; currently not available on Darwin/ARM */ #if !defined(__APPLE__) || !defined(__arm__) _Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Exception *); _Unwind_Reason_Code _Unwind_ForcedUnwind(_Unwind_Exception *, _Unwind_Stop_Fn, void *); void _Unwind_DeleteException(_Unwind_Exception *); void _Unwind_Resume(_Unwind_Exception *); _Unwind_Reason_Code _Unwind_Resume_or_Rethrow(_Unwind_Exception *); #endif _Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *); /* setjmp(3)/longjmp(3) stuff */ typedef struct SjLj_Function_Context *_Unwind_FunctionContext_t; void _Unwind_SjLj_Register(_Unwind_FunctionContext_t); void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t); _Unwind_Reason_Code _Unwind_SjLj_RaiseException(_Unwind_Exception *); _Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *, _Unwind_Stop_Fn, void *); void _Unwind_SjLj_Resume(_Unwind_Exception *); _Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *); void *_Unwind_FindEnclosingFunction(void *); #ifdef __APPLE__ _Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *) __attribute__((__unavailable__)); _Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *) __attribute__((__unavailable__)); /* Darwin-specific functions */ void __register_frame(const void *); void __deregister_frame(const void *); struct dwarf_eh_bases { uintptr_t tbase; uintptr_t dbase; uintptr_t func; }; void *_Unwind_Find_FDE(const void *, struct dwarf_eh_bases *); void __register_frame_info_bases(const void *, void *, void *, void *) __attribute__((__unavailable__)); void __register_frame_info(const void *, void *) __attribute__((__unavailable__)); void __register_frame_info_table_bases(const void *, void*, void *, void *) __attribute__((__unavailable__)); void __register_frame_info_table(const void *, void *) __attribute__((__unavailable__)); void __register_frame_table(const void *) __attribute__((__unavailable__)); void __deregister_frame_info(const void *) __attribute__((__unavailable__)); void __deregister_frame_info_bases(const void *)__attribute__((__unavailable__)); #else _Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *); _Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *); #endif #ifndef HIDE_EXPORTS #pragma GCC visibility pop #endif #ifdef __cplusplus } #endif #endif #endif /* __CLANG_UNWIND_H */ /*===---- varargs.h - Variable argument handling -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __VARARGS_H
#define __VARARGS_H
#error "Please use <stdarg.h> instead of <varargs.h>"
#endif
#define _vel_vld_vssl __builtin_ve_vl_vld_vssl
#define _vel_vld_vssvl __builtin_ve_vl_vld_vssvl
#define _vel_vldnc_vssl __builtin_ve_vl_vldnc_vssl
#define _vel_vldnc_vssvl __builtin_ve_vl_vldnc_vssvl
#define _vel_vldu_vssl __builtin_ve_vl_vldu_vssl
#define _vel_vldu_vssvl __builtin_ve_vl_vldu_vssvl
#define _vel_vldunc_vssl __builtin_ve_vl_vldunc_vssl
#define _vel_vldunc_vssvl __builtin_ve_vl_vldunc_vssvl
#define _vel_vldlsx_vssl __builtin_ve_vl_vldlsx_vssl
#define _vel_vldlsx_vssvl __builtin_ve_vl_vldlsx_vssvl
#define _vel_vldlsxnc_vssl __builtin_ve_vl_vldlsxnc_vssl
#define _vel_vldlsxnc_vssvl __builtin_ve_vl_vldlsxnc_vssvl
#define _vel_vldlzx_vssl __builtin_ve_vl_vldlzx_vssl
#define _vel_vldlzx_vssvl __builtin_ve_vl_vldlzx_vssvl
#define _vel_vldlzxnc_vssl __builtin_ve_vl_vldlzxnc_vssl
#define _vel_vldlzxnc_vssvl __builtin_ve_vl_vldlzxnc_vssvl
#define _vel_vld2d_vssl __builtin_ve_vl_vld2d_vssl
#define _vel_vld2d_vssvl __builtin_ve_vl_vld2d_vssvl
#define _vel_vld2dnc_vssl __builtin_ve_vl_vld2dnc_vssl
#define _vel_vld2dnc_vssvl __builtin_ve_vl_vld2dnc_vssvl
#define _vel_vldu2d_vssl __builtin_ve_vl_vldu2d_vssl
#define _vel_vldu2d_vssvl __builtin_ve_vl_vldu2d_vssvl
#define _vel_vldu2dnc_vssl __builtin_ve_vl_vldu2dnc_vssl
#define _vel_vldu2dnc_vssvl __builtin_ve_vl_vldu2dnc_vssvl
#define _vel_vldl2dsx_vssl __builtin_ve_vl_vldl2dsx_vssl
#define _vel_vldl2dsx_vssvl __builtin_ve_vl_vldl2dsx_vssvl
#define _vel_vldl2dsxnc_vssl __builtin_ve_vl_vldl2dsxnc_vssl
#define _vel_vldl2dsxnc_vssvl __builtin_ve_vl_vldl2dsxnc_vssvl
#define _vel_vldl2dzx_vssl __builtin_ve_vl_vldl2dzx_vssl
#define _vel_vldl2dzx_vssvl __builtin_ve_vl_vldl2dzx_vssvl
#define _vel_vldl2dzxnc_vssl __builtin_ve_vl_vldl2dzxnc_vssl
#define _vel_vldl2dzxnc_vssvl __builtin_ve_vl_vldl2dzxnc_vssvl
#define _vel_vst_vssl __builtin_ve_vl_vst_vssl
#define _vel_vst_vssml __builtin_ve_vl_vst_vssml
#define _vel_vstnc_vssl __builtin_ve_vl_vstnc_vssl
#define _vel_vstnc_vssml __builtin_ve_vl_vstnc_vssml
#define _vel_vstot_vssl __builtin_ve_vl_vstot_vssl
#define _vel_vstot_vssml __builtin_ve_vl_vstot_vssml
#define _vel_vstncot_vssl __builtin_ve_vl_vstncot_vssl
#define _vel_vstncot_vssml __builtin_ve_vl_vstncot_vssml
#define _vel_vstu_vssl __builtin_ve_vl_vstu_vssl
#define _vel_vstu_vssml __builtin_ve_vl_vstu_vssml
#define _vel_vstunc_vssl __builtin_ve_vl_vstunc_vssl
#define _vel_vstunc_vssml __builtin_ve_vl_vstunc_vssml
#define _vel_vstuot_vssl __builtin_ve_vl_vstuot_vssl
#define _vel_vstuot_vssml __builtin_ve_vl_vstuot_vssml
#define _vel_vstuncot_vssl __builtin_ve_vl_vstuncot_vssl
#define _vel_vstuncot_vssml __builtin_ve_vl_vstuncot_vssml
#define _vel_vstl_vssl __builtin_ve_vl_vstl_vssl
#define _vel_vstl_vssml __builtin_ve_vl_vstl_vssml
#define _vel_vstlnc_vssl __builtin_ve_vl_vstlnc_vssl
#define _vel_vstlnc_vssml __builtin_ve_vl_vstlnc_vssml
#define _vel_vstlot_vssl __builtin_ve_vl_vstlot_vssl
#define _vel_vstlot_vssml __builtin_ve_vl_vstlot_vssml
#define _vel_vstlncot_vssl __builtin_ve_vl_vstlncot_vssl
#define _vel_vstlncot_vssml __builtin_ve_vl_vstlncot_vssml
#define _vel_vst2d_vssl __builtin_ve_vl_vst2d_vssl
#define _vel_vst2d_vssml __builtin_ve_vl_vst2d_vssml
#define _vel_vst2dnc_vssl __builtin_ve_vl_vst2dnc_vssl
#define _vel_vst2dnc_vssml __builtin_ve_vl_vst2dnc_vssml
#define _vel_vst2dot_vssl __builtin_ve_vl_vst2dot_vssl
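/* Editorial note (not part of the original header): the _vel_* macros above and below simply
 * forward to the corresponding __builtin_ve_vl_* builtins for the NEC SX-Aurora vector
 * engine. The suffix appears to encode the operand pattern -- v: vector register, s: scalar,
 * m: vector mask, M: 512-bit mask pair, l: vector length -- so _vel_vst_vssml, for example,
 * would name the masked strided vector store taking (vector, scalar stride, scalar base
 * address, mask, vector length). This reading of the suffix letters is an observation about
 * the naming scheme, not normative documentation. */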
#define _vel_vst2dot_vssml __builtin_ve_vl_vst2dot_vssml #define _vel_vst2dncot_vssl __builtin_ve_vl_vst2dncot_vssl #define _vel_vst2dncot_vssml __builtin_ve_vl_vst2dncot_vssml #define _vel_vstu2d_vssl __builtin_ve_vl_vstu2d_vssl #define _vel_vstu2d_vssml __builtin_ve_vl_vstu2d_vssml #define _vel_vstu2dnc_vssl __builtin_ve_vl_vstu2dnc_vssl #define _vel_vstu2dnc_vssml __builtin_ve_vl_vstu2dnc_vssml #define _vel_vstu2dot_vssl __builtin_ve_vl_vstu2dot_vssl #define _vel_vstu2dot_vssml __builtin_ve_vl_vstu2dot_vssml #define _vel_vstu2dncot_vssl __builtin_ve_vl_vstu2dncot_vssl #define _vel_vstu2dncot_vssml __builtin_ve_vl_vstu2dncot_vssml #define _vel_vstl2d_vssl __builtin_ve_vl_vstl2d_vssl #define _vel_vstl2d_vssml __builtin_ve_vl_vstl2d_vssml #define _vel_vstl2dnc_vssl __builtin_ve_vl_vstl2dnc_vssl #define _vel_vstl2dnc_vssml __builtin_ve_vl_vstl2dnc_vssml #define _vel_vstl2dot_vssl __builtin_ve_vl_vstl2dot_vssl #define _vel_vstl2dot_vssml __builtin_ve_vl_vstl2dot_vssml #define _vel_vstl2dncot_vssl __builtin_ve_vl_vstl2dncot_vssl #define _vel_vstl2dncot_vssml __builtin_ve_vl_vstl2dncot_vssml #define _vel_pfchv_ssl __builtin_ve_vl_pfchv_ssl #define _vel_pfchvnc_ssl __builtin_ve_vl_pfchvnc_ssl #define _vel_lsv_vvss __builtin_ve_vl_lsv_vvss #define _vel_lvsl_svs __builtin_ve_vl_lvsl_svs #define _vel_lvsd_svs __builtin_ve_vl_lvsd_svs #define _vel_lvss_svs __builtin_ve_vl_lvss_svs #define _vel_lvm_mmss __builtin_ve_vl_lvm_mmss #define _vel_lvm_MMss __builtin_ve_vl_lvm_MMss #define _vel_svm_sms __builtin_ve_vl_svm_sms #define _vel_svm_sMs __builtin_ve_vl_svm_sMs #define _vel_vbrdd_vsl __builtin_ve_vl_vbrdd_vsl #define _vel_vbrdd_vsvl __builtin_ve_vl_vbrdd_vsvl #define _vel_vbrdd_vsmvl __builtin_ve_vl_vbrdd_vsmvl #define _vel_vbrdl_vsl __builtin_ve_vl_vbrdl_vsl #define _vel_vbrdl_vsvl __builtin_ve_vl_vbrdl_vsvl #define _vel_vbrdl_vsmvl __builtin_ve_vl_vbrdl_vsmvl #define _vel_vbrds_vsl __builtin_ve_vl_vbrds_vsl #define _vel_vbrds_vsvl __builtin_ve_vl_vbrds_vsvl #define _vel_vbrds_vsmvl __builtin_ve_vl_vbrds_vsmvl #define _vel_vbrdw_vsl __builtin_ve_vl_vbrdw_vsl #define _vel_vbrdw_vsvl __builtin_ve_vl_vbrdw_vsvl #define _vel_vbrdw_vsmvl __builtin_ve_vl_vbrdw_vsmvl #define _vel_pvbrd_vsl __builtin_ve_vl_pvbrd_vsl #define _vel_pvbrd_vsvl __builtin_ve_vl_pvbrd_vsvl #define _vel_pvbrd_vsMvl __builtin_ve_vl_pvbrd_vsMvl #define _vel_vmv_vsvl __builtin_ve_vl_vmv_vsvl #define _vel_vmv_vsvvl __builtin_ve_vl_vmv_vsvvl #define _vel_vmv_vsvmvl __builtin_ve_vl_vmv_vsvmvl #define _vel_vaddul_vvvl __builtin_ve_vl_vaddul_vvvl #define _vel_vaddul_vvvvl __builtin_ve_vl_vaddul_vvvvl #define _vel_vaddul_vsvl __builtin_ve_vl_vaddul_vsvl #define _vel_vaddul_vsvvl __builtin_ve_vl_vaddul_vsvvl #define _vel_vaddul_vvvmvl __builtin_ve_vl_vaddul_vvvmvl #define _vel_vaddul_vsvmvl __builtin_ve_vl_vaddul_vsvmvl #define _vel_vadduw_vvvl __builtin_ve_vl_vadduw_vvvl #define _vel_vadduw_vvvvl __builtin_ve_vl_vadduw_vvvvl #define _vel_vadduw_vsvl __builtin_ve_vl_vadduw_vsvl #define _vel_vadduw_vsvvl __builtin_ve_vl_vadduw_vsvvl #define _vel_vadduw_vvvmvl __builtin_ve_vl_vadduw_vvvmvl #define _vel_vadduw_vsvmvl __builtin_ve_vl_vadduw_vsvmvl #define _vel_pvaddu_vvvl __builtin_ve_vl_pvaddu_vvvl #define _vel_pvaddu_vvvvl __builtin_ve_vl_pvaddu_vvvvl #define _vel_pvaddu_vsvl __builtin_ve_vl_pvaddu_vsvl #define _vel_pvaddu_vsvvl __builtin_ve_vl_pvaddu_vsvvl #define _vel_pvaddu_vvvMvl __builtin_ve_vl_pvaddu_vvvMvl #define _vel_pvaddu_vsvMvl __builtin_ve_vl_pvaddu_vsvMvl #define _vel_vaddswsx_vvvl __builtin_ve_vl_vaddswsx_vvvl #define 
_vel_vaddswsx_vvvvl __builtin_ve_vl_vaddswsx_vvvvl #define _vel_vaddswsx_vsvl __builtin_ve_vl_vaddswsx_vsvl #define _vel_vaddswsx_vsvvl __builtin_ve_vl_vaddswsx_vsvvl #define _vel_vaddswsx_vvvmvl __builtin_ve_vl_vaddswsx_vvvmvl #define _vel_vaddswsx_vsvmvl __builtin_ve_vl_vaddswsx_vsvmvl #define _vel_vaddswzx_vvvl __builtin_ve_vl_vaddswzx_vvvl #define _vel_vaddswzx_vvvvl __builtin_ve_vl_vaddswzx_vvvvl #define _vel_vaddswzx_vsvl __builtin_ve_vl_vaddswzx_vsvl #define _vel_vaddswzx_vsvvl __builtin_ve_vl_vaddswzx_vsvvl #define _vel_vaddswzx_vvvmvl __builtin_ve_vl_vaddswzx_vvvmvl #define _vel_vaddswzx_vsvmvl __builtin_ve_vl_vaddswzx_vsvmvl #define _vel_pvadds_vvvl __builtin_ve_vl_pvadds_vvvl #define _vel_pvadds_vvvvl __builtin_ve_vl_pvadds_vvvvl #define _vel_pvadds_vsvl __builtin_ve_vl_pvadds_vsvl #define _vel_pvadds_vsvvl __builtin_ve_vl_pvadds_vsvvl #define _vel_pvadds_vvvMvl __builtin_ve_vl_pvadds_vvvMvl #define _vel_pvadds_vsvMvl __builtin_ve_vl_pvadds_vsvMvl #define _vel_vaddsl_vvvl __builtin_ve_vl_vaddsl_vvvl #define _vel_vaddsl_vvvvl __builtin_ve_vl_vaddsl_vvvvl #define _vel_vaddsl_vsvl __builtin_ve_vl_vaddsl_vsvl #define _vel_vaddsl_vsvvl __builtin_ve_vl_vaddsl_vsvvl #define _vel_vaddsl_vvvmvl __builtin_ve_vl_vaddsl_vvvmvl #define _vel_vaddsl_vsvmvl __builtin_ve_vl_vaddsl_vsvmvl #define _vel_vsubul_vvvl __builtin_ve_vl_vsubul_vvvl #define _vel_vsubul_vvvvl __builtin_ve_vl_vsubul_vvvvl #define _vel_vsubul_vsvl __builtin_ve_vl_vsubul_vsvl #define _vel_vsubul_vsvvl __builtin_ve_vl_vsubul_vsvvl #define _vel_vsubul_vvvmvl __builtin_ve_vl_vsubul_vvvmvl #define _vel_vsubul_vsvmvl __builtin_ve_vl_vsubul_vsvmvl #define _vel_vsubuw_vvvl __builtin_ve_vl_vsubuw_vvvl #define _vel_vsubuw_vvvvl __builtin_ve_vl_vsubuw_vvvvl #define _vel_vsubuw_vsvl __builtin_ve_vl_vsubuw_vsvl #define _vel_vsubuw_vsvvl __builtin_ve_vl_vsubuw_vsvvl #define _vel_vsubuw_vvvmvl __builtin_ve_vl_vsubuw_vvvmvl #define _vel_vsubuw_vsvmvl __builtin_ve_vl_vsubuw_vsvmvl #define _vel_pvsubu_vvvl __builtin_ve_vl_pvsubu_vvvl #define _vel_pvsubu_vvvvl __builtin_ve_vl_pvsubu_vvvvl #define _vel_pvsubu_vsvl __builtin_ve_vl_pvsubu_vsvl #define _vel_pvsubu_vsvvl __builtin_ve_vl_pvsubu_vsvvl #define _vel_pvsubu_vvvMvl __builtin_ve_vl_pvsubu_vvvMvl #define _vel_pvsubu_vsvMvl __builtin_ve_vl_pvsubu_vsvMvl #define _vel_vsubswsx_vvvl __builtin_ve_vl_vsubswsx_vvvl #define _vel_vsubswsx_vvvvl __builtin_ve_vl_vsubswsx_vvvvl #define _vel_vsubswsx_vsvl __builtin_ve_vl_vsubswsx_vsvl #define _vel_vsubswsx_vsvvl __builtin_ve_vl_vsubswsx_vsvvl #define _vel_vsubswsx_vvvmvl __builtin_ve_vl_vsubswsx_vvvmvl #define _vel_vsubswsx_vsvmvl __builtin_ve_vl_vsubswsx_vsvmvl #define _vel_vsubswzx_vvvl __builtin_ve_vl_vsubswzx_vvvl #define _vel_vsubswzx_vvvvl __builtin_ve_vl_vsubswzx_vvvvl #define _vel_vsubswzx_vsvl __builtin_ve_vl_vsubswzx_vsvl #define _vel_vsubswzx_vsvvl __builtin_ve_vl_vsubswzx_vsvvl #define _vel_vsubswzx_vvvmvl __builtin_ve_vl_vsubswzx_vvvmvl #define _vel_vsubswzx_vsvmvl __builtin_ve_vl_vsubswzx_vsvmvl #define _vel_pvsubs_vvvl __builtin_ve_vl_pvsubs_vvvl #define _vel_pvsubs_vvvvl __builtin_ve_vl_pvsubs_vvvvl #define _vel_pvsubs_vsvl __builtin_ve_vl_pvsubs_vsvl #define _vel_pvsubs_vsvvl __builtin_ve_vl_pvsubs_vsvvl #define _vel_pvsubs_vvvMvl __builtin_ve_vl_pvsubs_vvvMvl #define _vel_pvsubs_vsvMvl __builtin_ve_vl_pvsubs_vsvMvl #define _vel_vsubsl_vvvl __builtin_ve_vl_vsubsl_vvvl #define _vel_vsubsl_vvvvl __builtin_ve_vl_vsubsl_vvvvl #define _vel_vsubsl_vsvl __builtin_ve_vl_vsubsl_vsvl #define _vel_vsubsl_vsvvl __builtin_ve_vl_vsubsl_vsvvl #define 
_vel_vsubsl_vvvmvl __builtin_ve_vl_vsubsl_vvvmvl #define _vel_vsubsl_vsvmvl __builtin_ve_vl_vsubsl_vsvmvl #define _vel_vmulul_vvvl __builtin_ve_vl_vmulul_vvvl #define _vel_vmulul_vvvvl __builtin_ve_vl_vmulul_vvvvl #define _vel_vmulul_vsvl __builtin_ve_vl_vmulul_vsvl #define _vel_vmulul_vsvvl __builtin_ve_vl_vmulul_vsvvl #define _vel_vmulul_vvvmvl __builtin_ve_vl_vmulul_vvvmvl #define _vel_vmulul_vsvmvl __builtin_ve_vl_vmulul_vsvmvl #define _vel_vmuluw_vvvl __builtin_ve_vl_vmuluw_vvvl #define _vel_vmuluw_vvvvl __builtin_ve_vl_vmuluw_vvvvl #define _vel_vmuluw_vsvl __builtin_ve_vl_vmuluw_vsvl #define _vel_vmuluw_vsvvl __builtin_ve_vl_vmuluw_vsvvl #define _vel_vmuluw_vvvmvl __builtin_ve_vl_vmuluw_vvvmvl #define _vel_vmuluw_vsvmvl __builtin_ve_vl_vmuluw_vsvmvl #define _vel_vmulswsx_vvvl __builtin_ve_vl_vmulswsx_vvvl #define _vel_vmulswsx_vvvvl __builtin_ve_vl_vmulswsx_vvvvl #define _vel_vmulswsx_vsvl __builtin_ve_vl_vmulswsx_vsvl #define _vel_vmulswsx_vsvvl __builtin_ve_vl_vmulswsx_vsvvl #define _vel_vmulswsx_vvvmvl __builtin_ve_vl_vmulswsx_vvvmvl #define _vel_vmulswsx_vsvmvl __builtin_ve_vl_vmulswsx_vsvmvl #define _vel_vmulswzx_vvvl __builtin_ve_vl_vmulswzx_vvvl #define _vel_vmulswzx_vvvvl __builtin_ve_vl_vmulswzx_vvvvl #define _vel_vmulswzx_vsvl __builtin_ve_vl_vmulswzx_vsvl #define _vel_vmulswzx_vsvvl __builtin_ve_vl_vmulswzx_vsvvl #define _vel_vmulswzx_vvvmvl __builtin_ve_vl_vmulswzx_vvvmvl #define _vel_vmulswzx_vsvmvl __builtin_ve_vl_vmulswzx_vsvmvl #define _vel_vmulsl_vvvl __builtin_ve_vl_vmulsl_vvvl #define _vel_vmulsl_vvvvl __builtin_ve_vl_vmulsl_vvvvl #define _vel_vmulsl_vsvl __builtin_ve_vl_vmulsl_vsvl #define _vel_vmulsl_vsvvl __builtin_ve_vl_vmulsl_vsvvl #define _vel_vmulsl_vvvmvl __builtin_ve_vl_vmulsl_vvvmvl #define _vel_vmulsl_vsvmvl __builtin_ve_vl_vmulsl_vsvmvl #define _vel_vmulslw_vvvl __builtin_ve_vl_vmulslw_vvvl #define _vel_vmulslw_vvvvl __builtin_ve_vl_vmulslw_vvvvl #define _vel_vmulslw_vsvl __builtin_ve_vl_vmulslw_vsvl #define _vel_vmulslw_vsvvl __builtin_ve_vl_vmulslw_vsvvl #define _vel_vdivul_vvvl __builtin_ve_vl_vdivul_vvvl #define _vel_vdivul_vvvvl __builtin_ve_vl_vdivul_vvvvl #define _vel_vdivul_vsvl __builtin_ve_vl_vdivul_vsvl #define _vel_vdivul_vsvvl __builtin_ve_vl_vdivul_vsvvl #define _vel_vdivul_vvvmvl __builtin_ve_vl_vdivul_vvvmvl #define _vel_vdivul_vsvmvl __builtin_ve_vl_vdivul_vsvmvl #define _vel_vdivuw_vvvl __builtin_ve_vl_vdivuw_vvvl #define _vel_vdivuw_vvvvl __builtin_ve_vl_vdivuw_vvvvl #define _vel_vdivuw_vsvl __builtin_ve_vl_vdivuw_vsvl #define _vel_vdivuw_vsvvl __builtin_ve_vl_vdivuw_vsvvl #define _vel_vdivuw_vvvmvl __builtin_ve_vl_vdivuw_vvvmvl #define _vel_vdivuw_vsvmvl __builtin_ve_vl_vdivuw_vsvmvl #define _vel_vdivul_vvsl __builtin_ve_vl_vdivul_vvsl #define _vel_vdivul_vvsvl __builtin_ve_vl_vdivul_vvsvl #define _vel_vdivul_vvsmvl __builtin_ve_vl_vdivul_vvsmvl #define _vel_vdivuw_vvsl __builtin_ve_vl_vdivuw_vvsl #define _vel_vdivuw_vvsvl __builtin_ve_vl_vdivuw_vvsvl #define _vel_vdivuw_vvsmvl __builtin_ve_vl_vdivuw_vvsmvl #define _vel_vdivswsx_vvvl __builtin_ve_vl_vdivswsx_vvvl #define _vel_vdivswsx_vvvvl __builtin_ve_vl_vdivswsx_vvvvl #define _vel_vdivswsx_vsvl __builtin_ve_vl_vdivswsx_vsvl #define _vel_vdivswsx_vsvvl __builtin_ve_vl_vdivswsx_vsvvl #define _vel_vdivswsx_vvvmvl __builtin_ve_vl_vdivswsx_vvvmvl #define _vel_vdivswsx_vsvmvl __builtin_ve_vl_vdivswsx_vsvmvl #define _vel_vdivswzx_vvvl __builtin_ve_vl_vdivswzx_vvvl #define _vel_vdivswzx_vvvvl __builtin_ve_vl_vdivswzx_vvvvl #define _vel_vdivswzx_vsvl __builtin_ve_vl_vdivswzx_vsvl #define 
_vel_vdivswzx_vsvvl __builtin_ve_vl_vdivswzx_vsvvl #define _vel_vdivswzx_vvvmvl __builtin_ve_vl_vdivswzx_vvvmvl #define _vel_vdivswzx_vsvmvl __builtin_ve_vl_vdivswzx_vsvmvl #define _vel_vdivswsx_vvsl __builtin_ve_vl_vdivswsx_vvsl #define _vel_vdivswsx_vvsvl __builtin_ve_vl_vdivswsx_vvsvl #define _vel_vdivswsx_vvsmvl __builtin_ve_vl_vdivswsx_vvsmvl #define _vel_vdivswzx_vvsl __builtin_ve_vl_vdivswzx_vvsl #define _vel_vdivswzx_vvsvl __builtin_ve_vl_vdivswzx_vvsvl #define _vel_vdivswzx_vvsmvl __builtin_ve_vl_vdivswzx_vvsmvl #define _vel_vdivsl_vvvl __builtin_ve_vl_vdivsl_vvvl #define _vel_vdivsl_vvvvl __builtin_ve_vl_vdivsl_vvvvl #define _vel_vdivsl_vsvl __builtin_ve_vl_vdivsl_vsvl #define _vel_vdivsl_vsvvl __builtin_ve_vl_vdivsl_vsvvl #define _vel_vdivsl_vvvmvl __builtin_ve_vl_vdivsl_vvvmvl #define _vel_vdivsl_vsvmvl __builtin_ve_vl_vdivsl_vsvmvl #define _vel_vdivsl_vvsl __builtin_ve_vl_vdivsl_vvsl #define _vel_vdivsl_vvsvl __builtin_ve_vl_vdivsl_vvsvl #define _vel_vdivsl_vvsmvl __builtin_ve_vl_vdivsl_vvsmvl #define _vel_vcmpul_vvvl __builtin_ve_vl_vcmpul_vvvl #define _vel_vcmpul_vvvvl __builtin_ve_vl_vcmpul_vvvvl #define _vel_vcmpul_vsvl __builtin_ve_vl_vcmpul_vsvl #define _vel_vcmpul_vsvvl __builtin_ve_vl_vcmpul_vsvvl #define _vel_vcmpul_vvvmvl __builtin_ve_vl_vcmpul_vvvmvl #define _vel_vcmpul_vsvmvl __builtin_ve_vl_vcmpul_vsvmvl #define _vel_vcmpuw_vvvl __builtin_ve_vl_vcmpuw_vvvl #define _vel_vcmpuw_vvvvl __builtin_ve_vl_vcmpuw_vvvvl #define _vel_vcmpuw_vsvl __builtin_ve_vl_vcmpuw_vsvl #define _vel_vcmpuw_vsvvl __builtin_ve_vl_vcmpuw_vsvvl #define _vel_vcmpuw_vvvmvl __builtin_ve_vl_vcmpuw_vvvmvl #define _vel_vcmpuw_vsvmvl __builtin_ve_vl_vcmpuw_vsvmvl #define _vel_pvcmpu_vvvl __builtin_ve_vl_pvcmpu_vvvl #define _vel_pvcmpu_vvvvl __builtin_ve_vl_pvcmpu_vvvvl #define _vel_pvcmpu_vsvl __builtin_ve_vl_pvcmpu_vsvl #define _vel_pvcmpu_vsvvl __builtin_ve_vl_pvcmpu_vsvvl #define _vel_pvcmpu_vvvMvl __builtin_ve_vl_pvcmpu_vvvMvl #define _vel_pvcmpu_vsvMvl __builtin_ve_vl_pvcmpu_vsvMvl #define _vel_vcmpswsx_vvvl __builtin_ve_vl_vcmpswsx_vvvl #define _vel_vcmpswsx_vvvvl __builtin_ve_vl_vcmpswsx_vvvvl #define _vel_vcmpswsx_vsvl __builtin_ve_vl_vcmpswsx_vsvl #define _vel_vcmpswsx_vsvvl __builtin_ve_vl_vcmpswsx_vsvvl #define _vel_vcmpswsx_vvvmvl __builtin_ve_vl_vcmpswsx_vvvmvl #define _vel_vcmpswsx_vsvmvl __builtin_ve_vl_vcmpswsx_vsvmvl #define _vel_vcmpswzx_vvvl __builtin_ve_vl_vcmpswzx_vvvl #define _vel_vcmpswzx_vvvvl __builtin_ve_vl_vcmpswzx_vvvvl #define _vel_vcmpswzx_vsvl __builtin_ve_vl_vcmpswzx_vsvl #define _vel_vcmpswzx_vsvvl __builtin_ve_vl_vcmpswzx_vsvvl #define _vel_vcmpswzx_vvvmvl __builtin_ve_vl_vcmpswzx_vvvmvl #define _vel_vcmpswzx_vsvmvl __builtin_ve_vl_vcmpswzx_vsvmvl #define _vel_pvcmps_vvvl __builtin_ve_vl_pvcmps_vvvl #define _vel_pvcmps_vvvvl __builtin_ve_vl_pvcmps_vvvvl #define _vel_pvcmps_vsvl __builtin_ve_vl_pvcmps_vsvl #define _vel_pvcmps_vsvvl __builtin_ve_vl_pvcmps_vsvvl #define _vel_pvcmps_vvvMvl __builtin_ve_vl_pvcmps_vvvMvl #define _vel_pvcmps_vsvMvl __builtin_ve_vl_pvcmps_vsvMvl #define _vel_vcmpsl_vvvl __builtin_ve_vl_vcmpsl_vvvl #define _vel_vcmpsl_vvvvl __builtin_ve_vl_vcmpsl_vvvvl #define _vel_vcmpsl_vsvl __builtin_ve_vl_vcmpsl_vsvl #define _vel_vcmpsl_vsvvl __builtin_ve_vl_vcmpsl_vsvvl #define _vel_vcmpsl_vvvmvl __builtin_ve_vl_vcmpsl_vvvmvl #define _vel_vcmpsl_vsvmvl __builtin_ve_vl_vcmpsl_vsvmvl #define _vel_vmaxswsx_vvvl __builtin_ve_vl_vmaxswsx_vvvl #define _vel_vmaxswsx_vvvvl __builtin_ve_vl_vmaxswsx_vvvvl #define _vel_vmaxswsx_vsvl __builtin_ve_vl_vmaxswsx_vsvl 
#define _vel_vmaxswsx_vsvvl __builtin_ve_vl_vmaxswsx_vsvvl #define _vel_vmaxswsx_vvvmvl __builtin_ve_vl_vmaxswsx_vvvmvl #define _vel_vmaxswsx_vsvmvl __builtin_ve_vl_vmaxswsx_vsvmvl #define _vel_vmaxswzx_vvvl __builtin_ve_vl_vmaxswzx_vvvl #define _vel_vmaxswzx_vvvvl __builtin_ve_vl_vmaxswzx_vvvvl #define _vel_vmaxswzx_vsvl __builtin_ve_vl_vmaxswzx_vsvl #define _vel_vmaxswzx_vsvvl __builtin_ve_vl_vmaxswzx_vsvvl #define _vel_vmaxswzx_vvvmvl __builtin_ve_vl_vmaxswzx_vvvmvl #define _vel_vmaxswzx_vsvmvl __builtin_ve_vl_vmaxswzx_vsvmvl #define _vel_pvmaxs_vvvl __builtin_ve_vl_pvmaxs_vvvl #define _vel_pvmaxs_vvvvl __builtin_ve_vl_pvmaxs_vvvvl #define _vel_pvmaxs_vsvl __builtin_ve_vl_pvmaxs_vsvl #define _vel_pvmaxs_vsvvl __builtin_ve_vl_pvmaxs_vsvvl #define _vel_pvmaxs_vvvMvl __builtin_ve_vl_pvmaxs_vvvMvl #define _vel_pvmaxs_vsvMvl __builtin_ve_vl_pvmaxs_vsvMvl #define _vel_vminswsx_vvvl __builtin_ve_vl_vminswsx_vvvl #define _vel_vminswsx_vvvvl __builtin_ve_vl_vminswsx_vvvvl #define _vel_vminswsx_vsvl __builtin_ve_vl_vminswsx_vsvl #define _vel_vminswsx_vsvvl __builtin_ve_vl_vminswsx_vsvvl #define _vel_vminswsx_vvvmvl __builtin_ve_vl_vminswsx_vvvmvl #define _vel_vminswsx_vsvmvl __builtin_ve_vl_vminswsx_vsvmvl #define _vel_vminswzx_vvvl __builtin_ve_vl_vminswzx_vvvl #define _vel_vminswzx_vvvvl __builtin_ve_vl_vminswzx_vvvvl #define _vel_vminswzx_vsvl __builtin_ve_vl_vminswzx_vsvl #define _vel_vminswzx_vsvvl __builtin_ve_vl_vminswzx_vsvvl #define _vel_vminswzx_vvvmvl __builtin_ve_vl_vminswzx_vvvmvl #define _vel_vminswzx_vsvmvl __builtin_ve_vl_vminswzx_vsvmvl #define _vel_pvmins_vvvl __builtin_ve_vl_pvmins_vvvl #define _vel_pvmins_vvvvl __builtin_ve_vl_pvmins_vvvvl #define _vel_pvmins_vsvl __builtin_ve_vl_pvmins_vsvl #define _vel_pvmins_vsvvl __builtin_ve_vl_pvmins_vsvvl #define _vel_pvmins_vvvMvl __builtin_ve_vl_pvmins_vvvMvl #define _vel_pvmins_vsvMvl __builtin_ve_vl_pvmins_vsvMvl #define _vel_vmaxsl_vvvl __builtin_ve_vl_vmaxsl_vvvl #define _vel_vmaxsl_vvvvl __builtin_ve_vl_vmaxsl_vvvvl #define _vel_vmaxsl_vsvl __builtin_ve_vl_vmaxsl_vsvl #define _vel_vmaxsl_vsvvl __builtin_ve_vl_vmaxsl_vsvvl #define _vel_vmaxsl_vvvmvl __builtin_ve_vl_vmaxsl_vvvmvl #define _vel_vmaxsl_vsvmvl __builtin_ve_vl_vmaxsl_vsvmvl #define _vel_vminsl_vvvl __builtin_ve_vl_vminsl_vvvl #define _vel_vminsl_vvvvl __builtin_ve_vl_vminsl_vvvvl #define _vel_vminsl_vsvl __builtin_ve_vl_vminsl_vsvl #define _vel_vminsl_vsvvl __builtin_ve_vl_vminsl_vsvvl #define _vel_vminsl_vvvmvl __builtin_ve_vl_vminsl_vvvmvl #define _vel_vminsl_vsvmvl __builtin_ve_vl_vminsl_vsvmvl #define _vel_vand_vvvl __builtin_ve_vl_vand_vvvl #define _vel_vand_vvvvl __builtin_ve_vl_vand_vvvvl #define _vel_vand_vsvl __builtin_ve_vl_vand_vsvl #define _vel_vand_vsvvl __builtin_ve_vl_vand_vsvvl #define _vel_vand_vvvmvl __builtin_ve_vl_vand_vvvmvl #define _vel_vand_vsvmvl __builtin_ve_vl_vand_vsvmvl #define _vel_pvand_vvvl __builtin_ve_vl_pvand_vvvl #define _vel_pvand_vvvvl __builtin_ve_vl_pvand_vvvvl #define _vel_pvand_vsvl __builtin_ve_vl_pvand_vsvl #define _vel_pvand_vsvvl __builtin_ve_vl_pvand_vsvvl #define _vel_pvand_vvvMvl __builtin_ve_vl_pvand_vvvMvl #define _vel_pvand_vsvMvl __builtin_ve_vl_pvand_vsvMvl #define _vel_vor_vvvl __builtin_ve_vl_vor_vvvl #define _vel_vor_vvvvl __builtin_ve_vl_vor_vvvvl #define _vel_vor_vsvl __builtin_ve_vl_vor_vsvl #define _vel_vor_vsvvl __builtin_ve_vl_vor_vsvvl #define _vel_vor_vvvmvl __builtin_ve_vl_vor_vvvmvl #define _vel_vor_vsvmvl __builtin_ve_vl_vor_vsvmvl #define _vel_pvor_vvvl __builtin_ve_vl_pvor_vvvl #define _vel_pvor_vvvvl 
__builtin_ve_vl_pvor_vvvvl #define _vel_pvor_vsvl __builtin_ve_vl_pvor_vsvl #define _vel_pvor_vsvvl __builtin_ve_vl_pvor_vsvvl #define _vel_pvor_vvvMvl __builtin_ve_vl_pvor_vvvMvl #define _vel_pvor_vsvMvl __builtin_ve_vl_pvor_vsvMvl #define _vel_vxor_vvvl __builtin_ve_vl_vxor_vvvl #define _vel_vxor_vvvvl __builtin_ve_vl_vxor_vvvvl #define _vel_vxor_vsvl __builtin_ve_vl_vxor_vsvl #define _vel_vxor_vsvvl __builtin_ve_vl_vxor_vsvvl #define _vel_vxor_vvvmvl __builtin_ve_vl_vxor_vvvmvl #define _vel_vxor_vsvmvl __builtin_ve_vl_vxor_vsvmvl #define _vel_pvxor_vvvl __builtin_ve_vl_pvxor_vvvl #define _vel_pvxor_vvvvl __builtin_ve_vl_pvxor_vvvvl #define _vel_pvxor_vsvl __builtin_ve_vl_pvxor_vsvl #define _vel_pvxor_vsvvl __builtin_ve_vl_pvxor_vsvvl #define _vel_pvxor_vvvMvl __builtin_ve_vl_pvxor_vvvMvl #define _vel_pvxor_vsvMvl __builtin_ve_vl_pvxor_vsvMvl #define _vel_veqv_vvvl __builtin_ve_vl_veqv_vvvl #define _vel_veqv_vvvvl __builtin_ve_vl_veqv_vvvvl #define _vel_veqv_vsvl __builtin_ve_vl_veqv_vsvl #define _vel_veqv_vsvvl __builtin_ve_vl_veqv_vsvvl #define _vel_veqv_vvvmvl __builtin_ve_vl_veqv_vvvmvl #define _vel_veqv_vsvmvl __builtin_ve_vl_veqv_vsvmvl #define _vel_pveqv_vvvl __builtin_ve_vl_pveqv_vvvl #define _vel_pveqv_vvvvl __builtin_ve_vl_pveqv_vvvvl #define _vel_pveqv_vsvl __builtin_ve_vl_pveqv_vsvl #define _vel_pveqv_vsvvl __builtin_ve_vl_pveqv_vsvvl #define _vel_pveqv_vvvMvl __builtin_ve_vl_pveqv_vvvMvl #define _vel_pveqv_vsvMvl __builtin_ve_vl_pveqv_vsvMvl #define _vel_vldz_vvl __builtin_ve_vl_vldz_vvl #define _vel_vldz_vvvl __builtin_ve_vl_vldz_vvvl #define _vel_vldz_vvmvl __builtin_ve_vl_vldz_vvmvl #define _vel_pvldzlo_vvl __builtin_ve_vl_pvldzlo_vvl #define _vel_pvldzlo_vvvl __builtin_ve_vl_pvldzlo_vvvl #define _vel_pvldzlo_vvmvl __builtin_ve_vl_pvldzlo_vvmvl #define _vel_pvldzup_vvl __builtin_ve_vl_pvldzup_vvl #define _vel_pvldzup_vvvl __builtin_ve_vl_pvldzup_vvvl #define _vel_pvldzup_vvmvl __builtin_ve_vl_pvldzup_vvmvl #define _vel_pvldz_vvl __builtin_ve_vl_pvldz_vvl #define _vel_pvldz_vvvl __builtin_ve_vl_pvldz_vvvl #define _vel_pvldz_vvMvl __builtin_ve_vl_pvldz_vvMvl #define _vel_vpcnt_vvl __builtin_ve_vl_vpcnt_vvl #define _vel_vpcnt_vvvl __builtin_ve_vl_vpcnt_vvvl #define _vel_vpcnt_vvmvl __builtin_ve_vl_vpcnt_vvmvl #define _vel_pvpcntlo_vvl __builtin_ve_vl_pvpcntlo_vvl #define _vel_pvpcntlo_vvvl __builtin_ve_vl_pvpcntlo_vvvl #define _vel_pvpcntlo_vvmvl __builtin_ve_vl_pvpcntlo_vvmvl #define _vel_pvpcntup_vvl __builtin_ve_vl_pvpcntup_vvl #define _vel_pvpcntup_vvvl __builtin_ve_vl_pvpcntup_vvvl #define _vel_pvpcntup_vvmvl __builtin_ve_vl_pvpcntup_vvmvl #define _vel_pvpcnt_vvl __builtin_ve_vl_pvpcnt_vvl #define _vel_pvpcnt_vvvl __builtin_ve_vl_pvpcnt_vvvl #define _vel_pvpcnt_vvMvl __builtin_ve_vl_pvpcnt_vvMvl #define _vel_vbrv_vvl __builtin_ve_vl_vbrv_vvl #define _vel_vbrv_vvvl __builtin_ve_vl_vbrv_vvvl #define _vel_vbrv_vvmvl __builtin_ve_vl_vbrv_vvmvl #define _vel_pvbrvlo_vvl __builtin_ve_vl_pvbrvlo_vvl #define _vel_pvbrvlo_vvvl __builtin_ve_vl_pvbrvlo_vvvl #define _vel_pvbrvlo_vvmvl __builtin_ve_vl_pvbrvlo_vvmvl #define _vel_pvbrvup_vvl __builtin_ve_vl_pvbrvup_vvl #define _vel_pvbrvup_vvvl __builtin_ve_vl_pvbrvup_vvvl #define _vel_pvbrvup_vvmvl __builtin_ve_vl_pvbrvup_vvmvl #define _vel_pvbrv_vvl __builtin_ve_vl_pvbrv_vvl #define _vel_pvbrv_vvvl __builtin_ve_vl_pvbrv_vvvl #define _vel_pvbrv_vvMvl __builtin_ve_vl_pvbrv_vvMvl #define _vel_vseq_vl __builtin_ve_vl_vseq_vl #define _vel_vseq_vvl __builtin_ve_vl_vseq_vvl #define _vel_pvseqlo_vl __builtin_ve_vl_pvseqlo_vl #define 
_vel_pvseqlo_vvl __builtin_ve_vl_pvseqlo_vvl #define _vel_pvsequp_vl __builtin_ve_vl_pvsequp_vl #define _vel_pvsequp_vvl __builtin_ve_vl_pvsequp_vvl #define _vel_pvseq_vl __builtin_ve_vl_pvseq_vl #define _vel_pvseq_vvl __builtin_ve_vl_pvseq_vvl #define _vel_vsll_vvvl __builtin_ve_vl_vsll_vvvl #define _vel_vsll_vvvvl __builtin_ve_vl_vsll_vvvvl #define _vel_vsll_vvsl __builtin_ve_vl_vsll_vvsl #define _vel_vsll_vvsvl __builtin_ve_vl_vsll_vvsvl #define _vel_vsll_vvvmvl __builtin_ve_vl_vsll_vvvmvl #define _vel_vsll_vvsmvl __builtin_ve_vl_vsll_vvsmvl #define _vel_pvsll_vvvl __builtin_ve_vl_pvsll_vvvl #define _vel_pvsll_vvvvl __builtin_ve_vl_pvsll_vvvvl #define _vel_pvsll_vvsl __builtin_ve_vl_pvsll_vvsl #define _vel_pvsll_vvsvl __builtin_ve_vl_pvsll_vvsvl #define _vel_pvsll_vvvMvl __builtin_ve_vl_pvsll_vvvMvl #define _vel_pvsll_vvsMvl __builtin_ve_vl_pvsll_vvsMvl #define _vel_vsrl_vvvl __builtin_ve_vl_vsrl_vvvl #define _vel_vsrl_vvvvl __builtin_ve_vl_vsrl_vvvvl #define _vel_vsrl_vvsl __builtin_ve_vl_vsrl_vvsl #define _vel_vsrl_vvsvl __builtin_ve_vl_vsrl_vvsvl #define _vel_vsrl_vvvmvl __builtin_ve_vl_vsrl_vvvmvl #define _vel_vsrl_vvsmvl __builtin_ve_vl_vsrl_vvsmvl #define _vel_pvsrl_vvvl __builtin_ve_vl_pvsrl_vvvl #define _vel_pvsrl_vvvvl __builtin_ve_vl_pvsrl_vvvvl #define _vel_pvsrl_vvsl __builtin_ve_vl_pvsrl_vvsl #define _vel_pvsrl_vvsvl __builtin_ve_vl_pvsrl_vvsvl #define _vel_pvsrl_vvvMvl __builtin_ve_vl_pvsrl_vvvMvl #define _vel_pvsrl_vvsMvl __builtin_ve_vl_pvsrl_vvsMvl #define _vel_vslawsx_vvvl __builtin_ve_vl_vslawsx_vvvl #define _vel_vslawsx_vvvvl __builtin_ve_vl_vslawsx_vvvvl #define _vel_vslawsx_vvsl __builtin_ve_vl_vslawsx_vvsl #define _vel_vslawsx_vvsvl __builtin_ve_vl_vslawsx_vvsvl #define _vel_vslawsx_vvvmvl __builtin_ve_vl_vslawsx_vvvmvl #define _vel_vslawsx_vvsmvl __builtin_ve_vl_vslawsx_vvsmvl #define _vel_vslawzx_vvvl __builtin_ve_vl_vslawzx_vvvl #define _vel_vslawzx_vvvvl __builtin_ve_vl_vslawzx_vvvvl #define _vel_vslawzx_vvsl __builtin_ve_vl_vslawzx_vvsl #define _vel_vslawzx_vvsvl __builtin_ve_vl_vslawzx_vvsvl #define _vel_vslawzx_vvvmvl __builtin_ve_vl_vslawzx_vvvmvl #define _vel_vslawzx_vvsmvl __builtin_ve_vl_vslawzx_vvsmvl #define _vel_pvsla_vvvl __builtin_ve_vl_pvsla_vvvl #define _vel_pvsla_vvvvl __builtin_ve_vl_pvsla_vvvvl #define _vel_pvsla_vvsl __builtin_ve_vl_pvsla_vvsl #define _vel_pvsla_vvsvl __builtin_ve_vl_pvsla_vvsvl #define _vel_pvsla_vvvMvl __builtin_ve_vl_pvsla_vvvMvl #define _vel_pvsla_vvsMvl __builtin_ve_vl_pvsla_vvsMvl #define _vel_vslal_vvvl __builtin_ve_vl_vslal_vvvl #define _vel_vslal_vvvvl __builtin_ve_vl_vslal_vvvvl #define _vel_vslal_vvsl __builtin_ve_vl_vslal_vvsl #define _vel_vslal_vvsvl __builtin_ve_vl_vslal_vvsvl #define _vel_vslal_vvvmvl __builtin_ve_vl_vslal_vvvmvl #define _vel_vslal_vvsmvl __builtin_ve_vl_vslal_vvsmvl #define _vel_vsrawsx_vvvl __builtin_ve_vl_vsrawsx_vvvl #define _vel_vsrawsx_vvvvl __builtin_ve_vl_vsrawsx_vvvvl #define _vel_vsrawsx_vvsl __builtin_ve_vl_vsrawsx_vvsl #define _vel_vsrawsx_vvsvl __builtin_ve_vl_vsrawsx_vvsvl #define _vel_vsrawsx_vvvmvl __builtin_ve_vl_vsrawsx_vvvmvl #define _vel_vsrawsx_vvsmvl __builtin_ve_vl_vsrawsx_vvsmvl #define _vel_vsrawzx_vvvl __builtin_ve_vl_vsrawzx_vvvl #define _vel_vsrawzx_vvvvl __builtin_ve_vl_vsrawzx_vvvvl #define _vel_vsrawzx_vvsl __builtin_ve_vl_vsrawzx_vvsl #define _vel_vsrawzx_vvsvl __builtin_ve_vl_vsrawzx_vvsvl #define _vel_vsrawzx_vvvmvl __builtin_ve_vl_vsrawzx_vvvmvl #define _vel_vsrawzx_vvsmvl __builtin_ve_vl_vsrawzx_vvsmvl #define _vel_pvsra_vvvl __builtin_ve_vl_pvsra_vvvl 
#define _vel_pvsra_vvvvl __builtin_ve_vl_pvsra_vvvvl #define _vel_pvsra_vvsl __builtin_ve_vl_pvsra_vvsl #define _vel_pvsra_vvsvl __builtin_ve_vl_pvsra_vvsvl #define _vel_pvsra_vvvMvl __builtin_ve_vl_pvsra_vvvMvl #define _vel_pvsra_vvsMvl __builtin_ve_vl_pvsra_vvsMvl #define _vel_vsral_vvvl __builtin_ve_vl_vsral_vvvl #define _vel_vsral_vvvvl __builtin_ve_vl_vsral_vvvvl #define _vel_vsral_vvsl __builtin_ve_vl_vsral_vvsl #define _vel_vsral_vvsvl __builtin_ve_vl_vsral_vvsvl #define _vel_vsral_vvvmvl __builtin_ve_vl_vsral_vvvmvl #define _vel_vsral_vvsmvl __builtin_ve_vl_vsral_vvsmvl #define _vel_vsfa_vvssl __builtin_ve_vl_vsfa_vvssl #define _vel_vsfa_vvssvl __builtin_ve_vl_vsfa_vvssvl #define _vel_vsfa_vvssmvl __builtin_ve_vl_vsfa_vvssmvl #define _vel_vfaddd_vvvl __builtin_ve_vl_vfaddd_vvvl #define _vel_vfaddd_vvvvl __builtin_ve_vl_vfaddd_vvvvl #define _vel_vfaddd_vsvl __builtin_ve_vl_vfaddd_vsvl #define _vel_vfaddd_vsvvl __builtin_ve_vl_vfaddd_vsvvl #define _vel_vfaddd_vvvmvl __builtin_ve_vl_vfaddd_vvvmvl #define _vel_vfaddd_vsvmvl __builtin_ve_vl_vfaddd_vsvmvl #define _vel_vfadds_vvvl __builtin_ve_vl_vfadds_vvvl #define _vel_vfadds_vvvvl __builtin_ve_vl_vfadds_vvvvl #define _vel_vfadds_vsvl __builtin_ve_vl_vfadds_vsvl #define _vel_vfadds_vsvvl __builtin_ve_vl_vfadds_vsvvl #define _vel_vfadds_vvvmvl __builtin_ve_vl_vfadds_vvvmvl #define _vel_vfadds_vsvmvl __builtin_ve_vl_vfadds_vsvmvl #define _vel_pvfadd_vvvl __builtin_ve_vl_pvfadd_vvvl #define _vel_pvfadd_vvvvl __builtin_ve_vl_pvfadd_vvvvl #define _vel_pvfadd_vsvl __builtin_ve_vl_pvfadd_vsvl #define _vel_pvfadd_vsvvl __builtin_ve_vl_pvfadd_vsvvl #define _vel_pvfadd_vvvMvl __builtin_ve_vl_pvfadd_vvvMvl #define _vel_pvfadd_vsvMvl __builtin_ve_vl_pvfadd_vsvMvl #define _vel_vfsubd_vvvl __builtin_ve_vl_vfsubd_vvvl #define _vel_vfsubd_vvvvl __builtin_ve_vl_vfsubd_vvvvl #define _vel_vfsubd_vsvl __builtin_ve_vl_vfsubd_vsvl #define _vel_vfsubd_vsvvl __builtin_ve_vl_vfsubd_vsvvl #define _vel_vfsubd_vvvmvl __builtin_ve_vl_vfsubd_vvvmvl #define _vel_vfsubd_vsvmvl __builtin_ve_vl_vfsubd_vsvmvl #define _vel_vfsubs_vvvl __builtin_ve_vl_vfsubs_vvvl #define _vel_vfsubs_vvvvl __builtin_ve_vl_vfsubs_vvvvl #define _vel_vfsubs_vsvl __builtin_ve_vl_vfsubs_vsvl #define _vel_vfsubs_vsvvl __builtin_ve_vl_vfsubs_vsvvl #define _vel_vfsubs_vvvmvl __builtin_ve_vl_vfsubs_vvvmvl #define _vel_vfsubs_vsvmvl __builtin_ve_vl_vfsubs_vsvmvl #define _vel_pvfsub_vvvl __builtin_ve_vl_pvfsub_vvvl #define _vel_pvfsub_vvvvl __builtin_ve_vl_pvfsub_vvvvl #define _vel_pvfsub_vsvl __builtin_ve_vl_pvfsub_vsvl #define _vel_pvfsub_vsvvl __builtin_ve_vl_pvfsub_vsvvl #define _vel_pvfsub_vvvMvl __builtin_ve_vl_pvfsub_vvvMvl #define _vel_pvfsub_vsvMvl __builtin_ve_vl_pvfsub_vsvMvl #define _vel_vfmuld_vvvl __builtin_ve_vl_vfmuld_vvvl #define _vel_vfmuld_vvvvl __builtin_ve_vl_vfmuld_vvvvl #define _vel_vfmuld_vsvl __builtin_ve_vl_vfmuld_vsvl #define _vel_vfmuld_vsvvl __builtin_ve_vl_vfmuld_vsvvl #define _vel_vfmuld_vvvmvl __builtin_ve_vl_vfmuld_vvvmvl #define _vel_vfmuld_vsvmvl __builtin_ve_vl_vfmuld_vsvmvl #define _vel_vfmuls_vvvl __builtin_ve_vl_vfmuls_vvvl #define _vel_vfmuls_vvvvl __builtin_ve_vl_vfmuls_vvvvl #define _vel_vfmuls_vsvl __builtin_ve_vl_vfmuls_vsvl #define _vel_vfmuls_vsvvl __builtin_ve_vl_vfmuls_vsvvl #define _vel_vfmuls_vvvmvl __builtin_ve_vl_vfmuls_vvvmvl #define _vel_vfmuls_vsvmvl __builtin_ve_vl_vfmuls_vsvmvl #define _vel_pvfmul_vvvl __builtin_ve_vl_pvfmul_vvvl #define _vel_pvfmul_vvvvl __builtin_ve_vl_pvfmul_vvvvl #define _vel_pvfmul_vsvl __builtin_ve_vl_pvfmul_vsvl #define 
_vel_pvfmul_vsvvl __builtin_ve_vl_pvfmul_vsvvl #define _vel_pvfmul_vvvMvl __builtin_ve_vl_pvfmul_vvvMvl #define _vel_pvfmul_vsvMvl __builtin_ve_vl_pvfmul_vsvMvl #define _vel_vfdivd_vvvl __builtin_ve_vl_vfdivd_vvvl #define _vel_vfdivd_vvvvl __builtin_ve_vl_vfdivd_vvvvl #define _vel_vfdivd_vsvl __builtin_ve_vl_vfdivd_vsvl #define _vel_vfdivd_vsvvl __builtin_ve_vl_vfdivd_vsvvl #define _vel_vfdivd_vvvmvl __builtin_ve_vl_vfdivd_vvvmvl #define _vel_vfdivd_vsvmvl __builtin_ve_vl_vfdivd_vsvmvl #define _vel_vfdivs_vvvl __builtin_ve_vl_vfdivs_vvvl #define _vel_vfdivs_vvvvl __builtin_ve_vl_vfdivs_vvvvl #define _vel_vfdivs_vsvl __builtin_ve_vl_vfdivs_vsvl #define _vel_vfdivs_vsvvl __builtin_ve_vl_vfdivs_vsvvl #define _vel_vfdivs_vvvmvl __builtin_ve_vl_vfdivs_vvvmvl #define _vel_vfdivs_vsvmvl __builtin_ve_vl_vfdivs_vsvmvl #define _vel_vfsqrtd_vvl __builtin_ve_vl_vfsqrtd_vvl #define _vel_vfsqrtd_vvvl __builtin_ve_vl_vfsqrtd_vvvl #define _vel_vfsqrts_vvl __builtin_ve_vl_vfsqrts_vvl #define _vel_vfsqrts_vvvl __builtin_ve_vl_vfsqrts_vvvl #define _vel_vfcmpd_vvvl __builtin_ve_vl_vfcmpd_vvvl #define _vel_vfcmpd_vvvvl __builtin_ve_vl_vfcmpd_vvvvl #define _vel_vfcmpd_vsvl __builtin_ve_vl_vfcmpd_vsvl #define _vel_vfcmpd_vsvvl __builtin_ve_vl_vfcmpd_vsvvl #define _vel_vfcmpd_vvvmvl __builtin_ve_vl_vfcmpd_vvvmvl #define _vel_vfcmpd_vsvmvl __builtin_ve_vl_vfcmpd_vsvmvl #define _vel_vfcmps_vvvl __builtin_ve_vl_vfcmps_vvvl #define _vel_vfcmps_vvvvl __builtin_ve_vl_vfcmps_vvvvl #define _vel_vfcmps_vsvl __builtin_ve_vl_vfcmps_vsvl #define _vel_vfcmps_vsvvl __builtin_ve_vl_vfcmps_vsvvl #define _vel_vfcmps_vvvmvl __builtin_ve_vl_vfcmps_vvvmvl #define _vel_vfcmps_vsvmvl __builtin_ve_vl_vfcmps_vsvmvl #define _vel_pvfcmp_vvvl __builtin_ve_vl_pvfcmp_vvvl #define _vel_pvfcmp_vvvvl __builtin_ve_vl_pvfcmp_vvvvl #define _vel_pvfcmp_vsvl __builtin_ve_vl_pvfcmp_vsvl #define _vel_pvfcmp_vsvvl __builtin_ve_vl_pvfcmp_vsvvl #define _vel_pvfcmp_vvvMvl __builtin_ve_vl_pvfcmp_vvvMvl #define _vel_pvfcmp_vsvMvl __builtin_ve_vl_pvfcmp_vsvMvl #define _vel_vfmaxd_vvvl __builtin_ve_vl_vfmaxd_vvvl #define _vel_vfmaxd_vvvvl __builtin_ve_vl_vfmaxd_vvvvl #define _vel_vfmaxd_vsvl __builtin_ve_vl_vfmaxd_vsvl #define _vel_vfmaxd_vsvvl __builtin_ve_vl_vfmaxd_vsvvl #define _vel_vfmaxd_vvvmvl __builtin_ve_vl_vfmaxd_vvvmvl #define _vel_vfmaxd_vsvmvl __builtin_ve_vl_vfmaxd_vsvmvl #define _vel_vfmaxs_vvvl __builtin_ve_vl_vfmaxs_vvvl #define _vel_vfmaxs_vvvvl __builtin_ve_vl_vfmaxs_vvvvl #define _vel_vfmaxs_vsvl __builtin_ve_vl_vfmaxs_vsvl #define _vel_vfmaxs_vsvvl __builtin_ve_vl_vfmaxs_vsvvl #define _vel_vfmaxs_vvvmvl __builtin_ve_vl_vfmaxs_vvvmvl #define _vel_vfmaxs_vsvmvl __builtin_ve_vl_vfmaxs_vsvmvl #define _vel_pvfmax_vvvl __builtin_ve_vl_pvfmax_vvvl #define _vel_pvfmax_vvvvl __builtin_ve_vl_pvfmax_vvvvl #define _vel_pvfmax_vsvl __builtin_ve_vl_pvfmax_vsvl #define _vel_pvfmax_vsvvl __builtin_ve_vl_pvfmax_vsvvl #define _vel_pvfmax_vvvMvl __builtin_ve_vl_pvfmax_vvvMvl #define _vel_pvfmax_vsvMvl __builtin_ve_vl_pvfmax_vsvMvl #define _vel_vfmind_vvvl __builtin_ve_vl_vfmind_vvvl #define _vel_vfmind_vvvvl __builtin_ve_vl_vfmind_vvvvl #define _vel_vfmind_vsvl __builtin_ve_vl_vfmind_vsvl #define _vel_vfmind_vsvvl __builtin_ve_vl_vfmind_vsvvl #define _vel_vfmind_vvvmvl __builtin_ve_vl_vfmind_vvvmvl #define _vel_vfmind_vsvmvl __builtin_ve_vl_vfmind_vsvmvl #define _vel_vfmins_vvvl __builtin_ve_vl_vfmins_vvvl #define _vel_vfmins_vvvvl __builtin_ve_vl_vfmins_vvvvl #define _vel_vfmins_vsvl __builtin_ve_vl_vfmins_vsvl #define _vel_vfmins_vsvvl 
__builtin_ve_vl_vfmins_vsvvl #define _vel_vfmins_vvvmvl __builtin_ve_vl_vfmins_vvvmvl #define _vel_vfmins_vsvmvl __builtin_ve_vl_vfmins_vsvmvl #define _vel_pvfmin_vvvl __builtin_ve_vl_pvfmin_vvvl #define _vel_pvfmin_vvvvl __builtin_ve_vl_pvfmin_vvvvl #define _vel_pvfmin_vsvl __builtin_ve_vl_pvfmin_vsvl #define _vel_pvfmin_vsvvl __builtin_ve_vl_pvfmin_vsvvl #define _vel_pvfmin_vvvMvl __builtin_ve_vl_pvfmin_vvvMvl #define _vel_pvfmin_vsvMvl __builtin_ve_vl_pvfmin_vsvMvl #define _vel_vfmadd_vvvvl __builtin_ve_vl_vfmadd_vvvvl #define _vel_vfmadd_vvvvvl __builtin_ve_vl_vfmadd_vvvvvl #define _vel_vfmadd_vsvvl __builtin_ve_vl_vfmadd_vsvvl #define _vel_vfmadd_vsvvvl __builtin_ve_vl_vfmadd_vsvvvl #define _vel_vfmadd_vvsvl __builtin_ve_vl_vfmadd_vvsvl #define _vel_vfmadd_vvsvvl __builtin_ve_vl_vfmadd_vvsvvl #define _vel_vfmadd_vvvvmvl __builtin_ve_vl_vfmadd_vvvvmvl #define _vel_vfmadd_vsvvmvl __builtin_ve_vl_vfmadd_vsvvmvl #define _vel_vfmadd_vvsvmvl __builtin_ve_vl_vfmadd_vvsvmvl #define _vel_vfmads_vvvvl __builtin_ve_vl_vfmads_vvvvl #define _vel_vfmads_vvvvvl __builtin_ve_vl_vfmads_vvvvvl #define _vel_vfmads_vsvvl __builtin_ve_vl_vfmads_vsvvl #define _vel_vfmads_vsvvvl __builtin_ve_vl_vfmads_vsvvvl #define _vel_vfmads_vvsvl __builtin_ve_vl_vfmads_vvsvl #define _vel_vfmads_vvsvvl __builtin_ve_vl_vfmads_vvsvvl #define _vel_vfmads_vvvvmvl __builtin_ve_vl_vfmads_vvvvmvl #define _vel_vfmads_vsvvmvl __builtin_ve_vl_vfmads_vsvvmvl #define _vel_vfmads_vvsvmvl __builtin_ve_vl_vfmads_vvsvmvl #define _vel_pvfmad_vvvvl __builtin_ve_vl_pvfmad_vvvvl #define _vel_pvfmad_vvvvvl __builtin_ve_vl_pvfmad_vvvvvl #define _vel_pvfmad_vsvvl __builtin_ve_vl_pvfmad_vsvvl #define _vel_pvfmad_vsvvvl __builtin_ve_vl_pvfmad_vsvvvl #define _vel_pvfmad_vvsvl __builtin_ve_vl_pvfmad_vvsvl #define _vel_pvfmad_vvsvvl __builtin_ve_vl_pvfmad_vvsvvl #define _vel_pvfmad_vvvvMvl __builtin_ve_vl_pvfmad_vvvvMvl #define _vel_pvfmad_vsvvMvl __builtin_ve_vl_pvfmad_vsvvMvl #define _vel_pvfmad_vvsvMvl __builtin_ve_vl_pvfmad_vvsvMvl #define _vel_vfmsbd_vvvvl __builtin_ve_vl_vfmsbd_vvvvl #define _vel_vfmsbd_vvvvvl __builtin_ve_vl_vfmsbd_vvvvvl #define _vel_vfmsbd_vsvvl __builtin_ve_vl_vfmsbd_vsvvl #define _vel_vfmsbd_vsvvvl __builtin_ve_vl_vfmsbd_vsvvvl #define _vel_vfmsbd_vvsvl __builtin_ve_vl_vfmsbd_vvsvl #define _vel_vfmsbd_vvsvvl __builtin_ve_vl_vfmsbd_vvsvvl #define _vel_vfmsbd_vvvvmvl __builtin_ve_vl_vfmsbd_vvvvmvl #define _vel_vfmsbd_vsvvmvl __builtin_ve_vl_vfmsbd_vsvvmvl #define _vel_vfmsbd_vvsvmvl __builtin_ve_vl_vfmsbd_vvsvmvl #define _vel_vfmsbs_vvvvl __builtin_ve_vl_vfmsbs_vvvvl #define _vel_vfmsbs_vvvvvl __builtin_ve_vl_vfmsbs_vvvvvl #define _vel_vfmsbs_vsvvl __builtin_ve_vl_vfmsbs_vsvvl #define _vel_vfmsbs_vsvvvl __builtin_ve_vl_vfmsbs_vsvvvl #define _vel_vfmsbs_vvsvl __builtin_ve_vl_vfmsbs_vvsvl #define _vel_vfmsbs_vvsvvl __builtin_ve_vl_vfmsbs_vvsvvl #define _vel_vfmsbs_vvvvmvl __builtin_ve_vl_vfmsbs_vvvvmvl #define _vel_vfmsbs_vsvvmvl __builtin_ve_vl_vfmsbs_vsvvmvl #define _vel_vfmsbs_vvsvmvl __builtin_ve_vl_vfmsbs_vvsvmvl #define _vel_pvfmsb_vvvvl __builtin_ve_vl_pvfmsb_vvvvl #define _vel_pvfmsb_vvvvvl __builtin_ve_vl_pvfmsb_vvvvvl #define _vel_pvfmsb_vsvvl __builtin_ve_vl_pvfmsb_vsvvl #define _vel_pvfmsb_vsvvvl __builtin_ve_vl_pvfmsb_vsvvvl #define _vel_pvfmsb_vvsvl __builtin_ve_vl_pvfmsb_vvsvl #define _vel_pvfmsb_vvsvvl __builtin_ve_vl_pvfmsb_vvsvvl #define _vel_pvfmsb_vvvvMvl __builtin_ve_vl_pvfmsb_vvvvMvl #define _vel_pvfmsb_vsvvMvl __builtin_ve_vl_pvfmsb_vsvvMvl #define _vel_pvfmsb_vvsvMvl __builtin_ve_vl_pvfmsb_vvsvMvl 
#define _vel_vfnmadd_vvvvl __builtin_ve_vl_vfnmadd_vvvvl #define _vel_vfnmadd_vvvvvl __builtin_ve_vl_vfnmadd_vvvvvl #define _vel_vfnmadd_vsvvl __builtin_ve_vl_vfnmadd_vsvvl #define _vel_vfnmadd_vsvvvl __builtin_ve_vl_vfnmadd_vsvvvl #define _vel_vfnmadd_vvsvl __builtin_ve_vl_vfnmadd_vvsvl #define _vel_vfnmadd_vvsvvl __builtin_ve_vl_vfnmadd_vvsvvl #define _vel_vfnmadd_vvvvmvl __builtin_ve_vl_vfnmadd_vvvvmvl #define _vel_vfnmadd_vsvvmvl __builtin_ve_vl_vfnmadd_vsvvmvl #define _vel_vfnmadd_vvsvmvl __builtin_ve_vl_vfnmadd_vvsvmvl #define _vel_vfnmads_vvvvl __builtin_ve_vl_vfnmads_vvvvl #define _vel_vfnmads_vvvvvl __builtin_ve_vl_vfnmads_vvvvvl #define _vel_vfnmads_vsvvl __builtin_ve_vl_vfnmads_vsvvl #define _vel_vfnmads_vsvvvl __builtin_ve_vl_vfnmads_vsvvvl #define _vel_vfnmads_vvsvl __builtin_ve_vl_vfnmads_vvsvl #define _vel_vfnmads_vvsvvl __builtin_ve_vl_vfnmads_vvsvvl #define _vel_vfnmads_vvvvmvl __builtin_ve_vl_vfnmads_vvvvmvl #define _vel_vfnmads_vsvvmvl __builtin_ve_vl_vfnmads_vsvvmvl #define _vel_vfnmads_vvsvmvl __builtin_ve_vl_vfnmads_vvsvmvl #define _vel_pvfnmad_vvvvl __builtin_ve_vl_pvfnmad_vvvvl #define _vel_pvfnmad_vvvvvl __builtin_ve_vl_pvfnmad_vvvvvl #define _vel_pvfnmad_vsvvl __builtin_ve_vl_pvfnmad_vsvvl #define _vel_pvfnmad_vsvvvl __builtin_ve_vl_pvfnmad_vsvvvl #define _vel_pvfnmad_vvsvl __builtin_ve_vl_pvfnmad_vvsvl #define _vel_pvfnmad_vvsvvl __builtin_ve_vl_pvfnmad_vvsvvl #define _vel_pvfnmad_vvvvMvl __builtin_ve_vl_pvfnmad_vvvvMvl #define _vel_pvfnmad_vsvvMvl __builtin_ve_vl_pvfnmad_vsvvMvl #define _vel_pvfnmad_vvsvMvl __builtin_ve_vl_pvfnmad_vvsvMvl #define _vel_vfnmsbd_vvvvl __builtin_ve_vl_vfnmsbd_vvvvl #define _vel_vfnmsbd_vvvvvl __builtin_ve_vl_vfnmsbd_vvvvvl #define _vel_vfnmsbd_vsvvl __builtin_ve_vl_vfnmsbd_vsvvl #define _vel_vfnmsbd_vsvvvl __builtin_ve_vl_vfnmsbd_vsvvvl #define _vel_vfnmsbd_vvsvl __builtin_ve_vl_vfnmsbd_vvsvl #define _vel_vfnmsbd_vvsvvl __builtin_ve_vl_vfnmsbd_vvsvvl #define _vel_vfnmsbd_vvvvmvl __builtin_ve_vl_vfnmsbd_vvvvmvl #define _vel_vfnmsbd_vsvvmvl __builtin_ve_vl_vfnmsbd_vsvvmvl #define _vel_vfnmsbd_vvsvmvl __builtin_ve_vl_vfnmsbd_vvsvmvl #define _vel_vfnmsbs_vvvvl __builtin_ve_vl_vfnmsbs_vvvvl #define _vel_vfnmsbs_vvvvvl __builtin_ve_vl_vfnmsbs_vvvvvl #define _vel_vfnmsbs_vsvvl __builtin_ve_vl_vfnmsbs_vsvvl #define _vel_vfnmsbs_vsvvvl __builtin_ve_vl_vfnmsbs_vsvvvl #define _vel_vfnmsbs_vvsvl __builtin_ve_vl_vfnmsbs_vvsvl #define _vel_vfnmsbs_vvsvvl __builtin_ve_vl_vfnmsbs_vvsvvl #define _vel_vfnmsbs_vvvvmvl __builtin_ve_vl_vfnmsbs_vvvvmvl #define _vel_vfnmsbs_vsvvmvl __builtin_ve_vl_vfnmsbs_vsvvmvl #define _vel_vfnmsbs_vvsvmvl __builtin_ve_vl_vfnmsbs_vvsvmvl #define _vel_pvfnmsb_vvvvl __builtin_ve_vl_pvfnmsb_vvvvl #define _vel_pvfnmsb_vvvvvl __builtin_ve_vl_pvfnmsb_vvvvvl #define _vel_pvfnmsb_vsvvl __builtin_ve_vl_pvfnmsb_vsvvl #define _vel_pvfnmsb_vsvvvl __builtin_ve_vl_pvfnmsb_vsvvvl #define _vel_pvfnmsb_vvsvl __builtin_ve_vl_pvfnmsb_vvsvl #define _vel_pvfnmsb_vvsvvl __builtin_ve_vl_pvfnmsb_vvsvvl #define _vel_pvfnmsb_vvvvMvl __builtin_ve_vl_pvfnmsb_vvvvMvl #define _vel_pvfnmsb_vsvvMvl __builtin_ve_vl_pvfnmsb_vsvvMvl #define _vel_pvfnmsb_vvsvMvl __builtin_ve_vl_pvfnmsb_vvsvMvl #define _vel_vrcpd_vvl __builtin_ve_vl_vrcpd_vvl #define _vel_vrcpd_vvvl __builtin_ve_vl_vrcpd_vvvl #define _vel_vrcps_vvl __builtin_ve_vl_vrcps_vvl #define _vel_vrcps_vvvl __builtin_ve_vl_vrcps_vvvl #define _vel_pvrcp_vvl __builtin_ve_vl_pvrcp_vvl #define _vel_pvrcp_vvvl __builtin_ve_vl_pvrcp_vvvl #define _vel_vrsqrtd_vvl __builtin_ve_vl_vrsqrtd_vvl #define 
_vel_vrsqrtd_vvvl __builtin_ve_vl_vrsqrtd_vvvl #define _vel_vrsqrts_vvl __builtin_ve_vl_vrsqrts_vvl #define _vel_vrsqrts_vvvl __builtin_ve_vl_vrsqrts_vvvl #define _vel_pvrsqrt_vvl __builtin_ve_vl_pvrsqrt_vvl #define _vel_pvrsqrt_vvvl __builtin_ve_vl_pvrsqrt_vvvl #define _vel_vrsqrtdnex_vvl __builtin_ve_vl_vrsqrtdnex_vvl #define _vel_vrsqrtdnex_vvvl __builtin_ve_vl_vrsqrtdnex_vvvl #define _vel_vrsqrtsnex_vvl __builtin_ve_vl_vrsqrtsnex_vvl #define _vel_vrsqrtsnex_vvvl __builtin_ve_vl_vrsqrtsnex_vvvl #define _vel_pvrsqrtnex_vvl __builtin_ve_vl_pvrsqrtnex_vvl #define _vel_pvrsqrtnex_vvvl __builtin_ve_vl_pvrsqrtnex_vvvl #define _vel_vcvtwdsx_vvl __builtin_ve_vl_vcvtwdsx_vvl #define _vel_vcvtwdsx_vvvl __builtin_ve_vl_vcvtwdsx_vvvl #define _vel_vcvtwdsx_vvmvl __builtin_ve_vl_vcvtwdsx_vvmvl #define _vel_vcvtwdsxrz_vvl __builtin_ve_vl_vcvtwdsxrz_vvl #define _vel_vcvtwdsxrz_vvvl __builtin_ve_vl_vcvtwdsxrz_vvvl #define _vel_vcvtwdsxrz_vvmvl __builtin_ve_vl_vcvtwdsxrz_vvmvl #define _vel_vcvtwdzx_vvl __builtin_ve_vl_vcvtwdzx_vvl #define _vel_vcvtwdzx_vvvl __builtin_ve_vl_vcvtwdzx_vvvl #define _vel_vcvtwdzx_vvmvl __builtin_ve_vl_vcvtwdzx_vvmvl #define _vel_vcvtwdzxrz_vvl __builtin_ve_vl_vcvtwdzxrz_vvl #define _vel_vcvtwdzxrz_vvvl __builtin_ve_vl_vcvtwdzxrz_vvvl #define _vel_vcvtwdzxrz_vvmvl __builtin_ve_vl_vcvtwdzxrz_vvmvl #define _vel_vcvtwssx_vvl __builtin_ve_vl_vcvtwssx_vvl #define _vel_vcvtwssx_vvvl __builtin_ve_vl_vcvtwssx_vvvl #define _vel_vcvtwssx_vvmvl __builtin_ve_vl_vcvtwssx_vvmvl #define _vel_vcvtwssxrz_vvl __builtin_ve_vl_vcvtwssxrz_vvl #define _vel_vcvtwssxrz_vvvl __builtin_ve_vl_vcvtwssxrz_vvvl #define _vel_vcvtwssxrz_vvmvl __builtin_ve_vl_vcvtwssxrz_vvmvl #define _vel_vcvtwszx_vvl __builtin_ve_vl_vcvtwszx_vvl #define _vel_vcvtwszx_vvvl __builtin_ve_vl_vcvtwszx_vvvl #define _vel_vcvtwszx_vvmvl __builtin_ve_vl_vcvtwszx_vvmvl #define _vel_vcvtwszxrz_vvl __builtin_ve_vl_vcvtwszxrz_vvl #define _vel_vcvtwszxrz_vvvl __builtin_ve_vl_vcvtwszxrz_vvvl #define _vel_vcvtwszxrz_vvmvl __builtin_ve_vl_vcvtwszxrz_vvmvl #define _vel_pvcvtws_vvl __builtin_ve_vl_pvcvtws_vvl #define _vel_pvcvtws_vvvl __builtin_ve_vl_pvcvtws_vvvl #define _vel_pvcvtws_vvMvl __builtin_ve_vl_pvcvtws_vvMvl #define _vel_pvcvtwsrz_vvl __builtin_ve_vl_pvcvtwsrz_vvl #define _vel_pvcvtwsrz_vvvl __builtin_ve_vl_pvcvtwsrz_vvvl #define _vel_pvcvtwsrz_vvMvl __builtin_ve_vl_pvcvtwsrz_vvMvl #define _vel_vcvtld_vvl __builtin_ve_vl_vcvtld_vvl #define _vel_vcvtld_vvvl __builtin_ve_vl_vcvtld_vvvl #define _vel_vcvtld_vvmvl __builtin_ve_vl_vcvtld_vvmvl #define _vel_vcvtldrz_vvl __builtin_ve_vl_vcvtldrz_vvl #define _vel_vcvtldrz_vvvl __builtin_ve_vl_vcvtldrz_vvvl #define _vel_vcvtldrz_vvmvl __builtin_ve_vl_vcvtldrz_vvmvl #define _vel_vcvtdw_vvl __builtin_ve_vl_vcvtdw_vvl #define _vel_vcvtdw_vvvl __builtin_ve_vl_vcvtdw_vvvl #define _vel_vcvtsw_vvl __builtin_ve_vl_vcvtsw_vvl #define _vel_vcvtsw_vvvl __builtin_ve_vl_vcvtsw_vvvl #define _vel_pvcvtsw_vvl __builtin_ve_vl_pvcvtsw_vvl #define _vel_pvcvtsw_vvvl __builtin_ve_vl_pvcvtsw_vvvl #define _vel_vcvtdl_vvl __builtin_ve_vl_vcvtdl_vvl #define _vel_vcvtdl_vvvl __builtin_ve_vl_vcvtdl_vvvl #define _vel_vcvtds_vvl __builtin_ve_vl_vcvtds_vvl #define _vel_vcvtds_vvvl __builtin_ve_vl_vcvtds_vvvl #define _vel_vcvtsd_vvl __builtin_ve_vl_vcvtsd_vvl #define _vel_vcvtsd_vvvl __builtin_ve_vl_vcvtsd_vvvl #define _vel_vmrg_vvvml __builtin_ve_vl_vmrg_vvvml #define _vel_vmrg_vvvmvl __builtin_ve_vl_vmrg_vvvmvl #define _vel_vmrg_vsvml __builtin_ve_vl_vmrg_vsvml #define _vel_vmrg_vsvmvl __builtin_ve_vl_vmrg_vsvmvl #define 
_vel_vmrgw_vvvMl __builtin_ve_vl_vmrgw_vvvMl #define _vel_vmrgw_vvvMvl __builtin_ve_vl_vmrgw_vvvMvl #define _vel_vmrgw_vsvMl __builtin_ve_vl_vmrgw_vsvMl #define _vel_vmrgw_vsvMvl __builtin_ve_vl_vmrgw_vsvMvl #define _vel_vshf_vvvsl __builtin_ve_vl_vshf_vvvsl #define _vel_vshf_vvvsvl __builtin_ve_vl_vshf_vvvsvl #define _vel_vcp_vvmvl __builtin_ve_vl_vcp_vvmvl #define _vel_vex_vvmvl __builtin_ve_vl_vex_vvmvl #define _vel_vfmklat_ml __builtin_ve_vl_vfmklat_ml #define _vel_vfmklaf_ml __builtin_ve_vl_vfmklaf_ml #define _vel_pvfmkat_Ml __builtin_ve_vl_pvfmkat_Ml #define _vel_pvfmkaf_Ml __builtin_ve_vl_pvfmkaf_Ml #define _vel_vfmklgt_mvl __builtin_ve_vl_vfmklgt_mvl #define _vel_vfmklgt_mvml __builtin_ve_vl_vfmklgt_mvml #define _vel_vfmkllt_mvl __builtin_ve_vl_vfmkllt_mvl #define _vel_vfmkllt_mvml __builtin_ve_vl_vfmkllt_mvml #define _vel_vfmklne_mvl __builtin_ve_vl_vfmklne_mvl #define _vel_vfmklne_mvml __builtin_ve_vl_vfmklne_mvml #define _vel_vfmkleq_mvl __builtin_ve_vl_vfmkleq_mvl #define _vel_vfmkleq_mvml __builtin_ve_vl_vfmkleq_mvml #define _vel_vfmklge_mvl __builtin_ve_vl_vfmklge_mvl #define _vel_vfmklge_mvml __builtin_ve_vl_vfmklge_mvml #define _vel_vfmklle_mvl __builtin_ve_vl_vfmklle_mvl #define _vel_vfmklle_mvml __builtin_ve_vl_vfmklle_mvml #define _vel_vfmklnum_mvl __builtin_ve_vl_vfmklnum_mvl #define _vel_vfmklnum_mvml __builtin_ve_vl_vfmklnum_mvml #define _vel_vfmklnan_mvl __builtin_ve_vl_vfmklnan_mvl #define _vel_vfmklnan_mvml __builtin_ve_vl_vfmklnan_mvml #define _vel_vfmklgtnan_mvl __builtin_ve_vl_vfmklgtnan_mvl #define _vel_vfmklgtnan_mvml __builtin_ve_vl_vfmklgtnan_mvml #define _vel_vfmklltnan_mvl __builtin_ve_vl_vfmklltnan_mvl #define _vel_vfmklltnan_mvml __builtin_ve_vl_vfmklltnan_mvml #define _vel_vfmklnenan_mvl __builtin_ve_vl_vfmklnenan_mvl #define _vel_vfmklnenan_mvml __builtin_ve_vl_vfmklnenan_mvml #define _vel_vfmkleqnan_mvl __builtin_ve_vl_vfmkleqnan_mvl #define _vel_vfmkleqnan_mvml __builtin_ve_vl_vfmkleqnan_mvml #define _vel_vfmklgenan_mvl __builtin_ve_vl_vfmklgenan_mvl #define _vel_vfmklgenan_mvml __builtin_ve_vl_vfmklgenan_mvml #define _vel_vfmkllenan_mvl __builtin_ve_vl_vfmkllenan_mvl #define _vel_vfmkllenan_mvml __builtin_ve_vl_vfmkllenan_mvml #define _vel_vfmkwgt_mvl __builtin_ve_vl_vfmkwgt_mvl #define _vel_vfmkwgt_mvml __builtin_ve_vl_vfmkwgt_mvml #define _vel_vfmkwlt_mvl __builtin_ve_vl_vfmkwlt_mvl #define _vel_vfmkwlt_mvml __builtin_ve_vl_vfmkwlt_mvml #define _vel_vfmkwne_mvl __builtin_ve_vl_vfmkwne_mvl #define _vel_vfmkwne_mvml __builtin_ve_vl_vfmkwne_mvml #define _vel_vfmkweq_mvl __builtin_ve_vl_vfmkweq_mvl #define _vel_vfmkweq_mvml __builtin_ve_vl_vfmkweq_mvml #define _vel_vfmkwge_mvl __builtin_ve_vl_vfmkwge_mvl #define _vel_vfmkwge_mvml __builtin_ve_vl_vfmkwge_mvml #define _vel_vfmkwle_mvl __builtin_ve_vl_vfmkwle_mvl #define _vel_vfmkwle_mvml __builtin_ve_vl_vfmkwle_mvml #define _vel_vfmkwnum_mvl __builtin_ve_vl_vfmkwnum_mvl #define _vel_vfmkwnum_mvml __builtin_ve_vl_vfmkwnum_mvml #define _vel_vfmkwnan_mvl __builtin_ve_vl_vfmkwnan_mvl #define _vel_vfmkwnan_mvml __builtin_ve_vl_vfmkwnan_mvml #define _vel_vfmkwgtnan_mvl __builtin_ve_vl_vfmkwgtnan_mvl #define _vel_vfmkwgtnan_mvml __builtin_ve_vl_vfmkwgtnan_mvml #define _vel_vfmkwltnan_mvl __builtin_ve_vl_vfmkwltnan_mvl #define _vel_vfmkwltnan_mvml __builtin_ve_vl_vfmkwltnan_mvml #define _vel_vfmkwnenan_mvl __builtin_ve_vl_vfmkwnenan_mvl #define _vel_vfmkwnenan_mvml __builtin_ve_vl_vfmkwnenan_mvml #define _vel_vfmkweqnan_mvl __builtin_ve_vl_vfmkweqnan_mvl #define _vel_vfmkweqnan_mvml 
__builtin_ve_vl_vfmkweqnan_mvml #define _vel_vfmkwgenan_mvl __builtin_ve_vl_vfmkwgenan_mvl #define _vel_vfmkwgenan_mvml __builtin_ve_vl_vfmkwgenan_mvml #define _vel_vfmkwlenan_mvl __builtin_ve_vl_vfmkwlenan_mvl #define _vel_vfmkwlenan_mvml __builtin_ve_vl_vfmkwlenan_mvml #define _vel_pvfmkwlogt_mvl __builtin_ve_vl_pvfmkwlogt_mvl #define _vel_pvfmkwupgt_mvl __builtin_ve_vl_pvfmkwupgt_mvl #define _vel_pvfmkwlogt_mvml __builtin_ve_vl_pvfmkwlogt_mvml #define _vel_pvfmkwupgt_mvml __builtin_ve_vl_pvfmkwupgt_mvml #define _vel_pvfmkwlolt_mvl __builtin_ve_vl_pvfmkwlolt_mvl #define _vel_pvfmkwuplt_mvl __builtin_ve_vl_pvfmkwuplt_mvl #define _vel_pvfmkwlolt_mvml __builtin_ve_vl_pvfmkwlolt_mvml #define _vel_pvfmkwuplt_mvml __builtin_ve_vl_pvfmkwuplt_mvml #define _vel_pvfmkwlone_mvl __builtin_ve_vl_pvfmkwlone_mvl #define _vel_pvfmkwupne_mvl __builtin_ve_vl_pvfmkwupne_mvl #define _vel_pvfmkwlone_mvml __builtin_ve_vl_pvfmkwlone_mvml #define _vel_pvfmkwupne_mvml __builtin_ve_vl_pvfmkwupne_mvml #define _vel_pvfmkwloeq_mvl __builtin_ve_vl_pvfmkwloeq_mvl #define _vel_pvfmkwupeq_mvl __builtin_ve_vl_pvfmkwupeq_mvl #define _vel_pvfmkwloeq_mvml __builtin_ve_vl_pvfmkwloeq_mvml #define _vel_pvfmkwupeq_mvml __builtin_ve_vl_pvfmkwupeq_mvml #define _vel_pvfmkwloge_mvl __builtin_ve_vl_pvfmkwloge_mvl #define _vel_pvfmkwupge_mvl __builtin_ve_vl_pvfmkwupge_mvl #define _vel_pvfmkwloge_mvml __builtin_ve_vl_pvfmkwloge_mvml #define _vel_pvfmkwupge_mvml __builtin_ve_vl_pvfmkwupge_mvml #define _vel_pvfmkwlole_mvl __builtin_ve_vl_pvfmkwlole_mvl #define _vel_pvfmkwuple_mvl __builtin_ve_vl_pvfmkwuple_mvl #define _vel_pvfmkwlole_mvml __builtin_ve_vl_pvfmkwlole_mvml #define _vel_pvfmkwuple_mvml __builtin_ve_vl_pvfmkwuple_mvml #define _vel_pvfmkwlonum_mvl __builtin_ve_vl_pvfmkwlonum_mvl #define _vel_pvfmkwupnum_mvl __builtin_ve_vl_pvfmkwupnum_mvl #define _vel_pvfmkwlonum_mvml __builtin_ve_vl_pvfmkwlonum_mvml #define _vel_pvfmkwupnum_mvml __builtin_ve_vl_pvfmkwupnum_mvml #define _vel_pvfmkwlonan_mvl __builtin_ve_vl_pvfmkwlonan_mvl #define _vel_pvfmkwupnan_mvl __builtin_ve_vl_pvfmkwupnan_mvl #define _vel_pvfmkwlonan_mvml __builtin_ve_vl_pvfmkwlonan_mvml #define _vel_pvfmkwupnan_mvml __builtin_ve_vl_pvfmkwupnan_mvml #define _vel_pvfmkwlogtnan_mvl __builtin_ve_vl_pvfmkwlogtnan_mvl #define _vel_pvfmkwupgtnan_mvl __builtin_ve_vl_pvfmkwupgtnan_mvl #define _vel_pvfmkwlogtnan_mvml __builtin_ve_vl_pvfmkwlogtnan_mvml #define _vel_pvfmkwupgtnan_mvml __builtin_ve_vl_pvfmkwupgtnan_mvml #define _vel_pvfmkwloltnan_mvl __builtin_ve_vl_pvfmkwloltnan_mvl #define _vel_pvfmkwupltnan_mvl __builtin_ve_vl_pvfmkwupltnan_mvl #define _vel_pvfmkwloltnan_mvml __builtin_ve_vl_pvfmkwloltnan_mvml #define _vel_pvfmkwupltnan_mvml __builtin_ve_vl_pvfmkwupltnan_mvml #define _vel_pvfmkwlonenan_mvl __builtin_ve_vl_pvfmkwlonenan_mvl #define _vel_pvfmkwupnenan_mvl __builtin_ve_vl_pvfmkwupnenan_mvl #define _vel_pvfmkwlonenan_mvml __builtin_ve_vl_pvfmkwlonenan_mvml #define _vel_pvfmkwupnenan_mvml __builtin_ve_vl_pvfmkwupnenan_mvml #define _vel_pvfmkwloeqnan_mvl __builtin_ve_vl_pvfmkwloeqnan_mvl #define _vel_pvfmkwupeqnan_mvl __builtin_ve_vl_pvfmkwupeqnan_mvl #define _vel_pvfmkwloeqnan_mvml __builtin_ve_vl_pvfmkwloeqnan_mvml #define _vel_pvfmkwupeqnan_mvml __builtin_ve_vl_pvfmkwupeqnan_mvml #define _vel_pvfmkwlogenan_mvl __builtin_ve_vl_pvfmkwlogenan_mvl #define _vel_pvfmkwupgenan_mvl __builtin_ve_vl_pvfmkwupgenan_mvl #define _vel_pvfmkwlogenan_mvml __builtin_ve_vl_pvfmkwlogenan_mvml #define _vel_pvfmkwupgenan_mvml __builtin_ve_vl_pvfmkwupgenan_mvml #define 
_vel_pvfmkwlolenan_mvl __builtin_ve_vl_pvfmkwlolenan_mvl #define _vel_pvfmkwuplenan_mvl __builtin_ve_vl_pvfmkwuplenan_mvl #define _vel_pvfmkwlolenan_mvml __builtin_ve_vl_pvfmkwlolenan_mvml #define _vel_pvfmkwuplenan_mvml __builtin_ve_vl_pvfmkwuplenan_mvml #define _vel_pvfmkwgt_Mvl __builtin_ve_vl_pvfmkwgt_Mvl #define _vel_pvfmkwgt_MvMl __builtin_ve_vl_pvfmkwgt_MvMl #define _vel_pvfmkwlt_Mvl __builtin_ve_vl_pvfmkwlt_Mvl #define _vel_pvfmkwlt_MvMl __builtin_ve_vl_pvfmkwlt_MvMl #define _vel_pvfmkwne_Mvl __builtin_ve_vl_pvfmkwne_Mvl #define _vel_pvfmkwne_MvMl __builtin_ve_vl_pvfmkwne_MvMl #define _vel_pvfmkweq_Mvl __builtin_ve_vl_pvfmkweq_Mvl #define _vel_pvfmkweq_MvMl __builtin_ve_vl_pvfmkweq_MvMl #define _vel_pvfmkwge_Mvl __builtin_ve_vl_pvfmkwge_Mvl #define _vel_pvfmkwge_MvMl __builtin_ve_vl_pvfmkwge_MvMl #define _vel_pvfmkwle_Mvl __builtin_ve_vl_pvfmkwle_Mvl #define _vel_pvfmkwle_MvMl __builtin_ve_vl_pvfmkwle_MvMl #define _vel_pvfmkwnum_Mvl __builtin_ve_vl_pvfmkwnum_Mvl #define _vel_pvfmkwnum_MvMl __builtin_ve_vl_pvfmkwnum_MvMl #define _vel_pvfmkwnan_Mvl __builtin_ve_vl_pvfmkwnan_Mvl #define _vel_pvfmkwnan_MvMl __builtin_ve_vl_pvfmkwnan_MvMl #define _vel_pvfmkwgtnan_Mvl __builtin_ve_vl_pvfmkwgtnan_Mvl #define _vel_pvfmkwgtnan_MvMl __builtin_ve_vl_pvfmkwgtnan_MvMl #define _vel_pvfmkwltnan_Mvl __builtin_ve_vl_pvfmkwltnan_Mvl #define _vel_pvfmkwltnan_MvMl __builtin_ve_vl_pvfmkwltnan_MvMl #define _vel_pvfmkwnenan_Mvl __builtin_ve_vl_pvfmkwnenan_Mvl #define _vel_pvfmkwnenan_MvMl __builtin_ve_vl_pvfmkwnenan_MvMl #define _vel_pvfmkweqnan_Mvl __builtin_ve_vl_pvfmkweqnan_Mvl #define _vel_pvfmkweqnan_MvMl __builtin_ve_vl_pvfmkweqnan_MvMl #define _vel_pvfmkwgenan_Mvl __builtin_ve_vl_pvfmkwgenan_Mvl #define _vel_pvfmkwgenan_MvMl __builtin_ve_vl_pvfmkwgenan_MvMl #define _vel_pvfmkwlenan_Mvl __builtin_ve_vl_pvfmkwlenan_Mvl #define _vel_pvfmkwlenan_MvMl __builtin_ve_vl_pvfmkwlenan_MvMl #define _vel_vfmkdgt_mvl __builtin_ve_vl_vfmkdgt_mvl #define _vel_vfmkdgt_mvml __builtin_ve_vl_vfmkdgt_mvml #define _vel_vfmkdlt_mvl __builtin_ve_vl_vfmkdlt_mvl #define _vel_vfmkdlt_mvml __builtin_ve_vl_vfmkdlt_mvml #define _vel_vfmkdne_mvl __builtin_ve_vl_vfmkdne_mvl #define _vel_vfmkdne_mvml __builtin_ve_vl_vfmkdne_mvml #define _vel_vfmkdeq_mvl __builtin_ve_vl_vfmkdeq_mvl #define _vel_vfmkdeq_mvml __builtin_ve_vl_vfmkdeq_mvml #define _vel_vfmkdge_mvl __builtin_ve_vl_vfmkdge_mvl #define _vel_vfmkdge_mvml __builtin_ve_vl_vfmkdge_mvml #define _vel_vfmkdle_mvl __builtin_ve_vl_vfmkdle_mvl #define _vel_vfmkdle_mvml __builtin_ve_vl_vfmkdle_mvml #define _vel_vfmkdnum_mvl __builtin_ve_vl_vfmkdnum_mvl #define _vel_vfmkdnum_mvml __builtin_ve_vl_vfmkdnum_mvml #define _vel_vfmkdnan_mvl __builtin_ve_vl_vfmkdnan_mvl #define _vel_vfmkdnan_mvml __builtin_ve_vl_vfmkdnan_mvml #define _vel_vfmkdgtnan_mvl __builtin_ve_vl_vfmkdgtnan_mvl #define _vel_vfmkdgtnan_mvml __builtin_ve_vl_vfmkdgtnan_mvml #define _vel_vfmkdltnan_mvl __builtin_ve_vl_vfmkdltnan_mvl #define _vel_vfmkdltnan_mvml __builtin_ve_vl_vfmkdltnan_mvml #define _vel_vfmkdnenan_mvl __builtin_ve_vl_vfmkdnenan_mvl #define _vel_vfmkdnenan_mvml __builtin_ve_vl_vfmkdnenan_mvml #define _vel_vfmkdeqnan_mvl __builtin_ve_vl_vfmkdeqnan_mvl #define _vel_vfmkdeqnan_mvml __builtin_ve_vl_vfmkdeqnan_mvml #define _vel_vfmkdgenan_mvl __builtin_ve_vl_vfmkdgenan_mvl #define _vel_vfmkdgenan_mvml __builtin_ve_vl_vfmkdgenan_mvml #define _vel_vfmkdlenan_mvl __builtin_ve_vl_vfmkdlenan_mvl #define _vel_vfmkdlenan_mvml __builtin_ve_vl_vfmkdlenan_mvml #define _vel_vfmksgt_mvl __builtin_ve_vl_vfmksgt_mvl 
#define _vel_vfmksgt_mvml __builtin_ve_vl_vfmksgt_mvml #define _vel_vfmkslt_mvl __builtin_ve_vl_vfmkslt_mvl #define _vel_vfmkslt_mvml __builtin_ve_vl_vfmkslt_mvml #define _vel_vfmksne_mvl __builtin_ve_vl_vfmksne_mvl #define _vel_vfmksne_mvml __builtin_ve_vl_vfmksne_mvml #define _vel_vfmkseq_mvl __builtin_ve_vl_vfmkseq_mvl #define _vel_vfmkseq_mvml __builtin_ve_vl_vfmkseq_mvml #define _vel_vfmksge_mvl __builtin_ve_vl_vfmksge_mvl #define _vel_vfmksge_mvml __builtin_ve_vl_vfmksge_mvml #define _vel_vfmksle_mvl __builtin_ve_vl_vfmksle_mvl #define _vel_vfmksle_mvml __builtin_ve_vl_vfmksle_mvml #define _vel_vfmksnum_mvl __builtin_ve_vl_vfmksnum_mvl #define _vel_vfmksnum_mvml __builtin_ve_vl_vfmksnum_mvml #define _vel_vfmksnan_mvl __builtin_ve_vl_vfmksnan_mvl #define _vel_vfmksnan_mvml __builtin_ve_vl_vfmksnan_mvml #define _vel_vfmksgtnan_mvl __builtin_ve_vl_vfmksgtnan_mvl #define _vel_vfmksgtnan_mvml __builtin_ve_vl_vfmksgtnan_mvml #define _vel_vfmksltnan_mvl __builtin_ve_vl_vfmksltnan_mvl #define _vel_vfmksltnan_mvml __builtin_ve_vl_vfmksltnan_mvml #define _vel_vfmksnenan_mvl __builtin_ve_vl_vfmksnenan_mvl #define _vel_vfmksnenan_mvml __builtin_ve_vl_vfmksnenan_mvml #define _vel_vfmkseqnan_mvl __builtin_ve_vl_vfmkseqnan_mvl #define _vel_vfmkseqnan_mvml __builtin_ve_vl_vfmkseqnan_mvml #define _vel_vfmksgenan_mvl __builtin_ve_vl_vfmksgenan_mvl #define _vel_vfmksgenan_mvml __builtin_ve_vl_vfmksgenan_mvml #define _vel_vfmkslenan_mvl __builtin_ve_vl_vfmkslenan_mvl #define _vel_vfmkslenan_mvml __builtin_ve_vl_vfmkslenan_mvml #define _vel_pvfmkslogt_mvl __builtin_ve_vl_pvfmkslogt_mvl #define _vel_pvfmksupgt_mvl __builtin_ve_vl_pvfmksupgt_mvl #define _vel_pvfmkslogt_mvml __builtin_ve_vl_pvfmkslogt_mvml #define _vel_pvfmksupgt_mvml __builtin_ve_vl_pvfmksupgt_mvml #define _vel_pvfmkslolt_mvl __builtin_ve_vl_pvfmkslolt_mvl #define _vel_pvfmksuplt_mvl __builtin_ve_vl_pvfmksuplt_mvl #define _vel_pvfmkslolt_mvml __builtin_ve_vl_pvfmkslolt_mvml #define _vel_pvfmksuplt_mvml __builtin_ve_vl_pvfmksuplt_mvml #define _vel_pvfmkslone_mvl __builtin_ve_vl_pvfmkslone_mvl #define _vel_pvfmksupne_mvl __builtin_ve_vl_pvfmksupne_mvl #define _vel_pvfmkslone_mvml __builtin_ve_vl_pvfmkslone_mvml #define _vel_pvfmksupne_mvml __builtin_ve_vl_pvfmksupne_mvml #define _vel_pvfmksloeq_mvl __builtin_ve_vl_pvfmksloeq_mvl #define _vel_pvfmksupeq_mvl __builtin_ve_vl_pvfmksupeq_mvl #define _vel_pvfmksloeq_mvml __builtin_ve_vl_pvfmksloeq_mvml #define _vel_pvfmksupeq_mvml __builtin_ve_vl_pvfmksupeq_mvml #define _vel_pvfmksloge_mvl __builtin_ve_vl_pvfmksloge_mvl #define _vel_pvfmksupge_mvl __builtin_ve_vl_pvfmksupge_mvl #define _vel_pvfmksloge_mvml __builtin_ve_vl_pvfmksloge_mvml #define _vel_pvfmksupge_mvml __builtin_ve_vl_pvfmksupge_mvml #define _vel_pvfmkslole_mvl __builtin_ve_vl_pvfmkslole_mvl #define _vel_pvfmksuple_mvl __builtin_ve_vl_pvfmksuple_mvl #define _vel_pvfmkslole_mvml __builtin_ve_vl_pvfmkslole_mvml #define _vel_pvfmksuple_mvml __builtin_ve_vl_pvfmksuple_mvml #define _vel_pvfmkslonum_mvl __builtin_ve_vl_pvfmkslonum_mvl #define _vel_pvfmksupnum_mvl __builtin_ve_vl_pvfmksupnum_mvl #define _vel_pvfmkslonum_mvml __builtin_ve_vl_pvfmkslonum_mvml #define _vel_pvfmksupnum_mvml __builtin_ve_vl_pvfmksupnum_mvml #define _vel_pvfmkslonan_mvl __builtin_ve_vl_pvfmkslonan_mvl #define _vel_pvfmksupnan_mvl __builtin_ve_vl_pvfmksupnan_mvl #define _vel_pvfmkslonan_mvml __builtin_ve_vl_pvfmkslonan_mvml #define _vel_pvfmksupnan_mvml __builtin_ve_vl_pvfmksupnan_mvml #define _vel_pvfmkslogtnan_mvl __builtin_ve_vl_pvfmkslogtnan_mvl #define 
_vel_pvfmksupgtnan_mvl __builtin_ve_vl_pvfmksupgtnan_mvl #define _vel_pvfmkslogtnan_mvml __builtin_ve_vl_pvfmkslogtnan_mvml #define _vel_pvfmksupgtnan_mvml __builtin_ve_vl_pvfmksupgtnan_mvml #define _vel_pvfmksloltnan_mvl __builtin_ve_vl_pvfmksloltnan_mvl #define _vel_pvfmksupltnan_mvl __builtin_ve_vl_pvfmksupltnan_mvl #define _vel_pvfmksloltnan_mvml __builtin_ve_vl_pvfmksloltnan_mvml #define _vel_pvfmksupltnan_mvml __builtin_ve_vl_pvfmksupltnan_mvml #define _vel_pvfmkslonenan_mvl __builtin_ve_vl_pvfmkslonenan_mvl #define _vel_pvfmksupnenan_mvl __builtin_ve_vl_pvfmksupnenan_mvl #define _vel_pvfmkslonenan_mvml __builtin_ve_vl_pvfmkslonenan_mvml #define _vel_pvfmksupnenan_mvml __builtin_ve_vl_pvfmksupnenan_mvml #define _vel_pvfmksloeqnan_mvl __builtin_ve_vl_pvfmksloeqnan_mvl #define _vel_pvfmksupeqnan_mvl __builtin_ve_vl_pvfmksupeqnan_mvl #define _vel_pvfmksloeqnan_mvml __builtin_ve_vl_pvfmksloeqnan_mvml #define _vel_pvfmksupeqnan_mvml __builtin_ve_vl_pvfmksupeqnan_mvml #define _vel_pvfmkslogenan_mvl __builtin_ve_vl_pvfmkslogenan_mvl #define _vel_pvfmksupgenan_mvl __builtin_ve_vl_pvfmksupgenan_mvl #define _vel_pvfmkslogenan_mvml __builtin_ve_vl_pvfmkslogenan_mvml #define _vel_pvfmksupgenan_mvml __builtin_ve_vl_pvfmksupgenan_mvml #define _vel_pvfmkslolenan_mvl __builtin_ve_vl_pvfmkslolenan_mvl #define _vel_pvfmksuplenan_mvl __builtin_ve_vl_pvfmksuplenan_mvl #define _vel_pvfmkslolenan_mvml __builtin_ve_vl_pvfmkslolenan_mvml #define _vel_pvfmksuplenan_mvml __builtin_ve_vl_pvfmksuplenan_mvml #define _vel_pvfmksgt_Mvl __builtin_ve_vl_pvfmksgt_Mvl #define _vel_pvfmksgt_MvMl __builtin_ve_vl_pvfmksgt_MvMl #define _vel_pvfmkslt_Mvl __builtin_ve_vl_pvfmkslt_Mvl #define _vel_pvfmkslt_MvMl __builtin_ve_vl_pvfmkslt_MvMl #define _vel_pvfmksne_Mvl __builtin_ve_vl_pvfmksne_Mvl #define _vel_pvfmksne_MvMl __builtin_ve_vl_pvfmksne_MvMl #define _vel_pvfmkseq_Mvl __builtin_ve_vl_pvfmkseq_Mvl #define _vel_pvfmkseq_MvMl __builtin_ve_vl_pvfmkseq_MvMl #define _vel_pvfmksge_Mvl __builtin_ve_vl_pvfmksge_Mvl #define _vel_pvfmksge_MvMl __builtin_ve_vl_pvfmksge_MvMl #define _vel_pvfmksle_Mvl __builtin_ve_vl_pvfmksle_Mvl #define _vel_pvfmksle_MvMl __builtin_ve_vl_pvfmksle_MvMl #define _vel_pvfmksnum_Mvl __builtin_ve_vl_pvfmksnum_Mvl #define _vel_pvfmksnum_MvMl __builtin_ve_vl_pvfmksnum_MvMl #define _vel_pvfmksnan_Mvl __builtin_ve_vl_pvfmksnan_Mvl #define _vel_pvfmksnan_MvMl __builtin_ve_vl_pvfmksnan_MvMl #define _vel_pvfmksgtnan_Mvl __builtin_ve_vl_pvfmksgtnan_Mvl #define _vel_pvfmksgtnan_MvMl __builtin_ve_vl_pvfmksgtnan_MvMl #define _vel_pvfmksltnan_Mvl __builtin_ve_vl_pvfmksltnan_Mvl #define _vel_pvfmksltnan_MvMl __builtin_ve_vl_pvfmksltnan_MvMl #define _vel_pvfmksnenan_Mvl __builtin_ve_vl_pvfmksnenan_Mvl #define _vel_pvfmksnenan_MvMl __builtin_ve_vl_pvfmksnenan_MvMl #define _vel_pvfmkseqnan_Mvl __builtin_ve_vl_pvfmkseqnan_Mvl #define _vel_pvfmkseqnan_MvMl __builtin_ve_vl_pvfmkseqnan_MvMl #define _vel_pvfmksgenan_Mvl __builtin_ve_vl_pvfmksgenan_Mvl #define _vel_pvfmksgenan_MvMl __builtin_ve_vl_pvfmksgenan_MvMl #define _vel_pvfmkslenan_Mvl __builtin_ve_vl_pvfmkslenan_Mvl #define _vel_pvfmkslenan_MvMl __builtin_ve_vl_pvfmkslenan_MvMl #define _vel_vsumwsx_vvl __builtin_ve_vl_vsumwsx_vvl #define _vel_vsumwsx_vvml __builtin_ve_vl_vsumwsx_vvml #define _vel_vsumwzx_vvl __builtin_ve_vl_vsumwzx_vvl #define _vel_vsumwzx_vvml __builtin_ve_vl_vsumwzx_vvml #define _vel_vsuml_vvl __builtin_ve_vl_vsuml_vvl #define _vel_vsuml_vvml __builtin_ve_vl_vsuml_vvml #define _vel_vfsumd_vvl __builtin_ve_vl_vfsumd_vvl #define _vel_vfsumd_vvml 
__builtin_ve_vl_vfsumd_vvml #define _vel_vfsums_vvl __builtin_ve_vl_vfsums_vvl #define _vel_vfsums_vvml __builtin_ve_vl_vfsums_vvml #define _vel_vrmaxswfstsx_vvl __builtin_ve_vl_vrmaxswfstsx_vvl #define _vel_vrmaxswfstsx_vvvl __builtin_ve_vl_vrmaxswfstsx_vvvl #define _vel_vrmaxswlstsx_vvl __builtin_ve_vl_vrmaxswlstsx_vvl #define _vel_vrmaxswlstsx_vvvl __builtin_ve_vl_vrmaxswlstsx_vvvl #define _vel_vrmaxswfstzx_vvl __builtin_ve_vl_vrmaxswfstzx_vvl #define _vel_vrmaxswfstzx_vvvl __builtin_ve_vl_vrmaxswfstzx_vvvl #define _vel_vrmaxswlstzx_vvl __builtin_ve_vl_vrmaxswlstzx_vvl #define _vel_vrmaxswlstzx_vvvl __builtin_ve_vl_vrmaxswlstzx_vvvl #define _vel_vrminswfstsx_vvl __builtin_ve_vl_vrminswfstsx_vvl #define _vel_vrminswfstsx_vvvl __builtin_ve_vl_vrminswfstsx_vvvl #define _vel_vrminswlstsx_vvl __builtin_ve_vl_vrminswlstsx_vvl #define _vel_vrminswlstsx_vvvl __builtin_ve_vl_vrminswlstsx_vvvl #define _vel_vrminswfstzx_vvl __builtin_ve_vl_vrminswfstzx_vvl #define _vel_vrminswfstzx_vvvl __builtin_ve_vl_vrminswfstzx_vvvl #define _vel_vrminswlstzx_vvl __builtin_ve_vl_vrminswlstzx_vvl #define _vel_vrminswlstzx_vvvl __builtin_ve_vl_vrminswlstzx_vvvl #define _vel_vrmaxslfst_vvl __builtin_ve_vl_vrmaxslfst_vvl #define _vel_vrmaxslfst_vvvl __builtin_ve_vl_vrmaxslfst_vvvl #define _vel_vrmaxsllst_vvl __builtin_ve_vl_vrmaxsllst_vvl #define _vel_vrmaxsllst_vvvl __builtin_ve_vl_vrmaxsllst_vvvl #define _vel_vrminslfst_vvl __builtin_ve_vl_vrminslfst_vvl #define _vel_vrminslfst_vvvl __builtin_ve_vl_vrminslfst_vvvl #define _vel_vrminsllst_vvl __builtin_ve_vl_vrminsllst_vvl #define _vel_vrminsllst_vvvl __builtin_ve_vl_vrminsllst_vvvl #define _vel_vfrmaxdfst_vvl __builtin_ve_vl_vfrmaxdfst_vvl #define _vel_vfrmaxdfst_vvvl __builtin_ve_vl_vfrmaxdfst_vvvl #define _vel_vfrmaxdlst_vvl __builtin_ve_vl_vfrmaxdlst_vvl #define _vel_vfrmaxdlst_vvvl __builtin_ve_vl_vfrmaxdlst_vvvl #define _vel_vfrmaxsfst_vvl __builtin_ve_vl_vfrmaxsfst_vvl #define _vel_vfrmaxsfst_vvvl __builtin_ve_vl_vfrmaxsfst_vvvl #define _vel_vfrmaxslst_vvl __builtin_ve_vl_vfrmaxslst_vvl #define _vel_vfrmaxslst_vvvl __builtin_ve_vl_vfrmaxslst_vvvl #define _vel_vfrmindfst_vvl __builtin_ve_vl_vfrmindfst_vvl #define _vel_vfrmindfst_vvvl __builtin_ve_vl_vfrmindfst_vvvl #define _vel_vfrmindlst_vvl __builtin_ve_vl_vfrmindlst_vvl #define _vel_vfrmindlst_vvvl __builtin_ve_vl_vfrmindlst_vvvl #define _vel_vfrminsfst_vvl __builtin_ve_vl_vfrminsfst_vvl #define _vel_vfrminsfst_vvvl __builtin_ve_vl_vfrminsfst_vvvl #define _vel_vfrminslst_vvl __builtin_ve_vl_vfrminslst_vvl #define _vel_vfrminslst_vvvl __builtin_ve_vl_vfrminslst_vvvl #define _vel_vrand_vvl __builtin_ve_vl_vrand_vvl #define _vel_vrand_vvml __builtin_ve_vl_vrand_vvml #define _vel_vror_vvl __builtin_ve_vl_vror_vvl #define _vel_vror_vvml __builtin_ve_vl_vror_vvml #define _vel_vrxor_vvl __builtin_ve_vl_vrxor_vvl #define _vel_vrxor_vvml __builtin_ve_vl_vrxor_vvml #define _vel_vgt_vvssl __builtin_ve_vl_vgt_vvssl #define _vel_vgt_vvssvl __builtin_ve_vl_vgt_vvssvl #define _vel_vgt_vvssml __builtin_ve_vl_vgt_vvssml #define _vel_vgt_vvssmvl __builtin_ve_vl_vgt_vvssmvl #define _vel_vgtnc_vvssl __builtin_ve_vl_vgtnc_vvssl #define _vel_vgtnc_vvssvl __builtin_ve_vl_vgtnc_vvssvl #define _vel_vgtnc_vvssml __builtin_ve_vl_vgtnc_vvssml #define _vel_vgtnc_vvssmvl __builtin_ve_vl_vgtnc_vvssmvl #define _vel_vgtu_vvssl __builtin_ve_vl_vgtu_vvssl #define _vel_vgtu_vvssvl __builtin_ve_vl_vgtu_vvssvl #define _vel_vgtu_vvssml __builtin_ve_vl_vgtu_vvssml #define _vel_vgtu_vvssmvl __builtin_ve_vl_vgtu_vvssmvl #define _vel_vgtunc_vvssl 
__builtin_ve_vl_vgtunc_vvssl #define _vel_vgtunc_vvssvl __builtin_ve_vl_vgtunc_vvssvl #define _vel_vgtunc_vvssml __builtin_ve_vl_vgtunc_vvssml #define _vel_vgtunc_vvssmvl __builtin_ve_vl_vgtunc_vvssmvl #define _vel_vgtlsx_vvssl __builtin_ve_vl_vgtlsx_vvssl #define _vel_vgtlsx_vvssvl __builtin_ve_vl_vgtlsx_vvssvl #define _vel_vgtlsx_vvssml __builtin_ve_vl_vgtlsx_vvssml #define _vel_vgtlsx_vvssmvl __builtin_ve_vl_vgtlsx_vvssmvl #define _vel_vgtlsxnc_vvssl __builtin_ve_vl_vgtlsxnc_vvssl #define _vel_vgtlsxnc_vvssvl __builtin_ve_vl_vgtlsxnc_vvssvl #define _vel_vgtlsxnc_vvssml __builtin_ve_vl_vgtlsxnc_vvssml #define _vel_vgtlsxnc_vvssmvl __builtin_ve_vl_vgtlsxnc_vvssmvl #define _vel_vgtlzx_vvssl __builtin_ve_vl_vgtlzx_vvssl #define _vel_vgtlzx_vvssvl __builtin_ve_vl_vgtlzx_vvssvl #define _vel_vgtlzx_vvssml __builtin_ve_vl_vgtlzx_vvssml #define _vel_vgtlzx_vvssmvl __builtin_ve_vl_vgtlzx_vvssmvl #define _vel_vgtlzxnc_vvssl __builtin_ve_vl_vgtlzxnc_vvssl #define _vel_vgtlzxnc_vvssvl __builtin_ve_vl_vgtlzxnc_vvssvl #define _vel_vgtlzxnc_vvssml __builtin_ve_vl_vgtlzxnc_vvssml #define _vel_vgtlzxnc_vvssmvl __builtin_ve_vl_vgtlzxnc_vvssmvl #define _vel_vsc_vvssl __builtin_ve_vl_vsc_vvssl #define _vel_vsc_vvssml __builtin_ve_vl_vsc_vvssml #define _vel_vscnc_vvssl __builtin_ve_vl_vscnc_vvssl #define _vel_vscnc_vvssml __builtin_ve_vl_vscnc_vvssml #define _vel_vscot_vvssl __builtin_ve_vl_vscot_vvssl #define _vel_vscot_vvssml __builtin_ve_vl_vscot_vvssml #define _vel_vscncot_vvssl __builtin_ve_vl_vscncot_vvssl #define _vel_vscncot_vvssml __builtin_ve_vl_vscncot_vvssml #define _vel_vscu_vvssl __builtin_ve_vl_vscu_vvssl #define _vel_vscu_vvssml __builtin_ve_vl_vscu_vvssml #define _vel_vscunc_vvssl __builtin_ve_vl_vscunc_vvssl #define _vel_vscunc_vvssml __builtin_ve_vl_vscunc_vvssml #define _vel_vscuot_vvssl __builtin_ve_vl_vscuot_vvssl #define _vel_vscuot_vvssml __builtin_ve_vl_vscuot_vvssml #define _vel_vscuncot_vvssl __builtin_ve_vl_vscuncot_vvssl #define _vel_vscuncot_vvssml __builtin_ve_vl_vscuncot_vvssml #define _vel_vscl_vvssl __builtin_ve_vl_vscl_vvssl #define _vel_vscl_vvssml __builtin_ve_vl_vscl_vvssml #define _vel_vsclnc_vvssl __builtin_ve_vl_vsclnc_vvssl #define _vel_vsclnc_vvssml __builtin_ve_vl_vsclnc_vvssml #define _vel_vsclot_vvssl __builtin_ve_vl_vsclot_vvssl #define _vel_vsclot_vvssml __builtin_ve_vl_vsclot_vvssml #define _vel_vsclncot_vvssl __builtin_ve_vl_vsclncot_vvssl #define _vel_vsclncot_vvssml __builtin_ve_vl_vsclncot_vvssml #define _vel_andm_mmm __builtin_ve_vl_andm_mmm #define _vel_andm_MMM __builtin_ve_vl_andm_MMM #define _vel_orm_mmm __builtin_ve_vl_orm_mmm #define _vel_orm_MMM __builtin_ve_vl_orm_MMM #define _vel_xorm_mmm __builtin_ve_vl_xorm_mmm #define _vel_xorm_MMM __builtin_ve_vl_xorm_MMM #define _vel_eqvm_mmm __builtin_ve_vl_eqvm_mmm #define _vel_eqvm_MMM __builtin_ve_vl_eqvm_MMM #define _vel_nndm_mmm __builtin_ve_vl_nndm_mmm #define _vel_nndm_MMM __builtin_ve_vl_nndm_MMM #define _vel_negm_mm __builtin_ve_vl_negm_mm #define _vel_negm_MM __builtin_ve_vl_negm_MM #define _vel_pcvm_sml __builtin_ve_vl_pcvm_sml #define _vel_lzvm_sml __builtin_ve_vl_lzvm_sml #define _vel_tovm_sml __builtin_ve_vl_tovm_sml #define _vel_lcr_sss __builtin_ve_vl_lcr_sss #define _vel_scr_sss __builtin_ve_vl_scr_sss #define _vel_tscr_ssss __builtin_ve_vl_tscr_ssss #define _vel_fidcr_sss __builtin_ve_vl_fidcr_sss #define _vel_fencei __builtin_ve_vl_fencei #define _vel_fencem_s __builtin_ve_vl_fencem_s #define _vel_fencec_s __builtin_ve_vl_fencec_s #define _vel_svob __builtin_ve_vl_svob 
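/*
 * Illustrative sketch for the _vel_* alias layer above (assumes velintrin.h,
 * the header these defines come from, has been included). Each _vel_* name is
 * a plain macro alias for the matching __builtin_ve_vl_* builtin, so the two
 * spellings compile to the same call; the two-level stringize below makes the
 * expansion visible without assuming any particular builtin signature. The
 * helper macros and the variable name are hypothetical, not from the header.
 */
#define VEL_STR2_(x) #x
#define VEL_STR_(x) VEL_STR2_(x)
static const char *const vel_alias_demo = VEL_STR_(_vel_svob); /* "__builtin_ve_vl_svob" */
#undef VEL_STR_
#undef VEL_STR2_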
wbnoinvdintrin.hwmmintrin.h//===-- sanitizer/hwasan_interface.h ----------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is a part of HWAddressSanitizer. // // Public interface header. //===----------------------------------------------------------------------===// #ifndef SANITIZER_HWASAN_INTERFACE_H #define SANITIZER_HWASAN_INTERFACE_H #include #ifdef __cplusplus extern "C" { #endif // Libc hook for program startup in statically linked executables. // Initializes enough of the runtime to run instrumented code. This function // should only be called in statically linked executables because it modifies // the GOT, which won't work in regular binaries because RELRO will already // have been applied by the time the function is called. This also means that // the function should be called before libc applies RELRO. // Does not call libc unless there is an error. // Can be called multiple times. void SANITIZER_CDECL __hwasan_init_static(void); // This function may be optionally provided by user and should return // a string containing HWASan runtime options. See asan_flags.h for details. const char *SANITIZER_CDECL __hwasan_default_options(void); void SANITIZER_CDECL __hwasan_enable_allocator_tagging(void); void SANITIZER_CDECL __hwasan_disable_allocator_tagging(void); // Mark region of memory with the given tag. Both address and size need to be // 16-byte aligned. void SANITIZER_CDECL __hwasan_tag_memory(const volatile void *p, unsigned char tag, size_t size); /// Set pointer tag. Previous tag is lost. void *SANITIZER_CDECL __hwasan_tag_pointer(const volatile void *p, unsigned char tag); /// Get tag from the pointer. unsigned char SANITIZER_CDECL __hwasan_get_tag_from_pointer(const volatile void *p); // Set memory tag from the current SP address to the given address to zero. // This is meant to annotate longjmp and other non-local jumps. // This function needs to know the (almost) exact destination frame address; // clearing shadow for the entire thread stack like __asan_handle_no_return // does would cause false reports. void SANITIZER_CDECL __hwasan_handle_longjmp(const void *sp_dst); // Set memory tag for the part of the current thread stack below sp_dst to // zero. Call this in vfork() before returning in the parent process. void SANITIZER_CDECL __hwasan_handle_vfork(const void *sp_dst); // Libc hook for thread creation. Should be called in the child thread before // any instrumented code. void SANITIZER_CDECL __hwasan_thread_enter(); // Libc hook for thread destruction. No instrumented code should run after // this call. void SANITIZER_CDECL __hwasan_thread_exit(); // Print shadow and origin for the memory range to stderr in a human-readable // format. void SANITIZER_CDECL __hwasan_print_shadow(const volatile void *x, size_t size); // Print one-line report about the memory usage of the current process. void SANITIZER_CDECL __hwasan_print_memory_usage(); /* Returns the offset of the first byte in the memory range that can not be * accessed through the pointer in x, or -1 if the whole range is good. */ intptr_t SANITIZER_CDECL __hwasan_test_shadow(const volatile void *x, size_t size); /* Sets the callback function to be called during HWASan error reporting. 
*/ void SANITIZER_CDECL __hwasan_set_error_report_callback(void (*callback)(const char *)); int SANITIZER_CDECL __sanitizer_posix_memalign(void **memptr, size_t alignment, size_t size); void *SANITIZER_CDECL __sanitizer_memalign(size_t alignment, size_t size); void *SANITIZER_CDECL __sanitizer_aligned_alloc(size_t alignment, size_t size); void *SANITIZER_CDECL __sanitizer___libc_memalign(size_t alignment, size_t size); void *SANITIZER_CDECL __sanitizer_valloc(size_t size); void *SANITIZER_CDECL __sanitizer_pvalloc(size_t size); void SANITIZER_CDECL __sanitizer_free(void *ptr); void SANITIZER_CDECL __sanitizer_cfree(void *ptr); size_t SANITIZER_CDECL __sanitizer_malloc_usable_size(const void *ptr); struct mallinfo SANITIZER_CDECL __sanitizer_mallinfo(); int SANITIZER_CDECL __sanitizer_mallopt(int cmd, int value); void SANITIZER_CDECL __sanitizer_malloc_stats(void); void *SANITIZER_CDECL __sanitizer_calloc(size_t nmemb, size_t size); void *SANITIZER_CDECL __sanitizer_realloc(void *ptr, size_t size); void *SANITIZER_CDECL __sanitizer_reallocarray(void *ptr, size_t nmemb, size_t size); void *SANITIZER_CDECL __sanitizer_malloc(size_t size); #ifdef __cplusplus } // extern "C" #endif #endif // SANITIZER_HWASAN_INTERFACE_H Miss fraction part in number.Unspecific syntax error.\\jsontext size: [:blank:]external/regex-re2/re2/compile.cccapture %d -> %d\%03ounhandled DoCoalesce failed: r1->op() is Caucasian_AlbanianCherokeeNoPsalter_PahlaviSyloti_Nagrip.p_type == PT_NULL%s: too many interesting LOAD segments: %zu >= %zunothing in arena to freeprev < nextsDu&&&stringfn != &InitAndGetCPU=================================== w->waitp->cond == nullptrthread should hold at least a read lock on Mutex %p %ss->waitp == nullptr || s->waitp == waitp || s->suppress_fatal_errorsclock_gettime(c, &now) == 0-00zoneinfo/tzif2/TLS13-KDF failed. CRYPTOCOMPmalloc failureexternal/boringssl/src/crypto/evp/p_hkdf.ckythe.proto.BuildDetails.rule_typekythe.proto.common.CorpusPath.rootkythe.proto.ContextDependentVersion.Row.source_contextkythe.proto.VName.corpusCould not parse vname generator configuration: : mapped to MSMode/*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_CUDA_COMPLEX_BUILTINS #define __CLANG_CUDA_COMPLEX_BUILTINS // This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are // libgcc functions that clang assumes are available when compiling c99 complex // operations. (These implementations come from libc++, and have been modified // to work with CUDA and OpenMP target offloading [in C and C++ mode].) #pragma push_macro("__DEVICE__") #if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__) #pragma omp declare target #define __DEVICE__ __attribute__((noinline, nothrow, cold, weak)) #else #define __DEVICE__ __device__ inline #endif // To make the algorithms available for C and C++ in CUDA and OpenMP we select // different but equivalent function versions. TODO: For OpenMP we currently // select the native builtins as the overload support for templates is lacking. 
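/*
 * Usage sketch for the HWASan interface declared above: allocate through the
 * sanitizer allocator hooks, then inspect the pointer tag and shadow. This is
 * an illustration only; it assumes the program is built and run with the
 * HWASan runtime (-fsanitize=hwaddress on a supported target) and that these
 * declarations come from <sanitizer/hwasan_interface.h>.
 */
#include <sanitizer/hwasan_interface.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  /* 16-byte aligned allocation via the allocator entry points above. */
  void *p = __sanitizer_memalign(16, 64);
  if (!p)
    return 1;

  /* __hwasan_test_shadow returns -1 when every byte in [p, p+64) is
     accessible through p, otherwise the offset of the first bad byte. */
  intptr_t bad = __hwasan_test_shadow(p, 64);
  printf("pointer tag = %u, first bad offset = %ld\n",
         (unsigned)__hwasan_get_tag_from_pointer(p), (long)bad);

  __sanitizer_free(p);
  return 0;
}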
#if !defined(__OPENMP_NVPTX__) && !defined(__OPENMP_AMDGCN__) #define _ISNANd std::isnan #define _ISNANf std::isnan #define _ISINFd std::isinf #define _ISINFf std::isinf #define _ISFINITEd std::isfinite #define _ISFINITEf std::isfinite #define _COPYSIGNd std::copysign #define _COPYSIGNf std::copysign #define _SCALBNd std::scalbn #define _SCALBNf std::scalbn #define _ABSd std::abs #define _ABSf std::abs #define _LOGBd std::logb #define _LOGBf std::logb // Rather than pulling in std::max from algorithm everytime, use available ::max. #define _fmaxd max #define _fmaxf max #else #ifdef __AMDGCN__ #define _ISNANd __ocml_isnan_f64 #define _ISNANf __ocml_isnan_f32 #define _ISINFd __ocml_isinf_f64 #define _ISINFf __ocml_isinf_f32 #define _ISFINITEd __ocml_isfinite_f64 #define _ISFINITEf __ocml_isfinite_f32 #define _COPYSIGNd __ocml_copysign_f64 #define _COPYSIGNf __ocml_copysign_f32 #define _SCALBNd __ocml_scalbn_f64 #define _SCALBNf __ocml_scalbn_f32 #define _ABSd __ocml_fabs_f64 #define _ABSf __ocml_fabs_f32 #define _LOGBd __ocml_logb_f64 #define _LOGBf __ocml_logb_f32 #define _fmaxd __ocml_fmax_f64 #define _fmaxf __ocml_fmax_f32 #else #define _ISNANd __nv_isnand #define _ISNANf __nv_isnanf #define _ISINFd __nv_isinfd #define _ISINFf __nv_isinff #define _ISFINITEd __nv_isfinited #define _ISFINITEf __nv_finitef #define _COPYSIGNd __nv_copysign #define _COPYSIGNf __nv_copysignf #define _SCALBNd __nv_scalbn #define _SCALBNf __nv_scalbnf #define _ABSd __nv_fabs #define _ABSf __nv_fabsf #define _LOGBd __nv_logb #define _LOGBf __nv_logbf #define _fmaxd __nv_fmax #define _fmaxf __nv_fmaxf #endif #endif #if defined(__cplusplus) extern "C" { #endif __DEVICE__ double _Complex __muldc3(double __a, double __b, double __c, double __d) { double __ac = __a * __c; double __bd = __b * __d; double __ad = __a * __d; double __bc = __b * __c; double _Complex z; __real__(z) = __ac - __bd; __imag__(z) = __ad + __bc; if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) { int __recalc = 0; if (_ISINFd(__a) || _ISINFd(__b)) { __a = _COPYSIGNd(_ISINFd(__a) ? 1 : 0, __a); __b = _COPYSIGNd(_ISINFd(__b) ? 1 : 0, __b); if (_ISNANd(__c)) __c = _COPYSIGNd(0, __c); if (_ISNANd(__d)) __d = _COPYSIGNd(0, __d); __recalc = 1; } if (_ISINFd(__c) || _ISINFd(__d)) { __c = _COPYSIGNd(_ISINFd(__c) ? 1 : 0, __c); __d = _COPYSIGNd(_ISINFd(__d) ? 1 : 0, __d); if (_ISNANd(__a)) __a = _COPYSIGNd(0, __a); if (_ISNANd(__b)) __b = _COPYSIGNd(0, __b); __recalc = 1; } if (!__recalc && (_ISINFd(__ac) || _ISINFd(__bd) || _ISINFd(__ad) || _ISINFd(__bc))) { if (_ISNANd(__a)) __a = _COPYSIGNd(0, __a); if (_ISNANd(__b)) __b = _COPYSIGNd(0, __b); if (_ISNANd(__c)) __c = _COPYSIGNd(0, __c); if (_ISNANd(__d)) __d = _COPYSIGNd(0, __d); __recalc = 1; } if (__recalc) { // Can't use std::numeric_limits::infinity() -- that doesn't have // a device overload (and isn't constexpr before C++11, naturally). __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d); __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c); } } return z; } __DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) { float __ac = __a * __c; float __bd = __b * __d; float __ad = __a * __d; float __bc = __b * __c; float _Complex z; __real__(z) = __ac - __bd; __imag__(z) = __ad + __bc; if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) { int __recalc = 0; if (_ISINFf(__a) || _ISINFf(__b)) { __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a); __b = _COPYSIGNf(_ISINFf(__b) ? 
1 : 0, __b); if (_ISNANf(__c)) __c = _COPYSIGNf(0, __c); if (_ISNANf(__d)) __d = _COPYSIGNf(0, __d); __recalc = 1; } if (_ISINFf(__c) || _ISINFf(__d)) { __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c); __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d); if (_ISNANf(__a)) __a = _COPYSIGNf(0, __a); if (_ISNANf(__b)) __b = _COPYSIGNf(0, __b); __recalc = 1; } if (!__recalc && (_ISINFf(__ac) || _ISINFf(__bd) || _ISINFf(__ad) || _ISINFf(__bc))) { if (_ISNANf(__a)) __a = _COPYSIGNf(0, __a); if (_ISNANf(__b)) __b = _COPYSIGNf(0, __b); if (_ISNANf(__c)) __c = _COPYSIGNf(0, __c); if (_ISNANf(__d)) __d = _COPYSIGNf(0, __d); __recalc = 1; } if (__recalc) { __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d); __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c); } } return z; } __DEVICE__ double _Complex __divdc3(double __a, double __b, double __c, double __d) { int __ilogbw = 0; // Can't use std::max, because that's defined in , and we don't // want to pull that in for every compile. The CUDA headers define // ::max(float, float) and ::max(double, double), which is sufficient for us. double __logbw = _LOGBd(_fmaxd(_ABSd(__c), _ABSd(__d))); if (_ISFINITEd(__logbw)) { __ilogbw = (int)__logbw; __c = _SCALBNd(__c, -__ilogbw); __d = _SCALBNd(__d, -__ilogbw); } double __denom = __c * __c + __d * __d; double _Complex z; __real__(z) = _SCALBNd((__a * __c + __b * __d) / __denom, -__ilogbw); __imag__(z) = _SCALBNd((__b * __c - __a * __d) / __denom, -__ilogbw); if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) { if ((__denom == 0.0) && (!_ISNANd(__a) || !_ISNANd(__b))) { __real__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __a; __imag__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __b; } else if ((_ISINFd(__a) || _ISINFd(__b)) && _ISFINITEd(__c) && _ISFINITEd(__d)) { __a = _COPYSIGNd(_ISINFd(__a) ? 1.0 : 0.0, __a); __b = _COPYSIGNd(_ISINFd(__b) ? 1.0 : 0.0, __b); __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d); __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d); } else if (_ISINFd(__logbw) && __logbw > 0.0 && _ISFINITEd(__a) && _ISFINITEd(__b)) { __c = _COPYSIGNd(_ISINFd(__c) ? 1.0 : 0.0, __c); __d = _COPYSIGNd(_ISINFd(__d) ? 1.0 : 0.0, __d); __real__(z) = 0.0 * (__a * __c + __b * __d); __imag__(z) = 0.0 * (__b * __c - __a * __d); } } return z; } __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) { int __ilogbw = 0; float __logbw = _LOGBf(_fmaxf(_ABSf(__c), _ABSf(__d))); if (_ISFINITEf(__logbw)) { __ilogbw = (int)__logbw; __c = _SCALBNf(__c, -__ilogbw); __d = _SCALBNf(__d, -__ilogbw); } float __denom = __c * __c + __d * __d; float _Complex z; __real__(z) = _SCALBNf((__a * __c + __b * __d) / __denom, -__ilogbw); __imag__(z) = _SCALBNf((__b * __c - __a * __d) / __denom, -__ilogbw); if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) { if ((__denom == 0) && (!_ISNANf(__a) || !_ISNANf(__b))) { __real__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __a; __imag__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __b; } else if ((_ISINFf(__a) || _ISINFf(__b)) && _ISFINITEf(__c) && _ISFINITEf(__d)) { __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a); __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b); __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d); __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d); } else if (_ISINFf(__logbw) && __logbw > 0 && _ISFINITEf(__a) && _ISFINITEf(__b)) { __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c); __d = _COPYSIGNf(_ISINFf(__d) ? 
1 : 0, __d); __real__(z) = 0 * (__a * __c + __b * __d); __imag__(z) = 0 * (__b * __c - __a * __d); } } return z; } #if defined(__cplusplus) } // extern "C" #endif #undef _ISNANd #undef _ISNANf #undef _ISINFd #undef _ISINFf #undef _COPYSIGNd #undef _COPYSIGNf #undef _ISFINITEd #undef _ISFINITEf #undef _SCALBNd #undef _SCALBNf #undef _ABSd #undef _ABSf #undef _LOGBd #undef _LOGBf #undef _fmaxd #undef _fmaxf #if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__) #pragma omp end declare target #endif #pragma pop_macro("__DEVICE__") #endif // __CLANG_CUDA_COMPLEX_BUILTINS /*===---- adxintrin.h - ADX intrinsics -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __ADXINTRIN_H #define __ADXINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("adx"))) /* Use C++ inline semantics in C++, GNU inline for C mode. */ #if defined(__cplusplus) #define __INLINE __inline #else #define __INLINE static __inline #endif #if defined(__cplusplus) extern "C" { #endif /* Intrinsics that are available only if __ADX__ is defined. */ /// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated /// by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory /// at \a __p, and returns the 8-bit carry-out (carry flag). /// /// \code{.operation} /// temp := (__cf == 0) ? 0 : 1 /// Store32(__p, __x + __y + temp) /// result := CF /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c ADCX instruction. /// /// \param __cf /// The 8-bit unsigned carry flag; any non-zero value indicates carry. /// \param __x /// A 32-bit unsigned addend. /// \param __y /// A 32-bit unsigned addend. /// \param __p /// Pointer to memory for storing the sum. /// \returns The 8-bit unsigned carry-out value. __INLINE unsigned char __DEFAULT_FN_ATTRS _addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y, unsigned int *__p) { return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p); } #ifdef __x86_64__ /// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated /// by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory /// at \a __p, and returns the 8-bit carry-out (carry flag). /// /// \code{.operation} /// temp := (__cf == 0) ? 0 : 1 /// Store64(__p, __x + __y + temp) /// result := CF /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c ADCX instruction. /// /// \param __cf /// The 8-bit unsigned carry flag; any non-zero value indicates carry. /// \param __x /// A 64-bit unsigned addend. /// \param __y /// A 64-bit unsigned addend. /// \param __p /// Pointer to memory for storing the sum. /// \returns The 8-bit unsigned carry-out value. 
__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarryx_u64(unsigned char __cf, unsigned long long __x, unsigned long long __y, unsigned long long *__p) { return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p); } #endif #if defined(__cplusplus) } #endif #undef __INLINE #undef __DEFAULT_FN_ATTRS #endif /* __ADXINTRIN_H */ amxcomplexintrin.h/*===------------- avx512pfintrin.h - PF intrinsics ------------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __AVX512PFINTRIN_H #define __AVX512PFINTRIN_H #define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \ __builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \ (void const *)(addr), (int)(scale), \ (int)(hint)) #define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) \ __builtin_ia32_gatherpfdpd((__mmask8) -1, (__v8si)(__m256i)(index), \ (void const *)(addr), (int)(scale), \ (int)(hint)) #define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) \ __builtin_ia32_gatherpfdps((__mmask16)(mask), \ (__v16si)(__m512i)(index), (void const *)(addr), \ (int)(scale), (int)(hint)) #define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) \ __builtin_ia32_gatherpfdps((__mmask16) -1, \ (__v16si)(__m512i)(index), (void const *)(addr), \ (int)(scale), (int)(hint)) #define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) \ __builtin_ia32_gatherpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \ (void const *)(addr), (int)(scale), \ (int)(hint)) #define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) \ __builtin_ia32_gatherpfqpd((__mmask8) -1, (__v8di)(__m512i)(index), \ (void const *)(addr), (int)(scale), \ (int)(hint)) #define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) \ __builtin_ia32_gatherpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \ (void const *)(addr), (int)(scale), (int)(hint)) #define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) \ __builtin_ia32_gatherpfqps((__mmask8) -1, (__v8di)(__m512i)(index), \ (void const *)(addr), (int)(scale), (int)(hint)) #define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) \ __builtin_ia32_scatterpfdpd((__mmask8)-1, (__v8si)(__m256i)(index), \ (void *)(addr), (int)(scale), \ (int)(hint)) #define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) \ __builtin_ia32_scatterpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \ (void *)(addr), (int)(scale), \ (int)(hint)) #define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) \ __builtin_ia32_scatterpfdps((__mmask16)-1, (__v16si)(__m512i)(index), \ (void *)(addr), (int)(scale), (int)(hint)) #define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) \ __builtin_ia32_scatterpfdps((__mmask16)(mask), \ (__v16si)(__m512i)(index), (void *)(addr), \ (int)(scale), (int)(hint)) #define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) \ __builtin_ia32_scatterpfqpd((__mmask8)-1, (__v8di)(__m512i)(index), \ (void *)(addr), (int)(scale), \ (int)(hint)) #define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) \ __builtin_ia32_scatterpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \ (void *)(addr), (int)(scale), \ (int)(hint)) #define 
_mm512_prefetch_i64scatter_ps(addr, index, scale, hint) \ __builtin_ia32_scatterpfqps((__mmask8)-1, (__v8di)(__m512i)(index), \ (void *)(addr), (int)(scale), (int)(hint)) #define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) \ __builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \ (void *)(addr), (int)(scale), (int)(hint)) #endif clwbintrin.hhtmxlintrin.h/*===---- prfchiintrin.h - PREFETCHI intrinsic -----------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __PRFCHIINTRIN_H #define __PRFCHIINTRIN_H #ifdef __x86_64__ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("prefetchi"))) /// Loads an instruction sequence containing the specified memory address into /// all level cache. /// /// Note that the effect of this intrinsic is dependent on the processor /// implementation. /// /// \headerfile /// /// This intrinsic corresponds to the \c PREFETCHIT0 instruction. /// /// \param __P /// A pointer specifying the memory address to be prefetched. static __inline__ void __DEFAULT_FN_ATTRS _m_prefetchit0(volatile const void *__P) { #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wcast-qual" __builtin_ia32_prefetchi((const void *)__P, 3 /* _MM_HINT_T0 */); #pragma clang diagnostic pop } /// Loads an instruction sequence containing the specified memory address into /// all but the first-level cache. /// /// Note that the effect of this intrinsic is dependent on the processor /// implementation. /// /// \headerfile /// /// This intrinsic corresponds to the \c PREFETCHIT1 instruction. /// /// \param __P /// A pointer specifying the memory address to be prefetched. static __inline__ void __DEFAULT_FN_ATTRS _m_prefetchit1(volatile const void *__P) { #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wcast-qual" __builtin_ia32_prefetchi((const void *)__P, 2 /* _MM_HINT_T1 */); #pragma clang diagnostic pop } #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS #endif /* __PRFCHWINTRIN_H */ /*===--------------- sha512intrin.h - SHA512 intrinsics -----------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif // __IMMINTRIN_H #ifndef __SHA512INTRIN_H #define __SHA512INTRIN_H #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, __target__("sha512"), \ __min_vector_width__(256))) /// This intrinisc is one of the two SHA512 message scheduling instructions. /// The intrinsic performs an intermediate calculation for the next four /// SHA512 message qwords. The calculated results are stored in \a dst. /// /// \headerfile /// /// \code /// __m256i _mm256_sha512msg1_epi64(__m256i __A, __m128i __B) /// \endcode /// /// This intrinsic corresponds to the \c VSHA512MSG1 instruction. /// /// \param __A /// A 256-bit vector of [4 x long long]. /// \param __B /// A 128-bit vector of [2 x long long]. /// \returns /// A 256-bit vector of [4 x long long]. 
/// /// \code{.operation} /// DEFINE ROR64(qword, n) { /// count := n % 64 /// dest := (qword >> count) | (qword << (64 - count)) /// RETURN dest /// } /// DEFINE SHR64(qword, n) { /// RETURN qword >> n /// } /// DEFINE s0(qword): /// RETURN ROR64(qword,1) ^ ROR64(qword, 8) ^ SHR64(qword, 7) /// } /// W[4] := __B.qword[0] /// W[3] := __A.qword[3] /// W[2] := __A.qword[2] /// W[1] := __A.qword[1] /// W[0] := __A.qword[0] /// dst.qword[3] := W[3] + s0(W[4]) /// dst.qword[2] := W[2] + s0(W[3]) /// dst.qword[1] := W[1] + s0(W[2]) /// dst.qword[0] := W[0] + s0(W[1]) /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sha512msg1_epi64(__m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_vsha512msg1((__v4du)__A, (__v2du)__B); } /// This intrinisc is one of the two SHA512 message scheduling instructions. /// The intrinsic performs the final calculation for the next four SHA512 /// message qwords. The calculated results are stored in \a dst. /// /// \headerfile /// /// \code /// __m256i _mm256_sha512msg2_epi64(__m256i __A, __m256i __B) /// \endcode /// /// This intrinsic corresponds to the \c VSHA512MSG2 instruction. /// /// \param __A /// A 256-bit vector of [4 x long long]. /// \param __B /// A 256-bit vector of [4 x long long]. /// \returns /// A 256-bit vector of [4 x long long]. /// /// \code{.operation} /// DEFINE ROR64(qword, n) { /// count := n % 64 /// dest := (qword >> count) | (qword << (64 - count)) /// RETURN dest /// } /// DEFINE SHR64(qword, n) { /// RETURN qword >> n /// } /// DEFINE s1(qword) { /// RETURN ROR64(qword,19) ^ ROR64(qword, 61) ^ SHR64(qword, 6) /// } /// W[14] := __B.qword[2] /// W[15] := __B.qword[3] /// W[16] := __A.qword[0] + s1(W[14]) /// W[17] := __A.qword[1] + s1(W[15]) /// W[18] := __A.qword[2] + s1(W[16]) /// W[19] := __A.qword[3] + s1(W[17]) /// dst.qword[3] := W[19] /// dst.qword[2] := W[18] /// dst.qword[1] := W[17] /// dst.qword[0] := W[16] /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sha512msg2_epi64(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vsha512msg2((__v4du)__A, (__v4du)__B); } /// This intrinisc performs two rounds of SHA512 operation using initial SHA512 /// state (C,D,G,H) from \a __A, an initial SHA512 state (A,B,E,F) from /// \a __A, and a pre-computed sum of the next two round message qwords and /// the corresponding round constants from \a __C (only the two lower qwords /// of the third operand). The updated SHA512 state (A,B,E,F) is written to /// \a __A, and \a __A can be used as the updated state (C,D,G,H) in later /// rounds. /// /// \headerfile /// /// \code /// __m256i _mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C) /// \endcode /// /// This intrinsic corresponds to the \c VSHA512RNDS2 instruction. /// /// \param __A /// A 256-bit vector of [4 x long long]. /// \param __B /// A 256-bit vector of [4 x long long]. /// \param __C /// A 128-bit vector of [2 x long long]. /// \returns /// A 256-bit vector of [4 x long long]. 
/// /// \code{.operation} /// DEFINE ROR64(qword, n) { /// count := n % 64 /// dest := (qword >> count) | (qword << (64 - count)) /// RETURN dest /// } /// DEFINE SHR64(qword, n) { /// RETURN qword >> n /// } /// DEFINE cap_sigma0(qword) { /// RETURN ROR64(qword,28) ^ ROR64(qword, 34) ^ ROR64(qword, 39) /// } /// DEFINE cap_sigma1(qword) { /// RETURN ROR64(qword,14) ^ ROR64(qword, 18) ^ ROR64(qword, 41) /// } /// DEFINE MAJ(a,b,c) { /// RETURN (a & b) ^ (a & c) ^ (b & c) /// } /// DEFINE CH(e,f,g) { /// RETURN (e & f) ^ (g & ~e) /// } /// A[0] := __B.qword[3] /// B[0] := __B.qword[2] /// C[0] := __C.qword[3] /// D[0] := __C.qword[2] /// E[0] := __B.qword[1] /// F[0] := __B.qword[0] /// G[0] := __C.qword[1] /// H[0] := __C.qword[0] /// WK[0]:= __A.qword[0] /// WK[1]:= __A.qword[1] /// FOR i := 0 to 1: /// A[i+1] := CH(E[i], F[i], G[i]) + /// cap_sigma1(E[i]) + WK[i] + H[i] + /// MAJ(A[i], B[i], C[i]) + /// cap_sigma0(A[i]) /// B[i+1] := A[i] /// C[i+1] := B[i] /// D[i+1] := C[i] /// E[i+1] := CH(E[i], F[i], G[i]) + /// cap_sigma1(E[i]) + WK[i] + H[i] + D[i] /// F[i+1] := E[i] /// G[i+1] := F[i] /// H[i+1] := G[i] /// ENDFOR /// dst.qword[3] := A[2] /// dst.qword[2] := B[2] /// dst.qword[1] := E[2] /// dst.qword[0] := F[2] /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C) { return (__m256i)__builtin_ia32_vsha512rnds2((__v4du)__A, (__v4du)__B, (__v2du)__C); } #undef __DEFAULT_FN_ATTRS256 #endif // __SHA512INTRIN_H /*===---- stdckdint.h - Standard header for checking integer----------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __STDCKDINT_H #define __STDCKDINT_H /* If we're hosted, fall back to the system's stdckdint.h. FreeBSD, for * example, already has a Clang-compatible stdckdint.h header. * * The `stdckdint.h` header requires C 23 or newer. */ #if __STDC_HOSTED__ && __has_include_next() #include_next #else /* C23 7.20.1 Defines several macros for performing checked integer arithmetic*/ #define __STDC_VERSION_STDCKDINT_H__ 202311L // Both A and B shall be any integer type other than "plain" char, bool, a bit- // precise integer type, or an enumerated type, and they need not be the same. // R shall be a modifiable lvalue of any integer type other than "plain" char, // bool, a bit-precise integer type, or an enumerated type. It shouldn't be // short type, either. Otherwise, it may be unable to hold two the result of // operating two 'int's. // A diagnostic message will be produced if A or B are not suitable integer // types, or if R is not a modifiable lvalue of a suitable integer type or R // is short type. #define ckd_add(R, A, B) __builtin_add_overflow((A), (B), (R)) #define ckd_sub(R, A, B) __builtin_sub_overflow((A), (B), (R)) #define ckd_mul(R, A, B) __builtin_mul_overflow((A), (B), (R)) #endif /* __STDC_HOSTED__ */ #endif /* __STDCKDINT_H */ /*===---- wmmintrin.h - AES intrinsics ------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __WMMINTRIN_H #define __WMMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif #include #include <__wmmintrin_aes.h> #include <__wmmintrin_pclmul.h> #endif /* __WMMINTRIN_H */ /*===---- openmp_wrapper/math.h -------- OpenMP math.h intercept ------ c++ -=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ // If we are in C++ mode and include (not ) first, we still need // to make sure is read first. The problem otherwise is that we haven't // seen the declarations of the math.h functions when the system math.h includes // our cmath overlay. However, our cmath overlay, or better the underlying // overlay, e.g. CUDA, uses the math.h functions. Since we haven't declared them // yet we get errors. CUDA avoids this by eagerly declaring all math functions // (in the __device__ space) but we cannot do this. Instead we break the // dependence by forcing cmath to go first. While our cmath will in turn include // this file, the cmath guards will prevent recursion. #ifdef __cplusplus #include #endif #ifndef __CLANG_OPENMP_MATH_H__ #define __CLANG_OPENMP_MATH_H__ #ifndef _OPENMP #error "This file is for OpenMP compilation only." #endif #include_next // We need limits.h for __clang_cuda_math.h below and because it should not hurt // we include it eagerly here. #include // We need stdlib.h because (for now) __clang_cuda_math.h below declares `abs` // which should live in stdlib.h. #include #pragma omp begin declare variant match( \ device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) #define __CUDA__ #define __OPENMP_NVPTX__ #include <__clang_cuda_math.h> #undef __OPENMP_NVPTX__ #undef __CUDA__ #pragma omp end declare variant #ifdef __AMDGCN__ #pragma omp begin declare variant match(device = {arch(amdgcn)}) #define __OPENMP_AMDGCN__ #include <__clang_hip_math.h> #undef __OPENMP_AMDGCN__ #pragma omp end declare variant #endif #endif orc/c_api.h/*===---- mm_malloc.h - Implementation of _mm_malloc and _mm_free ----------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef _MM_MALLOC_H_INCLUDED #define _MM_MALLOC_H_INCLUDED #if defined(__powerpc64__) && \ (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) #include /* We can't depend on since the prototype of posix_memalign may not be visible. */ #ifndef __cplusplus extern int posix_memalign(void **, size_t, size_t); #else extern "C" int posix_memalign(void **, size_t, size_t); #endif static __inline void *_mm_malloc(size_t __size, size_t __alignment) { /* PowerPC64 ELF V2 ABI requires quadword alignment. 
*/ size_t __vec_align = sizeof(__vector float); void *__ptr; if (__alignment < __vec_align) __alignment = __vec_align; if (posix_memalign(&__ptr, __alignment, __size) == 0) return __ptr; else return NULL; } static __inline void _mm_free(void *__ptr) { free(__ptr); } #else #include_next #endif #endif /* _MM_MALLOC_H_INCLUDED */ //===-- sanitizer/coverage_interface.h --------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Public interface for sanitizer coverage. //===----------------------------------------------------------------------===// #ifndef SANITIZER_COVERAG_INTERFACE_H #define SANITIZER_COVERAG_INTERFACE_H #include #ifdef __cplusplus extern "C" { #endif // Record and dump coverage info. void SANITIZER_CDECL __sanitizer_cov_dump(void); // Clear collected coverage info. void SANITIZER_CDECL __sanitizer_cov_reset(void); // Dump collected coverage info. Sorts pcs by module into individual .sancov // files. void SANITIZER_CDECL __sanitizer_dump_coverage(const uintptr_t *pcs, uintptr_t len); #ifdef __cplusplus } // extern "C" #endif #endif // SANITIZER_COVERAG_INTERFACE_H (fseek(handle, 0, 2)=Number too big to be stored in double.SearchOnePass inconsistency...Concat of Stack not empty., arg missing ]CaptureNamesWalker::ShortVisit called[GurmukhiModiSiddhamSinhalaTai_ThamTangutexternal/abseil-cpp/absl/debugging/failure_signal_handler.ccreinterpret_cast(region) % arena->pagesize == 0syscall(SYS_rt_sigprocmask, ~0, addr, nullptr, 8) == -1decimal128-=*=eO!ooDEADLINE_EXCEEDEDABORTED, start = condition untrue on return from Awaitnot waiting when should befile:external/boringssl/src/crypto/fipsmodule/bn/sqrt.c%s: getrandom indicates that the entropy pool has not been initialized. Rather than continue with poor entropy, this process will block until entropy is available. RSA-sign KATlib(%u)PKCS8CIPHER_LIBkythe.proto.AnalysisRequest.build_idkythe.proto.VName.rootkythe.proto.VName.language^(GNUmodeSysroot set to )/*===--- __clang_cuda_intrinsics.h - Device-side CUDA intrinsic wrappers ---=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_CUDA_INTRINSICS_H__ #define __CLANG_CUDA_INTRINSICS_H__ #ifndef __CUDA__ #error "This file is for CUDA compilation only." #endif // sm_30 intrinsics: __shfl_{up,down,xor}. 
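/*
 * Looking back at the ckd_add/ckd_sub/ckd_mul macros from stdckdint.h above,
 * a minimal illustration of checked integer arithmetic. The first argument is
 * a pointer to the result object; the macro returns a nonzero value when the
 * mathematically exact result does not fit in that object (the wrapped value
 * is still stored). Assumes a compiler where that header, or C23, provides
 * the ckd_* macros.
 */
#include <stdckdint.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  int32_t narrow;
  if (ckd_add(&narrow, INT32_MAX, 1))
    printf("overflow detected, wrapped value = %d\n", narrow);

  /* Using a wider result type, as the header comment recommends over short
     types, makes the corresponding product representable. */
  int64_t wide;
  if (!ckd_mul(&wide, (int64_t)INT32_MAX, 4))
    printf("no overflow: %lld\n", (long long)wide);
  return 0;
}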
#define __SM_30_INTRINSICS_H__ #define __SM_30_INTRINSICS_HPP__ #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300 #pragma push_macro("__MAKE_SHUFFLES") #define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask, \ __Type) \ inline __device__ int __FnName(int __val, __Type __offset, \ int __width = warpSize) { \ return __IntIntrinsic(__val, __offset, \ ((warpSize - __width) << 8) | (__Mask)); \ } \ inline __device__ float __FnName(float __val, __Type __offset, \ int __width = warpSize) { \ return __FloatIntrinsic(__val, __offset, \ ((warpSize - __width) << 8) | (__Mask)); \ } \ inline __device__ unsigned int __FnName(unsigned int __val, __Type __offset, \ int __width = warpSize) { \ return static_cast( \ ::__FnName(static_cast(__val), __offset, __width)); \ } \ inline __device__ long long __FnName(long long __val, __Type __offset, \ int __width = warpSize) { \ struct __Bits { \ int __a, __b; \ }; \ _Static_assert(sizeof(__val) == sizeof(__Bits)); \ _Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \ __Bits __tmp; \ memcpy(&__tmp, &__val, sizeof(__val)); \ __tmp.__a = ::__FnName(__tmp.__a, __offset, __width); \ __tmp.__b = ::__FnName(__tmp.__b, __offset, __width); \ long long __ret; \ memcpy(&__ret, &__tmp, sizeof(__tmp)); \ return __ret; \ } \ inline __device__ long __FnName(long __val, __Type __offset, \ int __width = warpSize) { \ _Static_assert(sizeof(long) == sizeof(long long) || \ sizeof(long) == sizeof(int)); \ if (sizeof(long) == sizeof(long long)) { \ return static_cast( \ ::__FnName(static_cast(__val), __offset, __width)); \ } else if (sizeof(long) == sizeof(int)) { \ return static_cast( \ ::__FnName(static_cast(__val), __offset, __width)); \ } \ } \ inline __device__ unsigned long __FnName( \ unsigned long __val, __Type __offset, int __width = warpSize) { \ return static_cast( \ ::__FnName(static_cast(__val), __offset, __width)); \ } \ inline __device__ unsigned long long __FnName( \ unsigned long long __val, __Type __offset, int __width = warpSize) { \ return static_cast( \ ::__FnName(static_cast(__val), __offset, __width)); \ } \ inline __device__ double __FnName(double __val, __Type __offset, \ int __width = warpSize) { \ long long __tmp; \ _Static_assert(sizeof(__tmp) == sizeof(__val)); \ memcpy(&__tmp, &__val, sizeof(__val)); \ __tmp = ::__FnName(__tmp, __offset, __width); \ double __ret; \ memcpy(&__ret, &__tmp, sizeof(__ret)); \ return __ret; \ } __MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f, int); // We use 0 rather than 31 as our mask, because shfl.up applies to lanes >= // maxLane. 
__MAKE_SHUFFLES(__shfl_up, __nvvm_shfl_up_i32, __nvvm_shfl_up_f32, 0, unsigned int); __MAKE_SHUFFLES(__shfl_down, __nvvm_shfl_down_i32, __nvvm_shfl_down_f32, 0x1f, unsigned int); __MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f, int); #pragma pop_macro("__MAKE_SHUFFLES") #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300 #if CUDA_VERSION >= 9000 #if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300) // __shfl_sync_* variants available in CUDA-9 #pragma push_macro("__MAKE_SYNC_SHUFFLES") #define __MAKE_SYNC_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, \ __Mask, __Type) \ inline __device__ int __FnName(unsigned int __mask, int __val, \ __Type __offset, int __width = warpSize) { \ return __IntIntrinsic(__mask, __val, __offset, \ ((warpSize - __width) << 8) | (__Mask)); \ } \ inline __device__ float __FnName(unsigned int __mask, float __val, \ __Type __offset, int __width = warpSize) { \ return __FloatIntrinsic(__mask, __val, __offset, \ ((warpSize - __width) << 8) | (__Mask)); \ } \ inline __device__ unsigned int __FnName(unsigned int __mask, \ unsigned int __val, __Type __offset, \ int __width = warpSize) { \ return static_cast( \ ::__FnName(__mask, static_cast(__val), __offset, __width)); \ } \ inline __device__ long long __FnName(unsigned int __mask, long long __val, \ __Type __offset, \ int __width = warpSize) { \ struct __Bits { \ int __a, __b; \ }; \ _Static_assert(sizeof(__val) == sizeof(__Bits)); \ _Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \ __Bits __tmp; \ memcpy(&__tmp, &__val, sizeof(__val)); \ __tmp.__a = ::__FnName(__mask, __tmp.__a, __offset, __width); \ __tmp.__b = ::__FnName(__mask, __tmp.__b, __offset, __width); \ long long __ret; \ memcpy(&__ret, &__tmp, sizeof(__tmp)); \ return __ret; \ } \ inline __device__ unsigned long long __FnName( \ unsigned int __mask, unsigned long long __val, __Type __offset, \ int __width = warpSize) { \ return static_cast( \ ::__FnName(__mask, static_cast(__val), __offset, __width)); \ } \ inline __device__ long __FnName(unsigned int __mask, long __val, \ __Type __offset, int __width = warpSize) { \ _Static_assert(sizeof(long) == sizeof(long long) || \ sizeof(long) == sizeof(int)); \ if (sizeof(long) == sizeof(long long)) { \ return static_cast(::__FnName( \ __mask, static_cast(__val), __offset, __width)); \ } else if (sizeof(long) == sizeof(int)) { \ return static_cast( \ ::__FnName(__mask, static_cast(__val), __offset, __width)); \ } \ } \ inline __device__ unsigned long __FnName( \ unsigned int __mask, unsigned long __val, __Type __offset, \ int __width = warpSize) { \ return static_cast( \ ::__FnName(__mask, static_cast(__val), __offset, __width)); \ } \ inline __device__ double __FnName(unsigned int __mask, double __val, \ __Type __offset, int __width = warpSize) { \ long long __tmp; \ _Static_assert(sizeof(__tmp) == sizeof(__val)); \ memcpy(&__tmp, &__val, sizeof(__val)); \ __tmp = ::__FnName(__mask, __tmp, __offset, __width); \ double __ret; \ memcpy(&__ret, &__tmp, sizeof(__ret)); \ return __ret; \ } __MAKE_SYNC_SHUFFLES(__shfl_sync, __nvvm_shfl_sync_idx_i32, __nvvm_shfl_sync_idx_f32, 0x1f, int); // We use 0 rather than 31 as our mask, because shfl.up applies to lanes >= // maxLane. 
__MAKE_SYNC_SHUFFLES(__shfl_up_sync, __nvvm_shfl_sync_up_i32, __nvvm_shfl_sync_up_f32, 0, unsigned int); __MAKE_SYNC_SHUFFLES(__shfl_down_sync, __nvvm_shfl_sync_down_i32, __nvvm_shfl_sync_down_f32, 0x1f, unsigned int); __MAKE_SYNC_SHUFFLES(__shfl_xor_sync, __nvvm_shfl_sync_bfly_i32, __nvvm_shfl_sync_bfly_f32, 0x1f, int); #pragma pop_macro("__MAKE_SYNC_SHUFFLES") inline __device__ void __syncwarp(unsigned int mask = 0xffffffff) { return __nvvm_bar_warp_sync(mask); } inline __device__ void __barrier_sync(unsigned int id) { __nvvm_barrier_sync(id); } inline __device__ void __barrier_sync_count(unsigned int id, unsigned int count) { __nvvm_barrier_sync_cnt(id, count); } inline __device__ int __all_sync(unsigned int mask, int pred) { return __nvvm_vote_all_sync(mask, pred); } inline __device__ int __any_sync(unsigned int mask, int pred) { return __nvvm_vote_any_sync(mask, pred); } inline __device__ int __uni_sync(unsigned int mask, int pred) { return __nvvm_vote_uni_sync(mask, pred); } inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) { return __nvvm_vote_ballot_sync(mask, pred); } inline __device__ unsigned int __activemask() { #if CUDA_VERSION < 9020 return __nvvm_vote_ballot(1); #else return __nvvm_activemask(); #endif } inline __device__ unsigned int __fns(unsigned mask, unsigned base, int offset) { return __nvvm_fns(mask, base, offset); } #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300 // Define __match* builtins CUDA-9 headers expect to see. #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700 inline __device__ unsigned int __match32_any_sync(unsigned int mask, unsigned int value) { return __nvvm_match_any_sync_i32(mask, value); } inline __device__ unsigned int __match64_any_sync(unsigned int mask, unsigned long long value) { return __nvvm_match_any_sync_i64(mask, value); } inline __device__ unsigned int __match32_all_sync(unsigned int mask, unsigned int value, int *pred) { return __nvvm_match_all_sync_i32p(mask, value, pred); } inline __device__ unsigned int __match64_all_sync(unsigned int mask, unsigned long long value, int *pred) { return __nvvm_match_all_sync_i64p(mask, value, pred); } #include "crt/sm_70_rt.hpp" #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700 #endif // __CUDA_VERSION >= 9000 // sm_32 intrinsics: __ldg and __funnelshift_{l,lc,r,rc}. // Prevent the vanilla sm_32 intrinsics header from being included. 
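/*
 * Before the sm_32 intrinsics below, a device-side sketch showing how the
 * sync shuffle wrappers defined above compose: the classic warp-level sum
 * reduction. It assumes the full warp is active (hence the all-lanes mask)
 * and a 32-lane warp; the function name is hypothetical and the snippet is an
 * illustration, not part of the CUDA wrapper itself.
 */
#if defined(__CUDA__) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 &&    \
    defined(CUDA_VERSION) && CUDA_VERSION >= 9000
static inline __device__ int hypothetical_warp_sum(int __val) {
  const unsigned int __all_lanes = 0xffffffffu;
  /* Halve the stride each step; after log2(warpSize) steps, lane 0 holds the
     sum of all lanes' inputs. */
  for (int __offset = warpSize / 2; __offset > 0; __offset /= 2)
    __val += __shfl_down_sync(__all_lanes, __val, __offset);
  return __val;
}
#endif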
#define __SM_32_INTRINSICS_H__ #define __SM_32_INTRINSICS_HPP__ #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320 inline __device__ char __ldg(const char *ptr) { return __nvvm_ldg_c(ptr); } inline __device__ short __ldg(const short *ptr) { return __nvvm_ldg_s(ptr); } inline __device__ int __ldg(const int *ptr) { return __nvvm_ldg_i(ptr); } inline __device__ long __ldg(const long *ptr) { return __nvvm_ldg_l(ptr); } inline __device__ long long __ldg(const long long *ptr) { return __nvvm_ldg_ll(ptr); } inline __device__ unsigned char __ldg(const unsigned char *ptr) { return __nvvm_ldg_uc(ptr); } inline __device__ signed char __ldg(const signed char *ptr) { return __nvvm_ldg_uc((const unsigned char *)ptr); } inline __device__ unsigned short __ldg(const unsigned short *ptr) { return __nvvm_ldg_us(ptr); } inline __device__ unsigned int __ldg(const unsigned int *ptr) { return __nvvm_ldg_ui(ptr); } inline __device__ unsigned long __ldg(const unsigned long *ptr) { return __nvvm_ldg_ul(ptr); } inline __device__ unsigned long long __ldg(const unsigned long long *ptr) { return __nvvm_ldg_ull(ptr); } inline __device__ float __ldg(const float *ptr) { return __nvvm_ldg_f(ptr); } inline __device__ double __ldg(const double *ptr) { return __nvvm_ldg_d(ptr); } inline __device__ char2 __ldg(const char2 *ptr) { typedef char c2 __attribute__((ext_vector_type(2))); // We can assume that ptr is aligned at least to char2's alignment, but the // load will assume that ptr is aligned to char2's alignment. This is only // safe if alignof(c2) <= alignof(char2). c2 rv = __nvvm_ldg_c2(reinterpret_cast(ptr)); char2 ret; ret.x = rv[0]; ret.y = rv[1]; return ret; } inline __device__ char4 __ldg(const char4 *ptr) { typedef char c4 __attribute__((ext_vector_type(4))); c4 rv = __nvvm_ldg_c4(reinterpret_cast(ptr)); char4 ret; ret.x = rv[0]; ret.y = rv[1]; ret.z = rv[2]; ret.w = rv[3]; return ret; } inline __device__ short2 __ldg(const short2 *ptr) { typedef short s2 __attribute__((ext_vector_type(2))); s2 rv = __nvvm_ldg_s2(reinterpret_cast(ptr)); short2 ret; ret.x = rv[0]; ret.y = rv[1]; return ret; } inline __device__ short4 __ldg(const short4 *ptr) { typedef short s4 __attribute__((ext_vector_type(4))); s4 rv = __nvvm_ldg_s4(reinterpret_cast(ptr)); short4 ret; ret.x = rv[0]; ret.y = rv[1]; ret.z = rv[2]; ret.w = rv[3]; return ret; } inline __device__ int2 __ldg(const int2 *ptr) { typedef int i2 __attribute__((ext_vector_type(2))); i2 rv = __nvvm_ldg_i2(reinterpret_cast(ptr)); int2 ret; ret.x = rv[0]; ret.y = rv[1]; return ret; } inline __device__ int4 __ldg(const int4 *ptr) { typedef int i4 __attribute__((ext_vector_type(4))); i4 rv = __nvvm_ldg_i4(reinterpret_cast(ptr)); int4 ret; ret.x = rv[0]; ret.y = rv[1]; ret.z = rv[2]; ret.w = rv[3]; return ret; } inline __device__ longlong2 __ldg(const longlong2 *ptr) { typedef long long ll2 __attribute__((ext_vector_type(2))); ll2 rv = __nvvm_ldg_ll2(reinterpret_cast(ptr)); longlong2 ret; ret.x = rv[0]; ret.y = rv[1]; return ret; } inline __device__ uchar2 __ldg(const uchar2 *ptr) { typedef unsigned char uc2 __attribute__((ext_vector_type(2))); uc2 rv = __nvvm_ldg_uc2(reinterpret_cast(ptr)); uchar2 ret; ret.x = rv[0]; ret.y = rv[1]; return ret; } inline __device__ uchar4 __ldg(const uchar4 *ptr) { typedef unsigned char uc4 __attribute__((ext_vector_type(4))); uc4 rv = __nvvm_ldg_uc4(reinterpret_cast(ptr)); uchar4 ret; ret.x = rv[0]; ret.y = rv[1]; ret.z = rv[2]; ret.w = rv[3]; return ret; } inline __device__ ushort2 __ldg(const ushort2 *ptr) { typedef unsigned short us2 
__attribute__((ext_vector_type(2))); us2 rv = __nvvm_ldg_us2(reinterpret_cast(ptr)); ushort2 ret; ret.x = rv[0]; ret.y = rv[1]; return ret; } inline __device__ ushort4 __ldg(const ushort4 *ptr) { typedef unsigned short us4 __attribute__((ext_vector_type(4))); us4 rv = __nvvm_ldg_us4(reinterpret_cast(ptr)); ushort4 ret; ret.x = rv[0]; ret.y = rv[1]; ret.z = rv[2]; ret.w = rv[3]; return ret; } inline __device__ uint2 __ldg(const uint2 *ptr) { typedef unsigned int ui2 __attribute__((ext_vector_type(2))); ui2 rv = __nvvm_ldg_ui2(reinterpret_cast(ptr)); uint2 ret; ret.x = rv[0]; ret.y = rv[1]; return ret; } inline __device__ uint4 __ldg(const uint4 *ptr) { typedef unsigned int ui4 __attribute__((ext_vector_type(4))); ui4 rv = __nvvm_ldg_ui4(reinterpret_cast(ptr)); uint4 ret; ret.x = rv[0]; ret.y = rv[1]; ret.z = rv[2]; ret.w = rv[3]; return ret; } inline __device__ ulonglong2 __ldg(const ulonglong2 *ptr) { typedef unsigned long long ull2 __attribute__((ext_vector_type(2))); ull2 rv = __nvvm_ldg_ull2(reinterpret_cast(ptr)); ulonglong2 ret; ret.x = rv[0]; ret.y = rv[1]; return ret; } inline __device__ float2 __ldg(const float2 *ptr) { typedef float f2 __attribute__((ext_vector_type(2))); f2 rv = __nvvm_ldg_f2(reinterpret_cast(ptr)); float2 ret; ret.x = rv[0]; ret.y = rv[1]; return ret; } inline __device__ float4 __ldg(const float4 *ptr) { typedef float f4 __attribute__((ext_vector_type(4))); f4 rv = __nvvm_ldg_f4(reinterpret_cast(ptr)); float4 ret; ret.x = rv[0]; ret.y = rv[1]; ret.z = rv[2]; ret.w = rv[3]; return ret; } inline __device__ double2 __ldg(const double2 *ptr) { typedef double d2 __attribute__((ext_vector_type(2))); d2 rv = __nvvm_ldg_d2(reinterpret_cast(ptr)); double2 ret; ret.x = rv[0]; ret.y = rv[1]; return ret; } // TODO: Implement these as intrinsics, so the backend can work its magic on // these. Alternatively, we could implement these as plain C and try to get // llvm to recognize the relevant patterns. 
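/*
 * The TODO above suggests these funnel shifts could also be expressed as
 * plain C. The sketch below is a host-side reference model of the semantics
 * assumed here for the wrap and clamp variants (operate on the 64-bit
 * concatenation high32:low32, then take the upper or lower word); it
 * illustrates the behaviour of the inline-asm definitions that follow rather
 * than replacing them, and the ref_* names are hypothetical.
 */
#include <stdint.h>

static inline unsigned int ref_funnelshift_l(unsigned int low32,
                                             unsigned int high32,
                                             unsigned int shift, int clamp) {
  uint64_t concat = ((uint64_t)high32 << 32) | low32;
  unsigned int n = clamp ? (shift > 32 ? 32u : shift) : (shift & 31u);
  /* Left funnel shift: upper 32 bits of the pair shifted left by n. */
  return (unsigned int)((concat << n) >> 32);
}

static inline unsigned int ref_funnelshift_r(unsigned int low32,
                                             unsigned int high32,
                                             unsigned int shift, int clamp) {
  uint64_t concat = ((uint64_t)high32 << 32) | low32;
  unsigned int n = clamp ? (shift > 32 ? 32u : shift) : (shift & 31u);
  /* Right funnel shift: lower 32 bits of the pair shifted right by n. */
  return (unsigned int)(concat >> n);
}

/* A 32-bit rotate falls out as a special case: rotating x left by r bits is
   ref_funnelshift_l(x, x, r, 0). */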
inline __device__ unsigned __funnelshift_l(unsigned low32, unsigned high32, unsigned shiftWidth) { unsigned result; asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result) : "r"(low32), "r"(high32), "r"(shiftWidth)); return result; } inline __device__ unsigned __funnelshift_lc(unsigned low32, unsigned high32, unsigned shiftWidth) { unsigned result; asm("shf.l.clamp.b32 %0, %1, %2, %3;" : "=r"(result) : "r"(low32), "r"(high32), "r"(shiftWidth)); return result; } inline __device__ unsigned __funnelshift_r(unsigned low32, unsigned high32, unsigned shiftWidth) { unsigned result; asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(result) : "r"(low32), "r"(high32), "r"(shiftWidth)); return result; } inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32, unsigned shiftWidth) { unsigned ret; asm("shf.r.clamp.b32 %0, %1, %2, %3;" : "=r"(ret) : "r"(low32), "r"(high32), "r"(shiftWidth)); return ret; } #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320 #if CUDA_VERSION >= 11000 extern "C" { __device__ inline size_t __nv_cvta_generic_to_global_impl(const void *__ptr) { return (size_t)(void __attribute__((address_space(1))) *)__ptr; } __device__ inline size_t __nv_cvta_generic_to_shared_impl(const void *__ptr) { return (size_t)(void __attribute__((address_space(3))) *)__ptr; } __device__ inline size_t __nv_cvta_generic_to_constant_impl(const void *__ptr) { return (size_t)(void __attribute__((address_space(4))) *)__ptr; } __device__ inline size_t __nv_cvta_generic_to_local_impl(const void *__ptr) { return (size_t)(void __attribute__((address_space(5))) *)__ptr; } __device__ inline void *__nv_cvta_global_to_generic_impl(size_t __ptr) { return (void *)(void __attribute__((address_space(1))) *)__ptr; } __device__ inline void *__nv_cvta_shared_to_generic_impl(size_t __ptr) { return (void *)(void __attribute__((address_space(3))) *)__ptr; } __device__ inline void *__nv_cvta_constant_to_generic_impl(size_t __ptr) { return (void *)(void __attribute__((address_space(4))) *)__ptr; } __device__ inline void *__nv_cvta_local_to_generic_impl(size_t __ptr) { return (void *)(void __attribute__((address_space(5))) *)__ptr; } __device__ inline cuuint32_t __nvvm_get_smem_pointer(void *__ptr) { return __nv_cvta_generic_to_shared_impl(__ptr); } } // extern "C" #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800 __device__ inline unsigned __reduce_add_sync(unsigned __mask, unsigned __value) { return __nvvm_redux_sync_add(__mask, __value); } __device__ inline unsigned __reduce_min_sync(unsigned __mask, unsigned __value) { return __nvvm_redux_sync_umin(__mask, __value); } __device__ inline unsigned __reduce_max_sync(unsigned __mask, unsigned __value) { return __nvvm_redux_sync_umax(__mask, __value); } __device__ inline int __reduce_min_sync(unsigned __mask, int __value) { return __nvvm_redux_sync_min(__mask, __value); } __device__ inline int __reduce_max_sync(unsigned __mask, int __value) { return __nvvm_redux_sync_max(__mask, __value); } __device__ inline unsigned __reduce_or_sync(unsigned __mask, unsigned __value) { return __nvvm_redux_sync_or(__mask, __value); } __device__ inline unsigned __reduce_and_sync(unsigned __mask, unsigned __value) { return __nvvm_redux_sync_and(__mask, __value); } __device__ inline unsigned __reduce_xor_sync(unsigned __mask, unsigned __value) { return __nvvm_redux_sync_xor(__mask, __value); } __device__ inline void __nv_memcpy_async_shared_global_4(void *__dst, const void *__src, unsigned __src_size) { __nvvm_cp_async_ca_shared_global_4( (void __attribute__((address_space(3))) 
*)__dst, (const void __attribute__((address_space(1))) *)__src, __src_size); } __device__ inline void __nv_memcpy_async_shared_global_8(void *__dst, const void *__src, unsigned __src_size) { __nvvm_cp_async_ca_shared_global_8( (void __attribute__((address_space(3))) *)__dst, (const void __attribute__((address_space(1))) *)__src, __src_size); } __device__ inline void __nv_memcpy_async_shared_global_16(void *__dst, const void *__src, unsigned __src_size) { __nvvm_cp_async_ca_shared_global_16( (void __attribute__((address_space(3))) *)__dst, (const void __attribute__((address_space(1))) *)__src, __src_size); } __device__ inline void * __nv_associate_access_property(const void *__ptr, unsigned long long __prop) { // TODO: it appears to provide compiler with some sort of a hint. We do not // know what exactly it is supposed to do. However, CUDA headers suggest that // just passing through __ptr should not affect correctness. They do so on // pre-sm80 GPUs where this builtin is not available. return (void*)__ptr; } #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 900 __device__ inline unsigned __isCtaShared(const void *ptr) { return __isShared(ptr); } __device__ inline unsigned __isClusterShared(const void *__ptr) { return __nvvm_isspacep_shared_cluster(__ptr); } __device__ inline void *__cluster_map_shared_rank(const void *__ptr, unsigned __rank) { return __nvvm_mapa((void *)__ptr, __rank); } __device__ inline unsigned __cluster_query_shared_rank(const void *__ptr) { return __nvvm_getctarank((void *)__ptr); } __device__ inline uint2 __cluster_map_shared_multicast(const void *__ptr, unsigned int __cluster_cta_mask) { return make_uint2((unsigned)__cvta_generic_to_shared(__ptr), __cluster_cta_mask); } __device__ inline unsigned __clusterDimIsSpecified() { return __nvvm_is_explicit_cluster(); } __device__ inline dim3 __clusterDim() { return dim3(__nvvm_read_ptx_sreg_cluster_nctaid_x(), __nvvm_read_ptx_sreg_cluster_nctaid_y(), __nvvm_read_ptx_sreg_cluster_nctaid_z()); } __device__ inline dim3 __clusterRelativeBlockIdx() { return dim3(__nvvm_read_ptx_sreg_cluster_ctaid_x(), __nvvm_read_ptx_sreg_cluster_ctaid_y(), __nvvm_read_ptx_sreg_cluster_ctaid_z()); } __device__ inline dim3 __clusterGridDimInClusters() { return dim3(__nvvm_read_ptx_sreg_nclusterid_x(), __nvvm_read_ptx_sreg_nclusterid_y(), __nvvm_read_ptx_sreg_nclusterid_z()); } __device__ inline dim3 __clusterIdx() { return dim3(__nvvm_read_ptx_sreg_clusterid_x(), __nvvm_read_ptx_sreg_clusterid_y(), __nvvm_read_ptx_sreg_clusterid_z()); } __device__ inline unsigned __clusterRelativeBlockRank() { return __nvvm_read_ptx_sreg_cluster_ctarank(); } __device__ inline unsigned __clusterSizeInBlocks() { return __nvvm_read_ptx_sreg_cluster_nctarank(); } __device__ inline void __cluster_barrier_arrive() { __nvvm_barrier_cluster_arrive(); } __device__ inline void __cluster_barrier_arrive_relaxed() { __nvvm_barrier_cluster_arrive_relaxed(); } __device__ inline void __cluster_barrier_wait() { __nvvm_barrier_cluster_wait(); } __device__ inline void __threadfence_cluster() { __nvvm_fence_sc_cluster(); } __device__ inline float2 atomicAdd(float2 *__ptr, float2 __val) { float2 __ret; __asm__("atom.add.v2.f32 {%0, %1}, [%2], {%3, %4};" : "=f"(__ret.x), "=f"(__ret.y) : "l"(__ptr), "f"(__val.x), "f"(__val.y)); return __ret; } __device__ inline float2 atomicAdd_block(float2 *__ptr, float2 __val) { float2 __ret; __asm__("atom.cta.add.v2.f32 {%0, %1}, [%2], {%3, %4};" : "=f"(__ret.x), "=f"(__ret.y) : "l"(__ptr), 
"f"(__val.x), "f"(__val.y)); return __ret; } __device__ inline float2 atomicAdd_system(float2 *__ptr, float2 __val) { float2 __ret; __asm__("atom.sys.add.v2.f32 {%0, %1}, [%2], {%3, %4};" : "=f"(__ret.x), "=f"(__ret.y) : "l"(__ptr), "f"(__val.x), "f"(__val.y)); return __ret; } __device__ inline float4 atomicAdd(float4 *__ptr, float4 __val) { float4 __ret; __asm__("atom.add.v4.f32 {%0, %1, %2, %3}, [%4], {%5, %6, %7, %8};" : "=f"(__ret.x), "=f"(__ret.y), "=f"(__ret.z), "=f"(__ret.w) : "l"(__ptr), "f"(__val.x), "f"(__val.y), "f"(__val.z), "f"(__val.w)); return __ret; } __device__ inline float4 atomicAdd_block(float4 *__ptr, float4 __val) { float4 __ret; __asm__( "atom.cta.add.v4.f32 {%0, %1, %2, %3}, [%4], {%5, %6, %7, %8};" : "=f"(__ret.x), "=f"(__ret.y), "=f"(__ret.z), "=f"(__ret.w) : "l"(__ptr), "f"(__val.x), "f"(__val.y), "f"(__val.z), "f"(__val.w)); return __ret; } __device__ inline float4 atomicAdd_system(float4 *__ptr, float4 __val) { float4 __ret; __asm__( "atom.sys.add.v4.f32 {%0, %1, %2, %3}, [%4], {%5, %6, %7, %8};" : "=f"(__ret.x), "=f"(__ret.y), "=f"(__ret.z), "=f"(__ret.w) : "l"(__ptr), "f"(__val.x), "f"(__val.y), "f"(__val.z), "f"(__val.w) :); return __ret; } #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 900 #endif // CUDA_VERSION >= 11000 #endif // defined(__CLANG_CUDA_INTRINSICS_H__) /*===---- __stdarg___gnuc_va_list.h - Definition of __gnuc_va_list ---------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __GNUC_VA_LIST #define __GNUC_VA_LIST typedef __builtin_va_list __gnuc_va_list; #endif __stddef_null.havx512vlcdintrin.havxifmaintrin.h/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __EMMINTRIN_H #define __EMMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif #include typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16))); typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16))); typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1))); typedef long long __m128i_u __attribute__((__vector_size__(16), __aligned__(1))); /* Type defines. */ typedef double __v2df __attribute__((__vector_size__(16))); typedef long long __v2di __attribute__((__vector_size__(16))); typedef short __v8hi __attribute__((__vector_size__(16))); typedef char __v16qi __attribute__((__vector_size__(16))); /* Unsigned types */ typedef unsigned long long __v2du __attribute__((__vector_size__(16))); typedef unsigned short __v8hu __attribute__((__vector_size__(16))); typedef unsigned char __v16qu __attribute__((__vector_size__(16))); /* We need an explicitly signed variant for char. Note that this shouldn't * appear in the interface though. */ typedef signed char __v16qs __attribute__((__vector_size__(16))); #ifdef __SSE2__ /* Both _Float16 and __bf16 require SSE2 being enabled. 
*/ typedef _Float16 __v8hf __attribute__((__vector_size__(16), __aligned__(16))); typedef _Float16 __m128h __attribute__((__vector_size__(16), __aligned__(16))); typedef _Float16 __m128h_u __attribute__((__vector_size__(16), __aligned__(1))); typedef __bf16 __v8bf __attribute__((__vector_size__(16), __aligned__(16))); typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16))); #endif /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("sse2,no-evex512"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS_MMX \ __attribute__((__always_inline__, __nodebug__, \ __target__("mmx,sse2,no-evex512"), __min_vector_width__(64))) /// Adds lower double-precision values in both operands and returns the /// sum in the lower 64 bits of the result. The upper 64 bits of the result /// are copied from the upper double-precision value of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VADDSD / ADDSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the source operands. /// \param __b /// A 128-bit vector of [2 x double] containing one of the source operands. /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// sum of the lower 64 bits of both operands. The upper 64 bits are copied /// from the upper 64 bits of the first source operand. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, __m128d __b) { __a[0] += __b[0]; return __a; } /// Adds two 128-bit vectors of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VADDPD / ADDPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the source operands. /// \param __b /// A 128-bit vector of [2 x double] containing one of the source operands. /// \returns A 128-bit vector of [2 x double] containing the sums of both /// operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, __m128d __b) { return (__m128d)((__v2df)__a + (__v2df)__b); } /// Subtracts the lower double-precision value of the second operand /// from the lower double-precision value of the first operand and returns /// the difference in the lower 64 bits of the result. The upper 64 bits of /// the result are copied from the upper double-precision value of the first /// operand. /// /// \headerfile /// /// This intrinsic corresponds to the VSUBSD / SUBSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing the minuend. /// \param __b /// A 128-bit vector of [2 x double] containing the subtrahend. /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// difference of the lower 64 bits of both operands. The upper 64 bits are /// copied from the upper 64 bits of the first source operand. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, __m128d __b) { __a[0] -= __b[0]; return __a; } /// Subtracts two 128-bit vectors of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VSUBPD / SUBPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing the minuend. /// \param __b /// A 128-bit vector of [2 x double] containing the subtrahend. /// \returns A 128-bit vector of [2 x double] containing the differences between /// both operands. 
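/* --- Illustrative usage sketch, not part of emmintrin.h. ---
 * Contrasts the scalar ("sd") and packed ("pd") arithmetic forms defined in
 * this header: _mm_add_sd operates only on element 0 and copies element 1
 * from the first operand, while _mm_add_pd adds both elements. A standalone
 * example; the function name and values are invented. */
#include <emmintrin.h>
static inline void __example_add_sd_vs_pd(double out_scalar[2],
                                          double out_packed[2]) {
  __m128d a = _mm_set_pd(10.0, 1.0); /* elements: [0] = 1.0, [1] = 10.0 */
  __m128d b = _mm_set_pd(20.0, 2.0); /* elements: [0] = 2.0, [1] = 20.0 */
  _mm_storeu_pd(out_scalar, _mm_add_sd(a, b)); /* {3.0, 10.0} */
  _mm_storeu_pd(out_packed, _mm_add_pd(a, b)); /* {3.0, 30.0} */
}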
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, __m128d __b) { return (__m128d)((__v2df)__a - (__v2df)__b); } /// Multiplies lower double-precision values in both operands and returns /// the product in the lower 64 bits of the result. The upper 64 bits of the /// result are copied from the upper double-precision value of the first /// operand. /// /// \headerfile /// /// This intrinsic corresponds to the VMULSD / MULSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the source operands. /// \param __b /// A 128-bit vector of [2 x double] containing one of the source operands. /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// product of the lower 64 bits of both operands. The upper 64 bits are /// copied from the upper 64 bits of the first source operand. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, __m128d __b) { __a[0] *= __b[0]; return __a; } /// Multiplies two 128-bit vectors of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VMULPD / MULPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the operands. /// \param __b /// A 128-bit vector of [2 x double] containing one of the operands. /// \returns A 128-bit vector of [2 x double] containing the products of both /// operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, __m128d __b) { return (__m128d)((__v2df)__a * (__v2df)__b); } /// Divides the lower double-precision value of the first operand by the /// lower double-precision value of the second operand and returns the /// quotient in the lower 64 bits of the result. The upper 64 bits of the /// result are copied from the upper double-precision value of the first /// operand. /// /// \headerfile /// /// This intrinsic corresponds to the VDIVSD / DIVSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing the dividend. /// \param __b /// A 128-bit vector of [2 x double] containing divisor. /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// quotient of the lower 64 bits of both operands. The upper 64 bits are /// copied from the upper 64 bits of the first source operand. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, __m128d __b) { __a[0] /= __b[0]; return __a; } /// Performs an element-by-element division of two 128-bit vectors of /// [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VDIVPD / DIVPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing the dividend. /// \param __b /// A 128-bit vector of [2 x double] containing the divisor. /// \returns A 128-bit vector of [2 x double] containing the quotients of both /// operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, __m128d __b) { return (__m128d)((__v2df)__a / (__v2df)__b); } /// Calculates the square root of the lower double-precision value of /// the second operand and returns it in the lower 64 bits of the result. /// The upper 64 bits of the result are copied from the upper /// double-precision value of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VSQRTSD / SQRTSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the operands. The /// upper 64 bits of this operand are copied to the upper 64 bits of the /// result. 
/// \param __b /// A 128-bit vector of [2 x double] containing one of the operands. The /// square root is calculated using the lower 64 bits of this operand. /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// square root of the lower 64 bits of operand \a __b, and whose upper 64 /// bits are copied from the upper 64 bits of operand \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, __m128d __b) { __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b); return __extension__(__m128d){__c[0], __a[1]}; } /// Calculates the square root of the each of two values stored in a /// 128-bit vector of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VSQRTPD / SQRTPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector of [2 x double] containing the square roots of the /// values in the operand. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a) { return __builtin_ia32_sqrtpd((__v2df)__a); } /// Compares lower 64-bit double-precision values of both operands, and /// returns the lesser of the pair of values in the lower 64-bits of the /// result. The upper 64 bits of the result are copied from the upper /// double-precision value of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VMINSD / MINSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the operands. The /// lower 64 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [2 x double] containing one of the operands. The /// lower 64 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// minimum value between both operands. The upper 64 bits are copied from /// the upper 64 bits of the first source operand. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a, __m128d __b) { return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b); } /// Performs element-by-element comparison of the two 128-bit vectors of /// [2 x double] and returns the vector containing the lesser of each pair of /// values. /// /// \headerfile /// /// This intrinsic corresponds to the VMINPD / MINPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the operands. /// \param __b /// A 128-bit vector of [2 x double] containing one of the operands. /// \returns A 128-bit vector of [2 x double] containing the minimum values /// between both operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, __m128d __b) { return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b); } /// Compares lower 64-bit double-precision values of both operands, and /// returns the greater of the pair of values in the lower 64-bits of the /// result. The upper 64 bits of the result are copied from the upper /// double-precision value of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VMAXSD / MAXSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the operands. The /// lower 64 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [2 x double] containing one of the operands. The /// lower 64 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// maximum value between both operands. 
The upper 64 bits are copied from /// the upper 64 bits of the first source operand. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a, __m128d __b) { return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b); } /// Performs element-by-element comparison of the two 128-bit vectors of /// [2 x double] and returns the vector containing the greater of each pair /// of values. /// /// \headerfile /// /// This intrinsic corresponds to the VMAXPD / MAXPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the operands. /// \param __b /// A 128-bit vector of [2 x double] containing one of the operands. /// \returns A 128-bit vector of [2 x double] containing the maximum values /// between both operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, __m128d __b) { return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b); } /// Performs a bitwise AND of two 128-bit vectors of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VPAND / PAND instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the source operands. /// \param __b /// A 128-bit vector of [2 x double] containing one of the source operands. /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the /// values between both operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_and_pd(__m128d __a, __m128d __b) { return (__m128d)((__v2du)__a & (__v2du)__b); } /// Performs a bitwise AND of two 128-bit vectors of [2 x double], using /// the one's complement of the values contained in the first source operand. /// /// \headerfile /// /// This intrinsic corresponds to the VPANDN / PANDN instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing the left source operand. The /// one's complement of this value is used in the bitwise AND. /// \param __b /// A 128-bit vector of [2 x double] containing the right source operand. /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the /// values in the second operand and the one's complement of the first /// operand. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_andnot_pd(__m128d __a, __m128d __b) { return (__m128d)(~(__v2du)__a & (__v2du)__b); } /// Performs a bitwise OR of two 128-bit vectors of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VPOR / POR instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the source operands. /// \param __b /// A 128-bit vector of [2 x double] containing one of the source operands. /// \returns A 128-bit vector of [2 x double] containing the bitwise OR of the /// values between both operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_or_pd(__m128d __a, __m128d __b) { return (__m128d)((__v2du)__a | (__v2du)__b); } /// Performs a bitwise XOR of two 128-bit vectors of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VPXOR / PXOR instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the source operands. /// \param __b /// A 128-bit vector of [2 x double] containing one of the source operands. /// \returns A 128-bit vector of [2 x double] containing the bitwise XOR of the /// values between both operands. 
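/* --- Illustrative usage sketch, not part of emmintrin.h. ---
 * The bitwise operations above are commonly used to manipulate the sign bit
 * of double-precision lanes directly: clearing the sign bit gives |x| and
 * XOR-ing it flips the sign. Standalone example; function names are
 * invented. */
#include <emmintrin.h>
static inline __m128d __example_abs_pd(__m128d x) {
  /* ~(-0.0) & x clears the sign bit of both lanes. */
  return _mm_andnot_pd(_mm_set1_pd(-0.0), x);
}
static inline __m128d __example_negate_pd(__m128d x) {
  /* XOR with the sign-bit mask flips the sign of both lanes. */
  return _mm_xor_pd(_mm_set1_pd(-0.0), x);
}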
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_xor_pd(__m128d __a, __m128d __b) { return (__m128d)((__v2du)__a ^ (__v2du)__b); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] for equality. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPEQPD / CMPEQPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are less than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLTPD / CMPLTPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are less than or equal to those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLEPD / CMPLEPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are greater than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLTPD / CMPLTPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are greater than or equal to those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLEPD / CMPLEPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. 
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are ordered with respect to those in the second operand. /// /// A pair of double-precision values are "ordered" with respect to each /// other if neither value is a NaN. Each comparison yields 0x0 for false, /// 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPORDPD / CMPORDPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are unordered with respect to those in the second operand. /// /// A pair of double-precision values are "unordered" with respect to each /// other if one or both values are NaN. Each comparison yields 0x0 for /// false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPUNORDPD / CMPUNORDPD /// instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are unequal to those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNEQPD / CMPNEQPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are not less than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLTPD / CMPNLTPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are not less than or equal to those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 
/// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLEPD / CMPNLEPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are not greater than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLTPD / CMPNLTPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are not greater than or equal to those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLEPD / CMPNLEPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] for equality. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPEQSD / CMPEQSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than the corresponding value in /// the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLTSD / CMPLTSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. 
The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than or equal to the /// corresponding value in the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLESD / CMPLESD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than the corresponding value /// in the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLTSD / CMPLTSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_sd(__m128d __a, __m128d __b) { __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a); return __extension__(__m128d){__c[0], __a[1]}; } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than or equal to the /// corresponding value in the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLESD / CMPLESD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_sd(__m128d __a, __m128d __b) { __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a); return __extension__(__m128d){__c[0], __a[1]}; } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is "ordered" with respect to the /// corresponding value in the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair /// of double-precision values are "ordered" with respect to each other if /// neither value is a NaN. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPORDSD / CMPORDSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is "unordered" with respect to the /// corresponding value in the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair /// of double-precision values are "unordered" with respect to each other if /// one or both values are NaN. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPUNORDSD / CMPUNORDSD /// instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is unequal to the corresponding value in /// the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNEQSD / CMPNEQSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
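/* --- Illustrative usage sketch, not part of emmintrin.h. ---
 * The packed comparisons above produce per-lane all-ones / all-zeros masks,
 * which combine with the bitwise operations to select between two vectors.
 * Here an element-wise minimum is written out by hand (ignoring NaN
 * handling, which _mm_min_pd defines differently). Standalone example; the
 * function name is invented. */
#include <emmintrin.h>
static inline __m128d __example_select_min_pd(__m128d a, __m128d b) {
  __m128d mask = _mm_cmplt_pd(a, b);        /* lane = all-ones where a < b  */
  return _mm_or_pd(_mm_and_pd(mask, a),     /* keep a where the mask is set */
                   _mm_andnot_pd(mask, b)); /* keep b elsewhere             */
}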
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is not less than the corresponding /// value in the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLTSD / CMPNLTSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is not less than or equal to the /// corresponding value in the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLESD / CMPNLESD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is not greater than the corresponding /// value in the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLTSD / CMPNLTSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_sd(__m128d __a, __m128d __b) { __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a); return __extension__(__m128d){__c[0], __a[1]}; } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is not greater than or equal to the /// corresponding value in the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLESD / CMPNLESD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_sd(__m128d __a, __m128d __b) { __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a); return __extension__(__m128d){__c[0], __a[1]}; } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] for equality. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower double-precision values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISD / COMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a, __m128d __b) { return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than the corresponding value in /// the second parameter. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower double-precision values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISD / COMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a, __m128d __b) { return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than or equal to the /// corresponding value in the second parameter. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower double-precision values is NaN, returns 0. 
/// /// \headerfile /// /// This intrinsic corresponds to the VCOMISD / COMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_sd(__m128d __a, __m128d __b) { return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than the corresponding value /// in the second parameter. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower double-precision values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISD / COMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_sd(__m128d __a, __m128d __b) { return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than or equal to the /// corresponding value in the second parameter. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower double-precision values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISD / COMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_sd(__m128d __a, __m128d __b) { return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is unequal to the corresponding value in /// the second parameter. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower double-precision values is NaN, 1 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISD / COMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. 
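/* --- Illustrative usage sketch, not part of emmintrin.h. ---
 * Unlike the _mm_cmp*_sd family, the _mm_comi*_sd routines above return a
 * plain int, so they are convenient for branching on the low elements. As
 * documented above, every variant except "neq" reports 0 when either input
 * is NaN. Standalone example; the function name is invented. */
#include <emmintrin.h>
static inline double __example_clamp_low(__m128d x, __m128d lo, __m128d hi) {
  if (_mm_comilt_sd(x, lo)) /* low element of x < low element of lo */
    return _mm_cvtsd_f64(lo);
  if (_mm_comigt_sd(x, hi)) /* low element of x > low element of hi */
    return _mm_cvtsd_f64(hi);
  return _mm_cvtsd_f64(x);
}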
static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_sd(__m128d __a, __m128d __b) { return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] for equality. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower double-precision values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISD / UCOMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_sd(__m128d __a, __m128d __b) { return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than the corresponding value in /// the second parameter. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower double-precision values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISD / UCOMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_sd(__m128d __a, __m128d __b) { return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than or equal to the /// corresponding value in the second parameter. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower double-precision values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISD / UCOMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_sd(__m128d __a, __m128d __b) { return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than the corresponding value /// in the second parameter. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower double-precision values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISD / UCOMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. 
The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_sd(__m128d __a, __m128d __b) { return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than or equal to the /// corresponding value in the second parameter. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower double-precision values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISD / UCOMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_sd(__m128d __a, __m128d __b) { return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is unequal to the corresponding value in /// the second parameter. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower double-precision values is NaN, 1 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISD / UCOMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison result. static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a, __m128d __b) { return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b); } /// Converts the two double-precision floating-point elements of a /// 128-bit vector of [2 x double] into two single-precision floating-point /// values, returned in the lower 64 bits of a 128-bit vector of [4 x float]. /// The upper 64 bits of the result vector are set to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTPD2PS / CVTPD2PS instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the /// converted values. The upper 64 bits are set to zero. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a) { return __builtin_ia32_cvtpd2ps((__v2df)__a); } /// Converts the lower two single-precision floating-point elements of a /// 128-bit vector of [4 x float] into two double-precision floating-point /// values, returned in a 128-bit vector of [2 x double]. The upper two /// elements of the input vector are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTPS2PD / CVTPS2PD instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. 
The lower two single-precision /// floating-point elements are converted to double-precision values. The /// upper two elements are unused. /// \returns A 128-bit vector of [2 x double] containing the converted values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a) { return (__m128d) __builtin_convertvector( __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df); } /// Converts the lower two integer elements of a 128-bit vector of /// [4 x i32] into two double-precision floating-point values, returned in a /// 128-bit vector of [2 x double]. /// /// The upper two elements of the input vector are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTDQ2PD / CVTDQ2PD instruction. /// /// \param __a /// A 128-bit integer vector of [4 x i32]. The lower two integer elements are /// converted to double-precision values. /// /// The upper two elements are unused. /// \returns A 128-bit vector of [2 x double] containing the converted values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a) { return (__m128d) __builtin_convertvector( __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df); } /// Converts the two double-precision floating-point elements of a /// 128-bit vector of [2 x double] into two signed 32-bit integer values, /// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. The upper /// 64 bits of the result vector are set to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTPD2DQ / CVTPD2DQ instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the /// converted values. The upper 64 bits are set to zero. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epi32(__m128d __a) { return __builtin_ia32_cvtpd2dq((__v2df)__a); } /// Converts the low-order element of a 128-bit vector of [2 x double] /// into a 32-bit signed integer value. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSD2SI / CVTSD2SI instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the /// conversion. /// \returns A 32-bit signed integer containing the converted value. static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a) { return __builtin_ia32_cvtsd2si((__v2df)__a); } /// Converts the lower double-precision floating-point element of a /// 128-bit vector of [2 x double], in the second parameter, into a /// single-precision floating-point value, returned in the lower 32 bits of a /// 128-bit vector of [4 x float]. The upper 96 bits of the result vector are /// copied from the upper 96 bits of the first parameter. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSD2SS / CVTSD2SS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. The upper 96 bits of this parameter are /// copied to the upper 96 bits of the result. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision /// floating-point element is used in the conversion. /// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the /// converted value from the second parameter. The upper 96 bits are copied /// from the upper 96 bits of the first parameter. 
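/* --- Illustrative usage sketch, not part of emmintrin.h. ---
 * Round-tripping between the conversion routines above: _mm_cvtpd_ps narrows
 * the two doubles to floats and zeroes the upper two float lanes, and
 * _mm_cvtps_pd widens the two low float lanes back to doubles (precision
 * permitting). Standalone example; the function name is invented. */
#include <emmintrin.h>
static inline __m128d __example_narrow_then_widen(__m128d a) {
  __m128 f = _mm_cvtpd_ps(a); /* [4 x float]: {(float)a0, (float)a1, 0, 0} */
  return _mm_cvtps_pd(f);     /* back to [2 x double] */
}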
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a, __m128d __b) { return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b); } /// Converts a 32-bit signed integer value, in the second parameter, into /// a double-precision floating-point value, returned in the lower 64 bits of /// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector /// are copied from the upper 64 bits of the first parameter. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSI2SD / CVTSI2SD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are /// copied to the upper 64 bits of the result. /// \param __b /// A 32-bit signed integer containing the value to be converted. /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the /// converted value from the second parameter. The upper 64 bits are copied /// from the upper 64 bits of the first parameter. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi32_sd(__m128d __a, int __b) { __a[0] = __b; return __a; } /// Converts the lower single-precision floating-point element of a /// 128-bit vector of [4 x float], in the second parameter, into a /// double-precision floating-point value, returned in the lower 64 bits of /// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector /// are copied from the upper 64 bits of the first parameter. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSS2SD / CVTSS2SD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are /// copied to the upper 64 bits of the result. /// \param __b /// A 128-bit vector of [4 x float]. The lower single-precision /// floating-point element is used in the conversion. /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the /// converted value from the second parameter. The upper 64 bits are copied /// from the upper 64 bits of the first parameter. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtss_sd(__m128d __a, __m128 __b) { __a[0] = __b[0]; return __a; } /// Converts the two double-precision floating-point elements of a /// 128-bit vector of [2 x double] into two signed 32-bit integer values, /// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. /// /// If the result of either conversion is inexact, the result is truncated /// (rounded towards zero) regardless of the current MXCSR setting. The upper /// 64 bits of the result vector are set to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTTPD2DQ / CVTTPD2DQ /// instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the /// converted values. The upper 64 bits are set to zero. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epi32(__m128d __a) { return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a); } /// Converts the low-order element of a [2 x double] vector into a 32-bit /// signed integer value, truncating the result when it is inexact. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTTSD2SI / CVTTSD2SI /// instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the /// conversion. /// \returns A 32-bit signed integer containing the converted value. 
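///
/// A short illustration of the truncating behavior (values are arbitrary):
/// \code
/// __m128d v = _mm_set_sd(-3.7);
/// int     i = _mm_cvttsd_si32(v);  // i == -3, truncated toward zero
///                                  // regardless of the MXCSR rounding mode
/// \endcode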
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_si32(__m128d __a) { return __builtin_ia32_cvttsd2si((__v2df)__a); } /// Converts the two double-precision floating-point elements of a /// 128-bit vector of [2 x double] into two signed 32-bit integer values, /// returned in a 64-bit vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPD2PI instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 64-bit vector of [2 x i32] containing the converted values. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtpd_pi32(__m128d __a) { return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a); } /// Converts the two double-precision floating-point elements of a /// 128-bit vector of [2 x double] into two signed 32-bit integer values, /// returned in a 64-bit vector of [2 x i32]. /// /// If the result of either conversion is inexact, the result is truncated /// (rounded towards zero) regardless of the current MXCSR setting. /// /// \headerfile /// /// This intrinsic corresponds to the CVTTPD2PI instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 64-bit vector of [2 x i32] containing the converted values. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvttpd_pi32(__m128d __a) { return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a); } /// Converts the two signed 32-bit integer elements of a 64-bit vector of /// [2 x i32] into two double-precision floating-point values, returned in a /// 128-bit vector of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPI2PD instruction. /// /// \param __a /// A 64-bit vector of [2 x i32]. /// \returns A 128-bit vector of [2 x double] containing the converted values. static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32_pd(__m64 __a) { return __builtin_ia32_cvtpi2pd((__v2si)__a); } /// Returns the low-order element of a 128-bit vector of [2 x double] as /// a double-precision floating-point value. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower 64 bits are returned. /// \returns A double-precision floating-point value copied from the lower 64 /// bits of \a __a. static __inline__ double __DEFAULT_FN_ATTRS _mm_cvtsd_f64(__m128d __a) { return __a[0]; } /// Loads a 128-bit floating-point vector of [2 x double] from an aligned /// memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPD / MOVAPD instruction. /// /// \param __dp /// A pointer to a 128-bit memory location. The address of the memory /// location has to be 16-byte aligned. /// \returns A 128-bit vector of [2 x double] containing the loaded values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_pd(double const *__dp) { return *(const __m128d *)__dp; } /// Loads a double-precision floating-point value from a specified memory /// location and duplicates it to both vector elements of a 128-bit vector of /// [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDDUP / MOVDDUP instruction. /// /// \param __dp /// A pointer to a memory location containing a double-precision value. /// \returns A 128-bit vector of [2 x double] containing the loaded and /// duplicated values. 
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load1_pd(double const *__dp) { struct __mm_load1_pd_struct { double __u; } __attribute__((__packed__, __may_alias__)); double __u = ((const struct __mm_load1_pd_struct *)__dp)->__u; return __extension__(__m128d){__u, __u}; } #define _mm_load_pd1(dp) _mm_load1_pd(dp) /// Loads two double-precision values, in reverse order, from an aligned /// memory location into a 128-bit vector of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPD / MOVAPD instruction + /// needed shuffling instructions. In AVX mode, the shuffling may be combined /// with the \c VMOVAPD, resulting in only a \c VPERMILPD instruction. /// /// \param __dp /// A 16-byte aligned pointer to an array of double-precision values to be /// loaded in reverse order. /// \returns A 128-bit vector of [2 x double] containing the reversed loaded /// values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadr_pd(double const *__dp) { __m128d __u = *(const __m128d *)__dp; return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0); } /// Loads a 128-bit floating-point vector of [2 x double] from an /// unaligned memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVUPD / MOVUPD instruction. /// /// \param __dp /// A pointer to a 128-bit memory location. The address of the memory /// location does not have to be aligned. /// \returns A 128-bit vector of [2 x double] containing the loaded values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadu_pd(double const *__dp) { struct __loadu_pd { __m128d_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_pd *)__dp)->__v; } /// Loads a 64-bit integer value to the low element of a 128-bit integer /// vector and clears the upper element. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVQ / MOVQ instruction. /// /// \param __a /// A pointer to a 64-bit memory location. The address of the memory /// location does not have to be aligned. /// \returns A 128-bit vector of [2 x i64] containing the loaded value. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si64(void const *__a) { struct __loadu_si64 { long long __v; } __attribute__((__packed__, __may_alias__)); long long __u = ((const struct __loadu_si64 *)__a)->__v; return __extension__(__m128i)(__v2di){__u, 0LL}; } /// Loads a 32-bit integer value to the low element of a 128-bit integer /// vector and clears the upper element. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVD / MOVD instruction. /// /// \param __a /// A pointer to a 32-bit memory location. The address of the memory /// location does not have to be aligned. /// \returns A 128-bit vector of [4 x i32] containing the loaded value. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si32(void const *__a) { struct __loadu_si32 { int __v; } __attribute__((__packed__, __may_alias__)); int __u = ((const struct __loadu_si32 *)__a)->__v; return __extension__(__m128i)(__v4si){__u, 0, 0, 0}; } /// Loads a 16-bit integer value to the low element of a 128-bit integer /// vector and clears the upper element. /// /// \headerfile /// /// This intrinsic does not correspond to a specific instruction. /// /// \param __a /// A pointer to a 16-bit memory location. The address of the memory /// location does not have to be aligned. /// \returns A 128-bit vector of [8 x i16] containing the loaded value. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si16(void const *__a) { struct __loadu_si16 { short __v; } __attribute__((__packed__, __may_alias__)); short __u = ((const struct __loadu_si16 *)__a)->__v; return __extension__(__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0}; } /// Loads a 64-bit double-precision value to the low element of a /// 128-bit integer vector and clears the upper element. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVSD / MOVSD instruction. /// /// \param __dp /// A pointer to a memory location containing a double-precision value. /// The address of the memory location does not have to be aligned. /// \returns A 128-bit vector of [2 x double] containing the loaded value. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_sd(double const *__dp) { struct __mm_load_sd_struct { double __u; } __attribute__((__packed__, __may_alias__)); double __u = ((const struct __mm_load_sd_struct *)__dp)->__u; return __extension__(__m128d){__u, 0}; } /// Loads a double-precision value into the high-order bits of a 128-bit /// vector of [2 x double]. The low-order bits are copied from the low-order /// bits of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVHPD / MOVHPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. \n /// Bits [63:0] are written to bits [63:0] of the result. /// \param __dp /// A pointer to a 64-bit memory location containing a double-precision /// floating-point value that is loaded. The loaded value is written to bits /// [127:64] of the result. The address of the memory location does not have /// to be aligned. /// \returns A 128-bit vector of [2 x double] containing the moved values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadh_pd(__m128d __a, double const *__dp) { struct __mm_loadh_pd_struct { double __u; } __attribute__((__packed__, __may_alias__)); double __u = ((const struct __mm_loadh_pd_struct *)__dp)->__u; return __extension__(__m128d){__a[0], __u}; } /// Loads a double-precision value into the low-order bits of a 128-bit /// vector of [2 x double]. The high-order bits are copied from the /// high-order bits of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVLPD / MOVLPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. \n /// Bits [127:64] are written to bits [127:64] of the result. /// \param __dp /// A pointer to a 64-bit memory location containing a double-precision /// floating-point value that is loaded. The loaded value is written to bits /// [63:0] of the result. The address of the memory location does not have to /// be aligned. /// \returns A 128-bit vector of [2 x double] containing the moved values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadl_pd(__m128d __a, double const *__dp) { struct __mm_loadl_pd_struct { double __u; } __attribute__((__packed__, __may_alias__)); double __u = ((const struct __mm_loadl_pd_struct *)__dp)->__u; return __extension__(__m128d){__u, __a[1]}; } /// Constructs a 128-bit floating-point vector of [2 x double] with /// unspecified content. This could be used as an argument to another /// intrinsic function where the argument is required but the value is not /// actually used. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \returns A 128-bit floating-point vector of [2 x double] with unspecified /// content. 
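///
/// One possible use, as a "don't care" operand when only part of the result
/// matters (illustrative only):
/// \code
/// __m128d lo = _mm_cvtsi32_sd(_mm_undefined_pd(), 7);
/// // lo[0] == 7.0; lo[1] is unspecified and must not be relied upon
/// \endcode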
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void) { return (__m128d)__builtin_ia32_undef128(); } /// Constructs a 128-bit floating-point vector of [2 x double]. The lower /// 64 bits of the vector are initialized with the specified double-precision /// floating-point value. The upper 64 bits are set to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVQ / MOVQ instruction. /// /// \param __w /// A double-precision floating-point value used to initialize the lower 64 /// bits of the result. /// \returns An initialized 128-bit floating-point vector of [2 x double]. The /// lower 64 bits contain the value of the parameter. The upper 64 bits are /// set to zero. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_sd(double __w) { return __extension__(__m128d){__w, 0}; } /// Constructs a 128-bit floating-point vector of [2 x double], with each /// of the two double-precision floating-point vector elements set to the /// specified double-precision floating-point value. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDDUP / MOVLHPS instruction. /// /// \param __w /// A double-precision floating-point value used to initialize each vector /// element of the result. /// \returns An initialized 128-bit floating-point vector of [2 x double]. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set1_pd(double __w) { return __extension__(__m128d){__w, __w}; } /// Constructs a 128-bit floating-point vector of [2 x double], with each /// of the two double-precision floating-point vector elements set to the /// specified double-precision floating-point value. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDDUP / MOVLHPS instruction. /// /// \param __w /// A double-precision floating-point value used to initialize each vector /// element of the result. /// \returns An initialized 128-bit floating-point vector of [2 x double]. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd1(double __w) { return _mm_set1_pd(__w); } /// Constructs a 128-bit floating-point vector of [2 x double] /// initialized with the specified double-precision floating-point values. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKLPD / UNPCKLPD instruction. /// /// \param __w /// A double-precision floating-point value used to initialize the upper 64 /// bits of the result. /// \param __x /// A double-precision floating-point value used to initialize the lower 64 /// bits of the result. /// \returns An initialized 128-bit floating-point vector of [2 x double]. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd(double __w, double __x) { return __extension__(__m128d){__x, __w}; } /// Constructs a 128-bit floating-point vector of [2 x double], /// initialized in reverse order with the specified double-precision /// floating-point values. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKLPD / UNPCKLPD instruction. /// /// \param __w /// A double-precision floating-point value used to initialize the lower 64 /// bits of the result. /// \param __x /// A double-precision floating-point value used to initialize the upper 64 /// bits of the result. /// \returns An initialized 128-bit floating-point vector of [2 x double]. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setr_pd(double __w, double __x) { return __extension__(__m128d){__w, __x}; } /// Constructs a 128-bit floating-point vector of [2 x double] /// initialized to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VXORPS / XORPS instruction. 
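///
/// For example (illustrative only):
/// \code
/// __m128d z = _mm_setzero_pd();    // z = { 0.0, 0.0 }
/// \endcode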
/// /// \returns An initialized 128-bit floating-point vector of [2 x double] with /// all elements set to zero. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void) { return __extension__(__m128d){0.0, 0.0}; } /// Constructs a 128-bit floating-point vector of [2 x double]. The lower /// 64 bits are set to the lower 64 bits of the second parameter. The upper /// 64 bits are set to the upper 64 bits of the first parameter. /// /// \headerfile /// /// This intrinsic corresponds to the VBLENDPD / BLENDPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The upper 64 bits are written to the /// upper 64 bits of the result. /// \param __b /// A 128-bit vector of [2 x double]. The lower 64 bits are written to the /// lower 64 bits of the result. /// \returns A 128-bit vector of [2 x double] containing the moved values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, __m128d __b) { __a[0] = __b[0]; return __a; } /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a /// memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVSD / MOVSD instruction. /// /// \param __dp /// A pointer to a 64-bit memory location. /// \param __a /// A 128-bit vector of [2 x double] containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_store_sd(double *__dp, __m128d __a) { struct __mm_store_sd_struct { double __u; } __attribute__((__packed__, __may_alias__)); ((struct __mm_store_sd_struct *)__dp)->__u = __a[0]; } /// Moves packed double-precision values from a 128-bit vector of /// [2 x double] to a memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPD / MOVAPS instruction. /// /// \param __dp /// A pointer to an aligned memory location that can store two /// double-precision values. /// \param __a /// A packed 128-bit vector of [2 x double] containing the values to be /// moved. static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd(double *__dp, __m128d __a) { *(__m128d *)__dp = __a; } /// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to /// the upper and lower 64 bits of a memory location. /// /// \headerfile /// /// This intrinsic corresponds to the /// VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS instruction. /// /// \param __dp /// A pointer to a memory location that can store two double-precision /// values. /// \param __a /// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each /// of the values in \a __dp. static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp, __m128d __a) { __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); _mm_store_pd(__dp, __a); } /// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to /// the upper and lower 64 bits of a memory location. /// /// \headerfile /// /// This intrinsic corresponds to the /// VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS instruction. /// /// \param __dp /// A pointer to a memory location that can store two double-precision /// values. /// \param __a /// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each /// of the values in \a __dp. static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd1(double *__dp, __m128d __a) { _mm_store1_pd(__dp, __a); } /// Stores a 128-bit vector of [2 x double] into an unaligned memory /// location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVUPD / MOVUPD instruction. /// /// \param __dp /// A pointer to a 128-bit memory location. 
The address of the memory /// location does not have to be aligned. /// \param __a /// A 128-bit vector of [2 x double] containing the values to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_pd(double *__dp, __m128d __a) { struct __storeu_pd { __m128d_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_pd *)__dp)->__v = __a; } /// Stores two double-precision values, in reverse order, from a 128-bit /// vector of [2 x double] to a 16-byte aligned memory location. /// /// \headerfile /// /// This intrinsic corresponds to a shuffling instruction followed by a /// VMOVAPD / MOVAPD instruction. /// /// \param __dp /// A pointer to a 16-byte aligned memory location that can store two /// double-precision values. /// \param __a /// A 128-bit vector of [2 x double] containing the values to be reversed and /// stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_pd(double *__dp, __m128d __a) { __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0); *(__m128d *)__dp = __a; } /// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a /// memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVHPD / MOVHPD instruction. /// /// \param __dp /// A pointer to a 64-bit memory location. /// \param __a /// A 128-bit vector of [2 x double] containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pd(double *__dp, __m128d __a) { struct __mm_storeh_pd_struct { double __u; } __attribute__((__packed__, __may_alias__)); ((struct __mm_storeh_pd_struct *)__dp)->__u = __a[1]; } /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a /// memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVLPD / MOVLPD instruction. /// /// \param __dp /// A pointer to a 64-bit memory location. /// \param __a /// A 128-bit vector of [2 x double] containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp, __m128d __a) { struct __mm_storeh_pd_struct { double __u; } __attribute__((__packed__, __may_alias__)); ((struct __mm_storeh_pd_struct *)__dp)->__u = __a[0]; } /// Adds the corresponding elements of two 128-bit vectors of [16 x i8], /// saving the lower 8 bits of each sum in the corresponding element of a /// 128-bit result vector of [16 x i8]. /// /// The integer elements of both parameters can be either signed or unsigned. /// /// \headerfile /// /// This intrinsic corresponds to the VPADDB / PADDB instruction. /// /// \param __a /// A 128-bit vector of [16 x i8]. /// \param __b /// A 128-bit vector of [16 x i8]. /// \returns A 128-bit vector of [16 x i8] containing the sums of both /// parameters. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, __m128i __b) { return (__m128i)((__v16qu)__a + (__v16qu)__b); } /// Adds the corresponding elements of two 128-bit vectors of [8 x i16], /// saving the lower 16 bits of each sum in the corresponding element of a /// 128-bit result vector of [8 x i16]. /// /// The integer elements of both parameters can be either signed or unsigned. /// /// \headerfile /// /// This intrinsic corresponds to the VPADDW / PADDW instruction. /// /// \param __a /// A 128-bit vector of [8 x i16]. /// \param __b /// A 128-bit vector of [8 x i16]. /// \returns A 128-bit vector of [8 x i16] containing the sums of both /// parameters. 
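///
/// A minimal illustration of the lane-wise, wrapping addition (values are
/// arbitrary):
/// \code
/// __m128i a = _mm_set1_epi16(0x7FFF);
/// __m128i b = _mm_set1_epi16(1);
/// __m128i r = _mm_add_epi16(a, b); // every lane wraps to (short)0x8000
/// \endcode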
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a + (__v8hu)__b); } /// Adds the corresponding elements of two 128-bit vectors of [4 x i32], /// saving the lower 32 bits of each sum in the corresponding element of a /// 128-bit result vector of [4 x i32]. /// /// The integer elements of both parameters can be either signed or unsigned. /// /// \headerfile /// /// This intrinsic corresponds to the VPADDD / PADDD instruction. /// /// \param __a /// A 128-bit vector of [4 x i32]. /// \param __b /// A 128-bit vector of [4 x i32]. /// \returns A 128-bit vector of [4 x i32] containing the sums of both /// parameters. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4su)__a + (__v4su)__b); } /// Adds two signed or unsigned 64-bit integer values, returning the /// lower 64 bits of the sum. /// /// \headerfile /// /// This intrinsic corresponds to the PADDQ instruction. /// /// \param __a /// A 64-bit integer. /// \param __b /// A 64-bit integer. /// \returns A 64-bit integer containing the sum of both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_add_si64(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b); } /// Adds the corresponding elements of two 128-bit vectors of [2 x i64], /// saving the lower 64 bits of each sum in the corresponding element of a /// 128-bit result vector of [2 x i64]. /// /// The integer elements of both parameters can be either signed or unsigned. /// /// \headerfile /// /// This intrinsic corresponds to the VPADDQ / PADDQ instruction. /// /// \param __a /// A 128-bit vector of [2 x i64]. /// \param __b /// A 128-bit vector of [2 x i64]. /// \returns A 128-bit vector of [2 x i64] containing the sums of both /// parameters. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a + (__v2du)__b); } /// Adds, with saturation, the corresponding elements of two 128-bit /// signed [16 x i8] vectors, saving each sum in the corresponding element /// of a 128-bit result vector of [16 x i8]. /// /// Positive sums greater than 0x7F are saturated to 0x7F. Negative sums /// less than 0x80 are saturated to 0x80. /// /// \headerfile /// /// This intrinsic corresponds to the VPADDSB / PADDSB instruction. /// /// \param __a /// A 128-bit signed [16 x i8] vector. /// \param __b /// A 128-bit signed [16 x i8] vector. /// \returns A 128-bit signed [16 x i8] vector containing the saturated sums of /// both parameters. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_add_sat((__v16qs)__a, (__v16qs)__b); } /// Adds, with saturation, the corresponding elements of two 128-bit /// signed [8 x i16] vectors, saving each sum in the corresponding element /// of a 128-bit result vector of [8 x i16]. /// /// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums /// less than 0x8000 are saturated to 0x8000. /// /// \headerfile /// /// This intrinsic corresponds to the VPADDSW / PADDSW instruction. /// /// \param __a /// A 128-bit signed [8 x i16] vector. /// \param __b /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [8 x i16] vector containing the saturated sums of /// both parameters. 
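///
/// Contrast with the wrapping _mm_add_epi16 (values are arbitrary):
/// \code
/// __m128i a = _mm_set1_epi16(0x7FFF);
/// __m128i b = _mm_set1_epi16(1);
/// __m128i r = _mm_adds_epi16(a, b); // every lane saturates to 0x7FFF
/// \endcode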
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_add_sat((__v8hi)__a, (__v8hi)__b); } /// Adds, with saturation, the corresponding elements of two 128-bit /// unsigned [16 x i8] vectors, saving each sum in the corresponding element /// of a 128-bit result vector of [16 x i8]. /// /// Positive sums greater than 0xFF are saturated to 0xFF. Negative sums are /// saturated to 0x00. /// /// \headerfile /// /// This intrinsic corresponds to the VPADDUSB / PADDUSB instruction. /// /// \param __a /// A 128-bit unsigned [16 x i8] vector. /// \param __b /// A 128-bit unsigned [16 x i8] vector. /// \returns A 128-bit unsigned [16 x i8] vector containing the saturated sums /// of both parameters. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_add_sat((__v16qu)__a, (__v16qu)__b); } /// Adds, with saturation, the corresponding elements of two 128-bit /// unsigned [8 x i16] vectors, saving each sum in the corresponding element /// of a 128-bit result vector of [8 x i16]. /// /// Positive sums greater than 0xFFFF are saturated to 0xFFFF. Negative sums /// are saturated to 0x0000. /// /// \headerfile /// /// This intrinsic corresponds to the VPADDUSW / PADDUSW instruction. /// /// \param __a /// A 128-bit unsigned [8 x i16] vector. /// \param __b /// A 128-bit unsigned [8 x i16] vector. /// \returns A 128-bit unsigned [8 x i16] vector containing the saturated sums /// of both parameters. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_add_sat((__v8hu)__a, (__v8hu)__b); } /// Computes the rounded averages of corresponding elements of two /// 128-bit unsigned [16 x i8] vectors, saving each result in the /// corresponding element of a 128-bit result vector of [16 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the VPAVGB / PAVGB instruction. /// /// \param __a /// A 128-bit unsigned [16 x i8] vector. /// \param __b /// A 128-bit unsigned [16 x i8] vector. /// \returns A 128-bit unsigned [16 x i8] vector containing the rounded /// averages of both parameters. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b); } /// Computes the rounded averages of corresponding elements of two /// 128-bit unsigned [8 x i16] vectors, saving each result in the /// corresponding element of a 128-bit result vector of [8 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the VPAVGW / PAVGW instruction. /// /// \param __a /// A 128-bit unsigned [8 x i16] vector. /// \param __b /// A 128-bit unsigned [8 x i16] vector. /// \returns A 128-bit unsigned [8 x i16] vector containing the rounded /// averages of both parameters. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b); } /// Multiplies the corresponding elements of two 128-bit signed [8 x i16] /// vectors, producing eight intermediate 32-bit signed integer products, and /// adds the consecutive pairs of 32-bit products to form a 128-bit signed /// [4 x i32] vector. /// /// For example, bits [15:0] of both parameters are multiplied producing a /// 32-bit product, bits [31:16] of both parameters are multiplied producing /// a 32-bit product, and the sum of those two products becomes bits [31:0] /// of the result. 
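///
/// An equivalent scalar formulation (illustrative only), treating the
/// operands as arrays of eight signed 16-bit lanes and the result as four
/// signed 32-bit lanes:
/// \code
/// short a[8], b[8];  // the 16-bit lanes of the two operands
/// int   r[4];        // the 32-bit lanes of the result
/// for (int i = 0; i < 4; ++i)
///   r[i] = (int)a[2 * i] * b[2 * i] + (int)a[2 * i + 1] * b[2 * i + 1];
/// \endcode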
/// /// \headerfile /// /// This intrinsic corresponds to the VPMADDWD / PMADDWD instruction. /// /// \param __a /// A 128-bit signed [8 x i16] vector. /// \param __b /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [4 x i32] vector containing the sums of products /// of both parameters. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b); } /// Compares corresponding elements of two 128-bit signed [8 x i16] /// vectors, saving the greater value from each comparison in the /// corresponding element of a 128-bit result vector of [8 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the VPMAXSW / PMAXSW instruction. /// /// \param __a /// A 128-bit signed [8 x i16] vector. /// \param __b /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [8 x i16] vector containing the greater value of /// each comparison. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_max((__v8hi)__a, (__v8hi)__b); } /// Compares corresponding elements of two 128-bit unsigned [16 x i8] /// vectors, saving the greater value from each comparison in the /// corresponding element of a 128-bit result vector of [16 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the VPMAXUB / PMAXUB instruction. /// /// \param __a /// A 128-bit unsigned [16 x i8] vector. /// \param __b /// A 128-bit unsigned [16 x i8] vector. /// \returns A 128-bit unsigned [16 x i8] vector containing the greater value of /// each comparison. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_max((__v16qu)__a, (__v16qu)__b); } /// Compares corresponding elements of two 128-bit signed [8 x i16] /// vectors, saving the smaller value from each comparison in the /// corresponding element of a 128-bit result vector of [8 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the VPMINSW / PMINSW instruction. /// /// \param __a /// A 128-bit signed [8 x i16] vector. /// \param __b /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [8 x i16] vector containing the smaller value of /// each comparison. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_min((__v8hi)__a, (__v8hi)__b); } /// Compares corresponding elements of two 128-bit unsigned [16 x i8] /// vectors, saving the smaller value from each comparison in the /// corresponding element of a 128-bit result vector of [16 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the VPMINUB / PMINUB instruction. /// /// \param __a /// A 128-bit unsigned [16 x i8] vector. /// \param __b /// A 128-bit unsigned [16 x i8] vector. /// \returns A 128-bit unsigned [16 x i8] vector containing the smaller value of /// each comparison. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_min((__v16qu)__a, (__v16qu)__b); } /// Multiplies the corresponding elements of two signed [8 x i16] /// vectors, saving the upper 16 bits of each 32-bit product in the /// corresponding element of a 128-bit signed [8 x i16] result vector. /// /// \headerfile /// /// This intrinsic corresponds to the VPMULHW / PMULHW instruction. /// /// \param __a /// A 128-bit signed [8 x i16] vector. /// \param __b /// A 128-bit signed [8 x i16] vector. 
/// \returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of /// each of the eight 32-bit products. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b); } /// Multiplies the corresponding elements of two unsigned [8 x i16] /// vectors, saving the upper 16 bits of each 32-bit product in the /// corresponding element of a 128-bit unsigned [8 x i16] result vector. /// /// \headerfile /// /// This intrinsic corresponds to the VPMULHUW / PMULHUW instruction. /// /// \param __a /// A 128-bit unsigned [8 x i16] vector. /// \param __b /// A 128-bit unsigned [8 x i16] vector. /// \returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits /// of each of the eight 32-bit products. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b); } /// Multiplies the corresponding elements of two signed [8 x i16] /// vectors, saving the lower 16 bits of each 32-bit product in the /// corresponding element of a 128-bit signed [8 x i16] result vector. /// /// \headerfile /// /// This intrinsic corresponds to the VPMULLW / PMULLW instruction. /// /// \param __a /// A 128-bit signed [8 x i16] vector. /// \param __b /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of /// each of the eight 32-bit products. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a * (__v8hu)__b); } /// Multiplies 32-bit unsigned integer values contained in the lower bits /// of the two 64-bit integer vectors and returns the 64-bit unsigned /// product. /// /// \headerfile /// /// This intrinsic corresponds to the PMULUDQ instruction. /// /// \param __a /// A 64-bit integer containing one of the source operands. /// \param __b /// A 64-bit integer containing one of the source operands. /// \returns A 64-bit integer vector containing the product of both operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mul_su32(__m64 __a, __m64 __b) { return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b); } /// Multiplies 32-bit unsigned integer values contained in the lower /// bits of the corresponding elements of two [2 x i64] vectors, and returns /// the 64-bit products in the corresponding elements of a [2 x i64] vector. /// /// \headerfile /// /// This intrinsic corresponds to the VPMULUDQ / PMULUDQ instruction. /// /// \param __a /// A [2 x i64] vector containing one of the source operands. /// \param __b /// A [2 x i64] vector containing one of the source operands. /// \returns A [2 x i64] vector containing the product of both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, __m128i __b) { return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b); } /// Computes the absolute differences of corresponding 8-bit integer /// values in two 128-bit vectors. Sums the first 8 absolute differences, and /// separately sums the second 8 absolute differences. Packs these two /// unsigned 16-bit integer sums into the upper and lower elements of a /// [2 x i64] vector. /// /// \headerfile /// /// This intrinsic corresponds to the VPSADBW / PSADBW instruction. /// /// \param __a /// A 128-bit integer vector containing one of the source operands. /// \param __b /// A 128-bit integer vector containing one of the source operands. 
/// \returns A [2 x i64] vector containing the sums of the sets of absolute /// differences between both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, __m128i __b) { return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b); } /// Subtracts the corresponding 8-bit integer values in the operands. /// /// \headerfile /// /// This intrinsic corresponds to the VPSUBB / PSUBB instruction. /// /// \param __a /// A 128-bit integer vector containing the minuends. /// \param __b /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, __m128i __b) { return (__m128i)((__v16qu)__a - (__v16qu)__b); } /// Subtracts the corresponding 16-bit integer values in the operands. /// /// \headerfile /// /// This intrinsic corresponds to the VPSUBW / PSUBW instruction. /// /// \param __a /// A 128-bit integer vector containing the minuends. /// \param __b /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a - (__v8hu)__b); } /// Subtracts the corresponding 32-bit integer values in the operands. /// /// \headerfile /// /// This intrinsic corresponds to the VPSUBD / PSUBD instruction. /// /// \param __a /// A 128-bit integer vector containing the minuends. /// \param __b /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4su)__a - (__v4su)__b); } /// Subtracts signed or unsigned 64-bit integer values and writes the /// difference to the corresponding bits in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the PSUBQ instruction. /// /// \param __a /// A 64-bit integer vector containing the minuend. /// \param __b /// A 64-bit integer vector containing the subtrahend. /// \returns A 64-bit integer vector containing the difference of the values in /// the operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sub_si64(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b); } /// Subtracts the corresponding elements of two [2 x i64] vectors. /// /// \headerfile /// /// This intrinsic corresponds to the VPSUBQ / PSUBQ instruction. /// /// \param __a /// A 128-bit integer vector containing the minuends. /// \param __b /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a - (__v2du)__b); } /// Subtracts, with saturation, corresponding 8-bit signed integer values in /// the input and returns the differences in the corresponding bytes in the /// destination. /// /// Differences greater than 0x7F are saturated to 0x7F, and differences /// less than 0x80 are saturated to 0x80. /// /// \headerfile /// /// This intrinsic corresponds to the VPSUBSB / PSUBSB instruction. /// /// \param __a /// A 128-bit integer vector containing the minuends. /// \param __b /// A 128-bit integer vector containing the subtrahends. 
/// \returns A 128-bit integer vector containing the differences of the values /// in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_sub_sat((__v16qs)__a, (__v16qs)__b); } /// Subtracts, with saturation, corresponding 16-bit signed integer values in /// the input and returns the differences in the corresponding bytes in the /// destination. /// /// Differences greater than 0x7FFF are saturated to 0x7FFF, and values less /// than 0x8000 are saturated to 0x8000. /// /// \headerfile /// /// This intrinsic corresponds to the VPSUBSW / PSUBSW instruction. /// /// \param __a /// A 128-bit integer vector containing the minuends. /// \param __b /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_sub_sat((__v8hi)__a, (__v8hi)__b); } /// Subtracts, with saturation, corresponding 8-bit unsigned integer values in /// the input and returns the differences in the corresponding bytes in the /// destination. /// /// Differences less than 0x00 are saturated to 0x00. /// /// \headerfile /// /// This intrinsic corresponds to the VPSUBUSB / PSUBUSB instruction. /// /// \param __a /// A 128-bit integer vector containing the minuends. /// \param __b /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the unsigned integer /// differences of the values in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_sub_sat((__v16qu)__a, (__v16qu)__b); } /// Subtracts, with saturation, corresponding 16-bit unsigned integer values in /// the input and returns the differences in the corresponding bytes in the /// destination. /// /// Differences less than 0x0000 are saturated to 0x0000. /// /// \headerfile /// /// This intrinsic corresponds to the VPSUBUSW / PSUBUSW instruction. /// /// \param __a /// A 128-bit integer vector containing the minuends. /// \param __b /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the unsigned integer /// differences of the values in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_sub_sat((__v8hu)__a, (__v8hu)__b); } /// Performs a bitwise AND of two 128-bit integer vectors. /// /// \headerfile /// /// This intrinsic corresponds to the VPAND / PAND instruction. /// /// \param __a /// A 128-bit integer vector containing one of the source operands. /// \param __b /// A 128-bit integer vector containing one of the source operands. /// \returns A 128-bit integer vector containing the bitwise AND of the values /// in both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a & (__v2du)__b); } /// Performs a bitwise AND of two 128-bit integer vectors, using the /// one's complement of the values contained in the first source operand. /// /// \headerfile /// /// This intrinsic corresponds to the VPANDN / PANDN instruction. /// /// \param __a /// A 128-bit vector containing the left source operand. The one's complement /// of this value is used in the bitwise AND. 
/// \param __b /// A 128-bit vector containing the right source operand. /// \returns A 128-bit integer vector containing the bitwise AND of the one's /// complement of the first operand and the values in the second operand. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, __m128i __b) { return (__m128i)(~(__v2du)__a & (__v2du)__b); } /// Performs a bitwise OR of two 128-bit integer vectors. /// /// \headerfile /// /// This intrinsic corresponds to the VPOR / POR instruction. /// /// \param __a /// A 128-bit integer vector containing one of the source operands. /// \param __b /// A 128-bit integer vector containing one of the source operands. /// \returns A 128-bit integer vector containing the bitwise OR of the values /// in both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a | (__v2du)__b); } /// Performs a bitwise exclusive OR of two 128-bit integer vectors. /// /// \headerfile /// /// This intrinsic corresponds to the VPXOR / PXOR instruction. /// /// \param __a /// A 128-bit integer vector containing one of the source operands. /// \param __b /// A 128-bit integer vector containing one of the source operands. /// \returns A 128-bit integer vector containing the bitwise exclusive OR of the /// values in both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a ^ (__v2du)__b); } /// Left-shifts the 128-bit integer vector operand by the specified /// number of bytes. Low-order bits are cleared. /// /// \headerfile /// /// \code /// __m128i _mm_slli_si128(__m128i a, const int imm); /// \endcode /// /// This intrinsic corresponds to the VPSLLDQ / PSLLDQ instruction. /// /// \param a /// A 128-bit integer vector containing the source operand. /// \param imm /// An immediate value specifying the number of bytes to left-shift operand /// \a a. /// \returns A 128-bit integer vector containing the left-shifted value. #define _mm_slli_si128(a, imm) \ ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), \ (int)(imm))) #define _mm_bslli_si128(a, imm) \ ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), \ (int)(imm))) /// Left-shifts each 16-bit value in the 128-bit integer vector operand /// by the specified number of bits. Low-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSLLW / PSLLW instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// An integer value specifying the number of bits to left-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the left-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count); } /// Left-shifts each 16-bit value in the 128-bit integer vector operand /// by the specified number of bits. Low-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSLLW / PSLLW instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to left-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the left-shifted values. 
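///
/// A minimal usage sketch (one possible way to form the count vector is
/// shown; values are arbitrary):
/// \code
/// __m128i v = _mm_set1_epi16(3);
/// __m128i c = _mm_cvtsi32_si128(4); // shift count of 4 in bits [63:0]
/// __m128i r = _mm_sll_epi16(v, c);  // every lane == 3 << 4 == 48
/// \endcode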
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count); } /// Left-shifts each 32-bit value in the 128-bit integer vector operand /// by the specified number of bits. Low-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSLLD / PSLLD instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// An integer value specifying the number of bits to left-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the left-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, int __count) { return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count); } /// Left-shifts each 32-bit value in the 128-bit integer vector operand /// by the specified number of bits. Low-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSLLD / PSLLD instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to left-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the left-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count); } /// Left-shifts each 64-bit value in the 128-bit integer vector operand /// by the specified number of bits. Low-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSLLQ / PSLLQ instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// An integer value specifying the number of bits to left-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the left-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, int __count) { return __builtin_ia32_psllqi128((__v2di)__a, __count); } /// Left-shifts each 64-bit value in the 128-bit integer vector operand /// by the specified number of bits. Low-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSLLQ / PSLLQ instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to left-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the left-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, __m128i __count) { return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count); } /// Right-shifts each 16-bit value in the 128-bit integer vector operand /// by the specified number of bits. High-order bits are filled with the sign /// bit of the initial value. /// /// \headerfile /// /// This intrinsic corresponds to the VPSRAW / PSRAW instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// An integer value specifying the number of bits to right-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. 
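///
/// A short illustration of the sign-preserving shift (values are arbitrary):
/// \code
/// __m128i v = _mm_set1_epi16(-16);
/// __m128i r = _mm_srai_epi16(v, 2); // every lane == -4; the sign bit is
///                                   // replicated into the vacated bits
/// \endcode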
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count); } /// Right-shifts each 16-bit value in the 128-bit integer vector operand /// by the specified number of bits. High-order bits are filled with the sign /// bit of the initial value. /// /// \headerfile /// /// This intrinsic corresponds to the VPSRAW / PSRAW instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to right-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count); } /// Right-shifts each 32-bit value in the 128-bit integer vector operand /// by the specified number of bits. High-order bits are filled with the sign /// bit of the initial value. /// /// \headerfile /// /// This intrinsic corresponds to the VPSRAD / PSRAD instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// An integer value specifying the number of bits to right-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count); } /// Right-shifts each 32-bit value in the 128-bit integer vector operand /// by the specified number of bits. High-order bits are filled with the sign /// bit of the initial value. /// /// \headerfile /// /// This intrinsic corresponds to the VPSRAD / PSRAD instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to right-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count); } /// Right-shifts the 128-bit integer vector operand by the specified /// number of bytes. High-order bits are cleared. /// /// \headerfile /// /// \code /// __m128i _mm_srli_si128(__m128i a, const int imm); /// \endcode /// /// This intrinsic corresponds to the VPSRLDQ / PSRLDQ instruction. /// /// \param a /// A 128-bit integer vector containing the source operand. /// \param imm /// An immediate value specifying the number of bytes to right-shift operand /// \a a. /// \returns A 128-bit integer vector containing the right-shifted value. #define _mm_srli_si128(a, imm) \ ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), \ (int)(imm))) #define _mm_bsrli_si128(a, imm) \ ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), \ (int)(imm))) /// Right-shifts each of 16-bit values in the 128-bit integer vector /// operand by the specified number of bits. High-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSRLW / PSRLW instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// An integer value specifying the number of bits to right-shift each value /// in operand \a __a. 
/// \returns A 128-bit integer vector containing the right-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count); } /// Right-shifts each of 16-bit values in the 128-bit integer vector /// operand by the specified number of bits. High-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSRLW / PSRLW instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to right-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count); } /// Right-shifts each of 32-bit values in the 128-bit integer vector /// operand by the specified number of bits. High-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSRLD / PSRLD instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// An integer value specifying the number of bits to right-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count); } /// Right-shifts each of 32-bit values in the 128-bit integer vector /// operand by the specified number of bits. High-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSRLD / PSRLD instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to right-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count); } /// Right-shifts each of 64-bit values in the 128-bit integer vector /// operand by the specified number of bits. High-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSRLQ / PSRLQ instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// An integer value specifying the number of bits to right-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, int __count) { return __builtin_ia32_psrlqi128((__v2di)__a, __count); } /// Right-shifts each of 64-bit values in the 128-bit integer vector /// operand by the specified number of bits. High-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSRLQ / PSRLQ instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to right-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. 
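///
/// A minimal usage sketch (one possible way to form the count vector is
/// shown; values are arbitrary):
/// \code
/// __m128i v = _mm_set1_epi64x(0x100);
/// __m128i c = _mm_cvtsi32_si128(8); // shift count of 8 in bits [63:0]
/// __m128i r = _mm_srl_epi64(v, c);  // both lanes == 1; vacated bits are 0
/// \endcode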
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, __m128i __count) { return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count); } /// Compares each of the corresponding 8-bit values of the 128-bit /// integer vectors for equality. /// /// Each comparison yields 0x0 for false, 0xFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPEQB / PCMPEQB instruction. /// /// \param __a /// A 128-bit integer vector. /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b) { return (__m128i)((__v16qi)__a == (__v16qi)__b); } /// Compares each of the corresponding 16-bit values of the 128-bit /// integer vectors for equality. /// /// Each comparison yields 0x0 for false, 0xFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPEQW / PCMPEQW instruction. /// /// \param __a /// A 128-bit integer vector. /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hi)__a == (__v8hi)__b); } /// Compares each of the corresponding 32-bit values of the 128-bit /// integer vectors for equality. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPEQD / PCMPEQD instruction. /// /// \param __a /// A 128-bit integer vector. /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4si)__a == (__v4si)__b); } /// Compares each of the corresponding signed 8-bit values of the 128-bit /// integer vectors to determine if the values in the first operand are /// greater than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPGTB / PCMPGTB instruction. /// /// \param __a /// A 128-bit integer vector. /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a, __m128i __b) { /* This function always performs a signed comparison, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m128i)((__v16qs)__a > (__v16qs)__b); } /// Compares each of the corresponding signed 16-bit values of the /// 128-bit integer vectors to determine if the values in the first operand /// are greater than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPGTW / PCMPGTW instruction. /// /// \param __a /// A 128-bit integer vector. /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hi)__a > (__v8hi)__b); } /// Compares each of the corresponding signed 32-bit values of the /// 128-bit integer vectors to determine if the values in the first operand /// are greater than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. 
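/*
 * Illustrative usage sketch (not part of the original header): the equality and
 * greater-than comparisons documented here produce all-ones or all-zeros lane
 * masks, which can be combined with the bitwise intrinsics from earlier in this
 * header to select between operands. Helper name is hypothetical; assumes
 * <emmintrin.h>.
 */
static inline __m128i demo_max_epi16(__m128i a, __m128i b) {
  __m128i gt = _mm_cmpgt_epi16(a, b);            /* 0xFFFF where a > b, 0x0000 elsewhere */
  return _mm_or_si128(_mm_and_si128(gt, a),      /* take a where the mask is set         */
                      _mm_andnot_si128(gt, b));  /* take b where the mask is clear       */
}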
/// /// \headerfile /// /// This intrinsic corresponds to the VPCMPGTD / PCMPGTD instruction. /// /// \param __a /// A 128-bit integer vector. /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4si)__a > (__v4si)__b); } /// Compares each of the corresponding signed 8-bit values of the 128-bit /// integer vectors to determine if the values in the first operand are less /// than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPGTB / PCMPGTB instruction. /// /// \param __a /// A 128-bit integer vector. /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a, __m128i __b) { return _mm_cmpgt_epi8(__b, __a); } /// Compares each of the corresponding signed 16-bit values of the /// 128-bit integer vectors to determine if the values in the first operand /// are less than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPGTW / PCMPGTW instruction. /// /// \param __a /// A 128-bit integer vector. /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a, __m128i __b) { return _mm_cmpgt_epi16(__b, __a); } /// Compares each of the corresponding signed 32-bit values of the /// 128-bit integer vectors to determine if the values in the first operand /// are less than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPGTD / PCMPGTD instruction. /// /// \param __a /// A 128-bit integer vector. /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi32(__m128i __a, __m128i __b) { return _mm_cmpgt_epi32(__b, __a); } #ifdef __x86_64__ /// Converts a 64-bit signed integer value from the second operand into a /// double-precision value and returns it in the lower element of a [2 x /// double] vector; the upper element of the returned vector is copied from /// the upper element of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSI2SD / CVTSI2SD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The upper 64 bits of this operand are /// copied to the upper 64 bits of the destination. /// \param __b /// A 64-bit signed integer operand containing the value to be converted. /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// converted value of the second operand. The upper 64 bits are copied from /// the upper 64 bits of the first operand. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi64_sd(__m128d __a, long long __b) { __a[0] = __b; return __a; } /// Converts the first (lower) element of a vector of [2 x double] into a /// 64-bit signed integer value, according to the current rounding mode. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSD2SI / CVTSD2SI instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. 
The lower 64 bits are used in the /// conversion. /// \returns A 64-bit signed integer containing the converted value. static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsd_si64(__m128d __a) { return __builtin_ia32_cvtsd2si64((__v2df)__a); } /// Converts the first (lower) element of a vector of [2 x double] into a /// 64-bit signed integer value, truncating the result when it is inexact. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTTSD2SI / CVTTSD2SI /// instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the /// conversion. /// \returns A 64-bit signed integer containing the converted value. static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvttsd_si64(__m128d __a) { return __builtin_ia32_cvttsd2si64((__v2df)__a); } #endif /// Converts a vector of [4 x i32] into a vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTDQ2PS / CVTDQ2PS instruction. /// /// \param __a /// A 128-bit integer vector. /// \returns A 128-bit vector of [4 x float] containing the converted values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepi32_ps(__m128i __a) { return (__m128) __builtin_convertvector((__v4si)__a, __v4sf); } /// Converts a vector of [4 x float] into a vector of [4 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTPS2DQ / CVTPS2DQ instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 128-bit integer vector of [4 x i32] containing the converted /// values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a) { return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a); } /// Converts a vector of [4 x float] into a vector of [4 x i32], /// truncating the result when it is inexact. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTTPS2DQ / CVTTPS2DQ /// instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x i32] containing the converted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a) { return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a); } /// Returns a vector of [4 x i32] where the lowest element is the input /// operand and the remaining elements are zero. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVD / MOVD instruction. /// /// \param __a /// A 32-bit signed integer operand. /// \returns A 128-bit vector of [4 x i32]. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a) { return __extension__(__m128i)(__v4si){__a, 0, 0, 0}; } /// Returns a vector of [2 x i64] where the lower element is the input /// operand and the upper element is zero. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVQ / MOVQ instruction /// in 64-bit mode. /// /// \param __a /// A 64-bit signed integer operand containing the value to be converted. /// \returns A 128-bit vector of [2 x i64] containing the converted value. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi64_si128(long long __a) { return __extension__(__m128i)(__v2di){__a, 0}; } /// Moves the least significant 32 bits of a vector of [4 x i32] to a /// 32-bit signed integer value. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVD / MOVD instruction. /// /// \param __a /// A vector of [4 x i32]. The least significant 32 bits are moved to the /// destination. /// \returns A 32-bit signed integer containing the moved value. 
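/*
 * Illustrative usage sketch (not part of the original header): a round trip
 * through the packed conversions documented above. The round trip is exact only
 * while the 32-bit values are exactly representable as float (|x| <= 2^24).
 * Helper name is hypothetical; assumes <emmintrin.h>.
 */
static inline __m128i demo_convert_roundtrip(__m128i ints) {
  __m128 as_float = _mm_cvtepi32_ps(ints);   /* [4 x i32] -> [4 x float]       */
  return _mm_cvttps_epi32(as_float);         /* back to [4 x i32], truncating  */
}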
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a) { __v4si __b = (__v4si)__a; return __b[0]; } /// Moves the least significant 64 bits of a vector of [2 x i64] to a /// 64-bit signed integer value. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVQ / MOVQ instruction. /// /// \param __a /// A vector of [2 x i64]. The least significant 64 bits are moved to the /// destination. /// \returns A 64-bit signed integer containing the moved value. static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsi128_si64(__m128i __a) { return __a[0]; } /// Moves packed integer values from an aligned 128-bit memory location /// to elements in a 128-bit integer vector. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDQA / MOVDQA instruction. /// /// \param __p /// An aligned pointer to a memory location containing integer values. /// \returns A 128-bit integer vector containing the moved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p) { return *__p; } /// Moves packed integer values from an unaligned 128-bit memory location /// to elements in a 128-bit integer vector. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDQU / MOVDQU instruction. /// /// \param __p /// A pointer to a memory location containing integer values. /// \returns A 128-bit integer vector containing the moved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i_u const *__p) { struct __loadu_si128 { __m128i_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_si128 *)__p)->__v; } /// Returns a vector of [2 x i64] where the lower element is taken from /// the lower element of the operand, and the upper element is zero. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVQ / MOVQ instruction. /// /// \param __p /// A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of /// the destination. /// \returns A 128-bit vector of [2 x i64]. The lower order bits contain the /// moved value. The higher order bits are cleared. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadl_epi64(__m128i_u const *__p) { struct __mm_loadl_epi64_struct { long long __u; } __attribute__((__packed__, __may_alias__)); return __extension__(__m128i){ ((const struct __mm_loadl_epi64_struct *)__p)->__u, 0}; } /// Generates a 128-bit vector of [4 x i32] with unspecified content. /// This could be used as an argument to another intrinsic function where the /// argument is required but the value is not actually used. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \returns A 128-bit vector of [4 x i32] with unspecified content. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void) { return (__m128i)__builtin_ia32_undef128(); } /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with /// the specified 64-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __q1 /// A 64-bit integer value used to initialize the upper 64 bits of the /// destination vector of [2 x i64]. /// \param __q0 /// A 64-bit integer value used to initialize the lower 64 bits of the /// destination vector of [2 x i64]. /// \returns An initialized 128-bit vector of [2 x i64] containing the values /// provided in the operands. 
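/*
 * Illustrative usage sketch (not part of the original header): the aligned load
 * requires a 16-byte-aligned address, while the unaligned load accepts any
 * address. Helper name and parameters are hypothetical; assumes <emmintrin.h>.
 */
static inline __m128i demo_loads(const void *aligned16, const void *anywhere) {
  __m128i a = _mm_load_si128((const __m128i *)aligned16);    /* needs 16-byte alignment */
  __m128i b = _mm_loadu_si128((const __m128i_u *)anywhere);  /* no alignment required   */
  return _mm_add_epi32(a, b);                                /* combine per 32-bit lane */
}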
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1, long long __q0) { return __extension__(__m128i)(__v2di){__q0, __q1}; } /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with /// the specified 64-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __q1 /// A 64-bit integer value used to initialize the upper 64 bits of the /// destination vector of [2 x i64]. /// \param __q0 /// A 64-bit integer value used to initialize the lower 64 bits of the /// destination vector of [2 x i64]. /// \returns An initialized 128-bit vector of [2 x i64] containing the values /// provided in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1, __m64 __q0) { return _mm_set_epi64x((long long)__q1, (long long)__q0); } /// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with /// the specified 32-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __i3 /// A 32-bit integer value used to initialize bits [127:96] of the /// destination vector. /// \param __i2 /// A 32-bit integer value used to initialize bits [95:64] of the destination /// vector. /// \param __i1 /// A 32-bit integer value used to initialize bits [63:32] of the destination /// vector. /// \param __i0 /// A 32-bit integer value used to initialize bits [31:0] of the destination /// vector. /// \returns An initialized 128-bit vector of [4 x i32] containing the values /// provided in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2, int __i1, int __i0) { return __extension__(__m128i)(__v4si){__i0, __i1, __i2, __i3}; } /// Initializes the 16-bit values in a 128-bit vector of [8 x i16] with /// the specified 16-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __w7 /// A 16-bit integer value used to initialize bits [127:112] of the /// destination vector. /// \param __w6 /// A 16-bit integer value used to initialize bits [111:96] of the /// destination vector. /// \param __w5 /// A 16-bit integer value used to initialize bits [95:80] of the destination /// vector. /// \param __w4 /// A 16-bit integer value used to initialize bits [79:64] of the destination /// vector. /// \param __w3 /// A 16-bit integer value used to initialize bits [63:48] of the destination /// vector. /// \param __w2 /// A 16-bit integer value used to initialize bits [47:32] of the destination /// vector. /// \param __w1 /// A 16-bit integer value used to initialize bits [31:16] of the destination /// vector. /// \param __w0 /// A 16-bit integer value used to initialize bits [15:0] of the destination /// vector. /// \returns An initialized 128-bit vector of [8 x i16] containing the values /// provided in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0) { return __extension__(__m128i)(__v8hi){__w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7}; } /// Initializes the 8-bit values in a 128-bit vector of [16 x i8] with /// the specified 8-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. 
/// /// \param __b15 /// Initializes bits [127:120] of the destination vector. /// \param __b14 /// Initializes bits [119:112] of the destination vector. /// \param __b13 /// Initializes bits [111:104] of the destination vector. /// \param __b12 /// Initializes bits [103:96] of the destination vector. /// \param __b11 /// Initializes bits [95:88] of the destination vector. /// \param __b10 /// Initializes bits [87:80] of the destination vector. /// \param __b9 /// Initializes bits [79:72] of the destination vector. /// \param __b8 /// Initializes bits [71:64] of the destination vector. /// \param __b7 /// Initializes bits [63:56] of the destination vector. /// \param __b6 /// Initializes bits [55:48] of the destination vector. /// \param __b5 /// Initializes bits [47:40] of the destination vector. /// \param __b4 /// Initializes bits [39:32] of the destination vector. /// \param __b3 /// Initializes bits [31:24] of the destination vector. /// \param __b2 /// Initializes bits [23:16] of the destination vector. /// \param __b1 /// Initializes bits [15:8] of the destination vector. /// \param __b0 /// Initializes bits [7:0] of the destination vector. /// \returns An initialized 128-bit vector of [16 x i8] containing the values /// provided in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0) { return __extension__(__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15}; } /// Initializes both values in a 128-bit integer vector with the /// specified 64-bit integer value. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __q /// Integer value used to initialize the elements of the destination integer /// vector. /// \returns An initialized 128-bit integer vector of [2 x i64] with both /// elements containing the value provided in the operand. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q) { return _mm_set_epi64x(__q, __q); } /// Initializes both values in a 128-bit vector of [2 x i64] with the /// specified 64-bit value. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __q /// A 64-bit value used to initialize the elements of the destination integer /// vector. /// \returns An initialized 128-bit vector of [2 x i64] with all elements /// containing the value provided in the operand. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q) { return _mm_set_epi64(__q, __q); } /// Initializes all values in a 128-bit vector of [4 x i32] with the /// specified 32-bit value. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __i /// A 32-bit value used to initialize the elements of the destination integer /// vector. /// \returns An initialized 128-bit vector of [4 x i32] with all elements /// containing the value provided in the operand. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i) { return _mm_set_epi32(__i, __i, __i, __i); } /// Initializes all values in a 128-bit vector of [8 x i16] with the /// specified 16-bit value. 
/// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __w /// A 16-bit value used to initialize the elements of the destination integer /// vector. /// \returns An initialized 128-bit vector of [8 x i16] with all elements /// containing the value provided in the operand. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w) { return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w); } /// Initializes all values in a 128-bit vector of [16 x i8] with the /// specified 8-bit value. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __b /// An 8-bit value used to initialize the elements of the destination integer /// vector. /// \returns An initialized 128-bit vector of [16 x i8] with all elements /// containing the value provided in the operand. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b) { return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); } /// Constructs a 128-bit integer vector, initialized in reverse order /// with the specified 64-bit integral values. /// /// \headerfile /// /// This intrinsic does not correspond to a specific instruction. /// /// \param __q0 /// A 64-bit integral value used to initialize the lower 64 bits of the /// result. /// \param __q1 /// A 64-bit integral value used to initialize the upper 64 bits of the /// result. /// \returns An initialized 128-bit integer vector. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0, __m64 __q1) { return _mm_set_epi64(__q1, __q0); } /// Constructs a 128-bit integer vector, initialized in reverse order /// with the specified 32-bit integral values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __i0 /// A 32-bit integral value used to initialize bits [31:0] of the result. /// \param __i1 /// A 32-bit integral value used to initialize bits [63:32] of the result. /// \param __i2 /// A 32-bit integral value used to initialize bits [95:64] of the result. /// \param __i3 /// A 32-bit integral value used to initialize bits [127:96] of the result. /// \returns An initialized 128-bit integer vector. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1, int __i2, int __i3) { return _mm_set_epi32(__i3, __i2, __i1, __i0); } /// Constructs a 128-bit integer vector, initialized in reverse order /// with the specified 16-bit integral values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __w0 /// A 16-bit integral value used to initialize bits [15:0] of the result. /// \param __w1 /// A 16-bit integral value used to initialize bits [31:16] of the result. /// \param __w2 /// A 16-bit integral value used to initialize bits [47:32] of the result. /// \param __w3 /// A 16-bit integral value used to initialize bits [63:48] of the result. /// \param __w4 /// A 16-bit integral value used to initialize bits [79:64] of the result. /// \param __w5 /// A 16-bit integral value used to initialize bits [95:80] of the result. /// \param __w6 /// A 16-bit integral value used to initialize bits [111:96] of the result. /// \param __w7 /// A 16-bit integral value used to initialize bits [127:112] of the result. /// \returns An initialized 128-bit integer vector. 
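/*
 * Illustrative usage sketch (not part of the original header): _mm_set_* takes
 * its arguments from the highest element down, while _mm_setr_* takes them in
 * reverse (memory) order, so the two calls below build the same vector. Helper
 * name is hypothetical; assumes <emmintrin.h>.
 */
static inline void demo_set_vs_setr(void) {
  __m128i a = _mm_set_epi32(3, 2, 1, 0);   /* element 0 = 0 ... element 3 = 3  */
  __m128i b = _mm_setr_epi32(0, 1, 2, 3);  /* same layout, arguments reversed  */
  (void)a;
  (void)b;
}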
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7) { return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0); } /// Constructs a 128-bit integer vector, initialized in reverse order /// with the specified 8-bit integral values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __b0 /// An 8-bit integral value used to initialize bits [7:0] of the result. /// \param __b1 /// An 8-bit integral value used to initialize bits [15:8] of the result. /// \param __b2 /// An 8-bit integral value used to initialize bits [23:16] of the result. /// \param __b3 /// An 8-bit integral value used to initialize bits [31:24] of the result. /// \param __b4 /// An 8-bit integral value used to initialize bits [39:32] of the result. /// \param __b5 /// An 8-bit integral value used to initialize bits [47:40] of the result. /// \param __b6 /// An 8-bit integral value used to initialize bits [55:48] of the result. /// \param __b7 /// An 8-bit integral value used to initialize bits [63:56] of the result. /// \param __b8 /// An 8-bit integral value used to initialize bits [71:64] of the result. /// \param __b9 /// An 8-bit integral value used to initialize bits [79:72] of the result. /// \param __b10 /// An 8-bit integral value used to initialize bits [87:80] of the result. /// \param __b11 /// An 8-bit integral value used to initialize bits [95:88] of the result. /// \param __b12 /// An 8-bit integral value used to initialize bits [103:96] of the result. /// \param __b13 /// An 8-bit integral value used to initialize bits [111:104] of the result. /// \param __b14 /// An 8-bit integral value used to initialize bits [119:112] of the result. /// \param __b15 /// An 8-bit integral value used to initialize bits [127:120] of the result. /// \returns An initialized 128-bit integer vector. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15) { return _mm_set_epi8(__b15, __b14, __b13, __b12, __b11, __b10, __b9, __b8, __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); } /// Creates a 128-bit integer vector initialized to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VXORPS / XORPS instruction. /// /// \returns An initialized 128-bit integer vector with all elements set to /// zero. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void) { return __extension__(__m128i)(__v2di){0LL, 0LL}; } /// Stores a 128-bit integer vector to a memory location aligned on a /// 128-bit boundary. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPS / MOVAPS instruction. /// /// \param __p /// A pointer to an aligned memory location that will receive the integer /// values. /// \param __b /// A 128-bit integer vector containing the values to be moved. static __inline__ void __DEFAULT_FN_ATTRS _mm_store_si128(__m128i *__p, __m128i __b) { *__p = __b; } /// Stores a 128-bit integer vector to an unaligned memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVUPS / MOVUPS instruction. /// /// \param __p /// A pointer to a memory location that will receive the integer values. /// \param __b /// A 128-bit integer vector containing the values to be moved. 
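/*
 * Illustrative usage sketch (not part of the original header): clears one
 * 16-byte block using the zero constant and the aligned store documented above.
 * Helper name is hypothetical; the destination is assumed to be 16-byte
 * aligned; assumes <emmintrin.h>.
 */
static inline void demo_zero_block(void *dst16 /* 16-byte aligned */) {
  _mm_store_si128((__m128i *)dst16, _mm_setzero_si128());
}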
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si128(__m128i_u *__p, __m128i __b) { struct __storeu_si128 { __m128i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_si128 *)__p)->__v = __b; } /// Stores a 64-bit integer value from the low element of a 128-bit integer /// vector. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVQ / MOVQ instruction. /// /// \param __p /// A pointer to a 64-bit memory location. The address of the memory /// location does not have to be aligned. /// \param __b /// A 128-bit integer vector containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si64(void *__p, __m128i __b) { struct __storeu_si64 { long long __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_si64 *)__p)->__v = ((__v2di)__b)[0]; } /// Stores a 32-bit integer value from the low element of a 128-bit integer /// vector. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVD / MOVD instruction. /// /// \param __p /// A pointer to a 32-bit memory location. The address of the memory /// location does not have to be aligned. /// \param __b /// A 128-bit integer vector containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si32(void *__p, __m128i __b) { struct __storeu_si32 { int __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_si32 *)__p)->__v = ((__v4si)__b)[0]; } /// Stores a 16-bit integer value from the low element of a 128-bit integer /// vector. /// /// \headerfile /// /// This intrinsic does not correspond to a specific instruction. /// /// \param __p /// A pointer to a 16-bit memory location. The address of the memory /// location does not have to be aligned. /// \param __b /// A 128-bit integer vector containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si16(void *__p, __m128i __b) { struct __storeu_si16 { short __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_si16 *)__p)->__v = ((__v8hi)__b)[0]; } /// Moves bytes selected by the mask from the first operand to the /// specified unaligned memory location. When a mask bit is 1, the /// corresponding byte is written, otherwise it is not written. /// /// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). Exception and trap behavior for elements not selected /// for storage to memory are implementation dependent. /// /// \headerfile /// /// This intrinsic corresponds to the VMASKMOVDQU / MASKMOVDQU /// instruction. /// /// \param __d /// A 128-bit integer vector containing the values to be moved. /// \param __n /// A 128-bit integer vector containing the mask. The most significant bit of /// each byte represents the mask bits. /// \param __p /// A pointer to an unaligned 128-bit memory location where the specified /// values are moved. static __inline__ void __DEFAULT_FN_ATTRS _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p) { __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p); } /// Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to /// a memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVLPS / MOVLPS instruction. /// /// \param __p /// A pointer to a 64-bit memory location that will receive the lower 64 bits /// of the integer vector parameter. /// \param __a /// A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the /// value to be stored. 
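/*
 * Illustrative usage sketch (not part of the original header): the narrow
 * unaligned stores write only the low 64 or 32 bits of the vector and accept
 * any address. Helper name and parameters are hypothetical; assumes
 * <emmintrin.h>.
 */
static inline void demo_store_low_lanes(void *p64, void *p32, __m128i v) {
  _mm_storeu_si64(p64, v);   /* writes bits [63:0] of v  */
  _mm_storeu_si32(p32, v);   /* writes bits [31:0] of v  */
}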
static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p, __m128i __a) { struct __mm_storel_epi64_struct { long long __u; } __attribute__((__packed__, __may_alias__)); ((struct __mm_storel_epi64_struct *)__p)->__u = __a[0]; } /// Stores a 128-bit floating point vector of [2 x double] to a 128-bit /// aligned memory location. /// /// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the VMOVNTPS / MOVNTPS instruction. /// /// \param __p /// A pointer to the 128-bit aligned memory location used to store the value. /// \param __a /// A vector of [2 x double] containing the 64-bit values to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(void *__p, __m128d __a) { __builtin_nontemporal_store((__v2df)__a, (__v2df *)__p); } /// Stores a 128-bit integer vector to a 128-bit aligned memory location. /// /// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the VMOVNTPS / MOVNTPS instruction. /// /// \param __p /// A pointer to the 128-bit aligned memory location used to store the value. /// \param __a /// A 128-bit integer vector containing the values to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(void *__p, __m128i __a) { __builtin_nontemporal_store((__v2di)__a, (__v2di *)__p); } /// Stores a 32-bit integer value in the specified memory location. /// /// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the MOVNTI instruction. /// /// \param __p /// A pointer to the 32-bit memory location used to store the value. /// \param __a /// A 32-bit integer containing the value to be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) _mm_stream_si32(void *__p, int __a) { __builtin_ia32_movnti((int *)__p, __a); } #ifdef __x86_64__ /// Stores a 64-bit integer value in the specified memory location. /// /// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the MOVNTIQ instruction. /// /// \param __p /// A pointer to the 64-bit memory location used to store the value. /// \param __a /// A 64-bit integer containing the value to be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) _mm_stream_si64(void *__p, long long __a) { __builtin_ia32_movnti64((long long *)__p, __a); } #endif #if defined(__cplusplus) extern "C" { #endif /// The cache line containing \a __p is flushed and invalidated from all /// caches in the coherency domain. /// /// \headerfile /// /// This intrinsic corresponds to the CLFLUSH instruction. /// /// \param __p /// A pointer to the memory location used to identify the cache line to be /// flushed. void _mm_clflush(void const *__p); /// Forces strong memory ordering (serialization) between load /// instructions preceding this instruction and load instructions following /// this instruction, ensuring the system completes all previous loads before /// executing subsequent loads. /// /// \headerfile /// /// This intrinsic corresponds to the LFENCE instruction. 
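/*
 * Illustrative usage sketch (not part of the original header): fills a buffer
 * with non-temporal stores so the data bypasses the caches, then issues a full
 * memory fence before the data is consumed. Helper name and parameters are
 * hypothetical; the destination is assumed to be 16-byte aligned; assumes
 * <emmintrin.h>.
 */
static inline void demo_stream_fill(__m128i *dst16, __m128i value, int count) {
  for (int i = 0; i < count; ++i)
    _mm_stream_si128(dst16 + i, value);  /* non-temporal (streaming) store */
  _mm_mfence();                          /* order the streaming stores     */
}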
/// void _mm_lfence(void); /// Forces strong memory ordering (serialization) between load and store /// instructions preceding this instruction and load and store instructions /// following this instruction, ensuring that the system completes all /// previous memory accesses before executing subsequent memory accesses. /// /// \headerfile /// /// This intrinsic corresponds to the MFENCE instruction. /// void _mm_mfence(void); #if defined(__cplusplus) } // extern "C" #endif /// Converts, with saturation, 16-bit signed integers from both 128-bit integer /// vector operands into 8-bit signed integers, and packs the results into /// the destination. /// /// Positive values greater than 0x7F are saturated to 0x7F. Negative values /// less than 0x80 are saturated to 0x80. /// /// \headerfile /// /// This intrinsic corresponds to the VPACKSSWB / PACKSSWB instruction. /// /// \param __a /// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are /// written to the lower 64 bits of the result. /// \param __b /// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are /// written to the higher 64 bits of the result. /// \returns A 128-bit vector of [16 x i8] containing the converted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b); } /// Converts, with saturation, 32-bit signed integers from both 128-bit integer /// vector operands into 16-bit signed integers, and packs the results into /// the destination. /// /// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative /// values less than 0x8000 are saturated to 0x8000. /// /// \headerfile /// /// This intrinsic corresponds to the VPACKSSDW / PACKSSDW instruction. /// /// \param __a /// A 128-bit integer vector of [4 x i32]. The converted [4 x i16] values /// are written to the lower 64 bits of the result. /// \param __b /// A 128-bit integer vector of [4 x i32]. The converted [4 x i16] values /// are written to the higher 64 bits of the result. /// \returns A 128-bit vector of [8 x i16] containing the converted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b); } /// Converts, with saturation, 16-bit signed integers from both 128-bit integer /// vector operands into 8-bit unsigned integers, and packs the results into /// the destination. /// /// Values greater than 0xFF are saturated to 0xFF. Values less than 0x00 /// are saturated to 0x00. /// /// \headerfile /// /// This intrinsic corresponds to the VPACKUSWB / PACKUSWB instruction. /// /// \param __a /// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are /// written to the lower 64 bits of the result. /// \param __b /// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are /// written to the higher 64 bits of the result. /// \returns A 128-bit vector of [16 x i8] containing the converted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b); } /// Extracts 16 bits from a 128-bit integer vector of [8 x i16], using /// the immediate-value parameter as a selector. /// /// \headerfile /// /// \code /// __m128i _mm_extract_epi16(__m128i a, const int imm); /// \endcode /// /// This intrinsic corresponds to the VPEXTRW / PEXTRW instruction. 
/// /// \param a /// A 128-bit integer vector. /// \param imm /// An immediate value. Bits [2:0] selects values from \a a to be assigned /// to bits[15:0] of the result. \n /// 000: assign values from bits [15:0] of \a a. \n /// 001: assign values from bits [31:16] of \a a. \n /// 010: assign values from bits [47:32] of \a a. \n /// 011: assign values from bits [63:48] of \a a. \n /// 100: assign values from bits [79:64] of \a a. \n /// 101: assign values from bits [95:80] of \a a. \n /// 110: assign values from bits [111:96] of \a a. \n /// 111: assign values from bits [127:112] of \a a. /// \returns An integer, whose lower 16 bits are selected from the 128-bit /// integer vector parameter and the remaining bits are assigned zeros. #define _mm_extract_epi16(a, imm) \ ((int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \ (int)(imm))) /// Constructs a 128-bit integer vector by first making a copy of the /// 128-bit integer vector parameter, and then inserting the lower 16 bits /// of an integer parameter into an offset specified by the immediate-value /// parameter. /// /// \headerfile /// /// \code /// __m128i _mm_insert_epi16(__m128i a, int b, const int imm); /// \endcode /// /// This intrinsic corresponds to the VPINSRW / PINSRW instruction. /// /// \param a /// A 128-bit integer vector of [8 x i16]. This vector is copied to the /// result and then one of the eight elements in the result is replaced by /// the lower 16 bits of \a b. /// \param b /// An integer. The lower 16 bits of this parameter are written to the /// result beginning at an offset specified by \a imm. /// \param imm /// An immediate value specifying the bit offset in the result at which the /// lower 16 bits of \a b are written. /// \returns A 128-bit integer vector containing the constructed values. #define _mm_insert_epi16(a, b, imm) \ ((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \ (int)(imm))) /// Copies the values of the most significant bits from each 8-bit /// element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask /// value, zero-extends the value, and writes it to the destination. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVMSKB / PMOVMSKB instruction. /// /// \param __a /// A 128-bit integer vector containing the values with bits to be extracted. /// \returns The most significant bits from each 8-bit element in \a __a, /// written to bits [15:0]. The other bits are assigned zeros. static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) { return __builtin_ia32_pmovmskb128((__v16qi)__a); } /// Constructs a 128-bit integer vector by shuffling four 32-bit /// elements of a 128-bit integer vector parameter, using the immediate-value /// parameter as a specifier. /// /// \headerfile /// /// \code /// __m128i _mm_shuffle_epi32(__m128i a, const int imm); /// \endcode /// /// This intrinsic corresponds to the VPSHUFD / PSHUFD instruction. /// /// \param a /// A 128-bit integer vector containing the values to be copied. /// \param imm /// An immediate value containing an 8-bit value specifying which elements to /// copy from a. The destinations within the 128-bit destination are assigned /// values as follows: \n /// Bits [1:0] are used to assign values to bits [31:0] of the result. \n /// Bits [3:2] are used to assign values to bits [63:32] of the result. \n /// Bits [5:4] are used to assign values to bits [95:64] of the result. \n /// Bits [7:6] are used to assign values to bits [127:96] of the result. 
\n /// Bit value assignments: \n /// 00: assign values from bits [31:0] of \a a. \n /// 01: assign values from bits [63:32] of \a a. \n /// 10: assign values from bits [95:64] of \a a. \n /// 11: assign values from bits [127:96] of \a a. \n /// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. /// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form /// [b6, b4, b2, b0]. /// \returns A 128-bit integer vector containing the shuffled values. #define _mm_shuffle_epi32(a, imm) \ ((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm))) /// Constructs a 128-bit integer vector by shuffling four lower 16-bit /// elements of a 128-bit integer vector of [8 x i16], using the immediate /// value parameter as a specifier. /// /// \headerfile /// /// \code /// __m128i _mm_shufflelo_epi16(__m128i a, const int imm); /// \endcode /// /// This intrinsic corresponds to the VPSHUFLW / PSHUFLW instruction. /// /// \param a /// A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits /// [127:64] of the result. /// \param imm /// An 8-bit immediate value specifying which elements to copy from \a a. \n /// Bits[1:0] are used to assign values to bits [15:0] of the result. \n /// Bits[3:2] are used to assign values to bits [31:16] of the result. \n /// Bits[5:4] are used to assign values to bits [47:32] of the result. \n /// Bits[7:6] are used to assign values to bits [63:48] of the result. \n /// Bit value assignments: \n /// 00: assign values from bits [15:0] of \a a. \n /// 01: assign values from bits [31:16] of \a a. \n /// 10: assign values from bits [47:32] of \a a. \n /// 11: assign values from bits [63:48] of \a a. \n /// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. /// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form /// [b6, b4, b2, b0]. /// \returns A 128-bit integer vector containing the shuffled values. #define _mm_shufflelo_epi16(a, imm) \ ((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm))) /// Constructs a 128-bit integer vector by shuffling four upper 16-bit /// elements of a 128-bit integer vector of [8 x i16], using the immediate /// value parameter as a specifier. /// /// \headerfile /// /// \code /// __m128i _mm_shufflehi_epi16(__m128i a, const int imm); /// \endcode /// /// This intrinsic corresponds to the VPSHUFHW / PSHUFHW instruction. /// /// \param a /// A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits /// [63:0] of the result. /// \param imm /// An 8-bit immediate value specifying which elements to copy from \a a. \n /// Bits[1:0] are used to assign values to bits [79:64] of the result. \n /// Bits[3:2] are used to assign values to bits [95:80] of the result. \n /// Bits[5:4] are used to assign values to bits [111:96] of the result. \n /// Bits[7:6] are used to assign values to bits [127:112] of the result. \n /// Bit value assignments: \n /// 00: assign values from bits [79:64] of \a a. \n /// 01: assign values from bits [95:80] of \a a. \n /// 10: assign values from bits [111:96] of \a a. \n /// 11: assign values from bits [127:112] of \a a. \n /// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. /// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form /// [b6, b4, b2, b0]. /// \returns A 128-bit integer vector containing the shuffled values. 
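/*
 * Illustrative usage sketch (not part of the original header): _MM_SHUFFLE
 * (defined in the SSE header) packs four 2-bit selectors, so the call below
 * replicates element 0 into every 32-bit lane. Helper name is hypothetical;
 * assumes <emmintrin.h>.
 */
static inline __m128i demo_broadcast_lane0(__m128i v) {
  return _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 0, 0, 0));  /* all lanes <- element 0 */
}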
#define _mm_shufflehi_epi16(a, imm) \ ((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm))) /// Unpacks the high-order (index 8-15) values from two 128-bit vectors /// of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the VPUNPCKHBW / PUNPCKHBW /// instruction. /// /// \param __a /// A 128-bit vector of [16 x i8]. /// Bits [71:64] are written to bits [7:0] of the result. \n /// Bits [79:72] are written to bits [23:16] of the result. \n /// Bits [87:80] are written to bits [39:32] of the result. \n /// Bits [95:88] are written to bits [55:48] of the result. \n /// Bits [103:96] are written to bits [71:64] of the result. \n /// Bits [111:104] are written to bits [87:80] of the result. \n /// Bits [119:112] are written to bits [103:96] of the result. \n /// Bits [127:120] are written to bits [119:112] of the result. /// \param __b /// A 128-bit vector of [16 x i8]. \n /// Bits [71:64] are written to bits [15:8] of the result. \n /// Bits [79:72] are written to bits [31:24] of the result. \n /// Bits [87:80] are written to bits [47:40] of the result. \n /// Bits [95:88] are written to bits [63:56] of the result. \n /// Bits [103:96] are written to bits [79:72] of the result. \n /// Bits [111:104] are written to bits [95:88] of the result. \n /// Bits [119:112] are written to bits [111:104] of the result. \n /// Bits [127:120] are written to bits [127:120] of the result. /// \returns A 128-bit vector of [16 x i8] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector( (__v16qi)__a, (__v16qi)__b, 8, 16 + 8, 9, 16 + 9, 10, 16 + 10, 11, 16 + 11, 12, 16 + 12, 13, 16 + 13, 14, 16 + 14, 15, 16 + 15); } /// Unpacks the high-order (index 4-7) values from two 128-bit vectors of /// [8 x i16] and interleaves them into a 128-bit vector of [8 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the VPUNPCKHWD / PUNPCKHWD /// instruction. /// /// \param __a /// A 128-bit vector of [8 x i16]. /// Bits [79:64] are written to bits [15:0] of the result. \n /// Bits [95:80] are written to bits [47:32] of the result. \n /// Bits [111:96] are written to bits [79:64] of the result. \n /// Bits [127:112] are written to bits [111:96] of the result. /// \param __b /// A 128-bit vector of [8 x i16]. /// Bits [79:64] are written to bits [31:16] of the result. \n /// Bits [95:80] are written to bits [63:48] of the result. \n /// Bits [111:96] are written to bits [95:80] of the result. \n /// Bits [127:112] are written to bits [127:112] of the result. /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8 + 4, 5, 8 + 5, 6, 8 + 6, 7, 8 + 7); } /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of /// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the VPUNPCKHDQ / PUNPCKHDQ /// instruction. /// /// \param __a /// A 128-bit vector of [4 x i32]. \n /// Bits [95:64] are written to bits [31:0] of the destination. \n /// Bits [127:96] are written to bits [95:64] of the destination. /// \param __b /// A 128-bit vector of [4 x i32]. \n /// Bits [95:64] are written to bits [64:32] of the destination. 
\n /// Bits [127:96] are written to bits [127:96] of the destination. /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4 + 2, 3, 4 + 3); } /// Unpacks the high-order 64-bit elements from two 128-bit vectors of /// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64]. /// /// \headerfile /// /// This intrinsic corresponds to the VPUNPCKHQDQ / PUNPCKHQDQ /// instruction. /// /// \param __a /// A 128-bit vector of [2 x i64]. \n /// Bits [127:64] are written to bits [63:0] of the destination. /// \param __b /// A 128-bit vector of [2 x i64]. \n /// Bits [127:64] are written to bits [127:64] of the destination. /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2 + 1); } /// Unpacks the low-order (index 0-7) values from two 128-bit vectors of /// [16 x i8] and interleaves them into a 128-bit vector of [16 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the VPUNPCKLBW / PUNPCKLBW /// instruction. /// /// \param __a /// A 128-bit vector of [16 x i8]. \n /// Bits [7:0] are written to bits [7:0] of the result. \n /// Bits [15:8] are written to bits [23:16] of the result. \n /// Bits [23:16] are written to bits [39:32] of the result. \n /// Bits [31:24] are written to bits [55:48] of the result. \n /// Bits [39:32] are written to bits [71:64] of the result. \n /// Bits [47:40] are written to bits [87:80] of the result. \n /// Bits [55:48] are written to bits [103:96] of the result. \n /// Bits [63:56] are written to bits [119:112] of the result. /// \param __b /// A 128-bit vector of [16 x i8]. /// Bits [7:0] are written to bits [15:8] of the result. \n /// Bits [15:8] are written to bits [31:24] of the result. \n /// Bits [23:16] are written to bits [47:40] of the result. \n /// Bits [31:24] are written to bits [63:56] of the result. \n /// Bits [39:32] are written to bits [79:72] of the result. \n /// Bits [47:40] are written to bits [95:88] of the result. \n /// Bits [55:48] are written to bits [111:104] of the result. \n /// Bits [63:56] are written to bits [127:120] of the result. /// \returns A 128-bit vector of [16 x i8] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector( (__v16qi)__a, (__v16qi)__b, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4, 16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7); } /// Unpacks the low-order (index 0-3) values from each of the two 128-bit /// vectors of [8 x i16] and interleaves them into a 128-bit vector of /// [8 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the VPUNPCKLWD / PUNPCKLWD /// instruction. /// /// \param __a /// A 128-bit vector of [8 x i16]. /// Bits [15:0] are written to bits [15:0] of the result. \n /// Bits [31:16] are written to bits [47:32] of the result. \n /// Bits [47:32] are written to bits [79:64] of the result. \n /// Bits [63:48] are written to bits [111:96] of the result. /// \param __b /// A 128-bit vector of [8 x i16]. /// Bits [15:0] are written to bits [31:16] of the result. \n /// Bits [31:16] are written to bits [63:48] of the result. \n /// Bits [47:32] are written to bits [95:80] of the result. 
\n /// Bits [63:48] are written to bits [127:112] of the result. /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8 + 0, 1, 8 + 1, 2, 8 + 2, 3, 8 + 3); } /// Unpacks the low-order (index 0,1) values from two 128-bit vectors of /// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the VPUNPCKLDQ / PUNPCKLDQ /// instruction. /// /// \param __a /// A 128-bit vector of [4 x i32]. \n /// Bits [31:0] are written to bits [31:0] of the destination. \n /// Bits [63:32] are written to bits [95:64] of the destination. /// \param __b /// A 128-bit vector of [4 x i32]. \n /// Bits [31:0] are written to bits [64:32] of the destination. \n /// Bits [63:32] are written to bits [127:96] of the destination. /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4 + 0, 1, 4 + 1); } /// Unpacks the low-order 64-bit elements from two 128-bit vectors of /// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64]. /// /// \headerfile /// /// This intrinsic corresponds to the VPUNPCKLQDQ / PUNPCKLQDQ /// instruction. /// /// \param __a /// A 128-bit vector of [2 x i64]. \n /// Bits [63:0] are written to bits [63:0] of the destination. \n /// \param __b /// A 128-bit vector of [2 x i64]. \n /// Bits [63:0] are written to bits [127:64] of the destination. \n /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2 + 0); } /// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit /// integer. /// /// \headerfile /// /// This intrinsic corresponds to the MOVDQ2Q instruction. /// /// \param __a /// A 128-bit integer vector operand. The lower 64 bits are moved to the /// destination. /// \returns A 64-bit integer containing the lower 64 bits of the parameter. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_movepi64_pi64(__m128i __a) { return (__m64)__a[0]; } /// Moves the 64-bit operand to a 128-bit integer vector, zeroing the /// upper bits. /// /// \headerfile /// /// This intrinsic corresponds to the MOVD+VMOVQ instruction. /// /// \param __a /// A 64-bit value. /// \returns A 128-bit integer vector. The lower 64 bits contain the value from /// the operand. The upper 64 bits are assigned zeros. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movpi64_epi64(__m64 __a) { return __extension__(__m128i)(__v2di){(long long)__a, 0}; } /// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit /// integer vector, zeroing the upper bits. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVQ / MOVQ instruction. /// /// \param __a /// A 128-bit integer vector operand. The lower 64 bits are moved to the /// destination. /// \returns A 128-bit integer vector. The lower 64 bits contain the value from /// the operand. The upper 64 bits are assigned zeros. 
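/*
 * Illustrative usage sketch (not part of the original header): interleaving a
 * vector with zero via the unpack intrinsics above widens unsigned 8-bit lanes
 * to 16 bits. Helper name is hypothetical; assumes <emmintrin.h>.
 */
static inline __m128i demo_widen_lo_u8_to_u16(__m128i bytes) {
  return _mm_unpacklo_epi8(bytes, _mm_setzero_si128());  /* low 8 bytes -> 8 x u16 */
}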
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_move_epi64(__m128i __a) { return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2); } /// Unpacks the high-order 64-bit elements from two 128-bit vectors of /// [2 x double] and interleaves them into a 128-bit vector of [2 x /// double]. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKHPD / UNPCKHPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. \n /// Bits [127:64] are written to bits [63:0] of the destination. /// \param __b /// A 128-bit vector of [2 x double]. \n /// Bits [127:64] are written to bits [127:64] of the destination. /// \returns A 128-bit vector of [2 x double] containing the interleaved values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, __m128d __b) { return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2 + 1); } /// Unpacks the low-order 64-bit elements from two 128-bit vectors /// of [2 x double] and interleaves them into a 128-bit vector of [2 x /// double]. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKLPD / UNPCKLPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. \n /// Bits [63:0] are written to bits [63:0] of the destination. /// \param __b /// A 128-bit vector of [2 x double]. \n /// Bits [63:0] are written to bits [127:64] of the destination. /// \returns A 128-bit vector of [2 x double] containing the interleaved values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, __m128d __b) { return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2 + 0); } /// Extracts the sign bits of the double-precision values in the 128-bit /// vector of [2 x double], zero-extends the value, and writes it to the /// low-order bits of the destination. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVMSKPD / MOVMSKPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing the values with sign bits to /// be extracted. /// \returns The sign bits from each of the double-precision elements in \a __a, /// written to bits [1:0]. The remaining bits are assigned values of zero. static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) { return __builtin_ia32_movmskpd((__v2df)__a); } /// Constructs a 128-bit floating-point vector of [2 x double] from two /// 128-bit vector parameters of [2 x double], using the immediate-value /// parameter as a specifier. /// /// \headerfile /// /// \code /// __m128d _mm_shuffle_pd(__m128d a, __m128d b, const int i); /// \endcode /// /// This intrinsic corresponds to the VSHUFPD / SHUFPD instruction. /// /// \param a /// A 128-bit vector of [2 x double]. /// \param b /// A 128-bit vector of [2 x double]. /// \param i /// An 8-bit immediate value. The least significant two bits specify which /// elements to copy from \a a and \a b: \n /// Bit[0] = 0: lower element of \a a copied to lower element of result. \n /// Bit[0] = 1: upper element of \a a copied to lower element of result. \n /// Bit[1] = 0: lower element of \a b copied to upper element of result. \n /// Bit[1] = 1: upper element of \a b copied to upper element of result. \n /// Note: To generate a mask, you can use the \c _MM_SHUFFLE2 macro. /// _MM_SHUFFLE2(b1, b0) can create a 2-bit mask of the form /// [b1, b0]. /// \returns A 128-bit vector of [2 x double] containing the shuffled values. 
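/*
 * Illustrative usage sketch (not part of the original header): the two-bit mask
 * from _mm_movemask_pd holds the sign bits of both double-precision lanes, so a
 * nonzero result means at least one lane has its sign bit set. Helper name is
 * hypothetical; assumes <emmintrin.h>.
 */
static inline int demo_any_sign_set_pd(__m128d v) {
  return _mm_movemask_pd(v) != 0;
}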
#define _mm_shuffle_pd(a, b, i) \ ((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ (int)(i))) /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit /// floating-point vector of [4 x float]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit floating-point vector of [2 x double]. /// \returns A 128-bit floating-point vector of [4 x float] containing the same /// bitwise pattern as the parameter. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castpd_ps(__m128d __a) { return (__m128)__a; } /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit /// integer vector. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit floating-point vector of [2 x double]. /// \returns A 128-bit integer vector containing the same bitwise pattern as the /// parameter. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castpd_si128(__m128d __a) { return (__m128i)__a; } /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit /// floating-point vector of [2 x double]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit floating-point vector of [4 x float]. /// \returns A 128-bit floating-point vector of [2 x double] containing the same /// bitwise pattern as the parameter. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castps_pd(__m128 __a) { return (__m128d)__a; } /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit /// integer vector. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit floating-point vector of [4 x float]. /// \returns A 128-bit integer vector containing the same bitwise pattern as the /// parameter. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castps_si128(__m128 __a) { return (__m128i)__a; } /// Casts a 128-bit integer vector into a 128-bit floating-point vector /// of [4 x float]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit integer vector. /// \returns A 128-bit floating-point vector of [4 x float] containing the same /// bitwise pattern as the parameter. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castsi128_ps(__m128i __a) { return (__m128)__a; } /// Casts a 128-bit integer vector into a 128-bit floating-point vector /// of [2 x double]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit integer vector. /// \returns A 128-bit floating-point vector of [2 x double] containing the same /// bitwise pattern as the parameter. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a) { return (__m128d)__a; } /// Compares each of the corresponding double-precision values of two /// 128-bit vectors of [2 x double], using the operation specified by the /// immediate integer operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// \code /// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c); /// \endcode /// /// This intrinsic corresponds to the (V)CMPPD instruction. /// /// \param a /// A 128-bit vector of [2 x double]. /// \param b /// A 128-bit vector of [2 x double]. 
/// \param c
/// An immediate integer operand, with bits [4:0] specifying which comparison
/// operation to use: \n
/// 0x00: Equal (ordered, non-signaling) \n
/// 0x01: Less-than (ordered, signaling) \n
/// 0x02: Less-than-or-equal (ordered, signaling) \n
/// 0x03: Unordered (non-signaling) \n
/// 0x04: Not-equal (unordered, non-signaling) \n
/// 0x05: Not-less-than (unordered, signaling) \n
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
/// 0x07: Ordered (non-signaling) \n
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
#define _mm_cmp_pd(a, b, c)                                                    \
  ((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b),   \
                                 (c)))
/// Compares each of the corresponding scalar double-precision values of
/// two 128-bit vectors of [2 x double], using the operation specified by the
/// immediate integer operand.
///
/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
///
/// \headerfile
///
/// \code
/// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the (V)CMPSD instruction.
///
/// \param a
/// A 128-bit vector of [2 x double].
/// \param b
/// A 128-bit vector of [2 x double].
/// \param c
/// An immediate integer operand, with bits [4:0] specifying which comparison
/// operation to use: \n
/// 0x00: Equal (ordered, non-signaling) \n
/// 0x01: Less-than (ordered, signaling) \n
/// 0x02: Less-than-or-equal (ordered, signaling) \n
/// 0x03: Unordered (non-signaling) \n
/// 0x04: Not-equal (unordered, non-signaling) \n
/// 0x05: Not-less-than (unordered, signaling) \n
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
/// 0x07: Ordered (non-signaling) \n
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
#define _mm_cmp_sd(a, b, c)                                                    \
  ((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b),   \
                                 (c)))
#if defined(__cplusplus)
extern "C" {
#endif
/// Indicates that a spin loop is being executed for the purposes of
/// optimizing power consumption during the loop.
///
/// \headerfile
///
/// This intrinsic corresponds to the PAUSE instruction.
///
void _mm_pause(void);
#if defined(__cplusplus)
} // extern "C"
#endif
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_MMX
#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
#define _MM_DENORMALS_ZERO_ON (0x0040U)
#define _MM_DENORMALS_ZERO_OFF (0x0000U)
#define _MM_DENORMALS_ZERO_MASK (0x0040U)
#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
#define _MM_SET_DENORMALS_ZERO_MODE(x)                                         \
  (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))
#endif /* __EMMINTRIN_H */
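/*
 * A minimal usage sketch, not part of emmintrin.h itself: it shows how the
 * shuffle, unpack, movemask and denormal-control helpers documented above are
 * typically combined. The helper names (swap_lanes, transpose2x2,
 * any_negative, enable_daz) are invented for illustration; only the _mm_*
 * intrinsics and the _MM_* macros come from the header.
 */
#include <emmintrin.h>

/* Swap the two double-precision lanes: returns { a[1], a[0] }.
   _MM_SHUFFLE2(0, 1) builds the 2-bit immediate [b1 = 0, b0 = 1], so the upper
   element of the first operand goes to the low lane and the lower element of
   the second operand goes to the high lane. */
static __m128d swap_lanes(__m128d a) {
  return _mm_shuffle_pd(a, a, _MM_SHUFFLE2(0, 1));
}

/* 2x2 transpose of a row-major matrix {r0, r1} using the unpack intrinsics:
   c0 = { r0[0], r1[0] }, c1 = { r0[1], r1[1] }. */
static void transpose2x2(__m128d r0, __m128d r1, __m128d *c0, __m128d *c1) {
  *c0 = _mm_unpacklo_pd(r0, r1);
  *c1 = _mm_unpackhi_pd(r0, r1);
}

/* Nonzero if either lane of a has its sign bit set: _mm_movemask_pd packs the
   two sign bits into bits [1:0] of the result. */
static int any_negative(__m128d a) {
  return _mm_movemask_pd(a) != 0;
}

/* Turn on denormals-are-zero treatment for vector inputs using the MXCSR
   helpers defined above (this changes the whole thread's FP environment). */
static void enable_daz(void) {
  _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
}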
/*===------------------ enqcmdintrin.h - enqcmd intrinsics -----------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use directly; include instead."
#endif
#ifndef __ENQCMDINTRIN_H
#define __ENQCMDINTRIN_H
/* Define the default attributes for the functions in this file */
#define _DEFAULT_FN_ATTRS                                                      \
  __attribute__((__always_inline__, __nodebug__, __target__("enqcmd")))
/// Reads a 64-byte command pointed to by \a __src, formats 64-byte enqueue
/// store data, and performs a 64-byte enqueue store to the memory pointed to
/// by \a __dst. This intrinsic may only be used in User mode.
///
/// \headerfile
///
/// This intrinsic corresponds to the ENQCMD instruction.
///
/// \param __dst
/// Pointer to the destination of the enqueue store.
/// \param __src
/// Pointer to 64-byte command data.
/// \returns If the command data is successfully written to \a __dst then 0 is
/// returned. Otherwise 1 is returned.
static __inline__ int _DEFAULT_FN_ATTRS
_enqcmd (void *__dst, const void *__src)
{
  return __builtin_ia32_enqcmd(__dst, __src);
}
/// Reads a 64-byte command pointed to by \a __src, formats 64-byte enqueue
/// store data, and performs a 64-byte enqueue store to the memory pointed to
/// by \a __dst. This intrinsic may only be used in Privileged mode.
///
/// \headerfile
///
/// This intrinsic corresponds to the ENQCMDS instruction.
///
/// \param __dst
/// Pointer to the destination of the enqueue store.
/// \param __src
/// Pointer to 64-byte command data.
/// \returns If the command data is successfully written to \a __dst then 0 is
/// returned. Otherwise 1 is returned.
static __inline__ int _DEFAULT_FN_ATTRS
_enqcmds (void *__dst, const void *__src)
{
  return __builtin_ia32_enqcmds(__dst, __src);
}
#undef _DEFAULT_FN_ATTRS
#endif /* __ENQCMDINTRIN_H */
/*===------------- invpcidintrin.h - INVPCID intrinsic ---------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use directly; include instead."
#endif
#ifndef __INVPCIDINTRIN_H
#define __INVPCIDINTRIN_H
static __inline__ void
    __attribute__((__always_inline__, __nodebug__, __target__("invpcid")))
    _invpcid(unsigned int __type, void *__descriptor) {
  __builtin_ia32_invpcid(__type, __descriptor);
}
#endif /* __INVPCIDINTRIN_H */
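/*
 * A minimal usage sketch, not part of the headers above: it shows the intended
 * pattern for the ENQCMD intrinsic, whose return value distinguishes an
 * accepted enqueue store (0) from a retry condition (1). The portal and
 * descriptor names and the retry bound are invented for illustration, and the
 * sketch assumes it is compiled with ENQCMD support enabled (e.g. -menqcmd)
 * and that <immintrin.h> is the umbrella include exposing _enqcmd.
 */
#include <immintrin.h>

/* Submit one 64-byte work descriptor to a device work-submission portal.
   Returns 0 once the enqueue store is accepted, or -1 if the device kept
   signalling "retry" for every attempt. */
static int submit_descriptor(void *portal, const void *desc64) {
  for (int attempt = 0; attempt < 1000; ++attempt) {
    if (_enqcmd(portal, desc64) == 0) /* 0: command data written to portal */
      return 0;
  }
  return -1;
}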
/*===------------ lasxintrin.h - LoongArch LASX intrinsics -----------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef _LOONGSON_ASXINTRIN_H
#define _LOONGSON_ASXINTRIN_H 1
#if defined(__loongarch_asx)
typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1)));
typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32)));
typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1)));
typedef short v16i16 __attribute__((vector_size(32), aligned(32)));
typedef short v16i16_h __attribute__((vector_size(32), aligned(2)));
typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32)));
typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2)));
typedef int v8i32 __attribute__((vector_size(32), aligned(32)));
typedef int v8i32_w __attribute__((vector_size(32), aligned(4)));
typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32)));
typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4)));
typedef long long v4i64 __attribute__((vector_size(32), aligned(32)));
typedef long long v4i64_d __attribute__((vector_size(32), aligned(8)));
typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32)));
typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8)));
typedef float v8f32 __attribute__((vector_size(32), aligned(32)));
typedef float v8f32_w __attribute__((vector_size(32), aligned(4)));
typedef double v4f64 __attribute__((vector_size(32), aligned(32)));
typedef double v4f64_d __attribute__((vector_size(32), aligned(8)));
typedef float __m256 __attribute__((__vector_size__(32), __may_alias__));
typedef long long __m256i __attribute__((__vector_size__(32), __may_alias__));
typedef double __m256d __attribute__((__vector_size__(32), __may_alias__));
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
__lasx_xvsll_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsll_b((v32i8)_1, (v32i8)_2); }
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
__lasx_xvsll_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsll_h((v16i16)_1, (v16i16)_2); }
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
__lasx_xvsll_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsll_w((v8i32)_1, (v8i32)_2); }
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
__lasx_xvsll_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsll_d((v4i64)_1, (v4i64)_2); }
#define __lasx_xvslli_b(/*__m256i*/ _1, /*ui3*/ _2) \
  ((__m256i)__builtin_lasx_xvslli_b((v32i8)(_1), (_2)))
#define __lasx_xvslli_h(/*__m256i*/ _1, /*ui4*/ _2) \
  ((__m256i)__builtin_lasx_xvslli_h((v16i16)(_1), (_2)))
#define __lasx_xvslli_w(/*__m256i*/ _1, /*ui5*/ _2) \
  ((__m256i)__builtin_lasx_xvslli_w((v8i32)(_1), (_2)))
#define __lasx_xvslli_d(/*__m256i*/ _1, /*ui6*/ _2) \
  ((__m256i)__builtin_lasx_xvslli_d((v4i64)(_1), (_2)))
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
__lasx_xvsra_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsra_b((v32i8)_1, (v32i8)_2); }
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
__lasx_xvsra_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsra_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsra_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsra_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsra_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsra_d((v4i64)_1, (v4i64)_2); } #define __lasx_xvsrai_b(/*__m256i*/ _1, /*ui3*/ _2) \ ((__m256i)__builtin_lasx_xvsrai_b((v32i8)(_1), (_2))) #define __lasx_xvsrai_h(/*__m256i*/ _1, /*ui4*/ _2) \ ((__m256i)__builtin_lasx_xvsrai_h((v16i16)(_1), (_2))) #define __lasx_xvsrai_w(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvsrai_w((v8i32)(_1), (_2))) #define __lasx_xvsrai_d(/*__m256i*/ _1, /*ui6*/ _2) \ ((__m256i)__builtin_lasx_xvsrai_d((v4i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrar_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsrar_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrar_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsrar_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrar_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsrar_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrar_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsrar_d((v4i64)_1, (v4i64)_2); } #define __lasx_xvsrari_b(/*__m256i*/ _1, /*ui3*/ _2) \ ((__m256i)__builtin_lasx_xvsrari_b((v32i8)(_1), (_2))) #define __lasx_xvsrari_h(/*__m256i*/ _1, /*ui4*/ _2) \ ((__m256i)__builtin_lasx_xvsrari_h((v16i16)(_1), (_2))) #define __lasx_xvsrari_w(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvsrari_w((v8i32)(_1), (_2))) #define __lasx_xvsrari_d(/*__m256i*/ _1, /*ui6*/ _2) \ ((__m256i)__builtin_lasx_xvsrari_d((v4i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrl_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsrl_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrl_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsrl_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrl_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsrl_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrl_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsrl_d((v4i64)_1, (v4i64)_2); } #define __lasx_xvsrli_b(/*__m256i*/ _1, /*ui3*/ _2) \ ((__m256i)__builtin_lasx_xvsrli_b((v32i8)(_1), (_2))) #define __lasx_xvsrli_h(/*__m256i*/ _1, /*ui4*/ _2) \ ((__m256i)__builtin_lasx_xvsrli_h((v16i16)(_1), (_2))) #define __lasx_xvsrli_w(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvsrli_w((v8i32)(_1), (_2))) #define __lasx_xvsrli_d(/*__m256i*/ _1, /*ui6*/ _2) \ ((__m256i)__builtin_lasx_xvsrli_d((v4i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrlr_b(__m256i _1, __m256i _2) { return 
(__m256i)__builtin_lasx_xvsrlr_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrlr_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsrlr_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrlr_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsrlr_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrlr_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsrlr_d((v4i64)_1, (v4i64)_2); } #define __lasx_xvsrlri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ((__m256i)__builtin_lasx_xvsrlri_b((v32i8)(_1), (_2))) #define __lasx_xvsrlri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ((__m256i)__builtin_lasx_xvsrlri_h((v16i16)(_1), (_2))) #define __lasx_xvsrlri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvsrlri_w((v8i32)(_1), (_2))) #define __lasx_xvsrlri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ((__m256i)__builtin_lasx_xvsrlri_d((v4i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvbitclr_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvbitclr_b((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvbitclr_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvbitclr_h((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvbitclr_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvbitclr_w((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvbitclr_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvbitclr_d((v4u64)_1, (v4u64)_2); } #define __lasx_xvbitclri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ((__m256i)__builtin_lasx_xvbitclri_b((v32u8)(_1), (_2))) #define __lasx_xvbitclri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ((__m256i)__builtin_lasx_xvbitclri_h((v16u16)(_1), (_2))) #define __lasx_xvbitclri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvbitclri_w((v8u32)(_1), (_2))) #define __lasx_xvbitclri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ((__m256i)__builtin_lasx_xvbitclri_d((v4u64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvbitset_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvbitset_b((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvbitset_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvbitset_h((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvbitset_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvbitset_w((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvbitset_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvbitset_d((v4u64)_1, (v4u64)_2); } #define __lasx_xvbitseti_b(/*__m256i*/ _1, /*ui3*/ _2) \ ((__m256i)__builtin_lasx_xvbitseti_b((v32u8)(_1), (_2))) #define __lasx_xvbitseti_h(/*__m256i*/ _1, /*ui4*/ _2) \ ((__m256i)__builtin_lasx_xvbitseti_h((v16u16)(_1), (_2))) #define __lasx_xvbitseti_w(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvbitseti_w((v8u32)(_1), (_2))) #define __lasx_xvbitseti_d(/*__m256i*/ 
_1, /*ui6*/ _2) \ ((__m256i)__builtin_lasx_xvbitseti_d((v4u64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvbitrev_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvbitrev_b((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvbitrev_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvbitrev_h((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvbitrev_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvbitrev_w((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvbitrev_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvbitrev_d((v4u64)_1, (v4u64)_2); } #define __lasx_xvbitrevi_b(/*__m256i*/ _1, /*ui3*/ _2) \ ((__m256i)__builtin_lasx_xvbitrevi_b((v32u8)(_1), (_2))) #define __lasx_xvbitrevi_h(/*__m256i*/ _1, /*ui4*/ _2) \ ((__m256i)__builtin_lasx_xvbitrevi_h((v16u16)(_1), (_2))) #define __lasx_xvbitrevi_w(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvbitrevi_w((v8u32)(_1), (_2))) #define __lasx_xvbitrevi_d(/*__m256i*/ _1, /*ui6*/ _2) \ ((__m256i)__builtin_lasx_xvbitrevi_d((v4u64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvadd_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvadd_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvadd_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvadd_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvadd_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvadd_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvadd_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvadd_d((v4i64)_1, (v4i64)_2); } #define __lasx_xvaddi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvaddi_bu((v32i8)(_1), (_2))) #define __lasx_xvaddi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvaddi_hu((v16i16)(_1), (_2))) #define __lasx_xvaddi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvaddi_wu((v8i32)(_1), (_2))) #define __lasx_xvaddi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvaddi_du((v4i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsub_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsub_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsub_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsub_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsub_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsub_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsub_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsub_d((v4i64)_1, (v4i64)_2); } #define __lasx_xvsubi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvsubi_bu((v32i8)(_1), (_2))) #define __lasx_xvsubi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ 
((__m256i)__builtin_lasx_xvsubi_hu((v16i16)(_1), (_2))) #define __lasx_xvsubi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvsubi_wu((v8i32)(_1), (_2))) #define __lasx_xvsubi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvsubi_du((v4i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmax_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmax_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmax_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmax_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmax_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmax_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmax_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmax_d((v4i64)_1, (v4i64)_2); } #define __lasx_xvmaxi_b(/*__m256i*/ _1, /*si5*/ _2) \ ((__m256i)__builtin_lasx_xvmaxi_b((v32i8)(_1), (_2))) #define __lasx_xvmaxi_h(/*__m256i*/ _1, /*si5*/ _2) \ ((__m256i)__builtin_lasx_xvmaxi_h((v16i16)(_1), (_2))) #define __lasx_xvmaxi_w(/*__m256i*/ _1, /*si5*/ _2) \ ((__m256i)__builtin_lasx_xvmaxi_w((v8i32)(_1), (_2))) #define __lasx_xvmaxi_d(/*__m256i*/ _1, /*si5*/ _2) \ ((__m256i)__builtin_lasx_xvmaxi_d((v4i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmax_bu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmax_bu((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmax_hu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmax_hu((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmax_wu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmax_wu((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmax_du(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmax_du((v4u64)_1, (v4u64)_2); } #define __lasx_xvmaxi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvmaxi_bu((v32u8)(_1), (_2))) #define __lasx_xvmaxi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvmaxi_hu((v16u16)(_1), (_2))) #define __lasx_xvmaxi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvmaxi_wu((v8u32)(_1), (_2))) #define __lasx_xvmaxi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvmaxi_du((v4u64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmin_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmin_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmin_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmin_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmin_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmin_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmin_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmin_d((v4i64)_1, (v4i64)_2); } #define __lasx_xvmini_b(/*__m256i*/ 
_1, /*si5*/ _2) \ ((__m256i)__builtin_lasx_xvmini_b((v32i8)(_1), (_2))) #define __lasx_xvmini_h(/*__m256i*/ _1, /*si5*/ _2) \ ((__m256i)__builtin_lasx_xvmini_h((v16i16)(_1), (_2))) #define __lasx_xvmini_w(/*__m256i*/ _1, /*si5*/ _2) \ ((__m256i)__builtin_lasx_xvmini_w((v8i32)(_1), (_2))) #define __lasx_xvmini_d(/*__m256i*/ _1, /*si5*/ _2) \ ((__m256i)__builtin_lasx_xvmini_d((v4i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmin_bu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmin_bu((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmin_hu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmin_hu((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmin_wu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmin_wu((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmin_du(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmin_du((v4u64)_1, (v4u64)_2); } #define __lasx_xvmini_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvmini_bu((v32u8)(_1), (_2))) #define __lasx_xvmini_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvmini_hu((v16u16)(_1), (_2))) #define __lasx_xvmini_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvmini_wu((v8u32)(_1), (_2))) #define __lasx_xvmini_du(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvmini_du((v4u64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvseq_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvseq_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvseq_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvseq_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvseq_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvseq_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvseq_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvseq_d((v4i64)_1, (v4i64)_2); } #define __lasx_xvseqi_b(/*__m256i*/ _1, /*si5*/ _2) \ ((__m256i)__builtin_lasx_xvseqi_b((v32i8)(_1), (_2))) #define __lasx_xvseqi_h(/*__m256i*/ _1, /*si5*/ _2) \ ((__m256i)__builtin_lasx_xvseqi_h((v16i16)(_1), (_2))) #define __lasx_xvseqi_w(/*__m256i*/ _1, /*si5*/ _2) \ ((__m256i)__builtin_lasx_xvseqi_w((v8i32)(_1), (_2))) #define __lasx_xvseqi_d(/*__m256i*/ _1, /*si5*/ _2) \ ((__m256i)__builtin_lasx_xvseqi_d((v4i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvslt_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvslt_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvslt_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvslt_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvslt_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvslt_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvslt_d(__m256i 
_1, __m256i _2) { return (__m256i)__builtin_lasx_xvslt_d((v4i64)_1, (v4i64)_2); } #define __lasx_xvslti_b(/*__m256i*/ _1, /*si5*/ _2) \ ((__m256i)__builtin_lasx_xvslti_b((v32i8)(_1), (_2))) #define __lasx_xvslti_h(/*__m256i*/ _1, /*si5*/ _2) \ ((__m256i)__builtin_lasx_xvslti_h((v16i16)(_1), (_2))) #define __lasx_xvslti_w(/*__m256i*/ _1, /*si5*/ _2) \ ((__m256i)__builtin_lasx_xvslti_w((v8i32)(_1), (_2))) #define __lasx_xvslti_d(/*__m256i*/ _1, /*si5*/ _2) \ ((__m256i)__builtin_lasx_xvslti_d((v4i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvslt_bu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvslt_bu((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvslt_hu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvslt_hu((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvslt_wu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvslt_wu((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvslt_du(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvslt_du((v4u64)_1, (v4u64)_2); } #define __lasx_xvslti_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvslti_bu((v32u8)(_1), (_2))) #define __lasx_xvslti_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvslti_hu((v16u16)(_1), (_2))) #define __lasx_xvslti_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvslti_wu((v8u32)(_1), (_2))) #define __lasx_xvslti_du(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvslti_du((v4u64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsle_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsle_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsle_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsle_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsle_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsle_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsle_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsle_d((v4i64)_1, (v4i64)_2); } #define __lasx_xvslei_b(/*__m256i*/ _1, /*si5*/ _2) \ ((__m256i)__builtin_lasx_xvslei_b((v32i8)(_1), (_2))) #define __lasx_xvslei_h(/*__m256i*/ _1, /*si5*/ _2) \ ((__m256i)__builtin_lasx_xvslei_h((v16i16)(_1), (_2))) #define __lasx_xvslei_w(/*__m256i*/ _1, /*si5*/ _2) \ ((__m256i)__builtin_lasx_xvslei_w((v8i32)(_1), (_2))) #define __lasx_xvslei_d(/*__m256i*/ _1, /*si5*/ _2) \ ((__m256i)__builtin_lasx_xvslei_d((v4i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsle_bu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsle_bu((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsle_hu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsle_hu((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsle_wu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsle_wu((v8u32)_1, 
(v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsle_du(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsle_du((v4u64)_1, (v4u64)_2); } #define __lasx_xvslei_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvslei_bu((v32u8)(_1), (_2))) #define __lasx_xvslei_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvslei_hu((v16u16)(_1), (_2))) #define __lasx_xvslei_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvslei_wu((v8u32)(_1), (_2))) #define __lasx_xvslei_du(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvslei_du((v4u64)(_1), (_2))) #define __lasx_xvsat_b(/*__m256i*/ _1, /*ui3*/ _2) \ ((__m256i)__builtin_lasx_xvsat_b((v32i8)(_1), (_2))) #define __lasx_xvsat_h(/*__m256i*/ _1, /*ui4*/ _2) \ ((__m256i)__builtin_lasx_xvsat_h((v16i16)(_1), (_2))) #define __lasx_xvsat_w(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvsat_w((v8i32)(_1), (_2))) #define __lasx_xvsat_d(/*__m256i*/ _1, /*ui6*/ _2) \ ((__m256i)__builtin_lasx_xvsat_d((v4i64)(_1), (_2))) #define __lasx_xvsat_bu(/*__m256i*/ _1, /*ui3*/ _2) \ ((__m256i)__builtin_lasx_xvsat_bu((v32u8)(_1), (_2))) #define __lasx_xvsat_hu(/*__m256i*/ _1, /*ui4*/ _2) \ ((__m256i)__builtin_lasx_xvsat_hu((v16u16)(_1), (_2))) #define __lasx_xvsat_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvsat_wu((v8u32)(_1), (_2))) #define __lasx_xvsat_du(/*__m256i*/ _1, /*ui6*/ _2) \ ((__m256i)__builtin_lasx_xvsat_du((v4u64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvadda_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvadda_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvadda_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvadda_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvadda_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvadda_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvadda_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvadda_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsadd_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsadd_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsadd_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsadd_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsadd_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsadd_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsadd_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsadd_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsadd_bu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsadd_bu((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsadd_hu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsadd_hu((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i __lasx_xvsadd_wu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsadd_wu((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsadd_du(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsadd_du((v4u64)_1, (v4u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvavg_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvavg_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvavg_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvavg_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvavg_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvavg_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvavg_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvavg_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvavg_bu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvavg_bu((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvavg_hu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvavg_hu((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvavg_wu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvavg_wu((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvavg_du(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvavg_du((v4u64)_1, (v4u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvavgr_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvavgr_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvavgr_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvavgr_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvavgr_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvavgr_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvavgr_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvavgr_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvavgr_bu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvavgr_bu((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvavgr_hu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvavgr_hu((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvavgr_wu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvavgr_wu((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvavgr_du(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvavgr_du((v4u64)_1, (v4u64)_2); } extern __inline 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssub_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssub_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssub_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssub_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssub_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssub_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssub_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssub_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssub_bu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssub_bu((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssub_hu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssub_hu((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssub_wu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssub_wu((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssub_du(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssub_du((v4u64)_1, (v4u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvabsd_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvabsd_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvabsd_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvabsd_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvabsd_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvabsd_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvabsd_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvabsd_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvabsd_bu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvabsd_bu((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvabsd_hu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvabsd_hu((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvabsd_wu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvabsd_wu((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvabsd_du(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvabsd_du((v4u64)_1, (v4u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmul_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmul_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmul_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmul_h((v16i16)_1, 
(v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmul_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmul_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmul_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmul_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmadd_b(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmadd_b((v32i8)_1, (v32i8)_2, (v32i8)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmadd_h(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmadd_h((v16i16)_1, (v16i16)_2, (v16i16)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmadd_w(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmadd_w((v8i32)_1, (v8i32)_2, (v8i32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmadd_d(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmadd_d((v4i64)_1, (v4i64)_2, (v4i64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmsub_b(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmsub_b((v32i8)_1, (v32i8)_2, (v32i8)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmsub_h(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmsub_h((v16i16)_1, (v16i16)_2, (v16i16)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmsub_w(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmsub_w((v8i32)_1, (v8i32)_2, (v8i32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmsub_d(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmsub_d((v4i64)_1, (v4i64)_2, (v4i64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvdiv_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvdiv_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvdiv_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvdiv_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvdiv_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvdiv_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvdiv_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvdiv_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvdiv_bu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvdiv_bu((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvdiv_hu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvdiv_hu((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvdiv_wu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvdiv_wu((v8u32)_1, 
(v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvdiv_du(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvdiv_du((v4u64)_1, (v4u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvhaddw_h_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvhaddw_h_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvhaddw_w_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvhaddw_w_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvhaddw_d_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvhaddw_d_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvhaddw_hu_bu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvhaddw_hu_bu((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvhaddw_wu_hu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvhaddw_wu_hu((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvhaddw_du_wu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvhaddw_du_wu((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvhsubw_h_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvhsubw_h_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvhsubw_w_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvhsubw_w_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvhsubw_d_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvhsubw_d_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvhsubw_hu_bu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvhsubw_hu_bu((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvhsubw_wu_hu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvhsubw_wu_hu((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvhsubw_du_wu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvhsubw_du_wu((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmod_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmod_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmod_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmod_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmod_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmod_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmod_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmod_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m256i __lasx_xvmod_bu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmod_bu((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmod_hu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmod_hu((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmod_wu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmod_wu((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmod_du(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmod_du((v4u64)_1, (v4u64)_2); } #define __lasx_xvrepl128vei_b(/*__m256i*/ _1, /*ui4*/ _2) \ ((__m256i)__builtin_lasx_xvrepl128vei_b((v32i8)(_1), (_2))) #define __lasx_xvrepl128vei_h(/*__m256i*/ _1, /*ui3*/ _2) \ ((__m256i)__builtin_lasx_xvrepl128vei_h((v16i16)(_1), (_2))) #define __lasx_xvrepl128vei_w(/*__m256i*/ _1, /*ui2*/ _2) \ ((__m256i)__builtin_lasx_xvrepl128vei_w((v8i32)(_1), (_2))) #define __lasx_xvrepl128vei_d(/*__m256i*/ _1, /*ui1*/ _2) \ ((__m256i)__builtin_lasx_xvrepl128vei_d((v4i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvpickev_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvpickev_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvpickev_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvpickev_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvpickev_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvpickev_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvpickev_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvpickev_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvpickod_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvpickod_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvpickod_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvpickod_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvpickod_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvpickod_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvpickod_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvpickod_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvilvh_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvilvh_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvilvh_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvilvh_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvilvh_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvilvh_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvilvh_d(__m256i _1, __m256i 
_2) { return (__m256i)__builtin_lasx_xvilvh_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvilvl_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvilvl_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvilvl_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvilvl_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvilvl_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvilvl_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvilvl_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvilvl_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvpackev_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvpackev_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvpackev_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvpackev_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvpackev_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvpackev_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvpackev_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvpackev_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvpackod_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvpackod_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvpackod_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvpackod_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvpackod_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvpackod_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvpackod_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvpackod_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvshuf_b(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvshuf_b((v32i8)_1, (v32i8)_2, (v32i8)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvshuf_h(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvshuf_h((v16i16)_1, (v16i16)_2, (v16i16)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvshuf_w(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvshuf_w((v8i32)_1, (v8i32)_2, (v8i32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvshuf_d(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvshuf_d((v4i64)_1, (v4i64)_2, (v4i64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvand_v(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvand_v((v32u8)_1, 
(v32u8)_2); } #define __lasx_xvandi_b(/*__m256i*/ _1, /*ui8*/ _2) \ ((__m256i)__builtin_lasx_xvandi_b((v32u8)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvor_v(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvor_v((v32u8)_1, (v32u8)_2); } #define __lasx_xvori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ((__m256i)__builtin_lasx_xvori_b((v32u8)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvnor_v(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvnor_v((v32u8)_1, (v32u8)_2); } #define __lasx_xvnori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ((__m256i)__builtin_lasx_xvnori_b((v32u8)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvxor_v(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvxor_v((v32u8)_1, (v32u8)_2); } #define __lasx_xvxori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ((__m256i)__builtin_lasx_xvxori_b((v32u8)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvbitsel_v(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvbitsel_v((v32u8)_1, (v32u8)_2, (v32u8)_3); } #define __lasx_xvbitseli_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ((__m256i)__builtin_lasx_xvbitseli_b((v32u8)(_1), (v32u8)(_2), (_3))) #define __lasx_xvshuf4i_b(/*__m256i*/ _1, /*ui8*/ _2) \ ((__m256i)__builtin_lasx_xvshuf4i_b((v32i8)(_1), (_2))) #define __lasx_xvshuf4i_h(/*__m256i*/ _1, /*ui8*/ _2) \ ((__m256i)__builtin_lasx_xvshuf4i_h((v16i16)(_1), (_2))) #define __lasx_xvshuf4i_w(/*__m256i*/ _1, /*ui8*/ _2) \ ((__m256i)__builtin_lasx_xvshuf4i_w((v8i32)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvreplgr2vr_b(int _1) { return (__m256i)__builtin_lasx_xvreplgr2vr_b((int)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvreplgr2vr_h(int _1) { return (__m256i)__builtin_lasx_xvreplgr2vr_h((int)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvreplgr2vr_w(int _1) { return (__m256i)__builtin_lasx_xvreplgr2vr_w((int)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvreplgr2vr_d(long int _1) { return (__m256i)__builtin_lasx_xvreplgr2vr_d((long int)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvpcnt_b(__m256i _1) { return (__m256i)__builtin_lasx_xvpcnt_b((v32i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvpcnt_h(__m256i _1) { return (__m256i)__builtin_lasx_xvpcnt_h((v16i16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvpcnt_w(__m256i _1) { return (__m256i)__builtin_lasx_xvpcnt_w((v8i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvpcnt_d(__m256i _1) { return (__m256i)__builtin_lasx_xvpcnt_d((v4i64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvclo_b(__m256i _1) { return (__m256i)__builtin_lasx_xvclo_b((v32i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvclo_h(__m256i _1) { return (__m256i)__builtin_lasx_xvclo_h((v16i16)_1); } extern __inline 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvclo_w(__m256i _1) { return (__m256i)__builtin_lasx_xvclo_w((v8i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvclo_d(__m256i _1) { return (__m256i)__builtin_lasx_xvclo_d((v4i64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvclz_b(__m256i _1) { return (__m256i)__builtin_lasx_xvclz_b((v32i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvclz_h(__m256i _1) { return (__m256i)__builtin_lasx_xvclz_h((v16i16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvclz_w(__m256i _1) { return (__m256i)__builtin_lasx_xvclz_w((v8i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvclz_d(__m256i _1) { return (__m256i)__builtin_lasx_xvclz_d((v4i64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfadd_s(__m256 _1, __m256 _2) { return (__m256)__builtin_lasx_xvfadd_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfadd_d(__m256d _1, __m256d _2) { return (__m256d)__builtin_lasx_xvfadd_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfsub_s(__m256 _1, __m256 _2) { return (__m256)__builtin_lasx_xvfsub_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfsub_d(__m256d _1, __m256d _2) { return (__m256d)__builtin_lasx_xvfsub_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfmul_s(__m256 _1, __m256 _2) { return (__m256)__builtin_lasx_xvfmul_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfmul_d(__m256d _1, __m256d _2) { return (__m256d)__builtin_lasx_xvfmul_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfdiv_s(__m256 _1, __m256 _2) { return (__m256)__builtin_lasx_xvfdiv_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfdiv_d(__m256d _1, __m256d _2) { return (__m256d)__builtin_lasx_xvfdiv_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcvt_h_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcvt_h_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfcvt_s_d(__m256d _1, __m256d _2) { return (__m256)__builtin_lasx_xvfcvt_s_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfmin_s(__m256 _1, __m256 _2) { return (__m256)__builtin_lasx_xvfmin_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfmin_d(__m256d _1, __m256d _2) { return (__m256d)__builtin_lasx_xvfmin_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfmina_s(__m256 _1, __m256 _2) { return 
(__m256)__builtin_lasx_xvfmina_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfmina_d(__m256d _1, __m256d _2) { return (__m256d)__builtin_lasx_xvfmina_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfmax_s(__m256 _1, __m256 _2) { return (__m256)__builtin_lasx_xvfmax_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfmax_d(__m256d _1, __m256d _2) { return (__m256d)__builtin_lasx_xvfmax_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfmaxa_s(__m256 _1, __m256 _2) { return (__m256)__builtin_lasx_xvfmaxa_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfmaxa_d(__m256d _1, __m256d _2) { return (__m256d)__builtin_lasx_xvfmaxa_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfclass_s(__m256 _1) { return (__m256i)__builtin_lasx_xvfclass_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfclass_d(__m256d _1) { return (__m256i)__builtin_lasx_xvfclass_d((v4f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfsqrt_s(__m256 _1) { return (__m256)__builtin_lasx_xvfsqrt_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfsqrt_d(__m256d _1) { return (__m256d)__builtin_lasx_xvfsqrt_d((v4f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfrecip_s(__m256 _1) { return (__m256)__builtin_lasx_xvfrecip_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfrecip_d(__m256d _1) { return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfrecipe_s(__m256 _1) { return (__m256)__builtin_lasx_xvfrecipe_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfrecipe_d(__m256d _1) { return (__m256d)__builtin_lasx_xvfrecipe_d((v4f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfrint_s(__m256 _1) { return (__m256)__builtin_lasx_xvfrint_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfrint_d(__m256d _1) { return (__m256d)__builtin_lasx_xvfrint_d((v4f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfrsqrt_s(__m256 _1) { return (__m256)__builtin_lasx_xvfrsqrt_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfrsqrt_d(__m256d _1) { return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfrsqrte_s(__m256 _1) { return (__m256)__builtin_lasx_xvfrsqrte_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfrsqrte_d(__m256d _1) { return (__m256d)__builtin_lasx_xvfrsqrte_d((v4f64)_1); } extern __inline 
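/*
 * Editor's illustrative sketch (not part of the original header): the
 * single-precision wrappers above (__lasx_xvfadd_s, __lasx_xvfmul_s, and the
 * __lasx_xvfmadd_s defined further down) operate lane-wise on eight packed
 * floats. Assuming a LoongArch target built with -mlasx, and that the
 * __lasx_xvld / __lasx_xvst macros defined later in this header move a full
 * 256-bit vector, a fused multiply-add over three float arrays might look
 * like:
 *
 *   void fma8(const float *a, const float *b, float *c) {
 *       __m256 va = (__m256)__lasx_xvld(a, 0);
 *       __m256 vb = (__m256)__lasx_xvld(b, 0);
 *       __m256 vc = (__m256)__lasx_xvld(c, 0);
 *       __lasx_xvst((__m256i)__lasx_xvfmadd_s(va, vb, vc), c, 0);
 *   }
 */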
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvflogb_s(__m256 _1) { return (__m256)__builtin_lasx_xvflogb_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvflogb_d(__m256d _1) { return (__m256d)__builtin_lasx_xvflogb_d((v4f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfcvth_s_h(__m256i _1) { return (__m256)__builtin_lasx_xvfcvth_s_h((v16i16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfcvth_d_s(__m256 _1) { return (__m256d)__builtin_lasx_xvfcvth_d_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfcvtl_s_h(__m256i _1) { return (__m256)__builtin_lasx_xvfcvtl_s_h((v16i16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfcvtl_d_s(__m256 _1) { return (__m256d)__builtin_lasx_xvfcvtl_d_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftint_w_s(__m256 _1) { return (__m256i)__builtin_lasx_xvftint_w_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftint_l_d(__m256d _1) { return (__m256i)__builtin_lasx_xvftint_l_d((v4f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftint_wu_s(__m256 _1) { return (__m256i)__builtin_lasx_xvftint_wu_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftint_lu_d(__m256d _1) { return (__m256i)__builtin_lasx_xvftint_lu_d((v4f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrz_w_s(__m256 _1) { return (__m256i)__builtin_lasx_xvftintrz_w_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrz_l_d(__m256d _1) { return (__m256i)__builtin_lasx_xvftintrz_l_d((v4f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrz_wu_s(__m256 _1) { return (__m256i)__builtin_lasx_xvftintrz_wu_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrz_lu_d(__m256d _1) { return (__m256i)__builtin_lasx_xvftintrz_lu_d((v4f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvffint_s_w(__m256i _1) { return (__m256)__builtin_lasx_xvffint_s_w((v8i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvffint_d_l(__m256i _1) { return (__m256d)__builtin_lasx_xvffint_d_l((v4i64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvffint_s_wu(__m256i _1) { return (__m256)__builtin_lasx_xvffint_s_wu((v8u32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvffint_d_lu(__m256i _1) { return (__m256d)__builtin_lasx_xvffint_d_lu((v4u64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvreplve_b(__m256i _1, int _2) { return (__m256i)__builtin_lasx_xvreplve_b((v32i8)_1, (int)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i 
__lasx_xvreplve_h(__m256i _1, int _2) { return (__m256i)__builtin_lasx_xvreplve_h((v16i16)_1, (int)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvreplve_w(__m256i _1, int _2) { return (__m256i)__builtin_lasx_xvreplve_w((v8i32)_1, (int)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvreplve_d(__m256i _1, int _2) { return (__m256i)__builtin_lasx_xvreplve_d((v4i64)_1, (int)_2); } #define __lasx_xvpermi_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ((__m256i)__builtin_lasx_xvpermi_w((v8i32)(_1), (v8i32)(_2), (_3))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvandn_v(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvandn_v((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvneg_b(__m256i _1) { return (__m256i)__builtin_lasx_xvneg_b((v32i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvneg_h(__m256i _1) { return (__m256i)__builtin_lasx_xvneg_h((v16i16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvneg_w(__m256i _1) { return (__m256i)__builtin_lasx_xvneg_w((v8i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvneg_d(__m256i _1) { return (__m256i)__builtin_lasx_xvneg_d((v4i64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmuh_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmuh_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmuh_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmuh_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmuh_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmuh_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmuh_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmuh_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmuh_bu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmuh_bu((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmuh_hu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmuh_hu((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmuh_wu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmuh_wu((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmuh_du(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmuh_du((v4u64)_1, (v4u64)_2); } #define __lasx_xvsllwil_h_b(/*__m256i*/ _1, /*ui3*/ _2) \ ((__m256i)__builtin_lasx_xvsllwil_h_b((v32i8)(_1), (_2))) #define __lasx_xvsllwil_w_h(/*__m256i*/ _1, /*ui4*/ _2) \ ((__m256i)__builtin_lasx_xvsllwil_w_h((v16i16)(_1), (_2))) #define __lasx_xvsllwil_d_w(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvsllwil_d_w((v8i32)(_1), (_2))) #define __lasx_xvsllwil_hu_bu(/*__m256i*/ _1, /*ui3*/ _2) \ 
((__m256i)__builtin_lasx_xvsllwil_hu_bu((v32u8)(_1), (_2))) #define __lasx_xvsllwil_wu_hu(/*__m256i*/ _1, /*ui4*/ _2) \ ((__m256i)__builtin_lasx_xvsllwil_wu_hu((v16u16)(_1), (_2))) #define __lasx_xvsllwil_du_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvsllwil_du_wu((v8u32)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsran_b_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsran_b_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsran_h_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsran_h_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsran_w_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsran_w_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssran_b_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssran_b_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssran_h_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssran_h_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssran_w_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssran_w_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssran_bu_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssran_bu_h((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssran_hu_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssran_hu_w((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssran_wu_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssran_wu_d((v4u64)_1, (v4u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrarn_b_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsrarn_b_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrarn_h_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsrarn_h_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrarn_w_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsrarn_w_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssrarn_b_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssrarn_b_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssrarn_h_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssrarn_h_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssrarn_w_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssrarn_w_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssrarn_bu_h(__m256i _1, __m256i _2) { return 
(__m256i)__builtin_lasx_xvssrarn_bu_h((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssrarn_hu_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssrarn_hu_w((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssrarn_wu_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssrarn_wu_d((v4u64)_1, (v4u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrln_b_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsrln_b_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrln_h_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsrln_h_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrln_w_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsrln_w_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssrln_bu_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssrln_bu_h((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssrln_hu_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssrln_hu_w((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssrln_wu_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssrln_wu_d((v4u64)_1, (v4u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrlrn_b_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsrlrn_b_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrlrn_h_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsrlrn_h_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsrlrn_w_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsrlrn_w_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssrlrn_bu_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssrlrn_bu_h((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssrlrn_hu_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssrlrn_hu_w((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssrlrn_wu_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssrlrn_wu_d((v4u64)_1, (v4u64)_2); } #define __lasx_xvfrstpi_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ((__m256i)__builtin_lasx_xvfrstpi_b((v32i8)(_1), (v32i8)(_2), (_3))) #define __lasx_xvfrstpi_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ((__m256i)__builtin_lasx_xvfrstpi_h((v16i16)(_1), (v16i16)(_2), (_3))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfrstp_b(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvfrstp_b((v32i8)_1, (v32i8)_2, (v32i8)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i 
__lasx_xvfrstp_h(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvfrstp_h((v16i16)_1, (v16i16)_2, (v16i16)_3); } #define __lasx_xvshuf4i_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ((__m256i)__builtin_lasx_xvshuf4i_d((v4i64)(_1), (v4i64)(_2), (_3))) #define __lasx_xvbsrl_v(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvbsrl_v((v32i8)(_1), (_2))) #define __lasx_xvbsll_v(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvbsll_v((v32i8)(_1), (_2))) #define __lasx_xvextrins_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ((__m256i)__builtin_lasx_xvextrins_b((v32i8)(_1), (v32i8)(_2), (_3))) #define __lasx_xvextrins_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ((__m256i)__builtin_lasx_xvextrins_h((v16i16)(_1), (v16i16)(_2), (_3))) #define __lasx_xvextrins_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ((__m256i)__builtin_lasx_xvextrins_w((v8i32)(_1), (v8i32)(_2), (_3))) #define __lasx_xvextrins_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ((__m256i)__builtin_lasx_xvextrins_d((v4i64)(_1), (v4i64)(_2), (_3))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmskltz_b(__m256i _1) { return (__m256i)__builtin_lasx_xvmskltz_b((v32i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmskltz_h(__m256i _1) { return (__m256i)__builtin_lasx_xvmskltz_h((v16i16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmskltz_w(__m256i _1) { return (__m256i)__builtin_lasx_xvmskltz_w((v8i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmskltz_d(__m256i _1) { return (__m256i)__builtin_lasx_xvmskltz_d((v4i64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsigncov_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsigncov_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsigncov_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsigncov_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsigncov_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsigncov_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsigncov_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsigncov_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfmadd_s(__m256 _1, __m256 _2, __m256 _3) { return (__m256)__builtin_lasx_xvfmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfmadd_d(__m256d _1, __m256d _2, __m256d _3) { return (__m256d)__builtin_lasx_xvfmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfmsub_s(__m256 _1, __m256 _2, __m256 _3) { return (__m256)__builtin_lasx_xvfmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfmsub_d(__m256d _1, __m256d _2, __m256d _3) { return (__m256d)__builtin_lasx_xvfmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m256 __lasx_xvfnmadd_s(__m256 _1, __m256 _2, __m256 _3) { return (__m256)__builtin_lasx_xvfnmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfnmadd_d(__m256d _1, __m256d _2, __m256d _3) { return (__m256d)__builtin_lasx_xvfnmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfnmsub_s(__m256 _1, __m256 _2, __m256 _3) { return (__m256)__builtin_lasx_xvfnmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfnmsub_d(__m256d _1, __m256d _2, __m256d _3) { return (__m256d)__builtin_lasx_xvfnmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrne_w_s(__m256 _1) { return (__m256i)__builtin_lasx_xvftintrne_w_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrne_l_d(__m256d _1) { return (__m256i)__builtin_lasx_xvftintrne_l_d((v4f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrp_w_s(__m256 _1) { return (__m256i)__builtin_lasx_xvftintrp_w_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrp_l_d(__m256d _1) { return (__m256i)__builtin_lasx_xvftintrp_l_d((v4f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrm_w_s(__m256 _1) { return (__m256i)__builtin_lasx_xvftintrm_w_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrm_l_d(__m256d _1) { return (__m256i)__builtin_lasx_xvftintrm_l_d((v4f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftint_w_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvftint_w_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvffint_s_l(__m256i _1, __m256i _2) { return (__m256)__builtin_lasx_xvffint_s_l((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrz_w_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvftintrz_w_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrp_w_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvftintrp_w_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrm_w_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvftintrm_w_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrne_w_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvftintrne_w_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftinth_l_s(__m256 _1) { return (__m256i)__builtin_lasx_xvftinth_l_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintl_l_s(__m256 _1) { return (__m256i)__builtin_lasx_xvftintl_l_s((v8f32)_1); } extern __inline 
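/*
 * Editor's note (assumption, not in the original header): the float-to-int
 * conversion wrappers use LoongArch rounding-mode suffixes -- rne (nearest
 * even), rz (toward zero), rp (toward +inf), rm (toward -inf) -- while the
 * plain __lasx_xvftint_* forms presumably follow the current FCSR rounding
 * mode. For example, truncating eight packed floats to int32 lanes would be:
 *
 *   __m256i i32 = __lasx_xvftintrz_w_s(v);   // v is an __m256 of 8 floats
 */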
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvffinth_d_w(__m256i _1) { return (__m256d)__builtin_lasx_xvffinth_d_w((v8i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvffintl_d_w(__m256i _1) { return (__m256d)__builtin_lasx_xvffintl_d_w((v8i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrzh_l_s(__m256 _1) { return (__m256i)__builtin_lasx_xvftintrzh_l_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrzl_l_s(__m256 _1) { return (__m256i)__builtin_lasx_xvftintrzl_l_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrph_l_s(__m256 _1) { return (__m256i)__builtin_lasx_xvftintrph_l_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrpl_l_s(__m256 _1) { return (__m256i)__builtin_lasx_xvftintrpl_l_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrmh_l_s(__m256 _1) { return (__m256i)__builtin_lasx_xvftintrmh_l_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrml_l_s(__m256 _1) { return (__m256i)__builtin_lasx_xvftintrml_l_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrneh_l_s(__m256 _1) { return (__m256i)__builtin_lasx_xvftintrneh_l_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvftintrnel_l_s(__m256 _1) { return (__m256i)__builtin_lasx_xvftintrnel_l_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfrintrne_s(__m256 _1) { return (__m256)__builtin_lasx_xvfrintrne_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfrintrne_d(__m256d _1) { return (__m256d)__builtin_lasx_xvfrintrne_d((v4f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfrintrz_s(__m256 _1) { return (__m256)__builtin_lasx_xvfrintrz_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfrintrz_d(__m256d _1) { return (__m256d)__builtin_lasx_xvfrintrz_d((v4f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfrintrp_s(__m256 _1) { return (__m256)__builtin_lasx_xvfrintrp_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfrintrp_d(__m256d _1) { return (__m256d)__builtin_lasx_xvfrintrp_d((v4f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfrintrm_s(__m256 _1) { return (__m256)__builtin_lasx_xvfrintrm_s((v8f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d __lasx_xvfrintrm_d(__m256d _1) { return (__m256d)__builtin_lasx_xvfrintrm_d((v4f64)_1); } #define __lasx_xvld(/*void **/ _1, /*si12*/ _2) \ ((__m256i)__builtin_lasx_xvld((void const *)(_1), (_2))) #define __lasx_xvst(/*__m256i*/ _1, /*void **/ _2, /*si12*/ _3) \ ((void)__builtin_lasx_xvst((v32i8)(_1), (void *)(_2), (_3))) #define __lasx_xvstelm_b(/*__m256i*/ 
_1, /*void **/ _2, /*si8*/ _3, \ /*idx*/ _4) \ ((void)__builtin_lasx_xvstelm_b((v32i8)(_1), (void *)(_2), (_3), (_4))) #define __lasx_xvstelm_h(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ /*idx*/ _4) \ ((void)__builtin_lasx_xvstelm_h((v16i16)(_1), (void *)(_2), (_3), (_4))) #define __lasx_xvstelm_w(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ /*idx*/ _4) \ ((void)__builtin_lasx_xvstelm_w((v8i32)(_1), (void *)(_2), (_3), (_4))) #define __lasx_xvstelm_d(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ /*idx*/ _4) \ ((void)__builtin_lasx_xvstelm_d((v4i64)(_1), (void *)(_2), (_3), (_4))) #define __lasx_xvinsve0_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui3*/ _3) \ ((__m256i)__builtin_lasx_xvinsve0_w((v8i32)(_1), (v8i32)(_2), (_3))) #define __lasx_xvinsve0_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui2*/ _3) \ ((__m256i)__builtin_lasx_xvinsve0_d((v4i64)(_1), (v4i64)(_2), (_3))) #define __lasx_xvpickve_w(/*__m256i*/ _1, /*ui3*/ _2) \ ((__m256i)__builtin_lasx_xvpickve_w((v8i32)(_1), (_2))) #define __lasx_xvpickve_d(/*__m256i*/ _1, /*ui2*/ _2) \ ((__m256i)__builtin_lasx_xvpickve_d((v4i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssrlrn_b_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssrlrn_b_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssrlrn_h_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssrlrn_h_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssrlrn_w_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssrlrn_w_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssrln_b_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssrln_b_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssrln_h_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssrln_h_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvssrln_w_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvssrln_w_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvorn_v(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvorn_v((v32i8)_1, (v32i8)_2); } #define __lasx_xvldi(/*i13*/ _1) ((__m256i)__builtin_lasx_xvldi((_1))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvldx(void const *_1, long int _2) { return (__m256i)__builtin_lasx_xvldx((void const *)_1, (long int)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void __lasx_xvstx(__m256i _1, void *_2, long int _3) { return (void)__builtin_lasx_xvstx((v32i8)_1, (void *)_2, (long int)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvextl_qu_du(__m256i _1) { return (__m256i)__builtin_lasx_xvextl_qu_du((v4u64)_1); } #define __lasx_xvinsgr2vr_w(/*__m256i*/ _1, /*int*/ _2, /*ui3*/ _3) \ ((__m256i)__builtin_lasx_xvinsgr2vr_w((v8i32)(_1), (int)(_2), (_3))) #define __lasx_xvinsgr2vr_d(/*__m256i*/ _1, /*long int*/ _2, /*ui2*/ _3) \ ((__m256i)__builtin_lasx_xvinsgr2vr_d((v4i64)(_1), (long int)(_2), (_3))) extern __inline __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m256i __lasx_xvreplve0_b(__m256i _1) { return (__m256i)__builtin_lasx_xvreplve0_b((v32i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvreplve0_h(__m256i _1) { return (__m256i)__builtin_lasx_xvreplve0_h((v16i16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvreplve0_w(__m256i _1) { return (__m256i)__builtin_lasx_xvreplve0_w((v8i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvreplve0_d(__m256i _1) { return (__m256i)__builtin_lasx_xvreplve0_d((v4i64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvreplve0_q(__m256i _1) { return (__m256i)__builtin_lasx_xvreplve0_q((v32i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_vext2xv_h_b(__m256i _1) { return (__m256i)__builtin_lasx_vext2xv_h_b((v32i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_vext2xv_w_h(__m256i _1) { return (__m256i)__builtin_lasx_vext2xv_w_h((v16i16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_vext2xv_d_w(__m256i _1) { return (__m256i)__builtin_lasx_vext2xv_d_w((v8i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_vext2xv_w_b(__m256i _1) { return (__m256i)__builtin_lasx_vext2xv_w_b((v32i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_vext2xv_d_h(__m256i _1) { return (__m256i)__builtin_lasx_vext2xv_d_h((v16i16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_vext2xv_d_b(__m256i _1) { return (__m256i)__builtin_lasx_vext2xv_d_b((v32i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_vext2xv_hu_bu(__m256i _1) { return (__m256i)__builtin_lasx_vext2xv_hu_bu((v32i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_vext2xv_wu_hu(__m256i _1) { return (__m256i)__builtin_lasx_vext2xv_wu_hu((v16i16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_vext2xv_du_wu(__m256i _1) { return (__m256i)__builtin_lasx_vext2xv_du_wu((v8i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_vext2xv_wu_bu(__m256i _1) { return (__m256i)__builtin_lasx_vext2xv_wu_bu((v32i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_vext2xv_du_hu(__m256i _1) { return (__m256i)__builtin_lasx_vext2xv_du_hu((v16i16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_vext2xv_du_bu(__m256i _1) { return (__m256i)__builtin_lasx_vext2xv_du_bu((v32i8)_1); } #define __lasx_xvpermi_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ((__m256i)__builtin_lasx_xvpermi_q((v32i8)(_1), (v32i8)(_2), (_3))) #define __lasx_xvpermi_d(/*__m256i*/ _1, /*ui8*/ _2) \ ((__m256i)__builtin_lasx_xvpermi_d((v4i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvperm_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvperm_w((v8i32)_1, (v8i32)_2); } #define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) \ 
((__m256i)__builtin_lasx_xvldrepl_b((void const *)(_1), (_2))) #define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) \ ((__m256i)__builtin_lasx_xvldrepl_h((void const *)(_1), (_2))) #define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) \ ((__m256i)__builtin_lasx_xvldrepl_w((void const *)(_1), (_2))) #define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) \ ((__m256i)__builtin_lasx_xvldrepl_d((void const *)(_1), (_2))) #define __lasx_xvpickve2gr_w(/*__m256i*/ _1, /*ui3*/ _2) \ ((int)__builtin_lasx_xvpickve2gr_w((v8i32)(_1), (_2))) #define __lasx_xvpickve2gr_wu(/*__m256i*/ _1, /*ui3*/ _2) \ ((unsigned int)__builtin_lasx_xvpickve2gr_wu((v8i32)(_1), (_2))) #define __lasx_xvpickve2gr_d(/*__m256i*/ _1, /*ui2*/ _2) \ ((long int)__builtin_lasx_xvpickve2gr_d((v4i64)(_1), (_2))) #define __lasx_xvpickve2gr_du(/*__m256i*/ _1, /*ui2*/ _2) \ ((unsigned long int)__builtin_lasx_xvpickve2gr_du((v4i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwev_q_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwev_q_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwev_d_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwev_d_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwev_w_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwev_w_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwev_h_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwev_h_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwev_q_du(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwev_q_du((v4u64)_1, (v4u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwev_d_wu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwev_d_wu((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwev_w_hu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwev_w_hu((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwev_h_bu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwev_h_bu((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsubwev_q_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsubwev_q_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsubwev_d_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsubwev_d_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsubwev_w_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsubwev_w_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsubwev_h_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsubwev_h_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsubwev_q_du(__m256i _1, __m256i _2) { return 
(__m256i)__builtin_lasx_xvsubwev_q_du((v4u64)_1, (v4u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsubwev_d_wu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsubwev_d_wu((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsubwev_w_hu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsubwev_w_hu((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsubwev_h_bu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsubwev_h_bu((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwev_q_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwev_q_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwev_d_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwev_d_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwev_w_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwev_w_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwev_h_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwev_h_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwev_q_du(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwev_q_du((v4u64)_1, (v4u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwev_d_wu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwev_d_wu((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwev_w_hu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwev_w_hu((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwev_h_bu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwev_h_bu((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwod_q_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwod_q_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwod_d_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwod_d_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwod_w_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwod_w_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwod_h_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwod_h_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwod_q_du(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwod_q_du((v4u64)_1, (v4u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwod_d_wu(__m256i _1, __m256i _2) { return 
(__m256i)__builtin_lasx_xvaddwod_d_wu((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwod_w_hu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwod_w_hu((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwod_h_bu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwod_h_bu((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsubwod_q_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsubwod_q_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsubwod_d_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsubwod_d_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsubwod_w_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsubwod_w_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsubwod_h_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsubwod_h_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsubwod_q_du(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsubwod_q_du((v4u64)_1, (v4u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsubwod_d_wu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsubwod_d_wu((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsubwod_w_hu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsubwod_w_hu((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsubwod_h_bu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsubwod_h_bu((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwod_q_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwod_q_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwod_d_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwod_d_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwod_w_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwod_w_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwod_h_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwod_h_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwod_q_du(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwod_q_du((v4u64)_1, (v4u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwod_d_wu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwod_d_wu((v8u32)_1, (v8u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwod_w_hu(__m256i _1, __m256i _2) { return 
(__m256i)__builtin_lasx_xvmulwod_w_hu((v16u16)_1, (v16u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwod_h_bu(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwod_h_bu((v32u8)_1, (v32u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwev_d_wu_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwev_d_wu_w((v8u32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwev_w_hu_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwev_w_hu_h((v16u16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwev_h_bu_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwev_h_bu_b((v32u8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwev_d_wu_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwev_d_wu_w((v8u32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwev_w_hu_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwev_w_hu_h((v16u16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwev_h_bu_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwev_h_bu_b((v32u8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwod_d_wu_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwod_d_wu_w((v8u32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwod_w_hu_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwod_w_hu_h((v16u16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwod_h_bu_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwod_h_bu_b((v32u8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwod_d_wu_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwod_d_wu_w((v8u32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwod_w_hu_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwod_w_hu_h((v16u16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwod_h_bu_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwod_h_bu_b((v32u8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvhaddw_q_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvhaddw_q_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvhaddw_qu_du(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvhaddw_qu_du((v4u64)_1, (v4u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvhsubw_q_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvhsubw_q_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i 
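/*
 * Editor's reading (hedged, not in the original header): in the widening
 * wrappers, "ev"/"od" appear to select the even/odd-indexed source elements,
 * the suffix names the wider destination and narrower source types (e.g.
 * _w_h: int16 sources widened to int32 results), and mixed suffixes such as
 * _wu_w pair an unsigned with a signed operand. Under that reading, a 16-bit
 * dot product accumulated in 32-bit lanes could combine both halves:
 *
 *   __m256i acc = __lasx_xvreplgr2vr_w(0);
 *   acc = __lasx_xvmaddwev_w_h(acc, a, b);   // even int16 lanes
 *   acc = __lasx_xvmaddwod_w_h(acc, a, b);   // odd int16 lanes
 */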
__lasx_xvhsubw_qu_du(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvhsubw_qu_du((v4u64)_1, (v4u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwev_q_d(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwev_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwev_d_w(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwev_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwev_w_h(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwev_w_h((v8i32)_1, (v16i16)_2, (v16i16)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwev_h_b(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwev_h_b((v16i16)_1, (v32i8)_2, (v32i8)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwev_q_du(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwev_q_du((v4u64)_1, (v4u64)_2, (v4u64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwev_d_wu(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwev_d_wu((v4u64)_1, (v8u32)_2, (v8u32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwev_w_hu(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwev_w_hu((v8u32)_1, (v16u16)_2, (v16u16)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwev_h_bu(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwev_h_bu((v16u16)_1, (v32u8)_2, (v32u8)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwod_q_d(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwod_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwod_d_w(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwod_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwod_w_h(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwod_w_h((v8i32)_1, (v16i16)_2, (v16i16)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwod_h_b(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwod_h_b((v16i16)_1, (v32i8)_2, (v32i8)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwod_q_du(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwod_q_du((v4u64)_1, (v4u64)_2, (v4u64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwod_d_wu(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwod_d_wu((v4u64)_1, (v8u32)_2, (v8u32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwod_w_hu(__m256i _1, __m256i _2, __m256i _3) { return 
(__m256i)__builtin_lasx_xvmaddwod_w_hu((v8u32)_1, (v16u16)_2, (v16u16)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwod_h_bu(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwod_h_bu((v16u16)_1, (v32u8)_2, (v32u8)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwev_q_du_d(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwev_q_du_d((v4i64)_1, (v4u64)_2, (v4i64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwev_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwev_d_wu_w((v4i64)_1, (v8u32)_2, (v8i32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwev_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwev_w_hu_h((v8i32)_1, (v16u16)_2, (v16i16)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwev_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwev_h_bu_b((v16i16)_1, (v32u8)_2, (v32i8)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwod_q_du_d(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwod_q_du_d((v4i64)_1, (v4u64)_2, (v4i64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwod_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwod_d_wu_w((v4i64)_1, (v8u32)_2, (v8i32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwod_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwod_w_hu_h((v8i32)_1, (v16u16)_2, (v16i16)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmaddwod_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { return (__m256i)__builtin_lasx_xvmaddwod_h_bu_b((v16i16)_1, (v32u8)_2, (v32i8)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvrotr_b(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvrotr_b((v32i8)_1, (v32i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvrotr_h(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvrotr_h((v16i16)_1, (v16i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvrotr_w(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvrotr_w((v8i32)_1, (v8i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvrotr_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvrotr_d((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvadd_q(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvadd_q((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvsub_q(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvsub_q((v4i64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwev_q_du_d(__m256i _1, __m256i _2) { return 
(__m256i)__builtin_lasx_xvaddwev_q_du_d((v4u64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvaddwod_q_du_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvaddwod_q_du_d((v4u64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwev_q_du_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwev_q_du_d((v4u64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmulwod_q_du_d(__m256i _1, __m256i _2) { return (__m256i)__builtin_lasx_xvmulwod_q_du_d((v4u64)_1, (v4i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmskgez_b(__m256i _1) { return (__m256i)__builtin_lasx_xvmskgez_b((v32i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvmsknz_b(__m256i _1) { return (__m256i)__builtin_lasx_xvmsknz_b((v32i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvexth_h_b(__m256i _1) { return (__m256i)__builtin_lasx_xvexth_h_b((v32i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvexth_w_h(__m256i _1) { return (__m256i)__builtin_lasx_xvexth_w_h((v16i16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvexth_d_w(__m256i _1) { return (__m256i)__builtin_lasx_xvexth_d_w((v8i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvexth_q_d(__m256i _1) { return (__m256i)__builtin_lasx_xvexth_q_d((v4i64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvexth_hu_bu(__m256i _1) { return (__m256i)__builtin_lasx_xvexth_hu_bu((v32u8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvexth_wu_hu(__m256i _1) { return (__m256i)__builtin_lasx_xvexth_wu_hu((v16u16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvexth_du_wu(__m256i _1) { return (__m256i)__builtin_lasx_xvexth_du_wu((v8u32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvexth_qu_du(__m256i _1) { return (__m256i)__builtin_lasx_xvexth_qu_du((v4u64)_1); } #define __lasx_xvrotri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ((__m256i)__builtin_lasx_xvrotri_b((v32i8)(_1), (_2))) #define __lasx_xvrotri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ((__m256i)__builtin_lasx_xvrotri_h((v16i16)(_1), (_2))) #define __lasx_xvrotri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ((__m256i)__builtin_lasx_xvrotri_w((v8i32)(_1), (_2))) #define __lasx_xvrotri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ((__m256i)__builtin_lasx_xvrotri_d((v4i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvextl_q_d(__m256i _1) { return (__m256i)__builtin_lasx_xvextl_q_d((v4i64)_1); } #define __lasx_xvsrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ((__m256i)__builtin_lasx_xvsrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) #define __lasx_xvsrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ((__m256i)__builtin_lasx_xvsrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) #define __lasx_xvsrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ((__m256i)__builtin_lasx_xvsrlni_w_d((v8i32)(_1), (v8i32)(_2), 
(_3))) #define __lasx_xvsrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ((__m256i)__builtin_lasx_xvsrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) #define __lasx_xvsrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ((__m256i)__builtin_lasx_xvsrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) #define __lasx_xvsrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ((__m256i)__builtin_lasx_xvsrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) #define __lasx_xvsrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ((__m256i)__builtin_lasx_xvsrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) #define __lasx_xvsrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ((__m256i)__builtin_lasx_xvsrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) #define __lasx_xvssrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ((__m256i)__builtin_lasx_xvssrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) #define __lasx_xvssrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ((__m256i)__builtin_lasx_xvssrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) #define __lasx_xvssrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ((__m256i)__builtin_lasx_xvssrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) #define __lasx_xvssrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ((__m256i)__builtin_lasx_xvssrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) #define __lasx_xvssrlni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ((__m256i)__builtin_lasx_xvssrlni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) #define __lasx_xvssrlni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ((__m256i)__builtin_lasx_xvssrlni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) #define __lasx_xvssrlni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ((__m256i)__builtin_lasx_xvssrlni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) #define __lasx_xvssrlni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ((__m256i)__builtin_lasx_xvssrlni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) #define __lasx_xvssrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ((__m256i)__builtin_lasx_xvssrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) #define __lasx_xvssrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ((__m256i)__builtin_lasx_xvssrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) #define __lasx_xvssrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ((__m256i)__builtin_lasx_xvssrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) #define __lasx_xvssrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ((__m256i)__builtin_lasx_xvssrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) #define __lasx_xvssrlrni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ((__m256i)__builtin_lasx_xvssrlrni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) #define __lasx_xvssrlrni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ((__m256i)__builtin_lasx_xvssrlrni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) #define __lasx_xvssrlrni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ((__m256i)__builtin_lasx_xvssrlrni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) #define __lasx_xvssrlrni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ((__m256i)__builtin_lasx_xvssrlrni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) #define __lasx_xvsrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ((__m256i)__builtin_lasx_xvsrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) #define __lasx_xvsrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ((__m256i)__builtin_lasx_xvsrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) #define __lasx_xvsrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ((__m256i)__builtin_lasx_xvsrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) #define 
__lasx_xvsrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ((__m256i)__builtin_lasx_xvsrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) #define __lasx_xvsrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ((__m256i)__builtin_lasx_xvsrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) #define __lasx_xvsrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ((__m256i)__builtin_lasx_xvsrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) #define __lasx_xvsrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ((__m256i)__builtin_lasx_xvsrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) #define __lasx_xvsrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ((__m256i)__builtin_lasx_xvsrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) #define __lasx_xvssrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ((__m256i)__builtin_lasx_xvssrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) #define __lasx_xvssrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ((__m256i)__builtin_lasx_xvssrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) #define __lasx_xvssrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ((__m256i)__builtin_lasx_xvssrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) #define __lasx_xvssrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ((__m256i)__builtin_lasx_xvssrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) #define __lasx_xvssrani_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ((__m256i)__builtin_lasx_xvssrani_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) #define __lasx_xvssrani_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ((__m256i)__builtin_lasx_xvssrani_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) #define __lasx_xvssrani_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ((__m256i)__builtin_lasx_xvssrani_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) #define __lasx_xvssrani_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ((__m256i)__builtin_lasx_xvssrani_du_q((v4u64)(_1), (v4i64)(_2), (_3))) #define __lasx_xvssrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ((__m256i)__builtin_lasx_xvssrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) #define __lasx_xvssrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ((__m256i)__builtin_lasx_xvssrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) #define __lasx_xvssrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ((__m256i)__builtin_lasx_xvssrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) #define __lasx_xvssrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ((__m256i)__builtin_lasx_xvssrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) #define __lasx_xvssrarni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ((__m256i)__builtin_lasx_xvssrarni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) #define __lasx_xvssrarni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ((__m256i)__builtin_lasx_xvssrarni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) #define __lasx_xvssrarni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ((__m256i)__builtin_lasx_xvssrarni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) #define __lasx_xvssrarni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ((__m256i)__builtin_lasx_xvssrarni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) #define __lasx_xbnz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_b((v32u8)(_1))) #define __lasx_xbnz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_d((v4u64)(_1))) #define __lasx_xbnz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_h((v16u16)(_1))) #define __lasx_xbnz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_v((v32u8)(_1))) #define __lasx_xbnz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_w((v8u32)(_1))) #define __lasx_xbz_b(/*__m256i*/ _1) 
((int)__builtin_lasx_xbz_b((v32u8)(_1))) #define __lasx_xbz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_d((v4u64)(_1))) #define __lasx_xbz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_h((v16u16)(_1))) #define __lasx_xbz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_v((v32u8)(_1))) #define __lasx_xbz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_w((v8u32)(_1))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_caf_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_caf_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_caf_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_caf_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_ceq_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_ceq_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_ceq_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_ceq_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_cle_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_cle_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_cle_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_cle_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_clt_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_clt_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_clt_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_clt_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_cne_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_cne_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_cne_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_cne_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_cor_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_cor_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_cor_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_cor_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_cueq_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_cueq_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_cueq_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_cueq_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_cule_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_cule_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_cule_s(__m256 
_1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_cule_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_cult_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_cult_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_cult_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_cult_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_cun_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_cun_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_cune_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_cune_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_cune_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_cune_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_cun_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_cun_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_saf_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_saf_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_saf_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_saf_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_seq_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_seq_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_seq_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_seq_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_sle_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_sle_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_sle_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_sle_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_slt_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_slt_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_slt_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_slt_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_sne_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_sne_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_sne_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_sne_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_sor_d(__m256d _1, __m256d _2) { return 
(__m256i)__builtin_lasx_xvfcmp_sor_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_sor_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_sor_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_sueq_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_sueq_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_sueq_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_sueq_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_sule_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_sule_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_sule_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_sule_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_sult_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_sult_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_sult_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_sult_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_sun_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_sun_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_sune_d(__m256d _1, __m256d _2) { return (__m256i)__builtin_lasx_xvfcmp_sune_d((v4f64)_1, (v4f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_sune_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_sune_s((v8f32)_1, (v8f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvfcmp_sun_s(__m256 _1, __m256 _2) { return (__m256i)__builtin_lasx_xvfcmp_sun_s((v8f32)_1, (v8f32)_2); } #define __lasx_xvpickve_d_f(/*__m256d*/ _1, /*ui2*/ _2) \ ((__m256d)__builtin_lasx_xvpickve_d_f((v4f64)(_1), (_2))) #define __lasx_xvpickve_w_f(/*__m256*/ _1, /*ui3*/ _2) \ ((__m256)__builtin_lasx_xvpickve_w_f((v8f32)(_1), (_2))) #define __lasx_xvrepli_b(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_b((_1))) #define __lasx_xvrepli_d(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_d((_1))) #define __lasx_xvrepli_h(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_h((_1))) #define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1))) #endif /* defined(__loongarch_asx). */ #endif /* _LOONGSON_ASXINTRIN_H. */ /*===---- limits.h - Standard header for integer sizes --------------------===*\ * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * \*===----------------------------------------------------------------------===*/ #ifndef __CLANG_LIMITS_H #define __CLANG_LIMITS_H /* The system's limits.h may, in turn, try to #include_next GCC's limits.h. Avert this #include_next madness. 
*/
#if defined __GNUC__ && !defined _GCC_LIMITS_H_
#define _GCC_LIMITS_H_
#endif
/* System headers include a number of constants from POSIX in <limits.h>.
   Include it if we're hosted. */
#if __STDC_HOSTED__ && __has_include_next(<limits.h>)
#include_next <limits.h>
#endif
/* Many system headers try to "help us out" by defining these. No really, we
   know how big each datatype is. */
#undef SCHAR_MIN
#undef SCHAR_MAX
#undef UCHAR_MAX
#undef SHRT_MIN
#undef SHRT_MAX
#undef USHRT_MAX
#undef INT_MIN
#undef INT_MAX
#undef UINT_MAX
#undef LONG_MIN
#undef LONG_MAX
#undef ULONG_MAX
#undef CHAR_BIT
#undef CHAR_MIN
#undef CHAR_MAX
/* C90/99 5.2.4.2.1 */
#define SCHAR_MAX __SCHAR_MAX__
#define SHRT_MAX __SHRT_MAX__
#define INT_MAX __INT_MAX__
#define LONG_MAX __LONG_MAX__
#define SCHAR_MIN (-__SCHAR_MAX__-1)
#define SHRT_MIN (-__SHRT_MAX__ -1)
#define INT_MIN (-__INT_MAX__ -1)
#define LONG_MIN (-__LONG_MAX__ -1L)
#define UCHAR_MAX (__SCHAR_MAX__*2 +1)
#if __SHRT_WIDTH__ < __INT_WIDTH__
#define USHRT_MAX (__SHRT_MAX__ * 2 + 1)
#else
#define USHRT_MAX (__SHRT_MAX__ * 2U + 1U)
#endif
#define UINT_MAX (__INT_MAX__ *2U +1U)
#define ULONG_MAX (__LONG_MAX__ *2UL+1UL)
#ifndef MB_LEN_MAX
#define MB_LEN_MAX 1
#endif
#define CHAR_BIT __CHAR_BIT__
/* C23 5.2.4.2.1 */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
#define BOOL_WIDTH __BOOL_WIDTH__
#define CHAR_WIDTH CHAR_BIT
#define SCHAR_WIDTH CHAR_BIT
#define UCHAR_WIDTH CHAR_BIT
#define USHRT_WIDTH __SHRT_WIDTH__
#define SHRT_WIDTH __SHRT_WIDTH__
#define UINT_WIDTH __INT_WIDTH__
#define INT_WIDTH __INT_WIDTH__
#define ULONG_WIDTH __LONG_WIDTH__
#define LONG_WIDTH __LONG_WIDTH__
#define ULLONG_WIDTH __LLONG_WIDTH__
#define LLONG_WIDTH __LLONG_WIDTH__
#define BITINT_MAXWIDTH __BITINT_MAXWIDTH__
#endif
#ifdef __CHAR_UNSIGNED__ /* -funsigned-char */
#define CHAR_MIN 0
#define CHAR_MAX UCHAR_MAX
#else
#define CHAR_MIN SCHAR_MIN
#define CHAR_MAX __SCHAR_MAX__
#endif
/* C99 5.2.4.2.1: Added long long.
   C++11 18.3.3.2: same contents as the Standard C Library header <climits>. */
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
    (defined(__cplusplus) && __cplusplus >= 201103L)
#undef LLONG_MIN
#undef LLONG_MAX
#undef ULLONG_MAX
#define LLONG_MAX __LONG_LONG_MAX__
#define LLONG_MIN (-__LONG_LONG_MAX__-1LL)
#define ULLONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL)
#endif
/* LONG_LONG_MIN/LONG_LONG_MAX/ULONG_LONG_MAX are a GNU extension. It's too bad
   that we don't have something like #pragma poison that could be used to
   deprecate a macro - the code should just use LLONG_MAX and friends. */
#if defined(__GNU_LIBRARY__) ? defined(__USE_GNU) : !defined(__STRICT_ANSI__)
#undef LONG_LONG_MIN
#undef LONG_LONG_MAX
#undef ULONG_LONG_MAX
#define LONG_LONG_MAX __LONG_LONG_MAX__
#define LONG_LONG_MIN (-__LONG_LONG_MAX__-1LL)
#define ULONG_LONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL)
#endif
#endif /* __CLANG_LIMITS_H */
/*===---- mwaitxintrin.h - MONITORX/MWAITX intrinsics ----------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __X86INTRIN_H
#error "Never use <mwaitxintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef __MWAITXINTRIN_H
#define __MWAITXINTRIN_H
/* Define the default attributes for the functions in this file. */
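/* Illustrative usage sketch (not from the upstream header): a typical
   monitor/wait pairing with the two intrinsics defined below. The flag
   variable and the zero extension/hint/clock arguments are assumptions chosen
   only for illustration; real code selects them per the processor manual.

     volatile int flag = 0;
     ...
     _mm_monitorx((void *)&flag, 0, 0);   // arm the monitor on &flag
     if (!flag)                           // re-check to avoid a lost wake-up
       _mm_mwaitx(0, 0, 0);               // wait until the monitored range is written

   A store to flag from another core, or an interrupt, ends the wait. */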
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mwaitx")))
/// Establishes a linear address memory range to be monitored and puts
/// the processor in the monitor event pending state. Data stored in the
/// monitored address range causes the processor to exit the pending state.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c MONITORX instruction.
///
/// \param __p
///    The memory range to be monitored. The size of the range is determined by
///    CPUID function 0000_0005h.
/// \param __extensions
///    Optional extensions for the monitoring state.
/// \param __hints
///    Optional hints for the monitoring state.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_monitorx(void * __p, unsigned __extensions, unsigned __hints)
{
  __builtin_ia32_monitorx(__p, __extensions, __hints);
}
/// Used with the \c MONITORX instruction to wait while the processor is in
/// the monitor event pending state. Data stored in the monitored address
/// range, or an interrupt, causes the processor to exit the pending state.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c MWAITX instruction.
///
/// \param __extensions
///    Optional extensions for the monitoring state, which can vary by
///    processor.
/// \param __hints
///    Optional hints for the monitoring state, which can vary by processor.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_mwaitx(unsigned __extensions, unsigned __hints, unsigned __clock)
{
  __builtin_ia32_mwaitx(__extensions, __hints, __clock);
}
#undef __DEFAULT_FN_ATTRS
#endif /* __MWAITXINTRIN_H */
/*===---- rdpruintrin.h - RDPRU intrinsics ---------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#if !defined __X86INTRIN_H
#error "Never use <rdpruintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef __RDPRUINTRIN_H
#define __RDPRUINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("rdpru")))
/// Reads the content of a processor register.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the RDPRU instruction.
///
/// \param reg_id
///    A processor register identifier.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__rdpru (int reg_id)
{
  return __builtin_ia32_rdpru(reg_id);
}
#define __RDPRU_MPERF 0
#define __RDPRU_APERF 1
/// Reads the content of processor register MPERF.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic generates instruction RDPRU to read the value of
/// register MPERF.
#define __mperf() __builtin_ia32_rdpru(__RDPRU_MPERF)
/// Reads the content of processor register APERF.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic generates instruction RDPRU to read the value of
/// register APERF.
#define __aperf() __builtin_ia32_rdpru(__RDPRU_APERF)
#undef __DEFAULT_FN_ATTRS
#endif /* __RDPRUINTRIN_H */
/*===---- tgmath.h - Standard header for type generic math ----------------===*\
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
\*===----------------------------------------------------------------------===*/
#ifndef __CLANG_TGMATH_H
#define __CLANG_TGMATH_H
/* C99 7.22 Type-generic math <tgmath.h>. */
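/* Illustrative sketch (not part of the upstream header): the effect of the
   machinery below is that one spelling dispatches on the promoted argument
   type. The variable names here are assumptions chosen only for illustration.

     float  f = sin(0.5f);           // real float argument        -> sinf
     double d = sin(2);              // int promotes to double     -> sin
     double complex z = sin(1.0 + I);// complex argument           -> csin
     long double l = pow(1.0L, 2);   // arguments promote jointly  -> powl

   Each macro below routes the call through the __tg_promote overloads so that
   the float, double, long double, or _Complex flavor is selected. */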
#include <math.h>
/*
 * Allow additional definitions and implementation-defined values on Apple
 * platforms. This is done after #include <math.h> to avoid depcycle conflicts
 * between libcxx and darwin in C++ modules builds.
 */
#if defined(__APPLE__) && __STDC_HOSTED__ && __has_include_next(<tgmath.h>)
#  include_next <tgmath.h>
#else
/* C++ handles type genericity with overloading in math.h. */
#ifndef __cplusplus
#include <complex.h>
#define _TG_ATTRSp __attribute__((__overloadable__))
#define _TG_ATTRS __attribute__((__overloadable__, __always_inline__))
// promotion
typedef void _Argument_type_is_not_arithmetic;
static _Argument_type_is_not_arithmetic __tg_promote(...)
  __attribute__((__unavailable__,__overloadable__));
static double _TG_ATTRSp __tg_promote(int);
static double _TG_ATTRSp __tg_promote(unsigned int);
static double _TG_ATTRSp __tg_promote(long);
static double _TG_ATTRSp __tg_promote(unsigned long);
static double _TG_ATTRSp __tg_promote(long long);
static double _TG_ATTRSp __tg_promote(unsigned long long);
static float _TG_ATTRSp __tg_promote(float);
static double _TG_ATTRSp __tg_promote(double);
static long double _TG_ATTRSp __tg_promote(long double);
static float _Complex _TG_ATTRSp __tg_promote(float _Complex);
static double _Complex _TG_ATTRSp __tg_promote(double _Complex);
static long double _Complex _TG_ATTRSp __tg_promote(long double _Complex);
#define __tg_promote1(__x) (__typeof__(__tg_promote(__x)))
#define __tg_promote2(__x, __y) (__typeof__(__tg_promote(__x) + \
                                            __tg_promote(__y)))
#define __tg_promote3(__x, __y, __z) (__typeof__(__tg_promote(__x) + \
                                                 __tg_promote(__y) + \
                                                 __tg_promote(__z)))
// acos
static float _TG_ATTRS __tg_acos(float __x) {return acosf(__x);}
static double _TG_ATTRS __tg_acos(double __x) {return acos(__x);}
static long double _TG_ATTRS __tg_acos(long double __x) {return acosl(__x);}
static float _Complex _TG_ATTRS __tg_acos(float _Complex __x) {return cacosf(__x);}
static double _Complex _TG_ATTRS __tg_acos(double _Complex __x) {return cacos(__x);}
static long double _Complex _TG_ATTRS __tg_acos(long double _Complex __x) {return cacosl(__x);}
#undef acos
#define acos(__x) __tg_acos(__tg_promote1((__x))(__x))
// asin
static float _TG_ATTRS __tg_asin(float __x) {return asinf(__x);}
static double _TG_ATTRS __tg_asin(double __x) {return asin(__x);}
static long double _TG_ATTRS __tg_asin(long double __x) {return asinl(__x);}
static float _Complex _TG_ATTRS __tg_asin(float _Complex __x) {return casinf(__x);}
static double _Complex _TG_ATTRS __tg_asin(double _Complex __x) {return casin(__x);}
static long double _Complex _TG_ATTRS __tg_asin(long double _Complex __x) {return casinl(__x);}
#undef asin
#define asin(__x) __tg_asin(__tg_promote1((__x))(__x))
// atan
static float _TG_ATTRS __tg_atan(float __x) {return atanf(__x);}
static double _TG_ATTRS __tg_atan(double __x) {return atan(__x);}
static long double _TG_ATTRS __tg_atan(long double __x) {return atanl(__x);}
static float _Complex _TG_ATTRS __tg_atan(float _Complex __x) {return catanf(__x);}
static double _Complex _TG_ATTRS __tg_atan(double _Complex __x) {return catan(__x);}
static long double _Complex _TG_ATTRS __tg_atan(long double _Complex __x) {return catanl(__x);}
#undef atan
#define atan(__x) __tg_atan(__tg_promote1((__x))(__x))
// acosh
static float _TG_ATTRS __tg_acosh(float __x) {return acoshf(__x);}
static double _TG_ATTRS __tg_acosh(double __x) {return acosh(__x);}
static long double _TG_ATTRS __tg_acosh(long double __x) {return acoshl(__x);}
static float _Complex _TG_ATTRS __tg_acosh(float _Complex __x) {return cacoshf(__x);}
static double _Complex _TG_ATTRS __tg_acosh(double _Complex __x) {return cacosh(__x);} static long double _Complex _TG_ATTRS __tg_acosh(long double _Complex __x) {return cacoshl(__x);} #undef acosh #define acosh(__x) __tg_acosh(__tg_promote1((__x))(__x)) // asinh static float _TG_ATTRS __tg_asinh(float __x) {return asinhf(__x);} static double _TG_ATTRS __tg_asinh(double __x) {return asinh(__x);} static long double _TG_ATTRS __tg_asinh(long double __x) {return asinhl(__x);} static float _Complex _TG_ATTRS __tg_asinh(float _Complex __x) {return casinhf(__x);} static double _Complex _TG_ATTRS __tg_asinh(double _Complex __x) {return casinh(__x);} static long double _Complex _TG_ATTRS __tg_asinh(long double _Complex __x) {return casinhl(__x);} #undef asinh #define asinh(__x) __tg_asinh(__tg_promote1((__x))(__x)) // atanh static float _TG_ATTRS __tg_atanh(float __x) {return atanhf(__x);} static double _TG_ATTRS __tg_atanh(double __x) {return atanh(__x);} static long double _TG_ATTRS __tg_atanh(long double __x) {return atanhl(__x);} static float _Complex _TG_ATTRS __tg_atanh(float _Complex __x) {return catanhf(__x);} static double _Complex _TG_ATTRS __tg_atanh(double _Complex __x) {return catanh(__x);} static long double _Complex _TG_ATTRS __tg_atanh(long double _Complex __x) {return catanhl(__x);} #undef atanh #define atanh(__x) __tg_atanh(__tg_promote1((__x))(__x)) // cos static float _TG_ATTRS __tg_cos(float __x) {return cosf(__x);} static double _TG_ATTRS __tg_cos(double __x) {return cos(__x);} static long double _TG_ATTRS __tg_cos(long double __x) {return cosl(__x);} static float _Complex _TG_ATTRS __tg_cos(float _Complex __x) {return ccosf(__x);} static double _Complex _TG_ATTRS __tg_cos(double _Complex __x) {return ccos(__x);} static long double _Complex _TG_ATTRS __tg_cos(long double _Complex __x) {return ccosl(__x);} #undef cos #define cos(__x) __tg_cos(__tg_promote1((__x))(__x)) // sin static float _TG_ATTRS __tg_sin(float __x) {return sinf(__x);} static double _TG_ATTRS __tg_sin(double __x) {return sin(__x);} static long double _TG_ATTRS __tg_sin(long double __x) {return sinl(__x);} static float _Complex _TG_ATTRS __tg_sin(float _Complex __x) {return csinf(__x);} static double _Complex _TG_ATTRS __tg_sin(double _Complex __x) {return csin(__x);} static long double _Complex _TG_ATTRS __tg_sin(long double _Complex __x) {return csinl(__x);} #undef sin #define sin(__x) __tg_sin(__tg_promote1((__x))(__x)) // tan static float _TG_ATTRS __tg_tan(float __x) {return tanf(__x);} static double _TG_ATTRS __tg_tan(double __x) {return tan(__x);} static long double _TG_ATTRS __tg_tan(long double __x) {return tanl(__x);} static float _Complex _TG_ATTRS __tg_tan(float _Complex __x) {return ctanf(__x);} static double _Complex _TG_ATTRS __tg_tan(double _Complex __x) {return ctan(__x);} static long double _Complex _TG_ATTRS __tg_tan(long double _Complex __x) {return ctanl(__x);} #undef tan #define tan(__x) __tg_tan(__tg_promote1((__x))(__x)) // cosh static float _TG_ATTRS __tg_cosh(float __x) {return coshf(__x);} static double _TG_ATTRS __tg_cosh(double __x) {return cosh(__x);} static long double _TG_ATTRS __tg_cosh(long double __x) {return coshl(__x);} static float _Complex _TG_ATTRS __tg_cosh(float _Complex __x) {return ccoshf(__x);} static double _Complex _TG_ATTRS __tg_cosh(double _Complex __x) {return ccosh(__x);} static long double _Complex _TG_ATTRS __tg_cosh(long double _Complex __x) {return ccoshl(__x);} #undef cosh #define cosh(__x) __tg_cosh(__tg_promote1((__x))(__x)) // sinh static float 
_TG_ATTRS __tg_sinh(float __x) {return sinhf(__x);} static double _TG_ATTRS __tg_sinh(double __x) {return sinh(__x);} static long double _TG_ATTRS __tg_sinh(long double __x) {return sinhl(__x);} static float _Complex _TG_ATTRS __tg_sinh(float _Complex __x) {return csinhf(__x);} static double _Complex _TG_ATTRS __tg_sinh(double _Complex __x) {return csinh(__x);} static long double _Complex _TG_ATTRS __tg_sinh(long double _Complex __x) {return csinhl(__x);} #undef sinh #define sinh(__x) __tg_sinh(__tg_promote1((__x))(__x)) // tanh static float _TG_ATTRS __tg_tanh(float __x) {return tanhf(__x);} static double _TG_ATTRS __tg_tanh(double __x) {return tanh(__x);} static long double _TG_ATTRS __tg_tanh(long double __x) {return tanhl(__x);} static float _Complex _TG_ATTRS __tg_tanh(float _Complex __x) {return ctanhf(__x);} static double _Complex _TG_ATTRS __tg_tanh(double _Complex __x) {return ctanh(__x);} static long double _Complex _TG_ATTRS __tg_tanh(long double _Complex __x) {return ctanhl(__x);} #undef tanh #define tanh(__x) __tg_tanh(__tg_promote1((__x))(__x)) // exp static float _TG_ATTRS __tg_exp(float __x) {return expf(__x);} static double _TG_ATTRS __tg_exp(double __x) {return exp(__x);} static long double _TG_ATTRS __tg_exp(long double __x) {return expl(__x);} static float _Complex _TG_ATTRS __tg_exp(float _Complex __x) {return cexpf(__x);} static double _Complex _TG_ATTRS __tg_exp(double _Complex __x) {return cexp(__x);} static long double _Complex _TG_ATTRS __tg_exp(long double _Complex __x) {return cexpl(__x);} #undef exp #define exp(__x) __tg_exp(__tg_promote1((__x))(__x)) // log static float _TG_ATTRS __tg_log(float __x) {return logf(__x);} static double _TG_ATTRS __tg_log(double __x) {return log(__x);} static long double _TG_ATTRS __tg_log(long double __x) {return logl(__x);} static float _Complex _TG_ATTRS __tg_log(float _Complex __x) {return clogf(__x);} static double _Complex _TG_ATTRS __tg_log(double _Complex __x) {return clog(__x);} static long double _Complex _TG_ATTRS __tg_log(long double _Complex __x) {return clogl(__x);} #undef log #define log(__x) __tg_log(__tg_promote1((__x))(__x)) // pow static float _TG_ATTRS __tg_pow(float __x, float __y) {return powf(__x, __y);} static double _TG_ATTRS __tg_pow(double __x, double __y) {return pow(__x, __y);} static long double _TG_ATTRS __tg_pow(long double __x, long double __y) {return powl(__x, __y);} static float _Complex _TG_ATTRS __tg_pow(float _Complex __x, float _Complex __y) {return cpowf(__x, __y);} static double _Complex _TG_ATTRS __tg_pow(double _Complex __x, double _Complex __y) {return cpow(__x, __y);} static long double _Complex _TG_ATTRS __tg_pow(long double _Complex __x, long double _Complex __y) {return cpowl(__x, __y);} #undef pow #define pow(__x, __y) __tg_pow(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y)) // sqrt static float _TG_ATTRS __tg_sqrt(float __x) {return sqrtf(__x);} static double _TG_ATTRS __tg_sqrt(double __x) {return sqrt(__x);} static long double _TG_ATTRS __tg_sqrt(long double __x) {return sqrtl(__x);} static float _Complex _TG_ATTRS __tg_sqrt(float _Complex __x) {return csqrtf(__x);} static double _Complex _TG_ATTRS __tg_sqrt(double _Complex __x) {return csqrt(__x);} static long double _Complex _TG_ATTRS __tg_sqrt(long double _Complex __x) {return csqrtl(__x);} #undef sqrt #define sqrt(__x) __tg_sqrt(__tg_promote1((__x))(__x)) // fabs static float _TG_ATTRS __tg_fabs(float __x) {return fabsf(__x);} static double _TG_ATTRS __tg_fabs(double __x) {return fabs(__x);} static long 
double _TG_ATTRS __tg_fabs(long double __x) {return fabsl(__x);} static float _TG_ATTRS __tg_fabs(float _Complex __x) {return cabsf(__x);} static double _TG_ATTRS __tg_fabs(double _Complex __x) {return cabs(__x);} static long double _TG_ATTRS __tg_fabs(long double _Complex __x) {return cabsl(__x);} #undef fabs #define fabs(__x) __tg_fabs(__tg_promote1((__x))(__x)) // atan2 static float _TG_ATTRS __tg_atan2(float __x, float __y) {return atan2f(__x, __y);} static double _TG_ATTRS __tg_atan2(double __x, double __y) {return atan2(__x, __y);} static long double _TG_ATTRS __tg_atan2(long double __x, long double __y) {return atan2l(__x, __y);} #undef atan2 #define atan2(__x, __y) __tg_atan2(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y)) // cbrt static float _TG_ATTRS __tg_cbrt(float __x) {return cbrtf(__x);} static double _TG_ATTRS __tg_cbrt(double __x) {return cbrt(__x);} static long double _TG_ATTRS __tg_cbrt(long double __x) {return cbrtl(__x);} #undef cbrt #define cbrt(__x) __tg_cbrt(__tg_promote1((__x))(__x)) // ceil static float _TG_ATTRS __tg_ceil(float __x) {return ceilf(__x);} static double _TG_ATTRS __tg_ceil(double __x) {return ceil(__x);} static long double _TG_ATTRS __tg_ceil(long double __x) {return ceill(__x);} #undef ceil #define ceil(__x) __tg_ceil(__tg_promote1((__x))(__x)) // copysign static float _TG_ATTRS __tg_copysign(float __x, float __y) {return copysignf(__x, __y);} static double _TG_ATTRS __tg_copysign(double __x, double __y) {return copysign(__x, __y);} static long double _TG_ATTRS __tg_copysign(long double __x, long double __y) {return copysignl(__x, __y);} #undef copysign #define copysign(__x, __y) __tg_copysign(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y)) // erf static float _TG_ATTRS __tg_erf(float __x) {return erff(__x);} static double _TG_ATTRS __tg_erf(double __x) {return erf(__x);} static long double _TG_ATTRS __tg_erf(long double __x) {return erfl(__x);} #undef erf #define erf(__x) __tg_erf(__tg_promote1((__x))(__x)) // erfc static float _TG_ATTRS __tg_erfc(float __x) {return erfcf(__x);} static double _TG_ATTRS __tg_erfc(double __x) {return erfc(__x);} static long double _TG_ATTRS __tg_erfc(long double __x) {return erfcl(__x);} #undef erfc #define erfc(__x) __tg_erfc(__tg_promote1((__x))(__x)) // exp2 static float _TG_ATTRS __tg_exp2(float __x) {return exp2f(__x);} static double _TG_ATTRS __tg_exp2(double __x) {return exp2(__x);} static long double _TG_ATTRS __tg_exp2(long double __x) {return exp2l(__x);} #undef exp2 #define exp2(__x) __tg_exp2(__tg_promote1((__x))(__x)) // expm1 static float _TG_ATTRS __tg_expm1(float __x) {return expm1f(__x);} static double _TG_ATTRS __tg_expm1(double __x) {return expm1(__x);} static long double _TG_ATTRS __tg_expm1(long double __x) {return expm1l(__x);} #undef expm1 #define expm1(__x) __tg_expm1(__tg_promote1((__x))(__x)) // fdim static float _TG_ATTRS __tg_fdim(float __x, float __y) {return fdimf(__x, __y);} static double _TG_ATTRS __tg_fdim(double __x, double __y) {return fdim(__x, __y);} static long double _TG_ATTRS __tg_fdim(long double __x, long double __y) {return fdiml(__x, __y);} #undef fdim #define fdim(__x, __y) __tg_fdim(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y)) // floor static float _TG_ATTRS __tg_floor(float __x) {return floorf(__x);} static double _TG_ATTRS __tg_floor(double __x) {return floor(__x);} static long double _TG_ATTRS __tg_floor(long double __x) {return floorl(__x);} #undef floor #define floor(__x) 
__tg_floor(__tg_promote1((__x))(__x)) // fma static float _TG_ATTRS __tg_fma(float __x, float __y, float __z) {return fmaf(__x, __y, __z);} static double _TG_ATTRS __tg_fma(double __x, double __y, double __z) {return fma(__x, __y, __z);} static long double _TG_ATTRS __tg_fma(long double __x,long double __y, long double __z) {return fmal(__x, __y, __z);} #undef fma #define fma(__x, __y, __z) \ __tg_fma(__tg_promote3((__x), (__y), (__z))(__x), \ __tg_promote3((__x), (__y), (__z))(__y), \ __tg_promote3((__x), (__y), (__z))(__z)) // fmax static float _TG_ATTRS __tg_fmax(float __x, float __y) {return fmaxf(__x, __y);} static double _TG_ATTRS __tg_fmax(double __x, double __y) {return fmax(__x, __y);} static long double _TG_ATTRS __tg_fmax(long double __x, long double __y) {return fmaxl(__x, __y);} #undef fmax #define fmax(__x, __y) __tg_fmax(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y)) // fmin static float _TG_ATTRS __tg_fmin(float __x, float __y) {return fminf(__x, __y);} static double _TG_ATTRS __tg_fmin(double __x, double __y) {return fmin(__x, __y);} static long double _TG_ATTRS __tg_fmin(long double __x, long double __y) {return fminl(__x, __y);} #undef fmin #define fmin(__x, __y) __tg_fmin(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y)) // fmod static float _TG_ATTRS __tg_fmod(float __x, float __y) {return fmodf(__x, __y);} static double _TG_ATTRS __tg_fmod(double __x, double __y) {return fmod(__x, __y);} static long double _TG_ATTRS __tg_fmod(long double __x, long double __y) {return fmodl(__x, __y);} #undef fmod #define fmod(__x, __y) __tg_fmod(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y)) // frexp static float _TG_ATTRS __tg_frexp(float __x, int* __y) {return frexpf(__x, __y);} static double _TG_ATTRS __tg_frexp(double __x, int* __y) {return frexp(__x, __y);} static long double _TG_ATTRS __tg_frexp(long double __x, int* __y) {return frexpl(__x, __y);} #undef frexp #define frexp(__x, __y) __tg_frexp(__tg_promote1((__x))(__x), __y) // hypot static float _TG_ATTRS __tg_hypot(float __x, float __y) {return hypotf(__x, __y);} static double _TG_ATTRS __tg_hypot(double __x, double __y) {return hypot(__x, __y);} static long double _TG_ATTRS __tg_hypot(long double __x, long double __y) {return hypotl(__x, __y);} #undef hypot #define hypot(__x, __y) __tg_hypot(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y)) // ilogb static int _TG_ATTRS __tg_ilogb(float __x) {return ilogbf(__x);} static int _TG_ATTRS __tg_ilogb(double __x) {return ilogb(__x);} static int _TG_ATTRS __tg_ilogb(long double __x) {return ilogbl(__x);} #undef ilogb #define ilogb(__x) __tg_ilogb(__tg_promote1((__x))(__x)) // ldexp static float _TG_ATTRS __tg_ldexp(float __x, int __y) {return ldexpf(__x, __y);} static double _TG_ATTRS __tg_ldexp(double __x, int __y) {return ldexp(__x, __y);} static long double _TG_ATTRS __tg_ldexp(long double __x, int __y) {return ldexpl(__x, __y);} #undef ldexp #define ldexp(__x, __y) __tg_ldexp(__tg_promote1((__x))(__x), __y) // lgamma static float _TG_ATTRS __tg_lgamma(float __x) {return lgammaf(__x);} static double _TG_ATTRS __tg_lgamma(double __x) {return lgamma(__x);} static long double _TG_ATTRS __tg_lgamma(long double __x) {return lgammal(__x);} #undef lgamma #define lgamma(__x) __tg_lgamma(__tg_promote1((__x))(__x)) // llrint static long long _TG_ATTRS __tg_llrint(float __x) {return llrintf(__x);} static long long _TG_ATTRS __tg_llrint(double __x) {return llrint(__x);} static long long _TG_ATTRS 
__tg_llrint(long double __x) {return llrintl(__x);} #undef llrint #define llrint(__x) __tg_llrint(__tg_promote1((__x))(__x)) // llround static long long _TG_ATTRS __tg_llround(float __x) {return llroundf(__x);} static long long _TG_ATTRS __tg_llround(double __x) {return llround(__x);} static long long _TG_ATTRS __tg_llround(long double __x) {return llroundl(__x);} #undef llround #define llround(__x) __tg_llround(__tg_promote1((__x))(__x)) // log10 static float _TG_ATTRS __tg_log10(float __x) {return log10f(__x);} static double _TG_ATTRS __tg_log10(double __x) {return log10(__x);} static long double _TG_ATTRS __tg_log10(long double __x) {return log10l(__x);} #undef log10 #define log10(__x) __tg_log10(__tg_promote1((__x))(__x)) // log1p static float _TG_ATTRS __tg_log1p(float __x) {return log1pf(__x);} static double _TG_ATTRS __tg_log1p(double __x) {return log1p(__x);} static long double _TG_ATTRS __tg_log1p(long double __x) {return log1pl(__x);} #undef log1p #define log1p(__x) __tg_log1p(__tg_promote1((__x))(__x)) // log2 static float _TG_ATTRS __tg_log2(float __x) {return log2f(__x);} static double _TG_ATTRS __tg_log2(double __x) {return log2(__x);} static long double _TG_ATTRS __tg_log2(long double __x) {return log2l(__x);} #undef log2 #define log2(__x) __tg_log2(__tg_promote1((__x))(__x)) // logb static float _TG_ATTRS __tg_logb(float __x) {return logbf(__x);} static double _TG_ATTRS __tg_logb(double __x) {return logb(__x);} static long double _TG_ATTRS __tg_logb(long double __x) {return logbl(__x);} #undef logb #define logb(__x) __tg_logb(__tg_promote1((__x))(__x)) // lrint static long _TG_ATTRS __tg_lrint(float __x) {return lrintf(__x);} static long _TG_ATTRS __tg_lrint(double __x) {return lrint(__x);} static long _TG_ATTRS __tg_lrint(long double __x) {return lrintl(__x);} #undef lrint #define lrint(__x) __tg_lrint(__tg_promote1((__x))(__x)) // lround static long _TG_ATTRS __tg_lround(float __x) {return lroundf(__x);} static long _TG_ATTRS __tg_lround(double __x) {return lround(__x);} static long _TG_ATTRS __tg_lround(long double __x) {return lroundl(__x);} #undef lround #define lround(__x) __tg_lround(__tg_promote1((__x))(__x)) // nearbyint static float _TG_ATTRS __tg_nearbyint(float __x) {return nearbyintf(__x);} static double _TG_ATTRS __tg_nearbyint(double __x) {return nearbyint(__x);} static long double _TG_ATTRS __tg_nearbyint(long double __x) {return nearbyintl(__x);} #undef nearbyint #define nearbyint(__x) __tg_nearbyint(__tg_promote1((__x))(__x)) // nextafter static float _TG_ATTRS __tg_nextafter(float __x, float __y) {return nextafterf(__x, __y);} static double _TG_ATTRS __tg_nextafter(double __x, double __y) {return nextafter(__x, __y);} static long double _TG_ATTRS __tg_nextafter(long double __x, long double __y) {return nextafterl(__x, __y);} #undef nextafter #define nextafter(__x, __y) __tg_nextafter(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y)) // nexttoward static float _TG_ATTRS __tg_nexttoward(float __x, long double __y) {return nexttowardf(__x, __y);} static double _TG_ATTRS __tg_nexttoward(double __x, long double __y) {return nexttoward(__x, __y);} static long double _TG_ATTRS __tg_nexttoward(long double __x, long double __y) {return nexttowardl(__x, __y);} #undef nexttoward #define nexttoward(__x, __y) __tg_nexttoward(__tg_promote1((__x))(__x), (__y)) // remainder static float _TG_ATTRS __tg_remainder(float __x, float __y) {return remainderf(__x, __y);} static double _TG_ATTRS __tg_remainder(double __x, double __y) {return remainder(__x, 
__y);} static long double _TG_ATTRS __tg_remainder(long double __x, long double __y) {return remainderl(__x, __y);} #undef remainder #define remainder(__x, __y) __tg_remainder(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y)) // remquo static float _TG_ATTRS __tg_remquo(float __x, float __y, int* __z) {return remquof(__x, __y, __z);} static double _TG_ATTRS __tg_remquo(double __x, double __y, int* __z) {return remquo(__x, __y, __z);} static long double _TG_ATTRS __tg_remquo(long double __x,long double __y, int* __z) {return remquol(__x, __y, __z);} #undef remquo #define remquo(__x, __y, __z) \ __tg_remquo(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y), \ (__z)) // rint static float _TG_ATTRS __tg_rint(float __x) {return rintf(__x);} static double _TG_ATTRS __tg_rint(double __x) {return rint(__x);} static long double _TG_ATTRS __tg_rint(long double __x) {return rintl(__x);} #undef rint #define rint(__x) __tg_rint(__tg_promote1((__x))(__x)) // round static float _TG_ATTRS __tg_round(float __x) {return roundf(__x);} static double _TG_ATTRS __tg_round(double __x) {return round(__x);} static long double _TG_ATTRS __tg_round(long double __x) {return roundl(__x);} #undef round #define round(__x) __tg_round(__tg_promote1((__x))(__x)) // scalbn static float _TG_ATTRS __tg_scalbn(float __x, int __y) {return scalbnf(__x, __y);} static double _TG_ATTRS __tg_scalbn(double __x, int __y) {return scalbn(__x, __y);} static long double _TG_ATTRS __tg_scalbn(long double __x, int __y) {return scalbnl(__x, __y);} #undef scalbn #define scalbn(__x, __y) __tg_scalbn(__tg_promote1((__x))(__x), __y) // scalbln static float _TG_ATTRS __tg_scalbln(float __x, long __y) {return scalblnf(__x, __y);} static double _TG_ATTRS __tg_scalbln(double __x, long __y) {return scalbln(__x, __y);} static long double _TG_ATTRS __tg_scalbln(long double __x, long __y) {return scalblnl(__x, __y);} #undef scalbln #define scalbln(__x, __y) __tg_scalbln(__tg_promote1((__x))(__x), __y) // tgamma static float _TG_ATTRS __tg_tgamma(float __x) {return tgammaf(__x);} static double _TG_ATTRS __tg_tgamma(double __x) {return tgamma(__x);} static long double _TG_ATTRS __tg_tgamma(long double __x) {return tgammal(__x);} #undef tgamma #define tgamma(__x) __tg_tgamma(__tg_promote1((__x))(__x)) // trunc static float _TG_ATTRS __tg_trunc(float __x) {return truncf(__x);} static double _TG_ATTRS __tg_trunc(double __x) {return trunc(__x);} static long double _TG_ATTRS __tg_trunc(long double __x) {return truncl(__x);} #undef trunc #define trunc(__x) __tg_trunc(__tg_promote1((__x))(__x)) // carg static float _TG_ATTRS __tg_carg(float __x) {return atan2f(0.F, __x);} static double _TG_ATTRS __tg_carg(double __x) {return atan2(0., __x);} static long double _TG_ATTRS __tg_carg(long double __x) {return atan2l(0.L, __x);} static float _TG_ATTRS __tg_carg(float _Complex __x) {return cargf(__x);} static double _TG_ATTRS __tg_carg(double _Complex __x) {return carg(__x);} static long double _TG_ATTRS __tg_carg(long double _Complex __x) {return cargl(__x);} #undef carg #define carg(__x) __tg_carg(__tg_promote1((__x))(__x)) // cimag static float _TG_ATTRS __tg_cimag(float __x) {return 0;} static double _TG_ATTRS __tg_cimag(double __x) {return 0;} static long double _TG_ATTRS __tg_cimag(long double __x) {return 0;} static float _TG_ATTRS __tg_cimag(float _Complex __x) {return cimagf(__x);} static double _TG_ATTRS __tg_cimag(double _Complex __x) {return cimag(__x);} static long double _TG_ATTRS __tg_cimag(long double _Complex __x) 
{return cimagl(__x);} #undef cimag #define cimag(__x) __tg_cimag(__tg_promote1((__x))(__x)) // conj static float _Complex _TG_ATTRS __tg_conj(float __x) {return __x;} static double _Complex _TG_ATTRS __tg_conj(double __x) {return __x;} static long double _Complex _TG_ATTRS __tg_conj(long double __x) {return __x;} static float _Complex _TG_ATTRS __tg_conj(float _Complex __x) {return conjf(__x);} static double _Complex _TG_ATTRS __tg_conj(double _Complex __x) {return conj(__x);} static long double _Complex _TG_ATTRS __tg_conj(long double _Complex __x) {return conjl(__x);} #undef conj #define conj(__x) __tg_conj(__tg_promote1((__x))(__x)) // cproj static float _Complex _TG_ATTRS __tg_cproj(float __x) {return cprojf(__x);} static double _Complex _TG_ATTRS __tg_cproj(double __x) {return cproj(__x);} static long double _Complex _TG_ATTRS __tg_cproj(long double __x) {return cprojl(__x);} static float _Complex _TG_ATTRS __tg_cproj(float _Complex __x) {return cprojf(__x);} static double _Complex _TG_ATTRS __tg_cproj(double _Complex __x) {return cproj(__x);} static long double _Complex _TG_ATTRS __tg_cproj(long double _Complex __x) {return cprojl(__x);} #undef cproj #define cproj(__x) __tg_cproj(__tg_promote1((__x))(__x)) // creal static float _TG_ATTRS __tg_creal(float __x) {return __x;} static double _TG_ATTRS __tg_creal(double __x) {return __x;} static long double _TG_ATTRS __tg_creal(long double __x) {return __x;} static float _TG_ATTRS __tg_creal(float _Complex __x) {return crealf(__x);} static double _TG_ATTRS __tg_creal(double _Complex __x) {return creal(__x);} static long double _TG_ATTRS __tg_creal(long double _Complex __x) {return creall(__x);} #undef creal #define creal(__x) __tg_creal(__tg_promote1((__x))(__x)) #undef _TG_ATTRSp #undef _TG_ATTRS #endif /* __cplusplus */ #endif /* __has_include_next */ #endif /* __CLANG_TGMATH_H */ llvm_libc_wrappers/stdio.h/*===---- emmintrin.h - Implementation of SSE2 intrinsics on PowerPC -------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* Implemented from the specification included in the Intel C++ Compiler User Guide and Reference, version 9.0. */ #ifndef NO_WARN_X86_INTRINSICS /* This header file is to help porting code using Intel intrinsics explicitly from x86_64 to powerpc64/powerpc64le. Since X86 SSE2 intrinsics mainly handles __m128i and __m128d type, PowerPC VMX/VSX ISA is a good match for vector float SIMD operations. However scalar float operations in vector (XMM) registers require the POWER8 VSX ISA (2.07) level. There are differences for data format and placement of float scalars in the vector register, which require extra steps to match SSE2 scalar float semantics on POWER. It should be noted that there's much difference between X86_64's MXSCR and PowerISA's FPSCR/VSCR registers. It's recommended to use portable instead of access MXSCR directly. Most SSE2 scalar float intrinsic operations can be performed more efficiently as C language float scalar operations or optimized to use vector SIMD operations. We recommend this for new applications. */ #error \ "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." 
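/* Illustrative note (not from the upstream header): the caveat above is
   acknowledged at compile time by defining NO_WARN_X86_INTRINSICS. The file
   name and exact target triple below are assumptions for illustration only.

     clang --target=powerpc64le-linux-gnu -mcpu=power8 -DNO_WARN_X86_INTRINSICS \
         -c sse2_kernel.c

   The macro only silences this error; the emulation below still relies on the
   POWER8/VSX facilities described in the comment above. */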
#endif

#ifndef EMMINTRIN_H_
#define EMMINTRIN_H_

#if defined(__powerpc64__) && (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))

#include <altivec.h>

/* We need definitions from the SSE header files. */
#include <xmmintrin.h>

/* SSE2 */
typedef __vector double __v2df;
typedef __vector float __v4f;
typedef __vector long long __v2di;
typedef __vector unsigned long long __v2du;
typedef __vector int __v4si;
typedef __vector unsigned int __v4su;
typedef __vector short __v8hi;
typedef __vector unsigned short __v8hu;
typedef __vector signed char __v16qi;
typedef __vector unsigned char __v16qu;

/* The Intel API is flexible enough that we must allow aliasing with other vector types, and their scalar components. */
typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__));
typedef double __m128d __attribute__((__vector_size__(16), __may_alias__));

/* Unaligned version of the same types. */
typedef long long __m128i_u __attribute__((__vector_size__(16), __may_alias__, __aligned__(1)));
typedef double __m128d_u __attribute__((__vector_size__(16), __may_alias__, __aligned__(1)));

/* Define two value permute mask. */
#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))

/* Create a vector with element 0 as F and the rest zero. */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_sd(double __F) { return __extension__(__m128d){__F, 0.0}; }
/* Create a vector with both elements equal to F. */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_pd(double __F) { return __extension__(__m128d){__F, __F}; }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_pd1(double __F) { return _mm_set1_pd(__F); }
/* Create a vector with the lower value X and upper value W. */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_pd(double __W, double __X) { return __extension__(__m128d){__X, __W}; }
/* Create a vector with the lower value W and upper value X. */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setr_pd(double __W, double __X) { return __extension__(__m128d){__W, __X}; }
/* Create an undefined vector. */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_undefined_pd(void) { __m128d __Y = __Y; return __Y; }
/* Create a vector of zeros. */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setzero_pd(void) { return (__m128d)vec_splats(0); }
/* Sets the low DPFP value of A from the low value of B. */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_move_sd(__m128d __A, __m128d __B) { __v2df __result = (__v2df)__A; __result[0] = ((__v2df)__B)[0]; return (__m128d)__result; }
/* Load two DPFP values from P. The address must be 16-byte aligned. */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load_pd(double const *__P) { return ((__m128d)vec_ld(0, (__v16qu *)__P)); }
/* Load two DPFP values from P. The address need not be 16-byte aligned. */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadu_pd(double const *__P) { return (vec_vsx_ld(0, __P)); }
/* Create a vector with both elements equal to *P.
*/ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load1_pd(double const *__P) { return (vec_splats(*__P)); } /* Create a vector with element 0 as *P and the rest zero. */ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load_sd(double const *__P) { return _mm_set_sd(*__P); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load_pd1(double const *__P) { return _mm_load1_pd(__P); } /* Load two DPFP values in reverse order. The address must be aligned. */ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadr_pd(double const *__P) { __v2df __tmp = _mm_load_pd(__P); return (__m128d)vec_xxpermdi(__tmp, __tmp, 2); } /* Store two DPFP values. The address must be 16-byte aligned. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_pd(double *__P, __m128d __A) { vec_st((__v16qu)__A, 0, (__v16qu *)__P); } /* Store two DPFP values. The address need not be 16-byte aligned. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storeu_pd(double *__P, __m128d __A) { *(__m128d_u *)__P = __A; } /* Stores the lower DPFP value. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_sd(double *__P, __m128d __A) { *__P = ((__v2df)__A)[0]; } extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsd_f64(__m128d __A) { return ((__v2df)__A)[0]; } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storel_pd(double *__P, __m128d __A) { _mm_store_sd(__P, __A); } /* Stores the upper DPFP value. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storeh_pd(double *__P, __m128d __A) { *__P = ((__v2df)__A)[1]; } /* Store the lower DPFP value across two words. The address must be 16-byte aligned. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store1_pd(double *__P, __m128d __A) { _mm_store_pd(__P, vec_splat(__A, 0)); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_pd1(double *__P, __m128d __A) { _mm_store1_pd(__P, __A); } /* Store two DPFP values in reverse order. The address must be aligned. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storer_pd(double *__P, __m128d __A) { _mm_store_pd(__P, vec_xxpermdi(__A, __A, 2)); } /* Intel intrinsic. */ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi128_si64(__m128i __A) { return ((__v2di)__A)[0]; } /* Microsoft intrinsic. */ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi128_si64x(__m128i __A) { return ((__v2di)__A)[0]; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_pd(__m128d __A, __m128d __B) { return (__m128d)((__v2df)__A + (__v2df)__B); } /* Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. 
*/ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_sd(__m128d __A, __m128d __B) { __A[0] = __A[0] + __B[0]; return (__A); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_pd(__m128d __A, __m128d __B) { return (__m128d)((__v2df)__A - (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_sd(__m128d __A, __m128d __B) { __A[0] = __A[0] - __B[0]; return (__A); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mul_pd(__m128d __A, __m128d __B) { return (__m128d)((__v2df)__A * (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mul_sd(__m128d __A, __m128d __B) { __A[0] = __A[0] * __B[0]; return (__A); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_div_pd(__m128d __A, __m128d __B) { return (__m128d)((__v2df)__A / (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_div_sd(__m128d __A, __m128d __B) { __A[0] = __A[0] / __B[0]; return (__A); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sqrt_pd(__m128d __A) { return (vec_sqrt(__A)); } /* Return pair {sqrt (B[0]), A[1]}. */ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sqrt_sd(__m128d __A, __m128d __B) { __v2df __c; __c = vec_sqrt((__v2df)_mm_set1_pd(__B[0])); return (__m128d)_mm_setr_pd(__c[0], __A[1]); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_pd(__m128d __A, __m128d __B) { return (vec_min(__A, __B)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_sd(__m128d __A, __m128d __B) { __v2df __a, __b, __c; __a = vec_splats(__A[0]); __b = vec_splats(__B[0]); __c = vec_min(__a, __b); return (__m128d)_mm_setr_pd(__c[0], __A[1]); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_pd(__m128d __A, __m128d __B) { return (vec_max(__A, __B)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_sd(__m128d __A, __m128d __B) { __v2df __a, __b, __c; __a = vec_splats(__A[0]); __b = vec_splats(__B[0]); __c = vec_max(__a, __b); return (__m128d)_mm_setr_pd(__c[0], __A[1]); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_pd(__m128d __A, __m128d __B) { return ((__m128d)vec_cmpeq((__v2df)__A, (__v2df)__B)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_pd(__m128d __A, __m128d __B) { return ((__m128d)vec_cmplt((__v2df)__A, (__v2df)__B)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmple_pd(__m128d __A, __m128d __B) { return ((__m128d)vec_cmple((__v2df)__A, (__v2df)__B)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_pd(__m128d __A, __m128d __B) { return ((__m128d)vec_cmpgt((__v2df)__A, (__v2df)__B)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpge_pd(__m128d __A, __m128d __B) { return ((__m128d)vec_cmpge((__v2df)__A, (__v2df)__B)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
_mm_cmpneq_pd(__m128d __A, __m128d __B) { __v2df __temp = (__v2df)vec_cmpeq((__v2df)__A, (__v2df)__B); return ((__m128d)vec_nor(__temp, __temp)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnlt_pd(__m128d __A, __m128d __B) { return ((__m128d)vec_cmpge((__v2df)__A, (__v2df)__B)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnle_pd(__m128d __A, __m128d __B) { return ((__m128d)vec_cmpgt((__v2df)__A, (__v2df)__B)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpngt_pd(__m128d __A, __m128d __B) { return ((__m128d)vec_cmple((__v2df)__A, (__v2df)__B)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnge_pd(__m128d __A, __m128d __B) { return ((__m128d)vec_cmplt((__v2df)__A, (__v2df)__B)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpord_pd(__m128d __A, __m128d __B) { __v2du __c, __d; /* Compare against self will return false (0's) if NAN. */ __c = (__v2du)vec_cmpeq(__A, __A); __d = (__v2du)vec_cmpeq(__B, __B); /* A != NAN and B != NAN. */ return ((__m128d)vec_and(__c, __d)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpunord_pd(__m128d __A, __m128d __B) { #if _ARCH_PWR8 __v2du __c, __d; /* Compare against self will return false (0's) if NAN. */ __c = (__v2du)vec_cmpeq((__v2df)__A, (__v2df)__A); __d = (__v2du)vec_cmpeq((__v2df)__B, (__v2df)__B); /* A == NAN OR B == NAN converts too: NOT(A != NAN) OR NOT(B != NAN). */ __c = vec_nor(__c, __c); return ((__m128d)vec_orc(__c, __d)); #else __v2du __c, __d; /* Compare against self will return false (0's) if NAN. */ __c = (__v2du)vec_cmpeq((__v2df)__A, (__v2df)__A); __d = (__v2du)vec_cmpeq((__v2df)__B, (__v2df)__B); /* Convert the true ('1's) is NAN. */ __c = vec_nor(__c, __c); __d = vec_nor(__d, __d); return ((__m128d)vec_or(__c, __d)); #endif } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_sd(__m128d __A, __m128d __B) { __v2df __a, __b, __c; /* PowerISA VSX does not allow partial (for just lower double) results. So to insure we don't generate spurious exceptions (from the upper double values) we splat the lower double before we do the operation. */ __a = vec_splats(__A[0]); __b = vec_splats(__B[0]); __c = (__v2df)vec_cmpeq(__a, __b); /* Then we merge the lower double result with the original upper double from __A. 
*/ return (__m128d)_mm_setr_pd(__c[0], __A[1]); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_sd(__m128d __A, __m128d __B) { __v2df __a, __b, __c; __a = vec_splats(__A[0]); __b = vec_splats(__B[0]); __c = (__v2df)vec_cmplt(__a, __b); return (__m128d)_mm_setr_pd(__c[0], __A[1]); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmple_sd(__m128d __A, __m128d __B) { __v2df __a, __b, __c; __a = vec_splats(__A[0]); __b = vec_splats(__B[0]); __c = (__v2df)vec_cmple(__a, __b); return (__m128d)_mm_setr_pd(__c[0], __A[1]); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_sd(__m128d __A, __m128d __B) { __v2df __a, __b, __c; __a = vec_splats(__A[0]); __b = vec_splats(__B[0]); __c = (__v2df)vec_cmpgt(__a, __b); return (__m128d)_mm_setr_pd(__c[0], __A[1]); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpge_sd(__m128d __A, __m128d __B) { __v2df __a, __b, __c; __a = vec_splats(__A[0]); __b = vec_splats(__B[0]); __c = (__v2df)vec_cmpge(__a, __b); return (__m128d)_mm_setr_pd(__c[0], __A[1]); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpneq_sd(__m128d __A, __m128d __B) { __v2df __a, __b, __c; __a = vec_splats(__A[0]); __b = vec_splats(__B[0]); __c = (__v2df)vec_cmpeq(__a, __b); __c = vec_nor(__c, __c); return (__m128d)_mm_setr_pd(__c[0], __A[1]); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnlt_sd(__m128d __A, __m128d __B) { __v2df __a, __b, __c; __a = vec_splats(__A[0]); __b = vec_splats(__B[0]); /* Not less than is just greater than or equal. */ __c = (__v2df)vec_cmpge(__a, __b); return (__m128d)_mm_setr_pd(__c[0], __A[1]); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnle_sd(__m128d __A, __m128d __B) { __v2df __a, __b, __c; __a = vec_splats(__A[0]); __b = vec_splats(__B[0]); /* Not less than or equal is just greater than. */ __c = (__v2df)vec_cmpgt(__a, __b); return (__m128d)_mm_setr_pd(__c[0], __A[1]); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpngt_sd(__m128d __A, __m128d __B) { __v2df __a, __b, __c; __a = vec_splats(__A[0]); __b = vec_splats(__B[0]); /* Not greater than is just less than or equal. */ __c = (__v2df)vec_cmple(__a, __b); return (__m128d)_mm_setr_pd(__c[0], __A[1]); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnge_sd(__m128d __A, __m128d __B) { __v2df __a, __b, __c; __a = vec_splats(__A[0]); __b = vec_splats(__B[0]); /* Not greater than or equal is just less than.
*/ __c = (__v2df)vec_cmplt(__a, __b); return (__m128d)_mm_setr_pd(__c[0], __A[1]); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpord_sd(__m128d __A, __m128d __B) { __v2df __r; __r = (__v2df)_mm_cmpord_pd(vec_splats(__A[0]), vec_splats(__B[0])); return (__m128d)_mm_setr_pd(__r[0], ((__v2df)__A)[1]); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpunord_sd(__m128d __A, __m128d __B) { __v2df __r; __r = _mm_cmpunord_pd(vec_splats(__A[0]), vec_splats(__B[0])); return (__m128d)_mm_setr_pd(__r[0], __A[1]); } /* FIXME The __mm_comi??_sd and __mm_ucomi??_sd implementations below are exactly the same because GCC for PowerPC only generates unordered compares (scalar and vector). Technically __mm_comieq_sp et all should be using the ordered compare and signal for QNaNs. The __mm_ucomieq_sd et all should be OK. */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comieq_sd(__m128d __A, __m128d __B) { return (__A[0] == __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comilt_sd(__m128d __A, __m128d __B) { return (__A[0] < __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comile_sd(__m128d __A, __m128d __B) { return (__A[0] <= __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comigt_sd(__m128d __A, __m128d __B) { return (__A[0] > __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comige_sd(__m128d __A, __m128d __B) { return (__A[0] >= __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comineq_sd(__m128d __A, __m128d __B) { return (__A[0] != __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomieq_sd(__m128d __A, __m128d __B) { return (__A[0] == __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomilt_sd(__m128d __A, __m128d __B) { return (__A[0] < __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomile_sd(__m128d __A, __m128d __B) { return (__A[0] <= __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomigt_sd(__m128d __A, __m128d __B) { return (__A[0] > __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomige_sd(__m128d __A, __m128d __B) { return (__A[0] >= __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomineq_sd(__m128d __A, __m128d __B) { return (__A[0] != __B[0]); } /* Create a vector of Qi, where i is the element number. 
*/ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_epi64x(long long __q1, long long __q0) { return __extension__(__m128i)(__v2di){__q0, __q1}; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_epi64(__m64 __q1, __m64 __q0) { return _mm_set_epi64x((long long)__q1, (long long)__q0); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_epi32(int __q3, int __q2, int __q1, int __q0) { return __extension__(__m128i)(__v4si){__q0, __q1, __q2, __q3}; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_epi16(short __q7, short __q6, short __q5, short __q4, short __q3, short __q2, short __q1, short __q0) { return __extension__(__m128i)(__v8hi){__q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7}; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_epi8(char __q15, char __q14, char __q13, char __q12, char __q11, char __q10, char __q09, char __q08, char __q07, char __q06, char __q05, char __q04, char __q03, char __q02, char __q01, char __q00) { return __extension__(__m128i)(__v16qi){ __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07, __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15}; } /* Set all of the elements of the vector to A. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_epi64x(long long __A) { return _mm_set_epi64x(__A, __A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_epi64(__m64 __A) { return _mm_set_epi64(__A, __A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_epi32(int __A) { return _mm_set_epi32(__A, __A, __A, __A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_epi16(short __A) { return _mm_set_epi16(__A, __A, __A, __A, __A, __A, __A, __A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_epi8(char __A) { return _mm_set_epi8(__A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A); } /* Create a vector of Qi, where i is the element number. The parameter order is reversed from the _mm_set_epi* functions. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setr_epi64(__m64 __q0, __m64 __q1) { return _mm_set_epi64(__q1, __q0); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setr_epi32(int __q0, int __q1, int __q2, int __q3) { return _mm_set_epi32(__q3, __q2, __q1, __q0); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setr_epi16(short __q0, short __q1, short __q2, short __q3, short __q4, short __q5, short __q6, short __q7) { return _mm_set_epi16(__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setr_epi8(char __q00, char __q01, char __q02, char __q03, char __q04, char __q05, char __q06, char __q07, char __q08, char __q09, char __q10, char __q11, char __q12, char __q13, char __q14, char __q15) { return _mm_set_epi8(__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08, __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00); } /* Create a vector with element 0 as *P and the rest zero. 
*/ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load_si128(__m128i const *__P) { return *__P; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadu_si128(__m128i_u const *__P) { return (__m128i)(vec_vsx_ld(0, (signed int const *)__P)); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadl_epi64(__m128i_u const *__P) { return _mm_set_epi64((__m64)0LL, *(__m64 *)__P); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_si128(__m128i *__P, __m128i __B) { vec_st((__v16qu)__B, 0, (__v16qu *)__P); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storeu_si128(__m128i_u *__P, __m128i __B) { *__P = __B; } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storel_epi64(__m128i_u *__P, __m128i __B) { *(long long *)__P = ((__v2di)__B)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movepi64_pi64(__m128i_u __B) { return (__m64)((__v2di)__B)[0]; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movpi64_epi64(__m64 __A) { return _mm_set_epi64((__m64)0LL, __A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_move_epi64(__m128i __A) { return _mm_set_epi64((__m64)0LL, (__m64)__A[0]); } /* Create an undefined vector. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_undefined_si128(void) { __m128i __Y = __Y; return __Y; } /* Create a vector of zeros. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setzero_si128(void) { return __extension__(__m128i)(__v4si){0, 0, 0, 0}; } #ifdef _ARCH_PWR8 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi32_pd(__m128i __A) { __v2di __val; /* For LE need to generate Vector Unpack Low Signed Word. Which is generated from unpackh. */ __val = (__v2di)vec_unpackh((__v4si)__A); return (__m128d)vec_ctf(__val, 0); } #endif extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi32_ps(__m128i __A) { return ((__m128)vec_ctf((__v4si)__A, 0)); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpd_epi32(__m128d __A) { __v2df __rounded = vec_rint(__A); __v4si __result, __temp; const __v4si __vzero = {0, 0, 0, 0}; /* VSX Vector truncate Double-Precision to integer and Convert to Signed Integer Word format with Saturate. 
*/ __asm__("xvcvdpsxws %x0,%x1" : "=wa"(__temp) : "wa"(__rounded) :); #ifdef _ARCH_PWR8 #ifdef __LITTLE_ENDIAN__ __temp = vec_mergeo(__temp, __temp); #else __temp = vec_mergee(__temp, __temp); #endif __result = (__v4si)vec_vpkudum((__vector long long)__temp, (__vector long long)__vzero); #else { const __v16qu __pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f}; __result = (__v4si)vec_perm((__v16qu)__temp, (__v16qu)__vzero, __pkperm); } #endif return (__m128i)__result; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpd_pi32(__m128d __A) { __m128i __result = _mm_cvtpd_epi32(__A); return (__m64)__result[0]; } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpd_ps(__m128d __A) { __v4sf __result; __v4si __temp; const __v4si __vzero = {0, 0, 0, 0}; __asm__("xvcvdpsp %x0,%x1" : "=wa"(__temp) : "wa"(__A) :); #ifdef _ARCH_PWR8 #ifdef __LITTLE_ENDIAN__ __temp = vec_mergeo(__temp, __temp); #else __temp = vec_mergee(__temp, __temp); #endif __result = (__v4sf)vec_vpkudum((__vector long long)__temp, (__vector long long)__vzero); #else { const __v16qu __pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f}; __result = (__v4sf)vec_perm((__v16qu)__temp, (__v16qu)__vzero, __pkperm); } #endif return ((__m128)__result); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttpd_epi32(__m128d __A) { __v4si __result; __v4si __temp; const __v4si __vzero = {0, 0, 0, 0}; /* VSX Vector truncate Double-Precision to integer and Convert to Signed Integer Word format with Saturate. */ __asm__("xvcvdpsxws %x0,%x1" : "=wa"(__temp) : "wa"(__A) :); #ifdef _ARCH_PWR8 #ifdef __LITTLE_ENDIAN__ __temp = vec_mergeo(__temp, __temp); #else __temp = vec_mergee(__temp, __temp); #endif __result = (__v4si)vec_vpkudum((__vector long long)__temp, (__vector long long)__vzero); #else { const __v16qu __pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f}; __result = (__v4si)vec_perm((__v16qu)__temp, (__v16qu)__vzero, __pkperm); } #endif return ((__m128i)__result); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttpd_pi32(__m128d __A) { __m128i __result = _mm_cvttpd_epi32(__A); return (__m64)__result[0]; } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi128_si32(__m128i __A) { return ((__v4si)__A)[0]; } #ifdef _ARCH_PWR8 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpi32_pd(__m64 __A) { __v4si __temp; __v2di __tmp2; __v4f __result; __temp = (__v4si)vec_splats(__A); __tmp2 = (__v2di)vec_unpackl(__temp); __result = vec_ctf((__vector signed long long)__tmp2, 0); return (__m128d)__result; } #endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtps_epi32(__m128 __A) { __v4sf __rounded; __v4si __result; __rounded = vec_rint((__v4sf)__A); __result = vec_cts(__rounded, 0); return (__m128i)__result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttps_epi32(__m128 __A) { __v4si __result; __result = vec_cts((__v4sf)__A, 0); return (__m128i)__result; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtps_pd(__m128 __A) { /* Check if vec_doubleh is defined by . 
If so use that. */ #ifdef vec_doubleh return (__m128d)vec_doubleh((__v4sf)__A); #else /* Otherwise the compiler is not current and so need to generate the equivalent code. */ __v4sf __a = (__v4sf)__A; __v4sf __temp; __v2df __result; #ifdef __LITTLE_ENDIAN__ /* The input float values are in elements {[0], [1]} but the convert instruction needs them in elements {[1], [3]}, So we use two shift left double vector word immediates to get the elements lined up. */ __temp = __builtin_vsx_xxsldwi(__a, __a, 3); __temp = __builtin_vsx_xxsldwi(__a, __temp, 2); #else /* The input float values are in elements {[0], [1]} but the convert instruction needs them in elements {[0], [2]}, So we use two shift left double vector word immediates to get the elements lined up. */ __temp = vec_vmrghw(__a, __a); #endif __asm__(" xvcvspdp %x0,%x1" : "=wa"(__result) : "wa"(__temp) :); return (__m128d)__result; #endif } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsd_si32(__m128d __A) { __v2df __rounded = vec_rint((__v2df)__A); int __result = ((__v2df)__rounded)[0]; return __result; } /* Intel intrinsic. */ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsd_si64(__m128d __A) { __v2df __rounded = vec_rint((__v2df)__A); long long __result = ((__v2df)__rounded)[0]; return __result; } /* Microsoft intrinsic. */ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsd_si64x(__m128d __A) { return _mm_cvtsd_si64((__v2df)__A); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttsd_si32(__m128d __A) { int __result = ((__v2df)__A)[0]; return __result; } /* Intel intrinsic. */ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttsd_si64(__m128d __A) { long long __result = ((__v2df)__A)[0]; return __result; } /* Microsoft intrinsic. */ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttsd_si64x(__m128d __A) { return _mm_cvttsd_si64(__A); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsd_ss(__m128 __A, __m128d __B) { __v4sf __result = (__v4sf)__A; #ifdef __LITTLE_ENDIAN__ __v4sf __temp_s; /* Copy double element[0] to element [1] for conversion. */ __v2df __temp_b = vec_splat((__v2df)__B, 0); /* Pre-rotate __A left 3 (logically right 1) elements. */ __result = __builtin_vsx_xxsldwi(__result, __result, 3); /* Convert double to single float scalar in a vector. */ __asm__("xscvdpsp %x0,%x1" : "=wa"(__temp_s) : "wa"(__temp_b) :); /* Shift the resulting scalar into vector element [0]. */ __result = __builtin_vsx_xxsldwi(__result, __temp_s, 1); #else __result[0] = ((__v2df)__B)[0]; #endif return (__m128)__result; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi32_sd(__m128d __A, int __B) { __v2df __result = (__v2df)__A; double __db = __B; __result[0] = __db; return (__m128d)__result; } /* Intel intrinsic. */ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi64_sd(__m128d __A, long long __B) { __v2df __result = (__v2df)__A; double __db = __B; __result[0] = __db; return (__m128d)__result; } /* Microsoft intrinsic. 
*/ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi64x_sd(__m128d __A, long long __B) { return _mm_cvtsi64_sd(__A, __B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtss_sd(__m128d __A, __m128 __B) { #ifdef __LITTLE_ENDIAN__ /* Use splat to move element [0] into position for the convert. */ __v4sf __temp = vec_splat((__v4sf)__B, 0); __v2df __res; /* Convert single float scalar to double in a vector. */ __asm__("xscvspdp %x0,%x1" : "=wa"(__res) : "wa"(__temp) :); return (__m128d)vec_mergel(__res, (__v2df)__A); #else __v2df __res = (__v2df)__A; __res[0] = ((__v4sf)__B)[0]; return (__m128d)__res; #endif } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask) { __vector double __result; const int __litmsk = __mask & 0x3; if (__litmsk == 0) __result = vec_mergeh(__A, __B); #if __GNUC__ < 6 else if (__litmsk == 1) __result = vec_xxpermdi(__B, __A, 2); else if (__litmsk == 2) __result = vec_xxpermdi(__B, __A, 1); #else else if (__litmsk == 1) __result = vec_xxpermdi(__A, __B, 2); else if (__litmsk == 2) __result = vec_xxpermdi(__A, __B, 1); #endif else __result = vec_mergel(__A, __B); return __result; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpackhi_pd(__m128d __A, __m128d __B) { return (__m128d)vec_mergel((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpacklo_pd(__m128d __A, __m128d __B) { return (__m128d)vec_mergeh((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadh_pd(__m128d __A, double const *__B) { __v2df __result = (__v2df)__A; __result[1] = *__B; return (__m128d)__result; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadl_pd(__m128d __A, double const *__B) { __v2df __result = (__v2df)__A; __result[0] = *__B; return (__m128d)__result; } #ifdef _ARCH_PWR8 /* Intrinsic functions that require PowerISA 2.07 minimum. */ /* Creates a 2-bit mask from the most significant bits of the DPFP values. 
*/ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movemask_pd(__m128d __A) { #ifdef _ARCH_PWR10 return vec_extractm((__v2du)__A); #else __vector unsigned long long __result; static const __vector unsigned int __perm_mask = { #ifdef __LITTLE_ENDIAN__ 0x80800040, 0x80808080, 0x80808080, 0x80808080 #else 0x80808080, 0x80808080, 0x80808080, 0x80804000 #endif }; __result = ((__vector unsigned long long)vec_vbpermq( (__vector unsigned char)__A, (__vector unsigned char)__perm_mask)); #ifdef __LITTLE_ENDIAN__ return __result[1]; #else return __result[0]; #endif #endif /* !_ARCH_PWR10 */ } #endif /* _ARCH_PWR8 */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_packs_epi16(__m128i __A, __m128i __B) { return (__m128i)vec_packs((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_packs_epi32(__m128i __A, __m128i __B) { return (__m128i)vec_packs((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_packus_epi16(__m128i __A, __m128i __B) { return (__m128i)vec_packsu((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpackhi_epi8(__m128i __A, __m128i __B) { return (__m128i)vec_mergel((__v16qu)__A, (__v16qu)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpackhi_epi16(__m128i __A, __m128i __B) { return (__m128i)vec_mergel((__v8hu)__A, (__v8hu)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpackhi_epi32(__m128i __A, __m128i __B) { return (__m128i)vec_mergel((__v4su)__A, (__v4su)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpackhi_epi64(__m128i __A, __m128i __B) { return (__m128i)vec_mergel((__vector long long)__A, (__vector long long)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpacklo_epi8(__m128i __A, __m128i __B) { return (__m128i)vec_mergeh((__v16qu)__A, (__v16qu)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpacklo_epi16(__m128i __A, __m128i __B) { return (__m128i)vec_mergeh((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpacklo_epi32(__m128i __A, __m128i __B) { return (__m128i)vec_mergeh((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpacklo_epi64(__m128i __A, __m128i __B) { return (__m128i)vec_mergeh((__vector long long)__A, (__vector long long)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_epi8(__m128i __A, __m128i __B) { return (__m128i)((__v16qu)__A + (__v16qu)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_epi16(__m128i __A, __m128i __B) { return (__m128i)((__v8hu)__A + (__v8hu)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_epi32(__m128i __A, __m128i __B) { return (__m128i)((__v4su)__A + (__v4su)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_epi64(__m128i __A, __m128i __B) { return (__m128i)((__v2du)__A + (__v2du)__B); } 
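/* Editor's note -- illustrative usage sketch, not part of the original header.
   A minimal example of how the intrinsics defined above behave, assuming a
   VSX-capable POWER8 target (needed for _mm_movemask_pd) and a translation
   unit compiled with -DNO_WARN_X86_INTRINSICS.  _mm_set_pd(__W, __X) places
   __X in element 0 (the low double) and __W in element 1, so the movemask
   below collects the sign bits as (sign of element 1) << 1 | (sign of
   element 0):

       #include <emmintrin.h>
       #include <stdio.h>

       int main(void) {
         __m128d v = _mm_set_pd(-1.0, 2.0);  // element 0 = 2.0, element 1 = -1.0
         __m128i a = _mm_add_epi32(_mm_set1_epi32(3), _mm_set1_epi32(4));
         printf("%d\n", _mm_movemask_pd(v));    // prints 2 (binary 10)
         printf("%d\n", _mm_cvtsi128_si32(a));  // prints 7 (3 + 4, from element 0)
         return 0;
       }
*/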
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_adds_epi8(__m128i __A, __m128i __B) { return (__m128i)vec_adds((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_adds_epi16(__m128i __A, __m128i __B) { return (__m128i)vec_adds((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_adds_epu8(__m128i __A, __m128i __B) { return (__m128i)vec_adds((__v16qu)__A, (__v16qu)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_adds_epu16(__m128i __A, __m128i __B) { return (__m128i)vec_adds((__v8hu)__A, (__v8hu)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_epi8(__m128i __A, __m128i __B) { return (__m128i)((__v16qu)__A - (__v16qu)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_epi16(__m128i __A, __m128i __B) { return (__m128i)((__v8hu)__A - (__v8hu)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_epi32(__m128i __A, __m128i __B) { return (__m128i)((__v4su)__A - (__v4su)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_epi64(__m128i __A, __m128i __B) { return (__m128i)((__v2du)__A - (__v2du)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_subs_epi8(__m128i __A, __m128i __B) { return (__m128i)vec_subs((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_subs_epi16(__m128i __A, __m128i __B) { return (__m128i)vec_subs((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_subs_epu8(__m128i __A, __m128i __B) { return (__m128i)vec_subs((__v16qu)__A, (__v16qu)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_subs_epu16(__m128i __A, __m128i __B) { return (__m128i)vec_subs((__v8hu)__A, (__v8hu)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_madd_epi16(__m128i __A, __m128i __B) { __vector signed int __zero = {0, 0, 0, 0}; return (__m128i)vec_vmsumshm((__v8hi)__A, (__v8hi)__B, __zero); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mulhi_epi16(__m128i __A, __m128i __B) { __vector signed int __w0, __w1; __vector unsigned char __xform1 = { #ifdef __LITTLE_ENDIAN__ 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, 0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F #else 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15, 0x08, 0x09, 0x18, 0x19, 0x0C, 0x0D, 0x1C, 0x1D #endif }; __w0 = vec_vmulesh((__v8hi)__A, (__v8hi)__B); __w1 = vec_vmulosh((__v8hi)__A, (__v8hi)__B); return (__m128i)vec_perm(__w0, __w1, __xform1); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mullo_epi16(__m128i __A, __m128i __B) { return (__m128i)((__v8hi)__A * (__v8hi)__B); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mul_su32(__m64 __A, __m64 __B) { unsigned int __a = __A; unsigned int __b = __B; return ((__m64)__a * (__m64)__b); } #ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
_mm_mul_epu32(__m128i __A, __m128i __B) { #if __GNUC__ < 8 __v2du __result; #ifdef __LITTLE_ENDIAN__ /* VMX Vector Multiply Odd Unsigned Word. */ __asm__("vmulouw %0,%1,%2" : "=v"(__result) : "v"(__A), "v"(__B) :); #else /* VMX Vector Multiply Even Unsigned Word. */ __asm__("vmuleuw %0,%1,%2" : "=v"(__result) : "v"(__A), "v"(__B) :); #endif return (__m128i)__result; #else return (__m128i)vec_mule((__v4su)__A, (__v4su)__B); #endif } #endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_slli_epi16(__m128i __A, int __B) { __v8hu __lshift; __v8hi __result = {0, 0, 0, 0, 0, 0, 0, 0}; if (__B >= 0 && __B < 16) { if (__builtin_constant_p(__B)) __lshift = (__v8hu)vec_splat_s16(__B); else __lshift = vec_splats((unsigned short)__B); __result = vec_sl((__v8hi)__A, __lshift); } return (__m128i)__result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_slli_epi32(__m128i __A, int __B) { __v4su __lshift; __v4si __result = {0, 0, 0, 0}; if (__B >= 0 && __B < 32) { if (__builtin_constant_p(__B) && __B < 16) __lshift = (__v4su)vec_splat_s32(__B); else __lshift = vec_splats((unsigned int)__B); __result = vec_sl((__v4si)__A, __lshift); } return (__m128i)__result; } #ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_slli_epi64(__m128i __A, int __B) { __v2du __lshift; __v2di __result = {0, 0}; if (__B >= 0 && __B < 64) { if (__builtin_constant_p(__B) && __B < 16) __lshift = (__v2du)vec_splat_s32(__B); else __lshift = (__v2du)vec_splats((unsigned int)__B); __result = vec_sl((__v2di)__A, __lshift); } return (__m128i)__result; } #endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srai_epi16(__m128i __A, int __B) { __v8hu __rshift = {15, 15, 15, 15, 15, 15, 15, 15}; __v8hi __result; if (__B < 16) { if (__builtin_constant_p(__B)) __rshift = (__v8hu)vec_splat_s16(__B); else __rshift = vec_splats((unsigned short)__B); } __result = vec_sra((__v8hi)__A, __rshift); return (__m128i)__result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srai_epi32(__m128i __A, int __B) { __v4su __rshift = {31, 31, 31, 31}; __v4si __result; if (__B < 32) { if (__builtin_constant_p(__B)) { if (__B < 16) __rshift = (__v4su)vec_splat_s32(__B); else __rshift = (__v4su)vec_splats((unsigned int)__B); } else __rshift = vec_splats((unsigned int)__B); } __result = vec_sra((__v4si)__A, __rshift); return (__m128i)__result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_bslli_si128(__m128i __A, const int __N) { __v16qu __result; const __v16qu __zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; if (__N < 16) __result = vec_sld((__v16qu)__A, __zeros, __N); else __result = __zeros; return (__m128i)__result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_bsrli_si128(__m128i __A, const int __N) { __v16qu __result; const __v16qu __zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; if (__N < 16) #ifdef __LITTLE_ENDIAN__ if (__builtin_constant_p(__N)) /* Would like to use Vector Shift Left Double by Octet Immediate here to use the immediate form and avoid load of __N * 8 value into a separate VR. 
*/ __result = vec_sld(__zeros, (__v16qu)__A, (16 - __N)); else #endif { __v16qu __shift = vec_splats((unsigned char)(__N * 8)); #ifdef __LITTLE_ENDIAN__ __result = vec_sro((__v16qu)__A, __shift); #else __result = vec_slo((__v16qu)__A, __shift); #endif } else __result = __zeros; return (__m128i)__result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srli_si128(__m128i __A, const int __N) { return _mm_bsrli_si128(__A, __N); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_slli_si128(__m128i __A, const int _imm5) { __v16qu __result; const __v16qu __zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; if (_imm5 < 16) #ifdef __LITTLE_ENDIAN__ __result = vec_sld((__v16qu)__A, __zeros, _imm5); #else __result = vec_sld(__zeros, (__v16qu)__A, (16 - _imm5)); #endif else __result = __zeros; return (__m128i)__result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srli_epi16(__m128i __A, int __B) { __v8hu __rshift; __v8hi __result = {0, 0, 0, 0, 0, 0, 0, 0}; if (__B < 16) { if (__builtin_constant_p(__B)) __rshift = (__v8hu)vec_splat_s16(__B); else __rshift = vec_splats((unsigned short)__B); __result = vec_sr((__v8hi)__A, __rshift); } return (__m128i)__result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srli_epi32(__m128i __A, int __B) { __v4su __rshift; __v4si __result = {0, 0, 0, 0}; if (__B < 32) { if (__builtin_constant_p(__B)) { if (__B < 16) __rshift = (__v4su)vec_splat_s32(__B); else __rshift = (__v4su)vec_splats((unsigned int)__B); } else __rshift = vec_splats((unsigned int)__B); __result = vec_sr((__v4si)__A, __rshift); } return (__m128i)__result; } #ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srli_epi64(__m128i __A, int __B) { __v2du __rshift; __v2di __result = {0, 0}; if (__B < 64) { if (__builtin_constant_p(__B)) { if (__B < 16) __rshift = (__v2du)vec_splat_s32(__B); else __rshift = (__v2du)vec_splats((unsigned long long)__B); } else __rshift = (__v2du)vec_splats((unsigned int)__B); __result = vec_sr((__v2di)__A, __rshift); } return (__m128i)__result; } #endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sll_epi16(__m128i __A, __m128i __B) { __v8hu __lshift; __vector __bool short __shmask; const __v8hu __shmax = {15, 15, 15, 15, 15, 15, 15, 15}; __v8hu __result; #ifdef __LITTLE_ENDIAN__ __lshift = vec_splat((__v8hu)__B, 0); #else __lshift = vec_splat((__v8hu)__B, 3); #endif __shmask = vec_cmple(__lshift, __shmax); __result = vec_sl((__v8hu)__A, __lshift); __result = vec_sel((__v8hu)__shmask, __result, __shmask); return (__m128i)__result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sll_epi32(__m128i __A, __m128i __B) { __v4su __lshift; __vector __bool int __shmask; const __v4su __shmax = {32, 32, 32, 32}; __v4su __result; #ifdef __LITTLE_ENDIAN__ __lshift = vec_splat((__v4su)__B, 0); #else __lshift = vec_splat((__v4su)__B, 1); #endif __shmask = vec_cmplt(__lshift, __shmax); __result = vec_sl((__v4su)__A, __lshift); __result = vec_sel((__v4su)__shmask, __result, __shmask); return (__m128i)__result; } #ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sll_epi64(__m128i __A, __m128i __B) { __v2du __lshift; __vector __bool long long __shmask; const __v2du __shmax = 
{64, 64}; __v2du __result; __lshift = vec_splat((__v2du)__B, 0); __shmask = vec_cmplt(__lshift, __shmax); __result = vec_sl((__v2du)__A, __lshift); __result = vec_sel((__v2du)__shmask, __result, __shmask); return (__m128i)__result; } #endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sra_epi16(__m128i __A, __m128i __B) { const __v8hu __rshmax = {15, 15, 15, 15, 15, 15, 15, 15}; __v8hu __rshift; __v8hi __result; #ifdef __LITTLE_ENDIAN__ __rshift = vec_splat((__v8hu)__B, 0); #else __rshift = vec_splat((__v8hu)__B, 3); #endif __rshift = vec_min(__rshift, __rshmax); __result = vec_sra((__v8hi)__A, __rshift); return (__m128i)__result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sra_epi32(__m128i __A, __m128i __B) { const __v4su __rshmax = {31, 31, 31, 31}; __v4su __rshift; __v4si __result; #ifdef __LITTLE_ENDIAN__ __rshift = vec_splat((__v4su)__B, 0); #else __rshift = vec_splat((__v4su)__B, 1); #endif __rshift = vec_min(__rshift, __rshmax); __result = vec_sra((__v4si)__A, __rshift); return (__m128i)__result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srl_epi16(__m128i __A, __m128i __B) { __v8hu __rshift; __vector __bool short __shmask; const __v8hu __shmax = {15, 15, 15, 15, 15, 15, 15, 15}; __v8hu __result; #ifdef __LITTLE_ENDIAN__ __rshift = vec_splat((__v8hu)__B, 0); #else __rshift = vec_splat((__v8hu)__B, 3); #endif __shmask = vec_cmple(__rshift, __shmax); __result = vec_sr((__v8hu)__A, __rshift); __result = vec_sel((__v8hu)__shmask, __result, __shmask); return (__m128i)__result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srl_epi32(__m128i __A, __m128i __B) { __v4su __rshift; __vector __bool int __shmask; const __v4su __shmax = {32, 32, 32, 32}; __v4su __result; #ifdef __LITTLE_ENDIAN__ __rshift = vec_splat((__v4su)__B, 0); #else __rshift = vec_splat((__v4su)__B, 1); #endif __shmask = vec_cmplt(__rshift, __shmax); __result = vec_sr((__v4su)__A, __rshift); __result = vec_sel((__v4su)__shmask, __result, __shmask); return (__m128i)__result; } #ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srl_epi64(__m128i __A, __m128i __B) { __v2du __rshift; __vector __bool long long __shmask; const __v2du __shmax = {64, 64}; __v2du __result; __rshift = vec_splat((__v2du)__B, 0); __shmask = vec_cmplt(__rshift, __shmax); __result = vec_sr((__v2du)__A, __rshift); __result = vec_sel((__v2du)__shmask, __result, __shmask); return (__m128i)__result; } #endif extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_and_pd(__m128d __A, __m128d __B) { return (vec_and((__v2df)__A, (__v2df)__B)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_andnot_pd(__m128d __A, __m128d __B) { return (vec_andc((__v2df)__B, (__v2df)__A)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_or_pd(__m128d __A, __m128d __B) { return (vec_or((__v2df)__A, (__v2df)__B)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_xor_pd(__m128d __A, __m128d __B) { return (vec_xor((__v2df)__A, (__v2df)__B)); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_and_si128(__m128i __A, __m128i __B) { return (__m128i)vec_and((__v2di)__A, (__v2di)__B); } extern 
__inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_andnot_si128(__m128i __A, __m128i __B) { return (__m128i)vec_andc((__v2di)__B, (__v2di)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_or_si128(__m128i __A, __m128i __B) { return (__m128i)vec_or((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_xor_si128(__m128i __A, __m128i __B) { return (__m128i)vec_xor((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_epi8(__m128i __A, __m128i __B) { return (__m128i)vec_cmpeq((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_epi16(__m128i __A, __m128i __B) { return (__m128i)vec_cmpeq((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_epi32(__m128i __A, __m128i __B) { return (__m128i)vec_cmpeq((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_epi8(__m128i __A, __m128i __B) { return (__m128i)vec_cmplt((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_epi16(__m128i __A, __m128i __B) { return (__m128i)vec_cmplt((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_epi32(__m128i __A, __m128i __B) { return (__m128i)vec_cmplt((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_epi8(__m128i __A, __m128i __B) { return (__m128i)vec_cmpgt((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_epi16(__m128i __A, __m128i __B) { return (__m128i)vec_cmpgt((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_epi32(__m128i __A, __m128i __B) { return (__m128i)vec_cmpgt((__v4si)__A, (__v4si)__B); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_epi16(__m128i const __A, int const __N) { return (unsigned short)((__v8hi)__A)[__N & 7]; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_insert_epi16(__m128i const __A, int const __D, int const __N) { __v8hi __result = (__v8hi)__A; __result[(__N & 7)] = __D; return (__m128i)__result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_epi16(__m128i __A, __m128i __B) { return (__m128i)vec_max((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_epu8(__m128i __A, __m128i __B) { return (__m128i)vec_max((__v16qu)__A, (__v16qu)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_epi16(__m128i __A, __m128i __B) { return (__m128i)vec_min((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_epu8(__m128i __A, __m128i __B) { return (__m128i)vec_min((__v16qu)__A, (__v16qu)__B); } #ifdef _ARCH_PWR8 /* Intrinsic functions that require PowerISA 2.07 minimum. 
*/ /* Return a mask created from the most significant bit of each 8-bit element in A. */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movemask_epi8(__m128i __A) { #ifdef _ARCH_PWR10 return vec_extractm((__v16qu)__A); #else __vector unsigned long long __result; static const __vector unsigned char __perm_mask = { 0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; __result = ((__vector unsigned long long)vec_vbpermq( (__vector unsigned char)__A, (__vector unsigned char)__perm_mask)); #ifdef __LITTLE_ENDIAN__ return __result[1]; #else return __result[0]; #endif #endif /* !_ARCH_PWR10 */ } #endif /* _ARCH_PWR8 */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mulhi_epu16(__m128i __A, __m128i __B) { __v4su __w0, __w1; __v16qu __xform1 = { #ifdef __LITTLE_ENDIAN__ 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, 0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F #else 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15, 0x08, 0x09, 0x18, 0x19, 0x0C, 0x0D, 0x1C, 0x1D #endif }; __w0 = vec_vmuleuh((__v8hu)__A, (__v8hu)__B); __w1 = vec_vmulouh((__v8hu)__A, (__v8hu)__B); return (__m128i)vec_perm(__w0, __w1, __xform1); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shufflehi_epi16(__m128i __A, const int __mask) { unsigned long __element_selector_98 = __mask & 0x03; unsigned long __element_selector_BA = (__mask >> 2) & 0x03; unsigned long __element_selector_DC = (__mask >> 4) & 0x03; unsigned long __element_selector_FE = (__mask >> 6) & 0x03; static const unsigned short __permute_selectors[4] = { #ifdef __LITTLE_ENDIAN__ 0x0908, 0x0B0A, 0x0D0C, 0x0F0E #else 0x0809, 0x0A0B, 0x0C0D, 0x0E0F #endif }; __v2du __pmask = #ifdef __LITTLE_ENDIAN__ {0x1716151413121110UL, 0UL}; #else {0x1011121314151617UL, 0UL}; #endif __m64_union __t; __v2du __a, __r; __t.as_short[0] = __permute_selectors[__element_selector_98]; __t.as_short[1] = __permute_selectors[__element_selector_BA]; __t.as_short[2] = __permute_selectors[__element_selector_DC]; __t.as_short[3] = __permute_selectors[__element_selector_FE]; __pmask[1] = __t.as_m64; __a = (__v2du)__A; __r = vec_perm(__a, __a, (__vector unsigned char)__pmask); return (__m128i)__r; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shufflelo_epi16(__m128i __A, const int __mask) { unsigned long __element_selector_10 = __mask & 0x03; unsigned long __element_selector_32 = (__mask >> 2) & 0x03; unsigned long __element_selector_54 = (__mask >> 4) & 0x03; unsigned long __element_selector_76 = (__mask >> 6) & 0x03; static const unsigned short __permute_selectors[4] = { #ifdef __LITTLE_ENDIAN__ 0x0100, 0x0302, 0x0504, 0x0706 #else 0x0001, 0x0203, 0x0405, 0x0607 #endif }; __v2du __pmask = #ifdef __LITTLE_ENDIAN__ {0UL, 0x1f1e1d1c1b1a1918UL}; #else {0UL, 0x18191a1b1c1d1e1fUL}; #endif __m64_union __t; __v2du __a, __r; __t.as_short[0] = __permute_selectors[__element_selector_10]; __t.as_short[1] = __permute_selectors[__element_selector_32]; __t.as_short[2] = __permute_selectors[__element_selector_54]; __t.as_short[3] = __permute_selectors[__element_selector_76]; __pmask[0] = __t.as_m64; __a = (__v2du)__A; __r = vec_perm(__a, __a, (__vector unsigned char)__pmask); return (__m128i)__r; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shuffle_epi32(__m128i __A, const int __mask) { unsigned long __element_selector_10 = __mask 
& 0x03; unsigned long __element_selector_32 = (__mask >> 2) & 0x03; unsigned long __element_selector_54 = (__mask >> 4) & 0x03; unsigned long __element_selector_76 = (__mask >> 6) & 0x03; static const unsigned int __permute_selectors[4] = { #ifdef __LITTLE_ENDIAN__ 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C #else 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F #endif }; __v4su __t; __t[0] = __permute_selectors[__element_selector_10]; __t[1] = __permute_selectors[__element_selector_32]; __t[2] = __permute_selectors[__element_selector_54] + 0x10101010; __t[3] = __permute_selectors[__element_selector_76] + 0x10101010; return (__m128i)vec_perm((__v4si)__A, (__v4si)__A, (__vector unsigned char)__t); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskmoveu_si128(__m128i __A, __m128i __B, char *__C) { __v2du __hibit = {0x7f7f7f7f7f7f7f7fUL, 0x7f7f7f7f7f7f7f7fUL}; __v16qu __mask, __tmp; __m128i_u *__p = (__m128i_u *)__C; __tmp = (__v16qu)_mm_loadu_si128(__p); __mask = (__v16qu)vec_cmpgt((__v16qu)__B, (__v16qu)__hibit); __tmp = vec_sel(__tmp, (__v16qu)__A, __mask); _mm_storeu_si128(__p, (__m128i)__tmp); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_avg_epu8(__m128i __A, __m128i __B) { return (__m128i)vec_avg((__v16qu)__A, (__v16qu)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_avg_epu16(__m128i __A, __m128i __B) { return (__m128i)vec_avg((__v8hu)__A, (__v8hu)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sad_epu8(__m128i __A, __m128i __B) { __v16qu __a, __b; __v16qu __vabsdiff; __v4si __vsum; const __v4su __zero = {0, 0, 0, 0}; __v4si __result; __a = (__v16qu)__A; __b = (__v16qu)__B; #ifndef _ARCH_PWR9 __v16qu __vmin = vec_min(__a, __b); __v16qu __vmax = vec_max(__a, __b); __vabsdiff = vec_sub(__vmax, __vmin); #else __vabsdiff = vec_absd(__a, __b); #endif /* Sum four groups of bytes into integers. */ __vsum = (__vector signed int)vec_sum4s(__vabsdiff, __zero); #ifdef __LITTLE_ENDIAN__ /* Sum across four integers with two integer results. */ __asm__("vsum2sws %0,%1,%2" : "=v"(__result) : "v"(__vsum), "v"(__zero)); /* Note: vec_sum2s could be used here, but on little-endian, vector shifts are added that are not needed for this use-case. A vector shift to correctly position the 32-bit integer results (currently at [0] and [2]) to [1] and [3] would then need to be swapped back again since the desired results are two 64-bit integers ([1]|[0] and [3]|[2]). Thus, no shift is performed. */ #else /* Sum across four integers with two integer results. */ __result = vec_sum2s(__vsum, (__vector signed int)__zero); /* Rotate the sums into the correct position. */ __result = vec_sld(__result, __result, 6); #endif return (__m128i)__result; } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_stream_si32(int *__A, int __B) { /* Use the data cache block touch for store transient. */ __asm__("dcbtstt 0,%0" : : "b"(__A) : "memory"); *__A = __B; } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_stream_si64(long long int *__A, long long int __B) { /* Use the data cache block touch for store transient. 
*/ __asm__(" dcbtstt 0,%0" : : "b"(__A) : "memory"); *__A = __B; } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_stream_si128(__m128i *__A, __m128i __B) { /* Use the data cache block touch for store transient. */ __asm__("dcbtstt 0,%0" : : "b"(__A) : "memory"); *__A = __B; } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_stream_pd(double *__A, __m128d __B) { /* Use the data cache block touch for store transient. */ __asm__("dcbtstt 0,%0" : : "b"(__A) : "memory"); *(__m128d *)__A = __B; } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_clflush(void const *__A) { /* Use the data cache block flush. */ __asm__("dcbf 0,%0" : : "b"(__A) : "memory"); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_lfence(void) { /* Use light weight sync for load to load ordering. */ __atomic_thread_fence(__ATOMIC_RELEASE); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mfence(void) { /* Use heavy weight sync for any to any ordering. */ __atomic_thread_fence(__ATOMIC_SEQ_CST); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi32_si128(int __A) { return _mm_set_epi32(0, 0, 0, __A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi64_si128(long long __A) { return __extension__(__m128i)(__v2di){__A, 0LL}; } /* Microsoft intrinsic. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi64x_si128(long long __A) { return __extension__(__m128i)(__v2di){__A, 0LL}; } /* Casts between various SP, DP, INT vector types. Note that these do no conversion of values, they just change the type. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_castpd_ps(__m128d __A) { return (__m128)__A; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_castpd_si128(__m128d __A) { return (__m128i)__A; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_castps_pd(__m128 __A) { return (__m128d)__A; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_castps_si128(__m128 __A) { return (__m128i)__A; } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_castsi128_ps(__m128i __A) { return (__m128)__A; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_castsi128_pd(__m128i __A) { return (__m128d)__A; } #else #include_next #endif /* defined(__powerpc64__) && \ * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* EMMINTRIN_H_ */ ppc_wrappers/mm_malloc.hThe document is empty.Rewrite schema error: '\' must be followed by a digit or '\'.[:cntrl:]CuneiformKharoshthiLycianPslen + 1 < sizeof(symbol_buf_)/proc/self/task/%d/mapsReading %zu bytes from offset %ju returned %zd which is negative.Unable to read from fd %d at offset %lld: n_read = %zdlnwde->()%s@ %*p %s base != debugging_internal::ElfMemImage::kInvalidBaseyesNULL waitp == nullptr || waitp->thread->waitp == nullptr || waitp->thread->suppress_fatal_errorsSignalAll on entropy fill failedSHA-1 KATTLS KDF failed. 
/*===---- prfchwintrin.h - PREFETCHW intrinsic -----------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#if !defined(__X86INTRIN_H) && !defined(_MM3DNOW_H_INCLUDED)
#error "Never use directly; include or instead."
#endif

#ifndef __PRFCHWINTRIN_H
#define __PRFCHWINTRIN_H

/// Loads a memory sequence containing the specified memory address into
/// all data cache levels.
///
/// The cache-coherency state is set to exclusive. Data can be read from
/// and written to the cache line without additional delay.
///
/// \headerfile
///
/// This intrinsic corresponds to the \c PREFETCHT0 instruction.
///
/// \param __P
///    A pointer specifying the memory address to be prefetched.
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_m_prefetch(void *__P) {
  __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);
}

/// Loads a memory sequence containing the specified memory address into
/// the L1 data cache and sets the cache-coherency state to modified.
///
/// This provides a hint to the processor that the cache line will be
/// modified. It is intended for use when the cache line will be written to
/// shortly after the prefetch is performed.
///
/// Note that the effect of this intrinsic is dependent on the processor
/// implementation.
///
/// \headerfile
///
/// This intrinsic corresponds to the \c PREFETCHW instruction.
///
/// \param __P
///    A pointer specifying the memory address to be prefetched.
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_m_prefetchw(volatile const void *__P) {
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-qual"
  __builtin_prefetch ((const void*)__P, 1, 3 /* _MM_HINT_T0 */);
#pragma clang diagnostic pop
}

#endif /* __PRFCHWINTRIN_H */

rtmintrin.h
/*===---- rtmintrin.h - RTM intrinsics -------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error "Never use directly; include instead."
#endif

#ifndef __RTMINTRIN_H
#define __RTMINTRIN_H

#define _XBEGIN_STARTED (~0u)
#define _XABORT_EXPLICIT (1 << 0)
#define _XABORT_RETRY (1 << 1)
#define _XABORT_CONFLICT (1 << 2)
#define _XABORT_CAPACITY (1 << 3)
#define _XABORT_DEBUG (1 << 4)
#define _XABORT_NESTED (1 << 5)
#define _XABORT_CODE(x) (((x) >> 24) & 0xFF)

/* Define the default attributes for the functions in this file.
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("rtm"))) static __inline__ unsigned int __DEFAULT_FN_ATTRS _xbegin(void) { return (unsigned int)__builtin_ia32_xbegin(); } static __inline__ void __DEFAULT_FN_ATTRS _xend(void) { __builtin_ia32_xend(); } #define _xabort(imm) __builtin_ia32_xabort((imm)) #undef __DEFAULT_FN_ATTRS #endif /* __RTMINTRIN_H */ velintrin_approx.h/*===--------------- x86gprintrin.h - X86 GPR intrinsics ------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86GPRINTRIN_H #define __X86GPRINTRIN_H #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__HRESET__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__UINTR__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__USERMSR__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__CRC32__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__PRFCHI__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RAOINT__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__CMPCCXADD__) #include #endif #if defined(__i386__) #define __SAVE_GPRBX "mov {%%ebx, %%eax |eax, ebx};" #define __RESTORE_GPRBX "mov {%%eax, %%ebx |ebx, eax};" #define __TMPGPR "eax" #else // When in 64-bit target, the 32-bit operands generate a 32-bit result, // zero-extended to a 64-bit result in the destination general-purpose, // It means "mov x %ebx" will clobber the higher 32 bits of rbx, so we // should preserve the 64-bit register rbx. #define __SAVE_GPRBX "mov {%%rbx, %%rax |rax, rbx};" #define __RESTORE_GPRBX "mov {%%rax, %%rbx |rbx, rax};" #define __TMPGPR "rax" #endif #define __SSC_MARK(__Tag) \ __asm__ __volatile__( __SAVE_GPRBX \ "mov {%0, %%ebx|ebx, %0}; " \ ".byte 0x64, 0x67, 0x90; " \ __RESTORE_GPRBX \ ::"i"(__Tag) \ : __TMPGPR ); #endif /* __X86GPRINTRIN_H */ //===-- Wrapper for C standard ctype.h declarations on the GPU ------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef __CLANG_LLVM_LIBC_WRAPPERS_CTYPE_H__ #define __CLANG_LLVM_LIBC_WRAPPERS_CTYPE_H__ #if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__) #error "This file is for GPU offloading compilation only" #endif // The GNU headers like to define 'toupper' and 'tolower' redundantly. This is // necessary to prevent it from doing that and remapping our implementation. 
#if (defined(__NVPTX__) || defined(__AMDGPU__)) && defined(__GLIBC__) #pragma push_macro("__USE_EXTERN_INLINES") #undef __USE_EXTERN_INLINES #endif #include_next #if (defined(__NVPTX__) || defined(__AMDGPU__)) && defined(__GLIBC__) #pragma pop_macro("__USE_EXTERN_INLINES") #endif #if __has_include() #if defined(__HIP__) || defined(__CUDA__) #define __LIBC_ATTRS __attribute__((device)) #endif // The GNU headers like to provide these as macros, we need to undefine them so // they do not conflict with the following definitions for the GPU. #pragma push_macro("isalnum") #pragma push_macro("isalpha") #pragma push_macro("isascii") #pragma push_macro("isblank") #pragma push_macro("iscntrl") #pragma push_macro("isdigit") #pragma push_macro("isgraph") #pragma push_macro("islower") #pragma push_macro("isprint") #pragma push_macro("ispunct") #pragma push_macro("isspace") #pragma push_macro("isupper") #pragma push_macro("isxdigit") #pragma push_macro("toascii") #pragma push_macro("tolower") #pragma push_macro("toupper") #undef isalnum #undef isalpha #undef isascii #undef iscntrl #undef isdigit #undef islower #undef isgraph #undef isprint #undef ispunct #undef isspace #undef isupper #undef isblank #undef isxdigit #undef toascii #undef tolower #undef toupper #pragma omp begin declare target #include #pragma omp end declare target // Restore the original macros when compiling on the host. #if !defined(__NVPTX__) && !defined(__AMDGPU__) #pragma pop_macro("isalnum") #pragma pop_macro("isalpha") #pragma pop_macro("isascii") #pragma pop_macro("isblank") #pragma pop_macro("iscntrl") #pragma pop_macro("isdigit") #pragma pop_macro("isgraph") #pragma pop_macro("islower") #pragma pop_macro("isprint") #pragma pop_macro("ispunct") #pragma pop_macro("isspace") #pragma pop_macro("isupper") #pragma pop_macro("isxdigit") #pragma pop_macro("toascii") #pragma pop_macro("tolower") #pragma pop_macro("toupper") #endif #undef __LIBC_ATTRS #endif #endif // __CLANG_LLVM_LIBC_WRAPPERS_CTYPE_H__ //===-- Wrapper for C standard stdlib.h declarations on the GPU -----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef __CLANG_LLVM_LIBC_WRAPPERS_STDLIB_H__ #define __CLANG_LLVM_LIBC_WRAPPERS_STDLIB_H__ #if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__) #error "This file is for GPU offloading compilation only" #endif #include_next #if __has_include() #if defined(__HIP__) || defined(__CUDA__) #define __LIBC_ATTRS __attribute__((device)) #endif #pragma omp begin declare target // The LLVM C library uses these named types so we forward declare them. typedef void (*__atexithandler_t)(void); typedef int (*__bsearchcompare_t)(const void *, const void *); typedef int (*__qsortcompare_t)(const void *, const void *); typedef int (*__qsortrcompare_t)(const void *, const void *, void *); // Enforce ABI compatibility with the structs used by the LLVM C library. 
_Static_assert(__builtin_offsetof(div_t, quot) == 0, "ABI mismatch!"); _Static_assert(__builtin_offsetof(ldiv_t, quot) == 0, "ABI mismatch!"); _Static_assert(__builtin_offsetof(lldiv_t, quot) == 0, "ABI mismatch!"); #include #pragma omp end declare target #undef __LIBC_ATTRS #endif #endif // __CLANG_LLVM_LIBC_WRAPPERS_STDLIB_H__ /*===---- immintrin.h - Implementation of Intel intrinsics on PowerPC ------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef IMMINTRIN_H_ #define IMMINTRIN_H_ #include #include #include #include #include #include #include #endif /* IMMINTRIN_H_ */ //===-- allocator_interface.h ---------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Public interface header for allocator used in sanitizers (ASan/TSan/MSan). //===----------------------------------------------------------------------===// #ifndef SANITIZER_ALLOCATOR_INTERFACE_H #define SANITIZER_ALLOCATOR_INTERFACE_H #include #include #ifdef __cplusplus extern "C" { #endif /* Returns the estimated number of bytes that will be reserved by allocator for request of "size" bytes. If allocator can't allocate that much memory, returns the maximal possible allocation size, otherwise returns "size". */ size_t SANITIZER_CDECL __sanitizer_get_estimated_allocated_size(size_t size); /* Returns true if p was returned by the allocator and is not yet freed. */ int SANITIZER_CDECL __sanitizer_get_ownership(const volatile void *p); /* If a pointer lies within an allocation, it will return the start address of the allocation. Otherwise, it returns nullptr. */ const void *SANITIZER_CDECL __sanitizer_get_allocated_begin(const void *p); /* Returns the number of bytes reserved for the pointer p. Requires (get_ownership(p) == true) or (p == 0). */ size_t SANITIZER_CDECL __sanitizer_get_allocated_size(const volatile void *p); /* Returns the number of bytes reserved for the pointer p. Requires __sanitizer_get_allocated_begin(p) == p. */ size_t SANITIZER_CDECL __sanitizer_get_allocated_size_fast(const volatile void *p); /* Number of bytes, allocated and not yet freed by the application. */ size_t SANITIZER_CDECL __sanitizer_get_current_allocated_bytes(void); /* Number of bytes, mmaped by the allocator to fulfill allocation requests. Generally, for request of X bytes, allocator can reserve and add to free lists a large number of chunks of size X to use them for future requests. All these chunks count toward the heap size. Currently, allocator never releases memory to OS (instead, it just puts freed chunks to free lists). */ size_t SANITIZER_CDECL __sanitizer_get_heap_size(void); /* Number of bytes, mmaped by the allocator, which can be used to fulfill allocation requests. When a user program frees memory chunk, it can first fall into quarantine and will count toward __sanitizer_get_free_bytes() later. */ size_t SANITIZER_CDECL __sanitizer_get_free_bytes(void); /* Number of bytes in unmapped pages, that are released to OS. Currently, always returns 0. 
*/
size_t SANITIZER_CDECL __sanitizer_get_unmapped_bytes(void);

/* Malloc hooks that may be optionally provided by user.
   __sanitizer_malloc_hook(ptr, size) is called immediately after allocation
   of "size" bytes, which returned "ptr".
   __sanitizer_free_hook(ptr) is called immediately before deallocation of
   "ptr". */
void SANITIZER_CDECL __sanitizer_malloc_hook(const volatile void *ptr,
                                             size_t size);
void SANITIZER_CDECL __sanitizer_free_hook(const volatile void *ptr);

/* Installs a pair of hooks for malloc/free. Several (currently, 5) hook pairs
   may be installed, they are executed in the order they were installed and
   after calling __sanitizer_malloc_hook/__sanitizer_free_hook. Unlike
   __sanitizer_malloc_hook/__sanitizer_free_hook these hooks can be chained
   and do not rely on weak symbols working on the platform, but require
   __sanitizer_install_malloc_and_free_hooks to be called at startup and thus
   will not be called on malloc/free very early in the process. Returns the
   number of hooks currently installed or 0 on failure. Not thread-safe,
   should be called in the main thread before starting other threads. */
int SANITIZER_CDECL __sanitizer_install_malloc_and_free_hooks(
    void(SANITIZER_CDECL *malloc_hook)(const volatile void *, size_t),
    void(SANITIZER_CDECL *free_hook)(const volatile void *));

/* Drains allocator quarantines (calling thread's and global ones), returns
   freed memory back to OS and releases other non-essential internal allocator
   resources in attempt to reduce process RSS. Currently available with ASan
   only. */
void SANITIZER_CDECL __sanitizer_purge_allocator(void);

#ifdef __cplusplus
} // extern "C"
#endif

#endif

/*===--------- amxcomplexintrin.h - AMXCOMPLEX intrinsics -*- C++ -*---------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===------------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error "Never use directly; include instead."
#endif // __IMMINTRIN_H

#ifndef __AMX_COMPLEXINTRIN_H
#define __AMX_COMPLEXINTRIN_H
#ifdef __x86_64__

#define __DEFAULT_FN_ATTRS_COMPLEX \
  __attribute__((__always_inline__, __nodebug__, __target__("amx-complex")))

/// Perform matrix multiplication of two tiles containing complex elements and
/// accumulate the results into a packed single precision tile. Each dword
/// element in input tiles \a a and \a b is interpreted as a complex number
/// with FP16 real part and FP16 imaginary part.
/// Calculates the imaginary part of the result.
For each possible combination /// of (row of \a a, column of \a b), it performs a set of multiplication /// and accumulations on all corresponding complex numbers (one from \a a /// and one from \a b). The imaginary part of the \a a element is multiplied /// with the real part of the corresponding \a b element, and the real part /// of the \a a element is multiplied with the imaginary part of the /// corresponding \a b elements. The two accumulated results are added, and /// then accumulated into the corresponding row and column of \a dst. /// /// \headerfile /// /// \code /// void _tile_cmmimfp16ps(__tile dst, __tile a, __tile b); /// \endcode /// /// \code{.operation} /// FOR m := 0 TO dst.rows - 1 /// tmp := dst.row[m] /// FOR k := 0 TO (a.colsb / 4) - 1 /// FOR n := 0 TO (dst.colsb / 4) - 1 /// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1]) /// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0]) /// ENDFOR /// ENDFOR /// write_row_and_zero(dst, m, tmp, dst.colsb) /// ENDFOR /// zero_upper_rows(dst, dst.rows) /// zero_tileconfig_start() /// \endcode /// /// This intrinsic corresponds to the \c TCMMIMFP16PS instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param a /// The 1st source tile. Max size is 1024 Bytes. /// \param b /// The 2nd source tile. Max size is 1024 Bytes. #define _tile_cmmimfp16ps(dst, a, b) __builtin_ia32_tcmmimfp16ps(dst, a, b) /// Perform matrix multiplication of two tiles containing complex elements and /// accumulate the results into a packed single precision tile. Each dword /// element in input tiles \a a and \a b is interpreted as a complex number /// with FP16 real part and FP16 imaginary part. /// Calculates the real part of the result. For each possible combination /// of (row of \a a, column of \a b), it performs a set of multiplication /// and accumulations on all corresponding complex numbers (one from \a a /// and one from \a b). The real part of the \a a element is multiplied /// with the real part of the corresponding \a b element, and the negated /// imaginary part of the \a a element is multiplied with the imaginary /// part of the corresponding \a b elements. The two accumulated results /// are added, and then accumulated into the corresponding row and column /// of \a dst. /// /// \headerfile /// /// \code /// void _tile_cmmrlfp16ps(__tile dst, __tile a, __tile b); /// \endcode /// /// \code{.operation} /// FOR m := 0 TO dst.rows - 1 /// tmp := dst.row[m] /// FOR k := 0 TO (a.colsb / 4) - 1 /// FOR n := 0 TO (dst.colsb / 4) - 1 /// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+0]) /// tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+1]) /// ENDFOR /// ENDFOR /// write_row_and_zero(dst, m, tmp, dst.colsb) /// ENDFOR /// zero_upper_rows(dst, dst.rows) /// zero_tileconfig_start() /// \endcode /// /// This intrinsic corresponds to the \c TCMMIMFP16PS instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param a /// The 1st source tile. Max size is 1024 Bytes. /// \param b /// The 2nd source tile. Max size is 1024 Bytes. 
#define _tile_cmmrlfp16ps(dst, a, b) __builtin_ia32_tcmmrlfp16ps(dst, a, b) static __inline__ _tile1024i __DEFAULT_FN_ATTRS_COMPLEX _tile_cmmimfp16ps_internal(unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, _tile1024i src1, _tile1024i src2) { return __builtin_ia32_tcmmimfp16ps_internal(m, n, k, dst, src1, src2); } static __inline__ _tile1024i __DEFAULT_FN_ATTRS_COMPLEX _tile_cmmrlfp16ps_internal(unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, _tile1024i src1, _tile1024i src2) { return __builtin_ia32_tcmmrlfp16ps_internal(m, n, k, dst, src1, src2); } /// Perform matrix multiplication of two tiles containing complex elements and /// accumulate the results into a packed single precision tile. Each dword /// element in input tiles src0 and src1 is interpreted as a complex number with /// FP16 real part and FP16 imaginary part. /// This function calculates the imaginary part of the result. /// /// \headerfile /// /// This intrinsic corresponds to the TCMMIMFP16PS instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. __DEFAULT_FN_ATTRS_COMPLEX static void __tile_cmmimfp16ps(__tile1024i *dst, __tile1024i src0, __tile1024i src1) { dst->tile = _tile_cmmimfp16ps_internal(src0.row, src1.col, src0.col, dst->tile, src0.tile, src1.tile); } /// Perform matrix multiplication of two tiles containing complex elements and /// accumulate the results into a packed single precision tile. Each dword /// element in input tiles src0 and src1 is interpreted as a complex number with /// FP16 real part and FP16 imaginary part. /// This function calculates the real part of the result. /// /// \headerfile /// /// This intrinsic corresponds to the TCMMRLFP16PS instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. __DEFAULT_FN_ATTRS_COMPLEX static void __tile_cmmrlfp16ps(__tile1024i *dst, __tile1024i src0, __tile1024i src1) { dst->tile = _tile_cmmrlfp16ps_internal(src0.row, src1.col, src0.col, dst->tile, src0.tile, src1.tile); } #endif // __x86_64__ #endif // __AMX_COMPLEXINTRIN_H /*===------------- avx512vlvnniintrin.h - VNNI intrinsics ------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __AVX512VLVNNIINTRIN_H #define __AVX512VLVNNIINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512vnni,no-evex512"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512vnni,no-evex512"), \ __min_vector_width__(256))) /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with /// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed /// 16-bit results. Sum these 4 results with the corresponding 32-bit integer /// in \a S, and store the packed 32-bit results in DST. 
/// /// This intrinsic corresponds to the VPDPBUSD instructions. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.word := Signed(ZeroExtend16(A.byte[4*j]) * SignExtend16(B.byte[4*j])) /// tmp2.word := Signed(ZeroExtend16(A.byte[4*j+1]) * SignExtend16(B.byte[4*j+1])) /// tmp3.word := Signed(ZeroExtend16(A.byte[4*j+2]) * SignExtend16(B.byte[4*j+2])) /// tmp4.word := Signed(ZeroExtend16(A.byte[4*j+3]) * SignExtend16(B.byte[4*j+3])) /// DST.dword[j] := S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 /// ENDFOR /// DST[MAX:256] := 0 /// \endcode #define _mm256_dpbusd_epi32(S, A, B) \ ((__m256i)__builtin_ia32_vpdpbusd256((__v8si)(S), (__v8si)(A), (__v8si)(B))) /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with /// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed /// 16-bit results. Sum these 4 results with the corresponding 32-bit integer /// in \a S using signed saturation, and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPBUSDS instructions. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.word := Signed(ZeroExtend16(A.byte[4*j]) * SignExtend16(B.byte[4*j])) /// tmp2.word := Signed(ZeroExtend16(A.byte[4*j+1]) * SignExtend16(B.byte[4*j+1])) /// tmp3.word := Signed(ZeroExtend16(A.byte[4*j+2]) * SignExtend16(B.byte[4*j+2])) /// tmp4.word := Signed(ZeroExtend16(A.byte[4*j+3]) * SignExtend16(B.byte[4*j+3])) /// DST.dword[j] := Saturate32(S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) /// ENDFOR /// DST[MAX:256] := 0 /// \endcode #define _mm256_dpbusds_epi32(S, A, B) \ ((__m256i)__builtin_ia32_vpdpbusds256((__v8si)(S), (__v8si)(A), (__v8si)(B))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with /// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit /// results. Sum these 2 results with the corresponding 32-bit integer in \a S, /// and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPWSSD instructions. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := SignExtend32(A.word[2*j]) * SignExtend32(B.word[2*j]) /// tmp2.dword := SignExtend32(A.word[2*j+1]) * SignExtend32(B.word[2*j+1]) /// DST.dword[j] := S.dword[j] + tmp1 + tmp2 /// ENDFOR /// DST[MAX:256] := 0 /// \endcode #define _mm256_dpwssd_epi32(S, A, B) \ ((__m256i)__builtin_ia32_vpdpwssd256((__v8si)(S), (__v8si)(A), (__v8si)(B))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with /// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit /// results. Sum these 2 results with the corresponding 32-bit integer in \a S /// using signed saturation, and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPWSSDS instructions. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := SignExtend32(A.word[2*j]) * SignExtend32(B.word[2*j]) /// tmp2.dword := SignExtend32(A.word[2*j+1]) * SignExtend32(B.word[2*j+1]) /// DST.dword[j] := Saturate32(S.dword[j] + tmp1 + tmp2) /// ENDFOR /// DST[MAX:256] := 0 /// \endcode #define _mm256_dpwssds_epi32(S, A, B) \ ((__m256i)__builtin_ia32_vpdpwssds256((__v8si)(S), (__v8si)(A), (__v8si)(B))) /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with /// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed /// 16-bit results. Sum these 4 results with the corresponding 32-bit integer /// in \a S, and store the packed 32-bit results in DST. 
/// /// This intrinsic corresponds to the VPDPBUSD instructions. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.word := Signed(ZeroExtend16(A.byte[4*j]) * SignExtend16(B.byte[4*j])) /// tmp2.word := Signed(ZeroExtend16(A.byte[4*j+1]) * SignExtend16(B.byte[4*j+1])) /// tmp3.word := Signed(ZeroExtend16(A.byte[4*j+2]) * SignExtend16(B.byte[4*j+2])) /// tmp4.word := Signed(ZeroExtend16(A.byte[4*j+3]) * SignExtend16(B.byte[4*j+3])) /// DST.dword[j] := S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 /// ENDFOR /// DST[MAX:128] := 0 /// \endcode #define _mm_dpbusd_epi32(S, A, B) \ ((__m128i)__builtin_ia32_vpdpbusd128((__v4si)(S), (__v4si)(A), (__v4si)(B))) /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with /// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed /// 16-bit results. Sum these 4 results with the corresponding 32-bit integer /// in \a S using signed saturation, and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPBUSDS instructions. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.word := Signed(ZeroExtend16(A.byte[4*j]) * SignExtend16(B.byte[4*j])) /// tmp2.word := Signed(ZeroExtend16(A.byte[4*j+1]) * SignExtend16(B.byte[4*j+1])) /// tmp3.word := Signed(ZeroExtend16(A.byte[4*j+2]) * SignExtend16(B.byte[4*j+2])) /// tmp4.word := Signed(ZeroExtend16(A.byte[4*j+3]) * SignExtend16(B.byte[4*j+3])) /// DST.dword[j] := Saturate32(S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) /// ENDFOR /// DST[MAX:128] := 0 /// \endcode #define _mm_dpbusds_epi32(S, A, B) \ ((__m128i)__builtin_ia32_vpdpbusds128((__v4si)(S), (__v4si)(A), (__v4si)(B))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with /// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit /// results. Sum these 2 results with the corresponding 32-bit integer in \a S, /// and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPWSSD instructions. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := SignExtend32(A.word[2*j]) * SignExtend32(B.word[2*j]) /// tmp2.dword := SignExtend32(A.word[2*j+1]) * SignExtend32(B.word[2*j+1]) /// DST.dword[j] := S.dword[j] + tmp1 + tmp2 /// ENDFOR /// DST[MAX:128] := 0 /// \endcode #define _mm_dpwssd_epi32(S, A, B) \ ((__m128i)__builtin_ia32_vpdpwssd128((__v4si)(S), (__v4si)(A), (__v4si)(B))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with /// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit /// results. Sum these 2 results with the corresponding 32-bit integer in \a S /// using signed saturation, and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPWSSDS instructions. 
/// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := SignExtend32(A.word[2*j]) * SignExtend32(B.word[2*j]) /// tmp2.dword := SignExtend32(A.word[2*j+1]) * SignExtend32(B.word[2*j+1]) /// DST.dword[j] := Saturate32(S.dword[j] + tmp1 + tmp2) /// ENDFOR /// DST[MAX:128] := 0 /// \endcode #define _mm_dpwssds_epi32(S, A, B) \ ((__m128i)__builtin_ia32_vpdpwssds128((__v4si)(S), (__v4si)(A), (__v4si)(B))) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpbusd_epi32(__S, __A, __B), (__v8si)__S); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpbusd_epi32(__S, __A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpbusds_epi32(__S, __A, __B), (__v8si)__S); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpbusds_epi32(__S, __A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpwssd_epi32(__S, __A, __B), (__v8si)__S); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpwssd_epi32(__S, __A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpwssds_epi32(__S, __A, __B), (__v8si)__S); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpwssds_epi32(__S, __A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpbusd_epi32(__S, __A, __B), (__v4si)__S); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpbusd_epi32(__S, __A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpbusds_epi32(__S, __A, __B), (__v4si)__S); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpbusds_epi32(__S, __A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpwssd_epi32(__S, __A, 
__B), (__v4si)__S); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpwssd_epi32(__S, __A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpwssds_epi32(__S, __A, __B), (__v4si)__S); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpwssds_epi32(__S, __A, __B), (__v4si)_mm_setzero_si128()); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif avx512vlvp2intersectintrin.h/*===----------- avxvnniint16intrin.h - AVXVNNIINT16 intrinsics-------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error \ "Never use directly; include instead." #endif // __IMMINTRIN_H #ifndef __AVXVNNIINT16INTRIN_H #define __AVXVNNIINT16INTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, __target__("avxvnniint16"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, __target__("avxvnniint16"), \ __min_vector_width__(256))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. Sum these 2 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m128i _mm_dpwsud_epi32(__m128i __W, __m128i __A, __m128i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWSUD instruction. /// /// \param __W /// A 128-bit vector of [4 x int]. /// \param __A /// A 128-bit vector of [8 x short]. /// \param __B /// A 128-bit vector of [8 x unsigned short]. /// \returns /// A 128-bit vector of [4 x int]. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) /// tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwsud_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwsud128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. Sum these 2 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m256i _mm256_dpwsud_epi32(__m256i __W, __m256i __A, __m256i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWSUD instruction. /// /// \param __W /// A 256-bit vector of [8 x int]. /// \param __A /// A 256-bit vector of [16 x short]. 
/// \param __B /// A 256-bit vector of [16 x unsigned short]. /// \returns /// A 256-bit vector of [8 x int]. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) /// tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwsud_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwsud256((__v8si)__W, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. Sum these 2 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m128i _mm_dpwsuds_epi32(__m128i __W, __m128i __A, __m128i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWSUDS instruction. /// /// \param __W /// A 128-bit vector of [4 x int]. /// \param __A /// A 128-bit vector of [8 x short]. /// \param __B /// A 128-bit vector of [8 x unsigned short]. /// \returns /// A 128-bit vector of [4 x int]. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) /// tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwsuds_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwsuds128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. Sum these 2 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m256i _mm256_dpwsuds_epi32(__m256i __W, __m256i __A, __m256i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWSUDS instruction. /// /// \param __W /// A 256-bit vector of [8 x int]. /// \param __A /// A 256-bit vector of [16 x short]. /// \param __B /// A 256-bit vector of [16 x unsigned short]. /// \returns /// A 256-bit vector of [8 x int]. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) /// tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwsuds_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwsuds256((__v8si)__W, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding signed 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. Sum these 2 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. 
/// /// \headerfile /// /// \code /// __m128i _mm_dpbusd_epi32(__m128i __W, __m128i __A, __m128i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWUSD instruction. /// /// \param __W /// A 128-bit vector of [4 x int]. /// \param __A /// A 128-bit vector of [8 x unsigned short]. /// \param __B /// A 128-bit vector of [8 x short]. /// \returns /// A 128-bit vector of [4 x int]. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) /// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwusd_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwusd128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding signed 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. Sum these 2 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m256i _mm256_dpwusd_epi32(__m256i __W, __m256i __A, __m256i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWUSD instruction. /// /// \param __W /// A 256-bit vector of [8 x int]. /// \param __A /// A 256-bit vector of [16 x unsigned short]. /// \param __B /// A 256-bit vector of [16 x short]. /// \returns /// A 256-bit vector of [8 x int]. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) /// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwusd_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwusd256((__v8si)__W, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding signed 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. Sum these 2 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m128i _mm_dpwusds_epi32(__m128i __W, __m128i __A, __m128i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWSUDS instruction. /// /// \param __W /// A 128-bit vector of [4 x int]. /// \param __A /// A 128-bit vector of [8 x unsigned short]. /// \param __B /// A 128-bit vector of [8 x short]. /// \returns /// A 128-bit vector of [4 x int]. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) /// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwusds_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwusds128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding signed 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. 
Sum these 2 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m256i _mm256_dpwsuds_epi32(__m256i __W, __m256i __A, __m256i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWSUDS instruction. /// /// \param __W /// A 256-bit vector of [8 x int]. /// \param __A /// A 256-bit vector of [16 x unsigned short]. /// \param __B /// A 256-bit vector of [16 x short]. /// \returns /// A 256-bit vector of [8 x int]. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) /// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwusds_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwusds256((__v8si)__W, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. Sum these 2 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m128i _mm_dpwuud_epi32(__m128i __W, __m128i __A, __m128i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWUUD instruction. /// /// \param __W /// A 128-bit vector of [4 x unsigned int]. /// \param __A /// A 128-bit vector of [8 x unsigned short]. /// \param __B /// A 128-bit vector of [8 x unsigned short]. /// \returns /// A 128-bit vector of [4 x unsigned int]. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) /// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwuud_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwuud128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. Sum these 2 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m256i _mm256_dpwuud_epi32(__m256i __W, __m256i __A, __m256i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWUUD instruction. /// /// \param __W /// A 256-bit vector of [8 x unsigned int]. /// \param __A /// A 256-bit vector of [16 x unsigned short]. /// \param __B /// A 256-bit vector of [16 x unsigned short]. /// \returns /// A 256-bit vector of [8 x unsigned int]. 
/// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) /// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwuud_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwuud256((__v8si)__W, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. Sum these 2 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m128i _mm_dpwsuds_epi32(__m128i __W, __m128i __A, __m128i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWSUDS instruction. /// /// \param __W /// A 128-bit vector of [4 x unsigned int]. /// \param __A /// A 128-bit vector of [8 x unsigned short]. /// \param __B /// A 128-bit vector of [8 x unsigned short]. /// \returns /// A 128-bit vector of [4 x unsigned int]. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) /// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwuuds_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwuuds128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. Sum these 2 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m256i _mm256_dpwuuds_epi32(__m256i __W, __m256i __A, __m256i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWSUDS instruction. /// /// \param __W /// A 256-bit vector of [8 x unsigned int]. /// \param __A /// A 256-bit vector of [16 x unsigned short]. /// \param __B /// A 256-bit vector of [16 x unsigned short]. /// \returns /// A 256-bit vector of [8 x unsigned int]. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) /// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwuuds_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwuuds256((__v8si)__W, (__v8si)__A, (__v8si)__B); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif // __AVXVNNIINT16INTRIN_H /*===----------------------- clzerointrin.h - CLZERO ----------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86INTRIN_H #error "Never use directly; include instead." #endif #ifndef __CLZEROINTRIN_H #define __CLZEROINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("clzero"))) /// Zeroes out the cache line for the address \a __line. This uses a /// non-temporal store. Calling \c _mm_sfence() afterward might be needed /// to enforce ordering. /// /// \headerfile /// /// This intrinsic corresponds to the \c CLZERO instruction. /// /// \param __line /// An address within the cache line to zero out. static __inline__ void __DEFAULT_FN_ATTRS _mm_clzero (void * __line) { __builtin_ia32_clzero ((void *)__line); } #undef __DEFAULT_FN_ATTRS #endif /* __CLZEROINTRIN_H */ /*===------------ larchintrin.h - LoongArch intrinsics ---------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef _LOONGARCH_BASE_INTRIN_H #define _LOONGARCH_BASE_INTRIN_H #ifdef __cplusplus extern "C" { #endif typedef struct rdtime { unsigned int value; unsigned int timeid; } __rdtime_t; #if __loongarch_grlen == 64 typedef struct drdtime { unsigned long dvalue; unsigned long dtimeid; } __drdtime_t; extern __inline __drdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rdtime_d(void) { __drdtime_t __drdtime; __asm__ volatile( "rdtime.d %[val], %[tid]\n\t" : [val] "=&r"(__drdtime.dvalue), [tid] "=&r"(__drdtime.dtimeid)); return __drdtime; } #endif extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rdtimeh_w(void) { __rdtime_t __rdtime; __asm__ volatile("rdtimeh.w %[val], %[tid]\n\t" : [val] "=&r"(__rdtime.value), [tid] "=&r"(__rdtime.timeid)); return __rdtime; } extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rdtimel_w(void) { __rdtime_t __rdtime; __asm__ volatile("rdtimel.w %[val], %[tid]\n\t" : [val] "=&r"(__rdtime.value), [tid] "=&r"(__rdtime.timeid)); return __rdtime; } #if __loongarch_grlen == 64 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __crc_w_b_w(char _1, int _2) { return (int)__builtin_loongarch_crc_w_b_w((char)_1, (int)_2); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __crc_w_h_w(short _1, int _2) { return (int)__builtin_loongarch_crc_w_h_w((short)_1, (int)_2); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __crc_w_w_w(int _1, int _2) { return (int)__builtin_loongarch_crc_w_w_w((int)_1, (int)_2); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __crc_w_d_w(long int _1, int _2) { return (int)__builtin_loongarch_crc_w_d_w((long int)_1, (int)_2); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __crcc_w_b_w(char _1, int _2) { return (int)__builtin_loongarch_crcc_w_b_w((char)_1, (int)_2); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __crcc_w_h_w(short _1, int _2) { return (int)__builtin_loongarch_crcc_w_h_w((short)_1, (int)_2); } extern __inline int 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __crcc_w_w_w(int _1, int _2) { return (int)__builtin_loongarch_crcc_w_w_w((int)_1, (int)_2); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __crcc_w_d_w(long int _1, int _2) { return (int)__builtin_loongarch_crcc_w_d_w((long int)_1, (int)_2); } #endif #define __break(/*ui15*/ _1) __builtin_loongarch_break((_1)) #if __loongarch_grlen == 32 #define __cacop_w(/*uimm5*/ _1, /*unsigned int*/ _2, /*simm12*/ _3) \ ((void)__builtin_loongarch_cacop_w((_1), (unsigned int)(_2), (_3))) #endif #if __loongarch_grlen == 64 #define __cacop_d(/*uimm5*/ _1, /*unsigned long int*/ _2, /*simm12*/ _3) \ ((void)__builtin_loongarch_cacop_d((_1), (unsigned long int)(_2), (_3))) #endif #define __dbar(/*ui15*/ _1) __builtin_loongarch_dbar((_1)) #define __ibar(/*ui15*/ _1) __builtin_loongarch_ibar((_1)) #define __movfcsr2gr(/*ui5*/ _1) __builtin_loongarch_movfcsr2gr((_1)); #define __movgr2fcsr(/*ui5*/ _1, _2) \ __builtin_loongarch_movgr2fcsr((_1), (unsigned int)_2); #define __syscall(/*ui15*/ _1) __builtin_loongarch_syscall((_1)) #define __csrrd_w(/*ui14*/ _1) ((unsigned int)__builtin_loongarch_csrrd_w((_1))) #define __csrwr_w(/*unsigned int*/ _1, /*ui14*/ _2) \ ((unsigned int)__builtin_loongarch_csrwr_w((unsigned int)(_1), (_2))) #define __csrxchg_w(/*unsigned int*/ _1, /*unsigned int*/ _2, /*ui14*/ _3) \ ((unsigned int)__builtin_loongarch_csrxchg_w((unsigned int)(_1), \ (unsigned int)(_2), (_3))) #if __loongarch_grlen == 64 #define __csrrd_d(/*ui14*/ _1) \ ((unsigned long int)__builtin_loongarch_csrrd_d((_1))) #define __csrwr_d(/*unsigned long int*/ _1, /*ui14*/ _2) \ ((unsigned long int)__builtin_loongarch_csrwr_d((unsigned long int)(_1), \ (_2))) #define __csrxchg_d(/*unsigned long int*/ _1, /*unsigned long int*/ _2, \ /*ui14*/ _3) \ ((unsigned long int)__builtin_loongarch_csrxchg_d( \ (unsigned long int)(_1), (unsigned long int)(_2), (_3))) #endif extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __iocsrrd_b(unsigned int _1) { return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); } extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __iocsrrd_h(unsigned int _1) { return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __iocsrrd_w(unsigned int _1) { return (unsigned int)__builtin_loongarch_iocsrrd_w((unsigned int)_1); } #if __loongarch_grlen == 64 extern __inline unsigned long int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __iocsrrd_d(unsigned int _1) { return (unsigned long int)__builtin_loongarch_iocsrrd_d((unsigned int)_1); } #endif extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __iocsrwr_b(unsigned char _1, unsigned int _2) { __builtin_loongarch_iocsrwr_b((unsigned char)_1, (unsigned int)_2); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __iocsrwr_h(unsigned short _1, unsigned int _2) { __builtin_loongarch_iocsrwr_h((unsigned short)_1, (unsigned int)_2); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __iocsrwr_w(unsigned int _1, unsigned int _2) { __builtin_loongarch_iocsrwr_w((unsigned int)_1, (unsigned int)_2); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __cpucfg(unsigned int 
_1) { return (unsigned int)__builtin_loongarch_cpucfg((unsigned int)_1); } #if __loongarch_grlen == 64 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __iocsrwr_d(unsigned long int _1, unsigned int _2) { __builtin_loongarch_iocsrwr_d((unsigned long int)_1, (unsigned int)_2); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __asrtgt_d(long int _1, long int _2) { __builtin_loongarch_asrtgt_d((long int)_1, (long int)_2); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __asrtle_d(long int _1, long int _2) { __builtin_loongarch_asrtle_d((long int)_1, (long int)_2); } #endif #if __loongarch_grlen == 64 #define __lddir_d(/*long int*/ _1, /*ui5*/ _2) \ ((long int)__builtin_loongarch_lddir_d((long int)(_1), (_2))) #define __ldpte_d(/*long int*/ _1, /*ui5*/ _2) \ ((void)__builtin_loongarch_ldpte_d((long int)(_1), (_2))) #endif #define __frecipe_s(/*float*/ _1) \ (float)__builtin_loongarch_frecipe_s((float)_1) #define __frecipe_d(/*double*/ _1) \ (double)__builtin_loongarch_frecipe_d((double)_1) #define __frsqrte_s(/*float*/ _1) \ (float)__builtin_loongarch_frsqrte_s((float)_1) #define __frsqrte_d(/*double*/ _1) \ (double)__builtin_loongarch_frsqrte_d((double)_1) #ifdef __cplusplus } #endif #endif /* _LOONGARCH_BASE_INTRIN_H */ /*===---- msa.h - MIPS MSA intrinsics --------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef _MSA_H #define _MSA_H 1 #if defined(__mips_msa) typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); typedef short v8i16 __attribute__((vector_size(16), aligned(16))); typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); typedef int v4i32 __attribute__((vector_size(16), aligned(16))); typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); typedef float v4f32 __attribute__((vector_size(16), aligned(16))); typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); typedef double v2f64 __attribute__ ((vector_size(16), aligned(16))); typedef double v2f64_d __attribute__ ((vector_size(16), aligned(8))); #define __msa_sll_b __builtin_msa_sll_b #define __msa_sll_h __builtin_msa_sll_h #define __msa_sll_w __builtin_msa_sll_w #define __msa_sll_d __builtin_msa_sll_d #define __msa_slli_b __builtin_msa_slli_b #define __msa_slli_h __builtin_msa_slli_h #define __msa_slli_w __builtin_msa_slli_w #define __msa_slli_d __builtin_msa_slli_d 
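/*
 * [Editor's note: illustrative sketch, not part of msa.h] The MSA typedefs above
 * are ordinary GCC/Clang vector-extension types, so they can be initialized and
 * indexed like arrays and passed straight to the __msa_* macros defined in this
 * header. Assuming a MIPS target built with MSA enabled (-mmsa), minimal usage
 * might look like:
 *
 *   v4i32 a = {1, 2, 3, 4};
 *   v4i32 b = {10, 20, 30, 40};
 *   v4i32 sum = __msa_addv_w(a, b);        // element-wise 32-bit add
 *   v4i32 doubled = __msa_slli_w(sum, 1);  // shift each element left by one
 *   int first = sum[0];                    // vector types support [] access
 */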
#define __msa_sra_b __builtin_msa_sra_b #define __msa_sra_h __builtin_msa_sra_h #define __msa_sra_w __builtin_msa_sra_w #define __msa_sra_d __builtin_msa_sra_d #define __msa_srai_b __builtin_msa_srai_b #define __msa_srai_h __builtin_msa_srai_h #define __msa_srai_w __builtin_msa_srai_w #define __msa_srai_d __builtin_msa_srai_d #define __msa_srar_b __builtin_msa_srar_b #define __msa_srar_h __builtin_msa_srar_h #define __msa_srar_w __builtin_msa_srar_w #define __msa_srar_d __builtin_msa_srar_d #define __msa_srari_b __builtin_msa_srari_b #define __msa_srari_h __builtin_msa_srari_h #define __msa_srari_w __builtin_msa_srari_w #define __msa_srari_d __builtin_msa_srari_d #define __msa_srl_b __builtin_msa_srl_b #define __msa_srl_h __builtin_msa_srl_h #define __msa_srl_w __builtin_msa_srl_w #define __msa_srl_d __builtin_msa_srl_d #define __msa_srli_b __builtin_msa_srli_b #define __msa_srli_h __builtin_msa_srli_h #define __msa_srli_w __builtin_msa_srli_w #define __msa_srli_d __builtin_msa_srli_d #define __msa_srlr_b __builtin_msa_srlr_b #define __msa_srlr_h __builtin_msa_srlr_h #define __msa_srlr_w __builtin_msa_srlr_w #define __msa_srlr_d __builtin_msa_srlr_d #define __msa_srlri_b __builtin_msa_srlri_b #define __msa_srlri_h __builtin_msa_srlri_h #define __msa_srlri_w __builtin_msa_srlri_w #define __msa_srlri_d __builtin_msa_srlri_d #define __msa_bclr_b __builtin_msa_bclr_b #define __msa_bclr_h __builtin_msa_bclr_h #define __msa_bclr_w __builtin_msa_bclr_w #define __msa_bclr_d __builtin_msa_bclr_d #define __msa_bclri_b __builtin_msa_bclri_b #define __msa_bclri_h __builtin_msa_bclri_h #define __msa_bclri_w __builtin_msa_bclri_w #define __msa_bclri_d __builtin_msa_bclri_d #define __msa_bset_b __builtin_msa_bset_b #define __msa_bset_h __builtin_msa_bset_h #define __msa_bset_w __builtin_msa_bset_w #define __msa_bset_d __builtin_msa_bset_d #define __msa_bseti_b __builtin_msa_bseti_b #define __msa_bseti_h __builtin_msa_bseti_h #define __msa_bseti_w __builtin_msa_bseti_w #define __msa_bseti_d __builtin_msa_bseti_d #define __msa_bneg_b __builtin_msa_bneg_b #define __msa_bneg_h __builtin_msa_bneg_h #define __msa_bneg_w __builtin_msa_bneg_w #define __msa_bneg_d __builtin_msa_bneg_d #define __msa_bnegi_b __builtin_msa_bnegi_b #define __msa_bnegi_h __builtin_msa_bnegi_h #define __msa_bnegi_w __builtin_msa_bnegi_w #define __msa_bnegi_d __builtin_msa_bnegi_d #define __msa_binsl_b __builtin_msa_binsl_b #define __msa_binsl_h __builtin_msa_binsl_h #define __msa_binsl_w __builtin_msa_binsl_w #define __msa_binsl_d __builtin_msa_binsl_d #define __msa_binsli_b __builtin_msa_binsli_b #define __msa_binsli_h __builtin_msa_binsli_h #define __msa_binsli_w __builtin_msa_binsli_w #define __msa_binsli_d __builtin_msa_binsli_d #define __msa_binsr_b __builtin_msa_binsr_b #define __msa_binsr_h __builtin_msa_binsr_h #define __msa_binsr_w __builtin_msa_binsr_w #define __msa_binsr_d __builtin_msa_binsr_d #define __msa_binsri_b __builtin_msa_binsri_b #define __msa_binsri_h __builtin_msa_binsri_h #define __msa_binsri_w __builtin_msa_binsri_w #define __msa_binsri_d __builtin_msa_binsri_d #define __msa_addv_b __builtin_msa_addv_b #define __msa_addv_h __builtin_msa_addv_h #define __msa_addv_w __builtin_msa_addv_w #define __msa_addv_d __builtin_msa_addv_d #define __msa_addvi_b __builtin_msa_addvi_b #define __msa_addvi_h __builtin_msa_addvi_h #define __msa_addvi_w __builtin_msa_addvi_w #define __msa_addvi_d __builtin_msa_addvi_d #define __msa_subv_b __builtin_msa_subv_b #define __msa_subv_h __builtin_msa_subv_h #define __msa_subv_w 
__builtin_msa_subv_w #define __msa_subv_d __builtin_msa_subv_d #define __msa_subvi_b __builtin_msa_subvi_b #define __msa_subvi_h __builtin_msa_subvi_h #define __msa_subvi_w __builtin_msa_subvi_w #define __msa_subvi_d __builtin_msa_subvi_d #define __msa_max_s_b __builtin_msa_max_s_b #define __msa_max_s_h __builtin_msa_max_s_h #define __msa_max_s_w __builtin_msa_max_s_w #define __msa_max_s_d __builtin_msa_max_s_d #define __msa_maxi_s_b __builtin_msa_maxi_s_b #define __msa_maxi_s_h __builtin_msa_maxi_s_h #define __msa_maxi_s_w __builtin_msa_maxi_s_w #define __msa_maxi_s_d __builtin_msa_maxi_s_d #define __msa_max_u_b __builtin_msa_max_u_b #define __msa_max_u_h __builtin_msa_max_u_h #define __msa_max_u_w __builtin_msa_max_u_w #define __msa_max_u_d __builtin_msa_max_u_d #define __msa_maxi_u_b __builtin_msa_maxi_u_b #define __msa_maxi_u_h __builtin_msa_maxi_u_h #define __msa_maxi_u_w __builtin_msa_maxi_u_w #define __msa_maxi_u_d __builtin_msa_maxi_u_d #define __msa_min_s_b __builtin_msa_min_s_b #define __msa_min_s_h __builtin_msa_min_s_h #define __msa_min_s_w __builtin_msa_min_s_w #define __msa_min_s_d __builtin_msa_min_s_d #define __msa_mini_s_b __builtin_msa_mini_s_b #define __msa_mini_s_h __builtin_msa_mini_s_h #define __msa_mini_s_w __builtin_msa_mini_s_w #define __msa_mini_s_d __builtin_msa_mini_s_d #define __msa_min_u_b __builtin_msa_min_u_b #define __msa_min_u_h __builtin_msa_min_u_h #define __msa_min_u_w __builtin_msa_min_u_w #define __msa_min_u_d __builtin_msa_min_u_d #define __msa_mini_u_b __builtin_msa_mini_u_b #define __msa_mini_u_h __builtin_msa_mini_u_h #define __msa_mini_u_w __builtin_msa_mini_u_w #define __msa_mini_u_d __builtin_msa_mini_u_d #define __msa_max_a_b __builtin_msa_max_a_b #define __msa_max_a_h __builtin_msa_max_a_h #define __msa_max_a_w __builtin_msa_max_a_w #define __msa_max_a_d __builtin_msa_max_a_d #define __msa_min_a_b __builtin_msa_min_a_b #define __msa_min_a_h __builtin_msa_min_a_h #define __msa_min_a_w __builtin_msa_min_a_w #define __msa_min_a_d __builtin_msa_min_a_d #define __msa_ceq_b __builtin_msa_ceq_b #define __msa_ceq_h __builtin_msa_ceq_h #define __msa_ceq_w __builtin_msa_ceq_w #define __msa_ceq_d __builtin_msa_ceq_d #define __msa_ceqi_b __builtin_msa_ceqi_b #define __msa_ceqi_h __builtin_msa_ceqi_h #define __msa_ceqi_w __builtin_msa_ceqi_w #define __msa_ceqi_d __builtin_msa_ceqi_d #define __msa_clt_s_b __builtin_msa_clt_s_b #define __msa_clt_s_h __builtin_msa_clt_s_h #define __msa_clt_s_w __builtin_msa_clt_s_w #define __msa_clt_s_d __builtin_msa_clt_s_d #define __msa_clti_s_b __builtin_msa_clti_s_b #define __msa_clti_s_h __builtin_msa_clti_s_h #define __msa_clti_s_w __builtin_msa_clti_s_w #define __msa_clti_s_d __builtin_msa_clti_s_d #define __msa_clt_u_b __builtin_msa_clt_u_b #define __msa_clt_u_h __builtin_msa_clt_u_h #define __msa_clt_u_w __builtin_msa_clt_u_w #define __msa_clt_u_d __builtin_msa_clt_u_d #define __msa_clti_u_b __builtin_msa_clti_u_b #define __msa_clti_u_h __builtin_msa_clti_u_h #define __msa_clti_u_w __builtin_msa_clti_u_w #define __msa_clti_u_d __builtin_msa_clti_u_d #define __msa_cle_s_b __builtin_msa_cle_s_b #define __msa_cle_s_h __builtin_msa_cle_s_h #define __msa_cle_s_w __builtin_msa_cle_s_w #define __msa_cle_s_d __builtin_msa_cle_s_d #define __msa_clei_s_b __builtin_msa_clei_s_b #define __msa_clei_s_h __builtin_msa_clei_s_h #define __msa_clei_s_w __builtin_msa_clei_s_w #define __msa_clei_s_d __builtin_msa_clei_s_d #define __msa_cle_u_b __builtin_msa_cle_u_b #define __msa_cle_u_h __builtin_msa_cle_u_h #define __msa_cle_u_w 
__builtin_msa_cle_u_w #define __msa_cle_u_d __builtin_msa_cle_u_d #define __msa_clei_u_b __builtin_msa_clei_u_b #define __msa_clei_u_h __builtin_msa_clei_u_h #define __msa_clei_u_w __builtin_msa_clei_u_w #define __msa_clei_u_d __builtin_msa_clei_u_d #define __msa_ld_b __builtin_msa_ld_b #define __msa_ld_h __builtin_msa_ld_h #define __msa_ld_w __builtin_msa_ld_w #define __msa_ld_d __builtin_msa_ld_d #define __msa_ldr_d __builtin_msa_ldr_d #define __msa_ldr_w __builtin_msa_ldrq_w #define __msa_st_b __builtin_msa_st_b #define __msa_st_h __builtin_msa_st_h #define __msa_st_w __builtin_msa_st_w #define __msa_st_d __builtin_msa_st_d #define __msa_str_d __builtin_msa_str_d #define __msa_str_w __builtin_msa_strq_w #define __msa_sat_s_b __builtin_msa_sat_s_b #define __msa_sat_s_h __builtin_msa_sat_s_h #define __msa_sat_s_w __builtin_msa_sat_s_w #define __msa_sat_s_d __builtin_msa_sat_s_d #define __msa_sat_u_b __builtin_msa_sat_u_b #define __msa_sat_u_h __builtin_msa_sat_u_h #define __msa_sat_u_w __builtin_msa_sat_u_w #define __msa_sat_u_d __builtin_msa_sat_u_d #define __msa_add_a_b __builtin_msa_add_a_b #define __msa_add_a_h __builtin_msa_add_a_h #define __msa_add_a_w __builtin_msa_add_a_w #define __msa_add_a_d __builtin_msa_add_a_d #define __msa_adds_a_b __builtin_msa_adds_a_b #define __msa_adds_a_h __builtin_msa_adds_a_h #define __msa_adds_a_w __builtin_msa_adds_a_w #define __msa_adds_a_d __builtin_msa_adds_a_d #define __msa_adds_s_b __builtin_msa_adds_s_b #define __msa_adds_s_h __builtin_msa_adds_s_h #define __msa_adds_s_w __builtin_msa_adds_s_w #define __msa_adds_s_d __builtin_msa_adds_s_d #define __msa_adds_u_b __builtin_msa_adds_u_b #define __msa_adds_u_h __builtin_msa_adds_u_h #define __msa_adds_u_w __builtin_msa_adds_u_w #define __msa_adds_u_d __builtin_msa_adds_u_d #define __msa_ave_s_b __builtin_msa_ave_s_b #define __msa_ave_s_h __builtin_msa_ave_s_h #define __msa_ave_s_w __builtin_msa_ave_s_w #define __msa_ave_s_d __builtin_msa_ave_s_d #define __msa_ave_u_b __builtin_msa_ave_u_b #define __msa_ave_u_h __builtin_msa_ave_u_h #define __msa_ave_u_w __builtin_msa_ave_u_w #define __msa_ave_u_d __builtin_msa_ave_u_d #define __msa_aver_s_b __builtin_msa_aver_s_b #define __msa_aver_s_h __builtin_msa_aver_s_h #define __msa_aver_s_w __builtin_msa_aver_s_w #define __msa_aver_s_d __builtin_msa_aver_s_d #define __msa_aver_u_b __builtin_msa_aver_u_b #define __msa_aver_u_h __builtin_msa_aver_u_h #define __msa_aver_u_w __builtin_msa_aver_u_w #define __msa_aver_u_d __builtin_msa_aver_u_d #define __msa_subs_s_b __builtin_msa_subs_s_b #define __msa_subs_s_h __builtin_msa_subs_s_h #define __msa_subs_s_w __builtin_msa_subs_s_w #define __msa_subs_s_d __builtin_msa_subs_s_d #define __msa_subs_u_b __builtin_msa_subs_u_b #define __msa_subs_u_h __builtin_msa_subs_u_h #define __msa_subs_u_w __builtin_msa_subs_u_w #define __msa_subs_u_d __builtin_msa_subs_u_d #define __msa_subsuu_s_b __builtin_msa_subsuu_s_b #define __msa_subsuu_s_h __builtin_msa_subsuu_s_h #define __msa_subsuu_s_w __builtin_msa_subsuu_s_w #define __msa_subsuu_s_d __builtin_msa_subsuu_s_d #define __msa_subsus_u_b __builtin_msa_subsus_u_b #define __msa_subsus_u_h __builtin_msa_subsus_u_h #define __msa_subsus_u_w __builtin_msa_subsus_u_w #define __msa_subsus_u_d __builtin_msa_subsus_u_d #define __msa_asub_s_b __builtin_msa_asub_s_b #define __msa_asub_s_h __builtin_msa_asub_s_h #define __msa_asub_s_w __builtin_msa_asub_s_w #define __msa_asub_s_d __builtin_msa_asub_s_d #define __msa_asub_u_b __builtin_msa_asub_u_b #define __msa_asub_u_h 
__builtin_msa_asub_u_h #define __msa_asub_u_w __builtin_msa_asub_u_w #define __msa_asub_u_d __builtin_msa_asub_u_d #define __msa_mulv_b __builtin_msa_mulv_b #define __msa_mulv_h __builtin_msa_mulv_h #define __msa_mulv_w __builtin_msa_mulv_w #define __msa_mulv_d __builtin_msa_mulv_d #define __msa_maddv_b __builtin_msa_maddv_b #define __msa_maddv_h __builtin_msa_maddv_h #define __msa_maddv_w __builtin_msa_maddv_w #define __msa_maddv_d __builtin_msa_maddv_d #define __msa_msubv_b __builtin_msa_msubv_b #define __msa_msubv_h __builtin_msa_msubv_h #define __msa_msubv_w __builtin_msa_msubv_w #define __msa_msubv_d __builtin_msa_msubv_d #define __msa_div_s_b __builtin_msa_div_s_b #define __msa_div_s_h __builtin_msa_div_s_h #define __msa_div_s_w __builtin_msa_div_s_w #define __msa_div_s_d __builtin_msa_div_s_d #define __msa_div_u_b __builtin_msa_div_u_b #define __msa_div_u_h __builtin_msa_div_u_h #define __msa_div_u_w __builtin_msa_div_u_w #define __msa_div_u_d __builtin_msa_div_u_d #define __msa_hadd_s_h __builtin_msa_hadd_s_h #define __msa_hadd_s_w __builtin_msa_hadd_s_w #define __msa_hadd_s_d __builtin_msa_hadd_s_d #define __msa_hadd_u_h __builtin_msa_hadd_u_h #define __msa_hadd_u_w __builtin_msa_hadd_u_w #define __msa_hadd_u_d __builtin_msa_hadd_u_d #define __msa_hsub_s_h __builtin_msa_hsub_s_h #define __msa_hsub_s_w __builtin_msa_hsub_s_w #define __msa_hsub_s_d __builtin_msa_hsub_s_d #define __msa_hsub_u_h __builtin_msa_hsub_u_h #define __msa_hsub_u_w __builtin_msa_hsub_u_w #define __msa_hsub_u_d __builtin_msa_hsub_u_d #define __msa_mod_s_b __builtin_msa_mod_s_b #define __msa_mod_s_h __builtin_msa_mod_s_h #define __msa_mod_s_w __builtin_msa_mod_s_w #define __msa_mod_s_d __builtin_msa_mod_s_d #define __msa_mod_u_b __builtin_msa_mod_u_b #define __msa_mod_u_h __builtin_msa_mod_u_h #define __msa_mod_u_w __builtin_msa_mod_u_w #define __msa_mod_u_d __builtin_msa_mod_u_d #define __msa_dotp_s_h __builtin_msa_dotp_s_h #define __msa_dotp_s_w __builtin_msa_dotp_s_w #define __msa_dotp_s_d __builtin_msa_dotp_s_d #define __msa_dotp_u_h __builtin_msa_dotp_u_h #define __msa_dotp_u_w __builtin_msa_dotp_u_w #define __msa_dotp_u_d __builtin_msa_dotp_u_d #define __msa_dpadd_s_h __builtin_msa_dpadd_s_h #define __msa_dpadd_s_w __builtin_msa_dpadd_s_w #define __msa_dpadd_s_d __builtin_msa_dpadd_s_d #define __msa_dpadd_u_h __builtin_msa_dpadd_u_h #define __msa_dpadd_u_w __builtin_msa_dpadd_u_w #define __msa_dpadd_u_d __builtin_msa_dpadd_u_d #define __msa_dpsub_s_h __builtin_msa_dpsub_s_h #define __msa_dpsub_s_w __builtin_msa_dpsub_s_w #define __msa_dpsub_s_d __builtin_msa_dpsub_s_d #define __msa_dpsub_u_h __builtin_msa_dpsub_u_h #define __msa_dpsub_u_w __builtin_msa_dpsub_u_w #define __msa_dpsub_u_d __builtin_msa_dpsub_u_d #define __msa_sld_b __builtin_msa_sld_b #define __msa_sld_h __builtin_msa_sld_h #define __msa_sld_w __builtin_msa_sld_w #define __msa_sld_d __builtin_msa_sld_d #define __msa_sldi_b __builtin_msa_sldi_b #define __msa_sldi_h __builtin_msa_sldi_h #define __msa_sldi_w __builtin_msa_sldi_w #define __msa_sldi_d __builtin_msa_sldi_d #define __msa_splat_b __builtin_msa_splat_b #define __msa_splat_h __builtin_msa_splat_h #define __msa_splat_w __builtin_msa_splat_w #define __msa_splat_d __builtin_msa_splat_d #define __msa_splati_b __builtin_msa_splati_b #define __msa_splati_h __builtin_msa_splati_h #define __msa_splati_w __builtin_msa_splati_w #define __msa_splati_d __builtin_msa_splati_d #define __msa_pckev_b __builtin_msa_pckev_b #define __msa_pckev_h __builtin_msa_pckev_h #define __msa_pckev_w 
__builtin_msa_pckev_w #define __msa_pckev_d __builtin_msa_pckev_d #define __msa_pckod_b __builtin_msa_pckod_b #define __msa_pckod_h __builtin_msa_pckod_h #define __msa_pckod_w __builtin_msa_pckod_w #define __msa_pckod_d __builtin_msa_pckod_d #define __msa_ilvl_b __builtin_msa_ilvl_b #define __msa_ilvl_h __builtin_msa_ilvl_h #define __msa_ilvl_w __builtin_msa_ilvl_w #define __msa_ilvl_d __builtin_msa_ilvl_d #define __msa_ilvr_b __builtin_msa_ilvr_b #define __msa_ilvr_h __builtin_msa_ilvr_h #define __msa_ilvr_w __builtin_msa_ilvr_w #define __msa_ilvr_d __builtin_msa_ilvr_d #define __msa_ilvev_b __builtin_msa_ilvev_b #define __msa_ilvev_h __builtin_msa_ilvev_h #define __msa_ilvev_w __builtin_msa_ilvev_w #define __msa_ilvev_d __builtin_msa_ilvev_d #define __msa_ilvod_b __builtin_msa_ilvod_b #define __msa_ilvod_h __builtin_msa_ilvod_h #define __msa_ilvod_w __builtin_msa_ilvod_w #define __msa_ilvod_d __builtin_msa_ilvod_d #define __msa_vshf_b __builtin_msa_vshf_b #define __msa_vshf_h __builtin_msa_vshf_h #define __msa_vshf_w __builtin_msa_vshf_w #define __msa_vshf_d __builtin_msa_vshf_d #define __msa_and_v __builtin_msa_and_v #define __msa_andi_b __builtin_msa_andi_b #define __msa_or_v __builtin_msa_or_v #define __msa_ori_b __builtin_msa_ori_b #define __msa_nor_v __builtin_msa_nor_v #define __msa_nori_b __builtin_msa_nori_b #define __msa_xor_v __builtin_msa_xor_v #define __msa_xori_b __builtin_msa_xori_b #define __msa_bmnz_v __builtin_msa_bmnz_v #define __msa_bmnzi_b __builtin_msa_bmnzi_b #define __msa_bmz_v __builtin_msa_bmz_v #define __msa_bmzi_b __builtin_msa_bmzi_b #define __msa_bsel_v __builtin_msa_bsel_v #define __msa_bseli_b __builtin_msa_bseli_b #define __msa_shf_b __builtin_msa_shf_b #define __msa_shf_h __builtin_msa_shf_h #define __msa_shf_w __builtin_msa_shf_w #define __msa_test_bnz_v __builtin_msa_bnz_v #define __msa_test_bz_v __builtin_msa_bz_v #define __msa_fill_b __builtin_msa_fill_b #define __msa_fill_h __builtin_msa_fill_h #define __msa_fill_w __builtin_msa_fill_w #define __msa_fill_d __builtin_msa_fill_d #define __msa_pcnt_b __builtin_msa_pcnt_b #define __msa_pcnt_h __builtin_msa_pcnt_h #define __msa_pcnt_w __builtin_msa_pcnt_w #define __msa_pcnt_d __builtin_msa_pcnt_d #define __msa_nloc_b __builtin_msa_nloc_b #define __msa_nloc_h __builtin_msa_nloc_h #define __msa_nloc_w __builtin_msa_nloc_w #define __msa_nloc_d __builtin_msa_nloc_d #define __msa_nlzc_b __builtin_msa_nlzc_b #define __msa_nlzc_h __builtin_msa_nlzc_h #define __msa_nlzc_w __builtin_msa_nlzc_w #define __msa_nlzc_d __builtin_msa_nlzc_d #define __msa_copy_s_b __builtin_msa_copy_s_b #define __msa_copy_s_h __builtin_msa_copy_s_h #define __msa_copy_s_w __builtin_msa_copy_s_w #define __msa_copy_s_d __builtin_msa_copy_s_d #define __msa_copy_u_b __builtin_msa_copy_u_b #define __msa_copy_u_h __builtin_msa_copy_u_h #define __msa_copy_u_w __builtin_msa_copy_u_w #define __msa_copy_u_d __builtin_msa_copy_u_d #define __msa_insert_b __builtin_msa_insert_b #define __msa_insert_h __builtin_msa_insert_h #define __msa_insert_w __builtin_msa_insert_w #define __msa_insert_d __builtin_msa_insert_d #define __msa_insve_b __builtin_msa_insve_b #define __msa_insve_h __builtin_msa_insve_h #define __msa_insve_w __builtin_msa_insve_w #define __msa_insve_d __builtin_msa_insve_d #define __msa_test_bnz_b __builtin_msa_bnz_b #define __msa_test_bnz_h __builtin_msa_bnz_h #define __msa_test_bnz_w __builtin_msa_bnz_w #define __msa_test_bnz_d __builtin_msa_bnz_d #define __msa_test_bz_b __builtin_msa_bz_b #define __msa_test_bz_h __builtin_msa_bz_h 
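/*
 * [Editor's note: illustrative sketch, not part of msa.h] The fill/insert/copy
 * macros above move data between general-purpose registers and MSA vector lanes.
 * Assuming MSA is enabled, broadcasting a scalar, patching one lane, and reading
 * a lane back might look like:
 *
 *   v4i32 v = __msa_fill_w(42);         // all four lanes hold 42
 *   v = __msa_insert_w(v, 2, 7);        // lane 2 becomes 7
 *   int lane2 = __msa_copy_s_w(v, 2);   // reads back 7
 */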
#define __msa_test_bz_w __builtin_msa_bz_w #define __msa_test_bz_d __builtin_msa_bz_d #define __msa_ldi_b __builtin_msa_ldi_b #define __msa_ldi_h __builtin_msa_ldi_h #define __msa_ldi_w __builtin_msa_ldi_w #define __msa_ldi_d __builtin_msa_ldi_d #define __msa_fcaf_w __builtin_msa_fcaf_w #define __msa_fcaf_d __builtin_msa_fcaf_d #define __msa_fcor_w __builtin_msa_fcor_w #define __msa_fcor_d __builtin_msa_fcor_d #define __msa_fcun_w __builtin_msa_fcun_w #define __msa_fcun_d __builtin_msa_fcun_d #define __msa_fcune_w __builtin_msa_fcune_w #define __msa_fcune_d __builtin_msa_fcune_d #define __msa_fcueq_w __builtin_msa_fcueq_w #define __msa_fcueq_d __builtin_msa_fcueq_d #define __msa_fceq_w __builtin_msa_fceq_w #define __msa_fceq_d __builtin_msa_fceq_d #define __msa_fcne_w __builtin_msa_fcne_w #define __msa_fcne_d __builtin_msa_fcne_d #define __msa_fclt_w __builtin_msa_fclt_w #define __msa_fclt_d __builtin_msa_fclt_d #define __msa_fcult_w __builtin_msa_fcult_w #define __msa_fcult_d __builtin_msa_fcult_d #define __msa_fcle_w __builtin_msa_fcle_w #define __msa_fcle_d __builtin_msa_fcle_d #define __msa_fcule_w __builtin_msa_fcule_w #define __msa_fcule_d __builtin_msa_fcule_d #define __msa_fsaf_w __builtin_msa_fsaf_w #define __msa_fsaf_d __builtin_msa_fsaf_d #define __msa_fsor_w __builtin_msa_fsor_w #define __msa_fsor_d __builtin_msa_fsor_d #define __msa_fsun_w __builtin_msa_fsun_w #define __msa_fsun_d __builtin_msa_fsun_d #define __msa_fsune_w __builtin_msa_fsune_w #define __msa_fsune_d __builtin_msa_fsune_d #define __msa_fsueq_w __builtin_msa_fsueq_w #define __msa_fsueq_d __builtin_msa_fsueq_d #define __msa_fseq_w __builtin_msa_fseq_w #define __msa_fseq_d __builtin_msa_fseq_d #define __msa_fsne_w __builtin_msa_fsne_w #define __msa_fsne_d __builtin_msa_fsne_d #define __msa_fslt_w __builtin_msa_fslt_w #define __msa_fslt_d __builtin_msa_fslt_d #define __msa_fsult_w __builtin_msa_fsult_w #define __msa_fsult_d __builtin_msa_fsult_d #define __msa_fsle_w __builtin_msa_fsle_w #define __msa_fsle_d __builtin_msa_fsle_d #define __msa_fsule_w __builtin_msa_fsule_w #define __msa_fsule_d __builtin_msa_fsule_d #define __msa_fadd_w __builtin_msa_fadd_w #define __msa_fadd_d __builtin_msa_fadd_d #define __msa_fsub_w __builtin_msa_fsub_w #define __msa_fsub_d __builtin_msa_fsub_d #define __msa_fmul_w __builtin_msa_fmul_w #define __msa_fmul_d __builtin_msa_fmul_d #define __msa_fdiv_w __builtin_msa_fdiv_w #define __msa_fdiv_d __builtin_msa_fdiv_d #define __msa_fmadd_w __builtin_msa_fmadd_w #define __msa_fmadd_d __builtin_msa_fmadd_d #define __msa_fmsub_w __builtin_msa_fmsub_w #define __msa_fmsub_d __builtin_msa_fmsub_d #define __msa_fexp2_w __builtin_msa_fexp2_w #define __msa_fexp2_d __builtin_msa_fexp2_d #define __msa_fexdo_h __builtin_msa_fexdo_h #define __msa_fexdo_w __builtin_msa_fexdo_w #define __msa_ftq_h __builtin_msa_ftq_h #define __msa_ftq_w __builtin_msa_ftq_w #define __msa_fmin_w __builtin_msa_fmin_w #define __msa_fmin_d __builtin_msa_fmin_d #define __msa_fmin_a_w __builtin_msa_fmin_a_w #define __msa_fmin_a_d __builtin_msa_fmin_a_d #define __msa_fmax_w __builtin_msa_fmax_w #define __msa_fmax_d __builtin_msa_fmax_d #define __msa_fmax_a_w __builtin_msa_fmax_a_w #define __msa_fmax_a_d __builtin_msa_fmax_a_d #define __msa_mul_q_h __builtin_msa_mul_q_h #define __msa_mul_q_w __builtin_msa_mul_q_w #define __msa_mulr_q_h __builtin_msa_mulr_q_h #define __msa_mulr_q_w __builtin_msa_mulr_q_w #define __msa_madd_q_h __builtin_msa_madd_q_h #define __msa_madd_q_w __builtin_msa_madd_q_w #define __msa_maddr_q_h 
__builtin_msa_maddr_q_h #define __msa_maddr_q_w __builtin_msa_maddr_q_w #define __msa_msub_q_h __builtin_msa_msub_q_h #define __msa_msub_q_w __builtin_msa_msub_q_w #define __msa_msubr_q_h __builtin_msa_msubr_q_h #define __msa_msubr_q_w __builtin_msa_msubr_q_w #define __msa_fclass_w __builtin_msa_fclass_w #define __msa_fclass_d __builtin_msa_fclass_d #define __msa_fsqrt_w __builtin_msa_fsqrt_w #define __msa_fsqrt_d __builtin_msa_fsqrt_d #define __msa_frcp_w __builtin_msa_frcp_w #define __msa_frcp_d __builtin_msa_frcp_d #define __msa_frint_w __builtin_msa_frint_w #define __msa_frint_d __builtin_msa_frint_d #define __msa_frsqrt_w __builtin_msa_frsqrt_w #define __msa_frsqrt_d __builtin_msa_frsqrt_d #define __msa_flog2_w __builtin_msa_flog2_w #define __msa_flog2_d __builtin_msa_flog2_d #define __msa_fexupl_w __builtin_msa_fexupl_w #define __msa_fexupl_d __builtin_msa_fexupl_d #define __msa_fexupr_w __builtin_msa_fexupr_w #define __msa_fexupr_d __builtin_msa_fexupr_d #define __msa_ffql_w __builtin_msa_ffql_w #define __msa_ffql_d __builtin_msa_ffql_d #define __msa_ffqr_w __builtin_msa_ffqr_w #define __msa_ffqr_d __builtin_msa_ffqr_d #define __msa_ftint_s_w __builtin_msa_ftint_s_w #define __msa_ftint_s_d __builtin_msa_ftint_s_d #define __msa_ftint_u_w __builtin_msa_ftint_u_w #define __msa_ftint_u_d __builtin_msa_ftint_u_d #define __msa_ftrunc_s_w __builtin_msa_ftrunc_s_w #define __msa_ftrunc_s_d __builtin_msa_ftrunc_s_d #define __msa_ftrunc_u_w __builtin_msa_ftrunc_u_w #define __msa_ftrunc_u_d __builtin_msa_ftrunc_u_d #define __msa_ffint_s_w __builtin_msa_ffint_s_w #define __msa_ffint_s_d __builtin_msa_ffint_s_d #define __msa_ffint_u_w __builtin_msa_ffint_u_w #define __msa_ffint_u_d __builtin_msa_ffint_u_d #define __msa_cfcmsa __builtin_msa_cfcmsa #define __msa_move_v __builtin_msa_move_v #define __msa_cast_to_vector_float __builtin_msa_cast_to_vector_float #define __msa_cast_to_vector_double __builtin_msa_cast_to_vector_double #define __msa_cast_to_scalar_float __builtin_msa_cast_to_scalar_float #define __msa_cast_to_scalar_double __builtin_msa_cast_to_scalar_double #endif /* defined(__mips_msa) */ #endif /* _MSA_H */ /*===---- riscv_crypto.h - RISC-V Zk* intrinsics ---------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __RISCV_CRYPTO_H #define __RISCV_CRYPTO_H #include #if defined(__cplusplus) extern "C" { #endif #if defined(__riscv_zknd) #if __riscv_xlen == 32 #define __riscv_aes32dsi(x, y, bs) __builtin_riscv_aes32dsi(x, y, bs) #define __riscv_aes32dsmi(x, y, bs) __builtin_riscv_aes32dsmi(x, y, bs) #endif #if __riscv_xlen == 64 static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __riscv_aes64ds(uint64_t __x, uint64_t __y) { return __builtin_riscv_aes64ds(__x, __y); } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __riscv_aes64dsm(uint64_t __x, uint64_t __y) { return __builtin_riscv_aes64dsm(__x, __y); } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __riscv_aes64im(uint64_t __x) { return __builtin_riscv_aes64im(__x); } #endif #endif // defined(__riscv_zknd) #if defined(__riscv_zkne) #if __riscv_xlen == 32 #define __riscv_aes32esi(x, y, bs) __builtin_riscv_aes32esi(x, y, bs) #define __riscv_aes32esmi(x, y, bs) __builtin_riscv_aes32esmi(x, y, bs) #endif #if __riscv_xlen == 64 static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __riscv_aes64es(uint64_t __x, uint64_t __y) { return __builtin_riscv_aes64es(__x, __y); } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __riscv_aes64esm(uint64_t __x, uint64_t __y) { return __builtin_riscv_aes64esm(__x, __y); } #endif #endif // defined(__riscv_zkne) #if defined(__riscv_zknd) || defined(__riscv_zkne) #if __riscv_xlen == 64 #define __riscv_aes64ks1i(x, rnum) __builtin_riscv_aes64ks1i(x, rnum) static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __riscv_aes64ks2(uint64_t __x, uint64_t __y) { return __builtin_riscv_aes64ks2(__x, __y); } #endif #endif // defined(__riscv_zknd) || defined(__riscv_zkne) #if defined(__riscv_zknh) static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_sha256sig0(uint32_t __x) { return __builtin_riscv_sha256sig0(__x); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_sha256sig1(uint32_t __x) { return __builtin_riscv_sha256sig1(__x); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_sha256sum0(uint32_t __x) { return __builtin_riscv_sha256sum0(__x); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_sha256sum1(uint32_t __x) { return __builtin_riscv_sha256sum1(__x); } #if __riscv_xlen == 32 static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_sha512sig0h(uint32_t __x, uint32_t __y) { return __builtin_riscv_sha512sig0h(__x, __y); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_sha512sig0l(uint32_t __x, uint32_t __y) { return __builtin_riscv_sha512sig0l(__x, __y); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_sha512sig1h(uint32_t __x, uint32_t __y) { return __builtin_riscv_sha512sig1h(__x, __y); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_sha512sig1l(uint32_t __x, uint32_t __y) { return __builtin_riscv_sha512sig1l(__x, __y); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_sha512sum0r(uint32_t __x, uint32_t __y) { return __builtin_riscv_sha512sum0r(__x, __y); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 
__riscv_sha512sum1r(uint32_t __x, uint32_t __y) { return __builtin_riscv_sha512sum1r(__x, __y); } #endif #if __riscv_xlen == 64 static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __riscv_sha512sig0(uint64_t __x) { return __builtin_riscv_sha512sig0(__x); } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __riscv_sha512sig1(uint64_t __x) { return __builtin_riscv_sha512sig1(__x); } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __riscv_sha512sum0(uint64_t __x) { return __builtin_riscv_sha512sum0(__x); } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __riscv_sha512sum1(uint64_t __x) { return __builtin_riscv_sha512sum1(__x); } #endif #endif // defined(__riscv_zknh) #if defined(__riscv_zksh) static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_sm3p0(uint32_t __x) { return __builtin_riscv_sm3p0(__x); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_sm3p1(uint32_t __x) { return __builtin_riscv_sm3p1(__x); } #endif // defined(__riscv_zksh) #if defined(__riscv_zksed) #define __riscv_sm4ed(x, y, bs) __builtin_riscv_sm4ed(x, y, bs); #define __riscv_sm4ks(x, y, bs) __builtin_riscv_sm4ks(x, y, bs); #endif // defined(__riscv_zksed) #if defined(__cplusplus) } #endif #endif stdint.htbmintrin.hvecintrin.h// CUDA headers define __noinline__ which interferes with libstdc++'s use of // `__attribute((__noinline__))`. In order to avoid compilation error, // temporarily unset __noinline__ when we include affected libstdc++ header. #pragma push_macro("__noinline__") #undef __noinline__ #include_next "bits/basic_string.h" #pragma pop_macro("__noinline__") openmp_wrappers/math.h/*===---- pmmintrin.h - Implementation of SSE3 intrinsics on PowerPC -------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* Implemented from the specification included in the Intel C++ Compiler User Guide and Reference, version 9.0. */ #ifndef NO_WARN_X86_INTRINSICS /* This header is distributed to simplify porting x86_64 code that makes explicit use of Intel intrinsics to powerpc64le. It is the user's responsibility to determine if the results are acceptable and make additional changes as necessary. Note that much code that uses Intel intrinsics can be rewritten in standard C or GNU C extensions, which are more portable and better optimized across multiple targets. In the specific case of X86 SSE3 intrinsics, the PowerPC VMX/VSX ISA is a good match for most SIMD operations. However the Horizontal add/sub requires the data pairs be permuted into a separate registers with vertical even/odd alignment for the operation. And the addsub operation requires the sign of only the even numbered elements be flipped (xored with -0.0). For larger blocks of code using these intrinsic implementations, the compiler be should be able to schedule instructions to avoid additional latency. In the specific case of the monitor and mwait instructions there are no direct equivalent in the PowerISA at this time. So those intrinsics are not implemented. */ #error \ "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this warning." 
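/*
 * [Editor's note: illustrative sketch, not part of this header] As the comment
 * above explains, this header exists to help port x86 SSE3 code to powerpc64le by
 * emulating the intrinsics with VMX/VSX operations. Assuming such a target, a port
 * would typically keep the original SSE3 calls and just silence the warning, e.g.:
 *
 *   // clang --target=powerpc64le-linux-gnu -DNO_WARN_X86_INTRINSICS -O2 -c port.c
 *   __m128 sums = _mm_hadd_ps(x, y);  // emulated below via vec_perm + vec_add
 */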
#endif #ifndef PMMINTRIN_H_ #define PMMINTRIN_H_ #if defined(__powerpc64__) && \ (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) /* We need definitions from the SSE2 and SSE header files*/ #include extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_addsub_ps(__m128 __X, __m128 __Y) { const __v4sf __even_n0 = {-0.0, 0.0, -0.0, 0.0}; __v4sf __even_neg_Y = vec_xor(__Y, __even_n0); return (__m128)vec_add(__X, __even_neg_Y); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_addsub_pd(__m128d __X, __m128d __Y) { const __v2df __even_n0 = {-0.0, 0.0}; __v2df __even_neg_Y = vec_xor(__Y, __even_n0); return (__m128d)vec_add(__X, __even_neg_Y); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hadd_ps(__m128 __X, __m128 __Y) { __vector unsigned char __xform2 = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B, 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B}; __vector unsigned char __xform1 = {0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F, 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F}; return (__m128)vec_add(vec_perm((__v4sf)__X, (__v4sf)__Y, __xform2), vec_perm((__v4sf)__X, (__v4sf)__Y, __xform1)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hsub_ps(__m128 __X, __m128 __Y) { __vector unsigned char __xform2 = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B, 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B}; __vector unsigned char __xform1 = {0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F, 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F}; return (__m128)vec_sub(vec_perm((__v4sf)__X, (__v4sf)__Y, __xform2), vec_perm((__v4sf)__X, (__v4sf)__Y, __xform1)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hadd_pd(__m128d __X, __m128d __Y) { return (__m128d)vec_add(vec_mergeh((__v2df)__X, (__v2df)__Y), vec_mergel((__v2df)__X, (__v2df)__Y)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hsub_pd(__m128d __X, __m128d __Y) { return (__m128d)vec_sub(vec_mergeh((__v2df)__X, (__v2df)__Y), vec_mergel((__v2df)__X, (__v2df)__Y)); } #ifdef _ARCH_PWR8 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movehdup_ps(__m128 __X) { return (__m128)vec_mergeo((__v4su)__X, (__v4su)__X); } #endif #ifdef _ARCH_PWR8 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_moveldup_ps(__m128 __X) { return (__m128)vec_mergee((__v4su)__X, (__v4su)__X); } #endif extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loaddup_pd(double const *__P) { return (__m128d)vec_splats(*__P); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movedup_pd(__m128d __X) { return _mm_shuffle_pd(__X, __X, _MM_SHUFFLE2(0, 0)); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_lddqu_si128(__m128i const *__P) { return (__m128i)(vec_vsx_ld(0, (signed int const *)__P)); } /* POWER8 / POWER9 have no equivalent for _mm_monitor nor _mm_wait. 
*/ #else #include_next #endif /* defined(__powerpc64__) && \ * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* PMMINTRIN_H_ */ /*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CUDA_BUILTIN_VARS_H #define __CUDA_BUILTIN_VARS_H // Forward declares from vector_types.h. struct uint3; struct dim3; // The file implements built-in CUDA variables using __declspec(property). // https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx // All read accesses of built-in variable fields get converted into calls to a // getter function which in turn calls the appropriate builtin to fetch the // value.
// // Example: // int x = threadIdx.x; // IR output: // %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3 // PTX output: // mov.u32 %r2, %tid.x; #define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC) \ __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD; \ static inline __attribute__((always_inline)) \ __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) { \ return INTRINSIC; \ } #if __cplusplus >= 201103L #define __DELETE =delete #else #define __DELETE #endif // Make sure nobody can create instances of the special variable types. nvcc // also disallows taking address of special variables, so we disable address-of // operator as well. #define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) \ __attribute__((device)) TypeName() __DELETE; \ __attribute__((device)) TypeName(const TypeName &) __DELETE; \ __attribute__((device)) void operator=(const TypeName &) const __DELETE; \ __attribute__((device)) TypeName *operator&() const __DELETE struct __cuda_builtin_threadIdx_t { __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x()); __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y()); __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z()); // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a // uint3). This function is defined after we pull in vector_types.h. __attribute__((device)) operator dim3() const; __attribute__((device)) operator uint3() const; private: __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t); }; struct __cuda_builtin_blockIdx_t { __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x()); __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y()); __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z()); // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a // uint3). This function is defined after we pull in vector_types.h. __attribute__((device)) operator dim3() const; __attribute__((device)) operator uint3() const; private: __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t); }; struct __cuda_builtin_blockDim_t { __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x()); __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y()); __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z()); // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a // dim3). This function is defined after we pull in vector_types.h. __attribute__((device)) operator dim3() const; __attribute__((device)) operator uint3() const; private: __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t); }; struct __cuda_builtin_gridDim_t { __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x()); __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y()); __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z()); // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a // dim3). This function is defined after we pull in vector_types.h. __attribute__((device)) operator dim3() const; __attribute__((device)) operator uint3() const; private: __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t); }; #define __CUDA_BUILTIN_VAR \ extern const __attribute__((device)) __attribute__((weak)) __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx; __CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx; __CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim; __CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim; // warpSize should translate to read of %WARP_SZ but there's currently no // builtin to do so. According to PTX v4.2 docs 'to date, all target // architectures have a WARP_SZ value of 32'. 
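/*
 * [Editor's note: illustrative sketch, not part of this header] Inside a CUDA
 * kernel the structs declared above behave like nvcc's built-in variables: each
 * field read goes through the property getter and ends up as an
 * __nvvm_read_ptx_sreg_* call. The usual global-index computation therefore works
 * unchanged, e.g.:
 *
 *   __global__ void scale(float *data, int n, float k) {
 *     int i = blockIdx.x * blockDim.x + threadIdx.x;
 *     if (i < n)
 *       data[i] *= k;
 *   }
 */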
__attribute__((device)) const int warpSize = 32; #undef __CUDA_DEVICE_BUILTIN #undef __CUDA_BUILTIN_VAR #undef __CUDA_DISALLOW_BUILTINVAR_ACCESS #undef __DELETE #endif /* __CUDA_BUILTIN_VARS_H */ /*===---- __stddef_nullptr_t.h - Definition of nullptr_t -------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* * When -fbuiltin-headers-in-system-modules is set this is a non-modular header * and needs to behave as if it was textual. */ #if !defined(_NULLPTR_T) || \ (__has_feature(modules) && !__building_module(_Builtin_stddef)) #define _NULLPTR_T #ifdef __cplusplus #if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED) namespace std { typedef decltype(nullptr) nullptr_t; } using ::std::nullptr_t; #endif #elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L typedef typeof(nullptr) nullptr_t; #endif #endif /*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __AVX2INTRIN_H #define __AVX2INTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx2,no-evex512"), __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx2,no-evex512"), __min_vector_width__(128))) /* SSE4 Multiple Packed Sums of Absolute Difference. */ /// Computes sixteen sum of absolute difference (SAD) operations on sets of /// four unsigned 8-bit integers from the 256-bit integer vectors \a X and /// \a Y. /// /// Eight SAD results are computed using the lower half of the input /// vectors, and another eight using the upper half. These 16-bit values /// are returned in the lower and upper halves of the 256-bit result, /// respectively. /// /// A single SAD operation selects four bytes from \a X and four bytes from /// \a Y as input. It computes the differences between each \a X byte and /// the corresponding \a Y byte, takes the absolute value of each /// difference, and sums these four values to form one 16-bit result. The /// intrinsic computes 16 of these results with different sets of input /// bytes. /// /// For each set of eight results, the SAD operations use the same four /// bytes from \a Y; the starting bit position for these four bytes is /// specified by \a M[1:0] times 32. The eight operations use successive /// sets of four bytes from \a X; the starting bit position for the first /// set of four bytes is specified by \a M[2] times 32. These bit positions /// are all relative to the 128-bit lane for each set of eight operations. 
/// /// \code{.operation} /// r := 0 /// FOR i := 0 TO 1 /// j := i*3 /// Ybase := M[j+1:j]*32 + i*128 /// Xbase := M[j+2]*32 + i*128 /// FOR k := 0 TO 3 /// temp0 := ABS(X[Xbase+7:Xbase] - Y[Ybase+7:Ybase]) /// temp1 := ABS(X[Xbase+15:Xbase+8] - Y[Ybase+15:Ybase+8]) /// temp2 := ABS(X[Xbase+23:Xbase+16] - Y[Ybase+23:Ybase+16]) /// temp3 := ABS(X[Xbase+31:Xbase+24] - Y[Ybase+31:Ybase+24]) /// result[r+15:r] := temp0 + temp1 + temp2 + temp3 /// Xbase := Xbase + 8 /// r := r + 16 /// ENDFOR /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256i _mm256_mpsadbw_epu8(__m256i X, __m256i Y, const int M); /// \endcode /// /// This intrinsic corresponds to the \c VMPSADBW instruction. /// /// \param X /// A 256-bit integer vector containing one of the inputs. /// \param Y /// A 256-bit integer vector containing one of the inputs. /// \param M /// An unsigned immediate value specifying the starting positions of the /// bytes to operate on. /// \returns A 256-bit vector of [16 x i16] containing the result. #define _mm256_mpsadbw_epu8(X, Y, M) \ ((__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \ (__v32qi)(__m256i)(Y), (int)(M))) /// Computes the absolute value of each signed byte in the 256-bit integer /// vector \a __a and returns each value in the corresponding byte of /// the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPABSB instruction. /// /// \param __a /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi8(__m256i __a) { return (__m256i)__builtin_elementwise_abs((__v32qs)__a); } /// Computes the absolute value of each signed 16-bit element in the 256-bit /// vector of [16 x i16] in \a __a and returns each value in the /// corresponding element of the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPABSW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi16(__m256i __a) { return (__m256i)__builtin_elementwise_abs((__v16hi)__a); } /// Computes the absolute value of each signed 32-bit element in the 256-bit /// vector of [8 x i32] in \a __a and returns each value in the /// corresponding element of the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPABSD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi32(__m256i __a) { return (__m256i)__builtin_elementwise_abs((__v8si)__a); } /// Converts the elements of two 256-bit vectors of [16 x i16] to 8-bit /// integers using signed saturation, and returns the 256-bit result. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*16 /// k := i*8 /// result[7+k:k] := SATURATE8(__a[15+j:j]) /// result[71+k:64+k] := SATURATE8(__b[15+j:j]) /// result[135+k:128+k] := SATURATE8(__a[143+j:128+j]) /// result[199+k:192+k] := SATURATE8(__b[143+j:128+j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPACKSSWB instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] used to generate result[63:0] and /// result[191:128]. /// \param __b /// A 256-bit vector of [16 x i16] used to generate result[127:64] and /// result[255:192]. /// \returns A 256-bit integer vector containing the result. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b); } /// Converts the elements of two 256-bit vectors of [8 x i32] to 16-bit /// integers using signed saturation, and returns the resulting 256-bit /// vector of [16 x i16]. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*32 /// k := i*16 /// result[15+k:k] := SATURATE16(__a[31+j:j]) /// result[79+k:64+k] := SATURATE16(__b[31+j:j]) /// result[143+k:128+k] := SATURATE16(__a[159+j:128+j]) /// result[207+k:192+k] := SATURATE16(__b[159+j:128+j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPACKSSDW instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] used to generate result[63:0] and /// result[191:128]. /// \param __b /// A 256-bit vector of [8 x i32] used to generate result[127:64] and /// result[255:192]. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packs_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b); } /// Converts elements from two 256-bit vectors of [16 x i16] to 8-bit integers /// using unsigned saturation, and returns the 256-bit result. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*16 /// k := i*8 /// result[7+k:k] := SATURATE8U(__a[15+j:j]) /// result[71+k:64+k] := SATURATE8U(__b[15+j:j]) /// result[135+k:128+k] := SATURATE8U(__a[143+j:128+j]) /// result[199+k:192+k] := SATURATE8U(__b[143+j:128+j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPACKUSWB instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] used to generate result[63:0] and /// result[191:128]. /// \param __b /// A 256-bit vector of [16 x i16] used to generate result[127:64] and /// result[255:192]. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packus_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b); } /// Converts elements from two 256-bit vectors of [8 x i32] to 16-bit integers /// using unsigned saturation, and returns the resulting 256-bit vector of /// [16 x i16]. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*32 /// k := i*16 /// result[15+k:k] := SATURATE16U(__V1[31+j:j]) /// result[79+k:64+k] := SATURATE16U(__V2[31+j:j]) /// result[143+k:128+k] := SATURATE16U(__V1[159+j:128+j]) /// result[207+k:192+k] := SATURATE16U(__V2[159+j:128+j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPACKUSDW instruction. /// /// \param __V1 /// A 256-bit vector of [8 x i32] used to generate result[63:0] and /// result[191:128]. /// \param __V2 /// A 256-bit vector of [8 x i32] used to generate result[127:64] and /// result[255:192]. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packus_epi32(__m256i __V1, __m256i __V2) { return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2); } /// Adds 8-bit integers from corresponding bytes of two 256-bit integer /// vectors and returns the lower 8 bits of each sum in the corresponding /// byte of the 256-bit integer vector result (overflow is ignored). /// /// \headerfile /// /// This intrinsic corresponds to the \c VPADDB instruction. 
/// /// \param __a /// A 256-bit integer vector containing one of the source operands. /// \param __b /// A 256-bit integer vector containing one of the source operands. /// \returns A 256-bit integer vector containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qu)__a + (__v32qu)__b); } /// Adds 16-bit integers from corresponding elements of two 256-bit vectors of /// [16 x i16] and returns the lower 16 bits of each sum in the /// corresponding element of the [16 x i16] result (overflow is ignored). /// /// \headerfile /// /// This intrinsic corresponds to the \c VPADDW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a + (__v16hu)__b); } /// Adds 32-bit integers from corresponding elements of two 256-bit vectors of /// [8 x i32] and returns the lower 32 bits of each sum in the corresponding /// element of the [8 x i32] result (overflow is ignored). /// /// \headerfile /// /// This intrinsic corresponds to the \c VPADDD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a + (__v8su)__b); } /// Adds 64-bit integers from corresponding elements of two 256-bit vectors of /// [4 x i64] and returns the lower 64 bits of each sum in the corresponding /// element of the [4 x i64] result (overflow is ignored). /// /// \headerfile /// /// This intrinsic corresponds to the \c VPADDQ instruction. /// /// \param __a /// A 256-bit vector of [4 x i64] containing one of the source operands. /// \param __b /// A 256-bit vector of [4 x i64] containing one of the source operands. /// \returns A 256-bit vector of [4 x i64] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a + (__v4du)__b); } /// Adds 8-bit integers from corresponding bytes of two 256-bit integer /// vectors using signed saturation, and returns each sum in the /// corresponding byte of the 256-bit integer vector result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPADDSB instruction. /// /// \param __a /// A 256-bit integer vector containing one of the source operands. /// \param __b /// A 256-bit integer vector containing one of the source operands. /// \returns A 256-bit integer vector containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_add_sat((__v32qs)__a, (__v32qs)__b); } /// Adds 16-bit integers from corresponding elements of two 256-bit vectors of /// [16 x i16] using signed saturation, and returns the [16 x i16] result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPADDSW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. 
/// \returns A 256-bit vector of [16 x i16] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_add_sat((__v16hi)__a, (__v16hi)__b); } /// Adds 8-bit integers from corresponding bytes of two 256-bit integer /// vectors using unsigned saturation, and returns each sum in the /// corresponding byte of the 256-bit integer vector result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPADDUSB instruction. /// /// \param __a /// A 256-bit integer vector containing one of the source operands. /// \param __b /// A 256-bit integer vector containing one of the source operands. /// \returns A 256-bit integer vector containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_add_sat((__v32qu)__a, (__v32qu)__b); } /// Adds 16-bit integers from corresponding elements of two 256-bit vectors of /// [16 x i16] using unsigned saturation, and returns the [16 x i16] result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPADDUSW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_add_sat((__v16hu)__a, (__v16hu)__b); } /// Uses the lower half of the 256-bit vector \a a as the upper half of a /// temporary 256-bit value, and the lower half of the 256-bit vector \a b /// as the lower half of the temporary value. Right-shifts the temporary /// value by \a n bytes, and uses the lower 16 bytes of the shifted value /// as the lower 16 bytes of the result. Uses the upper halves of \a a and /// \a b to make another temporary value, right shifts by \a n, and uses /// the lower 16 bytes of the shifted value as the upper 16 bytes of the /// result. /// /// \headerfile /// /// \code /// __m256i _mm256_alignr_epi8(__m256i a, __m256i b, const int n); /// \endcode /// /// This intrinsic corresponds to the \c VPALIGNR instruction. /// /// \param a /// A 256-bit integer vector containing source values. /// \param b /// A 256-bit integer vector containing source values. /// \param n /// An immediate value specifying the number of bytes to shift. /// \returns A 256-bit integer vector containing the result. #define _mm256_alignr_epi8(a, b, n) \ ((__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \ (__v32qi)(__m256i)(b), (n))) /// Computes the bitwise AND of the 256-bit integer vectors in \a __a and /// \a __b. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPAND instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a & (__v4du)__b); } /// Computes the bitwise AND of the 256-bit integer vector in \a __b with /// the bitwise NOT of the 256-bit integer vector in \a __a. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPANDN instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. 
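///
/// A typical use is clearing selected bits, for example (illustrative
/// values only):
/// \code
/// __m256i mask  = _mm256_set1_epi32(0x000000FF);
/// __m256i value = _mm256_set1_epi32(0x12345678);
/// __m256i r     = _mm256_andnot_si256(mask, value);
/// // r = ~mask & value, so each 32-bit element is 0x12345600.
/// \endcode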
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_andnot_si256(__m256i __a, __m256i __b)
{
  return (__m256i)(~(__v4du)__a & (__v4du)__b);
}

/// Computes the averages of the corresponding unsigned bytes in the two
/// 256-bit integer vectors in \a __a and \a __b and returns each
/// average in the corresponding byte of the 256-bit result.
///
/// \code{.operation}
/// FOR i := 0 TO 31
///   j := i*8
///   result[j+7:j] := (__a[j+7:j] + __b[j+7:j] + 1) >> 1
/// ENDFOR
/// \endcode
///
/// \headerfile
///
/// This intrinsic corresponds to the \c VPAVGB instruction.
///
/// \param __a
///    A 256-bit integer vector.
/// \param __b
///    A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_avg_epu8(__m256i __a, __m256i __b)
{
  return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b);
}

/// Computes the averages of the corresponding unsigned 16-bit integers in
/// the two 256-bit vectors of [16 x i16] in \a __a and \a __b and returns
/// each average in the corresponding element of the 256-bit result.
///
/// \code{.operation}
/// FOR i := 0 TO 15
///   j := i*16
///   result[j+15:j] := (__a[j+15:j] + __b[j+15:j] + 1) >> 1
/// ENDFOR
/// \endcode
///
/// \headerfile
///
/// This intrinsic corresponds to the \c VPAVGW instruction.
///
/// \param __a
///    A 256-bit vector of [16 x i16].
/// \param __b
///    A 256-bit vector of [16 x i16].
/// \returns A 256-bit vector of [16 x i16] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_avg_epu16(__m256i __a, __m256i __b)
{
  return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b);
}

/// Merges 8-bit integer values from either of the two 256-bit vectors
/// \a __V1 or \a __V2, as specified by the 256-bit mask \a __M and returns
/// the resulting 256-bit integer vector.
///
/// \code{.operation}
/// FOR i := 0 TO 31
///   j := i*8
///   IF __M[7+j] == 0
///     result[7+j:j] := __V1[7+j:j]
///   ELSE
///     result[7+j:j] := __V2[7+j:j]
///   FI
/// ENDFOR
/// \endcode
///
/// \headerfile
///
/// This intrinsic corresponds to the \c VPBLENDVB instruction.
///
/// \param __V1
///    A 256-bit integer vector containing source values.
/// \param __V2
///    A 256-bit integer vector containing source values.
/// \param __M
///    A 256-bit integer vector, with bit [7] of each byte specifying the
///    source for each corresponding byte of the result. When the mask bit
///    is 0, the byte is copied from \a __V1; otherwise, it is copied from
///    \a __V2.
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
{
  return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2,
                                             (__v32qi)__M);
}

/// Merges 16-bit integer values from either of the two 256-bit vectors
/// \a V1 or \a V2, as specified by the immediate integer operand \a M,
/// and returns the resulting 256-bit vector of [16 x i16].
///
/// \code{.operation}
/// FOR i := 0 TO 7
///   j := i*16
///   IF M[i] == 0
///     result[15+j:j] := V1[15+j:j]
///     result[143+j:128+j] := V1[143+j:128+j]
///   ELSE
///     result[15+j:j] := V2[15+j:j]
///     result[143+j:128+j] := V2[143+j:128+j]
///   FI
/// ENDFOR
/// \endcode
///
/// \headerfile
///
/// \code
/// __m256i _mm256_blend_epi16(__m256i V1, __m256i V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the \c VPBLENDW instruction.
///
/// \param V1
///    A 256-bit vector of [16 x i16] containing source values.
/// \param V2 /// A 256-bit vector of [16 x i16] containing source values. /// \param M /// An immediate 8-bit integer operand, with bits [7:0] specifying the /// source for each element of the result. The position of the mask bit /// corresponds to the index of a copied value. When a mask bit is 0, the /// element is copied from \a V1; otherwise, it is copied from \a V2. /// \a M[0] determines the source for elements 0 and 8, \a M[1] for /// elements 1 and 9, and so forth. /// \returns A 256-bit vector of [16 x i16] containing the result. #define _mm256_blend_epi16(V1, V2, M) \ ((__m256i)__builtin_ia32_pblendw256((__v16hi)(__m256i)(V1), \ (__v16hi)(__m256i)(V2), (int)(M))) /// Compares corresponding bytes in the 256-bit integer vectors in \a __a and /// \a __b for equality and returns the outcomes in the corresponding /// bytes of the 256-bit result. /// /// \code{.operation} /// FOR i := 0 TO 31 /// j := i*8 /// result[j+7:j] := (__a[j+7:j] == __b[j+7:j]) ? 0xFF : 0 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPCMPEQB instruction. /// /// \param __a /// A 256-bit integer vector containing one of the inputs. /// \param __b /// A 256-bit integer vector containing one of the inputs. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpeq_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qi)__a == (__v32qi)__b); } /// Compares corresponding elements in the 256-bit vectors of [16 x i16] in /// \a __a and \a __b for equality and returns the outcomes in the /// corresponding elements of the 256-bit result. /// /// \code{.operation} /// FOR i := 0 TO 15 /// j := i*16 /// result[j+15:j] := (__a[j+15:j] == __b[j+15:j]) ? 0xFFFF : 0 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPCMPEQW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing one of the inputs. /// \param __b /// A 256-bit vector of [16 x i16] containing one of the inputs. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpeq_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hi)__a == (__v16hi)__b); } /// Compares corresponding elements in the 256-bit vectors of [8 x i32] in /// \a __a and \a __b for equality and returns the outcomes in the /// corresponding elements of the 256-bit result. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*32 /// result[j+31:j] := (__a[j+31:j] == __b[j+31:j]) ? 0xFFFFFFFF : 0 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPCMPEQD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] containing one of the inputs. /// \param __b /// A 256-bit vector of [8 x i32] containing one of the inputs. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpeq_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8si)__a == (__v8si)__b); } /// Compares corresponding elements in the 256-bit vectors of [4 x i64] in /// \a __a and \a __b for equality and returns the outcomes in the /// corresponding elements of the 256-bit result. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*64 /// result[j+63:j] := (__a[j+63:j] == __b[j+63:j]) ? 0xFFFFFFFFFFFFFFFF : 0 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPCMPEQQ instruction. 
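///
/// For example (illustrative values only):
/// \code
/// __m256i a = _mm256_setr_epi64x(1, 2, 3, 4);
/// __m256i b = _mm256_setr_epi64x(1, 0, 3, 0);
/// __m256i r = _mm256_cmpeq_epi64(a, b);
/// // Elements 0 and 2 compare equal and become all ones
/// // (0xFFFFFFFFFFFFFFFF); elements 1 and 3 become zero.
/// \endcode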
/// /// \param __a /// A 256-bit vector of [4 x i64] containing one of the inputs. /// \param __b /// A 256-bit vector of [4 x i64] containing one of the inputs. /// \returns A 256-bit vector of [4 x i64] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpeq_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4di)__a == (__v4di)__b); } /// Compares corresponding signed bytes in the 256-bit integer vectors in /// \a __a and \a __b for greater-than and returns the outcomes in the /// corresponding bytes of the 256-bit result. /// /// \code{.operation} /// FOR i := 0 TO 31 /// j := i*8 /// result[j+7:j] := (__a[j+7:j] > __b[j+7:j]) ? 0xFF : 0 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPCMPGTB instruction. /// /// \param __a /// A 256-bit integer vector containing one of the inputs. /// \param __b /// A 256-bit integer vector containing one of the inputs. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpgt_epi8(__m256i __a, __m256i __b) { /* This function always performs a signed comparison, but __v32qi is a char which may be signed or unsigned, so use __v32qs. */ return (__m256i)((__v32qs)__a > (__v32qs)__b); } /// Compares corresponding signed elements in the 256-bit vectors of /// [16 x i16] in \a __a and \a __b for greater-than and returns the /// outcomes in the corresponding elements of the 256-bit result. /// /// \code{.operation} /// FOR i := 0 TO 15 /// j := i*16 /// result[j+15:j] := (__a[j+15:j] > __b[j+15:j]) ? 0xFFFF : 0 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPCMPGTW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing one of the inputs. /// \param __b /// A 256-bit vector of [16 x i16] containing one of the inputs. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpgt_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hi)__a > (__v16hi)__b); } /// Compares corresponding signed elements in the 256-bit vectors of /// [8 x i32] in \a __a and \a __b for greater-than and returns the /// outcomes in the corresponding elements of the 256-bit result. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*32 /// result[j+31:j] := (__a[j+31:j] > __b[j+31:j]) ? 0xFFFFFFFF : 0 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPCMPGTD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] containing one of the inputs. /// \param __b /// A 256-bit vector of [8 x i32] containing one of the inputs. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpgt_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8si)__a > (__v8si)__b); } /// Compares corresponding signed elements in the 256-bit vectors of /// [4 x i64] in \a __a and \a __b for greater-than and returns the /// outcomes in the corresponding elements of the 256-bit result. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*64 /// result[j+63:j] := (__a[j+63:j] > __b[j+63:j]) ? 0xFFFFFFFFFFFFFFFF : 0 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPCMPGTQ instruction. /// /// \param __a /// A 256-bit vector of [4 x i64] containing one of the inputs. /// \param __b /// A 256-bit vector of [4 x i64] containing one of the inputs. 
/// \returns A 256-bit vector of [4 x i64] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpgt_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4di)__a > (__v4di)__b); } /// Horizontally adds the adjacent pairs of 16-bit integers from two 256-bit /// vectors of [16 x i16] and returns the lower 16 bits of each sum in an /// element of the [16 x i16] result (overflow is ignored). Sums from /// \a __a are returned in the lower 64 bits of each 128-bit half of the /// result; sums from \a __b are returned in the upper 64 bits of each /// 128-bit half of the result. /// /// \code{.operation} /// FOR i := 0 TO 1 /// j := i*128 /// result[j+15:j] := __a[j+15:j] + __a[j+31:j+16] /// result[j+31:j+16] := __a[j+47:j+32] + __a[j+63:j+48] /// result[j+47:j+32] := __a[j+79:j+64] + __a[j+95:j+80] /// result[j+63:j+48] := __a[j+111:j+96] + __a[j+127:j+112] /// result[j+79:j+64] := __b[j+15:j] + __b[j+31:j+16] /// result[j+95:j+80] := __b[j+47:j+32] + __b[j+63:j+48] /// result[j+111:j+96] := __b[j+79:j+64] + __b[j+95:j+80] /// result[j+127:j+112] := __b[j+111:j+96] + __b[j+127:j+112] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHADDW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hadd_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally adds the adjacent pairs of 32-bit integers from two 256-bit /// vectors of [8 x i32] and returns the lower 32 bits of each sum in an /// element of the [8 x i32] result (overflow is ignored). Sums from \a __a /// are returned in the lower 64 bits of each 128-bit half of the result; /// sums from \a __b are returned in the upper 64 bits of each 128-bit half /// of the result. /// /// \code{.operation} /// FOR i := 0 TO 1 /// j := i*128 /// result[j+31:j] := __a[j+31:j] + __a[j+63:j+32] /// result[j+63:j+32] := __a[j+95:j+64] + __a[j+127:j+96] /// result[j+95:j+64] := __b[j+31:j] + __b[j+63:j+32] /// result[j+127:j+96] := __b[j+95:j+64] + __b[j+127:j+96] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHADDD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hadd_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); } /// Horizontally adds the adjacent pairs of 16-bit integers from two 256-bit /// vectors of [16 x i16] using signed saturation and returns each sum in /// an element of the [16 x i16] result. Sums from \a __a are returned in /// the lower 64 bits of each 128-bit half of the result; sums from \a __b /// are returned in the upper 64 bits of each 128-bit half of the result. 
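///
/// For example (illustrative values only; the pseudocode below gives the
/// exact element placement):
/// \code
/// __m256i a = _mm256_set1_epi16(30000);
/// __m256i b = _mm256_set1_epi16(-2);
/// __m256i r = _mm256_hadds_epi16(a, b);
/// // Each pairwise sum from a (30000 + 30000) saturates to 32767; each
/// // pairwise sum from b is -4.
/// \endcode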
/// /// \code{.operation} /// FOR i := 0 TO 1 /// j := i*128 /// result[j+15:j] := SATURATE16(__a[j+15:j] + __a[j+31:j+16]) /// result[j+31:j+16] := SATURATE16(__a[j+47:j+32] + __a[j+63:j+48]) /// result[j+47:j+32] := SATURATE16(__a[j+79:j+64] + __a[j+95:j+80]) /// result[j+63:j+48] := SATURATE16(__a[j+111:j+96] + __a[j+127:j+112]) /// result[j+79:j+64] := SATURATE16(__b[j+15:j] + __b[j+31:j+16]) /// result[j+95:j+80] := SATURATE16(__b[j+47:j+32] + __b[j+63:j+48]) /// result[j+111:j+96] := SATURATE16(__b[j+79:j+64] + __b[j+95:j+80]) /// result[j+127:j+112] := SATURATE16(__b[j+111:j+96] + __b[j+127:j+112]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHADDSW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hadds_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit /// vectors of [16 x i16] and returns the lower 16 bits of each difference /// in an element of the [16 x i16] result (overflow is ignored). /// Differences from \a __a are returned in the lower 64 bits of each /// 128-bit half of the result; differences from \a __b are returned in the /// upper 64 bits of each 128-bit half of the result. /// /// \code{.operation} /// FOR i := 0 TO 1 /// j := i*128 /// result[j+15:j] := __a[j+15:j] - __a[j+31:j+16] /// result[j+31:j+16] := __a[j+47:j+32] - __a[j+63:j+48] /// result[j+47:j+32] := __a[j+79:j+64] - __a[j+95:j+80] /// result[j+63:j+48] := __a[j+111:j+96] - __a[j+127:j+112] /// result[j+79:j+64] := __b[j+15:j] - __b[j+31:j+16] /// result[j+95:j+80] := __b[j+47:j+32] - __b[j+63:j+48] /// result[j+111:j+96] := __b[j+79:j+64] - __b[j+95:j+80] /// result[j+127:j+112] := __b[j+111:j+96] - __b[j+127:j+112] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHSUBW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hsub_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally subtracts adjacent pairs of 32-bit integers from two 256-bit /// vectors of [8 x i32] and returns the lower 32 bits of each difference in /// an element of the [8 x i32] result (overflow is ignored). Differences /// from \a __a are returned in the lower 64 bits of each 128-bit half of /// the result; differences from \a __b are returned in the upper 64 bits /// of each 128-bit half of the result. /// /// \code{.operation} /// FOR i := 0 TO 1 /// j := i*128 /// result[j+31:j] := __a[j+31:j] - __a[j+63:j+32] /// result[j+63:j+32] := __a[j+95:j+64] - __a[j+127:j+96] /// result[j+95:j+64] := __b[j+31:j] - __b[j+63:j+32] /// result[j+127:j+96] := __b[j+95:j+64] - __b[j+127:j+96] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHSUBD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] containing one of the source operands. 
/// \param __b
///    A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [8 x i32] containing the differences.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_hsub_epi32(__m256i __a, __m256i __b)
{
  return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
}

/// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit
/// vectors of [16 x i16] using signed saturation and returns each difference
/// in an element of the [16 x i16] result. Differences from \a __a are
/// returned in the lower 64 bits of each 128-bit half of the result;
/// differences from \a __b are returned in the upper 64 bits of each
/// 128-bit half of the result.
///
/// \code{.operation}
/// FOR i := 0 TO 1
///   j := i*128
///   result[j+15:j] := SATURATE16(__a[j+15:j] - __a[j+31:j+16])
///   result[j+31:j+16] := SATURATE16(__a[j+47:j+32] - __a[j+63:j+48])
///   result[j+47:j+32] := SATURATE16(__a[j+79:j+64] - __a[j+95:j+80])
///   result[j+63:j+48] := SATURATE16(__a[j+111:j+96] - __a[j+127:j+112])
///   result[j+79:j+64] := SATURATE16(__b[j+15:j] - __b[j+31:j+16])
///   result[j+95:j+80] := SATURATE16(__b[j+47:j+32] - __b[j+63:j+48])
///   result[j+111:j+96] := SATURATE16(__b[j+79:j+64] - __b[j+95:j+80])
///   result[j+127:j+112] := SATURATE16(__b[j+111:j+96] - __b[j+127:j+112])
/// ENDFOR
/// \endcode
///
/// \headerfile
///
/// This intrinsic corresponds to the \c VPHSUBSW instruction.
///
/// \param __a
///    A 256-bit vector of [16 x i16] containing one of the source operands.
/// \param __b
///    A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the differences.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_hsubs_epi16(__m256i __a, __m256i __b)
{
  return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
}

/// Multiplies each unsigned byte from the 256-bit integer vector in \a __a
/// with the corresponding signed byte from the 256-bit integer vector in
/// \a __b, forming signed 16-bit intermediate products. Adds adjacent
/// pairs of those products using signed saturation to form 16-bit sums
/// returned as elements of the [16 x i16] result.
///
/// \code{.operation}
/// FOR i := 0 TO 15
///   j := i*16
///   temp1 := __a[j+7:j] * __b[j+7:j]
///   temp2 := __a[j+15:j+8] * __b[j+15:j+8]
///   result[j+15:j] := SATURATE16(temp1 + temp2)
/// ENDFOR
/// \endcode
///
/// \headerfile
///
/// This intrinsic corresponds to the \c VPMADDUBSW instruction.
///
/// \param __a
///    A 256-bit vector containing one of the source operands.
/// \param __b
///    A 256-bit vector containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maddubs_epi16(__m256i __a, __m256i __b)
{
  return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);
}

/// Multiplies corresponding 16-bit elements of two 256-bit vectors of
/// [16 x i16], forming 32-bit intermediate products, and adds pairs of
/// those products to form 32-bit sums returned as elements of the
/// [8 x i32] result.
///
/// There is only one wraparound case: when all four of the 16-bit sources
/// are \c 0x8000, the result will be \c 0x80000000.
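///
/// For example (illustrative values only):
/// \code
/// __m256i a = _mm256_set1_epi16(1000);
/// __m256i b = _mm256_set1_epi16(2000);
/// __m256i r = _mm256_madd_epi16(a, b);
/// // Each 32-bit element is 1000*2000 + 1000*2000 = 4000000.
/// \endcode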
/// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*32 /// temp1 := __a[j+15:j] * __b[j+15:j] /// temp2 := __a[j+31:j+16] * __b[j+31:j+16] /// result[j+31:j] := temp1 + temp2 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMADDWD instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_madd_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b); } /// Compares the corresponding signed bytes in the two 256-bit integer vectors /// in \a __a and \a __b and returns the larger of each pair in the /// corresponding byte of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMAXSB instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_max((__v32qs)__a, (__v32qs)__b); } /// Compares the corresponding signed 16-bit integers in the two 256-bit /// vectors of [16 x i16] in \a __a and \a __b and returns the larger of /// each pair in the corresponding element of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMAXSW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16]. /// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_max((__v16hi)__a, (__v16hi)__b); } /// Compares the corresponding signed 32-bit integers in the two 256-bit /// vectors of [8 x i32] in \a __a and \a __b and returns the larger of /// each pair in the corresponding element of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMAXSD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32]. /// \param __b /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_max((__v8si)__a, (__v8si)__b); } /// Compares the corresponding unsigned bytes in the two 256-bit integer /// vectors in \a __a and \a __b and returns the larger of each pair in /// the corresponding byte of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMAXUB instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_max((__v32qu)__a, (__v32qu)__b); } /// Compares the corresponding unsigned 16-bit integers in the two 256-bit /// vectors of [16 x i16] in \a __a and \a __b and returns the larger of /// each pair in the corresponding element of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMAXUW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16]. 
/// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_max((__v16hu)__a, (__v16hu)__b); } /// Compares the corresponding unsigned 32-bit integers in the two 256-bit /// vectors of [8 x i32] in \a __a and \a __b and returns the larger of /// each pair in the corresponding element of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMAXUD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32]. /// \param __b /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu32(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_max((__v8su)__a, (__v8su)__b); } /// Compares the corresponding signed bytes in the two 256-bit integer vectors /// in \a __a and \a __b and returns the smaller of each pair in the /// corresponding byte of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMINSB instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_min((__v32qs)__a, (__v32qs)__b); } /// Compares the corresponding signed 16-bit integers in the two 256-bit /// vectors of [16 x i16] in \a __a and \a __b and returns the smaller of /// each pair in the corresponding element of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMINSW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16]. /// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_min((__v16hi)__a, (__v16hi)__b); } /// Compares the corresponding signed 32-bit integers in the two 256-bit /// vectors of [8 x i32] in \a __a and \a __b and returns the smaller of /// each pair in the corresponding element of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMINSD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32]. /// \param __b /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_min((__v8si)__a, (__v8si)__b); } /// Compares the corresponding unsigned bytes in the two 256-bit integer /// vectors in \a __a and \a __b and returns the smaller of each pair in /// the corresponding byte of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMINUB instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. 
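///
/// For example (illustrative values only; note the comparison is unsigned):
/// \code
/// __m256i a = _mm256_set1_epi8(0x10);
/// __m256i b = _mm256_set1_epi8((char)0xF0);
/// __m256i r = _mm256_min_epu8(a, b);
/// // Compared as unsigned values, 0x10 < 0xF0, so every result byte is 0x10.
/// \endcode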
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_min((__v32qu)__a, (__v32qu)__b); } /// Compares the corresponding unsigned 16-bit integers in the two 256-bit /// vectors of [16 x i16] in \a __a and \a __b and returns the smaller of /// each pair in the corresponding element of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMINUW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16]. /// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_min((__v16hu)__a, (__v16hu)__b); } /// Compares the corresponding unsigned 32-bit integers in the two 256-bit /// vectors of [8 x i32] in \a __a and \a __b and returns the smaller of /// each pair in the corresponding element of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMINUD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32]. /// \param __b /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu32(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_min((__v8su)__a, (__v8su)__b); } /// Creates a 32-bit integer mask from the most significant bit of each byte /// in the 256-bit integer vector in \a __a and returns the result. /// /// \code{.operation} /// FOR i := 0 TO 31 /// j := i*8 /// result[i] := __a[j+7] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMOVMSKB instruction. /// /// \param __a /// A 256-bit integer vector containing the source bytes. /// \returns The 32-bit integer mask. static __inline__ int __DEFAULT_FN_ATTRS256 _mm256_movemask_epi8(__m256i __a) { return __builtin_ia32_pmovmskb256((__v32qi)__a); } /// Sign-extends bytes from the 128-bit integer vector in \a __V and returns /// the 16-bit values in the corresponding elements of a 256-bit vector /// of [16 x i16]. /// /// \code{.operation} /// FOR i := 0 TO 15 /// j := i*8 /// k := i*16 /// result[k+15:k] := SignExtend(__V[j+7:j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMOVSXBW instruction. /// /// \param __V /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [16 x i16] containing the sign-extended /// values. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi16(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi); } /// Sign-extends bytes from the lower half of the 128-bit integer vector in /// \a __V and returns the 32-bit values in the corresponding elements of a /// 256-bit vector of [8 x i32]. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*8 /// k := i*32 /// result[k+31:k] := SignExtend(__V[j+7:j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMOVSXBD instruction. /// /// \param __V /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [8 x i32] containing the sign-extended /// values. 
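///
/// For example (illustrative values only; the upper eight bytes of the
/// source are ignored):
/// \code
/// __m128i v = _mm_setr_epi8(-1, 2, -3, 4, -5, 6, -7, 8,
///                           0, 0, 0, 0, 0, 0, 0, 0);
/// __m256i r = _mm256_cvtepi8_epi32(v);
/// // r holds { -1, 2, -3, 4, -5, 6, -7, 8 } as 32-bit integers.
/// \endcode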
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtepi8_epi32(__m128i __V)
{
  /* This function always performs a signed extension, but __v16qi is a char
     which may be signed or unsigned, so use __v16qs. */
  return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);
}

/// Sign-extends the first four bytes from the 128-bit integer vector in
/// \a __V and returns the 64-bit values in the corresponding elements of a
/// 256-bit vector of [4 x i64].
///
/// \code{.operation}
/// result[63:0] := SignExtend(__V[7:0])
/// result[127:64] := SignExtend(__V[15:8])
/// result[191:128] := SignExtend(__V[23:16])
/// result[255:192] := SignExtend(__V[31:24])
/// \endcode
///
/// \headerfile
///
/// This intrinsic corresponds to the \c VPMOVSXBQ instruction.
///
/// \param __V
///    A 128-bit integer vector containing the source bytes.
/// \returns A 256-bit vector of [4 x i64] containing the sign-extended
///    values.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtepi8_epi64(__m128i __V)
{
  /* This function always performs a signed extension, but __v16qi is a char
     which may be signed or unsigned, so use __v16qs. */
  return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di);
}

/// Sign-extends 16-bit elements from the 128-bit vector of [8 x i16] in
/// \a __V and returns the 32-bit values in the corresponding elements of a
/// 256-bit vector of [8 x i32].
///
/// \code{.operation}
/// FOR i := 0 TO 7
///   j := i*16
///   k := i*32
///   result[k+31:k] := SignExtend(__V[j+15:j])
/// ENDFOR
/// \endcode
///
/// \headerfile
///
/// This intrinsic corresponds to the \c VPMOVSXWD instruction.
///
/// \param __V
///    A 128-bit vector of [8 x i16] containing the source values.
/// \returns A 256-bit vector of [8 x i32] containing the sign-extended
///    values.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtepi16_epi32(__m128i __V)
{
  return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si);
}

/// Sign-extends 16-bit elements from the lower half of the 128-bit vector of
/// [8 x i16] in \a __V and returns the 64-bit values in the corresponding
/// elements of a 256-bit vector of [4 x i64].
///
/// \code{.operation}
/// result[63:0] := SignExtend(__V[15:0])
/// result[127:64] := SignExtend(__V[31:16])
/// result[191:128] := SignExtend(__V[47:32])
/// result[255:192] := SignExtend(__V[63:48])
/// \endcode
///
/// \headerfile
///
/// This intrinsic corresponds to the \c VPMOVSXWQ instruction.
///
/// \param __V
///    A 128-bit vector of [8 x i16] containing the source values.
/// \returns A 256-bit vector of [4 x i64] containing the sign-extended
///    values.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtepi16_epi64(__m128i __V)
{
  return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di);
}

/// Sign-extends 32-bit elements from the 128-bit vector of [4 x i32] in
/// \a __V and returns the 64-bit values in the corresponding elements of a
/// 256-bit vector of [4 x i64].
///
/// \code{.operation}
/// result[63:0] := SignExtend(__V[31:0])
/// result[127:64] := SignExtend(__V[63:32])
/// result[191:128] := SignExtend(__V[95:64])
/// result[255:192] := SignExtend(__V[127:96])
/// \endcode
///
/// \headerfile
///
/// This intrinsic corresponds to the \c VPMOVSXDQ instruction.
///
/// \param __V
///    A 128-bit vector of [4 x i32] containing the source values.
/// \returns A 256-bit vector of [4 x i64] containing the sign-extended /// values. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi64(__m128i __V) { return (__m256i)__builtin_convertvector((__v4si)__V, __v4di); } /// Zero-extends bytes from the 128-bit integer vector in \a __V and returns /// the 16-bit values in the corresponding elements of a 256-bit vector /// of [16 x i16]. /// /// \code{.operation} /// FOR i := 0 TO 15 /// j := i*8 /// k := i*16 /// result[k+15:k] := ZeroExtend(__V[j+7:j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMOVZXBW instruction. /// /// \param __V /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [16 x i16] containing the zero-extended /// values. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi16(__m128i __V) { return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi); } /// Zero-extends bytes from the lower half of the 128-bit integer vector in /// \a __V and returns the 32-bit values in the corresponding elements of a /// 256-bit vector of [8 x i32]. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*8 /// k := i*32 /// result[k+31:k] := ZeroExtend(__V[j+7:j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMOVZXBD instruction. /// /// \param __V /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [8 x i32] containing the zero-extended /// values. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi32(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si); } /// Zero-extends the first four bytes from the 128-bit integer vector in /// \a __V and returns the 64-bit values in the corresponding elements of a /// 256-bit vector of [4 x i64]. /// /// \code{.operation} /// result[63:0] := ZeroExtend(__V[7:0]) /// result[127:64] := ZeroExtend(__V[15:8]) /// result[191:128] := ZeroExtend(__V[23:16]) /// result[255:192] := ZeroExtend(__V[31:24]) /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMOVZXBQ instruction. /// /// \param __V /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [4 x i64] containing the zero-extended /// values. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi64(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di); } /// Zero-extends 16-bit elements from the 128-bit vector of [8 x i16] in /// \a __V and returns the 32-bit values in the corresponding elements of a /// 256-bit vector of [8 x i32]. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*16 /// k := i*32 /// result[k+31:k] := ZeroExtend(__V[j+15:j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMOVZXWD instruction. /// /// \param __V /// A 128-bit vector of [8 x i16] containing the source values. /// \returns A 256-bit vector of [8 x i32] containing the zero-extended /// values. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu16_epi32(__m128i __V) { return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si); } /// Zero-extends 16-bit elements from the lower half of the 128-bit vector of /// [8 x i16] in \a __V and returns the 64-bit values in the corresponding /// elements of a 256-bit vector of [4 x i64]. 
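///
/// For example (illustrative values only):
/// \code
/// __m128i v = _mm_setr_epi16(-1, 1, 2, 3, 0, 0, 0, 0);
/// __m256i r = _mm256_cvtepu16_epi64(v);
/// // Only the lower four elements are used; the -1 element (0xFFFF)
/// // zero-extends to 65535, so r = { 65535, 1, 2, 3 }.
/// \endcode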
///
/// \code{.operation}
/// result[63:0] := ZeroExtend(__V[15:0])
/// result[127:64] := ZeroExtend(__V[31:16])
/// result[191:128] := ZeroExtend(__V[47:32])
/// result[255:192] := ZeroExtend(__V[63:48])
/// \endcode
///
/// \headerfile
///
/// This intrinsic corresponds to the \c VPMOVZXWQ instruction.
///
/// \param __V
///    A 128-bit vector of [8 x i16] containing the source values.
/// \returns A 256-bit vector of [4 x i64] containing the zero-extended
///    values.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtepu16_epi64(__m128i __V)
{
  return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di);
}

/// Zero-extends 32-bit elements from the 128-bit vector of [4 x i32] in
/// \a __V and returns the 64-bit values in the corresponding elements of a
/// 256-bit vector of [4 x i64].
///
/// \code{.operation}
/// result[63:0] := ZeroExtend(__V[31:0])
/// result[127:64] := ZeroExtend(__V[63:32])
/// result[191:128] := ZeroExtend(__V[95:64])
/// result[255:192] := ZeroExtend(__V[127:96])
/// \endcode
///
/// \headerfile
///
/// This intrinsic corresponds to the \c VPMOVZXDQ instruction.
///
/// \param __V
///    A 128-bit vector of [4 x i32] containing the source values.
/// \returns A 256-bit vector of [4 x i64] containing the zero-extended
///    values.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtepu32_epi64(__m128i __V)
{
  return (__m256i)__builtin_convertvector((__v4su)__V, __v4di);
}

/// Multiplies signed 32-bit integers from even-numbered elements of two
/// 256-bit vectors of [8 x i32] and returns the 64-bit products in the
/// [4 x i64] result.
///
/// \code{.operation}
/// result[63:0] := __a[31:0] * __b[31:0]
/// result[127:64] := __a[95:64] * __b[95:64]
/// result[191:128] := __a[159:128] * __b[159:128]
/// result[255:192] := __a[223:192] * __b[223:192]
/// \endcode
///
/// \headerfile
///
/// This intrinsic corresponds to the \c VPMULDQ instruction.
///
/// \param __a
///    A 256-bit vector of [8 x i32] containing one of the source operands.
/// \param __b
///    A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [4 x i64] containing the products.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mul_epi32(__m256i __a, __m256i __b)
{
  return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);
}

/// Multiplies signed 16-bit integer elements of two 256-bit vectors of
/// [16 x i16], truncates the 32-bit results to the most significant 18
/// bits, rounds by adding 1, and returns bits [16:1] of each rounded
/// product in the [16 x i16] result.
///
/// \code{.operation}
/// FOR i := 0 TO 15
///   j := i*16
///   temp := ((__a[j+15:j] * __b[j+15:j]) >> 14) + 1
///   result[j+15:j] := temp[16:1]
/// ENDFOR
/// \endcode
///
/// \headerfile
///
/// This intrinsic corresponds to the \c VPMULHRSW instruction.
///
/// \param __a
///    A 256-bit vector of [16 x i16] containing one of the source operands.
/// \param __b
///    A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the rounded products.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mulhrs_epi16(__m256i __a, __m256i __b)
{
  return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);
}

/// Multiplies unsigned 16-bit integer elements of two 256-bit vectors of
/// [16 x i16], and returns the upper 16 bits of each 32-bit product in the
/// [16 x i16] result.
///
/// \headerfile
///
/// This intrinsic corresponds to the \c VPMULHUW instruction.
///
/// \param __a
///    A 256-bit vector of [16 x i16] containing one of the source operands.
/// \param __b
///    A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the products.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mulhi_epu16(__m256i __a, __m256i __b)
{
  return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b);
}

/// Multiplies signed 16-bit integer elements of two 256-bit vectors of
/// [16 x i16], and returns the upper 16 bits of each 32-bit product in the
/// [16 x i16] result.
///
/// \headerfile
///
/// This intrinsic corresponds to the \c VPMULHW instruction.
///
/// \param __a
///    A 256-bit vector of [16 x i16] containing one of the source operands.
/// \param __b
///    A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the products.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mulhi_epi16(__m256i __a, __m256i __b)
{
  return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b);
}

/// Multiplies signed 16-bit integer elements of two 256-bit vectors of
/// [16 x i16], and returns the lower 16 bits of each 32-bit product in the
/// [16 x i16] result.
///
/// \headerfile
///
/// This intrinsic corresponds to the \c VPMULLW instruction.
///
/// \param __a
///    A 256-bit vector of [16 x i16] containing one of the source operands.
/// \param __b
///    A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the products.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mullo_epi16(__m256i __a, __m256i __b)
{
  return (__m256i)((__v16hu)__a * (__v16hu)__b);
}

/// Multiplies signed 32-bit integer elements of two 256-bit vectors of
/// [8 x i32], and returns the lower 32 bits of each 64-bit product in the
/// [8 x i32] result.
///
/// \headerfile
///
/// This intrinsic corresponds to the \c VPMULLD instruction.
///
/// \param __a
///    A 256-bit vector of [8 x i32] containing one of the source operands.
/// \param __b
///    A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [8 x i32] containing the products.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mullo_epi32 (__m256i __a, __m256i __b)
{
  return (__m256i)((__v8su)__a * (__v8su)__b);
}

/// Multiplies unsigned 32-bit integers from even-numbered elements of two
/// 256-bit vectors of [8 x i32] and returns the 64-bit products in the
/// [4 x i64] result.
///
/// \code{.operation}
/// result[63:0] := __a[31:0] * __b[31:0]
/// result[127:64] := __a[95:64] * __b[95:64]
/// result[191:128] := __a[159:128] * __b[159:128]
/// result[255:192] := __a[223:192] * __b[223:192]
/// \endcode
///
/// \headerfile
///
/// This intrinsic corresponds to the \c VPMULUDQ instruction.
///
/// \param __a
///    A 256-bit vector of [8 x i32] containing one of the source operands.
/// \param __b
///    A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [4 x i64] containing the products.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mul_epu32(__m256i __a, __m256i __b)
{
  return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);
}

/// Computes the bitwise OR of the 256-bit integer vectors in \a __a and
/// \a __b.
///
/// \headerfile
///
/// This intrinsic corresponds to the \c VPOR instruction.
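///
/// For example (illustrative values only):
/// \code
/// __m256i a = _mm256_set1_epi32(0x0F0F0F0F);
/// __m256i b = _mm256_set1_epi32((int)0xF0F0F0F0);
/// __m256i r = _mm256_or_si256(a, b);
/// // Every 32-bit element of r is 0xFFFFFFFF.
/// \endcode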
/// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_or_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a | (__v4du)__b); } /// Computes four sum of absolute difference (SAD) operations on sets of eight /// unsigned 8-bit integers from the 256-bit integer vectors \a __a and /// \a __b. /// /// One SAD result is computed for each set of eight bytes from \a __a and /// eight bytes from \a __b. The zero-extended SAD value is returned in the /// corresponding 64-bit element of the result. /// /// A single SAD operation takes the differences between the corresponding /// bytes of \a __a and \a __b, takes the absolute value of each difference, /// and sums these eight values to form one 16-bit result. This operation /// is repeated four times with successive sets of eight bytes. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*64 /// temp0 := ABS(__a[j+7:j] - __b[j+7:j]) /// temp1 := ABS(__a[j+15:j+8] - __b[j+15:j+8]) /// temp2 := ABS(__a[j+23:j+16] - __b[j+23:j+16]) /// temp3 := ABS(__a[j+31:j+24] - __b[j+31:j+24]) /// temp4 := ABS(__a[j+39:j+32] - __b[j+39:j+32]) /// temp5 := ABS(__a[j+47:j+40] - __b[j+47:j+40]) /// temp6 := ABS(__a[j+55:j+48] - __b[j+55:j+48]) /// temp7 := ABS(__a[j+63:j+56] - __b[j+63:j+56]) /// result[j+15:j] := temp0 + temp1 + temp2 + temp3 + /// temp4 + temp5 + temp6 + temp7 /// result[j+63:j+16] := 0 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSADBW instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sad_epu8(__m256i __a, __m256i __b) { return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b); } /// Shuffles 8-bit integers in the 256-bit integer vector \a __a according /// to control information in the 256-bit integer vector \a __b, and /// returns the 256-bit result. In effect there are two separate 128-bit /// shuffles in the lower and upper halves. /// /// \code{.operation} /// FOR i := 0 TO 31 /// j := i*8 /// IF __b[j+7] == 1 /// result[j+7:j] := 0 /// ELSE /// k := __b[j+3:j] * 8 /// IF i > 15 /// k := k + 128 /// FI /// result[j+7:j] := __a[k+7:k] /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSHUFB instruction. /// /// \param __a /// A 256-bit integer vector containing source values. /// \param __b /// A 256-bit integer vector containing control information to determine /// what goes into the corresponding byte of the result. If bit 7 of the /// control byte is 1, the result byte is 0; otherwise, bits 3:0 of the /// control byte specify the index (within the same 128-bit half) of \a __a /// to copy to the result byte. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shuffle_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b); } /// Shuffles 32-bit integers from the 256-bit vector of [8 x i32] in \a a /// according to control information in the integer literal \a imm, and /// returns the 256-bit result. In effect there are two parallel 128-bit /// shuffles in the lower and upper halves. 
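///
/// For example, reversing the four elements within each 128-bit half
/// (illustrative values only):
/// \code
/// __m256i a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// __m256i r = _mm256_shuffle_epi32(a, 0x1B); // 0x1B selects 3, 2, 1, 0
/// // r = { 3, 2, 1, 0, 7, 6, 5, 4 }
/// \endcode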
///
/// \code{.operation}
/// FOR i := 0 TO 3
///   j := i*32
///   k := (imm >> i*2)[1:0] * 32
///   result[j+31:j] := a[k+31:k]
///   result[128+j+31:128+j] := a[128+k+31:128+k]
/// ENDFOR
/// \endcode
///
/// \headerfile
///
/// \code
/// __m256i _mm256_shuffle_epi32(__m256i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the \c VPSHUFD instruction.
///
/// \param a
///    A 256-bit vector of [8 x i32] containing source values.
/// \param imm
///    An immediate 8-bit value specifying which elements to copy from \a a.
///    \a imm[1:0] specifies the index in \a a for elements 0 and 4 of the
///    result, \a imm[3:2] specifies the index for elements 1 and 5, and so
///    forth.
/// \returns A 256-bit vector of [8 x i32] containing the result.
#define _mm256_shuffle_epi32(a, imm) \
  ((__m256i)__builtin_ia32_pshufd256((__v8si)(__m256i)(a), (int)(imm)))

/// Shuffles 16-bit integers from the 256-bit vector of [16 x i16] in \a a
/// according to control information in the integer literal \a imm, and
/// returns the 256-bit result. The upper 64 bits of each 128-bit half
/// are shuffled in parallel; the lower 64 bits of each 128-bit half are
/// copied from \a a unchanged.
///
/// \code{.operation}
/// result[63:0] := a[63:0]
/// result[191:128] := a[191:128]
/// FOR i := 0 TO 3
///   j := i * 16 + 64
///   k := (imm >> i*2)[1:0] * 16 + 64
///   result[j+15:j] := a[k+15:k]
///   result[128+j+15:128+j] := a[128+k+15:128+k]
/// ENDFOR
/// \endcode
///
/// \headerfile
///
/// \code
/// __m256i _mm256_shufflehi_epi16(__m256i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the \c VPSHUFHW instruction.
///
/// \param a
///    A 256-bit vector of [16 x i16] containing source values.
/// \param imm
///    An immediate 8-bit value specifying which elements to copy from \a a.
///    \a imm[1:0] specifies the index in \a a for elements 4 and 12 of the
///    result, \a imm[3:2] specifies the index for elements 5 and 13, and so
///    forth. Indexes are offset by 4 (so 0 means index 4, and so forth).
/// \returns A 256-bit vector of [16 x i16] containing the result.
#define _mm256_shufflehi_epi16(a, imm) \
  ((__m256i)__builtin_ia32_pshufhw256((__v16hi)(__m256i)(a), (int)(imm)))

/// Shuffles 16-bit integers from the 256-bit vector of [16 x i16] \a a
/// according to control information in the integer literal \a imm, and
/// returns the 256-bit [16 x i16] result. The lower 64 bits of each
/// 128-bit half are shuffled; the upper 64 bits of each 128-bit half are
/// copied from \a a unchanged.
///
/// \code{.operation}
/// result[127:64] := a[127:64]
/// result[255:192] := a[255:192]
/// FOR i := 0 TO 3
///   j := i * 16
///   k := (imm >> i*2)[1:0] * 16
///   result[j+15:j] := a[k+15:k]
///   result[128+j+15:128+j] := a[128+k+15:128+k]
/// ENDFOR
/// \endcode
///
/// \headerfile
///
/// \code
/// __m256i _mm256_shufflelo_epi16(__m256i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the \c VPSHUFLW instruction.
///
/// \param a
///    A 256-bit vector of [16 x i16] to use as a source of data for the
///    result.
/// \param imm
///    An immediate 8-bit value specifying which elements to copy from \a a.
///    \a imm[1:0] specifies the index in \a a for elements 0 and 8 of the
///    result, \a imm[3:2] specifies the index for elements 1 and 9, and so
///    forth.
/// \returns A 256-bit vector of [16 x i16] containing the result.
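///
/// For example, reversing the low four elements of each 128-bit half
/// (illustrative values only):
/// \code
/// __m256i a = _mm256_setr_epi16(0, 1, 2,  3,  4,  5,  6,  7,
///                               8, 9, 10, 11, 12, 13, 14, 15);
/// __m256i r = _mm256_shufflelo_epi16(a, 0x1B);
/// // r = { 3, 2, 1, 0, 4, 5, 6, 7, 11, 10, 9, 8, 12, 13, 14, 15 }
/// \endcode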
#define _mm256_shufflelo_epi16(a, imm) \ ((__m256i)__builtin_ia32_pshuflw256((__v16hi)(__m256i)(a), (int)(imm))) /// Sets each byte of the result to the corresponding byte of the 256-bit /// integer vector in \a __a, the negative of that byte, or zero, depending /// on whether the corresponding byte of the 256-bit integer vector in /// \a __b is greater than zero, less than zero, or equal to zero, /// respectively. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSIGNB instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sign_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b); } /// Sets each element of the result to the corresponding element of the /// 256-bit vector of [16 x i16] in \a __a, the negative of that element, /// or zero, depending on whether the corresponding element of the 256-bit /// vector of [16 x i16] in \a __b is greater than zero, less than zero, or /// equal to zero, respectively. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSIGNW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16]. /// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sign_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b); } /// Sets each element of the result to the corresponding element of the /// 256-bit vector of [8 x i32] in \a __a, the negative of that element, or /// zero, depending on whether the corresponding element of the 256-bit /// vector of [8 x i32] in \a __b is greater than zero, less than zero, or /// equal to zero, respectively. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSIGND instruction. /// /// \param __a /// A 256-bit vector of [8 x i32]. /// \param __b /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sign_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b); } /// Shifts each 128-bit half of the 256-bit integer vector \a a left by /// \a imm bytes, shifting in zero bytes, and returns the result. If \a imm /// is greater than 15, the returned result is all zeroes. /// /// \headerfile /// /// \code /// __m256i _mm256_slli_si256(__m256i a, const int imm); /// \endcode /// /// This intrinsic corresponds to the \c VPSLLDQ instruction. /// /// \param a /// A 256-bit integer vector to be shifted. /// \param imm /// An unsigned immediate value specifying the shift count (in bytes). /// \returns A 256-bit integer vector containing the result. #define _mm256_slli_si256(a, imm) \ ((__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))) /// Shifts each 128-bit half of the 256-bit integer vector \a a left by /// \a imm bytes, shifting in zero bytes, and returns the result. If \a imm /// is greater than 15, the returned result is all zeroes. /// /// \headerfile /// /// \code /// __m256i _mm256_bslli_epi128(__m256i a, const int imm); /// \endcode /// /// This intrinsic corresponds to the \c VPSLLDQ instruction. /// /// \param a /// A 256-bit integer vector to be shifted. 
/// \param imm /// An unsigned immediate value specifying the shift count (in bytes). /// \returns A 256-bit integer vector containing the result. #define _mm256_bslli_epi128(a, imm) \ ((__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))) /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a /// left by \a __count bits, shifting in zero bits, and returns the result. /// If \a __count is greater than 15, the returned result is all zeroes. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSLLW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] to be shifted. /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count); } /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a /// left by the number of bits specified by the lower 64 bits of \a __count, /// shifting in zero bits, and returns the result. If \a __count is greater /// than 15, the returned result is all zeroes. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSLLW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] to be shifted. /// \param __count /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count); } /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a /// left by \a __count bits, shifting in zero bits, and returns the result. /// If \a __count is greater than 31, the returned result is all zeroes. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSLLD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] to be shifted. /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count); } /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a /// left by the number of bits given in the lower 64 bits of \a __count, /// shifting in zero bits, and returns the result. If \a __count is greater /// than 31, the returned result is all zeroes. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSLLD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] to be shifted. /// \param __count /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count); } /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __a /// left by \a __count bits, shifting in zero bits, and returns the result. /// If \a __count is greater than 63, the returned result is all zeroes. 
/// /// \headerfile /// /// This intrinsic corresponds to the \c VPSLLQ instruction. /// /// \param __a /// A 256-bit vector of [4 x i64] to be shifted. /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [4 x i64] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi64(__m256i __a, int __count) { return __builtin_ia32_psllqi256((__v4di)__a, __count); } /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __a /// left by the number of bits given in the lower 64 bits of \a __count, /// shifting in zero bits, and returns the result. If \a __count is greater /// than 63, the returned result is all zeroes. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSLLQ instruction. /// /// \param __a /// A 256-bit vector of [4 x i64] to be shifted. /// \param __count /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [4 x i64] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi64(__m256i __a, __m128i __count) { return __builtin_ia32_psllq256((__v4di)__a, __count); } /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a /// right by \a __count bits, shifting in sign bits, and returns the result. /// If \a __count is greater than 15, each element of the result is either /// 0 or -1 according to the corresponding input sign bit. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRAW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] to be shifted. /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count); } /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a /// right by the number of bits given in the lower 64 bits of \a __count, /// shifting in sign bits, and returns the result. If \a __count is greater /// than 15, each element of the result is either 0 or -1 according to the /// corresponding input sign bit. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRAW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] to be shifted. /// \param __count /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count); } /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a /// right by \a __count bits, shifting in sign bits, and returns the result. /// If \a __count is greater than 31, each element of the result is either /// 0 or -1 according to the corresponding input sign bit. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRAD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] to be shifted. /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [8 x i32] containing the result. 
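/* Usage sketch (illustrative, not part of the original header): demonstrates
 * the arithmetic right shift documented above, which replicates each
 * element's sign bit into the vacated positions. Assumes AVX2 is enabled
 * (-mavx2); variable names are hypothetical. */
#if 0 /* illustrative sketch, not compiled */
#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  __m256i v = _mm256_setr_epi32(-8, 8, -1, 1, -256, 256, INT32_MIN, INT32_MAX);
  __m256i r = _mm256_srai_epi32(v, 2);  /* shifts in copies of the sign bit */
  int out[8];
  _mm256_storeu_si256((__m256i *)out, r);
  /* Prints: -2 2 -1 0 -64 64 -536870912 536870911 */
  for (int i = 0; i < 8; ++i)
    printf("%d ", out[i]);
  printf("\n");
  return 0;
}
#endif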
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count); } /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a /// right by the number of bits given in the lower 64 bits of \a __count, /// shifting in sign bits, and returns the result. If \a __count is greater /// than 31, each element of the result is either 0 or -1 according to the /// corresponding input sign bit. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRAD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] to be shifted. /// \param __count /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count); } /// Shifts each 128-bit half of the 256-bit integer vector in \a a right by /// \a imm bytes, shifting in zero bytes, and returns the result. If /// \a imm is greater than 15, the returned result is all zeroes. /// /// \headerfile /// /// \code /// __m256i _mm256_srli_si256(__m256i a, const int imm); /// \endcode /// /// This intrinsic corresponds to the \c VPSRLDQ instruction. /// /// \param a /// A 256-bit integer vector to be shifted. /// \param imm /// An unsigned immediate value specifying the shift count (in bytes). /// \returns A 256-bit integer vector containing the result. #define _mm256_srli_si256(a, imm) \ ((__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))) /// Shifts each 128-bit half of the 256-bit integer vector in \a a right by /// \a imm bytes, shifting in zero bytes, and returns the result. If /// \a imm is greater than 15, the returned result is all zeroes. /// /// \headerfile /// /// \code /// __m256i _mm256_bsrli_epi128(__m256i a, const int imm); /// \endcode /// /// This intrinsic corresponds to the \c VPSRLDQ instruction. /// /// \param a /// A 256-bit integer vector to be shifted. /// \param imm /// An unsigned immediate value specifying the shift count (in bytes). /// \returns A 256-bit integer vector containing the result. #define _mm256_bsrli_epi128(a, imm) \ ((__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))) /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a /// right by \a __count bits, shifting in zero bits, and returns the result. /// If \a __count is greater than 15, the returned result is all zeroes. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRLW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] to be shifted. /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count); } /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a /// right by the number of bits given in the lower 64 bits of \a __count, /// shifting in zero bits, and returns the result. If \a __count is greater /// than 15, the returned result is all zeroes. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRLW instruction. 
/// /// \param __a /// A 256-bit vector of [16 x i16] to be shifted. /// \param __count /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count); } /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a /// right by \a __count bits, shifting in zero bits, and returns the result. /// If \a __count is greater than 31, the returned result is all zeroes. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRLD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] to be shifted. /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count); } /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a /// right by the number of bits given in the lower 64 bits of \a __count, /// shifting in zero bits, and returns the result. If \a __count is greater /// than 31, the returned result is all zeroes. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRLD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] to be shifted. /// \param __count /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count); } /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __a /// right by \a __count bits, shifting in zero bits, and returns the result. /// If \a __count is greater than 63, the returned result is all zeroes. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRLQ instruction. /// /// \param __a /// A 256-bit vector of [4 x i64] to be shifted. /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [4 x i64] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi64(__m256i __a, int __count) { return __builtin_ia32_psrlqi256((__v4di)__a, __count); } /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __a /// right by the number of bits given in the lower 64 bits of \a __count, /// shifting in zero bits, and returns the result. If \a __count is greater /// than 63, the returned result is all zeroes. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRLQ instruction. /// /// \param __a /// A 256-bit vector of [4 x i64] to be shifted. /// \param __count /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [4 x i64] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi64(__m256i __a, __m128i __count) { return __builtin_ia32_psrlq256((__v4di)__a, __count); } /// Subtracts 8-bit integers from corresponding bytes of two 256-bit integer /// vectors. 
Returns the lower 8 bits of each difference in the /// corresponding byte of the 256-bit integer vector result (overflow is /// ignored). /// /// \code{.operation} /// FOR i := 0 TO 31 /// j := i*8 /// result[j+7:j] := __a[j+7:j] - __b[j+7:j] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSUBB instruction. /// /// \param __a /// A 256-bit integer vector containing the minuends. /// \param __b /// A 256-bit integer vector containing the subtrahends. /// \returns A 256-bit integer vector containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qu)__a - (__v32qu)__b); } /// Subtracts 16-bit integers from corresponding elements of two 256-bit /// vectors of [16 x i16]. Returns the lower 16 bits of each difference in /// the corresponding element of the [16 x i16] result (overflow is /// ignored). /// /// \code{.operation} /// FOR i := 0 TO 15 /// j := i*16 /// result[j+15:j] := __a[j+15:j] - __b[j+15:j] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSUBW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing the minuends. /// \param __b /// A 256-bit vector of [16 x i16] containing the subtrahends. /// \returns A 256-bit vector of [16 x i16] containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a - (__v16hu)__b); } /// Subtracts 32-bit integers from corresponding elements of two 256-bit /// vectors of [8 x i32]. Returns the lower 32 bits of each difference in /// the corresponding element of the [8 x i32] result (overflow is ignored). /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*32 /// result[j+31:j] := __a[j+31:j] - __b[j+31:j] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSUBD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] containing the minuends. /// \param __b /// A 256-bit vector of [8 x i32] containing the subtrahends. /// \returns A 256-bit vector of [8 x i32] containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a - (__v8su)__b); } /// Subtracts 64-bit integers from corresponding elements of two 256-bit /// vectors of [4 x i64]. Returns the lower 64 bits of each difference in /// the corresponding element of the [4 x i64] result (overflow is ignored). /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*64 /// result[j+63:j] := __a[j+63:j] - __b[j+63:j] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSUBQ instruction. /// /// \param __a /// A 256-bit vector of [4 x i64] containing the minuends. /// \param __b /// A 256-bit vector of [4 x i64] containing the subtrahends. /// \returns A 256-bit vector of [4 x i64] containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a - (__v4du)__b); } /// Subtracts 8-bit integers from corresponding bytes of two 256-bit integer /// vectors using signed saturation, and returns each difference in the /// corresponding byte of the 256-bit integer vector result. 
/// /// \code{.operation} /// FOR i := 0 TO 31 /// j := i*8 /// result[j+7:j] := SATURATE8(__a[j+7:j] - __b[j+7:j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSUBSB instruction. /// /// \param __a /// A 256-bit integer vector containing the minuends. /// \param __b /// A 256-bit integer vector containing the subtrahends. /// \returns A 256-bit integer vector containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_sub_sat((__v32qs)__a, (__v32qs)__b); } /// Subtracts 16-bit integers from corresponding elements of two 256-bit /// vectors of [16 x i16] using signed saturation, and returns each /// difference in the corresponding element of the [16 x i16] result. /// /// \code{.operation} /// FOR i := 0 TO 15 /// j := i*16 /// result[j+15:j] := SATURATE16(__a[j+15:j] - __b[j+15:j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSUBSW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing the minuends. /// \param __b /// A 256-bit vector of [16 x i16] containing the subtrahends. /// \returns A 256-bit vector of [16 x i16] containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_sub_sat((__v16hi)__a, (__v16hi)__b); } /// Subtracts 8-bit integers from corresponding bytes of two 256-bit integer /// vectors using unsigned saturation, and returns each difference in the /// corresponding byte of the 256-bit integer vector result. For each byte, /// computes result = SATURATE8U(__a - __b). /// /// \code{.operation} /// FOR i := 0 TO 31 /// j := i*8 /// result[j+7:j] := SATURATE8U(__a[j+7:j] - __b[j+7:j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSUBUSB instruction. /// /// \param __a /// A 256-bit integer vector containing the minuends. /// \param __b /// A 256-bit integer vector containing the subtrahends. /// \returns A 256-bit integer vector containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_sub_sat((__v32qu)__a, (__v32qu)__b); } /// Subtracts 16-bit integers from corresponding elements of two 256-bit /// vectors of [16 x i16] using unsigned saturation, and returns each /// difference in the corresponding element of the [16 x i16] result. /// /// \code{.operation} /// FOR i := 0 TO 15 /// j := i*16 /// result[j+15:j] := SATURATE16U(__a[j+15:j] - __b[j+15:j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSUBUSW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing the minuends. /// \param __b /// A 256-bit vector of [16 x i16] containing the subtrahends. /// \returns A 256-bit vector of [16 x i16] containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_sub_sat((__v16hu)__a, (__v16hu)__b); } /// Unpacks and interleaves 8-bit integers from parts of the 256-bit integer /// vectors in \a __a and \a __b to form the 256-bit result. Specifically, /// uses the upper 64 bits of each 128-bit half of \a __a and \a __b as /// input; other bits in these parameters are ignored. 
/// /// \code{.operation} /// result[7:0] := __a[71:64] /// result[15:8] := __b[71:64] /// result[23:16] := __a[79:72] /// result[31:24] := __b[79:72] /// . . . /// result[127:120] := __b[127:120] /// result[135:128] := __a[199:192] /// . . . /// result[255:248] := __b[255:248] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPUNPCKHBW instruction. /// /// \param __a /// A 256-bit integer vector used as the source for the even-numbered bytes /// of the result. /// \param __b /// A 256-bit integer vector used as the source for the odd-numbered bytes /// of the result. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31); } /// Unpacks and interleaves 16-bit integers from parts of the 256-bit vectors /// of [16 x i16] in \a __a and \a __b to return the resulting 256-bit /// vector of [16 x i16]. Specifically, uses the upper 64 bits of each /// 128-bit half of \a __a and \a __b as input; other bits in these /// parameters are ignored. /// /// \code{.operation} /// result[15:0] := __a[79:64] /// result[31:16] := __b[79:64] /// result[47:32] := __a[95:80] /// result[63:48] := __b[95:80] /// . . . /// result[127:112] := __b[127:112] /// result[143:128] := __a[207:192] /// . . . /// result[255:240] := __b[255:240] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPUNPCKHWD instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] used as the source for the even-numbered /// elements of the result. /// \param __b /// A 256-bit vector of [16 x i16] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); } /// Unpacks and interleaves 32-bit integers from parts of the 256-bit vectors /// of [8 x i32] in \a __a and \a __b to return the resulting 256-bit vector /// of [8 x i32]. Specifically, uses the upper 64 bits of each 128-bit half /// of \a __a and \a __b as input; other bits in these parameters are /// ignored. /// /// \code{.operation} /// result[31:0] := __a[95:64] /// result[63:32] := __b[95:64] /// result[95:64] := __a[127:96] /// result[127:96] := __b[127:96] /// result[159:128] := __a[223:192] /// result[191:160] := __b[223:192] /// result[223:192] := __a[255:224] /// result[255:224] := __b[255:224] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPUNPCKHDQ instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] used as the source for the even-numbered /// elements of the result. /// \param __b /// A 256-bit vector of [8 x i32] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [8 x i32] containing the result. 
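/* Usage sketch (illustrative, not part of the original header): interleaves
 * the upper 64 bits of each 128-bit half of two [8 x i32] vectors with the
 * intrinsic documented above. Assumes AVX2 is enabled (-mavx2); variable
 * names are hypothetical. */
#if 0 /* illustrative sketch, not compiled */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m256i a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
  __m256i b = _mm256_setr_epi32(10, 11, 12, 13, 14, 15, 16, 17);
  __m256i r = _mm256_unpackhi_epi32(a, b);
  int out[8];
  _mm256_storeu_si256((__m256i *)out, r);
  /* Prints: 2 12 3 13 6 16 7 17 -- the interleave stays within each
   * 128-bit half; it does not cross lanes. */
  for (int i = 0; i < 8; ++i)
    printf("%d ", out[i]);
  printf("\n");
  return 0;
}
#endif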
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7); } /// Unpacks and interleaves 64-bit integers from parts of the 256-bit vectors /// of [4 x i64] in \a __a and \a __b to return the resulting 256-bit vector /// of [4 x i64]. Specifically, uses the upper 64 bits of each 128-bit half /// of \a __a and \a __b as input; other bits in these parameters are /// ignored. /// /// \code{.operation} /// result[63:0] := __a[127:64] /// result[127:64] := __b[127:64] /// result[191:128] := __a[255:192] /// result[255:192] := __b[255:192] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPUNPCKHQDQ instruction. /// /// \param __a /// A 256-bit vector of [4 x i64] used as the source for the even-numbered /// elements of the result. /// \param __b /// A 256-bit vector of [4 x i64] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [4 x i64] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi64(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 1, 4+1, 3, 4+3); } /// Unpacks and interleaves 8-bit integers from parts of the 256-bit integer /// vectors in \a __a and \a __b to form the 256-bit result. Specifically, /// uses the lower 64 bits of each 128-bit half of \a __a and \a __b as /// input; other bits in these parameters are ignored. /// /// \code{.operation} /// result[7:0] := __a[7:0] /// result[15:8] := __b[7:0] /// result[23:16] := __a[15:8] /// result[31:24] := __b[15:8] /// . . . /// result[127:120] := __b[63:56] /// result[135:128] := __a[135:128] /// . . . /// result[255:248] := __b[191:184] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPUNPCKLBW instruction. /// /// \param __a /// A 256-bit integer vector used as the source for the even-numbered bytes /// of the result. /// \param __b /// A 256-bit integer vector used as the source for the odd-numbered bytes /// of the result. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23); } /// Unpacks and interleaves 16-bit integers from parts of the 256-bit vectors /// of [16 x i16] in \a __a and \a __b to return the resulting 256-bit /// vector of [16 x i16]. Specifically, uses the lower 64 bits of each /// 128-bit half of \a __a and \a __b as input; other bits in these /// parameters are ignored. /// /// \code{.operation} /// result[15:0] := __a[15:0] /// result[31:16] := __b[15:0] /// result[47:32] := __a[31:16] /// result[63:48] := __b[31:16] /// . . . /// result[127:112] := __b[63:48] /// result[143:128] := __a[143:128] /// . . . /// result[255:240] := __b[191:176] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPUNPCKLWD instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] used as the source for the even-numbered /// elements of the result. /// \param __b /// A 256-bit vector of [16 x i16] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [16 x i16] containing the result. 
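/* Usage sketch (illustrative, not part of the original header): a common
 * widening idiom built on the 16-bit unpack intrinsics documented above --
 * interleaving with a zero vector zero-extends unsigned 16-bit elements to
 * 32 bits. Function and variable names are hypothetical. */
#if 0 /* illustrative sketch, not compiled */
#include <immintrin.h>

/* Zero-extend 16 unsigned 16-bit elements to two [8 x i32] vectors. Because
 * the unpacks operate within each 128-bit half, "lo" receives source
 * elements 0..3 and 8..11, and "hi" receives elements 4..7 and 12..15. */
static inline void widen_u16_to_u32(__m256i src, __m256i *lo, __m256i *hi) {
  const __m256i zero = _mm256_setzero_si256();
  *lo = _mm256_unpacklo_epi16(src, zero);
  *hi = _mm256_unpackhi_epi16(src, zero);
}
#endif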
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11); } /// Unpacks and interleaves 32-bit integers from parts of the 256-bit vectors /// of [8 x i32] in \a __a and \a __b to return the resulting 256-bit vector /// of [8 x i32]. Specifically, uses the lower 64 bits of each 128-bit half /// of \a __a and \a __b as input; other bits in these parameters are /// ignored. /// /// \code{.operation} /// result[31:0] := __a[31:0] /// result[63:32] := __b[31:0] /// result[95:64] := __a[63:32] /// result[127:96] := __b[63:32] /// result[159:128] := __a[159:128] /// result[191:160] := __b[159:128] /// result[223:192] := __a[191:160] /// result[255:224] := __b[191:160] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPUNPCKLDQ instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] used as the source for the even-numbered /// elements of the result. /// \param __b /// A 256-bit vector of [8 x i32] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5); } /// Unpacks and interleaves 64-bit integers from parts of the 256-bit vectors /// of [4 x i64] in \a __a and \a __b to return the resulting 256-bit vector /// of [4 x i64]. Specifically, uses the lower 64 bits of each 128-bit half /// of \a __a and \a __b as input; other bits in these parameters are /// ignored. /// /// \code{.operation} /// result[63:0] := __a[63:0] /// result[127:64] := __b[63:0] /// result[191:128] := __a[191:128] /// result[255:192] := __b[191:128] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPUNPCKLQDQ instruction. /// /// \param __a /// A 256-bit vector of [4 x i64] used as the source for the even-numbered /// elements of the result. /// \param __b /// A 256-bit vector of [4 x i64] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [4 x i64] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi64(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 0, 4+0, 2, 4+2); } /// Computes the bitwise XOR of the 256-bit integer vectors in \a __a and /// \a __b. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPXOR instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a ^ (__v4du)__b); } /// Loads the 256-bit integer vector from memory \a __V using a non-temporal /// memory hint and returns the vector. \a __V must be aligned on a 32-byte /// boundary. /// /// \headerfile /// /// This intrinsic corresponds to the \c VMOVNTDQA instruction. /// /// \param __V /// A pointer to the 32-byte aligned memory containing the vector to load. /// \returns A 256-bit integer vector loaded from memory. 
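/* Usage sketch (illustrative, not part of the original header): sums a large
 * buffer with the non-temporal load documented above. The buffer must be
 * 32-byte aligned and, in this sketch, a multiple of four elements long.
 * Function and variable names are hypothetical; assumes AVX2 (-mavx2). */
#if 0 /* illustrative sketch, not compiled */
#include <immintrin.h>
#include <stddef.h>

long long sum_stream(const long long *buf, size_t n) {
  __m256i acc = _mm256_setzero_si256();
  for (size_t i = 0; i < n; i += 4)
    acc = _mm256_add_epi64(acc,
                           _mm256_stream_load_si256((const __m256i *)&buf[i]));
  long long out[4];
  _mm256_storeu_si256((__m256i *)out, acc);
  return out[0] + out[1] + out[2] + out[3];
}
#endif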
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_stream_load_si256(const void *__V) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V); } /// Broadcasts the 32-bit floating-point value from the low element of the /// 128-bit vector of [4 x float] in \a __X to all elements of the result's /// 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VBROADCASTSS instruction. /// /// \param __X /// A 128-bit vector of [4 x float] whose low element will be broadcast. /// \returns A 128-bit vector of [4 x float] containing the result. static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_broadcastss_ps(__m128 __X) { return (__m128)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0); } /// Broadcasts the 64-bit floating-point value from the low element of the /// 128-bit vector of [2 x double] in \a __a to both elements of the /// result's 128-bit vector of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the \c MOVDDUP instruction. /// /// \param __a /// A 128-bit vector of [2 x double] whose low element will be broadcast. /// \returns A 128-bit vector of [2 x double] containing the result. static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_broadcastsd_pd(__m128d __a) { return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); } /// Broadcasts the 32-bit floating-point value from the low element of the /// 128-bit vector of [4 x float] in \a __X to all elements of the /// result's 256-bit vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VBROADCASTSS instruction. /// /// \param __X /// A 128-bit vector of [4 x float] whose low element will be broadcast. /// \returns A 256-bit vector of [8 x float] containing the result. static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcastss_ps(__m128 __X) { return (__m256)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0, 0, 0, 0, 0); } /// Broadcasts the 64-bit floating-point value from the low element of the /// 128-bit vector of [2 x double] in \a __X to all elements of the /// result's 256-bit vector of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VBROADCASTSD instruction. /// /// \param __X /// A 128-bit vector of [2 x double] whose low element will be broadcast. /// \returns A 256-bit vector of [4 x double] containing the result. static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_broadcastsd_pd(__m128d __X) { return (__m256d)__builtin_shufflevector((__v2df)__X, (__v2df)__X, 0, 0, 0, 0); } /// Broadcasts the 128-bit integer data from \a __X to both the lower and /// upper halves of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VBROADCASTI128 instruction. /// /// \param __X /// A 128-bit integer vector to be broadcast. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastsi128_si256(__m128i __X) { return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 1, 0, 1); } #define _mm_broadcastsi128_si256(X) _mm256_broadcastsi128_si256(X) /// Merges 32-bit integer elements from either of the two 128-bit vectors of /// [4 x i32] in \a V1 or \a V2 to the result's 128-bit vector of [4 x i32], /// as specified by the immediate integer operand \a M. 
/// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*32 /// IF M[i] == 0 /// result[31+j:j] := V1[31+j:j] /// ELSE /// result[31+j:j] := V2[31+j:j] /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128i _mm_blend_epi32(__m128i V1, __m128i V2, const int M); /// \endcode /// /// This intrinsic corresponds to the \c VPBLENDD instruction. /// /// \param V1 /// A 128-bit vector of [4 x i32] containing source values. /// \param V2 /// A 128-bit vector of [4 x i32] containing source values. /// \param M /// An immediate 8-bit integer operand, with bits [3:0] specifying the /// source for each element of the result. The position of the mask bit /// corresponds to the index of a copied value. When a mask bit is 0, the /// element is copied from \a V1; otherwise, it is copied from \a V2. /// \returns A 128-bit vector of [4 x i32] containing the result. #define _mm_blend_epi32(V1, V2, M) \ ((__m128i)__builtin_ia32_pblendd128((__v4si)(__m128i)(V1), \ (__v4si)(__m128i)(V2), (int)(M))) /// Merges 32-bit integer elements from either of the two 256-bit vectors of /// [8 x i32] in \a V1 or \a V2 to return a 256-bit vector of [8 x i32], /// as specified by the immediate integer operand \a M. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*32 /// IF M[i] == 0 /// result[31+j:j] := V1[31+j:j] /// ELSE /// result[31+j:j] := V2[31+j:j] /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256i _mm256_blend_epi32(__m256i V1, __m256i V2, const int M); /// \endcode /// /// This intrinsic corresponds to the \c VPBLENDD instruction. /// /// \param V1 /// A 256-bit vector of [8 x i32] containing source values. /// \param V2 /// A 256-bit vector of [8 x i32] containing source values. /// \param M /// An immediate 8-bit integer operand, with bits [7:0] specifying the /// source for each element of the result. The position of the mask bit /// corresponds to the index of a copied value. When a mask bit is 0, the /// element is copied from \a V1; otherwise, it is copied from \a V2. /// \returns A 256-bit vector of [8 x i32] containing the result. #define _mm256_blend_epi32(V1, V2, M) \ ((__m256i)__builtin_ia32_pblendd256((__v8si)(__m256i)(V1), \ (__v8si)(__m256i)(V2), (int)(M))) /// Broadcasts the low byte from the 128-bit integer vector in \a __X to all /// bytes of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPBROADCASTB instruction. /// /// \param __X /// A 128-bit integer vector whose low byte will be broadcast. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastb_epi8(__m128i __X) { return (__m256i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } /// Broadcasts the low element from the 128-bit vector of [8 x i16] in \a __X /// to all elements of the result's 256-bit vector of [16 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPBROADCASTW instruction. /// /// \param __X /// A 128-bit vector of [8 x i16] whose low element will be broadcast. /// \returns A 256-bit vector of [16 x i16] containing the result. 
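/* Usage sketch (illustrative, not part of the original header): selects
 * between two [8 x i32] vectors with the _mm256_blend_epi32 macro defined
 * earlier above; mask bit i chooses the source of element i. Assumes AVX2
 * (-mavx2); variable names are hypothetical. */
#if 0 /* illustrative sketch, not compiled */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m256i a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
  __m256i b = _mm256_setr_epi32(10, 11, 12, 13, 14, 15, 16, 17);
  /* 0xAA = 0b10101010: even-indexed elements come from a, odd from b. */
  __m256i r = _mm256_blend_epi32(a, b, 0xAA);
  int out[8];
  _mm256_storeu_si256((__m256i *)out, r);
  for (int i = 0; i < 8; ++i)
    printf("%d ", out[i]);  /* 0 11 2 13 4 15 6 17 */
  printf("\n");
  return 0;
}
#endif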
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastw_epi16(__m128i __X) { return (__m256i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } /// Broadcasts the low element from the 128-bit vector of [4 x i32] in \a __X /// to all elements of the result's 256-bit vector of [8 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPBROADCASTD instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] whose low element will be broadcast. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastd_epi32(__m128i __X) { return (__m256i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0, 0, 0, 0, 0); } /// Broadcasts the low element from the 128-bit vector of [2 x i64] in \a __X /// to all elements of the result's 256-bit vector of [4 x i64]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPBROADCASTQ instruction. /// /// \param __X /// A 128-bit vector of [2 x i64] whose low element will be broadcast. /// \returns A 256-bit vector of [4 x i64] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastq_epi64(__m128i __X) { return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0, 0, 0); } /// Broadcasts the low byte from the 128-bit integer vector in \a __X to all /// bytes of the 128-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPBROADCASTB instruction. /// /// \param __X /// A 128-bit integer vector whose low byte will be broadcast. /// \returns A 128-bit integer vector containing the result. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastb_epi8(__m128i __X) { return (__m128i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } /// Broadcasts the low element from the 128-bit vector of [8 x i16] in /// \a __X to all elements of the result's 128-bit vector of [8 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPBROADCASTW instruction. /// /// \param __X /// A 128-bit vector of [8 x i16] whose low element will be broadcast. /// \returns A 128-bit vector of [8 x i16] containing the result. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastw_epi16(__m128i __X) { return (__m128i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0); } /// Broadcasts the low element from the 128-bit vector of [4 x i32] in \a __X /// to all elements of the result's vector of [4 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPBROADCASTD instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] whose low element will be broadcast. /// \returns A 128-bit vector of [4 x i32] containing the result. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastd_epi32(__m128i __X) { return (__m128i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0); } /// Broadcasts the low element from the 128-bit vector of [2 x i64] in \a __X /// to both elements of the result's 128-bit vector of [2 x i64]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPBROADCASTQ instruction. /// /// \param __X /// A 128-bit vector of [2 x i64] whose low element will be broadcast. /// \returns A 128-bit vector of [2 x i64] containing the result. 
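/* Usage sketch (illustrative, not part of the original header): splats a
 * scalar 32-bit value across a 256-bit vector by combining the SSE2
 * _mm_cvtsi32_si128 intrinsic with the _mm256_broadcastd_epi32 intrinsic
 * defined above; the effect is equivalent to _mm256_set1_epi32. The helper
 * name is hypothetical. */
#if 0 /* illustrative sketch, not compiled */
#include <immintrin.h>

static inline __m256i splat_i32(int x) {
  return _mm256_broadcastd_epi32(_mm_cvtsi32_si128(x));
}
#endif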
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastq_epi64(__m128i __X) { return (__m128i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0); } /// Sets the result's 256-bit vector of [8 x i32] to copies of elements of the /// 256-bit vector of [8 x i32] in \a __a as specified by indexes in the /// elements of the 256-bit vector of [8 x i32] in \a __b. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*32 /// k := __b[j+2:j] * 32 /// result[j+31:j] := __a[k+31:k] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPERMD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] containing the source values. /// \param __b /// A 256-bit vector of [8 x i32] containing indexes of values to use from /// \a __a. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b); } /// Sets the result's 256-bit vector of [4 x double] to copies of elements of /// the 256-bit vector of [4 x double] in \a V as specified by the /// immediate value \a M. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*64 /// k := (M >> i*2)[1:0] * 64 /// result[j+63:j] := V[k+63:k] /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256d _mm256_permute4x64_pd(__m256d V, const int M); /// \endcode /// /// This intrinsic corresponds to the \c VPERMPD instruction. /// /// \param V /// A 256-bit vector of [4 x double] containing the source values. /// \param M /// An immediate 8-bit value specifying which elements to copy from \a V. /// \a M[1:0] specifies the index in \a a for element 0 of the result, /// \a M[3:2] specifies the index for element 1, and so forth. /// \returns A 256-bit vector of [4 x double] containing the result. #define _mm256_permute4x64_pd(V, M) \ ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M))) /// Sets the result's 256-bit vector of [8 x float] to copies of elements of /// the 256-bit vector of [8 x float] in \a __a as specified by indexes in /// the elements of the 256-bit vector of [8 x i32] in \a __b. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*32 /// k := __b[j+2:j] * 32 /// result[j+31:j] := __a[k+31:k] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPERMPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing the source values. /// \param __b /// A 256-bit vector of [8 x i32] containing indexes of values to use from /// \a __a. /// \returns A 256-bit vector of [8 x float] containing the result. static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_permutevar8x32_ps(__m256 __a, __m256i __b) { return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b); } /// Sets the result's 256-bit vector of [4 x i64] result to copies of elements /// of the 256-bit vector of [4 x i64] in \a V as specified by the /// immediate value \a M. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*64 /// k := (M >> i*2)[1:0] * 64 /// result[j+63:j] := V[k+63:k] /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256i _mm256_permute4x64_epi64(__m256i V, const int M); /// \endcode /// /// This intrinsic corresponds to the \c VPERMQ instruction. /// /// \param V /// A 256-bit vector of [4 x i64] containing the source values. /// \param M /// An immediate 8-bit value specifying which elements to copy from \a V. 
/// \a M[1:0] specifies the index in \a a for element 0 of the result, /// \a M[3:2] specifies the index for element 1, and so forth. /// \returns A 256-bit vector of [4 x i64] containing the result. #define _mm256_permute4x64_epi64(V, M) \ ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M))) /// Sets each half of the 256-bit result either to zero or to one of the /// four possible 128-bit halves of the 256-bit vectors \a V1 and \a V2, /// as specified by the immediate value \a M. /// /// \code{.operation} /// FOR i := 0 TO 1 /// j := i*128 /// k := M >> (i*4) /// IF k[3] == 0 /// CASE (k[1:0]) OF /// 0: result[127+j:j] := V1[127:0] /// 1: result[127+j:j] := V1[255:128] /// 2: result[127+j:j] := V2[127:0] /// 3: result[127+j:j] := V2[255:128] /// ESAC /// ELSE /// result[127+j:j] := 0 /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256i _mm256_permute2x128_si256(__m256i V1, __m256i V2, const int M); /// \endcode /// /// This intrinsic corresponds to the \c VPERM2I128 instruction. /// /// \param V1 /// A 256-bit integer vector containing source values. /// \param V2 /// A 256-bit integer vector containing source values. /// \param M /// An immediate value specifying how to form the result. Bits [3:0] /// control the lower half of the result, bits [7:4] control the upper half. /// Within each 4-bit control value, if bit 3 is 1, the result is zero, /// otherwise bits [1:0] determine the source as follows. \n /// 0: the lower half of \a V1 \n /// 1: the upper half of \a V1 \n /// 2: the lower half of \a V2 \n /// 3: the upper half of \a V2 /// \returns A 256-bit integer vector containing the result. #define _mm256_permute2x128_si256(V1, V2, M) \ ((__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (int)(M))) /// Extracts half of the 256-bit vector \a V to the 128-bit result. If bit 0 /// of the immediate \a M is zero, extracts the lower half of the result; /// otherwise, extracts the upper half. /// /// \headerfile /// /// \code /// __m128i _mm256_extracti128_si256(__m256i V, const int M); /// \endcode /// /// This intrinsic corresponds to the \c VEXTRACTI128 instruction. /// /// \param V /// A 256-bit integer vector containing the source values. /// \param M /// An immediate value specifying which half of \a V to extract. /// \returns A 128-bit integer vector containing the result. #define _mm256_extracti128_si256(V, M) \ ((__m128i)__builtin_ia32_extract128i256((__v4di)(__m256i)(V), (int)(M))) /// Copies the 256-bit vector \a V1 to the result, then overwrites half of the /// result with the 128-bit vector \a V2. If bit 0 of the immediate \a M /// is zero, overwrites the lower half of the result; otherwise, /// overwrites the upper half. /// /// \headerfile /// /// \code /// __m256i _mm256_inserti128_si256(__m256i V1, __m128i V2, const int M); /// \endcode /// /// This intrinsic corresponds to the \c VINSERTI128 instruction. /// /// \param V1 /// A 256-bit integer vector containing a source value. /// \param V2 /// A 128-bit integer vector containing a source value. /// \param M /// An immediate value specifying where to put \a V2 in the result. /// \returns A 256-bit integer vector containing the result. 
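/* Usage sketch (illustrative, not part of the original header): swaps the
 * two 128-bit halves of a vector with the extract/insert macros documented
 * above; _mm256_permute2x128_si256(v, v, 0x01), defined earlier, achieves
 * the same in a single instruction. The helper name is hypothetical;
 * assumes AVX2 (-mavx2). */
#if 0 /* illustrative sketch, not compiled */
#include <immintrin.h>

static inline __m256i swap_halves(__m256i v) {
  __m128i lo = _mm256_extracti128_si256(v, 0);  /* lower half */
  __m128i hi = _mm256_extracti128_si256(v, 1);  /* upper half */
  /* Start from v, overwrite its lower half with hi, then its upper with lo. */
  __m256i r = _mm256_inserti128_si256(v, hi, 0);
  return _mm256_inserti128_si256(r, lo, 1);
}
#endif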
#define _mm256_inserti128_si256(V1, V2, M) \ ((__m256i)__builtin_ia32_insert128i256((__v4di)(__m256i)(V1), \ (__v2di)(__m128i)(V2), (int)(M))) /// Conditionally loads eight 32-bit integer elements from memory \a __X, if /// the most significant bit of the corresponding element in the mask /// \a __M is set; otherwise, sets that element of the result to zero. /// Returns the 256-bit [8 x i32] result. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*32 /// IF __M[j+31] == 1 /// result[j+31:j] := Load32(__X+(i*4)) /// ELSE /// result[j+31:j] := 0 /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMASKMOVD instruction. /// /// \param __X /// A pointer to the memory used for loading values. /// \param __M /// A 256-bit vector of [8 x i32] containing the mask bits. /// \returns A 256-bit vector of [8 x i32] containing the loaded or zeroed /// elements. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskload_epi32(int const *__X, __m256i __M) { return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M); } /// Conditionally loads four 64-bit integer elements from memory \a __X, if /// the most significant bit of the corresponding element in the mask /// \a __M is set; otherwise, sets that element of the result to zero. /// Returns the 256-bit [4 x i64] result. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*64 /// IF __M[j+63] == 1 /// result[j+63:j] := Load64(__X+(i*8)) /// ELSE /// result[j+63:j] := 0 /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMASKMOVQ instruction. /// /// \param __X /// A pointer to the memory used for loading values. /// \param __M /// A 256-bit vector of [4 x i64] containing the mask bits. /// \returns A 256-bit vector of [4 x i64] containing the loaded or zeroed /// elements. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskload_epi64(long long const *__X, __m256i __M) { return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, (__v4di)__M); } /// Conditionally loads four 32-bit integer elements from memory \a __X, if /// the most significant bit of the corresponding element in the mask /// \a __M is set; otherwise, sets that element of the result to zero. /// Returns the 128-bit [4 x i32] result. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*32 /// IF __M[j+31] == 1 /// result[j+31:j] := Load32(__X+(i*4)) /// ELSE /// result[j+31:j] := 0 /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMASKMOVD instruction. /// /// \param __X /// A pointer to the memory used for loading values. /// \param __M /// A 128-bit vector of [4 x i32] containing the mask bits. /// \returns A 128-bit vector of [4 x i32] containing the loaded or zeroed /// elements. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskload_epi32(int const *__X, __m128i __M) { return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M); } /// Conditionally loads two 64-bit integer elements from memory \a __X, if /// the most significant bit of the corresponding element in the mask /// \a __M is set; otherwise, sets that element of the result to zero. /// Returns the 128-bit [2 x i64] result. /// /// \code{.operation} /// FOR i := 0 TO 1 /// j := i*64 /// IF __M[j+63] == 1 /// result[j+63:j] := Load64(__X+(i*8)) /// ELSE /// result[j+63:j] := 0 /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMASKMOVQ instruction. 
/// /// \param __X /// A pointer to the memory used for loading values. /// \param __M /// A 128-bit vector of [2 x i64] containing the mask bits. /// \returns A 128-bit vector of [2 x i64] containing the loaded or zeroed /// elements. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskload_epi64(long long const *__X, __m128i __M) { return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M); } /// Conditionally stores eight 32-bit integer elements from the 256-bit vector /// of [8 x i32] in \a __Y to memory \a __X, if the most significant bit of /// the corresponding element in the mask \a __M is set; otherwise, the /// memory element is unchanged. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*32 /// IF __M[j+31] == 1 /// Store32(__X+(i*4), __Y[j+31:j]) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMASKMOVD instruction. /// /// \param __X /// A pointer to the memory used for storing values. /// \param __M /// A 256-bit vector of [8 x i32] containing the mask bits. /// \param __Y /// A 256-bit vector of [8 x i32] containing the values to store. static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y) { __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y); } /// Conditionally stores four 64-bit integer elements from the 256-bit vector /// of [4 x i64] in \a __Y to memory \a __X, if the most significant bit of /// the corresponding element in the mask \a __M is set; otherwise, the /// memory element is unchanged. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*64 /// IF __M[j+63] == 1 /// Store64(__X+(i*8), __Y[j+63:j]) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMASKMOVQ instruction. /// /// \param __X /// A pointer to the memory used for storing values. /// \param __M /// A 256-bit vector of [4 x i64] containing the mask bits. /// \param __Y /// A 256-bit vector of [4 x i64] containing the values to store. static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y) { __builtin_ia32_maskstoreq256((__v4di *)__X, (__v4di)__M, (__v4di)__Y); } /// Conditionally stores four 32-bit integer elements from the 128-bit vector /// of [4 x i32] in \a __Y to memory \a __X, if the most significant bit of /// the corresponding element in the mask \a __M is set; otherwise, the /// memory element is unchanged. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*32 /// IF __M[j+31] == 1 /// Store32(__X+(i*4), __Y[j+31:j]) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMASKMOVD instruction. /// /// \param __X /// A pointer to the memory used for storing values. /// \param __M /// A 128-bit vector of [4 x i32] containing the mask bits. /// \param __Y /// A 128-bit vector of [4 x i32] containing the values to store. static __inline__ void __DEFAULT_FN_ATTRS128 _mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y) { __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y); } /// Conditionally stores two 64-bit integer elements from the 128-bit vector /// of [2 x i64] in \a __Y to memory \a __X, if the most significant bit of /// the corresponding element in the mask \a __M is set; otherwise, the /// memory element is unchanged. 
/// /// \code{.operation} /// FOR i := 0 TO 1 /// j := i*64 /// IF __M[j+63] == 1 /// Store64(__X+(i*8), __Y[j+63:j]) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMASKMOVQ instruction. /// /// \param __X /// A pointer to the memory used for storing values. /// \param __M /// A 128-bit vector of [2 x i64] containing the mask bits. /// \param __Y /// A 128-bit vector of [2 x i64] containing the values to store. static __inline__ void __DEFAULT_FN_ATTRS128 _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y) { __builtin_ia32_maskstoreq(( __v2di *)__X, (__v2di)__M, (__v2di)__Y); } /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __X /// left by the number of bits given in the corresponding element of the /// 256-bit vector of [8 x i32] in \a __Y, shifting in zero bits, and /// returns the result. If the shift count for any element is greater than /// 31, the result for that element is zero. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSLLVD instruction. /// /// \param __X /// A 256-bit vector of [8 x i32] to be shifted. /// \param __Y /// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y); } /// Shifts each 32-bit element of the 128-bit vector of [4 x i32] in \a __X /// left by the number of bits given in the corresponding element of the /// 128-bit vector of [4 x i32] in \a __Y, shifting in zero bits, and /// returns the result. If the shift count for any element is greater than /// 31, the result for that element is zero. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSLLVD instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] to be shifted. /// \param __Y /// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [4 x i32] containing the result. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y); } /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __X /// left by the number of bits given in the corresponding element of the /// 256-bit vector of [4 x i64] in \a __Y, shifting in zero bits, and /// returns the result. If the shift count for any element is greater than /// 63, the result for that element is zero. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSLLVQ instruction. /// /// \param __X /// A 256-bit vector of [4 x i64] to be shifted. /// \param __Y /// A 256-bit vector of [4 x i64] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [4 x i64] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y); } /// Shifts each 64-bit element of the 128-bit vector of [2 x i64] in \a __X /// left by the number of bits given in the corresponding element of the /// 128-bit vector of [2 x i64] in \a __Y, shifting in zero bits, and /// returns the result. If the shift count for any element is greater than /// 63, the result for that element is zero. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSLLVQ instruction. 
/// /// \param __X /// A 128-bit vector of [2 x i64] to be shifted. /// \param __Y /// A 128-bit vector of [2 x i64] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [2 x i64] containing the result. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y); } /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __X /// right by the number of bits given in the corresponding element of the /// 256-bit vector of [8 x i32] in \a __Y, shifting in sign bits, and /// returns the result. If the shift count for any element is greater than /// 31, the result for that element is 0 or -1 according to the sign bit /// for that element. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRAVD instruction. /// /// \param __X /// A 256-bit vector of [8 x i32] to be shifted. /// \param __Y /// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y); } /// Shifts each 32-bit element of the 128-bit vector of [4 x i32] in \a __X /// right by the number of bits given in the corresponding element of the /// 128-bit vector of [4 x i32] in \a __Y, shifting in sign bits, and /// returns the result. If the shift count for any element is greater than /// 31, the result for that element is 0 or -1 according to the sign bit /// for that element. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRAVD instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] to be shifted. /// \param __Y /// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [4 x i32] containing the result. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y); } /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __X /// right by the number of bits given in the corresponding element of the /// 256-bit vector of [8 x i32] in \a __Y, shifting in zero bits, and /// returns the result. If the shift count for any element is greater than /// 31, the result for that element is zero. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRLVD instruction. /// /// \param __X /// A 256-bit vector of [8 x i32] to be shifted. /// \param __Y /// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y); } /// Shifts each 32-bit element of the 128-bit vector of [4 x i32] in \a __X /// right by the number of bits given in the corresponding element of the /// 128-bit vector of [4 x i32] in \a __Y, shifting in zero bits, and /// returns the result. If the shift count for any element is greater than /// 31, the result for that element is zero. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRLVD instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] to be shifted. 
/// \param __Y /// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [4 x i32] containing the result. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y); } /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __X /// right by the number of bits given in the corresponding element of the /// 256-bit vector of [4 x i64] in \a __Y, shifting in zero bits, and /// returns the result. If the shift count for any element is greater than /// 63, the result for that element is zero. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRLVQ instruction. /// /// \param __X /// A 256-bit vector of [4 x i64] to be shifted. /// \param __Y /// A 256-bit vector of [4 x i64] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [4 x i64] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y); } /// Shifts each 64-bit element of the 128-bit vector of [2 x i64] in \a __X /// right by the number of bits given in the corresponding element of the /// 128-bit vector of [2 x i64] in \a __Y, shifting in zero bits, and /// returns the result. If the shift count for any element is greater than /// 63, the result for that element is zero. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRLVQ instruction. /// /// \param __X /// A 128-bit vector of [2 x i64] to be shifted. /// \param __Y /// A 128-bit vector of [2 x i64] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [2 x i64] containing the result. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y); } /// Conditionally gathers two 64-bit floating-point values, either from the /// 128-bit vector of [2 x double] in \a a, or from memory \a m using scaled /// indexes from the 128-bit vector of [4 x i32] in \a i. The 128-bit vector /// of [2 x double] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 1 /// j := element*64 /// k := element*32 /// IF mask[j+63] == 0 /// result[j+63:j] := a[j+63:j] /// ELSE /// result[j+63:j] := Load64(m + SignExtend(i[k+31:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128d _mm_mask_i32gather_pd(__m128d a, const double *m, __m128i i, /// __m128d mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERDPD instruction. /// /// \param a /// A 128-bit vector of [2 x double] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. Only /// the first two elements are used. /// \param mask /// A 128-bit vector of [2 x double] containing the mask. The most /// significant bit of each element in the mask vector represents the mask /// bits. If a mask bit is zero, the corresponding value from vector \a a /// is gathered; otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [2 x double] containing the gathered values. 
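/* Illustrative usage sketch (not part of the original header): the masked
 * load/store and per-lane shift intrinsics documented above can be combined to
 * process an array tail of fewer than eight ints without reading or writing
 * past the end. The helper name and the shift-count argument are hypothetical,
 * assuming this translation unit is compiled with AVX2 enabled. */
static __inline__ void scale_first_n_by_pow2(int *p, int n, __m256i shift_counts) {
  /* Lane i of the mask has its most significant bit set when i < n, so only
   * those lanes are loaded and stored; the other lanes never touch memory. */
  const __m256i lane_ids = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
  const __m256i mask = _mm256_cmpgt_epi32(_mm256_set1_epi32(n), lane_ids);
  __m256i v = _mm256_maskload_epi32(p, mask);
  v = _mm256_sllv_epi32(v, shift_counts); /* independent left shift per lane */
  _mm256_maskstore_epi32(p, mask, v);
}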
#define _mm_mask_i32gather_pd(a, m, i, mask, s) \ ((__m128d)__builtin_ia32_gatherd_pd((__v2df)(__m128i)(a), \ (double const *)(m), \ (__v4si)(__m128i)(i), \ (__v2df)(__m128d)(mask), (s))) /// Conditionally gathers four 64-bit floating-point values, either from the /// 256-bit vector of [4 x double] in \a a, or from memory \a m using scaled /// indexes from the 128-bit vector of [4 x i32] in \a i. The 256-bit vector /// of [4 x double] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*64 /// k := element*32 /// IF mask[j+63] == 0 /// result[j+63:j] := a[j+63:j] /// ELSE /// result[j+63:j] := Load64(m + SignExtend(i[k+31:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256d _mm256_mask_i32gather_pd(__m256d a, const double *m, __m128i i, /// __m256d mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERDPD instruction. /// /// \param a /// A 256-bit vector of [4 x double] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. /// \param mask /// A 256-bit vector of [4 x double] containing the mask. The most /// significant bit of each element in the mask vector represents the mask /// bits. If a mask bit is zero, the corresponding value from vector \a a /// is gathered; otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 256-bit vector of [4 x double] containing the gathered values. #define _mm256_mask_i32gather_pd(a, m, i, mask, s) \ ((__m256d)__builtin_ia32_gatherd_pd256((__v4df)(__m256d)(a), \ (double const *)(m), \ (__v4si)(__m128i)(i), \ (__v4df)(__m256d)(mask), (s))) /// Conditionally gathers two 64-bit floating-point values, either from the /// 128-bit vector of [2 x double] in \a a, or from memory \a m using scaled /// indexes from the 128-bit vector of [2 x i64] in \a i. The 128-bit vector /// of [2 x double] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 1 /// j := element*64 /// k := element*64 /// IF mask[j+63] == 0 /// result[j+63:j] := a[j+63:j] /// ELSE /// result[j+63:j] := Load64(m + SignExtend(i[k+63:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128d _mm_mask_i64gather_pd(__m128d a, const double *m, __m128i i, /// __m128d mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERQPD instruction. /// /// \param a /// A 128-bit vector of [2 x double] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [2 x i64] containing signed indexes into \a m. /// \param mask /// A 128-bit vector of [2 x double] containing the mask. The most /// significant bit of each element in the mask vector represents the mask /// bits. If a mask bit is zero, the corresponding value from vector \a a /// is gathered; otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [2 x double] containing the gathered values. 
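/* Illustrative sketch (not part of the original header), assuming AVX2: gather
 * two doubles through _mm_mask_i32gather_pd only where a caller-supplied
 * predicate vector selects the memory path; masked-off lanes keep the values
 * already in 'fallback'. The helper and parameter names are hypothetical. */
static __inline__ __m128d gather_pd_or_fallback(const double *table, __m128i idx,
                                                __m128d fallback,
                                                __m128d take_from_memory) {
  /* Lanes of 'take_from_memory' must have their sign bit set (for example,
   * produced by a _mm_cmp_pd comparison) for table[idx] to be loaded; lanes
   * with a clear sign bit pass 'fallback' through unchanged. */
  return _mm_mask_i32gather_pd(fallback, table, idx, take_from_memory, 8);
}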
#define _mm_mask_i64gather_pd(a, m, i, mask, s) \ ((__m128d)__builtin_ia32_gatherq_pd((__v2df)(__m128d)(a), \ (double const *)(m), \ (__v2di)(__m128i)(i), \ (__v2df)(__m128d)(mask), (s))) /// Conditionally gathers four 64-bit floating-point values, either from the /// 256-bit vector of [4 x double] in \a a, or from memory \a m using scaled /// indexes from the 256-bit vector of [4 x i64] in \a i. The 256-bit vector /// of [4 x double] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*64 /// k := element*64 /// IF mask[j+63] == 0 /// result[j+63:j] := a[j+63:j] /// ELSE /// result[j+63:j] := Load64(m + SignExtend(i[k+63:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256d _mm256_mask_i64gather_pd(__m256d a, const double *m, __m256i i, /// __m256d mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERQPD instruction. /// /// \param a /// A 256-bit vector of [4 x double] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 256-bit vector of [4 x i64] containing signed indexes into \a m. /// \param mask /// A 256-bit vector of [4 x double] containing the mask. The most /// significant bit of each element in the mask vector represents the mask /// bits. If a mask bit is zero, the corresponding value from vector \a a /// is gathered; otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 256-bit vector of [4 x double] containing the gathered values. #define _mm256_mask_i64gather_pd(a, m, i, mask, s) \ ((__m256d)__builtin_ia32_gatherq_pd256((__v4df)(__m256d)(a), \ (double const *)(m), \ (__v4di)(__m256i)(i), \ (__v4df)(__m256d)(mask), (s))) /// Conditionally gathers four 32-bit floating-point values, either from the /// 128-bit vector of [4 x float] in \a a, or from memory \a m using scaled /// indexes from the 128-bit vector of [4 x i32] in \a i. The 128-bit vector /// of [4 x float] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*32 /// k := element*32 /// IF mask[j+31] == 0 /// result[j+31:j] := a[j+31:j] /// ELSE /// result[j+31:j] := Load32(m + SignExtend(i[k+31:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128 _mm_mask_i32gather_ps(__m128 a, const float *m, __m128i i, /// __m128 mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERDPS instruction. /// /// \param a /// A 128-bit vector of [4 x float] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. /// \param mask /// A 128-bit vector of [4 x float] containing the mask. The most /// significant bit of each element in the mask vector represents the mask /// bits. If a mask bit is zero, the corresponding value from vector \a a /// is gathered; otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [4 x float] containing the gathered values. 
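/* Illustrative sketch (not part of the original header), assuming AVX2: when
 * element indexes may not fit in 32 bits, the qword-indexed form above can be
 * used instead. 'valid' follows the same most-significant-bit convention as
 * the other gather masks; the helper name is hypothetical. */
static __inline__ __m256d gather4_pd_qword_idx(const double *base, __m256i idx64,
                                               __m256d src, __m256d valid) {
  return _mm256_mask_i64gather_pd(src, base, idx64, valid, 8); /* scale 8 = sizeof(double) */
}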
#define _mm_mask_i32gather_ps(a, m, i, mask, s) \ ((__m128)__builtin_ia32_gatherd_ps((__v4sf)(__m128)(a), \ (float const *)(m), \ (__v4si)(__m128i)(i), \ (__v4sf)(__m128)(mask), (s))) /// Conditionally gathers eight 32-bit floating-point values, either from the /// 256-bit vector of [8 x float] in \a a, or from memory \a m using scaled /// indexes from the 256-bit vector of [8 x i32] in \a i. The 256-bit vector /// of [8 x float] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 7 /// j := element*32 /// k := element*32 /// IF mask[j+31] == 0 /// result[j+31:j] := a[j+31:j] /// ELSE /// result[j+31:j] := Load32(m + SignExtend(i[k+31:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256 _mm256_mask_i32gather_ps(__m256 a, const float *m, __m256i i, /// __m256 mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERDPS instruction. /// /// \param a /// A 256-bit vector of [8 x float] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 256-bit vector of [8 x i32] containing signed indexes into \a m. /// \param mask /// A 256-bit vector of [8 x float] containing the mask. The most /// significant bit of each element in the mask vector represents the mask /// bits. If a mask bit is zero, the corresponding value from vector \a a /// is gathered; otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 256-bit vector of [8 x float] containing the gathered values. #define _mm256_mask_i32gather_ps(a, m, i, mask, s) \ ((__m256)__builtin_ia32_gatherd_ps256((__v8sf)(__m256)(a), \ (float const *)(m), \ (__v8si)(__m256i)(i), \ (__v8sf)(__m256)(mask), (s))) /// Conditionally gathers two 32-bit floating-point values, either from the /// 128-bit vector of [4 x float] in \a a, or from memory \a m using scaled /// indexes from the 128-bit vector of [2 x i64] in \a i. The 128-bit vector /// of [4 x float] in \a mask determines the source for the lower two /// elements. The upper two elements of the result are zeroed. /// /// \code{.operation} /// FOR element := 0 to 1 /// j := element*32 /// k := element*64 /// IF mask[j+31] == 0 /// result[j+31:j] := a[j+31:j] /// ELSE /// result[j+31:j] := Load32(m + SignExtend(i[k+63:k])*s) /// FI /// ENDFOR /// result[127:64] := 0 /// \endcode /// /// \headerfile /// /// \code /// __m128 _mm_mask_i64gather_ps(__m128 a, const float *m, __m128i i, /// __m128 mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERQPS instruction. /// /// \param a /// A 128-bit vector of [4 x float] used as the source when a mask bit is /// zero. Only the first two elements are used. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [2 x i64] containing signed indexes into \a m. /// \param mask /// A 128-bit vector of [4 x float] containing the mask. The most /// significant bit of each element in the mask vector represents the mask /// bits. If a mask bit is zero, the corresponding value from vector \a a /// is gathered; otherwise the value is loaded from memory. Only the first /// two elements are used. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [4 x float] containing the gathered values. 
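/* Illustrative sketch (not part of the original header), assuming AVX2: a
 * bounds-checked table lookup of eight floats. Out-of-range lanes are masked
 * off, so they are never dereferenced and fall back to 0.0f instead of reading
 * outside 'lut'. Helper and parameter names are hypothetical. */
static __inline__ __m256 lut8_lookup_checked(const float *lut, int lut_len, __m256i idx) {
  /* in_range lanes are all-ones (MSB set) exactly when 0 <= idx < lut_len. */
  __m256i below_len = _mm256_cmpgt_epi32(_mm256_set1_epi32(lut_len), idx); /* idx < lut_len */
  __m256i nonneg    = _mm256_cmpgt_epi32(idx, _mm256_set1_epi32(-1));      /* idx >= 0 */
  __m256  in_range  = _mm256_castsi256_ps(_mm256_and_si256(below_len, nonneg));
  return _mm256_mask_i32gather_ps(_mm256_setzero_ps(), lut, idx, in_range, 4);
}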
#define _mm_mask_i64gather_ps(a, m, i, mask, s) \ ((__m128)__builtin_ia32_gatherq_ps((__v4sf)(__m128)(a), \ (float const *)(m), \ (__v2di)(__m128i)(i), \ (__v4sf)(__m128)(mask), (s))) /// Conditionally gathers four 32-bit floating-point values, either from the /// 128-bit vector of [4 x float] in \a a, or from memory \a m using scaled /// indexes from the 256-bit vector of [4 x i64] in \a i. The 128-bit vector /// of [4 x float] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*32 /// k := element*64 /// IF mask[j+31] == 0 /// result[j+31:j] := a[j+31:j] /// ELSE /// result[j+31:j] := Load32(m + SignExtend(i[k+63:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128 _mm256_mask_i64gather_ps(__m128 a, const float *m, __m256i i, /// __m128 mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERQPS instruction. /// /// \param a /// A 128-bit vector of [4 x float] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 256-bit vector of [4 x i64] containing signed indexes into \a m. /// \param mask /// A 128-bit vector of [4 x float] containing the mask. The most /// significant bit of each element in the mask vector represents the mask /// bits. If a mask bit is zero, the corresponding value from vector \a a /// is gathered; otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [4 x float] containing the gathered values. #define _mm256_mask_i64gather_ps(a, m, i, mask, s) \ ((__m128)__builtin_ia32_gatherq_ps256((__v4sf)(__m128)(a), \ (float const *)(m), \ (__v4di)(__m256i)(i), \ (__v4sf)(__m128)(mask), (s))) /// Conditionally gathers four 32-bit integer values, either from the /// 128-bit vector of [4 x i32] in \a a, or from memory \a m using scaled /// indexes from the 128-bit vector of [4 x i32] in \a i. The 128-bit vector /// of [4 x i32] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*32 /// k := element*32 /// IF mask[j+31] == 0 /// result[j+31:j] := a[j+31:j] /// ELSE /// result[j+31:j] := Load32(m + SignExtend(i[k+31:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128i _mm_mask_i32gather_epi32(__m128i a, const int *m, __m128i i, /// __m128i mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERDD instruction. /// /// \param a /// A 128-bit vector of [4 x i32] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. /// \param mask /// A 128-bit vector of [4 x i32] containing the mask. The most significant /// bit of each element in the mask vector represents the mask bits. If a /// mask bit is zero, the corresponding value from vector \a a is gathered; /// otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [4 x i32] containing the gathered values. 
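/* Illustrative sketch (not part of the original header), assuming AVX2: with
 * qword indexes only four float lanes exist, so the masked gather above takes
 * and returns a __m128 even though the index vector is 256 bits wide. The
 * helper name is hypothetical. */
static __inline__ __m128 gather4_ps_qword_idx(const float *base, __m256i idx64,
                                              __m128 src, __m128 valid) {
  return _mm256_mask_i64gather_ps(src, base, idx64, valid, 4);
}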
#define _mm_mask_i32gather_epi32(a, m, i, mask, s) \ ((__m128i)__builtin_ia32_gatherd_d((__v4si)(__m128i)(a), \ (int const *)(m), \ (__v4si)(__m128i)(i), \ (__v4si)(__m128i)(mask), (s))) /// Conditionally gathers eight 32-bit integer values, either from the /// 256-bit vector of [8 x i32] in \a a, or from memory \a m using scaled /// indexes from the 256-bit vector of [8 x i32] in \a i. The 256-bit vector /// of [8 x i32] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 7 /// j := element*32 /// k := element*32 /// IF mask[j+31] == 0 /// result[j+31:j] := a[j+31:j] /// ELSE /// result[j+31:j] := Load32(m + SignExtend(i[k+31:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256i _mm256_mask_i32gather_epi32(__m256i a, const int *m, __m256i i, /// __m256i mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERDD instruction. /// /// \param a /// A 256-bit vector of [8 x i32] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 256-bit vector of [8 x i32] containing signed indexes into \a m. /// \param mask /// A 256-bit vector of [8 x i32] containing the mask. The most significant /// bit of each element in the mask vector represents the mask bits. If a /// mask bit is zero, the corresponding value from vector \a a is gathered; /// otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 256-bit vector of [8 x i32] containing the gathered values. #define _mm256_mask_i32gather_epi32(a, m, i, mask, s) \ ((__m256i)__builtin_ia32_gatherd_d256((__v8si)(__m256i)(a), \ (int const *)(m), \ (__v8si)(__m256i)(i), \ (__v8si)(__m256i)(mask), (s))) /// Conditionally gathers two 32-bit integer values, either from the /// 128-bit vector of [4 x i32] in \a a, or from memory \a m using scaled /// indexes from the 128-bit vector of [2 x i64] in \a i. The 128-bit vector /// of [4 x i32] in \a mask determines the source for the lower two /// elements. The upper two elements of the result are zeroed. /// /// \code{.operation} /// FOR element := 0 to 1 /// j := element*32 /// k := element*64 /// IF mask[j+31] == 0 /// result[j+31:j] := a[j+31:j] /// ELSE /// result[j+31:j] := Load32(m + SignExtend(i[k+63:k])*s) /// FI /// ENDFOR /// result[127:64] := 0 /// \endcode /// /// \headerfile /// /// \code /// __m128i _mm_mask_i64gather_epi32(__m128i a, const int *m, __m128i i, /// __m128i mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERQD instruction. /// /// \param a /// A 128-bit vector of [4 x i32] used as the source when a mask bit is /// zero. Only the first two elements are used. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [2 x i64] containing indexes into \a m. /// \param mask /// A 128-bit vector of [4 x i32] containing the mask. The most significant /// bit of each element in the mask vector represents the mask bits. If a /// mask bit is zero, the corresponding value from vector \a a is gathered; /// otherwise the value is loaded from memory. Only the first two elements /// are used. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [4 x i32] containing the gathered values. 
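/* Illustrative sketch (not part of the original header), assuming AVX2: gather
 * eight 32-bit table entries but substitute a sentinel for lanes whose mask bit
 * is clear, e.g. slots that an earlier comparison marked as absent. Helper and
 * parameter names are hypothetical. */
static __inline__ __m256i gather8_epi32_or_sentinel(const int *table, __m256i idx,
                                                    __m256i present_mask, int sentinel) {
  /* Lanes with the MSB of present_mask set read table[idx]; the rest receive
   * 'sentinel' from the first (source) operand. */
  return _mm256_mask_i32gather_epi32(_mm256_set1_epi32(sentinel), table, idx,
                                     present_mask, 4);
}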
#define _mm_mask_i64gather_epi32(a, m, i, mask, s) \ ((__m128i)__builtin_ia32_gatherq_d((__v4si)(__m128i)(a), \ (int const *)(m), \ (__v2di)(__m128i)(i), \ (__v4si)(__m128i)(mask), (s))) /// Conditionally gathers four 32-bit integer values, either from the /// 128-bit vector of [4 x i32] in \a a, or from memory \a m using scaled /// indexes from the 256-bit vector of [4 x i64] in \a i. The 128-bit vector /// of [4 x i32] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*32 /// k := element*64 /// IF mask[j+31] == 0 /// result[j+31:j] := a[j+31:j] /// ELSE /// result[j+31:j] := Load32(m + SignExtend(i[k+63:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128i _mm256_mask_i64gather_epi32(__m128i a, const int *m, __m256i i, /// __m128i mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERQD instruction. /// /// \param a /// A 128-bit vector of [4 x i32] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 256-bit vector of [4 x i64] containing signed indexes into \a m. /// \param mask /// A 128-bit vector of [4 x i32] containing the mask. The most significant /// bit of each element in the mask vector represents the mask bits. If a /// mask bit is zero, the corresponding value from vector \a a is gathered; /// otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [4 x i32] containing the gathered values. #define _mm256_mask_i64gather_epi32(a, m, i, mask, s) \ ((__m128i)__builtin_ia32_gatherq_d256((__v4si)(__m128i)(a), \ (int const *)(m), \ (__v4di)(__m256i)(i), \ (__v4si)(__m128i)(mask), (s))) /// Conditionally gathers two 64-bit integer values, either from the /// 128-bit vector of [2 x i64] in \a a, or from memory \a m using scaled /// indexes from the 128-bit vector of [4 x i32] in \a i. The 128-bit vector /// of [2 x i64] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 1 /// j := element*64 /// k := element*32 /// IF mask[j+63] == 0 /// result[j+63:j] := a[j+63:j] /// ELSE /// result[j+63:j] := Load64(m + SignExtend(i[k+31:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128i _mm_mask_i32gather_epi64(__m128i a, const long long *m, __m128i i, /// __m128i mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERDQ instruction. /// /// \param a /// A 128-bit vector of [2 x i64] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. Only /// the first two elements are used. /// \param mask /// A 128-bit vector of [2 x i64] containing the mask. The most significant /// bit of each element in the mask vector represents the mask bits. If a /// mask bit is zero, the corresponding value from vector \a a is gathered; /// otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [2 x i64] containing the gathered values. 
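/* Illustrative sketch (not part of the original header), assuming AVX2: four
 * 32-bit table entries addressed by 64-bit indexes come back packed in a
 * __m128i, as documented for the qword-indexed form above. The helper name is
 * hypothetical. */
static __inline__ __m128i gather4_epi32_qword_idx(const int *table, __m256i idx64,
                                                  __m128i src, __m128i valid) {
  return _mm256_mask_i64gather_epi32(src, table, idx64, valid, 4);
}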
#define _mm_mask_i32gather_epi64(a, m, i, mask, s) \ ((__m128i)__builtin_ia32_gatherd_q((__v2di)(__m128i)(a), \ (long long const *)(m), \ (__v4si)(__m128i)(i), \ (__v2di)(__m128i)(mask), (s))) /// Conditionally gathers four 64-bit integer values, either from the /// 256-bit vector of [4 x i64] in \a a, or from memory \a m using scaled /// indexes from the 128-bit vector of [4 x i32] in \a i. The 256-bit vector /// of [4 x i64] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*64 /// k := element*32 /// IF mask[j+63] == 0 /// result[j+63:j] := a[j+63:j] /// ELSE /// result[j+63:j] := Load64(m + SignExtend(i[k+31:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256i _mm256_mask_i32gather_epi64(__m256i a, const long long *m, /// __m128i i, __m256i mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERDQ instruction. /// /// \param a /// A 256-bit vector of [4 x i64] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. /// \param mask /// A 256-bit vector of [4 x i64] containing the mask. The most significant /// bit of each element in the mask vector represents the mask bits. If a /// mask bit is zero, the corresponding value from vector \a a is gathered; /// otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 256-bit vector of [4 x i64] containing the gathered values. #define _mm256_mask_i32gather_epi64(a, m, i, mask, s) \ ((__m256i)__builtin_ia32_gatherd_q256((__v4di)(__m256i)(a), \ (long long const *)(m), \ (__v4si)(__m128i)(i), \ (__v4di)(__m256i)(mask), (s))) /// Conditionally gathers two 64-bit integer values, either from the /// 128-bit vector of [2 x i64] in \a a, or from memory \a m using scaled /// indexes from the 128-bit vector of [2 x i64] in \a i. The 128-bit vector /// of [2 x i64] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 1 /// j := element*64 /// k := element*64 /// IF mask[j+63] == 0 /// result[j+63:j] := a[j+63:j] /// ELSE /// result[j+63:j] := Load64(m + SignExtend(i[k+63:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128i _mm_mask_i64gather_epi64(__m128i a, const long long *m, __m128i i, /// __m128i mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERQQ instruction. /// /// \param a /// A 128-bit vector of [2 x i64] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [2 x i64] containing signed indexes into \a m. /// \param mask /// A 128-bit vector of [2 x i64] containing the mask. The most significant /// bit of each element in the mask vector represents the mask bits. If a /// mask bit is zero, the corresponding value from vector \a a is gathered; /// otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [2 x i64] containing the gathered values. 
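/* Illustrative sketch (not part of the original header), assuming AVX2: gather
 * four 64-bit counters selected by 32-bit indexes, leaving masked-off lanes at
 * their previous values. Names are hypothetical. */
static __inline__ __m256i gather4_epi64_dword_idx(const long long *counters, __m128i idx,
                                                  __m256i prev, __m256i active) {
  return _mm256_mask_i32gather_epi64(prev, counters, idx, active, 8);
}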
#define _mm_mask_i64gather_epi64(a, m, i, mask, s) \ ((__m128i)__builtin_ia32_gatherq_q((__v2di)(__m128i)(a), \ (long long const *)(m), \ (__v2di)(__m128i)(i), \ (__v2di)(__m128i)(mask), (s))) /// Conditionally gathers four 64-bit integer values, either from the /// 256-bit vector of [4 x i64] in \a a, or from memory \a m using scaled /// indexes from the 256-bit vector of [4 x i64] in \a i. The 256-bit vector /// of [4 x i64] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*64 /// k := element*64 /// IF mask[j+63] == 0 /// result[j+63:j] := a[j+63:j] /// ELSE /// result[j+63:j] := Load64(m + SignExtend(i[k+63:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256i _mm256_mask_i64gather_epi64(__m256i a, const long long *m, /// __m256i i, __m256i mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERQQ instruction. /// /// \param a /// A 256-bit vector of [4 x i64] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 256-bit vector of [4 x i64] containing signed indexes into \a m. /// \param mask /// A 256-bit vector of [4 x i64] containing the mask. The most significant /// bit of each element in the mask vector represents the mask bits. If a /// mask bit is zero, the corresponding value from vector \a a is gathered; /// otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 256-bit vector of [4 x i64] containing the gathered values. #define _mm256_mask_i64gather_epi64(a, m, i, mask, s) \ ((__m256i)__builtin_ia32_gatherq_q256((__v4di)(__m256i)(a), \ (long long const *)(m), \ (__v4di)(__m256i)(i), \ (__v4di)(__m256i)(mask), (s))) /// Gathers two 64-bit floating-point values from memory \a m using scaled /// indexes from the 128-bit vector of [4 x i32] in \a i. /// /// \code{.operation} /// FOR element := 0 to 1 /// j := element*64 /// k := element*32 /// result[j+63:j] := Load64(m + SignExtend(i[k+31:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128d _mm_i32gather_pd(const double *m, __m128i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERDPD instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. Only /// the first two elements are used. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [2 x double] containing the gathered values. #define _mm_i32gather_pd(m, i, s) \ ((__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_undefined_pd(), \ (double const *)(m), \ (__v4si)(__m128i)(i), \ (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \ _mm_setzero_pd()), \ (s))) /// Gathers four 64-bit floating-point values from memory \a m using scaled /// indexes from the 128-bit vector of [4 x i32] in \a i. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*64 /// k := element*32 /// result[j+63:j] := Load64(m + SignExtend(i[k+31:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256d _mm256_i32gather_pd(const double *m, __m128i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERDPD instruction. /// /// \param m /// A pointer to the memory used for loading values. 
/// \param i /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 256-bit vector of [4 x double] containing the gathered values. #define _mm256_i32gather_pd(m, i, s) \ ((__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_undefined_pd(), \ (double const *)(m), \ (__v4si)(__m128i)(i), \ (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \ _mm256_setzero_pd(), \ _CMP_EQ_OQ), \ (s))) /// Gathers two 64-bit floating-point values from memory \a m using scaled /// indexes from the 128-bit vector of [2 x i64] in \a i. /// /// \code{.operation} /// FOR element := 0 to 1 /// j := element*64 /// k := element*64 /// result[j+63:j] := Load64(m + SignExtend(i[k+63:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128d _mm_i64gather_pd(const double *m, __m128i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERQPD instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [2 x i64] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [2 x double] containing the gathered values. #define _mm_i64gather_pd(m, i, s) \ ((__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_undefined_pd(), \ (double const *)(m), \ (__v2di)(__m128i)(i), \ (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \ _mm_setzero_pd()), \ (s))) /// Gathers four 64-bit floating-point values from memory \a m using scaled /// indexes from the 256-bit vector of [4 x i64] in \a i. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*64 /// k := element*64 /// result[j+63:j] := Load64(m + SignExtend(i[k+63:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256d _mm256_i64gather_pd(const double *m, __m256i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERQPD instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 256-bit vector of [4 x i64] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 256-bit vector of [4 x double] containing the gathered values. #define _mm256_i64gather_pd(m, i, s) \ ((__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_undefined_pd(), \ (double const *)(m), \ (__v4di)(__m256i)(i), \ (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \ _mm256_setzero_pd(), \ _CMP_EQ_OQ), \ (s))) /// Gathers four 32-bit floating-point values from memory \a m using scaled /// indexes from the 128-bit vector of [4 x i32] in \a i. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*32 /// k := element*32 /// result[j+31:j] := Load32(m + SignExtend(i[k+31:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128 _mm_i32gather_ps(const float *m, __m128i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERDPS instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [4 x float] containing the gathered values. 
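/* Illustrative sketch (not part of the original header), assuming AVX2: the
 * unmasked gathers above pass an all-ones mask (the EQ comparison of two zero
 * vectors) to the same builtins, so every lane is loaded. A typical use is an
 * indexed load of doubles; the helper name is hypothetical. */
static __inline__ __m256d load4_pd_indexed(const double *base, __m128i idx) {
  return _mm256_i32gather_pd(base, idx, 8); /* loads base[idx[0]] .. base[idx[3]] */
}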
#define _mm_i32gather_ps(m, i, s) \ ((__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_undefined_ps(), \ (float const *)(m), \ (__v4si)(__m128i)(i), \ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \ _mm_setzero_ps()), \ (s))) /// Gathers eight 32-bit floating-point values from memory \a m using scaled /// indexes from the 256-bit vector of [8 x i32] in \a i. /// /// \code{.operation} /// FOR element := 0 to 7 /// j := element*32 /// k := element*32 /// result[j+31:j] := Load32(m + SignExtend(i[k+31:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256 _mm256_i32gather_ps(const float *m, __m256i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERDPS instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 256-bit vector of [8 x i32] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 256-bit vector of [8 x float] containing the gathered values. #define _mm256_i32gather_ps(m, i, s) \ ((__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_undefined_ps(), \ (float const *)(m), \ (__v8si)(__m256i)(i), \ (__v8sf)_mm256_cmp_ps(_mm256_setzero_ps(), \ _mm256_setzero_ps(), \ _CMP_EQ_OQ), \ (s))) /// Gathers two 32-bit floating-point values from memory \a m using scaled /// indexes from the 128-bit vector of [2 x i64] in \a i. The upper two /// elements of the result are zeroed. /// /// \code{.operation} /// FOR element := 0 to 1 /// j := element*32 /// k := element*64 /// result[j+31:j] := Load32(m + SignExtend(i[k+63:k])*s) /// ENDFOR /// result[127:64] := 0 /// \endcode /// /// \headerfile /// /// \code /// __m128 _mm_i64gather_ps(const float *m, __m128i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERQPS instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [2 x i64] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [4 x float] containing the gathered values. #define _mm_i64gather_ps(m, i, s) \ ((__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_undefined_ps(), \ (float const *)(m), \ (__v2di)(__m128i)(i), \ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \ _mm_setzero_ps()), \ (s))) /// Gathers four 32-bit floating-point values from memory \a m using scaled /// indexes from the 256-bit vector of [4 x i64] in \a i. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*32 /// k := element*64 /// result[j+31:j] := Load32(m + SignExtend(i[k+63:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128 _mm256_i64gather_ps(const float *m, __m256i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERQPS instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 256-bit vector of [4 x i64] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [4 x float] containing the gathered values. 
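/* Illustrative sketch (not part of the original header), assuming AVX2: an
 * eight-wide lookup of floats, e.g. applying a precomputed table to packed
 * indexes. Each lane is loaded independently, so duplicate indexes are fine.
 * The helper name is hypothetical. */
static __inline__ __m256 lut8_apply(const float *lut, __m256i idx) {
  return _mm256_i32gather_ps(lut, idx, 4); /* scale 4 = sizeof(float) */
}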
#define _mm256_i64gather_ps(m, i, s) \ ((__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_undefined_ps(), \ (float const *)(m), \ (__v4di)(__m256i)(i), \ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \ _mm_setzero_ps()), \ (s))) /// Gathers four 32-bit integer values from memory \a m using scaled /// indexes from the 128-bit vector of [4 x i32] in \a i. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*32 /// k := element*32 /// result[j+31:j] := Load32(m + SignExtend(i[k+31:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128i _mm_i32gather_epi32(const int *m, __m128i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERDD instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [4 x i32] containing the gathered values. #define _mm_i32gather_epi32(m, i, s) \ ((__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_undefined_si128(), \ (int const *)(m), (__v4si)(__m128i)(i), \ (__v4si)_mm_set1_epi32(-1), (s))) /// Gathers eight 32-bit integer values from memory \a m using scaled /// indexes from the 256-bit vector of [8 x i32] in \a i. /// /// \code{.operation} /// FOR element := 0 to 7 /// j := element*32 /// k := element*32 /// result[j+31:j] := Load32(m + SignExtend(i[k+31:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256i _mm256_i32gather_epi32(const int *m, __m256i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERDD instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 256-bit vector of [8 x i32] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 256-bit vector of [8 x i32] containing the gathered values. #define _mm256_i32gather_epi32(m, i, s) \ ((__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_undefined_si256(), \ (int const *)(m), (__v8si)(__m256i)(i), \ (__v8si)_mm256_set1_epi32(-1), (s))) /// Gathers two 32-bit integer values from memory \a m using scaled indexes /// from the 128-bit vector of [2 x i64] in \a i. The upper two elements /// of the result are zeroed. /// /// \code{.operation} /// FOR element := 0 to 1 /// j := element*32 /// k := element*64 /// result[j+31:j] := Load32(m + SignExtend(i[k+63:k])*s) /// ENDFOR /// result[127:64] := 0 /// \endcode /// /// \headerfile /// /// \code /// __m128i _mm_i64gather_epi32(const int *m, __m128i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERQD instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [2 x i64] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [4 x i32] containing the gathered values. #define _mm_i64gather_epi32(m, i, s) \ ((__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_undefined_si128(), \ (int const *)(m), (__v2di)(__m128i)(i), \ (__v4si)_mm_set1_epi32(-1), (s))) /// Gathers four 32-bit integer values from memory \a m using scaled indexes /// from the 256-bit vector of [4 x i64] in \a i. 
/// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*32 /// k := element*64 /// result[j+31:j] := Load32(m + SignExtend(i[k+63:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128i _mm256_i64gather_epi32(const int *m, __m256i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERQD instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 256-bit vector of [4 x i64] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [4 x i32] containing the gathered values. #define _mm256_i64gather_epi32(m, i, s) \ ((__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_undefined_si128(), \ (int const *)(m), (__v4di)(__m256i)(i), \ (__v4si)_mm_set1_epi32(-1), (s))) /// Gathers two 64-bit integer values from memory \a m using scaled indexes /// from the 128-bit vector of [4 x i32] in \a i. /// /// \code{.operation} /// FOR element := 0 to 1 /// j := element*64 /// k := element*32 /// result[j+63:j] := Load64(m + SignExtend(i[k+31:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128i _mm_i32gather_epi64(const long long *m, __m128i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERDQ instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. Only /// the first two elements are used. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [2 x i64] containing the gathered values. #define _mm_i32gather_epi64(m, i, s) \ ((__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_undefined_si128(), \ (long long const *)(m), \ (__v4si)(__m128i)(i), \ (__v2di)_mm_set1_epi64x(-1), (s))) /// Gathers four 64-bit integer values from memory \a m using scaled indexes /// from the 128-bit vector of [4 x i32] in \a i. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*64 /// k := element*32 /// result[j+63:j] := Load64(m + SignExtend(i[k+31:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256i _mm256_i32gather_epi64(const long long *m, __m128i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERDQ instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 256-bit vector of [4 x i64] containing the gathered values. #define _mm256_i32gather_epi64(m, i, s) \ ((__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_undefined_si256(), \ (long long const *)(m), \ (__v4si)(__m128i)(i), \ (__v4di)_mm256_set1_epi64x(-1), (s))) /// Gathers two 64-bit integer values from memory \a m using scaled indexes /// from the 128-bit vector of [2 x i64] in \a i. /// /// \code{.operation} /// FOR element := 0 to 1 /// j := element*64 /// k := element*64 /// result[j+63:j] := Load64(m + SignExtend(i[k+63:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128i _mm_i64gather_epi64(const long long *m, __m128i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERQQ instruction. /// /// \param m /// A pointer to the memory used for loading values. 
/// \param i /// A 128-bit vector of [2 x i64] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [2 x i64] containing the gathered values. #define _mm_i64gather_epi64(m, i, s) \ ((__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_undefined_si128(), \ (long long const *)(m), \ (__v2di)(__m128i)(i), \ (__v2di)_mm_set1_epi64x(-1), (s))) /// Gathers four 64-bit integer values from memory \a m using scaled indexes /// from the 256-bit vector of [4 x i64] in \a i. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*64 /// k := element*64 /// result[j+63:j] := Load64(m + SignExtend(i[k+63:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256i _mm256_i64gather_epi64(const long long *m, __m256i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERQQ instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 256-bit vector of [4 x i64] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 256-bit vector of [4 x i64] containing the gathered values. #define _mm256_i64gather_epi64(m, i, s) \ ((__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_undefined_si256(), \ (long long const *)(m), \ (__v4di)(__m256i)(i), \ (__v4di)_mm256_set1_epi64x(-1), (s))) #undef __DEFAULT_FN_ATTRS256 #undef __DEFAULT_FN_ATTRS128 #endif /* __AVX2INTRIN_H */ avx512ifmavlintrin.h/*===------------- avx512ifmavlintrin.h - IFMA intrinsics ------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __IFMAVLINTRIN_H #define __IFMAVLINTRIN_H /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512ifma,avx512vl,no-evex512"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512ifma,avx512vl,no-evex512"), \ __min_vector_width__(256))) #define _mm_madd52hi_epu64(X, Y, Z) \ ((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \ (__v2di)(Z))) #define _mm256_madd52hi_epu64(X, Y, Z) \ ((__m256i)__builtin_ia32_vpmadd52huq256((__v4di)(X), (__v4di)(Y), \ (__v4di)(Z))) #define _mm_madd52lo_epu64(X, Y, Z) \ ((__m128i)__builtin_ia32_vpmadd52luq128((__v2di)(X), (__v2di)(Y), \ (__v2di)(Z))) #define _mm256_madd52lo_epu64(X, Y, Z) \ ((__m256i)__builtin_ia32_vpmadd52luq256((__v4di)(X), (__v4di)(Y), \ (__v4di)(Z))) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128(__M, (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { return (__m128i)__builtin_ia32_selectq_128(__M, (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256(__M, (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { return (__m256i)__builtin_ia32_selectq_256(__M, (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128(__M, (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { return (__m128i)__builtin_ia32_selectq_128(__M, (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256(__M, (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { return (__m256i)__builtin_ia32_selectq_256(__M, (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z), (__v4di)_mm256_setzero_si256()); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif avx512vlvnniintrin.h/*===---- lzcntintrin.h - LZCNT intrinsics ---------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __LZCNTINTRIN_H #define __LZCNTINTRIN_H /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt"))) #ifndef _MSC_VER /// Counts the number of leading zero bits in the operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c LZCNT instruction. /// /// \param __X /// An unsigned 16-bit integer whose leading zeros are to be counted. /// \returns An unsigned 16-bit integer containing the number of leading zero /// bits in the operand. #define __lzcnt16(X) __builtin_ia32_lzcnt_u16((unsigned short)(X)) #endif // _MSC_VER /// Counts the number of leading zero bits in the operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c LZCNT instruction. /// /// \param __X /// An unsigned 32-bit integer whose leading zeros are to be counted. /// \returns An unsigned 32-bit integer containing the number of leading zero /// bits in the operand. /// \see _lzcnt_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS __lzcnt32(unsigned int __X) { return __builtin_ia32_lzcnt_u32(__X); } /// Counts the number of leading zero bits in the operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c LZCNT instruction. /// /// \param __X /// An unsigned 32-bit integer whose leading zeros are to be counted. /// \returns An unsigned 32-bit integer containing the number of leading zero /// bits in the operand. /// \see __lzcnt32 static __inline__ unsigned int __DEFAULT_FN_ATTRS _lzcnt_u32(unsigned int __X) { return __builtin_ia32_lzcnt_u32(__X); } #ifdef __x86_64__ #ifndef _MSC_VER /// Counts the number of leading zero bits in the operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c LZCNT instruction. /// /// \param __X /// An unsigned 64-bit integer whose leading zeros are to be counted. /// \returns An unsigned 64-bit integer containing the number of leading zero /// bits in the operand. /// \see _lzcnt_u64 #define __lzcnt64(X) __builtin_ia32_lzcnt_u64((unsigned long long)(X)) #endif // _MSC_VER /// Counts the number of leading zero bits in the operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c LZCNT instruction. /// /// \param __X /// An unsigned 64-bit integer whose leading zeros are to be counted. /// \returns An unsigned 64-bit integer containing the number of leading zero /// bits in the operand. /// \see __lzcnt64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS _lzcnt_u64(unsigned long long __X) { return __builtin_ia32_lzcnt_u64(__X); } #endif #undef __DEFAULT_FN_ATTRS #endif /* __LZCNTINTRIN_H */ pmmintrin.hriscv_crypto.h/*===----------------------- waitpkgintrin.h - WAITPKG --------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __WAITPKGINTRIN_H #define __WAITPKGINTRIN_H /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("waitpkg"))) static __inline__ void __DEFAULT_FN_ATTRS _umonitor (void * __address) { __builtin_ia32_umonitor (__address); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _umwait (unsigned int __control, unsigned long long __counter) { return __builtin_ia32_umwait (__control, (unsigned int)(__counter >> 32), (unsigned int)__counter); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _tpause (unsigned int __control, unsigned long long __counter) { return __builtin_ia32_tpause (__control, (unsigned int)(__counter >> 32), (unsigned int)__counter); } #undef __DEFAULT_FN_ATTRS #endif /* __WAITPKGINTRIN_H */ /*===---- xsaveoptintrin.h - XSAVEOPT intrinsic ----------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __XSAVEOPTINTRIN_H #define __XSAVEOPTINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xsaveopt"))) static __inline__ void __DEFAULT_FN_ATTRS _xsaveopt(void *__p, unsigned long long __m) { __builtin_ia32_xsaveopt(__p, __m); } #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS _xsaveopt64(void *__p, unsigned long long __m) { __builtin_ia32_xsaveopt64(__p, __m); } #endif #undef __DEFAULT_FN_ATTRS #endif /*===---- xtestintrin.h - XTEST intrinsic ----------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __XTESTINTRIN_H #define __XTESTINTRIN_H /* xtest returns non-zero if the instruction is executed within an RTM or active * HLE region. */ /* FIXME: This can be an either or for RTM/HLE. Deal with this when HLE is * supported. */ static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rtm"))) _xtest(void) { return __builtin_ia32_xtest(); } #endif sanitizer/dfsan_interface.hsanitizer/linux_syscall_hooks.h//===-- sanitizer/memprof_interface.h --------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is a part of MemProfiler (MemProf). // // Public interface header. //===----------------------------------------------------------------------===// #ifndef SANITIZER_MEMPROF_INTERFACE_H #define SANITIZER_MEMPROF_INTERFACE_H #include #ifdef __cplusplus extern "C" { #endif /// Records access to a memory region ([addr, addr+size)). /// /// This memory must be previously allocated by your program. /// /// \param addr Start of memory region. /// \param size Size of memory region. 
void SANITIZER_CDECL __memprof_record_access_range(void const volatile *addr, size_t size); /// Records access to a memory address addr. /// /// This memory must be previously allocated by your program. /// /// \param addr Accessed memory address void SANITIZER_CDECL __memprof_record_access(void const volatile *addr); /// User-provided callback on MemProf errors. /// /// You can provide a function that would be called immediately when MemProf /// detects an error. This is useful in cases when MemProf detects an error but /// your program crashes before the MemProf report is printed. void SANITIZER_CDECL __memprof_on_error(void); /// Prints accumulated statistics to stderr (useful for calling from the /// debugger). void SANITIZER_CDECL __memprof_print_accumulated_stats(void); /// User-provided default option settings. /// /// You can provide your own implementation of this function to return a string /// containing MemProf runtime options (for example, /// verbosity=1:print_stats=1). /// /// \returns Default options string. const char *SANITIZER_CDECL __memprof_default_options(void); /// Prints the memory profile to the current profile file. /// /// \returns 0 on success. int SANITIZER_CDECL __memprof_profile_dump(void); /// Closes the existing file descriptor, if it is valid and not stdout or /// stderr, and resets the internal state such that the profile filename is /// reopened on the next profile dump attempt. This can be used to enable /// multiple rounds of profiling on the same binary. void SANITIZER_CDECL __memprof_profile_reset(void); #ifdef __cplusplus } // extern "C" #endif #endif // SANITIZER_MEMPROF_INTERFACE_H //===-- sanitizer/scudo_interface.h -----------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // /// Public Scudo interface header. // //===----------------------------------------------------------------------===// #ifndef SANITIZER_SCUDO_INTERFACE_H_ #define SANITIZER_SCUDO_INTERFACE_H_ #include #ifdef __cplusplus extern "C" { #endif // This function may be optionally provided by a user and should return // a string containing Scudo runtime options. See scudo_flags.h for details. const char *SANITIZER_CDECL __scudo_default_options(void); // This function allows to set the RSS limit at runtime. This can be either // the hard limit (HardLimit=1) or the soft limit (HardLimit=0). The limit // can be removed by setting LimitMb to 0. This function's parameters should // be fully trusted to avoid security mishaps. void SANITIZER_CDECL __scudo_set_rss_limit(size_t LimitMb, int HardLimit); // This function outputs various allocator statistics for both the Primary // and Secondary allocators, including memory usage, number of allocations // and deallocations. void SANITIZER_CDECL __scudo_print_stats(void); #ifdef __cplusplus } // extern "C" #endif #endif // SANITIZER_SCUDO_INTERFACE_H_ //===-- tsan_interface.h ----------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is a part of ThreadSanitizer (TSan), a race detector. // // Public interface header for TSan. //===----------------------------------------------------------------------===// #ifndef SANITIZER_TSAN_INTERFACE_H #define SANITIZER_TSAN_INTERFACE_H #include #ifdef __cplusplus extern "C" { #endif // __tsan_release establishes a happens-before relation with a preceding // __tsan_acquire on the same address. void SANITIZER_CDECL __tsan_acquire(void *addr); void SANITIZER_CDECL __tsan_release(void *addr); // Annotations for custom mutexes. // The annotations allow to get better reports (with sets of locked mutexes), // detect more types of bugs (e.g. mutex misuses, races between lock/unlock and // destruction and potential deadlocks) and improve precision and performance // (by ignoring individual atomic operations in mutex code). However, the // downside is that annotated mutex code itself is not checked for correctness. // Mutex creation flags are passed to __tsan_mutex_create annotation. // If mutex has no constructor and __tsan_mutex_create is not called, // the flags may be passed to __tsan_mutex_pre_lock/__tsan_mutex_post_lock // annotations. // Mutex has static storage duration and no-op constructor and destructor. // This effectively makes tsan ignore destroy annotation. static const unsigned __tsan_mutex_linker_init = 1 << 0; // Mutex is write reentrant. static const unsigned __tsan_mutex_write_reentrant = 1 << 1; // Mutex is read reentrant. static const unsigned __tsan_mutex_read_reentrant = 1 << 2; // Mutex does not have static storage duration, and must not be used after // its destructor runs. The opposite of __tsan_mutex_linker_init. // If this flag is passed to __tsan_mutex_destroy, then the destruction // is ignored unless this flag was previously set on the mutex. static const unsigned __tsan_mutex_not_static = 1 << 8; // Mutex operation flags: // Denotes read lock operation. static const unsigned __tsan_mutex_read_lock = 1 << 3; // Denotes try lock operation. static const unsigned __tsan_mutex_try_lock = 1 << 4; // Denotes that a try lock operation has failed to acquire the mutex. static const unsigned __tsan_mutex_try_lock_failed = 1 << 5; // Denotes that the lock operation acquires multiple recursion levels. // Number of levels is passed in recursion parameter. // This is useful for annotation of e.g. Java builtin monitors, // for which wait operation releases all recursive acquisitions of the mutex. static const unsigned __tsan_mutex_recursive_lock = 1 << 6; // Denotes that the unlock operation releases all recursion levels. // Number of released levels is returned and later must be passed to // the corresponding __tsan_mutex_post_lock annotation. static const unsigned __tsan_mutex_recursive_unlock = 1 << 7; // Convenient composed constants. static const unsigned __tsan_mutex_try_read_lock = __tsan_mutex_read_lock | __tsan_mutex_try_lock; static const unsigned __tsan_mutex_try_read_lock_failed = __tsan_mutex_try_read_lock | __tsan_mutex_try_lock_failed; // Annotate creation of a mutex. // Supported flags: mutex creation flags. void SANITIZER_CDECL __tsan_mutex_create(void *addr, unsigned flags); // Annotate destruction of a mutex. // Supported flags: // - __tsan_mutex_linker_init // - __tsan_mutex_not_static void SANITIZER_CDECL __tsan_mutex_destroy(void *addr, unsigned flags); // Annotate start of lock operation. 
// Supported flags: // - __tsan_mutex_read_lock // - __tsan_mutex_try_lock // - all mutex creation flags void SANITIZER_CDECL __tsan_mutex_pre_lock(void *addr, unsigned flags); // Annotate end of lock operation. // Supported flags: // - __tsan_mutex_read_lock (must match __tsan_mutex_pre_lock) // - __tsan_mutex_try_lock (must match __tsan_mutex_pre_lock) // - __tsan_mutex_try_lock_failed // - __tsan_mutex_recursive_lock // - all mutex creation flags void SANITIZER_CDECL __tsan_mutex_post_lock(void *addr, unsigned flags, int recursion); // Annotate start of unlock operation. // Supported flags: // - __tsan_mutex_read_lock // - __tsan_mutex_recursive_unlock int SANITIZER_CDECL __tsan_mutex_pre_unlock(void *addr, unsigned flags); // Annotate end of unlock operation. // Supported flags: // - __tsan_mutex_read_lock (must match __tsan_mutex_pre_unlock) void SANITIZER_CDECL __tsan_mutex_post_unlock(void *addr, unsigned flags); // Annotate start/end of notify/signal/broadcast operation. // Supported flags: none. void SANITIZER_CDECL __tsan_mutex_pre_signal(void *addr, unsigned flags); void SANITIZER_CDECL __tsan_mutex_post_signal(void *addr, unsigned flags); // Annotate start/end of a region of code where lock/unlock/signal operation // diverts to do something else unrelated to the mutex. This can be used to // annotate, for example, calls into cooperative scheduler or contention // profiling code. // These annotations must be called only from within // __tsan_mutex_pre/post_lock, __tsan_mutex_pre/post_unlock, // __tsan_mutex_pre/post_signal regions. // Supported flags: none. void SANITIZER_CDECL __tsan_mutex_pre_divert(void *addr, unsigned flags); void SANITIZER_CDECL __tsan_mutex_post_divert(void *addr, unsigned flags); // Check that the current thread does not hold any mutexes, // report a bug report otherwise. void SANITIZER_CDECL __tsan_check_no_mutexes_held(); // External race detection API. // Can be used by non-instrumented libraries to detect when their objects are // being used in an unsafe manner. // - __tsan_external_read/__tsan_external_write annotates the logical reads // and writes of the object at the specified address. 'caller_pc' should // be the PC of the library user, which the library can obtain with e.g. // `__builtin_return_address(0)`. // - __tsan_external_register_tag registers a 'tag' with the specified name, // which is later used in read/write annotations to denote the object type // - __tsan_external_assign_tag can optionally mark a heap object with a tag void *SANITIZER_CDECL __tsan_external_register_tag(const char *object_type); void SANITIZER_CDECL __tsan_external_register_header(void *tag, const char *header); void SANITIZER_CDECL __tsan_external_assign_tag(void *addr, void *tag); void SANITIZER_CDECL __tsan_external_read(void *addr, void *caller_pc, void *tag); void SANITIZER_CDECL __tsan_external_write(void *addr, void *caller_pc, void *tag); // Fiber switching API. // - TSAN context for fiber can be created by __tsan_create_fiber // and freed by __tsan_destroy_fiber. // - TSAN context of current fiber or thread can be obtained // by calling __tsan_get_current_fiber. // - __tsan_switch_to_fiber should be called immediately before switch // to fiber, such as call of swapcontext. // - Fiber name can be set by __tsan_set_fiber_name. 
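// Illustrative sketch (not part of the original header): how a user-level
// coroutine scheduler might drive the fiber API declared just below. The
// Coro struct, run_once(), the scheduler globals and the "worker-coro" name
// are assumptions made for this example only; the coroutine's ucontext is
// assumed to have been prepared elsewhere with makecontext().
#include <ucontext.h>
#include <sanitizer/tsan_interface.h>

struct Coro { ucontext_t uctx; void *tsan_fiber; };
static ucontext_t g_sched_uctx;  // scheduler's saved machine context
static void *g_sched_fiber;      // scheduler's TSan fiber context

void run_once(Coro *c) {
  g_sched_fiber = __tsan_get_current_fiber();
  c->tsan_fiber = __tsan_create_fiber(0);         // one TSan context per coroutine
  __tsan_set_fiber_name(c->tsan_fiber, "worker-coro");
  __tsan_switch_to_fiber(c->tsan_fiber, 0);       // tell TSan before the real switch
  swapcontext(&g_sched_uctx, &c->uctx);           // run the coroutine until it yields
  // The coroutine must call __tsan_switch_to_fiber(g_sched_fiber, 0) immediately
  // before yielding back; once it has finished, release its TSan context:
  __tsan_destroy_fiber(c->tsan_fiber);
}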
void *SANITIZER_CDECL __tsan_get_current_fiber(void); void *SANITIZER_CDECL __tsan_create_fiber(unsigned flags); void SANITIZER_CDECL __tsan_destroy_fiber(void *fiber); void SANITIZER_CDECL __tsan_switch_to_fiber(void *fiber, unsigned flags); void SANITIZER_CDECL __tsan_set_fiber_name(void *fiber, const char *name); // Flags for __tsan_switch_to_fiber: // Do not establish a happens-before relation between fibers static const unsigned __tsan_switch_to_fiber_no_sync = 1 << 0; // User-provided callback invoked on TSan initialization. void SANITIZER_CDECL __tsan_on_initialize(); // User-provided callback invoked on TSan shutdown. // `failed` - Nonzero if TSan did detect issues, zero otherwise. // Return `0` if TSan should exit as if no issues were detected. Return nonzero // if TSan should exit as if issues were detected. int SANITIZER_CDECL __tsan_on_finalize(int failed); // Release TSan internal memory in a best-effort manner. void SANITIZER_CDECL __tsan_flush_memory(); // User-provided default TSAN options. const char *SANITIZER_CDECL __tsan_default_options(void); // User-provided default TSAN suppressions. const char *SANITIZER_CDECL __tsan_default_suppressions(void); /// Returns a report's description. /// /// Returns a report's description (issue type), number of duplicate issues /// found, counts of array data (stack traces, memory operations, locations, /// mutexes, threads, unique thread IDs) and a stack trace of a sleep() /// call (if one was involved in the issue). /// /// \param report Opaque pointer to the current report. /// \param[out] description Report type description. /// \param[out] count Count of duplicate issues. /// \param[out] stack_count Count of stack traces. /// \param[out] mop_count Count of memory operations. /// \param[out] loc_count Count of locations. /// \param[out] mutex_count Count of mutexes. /// \param[out] thread_count Count of threads. /// \param[out] unique_tid_count Count of unique thread IDs. /// \param sleep_trace A buffer to store the stack trace of a sleep() /// call. /// \param trace_size Size in bytes of the trace buffer. /// \returns Returns 1 if successful, 0 if not. int SANITIZER_CDECL __tsan_get_report_data( void *report, const char **description, int *count, int *stack_count, int *mop_count, int *loc_count, int *mutex_count, int *thread_count, int *unique_tid_count, void **sleep_trace, unsigned long trace_size); /// Returns information about stack traces included in the report. /// /// \param report Opaque pointer to the current report. /// \param idx Index to the report's stacks. /// \param trace A buffer to store the stack trace. /// \param trace_size Size in bytes of the trace buffer. /// \returns Returns 1 if successful, 0 if not. int SANITIZER_CDECL __tsan_get_report_stack(void *report, unsigned long idx, void **trace, unsigned long trace_size); /// Returns information about memory operations included in the report. /// /// \param report Opaque pointer to the current report. /// \param idx Index to the report's memory operations. /// \param[out] tid Thread ID of the memory operation. /// \param[out] addr Address of the memory operation. /// \param[out] size Size of the memory operation. /// \param[out] write Write flag of the memory operation. /// \param[out] atomic Atomicity flag of the memory operation. /// \param trace A buffer to store the stack trace. /// \param trace_size Size in bytes of the trace buffer. /// \returns Returns 1 if successful, 0 if not. 
int SANITIZER_CDECL __tsan_get_report_mop(void *report, unsigned long idx, int *tid, void **addr, int *size, int *write, int *atomic, void **trace, unsigned long trace_size); /// Returns information about locations included in the report. /// /// \param report Opaque pointer to the current report. /// \param idx Index to the report's locations. /// \param[out] type Type of the location. /// \param[out] addr Address of the location. /// \param[out] start Start of the location. /// \param[out] size Size of the location. /// \param[out] tid Thread ID of the location. /// \param[out] fd File descriptor of the location. /// \param[out] suppressable Suppressable flag. /// \param trace A buffer to store the stack trace. /// \param trace_size Size in bytes of the trace buffer. /// \returns Returns 1 if successful, 0 if not. int SANITIZER_CDECL __tsan_get_report_loc(void *report, unsigned long idx, const char **type, void **addr, void **start, unsigned long *size, int *tid, int *fd, int *suppressable, void **trace, unsigned long trace_size); /// Returns information about mutexes included in the report. /// /// \param report Opaque pointer to the current report. /// \param idx Index to the report's mutexes. /// \param[out] mutex_id Id of the mutex. /// \param[out] addr Address of the mutex. /// \param[out] destroyed Destroyed mutex flag. /// \param trace A buffer to store the stack trace. /// \param trace_size Size in bytes of the trace buffer. /// \returns Returns 1 if successful, 0 if not. int SANITIZER_CDECL __tsan_get_report_mutex(void *report, unsigned long idx, uint64_t *mutex_id, void **addr, int *destroyed, void **trace, unsigned long trace_size); /// Returns information about threads included in the report. /// /// \param report Opaque pointer to the current report. /// \param idx Index to the report's threads. /// \param[out] tid Thread ID of the thread. /// \param[out] os_id Operating system's ID of the thread. /// \param[out] running Running flag of the thread. /// \param[out] name Name of the thread. /// \param[out] parent_tid ID of the parent thread. /// \param trace A buffer to store the stack trace. /// \param trace_size Size in bytes of the trace buffer. /// \returns Returns 1 if successful, 0 if not. int SANITIZER_CDECL __tsan_get_report_thread(void *report, unsigned long idx, int *tid, uint64_t *os_id, int *running, const char **name, int *parent_tid, void **trace, unsigned long trace_size); /// Returns information about unique thread IDs included in the report. /// /// \param report Opaque pointer to the current report. /// \param idx Index to the report's unique thread IDs. /// \param[out] tid Unique thread ID of the report. /// \returns Returns 1 if successful, 0 if not. int SANITIZER_CDECL __tsan_get_report_unique_tid(void *report, unsigned long idx, int *tid); /// Returns the current report. /// /// If TSan is currently reporting a detected issue on the current thread, /// returns an opaque pointer to the current report. Otherwise returns NULL. /// \returns An opaque pointer to the current report. Otherwise returns NULL. void *SANITIZER_CDECL __tsan_get_current_report(); #ifdef __cplusplus } // extern "C" #endif #endif // SANITIZER_TSAN_INTERFACE_H //===- xray_interface.h -----------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is a part of XRay, a dynamic runtime instrumentation system. // // APIs for controlling XRay functionality explicitly. //===----------------------------------------------------------------------===// #ifndef XRAY_XRAY_INTERFACE_H #define XRAY_XRAY_INTERFACE_H #include #include extern "C" { /// Synchronize this with AsmPrinter::SledKind in LLVM. enum XRayEntryType { ENTRY = 0, EXIT = 1, TAIL = 2, LOG_ARGS_ENTRY = 3, CUSTOM_EVENT = 4, TYPED_EVENT = 5, }; /// Provide a function to invoke for when instrumentation points are hit. This /// is a user-visible control surface that overrides the default implementation. /// The function provided should take the following arguments: /// /// - function id: an identifier that indicates the id of a function; this id /// is generated by xray; the mapping between the function id /// and the actual function pointer is available through /// __xray_table. /// - entry type: identifies what kind of instrumentation point was /// encountered (function entry, function exit, etc.). See the /// enum XRayEntryType for more details. /// /// The user handler must handle correctly spurious calls after this handler is /// removed or replaced with another handler, because it would be too costly for /// XRay runtime to avoid spurious calls. /// To prevent circular calling, the handler function itself and all its /// direct&indirect callees must not be instrumented with XRay, which can be /// achieved by marking them all with: __attribute__((xray_never_instrument)) /// /// Returns 1 on success, 0 on error. extern int __xray_set_handler(void (*entry)(int32_t, XRayEntryType)); /// This removes whatever the currently provided handler is. Returns 1 on /// success, 0 on error. extern int __xray_remove_handler(); /// Use XRay to log the first argument of each (instrumented) function call. /// When this function exits, all threads will have observed the effect and /// start logging their subsequent affected function calls (if patched). /// /// Returns 1 on success, 0 on error. extern int __xray_set_handler_arg1(void (*entry)(int32_t, XRayEntryType, uint64_t)); /// Disables the XRay handler used to log first arguments of function calls. /// Returns 1 on success, 0 on error. extern int __xray_remove_handler_arg1(); /// Provide a function to invoke when XRay encounters a custom event. extern int __xray_set_customevent_handler(void (*entry)(void *, std::size_t)); /// This removes whatever the currently provided custom event handler is. /// Returns 1 on success, 0 on error. extern int __xray_remove_customevent_handler(); /// Set a handler for xray typed event logging. The first parameter is a type /// identifier, the second is a payload, and the third is the payload size. /// NOTE: fdrLoggingHandleTypedEvent only supports uint16_t event type. extern int __xray_set_typedevent_handler(void (*entry)(size_t, const void *, size_t)); /// Removes the currently set typed event handler. /// Returns 1 on success, 0 on error. extern int __xray_remove_typedevent_handler(); extern uint16_t __xray_register_event_type(const char *event_type); enum XRayPatchingStatus { NOT_INITIALIZED = 0, SUCCESS = 1, ONGOING = 2, FAILED = 3, }; /// This tells XRay to patch the instrumentation points. See XRayPatchingStatus /// for possible result values. extern XRayPatchingStatus __xray_patch(); /// Reverses the effect of __xray_patch(). 
See XRayPatchingStatus for possible /// result values. extern XRayPatchingStatus __xray_unpatch(); /// This patches a specific function id. See XRayPatchingStatus for possible /// result values. extern XRayPatchingStatus __xray_patch_function(int32_t FuncId); /// This unpatches a specific function id. See XRayPatchingStatus for possible /// result values. extern XRayPatchingStatus __xray_unpatch_function(int32_t FuncId); /// This function returns the address of the function provided a valid function /// id. We return 0 if we encounter any error, even if 0 may be a valid function /// address. extern uintptr_t __xray_function_address(int32_t FuncId); /// This function returns the maximum valid function id. Returns 0 if we /// encounter errors (when there are no instrumented functions, etc.). extern size_t __xray_max_function_id(); /// Initialize the required XRay data structures. This is useful in cases where /// users want to control precisely when the XRay instrumentation data /// structures are initialized, for example when the XRay library is built with /// the XRAY_NO_PREINIT preprocessor definition. /// /// Calling __xray_init() more than once is safe across multiple threads. extern void __xray_init(); } // end extern "C" #endif // XRAY_XRAY_INTERFACE_H __clang_cuda_device_functions.h/*===---- __clang_cuda_device_functions.h - CUDA runtime support -----------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_CUDA_DEVICE_FUNCTIONS_H__ #define __CLANG_CUDA_DEVICE_FUNCTIONS_H__ #ifndef __OPENMP_NVPTX__ #if CUDA_VERSION < 9000 #error This file is intended to be used with CUDA-9+ only. #endif #endif // __DEVICE__ is a helper macro with common set of attributes for the wrappers // we implement in this file. We need static in order to avoid emitting unused // functions and __forceinline__ helps inlining these wrappers at -O1.
#pragma push_macro("__DEVICE__") #ifdef __OPENMP_NVPTX__ #define __DEVICE__ static __attribute__((always_inline, nothrow)) #else #define __DEVICE__ static __device__ __forceinline__ #endif __DEVICE__ int __all(int __a) { return __nvvm_vote_all(__a); } __DEVICE__ int __any(int __a) { return __nvvm_vote_any(__a); } __DEVICE__ unsigned int __ballot(int __a) { return __nvvm_vote_ballot(__a); } __DEVICE__ unsigned int __brev(unsigned int __a) { return __nv_brev(__a); } __DEVICE__ unsigned long long __brevll(unsigned long long __a) { return __nv_brevll(__a); } #if defined(__cplusplus) __DEVICE__ void __brkpt() { __asm__ __volatile__("brkpt;"); } __DEVICE__ void __brkpt(int __a) { __brkpt(); } #else __DEVICE__ void __attribute__((overloadable)) __brkpt(void) { __asm__ __volatile__("brkpt;"); } __DEVICE__ void __attribute__((overloadable)) __brkpt(int __a) { __brkpt(); } #endif __DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b, unsigned int __c) { return __nv_byte_perm(__a, __b, __c); } __DEVICE__ int __clz(int __a) { return __nv_clz(__a); } __DEVICE__ int __clzll(long long __a) { return __nv_clzll(__a); } __DEVICE__ float __cosf(float __a) { return __nv_fast_cosf(__a); } __DEVICE__ double __dAtomicAdd(double *__p, double __v) { return __nvvm_atom_add_gen_d(__p, __v); } __DEVICE__ double __dAtomicAdd_block(double *__p, double __v) { return __nvvm_atom_cta_add_gen_d(__p, __v); } __DEVICE__ double __dAtomicAdd_system(double *__p, double __v) { return __nvvm_atom_sys_add_gen_d(__p, __v); } __DEVICE__ double __dadd_rd(double __a, double __b) { return __nv_dadd_rd(__a, __b); } __DEVICE__ double __dadd_rn(double __a, double __b) { return __nv_dadd_rn(__a, __b); } __DEVICE__ double __dadd_ru(double __a, double __b) { return __nv_dadd_ru(__a, __b); } __DEVICE__ double __dadd_rz(double __a, double __b) { return __nv_dadd_rz(__a, __b); } __DEVICE__ double __ddiv_rd(double __a, double __b) { return __nv_ddiv_rd(__a, __b); } __DEVICE__ double __ddiv_rn(double __a, double __b) { return __nv_ddiv_rn(__a, __b); } __DEVICE__ double __ddiv_ru(double __a, double __b) { return __nv_ddiv_ru(__a, __b); } __DEVICE__ double __ddiv_rz(double __a, double __b) { return __nv_ddiv_rz(__a, __b); } __DEVICE__ double __dmul_rd(double __a, double __b) { return __nv_dmul_rd(__a, __b); } __DEVICE__ double __dmul_rn(double __a, double __b) { return __nv_dmul_rn(__a, __b); } __DEVICE__ double __dmul_ru(double __a, double __b) { return __nv_dmul_ru(__a, __b); } __DEVICE__ double __dmul_rz(double __a, double __b) { return __nv_dmul_rz(__a, __b); } __DEVICE__ float __double2float_rd(double __a) { return __nv_double2float_rd(__a); } __DEVICE__ float __double2float_rn(double __a) { return __nv_double2float_rn(__a); } __DEVICE__ float __double2float_ru(double __a) { return __nv_double2float_ru(__a); } __DEVICE__ float __double2float_rz(double __a) { return __nv_double2float_rz(__a); } __DEVICE__ int __double2hiint(double __a) { return __nv_double2hiint(__a); } __DEVICE__ int __double2int_rd(double __a) { return __nv_double2int_rd(__a); } __DEVICE__ int __double2int_rn(double __a) { return __nv_double2int_rn(__a); } __DEVICE__ int __double2int_ru(double __a) { return __nv_double2int_ru(__a); } __DEVICE__ int __double2int_rz(double __a) { return __nv_double2int_rz(__a); } __DEVICE__ long long __double2ll_rd(double __a) { return __nv_double2ll_rd(__a); } __DEVICE__ long long __double2ll_rn(double __a) { return __nv_double2ll_rn(__a); } __DEVICE__ long long __double2ll_ru(double __a) { return __nv_double2ll_ru(__a); } 
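// Illustrative sketch (not part of the original header): the directed-rounding
// wrappers above, e.g. __dadd_rd and __dadd_ru, forward to the matching libdevice
// calls, which makes them convenient for interval-style bounds in device code.
// The Interval type and interval_add() are invented for this example and assume
// compilation as CUDA device code with these wrappers in scope.
struct Interval { double lo, hi; };
__DEVICE__ Interval interval_add(Interval a, Interval b) {
  Interval r;
  r.lo = __dadd_rd(a.lo, b.lo);  // lower bound: addition rounded toward -infinity
  r.hi = __dadd_ru(a.hi, b.hi);  // upper bound: addition rounded toward +infinity
  return r;
}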
__DEVICE__ long long __double2ll_rz(double __a) { return __nv_double2ll_rz(__a); } __DEVICE__ int __double2loint(double __a) { return __nv_double2loint(__a); } __DEVICE__ unsigned int __double2uint_rd(double __a) { return __nv_double2uint_rd(__a); } __DEVICE__ unsigned int __double2uint_rn(double __a) { return __nv_double2uint_rn(__a); } __DEVICE__ unsigned int __double2uint_ru(double __a) { return __nv_double2uint_ru(__a); } __DEVICE__ unsigned int __double2uint_rz(double __a) { return __nv_double2uint_rz(__a); } __DEVICE__ unsigned long long __double2ull_rd(double __a) { return __nv_double2ull_rd(__a); } __DEVICE__ unsigned long long __double2ull_rn(double __a) { return __nv_double2ull_rn(__a); } __DEVICE__ unsigned long long __double2ull_ru(double __a) { return __nv_double2ull_ru(__a); } __DEVICE__ unsigned long long __double2ull_rz(double __a) { return __nv_double2ull_rz(__a); } __DEVICE__ long long __double_as_longlong(double __a) { return __nv_double_as_longlong(__a); } __DEVICE__ double __drcp_rd(double __a) { return __nv_drcp_rd(__a); } __DEVICE__ double __drcp_rn(double __a) { return __nv_drcp_rn(__a); } __DEVICE__ double __drcp_ru(double __a) { return __nv_drcp_ru(__a); } __DEVICE__ double __drcp_rz(double __a) { return __nv_drcp_rz(__a); } __DEVICE__ double __dsqrt_rd(double __a) { return __nv_dsqrt_rd(__a); } __DEVICE__ double __dsqrt_rn(double __a) { return __nv_dsqrt_rn(__a); } __DEVICE__ double __dsqrt_ru(double __a) { return __nv_dsqrt_ru(__a); } __DEVICE__ double __dsqrt_rz(double __a) { return __nv_dsqrt_rz(__a); } __DEVICE__ double __dsub_rd(double __a, double __b) { return __nv_dsub_rd(__a, __b); } __DEVICE__ double __dsub_rn(double __a, double __b) { return __nv_dsub_rn(__a, __b); } __DEVICE__ double __dsub_ru(double __a, double __b) { return __nv_dsub_ru(__a, __b); } __DEVICE__ double __dsub_rz(double __a, double __b) { return __nv_dsub_rz(__a, __b); } __DEVICE__ float __exp10f(float __a) { return __nv_fast_exp10f(__a); } __DEVICE__ float __expf(float __a) { return __nv_fast_expf(__a); } __DEVICE__ float __fAtomicAdd(float *__p, float __v) { return __nvvm_atom_add_gen_f(__p, __v); } __DEVICE__ float __fAtomicAdd_block(float *__p, float __v) { return __nvvm_atom_cta_add_gen_f(__p, __v); } __DEVICE__ float __fAtomicAdd_system(float *__p, float __v) { return __nvvm_atom_sys_add_gen_f(__p, __v); } __DEVICE__ float __fAtomicExch(float *__p, float __v) { return __nv_int_as_float( __nvvm_atom_xchg_gen_i((int *)__p, __nv_float_as_int(__v))); } __DEVICE__ float __fAtomicExch_block(float *__p, float __v) { return __nv_int_as_float( __nvvm_atom_cta_xchg_gen_i((int *)__p, __nv_float_as_int(__v))); } __DEVICE__ float __fAtomicExch_system(float *__p, float __v) { return __nv_int_as_float( __nvvm_atom_sys_xchg_gen_i((int *)__p, __nv_float_as_int(__v))); } __DEVICE__ float __fadd_rd(float __a, float __b) { return __nv_fadd_rd(__a, __b); } __DEVICE__ float __fadd_rn(float __a, float __b) { return __nv_fadd_rn(__a, __b); } __DEVICE__ float __fadd_ru(float __a, float __b) { return __nv_fadd_ru(__a, __b); } __DEVICE__ float __fadd_rz(float __a, float __b) { return __nv_fadd_rz(__a, __b); } __DEVICE__ float __fdiv_rd(float __a, float __b) { return __nv_fdiv_rd(__a, __b); } __DEVICE__ float __fdiv_rn(float __a, float __b) { return __nv_fdiv_rn(__a, __b); } __DEVICE__ float __fdiv_ru(float __a, float __b) { return __nv_fdiv_ru(__a, __b); } __DEVICE__ float __fdiv_rz(float __a, float __b) { return __nv_fdiv_rz(__a, __b); } __DEVICE__ float __fdividef(float __a, float __b) { return 
__nv_fast_fdividef(__a, __b); } __DEVICE__ int __ffs(int __a) { return __nv_ffs(__a); } __DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); } __DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); } __DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); } #ifdef _MSC_VER __DEVICE__ int __finitel(long double __a); #endif __DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); } __DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); } __DEVICE__ int __float2int_ru(float __a) { return __nv_float2int_ru(__a); } __DEVICE__ int __float2int_rz(float __a) { return __nv_float2int_rz(__a); } __DEVICE__ long long __float2ll_rd(float __a) { return __nv_float2ll_rd(__a); } __DEVICE__ long long __float2ll_rn(float __a) { return __nv_float2ll_rn(__a); } __DEVICE__ long long __float2ll_ru(float __a) { return __nv_float2ll_ru(__a); } __DEVICE__ long long __float2ll_rz(float __a) { return __nv_float2ll_rz(__a); } __DEVICE__ unsigned int __float2uint_rd(float __a) { return __nv_float2uint_rd(__a); } __DEVICE__ unsigned int __float2uint_rn(float __a) { return __nv_float2uint_rn(__a); } __DEVICE__ unsigned int __float2uint_ru(float __a) { return __nv_float2uint_ru(__a); } __DEVICE__ unsigned int __float2uint_rz(float __a) { return __nv_float2uint_rz(__a); } __DEVICE__ unsigned long long __float2ull_rd(float __a) { return __nv_float2ull_rd(__a); } __DEVICE__ unsigned long long __float2ull_rn(float __a) { return __nv_float2ull_rn(__a); } __DEVICE__ unsigned long long __float2ull_ru(float __a) { return __nv_float2ull_ru(__a); } __DEVICE__ unsigned long long __float2ull_rz(float __a) { return __nv_float2ull_rz(__a); } __DEVICE__ int __float_as_int(float __a) { return __nv_float_as_int(__a); } __DEVICE__ unsigned int __float_as_uint(float __a) { return __nv_float_as_uint(__a); } __DEVICE__ double __fma_rd(double __a, double __b, double __c) { return __nv_fma_rd(__a, __b, __c); } __DEVICE__ double __fma_rn(double __a, double __b, double __c) { return __nv_fma_rn(__a, __b, __c); } __DEVICE__ double __fma_ru(double __a, double __b, double __c) { return __nv_fma_ru(__a, __b, __c); } __DEVICE__ double __fma_rz(double __a, double __b, double __c) { return __nv_fma_rz(__a, __b, __c); } __DEVICE__ float __fmaf_ieee_rd(float __a, float __b, float __c) { return __nv_fmaf_ieee_rd(__a, __b, __c); } __DEVICE__ float __fmaf_ieee_rn(float __a, float __b, float __c) { return __nv_fmaf_ieee_rn(__a, __b, __c); } __DEVICE__ float __fmaf_ieee_ru(float __a, float __b, float __c) { return __nv_fmaf_ieee_ru(__a, __b, __c); } __DEVICE__ float __fmaf_ieee_rz(float __a, float __b, float __c) { return __nv_fmaf_ieee_rz(__a, __b, __c); } __DEVICE__ float __fmaf_rd(float __a, float __b, float __c) { return __nv_fmaf_rd(__a, __b, __c); } __DEVICE__ float __fmaf_rn(float __a, float __b, float __c) { return __nv_fmaf_rn(__a, __b, __c); } __DEVICE__ float __fmaf_ru(float __a, float __b, float __c) { return __nv_fmaf_ru(__a, __b, __c); } __DEVICE__ float __fmaf_rz(float __a, float __b, float __c) { return __nv_fmaf_rz(__a, __b, __c); } __DEVICE__ float __fmul_rd(float __a, float __b) { return __nv_fmul_rd(__a, __b); } __DEVICE__ float __fmul_rn(float __a, float __b) { return __nv_fmul_rn(__a, __b); } __DEVICE__ float __fmul_ru(float __a, float __b) { return __nv_fmul_ru(__a, __b); } __DEVICE__ float __fmul_rz(float __a, float __b) { return __nv_fmul_rz(__a, __b); } __DEVICE__ float __frcp_rd(float __a) { return __nv_frcp_rd(__a); } __DEVICE__ float __frcp_rn(float __a) { return 
__nv_frcp_rn(__a); } __DEVICE__ float __frcp_ru(float __a) { return __nv_frcp_ru(__a); } __DEVICE__ float __frcp_rz(float __a) { return __nv_frcp_rz(__a); } __DEVICE__ float __frsqrt_rn(float __a) { return __nv_frsqrt_rn(__a); } __DEVICE__ float __fsqrt_rd(float __a) { return __nv_fsqrt_rd(__a); } __DEVICE__ float __fsqrt_rn(float __a) { return __nv_fsqrt_rn(__a); } __DEVICE__ float __fsqrt_ru(float __a) { return __nv_fsqrt_ru(__a); } __DEVICE__ float __fsqrt_rz(float __a) { return __nv_fsqrt_rz(__a); } __DEVICE__ float __fsub_rd(float __a, float __b) { return __nv_fsub_rd(__a, __b); } __DEVICE__ float __fsub_rn(float __a, float __b) { return __nv_fsub_rn(__a, __b); } __DEVICE__ float __fsub_ru(float __a, float __b) { return __nv_fsub_ru(__a, __b); } __DEVICE__ float __fsub_rz(float __a, float __b) { return __nv_fsub_rz(__a, __b); } __DEVICE__ int __hadd(int __a, int __b) { return __nv_hadd(__a, __b); } __DEVICE__ double __hiloint2double(int __a, int __b) { return __nv_hiloint2double(__a, __b); } __DEVICE__ int __iAtomicAdd(int *__p, int __v) { return __nvvm_atom_add_gen_i(__p, __v); } __DEVICE__ int __iAtomicAdd_block(int *__p, int __v) { return __nvvm_atom_cta_add_gen_i(__p, __v); } __DEVICE__ int __iAtomicAdd_system(int *__p, int __v) { return __nvvm_atom_sys_add_gen_i(__p, __v); } __DEVICE__ int __iAtomicAnd(int *__p, int __v) { return __nvvm_atom_and_gen_i(__p, __v); } __DEVICE__ int __iAtomicAnd_block(int *__p, int __v) { return __nvvm_atom_cta_and_gen_i(__p, __v); } __DEVICE__ int __iAtomicAnd_system(int *__p, int __v) { return __nvvm_atom_sys_and_gen_i(__p, __v); } __DEVICE__ int __iAtomicCAS(int *__p, int __cmp, int __v) { return __nvvm_atom_cas_gen_i(__p, __cmp, __v); } __DEVICE__ int __iAtomicCAS_block(int *__p, int __cmp, int __v) { return __nvvm_atom_cta_cas_gen_i(__p, __cmp, __v); } __DEVICE__ int __iAtomicCAS_system(int *__p, int __cmp, int __v) { return __nvvm_atom_sys_cas_gen_i(__p, __cmp, __v); } __DEVICE__ int __iAtomicExch(int *__p, int __v) { return __nvvm_atom_xchg_gen_i(__p, __v); } __DEVICE__ int __iAtomicExch_block(int *__p, int __v) { return __nvvm_atom_cta_xchg_gen_i(__p, __v); } __DEVICE__ int __iAtomicExch_system(int *__p, int __v) { return __nvvm_atom_sys_xchg_gen_i(__p, __v); } __DEVICE__ int __iAtomicMax(int *__p, int __v) { return __nvvm_atom_max_gen_i(__p, __v); } __DEVICE__ int __iAtomicMax_block(int *__p, int __v) { return __nvvm_atom_cta_max_gen_i(__p, __v); } __DEVICE__ int __iAtomicMax_system(int *__p, int __v) { return __nvvm_atom_sys_max_gen_i(__p, __v); } __DEVICE__ int __iAtomicMin(int *__p, int __v) { return __nvvm_atom_min_gen_i(__p, __v); } __DEVICE__ int __iAtomicMin_block(int *__p, int __v) { return __nvvm_atom_cta_min_gen_i(__p, __v); } __DEVICE__ int __iAtomicMin_system(int *__p, int __v) { return __nvvm_atom_sys_min_gen_i(__p, __v); } __DEVICE__ int __iAtomicOr(int *__p, int __v) { return __nvvm_atom_or_gen_i(__p, __v); } __DEVICE__ int __iAtomicOr_block(int *__p, int __v) { return __nvvm_atom_cta_or_gen_i(__p, __v); } __DEVICE__ int __iAtomicOr_system(int *__p, int __v) { return __nvvm_atom_sys_or_gen_i(__p, __v); } __DEVICE__ int __iAtomicXor(int *__p, int __v) { return __nvvm_atom_xor_gen_i(__p, __v); } __DEVICE__ int __iAtomicXor_block(int *__p, int __v) { return __nvvm_atom_cta_xor_gen_i(__p, __v); } __DEVICE__ int __iAtomicXor_system(int *__p, int __v) { return __nvvm_atom_sys_xor_gen_i(__p, __v); } __DEVICE__ long long __illAtomicMax(long long *__p, long long __v) { return __nvvm_atom_max_gen_ll(__p, __v); } __DEVICE__ long long 
__illAtomicMax_block(long long *__p, long long __v) { return __nvvm_atom_cta_max_gen_ll(__p, __v); } __DEVICE__ long long __illAtomicMax_system(long long *__p, long long __v) { return __nvvm_atom_sys_max_gen_ll(__p, __v); } __DEVICE__ long long __illAtomicMin(long long *__p, long long __v) { return __nvvm_atom_min_gen_ll(__p, __v); } __DEVICE__ long long __illAtomicMin_block(long long *__p, long long __v) { return __nvvm_atom_cta_min_gen_ll(__p, __v); } __DEVICE__ long long __illAtomicMin_system(long long *__p, long long __v) { return __nvvm_atom_sys_min_gen_ll(__p, __v); } __DEVICE__ double __int2double_rn(int __a) { return __nv_int2double_rn(__a); } __DEVICE__ float __int2float_rd(int __a) { return __nv_int2float_rd(__a); } __DEVICE__ float __int2float_rn(int __a) { return __nv_int2float_rn(__a); } __DEVICE__ float __int2float_ru(int __a) { return __nv_int2float_ru(__a); } __DEVICE__ float __int2float_rz(int __a) { return __nv_int2float_rz(__a); } __DEVICE__ float __int_as_float(int __a) { return __nv_int_as_float(__a); } __DEVICE__ int __isfinited(double __a) { return __nv_isfinited(__a); } __DEVICE__ int __isinf(double __a) { return __nv_isinfd(__a); } __DEVICE__ int __isinff(float __a) { return __nv_isinff(__a); } #ifdef _MSC_VER __DEVICE__ int __isinfl(long double __a); #endif __DEVICE__ int __isnan(double __a) { return __nv_isnand(__a); } __DEVICE__ int __isnanf(float __a) { return __nv_isnanf(__a); } #ifdef _MSC_VER __DEVICE__ int __isnanl(long double __a); #endif __DEVICE__ double __ll2double_rd(long long __a) { return __nv_ll2double_rd(__a); } __DEVICE__ double __ll2double_rn(long long __a) { return __nv_ll2double_rn(__a); } __DEVICE__ double __ll2double_ru(long long __a) { return __nv_ll2double_ru(__a); } __DEVICE__ double __ll2double_rz(long long __a) { return __nv_ll2double_rz(__a); } __DEVICE__ float __ll2float_rd(long long __a) { return __nv_ll2float_rd(__a); } __DEVICE__ float __ll2float_rn(long long __a) { return __nv_ll2float_rn(__a); } __DEVICE__ float __ll2float_ru(long long __a) { return __nv_ll2float_ru(__a); } __DEVICE__ float __ll2float_rz(long long __a) { return __nv_ll2float_rz(__a); } __DEVICE__ long long __llAtomicAnd(long long *__p, long long __v) { return __nvvm_atom_and_gen_ll(__p, __v); } __DEVICE__ long long __llAtomicAnd_block(long long *__p, long long __v) { return __nvvm_atom_cta_and_gen_ll(__p, __v); } __DEVICE__ long long __llAtomicAnd_system(long long *__p, long long __v) { return __nvvm_atom_sys_and_gen_ll(__p, __v); } __DEVICE__ long long __llAtomicOr(long long *__p, long long __v) { return __nvvm_atom_or_gen_ll(__p, __v); } __DEVICE__ long long __llAtomicOr_block(long long *__p, long long __v) { return __nvvm_atom_cta_or_gen_ll(__p, __v); } __DEVICE__ long long __llAtomicOr_system(long long *__p, long long __v) { return __nvvm_atom_sys_or_gen_ll(__p, __v); } __DEVICE__ long long __llAtomicXor(long long *__p, long long __v) { return __nvvm_atom_xor_gen_ll(__p, __v); } __DEVICE__ long long __llAtomicXor_block(long long *__p, long long __v) { return __nvvm_atom_cta_xor_gen_ll(__p, __v); } __DEVICE__ long long __llAtomicXor_system(long long *__p, long long __v) { return __nvvm_atom_sys_xor_gen_ll(__p, __v); } __DEVICE__ float __log10f(float __a) { return __nv_fast_log10f(__a); } __DEVICE__ float __log2f(float __a) { return __nv_fast_log2f(__a); } __DEVICE__ float __logf(float __a) { return __nv_fast_logf(__a); } __DEVICE__ double __longlong_as_double(long long __a) { return __nv_longlong_as_double(__a); } __DEVICE__ int __mul24(int __a, int __b) { 
return __nv_mul24(__a, __b); } __DEVICE__ long long __mul64hi(long long __a, long long __b) { return __nv_mul64hi(__a, __b); } __DEVICE__ int __mulhi(int __a, int __b) { return __nv_mulhi(__a, __b); } __DEVICE__ unsigned int __pm0(void) { return __nvvm_read_ptx_sreg_pm0(); } __DEVICE__ unsigned int __pm1(void) { return __nvvm_read_ptx_sreg_pm1(); } __DEVICE__ unsigned int __pm2(void) { return __nvvm_read_ptx_sreg_pm2(); } __DEVICE__ unsigned int __pm3(void) { return __nvvm_read_ptx_sreg_pm3(); } __DEVICE__ int __popc(unsigned int __a) { return __nv_popc(__a); } __DEVICE__ int __popcll(unsigned long long __a) { return __nv_popcll(__a); } __DEVICE__ float __powf(float __a, float __b) { return __nv_fast_powf(__a, __b); } // Parameter must have a known integer value. #define __prof_trigger(__a) __asm__ __volatile__("pmevent \t%0;" ::"i"(__a)) __DEVICE__ int __rhadd(int __a, int __b) { return __nv_rhadd(__a, __b); } __DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) { return __nv_sad(__a, __b, __c); } __DEVICE__ float __saturatef(float __a) { return __nv_saturatef(__a); } __DEVICE__ int __signbitd(double __a) { return __nv_signbitd(__a); } __DEVICE__ int __signbitf(float __a) { return __nv_signbitf(__a); } __DEVICE__ void __sincosf(float __a, float *__s, float *__c) { return __nv_fast_sincosf(__a, __s, __c); } __DEVICE__ float __sinf(float __a) { return __nv_fast_sinf(__a); } __DEVICE__ int __syncthreads_and(int __a) { return __nvvm_bar0_and(__a); } __DEVICE__ int __syncthreads_count(int __a) { return __nvvm_bar0_popc(__a); } __DEVICE__ int __syncthreads_or(int __a) { return __nvvm_bar0_or(__a); } __DEVICE__ float __tanf(float __a) { return __nv_fast_tanf(__a); } __DEVICE__ void __threadfence(void) { __nvvm_membar_gl(); } __DEVICE__ void __threadfence_block(void) { __nvvm_membar_cta(); }; __DEVICE__ void __threadfence_system(void) { __nvvm_membar_sys(); }; __DEVICE__ void __trap(void) { __asm__ __volatile__("trap;"); } __DEVICE__ unsigned int __uAtomicAdd(unsigned int *__p, unsigned int __v) { return __nvvm_atom_add_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicAdd_block(unsigned int *__p, unsigned int __v) { return __nvvm_atom_cta_add_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicAdd_system(unsigned int *__p, unsigned int __v) { return __nvvm_atom_sys_add_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicAnd(unsigned int *__p, unsigned int __v) { return __nvvm_atom_and_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicAnd_block(unsigned int *__p, unsigned int __v) { return __nvvm_atom_cta_and_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicAnd_system(unsigned int *__p, unsigned int __v) { return __nvvm_atom_sys_and_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicCAS(unsigned int *__p, unsigned int __cmp, unsigned int __v) { return __nvvm_atom_cas_gen_i((int *)__p, __cmp, __v); } __DEVICE__ unsigned int __uAtomicCAS_block(unsigned int *__p, unsigned int __cmp, unsigned int __v) { return __nvvm_atom_cta_cas_gen_i((int *)__p, __cmp, __v); } __DEVICE__ unsigned int __uAtomicCAS_system(unsigned int *__p, unsigned int __cmp, unsigned int __v) { return __nvvm_atom_sys_cas_gen_i((int *)__p, __cmp, __v); } __DEVICE__ unsigned int __uAtomicDec(unsigned int *__p, unsigned int __v) { return __nvvm_atom_dec_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicDec_block(unsigned int *__p, unsigned int __v) { return __nvvm_atom_cta_dec_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicDec_system(unsigned int *__p, unsigned 
int __v) { return __nvvm_atom_sys_dec_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicExch(unsigned int *__p, unsigned int __v) { return __nvvm_atom_xchg_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicExch_block(unsigned int *__p, unsigned int __v) { return __nvvm_atom_cta_xchg_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicExch_system(unsigned int *__p, unsigned int __v) { return __nvvm_atom_sys_xchg_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicInc(unsigned int *__p, unsigned int __v) { return __nvvm_atom_inc_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicInc_block(unsigned int *__p, unsigned int __v) { return __nvvm_atom_cta_inc_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicInc_system(unsigned int *__p, unsigned int __v) { return __nvvm_atom_sys_inc_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicMax(unsigned int *__p, unsigned int __v) { return __nvvm_atom_max_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicMax_block(unsigned int *__p, unsigned int __v) { return __nvvm_atom_cta_max_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicMax_system(unsigned int *__p, unsigned int __v) { return __nvvm_atom_sys_max_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicMin(unsigned int *__p, unsigned int __v) { return __nvvm_atom_min_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicMin_block(unsigned int *__p, unsigned int __v) { return __nvvm_atom_cta_min_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicMin_system(unsigned int *__p, unsigned int __v) { return __nvvm_atom_sys_min_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicOr(unsigned int *__p, unsigned int __v) { return __nvvm_atom_or_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicOr_block(unsigned int *__p, unsigned int __v) { return __nvvm_atom_cta_or_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicOr_system(unsigned int *__p, unsigned int __v) { return __nvvm_atom_sys_or_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicXor(unsigned int *__p, unsigned int __v) { return __nvvm_atom_xor_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicXor_block(unsigned int *__p, unsigned int __v) { return __nvvm_atom_cta_xor_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicXor_system(unsigned int *__p, unsigned int __v) { return __nvvm_atom_sys_xor_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uhadd(unsigned int __a, unsigned int __b) { return __nv_uhadd(__a, __b); } __DEVICE__ double __uint2double_rn(unsigned int __a) { return __nv_uint2double_rn(__a); } __DEVICE__ float __uint2float_rd(unsigned int __a) { return __nv_uint2float_rd(__a); } __DEVICE__ float __uint2float_rn(unsigned int __a) { return __nv_uint2float_rn(__a); } __DEVICE__ float __uint2float_ru(unsigned int __a) { return __nv_uint2float_ru(__a); } __DEVICE__ float __uint2float_rz(unsigned int __a) { return __nv_uint2float_rz(__a); } __DEVICE__ float __uint_as_float(unsigned int __a) { return __nv_uint_as_float(__a); } // __DEVICE__ double __ull2double_rd(unsigned long long __a) { return __nv_ull2double_rd(__a); } __DEVICE__ double __ull2double_rn(unsigned long long __a) { return __nv_ull2double_rn(__a); } __DEVICE__ double __ull2double_ru(unsigned long long __a) { return __nv_ull2double_ru(__a); } __DEVICE__ double __ull2double_rz(unsigned long long __a) { return __nv_ull2double_rz(__a); } __DEVICE__ float __ull2float_rd(unsigned long long __a) { return __nv_ull2float_rd(__a); } __DEVICE__ float __ull2float_rn(unsigned long long __a) { return 
__nv_ull2float_rn(__a); } __DEVICE__ float __ull2float_ru(unsigned long long __a) { return __nv_ull2float_ru(__a); } __DEVICE__ float __ull2float_rz(unsigned long long __a) { return __nv_ull2float_rz(__a); } __DEVICE__ unsigned long long __ullAtomicAdd(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_add_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicAdd_block(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_cta_add_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicAdd_system(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_sys_add_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicAnd(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_and_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicAnd_block(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_cta_and_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicAnd_system(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_sys_and_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicCAS(unsigned long long *__p, unsigned long long __cmp, unsigned long long __v) { return __nvvm_atom_cas_gen_ll((long long *)__p, __cmp, __v); } __DEVICE__ unsigned long long __ullAtomicCAS_block(unsigned long long *__p, unsigned long long __cmp, unsigned long long __v) { return __nvvm_atom_cta_cas_gen_ll((long long *)__p, __cmp, __v); } __DEVICE__ unsigned long long __ullAtomicCAS_system(unsigned long long *__p, unsigned long long __cmp, unsigned long long __v) { return __nvvm_atom_sys_cas_gen_ll((long long *)__p, __cmp, __v); } __DEVICE__ unsigned long long __ullAtomicExch(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_xchg_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicExch_block(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_cta_xchg_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicExch_system(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_sys_xchg_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicMax(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_max_gen_ull(__p, __v); } __DEVICE__ unsigned long long __ullAtomicMax_block(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_cta_max_gen_ull(__p, __v); } __DEVICE__ unsigned long long __ullAtomicMax_system(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_sys_max_gen_ull(__p, __v); } __DEVICE__ unsigned long long __ullAtomicMin(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_min_gen_ull(__p, __v); } __DEVICE__ unsigned long long __ullAtomicMin_block(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_cta_min_gen_ull(__p, __v); } __DEVICE__ unsigned long long __ullAtomicMin_system(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_sys_min_gen_ull(__p, __v); } __DEVICE__ unsigned long long __ullAtomicOr(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_or_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicOr_block(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_cta_or_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicOr_system(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_sys_or_gen_ll((long 
long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicXor(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_xor_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicXor_block(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_cta_xor_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicXor_system(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_sys_xor_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned int __umul24(unsigned int __a, unsigned int __b) { return __nv_umul24(__a, __b); } __DEVICE__ unsigned long long __umul64hi(unsigned long long __a, unsigned long long __b) { return __nv_umul64hi(__a, __b); } __DEVICE__ unsigned int __umulhi(unsigned int __a, unsigned int __b) { return __nv_umulhi(__a, __b); } __DEVICE__ unsigned int __urhadd(unsigned int __a, unsigned int __b) { return __nv_urhadd(__a, __b); } __DEVICE__ unsigned int __usad(unsigned int __a, unsigned int __b, unsigned int __c) { return __nv_usad(__a, __b, __c); } #if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020 __DEVICE__ unsigned int __vabs2(unsigned int __a) { return __nv_vabs2(__a); } __DEVICE__ unsigned int __vabs4(unsigned int __a) { return __nv_vabs4(__a); } __DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) { return __nv_vabsdiffs2(__a, __b); } __DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) { return __nv_vabsdiffs4(__a, __b); } __DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) { return __nv_vabsdiffu2(__a, __b); } __DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) { return __nv_vabsdiffu4(__a, __b); } __DEVICE__ unsigned int __vabsss2(unsigned int __a) { return __nv_vabsss2(__a); } __DEVICE__ unsigned int __vabsss4(unsigned int __a) { return __nv_vabsss4(__a); } __DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) { return __nv_vadd2(__a, __b); } __DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) { return __nv_vadd4(__a, __b); } __DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) { return __nv_vaddss2(__a, __b); } __DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) { return __nv_vaddss4(__a, __b); } __DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) { return __nv_vaddus2(__a, __b); } __DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) { return __nv_vaddus4(__a, __b); } __DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) { return __nv_vavgs2(__a, __b); } __DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) { return __nv_vavgs4(__a, __b); } __DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) { return __nv_vavgu2(__a, __b); } __DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) { return __nv_vavgu4(__a, __b); } __DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) { return __nv_vcmpeq2(__a, __b); } __DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) { return __nv_vcmpeq4(__a, __b); } __DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) { return __nv_vcmpges2(__a, __b); } __DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) { return __nv_vcmpges4(__a, __b); } __DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) { return __nv_vcmpgeu2(__a, __b); } __DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) { return __nv_vcmpgeu4(__a, __b); } __DEVICE__ 
unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) { return __nv_vcmpgts2(__a, __b); } __DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) { return __nv_vcmpgts4(__a, __b); } __DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) { return __nv_vcmpgtu2(__a, __b); } __DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) { return __nv_vcmpgtu4(__a, __b); } __DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) { return __nv_vcmples2(__a, __b); } __DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) { return __nv_vcmples4(__a, __b); } __DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) { return __nv_vcmpleu2(__a, __b); } __DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) { return __nv_vcmpleu4(__a, __b); } __DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) { return __nv_vcmplts2(__a, __b); } __DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) { return __nv_vcmplts4(__a, __b); } __DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) { return __nv_vcmpltu2(__a, __b); } __DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) { return __nv_vcmpltu4(__a, __b); } __DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) { return __nv_vcmpne2(__a, __b); } __DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) { return __nv_vcmpne4(__a, __b); } __DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) { return __nv_vhaddu2(__a, __b); } __DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) { return __nv_vhaddu4(__a, __b); } __DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) { return __nv_vmaxs2(__a, __b); } __DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) { return __nv_vmaxs4(__a, __b); } __DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) { return __nv_vmaxu2(__a, __b); } __DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) { return __nv_vmaxu4(__a, __b); } __DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) { return __nv_vmins2(__a, __b); } __DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) { return __nv_vmins4(__a, __b); } __DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) { return __nv_vminu2(__a, __b); } __DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) { return __nv_vminu4(__a, __b); } __DEVICE__ unsigned int __vneg2(unsigned int __a) { return __nv_vneg2(__a); } __DEVICE__ unsigned int __vneg4(unsigned int __a) { return __nv_vneg4(__a); } __DEVICE__ unsigned int __vnegss2(unsigned int __a) { return __nv_vnegss2(__a); } __DEVICE__ unsigned int __vnegss4(unsigned int __a) { return __nv_vnegss4(__a); } __DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) { return __nv_vsads2(__a, __b); } __DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) { return __nv_vsads4(__a, __b); } __DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) { return __nv_vsadu2(__a, __b); } __DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) { return __nv_vsadu4(__a, __b); } __DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) { return __nv_vseteq2(__a, __b); } __DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) { return __nv_vseteq4(__a, __b); } __DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) { return 
__nv_vsetges2(__a, __b); } __DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) { return __nv_vsetges4(__a, __b); } __DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) { return __nv_vsetgeu2(__a, __b); } __DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) { return __nv_vsetgeu4(__a, __b); } __DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) { return __nv_vsetgts2(__a, __b); } __DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) { return __nv_vsetgts4(__a, __b); } __DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) { return __nv_vsetgtu2(__a, __b); } __DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) { return __nv_vsetgtu4(__a, __b); } __DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) { return __nv_vsetles2(__a, __b); } __DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) { return __nv_vsetles4(__a, __b); } __DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) { return __nv_vsetleu2(__a, __b); } __DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) { return __nv_vsetleu4(__a, __b); } __DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) { return __nv_vsetlts2(__a, __b); } __DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) { return __nv_vsetlts4(__a, __b); } __DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) { return __nv_vsetltu2(__a, __b); } __DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) { return __nv_vsetltu4(__a, __b); } __DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) { return __nv_vsetne2(__a, __b); } __DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) { return __nv_vsetne4(__a, __b); } __DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) { return __nv_vsub2(__a, __b); } __DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) { return __nv_vsub4(__a, __b); } __DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) { return __nv_vsubss2(__a, __b); } __DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) { return __nv_vsubss4(__a, __b); } __DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) { return __nv_vsubus2(__a, __b); } __DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) { return __nv_vsubus4(__a, __b); } #else // CUDA_VERSION >= 9020 // CUDA no longer provides inline assembly (or bitcode) implementation of these // functions, so we have to reimplment them. The implementation is naive and is // not optimized for performance. // Helper function to convert N-bit boolean subfields into all-0 or all-1. // E.g. 
__bool2mask(0x01000100,8) -> 0xff00ff00 // __bool2mask(0x00010000,16) -> 0xffff0000 __DEVICE__ unsigned int __bool2mask(unsigned int __a, int shift) { return (__a << shift) - __a; } __DEVICE__ unsigned int __vabs2(unsigned int __a) { unsigned int r; __asm__("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(0), "r"(0)); return r; } __DEVICE__ unsigned int __vabs4(unsigned int __a) { unsigned int r; __asm__("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(0), "r"(0)); return r; } __DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vabsdiff2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vabsdiff4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vabsss2(unsigned int __a) { unsigned int r; __asm__("vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(0), "r"(0)); return r; } __DEVICE__ unsigned int __vabsss4(unsigned int __a) { unsigned int r; __asm__("vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(0), "r"(0)); return r; } __DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vadd2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vadd4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vadd2.s32.s32.s32.sat %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vadd4.s32.s32.s32.sat %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vadd2.u32.u32.u32.sat %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vadd4.u32.u32.u32.sat %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vavrg2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vavrg4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vavrg2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vavrg4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset2.u32.u32.eq 
%0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) { return __bool2mask(__vseteq2(__a, __b), 16); } __DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset4.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) { return __bool2mask(__vseteq4(__a, __b), 8); } __DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset2.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetges2(__a, __b), 16); } __DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset4.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetges4(__a, __b), 8); } __DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset2.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetgeu2(__a, __b), 16); } __DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset4.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetgeu4(__a, __b), 8); } __DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset2.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetgts2(__a, __b), 16); } __DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset4.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetgts4(__a, __b), 8); } __DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset2.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetgtu2(__a, __b), 16); } __DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset4.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetgtu4(__a, __b), 8); } __DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset2.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetles2(__a, __b), 16); } __DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset4.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetles4(__a, __b), 8); } __DEVICE__ unsigned int __vsetleu2(unsigned 
int __a, unsigned int __b) { unsigned int r; __asm__("vset2.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetleu2(__a, __b), 16); } __DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset4.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetleu4(__a, __b), 8); } __DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset2.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetlts2(__a, __b), 16); } __DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset4.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetlts4(__a, __b), 8); } __DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset2.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetltu2(__a, __b), 16); } __DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset4.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetltu4(__a, __b), 8); } __DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset2.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetne2(__a, __b), 16); } __DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset4.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetne4(__a, __b), 8); } // Based on ITEM 23 in AIM-239: http://dspace.mit.edu/handle/1721.1/6086 // (a & b) + (a | b) = a + b = (a ^ b) + 2 * (a & b) => // (a + b) / 2 = ((a ^ b) >> 1) + (a & b) // To operate on multiple sub-elements we need to make sure to mask out bits // that crossed over into adjacent elements during the shift. __DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) { return (((__a ^ __b) >> 1) & ~0x80008000u) + (__a & __b); } __DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) { return (((__a ^ __b) >> 1) & ~0x80808080u) + (__a & __b); } __DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) { unsigned int r; if ((__a & 0x8000) && (__b & 0x8000)) { // Work around a bug in ptxas which produces invalid result if low element // is negative. 
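/* Illustrative note (added; not part of the original header): the fallback below builds the per-lane signed maximum without vmax2. __vcmpgts2(__a, __b) returns 0xffff in each 16-bit lane where the signed comparison __a > __b holds and 0x0000 elsewhere, so (__a & mask) | (__b & ~mask) is a branchless per-lane select: it keeps __a in the "greater" lanes and __b in the remaining ones. For example, mask == 0xffff0000 takes the high halfword from __a and the low halfword from __b. */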
unsigned mask = __vcmpgts2(__a, __b); r = (__a & mask) | (__b & ~mask); } else { __asm__("vmax2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); } return r; } __DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vmax4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vmax2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vmax4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vmin2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vmin4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vmin2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vmin4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vsub2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vneg2(unsigned int __a) { return __vsub2(0, __a); } __DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vsub4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vneg4(unsigned int __a) { return __vsub4(0, __a); } __DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vsub2.s32.s32.s32.sat %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vnegss2(unsigned int __a) { return __vsubss2(0, __a); } __DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vsub4.s32.s32.s32.sat %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vnegss4(unsigned int __a) { return __vsubss4(0, __a); } __DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vsub2.u32.u32.u32.sat %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) { unsigned int r; 
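/* Illustrative note (added; not part of the original header): in __vsubus2 above and in the vsub4.u32.u32.u32.sat instruction that follows, the .sat qualifier clamps each unsigned per-lane difference at 0 instead of letting it wrap, which is what gives these wrappers their unsigned-saturating-subtract semantics; e.g. a byte lane computing 0x05 - 0x0a yields 0x00 rather than wrapping to 0xfb. */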
__asm__("vsub4.u32.u32.u32.sat %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } #endif // CUDA_VERSION >= 9020 // For OpenMP we require the user to include as we need to know what // clock_t is on the system. #ifndef __OPENMP_NVPTX__ __DEVICE__ /* clock_t= */ int clock() { return __nvvm_read_ptx_sreg_clock(); } #endif __DEVICE__ long long clock64() { return __nvvm_read_ptx_sreg_clock64(); } // These functions shouldn't be declared when including this header // for math function resolution purposes. #ifndef __OPENMP_NVPTX__ __DEVICE__ void *memcpy(void *__a, const void *__b, size_t __c) { return __builtin_memcpy(__a, __b, __c); } __DEVICE__ void *memset(void *__a, int __b, size_t __c) { return __builtin_memset(__a, __b, __c); } #endif #pragma pop_macro("__DEVICE__") #endif // __CLANG_CUDA_DEVICE_FUNCTIONS_H__ /*===--- __clang_cuda_texture_intrinsics.h - Device-side texture support ---=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== * * This header provides in-header implmentations for NVCC's built-in * __nv_tex_surf_handler() which is used by CUDA's texture-related headers. The * built-in is unusual as it's actually a set of function overloads that use the * first string literal argument as one of the overload parameters. */ #ifndef __CLANG_CUDA_TEXTURE_INTRINSICS_H__ #define __CLANG_CUDA_TEXTURE_INTRINSICS_H__ #ifndef __CUDA__ #error "This file is for CUDA compilation only." #endif // __nv_tex_surf_handler() provided by this header as a macro. #define __nv_tex_surf_handler(__op, __ptr, ...) \ ::__cuda_tex::__tex_fetch< \ ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash(__op)>>(__ptr, \ __VA_ARGS__) #pragma push_macro("__ASM_OUT") #pragma push_macro("__ASM_OUTP") #pragma push_macro("__Args") #pragma push_macro("__ID") #pragma push_macro("__IDV") #pragma push_macro("__IMPL_2DGATHER") #pragma push_macro("__IMPL_ALIAS") #pragma push_macro("__IMPL_ALIASI") #pragma push_macro("__IMPL_F1") #pragma push_macro("__IMPL_F3") #pragma push_macro("__IMPL_F3N") #pragma push_macro("__IMPL_F3S") #pragma push_macro("__IMPL_S") #pragma push_macro("__IMPL_S3") #pragma push_macro("__IMPL_S3I") #pragma push_macro("__IMPL_S3N") #pragma push_macro("__IMPL_S3NI") #pragma push_macro("__IMPL_S3S") #pragma push_macro("__IMPL_S3SI") #pragma push_macro("__IMPL_SI") #pragma push_macro("__L") #pragma push_macro("__STRIP_PARENS") // Put all functions into anonymous namespace so they have internal linkage. // The device-only function here must be internal in order to avoid ODR // violations in case they are used from the files compiled with // -fgpu-rdc. E.g. a library and an app using it may be built with a different // version of this header file. namespace { // Put the implmentation into its own namespace so we don't pollute the TU. namespace __cuda_tex { // First, we need a perfect hash function and a few constexpr helper functions // for converting a string literal into a numeric value which can be used to // parametrize a template. We can not use string literals for that as that would // require C++20. // // The hash function was generated with 'gperf' and then manually converted into // its constexpr equivalent. // // NOTE: the perfect hashing scheme comes with inherent self-test. 
If the hash // function has a collision for any of the texture operations, the compilation // will fail due to an attempt to redefine a tag with the same value. If the // header compiles, then the hash function is good enough for the job. constexpr int __tex_len(const char *s) { return (s[0] == 0) ? 0 : (s[1] == 0) ? 1 : (s[2] == 0) ? 2 : (s[3] == 0) ? 3 : (s[4] == 0) ? 4 : (s[5] == 0) ? 5 : (s[6] == 0) ? 6 : (s[7] == 0) ? 7 : (s[8] == 0) ? 8 : (s[9] == 0) ? 9 : (s[10] == 0) ? 10 : (s[11] == 0) ? 11 : (s[12] == 0) ? 12 : (s[13] == 0) ? 13 : (s[14] == 0) ? 14 : (s[15] == 0) ? 15 : (s[16] == 0) ? 16 : (s[17] == 0) ? 17 : (s[18] == 0) ? 18 : (s[19] == 0) ? 19 : (s[20] == 0) ? 20 : (s[21] == 0) ? 21 : (s[22] == 0) ? 22 : (s[23] == 0) ? 23 : (s[24] == 0) ? 24 : (s[25] == 0) ? 25 : (s[26] == 0) ? 26 : (s[27] == 0) ? 27 : (s[28] == 0) ? 28 : (s[29] == 0) ? 29 : (s[30] == 0) ? 30 : (s[31] == 0) ? 31 : 32; } constexpr int __tex_hash_map(int c) { return (c == 49) ? 10 : (c == 50) ? 0 : (c == 51) ? 100 : (c == 52) ? 30 : (c == 67) ? 10 : (c == 68) ? 0 : (c == 69) ? 25 : (c == 72) ? 70 : (c == 77) ? 0 : (c == 96) ? 44 : (c == 99) ? 10 : (c == 100) ? 5 : (c == 101) ? 60 : (c == 102) ? 40 : (c == 103) ? 70 : (c == 104) ? 25 : (c == 112) ? 0 : (c == 114) ? 45 : (c == 117) ? 5 : (c == 118) ? 85 : (c == 120) ? 20 : 225; } constexpr int __tex_op_hash(const char *str) { return __tex_len(str) + __tex_hash_map(str[7] + 1) + __tex_hash_map(str[6]) + __tex_hash_map(str[5]) + __tex_hash_map(str[__tex_len(str) - 1]); } // Tag type to identify particular texture operation. template struct __Tag; #define __ID(__op) __Tag<__tex_op_hash(__op)> // Tags for variants of particular operation. E.g. tex2Dgather can translate // into 4 different instructions. #define __IDV(__op, __variant) \ __Tag<10000 + __tex_op_hash(__op) * 100 + __variant> // Helper classes for figuring out key data types for derived types. // E.g. char2 has __base_t = char, __fetch_t = char4 template struct __TypeInfoT; // Type info for the fundamental types. template <> struct __TypeInfoT { using __base_t = float; using __fetch_t = float4; }; template <> struct __TypeInfoT { using __base_t = char; using __fetch_t = int4; }; template <> struct __TypeInfoT { using __base_t = signed char; using __fetch_t = int4; }; template <> struct __TypeInfoT { using __base_t = unsigned char; using __fetch_t = uint4; }; template <> struct __TypeInfoT { using __base_t = short; using __fetch_t = int4; }; template <> struct __TypeInfoT { using __base_t = unsigned short; using __fetch_t = uint4; }; template <> struct __TypeInfoT { using __base_t = int; using __fetch_t = int4; }; template <> struct __TypeInfoT { using __base_t = unsigned int; using __fetch_t = uint4; }; // Derived base/fetch types for N-element vectors. template struct __TypeInfoT { using __base_t = decltype(__T::x); using __fetch_t = typename __TypeInfoT<__base_t>::__fetch_t; }; // Classes that implement specific texture ops. template struct __tex_fetch_v4; // Helper macros to strip parens from a macro argument. #define __Args(...) __VA_ARGS__ #define __STRIP_PARENS(__X) __X #define __L(__X) __STRIP_PARENS(__Args __X) // Construct inline assembly output args. // Results are stored in a temp var __r. // isResident bool is pointed to by __ir // Asm args for return values. It's a 4-element vector #define __ASM_OUT(__t) \ ("=" __t(__r.x), "=" __t(__r.y), "=" __t(__r.z), "=" __t(__r.w)) // .. possibly combined with a predicate. 
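/* Expansion sketch (added; not part of the original header): with __t == "r", __ASM_OUT("r") expands, via string-literal concatenation, to the four output constraints ("=r"(__r.x), "=r"(__r.y), "=r"(__r.z), "=r"(__r.w)); __ASM_OUTP, defined next, uses __L to strip the parentheses and appends "=h"(*__ir) so that the sparse ('_sparse') fetch variants can additionally report the is-resident flag through __ir. */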
#define __ASM_OUTP(__t) (__L(__ASM_OUT(__t)), "=h"(*__ir)) // Implements a single variant of texture fetch instruction. #define __IMPL_F1(__rt, __dt, __args, __asm_op, __asm_outs, __asm_args) \ template <> \ __device__ __rt __run<__dt>(cudaTextureObject_t __obj, __L(__args)) { \ __rt __r; \ asm(__asm_op : __L(__asm_outs) : "l"(__obj), __L(__asm_args)); \ return __r; \ } // Implements texture fetch instructions for int4/uint4/float4 data types. #define __IMPL_F3(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \ __IMPL_F1(int4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args, \ __ASM_OUT("r"), __asm_args) \ __IMPL_F1(uint4, uint4, __args, __asm_op ".u32." __ctype "\t" __asm_op_args, \ __ASM_OUT("r"), __asm_args) \ __IMPL_F1(float4, float4, __args, \ __asm_op ".f32." __ctype "\t" __asm_op_args, __ASM_OUT("f"), \ __asm_args) // Implements 'sparse' texture fetch instructions for int4/uint4/float4 data // types. Similar to above, but returns a boolean 'isPresent' value in addition // to texture data, #define __IMPL_F3S(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \ __IMPL_F1(int4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args, \ __ASM_OUTP("r"), __asm_args) \ __IMPL_F1(uint4, uint4, __args, __asm_op ".u32." __ctype "\t" __asm_op_args, \ __ASM_OUTP("r"), __asm_args) \ __IMPL_F1(float4, float4, __args, \ __asm_op ".f32." __ctype "\t" __asm_op_args, __ASM_OUTP("f"), \ __asm_args) // Similar to F3, but for integer data which is returned as normalized floats. // Only instantiates fetch functions for int4/uint4. #define __IMPL_F3N(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \ __IMPL_F1(float4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args, \ __ASM_OUT("r"), __asm_args) \ __IMPL_F1(float4, uint4, __args, \ __asm_op ".u32." __ctype "\t" __asm_op_args, __ASM_OUT("r"), \ __asm_args) // Instantiates __tex_fetch_v4 with regular fetch functions. #define __IMPL_S3I(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \ template <> struct __tex_fetch_v4<__op> { \ template \ __device__ static T __run(cudaTextureObject_t __obj, __L(__args)); \ __IMPL_F3(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \ } // Same, but for sparse ops. Only available on sm_60+ #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 600) #define __IMPL_S3SI(__op, __args, __asm_op, __ctype, __asm_op_args, \ __asm_args) \ template <> struct __tex_fetch_v4<__op> { \ template \ __device__ static T __run(cudaTextureObject_t __obj, __L(__args)); \ __IMPL_F3S(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \ } #else #define __IMPL_S3SI(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) #endif // Same, but for normalized float ops. #define __IMPL_S3NI(__op, __args, __asm_op, __ctype, __asm_op_args, \ __asm_args) \ template <> struct __tex_fetch_v4<__op> { \ template \ __device__ static float4 __run(cudaTextureObject_t __obj, __L(__args)); \ __IMPL_F3N(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \ } // Regular and normalized float ops share a lot of similarities. This macro // instantiates both variants -- normal for __op and normalized for __opn. 
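/* Orientation sketch (added; not part of the original header): the instantiation macros nest roughly as
     __IMPL_S("op", "opn", ...)                    // string-keyed entry point, defined below
       -> __IMPL_SI(__ID("op"), __ID("opn"), ...)  // the macro defined next
            -> __IMPL_S3I(...)   // regular fetch: __run<int4/uint4/float4>
            -> __IMPL_S3NI(...)  // normalized-float fetch: always returns float4
   with each resulting __tex_fetch_v4 specialization getting its __run overloads stamped out by __IMPL_F3 / __IMPL_F3N, which in turn expand to one __IMPL_F1 per supported data type. */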
#define __IMPL_SI(__op, __opn, __args, __asm_op, __ctype, __asm_op_args, \ __asm_args) \ __IMPL_S3I(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args); \ __IMPL_S3NI(__opn, __args, __asm_op, __ctype, __asm_op_args, __asm_args) // Convenience macros which converts string literal __op into a __Tag, #define __IMPL_S3(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \ __IMPL_S3I(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args) #define __IMPL_S3S(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \ __IMPL_S3SI(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args) #define __IMPL_S3N(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \ __IMPL_S3NI(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args) #define __IMPL_S(__op, __opn, __args, __asm_op, __ctype, __asm_op_args, \ __asm_args) \ __IMPL_SI(__ID(__op), __ID(__opn), __args, __asm_op, __ctype, __asm_op_args, \ __asm_args) // CUDA headers have some 'legacy' texture oprerations that duplicate // functionality. So, we just inherit it, instead of refining a copy. #define __IMPL_ALIASI(__op, __opn) \ template <> struct __tex_fetch_v4<__op> : __tex_fetch_v4<__opn> {} #define __IMPL_ALIAS(__op, __opn) __IMPL_ALIASI(__ID(__op), __ID(__opn)) // Now we can instantiate everything we need for each specific texture fetch // variant. __IMPL_S("__tex1D_v2", "__tex1D_rmnf_v2", (float __x), "tex.1d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5}];", ("f"(__x))); __IMPL_S("__tex1Dfetch_v2", "__tex1Dfetch_rmnf_v2", (int __x), "tex.1d.v4", "s32", "{%0, %1, %2, %3}, [%4, {%5}];", ("r"(__x))); __IMPL_ALIAS("__itex1D", "__tex1D_v2"); __IMPL_ALIAS("__itex1Dfetch", "__tex1Dfetch_v2"); __IMPL_S("__tex1DGrad_v2", "__tex1DGrad_rmnf_v2", (float __x, float __dPdx, float __dPdy), "tex.grad.1d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};", ("f"(__x), "f"(__dPdx), "f"(__dPdy))); __IMPL_ALIAS("__itex1DGrad", "__tex1DGrad_v2"); __IMPL_S("__tex1DLayered_v2", "__tex1DLayered_rmnf_v2", (float __x, int __layer), "tex.a1d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6}];", ("r"(__layer), "f"(__x))); __IMPL_ALIAS("__itex1DLayered", "__tex1DLayered_v2"); __IMPL_S("__tex1DLayeredGrad_v2", "__tex1DLayeredGrad_rmnf_v2", (float __x, int __layer, float __dPdx, float __dPdy), "tex.grad.a1d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};", ("r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy))); __IMPL_ALIAS("__itex1DLayeredGrad", "__tex1DLayeredGrad_v2"); __IMPL_S("__tex1DLayeredLod_v2", "__tex1DLayeredLod_rmnf_v2", (float __x, int __layer, float __level), "tex.level.a1d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;", ("r"(__layer), "f"(__x), "f"(__level))); __IMPL_ALIAS("__itex1DLayeredLod", "__tex1DLayeredLod_v2"); __IMPL_S("__tex1DLod_v2", "__tex1DLod_rmnf_v2", (float __x, float __level), "tex.level.1d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5}], %6;", ("f"(__x), "f"(__level))); __IMPL_ALIAS("__itex1DLod", "__tex1DLod_v2"); // 2D __IMPL_S("__tex2D_v2", "__tex2D_rmnf_v2", (float __x, float __y), "tex.2d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6}];", ("f"(__x), "f"(__y))); __IMPL_ALIAS("__itex2D", "__tex2D_v2"); __IMPL_S3S("__itex2D_sparse", (float __x, float __y, unsigned char *__ir), "{.reg .pred %%p0;\n\t" "tex.2d.v4", "f32", "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t" " selp.u16 %4, 1, 0, %%p0; }", ("f"(__x), "f"(__y))); __IMPL_S("__tex2DGrad_v2", "__tex2DGrad_rmnf_v2", (float __x, float __y, const float2 *__dPdx, const float2 *__dPdy), "tex.grad.2d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, 
%6}], {%7, %8}, {%9, %10};", ("f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y))); __IMPL_ALIAS("__itex2DGrad_v2", "__tex2DGrad_v2"); __IMPL_S3S("__itex2DGrad_sparse", (float __x, float __y, const float2 *__dPdx, const float2 *__dPdy, unsigned char *__ir), "{.reg .pred %%p0;\n\t" "tex.grad.2d.v4", "f32", "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], {%8, %9}, {%10, %11};\n\t" "selp.u16 %4, 1, 0, %%p0; }", ("f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y))); __IMPL_S("__tex2DLayered_v2", "__tex2DLayered_rmnf_v2", (float __x, float __y, int __layer), "tex.a2d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];", ("r"(__layer), "f"(__x), "f"(__y))); __IMPL_ALIAS("__itex2DLayered", "__tex2DLayered_v2"); __IMPL_S3S("__itex2DLayered_sparse", (float __x, float __y, int __layer, unsigned char *__ir), "{.reg .pred %%p0;\n\t" "tex.a2d.v4", "f32", "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t" "selp.u16 %4, 1, 0, %%p0; }", ("r"(__layer), "f"(__x), "f"(__y))); __IMPL_S("__tex2DLayeredGrad_v2", "__tex2DLayeredGrad_rmnf_v2", (float __x, float __y, int __layer, const float2 *__dPdx, const float2 *__dPdy), "tex.grad.a2d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};", ("r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y))); __IMPL_ALIAS("__itex2DLayeredGrad_v2", "__tex2DLayeredGrad_v2"); __IMPL_S3S( "__itex2DLayeredGrad_sparse", (float __x, float __y, int __layer, const float2 *__dPdx, const float2 *__dPdy, unsigned char *__ir), "{.reg .pred %%p0;\n\t" "tex.grad.a2d.v4", "f32", "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], {%9, %10}, {%11, %12};\n\t" "selp.u16 %4, 1, 0, %%p0; }", ("r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y))); __IMPL_S("__tex2DLayeredLod_v2", "__tex2DLayeredLod_rmnf_v2", (float __x, float __y, int __layer, float __level), "tex.level.a2d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;", ("r"(__layer), "f"(__x), "f"(__y), "f"(__level))); __IMPL_ALIAS("__itex2DLayeredLod", "__tex2DLayeredLod_v2"); __IMPL_S3S("__itex2DLayeredLod_sparse", (float __x, float __y, int __layer, float __level, unsigned char *__ir), "{.reg .pred %%p0;\n\t" "tex.level.a2d.v4", "f32", "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t" "selp.u16 %4, 1, 0, %%p0; }", ("r"(__layer), "f"(__x), "f"(__y), "f"(__level))); __IMPL_S("__tex2DLod_v2", "__tex2DLod_rmnf_v2", (float __x, float __y, float __level), "tex.level.2d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;", ("f"(__x), "f"(__y), "f"(__level))); __IMPL_ALIAS("__itex2DLod", "__tex2DLod_v2"); __IMPL_S3S("__itex2DLod_sparse", (float __x, float __y, float __level, unsigned char *__ir), "{.reg .pred %%p0;\n\t" "tex.level.2d.v4", "f32", "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], %8;\n\t" "selp.u16 %4, 1, 0, %%p0; }", ("f"(__x), "f"(__y), "f"(__level))); // 2D gather is special. Unlike other variants that translate into exactly one // asm instruction, it uses one of the four different instructions selected by // __comp. We implement each instruction variant separately, and dispatch the // right one from the manually implemented 'umbrella' fetch. 
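/* Dispatch sketch (added; not part of the original header): a call such as
     __nv_tex_surf_handler("__tex2Dgather_v2", &ret, obj, x, y, comp);
   expands to __tex_fetch<__Tag<__tex_op_hash("__tex2Dgather_v2")>>(&ret, obj, x, y, comp), which ends up in the umbrella __tex_fetch_v4<__ID("__tex2Dgather_v2")>::__run defined after the macro below. That umbrella switches on __comp and forwards to one of the four __IDV("__tex2Dgather_v2", 0..3) specializations, each of which __IMPL_2DGATHER binds to tld4.r/.g/.b/.a.2d.v4 respectively. */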
#define __IMPL_2DGATHER(variant, instr) \ __IMPL_SI(__IDV("__tex2Dgather_v2", variant), \ __IDV("__tex2Dgather_rmnf_v2", variant), \ (float __x, float __y, int __comp), instr, "f32", \ "{%0, %1, %2, %3}, [%4, {%5, %6}];", ("f"(__x), "f"(__y))); \ __IMPL_ALIASI(__IDV("__itex2Dgather", variant), \ __IDV("__tex2Dgather_v2", variant)); \ __IMPL_S3SI(__IDV("__itex2Dgather_sparse", variant), \ (float __x, float __y, unsigned char *__ir, int __comp), \ "{.reg .pred %%p0;\n\t" instr, "f32", \ "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t" \ "selp.u16 %4, 1, 0, %%p0; }", \ ("f"(__x), "f"(__y))); __IMPL_2DGATHER(0, "tld4.r.2d.v4"); __IMPL_2DGATHER(1, "tld4.g.2d.v4"); __IMPL_2DGATHER(2, "tld4.b.2d.v4"); __IMPL_2DGATHER(3, "tld4.a.2d.v4"); // Umbrella dispatcher -- calls into specific 2Dgather variant. template <> struct __tex_fetch_v4<__ID("__tex2Dgather_v2")> { template __device__ static __T __run(cudaTextureObject_t __obj, float __x, float __y, int __comp) { switch (__comp) { case 0: return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 0)>::__run<__T>( __obj, __x, __y, __comp); case 1: return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 1)>::__run<__T>( __obj, __x, __y, __comp); case 2: return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 2)>::__run<__T>( __obj, __x, __y, __comp); case 3: return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 3)>::__run<__T>( __obj, __x, __y, __comp); } } }; __IMPL_ALIAS("__itex2Dgather", "__tex2Dgather_v2"); template <> struct __tex_fetch_v4<__ID("__tex2Dgather_rmnf_v2")> { template __device__ static float4 __run(cudaTextureObject_t __obj, float __x, float __y, int __comp) { switch (__comp) { case 0: return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 0)>::__run<__T>( __obj, __x, __y, __comp); case 1: return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 1)>::__run<__T>( __obj, __x, __y, __comp); case 2: return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 2)>::__run<__T>( __obj, __x, __y, __comp); case 3: return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 3)>::__run<__T>( __obj, __x, __y, __comp); } } }; #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 600) template <> struct __tex_fetch_v4<__ID("__itex2Dgather_sparse")> { template __device__ static __T __run(cudaTextureObject_t __obj, float __x, float __y, unsigned char *__ir, int __comp) { switch (__comp) { case 0: return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 0)>::__run<__T>( __obj, __x, __y, __ir, __comp); case 1: return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 1)>::__run<__T>( __obj, __x, __y, __ir, __comp); case 2: return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 2)>::__run<__T>( __obj, __x, __y, __ir, __comp); case 3: return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 3)>::__run<__T>( __obj, __x, __y, __ir, __comp); } } }; #endif // 3D __IMPL_S("__tex3D_v2", "__tex3D_rmnf_v2", (float __x, float __y, float __z), "tex.3d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];", ("f"(__x), "f"(__y), "f"(__z))); __IMPL_ALIAS("__itex3D", "__tex3D_v2"); __IMPL_S3S("__itex3D_sparse", (float __x, float __y, float __z, unsigned char *__ir), "{.reg .pred %%p0;\n\t" "tex.3d.v4", "f32", "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t" "selp.u16 %4, 1, 0, %%p0; }", ("f"(__x), "f"(__y), "f"(__z))); __IMPL_S("__tex3DGrad_v2", "__tex3DGrad_rmnf_v2", (float __x, float __y, float __z, const float4 *__dPdx, const float4 *__dPdy), "tex.grad.3d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], " "{%8, %9, %10, %10}, {%11, %12, %13, %13};", ("f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), 
"f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z))); __IMPL_ALIAS("__itex3DGrad_v2", "__tex3DGrad_v2"); __IMPL_S3S("__itex3DGrad_sparse", (float __x, float __y, float __z, const float4 *__dPdx, const float4 *__dPdy, unsigned char *__ir), "{.reg .pred %%p0;\n\t" "tex.grad.3d.v4", "f32", "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], " "{%9, %10, %11, %11}, {%12, %13, %14, %14};\n\t" "selp.u16 %4, 1, 0, %%p0; }", ("f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z))); __IMPL_S("__tex3DLod_v2", "__tex3DLod_rmnf_v2", (float __x, float __y, float __z, float __level), "tex.level.3d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;", ("f"(__x), "f"(__y), "f"(__z), "f"(__level))); __IMPL_ALIAS("__itex3DLod", "__tex3DLod_v2"); __IMPL_S3S("__itex3DLod_sparse", (float __x, float __y, float __z, float __level, unsigned char *__ir), "{.reg .pred %%p0;\n\t" "tex.level.3d.v4", "f32", "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t" "selp.u16 %4, 1, 0, %%p0; }", ("f"(__x), "f"(__y), "f"(__z), "f"(__level))); // Cubemap __IMPL_S("__texCubemap_v2", "__texCubemap_rmnf_v2", (float __x, float __y, float __z), "tex.cube.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];", ("f"(__x), "f"(__y), "f"(__z))); __IMPL_ALIAS("__itexCubemap", "__texCubemap_v2"); __IMPL_S3S("__itexCubemap_sparse", (float __x, float __y, float __z, unsigned char *__ir), "{.reg .pred %%p0;\n\t" "tex.cube.v4", "f32", "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t" "selp.u16 %4, 1, 0, %%p0; }", ("f"(__x), "f"(__y), "f"(__z))); __IMPL_S("__texCubemapGrad_v2", "__texCubemapGrad_rmnf_v2", (float __x, float __y, float __z, const float4 *__dPdx, const float4 *__dPdy), "tex.grad.cube.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], " "{%8, %9, %10, %10}, {%11, %12, %13, %13};", ("f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z))); __IMPL_ALIAS("__itexCubemapGrad_v2", "__texCubemapGrad_v2"); __IMPL_S("__texCubemapLayered_v2", "__texCubemapLayered_rmnf_v2", (float __x, float __y, float __z, int __layer), "tex.acube.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];", ("r"(__layer), "f"(__x), "f"(__y), "f"(__z))); __IMPL_ALIAS("__itexCubemapLayered", "__texCubemapLayered_v2"); __IMPL_S("__texCubemapLayeredGrad_v2", "__texCubemapLayeredGrad_rmnf_v2", (float __x, float __y, float __z, int __layer, const float4 *__dPdx, const float4 *__dPdy), "tex.grad.acube.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], " "{%9, %10, %11, %11}, {%12, %13, %14, %14};", ("r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z))); __IMPL_ALIAS("__itexCubemapLayeredGrad_v2", "__texCubemapLayeredGrad_v2"); __IMPL_S("__texCubemapLayeredLod_v2", "__texCubemapLayeredLod_rmnf_v2", (float __x, float __y, float __z, int __layer, float __level), "tex.level.acube.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;", ("r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__level))); __IMPL_ALIAS("__itexCubemapLayeredLod", "__texCubemapLayeredLod_v2"); __IMPL_S("__texCubemapLod_v2", "__texCubemapLod_rmnf_v2", (float __x, float __y, float __z, float __level), "tex.level.cube.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;", ("f"(__x), "f"(__y), "f"(__z), "f"(__level))); __IMPL_ALIAS("__itexCubemapLod", "__texCubemapLod_v2"); // Helper class for extracting slice of data from V4 fetch results. 
template struct __convert { template ::__base_t)> __device__ static __DestT __run(__SrcT __v); template <> __device__ static __DestT __run<1>(__SrcT __v) { return {__v.x}; } template <> __device__ static __DestT __run<2>(__SrcT __v) { return {__v.x, __v.y}; } template <> __device__ static __DestT __run<3>(__SrcT __v) { return {__v.x, __v.y, __v.z}; } template <> __device__ static __DestT __run<4>(__SrcT __v) { return {__v.x, __v.y, __v.z, __v.w}; } }; // These are the top-level function overloads the __nv_tex_surf_handler expands // to. Each overload deals with one of the several ways __nv_tex_surf_handler // is called by CUDA headers. In the end, each of the overloads does the same // job -- it figures out which `__tex_fetch_v4::run` variant should be used to // fetch texture data and which `__convert::run` is needed to convert it into // appropriate return type. // __nv_tex_surf_handler("__tex...", &ret, cudaTextureObject_t handle, args...); // Data type and return type are based on ret. template __device__ static void __tex_fetch(__T *__ptr, cudaTextureObject_t __handle, __Args... __args) { using __FetchT = typename __TypeInfoT<__T>::__fetch_t; *__ptr = __convert<__T, __FetchT>::__run( __tex_fetch_v4<__op>::template __run<__FetchT>(__handle, __args...)); } #if CUDA_VERSION < 12000 // texture<> objects get magically converted into a texture reference. However, // there's no way to convert them to cudaTextureObject_t on C++ level. So, we // cheat a bit and use inline assembly to do it. It costs us an extra register // and a move, but that is easy for ptxas to optimize away. template __device__ cudaTextureObject_t __tex_handle_to_obj(__T __handle) { cudaTextureObject_t __obj; asm("mov.b64 %0, %1; " : "=l"(__obj) : "l"(__handle)); return __obj; } // __nv_tex_surf_handler ("__tex...", &ret, textureReference, args...); // Data type and return type is based on ret. template __device__ static void __tex_fetch(__T *__ptr, __HandleT __handle, __Args... __args) { using __FetchT = typename __TypeInfoT<__T>::__fetch_t; *__ptr = __convert<__T, __FetchT>::__run( __tex_fetch_v4<__op>::template __run<__FetchT>( __tex_handle_to_obj(__handle), __args...)); } // __nv_tex_surf_handler ("__tex...", &type_dummy, &ret, texture<...>, args...); // cudaReadModeNormalizedFloat fetches always return float4. template __device__ static void __tex_fetch(__DataT *, __RetT *__ptr, texture<__DataT, __TexT, cudaReadModeNormalizedFloat> __handle, __Args... __args) { using __FetchT = typename __TypeInfoT<__DataT>::__fetch_t; *__ptr = __convert<__RetT, float4>::__run( __tex_fetch_v4<__op>::template __run<__FetchT>( __tex_handle_to_obj(__handle), __args...)); } // __nv_tex_surf_handler ("__tex...", &type_dummy, &ret, texture<...>, args...); // For cudaReadModeElementType fetch return type is based on type_dummy. template __device__ static void __tex_fetch(__DataT *, __RetT *__ptr, texture<__DataT, __TexT, cudaReadModeElementType> __handle, __Args... 
__args) { using __FetchT = typename __TypeInfoT<__DataT>::__fetch_t; *__ptr = __convert<__RetT, __FetchT>::__run( __tex_fetch_v4<__op>::template __run<__FetchT>( __tex_handle_to_obj(__handle), __args...)); } #endif // CUDA_VERSION } // namespace __cuda_tex } // namespace #pragma pop_macro("__ASM_OUT") #pragma pop_macro("__ASM_OUTP") #pragma pop_macro("__Args") #pragma pop_macro("__ID") #pragma pop_macro("__IDV") #pragma pop_macro("__IMPL_2DGATHER") #pragma pop_macro("__IMPL_ALIAS") #pragma pop_macro("__IMPL_ALIASI") #pragma pop_macro("__IMPL_F1") #pragma pop_macro("__IMPL_F3") #pragma pop_macro("__IMPL_F3N") #pragma pop_macro("__IMPL_F3S") #pragma pop_macro("__IMPL_S") #pragma pop_macro("__IMPL_S3") #pragma pop_macro("__IMPL_S3I") #pragma pop_macro("__IMPL_S3N") #pragma pop_macro("__IMPL_S3NI") #pragma pop_macro("__IMPL_S3S") #pragma pop_macro("__IMPL_S3SI") #pragma pop_macro("__IMPL_SI") #pragma pop_macro("__L") #pragma pop_macro("__STRIP_PARENS") #endif // __CLANG_CUDA_TEXTURE_INTRINSICS_H__ __stdarg_va_copy.h/*===---- __stddef_null.h - Definition of NULL -----------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined(NULL) || !__building_module(_Builtin_stddef) /* linux/stddef.h will define NULL to 0. glibc (and other) headers then define * __need_NULL and rely on stddef.h to redefine NULL to the correct value again. * Modules don't support redefining macros like that, but support that pattern * in the non-modules case. */ #undef NULL #ifdef __cplusplus #if !defined(__MINGW32__) && !defined(_MSC_VER) #define NULL __null #else #define NULL 0 #endif #else #define NULL ((void*)0) #endif #endif __wmmintrin_aes.harm_acle.havx512vldqintrin.h/*===-------------- avxneconvertintrin.h - AVXNECONVERT --------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error \ "Never use directly; include instead." #endif // __IMMINTRIN_H #ifdef __SSE2__ #ifndef __AVXNECONVERTINTRIN_H #define __AVXNECONVERTINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, __target__("avxneconvert"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, __target__("avxneconvert"), \ __min_vector_width__(256))) /// Convert scalar BF16 (16-bit) floating-point element /// stored at memory locations starting at location \a __A to a /// single-precision (32-bit) floating-point, broadcast it to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm_bcstnebf16_ps(const void *__A); /// \endcode /// /// This intrinsic corresponds to the \c VBCSTNEBF162PS instruction. /// /// \param __A /// A pointer to a 16-bit memory location. The address of the memory /// location does not have to be aligned. /// \returns /// A 128-bit vector of [4 x float]. 
/// /// \code{.operation} /// b := Convert_BF16_To_FP32(MEM[__A+15:__A]) /// FOR j := 0 to 3 /// m := j*32 /// dst[m+31:m] := b /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_bcstnebf16_ps(const void *__A) { return (__m128)__builtin_ia32_vbcstnebf162ps128((const __bf16 *)__A); } /// Convert scalar BF16 (16-bit) floating-point element /// stored at memory locations starting at location \a __A to a /// single-precision (32-bit) floating-point, broadcast it to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm256_bcstnebf16_ps(const void *__A); /// \endcode /// /// This intrinsic corresponds to the \c VBCSTNEBF162PS instruction. /// /// \param __A /// A pointer to a 16-bit memory location. The address of the memory /// location does not have to be aligned. /// \returns /// A 256-bit vector of [8 x float]. /// /// \code{.operation} /// b := Convert_BF16_To_FP32(MEM[__A+15:__A]) /// FOR j := 0 to 7 /// m := j*32 /// dst[m+31:m] := b /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_bcstnebf16_ps(const void *__A) { return (__m256)__builtin_ia32_vbcstnebf162ps256((const __bf16 *)__A); } /// Convert scalar half-precision (16-bit) floating-point element /// stored at memory locations starting at location \a __A to a /// single-precision (32-bit) floating-point, broadcast it to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm_bcstnesh_ps(const void *__A); /// \endcode /// /// This intrinsic corresponds to the \c VBCSTNESH2PS instruction. /// /// \param __A /// A pointer to a 16-bit memory location. The address of the memory /// location does not have to be aligned. /// \returns /// A 128-bit vector of [4 x float]. /// /// \code{.operation} /// b := Convert_FP16_To_FP32(MEM[__A+15:__A]) /// FOR j := 0 to 3 /// m := j*32 /// dst[m+31:m] := b /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_bcstnesh_ps(const void *__A) { return (__m128)__builtin_ia32_vbcstnesh2ps128((const _Float16 *)__A); } /// Convert scalar half-precision (16-bit) floating-point element /// stored at memory locations starting at location \a __A to a /// single-precision (32-bit) floating-point, broadcast it to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm256_bcstnesh_ps(const void *__A); /// \endcode /// /// This intrinsic corresponds to the \c VBCSTNESH2PS instruction. /// /// \param __A /// A pointer to a 16-bit memory location. The address of the memory /// location does not have to be aligned. /// \returns /// A 256-bit vector of [8 x float]. /// /// \code{.operation} /// b := Convert_FP16_To_FP32(MEM[__A+15:__A]) /// FOR j := 0 to 7 /// m := j*32 /// dst[m+31:m] := b /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_bcstnesh_ps(const void *__A) { return (__m256)__builtin_ia32_vbcstnesh2ps256((const _Float16 *)__A); } /// Convert packed BF16 (16-bit) floating-point even-indexed elements /// stored at memory locations starting at location \a __A to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. 
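/// (Added clarification, not part of the original header: "even-indexed" refers
/// to source elements 0, 2, 4, ... of the packed 16-bit input, as the k := j*2
/// step in the operation block below shows; the _mm_cvtneobf16_ps and
/// _mm256_cvtneobf16_ps intrinsics further down read the odd-indexed elements
/// 1, 3, 5, ... via k := j*2+1 instead.)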
/// /// \headerfile /// /// \code /// _mm_cvtneebf16_ps(const __m128bh *__A); /// \endcode /// /// This intrinsic corresponds to the \c VCVTNEEBF162PS instruction. /// /// \param __A /// A pointer to a 128-bit memory location containing 8 consecutive /// BF16 (16-bit) floating-point values. /// \returns /// A 128-bit vector of [4 x float]. /// /// \code{.operation} /// FOR j := 0 to 3 /// k := j*2 /// i := k*16 /// m := j*32 /// dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+i+15:__A+i]) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtneebf16_ps(const __m128bh *__A) { return (__m128)__builtin_ia32_vcvtneebf162ps128((const __v8bf *)__A); } /// Convert packed BF16 (16-bit) floating-point even-indexed elements /// stored at memory locations starting at location \a __A to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm256_cvtneebf16_ps(const __m256bh *__A); /// \endcode /// /// This intrinsic corresponds to the \c VCVTNEEBF162PS instruction. /// /// \param __A /// A pointer to a 256-bit memory location containing 16 consecutive /// BF16 (16-bit) floating-point values. /// \returns /// A 256-bit vector of [8 x float]. /// /// \code{.operation} /// FOR j := 0 to 7 /// k := j*2 /// i := k*16 /// m := j*32 /// dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+i+15:__A+i]) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtneebf16_ps(const __m256bh *__A) { return (__m256)__builtin_ia32_vcvtneebf162ps256((const __v16bf *)__A); } /// Convert packed half-precision (16-bit) floating-point even-indexed elements /// stored at memory locations starting at location \a __A to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm_cvtneeph_ps(const __m128h *__A); /// \endcode /// /// This intrinsic corresponds to the \c VCVTNEEPH2PS instruction. /// /// \param __A /// A pointer to a 128-bit memory location containing 8 consecutive /// half-precision (16-bit) floating-point values. /// \returns /// A 128-bit vector of [4 x float]. /// /// \code{.operation} /// FOR j := 0 to 3 /// k := j*2 /// i := k*16 /// m := j*32 /// dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+i+15:__A+i]) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtneeph_ps(const __m128h *__A) { return (__m128)__builtin_ia32_vcvtneeph2ps128((const __v8hf *)__A); } /// Convert packed half-precision (16-bit) floating-point even-indexed elements /// stored at memory locations starting at location \a __A to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm256_cvtneeph_ps(const __m256h *__A); /// \endcode /// /// This intrinsic corresponds to the \c VCVTNEEPH2PS instruction. /// /// \param __A /// A pointer to a 256-bit memory location containing 16 consecutive /// half-precision (16-bit) floating-point values. /// \returns /// A 256-bit vector of [8 x float]. 
/// /// \code{.operation} /// FOR j := 0 to 7 /// k := j*2 /// i := k*16 /// m := j*32 /// dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+i+15:__A+i]) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtneeph_ps(const __m256h *__A) { return (__m256)__builtin_ia32_vcvtneeph2ps256((const __v16hf *)__A); } /// Convert packed BF16 (16-bit) floating-point odd-indexed elements /// stored at memory locations starting at location \a __A to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm_cvtneobf16_ps(const __m128bh *__A); /// \endcode /// /// This intrinsic corresponds to the \c VCVTNEOBF162PS instruction. /// /// \param __A /// A pointer to a 128-bit memory location containing 8 consecutive /// BF16 (16-bit) floating-point values. /// \returns /// A 128-bit vector of [4 x float]. /// /// \code{.operation} /// FOR j := 0 to 3 /// k := j*2+1 /// i := k*16 /// m := j*32 /// dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+i+15:__A+i]) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtneobf16_ps(const __m128bh *__A) { return (__m128)__builtin_ia32_vcvtneobf162ps128((const __v8bf *)__A); } /// Convert packed BF16 (16-bit) floating-point odd-indexed elements /// stored at memory locations starting at location \a __A to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm256_cvtneobf16_ps(const __m256bh *__A); /// \endcode /// /// This intrinsic corresponds to the \c VCVTNEOBF162PS instruction. /// /// \param __A /// A pointer to a 256-bit memory location containing 16 consecutive /// BF16 (16-bit) floating-point values. /// \returns /// A 256-bit vector of [8 x float]. /// /// \code{.operation} /// FOR j := 0 to 7 /// k := j*2+1 /// i := k*16 /// m := j*32 /// dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+i+15:__A+i]) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtneobf16_ps(const __m256bh *__A) { return (__m256)__builtin_ia32_vcvtneobf162ps256((const __v16bf *)__A); } /// Convert packed half-precision (16-bit) floating-point odd-indexed elements /// stored at memory locations starting at location \a __A to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm_cvtneoph_ps(const __m128h *__A); /// \endcode /// /// This intrinsic corresponds to the \c VCVTNEOPH2PS instruction. /// /// \param __A /// A pointer to a 128-bit memory location containing 8 consecutive /// half-precision (16-bit) floating-point values. /// \returns /// A 128-bit vector of [4 x float]. /// /// \code{.operation} /// FOR j := 0 to 3 /// k := j*2+1 /// i := k*16 /// m := j*32 /// dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+i+15:__A+i]) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtneoph_ps(const __m128h *__A) { return (__m128)__builtin_ia32_vcvtneoph2ps128((const __v8hf *)__A); } /// Convert packed half-precision (16-bit) floating-point odd-indexed elements /// stored at memory locations starting at location \a __A to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. 
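/* [Editor's illustrative sketch - not part of the original headers.]
 * The even-/odd-indexed variants above are meant to be used in pairs: one
 * 256-bit block of 16 interleaved BF16 values can be widened into two __m256
 * vectors, one holding the even-indexed elements and one the odd-indexed ones.
 * Assumes <immintrin.h> and -mavxneconvert; names are hypothetical. */
#include <immintrin.h>
static void widen_bf16_pair(const __m256bh *packed16, __m256 *even, __m256 *odd) {
  *even = _mm256_cvtneebf16_ps(packed16); /* elements 0,2,4,...,14 -> FP32 */
  *odd  = _mm256_cvtneobf16_ps(packed16); /* elements 1,3,5,...,15 -> FP32 */
}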
/// /// \headerfile /// /// \code /// _mm256_cvtneoph_ps(const __m256h *__A); /// \endcode /// /// This intrinsic corresponds to the \c VCVTNEOPH2PS instruction. /// /// \param __A /// A pointer to a 256-bit memory location containing 16 consecutive /// half-precision (16-bit) floating-point values. /// \returns /// A 256-bit vector of [8 x float]. /// /// \code{.operation} /// FOR j := 0 to 7 /// k := j*2+1 /// i := k*16 /// m := j*32 /// dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+i+15:__A+i]) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtneoph_ps(const __m256h *__A) { return (__m256)__builtin_ia32_vcvtneoph2ps256((const __v16hf *)__A); } /// Convert packed single-precision (32-bit) floating-point elements in \a __A /// to packed BF16 (16-bit) floating-point elements, and store the results in \a /// dst. /// /// \headerfile /// /// \code /// _mm_cvtneps_avx_pbh(__m128 __A); /// \endcode /// /// This intrinsic corresponds to the \c VCVTNEPS2BF16 instruction. /// /// \param __A /// A 128-bit vector of [4 x float]. /// \returns /// A 128-bit vector of [8 x bfloat]. /// /// \code{.operation} /// FOR j := 0 to 3 /// dst.word[j] := Convert_FP32_To_BF16(__A.fp32[j]) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_cvtneps_avx_pbh(__m128 __A) { return (__m128bh)__builtin_ia32_vcvtneps2bf16128((__v4sf)__A); } /// Convert packed single-precision (32-bit) floating-point elements in \a __A /// to packed BF16 (16-bit) floating-point elements, and store the results in \a /// dst. /// /// \headerfile /// /// \code /// _mm256_cvtneps_avx_pbh(__m256 __A); /// \endcode /// /// This intrinsic corresponds to the \c VCVTNEPS2BF16 instruction. /// /// \param __A /// A 256-bit vector of [8 x float]. /// \returns /// A 128-bit vector of [8 x bfloat]. /// /// \code{.operation} /// FOR j := 0 to 7 /// dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128bh __DEFAULT_FN_ATTRS256 _mm256_cvtneps_avx_pbh(__m256 __A) { return (__m128bh)__builtin_ia32_vcvtneps2bf16256((__v8sf)__A); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif // __AVXNECONVERTINTRIN_H #endif // __SSE2__ /*===---- f16cintrin.h - F16C intrinsics -----------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __F16CINTRIN_H #define __F16CINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256))) /* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h, * but that's because icc can emulate these without f16c using a library call. * Since we don't do that let's leave these in f16cintrin.h. */ /// Converts a 16-bit half-precision float value into a 32-bit float /// value. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTPH2PS instruction. /// /// \param __a /// A 16-bit half-precision float value. 
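/* [Editor's illustrative sketch - not part of the original headers.]
 * Narrowing sketch for the FP32 -> BF16 conversion above: eight floats are
 * packed into eight BF16 words (a 128-bit result) and written to memory.
 * There is no dedicated store intrinsic for __m128bh here, so memcpy is used.
 * Assumes <immintrin.h> and -mavxneconvert; names are hypothetical. */
#include <immintrin.h>
#include <stdint.h>
#include <string.h>
static void store_as_bf16(const float *src8, uint16_t *dst8) {
  __m256   f32 = _mm256_loadu_ps(src8);        /* 8 x float                  */
  __m128bh bf  = _mm256_cvtneps_avx_pbh(f32);  /* 8 x bfloat16 in 128 bits   */
  memcpy(dst8, &bf, 16);                       /* copy the packed halfwords  */
}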
/// \returns The converted 32-bit float value. static __inline float __DEFAULT_FN_ATTRS128 _cvtsh_ss(unsigned short __a) { __v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0}; __v4sf __r = __builtin_ia32_vcvtph2ps(__v); return __r[0]; } /// Converts a 32-bit single-precision float value to a 16-bit /// half-precision float value. /// /// \headerfile /// /// \code /// unsigned short _cvtss_sh(float a, const int imm); /// \endcode /// /// This intrinsic corresponds to the VCVTPS2PH instruction. /// /// \param a /// A 32-bit single-precision float value to be converted to a 16-bit /// half-precision float value. /// \param imm /// An immediate value controlling rounding using bits [2:0]: \n /// 000: Nearest \n /// 001: Down \n /// 010: Up \n /// 011: Truncate \n /// 1XX: Use MXCSR.RC for rounding /// \returns The converted 16-bit half-precision float value. #define _cvtss_sh(a, imm) __extension__ ({ \ (unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \ (imm)))[0]); }) /// Converts a 128-bit vector containing 32-bit float values into a /// 128-bit vector containing 16-bit half-precision float values. /// /// \headerfile /// /// \code /// __m128i _mm_cvtps_ph(__m128 a, const int imm); /// \endcode /// /// This intrinsic corresponds to the VCVTPS2PH instruction. /// /// \param a /// A 128-bit vector containing 32-bit float values. /// \param imm /// An immediate value controlling rounding using bits [2:0]: \n /// 000: Nearest \n /// 001: Down \n /// 010: Up \n /// 011: Truncate \n /// 1XX: Use MXCSR.RC for rounding /// \returns A 128-bit vector containing converted 16-bit half-precision float /// values. The lower 64 bits are used to store the converted 16-bit /// half-precision floating-point values. #define _mm_cvtps_ph(a, imm) \ ((__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm))) /// Converts a 128-bit vector containing 16-bit half-precision float /// values into a 128-bit vector containing 32-bit float values. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTPH2PS instruction. /// /// \param __a /// A 128-bit vector containing 16-bit half-precision float values. The lower /// 64 bits are used in the conversion. /// \returns A 128-bit vector of [4 x float] containing converted float values. static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_cvtph_ps(__m128i __a) { return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a); } /// Converts a 256-bit vector of [8 x float] into a 128-bit vector /// containing 16-bit half-precision float values. /// /// \headerfile /// /// \code /// __m128i _mm256_cvtps_ph(__m256 a, const int imm); /// \endcode /// /// This intrinsic corresponds to the VCVTPS2PH instruction. /// /// \param a /// A 256-bit vector containing 32-bit single-precision float values to be /// converted to 16-bit half-precision float values. /// \param imm /// An immediate value controlling rounding using bits [2:0]: \n /// 000: Nearest \n /// 001: Down \n /// 010: Up \n /// 011: Truncate \n /// 1XX: Use MXCSR.RC for rounding /// \returns A 128-bit vector containing the converted 16-bit half-precision /// float values. #define _mm256_cvtps_ph(a, imm) \ ((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm))) /// Converts a 128-bit vector containing 16-bit half-precision float /// values into a 256-bit vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTPH2PS instruction. 
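/* [Editor's illustrative sketch - not part of the original headers.]
 * Scalar and packed F16C round trips using the intrinsics above. The rounding
 * immediate must be a compile-time constant: 0 selects round-to-nearest,
 * 3 truncation, 4..7 defer to MXCSR.RC. Assumes <immintrin.h> and -mf16c. */
#include <immintrin.h>
static float quantize_to_half(float x) {
  unsigned short h = _cvtss_sh(x, 0);  /* FP32 -> FP16, round to nearest */
  return _cvtsh_ss(h);                 /* FP16 -> FP32 */
}
static __m128 quantize_four(__m128 v) {
  return _mm_cvtph_ps(_mm_cvtps_ph(v, 0)); /* packed FP32 -> FP16 -> FP32 */
}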
/// /// \param __a /// A 128-bit vector containing 16-bit half-precision float values to be /// converted to 32-bit single-precision float values. /// \returns A vector of [8 x float] containing the converted 32-bit /// single-precision float values. static __inline __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtph_ps(__m128i __a) { return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif /* __F16CINTRIN_H */ /*===---- fmaintrin.h - FMA intrinsics -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __FMAINTRIN_H #define __FMAINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256))) /// Computes a multiply-add of 128-bit vectors of [4 x float]. /// For each element, computes (__A * __B) + __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADD213PS instruction. /// /// \param __A /// A 128-bit vector of [4 x float] containing the multiplicand. /// \param __B /// A 128-bit vector of [4 x float] containing the multiplier. /// \param __C /// A 128-bit vector of [4 x float] containing the addend. /// \returns A 128-bit vector of [4 x float] containing the result. static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } /// Computes a multiply-add of 128-bit vectors of [2 x double]. /// For each element, computes (__A * __B) + __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADD213PD instruction. /// /// \param __A /// A 128-bit vector of [2 x double] containing the multiplicand. /// \param __B /// A 128-bit vector of [2 x double] containing the multiplier. /// \param __C /// A 128-bit vector of [2 x double] containing the addend. /// \returns A 128-bit [2 x double] vector containing the result. static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } /// Computes a scalar multiply-add of the single-precision values in the /// low 32 bits of 128-bit vectors of [4 x float]. /// /// \code{.operation} /// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0] /// result[127:32] = __A[127:32] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADD213SS instruction. /// /// \param __A /// A 128-bit vector of [4 x float] containing the multiplicand in the low /// 32 bits. /// \param __B /// A 128-bit vector of [4 x float] containing the multiplier in the low /// 32 bits. /// \param __C /// A 128-bit vector of [4 x float] containing the addend in the low /// 32 bits. /// \returns A 128-bit vector of [4 x float] containing the result in the low /// 32 bits and a copy of \a __A[127:32] in the upper 96 bits. 
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } /// Computes a scalar multiply-add of the double-precision values in the /// low 64 bits of 128-bit vectors of [2 x double]. /// /// \code{.operation} /// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0] /// result[127:64] = __A[127:64] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADD213SD instruction. /// /// \param __A /// A 128-bit vector of [2 x double] containing the multiplicand in the low /// 64 bits. /// \param __B /// A 128-bit vector of [2 x double] containing the multiplier in the low /// 64 bits. /// \param __C /// A 128-bit vector of [2 x double] containing the addend in the low /// 64 bits. /// \returns A 128-bit vector of [2 x double] containing the result in the low /// 64 bits and a copy of \a __A[127:64] in the upper 64 bits. static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C); } /// Computes a multiply-subtract of 128-bit vectors of [4 x float]. /// For each element, computes (__A * __B) - __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMSUB213PS instruction. /// /// \param __A /// A 128-bit vector of [4 x float] containing the multiplicand. /// \param __B /// A 128-bit vector of [4 x float] containing the multiplier. /// \param __C /// A 128-bit vector of [4 x float] containing the subtrahend. /// \returns A 128-bit vector of [4 x float] containing the result. static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } /// Computes a multiply-subtract of 128-bit vectors of [2 x double]. /// For each element, computes (__A * __B) - __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMSUB213PD instruction. /// /// \param __A /// A 128-bit vector of [2 x double] containing the multiplicand. /// \param __B /// A 128-bit vector of [2 x double] containing the multiplier. /// \param __C /// A 128-bit vector of [2 x double] containing the addend. /// \returns A 128-bit vector of [2 x double] containing the result. static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } /// Computes a scalar multiply-subtract of the single-precision values in /// the low 32 bits of 128-bit vectors of [4 x float]. /// /// \code{.operation} /// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0] /// result[127:32] = __A[127:32] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMSUB213SS instruction. /// /// \param __A /// A 128-bit vector of [4 x float] containing the multiplicand in the low /// 32 bits. /// \param __B /// A 128-bit vector of [4 x float] containing the multiplier in the low /// 32 bits. /// \param __C /// A 128-bit vector of [4 x float] containing the subtrahend in the low /// 32 bits. /// \returns A 128-bit vector of [4 x float] containing the result in the low /// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits. 
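/* [Editor's illustrative sketch - not part of the original headers.]
 * Typical use of the packed fused multiply-add above: Horner evaluation of a
 * cubic polynomial c3*x^3 + c2*x^2 + c1*x + c0 on four lanes at once, with a
 * single rounding per step. Assumes <immintrin.h> and -mfma; names are
 * hypothetical. */
#include <immintrin.h>
static __m128 poly3(__m128 x, __m128 c3, __m128 c2, __m128 c1, __m128 c0) {
  __m128 r = _mm_fmadd_ps(c3, x, c2);  /* c3*x + c2            */
  r = _mm_fmadd_ps(r, x, c1);          /* (c3*x + c2)*x + c1   */
  return _mm_fmadd_ps(r, x, c0);       /* ((...)*x + c1)*x + c0 */
}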
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } /// Computes a scalar multiply-subtract of the double-precision values in /// the low 64 bits of 128-bit vectors of [2 x double]. /// /// \code{.operation} /// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0] /// result[127:64] = __A[127:64] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMSUB213SD instruction. /// /// \param __A /// A 128-bit vector of [2 x double] containing the multiplicand in the low /// 64 bits. /// \param __B /// A 128-bit vector of [2 x double] containing the multiplier in the low /// 64 bits. /// \param __C /// A 128-bit vector of [2 x double] containing the subtrahend in the low /// 64 bits. /// \returns A 128-bit vector of [2 x double] containing the result in the low /// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits. static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C); } /// Computes a negated multiply-add of 128-bit vectors of [4 x float]. /// For each element, computes -(__A * __B) + __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMADD213DPS instruction. /// /// \param __A /// A 128-bit vector of [4 x float] containing the multiplicand. /// \param __B /// A 128-bit vector of [4 x float] containing the multiplier. /// \param __C /// A 128-bit vector of [4 x float] containing the addend. /// \returns A 128-bit [4 x float] vector containing the result. static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } /// Computes a negated multiply-add of 128-bit vectors of [2 x double]. /// For each element, computes -(__A * __B) + __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMADD213PD instruction. /// /// \param __A /// A 128-bit vector of [2 x double] containing the multiplicand. /// \param __B /// A 128-bit vector of [2 x double] containing the multiplier. /// \param __C /// A 128-bit vector of [2 x double] containing the addend. /// \returns A 128-bit vector of [2 x double] containing the result. static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); } /// Computes a scalar negated multiply-add of the single-precision values in /// the low 32 bits of 128-bit vectors of [4 x float]. /// /// \code{.operation} /// result[31:0] = -(__A[31:0] * __B[31:0]) + __C[31:0] /// result[127:32] = __A[127:32] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMADD213SS instruction. /// /// \param __A /// A 128-bit vector of [4 x float] containing the multiplicand in the low /// 32 bits. /// \param __B /// A 128-bit vector of [4 x float] containing the multiplier in the low /// 32 bits. /// \param __C /// A 128-bit vector of [4 x float] containing the addend in the low /// 32 bits. /// \returns A 128-bit vector of [4 x float] containing the result in the low /// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits. 
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C); } /// Computes a scalar negated multiply-add of the double-precision values /// in the low 64 bits of 128-bit vectors of [2 x double]. /// /// \code{.operation} /// result[63:0] = -(__A[63:0] * __B[63:0]) + __C[63:0] /// result[127:64] = __A[127:64] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMADD213SD instruction. /// /// \param __A /// A 128-bit vector of [2 x double] containing the multiplicand in the low /// 64 bits. /// \param __B /// A 128-bit vector of [2 x double] containing the multiplier in the low /// 64 bits. /// \param __C /// A 128-bit vector of [2 x double] containing the addend in the low /// 64 bits. /// \returns A 128-bit vector of [2 x double] containing the result in the low /// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits. static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C); } /// Computes a negated multiply-subtract of 128-bit vectors of [4 x float]. /// For each element, computes -(__A * __B) - __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMSUB213PS instruction. /// /// \param __A /// A 128-bit vector of [4 x float] containing the multiplicand. /// \param __B /// A 128-bit vector of [4 x float] containing the multiplier. /// \param __C /// A 128-bit vector of [4 x float] containing the subtrahend. /// \returns A 128-bit vector of [4 x float] containing the result. static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } /// Computes a negated multiply-subtract of 128-bit vectors of [2 x double]. /// For each element, computes -(__A * __B) - __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMSUB213PD instruction. /// /// \param __A /// A 128-bit vector of [2 x double] containing the multiplicand. /// \param __B /// A 128-bit vector of [2 x double] containing the multiplier. /// \param __C /// A 128-bit vector of [2 x double] containing the subtrahend. /// \returns A 128-bit vector of [2 x double] containing the result. static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); } /// Computes a scalar negated multiply-subtract of the single-precision /// values in the low 32 bits of 128-bit vectors of [4 x float]. /// /// \code{.operation} /// result[31:0] = -(__A[31:0] * __B[31:0]) - __C[31:0] /// result[127:32] = __A[127:32] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMSUB213SS instruction. /// /// \param __A /// A 128-bit vector of [4 x float] containing the multiplicand in the low /// 32 bits. /// \param __B /// A 128-bit vector of [4 x float] containing the multiplier in the low /// 32 bits. /// \param __C /// A 128-bit vector of [4 x float] containing the subtrahend in the low /// 32 bits. /// \returns A 128-bit vector of [4 x float] containing the result in the low /// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits. 
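/* [Editor's illustrative sketch - not part of the original headers.]
 * The negated forms above compute -(a*b)+c and -(a*b)-c; the practical reading
 * of the FNMADD variant is therefore "c - a*b", which is the shape of residual
 * and Newton-Raphson style updates. Assumes <immintrin.h> and -mfma; the
 * function name is hypothetical. */
#include <immintrin.h>
static __m128 residual(__m128 a, __m128 b, __m128 c) {
  return _mm_fnmadd_ps(a, b, c);  /* c - a*b, with a single rounding */
}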
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C); } /// Computes a scalar negated multiply-subtract of the double-precision /// values in the low 64 bits of 128-bit vectors of [2 x double]. /// /// \code{.operation} /// result[63:0] = -(__A[63:0] * __B[63:0]) - __C[63:0] /// result[127:64] = __A[127:64] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMSUB213SD instruction. /// /// \param __A /// A 128-bit vector of [2 x double] containing the multiplicand in the low /// 64 bits. /// \param __B /// A 128-bit vector of [2 x double] containing the multiplier in the low /// 64 bits. /// \param __C /// A 128-bit vector of [2 x double] containing the subtrahend in the low /// 64 bits. /// \returns A 128-bit vector of [2 x double] containing the result in the low /// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits. static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C); } /// Computes a multiply with alternating add/subtract of 128-bit vectors of /// [4 x float]. /// /// \code{.operation} /// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0] /// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32] /// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64] /// result[127:96] = (__A[127:96] * __B[127:96]) + __C[127:96] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADDSUB213PS instruction. /// /// \param __A /// A 128-bit vector of [4 x float] containing the multiplicand. /// \param __B /// A 128-bit vector of [4 x float] containing the multiplier. /// \param __C /// A 128-bit vector of [4 x float] containing the addend/subtrahend. /// \returns A 128-bit vector of [4 x float] containing the result. static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } /// Computes a multiply with alternating add/subtract of 128-bit vectors of /// [2 x double]. /// /// \code{.operation} /// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0] /// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADDSUB213PD instruction. /// /// \param __A /// A 128-bit vector of [2 x double] containing the multiplicand. /// \param __B /// A 128-bit vector of [2 x double] containing the multiplier. /// \param __C /// A 128-bit vector of [2 x double] containing the addend/subtrahend. /// \returns A 128-bit vector of [2 x double] containing the result. static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } /// Computes a multiply with alternating add/subtract of 128-bit vectors of /// [4 x float]. /// /// \code{.operation} /// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0] /// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32] /// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64] /// result[127:96 = (__A[127:96] * __B[127:96]) - __C[127:96] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMSUBADD213PS instruction. /// /// \param __A /// A 128-bit vector of [4 x float] containing the multiplicand. 
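/* [Editor's illustrative sketch - not part of the original headers.]
 * The alternating add/subtract form above maps directly onto interleaved
 * complex arithmetic: with (re, im) pairs packed per 64 bits, a single
 * _mm_fmaddsub_ps produces (re*re' - im*im', re*im' + im*re') for each pair.
 * Assumes <immintrin.h> and -mfma -msse3; names are hypothetical. */
#include <immintrin.h>
static __m128 complex_mul(__m128 x, __m128 y) {
  /* x = [a0, b0, a1, b1], y = [c0, d0, c1, d1]; each pair is one complex number. */
  __m128 re  = _mm_moveldup_ps(x);                             /* [a0,a0,a1,a1] */
  __m128 im  = _mm_movehdup_ps(x);                             /* [b0,b0,b1,b1] */
  __m128 ysw = _mm_shuffle_ps(y, y, _MM_SHUFFLE(2, 3, 0, 1));  /* [d0,c0,d1,c1] */
  return _mm_fmaddsub_ps(re, y, _mm_mul_ps(im, ysw));          /* [ac-bd, ad+bc, ...] */
}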
/// \param __B /// A 128-bit vector of [4 x float] containing the multiplier. /// \param __C /// A 128-bit vector of [4 x float] containing the addend/subtrahend. /// \returns A 128-bit vector of [4 x float] containing the result. static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } /// Computes a multiply with alternating add/subtract of 128-bit vectors of /// [2 x double]. /// /// \code{.operation} /// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0] /// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADDSUB213PD instruction. /// /// \param __A /// A 128-bit vector of [2 x double] containing the multiplicand. /// \param __B /// A 128-bit vector of [2 x double] containing the multiplier. /// \param __C /// A 128-bit vector of [2 x double] containing the addend/subtrahend. /// \returns A 128-bit vector of [2 x double] containing the result. static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } /// Computes a multiply-add of 256-bit vectors of [8 x float]. /// For each element, computes (__A * __B) + __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADD213PS instruction. /// /// \param __A /// A 256-bit vector of [8 x float] containing the multiplicand. /// \param __B /// A 256-bit vector of [8 x float] containing the multiplier. /// \param __C /// A 256-bit vector of [8 x float] containing the addend. /// \returns A 256-bit vector of [8 x float] containing the result. static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } /// Computes a multiply-add of 256-bit vectors of [4 x double]. /// For each element, computes (__A * __B) + __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADD213PD instruction. /// /// \param __A /// A 256-bit vector of [4 x double] containing the multiplicand. /// \param __B /// A 256-bit vector of [4 x double] containing the multiplier. /// \param __C /// A 256-bit vector of [4 x double] containing the addend. /// \returns A 256-bit vector of [4 x double] containing the result. static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } /// Computes a multiply-subtract of 256-bit vectors of [8 x float]. /// For each element, computes (__A * __B) - __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMSUB213PS instruction. /// /// \param __A /// A 256-bit vector of [8 x float] containing the multiplicand. /// \param __B /// A 256-bit vector of [8 x float] containing the multiplier. /// \param __C /// A 256-bit vector of [8 x float] containing the subtrahend. /// \returns A 256-bit vector of [8 x float] containing the result. static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } /// Computes a multiply-subtract of 256-bit vectors of [4 x double]. /// For each element, computes (__A * __B) - __C . 
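/* [Editor's illustrative sketch - not part of the original headers.]
 * A common pattern for the 256-bit fused multiply-add above: accumulating a
 * dot product eight lanes at a time, then reducing at the end. Assumes
 * <immintrin.h>, -mavx -mfma, and that n is a multiple of 8; names are
 * hypothetical. */
#include <immintrin.h>
static float dot8(const float *a, const float *b, int n) {
  __m256 acc = _mm256_setzero_ps();
  for (int i = 0; i < n; i += 8)
    acc = _mm256_fmadd_ps(_mm256_loadu_ps(a + i), _mm256_loadu_ps(b + i), acc);
  float lanes[8];
  _mm256_storeu_ps(lanes, acc);       /* spill the eight partial sums */
  return lanes[0] + lanes[1] + lanes[2] + lanes[3] +
         lanes[4] + lanes[5] + lanes[6] + lanes[7];
}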
/// /// \headerfile /// /// This intrinsic corresponds to the \c VFMSUB213PD instruction. /// /// \param __A /// A 256-bit vector of [4 x double] containing the multiplicand. /// \param __B /// A 256-bit vector of [4 x double] containing the multiplier. /// \param __C /// A 256-bit vector of [4 x double] containing the subtrahend. /// \returns A 256-bit vector of [4 x double] containing the result. static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } /// Computes a negated multiply-add of 256-bit vectors of [8 x float]. /// For each element, computes -(__A * __B) + __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMADD213PS instruction. /// /// \param __A /// A 256-bit vector of [8 x float] containing the multiplicand. /// \param __B /// A 256-bit vector of [8 x float] containing the multiplier. /// \param __C /// A 256-bit vector of [8 x float] containing the addend. /// \returns A 256-bit vector of [8 x float] containing the result. static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } /// Computes a negated multiply-add of 256-bit vectors of [4 x double]. /// For each element, computes -(__A * __B) + __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMADD213PD instruction. /// /// \param __A /// A 256-bit vector of [4 x double] containing the multiplicand. /// \param __B /// A 256-bit vector of [4 x double] containing the multiplier. /// \param __C /// A 256-bit vector of [4 x double] containing the addend. /// \returns A 256-bit vector of [4 x double] containing the result. static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); } /// Computes a negated multiply-subtract of 256-bit vectors of [8 x float]. /// For each element, computes -(__A * __B) - __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMSUB213PS instruction. /// /// \param __A /// A 256-bit vector of [8 x float] containing the multiplicand. /// \param __B /// A 256-bit vector of [8 x float] containing the multiplier. /// \param __C /// A 256-bit vector of [8 x float] containing the subtrahend. /// \returns A 256-bit vector of [8 x float] containing the result. static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } /// Computes a negated multiply-subtract of 256-bit vectors of [4 x double]. /// For each element, computes -(__A * __B) - __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMSUB213PD instruction. /// /// \param __A /// A 256-bit vector of [4 x double] containing the multiplicand. /// \param __B /// A 256-bit vector of [4 x double] containing the multiplier. /// \param __C /// A 256-bit vector of [4 x double] containing the subtrahend. /// \returns A 256-bit vector of [4 x double] containing the result. static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); } /// Computes a multiply with alternating add/subtract of 256-bit vectors of /// [8 x float]. 
/// /// \code{.operation} /// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0] /// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32] /// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64] /// result[127:96] = (__A[127:96] * __B[127:96]) + __C[127:96] /// result[159:128] = (__A[159:128] * __B[159:128]) - __C[159:128] /// result[191:160] = (__A[191:160] * __B[191:160]) + __C[191:160] /// result[223:192] = (__A[223:192] * __B[223:192]) - __C[223:192] /// result[255:224] = (__A[255:224] * __B[255:224]) + __C[255:224] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADDSUB213PS instruction. /// /// \param __A /// A 256-bit vector of [8 x float] containing the multiplicand. /// \param __B /// A 256-bit vector of [8 x float] containing the multiplier. /// \param __C /// A 256-bit vector of [8 x float] containing the addend/subtrahend. /// \returns A 256-bit vector of [8 x float] containing the result. static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } /// Computes a multiply with alternating add/subtract of 256-bit vectors of /// [4 x double]. /// /// \code{.operation} /// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0] /// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64] /// result[191:128] = (__A[191:128] * __B[191:128]) - __C[191:128] /// result[255:192] = (__A[255:192] * __B[255:192]) + __C[255:192] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADDSUB213PD instruction. /// /// \param __A /// A 256-bit vector of [4 x double] containing the multiplicand. /// \param __B /// A 256-bit vector of [4 x double] containing the multiplier. /// \param __C /// A 256-bit vector of [4 x double] containing the addend/subtrahend. /// \returns A 256-bit vector of [4 x double] containing the result. static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } /// Computes a vector multiply with alternating add/subtract of 256-bit /// vectors of [8 x float]. /// /// \code{.operation} /// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0] /// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32] /// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64] /// result[127:96] = (__A[127:96] * __B[127:96]) - __C[127:96] /// result[159:128] = (__A[159:128] * __B[159:128]) + __C[159:128] /// result[191:160] = (__A[191:160] * __B[191:160]) - __C[191:160] /// result[223:192] = (__A[223:192] * __B[223:192]) + __C[223:192] /// result[255:224] = (__A[255:224] * __B[255:224]) - __C[255:224] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMSUBADD213PS instruction. /// /// \param __A /// A 256-bit vector of [8 x float] containing the multiplicand. /// \param __B /// A 256-bit vector of [8 x float] containing the multiplier. /// \param __C /// A 256-bit vector of [8 x float] containing the addend/subtrahend. /// \returns A 256-bit vector of [8 x float] containing the result. static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } /// Computes a vector multiply with alternating add/subtract of 256-bit /// vectors of [4 x double]. 
/// /// \code{.operation} /// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0] /// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64] /// result[191:128] = (__A[191:128] * __B[191:128]) + __C[191:128] /// result[255:192] = (__A[255:192] * __B[255:192]) - __C[255:192] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMSUBADD213PD instruction. /// /// \param __A /// A 256-bit vector of [4 x double] containing the multiplicand. /// \param __B /// A 256-bit vector of [4 x double] containing the multiplier. /// \param __C /// A 256-bit vector of [4 x double] containing the addend/subtrahend. /// \returns A 256-bit vector of [4 x double] containing the result. static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif /* __FMAINTRIN_H */ //===--- opencl-c.h - OpenCL C language builtin function header -----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef _OPENCL_H_ #define _OPENCL_H_ #include "opencl-c-base.h" #if defined(__opencl_c_images) #ifndef cl_khr_depth_images #define cl_khr_depth_images #endif //cl_khr_depth_images #endif //defined(__opencl_c_images) #if __OPENCL_C_VERSION__ < CL_VERSION_2_0 #ifdef cl_khr_3d_image_writes #pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable #endif //cl_khr_3d_image_writes #endif //__OPENCL_C_VERSION__ < CL_VERSION_2_0 #if (defined(__OPENCL_CPP_VERSION__) || \ (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)) && \ (defined(__SPIR__) || defined(__SPIRV__)) #pragma OPENCL EXTENSION cl_intel_planar_yuv : begin #pragma OPENCL EXTENSION cl_intel_planar_yuv : end #endif // (defined(__OPENCL_CPP_VERSION__) || // (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)) && // (defined(__SPIR__) || defined(__SPIRV__)) #define __ovld __attribute__((overloadable)) #define __conv __attribute__((convergent)) // Optimizations #define __purefn __attribute__((pure)) #define __cnfn __attribute__((const)) // OpenCL v1.1/1.2/2.0 s6.2.3 - Explicit conversions char __ovld __cnfn convert_char_rte(char); char __ovld __cnfn convert_char_sat_rte(char); char __ovld __cnfn convert_char_rtz(char); char __ovld __cnfn convert_char_sat_rtz(char); char __ovld __cnfn convert_char_rtp(char); char __ovld __cnfn convert_char_sat_rtp(char); char __ovld __cnfn convert_char_rtn(char); char __ovld __cnfn convert_char_sat_rtn(char); char __ovld __cnfn convert_char(char); char __ovld __cnfn convert_char_sat(char); char __ovld __cnfn convert_char_rte(uchar); char __ovld __cnfn convert_char_sat_rte(uchar); char __ovld __cnfn convert_char_rtz(uchar); char __ovld __cnfn convert_char_sat_rtz(uchar); char __ovld __cnfn convert_char_rtp(uchar); char __ovld __cnfn convert_char_sat_rtp(uchar); char __ovld __cnfn convert_char_rtn(uchar); char __ovld __cnfn convert_char_sat_rtn(uchar); char __ovld __cnfn convert_char(uchar); char __ovld __cnfn convert_char_sat(uchar); char __ovld __cnfn convert_char_rte(short); char __ovld __cnfn convert_char_sat_rte(short); char __ovld __cnfn convert_char_rtz(short); char __ovld __cnfn convert_char_sat_rtz(short); char __ovld __cnfn convert_char_rtp(short); char __ovld __cnfn 
convert_char_sat_rtp(short); char __ovld __cnfn convert_char_rtn(short); char __ovld __cnfn convert_char_sat_rtn(short); char __ovld __cnfn convert_char(short); char __ovld __cnfn convert_char_sat(short); char __ovld __cnfn convert_char_rte(ushort); char __ovld __cnfn convert_char_sat_rte(ushort); char __ovld __cnfn convert_char_rtz(ushort); char __ovld __cnfn convert_char_sat_rtz(ushort); char __ovld __cnfn convert_char_rtp(ushort); char __ovld __cnfn convert_char_sat_rtp(ushort); char __ovld __cnfn convert_char_rtn(ushort); char __ovld __cnfn convert_char_sat_rtn(ushort); char __ovld __cnfn convert_char(ushort); char __ovld __cnfn convert_char_sat(ushort); char __ovld __cnfn convert_char_rte(int); char __ovld __cnfn convert_char_sat_rte(int); char __ovld __cnfn convert_char_rtz(int); char __ovld __cnfn convert_char_sat_rtz(int); char __ovld __cnfn convert_char_rtp(int); char __ovld __cnfn convert_char_sat_rtp(int); char __ovld __cnfn convert_char_rtn(int); char __ovld __cnfn convert_char_sat_rtn(int); char __ovld __cnfn convert_char(int); char __ovld __cnfn convert_char_sat(int); char __ovld __cnfn convert_char_rte(uint); char __ovld __cnfn convert_char_sat_rte(uint); char __ovld __cnfn convert_char_rtz(uint); char __ovld __cnfn convert_char_sat_rtz(uint); char __ovld __cnfn convert_char_rtp(uint); char __ovld __cnfn convert_char_sat_rtp(uint); char __ovld __cnfn convert_char_rtn(uint); char __ovld __cnfn convert_char_sat_rtn(uint); char __ovld __cnfn convert_char(uint); char __ovld __cnfn convert_char_sat(uint); char __ovld __cnfn convert_char_rte(long); char __ovld __cnfn convert_char_sat_rte(long); char __ovld __cnfn convert_char_rtz(long); char __ovld __cnfn convert_char_sat_rtz(long); char __ovld __cnfn convert_char_rtp(long); char __ovld __cnfn convert_char_sat_rtp(long); char __ovld __cnfn convert_char_rtn(long); char __ovld __cnfn convert_char_sat_rtn(long); char __ovld __cnfn convert_char(long); char __ovld __cnfn convert_char_sat(long); char __ovld __cnfn convert_char_rte(ulong); char __ovld __cnfn convert_char_sat_rte(ulong); char __ovld __cnfn convert_char_rtz(ulong); char __ovld __cnfn convert_char_sat_rtz(ulong); char __ovld __cnfn convert_char_rtp(ulong); char __ovld __cnfn convert_char_sat_rtp(ulong); char __ovld __cnfn convert_char_rtn(ulong); char __ovld __cnfn convert_char_sat_rtn(ulong); char __ovld __cnfn convert_char(ulong); char __ovld __cnfn convert_char_sat(ulong); char __ovld __cnfn convert_char_rte(float); char __ovld __cnfn convert_char_sat_rte(float); char __ovld __cnfn convert_char_rtz(float); char __ovld __cnfn convert_char_sat_rtz(float); char __ovld __cnfn convert_char_rtp(float); char __ovld __cnfn convert_char_sat_rtp(float); char __ovld __cnfn convert_char_rtn(float); char __ovld __cnfn convert_char_sat_rtn(float); char __ovld __cnfn convert_char(float); char __ovld __cnfn convert_char_sat(float); uchar __ovld __cnfn convert_uchar_rte(char); uchar __ovld __cnfn convert_uchar_sat_rte(char); uchar __ovld __cnfn convert_uchar_rtz(char); uchar __ovld __cnfn convert_uchar_sat_rtz(char); uchar __ovld __cnfn convert_uchar_rtp(char); uchar __ovld __cnfn convert_uchar_sat_rtp(char); uchar __ovld __cnfn convert_uchar_rtn(char); uchar __ovld __cnfn convert_uchar_sat_rtn(char); uchar __ovld __cnfn convert_uchar(char); uchar __ovld __cnfn convert_uchar_sat(char); uchar __ovld __cnfn convert_uchar_rte(uchar); uchar __ovld __cnfn convert_uchar_sat_rte(uchar); uchar __ovld __cnfn convert_uchar_rtz(uchar); uchar __ovld __cnfn convert_uchar_sat_rtz(uchar); uchar __ovld 
__cnfn convert_uchar_rtp(uchar); uchar __ovld __cnfn convert_uchar_sat_rtp(uchar); uchar __ovld __cnfn convert_uchar_rtn(uchar); uchar __ovld __cnfn convert_uchar_sat_rtn(uchar); uchar __ovld __cnfn convert_uchar(uchar); uchar __ovld __cnfn convert_uchar_sat(uchar); uchar __ovld __cnfn convert_uchar_rte(short); uchar __ovld __cnfn convert_uchar_sat_rte(short); uchar __ovld __cnfn convert_uchar_rtz(short); uchar __ovld __cnfn convert_uchar_sat_rtz(short); uchar __ovld __cnfn convert_uchar_rtp(short); uchar __ovld __cnfn convert_uchar_sat_rtp(short); uchar __ovld __cnfn convert_uchar_rtn(short); uchar __ovld __cnfn convert_uchar_sat_rtn(short); uchar __ovld __cnfn convert_uchar(short); uchar __ovld __cnfn convert_uchar_sat(short); uchar __ovld __cnfn convert_uchar_rte(ushort); uchar __ovld __cnfn convert_uchar_sat_rte(ushort); uchar __ovld __cnfn convert_uchar_rtz(ushort); uchar __ovld __cnfn convert_uchar_sat_rtz(ushort); uchar __ovld __cnfn convert_uchar_rtp(ushort); uchar __ovld __cnfn convert_uchar_sat_rtp(ushort); uchar __ovld __cnfn convert_uchar_rtn(ushort); uchar __ovld __cnfn convert_uchar_sat_rtn(ushort); uchar __ovld __cnfn convert_uchar(ushort); uchar __ovld __cnfn convert_uchar_sat(ushort); uchar __ovld __cnfn convert_uchar_rte(int); uchar __ovld __cnfn convert_uchar_sat_rte(int); uchar __ovld __cnfn convert_uchar_rtz(int); uchar __ovld __cnfn convert_uchar_sat_rtz(int); uchar __ovld __cnfn convert_uchar_rtp(int); uchar __ovld __cnfn convert_uchar_sat_rtp(int); uchar __ovld __cnfn convert_uchar_rtn(int); uchar __ovld __cnfn convert_uchar_sat_rtn(int); uchar __ovld __cnfn convert_uchar(int); uchar __ovld __cnfn convert_uchar_sat(int); uchar __ovld __cnfn convert_uchar_rte(uint); uchar __ovld __cnfn convert_uchar_sat_rte(uint); uchar __ovld __cnfn convert_uchar_rtz(uint); uchar __ovld __cnfn convert_uchar_sat_rtz(uint); uchar __ovld __cnfn convert_uchar_rtp(uint); uchar __ovld __cnfn convert_uchar_sat_rtp(uint); uchar __ovld __cnfn convert_uchar_rtn(uint); uchar __ovld __cnfn convert_uchar_sat_rtn(uint); uchar __ovld __cnfn convert_uchar(uint); uchar __ovld __cnfn convert_uchar_sat(uint); uchar __ovld __cnfn convert_uchar_rte(long); uchar __ovld __cnfn convert_uchar_sat_rte(long); uchar __ovld __cnfn convert_uchar_rtz(long); uchar __ovld __cnfn convert_uchar_sat_rtz(long); uchar __ovld __cnfn convert_uchar_rtp(long); uchar __ovld __cnfn convert_uchar_sat_rtp(long); uchar __ovld __cnfn convert_uchar_rtn(long); uchar __ovld __cnfn convert_uchar_sat_rtn(long); uchar __ovld __cnfn convert_uchar(long); uchar __ovld __cnfn convert_uchar_sat(long); uchar __ovld __cnfn convert_uchar_rte(ulong); uchar __ovld __cnfn convert_uchar_sat_rte(ulong); uchar __ovld __cnfn convert_uchar_rtz(ulong); uchar __ovld __cnfn convert_uchar_sat_rtz(ulong); uchar __ovld __cnfn convert_uchar_rtp(ulong); uchar __ovld __cnfn convert_uchar_sat_rtp(ulong); uchar __ovld __cnfn convert_uchar_rtn(ulong); uchar __ovld __cnfn convert_uchar_sat_rtn(ulong); uchar __ovld __cnfn convert_uchar(ulong); uchar __ovld __cnfn convert_uchar_sat(ulong); uchar __ovld __cnfn convert_uchar_rte(float); uchar __ovld __cnfn convert_uchar_sat_rte(float); uchar __ovld __cnfn convert_uchar_rtz(float); uchar __ovld __cnfn convert_uchar_sat_rtz(float); uchar __ovld __cnfn convert_uchar_rtp(float); uchar __ovld __cnfn convert_uchar_sat_rtp(float); uchar __ovld __cnfn convert_uchar_rtn(float); uchar __ovld __cnfn convert_uchar_sat_rtn(float); uchar __ovld __cnfn convert_uchar(float); uchar __ovld __cnfn convert_uchar_sat(float); short __ovld 
__cnfn convert_short_rte(char); short __ovld __cnfn convert_short_sat_rte(char); short __ovld __cnfn convert_short_rtz(char); short __ovld __cnfn convert_short_sat_rtz(char); short __ovld __cnfn convert_short_rtp(char); short __ovld __cnfn convert_short_sat_rtp(char); short __ovld __cnfn convert_short_rtn(char); short __ovld __cnfn convert_short_sat_rtn(char); short __ovld __cnfn convert_short(char); short __ovld __cnfn convert_short_sat(char); short __ovld __cnfn convert_short_rte(uchar); short __ovld __cnfn convert_short_sat_rte(uchar); short __ovld __cnfn convert_short_rtz(uchar); short __ovld __cnfn convert_short_sat_rtz(uchar); short __ovld __cnfn convert_short_rtp(uchar); short __ovld __cnfn convert_short_sat_rtp(uchar); short __ovld __cnfn convert_short_rtn(uchar); short __ovld __cnfn convert_short_sat_rtn(uchar); short __ovld __cnfn convert_short(uchar); short __ovld __cnfn convert_short_sat(uchar); short __ovld __cnfn convert_short_rte(short); short __ovld __cnfn convert_short_sat_rte(short); short __ovld __cnfn convert_short_rtz(short); short __ovld __cnfn convert_short_sat_rtz(short); short __ovld __cnfn convert_short_rtp(short); short __ovld __cnfn convert_short_sat_rtp(short); short __ovld __cnfn convert_short_rtn(short); short __ovld __cnfn convert_short_sat_rtn(short); short __ovld __cnfn convert_short(short); short __ovld __cnfn convert_short_sat(short); short __ovld __cnfn convert_short_rte(ushort); short __ovld __cnfn convert_short_sat_rte(ushort); short __ovld __cnfn convert_short_rtz(ushort); short __ovld __cnfn convert_short_sat_rtz(ushort); short __ovld __cnfn convert_short_rtp(ushort); short __ovld __cnfn convert_short_sat_rtp(ushort); short __ovld __cnfn convert_short_rtn(ushort); short __ovld __cnfn convert_short_sat_rtn(ushort); short __ovld __cnfn convert_short(ushort); short __ovld __cnfn convert_short_sat(ushort); short __ovld __cnfn convert_short_rte(int); short __ovld __cnfn convert_short_sat_rte(int); short __ovld __cnfn convert_short_rtz(int); short __ovld __cnfn convert_short_sat_rtz(int); short __ovld __cnfn convert_short_rtp(int); short __ovld __cnfn convert_short_sat_rtp(int); short __ovld __cnfn convert_short_rtn(int); short __ovld __cnfn convert_short_sat_rtn(int); short __ovld __cnfn convert_short(int); short __ovld __cnfn convert_short_sat(int); short __ovld __cnfn convert_short_rte(uint); short __ovld __cnfn convert_short_sat_rte(uint); short __ovld __cnfn convert_short_rtz(uint); short __ovld __cnfn convert_short_sat_rtz(uint); short __ovld __cnfn convert_short_rtp(uint); short __ovld __cnfn convert_short_sat_rtp(uint); short __ovld __cnfn convert_short_rtn(uint); short __ovld __cnfn convert_short_sat_rtn(uint); short __ovld __cnfn convert_short(uint); short __ovld __cnfn convert_short_sat(uint); short __ovld __cnfn convert_short_rte(long); short __ovld __cnfn convert_short_sat_rte(long); short __ovld __cnfn convert_short_rtz(long); short __ovld __cnfn convert_short_sat_rtz(long); short __ovld __cnfn convert_short_rtp(long); short __ovld __cnfn convert_short_sat_rtp(long); short __ovld __cnfn convert_short_rtn(long); short __ovld __cnfn convert_short_sat_rtn(long); short __ovld __cnfn convert_short(long); short __ovld __cnfn convert_short_sat(long); short __ovld __cnfn convert_short_rte(ulong); short __ovld __cnfn convert_short_sat_rte(ulong); short __ovld __cnfn convert_short_rtz(ulong); short __ovld __cnfn convert_short_sat_rtz(ulong); short __ovld __cnfn convert_short_rtp(ulong); short __ovld __cnfn convert_short_sat_rtp(ulong); short __ovld 
__cnfn convert_short_rtn(ulong); short __ovld __cnfn convert_short_sat_rtn(ulong); short __ovld __cnfn convert_short(ulong); short __ovld __cnfn convert_short_sat(ulong); short __ovld __cnfn convert_short_rte(float); short __ovld __cnfn convert_short_sat_rte(float); short __ovld __cnfn convert_short_rtz(float); short __ovld __cnfn convert_short_sat_rtz(float); short __ovld __cnfn convert_short_rtp(float); short __ovld __cnfn convert_short_sat_rtp(float); short __ovld __cnfn convert_short_rtn(float); short __ovld __cnfn convert_short_sat_rtn(float); short __ovld __cnfn convert_short(float); short __ovld __cnfn convert_short_sat(float); ushort __ovld __cnfn convert_ushort_rte(char); ushort __ovld __cnfn convert_ushort_sat_rte(char); ushort __ovld __cnfn convert_ushort_rtz(char); ushort __ovld __cnfn convert_ushort_sat_rtz(char); ushort __ovld __cnfn convert_ushort_rtp(char); ushort __ovld __cnfn convert_ushort_sat_rtp(char); ushort __ovld __cnfn convert_ushort_rtn(char); ushort __ovld __cnfn convert_ushort_sat_rtn(char); ushort __ovld __cnfn convert_ushort(char); ushort __ovld __cnfn convert_ushort_sat(char); ushort __ovld __cnfn convert_ushort_rte(uchar); ushort __ovld __cnfn convert_ushort_sat_rte(uchar); ushort __ovld __cnfn convert_ushort_rtz(uchar); ushort __ovld __cnfn convert_ushort_sat_rtz(uchar); ushort __ovld __cnfn convert_ushort_rtp(uchar); ushort __ovld __cnfn convert_ushort_sat_rtp(uchar); ushort __ovld __cnfn convert_ushort_rtn(uchar); ushort __ovld __cnfn convert_ushort_sat_rtn(uchar); ushort __ovld __cnfn convert_ushort(uchar); ushort __ovld __cnfn convert_ushort_sat(uchar); ushort __ovld __cnfn convert_ushort_rte(short); ushort __ovld __cnfn convert_ushort_sat_rte(short); ushort __ovld __cnfn convert_ushort_rtz(short); ushort __ovld __cnfn convert_ushort_sat_rtz(short); ushort __ovld __cnfn convert_ushort_rtp(short); ushort __ovld __cnfn convert_ushort_sat_rtp(short); ushort __ovld __cnfn convert_ushort_rtn(short); ushort __ovld __cnfn convert_ushort_sat_rtn(short); ushort __ovld __cnfn convert_ushort(short); ushort __ovld __cnfn convert_ushort_sat(short); ushort __ovld __cnfn convert_ushort_rte(ushort); ushort __ovld __cnfn convert_ushort_sat_rte(ushort); ushort __ovld __cnfn convert_ushort_rtz(ushort); ushort __ovld __cnfn convert_ushort_sat_rtz(ushort); ushort __ovld __cnfn convert_ushort_rtp(ushort); ushort __ovld __cnfn convert_ushort_sat_rtp(ushort); ushort __ovld __cnfn convert_ushort_rtn(ushort); ushort __ovld __cnfn convert_ushort_sat_rtn(ushort); ushort __ovld __cnfn convert_ushort(ushort); ushort __ovld __cnfn convert_ushort_sat(ushort); ushort __ovld __cnfn convert_ushort_rte(int); ushort __ovld __cnfn convert_ushort_sat_rte(int); ushort __ovld __cnfn convert_ushort_rtz(int); ushort __ovld __cnfn convert_ushort_sat_rtz(int); ushort __ovld __cnfn convert_ushort_rtp(int); ushort __ovld __cnfn convert_ushort_sat_rtp(int); ushort __ovld __cnfn convert_ushort_rtn(int); ushort __ovld __cnfn convert_ushort_sat_rtn(int); ushort __ovld __cnfn convert_ushort(int); ushort __ovld __cnfn convert_ushort_sat(int); ushort __ovld __cnfn convert_ushort_rte(uint); ushort __ovld __cnfn convert_ushort_sat_rte(uint); ushort __ovld __cnfn convert_ushort_rtz(uint); ushort __ovld __cnfn convert_ushort_sat_rtz(uint); ushort __ovld __cnfn convert_ushort_rtp(uint); ushort __ovld __cnfn convert_ushort_sat_rtp(uint); ushort __ovld __cnfn convert_ushort_rtn(uint); ushort __ovld __cnfn convert_ushort_sat_rtn(uint); ushort __ovld __cnfn convert_ushort(uint); ushort __ovld __cnfn 
convert_ushort_sat(uint); ushort __ovld __cnfn convert_ushort_rte(long); ushort __ovld __cnfn convert_ushort_sat_rte(long); ushort __ovld __cnfn convert_ushort_rtz(long); ushort __ovld __cnfn convert_ushort_sat_rtz(long); ushort __ovld __cnfn convert_ushort_rtp(long); ushort __ovld __cnfn convert_ushort_sat_rtp(long); ushort __ovld __cnfn convert_ushort_rtn(long); ushort __ovld __cnfn convert_ushort_sat_rtn(long); ushort __ovld __cnfn convert_ushort(long); ushort __ovld __cnfn convert_ushort_sat(long); ushort __ovld __cnfn convert_ushort_rte(ulong); ushort __ovld __cnfn convert_ushort_sat_rte(ulong); ushort __ovld __cnfn convert_ushort_rtz(ulong); ushort __ovld __cnfn convert_ushort_sat_rtz(ulong); ushort __ovld __cnfn convert_ushort_rtp(ulong); ushort __ovld __cnfn convert_ushort_sat_rtp(ulong); ushort __ovld __cnfn convert_ushort_rtn(ulong); ushort __ovld __cnfn convert_ushort_sat_rtn(ulong); ushort __ovld __cnfn convert_ushort(ulong); ushort __ovld __cnfn convert_ushort_sat(ulong); ushort __ovld __cnfn convert_ushort_rte(float); ushort __ovld __cnfn convert_ushort_sat_rte(float); ushort __ovld __cnfn convert_ushort_rtz(float); ushort __ovld __cnfn convert_ushort_sat_rtz(float); ushort __ovld __cnfn convert_ushort_rtp(float); ushort __ovld __cnfn convert_ushort_sat_rtp(float); ushort __ovld __cnfn convert_ushort_rtn(float); ushort __ovld __cnfn convert_ushort_sat_rtn(float); ushort __ovld __cnfn convert_ushort(float); ushort __ovld __cnfn convert_ushort_sat(float); int __ovld __cnfn convert_int_rte(char); int __ovld __cnfn convert_int_sat_rte(char); int __ovld __cnfn convert_int_rtz(char); int __ovld __cnfn convert_int_sat_rtz(char); int __ovld __cnfn convert_int_rtp(char); int __ovld __cnfn convert_int_sat_rtp(char); int __ovld __cnfn convert_int_rtn(char); int __ovld __cnfn convert_int_sat_rtn(char); int __ovld __cnfn convert_int(char); int __ovld __cnfn convert_int_sat(char); int __ovld __cnfn convert_int_rte(uchar); int __ovld __cnfn convert_int_sat_rte(uchar); int __ovld __cnfn convert_int_rtz(uchar); int __ovld __cnfn convert_int_sat_rtz(uchar); int __ovld __cnfn convert_int_rtp(uchar); int __ovld __cnfn convert_int_sat_rtp(uchar); int __ovld __cnfn convert_int_rtn(uchar); int __ovld __cnfn convert_int_sat_rtn(uchar); int __ovld __cnfn convert_int(uchar); int __ovld __cnfn convert_int_sat(uchar); int __ovld __cnfn convert_int_rte(short); int __ovld __cnfn convert_int_sat_rte(short); int __ovld __cnfn convert_int_rtz(short); int __ovld __cnfn convert_int_sat_rtz(short); int __ovld __cnfn convert_int_rtp(short); int __ovld __cnfn convert_int_sat_rtp(short); int __ovld __cnfn convert_int_rtn(short); int __ovld __cnfn convert_int_sat_rtn(short); int __ovld __cnfn convert_int(short); int __ovld __cnfn convert_int_sat(short); int __ovld __cnfn convert_int_rte(ushort); int __ovld __cnfn convert_int_sat_rte(ushort); int __ovld __cnfn convert_int_rtz(ushort); int __ovld __cnfn convert_int_sat_rtz(ushort); int __ovld __cnfn convert_int_rtp(ushort); int __ovld __cnfn convert_int_sat_rtp(ushort); int __ovld __cnfn convert_int_rtn(ushort); int __ovld __cnfn convert_int_sat_rtn(ushort); int __ovld __cnfn convert_int(ushort); int __ovld __cnfn convert_int_sat(ushort); int __ovld __cnfn convert_int_rte(int); int __ovld __cnfn convert_int_sat_rte(int); int __ovld __cnfn convert_int_rtz(int); int __ovld __cnfn convert_int_sat_rtz(int); int __ovld __cnfn convert_int_rtp(int); int __ovld __cnfn convert_int_sat_rtp(int); int __ovld __cnfn convert_int_rtn(int); int __ovld __cnfn 
convert_int_sat_rtn(int); int __ovld __cnfn convert_int(int); int __ovld __cnfn convert_int_sat(int); int __ovld __cnfn convert_int_rte(uint); int __ovld __cnfn convert_int_sat_rte(uint); int __ovld __cnfn convert_int_rtz(uint); int __ovld __cnfn convert_int_sat_rtz(uint); int __ovld __cnfn convert_int_rtp(uint); int __ovld __cnfn convert_int_sat_rtp(uint); int __ovld __cnfn convert_int_rtn(uint); int __ovld __cnfn convert_int_sat_rtn(uint); int __ovld __cnfn convert_int(uint); int __ovld __cnfn convert_int_sat(uint); int __ovld __cnfn convert_int_rte(long); int __ovld __cnfn convert_int_sat_rte(long); int __ovld __cnfn convert_int_rtz(long); int __ovld __cnfn convert_int_sat_rtz(long); int __ovld __cnfn convert_int_rtp(long); int __ovld __cnfn convert_int_sat_rtp(long); int __ovld __cnfn convert_int_rtn(long); int __ovld __cnfn convert_int_sat_rtn(long); int __ovld __cnfn convert_int(long); int __ovld __cnfn convert_int_sat(long); int __ovld __cnfn convert_int_rte(ulong); int __ovld __cnfn convert_int_sat_rte(ulong); int __ovld __cnfn convert_int_rtz(ulong); int __ovld __cnfn convert_int_sat_rtz(ulong); int __ovld __cnfn convert_int_rtp(ulong); int __ovld __cnfn convert_int_sat_rtp(ulong); int __ovld __cnfn convert_int_rtn(ulong); int __ovld __cnfn convert_int_sat_rtn(ulong); int __ovld __cnfn convert_int(ulong); int __ovld __cnfn convert_int_sat(ulong); int __ovld __cnfn convert_int_rte(float); int __ovld __cnfn convert_int_sat_rte(float); int __ovld __cnfn convert_int_rtz(float); int __ovld __cnfn convert_int_sat_rtz(float); int __ovld __cnfn convert_int_rtp(float); int __ovld __cnfn convert_int_sat_rtp(float); int __ovld __cnfn convert_int_rtn(float); int __ovld __cnfn convert_int_sat_rtn(float); int __ovld __cnfn convert_int(float); int __ovld __cnfn convert_int_sat(float); uint __ovld __cnfn convert_uint_rte(char); uint __ovld __cnfn convert_uint_sat_rte(char); uint __ovld __cnfn convert_uint_rtz(char); uint __ovld __cnfn convert_uint_sat_rtz(char); uint __ovld __cnfn convert_uint_rtp(char); uint __ovld __cnfn convert_uint_sat_rtp(char); uint __ovld __cnfn convert_uint_rtn(char); uint __ovld __cnfn convert_uint_sat_rtn(char); uint __ovld __cnfn convert_uint(char); uint __ovld __cnfn convert_uint_sat(char); uint __ovld __cnfn convert_uint_rte(uchar); uint __ovld __cnfn convert_uint_sat_rte(uchar); uint __ovld __cnfn convert_uint_rtz(uchar); uint __ovld __cnfn convert_uint_sat_rtz(uchar); uint __ovld __cnfn convert_uint_rtp(uchar); uint __ovld __cnfn convert_uint_sat_rtp(uchar); uint __ovld __cnfn convert_uint_rtn(uchar); uint __ovld __cnfn convert_uint_sat_rtn(uchar); uint __ovld __cnfn convert_uint(uchar); uint __ovld __cnfn convert_uint_sat(uchar); uint __ovld __cnfn convert_uint_rte(short); uint __ovld __cnfn convert_uint_sat_rte(short); uint __ovld __cnfn convert_uint_rtz(short); uint __ovld __cnfn convert_uint_sat_rtz(short); uint __ovld __cnfn convert_uint_rtp(short); uint __ovld __cnfn convert_uint_sat_rtp(short); uint __ovld __cnfn convert_uint_rtn(short); uint __ovld __cnfn convert_uint_sat_rtn(short); uint __ovld __cnfn convert_uint(short); uint __ovld __cnfn convert_uint_sat(short); uint __ovld __cnfn convert_uint_rte(ushort); uint __ovld __cnfn convert_uint_sat_rte(ushort); uint __ovld __cnfn convert_uint_rtz(ushort); uint __ovld __cnfn convert_uint_sat_rtz(ushort); uint __ovld __cnfn convert_uint_rtp(ushort); uint __ovld __cnfn convert_uint_sat_rtp(ushort); uint __ovld __cnfn convert_uint_rtn(ushort); uint __ovld __cnfn convert_uint_sat_rtn(ushort); uint __ovld __cnfn 
convert_uint(ushort); uint __ovld __cnfn convert_uint_sat(ushort); uint __ovld __cnfn convert_uint_rte(int); uint __ovld __cnfn convert_uint_sat_rte(int); uint __ovld __cnfn convert_uint_rtz(int); uint __ovld __cnfn convert_uint_sat_rtz(int); uint __ovld __cnfn convert_uint_rtp(int); uint __ovld __cnfn convert_uint_sat_rtp(int); uint __ovld __cnfn convert_uint_rtn(int); uint __ovld __cnfn convert_uint_sat_rtn(int); uint __ovld __cnfn convert_uint(int); uint __ovld __cnfn convert_uint_sat(int); uint __ovld __cnfn convert_uint_rte(uint); uint __ovld __cnfn convert_uint_sat_rte(uint); uint __ovld __cnfn convert_uint_rtz(uint); uint __ovld __cnfn convert_uint_sat_rtz(uint); uint __ovld __cnfn convert_uint_rtp(uint); uint __ovld __cnfn convert_uint_sat_rtp(uint); uint __ovld __cnfn convert_uint_rtn(uint); uint __ovld __cnfn convert_uint_sat_rtn(uint); uint __ovld __cnfn convert_uint(uint); uint __ovld __cnfn convert_uint_sat(uint); uint __ovld __cnfn convert_uint_rte(long); uint __ovld __cnfn convert_uint_sat_rte(long); uint __ovld __cnfn convert_uint_rtz(long); uint __ovld __cnfn convert_uint_sat_rtz(long); uint __ovld __cnfn convert_uint_rtp(long); uint __ovld __cnfn convert_uint_sat_rtp(long); uint __ovld __cnfn convert_uint_rtn(long); uint __ovld __cnfn convert_uint_sat_rtn(long); uint __ovld __cnfn convert_uint(long); uint __ovld __cnfn convert_uint_sat(long); uint __ovld __cnfn convert_uint_rte(ulong); uint __ovld __cnfn convert_uint_sat_rte(ulong); uint __ovld __cnfn convert_uint_rtz(ulong); uint __ovld __cnfn convert_uint_sat_rtz(ulong); uint __ovld __cnfn convert_uint_rtp(ulong); uint __ovld __cnfn convert_uint_sat_rtp(ulong); uint __ovld __cnfn convert_uint_rtn(ulong); uint __ovld __cnfn convert_uint_sat_rtn(ulong); uint __ovld __cnfn convert_uint(ulong); uint __ovld __cnfn convert_uint_sat(ulong); uint __ovld __cnfn convert_uint_rte(float); uint __ovld __cnfn convert_uint_sat_rte(float); uint __ovld __cnfn convert_uint_rtz(float); uint __ovld __cnfn convert_uint_sat_rtz(float); uint __ovld __cnfn convert_uint_rtp(float); uint __ovld __cnfn convert_uint_sat_rtp(float); uint __ovld __cnfn convert_uint_rtn(float); uint __ovld __cnfn convert_uint_sat_rtn(float); uint __ovld __cnfn convert_uint(float); uint __ovld __cnfn convert_uint_sat(float); long __ovld __cnfn convert_long_rte(char); long __ovld __cnfn convert_long_sat_rte(char); long __ovld __cnfn convert_long_rtz(char); long __ovld __cnfn convert_long_sat_rtz(char); long __ovld __cnfn convert_long_rtp(char); long __ovld __cnfn convert_long_sat_rtp(char); long __ovld __cnfn convert_long_rtn(char); long __ovld __cnfn convert_long_sat_rtn(char); long __ovld __cnfn convert_long(char); long __ovld __cnfn convert_long_sat(char); long __ovld __cnfn convert_long_rte(uchar); long __ovld __cnfn convert_long_sat_rte(uchar); long __ovld __cnfn convert_long_rtz(uchar); long __ovld __cnfn convert_long_sat_rtz(uchar); long __ovld __cnfn convert_long_rtp(uchar); long __ovld __cnfn convert_long_sat_rtp(uchar); long __ovld __cnfn convert_long_rtn(uchar); long __ovld __cnfn convert_long_sat_rtn(uchar); long __ovld __cnfn convert_long(uchar); long __ovld __cnfn convert_long_sat(uchar); long __ovld __cnfn convert_long_rte(short); long __ovld __cnfn convert_long_sat_rte(short); long __ovld __cnfn convert_long_rtz(short); long __ovld __cnfn convert_long_sat_rtz(short); long __ovld __cnfn convert_long_rtp(short); long __ovld __cnfn convert_long_sat_rtp(short); long __ovld __cnfn convert_long_rtn(short); long __ovld __cnfn convert_long_sat_rtn(short); long 
__ovld __cnfn convert_long(short); long __ovld __cnfn convert_long_sat(short); long __ovld __cnfn convert_long_rte(ushort); long __ovld __cnfn convert_long_sat_rte(ushort); long __ovld __cnfn convert_long_rtz(ushort); long __ovld __cnfn convert_long_sat_rtz(ushort); long __ovld __cnfn convert_long_rtp(ushort); long __ovld __cnfn convert_long_sat_rtp(ushort); long __ovld __cnfn convert_long_rtn(ushort); long __ovld __cnfn convert_long_sat_rtn(ushort); long __ovld __cnfn convert_long(ushort); long __ovld __cnfn convert_long_sat(ushort); long __ovld __cnfn convert_long_rte(int); long __ovld __cnfn convert_long_sat_rte(int); long __ovld __cnfn convert_long_rtz(int); long __ovld __cnfn convert_long_sat_rtz(int); long __ovld __cnfn convert_long_rtp(int); long __ovld __cnfn convert_long_sat_rtp(int); long __ovld __cnfn convert_long_rtn(int); long __ovld __cnfn convert_long_sat_rtn(int); long __ovld __cnfn convert_long(int); long __ovld __cnfn convert_long_sat(int); long __ovld __cnfn convert_long_rte(uint); long __ovld __cnfn convert_long_sat_rte(uint); long __ovld __cnfn convert_long_rtz(uint); long __ovld __cnfn convert_long_sat_rtz(uint); long __ovld __cnfn convert_long_rtp(uint); long __ovld __cnfn convert_long_sat_rtp(uint); long __ovld __cnfn convert_long_rtn(uint); long __ovld __cnfn convert_long_sat_rtn(uint); long __ovld __cnfn convert_long(uint); long __ovld __cnfn convert_long_sat(uint); long __ovld __cnfn convert_long_rte(long); long __ovld __cnfn convert_long_sat_rte(long); long __ovld __cnfn convert_long_rtz(long); long __ovld __cnfn convert_long_sat_rtz(long); long __ovld __cnfn convert_long_rtp(long); long __ovld __cnfn convert_long_sat_rtp(long); long __ovld __cnfn convert_long_rtn(long); long __ovld __cnfn convert_long_sat_rtn(long); long __ovld __cnfn convert_long(long); long __ovld __cnfn convert_long_sat(long); long __ovld __cnfn convert_long_rte(ulong); long __ovld __cnfn convert_long_sat_rte(ulong); long __ovld __cnfn convert_long_rtz(ulong); long __ovld __cnfn convert_long_sat_rtz(ulong); long __ovld __cnfn convert_long_rtp(ulong); long __ovld __cnfn convert_long_sat_rtp(ulong); long __ovld __cnfn convert_long_rtn(ulong); long __ovld __cnfn convert_long_sat_rtn(ulong); long __ovld __cnfn convert_long(ulong); long __ovld __cnfn convert_long_sat(ulong); long __ovld __cnfn convert_long_rte(float); long __ovld __cnfn convert_long_sat_rte(float); long __ovld __cnfn convert_long_rtz(float); long __ovld __cnfn convert_long_sat_rtz(float); long __ovld __cnfn convert_long_rtp(float); long __ovld __cnfn convert_long_sat_rtp(float); long __ovld __cnfn convert_long_rtn(float); long __ovld __cnfn convert_long_sat_rtn(float); long __ovld __cnfn convert_long(float); long __ovld __cnfn convert_long_sat(float); ulong __ovld __cnfn convert_ulong_rte(char); ulong __ovld __cnfn convert_ulong_sat_rte(char); ulong __ovld __cnfn convert_ulong_rtz(char); ulong __ovld __cnfn convert_ulong_sat_rtz(char); ulong __ovld __cnfn convert_ulong_rtp(char); ulong __ovld __cnfn convert_ulong_sat_rtp(char); ulong __ovld __cnfn convert_ulong_rtn(char); ulong __ovld __cnfn convert_ulong_sat_rtn(char); ulong __ovld __cnfn convert_ulong(char); ulong __ovld __cnfn convert_ulong_sat(char); ulong __ovld __cnfn convert_ulong_rte(uchar); ulong __ovld __cnfn convert_ulong_sat_rte(uchar); ulong __ovld __cnfn convert_ulong_rtz(uchar); ulong __ovld __cnfn convert_ulong_sat_rtz(uchar); ulong __ovld __cnfn convert_ulong_rtp(uchar); ulong __ovld __cnfn convert_ulong_sat_rtp(uchar); ulong __ovld __cnfn 
convert_ulong_rtn(uchar); ulong __ovld __cnfn convert_ulong_sat_rtn(uchar); ulong __ovld __cnfn convert_ulong(uchar); ulong __ovld __cnfn convert_ulong_sat(uchar); ulong __ovld __cnfn convert_ulong_rte(short); ulong __ovld __cnfn convert_ulong_sat_rte(short); ulong __ovld __cnfn convert_ulong_rtz(short); ulong __ovld __cnfn convert_ulong_sat_rtz(short); ulong __ovld __cnfn convert_ulong_rtp(short); ulong __ovld __cnfn convert_ulong_sat_rtp(short); ulong __ovld __cnfn convert_ulong_rtn(short); ulong __ovld __cnfn convert_ulong_sat_rtn(short); ulong __ovld __cnfn convert_ulong(short); ulong __ovld __cnfn convert_ulong_sat(short); ulong __ovld __cnfn convert_ulong_rte(ushort); ulong __ovld __cnfn convert_ulong_sat_rte(ushort); ulong __ovld __cnfn convert_ulong_rtz(ushort); ulong __ovld __cnfn convert_ulong_sat_rtz(ushort); ulong __ovld __cnfn convert_ulong_rtp(ushort); ulong __ovld __cnfn convert_ulong_sat_rtp(ushort); ulong __ovld __cnfn convert_ulong_rtn(ushort); ulong __ovld __cnfn convert_ulong_sat_rtn(ushort); ulong __ovld __cnfn convert_ulong(ushort); ulong __ovld __cnfn convert_ulong_sat(ushort); ulong __ovld __cnfn convert_ulong_rte(int); ulong __ovld __cnfn convert_ulong_sat_rte(int); ulong __ovld __cnfn convert_ulong_rtz(int); ulong __ovld __cnfn convert_ulong_sat_rtz(int); ulong __ovld __cnfn convert_ulong_rtp(int); ulong __ovld __cnfn convert_ulong_sat_rtp(int); ulong __ovld __cnfn convert_ulong_rtn(int); ulong __ovld __cnfn convert_ulong_sat_rtn(int); ulong __ovld __cnfn convert_ulong(int); ulong __ovld __cnfn convert_ulong_sat(int); ulong __ovld __cnfn convert_ulong_rte(uint); ulong __ovld __cnfn convert_ulong_sat_rte(uint); ulong __ovld __cnfn convert_ulong_rtz(uint); ulong __ovld __cnfn convert_ulong_sat_rtz(uint); ulong __ovld __cnfn convert_ulong_rtp(uint); ulong __ovld __cnfn convert_ulong_sat_rtp(uint); ulong __ovld __cnfn convert_ulong_rtn(uint); ulong __ovld __cnfn convert_ulong_sat_rtn(uint); ulong __ovld __cnfn convert_ulong(uint); ulong __ovld __cnfn convert_ulong_sat(uint); ulong __ovld __cnfn convert_ulong_rte(long); ulong __ovld __cnfn convert_ulong_sat_rte(long); ulong __ovld __cnfn convert_ulong_rtz(long); ulong __ovld __cnfn convert_ulong_sat_rtz(long); ulong __ovld __cnfn convert_ulong_rtp(long); ulong __ovld __cnfn convert_ulong_sat_rtp(long); ulong __ovld __cnfn convert_ulong_rtn(long); ulong __ovld __cnfn convert_ulong_sat_rtn(long); ulong __ovld __cnfn convert_ulong(long); ulong __ovld __cnfn convert_ulong_sat(long); ulong __ovld __cnfn convert_ulong_rte(ulong); ulong __ovld __cnfn convert_ulong_sat_rte(ulong); ulong __ovld __cnfn convert_ulong_rtz(ulong); ulong __ovld __cnfn convert_ulong_sat_rtz(ulong); ulong __ovld __cnfn convert_ulong_rtp(ulong); ulong __ovld __cnfn convert_ulong_sat_rtp(ulong); ulong __ovld __cnfn convert_ulong_rtn(ulong); ulong __ovld __cnfn convert_ulong_sat_rtn(ulong); ulong __ovld __cnfn convert_ulong(ulong); ulong __ovld __cnfn convert_ulong_sat(ulong); ulong __ovld __cnfn convert_ulong_rte(float); ulong __ovld __cnfn convert_ulong_sat_rte(float); ulong __ovld __cnfn convert_ulong_rtz(float); ulong __ovld __cnfn convert_ulong_sat_rtz(float); ulong __ovld __cnfn convert_ulong_rtp(float); ulong __ovld __cnfn convert_ulong_sat_rtp(float); ulong __ovld __cnfn convert_ulong_rtn(float); ulong __ovld __cnfn convert_ulong_sat_rtn(float); ulong __ovld __cnfn convert_ulong(float); ulong __ovld __cnfn convert_ulong_sat(float); float __ovld __cnfn convert_float_rte(char); float __ovld __cnfn convert_float_rtz(char); float __ovld __cnfn 
convert_float_rtp(char); float __ovld __cnfn convert_float_rtn(char); float __ovld __cnfn convert_float(char); float __ovld __cnfn convert_float_rte(uchar); float __ovld __cnfn convert_float_rtz(uchar); float __ovld __cnfn convert_float_rtp(uchar); float __ovld __cnfn convert_float_rtn(uchar); float __ovld __cnfn convert_float(uchar); float __ovld __cnfn convert_float_rte(short); float __ovld __cnfn convert_float_rtz(short); float __ovld __cnfn convert_float_rtp(short); float __ovld __cnfn convert_float_rtn(short); float __ovld __cnfn convert_float(short); float __ovld __cnfn convert_float_rte(ushort); float __ovld __cnfn convert_float_rtz(ushort); float __ovld __cnfn convert_float_rtp(ushort); float __ovld __cnfn convert_float_rtn(ushort); float __ovld __cnfn convert_float(ushort); float __ovld __cnfn convert_float_rte(int); float __ovld __cnfn convert_float_rtz(int); float __ovld __cnfn convert_float_rtp(int); float __ovld __cnfn convert_float_rtn(int); float __ovld __cnfn convert_float(int); float __ovld __cnfn convert_float_rte(uint); float __ovld __cnfn convert_float_rtz(uint); float __ovld __cnfn convert_float_rtp(uint); float __ovld __cnfn convert_float_rtn(uint); float __ovld __cnfn convert_float(uint); float __ovld __cnfn convert_float_rte(long); float __ovld __cnfn convert_float_rtz(long); float __ovld __cnfn convert_float_rtp(long); float __ovld __cnfn convert_float_rtn(long); float __ovld __cnfn convert_float(long); float __ovld __cnfn convert_float_rte(ulong); float __ovld __cnfn convert_float_rtz(ulong); float __ovld __cnfn convert_float_rtp(ulong); float __ovld __cnfn convert_float_rtn(ulong); float __ovld __cnfn convert_float(ulong); float __ovld __cnfn convert_float_rte(float); float __ovld __cnfn convert_float_rtz(float); float __ovld __cnfn convert_float_rtp(float); float __ovld __cnfn convert_float_rtn(float); float __ovld __cnfn convert_float(float); char2 __ovld __cnfn convert_char2_rte(char2); char2 __ovld __cnfn convert_char2_sat_rte(char2); char2 __ovld __cnfn convert_char2_rtz(char2); char2 __ovld __cnfn convert_char2_sat_rtz(char2); char2 __ovld __cnfn convert_char2_rtp(char2); char2 __ovld __cnfn convert_char2_sat_rtp(char2); char2 __ovld __cnfn convert_char2_rtn(char2); char2 __ovld __cnfn convert_char2_sat_rtn(char2); char2 __ovld __cnfn convert_char2(char2); char2 __ovld __cnfn convert_char2_sat(char2); char2 __ovld __cnfn convert_char2_rte(uchar2); char2 __ovld __cnfn convert_char2_sat_rte(uchar2); char2 __ovld __cnfn convert_char2_rtz(uchar2); char2 __ovld __cnfn convert_char2_sat_rtz(uchar2); char2 __ovld __cnfn convert_char2_rtp(uchar2); char2 __ovld __cnfn convert_char2_sat_rtp(uchar2); char2 __ovld __cnfn convert_char2_rtn(uchar2); char2 __ovld __cnfn convert_char2_sat_rtn(uchar2); char2 __ovld __cnfn convert_char2(uchar2); char2 __ovld __cnfn convert_char2_sat(uchar2); char2 __ovld __cnfn convert_char2_rte(short2); char2 __ovld __cnfn convert_char2_sat_rte(short2); char2 __ovld __cnfn convert_char2_rtz(short2); char2 __ovld __cnfn convert_char2_sat_rtz(short2); char2 __ovld __cnfn convert_char2_rtp(short2); char2 __ovld __cnfn convert_char2_sat_rtp(short2); char2 __ovld __cnfn convert_char2_rtn(short2); char2 __ovld __cnfn convert_char2_sat_rtn(short2); char2 __ovld __cnfn convert_char2(short2); char2 __ovld __cnfn convert_char2_sat(short2); char2 __ovld __cnfn convert_char2_rte(ushort2); char2 __ovld __cnfn convert_char2_sat_rte(ushort2); char2 __ovld __cnfn convert_char2_rtz(ushort2); char2 __ovld __cnfn convert_char2_sat_rtz(ushort2); char2 __ovld 
__cnfn convert_char2_rtp(ushort2); char2 __ovld __cnfn convert_char2_sat_rtp(ushort2); char2 __ovld __cnfn convert_char2_rtn(ushort2); char2 __ovld __cnfn convert_char2_sat_rtn(ushort2); char2 __ovld __cnfn convert_char2(ushort2); char2 __ovld __cnfn convert_char2_sat(ushort2); char2 __ovld __cnfn convert_char2_rte(int2); char2 __ovld __cnfn convert_char2_sat_rte(int2); char2 __ovld __cnfn convert_char2_rtz(int2); char2 __ovld __cnfn convert_char2_sat_rtz(int2); char2 __ovld __cnfn convert_char2_rtp(int2); char2 __ovld __cnfn convert_char2_sat_rtp(int2); char2 __ovld __cnfn convert_char2_rtn(int2); char2 __ovld __cnfn convert_char2_sat_rtn(int2); char2 __ovld __cnfn convert_char2(int2); char2 __ovld __cnfn convert_char2_sat(int2); char2 __ovld __cnfn convert_char2_rte(uint2); char2 __ovld __cnfn convert_char2_sat_rte(uint2); char2 __ovld __cnfn convert_char2_rtz(uint2); char2 __ovld __cnfn convert_char2_sat_rtz(uint2); char2 __ovld __cnfn convert_char2_rtp(uint2); char2 __ovld __cnfn convert_char2_sat_rtp(uint2); char2 __ovld __cnfn convert_char2_rtn(uint2); char2 __ovld __cnfn convert_char2_sat_rtn(uint2); char2 __ovld __cnfn convert_char2(uint2); char2 __ovld __cnfn convert_char2_sat(uint2); char2 __ovld __cnfn convert_char2_rte(long2); char2 __ovld __cnfn convert_char2_sat_rte(long2); char2 __ovld __cnfn convert_char2_rtz(long2); char2 __ovld __cnfn convert_char2_sat_rtz(long2); char2 __ovld __cnfn convert_char2_rtp(long2); char2 __ovld __cnfn convert_char2_sat_rtp(long2); char2 __ovld __cnfn convert_char2_rtn(long2); char2 __ovld __cnfn convert_char2_sat_rtn(long2); char2 __ovld __cnfn convert_char2(long2); char2 __ovld __cnfn convert_char2_sat(long2); char2 __ovld __cnfn convert_char2_rte(ulong2); char2 __ovld __cnfn convert_char2_sat_rte(ulong2); char2 __ovld __cnfn convert_char2_rtz(ulong2); char2 __ovld __cnfn convert_char2_sat_rtz(ulong2); char2 __ovld __cnfn convert_char2_rtp(ulong2); char2 __ovld __cnfn convert_char2_sat_rtp(ulong2); char2 __ovld __cnfn convert_char2_rtn(ulong2); char2 __ovld __cnfn convert_char2_sat_rtn(ulong2); char2 __ovld __cnfn convert_char2(ulong2); char2 __ovld __cnfn convert_char2_sat(ulong2); char2 __ovld __cnfn convert_char2_rte(float2); char2 __ovld __cnfn convert_char2_sat_rte(float2); char2 __ovld __cnfn convert_char2_rtz(float2); char2 __ovld __cnfn convert_char2_sat_rtz(float2); char2 __ovld __cnfn convert_char2_rtp(float2); char2 __ovld __cnfn convert_char2_sat_rtp(float2); char2 __ovld __cnfn convert_char2_rtn(float2); char2 __ovld __cnfn convert_char2_sat_rtn(float2); char2 __ovld __cnfn convert_char2(float2); char2 __ovld __cnfn convert_char2_sat(float2); uchar2 __ovld __cnfn convert_uchar2_rte(char2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(char2); uchar2 __ovld __cnfn convert_uchar2_rtz(char2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(char2); uchar2 __ovld __cnfn convert_uchar2_rtp(char2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(char2); uchar2 __ovld __cnfn convert_uchar2_rtn(char2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(char2); uchar2 __ovld __cnfn convert_uchar2(char2); uchar2 __ovld __cnfn convert_uchar2_sat(char2); uchar2 __ovld __cnfn convert_uchar2_rte(uchar2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(uchar2); uchar2 __ovld __cnfn convert_uchar2_rtz(uchar2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(uchar2); uchar2 __ovld __cnfn convert_uchar2_rtp(uchar2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(uchar2); uchar2 __ovld __cnfn convert_uchar2_rtn(uchar2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(uchar2); 
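The declarations above follow the OpenCL C naming pattern convert_<destType>[_sat][_<rounding>](srcType): _sat clamps the result to the destination's representable range (integer destinations only), and the rounding suffixes are _rte (to nearest even), _rtz (toward zero), _rtp (toward +infinity) and _rtn (toward -infinity); with no suffix, float-to-integer conversions round toward zero. As a minimal usage sketch of the scalar overloads declared earlier (the kernel and buffer names below are hypothetical, not taken from this file):

// Hypothetical kernel illustrating a scalar convert_* built-in.
__kernel void scale_to_int(__global const float *in, __global int *out) {
    size_t gid = get_global_id(0);
    float x = in[gid] * 100.0f;
    // convert_int_sat_rte: round to nearest even, then clamp to [INT_MIN, INT_MAX].
    out[gid] = convert_int_sat_rte(x);
}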
uchar2 __ovld __cnfn convert_uchar2(uchar2); uchar2 __ovld __cnfn convert_uchar2_sat(uchar2); uchar2 __ovld __cnfn convert_uchar2_rte(short2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(short2); uchar2 __ovld __cnfn convert_uchar2_rtz(short2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(short2); uchar2 __ovld __cnfn convert_uchar2_rtp(short2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(short2); uchar2 __ovld __cnfn convert_uchar2_rtn(short2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(short2); uchar2 __ovld __cnfn convert_uchar2(short2); uchar2 __ovld __cnfn convert_uchar2_sat(short2); uchar2 __ovld __cnfn convert_uchar2_rte(ushort2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(ushort2); uchar2 __ovld __cnfn convert_uchar2_rtz(ushort2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(ushort2); uchar2 __ovld __cnfn convert_uchar2_rtp(ushort2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(ushort2); uchar2 __ovld __cnfn convert_uchar2_rtn(ushort2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(ushort2); uchar2 __ovld __cnfn convert_uchar2(ushort2); uchar2 __ovld __cnfn convert_uchar2_sat(ushort2); uchar2 __ovld __cnfn convert_uchar2_rte(int2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(int2); uchar2 __ovld __cnfn convert_uchar2_rtz(int2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(int2); uchar2 __ovld __cnfn convert_uchar2_rtp(int2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(int2); uchar2 __ovld __cnfn convert_uchar2_rtn(int2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(int2); uchar2 __ovld __cnfn convert_uchar2(int2); uchar2 __ovld __cnfn convert_uchar2_sat(int2); uchar2 __ovld __cnfn convert_uchar2_rte(uint2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(uint2); uchar2 __ovld __cnfn convert_uchar2_rtz(uint2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(uint2); uchar2 __ovld __cnfn convert_uchar2_rtp(uint2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(uint2); uchar2 __ovld __cnfn convert_uchar2_rtn(uint2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(uint2); uchar2 __ovld __cnfn convert_uchar2(uint2); uchar2 __ovld __cnfn convert_uchar2_sat(uint2); uchar2 __ovld __cnfn convert_uchar2_rte(long2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(long2); uchar2 __ovld __cnfn convert_uchar2_rtz(long2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(long2); uchar2 __ovld __cnfn convert_uchar2_rtp(long2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(long2); uchar2 __ovld __cnfn convert_uchar2_rtn(long2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(long2); uchar2 __ovld __cnfn convert_uchar2(long2); uchar2 __ovld __cnfn convert_uchar2_sat(long2); uchar2 __ovld __cnfn convert_uchar2_rte(ulong2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(ulong2); uchar2 __ovld __cnfn convert_uchar2_rtz(ulong2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(ulong2); uchar2 __ovld __cnfn convert_uchar2_rtp(ulong2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(ulong2); uchar2 __ovld __cnfn convert_uchar2_rtn(ulong2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(ulong2); uchar2 __ovld __cnfn convert_uchar2(ulong2); uchar2 __ovld __cnfn convert_uchar2_sat(ulong2); uchar2 __ovld __cnfn convert_uchar2_rte(float2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(float2); uchar2 __ovld __cnfn convert_uchar2_rtz(float2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(float2); uchar2 __ovld __cnfn convert_uchar2_rtp(float2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(float2); uchar2 __ovld __cnfn convert_uchar2_rtn(float2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(float2); uchar2 __ovld __cnfn convert_uchar2(float2); uchar2 __ovld __cnfn 
convert_uchar2_sat(float2); short2 __ovld __cnfn convert_short2_rte(char2); short2 __ovld __cnfn convert_short2_sat_rte(char2); short2 __ovld __cnfn convert_short2_rtz(char2); short2 __ovld __cnfn convert_short2_sat_rtz(char2); short2 __ovld __cnfn convert_short2_rtp(char2); short2 __ovld __cnfn convert_short2_sat_rtp(char2); short2 __ovld __cnfn convert_short2_rtn(char2); short2 __ovld __cnfn convert_short2_sat_rtn(char2); short2 __ovld __cnfn convert_short2(char2); short2 __ovld __cnfn convert_short2_sat(char2); short2 __ovld __cnfn convert_short2_rte(uchar2); short2 __ovld __cnfn convert_short2_sat_rte(uchar2); short2 __ovld __cnfn convert_short2_rtz(uchar2); short2 __ovld __cnfn convert_short2_sat_rtz(uchar2); short2 __ovld __cnfn convert_short2_rtp(uchar2); short2 __ovld __cnfn convert_short2_sat_rtp(uchar2); short2 __ovld __cnfn convert_short2_rtn(uchar2); short2 __ovld __cnfn convert_short2_sat_rtn(uchar2); short2 __ovld __cnfn convert_short2(uchar2); short2 __ovld __cnfn convert_short2_sat(uchar2); short2 __ovld __cnfn convert_short2_rte(short2); short2 __ovld __cnfn convert_short2_sat_rte(short2); short2 __ovld __cnfn convert_short2_rtz(short2); short2 __ovld __cnfn convert_short2_sat_rtz(short2); short2 __ovld __cnfn convert_short2_rtp(short2); short2 __ovld __cnfn convert_short2_sat_rtp(short2); short2 __ovld __cnfn convert_short2_rtn(short2); short2 __ovld __cnfn convert_short2_sat_rtn(short2); short2 __ovld __cnfn convert_short2(short2); short2 __ovld __cnfn convert_short2_sat(short2); short2 __ovld __cnfn convert_short2_rte(ushort2); short2 __ovld __cnfn convert_short2_sat_rte(ushort2); short2 __ovld __cnfn convert_short2_rtz(ushort2); short2 __ovld __cnfn convert_short2_sat_rtz(ushort2); short2 __ovld __cnfn convert_short2_rtp(ushort2); short2 __ovld __cnfn convert_short2_sat_rtp(ushort2); short2 __ovld __cnfn convert_short2_rtn(ushort2); short2 __ovld __cnfn convert_short2_sat_rtn(ushort2); short2 __ovld __cnfn convert_short2(ushort2); short2 __ovld __cnfn convert_short2_sat(ushort2); short2 __ovld __cnfn convert_short2_rte(int2); short2 __ovld __cnfn convert_short2_sat_rte(int2); short2 __ovld __cnfn convert_short2_rtz(int2); short2 __ovld __cnfn convert_short2_sat_rtz(int2); short2 __ovld __cnfn convert_short2_rtp(int2); short2 __ovld __cnfn convert_short2_sat_rtp(int2); short2 __ovld __cnfn convert_short2_rtn(int2); short2 __ovld __cnfn convert_short2_sat_rtn(int2); short2 __ovld __cnfn convert_short2(int2); short2 __ovld __cnfn convert_short2_sat(int2); short2 __ovld __cnfn convert_short2_rte(uint2); short2 __ovld __cnfn convert_short2_sat_rte(uint2); short2 __ovld __cnfn convert_short2_rtz(uint2); short2 __ovld __cnfn convert_short2_sat_rtz(uint2); short2 __ovld __cnfn convert_short2_rtp(uint2); short2 __ovld __cnfn convert_short2_sat_rtp(uint2); short2 __ovld __cnfn convert_short2_rtn(uint2); short2 __ovld __cnfn convert_short2_sat_rtn(uint2); short2 __ovld __cnfn convert_short2(uint2); short2 __ovld __cnfn convert_short2_sat(uint2); short2 __ovld __cnfn convert_short2_rte(long2); short2 __ovld __cnfn convert_short2_sat_rte(long2); short2 __ovld __cnfn convert_short2_rtz(long2); short2 __ovld __cnfn convert_short2_sat_rtz(long2); short2 __ovld __cnfn convert_short2_rtp(long2); short2 __ovld __cnfn convert_short2_sat_rtp(long2); short2 __ovld __cnfn convert_short2_rtn(long2); short2 __ovld __cnfn convert_short2_sat_rtn(long2); short2 __ovld __cnfn convert_short2(long2); short2 __ovld __cnfn convert_short2_sat(long2); short2 __ovld __cnfn convert_short2_rte(ulong2); 
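The vector overloads (char2/uchar2/short2/... sources and destinations) apply the same conversion element-wise; source and destination must have the same number of elements. A small sketch using the 2-element float-to-uchar overload declared above (names are hypothetical, for illustration only):

// Hypothetical kernel packing normalized float2 values into uchar2.
__kernel void pack_uv(__global const float2 *uv, __global uchar2 *out) {
    size_t gid = get_global_id(0);
    // Scale [0,1] coordinates to [0,255]; _sat clamps out-of-range elements,
    // _rte rounds each element to the nearest even integer before the clamp.
    out[gid] = convert_uchar2_sat_rte(uv[gid] * 255.0f);
}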
short2 __ovld __cnfn convert_short2_sat_rte(ulong2); short2 __ovld __cnfn convert_short2_rtz(ulong2); short2 __ovld __cnfn convert_short2_sat_rtz(ulong2); short2 __ovld __cnfn convert_short2_rtp(ulong2); short2 __ovld __cnfn convert_short2_sat_rtp(ulong2); short2 __ovld __cnfn convert_short2_rtn(ulong2); short2 __ovld __cnfn convert_short2_sat_rtn(ulong2); short2 __ovld __cnfn convert_short2(ulong2); short2 __ovld __cnfn convert_short2_sat(ulong2); short2 __ovld __cnfn convert_short2_rte(float2); short2 __ovld __cnfn convert_short2_sat_rte(float2); short2 __ovld __cnfn convert_short2_rtz(float2); short2 __ovld __cnfn convert_short2_sat_rtz(float2); short2 __ovld __cnfn convert_short2_rtp(float2); short2 __ovld __cnfn convert_short2_sat_rtp(float2); short2 __ovld __cnfn convert_short2_rtn(float2); short2 __ovld __cnfn convert_short2_sat_rtn(float2); short2 __ovld __cnfn convert_short2(float2); short2 __ovld __cnfn convert_short2_sat(float2); ushort2 __ovld __cnfn convert_ushort2_rte(char2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(char2); ushort2 __ovld __cnfn convert_ushort2_rtz(char2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(char2); ushort2 __ovld __cnfn convert_ushort2_rtp(char2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(char2); ushort2 __ovld __cnfn convert_ushort2_rtn(char2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(char2); ushort2 __ovld __cnfn convert_ushort2(char2); ushort2 __ovld __cnfn convert_ushort2_sat(char2); ushort2 __ovld __cnfn convert_ushort2_rte(uchar2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(uchar2); ushort2 __ovld __cnfn convert_ushort2_rtz(uchar2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(uchar2); ushort2 __ovld __cnfn convert_ushort2_rtp(uchar2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(uchar2); ushort2 __ovld __cnfn convert_ushort2_rtn(uchar2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(uchar2); ushort2 __ovld __cnfn convert_ushort2(uchar2); ushort2 __ovld __cnfn convert_ushort2_sat(uchar2); ushort2 __ovld __cnfn convert_ushort2_rte(short2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(short2); ushort2 __ovld __cnfn convert_ushort2_rtz(short2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(short2); ushort2 __ovld __cnfn convert_ushort2_rtp(short2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(short2); ushort2 __ovld __cnfn convert_ushort2_rtn(short2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(short2); ushort2 __ovld __cnfn convert_ushort2(short2); ushort2 __ovld __cnfn convert_ushort2_sat(short2); ushort2 __ovld __cnfn convert_ushort2_rte(ushort2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(ushort2); ushort2 __ovld __cnfn convert_ushort2_rtz(ushort2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(ushort2); ushort2 __ovld __cnfn convert_ushort2_rtp(ushort2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(ushort2); ushort2 __ovld __cnfn convert_ushort2_rtn(ushort2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(ushort2); ushort2 __ovld __cnfn convert_ushort2(ushort2); ushort2 __ovld __cnfn convert_ushort2_sat(ushort2); ushort2 __ovld __cnfn convert_ushort2_rte(int2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(int2); ushort2 __ovld __cnfn convert_ushort2_rtz(int2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(int2); ushort2 __ovld __cnfn convert_ushort2_rtp(int2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(int2); ushort2 __ovld __cnfn convert_ushort2_rtn(int2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(int2); ushort2 __ovld __cnfn convert_ushort2(int2); ushort2 __ovld __cnfn convert_ushort2_sat(int2); ushort2 
__ovld __cnfn convert_ushort2_rte(uint2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(uint2); ushort2 __ovld __cnfn convert_ushort2_rtz(uint2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(uint2); ushort2 __ovld __cnfn convert_ushort2_rtp(uint2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(uint2); ushort2 __ovld __cnfn convert_ushort2_rtn(uint2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(uint2); ushort2 __ovld __cnfn convert_ushort2(uint2); ushort2 __ovld __cnfn convert_ushort2_sat(uint2); ushort2 __ovld __cnfn convert_ushort2_rte(long2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(long2); ushort2 __ovld __cnfn convert_ushort2_rtz(long2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(long2); ushort2 __ovld __cnfn convert_ushort2_rtp(long2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(long2); ushort2 __ovld __cnfn convert_ushort2_rtn(long2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(long2); ushort2 __ovld __cnfn convert_ushort2(long2); ushort2 __ovld __cnfn convert_ushort2_sat(long2); ushort2 __ovld __cnfn convert_ushort2_rte(ulong2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(ulong2); ushort2 __ovld __cnfn convert_ushort2_rtz(ulong2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(ulong2); ushort2 __ovld __cnfn convert_ushort2_rtp(ulong2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(ulong2); ushort2 __ovld __cnfn convert_ushort2_rtn(ulong2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(ulong2); ushort2 __ovld __cnfn convert_ushort2(ulong2); ushort2 __ovld __cnfn convert_ushort2_sat(ulong2); ushort2 __ovld __cnfn convert_ushort2_rte(float2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(float2); ushort2 __ovld __cnfn convert_ushort2_rtz(float2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(float2); ushort2 __ovld __cnfn convert_ushort2_rtp(float2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(float2); ushort2 __ovld __cnfn convert_ushort2_rtn(float2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(float2); ushort2 __ovld __cnfn convert_ushort2(float2); ushort2 __ovld __cnfn convert_ushort2_sat(float2); int2 __ovld __cnfn convert_int2_rte(char2); int2 __ovld __cnfn convert_int2_sat_rte(char2); int2 __ovld __cnfn convert_int2_rtz(char2); int2 __ovld __cnfn convert_int2_sat_rtz(char2); int2 __ovld __cnfn convert_int2_rtp(char2); int2 __ovld __cnfn convert_int2_sat_rtp(char2); int2 __ovld __cnfn convert_int2_rtn(char2); int2 __ovld __cnfn convert_int2_sat_rtn(char2); int2 __ovld __cnfn convert_int2(char2); int2 __ovld __cnfn convert_int2_sat(char2); int2 __ovld __cnfn convert_int2_rte(uchar2); int2 __ovld __cnfn convert_int2_sat_rte(uchar2); int2 __ovld __cnfn convert_int2_rtz(uchar2); int2 __ovld __cnfn convert_int2_sat_rtz(uchar2); int2 __ovld __cnfn convert_int2_rtp(uchar2); int2 __ovld __cnfn convert_int2_sat_rtp(uchar2); int2 __ovld __cnfn convert_int2_rtn(uchar2); int2 __ovld __cnfn convert_int2_sat_rtn(uchar2); int2 __ovld __cnfn convert_int2(uchar2); int2 __ovld __cnfn convert_int2_sat(uchar2); int2 __ovld __cnfn convert_int2_rte(short2); int2 __ovld __cnfn convert_int2_sat_rte(short2); int2 __ovld __cnfn convert_int2_rtz(short2); int2 __ovld __cnfn convert_int2_sat_rtz(short2); int2 __ovld __cnfn convert_int2_rtp(short2); int2 __ovld __cnfn convert_int2_sat_rtp(short2); int2 __ovld __cnfn convert_int2_rtn(short2); int2 __ovld __cnfn convert_int2_sat_rtn(short2); int2 __ovld __cnfn convert_int2(short2); int2 __ovld __cnfn convert_int2_sat(short2); int2 __ovld __cnfn convert_int2_rte(ushort2); int2 __ovld __cnfn convert_int2_sat_rte(ushort2); int2 __ovld __cnfn 
convert_int2_rtz(ushort2); int2 __ovld __cnfn convert_int2_sat_rtz(ushort2); int2 __ovld __cnfn convert_int2_rtp(ushort2); int2 __ovld __cnfn convert_int2_sat_rtp(ushort2); int2 __ovld __cnfn convert_int2_rtn(ushort2); int2 __ovld __cnfn convert_int2_sat_rtn(ushort2); int2 __ovld __cnfn convert_int2(ushort2); int2 __ovld __cnfn convert_int2_sat(ushort2); int2 __ovld __cnfn convert_int2_rte(int2); int2 __ovld __cnfn convert_int2_sat_rte(int2); int2 __ovld __cnfn convert_int2_rtz(int2); int2 __ovld __cnfn convert_int2_sat_rtz(int2); int2 __ovld __cnfn convert_int2_rtp(int2); int2 __ovld __cnfn convert_int2_sat_rtp(int2); int2 __ovld __cnfn convert_int2_rtn(int2); int2 __ovld __cnfn convert_int2_sat_rtn(int2); int2 __ovld __cnfn convert_int2(int2); int2 __ovld __cnfn convert_int2_sat(int2); int2 __ovld __cnfn convert_int2_rte(uint2); int2 __ovld __cnfn convert_int2_sat_rte(uint2); int2 __ovld __cnfn convert_int2_rtz(uint2); int2 __ovld __cnfn convert_int2_sat_rtz(uint2); int2 __ovld __cnfn convert_int2_rtp(uint2); int2 __ovld __cnfn convert_int2_sat_rtp(uint2); int2 __ovld __cnfn convert_int2_rtn(uint2); int2 __ovld __cnfn convert_int2_sat_rtn(uint2); int2 __ovld __cnfn convert_int2(uint2); int2 __ovld __cnfn convert_int2_sat(uint2); int2 __ovld __cnfn convert_int2_rte(long2); int2 __ovld __cnfn convert_int2_sat_rte(long2); int2 __ovld __cnfn convert_int2_rtz(long2); int2 __ovld __cnfn convert_int2_sat_rtz(long2); int2 __ovld __cnfn convert_int2_rtp(long2); int2 __ovld __cnfn convert_int2_sat_rtp(long2); int2 __ovld __cnfn convert_int2_rtn(long2); int2 __ovld __cnfn convert_int2_sat_rtn(long2); int2 __ovld __cnfn convert_int2(long2); int2 __ovld __cnfn convert_int2_sat(long2); int2 __ovld __cnfn convert_int2_rte(ulong2); int2 __ovld __cnfn convert_int2_sat_rte(ulong2); int2 __ovld __cnfn convert_int2_rtz(ulong2); int2 __ovld __cnfn convert_int2_sat_rtz(ulong2); int2 __ovld __cnfn convert_int2_rtp(ulong2); int2 __ovld __cnfn convert_int2_sat_rtp(ulong2); int2 __ovld __cnfn convert_int2_rtn(ulong2); int2 __ovld __cnfn convert_int2_sat_rtn(ulong2); int2 __ovld __cnfn convert_int2(ulong2); int2 __ovld __cnfn convert_int2_sat(ulong2); int2 __ovld __cnfn convert_int2_rte(float2); int2 __ovld __cnfn convert_int2_sat_rte(float2); int2 __ovld __cnfn convert_int2_rtz(float2); int2 __ovld __cnfn convert_int2_sat_rtz(float2); int2 __ovld __cnfn convert_int2_rtp(float2); int2 __ovld __cnfn convert_int2_sat_rtp(float2); int2 __ovld __cnfn convert_int2_rtn(float2); int2 __ovld __cnfn convert_int2_sat_rtn(float2); int2 __ovld __cnfn convert_int2(float2); int2 __ovld __cnfn convert_int2_sat(float2); uint2 __ovld __cnfn convert_uint2_rte(char2); uint2 __ovld __cnfn convert_uint2_sat_rte(char2); uint2 __ovld __cnfn convert_uint2_rtz(char2); uint2 __ovld __cnfn convert_uint2_sat_rtz(char2); uint2 __ovld __cnfn convert_uint2_rtp(char2); uint2 __ovld __cnfn convert_uint2_sat_rtp(char2); uint2 __ovld __cnfn convert_uint2_rtn(char2); uint2 __ovld __cnfn convert_uint2_sat_rtn(char2); uint2 __ovld __cnfn convert_uint2(char2); uint2 __ovld __cnfn convert_uint2_sat(char2); uint2 __ovld __cnfn convert_uint2_rte(uchar2); uint2 __ovld __cnfn convert_uint2_sat_rte(uchar2); uint2 __ovld __cnfn convert_uint2_rtz(uchar2); uint2 __ovld __cnfn convert_uint2_sat_rtz(uchar2); uint2 __ovld __cnfn convert_uint2_rtp(uchar2); uint2 __ovld __cnfn convert_uint2_sat_rtp(uchar2); uint2 __ovld __cnfn convert_uint2_rtn(uchar2); uint2 __ovld __cnfn convert_uint2_sat_rtn(uchar2); uint2 __ovld __cnfn convert_uint2(uchar2); uint2 __ovld __cnfn 
convert_uint2_sat(uchar2); uint2 __ovld __cnfn convert_uint2_rte(short2); uint2 __ovld __cnfn convert_uint2_sat_rte(short2); uint2 __ovld __cnfn convert_uint2_rtz(short2); uint2 __ovld __cnfn convert_uint2_sat_rtz(short2); uint2 __ovld __cnfn convert_uint2_rtp(short2); uint2 __ovld __cnfn convert_uint2_sat_rtp(short2); uint2 __ovld __cnfn convert_uint2_rtn(short2); uint2 __ovld __cnfn convert_uint2_sat_rtn(short2); uint2 __ovld __cnfn convert_uint2(short2); uint2 __ovld __cnfn convert_uint2_sat(short2); uint2 __ovld __cnfn convert_uint2_rte(ushort2); uint2 __ovld __cnfn convert_uint2_sat_rte(ushort2); uint2 __ovld __cnfn convert_uint2_rtz(ushort2); uint2 __ovld __cnfn convert_uint2_sat_rtz(ushort2); uint2 __ovld __cnfn convert_uint2_rtp(ushort2); uint2 __ovld __cnfn convert_uint2_sat_rtp(ushort2); uint2 __ovld __cnfn convert_uint2_rtn(ushort2); uint2 __ovld __cnfn convert_uint2_sat_rtn(ushort2); uint2 __ovld __cnfn convert_uint2(ushort2); uint2 __ovld __cnfn convert_uint2_sat(ushort2); uint2 __ovld __cnfn convert_uint2_rte(int2); uint2 __ovld __cnfn convert_uint2_sat_rte(int2); uint2 __ovld __cnfn convert_uint2_rtz(int2); uint2 __ovld __cnfn convert_uint2_sat_rtz(int2); uint2 __ovld __cnfn convert_uint2_rtp(int2); uint2 __ovld __cnfn convert_uint2_sat_rtp(int2); uint2 __ovld __cnfn convert_uint2_rtn(int2); uint2 __ovld __cnfn convert_uint2_sat_rtn(int2); uint2 __ovld __cnfn convert_uint2(int2); uint2 __ovld __cnfn convert_uint2_sat(int2); uint2 __ovld __cnfn convert_uint2_rte(uint2); uint2 __ovld __cnfn convert_uint2_sat_rte(uint2); uint2 __ovld __cnfn convert_uint2_rtz(uint2); uint2 __ovld __cnfn convert_uint2_sat_rtz(uint2); uint2 __ovld __cnfn convert_uint2_rtp(uint2); uint2 __ovld __cnfn convert_uint2_sat_rtp(uint2); uint2 __ovld __cnfn convert_uint2_rtn(uint2); uint2 __ovld __cnfn convert_uint2_sat_rtn(uint2); uint2 __ovld __cnfn convert_uint2(uint2); uint2 __ovld __cnfn convert_uint2_sat(uint2); uint2 __ovld __cnfn convert_uint2_rte(long2); uint2 __ovld __cnfn convert_uint2_sat_rte(long2); uint2 __ovld __cnfn convert_uint2_rtz(long2); uint2 __ovld __cnfn convert_uint2_sat_rtz(long2); uint2 __ovld __cnfn convert_uint2_rtp(long2); uint2 __ovld __cnfn convert_uint2_sat_rtp(long2); uint2 __ovld __cnfn convert_uint2_rtn(long2); uint2 __ovld __cnfn convert_uint2_sat_rtn(long2); uint2 __ovld __cnfn convert_uint2(long2); uint2 __ovld __cnfn convert_uint2_sat(long2); uint2 __ovld __cnfn convert_uint2_rte(ulong2); uint2 __ovld __cnfn convert_uint2_sat_rte(ulong2); uint2 __ovld __cnfn convert_uint2_rtz(ulong2); uint2 __ovld __cnfn convert_uint2_sat_rtz(ulong2); uint2 __ovld __cnfn convert_uint2_rtp(ulong2); uint2 __ovld __cnfn convert_uint2_sat_rtp(ulong2); uint2 __ovld __cnfn convert_uint2_rtn(ulong2); uint2 __ovld __cnfn convert_uint2_sat_rtn(ulong2); uint2 __ovld __cnfn convert_uint2(ulong2); uint2 __ovld __cnfn convert_uint2_sat(ulong2); uint2 __ovld __cnfn convert_uint2_rte(float2); uint2 __ovld __cnfn convert_uint2_sat_rte(float2); uint2 __ovld __cnfn convert_uint2_rtz(float2); uint2 __ovld __cnfn convert_uint2_sat_rtz(float2); uint2 __ovld __cnfn convert_uint2_rtp(float2); uint2 __ovld __cnfn convert_uint2_sat_rtp(float2); uint2 __ovld __cnfn convert_uint2_rtn(float2); uint2 __ovld __cnfn convert_uint2_sat_rtn(float2); uint2 __ovld __cnfn convert_uint2(float2); uint2 __ovld __cnfn convert_uint2_sat(float2); long2 __ovld __cnfn convert_long2_rte(char2); long2 __ovld __cnfn convert_long2_sat_rte(char2); long2 __ovld __cnfn convert_long2_rtz(char2); long2 __ovld __cnfn 
convert_long2_sat_rtz(char2); long2 __ovld __cnfn convert_long2_rtp(char2); long2 __ovld __cnfn convert_long2_sat_rtp(char2); long2 __ovld __cnfn convert_long2_rtn(char2); long2 __ovld __cnfn convert_long2_sat_rtn(char2); long2 __ovld __cnfn convert_long2(char2); long2 __ovld __cnfn convert_long2_sat(char2); long2 __ovld __cnfn convert_long2_rte(uchar2); long2 __ovld __cnfn convert_long2_sat_rte(uchar2); long2 __ovld __cnfn convert_long2_rtz(uchar2); long2 __ovld __cnfn convert_long2_sat_rtz(uchar2); long2 __ovld __cnfn convert_long2_rtp(uchar2); long2 __ovld __cnfn convert_long2_sat_rtp(uchar2); long2 __ovld __cnfn convert_long2_rtn(uchar2); long2 __ovld __cnfn convert_long2_sat_rtn(uchar2); long2 __ovld __cnfn convert_long2(uchar2); long2 __ovld __cnfn convert_long2_sat(uchar2); long2 __ovld __cnfn convert_long2_rte(short2); long2 __ovld __cnfn convert_long2_sat_rte(short2); long2 __ovld __cnfn convert_long2_rtz(short2); long2 __ovld __cnfn convert_long2_sat_rtz(short2); long2 __ovld __cnfn convert_long2_rtp(short2); long2 __ovld __cnfn convert_long2_sat_rtp(short2); long2 __ovld __cnfn convert_long2_rtn(short2); long2 __ovld __cnfn convert_long2_sat_rtn(short2); long2 __ovld __cnfn convert_long2(short2); long2 __ovld __cnfn convert_long2_sat(short2); long2 __ovld __cnfn convert_long2_rte(ushort2); long2 __ovld __cnfn convert_long2_sat_rte(ushort2); long2 __ovld __cnfn convert_long2_rtz(ushort2); long2 __ovld __cnfn convert_long2_sat_rtz(ushort2); long2 __ovld __cnfn convert_long2_rtp(ushort2); long2 __ovld __cnfn convert_long2_sat_rtp(ushort2); long2 __ovld __cnfn convert_long2_rtn(ushort2); long2 __ovld __cnfn convert_long2_sat_rtn(ushort2); long2 __ovld __cnfn convert_long2(ushort2); long2 __ovld __cnfn convert_long2_sat(ushort2); long2 __ovld __cnfn convert_long2_rte(int2); long2 __ovld __cnfn convert_long2_sat_rte(int2); long2 __ovld __cnfn convert_long2_rtz(int2); long2 __ovld __cnfn convert_long2_sat_rtz(int2); long2 __ovld __cnfn convert_long2_rtp(int2); long2 __ovld __cnfn convert_long2_sat_rtp(int2); long2 __ovld __cnfn convert_long2_rtn(int2); long2 __ovld __cnfn convert_long2_sat_rtn(int2); long2 __ovld __cnfn convert_long2(int2); long2 __ovld __cnfn convert_long2_sat(int2); long2 __ovld __cnfn convert_long2_rte(uint2); long2 __ovld __cnfn convert_long2_sat_rte(uint2); long2 __ovld __cnfn convert_long2_rtz(uint2); long2 __ovld __cnfn convert_long2_sat_rtz(uint2); long2 __ovld __cnfn convert_long2_rtp(uint2); long2 __ovld __cnfn convert_long2_sat_rtp(uint2); long2 __ovld __cnfn convert_long2_rtn(uint2); long2 __ovld __cnfn convert_long2_sat_rtn(uint2); long2 __ovld __cnfn convert_long2(uint2); long2 __ovld __cnfn convert_long2_sat(uint2); long2 __ovld __cnfn convert_long2_rte(long2); long2 __ovld __cnfn convert_long2_sat_rte(long2); long2 __ovld __cnfn convert_long2_rtz(long2); long2 __ovld __cnfn convert_long2_sat_rtz(long2); long2 __ovld __cnfn convert_long2_rtp(long2); long2 __ovld __cnfn convert_long2_sat_rtp(long2); long2 __ovld __cnfn convert_long2_rtn(long2); long2 __ovld __cnfn convert_long2_sat_rtn(long2); long2 __ovld __cnfn convert_long2(long2); long2 __ovld __cnfn convert_long2_sat(long2); long2 __ovld __cnfn convert_long2_rte(ulong2); long2 __ovld __cnfn convert_long2_sat_rte(ulong2); long2 __ovld __cnfn convert_long2_rtz(ulong2); long2 __ovld __cnfn convert_long2_sat_rtz(ulong2); long2 __ovld __cnfn convert_long2_rtp(ulong2); long2 __ovld __cnfn convert_long2_sat_rtp(ulong2); long2 __ovld __cnfn convert_long2_rtn(ulong2); long2 __ovld __cnfn 
convert_long2_sat_rtn(ulong2); long2 __ovld __cnfn convert_long2(ulong2); long2 __ovld __cnfn convert_long2_sat(ulong2); long2 __ovld __cnfn convert_long2_rte(float2); long2 __ovld __cnfn convert_long2_sat_rte(float2); long2 __ovld __cnfn convert_long2_rtz(float2); long2 __ovld __cnfn convert_long2_sat_rtz(float2); long2 __ovld __cnfn convert_long2_rtp(float2); long2 __ovld __cnfn convert_long2_sat_rtp(float2); long2 __ovld __cnfn convert_long2_rtn(float2); long2 __ovld __cnfn convert_long2_sat_rtn(float2); long2 __ovld __cnfn convert_long2(float2); long2 __ovld __cnfn convert_long2_sat(float2); ulong2 __ovld __cnfn convert_ulong2_rte(char2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(char2); ulong2 __ovld __cnfn convert_ulong2_rtz(char2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(char2); ulong2 __ovld __cnfn convert_ulong2_rtp(char2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(char2); ulong2 __ovld __cnfn convert_ulong2_rtn(char2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(char2); ulong2 __ovld __cnfn convert_ulong2(char2); ulong2 __ovld __cnfn convert_ulong2_sat(char2); ulong2 __ovld __cnfn convert_ulong2_rte(uchar2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(uchar2); ulong2 __ovld __cnfn convert_ulong2_rtz(uchar2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(uchar2); ulong2 __ovld __cnfn convert_ulong2_rtp(uchar2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(uchar2); ulong2 __ovld __cnfn convert_ulong2_rtn(uchar2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(uchar2); ulong2 __ovld __cnfn convert_ulong2(uchar2); ulong2 __ovld __cnfn convert_ulong2_sat(uchar2); ulong2 __ovld __cnfn convert_ulong2_rte(short2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(short2); ulong2 __ovld __cnfn convert_ulong2_rtz(short2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(short2); ulong2 __ovld __cnfn convert_ulong2_rtp(short2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(short2); ulong2 __ovld __cnfn convert_ulong2_rtn(short2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(short2); ulong2 __ovld __cnfn convert_ulong2(short2); ulong2 __ovld __cnfn convert_ulong2_sat(short2); ulong2 __ovld __cnfn convert_ulong2_rte(ushort2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(ushort2); ulong2 __ovld __cnfn convert_ulong2_rtz(ushort2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(ushort2); ulong2 __ovld __cnfn convert_ulong2_rtp(ushort2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(ushort2); ulong2 __ovld __cnfn convert_ulong2_rtn(ushort2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(ushort2); ulong2 __ovld __cnfn convert_ulong2(ushort2); ulong2 __ovld __cnfn convert_ulong2_sat(ushort2); ulong2 __ovld __cnfn convert_ulong2_rte(int2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(int2); ulong2 __ovld __cnfn convert_ulong2_rtz(int2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(int2); ulong2 __ovld __cnfn convert_ulong2_rtp(int2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(int2); ulong2 __ovld __cnfn convert_ulong2_rtn(int2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(int2); ulong2 __ovld __cnfn convert_ulong2(int2); ulong2 __ovld __cnfn convert_ulong2_sat(int2); ulong2 __ovld __cnfn convert_ulong2_rte(uint2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(uint2); ulong2 __ovld __cnfn convert_ulong2_rtz(uint2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(uint2); ulong2 __ovld __cnfn convert_ulong2_rtp(uint2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(uint2); ulong2 __ovld __cnfn convert_ulong2_rtn(uint2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(uint2); ulong2 __ovld __cnfn convert_ulong2(uint2); ulong2 __ovld 
__cnfn convert_ulong2_sat(uint2); ulong2 __ovld __cnfn convert_ulong2_rte(long2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(long2); ulong2 __ovld __cnfn convert_ulong2_rtz(long2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(long2); ulong2 __ovld __cnfn convert_ulong2_rtp(long2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(long2); ulong2 __ovld __cnfn convert_ulong2_rtn(long2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(long2); ulong2 __ovld __cnfn convert_ulong2(long2); ulong2 __ovld __cnfn convert_ulong2_sat(long2); ulong2 __ovld __cnfn convert_ulong2_rte(ulong2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(ulong2); ulong2 __ovld __cnfn convert_ulong2_rtz(ulong2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(ulong2); ulong2 __ovld __cnfn convert_ulong2_rtp(ulong2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(ulong2); ulong2 __ovld __cnfn convert_ulong2_rtn(ulong2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(ulong2); ulong2 __ovld __cnfn convert_ulong2(ulong2); ulong2 __ovld __cnfn convert_ulong2_sat(ulong2); ulong2 __ovld __cnfn convert_ulong2_rte(float2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(float2); ulong2 __ovld __cnfn convert_ulong2_rtz(float2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(float2); ulong2 __ovld __cnfn convert_ulong2_rtp(float2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(float2); ulong2 __ovld __cnfn convert_ulong2_rtn(float2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(float2); ulong2 __ovld __cnfn convert_ulong2(float2); ulong2 __ovld __cnfn convert_ulong2_sat(float2); float2 __ovld __cnfn convert_float2_rte(char2); float2 __ovld __cnfn convert_float2_rtz(char2); float2 __ovld __cnfn convert_float2_rtp(char2); float2 __ovld __cnfn convert_float2_rtn(char2); float2 __ovld __cnfn convert_float2(char2); float2 __ovld __cnfn convert_float2_rte(uchar2); float2 __ovld __cnfn convert_float2_rtz(uchar2); float2 __ovld __cnfn convert_float2_rtp(uchar2); float2 __ovld __cnfn convert_float2_rtn(uchar2); float2 __ovld __cnfn convert_float2(uchar2); float2 __ovld __cnfn convert_float2_rte(short2); float2 __ovld __cnfn convert_float2_rtz(short2); float2 __ovld __cnfn convert_float2_rtp(short2); float2 __ovld __cnfn convert_float2_rtn(short2); float2 __ovld __cnfn convert_float2(short2); float2 __ovld __cnfn convert_float2_rte(ushort2); float2 __ovld __cnfn convert_float2_rtz(ushort2); float2 __ovld __cnfn convert_float2_rtp(ushort2); float2 __ovld __cnfn convert_float2_rtn(ushort2); float2 __ovld __cnfn convert_float2(ushort2); float2 __ovld __cnfn convert_float2_rte(int2); float2 __ovld __cnfn convert_float2_rtz(int2); float2 __ovld __cnfn convert_float2_rtp(int2); float2 __ovld __cnfn convert_float2_rtn(int2); float2 __ovld __cnfn convert_float2(int2); float2 __ovld __cnfn convert_float2_rte(uint2); float2 __ovld __cnfn convert_float2_rtz(uint2); float2 __ovld __cnfn convert_float2_rtp(uint2); float2 __ovld __cnfn convert_float2_rtn(uint2); float2 __ovld __cnfn convert_float2(uint2); float2 __ovld __cnfn convert_float2_rte(long2); float2 __ovld __cnfn convert_float2_rtz(long2); float2 __ovld __cnfn convert_float2_rtp(long2); float2 __ovld __cnfn convert_float2_rtn(long2); float2 __ovld __cnfn convert_float2(long2); float2 __ovld __cnfn convert_float2_rte(ulong2); float2 __ovld __cnfn convert_float2_rtz(ulong2); float2 __ovld __cnfn convert_float2_rtp(ulong2); float2 __ovld __cnfn convert_float2_rtn(ulong2); float2 __ovld __cnfn convert_float2(ulong2); float2 __ovld __cnfn convert_float2_rte(float2); float2 __ovld __cnfn convert_float2_rtz(float2); float2 __ovld 
__cnfn convert_float2_rtp(float2); float2 __ovld __cnfn convert_float2_rtn(float2); float2 __ovld __cnfn convert_float2(float2); char3 __ovld __cnfn convert_char3_rte(char3); char3 __ovld __cnfn convert_char3_sat_rte(char3); char3 __ovld __cnfn convert_char3_rtz(char3); char3 __ovld __cnfn convert_char3_sat_rtz(char3); char3 __ovld __cnfn convert_char3_rtp(char3); char3 __ovld __cnfn convert_char3_sat_rtp(char3); char3 __ovld __cnfn convert_char3_rtn(char3); char3 __ovld __cnfn convert_char3_sat_rtn(char3); char3 __ovld __cnfn convert_char3(char3); char3 __ovld __cnfn convert_char3_sat(char3); char3 __ovld __cnfn convert_char3_rte(uchar3); char3 __ovld __cnfn convert_char3_sat_rte(uchar3); char3 __ovld __cnfn convert_char3_rtz(uchar3); char3 __ovld __cnfn convert_char3_sat_rtz(uchar3); char3 __ovld __cnfn convert_char3_rtp(uchar3); char3 __ovld __cnfn convert_char3_sat_rtp(uchar3); char3 __ovld __cnfn convert_char3_rtn(uchar3); char3 __ovld __cnfn convert_char3_sat_rtn(uchar3); char3 __ovld __cnfn convert_char3(uchar3); char3 __ovld __cnfn convert_char3_sat(uchar3); char3 __ovld __cnfn convert_char3_rte(short3); char3 __ovld __cnfn convert_char3_sat_rte(short3); char3 __ovld __cnfn convert_char3_rtz(short3); char3 __ovld __cnfn convert_char3_sat_rtz(short3); char3 __ovld __cnfn convert_char3_rtp(short3); char3 __ovld __cnfn convert_char3_sat_rtp(short3); char3 __ovld __cnfn convert_char3_rtn(short3); char3 __ovld __cnfn convert_char3_sat_rtn(short3); char3 __ovld __cnfn convert_char3(short3); char3 __ovld __cnfn convert_char3_sat(short3); char3 __ovld __cnfn convert_char3_rte(ushort3); char3 __ovld __cnfn convert_char3_sat_rte(ushort3); char3 __ovld __cnfn convert_char3_rtz(ushort3); char3 __ovld __cnfn convert_char3_sat_rtz(ushort3); char3 __ovld __cnfn convert_char3_rtp(ushort3); char3 __ovld __cnfn convert_char3_sat_rtp(ushort3); char3 __ovld __cnfn convert_char3_rtn(ushort3); char3 __ovld __cnfn convert_char3_sat_rtn(ushort3); char3 __ovld __cnfn convert_char3(ushort3); char3 __ovld __cnfn convert_char3_sat(ushort3); char3 __ovld __cnfn convert_char3_rte(int3); char3 __ovld __cnfn convert_char3_sat_rte(int3); char3 __ovld __cnfn convert_char3_rtz(int3); char3 __ovld __cnfn convert_char3_sat_rtz(int3); char3 __ovld __cnfn convert_char3_rtp(int3); char3 __ovld __cnfn convert_char3_sat_rtp(int3); char3 __ovld __cnfn convert_char3_rtn(int3); char3 __ovld __cnfn convert_char3_sat_rtn(int3); char3 __ovld __cnfn convert_char3(int3); char3 __ovld __cnfn convert_char3_sat(int3); char3 __ovld __cnfn convert_char3_rte(uint3); char3 __ovld __cnfn convert_char3_sat_rte(uint3); char3 __ovld __cnfn convert_char3_rtz(uint3); char3 __ovld __cnfn convert_char3_sat_rtz(uint3); char3 __ovld __cnfn convert_char3_rtp(uint3); char3 __ovld __cnfn convert_char3_sat_rtp(uint3); char3 __ovld __cnfn convert_char3_rtn(uint3); char3 __ovld __cnfn convert_char3_sat_rtn(uint3); char3 __ovld __cnfn convert_char3(uint3); char3 __ovld __cnfn convert_char3_sat(uint3); char3 __ovld __cnfn convert_char3_rte(long3); char3 __ovld __cnfn convert_char3_sat_rte(long3); char3 __ovld __cnfn convert_char3_rtz(long3); char3 __ovld __cnfn convert_char3_sat_rtz(long3); char3 __ovld __cnfn convert_char3_rtp(long3); char3 __ovld __cnfn convert_char3_sat_rtp(long3); char3 __ovld __cnfn convert_char3_rtn(long3); char3 __ovld __cnfn convert_char3_sat_rtn(long3); char3 __ovld __cnfn convert_char3(long3); char3 __ovld __cnfn convert_char3_sat(long3); char3 __ovld __cnfn convert_char3_rte(ulong3); char3 __ovld __cnfn 
convert_char3_sat_rte(ulong3); char3 __ovld __cnfn convert_char3_rtz(ulong3); char3 __ovld __cnfn convert_char3_sat_rtz(ulong3); char3 __ovld __cnfn convert_char3_rtp(ulong3); char3 __ovld __cnfn convert_char3_sat_rtp(ulong3); char3 __ovld __cnfn convert_char3_rtn(ulong3); char3 __ovld __cnfn convert_char3_sat_rtn(ulong3); char3 __ovld __cnfn convert_char3(ulong3); char3 __ovld __cnfn convert_char3_sat(ulong3); char3 __ovld __cnfn convert_char3_rte(float3); char3 __ovld __cnfn convert_char3_sat_rte(float3); char3 __ovld __cnfn convert_char3_rtz(float3); char3 __ovld __cnfn convert_char3_sat_rtz(float3); char3 __ovld __cnfn convert_char3_rtp(float3); char3 __ovld __cnfn convert_char3_sat_rtp(float3); char3 __ovld __cnfn convert_char3_rtn(float3); char3 __ovld __cnfn convert_char3_sat_rtn(float3); char3 __ovld __cnfn convert_char3(float3); char3 __ovld __cnfn convert_char3_sat(float3); uchar3 __ovld __cnfn convert_uchar3_rte(char3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(char3); uchar3 __ovld __cnfn convert_uchar3_rtz(char3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(char3); uchar3 __ovld __cnfn convert_uchar3_rtp(char3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(char3); uchar3 __ovld __cnfn convert_uchar3_rtn(char3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(char3); uchar3 __ovld __cnfn convert_uchar3(char3); uchar3 __ovld __cnfn convert_uchar3_sat(char3); uchar3 __ovld __cnfn convert_uchar3_rte(uchar3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(uchar3); uchar3 __ovld __cnfn convert_uchar3_rtz(uchar3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(uchar3); uchar3 __ovld __cnfn convert_uchar3_rtp(uchar3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(uchar3); uchar3 __ovld __cnfn convert_uchar3_rtn(uchar3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(uchar3); uchar3 __ovld __cnfn convert_uchar3(uchar3); uchar3 __ovld __cnfn convert_uchar3_sat(uchar3); uchar3 __ovld __cnfn convert_uchar3_rte(short3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(short3); uchar3 __ovld __cnfn convert_uchar3_rtz(short3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(short3); uchar3 __ovld __cnfn convert_uchar3_rtp(short3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(short3); uchar3 __ovld __cnfn convert_uchar3_rtn(short3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(short3); uchar3 __ovld __cnfn convert_uchar3(short3); uchar3 __ovld __cnfn convert_uchar3_sat(short3); uchar3 __ovld __cnfn convert_uchar3_rte(ushort3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(ushort3); uchar3 __ovld __cnfn convert_uchar3_rtz(ushort3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(ushort3); uchar3 __ovld __cnfn convert_uchar3_rtp(ushort3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(ushort3); uchar3 __ovld __cnfn convert_uchar3_rtn(ushort3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(ushort3); uchar3 __ovld __cnfn convert_uchar3(ushort3); uchar3 __ovld __cnfn convert_uchar3_sat(ushort3); uchar3 __ovld __cnfn convert_uchar3_rte(int3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(int3); uchar3 __ovld __cnfn convert_uchar3_rtz(int3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(int3); uchar3 __ovld __cnfn convert_uchar3_rtp(int3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(int3); uchar3 __ovld __cnfn convert_uchar3_rtn(int3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(int3); uchar3 __ovld __cnfn convert_uchar3(int3); uchar3 __ovld __cnfn convert_uchar3_sat(int3); uchar3 __ovld __cnfn convert_uchar3_rte(uint3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(uint3); uchar3 __ovld __cnfn convert_uchar3_rtz(uint3); uchar3 __ovld 
__cnfn convert_uchar3_sat_rtz(uint3); uchar3 __ovld __cnfn convert_uchar3_rtp(uint3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(uint3); uchar3 __ovld __cnfn convert_uchar3_rtn(uint3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(uint3); uchar3 __ovld __cnfn convert_uchar3(uint3); uchar3 __ovld __cnfn convert_uchar3_sat(uint3); uchar3 __ovld __cnfn convert_uchar3_rte(long3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(long3); uchar3 __ovld __cnfn convert_uchar3_rtz(long3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(long3); uchar3 __ovld __cnfn convert_uchar3_rtp(long3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(long3); uchar3 __ovld __cnfn convert_uchar3_rtn(long3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(long3); uchar3 __ovld __cnfn convert_uchar3(long3); uchar3 __ovld __cnfn convert_uchar3_sat(long3); uchar3 __ovld __cnfn convert_uchar3_rte(ulong3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(ulong3); uchar3 __ovld __cnfn convert_uchar3_rtz(ulong3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(ulong3); uchar3 __ovld __cnfn convert_uchar3_rtp(ulong3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(ulong3); uchar3 __ovld __cnfn convert_uchar3_rtn(ulong3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(ulong3); uchar3 __ovld __cnfn convert_uchar3(ulong3); uchar3 __ovld __cnfn convert_uchar3_sat(ulong3); uchar3 __ovld __cnfn convert_uchar3_rte(float3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(float3); uchar3 __ovld __cnfn convert_uchar3_rtz(float3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(float3); uchar3 __ovld __cnfn convert_uchar3_rtp(float3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(float3); uchar3 __ovld __cnfn convert_uchar3_rtn(float3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(float3); uchar3 __ovld __cnfn convert_uchar3(float3); uchar3 __ovld __cnfn convert_uchar3_sat(float3); short3 __ovld __cnfn convert_short3_rte(char3); short3 __ovld __cnfn convert_short3_sat_rte(char3); short3 __ovld __cnfn convert_short3_rtz(char3); short3 __ovld __cnfn convert_short3_sat_rtz(char3); short3 __ovld __cnfn convert_short3_rtp(char3); short3 __ovld __cnfn convert_short3_sat_rtp(char3); short3 __ovld __cnfn convert_short3_rtn(char3); short3 __ovld __cnfn convert_short3_sat_rtn(char3); short3 __ovld __cnfn convert_short3(char3); short3 __ovld __cnfn convert_short3_sat(char3); short3 __ovld __cnfn convert_short3_rte(uchar3); short3 __ovld __cnfn convert_short3_sat_rte(uchar3); short3 __ovld __cnfn convert_short3_rtz(uchar3); short3 __ovld __cnfn convert_short3_sat_rtz(uchar3); short3 __ovld __cnfn convert_short3_rtp(uchar3); short3 __ovld __cnfn convert_short3_sat_rtp(uchar3); short3 __ovld __cnfn convert_short3_rtn(uchar3); short3 __ovld __cnfn convert_short3_sat_rtn(uchar3); short3 __ovld __cnfn convert_short3(uchar3); short3 __ovld __cnfn convert_short3_sat(uchar3); short3 __ovld __cnfn convert_short3_rte(short3); short3 __ovld __cnfn convert_short3_sat_rte(short3); short3 __ovld __cnfn convert_short3_rtz(short3); short3 __ovld __cnfn convert_short3_sat_rtz(short3); short3 __ovld __cnfn convert_short3_rtp(short3); short3 __ovld __cnfn convert_short3_sat_rtp(short3); short3 __ovld __cnfn convert_short3_rtn(short3); short3 __ovld __cnfn convert_short3_sat_rtn(short3); short3 __ovld __cnfn convert_short3(short3); short3 __ovld __cnfn convert_short3_sat(short3); short3 __ovld __cnfn convert_short3_rte(ushort3); short3 __ovld __cnfn convert_short3_sat_rte(ushort3); short3 __ovld __cnfn convert_short3_rtz(ushort3); short3 __ovld __cnfn convert_short3_sat_rtz(ushort3); short3 __ovld __cnfn 
convert_short3_rtp(ushort3); short3 __ovld __cnfn convert_short3_sat_rtp(ushort3); short3 __ovld __cnfn convert_short3_rtn(ushort3); short3 __ovld __cnfn convert_short3_sat_rtn(ushort3); short3 __ovld __cnfn convert_short3(ushort3); short3 __ovld __cnfn convert_short3_sat(ushort3); short3 __ovld __cnfn convert_short3_rte(int3); short3 __ovld __cnfn convert_short3_sat_rte(int3); short3 __ovld __cnfn convert_short3_rtz(int3); short3 __ovld __cnfn convert_short3_sat_rtz(int3); short3 __ovld __cnfn convert_short3_rtp(int3); short3 __ovld __cnfn convert_short3_sat_rtp(int3); short3 __ovld __cnfn convert_short3_rtn(int3); short3 __ovld __cnfn convert_short3_sat_rtn(int3); short3 __ovld __cnfn convert_short3(int3); short3 __ovld __cnfn convert_short3_sat(int3); short3 __ovld __cnfn convert_short3_rte(uint3); short3 __ovld __cnfn convert_short3_sat_rte(uint3); short3 __ovld __cnfn convert_short3_rtz(uint3); short3 __ovld __cnfn convert_short3_sat_rtz(uint3); short3 __ovld __cnfn convert_short3_rtp(uint3); short3 __ovld __cnfn convert_short3_sat_rtp(uint3); short3 __ovld __cnfn convert_short3_rtn(uint3); short3 __ovld __cnfn convert_short3_sat_rtn(uint3); short3 __ovld __cnfn convert_short3(uint3); short3 __ovld __cnfn convert_short3_sat(uint3); short3 __ovld __cnfn convert_short3_rte(long3); short3 __ovld __cnfn convert_short3_sat_rte(long3); short3 __ovld __cnfn convert_short3_rtz(long3); short3 __ovld __cnfn convert_short3_sat_rtz(long3); short3 __ovld __cnfn convert_short3_rtp(long3); short3 __ovld __cnfn convert_short3_sat_rtp(long3); short3 __ovld __cnfn convert_short3_rtn(long3); short3 __ovld __cnfn convert_short3_sat_rtn(long3); short3 __ovld __cnfn convert_short3(long3); short3 __ovld __cnfn convert_short3_sat(long3); short3 __ovld __cnfn convert_short3_rte(ulong3); short3 __ovld __cnfn convert_short3_sat_rte(ulong3); short3 __ovld __cnfn convert_short3_rtz(ulong3); short3 __ovld __cnfn convert_short3_sat_rtz(ulong3); short3 __ovld __cnfn convert_short3_rtp(ulong3); short3 __ovld __cnfn convert_short3_sat_rtp(ulong3); short3 __ovld __cnfn convert_short3_rtn(ulong3); short3 __ovld __cnfn convert_short3_sat_rtn(ulong3); short3 __ovld __cnfn convert_short3(ulong3); short3 __ovld __cnfn convert_short3_sat(ulong3); short3 __ovld __cnfn convert_short3_rte(float3); short3 __ovld __cnfn convert_short3_sat_rte(float3); short3 __ovld __cnfn convert_short3_rtz(float3); short3 __ovld __cnfn convert_short3_sat_rtz(float3); short3 __ovld __cnfn convert_short3_rtp(float3); short3 __ovld __cnfn convert_short3_sat_rtp(float3); short3 __ovld __cnfn convert_short3_rtn(float3); short3 __ovld __cnfn convert_short3_sat_rtn(float3); short3 __ovld __cnfn convert_short3(float3); short3 __ovld __cnfn convert_short3_sat(float3); ushort3 __ovld __cnfn convert_ushort3_rte(char3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(char3); ushort3 __ovld __cnfn convert_ushort3_rtz(char3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(char3); ushort3 __ovld __cnfn convert_ushort3_rtp(char3); ushort3 __ovld __cnfn convert_ushort3_sat_rtp(char3); ushort3 __ovld __cnfn convert_ushort3_rtn(char3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(char3); ushort3 __ovld __cnfn convert_ushort3(char3); ushort3 __ovld __cnfn convert_ushort3_sat(char3); ushort3 __ovld __cnfn convert_ushort3_rte(uchar3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(uchar3); ushort3 __ovld __cnfn convert_ushort3_rtz(uchar3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(uchar3); ushort3 __ovld __cnfn convert_ushort3_rtp(uchar3); ushort3 __ovld __cnfn 
convert_ushort3_sat_rtp(uchar3); ushort3 __ovld __cnfn convert_ushort3_rtn(uchar3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(uchar3); ushort3 __ovld __cnfn convert_ushort3(uchar3); ushort3 __ovld __cnfn convert_ushort3_sat(uchar3); ushort3 __ovld __cnfn convert_ushort3_rte(short3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(short3); ushort3 __ovld __cnfn convert_ushort3_rtz(short3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(short3); ushort3 __ovld __cnfn convert_ushort3_rtp(short3); ushort3 __ovld __cnfn convert_ushort3_sat_rtp(short3); ushort3 __ovld __cnfn convert_ushort3_rtn(short3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(short3); ushort3 __ovld __cnfn convert_ushort3(short3); ushort3 __ovld __cnfn convert_ushort3_sat(short3); ushort3 __ovld __cnfn convert_ushort3_rte(ushort3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(ushort3); ushort3 __ovld __cnfn convert_ushort3_rtz(ushort3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(ushort3); ushort3 __ovld __cnfn convert_ushort3_rtp(ushort3); ushort3 __ovld __cnfn convert_ushort3_sat_rtp(ushort3); ushort3 __ovld __cnfn convert_ushort3_rtn(ushort3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(ushort3); ushort3 __ovld __cnfn convert_ushort3(ushort3); ushort3 __ovld __cnfn convert_ushort3_sat(ushort3); ushort3 __ovld __cnfn convert_ushort3_rte(int3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(int3); ushort3 __ovld __cnfn convert_ushort3_rtz(int3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(int3); ushort3 __ovld __cnfn convert_ushort3_rtp(int3); ushort3 __ovld __cnfn convert_ushort3_sat_rtp(int3); ushort3 __ovld __cnfn convert_ushort3_rtn(int3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(int3); ushort3 __ovld __cnfn convert_ushort3(int3); ushort3 __ovld __cnfn convert_ushort3_sat(int3); ushort3 __ovld __cnfn convert_ushort3_rte(uint3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(uint3); ushort3 __ovld __cnfn convert_ushort3_rtz(uint3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(uint3); ushort3 __ovld __cnfn convert_ushort3_rtp(uint3); ushort3 __ovld __cnfn convert_ushort3_sat_rtp(uint3); ushort3 __ovld __cnfn convert_ushort3_rtn(uint3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(uint3); ushort3 __ovld __cnfn convert_ushort3(uint3); ushort3 __ovld __cnfn convert_ushort3_sat(uint3); ushort3 __ovld __cnfn convert_ushort3_rte(long3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(long3); ushort3 __ovld __cnfn convert_ushort3_rtz(long3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(long3); ushort3 __ovld __cnfn convert_ushort3_rtp(long3); ushort3 __ovld __cnfn convert_ushort3_sat_rtp(long3); ushort3 __ovld __cnfn convert_ushort3_rtn(long3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(long3); ushort3 __ovld __cnfn convert_ushort3(long3); ushort3 __ovld __cnfn convert_ushort3_sat(long3); ushort3 __ovld __cnfn convert_ushort3_rte(ulong3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(ulong3); ushort3 __ovld __cnfn convert_ushort3_rtz(ulong3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(ulong3); ushort3 __ovld __cnfn convert_ushort3_rtp(ulong3); ushort3 __ovld __cnfn convert_ushort3_sat_rtp(ulong3); ushort3 __ovld __cnfn convert_ushort3_rtn(ulong3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(ulong3); ushort3 __ovld __cnfn convert_ushort3(ulong3); ushort3 __ovld __cnfn convert_ushort3_sat(ulong3); ushort3 __ovld __cnfn convert_ushort3_rte(float3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(float3); ushort3 __ovld __cnfn convert_ushort3_rtz(float3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(float3); 
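/*
 * Illustrative sketch, not part of the embedded header: the convert_<dst>_sat*
 * overloads declared above clamp out-of-range source values to the destination
 * type's range instead of wrapping. Kernel and buffer names are hypothetical.
 */
__kernel void clamp_to_bytes(__global const float *in, __global uchar *out) {
    size_t i = get_global_id(0);
    /* vload3/vstore3 move three consecutive elements per work-item. */
    float3 v = vload3(i, in);
    /* Round to nearest even, then saturate to the uchar range [0, 255]. */
    uchar3 b = convert_uchar3_sat_rte(v);
    vstore3(b, i, out);
}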
ushort3 __ovld __cnfn convert_ushort3_rtp(float3); ushort3 __ovld __cnfn convert_ushort3_sat_rtp(float3); ushort3 __ovld __cnfn convert_ushort3_rtn(float3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(float3); ushort3 __ovld __cnfn convert_ushort3(float3); ushort3 __ovld __cnfn convert_ushort3_sat(float3); int3 __ovld __cnfn convert_int3_rte(char3); int3 __ovld __cnfn convert_int3_sat_rte(char3); int3 __ovld __cnfn convert_int3_rtz(char3); int3 __ovld __cnfn convert_int3_sat_rtz(char3); int3 __ovld __cnfn convert_int3_rtp(char3); int3 __ovld __cnfn convert_int3_sat_rtp(char3); int3 __ovld __cnfn convert_int3_rtn(char3); int3 __ovld __cnfn convert_int3_sat_rtn(char3); int3 __ovld __cnfn convert_int3(char3); int3 __ovld __cnfn convert_int3_sat(char3); int3 __ovld __cnfn convert_int3_rte(uchar3); int3 __ovld __cnfn convert_int3_sat_rte(uchar3); int3 __ovld __cnfn convert_int3_rtz(uchar3); int3 __ovld __cnfn convert_int3_sat_rtz(uchar3); int3 __ovld __cnfn convert_int3_rtp(uchar3); int3 __ovld __cnfn convert_int3_sat_rtp(uchar3); int3 __ovld __cnfn convert_int3_rtn(uchar3); int3 __ovld __cnfn convert_int3_sat_rtn(uchar3); int3 __ovld __cnfn convert_int3(uchar3); int3 __ovld __cnfn convert_int3_sat(uchar3); int3 __ovld __cnfn convert_int3_rte(short3); int3 __ovld __cnfn convert_int3_sat_rte(short3); int3 __ovld __cnfn convert_int3_rtz(short3); int3 __ovld __cnfn convert_int3_sat_rtz(short3); int3 __ovld __cnfn convert_int3_rtp(short3); int3 __ovld __cnfn convert_int3_sat_rtp(short3); int3 __ovld __cnfn convert_int3_rtn(short3); int3 __ovld __cnfn convert_int3_sat_rtn(short3); int3 __ovld __cnfn convert_int3(short3); int3 __ovld __cnfn convert_int3_sat(short3); int3 __ovld __cnfn convert_int3_rte(ushort3); int3 __ovld __cnfn convert_int3_sat_rte(ushort3); int3 __ovld __cnfn convert_int3_rtz(ushort3); int3 __ovld __cnfn convert_int3_sat_rtz(ushort3); int3 __ovld __cnfn convert_int3_rtp(ushort3); int3 __ovld __cnfn convert_int3_sat_rtp(ushort3); int3 __ovld __cnfn convert_int3_rtn(ushort3); int3 __ovld __cnfn convert_int3_sat_rtn(ushort3); int3 __ovld __cnfn convert_int3(ushort3); int3 __ovld __cnfn convert_int3_sat(ushort3); int3 __ovld __cnfn convert_int3_rte(int3); int3 __ovld __cnfn convert_int3_sat_rte(int3); int3 __ovld __cnfn convert_int3_rtz(int3); int3 __ovld __cnfn convert_int3_sat_rtz(int3); int3 __ovld __cnfn convert_int3_rtp(int3); int3 __ovld __cnfn convert_int3_sat_rtp(int3); int3 __ovld __cnfn convert_int3_rtn(int3); int3 __ovld __cnfn convert_int3_sat_rtn(int3); int3 __ovld __cnfn convert_int3(int3); int3 __ovld __cnfn convert_int3_sat(int3); int3 __ovld __cnfn convert_int3_rte(uint3); int3 __ovld __cnfn convert_int3_sat_rte(uint3); int3 __ovld __cnfn convert_int3_rtz(uint3); int3 __ovld __cnfn convert_int3_sat_rtz(uint3); int3 __ovld __cnfn convert_int3_rtp(uint3); int3 __ovld __cnfn convert_int3_sat_rtp(uint3); int3 __ovld __cnfn convert_int3_rtn(uint3); int3 __ovld __cnfn convert_int3_sat_rtn(uint3); int3 __ovld __cnfn convert_int3(uint3); int3 __ovld __cnfn convert_int3_sat(uint3); int3 __ovld __cnfn convert_int3_rte(long3); int3 __ovld __cnfn convert_int3_sat_rte(long3); int3 __ovld __cnfn convert_int3_rtz(long3); int3 __ovld __cnfn convert_int3_sat_rtz(long3); int3 __ovld __cnfn convert_int3_rtp(long3); int3 __ovld __cnfn convert_int3_sat_rtp(long3); int3 __ovld __cnfn convert_int3_rtn(long3); int3 __ovld __cnfn convert_int3_sat_rtn(long3); int3 __ovld __cnfn convert_int3(long3); int3 __ovld __cnfn convert_int3_sat(long3); int3 __ovld __cnfn convert_int3_rte(ulong3); int3 
__ovld __cnfn convert_int3_sat_rte(ulong3); int3 __ovld __cnfn convert_int3_rtz(ulong3); int3 __ovld __cnfn convert_int3_sat_rtz(ulong3); int3 __ovld __cnfn convert_int3_rtp(ulong3); int3 __ovld __cnfn convert_int3_sat_rtp(ulong3); int3 __ovld __cnfn convert_int3_rtn(ulong3); int3 __ovld __cnfn convert_int3_sat_rtn(ulong3); int3 __ovld __cnfn convert_int3(ulong3); int3 __ovld __cnfn convert_int3_sat(ulong3); int3 __ovld __cnfn convert_int3_rte(float3); int3 __ovld __cnfn convert_int3_sat_rte(float3); int3 __ovld __cnfn convert_int3_rtz(float3); int3 __ovld __cnfn convert_int3_sat_rtz(float3); int3 __ovld __cnfn convert_int3_rtp(float3); int3 __ovld __cnfn convert_int3_sat_rtp(float3); int3 __ovld __cnfn convert_int3_rtn(float3); int3 __ovld __cnfn convert_int3_sat_rtn(float3); int3 __ovld __cnfn convert_int3(float3); int3 __ovld __cnfn convert_int3_sat(float3); uint3 __ovld __cnfn convert_uint3_rte(char3); uint3 __ovld __cnfn convert_uint3_sat_rte(char3); uint3 __ovld __cnfn convert_uint3_rtz(char3); uint3 __ovld __cnfn convert_uint3_sat_rtz(char3); uint3 __ovld __cnfn convert_uint3_rtp(char3); uint3 __ovld __cnfn convert_uint3_sat_rtp(char3); uint3 __ovld __cnfn convert_uint3_rtn(char3); uint3 __ovld __cnfn convert_uint3_sat_rtn(char3); uint3 __ovld __cnfn convert_uint3(char3); uint3 __ovld __cnfn convert_uint3_sat(char3); uint3 __ovld __cnfn convert_uint3_rte(uchar3); uint3 __ovld __cnfn convert_uint3_sat_rte(uchar3); uint3 __ovld __cnfn convert_uint3_rtz(uchar3); uint3 __ovld __cnfn convert_uint3_sat_rtz(uchar3); uint3 __ovld __cnfn convert_uint3_rtp(uchar3); uint3 __ovld __cnfn convert_uint3_sat_rtp(uchar3); uint3 __ovld __cnfn convert_uint3_rtn(uchar3); uint3 __ovld __cnfn convert_uint3_sat_rtn(uchar3); uint3 __ovld __cnfn convert_uint3(uchar3); uint3 __ovld __cnfn convert_uint3_sat(uchar3); uint3 __ovld __cnfn convert_uint3_rte(short3); uint3 __ovld __cnfn convert_uint3_sat_rte(short3); uint3 __ovld __cnfn convert_uint3_rtz(short3); uint3 __ovld __cnfn convert_uint3_sat_rtz(short3); uint3 __ovld __cnfn convert_uint3_rtp(short3); uint3 __ovld __cnfn convert_uint3_sat_rtp(short3); uint3 __ovld __cnfn convert_uint3_rtn(short3); uint3 __ovld __cnfn convert_uint3_sat_rtn(short3); uint3 __ovld __cnfn convert_uint3(short3); uint3 __ovld __cnfn convert_uint3_sat(short3); uint3 __ovld __cnfn convert_uint3_rte(ushort3); uint3 __ovld __cnfn convert_uint3_sat_rte(ushort3); uint3 __ovld __cnfn convert_uint3_rtz(ushort3); uint3 __ovld __cnfn convert_uint3_sat_rtz(ushort3); uint3 __ovld __cnfn convert_uint3_rtp(ushort3); uint3 __ovld __cnfn convert_uint3_sat_rtp(ushort3); uint3 __ovld __cnfn convert_uint3_rtn(ushort3); uint3 __ovld __cnfn convert_uint3_sat_rtn(ushort3); uint3 __ovld __cnfn convert_uint3(ushort3); uint3 __ovld __cnfn convert_uint3_sat(ushort3); uint3 __ovld __cnfn convert_uint3_rte(int3); uint3 __ovld __cnfn convert_uint3_sat_rte(int3); uint3 __ovld __cnfn convert_uint3_rtz(int3); uint3 __ovld __cnfn convert_uint3_sat_rtz(int3); uint3 __ovld __cnfn convert_uint3_rtp(int3); uint3 __ovld __cnfn convert_uint3_sat_rtp(int3); uint3 __ovld __cnfn convert_uint3_rtn(int3); uint3 __ovld __cnfn convert_uint3_sat_rtn(int3); uint3 __ovld __cnfn convert_uint3(int3); uint3 __ovld __cnfn convert_uint3_sat(int3); uint3 __ovld __cnfn convert_uint3_rte(uint3); uint3 __ovld __cnfn convert_uint3_sat_rte(uint3); uint3 __ovld __cnfn convert_uint3_rtz(uint3); uint3 __ovld __cnfn convert_uint3_sat_rtz(uint3); uint3 __ovld __cnfn convert_uint3_rtp(uint3); uint3 __ovld __cnfn convert_uint3_sat_rtp(uint3); 
uint3 __ovld __cnfn convert_uint3_rtn(uint3); uint3 __ovld __cnfn convert_uint3_sat_rtn(uint3); uint3 __ovld __cnfn convert_uint3(uint3); uint3 __ovld __cnfn convert_uint3_sat(uint3); uint3 __ovld __cnfn convert_uint3_rte(long3); uint3 __ovld __cnfn convert_uint3_sat_rte(long3); uint3 __ovld __cnfn convert_uint3_rtz(long3); uint3 __ovld __cnfn convert_uint3_sat_rtz(long3); uint3 __ovld __cnfn convert_uint3_rtp(long3); uint3 __ovld __cnfn convert_uint3_sat_rtp(long3); uint3 __ovld __cnfn convert_uint3_rtn(long3); uint3 __ovld __cnfn convert_uint3_sat_rtn(long3); uint3 __ovld __cnfn convert_uint3(long3); uint3 __ovld __cnfn convert_uint3_sat(long3); uint3 __ovld __cnfn convert_uint3_rte(ulong3); uint3 __ovld __cnfn convert_uint3_sat_rte(ulong3); uint3 __ovld __cnfn convert_uint3_rtz(ulong3); uint3 __ovld __cnfn convert_uint3_sat_rtz(ulong3); uint3 __ovld __cnfn convert_uint3_rtp(ulong3); uint3 __ovld __cnfn convert_uint3_sat_rtp(ulong3); uint3 __ovld __cnfn convert_uint3_rtn(ulong3); uint3 __ovld __cnfn convert_uint3_sat_rtn(ulong3); uint3 __ovld __cnfn convert_uint3(ulong3); uint3 __ovld __cnfn convert_uint3_sat(ulong3); uint3 __ovld __cnfn convert_uint3_rte(float3); uint3 __ovld __cnfn convert_uint3_sat_rte(float3); uint3 __ovld __cnfn convert_uint3_rtz(float3); uint3 __ovld __cnfn convert_uint3_sat_rtz(float3); uint3 __ovld __cnfn convert_uint3_rtp(float3); uint3 __ovld __cnfn convert_uint3_sat_rtp(float3); uint3 __ovld __cnfn convert_uint3_rtn(float3); uint3 __ovld __cnfn convert_uint3_sat_rtn(float3); uint3 __ovld __cnfn convert_uint3(float3); uint3 __ovld __cnfn convert_uint3_sat(float3); long3 __ovld __cnfn convert_long3_rte(char3); long3 __ovld __cnfn convert_long3_sat_rte(char3); long3 __ovld __cnfn convert_long3_rtz(char3); long3 __ovld __cnfn convert_long3_sat_rtz(char3); long3 __ovld __cnfn convert_long3_rtp(char3); long3 __ovld __cnfn convert_long3_sat_rtp(char3); long3 __ovld __cnfn convert_long3_rtn(char3); long3 __ovld __cnfn convert_long3_sat_rtn(char3); long3 __ovld __cnfn convert_long3(char3); long3 __ovld __cnfn convert_long3_sat(char3); long3 __ovld __cnfn convert_long3_rte(uchar3); long3 __ovld __cnfn convert_long3_sat_rte(uchar3); long3 __ovld __cnfn convert_long3_rtz(uchar3); long3 __ovld __cnfn convert_long3_sat_rtz(uchar3); long3 __ovld __cnfn convert_long3_rtp(uchar3); long3 __ovld __cnfn convert_long3_sat_rtp(uchar3); long3 __ovld __cnfn convert_long3_rtn(uchar3); long3 __ovld __cnfn convert_long3_sat_rtn(uchar3); long3 __ovld __cnfn convert_long3(uchar3); long3 __ovld __cnfn convert_long3_sat(uchar3); long3 __ovld __cnfn convert_long3_rte(short3); long3 __ovld __cnfn convert_long3_sat_rte(short3); long3 __ovld __cnfn convert_long3_rtz(short3); long3 __ovld __cnfn convert_long3_sat_rtz(short3); long3 __ovld __cnfn convert_long3_rtp(short3); long3 __ovld __cnfn convert_long3_sat_rtp(short3); long3 __ovld __cnfn convert_long3_rtn(short3); long3 __ovld __cnfn convert_long3_sat_rtn(short3); long3 __ovld __cnfn convert_long3(short3); long3 __ovld __cnfn convert_long3_sat(short3); long3 __ovld __cnfn convert_long3_rte(ushort3); long3 __ovld __cnfn convert_long3_sat_rte(ushort3); long3 __ovld __cnfn convert_long3_rtz(ushort3); long3 __ovld __cnfn convert_long3_sat_rtz(ushort3); long3 __ovld __cnfn convert_long3_rtp(ushort3); long3 __ovld __cnfn convert_long3_sat_rtp(ushort3); long3 __ovld __cnfn convert_long3_rtn(ushort3); long3 __ovld __cnfn convert_long3_sat_rtn(ushort3); long3 __ovld __cnfn convert_long3(ushort3); long3 __ovld __cnfn convert_long3_sat(ushort3); long3 
__ovld __cnfn convert_long3_rte(int3); long3 __ovld __cnfn convert_long3_sat_rte(int3); long3 __ovld __cnfn convert_long3_rtz(int3); long3 __ovld __cnfn convert_long3_sat_rtz(int3); long3 __ovld __cnfn convert_long3_rtp(int3); long3 __ovld __cnfn convert_long3_sat_rtp(int3); long3 __ovld __cnfn convert_long3_rtn(int3); long3 __ovld __cnfn convert_long3_sat_rtn(int3); long3 __ovld __cnfn convert_long3(int3); long3 __ovld __cnfn convert_long3_sat(int3); long3 __ovld __cnfn convert_long3_rte(uint3); long3 __ovld __cnfn convert_long3_sat_rte(uint3); long3 __ovld __cnfn convert_long3_rtz(uint3); long3 __ovld __cnfn convert_long3_sat_rtz(uint3); long3 __ovld __cnfn convert_long3_rtp(uint3); long3 __ovld __cnfn convert_long3_sat_rtp(uint3); long3 __ovld __cnfn convert_long3_rtn(uint3); long3 __ovld __cnfn convert_long3_sat_rtn(uint3); long3 __ovld __cnfn convert_long3(uint3); long3 __ovld __cnfn convert_long3_sat(uint3); long3 __ovld __cnfn convert_long3_rte(long3); long3 __ovld __cnfn convert_long3_sat_rte(long3); long3 __ovld __cnfn convert_long3_rtz(long3); long3 __ovld __cnfn convert_long3_sat_rtz(long3); long3 __ovld __cnfn convert_long3_rtp(long3); long3 __ovld __cnfn convert_long3_sat_rtp(long3); long3 __ovld __cnfn convert_long3_rtn(long3); long3 __ovld __cnfn convert_long3_sat_rtn(long3); long3 __ovld __cnfn convert_long3(long3); long3 __ovld __cnfn convert_long3_sat(long3); long3 __ovld __cnfn convert_long3_rte(ulong3); long3 __ovld __cnfn convert_long3_sat_rte(ulong3); long3 __ovld __cnfn convert_long3_rtz(ulong3); long3 __ovld __cnfn convert_long3_sat_rtz(ulong3); long3 __ovld __cnfn convert_long3_rtp(ulong3); long3 __ovld __cnfn convert_long3_sat_rtp(ulong3); long3 __ovld __cnfn convert_long3_rtn(ulong3); long3 __ovld __cnfn convert_long3_sat_rtn(ulong3); long3 __ovld __cnfn convert_long3(ulong3); long3 __ovld __cnfn convert_long3_sat(ulong3); long3 __ovld __cnfn convert_long3_rte(float3); long3 __ovld __cnfn convert_long3_sat_rte(float3); long3 __ovld __cnfn convert_long3_rtz(float3); long3 __ovld __cnfn convert_long3_sat_rtz(float3); long3 __ovld __cnfn convert_long3_rtp(float3); long3 __ovld __cnfn convert_long3_sat_rtp(float3); long3 __ovld __cnfn convert_long3_rtn(float3); long3 __ovld __cnfn convert_long3_sat_rtn(float3); long3 __ovld __cnfn convert_long3(float3); long3 __ovld __cnfn convert_long3_sat(float3); ulong3 __ovld __cnfn convert_ulong3_rte(char3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(char3); ulong3 __ovld __cnfn convert_ulong3_rtz(char3); ulong3 __ovld __cnfn convert_ulong3_sat_rtz(char3); ulong3 __ovld __cnfn convert_ulong3_rtp(char3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(char3); ulong3 __ovld __cnfn convert_ulong3_rtn(char3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(char3); ulong3 __ovld __cnfn convert_ulong3(char3); ulong3 __ovld __cnfn convert_ulong3_sat(char3); ulong3 __ovld __cnfn convert_ulong3_rte(uchar3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(uchar3); ulong3 __ovld __cnfn convert_ulong3_rtz(uchar3); ulong3 __ovld __cnfn convert_ulong3_sat_rtz(uchar3); ulong3 __ovld __cnfn convert_ulong3_rtp(uchar3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(uchar3); ulong3 __ovld __cnfn convert_ulong3_rtn(uchar3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(uchar3); ulong3 __ovld __cnfn convert_ulong3(uchar3); ulong3 __ovld __cnfn convert_ulong3_sat(uchar3); ulong3 __ovld __cnfn convert_ulong3_rte(short3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(short3); ulong3 __ovld __cnfn convert_ulong3_rtz(short3); ulong3 __ovld __cnfn 
convert_ulong3_sat_rtz(short3); ulong3 __ovld __cnfn convert_ulong3_rtp(short3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(short3); ulong3 __ovld __cnfn convert_ulong3_rtn(short3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(short3); ulong3 __ovld __cnfn convert_ulong3(short3); ulong3 __ovld __cnfn convert_ulong3_sat(short3); ulong3 __ovld __cnfn convert_ulong3_rte(ushort3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(ushort3); ulong3 __ovld __cnfn convert_ulong3_rtz(ushort3); ulong3 __ovld __cnfn convert_ulong3_sat_rtz(ushort3); ulong3 __ovld __cnfn convert_ulong3_rtp(ushort3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(ushort3); ulong3 __ovld __cnfn convert_ulong3_rtn(ushort3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(ushort3); ulong3 __ovld __cnfn convert_ulong3(ushort3); ulong3 __ovld __cnfn convert_ulong3_sat(ushort3); ulong3 __ovld __cnfn convert_ulong3_rte(int3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(int3); ulong3 __ovld __cnfn convert_ulong3_rtz(int3); ulong3 __ovld __cnfn convert_ulong3_sat_rtz(int3); ulong3 __ovld __cnfn convert_ulong3_rtp(int3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(int3); ulong3 __ovld __cnfn convert_ulong3_rtn(int3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(int3); ulong3 __ovld __cnfn convert_ulong3(int3); ulong3 __ovld __cnfn convert_ulong3_sat(int3); ulong3 __ovld __cnfn convert_ulong3_rte(uint3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(uint3); ulong3 __ovld __cnfn convert_ulong3_rtz(uint3); ulong3 __ovld __cnfn convert_ulong3_sat_rtz(uint3); ulong3 __ovld __cnfn convert_ulong3_rtp(uint3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(uint3); ulong3 __ovld __cnfn convert_ulong3_rtn(uint3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(uint3); ulong3 __ovld __cnfn convert_ulong3(uint3); ulong3 __ovld __cnfn convert_ulong3_sat(uint3); ulong3 __ovld __cnfn convert_ulong3_rte(long3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(long3); ulong3 __ovld __cnfn convert_ulong3_rtz(long3); ulong3 __ovld __cnfn convert_ulong3_sat_rtz(long3); ulong3 __ovld __cnfn convert_ulong3_rtp(long3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(long3); ulong3 __ovld __cnfn convert_ulong3_rtn(long3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(long3); ulong3 __ovld __cnfn convert_ulong3(long3); ulong3 __ovld __cnfn convert_ulong3_sat(long3); ulong3 __ovld __cnfn convert_ulong3_rte(ulong3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(ulong3); ulong3 __ovld __cnfn convert_ulong3_rtz(ulong3); ulong3 __ovld __cnfn convert_ulong3_sat_rtz(ulong3); ulong3 __ovld __cnfn convert_ulong3_rtp(ulong3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(ulong3); ulong3 __ovld __cnfn convert_ulong3_rtn(ulong3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(ulong3); ulong3 __ovld __cnfn convert_ulong3(ulong3); ulong3 __ovld __cnfn convert_ulong3_sat(ulong3); ulong3 __ovld __cnfn convert_ulong3_rte(float3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(float3); ulong3 __ovld __cnfn convert_ulong3_rtz(float3); ulong3 __ovld __cnfn convert_ulong3_sat_rtz(float3); ulong3 __ovld __cnfn convert_ulong3_rtp(float3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(float3); ulong3 __ovld __cnfn convert_ulong3_rtn(float3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(float3); ulong3 __ovld __cnfn convert_ulong3(float3); ulong3 __ovld __cnfn convert_ulong3_sat(float3); float3 __ovld __cnfn convert_float3_rte(char3); float3 __ovld __cnfn convert_float3_rtz(char3); float3 __ovld __cnfn convert_float3_rtp(char3); float3 __ovld __cnfn convert_float3_rtn(char3); float3 __ovld __cnfn convert_float3(char3); float3 
__ovld __cnfn convert_float3_rte(uchar3); float3 __ovld __cnfn convert_float3_rtz(uchar3); float3 __ovld __cnfn convert_float3_rtp(uchar3); float3 __ovld __cnfn convert_float3_rtn(uchar3); float3 __ovld __cnfn convert_float3(uchar3); float3 __ovld __cnfn convert_float3_rte(short3); float3 __ovld __cnfn convert_float3_rtz(short3); float3 __ovld __cnfn convert_float3_rtp(short3); float3 __ovld __cnfn convert_float3_rtn(short3); float3 __ovld __cnfn convert_float3(short3); float3 __ovld __cnfn convert_float3_rte(ushort3); float3 __ovld __cnfn convert_float3_rtz(ushort3); float3 __ovld __cnfn convert_float3_rtp(ushort3); float3 __ovld __cnfn convert_float3_rtn(ushort3); float3 __ovld __cnfn convert_float3(ushort3); float3 __ovld __cnfn convert_float3_rte(int3); float3 __ovld __cnfn convert_float3_rtz(int3); float3 __ovld __cnfn convert_float3_rtp(int3); float3 __ovld __cnfn convert_float3_rtn(int3); float3 __ovld __cnfn convert_float3(int3); float3 __ovld __cnfn convert_float3_rte(uint3); float3 __ovld __cnfn convert_float3_rtz(uint3); float3 __ovld __cnfn convert_float3_rtp(uint3); float3 __ovld __cnfn convert_float3_rtn(uint3); float3 __ovld __cnfn convert_float3(uint3); float3 __ovld __cnfn convert_float3_rte(long3); float3 __ovld __cnfn convert_float3_rtz(long3); float3 __ovld __cnfn convert_float3_rtp(long3); float3 __ovld __cnfn convert_float3_rtn(long3); float3 __ovld __cnfn convert_float3(long3); float3 __ovld __cnfn convert_float3_rte(ulong3); float3 __ovld __cnfn convert_float3_rtz(ulong3); float3 __ovld __cnfn convert_float3_rtp(ulong3); float3 __ovld __cnfn convert_float3_rtn(ulong3); float3 __ovld __cnfn convert_float3(ulong3); float3 __ovld __cnfn convert_float3_rte(float3); float3 __ovld __cnfn convert_float3_rtz(float3); float3 __ovld __cnfn convert_float3_rtp(float3); float3 __ovld __cnfn convert_float3_rtn(float3); float3 __ovld __cnfn convert_float3(float3); char4 __ovld __cnfn convert_char4_rte(char4); char4 __ovld __cnfn convert_char4_sat_rte(char4); char4 __ovld __cnfn convert_char4_rtz(char4); char4 __ovld __cnfn convert_char4_sat_rtz(char4); char4 __ovld __cnfn convert_char4_rtp(char4); char4 __ovld __cnfn convert_char4_sat_rtp(char4); char4 __ovld __cnfn convert_char4_rtn(char4); char4 __ovld __cnfn convert_char4_sat_rtn(char4); char4 __ovld __cnfn convert_char4(char4); char4 __ovld __cnfn convert_char4_sat(char4); char4 __ovld __cnfn convert_char4_rte(uchar4); char4 __ovld __cnfn convert_char4_sat_rte(uchar4); char4 __ovld __cnfn convert_char4_rtz(uchar4); char4 __ovld __cnfn convert_char4_sat_rtz(uchar4); char4 __ovld __cnfn convert_char4_rtp(uchar4); char4 __ovld __cnfn convert_char4_sat_rtp(uchar4); char4 __ovld __cnfn convert_char4_rtn(uchar4); char4 __ovld __cnfn convert_char4_sat_rtn(uchar4); char4 __ovld __cnfn convert_char4(uchar4); char4 __ovld __cnfn convert_char4_sat(uchar4); char4 __ovld __cnfn convert_char4_rte(short4); char4 __ovld __cnfn convert_char4_sat_rte(short4); char4 __ovld __cnfn convert_char4_rtz(short4); char4 __ovld __cnfn convert_char4_sat_rtz(short4); char4 __ovld __cnfn convert_char4_rtp(short4); char4 __ovld __cnfn convert_char4_sat_rtp(short4); char4 __ovld __cnfn convert_char4_rtn(short4); char4 __ovld __cnfn convert_char4_sat_rtn(short4); char4 __ovld __cnfn convert_char4(short4); char4 __ovld __cnfn convert_char4_sat(short4); char4 __ovld __cnfn convert_char4_rte(ushort4); char4 __ovld __cnfn convert_char4_sat_rte(ushort4); char4 __ovld __cnfn convert_char4_rtz(ushort4); char4 __ovld __cnfn convert_char4_sat_rtz(ushort4); char4 __ovld 
__cnfn convert_char4_rtp(ushort4); char4 __ovld __cnfn convert_char4_sat_rtp(ushort4); char4 __ovld __cnfn convert_char4_rtn(ushort4); char4 __ovld __cnfn convert_char4_sat_rtn(ushort4); char4 __ovld __cnfn convert_char4(ushort4); char4 __ovld __cnfn convert_char4_sat(ushort4); char4 __ovld __cnfn convert_char4_rte(int4); char4 __ovld __cnfn convert_char4_sat_rte(int4); char4 __ovld __cnfn convert_char4_rtz(int4); char4 __ovld __cnfn convert_char4_sat_rtz(int4); char4 __ovld __cnfn convert_char4_rtp(int4); char4 __ovld __cnfn convert_char4_sat_rtp(int4); char4 __ovld __cnfn convert_char4_rtn(int4); char4 __ovld __cnfn convert_char4_sat_rtn(int4); char4 __ovld __cnfn convert_char4(int4); char4 __ovld __cnfn convert_char4_sat(int4); char4 __ovld __cnfn convert_char4_rte(uint4); char4 __ovld __cnfn convert_char4_sat_rte(uint4); char4 __ovld __cnfn convert_char4_rtz(uint4); char4 __ovld __cnfn convert_char4_sat_rtz(uint4); char4 __ovld __cnfn convert_char4_rtp(uint4); char4 __ovld __cnfn convert_char4_sat_rtp(uint4); char4 __ovld __cnfn convert_char4_rtn(uint4); char4 __ovld __cnfn convert_char4_sat_rtn(uint4); char4 __ovld __cnfn convert_char4(uint4); char4 __ovld __cnfn convert_char4_sat(uint4); char4 __ovld __cnfn convert_char4_rte(long4); char4 __ovld __cnfn convert_char4_sat_rte(long4); char4 __ovld __cnfn convert_char4_rtz(long4); char4 __ovld __cnfn convert_char4_sat_rtz(long4); char4 __ovld __cnfn convert_char4_rtp(long4); char4 __ovld __cnfn convert_char4_sat_rtp(long4); char4 __ovld __cnfn convert_char4_rtn(long4); char4 __ovld __cnfn convert_char4_sat_rtn(long4); char4 __ovld __cnfn convert_char4(long4); char4 __ovld __cnfn convert_char4_sat(long4); char4 __ovld __cnfn convert_char4_rte(ulong4); char4 __ovld __cnfn convert_char4_sat_rte(ulong4); char4 __ovld __cnfn convert_char4_rtz(ulong4); char4 __ovld __cnfn convert_char4_sat_rtz(ulong4); char4 __ovld __cnfn convert_char4_rtp(ulong4); char4 __ovld __cnfn convert_char4_sat_rtp(ulong4); char4 __ovld __cnfn convert_char4_rtn(ulong4); char4 __ovld __cnfn convert_char4_sat_rtn(ulong4); char4 __ovld __cnfn convert_char4(ulong4); char4 __ovld __cnfn convert_char4_sat(ulong4); char4 __ovld __cnfn convert_char4_rte(float4); char4 __ovld __cnfn convert_char4_sat_rte(float4); char4 __ovld __cnfn convert_char4_rtz(float4); char4 __ovld __cnfn convert_char4_sat_rtz(float4); char4 __ovld __cnfn convert_char4_rtp(float4); char4 __ovld __cnfn convert_char4_sat_rtp(float4); char4 __ovld __cnfn convert_char4_rtn(float4); char4 __ovld __cnfn convert_char4_sat_rtn(float4); char4 __ovld __cnfn convert_char4(float4); char4 __ovld __cnfn convert_char4_sat(float4); uchar4 __ovld __cnfn convert_uchar4_rte(char4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(char4); uchar4 __ovld __cnfn convert_uchar4_rtz(char4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(char4); uchar4 __ovld __cnfn convert_uchar4_rtp(char4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(char4); uchar4 __ovld __cnfn convert_uchar4_rtn(char4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(char4); uchar4 __ovld __cnfn convert_uchar4(char4); uchar4 __ovld __cnfn convert_uchar4_sat(char4); uchar4 __ovld __cnfn convert_uchar4_rte(uchar4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(uchar4); uchar4 __ovld __cnfn convert_uchar4_rtz(uchar4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(uchar4); uchar4 __ovld __cnfn convert_uchar4_rtp(uchar4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(uchar4); uchar4 __ovld __cnfn convert_uchar4_rtn(uchar4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(uchar4); 
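/*
 * Illustrative sketch (values are only an example, not from the dump): the
 * _rte/_rtz/_rtp/_rtn suffixes on the convert_* overloads select the rounding
 * mode for float-to-integer conversion; with no suffix, conversions to integer
 * types round toward zero (_rtz).
 */
__kernel void rounding_demo(__global int4 *out) {
    float4 v = (float4)(1.5f, -1.5f, 2.5f, -2.5f);
    out[0] = convert_int4(v);      /* rtz: ( 1, -1,  2, -2) */
    out[1] = convert_int4_rte(v);  /* rte: ( 2, -2,  2, -2) */
    out[2] = convert_int4_rtp(v);  /* rtp: ( 2, -1,  3, -2) */
    out[3] = convert_int4_rtn(v);  /* rtn: ( 1, -2,  2, -3) */
}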
uchar4 __ovld __cnfn convert_uchar4(uchar4); uchar4 __ovld __cnfn convert_uchar4_sat(uchar4); uchar4 __ovld __cnfn convert_uchar4_rte(short4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(short4); uchar4 __ovld __cnfn convert_uchar4_rtz(short4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(short4); uchar4 __ovld __cnfn convert_uchar4_rtp(short4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(short4); uchar4 __ovld __cnfn convert_uchar4_rtn(short4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(short4); uchar4 __ovld __cnfn convert_uchar4(short4); uchar4 __ovld __cnfn convert_uchar4_sat(short4); uchar4 __ovld __cnfn convert_uchar4_rte(ushort4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(ushort4); uchar4 __ovld __cnfn convert_uchar4_rtz(ushort4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(ushort4); uchar4 __ovld __cnfn convert_uchar4_rtp(ushort4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(ushort4); uchar4 __ovld __cnfn convert_uchar4_rtn(ushort4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(ushort4); uchar4 __ovld __cnfn convert_uchar4(ushort4); uchar4 __ovld __cnfn convert_uchar4_sat(ushort4); uchar4 __ovld __cnfn convert_uchar4_rte(int4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(int4); uchar4 __ovld __cnfn convert_uchar4_rtz(int4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(int4); uchar4 __ovld __cnfn convert_uchar4_rtp(int4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(int4); uchar4 __ovld __cnfn convert_uchar4_rtn(int4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(int4); uchar4 __ovld __cnfn convert_uchar4(int4); uchar4 __ovld __cnfn convert_uchar4_sat(int4); uchar4 __ovld __cnfn convert_uchar4_rte(uint4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(uint4); uchar4 __ovld __cnfn convert_uchar4_rtz(uint4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(uint4); uchar4 __ovld __cnfn convert_uchar4_rtp(uint4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(uint4); uchar4 __ovld __cnfn convert_uchar4_rtn(uint4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(uint4); uchar4 __ovld __cnfn convert_uchar4(uint4); uchar4 __ovld __cnfn convert_uchar4_sat(uint4); uchar4 __ovld __cnfn convert_uchar4_rte(long4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(long4); uchar4 __ovld __cnfn convert_uchar4_rtz(long4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(long4); uchar4 __ovld __cnfn convert_uchar4_rtp(long4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(long4); uchar4 __ovld __cnfn convert_uchar4_rtn(long4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(long4); uchar4 __ovld __cnfn convert_uchar4(long4); uchar4 __ovld __cnfn convert_uchar4_sat(long4); uchar4 __ovld __cnfn convert_uchar4_rte(ulong4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(ulong4); uchar4 __ovld __cnfn convert_uchar4_rtz(ulong4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(ulong4); uchar4 __ovld __cnfn convert_uchar4_rtp(ulong4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(ulong4); uchar4 __ovld __cnfn convert_uchar4_rtn(ulong4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(ulong4); uchar4 __ovld __cnfn convert_uchar4(ulong4); uchar4 __ovld __cnfn convert_uchar4_sat(ulong4); uchar4 __ovld __cnfn convert_uchar4_rte(float4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(float4); uchar4 __ovld __cnfn convert_uchar4_rtz(float4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(float4); uchar4 __ovld __cnfn convert_uchar4_rtp(float4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(float4); uchar4 __ovld __cnfn convert_uchar4_rtn(float4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(float4); uchar4 __ovld __cnfn convert_uchar4(float4); uchar4 __ovld __cnfn 
convert_uchar4_sat(float4); short4 __ovld __cnfn convert_short4_rte(char4); short4 __ovld __cnfn convert_short4_sat_rte(char4); short4 __ovld __cnfn convert_short4_rtz(char4); short4 __ovld __cnfn convert_short4_sat_rtz(char4); short4 __ovld __cnfn convert_short4_rtp(char4); short4 __ovld __cnfn convert_short4_sat_rtp(char4); short4 __ovld __cnfn convert_short4_rtn(char4); short4 __ovld __cnfn convert_short4_sat_rtn(char4); short4 __ovld __cnfn convert_short4(char4); short4 __ovld __cnfn convert_short4_sat(char4); short4 __ovld __cnfn convert_short4_rte(uchar4); short4 __ovld __cnfn convert_short4_sat_rte(uchar4); short4 __ovld __cnfn convert_short4_rtz(uchar4); short4 __ovld __cnfn convert_short4_sat_rtz(uchar4); short4 __ovld __cnfn convert_short4_rtp(uchar4); short4 __ovld __cnfn convert_short4_sat_rtp(uchar4); short4 __ovld __cnfn convert_short4_rtn(uchar4); short4 __ovld __cnfn convert_short4_sat_rtn(uchar4); short4 __ovld __cnfn convert_short4(uchar4); short4 __ovld __cnfn convert_short4_sat(uchar4); short4 __ovld __cnfn convert_short4_rte(short4); short4 __ovld __cnfn convert_short4_sat_rte(short4); short4 __ovld __cnfn convert_short4_rtz(short4); short4 __ovld __cnfn convert_short4_sat_rtz(short4); short4 __ovld __cnfn convert_short4_rtp(short4); short4 __ovld __cnfn convert_short4_sat_rtp(short4); short4 __ovld __cnfn convert_short4_rtn(short4); short4 __ovld __cnfn convert_short4_sat_rtn(short4); short4 __ovld __cnfn convert_short4(short4); short4 __ovld __cnfn convert_short4_sat(short4); short4 __ovld __cnfn convert_short4_rte(ushort4); short4 __ovld __cnfn convert_short4_sat_rte(ushort4); short4 __ovld __cnfn convert_short4_rtz(ushort4); short4 __ovld __cnfn convert_short4_sat_rtz(ushort4); short4 __ovld __cnfn convert_short4_rtp(ushort4); short4 __ovld __cnfn convert_short4_sat_rtp(ushort4); short4 __ovld __cnfn convert_short4_rtn(ushort4); short4 __ovld __cnfn convert_short4_sat_rtn(ushort4); short4 __ovld __cnfn convert_short4(ushort4); short4 __ovld __cnfn convert_short4_sat(ushort4); short4 __ovld __cnfn convert_short4_rte(int4); short4 __ovld __cnfn convert_short4_sat_rte(int4); short4 __ovld __cnfn convert_short4_rtz(int4); short4 __ovld __cnfn convert_short4_sat_rtz(int4); short4 __ovld __cnfn convert_short4_rtp(int4); short4 __ovld __cnfn convert_short4_sat_rtp(int4); short4 __ovld __cnfn convert_short4_rtn(int4); short4 __ovld __cnfn convert_short4_sat_rtn(int4); short4 __ovld __cnfn convert_short4(int4); short4 __ovld __cnfn convert_short4_sat(int4); short4 __ovld __cnfn convert_short4_rte(uint4); short4 __ovld __cnfn convert_short4_sat_rte(uint4); short4 __ovld __cnfn convert_short4_rtz(uint4); short4 __ovld __cnfn convert_short4_sat_rtz(uint4); short4 __ovld __cnfn convert_short4_rtp(uint4); short4 __ovld __cnfn convert_short4_sat_rtp(uint4); short4 __ovld __cnfn convert_short4_rtn(uint4); short4 __ovld __cnfn convert_short4_sat_rtn(uint4); short4 __ovld __cnfn convert_short4(uint4); short4 __ovld __cnfn convert_short4_sat(uint4); short4 __ovld __cnfn convert_short4_rte(long4); short4 __ovld __cnfn convert_short4_sat_rte(long4); short4 __ovld __cnfn convert_short4_rtz(long4); short4 __ovld __cnfn convert_short4_sat_rtz(long4); short4 __ovld __cnfn convert_short4_rtp(long4); short4 __ovld __cnfn convert_short4_sat_rtp(long4); short4 __ovld __cnfn convert_short4_rtn(long4); short4 __ovld __cnfn convert_short4_sat_rtn(long4); short4 __ovld __cnfn convert_short4(long4); short4 __ovld __cnfn convert_short4_sat(long4); short4 __ovld __cnfn convert_short4_rte(ulong4); 
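/*
 * Illustrative sketch with a hypothetical kernel, not taken from the dump: a
 * common pattern is to widen packed bytes to float for arithmetic and narrow
 * back with a saturating conversion so results never wrap.
 */
__kernel void scale_pixels(__global const uchar4 *src, __global uchar4 *dst,
                           float gain) {
    size_t i = get_global_id(0);
    /* uchar4 -> float4 is always exact, so no _sat or rounding suffix is needed. */
    float4 px = convert_float4(src[i]);
    px *= gain;
    /* float4 -> uchar4: round to nearest even, then saturate to [0, 255]. */
    dst[i] = convert_uchar4_sat_rte(px);
}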
short4 __ovld __cnfn convert_short4_sat_rte(ulong4); short4 __ovld __cnfn convert_short4_rtz(ulong4); short4 __ovld __cnfn convert_short4_sat_rtz(ulong4); short4 __ovld __cnfn convert_short4_rtp(ulong4); short4 __ovld __cnfn convert_short4_sat_rtp(ulong4); short4 __ovld __cnfn convert_short4_rtn(ulong4); short4 __ovld __cnfn convert_short4_sat_rtn(ulong4); short4 __ovld __cnfn convert_short4(ulong4); short4 __ovld __cnfn convert_short4_sat(ulong4); short4 __ovld __cnfn convert_short4_rte(float4); short4 __ovld __cnfn convert_short4_sat_rte(float4); short4 __ovld __cnfn convert_short4_rtz(float4); short4 __ovld __cnfn convert_short4_sat_rtz(float4); short4 __ovld __cnfn convert_short4_rtp(float4); short4 __ovld __cnfn convert_short4_sat_rtp(float4); short4 __ovld __cnfn convert_short4_rtn(float4); short4 __ovld __cnfn convert_short4_sat_rtn(float4); short4 __ovld __cnfn convert_short4(float4); short4 __ovld __cnfn convert_short4_sat(float4); ushort4 __ovld __cnfn convert_ushort4_rte(char4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(char4); ushort4 __ovld __cnfn convert_ushort4_rtz(char4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(char4); ushort4 __ovld __cnfn convert_ushort4_rtp(char4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(char4); ushort4 __ovld __cnfn convert_ushort4_rtn(char4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(char4); ushort4 __ovld __cnfn convert_ushort4(char4); ushort4 __ovld __cnfn convert_ushort4_sat(char4); ushort4 __ovld __cnfn convert_ushort4_rte(uchar4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(uchar4); ushort4 __ovld __cnfn convert_ushort4_rtz(uchar4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(uchar4); ushort4 __ovld __cnfn convert_ushort4_rtp(uchar4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(uchar4); ushort4 __ovld __cnfn convert_ushort4_rtn(uchar4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(uchar4); ushort4 __ovld __cnfn convert_ushort4(uchar4); ushort4 __ovld __cnfn convert_ushort4_sat(uchar4); ushort4 __ovld __cnfn convert_ushort4_rte(short4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(short4); ushort4 __ovld __cnfn convert_ushort4_rtz(short4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(short4); ushort4 __ovld __cnfn convert_ushort4_rtp(short4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(short4); ushort4 __ovld __cnfn convert_ushort4_rtn(short4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(short4); ushort4 __ovld __cnfn convert_ushort4(short4); ushort4 __ovld __cnfn convert_ushort4_sat(short4); ushort4 __ovld __cnfn convert_ushort4_rte(ushort4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(ushort4); ushort4 __ovld __cnfn convert_ushort4_rtz(ushort4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(ushort4); ushort4 __ovld __cnfn convert_ushort4_rtp(ushort4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(ushort4); ushort4 __ovld __cnfn convert_ushort4_rtn(ushort4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(ushort4); ushort4 __ovld __cnfn convert_ushort4(ushort4); ushort4 __ovld __cnfn convert_ushort4_sat(ushort4); ushort4 __ovld __cnfn convert_ushort4_rte(int4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(int4); ushort4 __ovld __cnfn convert_ushort4_rtz(int4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(int4); ushort4 __ovld __cnfn convert_ushort4_rtp(int4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(int4); ushort4 __ovld __cnfn convert_ushort4_rtn(int4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(int4); ushort4 __ovld __cnfn convert_ushort4(int4); ushort4 __ovld __cnfn convert_ushort4_sat(int4); ushort4 
__ovld __cnfn convert_ushort4_rte(uint4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(uint4); ushort4 __ovld __cnfn convert_ushort4_rtz(uint4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(uint4); ushort4 __ovld __cnfn convert_ushort4_rtp(uint4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(uint4); ushort4 __ovld __cnfn convert_ushort4_rtn(uint4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(uint4); ushort4 __ovld __cnfn convert_ushort4(uint4); ushort4 __ovld __cnfn convert_ushort4_sat(uint4); ushort4 __ovld __cnfn convert_ushort4_rte(long4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(long4); ushort4 __ovld __cnfn convert_ushort4_rtz(long4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(long4); ushort4 __ovld __cnfn convert_ushort4_rtp(long4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(long4); ushort4 __ovld __cnfn convert_ushort4_rtn(long4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(long4); ushort4 __ovld __cnfn convert_ushort4(long4); ushort4 __ovld __cnfn convert_ushort4_sat(long4); ushort4 __ovld __cnfn convert_ushort4_rte(ulong4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(ulong4); ushort4 __ovld __cnfn convert_ushort4_rtz(ulong4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(ulong4); ushort4 __ovld __cnfn convert_ushort4_rtp(ulong4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(ulong4); ushort4 __ovld __cnfn convert_ushort4_rtn(ulong4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(ulong4); ushort4 __ovld __cnfn convert_ushort4(ulong4); ushort4 __ovld __cnfn convert_ushort4_sat(ulong4); ushort4 __ovld __cnfn convert_ushort4_rte(float4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(float4); ushort4 __ovld __cnfn convert_ushort4_rtz(float4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(float4); ushort4 __ovld __cnfn convert_ushort4_rtp(float4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(float4); ushort4 __ovld __cnfn convert_ushort4_rtn(float4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(float4); ushort4 __ovld __cnfn convert_ushort4(float4); ushort4 __ovld __cnfn convert_ushort4_sat(float4); int4 __ovld __cnfn convert_int4_rte(char4); int4 __ovld __cnfn convert_int4_sat_rte(char4); int4 __ovld __cnfn convert_int4_rtz(char4); int4 __ovld __cnfn convert_int4_sat_rtz(char4); int4 __ovld __cnfn convert_int4_rtp(char4); int4 __ovld __cnfn convert_int4_sat_rtp(char4); int4 __ovld __cnfn convert_int4_rtn(char4); int4 __ovld __cnfn convert_int4_sat_rtn(char4); int4 __ovld __cnfn convert_int4(char4); int4 __ovld __cnfn convert_int4_sat(char4); int4 __ovld __cnfn convert_int4_rte(uchar4); int4 __ovld __cnfn convert_int4_sat_rte(uchar4); int4 __ovld __cnfn convert_int4_rtz(uchar4); int4 __ovld __cnfn convert_int4_sat_rtz(uchar4); int4 __ovld __cnfn convert_int4_rtp(uchar4); int4 __ovld __cnfn convert_int4_sat_rtp(uchar4); int4 __ovld __cnfn convert_int4_rtn(uchar4); int4 __ovld __cnfn convert_int4_sat_rtn(uchar4); int4 __ovld __cnfn convert_int4(uchar4); int4 __ovld __cnfn convert_int4_sat(uchar4); int4 __ovld __cnfn convert_int4_rte(short4); int4 __ovld __cnfn convert_int4_sat_rte(short4); int4 __ovld __cnfn convert_int4_rtz(short4); int4 __ovld __cnfn convert_int4_sat_rtz(short4); int4 __ovld __cnfn convert_int4_rtp(short4); int4 __ovld __cnfn convert_int4_sat_rtp(short4); int4 __ovld __cnfn convert_int4_rtn(short4); int4 __ovld __cnfn convert_int4_sat_rtn(short4); int4 __ovld __cnfn convert_int4(short4); int4 __ovld __cnfn convert_int4_sat(short4); int4 __ovld __cnfn convert_int4_rte(ushort4); int4 __ovld __cnfn convert_int4_sat_rte(ushort4); int4 __ovld __cnfn 
convert_int4_rtz(ushort4); int4 __ovld __cnfn convert_int4_sat_rtz(ushort4); int4 __ovld __cnfn convert_int4_rtp(ushort4); int4 __ovld __cnfn convert_int4_sat_rtp(ushort4); int4 __ovld __cnfn convert_int4_rtn(ushort4); int4 __ovld __cnfn convert_int4_sat_rtn(ushort4); int4 __ovld __cnfn convert_int4(ushort4); int4 __ovld __cnfn convert_int4_sat(ushort4); int4 __ovld __cnfn convert_int4_rte(int4); int4 __ovld __cnfn convert_int4_sat_rte(int4); int4 __ovld __cnfn convert_int4_rtz(int4); int4 __ovld __cnfn convert_int4_sat_rtz(int4); int4 __ovld __cnfn convert_int4_rtp(int4); int4 __ovld __cnfn convert_int4_sat_rtp(int4); int4 __ovld __cnfn convert_int4_rtn(int4); int4 __ovld __cnfn convert_int4_sat_rtn(int4); int4 __ovld __cnfn convert_int4(int4); int4 __ovld __cnfn convert_int4_sat(int4); int4 __ovld __cnfn convert_int4_rte(uint4); int4 __ovld __cnfn convert_int4_sat_rte(uint4); int4 __ovld __cnfn convert_int4_rtz(uint4); int4 __ovld __cnfn convert_int4_sat_rtz(uint4); int4 __ovld __cnfn convert_int4_rtp(uint4); int4 __ovld __cnfn convert_int4_sat_rtp(uint4); int4 __ovld __cnfn convert_int4_rtn(uint4); int4 __ovld __cnfn convert_int4_sat_rtn(uint4); int4 __ovld __cnfn convert_int4(uint4); int4 __ovld __cnfn convert_int4_sat(uint4); int4 __ovld __cnfn convert_int4_rte(long4); int4 __ovld __cnfn convert_int4_sat_rte(long4); int4 __ovld __cnfn convert_int4_rtz(long4); int4 __ovld __cnfn convert_int4_sat_rtz(long4); int4 __ovld __cnfn convert_int4_rtp(long4); int4 __ovld __cnfn convert_int4_sat_rtp(long4); int4 __ovld __cnfn convert_int4_rtn(long4); int4 __ovld __cnfn convert_int4_sat_rtn(long4); int4 __ovld __cnfn convert_int4(long4); int4 __ovld __cnfn convert_int4_sat(long4); int4 __ovld __cnfn convert_int4_rte(ulong4); int4 __ovld __cnfn convert_int4_sat_rte(ulong4); int4 __ovld __cnfn convert_int4_rtz(ulong4); int4 __ovld __cnfn convert_int4_sat_rtz(ulong4); int4 __ovld __cnfn convert_int4_rtp(ulong4); int4 __ovld __cnfn convert_int4_sat_rtp(ulong4); int4 __ovld __cnfn convert_int4_rtn(ulong4); int4 __ovld __cnfn convert_int4_sat_rtn(ulong4); int4 __ovld __cnfn convert_int4(ulong4); int4 __ovld __cnfn convert_int4_sat(ulong4); int4 __ovld __cnfn convert_int4_rte(float4); int4 __ovld __cnfn convert_int4_sat_rte(float4); int4 __ovld __cnfn convert_int4_rtz(float4); int4 __ovld __cnfn convert_int4_sat_rtz(float4); int4 __ovld __cnfn convert_int4_rtp(float4); int4 __ovld __cnfn convert_int4_sat_rtp(float4); int4 __ovld __cnfn convert_int4_rtn(float4); int4 __ovld __cnfn convert_int4_sat_rtn(float4); int4 __ovld __cnfn convert_int4(float4); int4 __ovld __cnfn convert_int4_sat(float4); uint4 __ovld __cnfn convert_uint4_rte(char4); uint4 __ovld __cnfn convert_uint4_sat_rte(char4); uint4 __ovld __cnfn convert_uint4_rtz(char4); uint4 __ovld __cnfn convert_uint4_sat_rtz(char4); uint4 __ovld __cnfn convert_uint4_rtp(char4); uint4 __ovld __cnfn convert_uint4_sat_rtp(char4); uint4 __ovld __cnfn convert_uint4_rtn(char4); uint4 __ovld __cnfn convert_uint4_sat_rtn(char4); uint4 __ovld __cnfn convert_uint4(char4); uint4 __ovld __cnfn convert_uint4_sat(char4); uint4 __ovld __cnfn convert_uint4_rte(uchar4); uint4 __ovld __cnfn convert_uint4_sat_rte(uchar4); uint4 __ovld __cnfn convert_uint4_rtz(uchar4); uint4 __ovld __cnfn convert_uint4_sat_rtz(uchar4); uint4 __ovld __cnfn convert_uint4_rtp(uchar4); uint4 __ovld __cnfn convert_uint4_sat_rtp(uchar4); uint4 __ovld __cnfn convert_uint4_rtn(uchar4); uint4 __ovld __cnfn convert_uint4_sat_rtn(uchar4); uint4 __ovld __cnfn convert_uint4(uchar4); uint4 __ovld __cnfn 
convert_uint4_sat(uchar4); uint4 __ovld __cnfn convert_uint4_rte(short4); uint4 __ovld __cnfn convert_uint4_sat_rte(short4); uint4 __ovld __cnfn convert_uint4_rtz(short4); uint4 __ovld __cnfn convert_uint4_sat_rtz(short4); uint4 __ovld __cnfn convert_uint4_rtp(short4); uint4 __ovld __cnfn convert_uint4_sat_rtp(short4); uint4 __ovld __cnfn convert_uint4_rtn(short4); uint4 __ovld __cnfn convert_uint4_sat_rtn(short4); uint4 __ovld __cnfn convert_uint4(short4); uint4 __ovld __cnfn convert_uint4_sat(short4); uint4 __ovld __cnfn convert_uint4_rte(ushort4); uint4 __ovld __cnfn convert_uint4_sat_rte(ushort4); uint4 __ovld __cnfn convert_uint4_rtz(ushort4); uint4 __ovld __cnfn convert_uint4_sat_rtz(ushort4); uint4 __ovld __cnfn convert_uint4_rtp(ushort4); uint4 __ovld __cnfn convert_uint4_sat_rtp(ushort4); uint4 __ovld __cnfn convert_uint4_rtn(ushort4); uint4 __ovld __cnfn convert_uint4_sat_rtn(ushort4); uint4 __ovld __cnfn convert_uint4(ushort4); uint4 __ovld __cnfn convert_uint4_sat(ushort4); uint4 __ovld __cnfn convert_uint4_rte(int4); uint4 __ovld __cnfn convert_uint4_sat_rte(int4); uint4 __ovld __cnfn convert_uint4_rtz(int4); uint4 __ovld __cnfn convert_uint4_sat_rtz(int4); uint4 __ovld __cnfn convert_uint4_rtp(int4); uint4 __ovld __cnfn convert_uint4_sat_rtp(int4); uint4 __ovld __cnfn convert_uint4_rtn(int4); uint4 __ovld __cnfn convert_uint4_sat_rtn(int4); uint4 __ovld __cnfn convert_uint4(int4); uint4 __ovld __cnfn convert_uint4_sat(int4); uint4 __ovld __cnfn convert_uint4_rte(uint4); uint4 __ovld __cnfn convert_uint4_sat_rte(uint4); uint4 __ovld __cnfn convert_uint4_rtz(uint4); uint4 __ovld __cnfn convert_uint4_sat_rtz(uint4); uint4 __ovld __cnfn convert_uint4_rtp(uint4); uint4 __ovld __cnfn convert_uint4_sat_rtp(uint4); uint4 __ovld __cnfn convert_uint4_rtn(uint4); uint4 __ovld __cnfn convert_uint4_sat_rtn(uint4); uint4 __ovld __cnfn convert_uint4(uint4); uint4 __ovld __cnfn convert_uint4_sat(uint4); uint4 __ovld __cnfn convert_uint4_rte(long4); uint4 __ovld __cnfn convert_uint4_sat_rte(long4); uint4 __ovld __cnfn convert_uint4_rtz(long4); uint4 __ovld __cnfn convert_uint4_sat_rtz(long4); uint4 __ovld __cnfn convert_uint4_rtp(long4); uint4 __ovld __cnfn convert_uint4_sat_rtp(long4); uint4 __ovld __cnfn convert_uint4_rtn(long4); uint4 __ovld __cnfn convert_uint4_sat_rtn(long4); uint4 __ovld __cnfn convert_uint4(long4); uint4 __ovld __cnfn convert_uint4_sat(long4); uint4 __ovld __cnfn convert_uint4_rte(ulong4); uint4 __ovld __cnfn convert_uint4_sat_rte(ulong4); uint4 __ovld __cnfn convert_uint4_rtz(ulong4); uint4 __ovld __cnfn convert_uint4_sat_rtz(ulong4); uint4 __ovld __cnfn convert_uint4_rtp(ulong4); uint4 __ovld __cnfn convert_uint4_sat_rtp(ulong4); uint4 __ovld __cnfn convert_uint4_rtn(ulong4); uint4 __ovld __cnfn convert_uint4_sat_rtn(ulong4); uint4 __ovld __cnfn convert_uint4(ulong4); uint4 __ovld __cnfn convert_uint4_sat(ulong4); uint4 __ovld __cnfn convert_uint4_rte(float4); uint4 __ovld __cnfn convert_uint4_sat_rte(float4); uint4 __ovld __cnfn convert_uint4_rtz(float4); uint4 __ovld __cnfn convert_uint4_sat_rtz(float4); uint4 __ovld __cnfn convert_uint4_rtp(float4); uint4 __ovld __cnfn convert_uint4_sat_rtp(float4); uint4 __ovld __cnfn convert_uint4_rtn(float4); uint4 __ovld __cnfn convert_uint4_sat_rtn(float4); uint4 __ovld __cnfn convert_uint4(float4); uint4 __ovld __cnfn convert_uint4_sat(float4); long4 __ovld __cnfn convert_long4_rte(char4); long4 __ovld __cnfn convert_long4_sat_rte(char4); long4 __ovld __cnfn convert_long4_rtz(char4); long4 __ovld __cnfn 
convert_long4_sat_rtz(char4); long4 __ovld __cnfn convert_long4_rtp(char4); long4 __ovld __cnfn convert_long4_sat_rtp(char4); long4 __ovld __cnfn convert_long4_rtn(char4); long4 __ovld __cnfn convert_long4_sat_rtn(char4); long4 __ovld __cnfn convert_long4(char4); long4 __ovld __cnfn convert_long4_sat(char4); long4 __ovld __cnfn convert_long4_rte(uchar4); long4 __ovld __cnfn convert_long4_sat_rte(uchar4); long4 __ovld __cnfn convert_long4_rtz(uchar4); long4 __ovld __cnfn convert_long4_sat_rtz(uchar4); long4 __ovld __cnfn convert_long4_rtp(uchar4); long4 __ovld __cnfn convert_long4_sat_rtp(uchar4); long4 __ovld __cnfn convert_long4_rtn(uchar4); long4 __ovld __cnfn convert_long4_sat_rtn(uchar4); long4 __ovld __cnfn convert_long4(uchar4); long4 __ovld __cnfn convert_long4_sat(uchar4); long4 __ovld __cnfn convert_long4_rte(short4); long4 __ovld __cnfn convert_long4_sat_rte(short4); long4 __ovld __cnfn convert_long4_rtz(short4); long4 __ovld __cnfn convert_long4_sat_rtz(short4); long4 __ovld __cnfn convert_long4_rtp(short4); long4 __ovld __cnfn convert_long4_sat_rtp(short4); long4 __ovld __cnfn convert_long4_rtn(short4); long4 __ovld __cnfn convert_long4_sat_rtn(short4); long4 __ovld __cnfn convert_long4(short4); long4 __ovld __cnfn convert_long4_sat(short4); long4 __ovld __cnfn convert_long4_rte(ushort4); long4 __ovld __cnfn convert_long4_sat_rte(ushort4); long4 __ovld __cnfn convert_long4_rtz(ushort4); long4 __ovld __cnfn convert_long4_sat_rtz(ushort4); long4 __ovld __cnfn convert_long4_rtp(ushort4); long4 __ovld __cnfn convert_long4_sat_rtp(ushort4); long4 __ovld __cnfn convert_long4_rtn(ushort4); long4 __ovld __cnfn convert_long4_sat_rtn(ushort4); long4 __ovld __cnfn convert_long4(ushort4); long4 __ovld __cnfn convert_long4_sat(ushort4); long4 __ovld __cnfn convert_long4_rte(int4); long4 __ovld __cnfn convert_long4_sat_rte(int4); long4 __ovld __cnfn convert_long4_rtz(int4); long4 __ovld __cnfn convert_long4_sat_rtz(int4); long4 __ovld __cnfn convert_long4_rtp(int4); long4 __ovld __cnfn convert_long4_sat_rtp(int4); long4 __ovld __cnfn convert_long4_rtn(int4); long4 __ovld __cnfn convert_long4_sat_rtn(int4); long4 __ovld __cnfn convert_long4(int4); long4 __ovld __cnfn convert_long4_sat(int4); long4 __ovld __cnfn convert_long4_rte(uint4); long4 __ovld __cnfn convert_long4_sat_rte(uint4); long4 __ovld __cnfn convert_long4_rtz(uint4); long4 __ovld __cnfn convert_long4_sat_rtz(uint4); long4 __ovld __cnfn convert_long4_rtp(uint4); long4 __ovld __cnfn convert_long4_sat_rtp(uint4); long4 __ovld __cnfn convert_long4_rtn(uint4); long4 __ovld __cnfn convert_long4_sat_rtn(uint4); long4 __ovld __cnfn convert_long4(uint4); long4 __ovld __cnfn convert_long4_sat(uint4); long4 __ovld __cnfn convert_long4_rte(long4); long4 __ovld __cnfn convert_long4_sat_rte(long4); long4 __ovld __cnfn convert_long4_rtz(long4); long4 __ovld __cnfn convert_long4_sat_rtz(long4); long4 __ovld __cnfn convert_long4_rtp(long4); long4 __ovld __cnfn convert_long4_sat_rtp(long4); long4 __ovld __cnfn convert_long4_rtn(long4); long4 __ovld __cnfn convert_long4_sat_rtn(long4); long4 __ovld __cnfn convert_long4(long4); long4 __ovld __cnfn convert_long4_sat(long4); long4 __ovld __cnfn convert_long4_rte(ulong4); long4 __ovld __cnfn convert_long4_sat_rte(ulong4); long4 __ovld __cnfn convert_long4_rtz(ulong4); long4 __ovld __cnfn convert_long4_sat_rtz(ulong4); long4 __ovld __cnfn convert_long4_rtp(ulong4); long4 __ovld __cnfn convert_long4_sat_rtp(ulong4); long4 __ovld __cnfn convert_long4_rtn(ulong4); long4 __ovld __cnfn 
convert_long4_sat_rtn(ulong4); long4 __ovld __cnfn convert_long4(ulong4); long4 __ovld __cnfn convert_long4_sat(ulong4); long4 __ovld __cnfn convert_long4_rte(float4); long4 __ovld __cnfn convert_long4_sat_rte(float4); long4 __ovld __cnfn convert_long4_rtz(float4); long4 __ovld __cnfn convert_long4_sat_rtz(float4); long4 __ovld __cnfn convert_long4_rtp(float4); long4 __ovld __cnfn convert_long4_sat_rtp(float4); long4 __ovld __cnfn convert_long4_rtn(float4); long4 __ovld __cnfn convert_long4_sat_rtn(float4); long4 __ovld __cnfn convert_long4(float4); long4 __ovld __cnfn convert_long4_sat(float4); ulong4 __ovld __cnfn convert_ulong4_rte(char4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(char4); ulong4 __ovld __cnfn convert_ulong4_rtz(char4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(char4); ulong4 __ovld __cnfn convert_ulong4_rtp(char4); ulong4 __ovld __cnfn convert_ulong4_sat_rtp(char4); ulong4 __ovld __cnfn convert_ulong4_rtn(char4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(char4); ulong4 __ovld __cnfn convert_ulong4(char4); ulong4 __ovld __cnfn convert_ulong4_sat(char4); ulong4 __ovld __cnfn convert_ulong4_rte(uchar4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(uchar4); ulong4 __ovld __cnfn convert_ulong4_rtz(uchar4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(uchar4); ulong4 __ovld __cnfn convert_ulong4_rtp(uchar4); ulong4 __ovld __cnfn convert_ulong4_sat_rtp(uchar4); ulong4 __ovld __cnfn convert_ulong4_rtn(uchar4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(uchar4); ulong4 __ovld __cnfn convert_ulong4(uchar4); ulong4 __ovld __cnfn convert_ulong4_sat(uchar4); ulong4 __ovld __cnfn convert_ulong4_rte(short4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(short4); ulong4 __ovld __cnfn convert_ulong4_rtz(short4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(short4); ulong4 __ovld __cnfn convert_ulong4_rtp(short4); ulong4 __ovld __cnfn convert_ulong4_sat_rtp(short4); ulong4 __ovld __cnfn convert_ulong4_rtn(short4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(short4); ulong4 __ovld __cnfn convert_ulong4(short4); ulong4 __ovld __cnfn convert_ulong4_sat(short4); ulong4 __ovld __cnfn convert_ulong4_rte(ushort4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(ushort4); ulong4 __ovld __cnfn convert_ulong4_rtz(ushort4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(ushort4); ulong4 __ovld __cnfn convert_ulong4_rtp(ushort4); ulong4 __ovld __cnfn convert_ulong4_sat_rtp(ushort4); ulong4 __ovld __cnfn convert_ulong4_rtn(ushort4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(ushort4); ulong4 __ovld __cnfn convert_ulong4(ushort4); ulong4 __ovld __cnfn convert_ulong4_sat(ushort4); ulong4 __ovld __cnfn convert_ulong4_rte(int4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(int4); ulong4 __ovld __cnfn convert_ulong4_rtz(int4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(int4); ulong4 __ovld __cnfn convert_ulong4_rtp(int4); ulong4 __ovld __cnfn convert_ulong4_sat_rtp(int4); ulong4 __ovld __cnfn convert_ulong4_rtn(int4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(int4); ulong4 __ovld __cnfn convert_ulong4(int4); ulong4 __ovld __cnfn convert_ulong4_sat(int4); ulong4 __ovld __cnfn convert_ulong4_rte(uint4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(uint4); ulong4 __ovld __cnfn convert_ulong4_rtz(uint4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(uint4); ulong4 __ovld __cnfn convert_ulong4_rtp(uint4); ulong4 __ovld __cnfn convert_ulong4_sat_rtp(uint4); ulong4 __ovld __cnfn convert_ulong4_rtn(uint4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(uint4); ulong4 __ovld __cnfn convert_ulong4(uint4); ulong4 __ovld 
__cnfn convert_ulong4_sat(uint4); ulong4 __ovld __cnfn convert_ulong4_rte(long4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(long4); ulong4 __ovld __cnfn convert_ulong4_rtz(long4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(long4); ulong4 __ovld __cnfn convert_ulong4_rtp(long4); ulong4 __ovld __cnfn convert_ulong4_sat_rtp(long4); ulong4 __ovld __cnfn convert_ulong4_rtn(long4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(long4); ulong4 __ovld __cnfn convert_ulong4(long4); ulong4 __ovld __cnfn convert_ulong4_sat(long4); ulong4 __ovld __cnfn convert_ulong4_rte(ulong4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(ulong4); ulong4 __ovld __cnfn convert_ulong4_rtz(ulong4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(ulong4); ulong4 __ovld __cnfn convert_ulong4_rtp(ulong4); ulong4 __ovld __cnfn convert_ulong4_sat_rtp(ulong4); ulong4 __ovld __cnfn convert_ulong4_rtn(ulong4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(ulong4); ulong4 __ovld __cnfn convert_ulong4(ulong4); ulong4 __ovld __cnfn convert_ulong4_sat(ulong4); ulong4 __ovld __cnfn convert_ulong4_rte(float4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(float4); ulong4 __ovld __cnfn convert_ulong4_rtz(float4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(float4); ulong4 __ovld __cnfn convert_ulong4_rtp(float4); ulong4 __ovld __cnfn convert_ulong4_sat_rtp(float4); ulong4 __ovld __cnfn convert_ulong4_rtn(float4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(float4); ulong4 __ovld __cnfn convert_ulong4(float4); ulong4 __ovld __cnfn convert_ulong4_sat(float4); float4 __ovld __cnfn convert_float4_rte(char4); float4 __ovld __cnfn convert_float4_rtz(char4); float4 __ovld __cnfn convert_float4_rtp(char4); float4 __ovld __cnfn convert_float4_rtn(char4); float4 __ovld __cnfn convert_float4(char4); float4 __ovld __cnfn convert_float4_rte(uchar4); float4 __ovld __cnfn convert_float4_rtz(uchar4); float4 __ovld __cnfn convert_float4_rtp(uchar4); float4 __ovld __cnfn convert_float4_rtn(uchar4); float4 __ovld __cnfn convert_float4(uchar4); float4 __ovld __cnfn convert_float4_rte(short4); float4 __ovld __cnfn convert_float4_rtz(short4); float4 __ovld __cnfn convert_float4_rtp(short4); float4 __ovld __cnfn convert_float4_rtn(short4); float4 __ovld __cnfn convert_float4(short4); float4 __ovld __cnfn convert_float4_rte(ushort4); float4 __ovld __cnfn convert_float4_rtz(ushort4); float4 __ovld __cnfn convert_float4_rtp(ushort4); float4 __ovld __cnfn convert_float4_rtn(ushort4); float4 __ovld __cnfn convert_float4(ushort4); float4 __ovld __cnfn convert_float4_rte(int4); float4 __ovld __cnfn convert_float4_rtz(int4); float4 __ovld __cnfn convert_float4_rtp(int4); float4 __ovld __cnfn convert_float4_rtn(int4); float4 __ovld __cnfn convert_float4(int4); float4 __ovld __cnfn convert_float4_rte(uint4); float4 __ovld __cnfn convert_float4_rtz(uint4); float4 __ovld __cnfn convert_float4_rtp(uint4); float4 __ovld __cnfn convert_float4_rtn(uint4); float4 __ovld __cnfn convert_float4(uint4); float4 __ovld __cnfn convert_float4_rte(long4); float4 __ovld __cnfn convert_float4_rtz(long4); float4 __ovld __cnfn convert_float4_rtp(long4); float4 __ovld __cnfn convert_float4_rtn(long4); float4 __ovld __cnfn convert_float4(long4); float4 __ovld __cnfn convert_float4_rte(ulong4); float4 __ovld __cnfn convert_float4_rtz(ulong4); float4 __ovld __cnfn convert_float4_rtp(ulong4); float4 __ovld __cnfn convert_float4_rtn(ulong4); float4 __ovld __cnfn convert_float4(ulong4); float4 __ovld __cnfn convert_float4_rte(float4); float4 __ovld __cnfn convert_float4_rtz(float4); float4 __ovld 
__cnfn convert_float4_rtp(float4); float4 __ovld __cnfn convert_float4_rtn(float4); float4 __ovld __cnfn convert_float4(float4); char8 __ovld __cnfn convert_char8_rte(char8); char8 __ovld __cnfn convert_char8_sat_rte(char8); char8 __ovld __cnfn convert_char8_rtz(char8); char8 __ovld __cnfn convert_char8_sat_rtz(char8); char8 __ovld __cnfn convert_char8_rtp(char8); char8 __ovld __cnfn convert_char8_sat_rtp(char8); char8 __ovld __cnfn convert_char8_rtn(char8); char8 __ovld __cnfn convert_char8_sat_rtn(char8); char8 __ovld __cnfn convert_char8(char8); char8 __ovld __cnfn convert_char8_sat(char8); char8 __ovld __cnfn convert_char8_rte(uchar8); char8 __ovld __cnfn convert_char8_sat_rte(uchar8); char8 __ovld __cnfn convert_char8_rtz(uchar8); char8 __ovld __cnfn convert_char8_sat_rtz(uchar8); char8 __ovld __cnfn convert_char8_rtp(uchar8); char8 __ovld __cnfn convert_char8_sat_rtp(uchar8); char8 __ovld __cnfn convert_char8_rtn(uchar8); char8 __ovld __cnfn convert_char8_sat_rtn(uchar8); char8 __ovld __cnfn convert_char8(uchar8); char8 __ovld __cnfn convert_char8_sat(uchar8); char8 __ovld __cnfn convert_char8_rte(short8); char8 __ovld __cnfn convert_char8_sat_rte(short8); char8 __ovld __cnfn convert_char8_rtz(short8); char8 __ovld __cnfn convert_char8_sat_rtz(short8); char8 __ovld __cnfn convert_char8_rtp(short8); char8 __ovld __cnfn convert_char8_sat_rtp(short8); char8 __ovld __cnfn convert_char8_rtn(short8); char8 __ovld __cnfn convert_char8_sat_rtn(short8); char8 __ovld __cnfn convert_char8(short8); char8 __ovld __cnfn convert_char8_sat(short8); char8 __ovld __cnfn convert_char8_rte(ushort8); char8 __ovld __cnfn convert_char8_sat_rte(ushort8); char8 __ovld __cnfn convert_char8_rtz(ushort8); char8 __ovld __cnfn convert_char8_sat_rtz(ushort8); char8 __ovld __cnfn convert_char8_rtp(ushort8); char8 __ovld __cnfn convert_char8_sat_rtp(ushort8); char8 __ovld __cnfn convert_char8_rtn(ushort8); char8 __ovld __cnfn convert_char8_sat_rtn(ushort8); char8 __ovld __cnfn convert_char8(ushort8); char8 __ovld __cnfn convert_char8_sat(ushort8); char8 __ovld __cnfn convert_char8_rte(int8); char8 __ovld __cnfn convert_char8_sat_rte(int8); char8 __ovld __cnfn convert_char8_rtz(int8); char8 __ovld __cnfn convert_char8_sat_rtz(int8); char8 __ovld __cnfn convert_char8_rtp(int8); char8 __ovld __cnfn convert_char8_sat_rtp(int8); char8 __ovld __cnfn convert_char8_rtn(int8); char8 __ovld __cnfn convert_char8_sat_rtn(int8); char8 __ovld __cnfn convert_char8(int8); char8 __ovld __cnfn convert_char8_sat(int8); char8 __ovld __cnfn convert_char8_rte(uint8); char8 __ovld __cnfn convert_char8_sat_rte(uint8); char8 __ovld __cnfn convert_char8_rtz(uint8); char8 __ovld __cnfn convert_char8_sat_rtz(uint8); char8 __ovld __cnfn convert_char8_rtp(uint8); char8 __ovld __cnfn convert_char8_sat_rtp(uint8); char8 __ovld __cnfn convert_char8_rtn(uint8); char8 __ovld __cnfn convert_char8_sat_rtn(uint8); char8 __ovld __cnfn convert_char8(uint8); char8 __ovld __cnfn convert_char8_sat(uint8); char8 __ovld __cnfn convert_char8_rte(long8); char8 __ovld __cnfn convert_char8_sat_rte(long8); char8 __ovld __cnfn convert_char8_rtz(long8); char8 __ovld __cnfn convert_char8_sat_rtz(long8); char8 __ovld __cnfn convert_char8_rtp(long8); char8 __ovld __cnfn convert_char8_sat_rtp(long8); char8 __ovld __cnfn convert_char8_rtn(long8); char8 __ovld __cnfn convert_char8_sat_rtn(long8); char8 __ovld __cnfn convert_char8(long8); char8 __ovld __cnfn convert_char8_sat(long8); char8 __ovld __cnfn convert_char8_rte(ulong8); char8 __ovld __cnfn 
convert_char8_sat_rte(ulong8); char8 __ovld __cnfn convert_char8_rtz(ulong8); char8 __ovld __cnfn convert_char8_sat_rtz(ulong8); char8 __ovld __cnfn convert_char8_rtp(ulong8); char8 __ovld __cnfn convert_char8_sat_rtp(ulong8); char8 __ovld __cnfn convert_char8_rtn(ulong8); char8 __ovld __cnfn convert_char8_sat_rtn(ulong8); char8 __ovld __cnfn convert_char8(ulong8); char8 __ovld __cnfn convert_char8_sat(ulong8); char8 __ovld __cnfn convert_char8_rte(float8); char8 __ovld __cnfn convert_char8_sat_rte(float8); char8 __ovld __cnfn convert_char8_rtz(float8); char8 __ovld __cnfn convert_char8_sat_rtz(float8); char8 __ovld __cnfn convert_char8_rtp(float8); char8 __ovld __cnfn convert_char8_sat_rtp(float8); char8 __ovld __cnfn convert_char8_rtn(float8); char8 __ovld __cnfn convert_char8_sat_rtn(float8); char8 __ovld __cnfn convert_char8(float8); char8 __ovld __cnfn convert_char8_sat(float8); uchar8 __ovld __cnfn convert_uchar8_rte(char8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(char8); uchar8 __ovld __cnfn convert_uchar8_rtz(char8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(char8); uchar8 __ovld __cnfn convert_uchar8_rtp(char8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(char8); uchar8 __ovld __cnfn convert_uchar8_rtn(char8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(char8); uchar8 __ovld __cnfn convert_uchar8(char8); uchar8 __ovld __cnfn convert_uchar8_sat(char8); uchar8 __ovld __cnfn convert_uchar8_rte(uchar8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(uchar8); uchar8 __ovld __cnfn convert_uchar8_rtz(uchar8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(uchar8); uchar8 __ovld __cnfn convert_uchar8_rtp(uchar8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(uchar8); uchar8 __ovld __cnfn convert_uchar8_rtn(uchar8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(uchar8); uchar8 __ovld __cnfn convert_uchar8(uchar8); uchar8 __ovld __cnfn convert_uchar8_sat(uchar8); uchar8 __ovld __cnfn convert_uchar8_rte(short8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(short8); uchar8 __ovld __cnfn convert_uchar8_rtz(short8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(short8); uchar8 __ovld __cnfn convert_uchar8_rtp(short8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(short8); uchar8 __ovld __cnfn convert_uchar8_rtn(short8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(short8); uchar8 __ovld __cnfn convert_uchar8(short8); uchar8 __ovld __cnfn convert_uchar8_sat(short8); uchar8 __ovld __cnfn convert_uchar8_rte(ushort8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(ushort8); uchar8 __ovld __cnfn convert_uchar8_rtz(ushort8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(ushort8); uchar8 __ovld __cnfn convert_uchar8_rtp(ushort8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(ushort8); uchar8 __ovld __cnfn convert_uchar8_rtn(ushort8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(ushort8); uchar8 __ovld __cnfn convert_uchar8(ushort8); uchar8 __ovld __cnfn convert_uchar8_sat(ushort8); uchar8 __ovld __cnfn convert_uchar8_rte(int8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(int8); uchar8 __ovld __cnfn convert_uchar8_rtz(int8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(int8); uchar8 __ovld __cnfn convert_uchar8_rtp(int8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(int8); uchar8 __ovld __cnfn convert_uchar8_rtn(int8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(int8); uchar8 __ovld __cnfn convert_uchar8(int8); uchar8 __ovld __cnfn convert_uchar8_sat(int8); uchar8 __ovld __cnfn convert_uchar8_rte(uint8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(uint8); uchar8 __ovld __cnfn convert_uchar8_rtz(uint8); uchar8 __ovld 
__cnfn convert_uchar8_sat_rtz(uint8); uchar8 __ovld __cnfn convert_uchar8_rtp(uint8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(uint8); uchar8 __ovld __cnfn convert_uchar8_rtn(uint8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(uint8); uchar8 __ovld __cnfn convert_uchar8(uint8); uchar8 __ovld __cnfn convert_uchar8_sat(uint8); uchar8 __ovld __cnfn convert_uchar8_rte(long8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(long8); uchar8 __ovld __cnfn convert_uchar8_rtz(long8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(long8); uchar8 __ovld __cnfn convert_uchar8_rtp(long8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(long8); uchar8 __ovld __cnfn convert_uchar8_rtn(long8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(long8); uchar8 __ovld __cnfn convert_uchar8(long8); uchar8 __ovld __cnfn convert_uchar8_sat(long8); uchar8 __ovld __cnfn convert_uchar8_rte(ulong8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(ulong8); uchar8 __ovld __cnfn convert_uchar8_rtz(ulong8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(ulong8); uchar8 __ovld __cnfn convert_uchar8_rtp(ulong8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(ulong8); uchar8 __ovld __cnfn convert_uchar8_rtn(ulong8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(ulong8); uchar8 __ovld __cnfn convert_uchar8(ulong8); uchar8 __ovld __cnfn convert_uchar8_sat(ulong8); uchar8 __ovld __cnfn convert_uchar8_rte(float8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(float8); uchar8 __ovld __cnfn convert_uchar8_rtz(float8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(float8); uchar8 __ovld __cnfn convert_uchar8_rtp(float8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(float8); uchar8 __ovld __cnfn convert_uchar8_rtn(float8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(float8); uchar8 __ovld __cnfn convert_uchar8(float8); uchar8 __ovld __cnfn convert_uchar8_sat(float8); short8 __ovld __cnfn convert_short8_rte(char8); short8 __ovld __cnfn convert_short8_sat_rte(char8); short8 __ovld __cnfn convert_short8_rtz(char8); short8 __ovld __cnfn convert_short8_sat_rtz(char8); short8 __ovld __cnfn convert_short8_rtp(char8); short8 __ovld __cnfn convert_short8_sat_rtp(char8); short8 __ovld __cnfn convert_short8_rtn(char8); short8 __ovld __cnfn convert_short8_sat_rtn(char8); short8 __ovld __cnfn convert_short8(char8); short8 __ovld __cnfn convert_short8_sat(char8); short8 __ovld __cnfn convert_short8_rte(uchar8); short8 __ovld __cnfn convert_short8_sat_rte(uchar8); short8 __ovld __cnfn convert_short8_rtz(uchar8); short8 __ovld __cnfn convert_short8_sat_rtz(uchar8); short8 __ovld __cnfn convert_short8_rtp(uchar8); short8 __ovld __cnfn convert_short8_sat_rtp(uchar8); short8 __ovld __cnfn convert_short8_rtn(uchar8); short8 __ovld __cnfn convert_short8_sat_rtn(uchar8); short8 __ovld __cnfn convert_short8(uchar8); short8 __ovld __cnfn convert_short8_sat(uchar8); short8 __ovld __cnfn convert_short8_rte(short8); short8 __ovld __cnfn convert_short8_sat_rte(short8); short8 __ovld __cnfn convert_short8_rtz(short8); short8 __ovld __cnfn convert_short8_sat_rtz(short8); short8 __ovld __cnfn convert_short8_rtp(short8); short8 __ovld __cnfn convert_short8_sat_rtp(short8); short8 __ovld __cnfn convert_short8_rtn(short8); short8 __ovld __cnfn convert_short8_sat_rtn(short8); short8 __ovld __cnfn convert_short8(short8); short8 __ovld __cnfn convert_short8_sat(short8); short8 __ovld __cnfn convert_short8_rte(ushort8); short8 __ovld __cnfn convert_short8_sat_rte(ushort8); short8 __ovld __cnfn convert_short8_rtz(ushort8); short8 __ovld __cnfn convert_short8_sat_rtz(ushort8); short8 __ovld __cnfn 
convert_short8_rtp(ushort8); short8 __ovld __cnfn convert_short8_sat_rtp(ushort8); short8 __ovld __cnfn convert_short8_rtn(ushort8); short8 __ovld __cnfn convert_short8_sat_rtn(ushort8); short8 __ovld __cnfn convert_short8(ushort8); short8 __ovld __cnfn convert_short8_sat(ushort8); short8 __ovld __cnfn convert_short8_rte(int8); short8 __ovld __cnfn convert_short8_sat_rte(int8); short8 __ovld __cnfn convert_short8_rtz(int8); short8 __ovld __cnfn convert_short8_sat_rtz(int8); short8 __ovld __cnfn convert_short8_rtp(int8); short8 __ovld __cnfn convert_short8_sat_rtp(int8); short8 __ovld __cnfn convert_short8_rtn(int8); short8 __ovld __cnfn convert_short8_sat_rtn(int8); short8 __ovld __cnfn convert_short8(int8); short8 __ovld __cnfn convert_short8_sat(int8); short8 __ovld __cnfn convert_short8_rte(uint8); short8 __ovld __cnfn convert_short8_sat_rte(uint8); short8 __ovld __cnfn convert_short8_rtz(uint8); short8 __ovld __cnfn convert_short8_sat_rtz(uint8); short8 __ovld __cnfn convert_short8_rtp(uint8); short8 __ovld __cnfn convert_short8_sat_rtp(uint8); short8 __ovld __cnfn convert_short8_rtn(uint8); short8 __ovld __cnfn convert_short8_sat_rtn(uint8); short8 __ovld __cnfn convert_short8(uint8); short8 __ovld __cnfn convert_short8_sat(uint8); short8 __ovld __cnfn convert_short8_rte(long8); short8 __ovld __cnfn convert_short8_sat_rte(long8); short8 __ovld __cnfn convert_short8_rtz(long8); short8 __ovld __cnfn convert_short8_sat_rtz(long8); short8 __ovld __cnfn convert_short8_rtp(long8); short8 __ovld __cnfn convert_short8_sat_rtp(long8); short8 __ovld __cnfn convert_short8_rtn(long8); short8 __ovld __cnfn convert_short8_sat_rtn(long8); short8 __ovld __cnfn convert_short8(long8); short8 __ovld __cnfn convert_short8_sat(long8); short8 __ovld __cnfn convert_short8_rte(ulong8); short8 __ovld __cnfn convert_short8_sat_rte(ulong8); short8 __ovld __cnfn convert_short8_rtz(ulong8); short8 __ovld __cnfn convert_short8_sat_rtz(ulong8); short8 __ovld __cnfn convert_short8_rtp(ulong8); short8 __ovld __cnfn convert_short8_sat_rtp(ulong8); short8 __ovld __cnfn convert_short8_rtn(ulong8); short8 __ovld __cnfn convert_short8_sat_rtn(ulong8); short8 __ovld __cnfn convert_short8(ulong8); short8 __ovld __cnfn convert_short8_sat(ulong8); short8 __ovld __cnfn convert_short8_rte(float8); short8 __ovld __cnfn convert_short8_sat_rte(float8); short8 __ovld __cnfn convert_short8_rtz(float8); short8 __ovld __cnfn convert_short8_sat_rtz(float8); short8 __ovld __cnfn convert_short8_rtp(float8); short8 __ovld __cnfn convert_short8_sat_rtp(float8); short8 __ovld __cnfn convert_short8_rtn(float8); short8 __ovld __cnfn convert_short8_sat_rtn(float8); short8 __ovld __cnfn convert_short8(float8); short8 __ovld __cnfn convert_short8_sat(float8); ushort8 __ovld __cnfn convert_ushort8_rte(char8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(char8); ushort8 __ovld __cnfn convert_ushort8_rtz(char8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(char8); ushort8 __ovld __cnfn convert_ushort8_rtp(char8); ushort8 __ovld __cnfn convert_ushort8_sat_rtp(char8); ushort8 __ovld __cnfn convert_ushort8_rtn(char8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(char8); ushort8 __ovld __cnfn convert_ushort8(char8); ushort8 __ovld __cnfn convert_ushort8_sat(char8); ushort8 __ovld __cnfn convert_ushort8_rte(uchar8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(uchar8); ushort8 __ovld __cnfn convert_ushort8_rtz(uchar8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(uchar8); ushort8 __ovld __cnfn convert_ushort8_rtp(uchar8); ushort8 __ovld __cnfn 
convert_ushort8_sat_rtp(uchar8); ushort8 __ovld __cnfn convert_ushort8_rtn(uchar8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(uchar8); ushort8 __ovld __cnfn convert_ushort8(uchar8); ushort8 __ovld __cnfn convert_ushort8_sat(uchar8); ushort8 __ovld __cnfn convert_ushort8_rte(short8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(short8); ushort8 __ovld __cnfn convert_ushort8_rtz(short8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(short8); ushort8 __ovld __cnfn convert_ushort8_rtp(short8); ushort8 __ovld __cnfn convert_ushort8_sat_rtp(short8); ushort8 __ovld __cnfn convert_ushort8_rtn(short8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(short8); ushort8 __ovld __cnfn convert_ushort8(short8); ushort8 __ovld __cnfn convert_ushort8_sat(short8); ushort8 __ovld __cnfn convert_ushort8_rte(ushort8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(ushort8); ushort8 __ovld __cnfn convert_ushort8_rtz(ushort8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(ushort8); ushort8 __ovld __cnfn convert_ushort8_rtp(ushort8); ushort8 __ovld __cnfn convert_ushort8_sat_rtp(ushort8); ushort8 __ovld __cnfn convert_ushort8_rtn(ushort8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(ushort8); ushort8 __ovld __cnfn convert_ushort8(ushort8); ushort8 __ovld __cnfn convert_ushort8_sat(ushort8); ushort8 __ovld __cnfn convert_ushort8_rte(int8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(int8); ushort8 __ovld __cnfn convert_ushort8_rtz(int8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(int8); ushort8 __ovld __cnfn convert_ushort8_rtp(int8); ushort8 __ovld __cnfn convert_ushort8_sat_rtp(int8); ushort8 __ovld __cnfn convert_ushort8_rtn(int8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(int8); ushort8 __ovld __cnfn convert_ushort8(int8); ushort8 __ovld __cnfn convert_ushort8_sat(int8); ushort8 __ovld __cnfn convert_ushort8_rte(uint8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(uint8); ushort8 __ovld __cnfn convert_ushort8_rtz(uint8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(uint8); ushort8 __ovld __cnfn convert_ushort8_rtp(uint8); ushort8 __ovld __cnfn convert_ushort8_sat_rtp(uint8); ushort8 __ovld __cnfn convert_ushort8_rtn(uint8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(uint8); ushort8 __ovld __cnfn convert_ushort8(uint8); ushort8 __ovld __cnfn convert_ushort8_sat(uint8); ushort8 __ovld __cnfn convert_ushort8_rte(long8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(long8); ushort8 __ovld __cnfn convert_ushort8_rtz(long8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(long8); ushort8 __ovld __cnfn convert_ushort8_rtp(long8); ushort8 __ovld __cnfn convert_ushort8_sat_rtp(long8); ushort8 __ovld __cnfn convert_ushort8_rtn(long8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(long8); ushort8 __ovld __cnfn convert_ushort8(long8); ushort8 __ovld __cnfn convert_ushort8_sat(long8); ushort8 __ovld __cnfn convert_ushort8_rte(ulong8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(ulong8); ushort8 __ovld __cnfn convert_ushort8_rtz(ulong8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(ulong8); ushort8 __ovld __cnfn convert_ushort8_rtp(ulong8); ushort8 __ovld __cnfn convert_ushort8_sat_rtp(ulong8); ushort8 __ovld __cnfn convert_ushort8_rtn(ulong8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(ulong8); ushort8 __ovld __cnfn convert_ushort8(ulong8); ushort8 __ovld __cnfn convert_ushort8_sat(ulong8); ushort8 __ovld __cnfn convert_ushort8_rte(float8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(float8); ushort8 __ovld __cnfn convert_ushort8_rtz(float8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(float8); 
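/* Editor's note: the listing above and below enumerates the OpenCL C explicit
 * conversion built-ins of the form convert_<destTypeN>[_sat][_<rounding>](<srcTypeN>),
 * where _sat saturates to the destination type's range and _rte/_rtz/_rtp/_rtn select
 * round-to-nearest-even, toward zero, toward +infinity, and toward -infinity.
 * A minimal usage sketch follows; the kernel name and buffer layout are hypothetical,
 * and only convert_int4_sat_rte(float4), declared above, is relied on. */
__kernel void round_to_int4(__global const float4 *in, __global int4 *out) {
    size_t i = get_global_id(0);
    /* Saturating conversion: values outside the int range clamp to INT_MIN/INT_MAX;
     * _rte rounds to nearest, ties to even. */
    out[i] = convert_int4_sat_rte(in[i]);
}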
ushort8 __ovld __cnfn convert_ushort8_rtp(float8); ushort8 __ovld __cnfn convert_ushort8_sat_rtp(float8); ushort8 __ovld __cnfn convert_ushort8_rtn(float8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(float8); ushort8 __ovld __cnfn convert_ushort8(float8); ushort8 __ovld __cnfn convert_ushort8_sat(float8); int8 __ovld __cnfn convert_int8_rte(char8); int8 __ovld __cnfn convert_int8_sat_rte(char8); int8 __ovld __cnfn convert_int8_rtz(char8); int8 __ovld __cnfn convert_int8_sat_rtz(char8); int8 __ovld __cnfn convert_int8_rtp(char8); int8 __ovld __cnfn convert_int8_sat_rtp(char8); int8 __ovld __cnfn convert_int8_rtn(char8); int8 __ovld __cnfn convert_int8_sat_rtn(char8); int8 __ovld __cnfn convert_int8(char8); int8 __ovld __cnfn convert_int8_sat(char8); int8 __ovld __cnfn convert_int8_rte(uchar8); int8 __ovld __cnfn convert_int8_sat_rte(uchar8); int8 __ovld __cnfn convert_int8_rtz(uchar8); int8 __ovld __cnfn convert_int8_sat_rtz(uchar8); int8 __ovld __cnfn convert_int8_rtp(uchar8); int8 __ovld __cnfn convert_int8_sat_rtp(uchar8); int8 __ovld __cnfn convert_int8_rtn(uchar8); int8 __ovld __cnfn convert_int8_sat_rtn(uchar8); int8 __ovld __cnfn convert_int8(uchar8); int8 __ovld __cnfn convert_int8_sat(uchar8); int8 __ovld __cnfn convert_int8_rte(short8); int8 __ovld __cnfn convert_int8_sat_rte(short8); int8 __ovld __cnfn convert_int8_rtz(short8); int8 __ovld __cnfn convert_int8_sat_rtz(short8); int8 __ovld __cnfn convert_int8_rtp(short8); int8 __ovld __cnfn convert_int8_sat_rtp(short8); int8 __ovld __cnfn convert_int8_rtn(short8); int8 __ovld __cnfn convert_int8_sat_rtn(short8); int8 __ovld __cnfn convert_int8(short8); int8 __ovld __cnfn convert_int8_sat(short8); int8 __ovld __cnfn convert_int8_rte(ushort8); int8 __ovld __cnfn convert_int8_sat_rte(ushort8); int8 __ovld __cnfn convert_int8_rtz(ushort8); int8 __ovld __cnfn convert_int8_sat_rtz(ushort8); int8 __ovld __cnfn convert_int8_rtp(ushort8); int8 __ovld __cnfn convert_int8_sat_rtp(ushort8); int8 __ovld __cnfn convert_int8_rtn(ushort8); int8 __ovld __cnfn convert_int8_sat_rtn(ushort8); int8 __ovld __cnfn convert_int8(ushort8); int8 __ovld __cnfn convert_int8_sat(ushort8); int8 __ovld __cnfn convert_int8_rte(int8); int8 __ovld __cnfn convert_int8_sat_rte(int8); int8 __ovld __cnfn convert_int8_rtz(int8); int8 __ovld __cnfn convert_int8_sat_rtz(int8); int8 __ovld __cnfn convert_int8_rtp(int8); int8 __ovld __cnfn convert_int8_sat_rtp(int8); int8 __ovld __cnfn convert_int8_rtn(int8); int8 __ovld __cnfn convert_int8_sat_rtn(int8); int8 __ovld __cnfn convert_int8(int8); int8 __ovld __cnfn convert_int8_sat(int8); int8 __ovld __cnfn convert_int8_rte(uint8); int8 __ovld __cnfn convert_int8_sat_rte(uint8); int8 __ovld __cnfn convert_int8_rtz(uint8); int8 __ovld __cnfn convert_int8_sat_rtz(uint8); int8 __ovld __cnfn convert_int8_rtp(uint8); int8 __ovld __cnfn convert_int8_sat_rtp(uint8); int8 __ovld __cnfn convert_int8_rtn(uint8); int8 __ovld __cnfn convert_int8_sat_rtn(uint8); int8 __ovld __cnfn convert_int8(uint8); int8 __ovld __cnfn convert_int8_sat(uint8); int8 __ovld __cnfn convert_int8_rte(long8); int8 __ovld __cnfn convert_int8_sat_rte(long8); int8 __ovld __cnfn convert_int8_rtz(long8); int8 __ovld __cnfn convert_int8_sat_rtz(long8); int8 __ovld __cnfn convert_int8_rtp(long8); int8 __ovld __cnfn convert_int8_sat_rtp(long8); int8 __ovld __cnfn convert_int8_rtn(long8); int8 __ovld __cnfn convert_int8_sat_rtn(long8); int8 __ovld __cnfn convert_int8(long8); int8 __ovld __cnfn convert_int8_sat(long8); int8 __ovld __cnfn convert_int8_rte(ulong8); int8 
__ovld __cnfn convert_int8_sat_rte(ulong8); int8 __ovld __cnfn convert_int8_rtz(ulong8); int8 __ovld __cnfn convert_int8_sat_rtz(ulong8); int8 __ovld __cnfn convert_int8_rtp(ulong8); int8 __ovld __cnfn convert_int8_sat_rtp(ulong8); int8 __ovld __cnfn convert_int8_rtn(ulong8); int8 __ovld __cnfn convert_int8_sat_rtn(ulong8); int8 __ovld __cnfn convert_int8(ulong8); int8 __ovld __cnfn convert_int8_sat(ulong8); int8 __ovld __cnfn convert_int8_rte(float8); int8 __ovld __cnfn convert_int8_sat_rte(float8); int8 __ovld __cnfn convert_int8_rtz(float8); int8 __ovld __cnfn convert_int8_sat_rtz(float8); int8 __ovld __cnfn convert_int8_rtp(float8); int8 __ovld __cnfn convert_int8_sat_rtp(float8); int8 __ovld __cnfn convert_int8_rtn(float8); int8 __ovld __cnfn convert_int8_sat_rtn(float8); int8 __ovld __cnfn convert_int8(float8); int8 __ovld __cnfn convert_int8_sat(float8); uint8 __ovld __cnfn convert_uint8_rte(char8); uint8 __ovld __cnfn convert_uint8_sat_rte(char8); uint8 __ovld __cnfn convert_uint8_rtz(char8); uint8 __ovld __cnfn convert_uint8_sat_rtz(char8); uint8 __ovld __cnfn convert_uint8_rtp(char8); uint8 __ovld __cnfn convert_uint8_sat_rtp(char8); uint8 __ovld __cnfn convert_uint8_rtn(char8); uint8 __ovld __cnfn convert_uint8_sat_rtn(char8); uint8 __ovld __cnfn convert_uint8(char8); uint8 __ovld __cnfn convert_uint8_sat(char8); uint8 __ovld __cnfn convert_uint8_rte(uchar8); uint8 __ovld __cnfn convert_uint8_sat_rte(uchar8); uint8 __ovld __cnfn convert_uint8_rtz(uchar8); uint8 __ovld __cnfn convert_uint8_sat_rtz(uchar8); uint8 __ovld __cnfn convert_uint8_rtp(uchar8); uint8 __ovld __cnfn convert_uint8_sat_rtp(uchar8); uint8 __ovld __cnfn convert_uint8_rtn(uchar8); uint8 __ovld __cnfn convert_uint8_sat_rtn(uchar8); uint8 __ovld __cnfn convert_uint8(uchar8); uint8 __ovld __cnfn convert_uint8_sat(uchar8); uint8 __ovld __cnfn convert_uint8_rte(short8); uint8 __ovld __cnfn convert_uint8_sat_rte(short8); uint8 __ovld __cnfn convert_uint8_rtz(short8); uint8 __ovld __cnfn convert_uint8_sat_rtz(short8); uint8 __ovld __cnfn convert_uint8_rtp(short8); uint8 __ovld __cnfn convert_uint8_sat_rtp(short8); uint8 __ovld __cnfn convert_uint8_rtn(short8); uint8 __ovld __cnfn convert_uint8_sat_rtn(short8); uint8 __ovld __cnfn convert_uint8(short8); uint8 __ovld __cnfn convert_uint8_sat(short8); uint8 __ovld __cnfn convert_uint8_rte(ushort8); uint8 __ovld __cnfn convert_uint8_sat_rte(ushort8); uint8 __ovld __cnfn convert_uint8_rtz(ushort8); uint8 __ovld __cnfn convert_uint8_sat_rtz(ushort8); uint8 __ovld __cnfn convert_uint8_rtp(ushort8); uint8 __ovld __cnfn convert_uint8_sat_rtp(ushort8); uint8 __ovld __cnfn convert_uint8_rtn(ushort8); uint8 __ovld __cnfn convert_uint8_sat_rtn(ushort8); uint8 __ovld __cnfn convert_uint8(ushort8); uint8 __ovld __cnfn convert_uint8_sat(ushort8); uint8 __ovld __cnfn convert_uint8_rte(int8); uint8 __ovld __cnfn convert_uint8_sat_rte(int8); uint8 __ovld __cnfn convert_uint8_rtz(int8); uint8 __ovld __cnfn convert_uint8_sat_rtz(int8); uint8 __ovld __cnfn convert_uint8_rtp(int8); uint8 __ovld __cnfn convert_uint8_sat_rtp(int8); uint8 __ovld __cnfn convert_uint8_rtn(int8); uint8 __ovld __cnfn convert_uint8_sat_rtn(int8); uint8 __ovld __cnfn convert_uint8(int8); uint8 __ovld __cnfn convert_uint8_sat(int8); uint8 __ovld __cnfn convert_uint8_rte(uint8); uint8 __ovld __cnfn convert_uint8_sat_rte(uint8); uint8 __ovld __cnfn convert_uint8_rtz(uint8); uint8 __ovld __cnfn convert_uint8_sat_rtz(uint8); uint8 __ovld __cnfn convert_uint8_rtp(uint8); uint8 __ovld __cnfn convert_uint8_sat_rtp(uint8); 
uint8 __ovld __cnfn convert_uint8_rtn(uint8); uint8 __ovld __cnfn convert_uint8_sat_rtn(uint8); uint8 __ovld __cnfn convert_uint8(uint8); uint8 __ovld __cnfn convert_uint8_sat(uint8); uint8 __ovld __cnfn convert_uint8_rte(long8); uint8 __ovld __cnfn convert_uint8_sat_rte(long8); uint8 __ovld __cnfn convert_uint8_rtz(long8); uint8 __ovld __cnfn convert_uint8_sat_rtz(long8); uint8 __ovld __cnfn convert_uint8_rtp(long8); uint8 __ovld __cnfn convert_uint8_sat_rtp(long8); uint8 __ovld __cnfn convert_uint8_rtn(long8); uint8 __ovld __cnfn convert_uint8_sat_rtn(long8); uint8 __ovld __cnfn convert_uint8(long8); uint8 __ovld __cnfn convert_uint8_sat(long8); uint8 __ovld __cnfn convert_uint8_rte(ulong8); uint8 __ovld __cnfn convert_uint8_sat_rte(ulong8); uint8 __ovld __cnfn convert_uint8_rtz(ulong8); uint8 __ovld __cnfn convert_uint8_sat_rtz(ulong8); uint8 __ovld __cnfn convert_uint8_rtp(ulong8); uint8 __ovld __cnfn convert_uint8_sat_rtp(ulong8); uint8 __ovld __cnfn convert_uint8_rtn(ulong8); uint8 __ovld __cnfn convert_uint8_sat_rtn(ulong8); uint8 __ovld __cnfn convert_uint8(ulong8); uint8 __ovld __cnfn convert_uint8_sat(ulong8); uint8 __ovld __cnfn convert_uint8_rte(float8); uint8 __ovld __cnfn convert_uint8_sat_rte(float8); uint8 __ovld __cnfn convert_uint8_rtz(float8); uint8 __ovld __cnfn convert_uint8_sat_rtz(float8); uint8 __ovld __cnfn convert_uint8_rtp(float8); uint8 __ovld __cnfn convert_uint8_sat_rtp(float8); uint8 __ovld __cnfn convert_uint8_rtn(float8); uint8 __ovld __cnfn convert_uint8_sat_rtn(float8); uint8 __ovld __cnfn convert_uint8(float8); uint8 __ovld __cnfn convert_uint8_sat(float8); long8 __ovld __cnfn convert_long8_rte(char8); long8 __ovld __cnfn convert_long8_sat_rte(char8); long8 __ovld __cnfn convert_long8_rtz(char8); long8 __ovld __cnfn convert_long8_sat_rtz(char8); long8 __ovld __cnfn convert_long8_rtp(char8); long8 __ovld __cnfn convert_long8_sat_rtp(char8); long8 __ovld __cnfn convert_long8_rtn(char8); long8 __ovld __cnfn convert_long8_sat_rtn(char8); long8 __ovld __cnfn convert_long8(char8); long8 __ovld __cnfn convert_long8_sat(char8); long8 __ovld __cnfn convert_long8_rte(uchar8); long8 __ovld __cnfn convert_long8_sat_rte(uchar8); long8 __ovld __cnfn convert_long8_rtz(uchar8); long8 __ovld __cnfn convert_long8_sat_rtz(uchar8); long8 __ovld __cnfn convert_long8_rtp(uchar8); long8 __ovld __cnfn convert_long8_sat_rtp(uchar8); long8 __ovld __cnfn convert_long8_rtn(uchar8); long8 __ovld __cnfn convert_long8_sat_rtn(uchar8); long8 __ovld __cnfn convert_long8(uchar8); long8 __ovld __cnfn convert_long8_sat(uchar8); long8 __ovld __cnfn convert_long8_rte(short8); long8 __ovld __cnfn convert_long8_sat_rte(short8); long8 __ovld __cnfn convert_long8_rtz(short8); long8 __ovld __cnfn convert_long8_sat_rtz(short8); long8 __ovld __cnfn convert_long8_rtp(short8); long8 __ovld __cnfn convert_long8_sat_rtp(short8); long8 __ovld __cnfn convert_long8_rtn(short8); long8 __ovld __cnfn convert_long8_sat_rtn(short8); long8 __ovld __cnfn convert_long8(short8); long8 __ovld __cnfn convert_long8_sat(short8); long8 __ovld __cnfn convert_long8_rte(ushort8); long8 __ovld __cnfn convert_long8_sat_rte(ushort8); long8 __ovld __cnfn convert_long8_rtz(ushort8); long8 __ovld __cnfn convert_long8_sat_rtz(ushort8); long8 __ovld __cnfn convert_long8_rtp(ushort8); long8 __ovld __cnfn convert_long8_sat_rtp(ushort8); long8 __ovld __cnfn convert_long8_rtn(ushort8); long8 __ovld __cnfn convert_long8_sat_rtn(ushort8); long8 __ovld __cnfn convert_long8(ushort8); long8 __ovld __cnfn convert_long8_sat(ushort8); long8 
__ovld __cnfn convert_long8_rte(int8); long8 __ovld __cnfn convert_long8_sat_rte(int8); long8 __ovld __cnfn convert_long8_rtz(int8); long8 __ovld __cnfn convert_long8_sat_rtz(int8); long8 __ovld __cnfn convert_long8_rtp(int8); long8 __ovld __cnfn convert_long8_sat_rtp(int8); long8 __ovld __cnfn convert_long8_rtn(int8); long8 __ovld __cnfn convert_long8_sat_rtn(int8); long8 __ovld __cnfn convert_long8(int8); long8 __ovld __cnfn convert_long8_sat(int8); long8 __ovld __cnfn convert_long8_rte(uint8); long8 __ovld __cnfn convert_long8_sat_rte(uint8); long8 __ovld __cnfn convert_long8_rtz(uint8); long8 __ovld __cnfn convert_long8_sat_rtz(uint8); long8 __ovld __cnfn convert_long8_rtp(uint8); long8 __ovld __cnfn convert_long8_sat_rtp(uint8); long8 __ovld __cnfn convert_long8_rtn(uint8); long8 __ovld __cnfn convert_long8_sat_rtn(uint8); long8 __ovld __cnfn convert_long8(uint8); long8 __ovld __cnfn convert_long8_sat(uint8); long8 __ovld __cnfn convert_long8_rte(long8); long8 __ovld __cnfn convert_long8_sat_rte(long8); long8 __ovld __cnfn convert_long8_rtz(long8); long8 __ovld __cnfn convert_long8_sat_rtz(long8); long8 __ovld __cnfn convert_long8_rtp(long8); long8 __ovld __cnfn convert_long8_sat_rtp(long8); long8 __ovld __cnfn convert_long8_rtn(long8); long8 __ovld __cnfn convert_long8_sat_rtn(long8); long8 __ovld __cnfn convert_long8(long8); long8 __ovld __cnfn convert_long8_sat(long8); long8 __ovld __cnfn convert_long8_rte(ulong8); long8 __ovld __cnfn convert_long8_sat_rte(ulong8); long8 __ovld __cnfn convert_long8_rtz(ulong8); long8 __ovld __cnfn convert_long8_sat_rtz(ulong8); long8 __ovld __cnfn convert_long8_rtp(ulong8); long8 __ovld __cnfn convert_long8_sat_rtp(ulong8); long8 __ovld __cnfn convert_long8_rtn(ulong8); long8 __ovld __cnfn convert_long8_sat_rtn(ulong8); long8 __ovld __cnfn convert_long8(ulong8); long8 __ovld __cnfn convert_long8_sat(ulong8); long8 __ovld __cnfn convert_long8_rte(float8); long8 __ovld __cnfn convert_long8_sat_rte(float8); long8 __ovld __cnfn convert_long8_rtz(float8); long8 __ovld __cnfn convert_long8_sat_rtz(float8); long8 __ovld __cnfn convert_long8_rtp(float8); long8 __ovld __cnfn convert_long8_sat_rtp(float8); long8 __ovld __cnfn convert_long8_rtn(float8); long8 __ovld __cnfn convert_long8_sat_rtn(float8); long8 __ovld __cnfn convert_long8(float8); long8 __ovld __cnfn convert_long8_sat(float8); ulong8 __ovld __cnfn convert_ulong8_rte(char8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(char8); ulong8 __ovld __cnfn convert_ulong8_rtz(char8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(char8); ulong8 __ovld __cnfn convert_ulong8_rtp(char8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(char8); ulong8 __ovld __cnfn convert_ulong8_rtn(char8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(char8); ulong8 __ovld __cnfn convert_ulong8(char8); ulong8 __ovld __cnfn convert_ulong8_sat(char8); ulong8 __ovld __cnfn convert_ulong8_rte(uchar8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(uchar8); ulong8 __ovld __cnfn convert_ulong8_rtz(uchar8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(uchar8); ulong8 __ovld __cnfn convert_ulong8_rtp(uchar8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(uchar8); ulong8 __ovld __cnfn convert_ulong8_rtn(uchar8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(uchar8); ulong8 __ovld __cnfn convert_ulong8(uchar8); ulong8 __ovld __cnfn convert_ulong8_sat(uchar8); ulong8 __ovld __cnfn convert_ulong8_rte(short8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(short8); ulong8 __ovld __cnfn convert_ulong8_rtz(short8); ulong8 __ovld __cnfn 
convert_ulong8_sat_rtz(short8); ulong8 __ovld __cnfn convert_ulong8_rtp(short8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(short8); ulong8 __ovld __cnfn convert_ulong8_rtn(short8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(short8); ulong8 __ovld __cnfn convert_ulong8(short8); ulong8 __ovld __cnfn convert_ulong8_sat(short8); ulong8 __ovld __cnfn convert_ulong8_rte(ushort8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(ushort8); ulong8 __ovld __cnfn convert_ulong8_rtz(ushort8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(ushort8); ulong8 __ovld __cnfn convert_ulong8_rtp(ushort8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(ushort8); ulong8 __ovld __cnfn convert_ulong8_rtn(ushort8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(ushort8); ulong8 __ovld __cnfn convert_ulong8(ushort8); ulong8 __ovld __cnfn convert_ulong8_sat(ushort8); ulong8 __ovld __cnfn convert_ulong8_rte(int8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(int8); ulong8 __ovld __cnfn convert_ulong8_rtz(int8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(int8); ulong8 __ovld __cnfn convert_ulong8_rtp(int8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(int8); ulong8 __ovld __cnfn convert_ulong8_rtn(int8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(int8); ulong8 __ovld __cnfn convert_ulong8(int8); ulong8 __ovld __cnfn convert_ulong8_sat(int8); ulong8 __ovld __cnfn convert_ulong8_rte(uint8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(uint8); ulong8 __ovld __cnfn convert_ulong8_rtz(uint8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(uint8); ulong8 __ovld __cnfn convert_ulong8_rtp(uint8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(uint8); ulong8 __ovld __cnfn convert_ulong8_rtn(uint8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(uint8); ulong8 __ovld __cnfn convert_ulong8(uint8); ulong8 __ovld __cnfn convert_ulong8_sat(uint8); ulong8 __ovld __cnfn convert_ulong8_rte(long8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(long8); ulong8 __ovld __cnfn convert_ulong8_rtz(long8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(long8); ulong8 __ovld __cnfn convert_ulong8_rtp(long8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(long8); ulong8 __ovld __cnfn convert_ulong8_rtn(long8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(long8); ulong8 __ovld __cnfn convert_ulong8(long8); ulong8 __ovld __cnfn convert_ulong8_sat(long8); ulong8 __ovld __cnfn convert_ulong8_rte(ulong8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(ulong8); ulong8 __ovld __cnfn convert_ulong8_rtz(ulong8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(ulong8); ulong8 __ovld __cnfn convert_ulong8_rtp(ulong8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(ulong8); ulong8 __ovld __cnfn convert_ulong8_rtn(ulong8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(ulong8); ulong8 __ovld __cnfn convert_ulong8(ulong8); ulong8 __ovld __cnfn convert_ulong8_sat(ulong8); ulong8 __ovld __cnfn convert_ulong8_rte(float8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(float8); ulong8 __ovld __cnfn convert_ulong8_rtz(float8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(float8); ulong8 __ovld __cnfn convert_ulong8_rtp(float8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(float8); ulong8 __ovld __cnfn convert_ulong8_rtn(float8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(float8); ulong8 __ovld __cnfn convert_ulong8(float8); ulong8 __ovld __cnfn convert_ulong8_sat(float8); float8 __ovld __cnfn convert_float8_rte(char8); float8 __ovld __cnfn convert_float8_rtz(char8); float8 __ovld __cnfn convert_float8_rtp(char8); float8 __ovld __cnfn convert_float8_rtn(char8); float8 __ovld __cnfn convert_float8(char8); float8 
__ovld __cnfn convert_float8_rte(uchar8); float8 __ovld __cnfn convert_float8_rtz(uchar8); float8 __ovld __cnfn convert_float8_rtp(uchar8); float8 __ovld __cnfn convert_float8_rtn(uchar8); float8 __ovld __cnfn convert_float8(uchar8); float8 __ovld __cnfn convert_float8_rte(short8); float8 __ovld __cnfn convert_float8_rtz(short8); float8 __ovld __cnfn convert_float8_rtp(short8); float8 __ovld __cnfn convert_float8_rtn(short8); float8 __ovld __cnfn convert_float8(short8); float8 __ovld __cnfn convert_float8_rte(ushort8); float8 __ovld __cnfn convert_float8_rtz(ushort8); float8 __ovld __cnfn convert_float8_rtp(ushort8); float8 __ovld __cnfn convert_float8_rtn(ushort8); float8 __ovld __cnfn convert_float8(ushort8); float8 __ovld __cnfn convert_float8_rte(int8); float8 __ovld __cnfn convert_float8_rtz(int8); float8 __ovld __cnfn convert_float8_rtp(int8); float8 __ovld __cnfn convert_float8_rtn(int8); float8 __ovld __cnfn convert_float8(int8); float8 __ovld __cnfn convert_float8_rte(uint8); float8 __ovld __cnfn convert_float8_rtz(uint8); float8 __ovld __cnfn convert_float8_rtp(uint8); float8 __ovld __cnfn convert_float8_rtn(uint8); float8 __ovld __cnfn convert_float8(uint8); float8 __ovld __cnfn convert_float8_rte(long8); float8 __ovld __cnfn convert_float8_rtz(long8); float8 __ovld __cnfn convert_float8_rtp(long8); float8 __ovld __cnfn convert_float8_rtn(long8); float8 __ovld __cnfn convert_float8(long8); float8 __ovld __cnfn convert_float8_rte(ulong8); float8 __ovld __cnfn convert_float8_rtz(ulong8); float8 __ovld __cnfn convert_float8_rtp(ulong8); float8 __ovld __cnfn convert_float8_rtn(ulong8); float8 __ovld __cnfn convert_float8(ulong8); float8 __ovld __cnfn convert_float8_rte(float8); float8 __ovld __cnfn convert_float8_rtz(float8); float8 __ovld __cnfn convert_float8_rtp(float8); float8 __ovld __cnfn convert_float8_rtn(float8); float8 __ovld __cnfn convert_float8(float8); char16 __ovld __cnfn convert_char16_rte(char16); char16 __ovld __cnfn convert_char16_sat_rte(char16); char16 __ovld __cnfn convert_char16_rtz(char16); char16 __ovld __cnfn convert_char16_sat_rtz(char16); char16 __ovld __cnfn convert_char16_rtp(char16); char16 __ovld __cnfn convert_char16_sat_rtp(char16); char16 __ovld __cnfn convert_char16_rtn(char16); char16 __ovld __cnfn convert_char16_sat_rtn(char16); char16 __ovld __cnfn convert_char16(char16); char16 __ovld __cnfn convert_char16_sat(char16); char16 __ovld __cnfn convert_char16_rte(uchar16); char16 __ovld __cnfn convert_char16_sat_rte(uchar16); char16 __ovld __cnfn convert_char16_rtz(uchar16); char16 __ovld __cnfn convert_char16_sat_rtz(uchar16); char16 __ovld __cnfn convert_char16_rtp(uchar16); char16 __ovld __cnfn convert_char16_sat_rtp(uchar16); char16 __ovld __cnfn convert_char16_rtn(uchar16); char16 __ovld __cnfn convert_char16_sat_rtn(uchar16); char16 __ovld __cnfn convert_char16(uchar16); char16 __ovld __cnfn convert_char16_sat(uchar16); char16 __ovld __cnfn convert_char16_rte(short16); char16 __ovld __cnfn convert_char16_sat_rte(short16); char16 __ovld __cnfn convert_char16_rtz(short16); char16 __ovld __cnfn convert_char16_sat_rtz(short16); char16 __ovld __cnfn convert_char16_rtp(short16); char16 __ovld __cnfn convert_char16_sat_rtp(short16); char16 __ovld __cnfn convert_char16_rtn(short16); char16 __ovld __cnfn convert_char16_sat_rtn(short16); char16 __ovld __cnfn convert_char16(short16); char16 __ovld __cnfn convert_char16_sat(short16); char16 __ovld __cnfn convert_char16_rte(ushort16); char16 __ovld __cnfn convert_char16_sat_rte(ushort16); char16 __ovld 
__cnfn convert_char16_rtz(ushort16); char16 __ovld __cnfn convert_char16_sat_rtz(ushort16); char16 __ovld __cnfn convert_char16_rtp(ushort16); char16 __ovld __cnfn convert_char16_sat_rtp(ushort16); char16 __ovld __cnfn convert_char16_rtn(ushort16); char16 __ovld __cnfn convert_char16_sat_rtn(ushort16); char16 __ovld __cnfn convert_char16(ushort16); char16 __ovld __cnfn convert_char16_sat(ushort16); char16 __ovld __cnfn convert_char16_rte(int16); char16 __ovld __cnfn convert_char16_sat_rte(int16); char16 __ovld __cnfn convert_char16_rtz(int16); char16 __ovld __cnfn convert_char16_sat_rtz(int16); char16 __ovld __cnfn convert_char16_rtp(int16); char16 __ovld __cnfn convert_char16_sat_rtp(int16); char16 __ovld __cnfn convert_char16_rtn(int16); char16 __ovld __cnfn convert_char16_sat_rtn(int16); char16 __ovld __cnfn convert_char16(int16); char16 __ovld __cnfn convert_char16_sat(int16); char16 __ovld __cnfn convert_char16_rte(uint16); char16 __ovld __cnfn convert_char16_sat_rte(uint16); char16 __ovld __cnfn convert_char16_rtz(uint16); char16 __ovld __cnfn convert_char16_sat_rtz(uint16); char16 __ovld __cnfn convert_char16_rtp(uint16); char16 __ovld __cnfn convert_char16_sat_rtp(uint16); char16 __ovld __cnfn convert_char16_rtn(uint16); char16 __ovld __cnfn convert_char16_sat_rtn(uint16); char16 __ovld __cnfn convert_char16(uint16); char16 __ovld __cnfn convert_char16_sat(uint16); char16 __ovld __cnfn convert_char16_rte(long16); char16 __ovld __cnfn convert_char16_sat_rte(long16); char16 __ovld __cnfn convert_char16_rtz(long16); char16 __ovld __cnfn convert_char16_sat_rtz(long16); char16 __ovld __cnfn convert_char16_rtp(long16); char16 __ovld __cnfn convert_char16_sat_rtp(long16); char16 __ovld __cnfn convert_char16_rtn(long16); char16 __ovld __cnfn convert_char16_sat_rtn(long16); char16 __ovld __cnfn convert_char16(long16); char16 __ovld __cnfn convert_char16_sat(long16); char16 __ovld __cnfn convert_char16_rte(ulong16); char16 __ovld __cnfn convert_char16_sat_rte(ulong16); char16 __ovld __cnfn convert_char16_rtz(ulong16); char16 __ovld __cnfn convert_char16_sat_rtz(ulong16); char16 __ovld __cnfn convert_char16_rtp(ulong16); char16 __ovld __cnfn convert_char16_sat_rtp(ulong16); char16 __ovld __cnfn convert_char16_rtn(ulong16); char16 __ovld __cnfn convert_char16_sat_rtn(ulong16); char16 __ovld __cnfn convert_char16(ulong16); char16 __ovld __cnfn convert_char16_sat(ulong16); char16 __ovld __cnfn convert_char16_rte(float16); char16 __ovld __cnfn convert_char16_sat_rte(float16); char16 __ovld __cnfn convert_char16_rtz(float16); char16 __ovld __cnfn convert_char16_sat_rtz(float16); char16 __ovld __cnfn convert_char16_rtp(float16); char16 __ovld __cnfn convert_char16_sat_rtp(float16); char16 __ovld __cnfn convert_char16_rtn(float16); char16 __ovld __cnfn convert_char16_sat_rtn(float16); char16 __ovld __cnfn convert_char16(float16); char16 __ovld __cnfn convert_char16_sat(float16); uchar16 __ovld __cnfn convert_uchar16_rte(char16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(char16); uchar16 __ovld __cnfn convert_uchar16_rtz(char16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(char16); uchar16 __ovld __cnfn convert_uchar16_rtp(char16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(char16); uchar16 __ovld __cnfn convert_uchar16_rtn(char16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(char16); uchar16 __ovld __cnfn convert_uchar16(char16); uchar16 __ovld __cnfn convert_uchar16_sat(char16); uchar16 __ovld __cnfn convert_uchar16_rte(uchar16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(uchar16); 
uchar16 __ovld __cnfn convert_uchar16_rtz(uchar16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(uchar16); uchar16 __ovld __cnfn convert_uchar16_rtp(uchar16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(uchar16); uchar16 __ovld __cnfn convert_uchar16_rtn(uchar16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(uchar16); uchar16 __ovld __cnfn convert_uchar16(uchar16); uchar16 __ovld __cnfn convert_uchar16_sat(uchar16); uchar16 __ovld __cnfn convert_uchar16_rte(short16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(short16); uchar16 __ovld __cnfn convert_uchar16_rtz(short16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(short16); uchar16 __ovld __cnfn convert_uchar16_rtp(short16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(short16); uchar16 __ovld __cnfn convert_uchar16_rtn(short16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(short16); uchar16 __ovld __cnfn convert_uchar16(short16); uchar16 __ovld __cnfn convert_uchar16_sat(short16); uchar16 __ovld __cnfn convert_uchar16_rte(ushort16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(ushort16); uchar16 __ovld __cnfn convert_uchar16_rtz(ushort16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(ushort16); uchar16 __ovld __cnfn convert_uchar16_rtp(ushort16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(ushort16); uchar16 __ovld __cnfn convert_uchar16_rtn(ushort16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(ushort16); uchar16 __ovld __cnfn convert_uchar16(ushort16); uchar16 __ovld __cnfn convert_uchar16_sat(ushort16); uchar16 __ovld __cnfn convert_uchar16_rte(int16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(int16); uchar16 __ovld __cnfn convert_uchar16_rtz(int16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(int16); uchar16 __ovld __cnfn convert_uchar16_rtp(int16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(int16); uchar16 __ovld __cnfn convert_uchar16_rtn(int16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(int16); uchar16 __ovld __cnfn convert_uchar16(int16); uchar16 __ovld __cnfn convert_uchar16_sat(int16); uchar16 __ovld __cnfn convert_uchar16_rte(uint16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(uint16); uchar16 __ovld __cnfn convert_uchar16_rtz(uint16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(uint16); uchar16 __ovld __cnfn convert_uchar16_rtp(uint16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(uint16); uchar16 __ovld __cnfn convert_uchar16_rtn(uint16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(uint16); uchar16 __ovld __cnfn convert_uchar16(uint16); uchar16 __ovld __cnfn convert_uchar16_sat(uint16); uchar16 __ovld __cnfn convert_uchar16_rte(long16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(long16); uchar16 __ovld __cnfn convert_uchar16_rtz(long16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(long16); uchar16 __ovld __cnfn convert_uchar16_rtp(long16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(long16); uchar16 __ovld __cnfn convert_uchar16_rtn(long16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(long16); uchar16 __ovld __cnfn convert_uchar16(long16); uchar16 __ovld __cnfn convert_uchar16_sat(long16); uchar16 __ovld __cnfn convert_uchar16_rte(ulong16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(ulong16); uchar16 __ovld __cnfn convert_uchar16_rtz(ulong16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(ulong16); uchar16 __ovld __cnfn convert_uchar16_rtp(ulong16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(ulong16); uchar16 __ovld __cnfn convert_uchar16_rtn(ulong16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(ulong16); uchar16 __ovld __cnfn convert_uchar16(ulong16); uchar16 __ovld 
__cnfn convert_uchar16_sat(ulong16); uchar16 __ovld __cnfn convert_uchar16_rte(float16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(float16); uchar16 __ovld __cnfn convert_uchar16_rtz(float16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(float16); uchar16 __ovld __cnfn convert_uchar16_rtp(float16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(float16); uchar16 __ovld __cnfn convert_uchar16_rtn(float16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(float16); uchar16 __ovld __cnfn convert_uchar16(float16); uchar16 __ovld __cnfn convert_uchar16_sat(float16); short16 __ovld __cnfn convert_short16_rte(char16); short16 __ovld __cnfn convert_short16_sat_rte(char16); short16 __ovld __cnfn convert_short16_rtz(char16); short16 __ovld __cnfn convert_short16_sat_rtz(char16); short16 __ovld __cnfn convert_short16_rtp(char16); short16 __ovld __cnfn convert_short16_sat_rtp(char16); short16 __ovld __cnfn convert_short16_rtn(char16); short16 __ovld __cnfn convert_short16_sat_rtn(char16); short16 __ovld __cnfn convert_short16(char16); short16 __ovld __cnfn convert_short16_sat(char16); short16 __ovld __cnfn convert_short16_rte(uchar16); short16 __ovld __cnfn convert_short16_sat_rte(uchar16); short16 __ovld __cnfn convert_short16_rtz(uchar16); short16 __ovld __cnfn convert_short16_sat_rtz(uchar16); short16 __ovld __cnfn convert_short16_rtp(uchar16); short16 __ovld __cnfn convert_short16_sat_rtp(uchar16); short16 __ovld __cnfn convert_short16_rtn(uchar16); short16 __ovld __cnfn convert_short16_sat_rtn(uchar16); short16 __ovld __cnfn convert_short16(uchar16); short16 __ovld __cnfn convert_short16_sat(uchar16); short16 __ovld __cnfn convert_short16_rte(short16); short16 __ovld __cnfn convert_short16_sat_rte(short16); short16 __ovld __cnfn convert_short16_rtz(short16); short16 __ovld __cnfn convert_short16_sat_rtz(short16); short16 __ovld __cnfn convert_short16_rtp(short16); short16 __ovld __cnfn convert_short16_sat_rtp(short16); short16 __ovld __cnfn convert_short16_rtn(short16); short16 __ovld __cnfn convert_short16_sat_rtn(short16); short16 __ovld __cnfn convert_short16(short16); short16 __ovld __cnfn convert_short16_sat(short16); short16 __ovld __cnfn convert_short16_rte(ushort16); short16 __ovld __cnfn convert_short16_sat_rte(ushort16); short16 __ovld __cnfn convert_short16_rtz(ushort16); short16 __ovld __cnfn convert_short16_sat_rtz(ushort16); short16 __ovld __cnfn convert_short16_rtp(ushort16); short16 __ovld __cnfn convert_short16_sat_rtp(ushort16); short16 __ovld __cnfn convert_short16_rtn(ushort16); short16 __ovld __cnfn convert_short16_sat_rtn(ushort16); short16 __ovld __cnfn convert_short16(ushort16); short16 __ovld __cnfn convert_short16_sat(ushort16); short16 __ovld __cnfn convert_short16_rte(int16); short16 __ovld __cnfn convert_short16_sat_rte(int16); short16 __ovld __cnfn convert_short16_rtz(int16); short16 __ovld __cnfn convert_short16_sat_rtz(int16); short16 __ovld __cnfn convert_short16_rtp(int16); short16 __ovld __cnfn convert_short16_sat_rtp(int16); short16 __ovld __cnfn convert_short16_rtn(int16); short16 __ovld __cnfn convert_short16_sat_rtn(int16); short16 __ovld __cnfn convert_short16(int16); short16 __ovld __cnfn convert_short16_sat(int16); short16 __ovld __cnfn convert_short16_rte(uint16); short16 __ovld __cnfn convert_short16_sat_rte(uint16); short16 __ovld __cnfn convert_short16_rtz(uint16); short16 __ovld __cnfn convert_short16_sat_rtz(uint16); short16 __ovld __cnfn convert_short16_rtp(uint16); short16 __ovld __cnfn convert_short16_sat_rtp(uint16); short16 __ovld __cnfn 
convert_short16_rtn(uint16); short16 __ovld __cnfn convert_short16_sat_rtn(uint16); short16 __ovld __cnfn convert_short16(uint16); short16 __ovld __cnfn convert_short16_sat(uint16); short16 __ovld __cnfn convert_short16_rte(long16); short16 __ovld __cnfn convert_short16_sat_rte(long16); short16 __ovld __cnfn convert_short16_rtz(long16); short16 __ovld __cnfn convert_short16_sat_rtz(long16); short16 __ovld __cnfn convert_short16_rtp(long16); short16 __ovld __cnfn convert_short16_sat_rtp(long16); short16 __ovld __cnfn convert_short16_rtn(long16); short16 __ovld __cnfn convert_short16_sat_rtn(long16); short16 __ovld __cnfn convert_short16(long16); short16 __ovld __cnfn convert_short16_sat(long16); short16 __ovld __cnfn convert_short16_rte(ulong16); short16 __ovld __cnfn convert_short16_sat_rte(ulong16); short16 __ovld __cnfn convert_short16_rtz(ulong16); short16 __ovld __cnfn convert_short16_sat_rtz(ulong16); short16 __ovld __cnfn convert_short16_rtp(ulong16); short16 __ovld __cnfn convert_short16_sat_rtp(ulong16); short16 __ovld __cnfn convert_short16_rtn(ulong16); short16 __ovld __cnfn convert_short16_sat_rtn(ulong16); short16 __ovld __cnfn convert_short16(ulong16); short16 __ovld __cnfn convert_short16_sat(ulong16); short16 __ovld __cnfn convert_short16_rte(float16); short16 __ovld __cnfn convert_short16_sat_rte(float16); short16 __ovld __cnfn convert_short16_rtz(float16); short16 __ovld __cnfn convert_short16_sat_rtz(float16); short16 __ovld __cnfn convert_short16_rtp(float16); short16 __ovld __cnfn convert_short16_sat_rtp(float16); short16 __ovld __cnfn convert_short16_rtn(float16); short16 __ovld __cnfn convert_short16_sat_rtn(float16); short16 __ovld __cnfn convert_short16(float16); short16 __ovld __cnfn convert_short16_sat(float16); ushort16 __ovld __cnfn convert_ushort16_rte(char16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(char16); ushort16 __ovld __cnfn convert_ushort16_rtz(char16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(char16); ushort16 __ovld __cnfn convert_ushort16_rtp(char16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(char16); ushort16 __ovld __cnfn convert_ushort16_rtn(char16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(char16); ushort16 __ovld __cnfn convert_ushort16(char16); ushort16 __ovld __cnfn convert_ushort16_sat(char16); ushort16 __ovld __cnfn convert_ushort16_rte(uchar16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(uchar16); ushort16 __ovld __cnfn convert_ushort16_rtz(uchar16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(uchar16); ushort16 __ovld __cnfn convert_ushort16_rtp(uchar16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(uchar16); ushort16 __ovld __cnfn convert_ushort16_rtn(uchar16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(uchar16); ushort16 __ovld __cnfn convert_ushort16(uchar16); ushort16 __ovld __cnfn convert_ushort16_sat(uchar16); ushort16 __ovld __cnfn convert_ushort16_rte(short16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(short16); ushort16 __ovld __cnfn convert_ushort16_rtz(short16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(short16); ushort16 __ovld __cnfn convert_ushort16_rtp(short16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(short16); ushort16 __ovld __cnfn convert_ushort16_rtn(short16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(short16); ushort16 __ovld __cnfn convert_ushort16(short16); ushort16 __ovld __cnfn convert_ushort16_sat(short16); ushort16 __ovld __cnfn convert_ushort16_rte(ushort16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(ushort16); ushort16 __ovld __cnfn 
convert_ushort16_rtz(ushort16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(ushort16); ushort16 __ovld __cnfn convert_ushort16_rtp(ushort16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(ushort16); ushort16 __ovld __cnfn convert_ushort16_rtn(ushort16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(ushort16); ushort16 __ovld __cnfn convert_ushort16(ushort16); ushort16 __ovld __cnfn convert_ushort16_sat(ushort16); ushort16 __ovld __cnfn convert_ushort16_rte(int16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(int16); ushort16 __ovld __cnfn convert_ushort16_rtz(int16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(int16); ushort16 __ovld __cnfn convert_ushort16_rtp(int16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(int16); ushort16 __ovld __cnfn convert_ushort16_rtn(int16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(int16); ushort16 __ovld __cnfn convert_ushort16(int16); ushort16 __ovld __cnfn convert_ushort16_sat(int16); ushort16 __ovld __cnfn convert_ushort16_rte(uint16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(uint16); ushort16 __ovld __cnfn convert_ushort16_rtz(uint16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(uint16); ushort16 __ovld __cnfn convert_ushort16_rtp(uint16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(uint16); ushort16 __ovld __cnfn convert_ushort16_rtn(uint16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(uint16); ushort16 __ovld __cnfn convert_ushort16(uint16); ushort16 __ovld __cnfn convert_ushort16_sat(uint16); ushort16 __ovld __cnfn convert_ushort16_rte(long16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(long16); ushort16 __ovld __cnfn convert_ushort16_rtz(long16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(long16); ushort16 __ovld __cnfn convert_ushort16_rtp(long16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(long16); ushort16 __ovld __cnfn convert_ushort16_rtn(long16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(long16); ushort16 __ovld __cnfn convert_ushort16(long16); ushort16 __ovld __cnfn convert_ushort16_sat(long16); ushort16 __ovld __cnfn convert_ushort16_rte(ulong16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(ulong16); ushort16 __ovld __cnfn convert_ushort16_rtz(ulong16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(ulong16); ushort16 __ovld __cnfn convert_ushort16_rtp(ulong16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(ulong16); ushort16 __ovld __cnfn convert_ushort16_rtn(ulong16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(ulong16); ushort16 __ovld __cnfn convert_ushort16(ulong16); ushort16 __ovld __cnfn convert_ushort16_sat(ulong16); ushort16 __ovld __cnfn convert_ushort16_rte(float16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(float16); ushort16 __ovld __cnfn convert_ushort16_rtz(float16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(float16); ushort16 __ovld __cnfn convert_ushort16_rtp(float16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(float16); ushort16 __ovld __cnfn convert_ushort16_rtn(float16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(float16); ushort16 __ovld __cnfn convert_ushort16(float16); ushort16 __ovld __cnfn convert_ushort16_sat(float16); int16 __ovld __cnfn convert_int16_rte(char16); int16 __ovld __cnfn convert_int16_sat_rte(char16); int16 __ovld __cnfn convert_int16_rtz(char16); int16 __ovld __cnfn convert_int16_sat_rtz(char16); int16 __ovld __cnfn convert_int16_rtp(char16); int16 __ovld __cnfn convert_int16_sat_rtp(char16); int16 __ovld __cnfn convert_int16_rtn(char16); int16 __ovld __cnfn convert_int16_sat_rtn(char16); int16 __ovld 
__cnfn convert_int16(char16); int16 __ovld __cnfn convert_int16_sat(char16); int16 __ovld __cnfn convert_int16_rte(uchar16); int16 __ovld __cnfn convert_int16_sat_rte(uchar16); int16 __ovld __cnfn convert_int16_rtz(uchar16); int16 __ovld __cnfn convert_int16_sat_rtz(uchar16); int16 __ovld __cnfn convert_int16_rtp(uchar16); int16 __ovld __cnfn convert_int16_sat_rtp(uchar16); int16 __ovld __cnfn convert_int16_rtn(uchar16); int16 __ovld __cnfn convert_int16_sat_rtn(uchar16); int16 __ovld __cnfn convert_int16(uchar16); int16 __ovld __cnfn convert_int16_sat(uchar16); int16 __ovld __cnfn convert_int16_rte(short16); int16 __ovld __cnfn convert_int16_sat_rte(short16); int16 __ovld __cnfn convert_int16_rtz(short16); int16 __ovld __cnfn convert_int16_sat_rtz(short16); int16 __ovld __cnfn convert_int16_rtp(short16); int16 __ovld __cnfn convert_int16_sat_rtp(short16); int16 __ovld __cnfn convert_int16_rtn(short16); int16 __ovld __cnfn convert_int16_sat_rtn(short16); int16 __ovld __cnfn convert_int16(short16); int16 __ovld __cnfn convert_int16_sat(short16); int16 __ovld __cnfn convert_int16_rte(ushort16); int16 __ovld __cnfn convert_int16_sat_rte(ushort16); int16 __ovld __cnfn convert_int16_rtz(ushort16); int16 __ovld __cnfn convert_int16_sat_rtz(ushort16); int16 __ovld __cnfn convert_int16_rtp(ushort16); int16 __ovld __cnfn convert_int16_sat_rtp(ushort16); int16 __ovld __cnfn convert_int16_rtn(ushort16); int16 __ovld __cnfn convert_int16_sat_rtn(ushort16); int16 __ovld __cnfn convert_int16(ushort16); int16 __ovld __cnfn convert_int16_sat(ushort16); int16 __ovld __cnfn convert_int16_rte(int16); int16 __ovld __cnfn convert_int16_sat_rte(int16); int16 __ovld __cnfn convert_int16_rtz(int16); int16 __ovld __cnfn convert_int16_sat_rtz(int16); int16 __ovld __cnfn convert_int16_rtp(int16); int16 __ovld __cnfn convert_int16_sat_rtp(int16); int16 __ovld __cnfn convert_int16_rtn(int16); int16 __ovld __cnfn convert_int16_sat_rtn(int16); int16 __ovld __cnfn convert_int16(int16); int16 __ovld __cnfn convert_int16_sat(int16); int16 __ovld __cnfn convert_int16_rte(uint16); int16 __ovld __cnfn convert_int16_sat_rte(uint16); int16 __ovld __cnfn convert_int16_rtz(uint16); int16 __ovld __cnfn convert_int16_sat_rtz(uint16); int16 __ovld __cnfn convert_int16_rtp(uint16); int16 __ovld __cnfn convert_int16_sat_rtp(uint16); int16 __ovld __cnfn convert_int16_rtn(uint16); int16 __ovld __cnfn convert_int16_sat_rtn(uint16); int16 __ovld __cnfn convert_int16(uint16); int16 __ovld __cnfn convert_int16_sat(uint16); int16 __ovld __cnfn convert_int16_rte(long16); int16 __ovld __cnfn convert_int16_sat_rte(long16); int16 __ovld __cnfn convert_int16_rtz(long16); int16 __ovld __cnfn convert_int16_sat_rtz(long16); int16 __ovld __cnfn convert_int16_rtp(long16); int16 __ovld __cnfn convert_int16_sat_rtp(long16); int16 __ovld __cnfn convert_int16_rtn(long16); int16 __ovld __cnfn convert_int16_sat_rtn(long16); int16 __ovld __cnfn convert_int16(long16); int16 __ovld __cnfn convert_int16_sat(long16); int16 __ovld __cnfn convert_int16_rte(ulong16); int16 __ovld __cnfn convert_int16_sat_rte(ulong16); int16 __ovld __cnfn convert_int16_rtz(ulong16); int16 __ovld __cnfn convert_int16_sat_rtz(ulong16); int16 __ovld __cnfn convert_int16_rtp(ulong16); int16 __ovld __cnfn convert_int16_sat_rtp(ulong16); int16 __ovld __cnfn convert_int16_rtn(ulong16); int16 __ovld __cnfn convert_int16_sat_rtn(ulong16); int16 __ovld __cnfn convert_int16(ulong16); int16 __ovld __cnfn convert_int16_sat(ulong16); int16 __ovld __cnfn convert_int16_rte(float16); int16 __ovld 
__cnfn convert_int16_sat_rte(float16); int16 __ovld __cnfn convert_int16_rtz(float16); int16 __ovld __cnfn convert_int16_sat_rtz(float16); int16 __ovld __cnfn convert_int16_rtp(float16); int16 __ovld __cnfn convert_int16_sat_rtp(float16); int16 __ovld __cnfn convert_int16_rtn(float16); int16 __ovld __cnfn convert_int16_sat_rtn(float16); int16 __ovld __cnfn convert_int16(float16); int16 __ovld __cnfn convert_int16_sat(float16); uint16 __ovld __cnfn convert_uint16_rte(char16); uint16 __ovld __cnfn convert_uint16_sat_rte(char16); uint16 __ovld __cnfn convert_uint16_rtz(char16); uint16 __ovld __cnfn convert_uint16_sat_rtz(char16); uint16 __ovld __cnfn convert_uint16_rtp(char16); uint16 __ovld __cnfn convert_uint16_sat_rtp(char16); uint16 __ovld __cnfn convert_uint16_rtn(char16); uint16 __ovld __cnfn convert_uint16_sat_rtn(char16); uint16 __ovld __cnfn convert_uint16(char16); uint16 __ovld __cnfn convert_uint16_sat(char16); uint16 __ovld __cnfn convert_uint16_rte(uchar16); uint16 __ovld __cnfn convert_uint16_sat_rte(uchar16); uint16 __ovld __cnfn convert_uint16_rtz(uchar16); uint16 __ovld __cnfn convert_uint16_sat_rtz(uchar16); uint16 __ovld __cnfn convert_uint16_rtp(uchar16); uint16 __ovld __cnfn convert_uint16_sat_rtp(uchar16); uint16 __ovld __cnfn convert_uint16_rtn(uchar16); uint16 __ovld __cnfn convert_uint16_sat_rtn(uchar16); uint16 __ovld __cnfn convert_uint16(uchar16); uint16 __ovld __cnfn convert_uint16_sat(uchar16); uint16 __ovld __cnfn convert_uint16_rte(short16); uint16 __ovld __cnfn convert_uint16_sat_rte(short16); uint16 __ovld __cnfn convert_uint16_rtz(short16); uint16 __ovld __cnfn convert_uint16_sat_rtz(short16); uint16 __ovld __cnfn convert_uint16_rtp(short16); uint16 __ovld __cnfn convert_uint16_sat_rtp(short16); uint16 __ovld __cnfn convert_uint16_rtn(short16); uint16 __ovld __cnfn convert_uint16_sat_rtn(short16); uint16 __ovld __cnfn convert_uint16(short16); uint16 __ovld __cnfn convert_uint16_sat(short16); uint16 __ovld __cnfn convert_uint16_rte(ushort16); uint16 __ovld __cnfn convert_uint16_sat_rte(ushort16); uint16 __ovld __cnfn convert_uint16_rtz(ushort16); uint16 __ovld __cnfn convert_uint16_sat_rtz(ushort16); uint16 __ovld __cnfn convert_uint16_rtp(ushort16); uint16 __ovld __cnfn convert_uint16_sat_rtp(ushort16); uint16 __ovld __cnfn convert_uint16_rtn(ushort16); uint16 __ovld __cnfn convert_uint16_sat_rtn(ushort16); uint16 __ovld __cnfn convert_uint16(ushort16); uint16 __ovld __cnfn convert_uint16_sat(ushort16); uint16 __ovld __cnfn convert_uint16_rte(int16); uint16 __ovld __cnfn convert_uint16_sat_rte(int16); uint16 __ovld __cnfn convert_uint16_rtz(int16); uint16 __ovld __cnfn convert_uint16_sat_rtz(int16); uint16 __ovld __cnfn convert_uint16_rtp(int16); uint16 __ovld __cnfn convert_uint16_sat_rtp(int16); uint16 __ovld __cnfn convert_uint16_rtn(int16); uint16 __ovld __cnfn convert_uint16_sat_rtn(int16); uint16 __ovld __cnfn convert_uint16(int16); uint16 __ovld __cnfn convert_uint16_sat(int16); uint16 __ovld __cnfn convert_uint16_rte(uint16); uint16 __ovld __cnfn convert_uint16_sat_rte(uint16); uint16 __ovld __cnfn convert_uint16_rtz(uint16); uint16 __ovld __cnfn convert_uint16_sat_rtz(uint16); uint16 __ovld __cnfn convert_uint16_rtp(uint16); uint16 __ovld __cnfn convert_uint16_sat_rtp(uint16); uint16 __ovld __cnfn convert_uint16_rtn(uint16); uint16 __ovld __cnfn convert_uint16_sat_rtn(uint16); uint16 __ovld __cnfn convert_uint16(uint16); uint16 __ovld __cnfn convert_uint16_sat(uint16); uint16 __ovld __cnfn convert_uint16_rte(long16); uint16 __ovld __cnfn 
convert_uint16_sat_rte(long16); uint16 __ovld __cnfn convert_uint16_rtz(long16); uint16 __ovld __cnfn convert_uint16_sat_rtz(long16); uint16 __ovld __cnfn convert_uint16_rtp(long16); uint16 __ovld __cnfn convert_uint16_sat_rtp(long16); uint16 __ovld __cnfn convert_uint16_rtn(long16); uint16 __ovld __cnfn convert_uint16_sat_rtn(long16); uint16 __ovld __cnfn convert_uint16(long16); uint16 __ovld __cnfn convert_uint16_sat(long16); uint16 __ovld __cnfn convert_uint16_rte(ulong16); uint16 __ovld __cnfn convert_uint16_sat_rte(ulong16); uint16 __ovld __cnfn convert_uint16_rtz(ulong16); uint16 __ovld __cnfn convert_uint16_sat_rtz(ulong16); uint16 __ovld __cnfn convert_uint16_rtp(ulong16); uint16 __ovld __cnfn convert_uint16_sat_rtp(ulong16); uint16 __ovld __cnfn convert_uint16_rtn(ulong16); uint16 __ovld __cnfn convert_uint16_sat_rtn(ulong16); uint16 __ovld __cnfn convert_uint16(ulong16); uint16 __ovld __cnfn convert_uint16_sat(ulong16); uint16 __ovld __cnfn convert_uint16_rte(float16); uint16 __ovld __cnfn convert_uint16_sat_rte(float16); uint16 __ovld __cnfn convert_uint16_rtz(float16); uint16 __ovld __cnfn convert_uint16_sat_rtz(float16); uint16 __ovld __cnfn convert_uint16_rtp(float16); uint16 __ovld __cnfn convert_uint16_sat_rtp(float16); uint16 __ovld __cnfn convert_uint16_rtn(float16); uint16 __ovld __cnfn convert_uint16_sat_rtn(float16); uint16 __ovld __cnfn convert_uint16(float16); uint16 __ovld __cnfn convert_uint16_sat(float16); long16 __ovld __cnfn convert_long16_rte(char16); long16 __ovld __cnfn convert_long16_sat_rte(char16); long16 __ovld __cnfn convert_long16_rtz(char16); long16 __ovld __cnfn convert_long16_sat_rtz(char16); long16 __ovld __cnfn convert_long16_rtp(char16); long16 __ovld __cnfn convert_long16_sat_rtp(char16); long16 __ovld __cnfn convert_long16_rtn(char16); long16 __ovld __cnfn convert_long16_sat_rtn(char16); long16 __ovld __cnfn convert_long16(char16); long16 __ovld __cnfn convert_long16_sat(char16); long16 __ovld __cnfn convert_long16_rte(uchar16); long16 __ovld __cnfn convert_long16_sat_rte(uchar16); long16 __ovld __cnfn convert_long16_rtz(uchar16); long16 __ovld __cnfn convert_long16_sat_rtz(uchar16); long16 __ovld __cnfn convert_long16_rtp(uchar16); long16 __ovld __cnfn convert_long16_sat_rtp(uchar16); long16 __ovld __cnfn convert_long16_rtn(uchar16); long16 __ovld __cnfn convert_long16_sat_rtn(uchar16); long16 __ovld __cnfn convert_long16(uchar16); long16 __ovld __cnfn convert_long16_sat(uchar16); long16 __ovld __cnfn convert_long16_rte(short16); long16 __ovld __cnfn convert_long16_sat_rte(short16); long16 __ovld __cnfn convert_long16_rtz(short16); long16 __ovld __cnfn convert_long16_sat_rtz(short16); long16 __ovld __cnfn convert_long16_rtp(short16); long16 __ovld __cnfn convert_long16_sat_rtp(short16); long16 __ovld __cnfn convert_long16_rtn(short16); long16 __ovld __cnfn convert_long16_sat_rtn(short16); long16 __ovld __cnfn convert_long16(short16); long16 __ovld __cnfn convert_long16_sat(short16); long16 __ovld __cnfn convert_long16_rte(ushort16); long16 __ovld __cnfn convert_long16_sat_rte(ushort16); long16 __ovld __cnfn convert_long16_rtz(ushort16); long16 __ovld __cnfn convert_long16_sat_rtz(ushort16); long16 __ovld __cnfn convert_long16_rtp(ushort16); long16 __ovld __cnfn convert_long16_sat_rtp(ushort16); long16 __ovld __cnfn convert_long16_rtn(ushort16); long16 __ovld __cnfn convert_long16_sat_rtn(ushort16); long16 __ovld __cnfn convert_long16(ushort16); long16 __ovld __cnfn convert_long16_sat(ushort16); long16 __ovld __cnfn convert_long16_rte(int16); 
long16 __ovld __cnfn convert_long16_sat_rte(int16); long16 __ovld __cnfn convert_long16_rtz(int16); long16 __ovld __cnfn convert_long16_sat_rtz(int16); long16 __ovld __cnfn convert_long16_rtp(int16); long16 __ovld __cnfn convert_long16_sat_rtp(int16); long16 __ovld __cnfn convert_long16_rtn(int16); long16 __ovld __cnfn convert_long16_sat_rtn(int16); long16 __ovld __cnfn convert_long16(int16); long16 __ovld __cnfn convert_long16_sat(int16); long16 __ovld __cnfn convert_long16_rte(uint16); long16 __ovld __cnfn convert_long16_sat_rte(uint16); long16 __ovld __cnfn convert_long16_rtz(uint16); long16 __ovld __cnfn convert_long16_sat_rtz(uint16); long16 __ovld __cnfn convert_long16_rtp(uint16); long16 __ovld __cnfn convert_long16_sat_rtp(uint16); long16 __ovld __cnfn convert_long16_rtn(uint16); long16 __ovld __cnfn convert_long16_sat_rtn(uint16); long16 __ovld __cnfn convert_long16(uint16); long16 __ovld __cnfn convert_long16_sat(uint16); long16 __ovld __cnfn convert_long16_rte(long16); long16 __ovld __cnfn convert_long16_sat_rte(long16); long16 __ovld __cnfn convert_long16_rtz(long16); long16 __ovld __cnfn convert_long16_sat_rtz(long16); long16 __ovld __cnfn convert_long16_rtp(long16); long16 __ovld __cnfn convert_long16_sat_rtp(long16); long16 __ovld __cnfn convert_long16_rtn(long16); long16 __ovld __cnfn convert_long16_sat_rtn(long16); long16 __ovld __cnfn convert_long16(long16); long16 __ovld __cnfn convert_long16_sat(long16); long16 __ovld __cnfn convert_long16_rte(ulong16); long16 __ovld __cnfn convert_long16_sat_rte(ulong16); long16 __ovld __cnfn convert_long16_rtz(ulong16); long16 __ovld __cnfn convert_long16_sat_rtz(ulong16); long16 __ovld __cnfn convert_long16_rtp(ulong16); long16 __ovld __cnfn convert_long16_sat_rtp(ulong16); long16 __ovld __cnfn convert_long16_rtn(ulong16); long16 __ovld __cnfn convert_long16_sat_rtn(ulong16); long16 __ovld __cnfn convert_long16(ulong16); long16 __ovld __cnfn convert_long16_sat(ulong16); long16 __ovld __cnfn convert_long16_rte(float16); long16 __ovld __cnfn convert_long16_sat_rte(float16); long16 __ovld __cnfn convert_long16_rtz(float16); long16 __ovld __cnfn convert_long16_sat_rtz(float16); long16 __ovld __cnfn convert_long16_rtp(float16); long16 __ovld __cnfn convert_long16_sat_rtp(float16); long16 __ovld __cnfn convert_long16_rtn(float16); long16 __ovld __cnfn convert_long16_sat_rtn(float16); long16 __ovld __cnfn convert_long16(float16); long16 __ovld __cnfn convert_long16_sat(float16); ulong16 __ovld __cnfn convert_ulong16_rte(char16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(char16); ulong16 __ovld __cnfn convert_ulong16_rtz(char16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(char16); ulong16 __ovld __cnfn convert_ulong16_rtp(char16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(char16); ulong16 __ovld __cnfn convert_ulong16_rtn(char16); ulong16 __ovld __cnfn convert_ulong16_sat_rtn(char16); ulong16 __ovld __cnfn convert_ulong16(char16); ulong16 __ovld __cnfn convert_ulong16_sat(char16); ulong16 __ovld __cnfn convert_ulong16_rte(uchar16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(uchar16); ulong16 __ovld __cnfn convert_ulong16_rtz(uchar16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(uchar16); ulong16 __ovld __cnfn convert_ulong16_rtp(uchar16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(uchar16); ulong16 __ovld __cnfn convert_ulong16_rtn(uchar16); ulong16 __ovld __cnfn convert_ulong16_sat_rtn(uchar16); ulong16 __ovld __cnfn convert_ulong16(uchar16); ulong16 __ovld __cnfn convert_ulong16_sat(uchar16); ulong16 __ovld __cnfn 
convert_ulong16_rte(short16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(short16); ulong16 __ovld __cnfn convert_ulong16_rtz(short16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(short16); ulong16 __ovld __cnfn convert_ulong16_rtp(short16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(short16); ulong16 __ovld __cnfn convert_ulong16_rtn(short16); ulong16 __ovld __cnfn convert_ulong16_sat_rtn(short16); ulong16 __ovld __cnfn convert_ulong16(short16); ulong16 __ovld __cnfn convert_ulong16_sat(short16); ulong16 __ovld __cnfn convert_ulong16_rte(ushort16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(ushort16); ulong16 __ovld __cnfn convert_ulong16_rtz(ushort16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(ushort16); ulong16 __ovld __cnfn convert_ulong16_rtp(ushort16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(ushort16); ulong16 __ovld __cnfn convert_ulong16_rtn(ushort16); ulong16 __ovld __cnfn convert_ulong16_sat_rtn(ushort16); ulong16 __ovld __cnfn convert_ulong16(ushort16); ulong16 __ovld __cnfn convert_ulong16_sat(ushort16); ulong16 __ovld __cnfn convert_ulong16_rte(int16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(int16); ulong16 __ovld __cnfn convert_ulong16_rtz(int16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(int16); ulong16 __ovld __cnfn convert_ulong16_rtp(int16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(int16); ulong16 __ovld __cnfn convert_ulong16_rtn(int16); ulong16 __ovld __cnfn convert_ulong16_sat_rtn(int16); ulong16 __ovld __cnfn convert_ulong16(int16); ulong16 __ovld __cnfn convert_ulong16_sat(int16); ulong16 __ovld __cnfn convert_ulong16_rte(uint16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(uint16); ulong16 __ovld __cnfn convert_ulong16_rtz(uint16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(uint16); ulong16 __ovld __cnfn convert_ulong16_rtp(uint16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(uint16); ulong16 __ovld __cnfn convert_ulong16_rtn(uint16); ulong16 __ovld __cnfn convert_ulong16_sat_rtn(uint16); ulong16 __ovld __cnfn convert_ulong16(uint16); ulong16 __ovld __cnfn convert_ulong16_sat(uint16); ulong16 __ovld __cnfn convert_ulong16_rte(long16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(long16); ulong16 __ovld __cnfn convert_ulong16_rtz(long16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(long16); ulong16 __ovld __cnfn convert_ulong16_rtp(long16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(long16); ulong16 __ovld __cnfn convert_ulong16_rtn(long16); ulong16 __ovld __cnfn convert_ulong16_sat_rtn(long16); ulong16 __ovld __cnfn convert_ulong16(long16); ulong16 __ovld __cnfn convert_ulong16_sat(long16); ulong16 __ovld __cnfn convert_ulong16_rte(ulong16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(ulong16); ulong16 __ovld __cnfn convert_ulong16_rtz(ulong16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(ulong16); ulong16 __ovld __cnfn convert_ulong16_rtp(ulong16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(ulong16); ulong16 __ovld __cnfn convert_ulong16_rtn(ulong16); ulong16 __ovld __cnfn convert_ulong16_sat_rtn(ulong16); ulong16 __ovld __cnfn convert_ulong16(ulong16); ulong16 __ovld __cnfn convert_ulong16_sat(ulong16); ulong16 __ovld __cnfn convert_ulong16_rte(float16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(float16); ulong16 __ovld __cnfn convert_ulong16_rtz(float16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(float16); ulong16 __ovld __cnfn convert_ulong16_rtp(float16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(float16); ulong16 __ovld __cnfn convert_ulong16_rtn(float16); ulong16 __ovld __cnfn 
convert_ulong16_sat_rtn(float16); ulong16 __ovld __cnfn convert_ulong16(float16); ulong16 __ovld __cnfn convert_ulong16_sat(float16); float16 __ovld __cnfn convert_float16_rte(char16); float16 __ovld __cnfn convert_float16_rtz(char16); float16 __ovld __cnfn convert_float16_rtp(char16); float16 __ovld __cnfn convert_float16_rtn(char16); float16 __ovld __cnfn convert_float16(char16); float16 __ovld __cnfn convert_float16_rte(uchar16); float16 __ovld __cnfn convert_float16_rtz(uchar16); float16 __ovld __cnfn convert_float16_rtp(uchar16); float16 __ovld __cnfn convert_float16_rtn(uchar16); float16 __ovld __cnfn convert_float16(uchar16); float16 __ovld __cnfn convert_float16_rte(short16); float16 __ovld __cnfn convert_float16_rtz(short16); float16 __ovld __cnfn convert_float16_rtp(short16); float16 __ovld __cnfn convert_float16_rtn(short16); float16 __ovld __cnfn convert_float16(short16); float16 __ovld __cnfn convert_float16_rte(ushort16); float16 __ovld __cnfn convert_float16_rtz(ushort16); float16 __ovld __cnfn convert_float16_rtp(ushort16); float16 __ovld __cnfn convert_float16_rtn(ushort16); float16 __ovld __cnfn convert_float16(ushort16); float16 __ovld __cnfn convert_float16_rte(int16); float16 __ovld __cnfn convert_float16_rtz(int16); float16 __ovld __cnfn convert_float16_rtp(int16); float16 __ovld __cnfn convert_float16_rtn(int16); float16 __ovld __cnfn convert_float16(int16); float16 __ovld __cnfn convert_float16_rte(uint16); float16 __ovld __cnfn convert_float16_rtz(uint16); float16 __ovld __cnfn convert_float16_rtp(uint16); float16 __ovld __cnfn convert_float16_rtn(uint16); float16 __ovld __cnfn convert_float16(uint16); float16 __ovld __cnfn convert_float16_rte(long16); float16 __ovld __cnfn convert_float16_rtz(long16); float16 __ovld __cnfn convert_float16_rtp(long16); float16 __ovld __cnfn convert_float16_rtn(long16); float16 __ovld __cnfn convert_float16(long16); float16 __ovld __cnfn convert_float16_rte(ulong16); float16 __ovld __cnfn convert_float16_rtz(ulong16); float16 __ovld __cnfn convert_float16_rtp(ulong16); float16 __ovld __cnfn convert_float16_rtn(ulong16); float16 __ovld __cnfn convert_float16(ulong16); float16 __ovld __cnfn convert_float16_rte(float16); float16 __ovld __cnfn convert_float16_rtz(float16); float16 __ovld __cnfn convert_float16_rtp(float16); float16 __ovld __cnfn convert_float16_rtn(float16); float16 __ovld __cnfn convert_float16(float16); // Conversions with double data type parameters or return value. 
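// Editorial usage sketch (not part of the original header), placed before the double-precision
// variants that follow: it illustrates how the explicit convert_<type><n>[_sat][_<rounding>]
// built-ins declared above are typically used from a kernel. The kernel and buffer names here
// are hypothetical; convert_uchar4_sat_rte itself is a standard OpenCL built-in.
__kernel void pack_float4_to_uchar4(__global const float4 *src, __global uchar4 *dst) {
    size_t gid = get_global_id(0);
    // Scale to [0, 255], then convert with saturation and round-to-nearest-even:
    // out-of-range values clamp to the uchar range instead of wrapping.
    dst[gid] = convert_uchar4_sat_rte(src[gid] * 255.0f);
}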
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
char __ovld __cnfn convert_char(double); char __ovld __cnfn convert_char_rte(double); char __ovld __cnfn convert_char_rtn(double); char __ovld __cnfn convert_char_rtp(double); char __ovld __cnfn convert_char_rtz(double); char __ovld __cnfn convert_char_sat(double); char __ovld __cnfn convert_char_sat_rte(double); char __ovld __cnfn convert_char_sat_rtn(double); char __ovld __cnfn convert_char_sat_rtp(double); char __ovld __cnfn convert_char_sat_rtz(double); char2 __ovld __cnfn convert_char2(double2); char2 __ovld __cnfn convert_char2_rte(double2); char2 __ovld __cnfn convert_char2_rtn(double2); char2 __ovld __cnfn convert_char2_rtp(double2); char2 __ovld __cnfn convert_char2_rtz(double2); char2 __ovld __cnfn convert_char2_sat(double2); char2 __ovld __cnfn convert_char2_sat_rte(double2); char2 __ovld __cnfn convert_char2_sat_rtn(double2); char2 __ovld __cnfn convert_char2_sat_rtp(double2); char2 __ovld __cnfn convert_char2_sat_rtz(double2); char3 __ovld __cnfn convert_char3(double3); char3 __ovld __cnfn convert_char3_rte(double3); char3 __ovld __cnfn convert_char3_rtn(double3); char3 __ovld __cnfn convert_char3_rtp(double3); char3 __ovld __cnfn convert_char3_rtz(double3); char3 __ovld __cnfn convert_char3_sat(double3); char3 __ovld __cnfn convert_char3_sat_rte(double3); char3 __ovld __cnfn convert_char3_sat_rtn(double3); char3 __ovld __cnfn convert_char3_sat_rtp(double3); char3 __ovld __cnfn convert_char3_sat_rtz(double3); char4 __ovld __cnfn convert_char4(double4); char4 __ovld __cnfn convert_char4_rte(double4); char4 __ovld __cnfn convert_char4_rtn(double4); char4 __ovld __cnfn convert_char4_rtp(double4); char4 __ovld __cnfn convert_char4_rtz(double4); char4 __ovld __cnfn convert_char4_sat(double4); char4 __ovld __cnfn convert_char4_sat_rte(double4); char4 __ovld __cnfn convert_char4_sat_rtn(double4); char4 __ovld __cnfn convert_char4_sat_rtp(double4); char4 __ovld __cnfn convert_char4_sat_rtz(double4); char8 __ovld __cnfn convert_char8(double8); char8 __ovld __cnfn convert_char8_rte(double8); char8 __ovld __cnfn convert_char8_rtn(double8); char8 __ovld __cnfn convert_char8_rtp(double8); char8 __ovld __cnfn convert_char8_rtz(double8); char8 __ovld __cnfn convert_char8_sat(double8); char8 __ovld __cnfn convert_char8_sat_rte(double8); char8 __ovld __cnfn convert_char8_sat_rtn(double8); char8 __ovld __cnfn convert_char8_sat_rtp(double8); char8 __ovld __cnfn convert_char8_sat_rtz(double8); char16 __ovld __cnfn convert_char16(double16); char16 __ovld __cnfn convert_char16_rte(double16); char16 __ovld __cnfn convert_char16_rtn(double16); char16 __ovld __cnfn convert_char16_rtp(double16); char16 __ovld __cnfn convert_char16_rtz(double16); char16 __ovld __cnfn convert_char16_sat(double16); char16 __ovld __cnfn convert_char16_sat_rte(double16); char16 __ovld __cnfn convert_char16_sat_rtn(double16); char16 __ovld __cnfn convert_char16_sat_rtp(double16); char16 __ovld __cnfn convert_char16_sat_rtz(double16); uchar __ovld __cnfn convert_uchar(double); uchar __ovld __cnfn convert_uchar_rte(double); uchar __ovld __cnfn convert_uchar_rtn(double); uchar __ovld __cnfn convert_uchar_rtp(double); uchar __ovld __cnfn convert_uchar_rtz(double); uchar __ovld __cnfn convert_uchar_sat(double); uchar __ovld __cnfn convert_uchar_sat_rte(double); uchar __ovld __cnfn convert_uchar_sat_rtn(double); uchar __ovld __cnfn convert_uchar_sat_rtp(double); uchar __ovld __cnfn convert_uchar_sat_rtz(double); uchar2 __ovld __cnfn convert_uchar2(double2); uchar2
__ovld __cnfn convert_uchar2_rte(double2); uchar2 __ovld __cnfn convert_uchar2_rtn(double2); uchar2 __ovld __cnfn convert_uchar2_rtp(double2); uchar2 __ovld __cnfn convert_uchar2_rtz(double2); uchar2 __ovld __cnfn convert_uchar2_sat(double2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(double2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(double2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(double2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(double2); uchar3 __ovld __cnfn convert_uchar3(double3); uchar3 __ovld __cnfn convert_uchar3_rte(double3); uchar3 __ovld __cnfn convert_uchar3_rtn(double3); uchar3 __ovld __cnfn convert_uchar3_rtp(double3); uchar3 __ovld __cnfn convert_uchar3_rtz(double3); uchar3 __ovld __cnfn convert_uchar3_sat(double3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(double3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(double3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(double3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(double3); uchar4 __ovld __cnfn convert_uchar4(double4); uchar4 __ovld __cnfn convert_uchar4_rte(double4); uchar4 __ovld __cnfn convert_uchar4_rtn(double4); uchar4 __ovld __cnfn convert_uchar4_rtp(double4); uchar4 __ovld __cnfn convert_uchar4_rtz(double4); uchar4 __ovld __cnfn convert_uchar4_sat(double4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(double4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(double4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(double4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(double4); uchar8 __ovld __cnfn convert_uchar8(double8); uchar8 __ovld __cnfn convert_uchar8_rte(double8); uchar8 __ovld __cnfn convert_uchar8_rtn(double8); uchar8 __ovld __cnfn convert_uchar8_rtp(double8); uchar8 __ovld __cnfn convert_uchar8_rtz(double8); uchar8 __ovld __cnfn convert_uchar8_sat(double8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(double8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(double8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(double8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(double8); uchar16 __ovld __cnfn convert_uchar16(double16); uchar16 __ovld __cnfn convert_uchar16_rte(double16); uchar16 __ovld __cnfn convert_uchar16_rtn(double16); uchar16 __ovld __cnfn convert_uchar16_rtp(double16); uchar16 __ovld __cnfn convert_uchar16_rtz(double16); uchar16 __ovld __cnfn convert_uchar16_sat(double16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(double16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(double16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(double16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(double16); short __ovld __cnfn convert_short(double); short __ovld __cnfn convert_short_rte(double); short __ovld __cnfn convert_short_rtn(double); short __ovld __cnfn convert_short_rtp(double); short __ovld __cnfn convert_short_rtz(double); short __ovld __cnfn convert_short_sat(double); short __ovld __cnfn convert_short_sat_rte(double); short __ovld __cnfn convert_short_sat_rtn(double); short __ovld __cnfn convert_short_sat_rtp(double); short __ovld __cnfn convert_short_sat_rtz(double); short2 __ovld __cnfn convert_short2(double2); short2 __ovld __cnfn convert_short2_rte(double2); short2 __ovld __cnfn convert_short2_rtn(double2); short2 __ovld __cnfn convert_short2_rtp(double2); short2 __ovld __cnfn convert_short2_rtz(double2); short2 __ovld __cnfn convert_short2_sat(double2); short2 __ovld __cnfn convert_short2_sat_rte(double2); short2 __ovld __cnfn convert_short2_sat_rtn(double2); short2 __ovld __cnfn convert_short2_sat_rtp(double2); short2 __ovld __cnfn convert_short2_sat_rtz(double2); short3 __ovld __cnfn 
convert_short3(double3); short3 __ovld __cnfn convert_short3_rte(double3); short3 __ovld __cnfn convert_short3_rtn(double3); short3 __ovld __cnfn convert_short3_rtp(double3); short3 __ovld __cnfn convert_short3_rtz(double3); short3 __ovld __cnfn convert_short3_sat(double3); short3 __ovld __cnfn convert_short3_sat_rte(double3); short3 __ovld __cnfn convert_short3_sat_rtn(double3); short3 __ovld __cnfn convert_short3_sat_rtp(double3); short3 __ovld __cnfn convert_short3_sat_rtz(double3); short4 __ovld __cnfn convert_short4(double4); short4 __ovld __cnfn convert_short4_rte(double4); short4 __ovld __cnfn convert_short4_rtn(double4); short4 __ovld __cnfn convert_short4_rtp(double4); short4 __ovld __cnfn convert_short4_rtz(double4); short4 __ovld __cnfn convert_short4_sat(double4); short4 __ovld __cnfn convert_short4_sat_rte(double4); short4 __ovld __cnfn convert_short4_sat_rtn(double4); short4 __ovld __cnfn convert_short4_sat_rtp(double4); short4 __ovld __cnfn convert_short4_sat_rtz(double4); short8 __ovld __cnfn convert_short8(double8); short8 __ovld __cnfn convert_short8_rte(double8); short8 __ovld __cnfn convert_short8_rtn(double8); short8 __ovld __cnfn convert_short8_rtp(double8); short8 __ovld __cnfn convert_short8_rtz(double8); short8 __ovld __cnfn convert_short8_sat(double8); short8 __ovld __cnfn convert_short8_sat_rte(double8); short8 __ovld __cnfn convert_short8_sat_rtn(double8); short8 __ovld __cnfn convert_short8_sat_rtp(double8); short8 __ovld __cnfn convert_short8_sat_rtz(double8); short16 __ovld __cnfn convert_short16(double16); short16 __ovld __cnfn convert_short16_rte(double16); short16 __ovld __cnfn convert_short16_rtn(double16); short16 __ovld __cnfn convert_short16_rtp(double16); short16 __ovld __cnfn convert_short16_rtz(double16); short16 __ovld __cnfn convert_short16_sat(double16); short16 __ovld __cnfn convert_short16_sat_rte(double16); short16 __ovld __cnfn convert_short16_sat_rtn(double16); short16 __ovld __cnfn convert_short16_sat_rtp(double16); short16 __ovld __cnfn convert_short16_sat_rtz(double16); ushort __ovld __cnfn convert_ushort(double); ushort __ovld __cnfn convert_ushort_rte(double); ushort __ovld __cnfn convert_ushort_rtn(double); ushort __ovld __cnfn convert_ushort_rtp(double); ushort __ovld __cnfn convert_ushort_rtz(double); ushort __ovld __cnfn convert_ushort_sat(double); ushort __ovld __cnfn convert_ushort_sat_rte(double); ushort __ovld __cnfn convert_ushort_sat_rtn(double); ushort __ovld __cnfn convert_ushort_sat_rtp(double); ushort __ovld __cnfn convert_ushort_sat_rtz(double); ushort2 __ovld __cnfn convert_ushort2(double2); ushort2 __ovld __cnfn convert_ushort2_rte(double2); ushort2 __ovld __cnfn convert_ushort2_rtn(double2); ushort2 __ovld __cnfn convert_ushort2_rtp(double2); ushort2 __ovld __cnfn convert_ushort2_rtz(double2); ushort2 __ovld __cnfn convert_ushort2_sat(double2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(double2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(double2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(double2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(double2); ushort3 __ovld __cnfn convert_ushort3(double3); ushort3 __ovld __cnfn convert_ushort3_rte(double3); ushort3 __ovld __cnfn convert_ushort3_rtn(double3); ushort3 __ovld __cnfn convert_ushort3_rtp(double3); ushort3 __ovld __cnfn convert_ushort3_rtz(double3); ushort3 __ovld __cnfn convert_ushort3_sat(double3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(double3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(double3); ushort3 __ovld __cnfn 
convert_ushort3_sat_rtp(double3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(double3); ushort4 __ovld __cnfn convert_ushort4(double4); ushort4 __ovld __cnfn convert_ushort4_rte(double4); ushort4 __ovld __cnfn convert_ushort4_rtn(double4); ushort4 __ovld __cnfn convert_ushort4_rtp(double4); ushort4 __ovld __cnfn convert_ushort4_rtz(double4); ushort4 __ovld __cnfn convert_ushort4_sat(double4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(double4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(double4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(double4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(double4); ushort8 __ovld __cnfn convert_ushort8(double8); ushort8 __ovld __cnfn convert_ushort8_rte(double8); ushort8 __ovld __cnfn convert_ushort8_rtn(double8); ushort8 __ovld __cnfn convert_ushort8_rtp(double8); ushort8 __ovld __cnfn convert_ushort8_rtz(double8); ushort8 __ovld __cnfn convert_ushort8_sat(double8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(double8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(double8); ushort8 __ovld __cnfn convert_ushort8_sat_rtp(double8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(double8); ushort16 __ovld __cnfn convert_ushort16(double16); ushort16 __ovld __cnfn convert_ushort16_rte(double16); ushort16 __ovld __cnfn convert_ushort16_rtn(double16); ushort16 __ovld __cnfn convert_ushort16_rtp(double16); ushort16 __ovld __cnfn convert_ushort16_rtz(double16); ushort16 __ovld __cnfn convert_ushort16_sat(double16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(double16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(double16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(double16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(double16); int __ovld __cnfn convert_int(double); int __ovld __cnfn convert_int_rte(double); int __ovld __cnfn convert_int_rtn(double); int __ovld __cnfn convert_int_rtp(double); int __ovld __cnfn convert_int_rtz(double); int __ovld __cnfn convert_int_sat(double); int __ovld __cnfn convert_int_sat_rte(double); int __ovld __cnfn convert_int_sat_rtn(double); int __ovld __cnfn convert_int_sat_rtp(double); int __ovld __cnfn convert_int_sat_rtz(double); int2 __ovld __cnfn convert_int2(double2); int2 __ovld __cnfn convert_int2_rte(double2); int2 __ovld __cnfn convert_int2_rtn(double2); int2 __ovld __cnfn convert_int2_rtp(double2); int2 __ovld __cnfn convert_int2_rtz(double2); int2 __ovld __cnfn convert_int2_sat(double2); int2 __ovld __cnfn convert_int2_sat_rte(double2); int2 __ovld __cnfn convert_int2_sat_rtn(double2); int2 __ovld __cnfn convert_int2_sat_rtp(double2); int2 __ovld __cnfn convert_int2_sat_rtz(double2); int3 __ovld __cnfn convert_int3(double3); int3 __ovld __cnfn convert_int3_rte(double3); int3 __ovld __cnfn convert_int3_rtn(double3); int3 __ovld __cnfn convert_int3_rtp(double3); int3 __ovld __cnfn convert_int3_rtz(double3); int3 __ovld __cnfn convert_int3_sat(double3); int3 __ovld __cnfn convert_int3_sat_rte(double3); int3 __ovld __cnfn convert_int3_sat_rtn(double3); int3 __ovld __cnfn convert_int3_sat_rtp(double3); int3 __ovld __cnfn convert_int3_sat_rtz(double3); int4 __ovld __cnfn convert_int4(double4); int4 __ovld __cnfn convert_int4_rte(double4); int4 __ovld __cnfn convert_int4_rtn(double4); int4 __ovld __cnfn convert_int4_rtp(double4); int4 __ovld __cnfn convert_int4_rtz(double4); int4 __ovld __cnfn convert_int4_sat(double4); int4 __ovld __cnfn convert_int4_sat_rte(double4); int4 __ovld __cnfn convert_int4_sat_rtn(double4); int4 __ovld __cnfn convert_int4_sat_rtp(double4); int4 __ovld __cnfn 
convert_int4_sat_rtz(double4); int8 __ovld __cnfn convert_int8(double8); int8 __ovld __cnfn convert_int8_rte(double8); int8 __ovld __cnfn convert_int8_rtn(double8); int8 __ovld __cnfn convert_int8_rtp(double8); int8 __ovld __cnfn convert_int8_rtz(double8); int8 __ovld __cnfn convert_int8_sat(double8); int8 __ovld __cnfn convert_int8_sat_rte(double8); int8 __ovld __cnfn convert_int8_sat_rtn(double8); int8 __ovld __cnfn convert_int8_sat_rtp(double8); int8 __ovld __cnfn convert_int8_sat_rtz(double8); int16 __ovld __cnfn convert_int16(double16); int16 __ovld __cnfn convert_int16_rte(double16); int16 __ovld __cnfn convert_int16_rtn(double16); int16 __ovld __cnfn convert_int16_rtp(double16); int16 __ovld __cnfn convert_int16_rtz(double16); int16 __ovld __cnfn convert_int16_sat(double16); int16 __ovld __cnfn convert_int16_sat_rte(double16); int16 __ovld __cnfn convert_int16_sat_rtn(double16); int16 __ovld __cnfn convert_int16_sat_rtp(double16); int16 __ovld __cnfn convert_int16_sat_rtz(double16); uint __ovld __cnfn convert_uint(double); uint __ovld __cnfn convert_uint_rte(double); uint __ovld __cnfn convert_uint_rtn(double); uint __ovld __cnfn convert_uint_rtp(double); uint __ovld __cnfn convert_uint_rtz(double); uint __ovld __cnfn convert_uint_sat(double); uint __ovld __cnfn convert_uint_sat_rte(double); uint __ovld __cnfn convert_uint_sat_rtn(double); uint __ovld __cnfn convert_uint_sat_rtp(double); uint __ovld __cnfn convert_uint_sat_rtz(double); uint2 __ovld __cnfn convert_uint2(double2); uint2 __ovld __cnfn convert_uint2_rte(double2); uint2 __ovld __cnfn convert_uint2_rtn(double2); uint2 __ovld __cnfn convert_uint2_rtp(double2); uint2 __ovld __cnfn convert_uint2_rtz(double2); uint2 __ovld __cnfn convert_uint2_sat(double2); uint2 __ovld __cnfn convert_uint2_sat_rte(double2); uint2 __ovld __cnfn convert_uint2_sat_rtn(double2); uint2 __ovld __cnfn convert_uint2_sat_rtp(double2); uint2 __ovld __cnfn convert_uint2_sat_rtz(double2); uint3 __ovld __cnfn convert_uint3(double3); uint3 __ovld __cnfn convert_uint3_rte(double3); uint3 __ovld __cnfn convert_uint3_rtn(double3); uint3 __ovld __cnfn convert_uint3_rtp(double3); uint3 __ovld __cnfn convert_uint3_rtz(double3); uint3 __ovld __cnfn convert_uint3_sat(double3); uint3 __ovld __cnfn convert_uint3_sat_rte(double3); uint3 __ovld __cnfn convert_uint3_sat_rtn(double3); uint3 __ovld __cnfn convert_uint3_sat_rtp(double3); uint3 __ovld __cnfn convert_uint3_sat_rtz(double3); uint4 __ovld __cnfn convert_uint4(double4); uint4 __ovld __cnfn convert_uint4_rte(double4); uint4 __ovld __cnfn convert_uint4_rtn(double4); uint4 __ovld __cnfn convert_uint4_rtp(double4); uint4 __ovld __cnfn convert_uint4_rtz(double4); uint4 __ovld __cnfn convert_uint4_sat(double4); uint4 __ovld __cnfn convert_uint4_sat_rte(double4); uint4 __ovld __cnfn convert_uint4_sat_rtn(double4); uint4 __ovld __cnfn convert_uint4_sat_rtp(double4); uint4 __ovld __cnfn convert_uint4_sat_rtz(double4); uint8 __ovld __cnfn convert_uint8(double8); uint8 __ovld __cnfn convert_uint8_rte(double8); uint8 __ovld __cnfn convert_uint8_rtn(double8); uint8 __ovld __cnfn convert_uint8_rtp(double8); uint8 __ovld __cnfn convert_uint8_rtz(double8); uint8 __ovld __cnfn convert_uint8_sat(double8); uint8 __ovld __cnfn convert_uint8_sat_rte(double8); uint8 __ovld __cnfn convert_uint8_sat_rtn(double8); uint8 __ovld __cnfn convert_uint8_sat_rtp(double8); uint8 __ovld __cnfn convert_uint8_sat_rtz(double8); uint16 __ovld __cnfn convert_uint16(double16); uint16 __ovld __cnfn convert_uint16_rte(double16); uint16 __ovld __cnfn 
convert_uint16_rtn(double16); uint16 __ovld __cnfn convert_uint16_rtp(double16); uint16 __ovld __cnfn convert_uint16_rtz(double16); uint16 __ovld __cnfn convert_uint16_sat(double16); uint16 __ovld __cnfn convert_uint16_sat_rte(double16); uint16 __ovld __cnfn convert_uint16_sat_rtn(double16); uint16 __ovld __cnfn convert_uint16_sat_rtp(double16); uint16 __ovld __cnfn convert_uint16_sat_rtz(double16); long __ovld __cnfn convert_long(double); long __ovld __cnfn convert_long_rte(double); long __ovld __cnfn convert_long_rtn(double); long __ovld __cnfn convert_long_rtp(double); long __ovld __cnfn convert_long_rtz(double); long __ovld __cnfn convert_long_sat(double); long __ovld __cnfn convert_long_sat_rte(double); long __ovld __cnfn convert_long_sat_rtn(double); long __ovld __cnfn convert_long_sat_rtp(double); long __ovld __cnfn convert_long_sat_rtz(double); long2 __ovld __cnfn convert_long2(double2); long2 __ovld __cnfn convert_long2_rte(double2); long2 __ovld __cnfn convert_long2_rtn(double2); long2 __ovld __cnfn convert_long2_rtp(double2); long2 __ovld __cnfn convert_long2_rtz(double2); long2 __ovld __cnfn convert_long2_sat(double2); long2 __ovld __cnfn convert_long2_sat_rte(double2); long2 __ovld __cnfn convert_long2_sat_rtn(double2); long2 __ovld __cnfn convert_long2_sat_rtp(double2); long2 __ovld __cnfn convert_long2_sat_rtz(double2); long3 __ovld __cnfn convert_long3(double3); long3 __ovld __cnfn convert_long3_rte(double3); long3 __ovld __cnfn convert_long3_rtn(double3); long3 __ovld __cnfn convert_long3_rtp(double3); long3 __ovld __cnfn convert_long3_rtz(double3); long3 __ovld __cnfn convert_long3_sat(double3); long3 __ovld __cnfn convert_long3_sat_rte(double3); long3 __ovld __cnfn convert_long3_sat_rtn(double3); long3 __ovld __cnfn convert_long3_sat_rtp(double3); long3 __ovld __cnfn convert_long3_sat_rtz(double3); long4 __ovld __cnfn convert_long4(double4); long4 __ovld __cnfn convert_long4_rte(double4); long4 __ovld __cnfn convert_long4_rtn(double4); long4 __ovld __cnfn convert_long4_rtp(double4); long4 __ovld __cnfn convert_long4_rtz(double4); long4 __ovld __cnfn convert_long4_sat(double4); long4 __ovld __cnfn convert_long4_sat_rte(double4); long4 __ovld __cnfn convert_long4_sat_rtn(double4); long4 __ovld __cnfn convert_long4_sat_rtp(double4); long4 __ovld __cnfn convert_long4_sat_rtz(double4); long8 __ovld __cnfn convert_long8(double8); long8 __ovld __cnfn convert_long8_rte(double8); long8 __ovld __cnfn convert_long8_rtn(double8); long8 __ovld __cnfn convert_long8_rtp(double8); long8 __ovld __cnfn convert_long8_rtz(double8); long8 __ovld __cnfn convert_long8_sat(double8); long8 __ovld __cnfn convert_long8_sat_rte(double8); long8 __ovld __cnfn convert_long8_sat_rtn(double8); long8 __ovld __cnfn convert_long8_sat_rtp(double8); long8 __ovld __cnfn convert_long8_sat_rtz(double8); long16 __ovld __cnfn convert_long16(double16); long16 __ovld __cnfn convert_long16_rte(double16); long16 __ovld __cnfn convert_long16_rtn(double16); long16 __ovld __cnfn convert_long16_rtp(double16); long16 __ovld __cnfn convert_long16_rtz(double16); long16 __ovld __cnfn convert_long16_sat(double16); long16 __ovld __cnfn convert_long16_sat_rte(double16); long16 __ovld __cnfn convert_long16_sat_rtn(double16); long16 __ovld __cnfn convert_long16_sat_rtp(double16); long16 __ovld __cnfn convert_long16_sat_rtz(double16); ulong __ovld __cnfn convert_ulong(double); ulong __ovld __cnfn convert_ulong_rte(double); ulong __ovld __cnfn convert_ulong_rtn(double); ulong __ovld __cnfn convert_ulong_rtp(double); ulong __ovld 
__cnfn convert_ulong_rtz(double); ulong __ovld __cnfn convert_ulong_sat(double); ulong __ovld __cnfn convert_ulong_sat_rte(double); ulong __ovld __cnfn convert_ulong_sat_rtn(double); ulong __ovld __cnfn convert_ulong_sat_rtp(double); ulong __ovld __cnfn convert_ulong_sat_rtz(double); ulong2 __ovld __cnfn convert_ulong2(double2); ulong2 __ovld __cnfn convert_ulong2_rte(double2); ulong2 __ovld __cnfn convert_ulong2_rtn(double2); ulong2 __ovld __cnfn convert_ulong2_rtp(double2); ulong2 __ovld __cnfn convert_ulong2_rtz(double2); ulong2 __ovld __cnfn convert_ulong2_sat(double2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(double2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(double2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(double2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(double2); ulong3 __ovld __cnfn convert_ulong3(double3); ulong3 __ovld __cnfn convert_ulong3_rte(double3); ulong3 __ovld __cnfn convert_ulong3_rtn(double3); ulong3 __ovld __cnfn convert_ulong3_rtp(double3); ulong3 __ovld __cnfn convert_ulong3_rtz(double3); ulong3 __ovld __cnfn convert_ulong3_sat(double3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(double3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(double3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(double3); ulong3 __ovld __cnfn convert_ulong3_sat_rtz(double3); ulong4 __ovld __cnfn convert_ulong4(double4); ulong4 __ovld __cnfn convert_ulong4_rte(double4); ulong4 __ovld __cnfn convert_ulong4_rtn(double4); ulong4 __ovld __cnfn convert_ulong4_rtp(double4); ulong4 __ovld __cnfn convert_ulong4_rtz(double4); ulong4 __ovld __cnfn convert_ulong4_sat(double4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(double4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(double4); ulong4 __ovld __cnfn convert_ulong4_sat_rtp(double4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(double4); ulong8 __ovld __cnfn convert_ulong8(double8); ulong8 __ovld __cnfn convert_ulong8_rte(double8); ulong8 __ovld __cnfn convert_ulong8_rtn(double8); ulong8 __ovld __cnfn convert_ulong8_rtp(double8); ulong8 __ovld __cnfn convert_ulong8_rtz(double8); ulong8 __ovld __cnfn convert_ulong8_sat(double8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(double8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(double8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(double8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(double8); ulong16 __ovld __cnfn convert_ulong16(double16); ulong16 __ovld __cnfn convert_ulong16_rte(double16); ulong16 __ovld __cnfn convert_ulong16_rtn(double16); ulong16 __ovld __cnfn convert_ulong16_rtp(double16); ulong16 __ovld __cnfn convert_ulong16_rtz(double16); ulong16 __ovld __cnfn convert_ulong16_sat(double16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(double16); ulong16 __ovld __cnfn convert_ulong16_sat_rtn(double16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(double16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(double16); float __ovld __cnfn convert_float(double); float __ovld __cnfn convert_float_rte(double); float __ovld __cnfn convert_float_rtn(double); float __ovld __cnfn convert_float_rtp(double); float __ovld __cnfn convert_float_rtz(double); float2 __ovld __cnfn convert_float2(double2); float2 __ovld __cnfn convert_float2_rte(double2); float2 __ovld __cnfn convert_float2_rtn(double2); float2 __ovld __cnfn convert_float2_rtp(double2); float2 __ovld __cnfn convert_float2_rtz(double2); float3 __ovld __cnfn convert_float3(double3); float3 __ovld __cnfn convert_float3_rte(double3); float3 __ovld __cnfn convert_float3_rtn(double3); float3 __ovld __cnfn convert_float3_rtp(double3); float3 
__ovld __cnfn convert_float3_rtz(double3); float4 __ovld __cnfn convert_float4(double4); float4 __ovld __cnfn convert_float4_rte(double4); float4 __ovld __cnfn convert_float4_rtn(double4); float4 __ovld __cnfn convert_float4_rtp(double4); float4 __ovld __cnfn convert_float4_rtz(double4); float8 __ovld __cnfn convert_float8(double8); float8 __ovld __cnfn convert_float8_rte(double8); float8 __ovld __cnfn convert_float8_rtn(double8); float8 __ovld __cnfn convert_float8_rtp(double8); float8 __ovld __cnfn convert_float8_rtz(double8); float16 __ovld __cnfn convert_float16(double16); float16 __ovld __cnfn convert_float16_rte(double16); float16 __ovld __cnfn convert_float16_rtn(double16); float16 __ovld __cnfn convert_float16_rtp(double16); float16 __ovld __cnfn convert_float16_rtz(double16); double __ovld __cnfn convert_double(char); double __ovld __cnfn convert_double(double); double __ovld __cnfn convert_double(float); double __ovld __cnfn convert_double(int); double __ovld __cnfn convert_double(long); double __ovld __cnfn convert_double(short); double __ovld __cnfn convert_double(uchar); double __ovld __cnfn convert_double(uint); double __ovld __cnfn convert_double(ulong); double __ovld __cnfn convert_double(ushort); double __ovld __cnfn convert_double_rte(char); double __ovld __cnfn convert_double_rte(double); double __ovld __cnfn convert_double_rte(float); double __ovld __cnfn convert_double_rte(int); double __ovld __cnfn convert_double_rte(long); double __ovld __cnfn convert_double_rte(short); double __ovld __cnfn convert_double_rte(uchar); double __ovld __cnfn convert_double_rte(uint); double __ovld __cnfn convert_double_rte(ulong); double __ovld __cnfn convert_double_rte(ushort); double __ovld __cnfn convert_double_rtn(char); double __ovld __cnfn convert_double_rtn(double); double __ovld __cnfn convert_double_rtn(float); double __ovld __cnfn convert_double_rtn(int); double __ovld __cnfn convert_double_rtn(long); double __ovld __cnfn convert_double_rtn(short); double __ovld __cnfn convert_double_rtn(uchar); double __ovld __cnfn convert_double_rtn(uint); double __ovld __cnfn convert_double_rtn(ulong); double __ovld __cnfn convert_double_rtn(ushort); double __ovld __cnfn convert_double_rtp(char); double __ovld __cnfn convert_double_rtp(double); double __ovld __cnfn convert_double_rtp(float); double __ovld __cnfn convert_double_rtp(int); double __ovld __cnfn convert_double_rtp(long); double __ovld __cnfn convert_double_rtp(short); double __ovld __cnfn convert_double_rtp(uchar); double __ovld __cnfn convert_double_rtp(uint); double __ovld __cnfn convert_double_rtp(ulong); double __ovld __cnfn convert_double_rtp(ushort); double __ovld __cnfn convert_double_rtz(char); double __ovld __cnfn convert_double_rtz(double); double __ovld __cnfn convert_double_rtz(float); double __ovld __cnfn convert_double_rtz(int); double __ovld __cnfn convert_double_rtz(long); double __ovld __cnfn convert_double_rtz(short); double __ovld __cnfn convert_double_rtz(uchar); double __ovld __cnfn convert_double_rtz(uint); double __ovld __cnfn convert_double_rtz(ulong); double __ovld __cnfn convert_double_rtz(ushort); double2 __ovld __cnfn convert_double2(char2); double2 __ovld __cnfn convert_double2(double2); double2 __ovld __cnfn convert_double2(float2); double2 __ovld __cnfn convert_double2(int2); double2 __ovld __cnfn convert_double2(long2); double2 __ovld __cnfn convert_double2(short2); double2 __ovld __cnfn convert_double2(uchar2); double2 __ovld __cnfn convert_double2(uint2); double2 __ovld __cnfn 
convert_double2(ulong2); double2 __ovld __cnfn convert_double2(ushort2); double2 __ovld __cnfn convert_double2_rte(char2); double2 __ovld __cnfn convert_double2_rte(double2); double2 __ovld __cnfn convert_double2_rte(float2); double2 __ovld __cnfn convert_double2_rte(int2); double2 __ovld __cnfn convert_double2_rte(long2); double2 __ovld __cnfn convert_double2_rte(short2); double2 __ovld __cnfn convert_double2_rte(uchar2); double2 __ovld __cnfn convert_double2_rte(uint2); double2 __ovld __cnfn convert_double2_rte(ulong2); double2 __ovld __cnfn convert_double2_rte(ushort2); double2 __ovld __cnfn convert_double2_rtn(char2); double2 __ovld __cnfn convert_double2_rtn(double2); double2 __ovld __cnfn convert_double2_rtn(float2); double2 __ovld __cnfn convert_double2_rtn(int2); double2 __ovld __cnfn convert_double2_rtn(long2); double2 __ovld __cnfn convert_double2_rtn(short2); double2 __ovld __cnfn convert_double2_rtn(uchar2); double2 __ovld __cnfn convert_double2_rtn(uint2); double2 __ovld __cnfn convert_double2_rtn(ulong2); double2 __ovld __cnfn convert_double2_rtn(ushort2); double2 __ovld __cnfn convert_double2_rtp(char2); double2 __ovld __cnfn convert_double2_rtp(double2); double2 __ovld __cnfn convert_double2_rtp(float2); double2 __ovld __cnfn convert_double2_rtp(int2); double2 __ovld __cnfn convert_double2_rtp(long2); double2 __ovld __cnfn convert_double2_rtp(short2); double2 __ovld __cnfn convert_double2_rtp(uchar2); double2 __ovld __cnfn convert_double2_rtp(uint2); double2 __ovld __cnfn convert_double2_rtp(ulong2); double2 __ovld __cnfn convert_double2_rtp(ushort2); double2 __ovld __cnfn convert_double2_rtz(char2); double2 __ovld __cnfn convert_double2_rtz(double2); double2 __ovld __cnfn convert_double2_rtz(float2); double2 __ovld __cnfn convert_double2_rtz(int2); double2 __ovld __cnfn convert_double2_rtz(long2); double2 __ovld __cnfn convert_double2_rtz(short2); double2 __ovld __cnfn convert_double2_rtz(uchar2); double2 __ovld __cnfn convert_double2_rtz(uint2); double2 __ovld __cnfn convert_double2_rtz(ulong2); double2 __ovld __cnfn convert_double2_rtz(ushort2); double3 __ovld __cnfn convert_double3(char3); double3 __ovld __cnfn convert_double3(double3); double3 __ovld __cnfn convert_double3(float3); double3 __ovld __cnfn convert_double3(int3); double3 __ovld __cnfn convert_double3(long3); double3 __ovld __cnfn convert_double3(short3); double3 __ovld __cnfn convert_double3(uchar3); double3 __ovld __cnfn convert_double3(uint3); double3 __ovld __cnfn convert_double3(ulong3); double3 __ovld __cnfn convert_double3(ushort3); double3 __ovld __cnfn convert_double3_rte(char3); double3 __ovld __cnfn convert_double3_rte(double3); double3 __ovld __cnfn convert_double3_rte(float3); double3 __ovld __cnfn convert_double3_rte(int3); double3 __ovld __cnfn convert_double3_rte(long3); double3 __ovld __cnfn convert_double3_rte(short3); double3 __ovld __cnfn convert_double3_rte(uchar3); double3 __ovld __cnfn convert_double3_rte(uint3); double3 __ovld __cnfn convert_double3_rte(ulong3); double3 __ovld __cnfn convert_double3_rte(ushort3); double3 __ovld __cnfn convert_double3_rtn(char3); double3 __ovld __cnfn convert_double3_rtn(double3); double3 __ovld __cnfn convert_double3_rtn(float3); double3 __ovld __cnfn convert_double3_rtn(int3); double3 __ovld __cnfn convert_double3_rtn(long3); double3 __ovld __cnfn convert_double3_rtn(short3); double3 __ovld __cnfn convert_double3_rtn(uchar3); double3 __ovld __cnfn convert_double3_rtn(uint3); double3 __ovld __cnfn convert_double3_rtn(ulong3); double3 __ovld __cnfn 
convert_double3_rtn(ushort3); double3 __ovld __cnfn convert_double3_rtp(char3); double3 __ovld __cnfn convert_double3_rtp(double3); double3 __ovld __cnfn convert_double3_rtp(float3); double3 __ovld __cnfn convert_double3_rtp(int3); double3 __ovld __cnfn convert_double3_rtp(long3); double3 __ovld __cnfn convert_double3_rtp(short3); double3 __ovld __cnfn convert_double3_rtp(uchar3); double3 __ovld __cnfn convert_double3_rtp(uint3); double3 __ovld __cnfn convert_double3_rtp(ulong3); double3 __ovld __cnfn convert_double3_rtp(ushort3); double3 __ovld __cnfn convert_double3_rtz(char3); double3 __ovld __cnfn convert_double3_rtz(double3); double3 __ovld __cnfn convert_double3_rtz(float3); double3 __ovld __cnfn convert_double3_rtz(int3); double3 __ovld __cnfn convert_double3_rtz(long3); double3 __ovld __cnfn convert_double3_rtz(short3); double3 __ovld __cnfn convert_double3_rtz(uchar3); double3 __ovld __cnfn convert_double3_rtz(uint3); double3 __ovld __cnfn convert_double3_rtz(ulong3); double3 __ovld __cnfn convert_double3_rtz(ushort3); double4 __ovld __cnfn convert_double4(char4); double4 __ovld __cnfn convert_double4(double4); double4 __ovld __cnfn convert_double4(float4); double4 __ovld __cnfn convert_double4(int4); double4 __ovld __cnfn convert_double4(long4); double4 __ovld __cnfn convert_double4(short4); double4 __ovld __cnfn convert_double4(uchar4); double4 __ovld __cnfn convert_double4(uint4); double4 __ovld __cnfn convert_double4(ulong4); double4 __ovld __cnfn convert_double4(ushort4); double4 __ovld __cnfn convert_double4_rte(char4); double4 __ovld __cnfn convert_double4_rte(double4); double4 __ovld __cnfn convert_double4_rte(float4); double4 __ovld __cnfn convert_double4_rte(int4); double4 __ovld __cnfn convert_double4_rte(long4); double4 __ovld __cnfn convert_double4_rte(short4); double4 __ovld __cnfn convert_double4_rte(uchar4); double4 __ovld __cnfn convert_double4_rte(uint4); double4 __ovld __cnfn convert_double4_rte(ulong4); double4 __ovld __cnfn convert_double4_rte(ushort4); double4 __ovld __cnfn convert_double4_rtn(char4); double4 __ovld __cnfn convert_double4_rtn(double4); double4 __ovld __cnfn convert_double4_rtn(float4); double4 __ovld __cnfn convert_double4_rtn(int4); double4 __ovld __cnfn convert_double4_rtn(long4); double4 __ovld __cnfn convert_double4_rtn(short4); double4 __ovld __cnfn convert_double4_rtn(uchar4); double4 __ovld __cnfn convert_double4_rtn(uint4); double4 __ovld __cnfn convert_double4_rtn(ulong4); double4 __ovld __cnfn convert_double4_rtn(ushort4); double4 __ovld __cnfn convert_double4_rtp(char4); double4 __ovld __cnfn convert_double4_rtp(double4); double4 __ovld __cnfn convert_double4_rtp(float4); double4 __ovld __cnfn convert_double4_rtp(int4); double4 __ovld __cnfn convert_double4_rtp(long4); double4 __ovld __cnfn convert_double4_rtp(short4); double4 __ovld __cnfn convert_double4_rtp(uchar4); double4 __ovld __cnfn convert_double4_rtp(uint4); double4 __ovld __cnfn convert_double4_rtp(ulong4); double4 __ovld __cnfn convert_double4_rtp(ushort4); double4 __ovld __cnfn convert_double4_rtz(char4); double4 __ovld __cnfn convert_double4_rtz(double4); double4 __ovld __cnfn convert_double4_rtz(float4); double4 __ovld __cnfn convert_double4_rtz(int4); double4 __ovld __cnfn convert_double4_rtz(long4); double4 __ovld __cnfn convert_double4_rtz(short4); double4 __ovld __cnfn convert_double4_rtz(uchar4); double4 __ovld __cnfn convert_double4_rtz(uint4); double4 __ovld __cnfn convert_double4_rtz(ulong4); double4 __ovld __cnfn convert_double4_rtz(ushort4); double8 __ovld 
__cnfn convert_double8(char8); double8 __ovld __cnfn convert_double8(double8); double8 __ovld __cnfn convert_double8(float8); double8 __ovld __cnfn convert_double8(int8); double8 __ovld __cnfn convert_double8(long8); double8 __ovld __cnfn convert_double8(short8); double8 __ovld __cnfn convert_double8(uchar8); double8 __ovld __cnfn convert_double8(uint8); double8 __ovld __cnfn convert_double8(ulong8); double8 __ovld __cnfn convert_double8(ushort8); double8 __ovld __cnfn convert_double8_rte(char8); double8 __ovld __cnfn convert_double8_rte(double8); double8 __ovld __cnfn convert_double8_rte(float8); double8 __ovld __cnfn convert_double8_rte(int8); double8 __ovld __cnfn convert_double8_rte(long8); double8 __ovld __cnfn convert_double8_rte(short8); double8 __ovld __cnfn convert_double8_rte(uchar8); double8 __ovld __cnfn convert_double8_rte(uint8); double8 __ovld __cnfn convert_double8_rte(ulong8); double8 __ovld __cnfn convert_double8_rte(ushort8); double8 __ovld __cnfn convert_double8_rtn(char8); double8 __ovld __cnfn convert_double8_rtn(double8); double8 __ovld __cnfn convert_double8_rtn(float8); double8 __ovld __cnfn convert_double8_rtn(int8); double8 __ovld __cnfn convert_double8_rtn(long8); double8 __ovld __cnfn convert_double8_rtn(short8); double8 __ovld __cnfn convert_double8_rtn(uchar8); double8 __ovld __cnfn convert_double8_rtn(uint8); double8 __ovld __cnfn convert_double8_rtn(ulong8); double8 __ovld __cnfn convert_double8_rtn(ushort8); double8 __ovld __cnfn convert_double8_rtp(char8); double8 __ovld __cnfn convert_double8_rtp(double8); double8 __ovld __cnfn convert_double8_rtp(float8); double8 __ovld __cnfn convert_double8_rtp(int8); double8 __ovld __cnfn convert_double8_rtp(long8); double8 __ovld __cnfn convert_double8_rtp(short8); double8 __ovld __cnfn convert_double8_rtp(uchar8); double8 __ovld __cnfn convert_double8_rtp(uint8); double8 __ovld __cnfn convert_double8_rtp(ulong8); double8 __ovld __cnfn convert_double8_rtp(ushort8); double8 __ovld __cnfn convert_double8_rtz(char8); double8 __ovld __cnfn convert_double8_rtz(double8); double8 __ovld __cnfn convert_double8_rtz(float8); double8 __ovld __cnfn convert_double8_rtz(int8); double8 __ovld __cnfn convert_double8_rtz(long8); double8 __ovld __cnfn convert_double8_rtz(short8); double8 __ovld __cnfn convert_double8_rtz(uchar8); double8 __ovld __cnfn convert_double8_rtz(uint8); double8 __ovld __cnfn convert_double8_rtz(ulong8); double8 __ovld __cnfn convert_double8_rtz(ushort8); double16 __ovld __cnfn convert_double16(char16); double16 __ovld __cnfn convert_double16(double16); double16 __ovld __cnfn convert_double16(float16); double16 __ovld __cnfn convert_double16(int16); double16 __ovld __cnfn convert_double16(long16); double16 __ovld __cnfn convert_double16(short16); double16 __ovld __cnfn convert_double16(uchar16); double16 __ovld __cnfn convert_double16(uint16); double16 __ovld __cnfn convert_double16(ulong16); double16 __ovld __cnfn convert_double16(ushort16); double16 __ovld __cnfn convert_double16_rte(char16); double16 __ovld __cnfn convert_double16_rte(double16); double16 __ovld __cnfn convert_double16_rte(float16); double16 __ovld __cnfn convert_double16_rte(int16); double16 __ovld __cnfn convert_double16_rte(long16); double16 __ovld __cnfn convert_double16_rte(short16); double16 __ovld __cnfn convert_double16_rte(uchar16); double16 __ovld __cnfn convert_double16_rte(uint16); double16 __ovld __cnfn convert_double16_rte(ulong16); double16 __ovld __cnfn convert_double16_rte(ushort16); double16 __ovld __cnfn 
convert_double16_rtn(char16); double16 __ovld __cnfn convert_double16_rtn(double16); double16 __ovld __cnfn convert_double16_rtn(float16); double16 __ovld __cnfn convert_double16_rtn(int16); double16 __ovld __cnfn convert_double16_rtn(long16); double16 __ovld __cnfn convert_double16_rtn(short16); double16 __ovld __cnfn convert_double16_rtn(uchar16); double16 __ovld __cnfn convert_double16_rtn(uint16); double16 __ovld __cnfn convert_double16_rtn(ulong16); double16 __ovld __cnfn convert_double16_rtn(ushort16); double16 __ovld __cnfn convert_double16_rtp(char16); double16 __ovld __cnfn convert_double16_rtp(double16); double16 __ovld __cnfn convert_double16_rtp(float16); double16 __ovld __cnfn convert_double16_rtp(int16); double16 __ovld __cnfn convert_double16_rtp(long16); double16 __ovld __cnfn convert_double16_rtp(short16); double16 __ovld __cnfn convert_double16_rtp(uchar16); double16 __ovld __cnfn convert_double16_rtp(uint16); double16 __ovld __cnfn convert_double16_rtp(ulong16); double16 __ovld __cnfn convert_double16_rtp(ushort16); double16 __ovld __cnfn convert_double16_rtz(char16); double16 __ovld __cnfn convert_double16_rtz(double16); double16 __ovld __cnfn convert_double16_rtz(float16); double16 __ovld __cnfn convert_double16_rtz(int16); double16 __ovld __cnfn convert_double16_rtz(long16); double16 __ovld __cnfn convert_double16_rtz(short16); double16 __ovld __cnfn convert_double16_rtz(uchar16); double16 __ovld __cnfn convert_double16_rtz(uint16); double16 __ovld __cnfn convert_double16_rtz(ulong16); double16 __ovld __cnfn convert_double16_rtz(ushort16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 #pragma OPENCL EXTENSION cl_khr_fp16 : enable // Convert half types to non-double types. uchar __ovld __cnfn convert_uchar(half); uchar __ovld __cnfn convert_uchar_rte(half); uchar __ovld __cnfn convert_uchar_rtp(half); uchar __ovld __cnfn convert_uchar_rtn(half); uchar __ovld __cnfn convert_uchar_rtz(half); uchar __ovld __cnfn convert_uchar_sat(half); uchar __ovld __cnfn convert_uchar_sat_rte(half); uchar __ovld __cnfn convert_uchar_sat_rtp(half); uchar __ovld __cnfn convert_uchar_sat_rtn(half); uchar __ovld __cnfn convert_uchar_sat_rtz(half); uchar2 __ovld __cnfn convert_uchar2(half2); uchar2 __ovld __cnfn convert_uchar2_rte(half2); uchar2 __ovld __cnfn convert_uchar2_rtp(half2); uchar2 __ovld __cnfn convert_uchar2_rtn(half2); uchar2 __ovld __cnfn convert_uchar2_rtz(half2); uchar2 __ovld __cnfn convert_uchar2_sat(half2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(half2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(half2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(half2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(half2); uchar3 __ovld __cnfn convert_uchar3(half3); uchar3 __ovld __cnfn convert_uchar3_rte(half3); uchar3 __ovld __cnfn convert_uchar3_rtp(half3); uchar3 __ovld __cnfn convert_uchar3_rtn(half3); uchar3 __ovld __cnfn convert_uchar3_rtz(half3); uchar3 __ovld __cnfn convert_uchar3_sat(half3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(half3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(half3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(half3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(half3); uchar4 __ovld __cnfn convert_uchar4(half4); uchar4 __ovld __cnfn convert_uchar4_rte(half4); uchar4 __ovld __cnfn convert_uchar4_rtp(half4); uchar4 __ovld __cnfn convert_uchar4_rtn(half4); uchar4 __ovld __cnfn convert_uchar4_rtz(half4); uchar4 __ovld __cnfn convert_uchar4_sat(half4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(half4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(half4); 
uchar4 __ovld __cnfn convert_uchar4_sat_rtn(half4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(half4); uchar8 __ovld __cnfn convert_uchar8(half8); uchar8 __ovld __cnfn convert_uchar8_rte(half8); uchar8 __ovld __cnfn convert_uchar8_rtp(half8); uchar8 __ovld __cnfn convert_uchar8_rtn(half8); uchar8 __ovld __cnfn convert_uchar8_rtz(half8); uchar8 __ovld __cnfn convert_uchar8_sat(half8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(half8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(half8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(half8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(half8); uchar16 __ovld __cnfn convert_uchar16(half16); uchar16 __ovld __cnfn convert_uchar16_rte(half16); uchar16 __ovld __cnfn convert_uchar16_rtp(half16); uchar16 __ovld __cnfn convert_uchar16_rtn(half16); uchar16 __ovld __cnfn convert_uchar16_rtz(half16); uchar16 __ovld __cnfn convert_uchar16_sat(half16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(half16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(half16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(half16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(half16); ushort __ovld __cnfn convert_ushort(half); ushort __ovld __cnfn convert_ushort_rte(half); ushort __ovld __cnfn convert_ushort_rtp(half); ushort __ovld __cnfn convert_ushort_rtn(half); ushort __ovld __cnfn convert_ushort_rtz(half); ushort __ovld __cnfn convert_ushort_sat(half); ushort __ovld __cnfn convert_ushort_sat_rte(half); ushort __ovld __cnfn convert_ushort_sat_rtp(half); ushort __ovld __cnfn convert_ushort_sat_rtn(half); ushort __ovld __cnfn convert_ushort_sat_rtz(half); ushort2 __ovld __cnfn convert_ushort2(half2); ushort2 __ovld __cnfn convert_ushort2_rte(half2); ushort2 __ovld __cnfn convert_ushort2_rtp(half2); ushort2 __ovld __cnfn convert_ushort2_rtn(half2); ushort2 __ovld __cnfn convert_ushort2_rtz(half2); ushort2 __ovld __cnfn convert_ushort2_sat(half2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(half2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(half2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(half2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(half2); ushort3 __ovld __cnfn convert_ushort3(half3); ushort3 __ovld __cnfn convert_ushort3_rte(half3); ushort3 __ovld __cnfn convert_ushort3_rtp(half3); ushort3 __ovld __cnfn convert_ushort3_rtn(half3); ushort3 __ovld __cnfn convert_ushort3_rtz(half3); ushort3 __ovld __cnfn convert_ushort3_sat(half3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(half3); ushort3 __ovld __cnfn convert_ushort3_sat_rtp(half3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(half3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(half3); ushort4 __ovld __cnfn convert_ushort4(half4); ushort4 __ovld __cnfn convert_ushort4_rte(half4); ushort4 __ovld __cnfn convert_ushort4_rtp(half4); ushort4 __ovld __cnfn convert_ushort4_rtn(half4); ushort4 __ovld __cnfn convert_ushort4_rtz(half4); ushort4 __ovld __cnfn convert_ushort4_sat(half4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(half4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(half4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(half4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(half4); ushort8 __ovld __cnfn convert_ushort8(half8); ushort8 __ovld __cnfn convert_ushort8_rte(half8); ushort8 __ovld __cnfn convert_ushort8_rtp(half8); ushort8 __ovld __cnfn convert_ushort8_rtn(half8); ushort8 __ovld __cnfn convert_ushort8_rtz(half8); ushort8 __ovld __cnfn convert_ushort8_sat(half8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(half8); ushort8 __ovld __cnfn convert_ushort8_sat_rtp(half8); ushort8 __ovld 
__cnfn convert_ushort8_sat_rtn(half8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(half8); ushort16 __ovld __cnfn convert_ushort16(half16); ushort16 __ovld __cnfn convert_ushort16_rte(half16); ushort16 __ovld __cnfn convert_ushort16_rtp(half16); ushort16 __ovld __cnfn convert_ushort16_rtn(half16); ushort16 __ovld __cnfn convert_ushort16_rtz(half16); ushort16 __ovld __cnfn convert_ushort16_sat(half16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(half16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(half16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(half16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(half16); uint __ovld __cnfn convert_uint(half); uint __ovld __cnfn convert_uint_rte(half); uint __ovld __cnfn convert_uint_rtp(half); uint __ovld __cnfn convert_uint_rtn(half); uint __ovld __cnfn convert_uint_rtz(half); uint __ovld __cnfn convert_uint_sat(half); uint __ovld __cnfn convert_uint_sat_rte(half); uint __ovld __cnfn convert_uint_sat_rtp(half); uint __ovld __cnfn convert_uint_sat_rtn(half); uint __ovld __cnfn convert_uint_sat_rtz(half); uint2 __ovld __cnfn convert_uint2(half2); uint2 __ovld __cnfn convert_uint2_rte(half2); uint2 __ovld __cnfn convert_uint2_rtp(half2); uint2 __ovld __cnfn convert_uint2_rtn(half2); uint2 __ovld __cnfn convert_uint2_rtz(half2); uint2 __ovld __cnfn convert_uint2_sat(half2); uint2 __ovld __cnfn convert_uint2_sat_rte(half2); uint2 __ovld __cnfn convert_uint2_sat_rtp(half2); uint2 __ovld __cnfn convert_uint2_sat_rtn(half2); uint2 __ovld __cnfn convert_uint2_sat_rtz(half2); uint3 __ovld __cnfn convert_uint3(half3); uint3 __ovld __cnfn convert_uint3_rte(half3); uint3 __ovld __cnfn convert_uint3_rtp(half3); uint3 __ovld __cnfn convert_uint3_rtn(half3); uint3 __ovld __cnfn convert_uint3_rtz(half3); uint3 __ovld __cnfn convert_uint3_sat(half3); uint3 __ovld __cnfn convert_uint3_sat_rte(half3); uint3 __ovld __cnfn convert_uint3_sat_rtp(half3); uint3 __ovld __cnfn convert_uint3_sat_rtn(half3); uint3 __ovld __cnfn convert_uint3_sat_rtz(half3); uint4 __ovld __cnfn convert_uint4(half4); uint4 __ovld __cnfn convert_uint4_rte(half4); uint4 __ovld __cnfn convert_uint4_rtp(half4); uint4 __ovld __cnfn convert_uint4_rtn(half4); uint4 __ovld __cnfn convert_uint4_rtz(half4); uint4 __ovld __cnfn convert_uint4_sat(half4); uint4 __ovld __cnfn convert_uint4_sat_rte(half4); uint4 __ovld __cnfn convert_uint4_sat_rtp(half4); uint4 __ovld __cnfn convert_uint4_sat_rtn(half4); uint4 __ovld __cnfn convert_uint4_sat_rtz(half4); uint8 __ovld __cnfn convert_uint8(half8); uint8 __ovld __cnfn convert_uint8_rte(half8); uint8 __ovld __cnfn convert_uint8_rtp(half8); uint8 __ovld __cnfn convert_uint8_rtn(half8); uint8 __ovld __cnfn convert_uint8_rtz(half8); uint8 __ovld __cnfn convert_uint8_sat(half8); uint8 __ovld __cnfn convert_uint8_sat_rte(half8); uint8 __ovld __cnfn convert_uint8_sat_rtp(half8); uint8 __ovld __cnfn convert_uint8_sat_rtn(half8); uint8 __ovld __cnfn convert_uint8_sat_rtz(half8); uint16 __ovld __cnfn convert_uint16(half16); uint16 __ovld __cnfn convert_uint16_rte(half16); uint16 __ovld __cnfn convert_uint16_rtp(half16); uint16 __ovld __cnfn convert_uint16_rtn(half16); uint16 __ovld __cnfn convert_uint16_rtz(half16); uint16 __ovld __cnfn convert_uint16_sat(half16); uint16 __ovld __cnfn convert_uint16_sat_rte(half16); uint16 __ovld __cnfn convert_uint16_sat_rtp(half16); uint16 __ovld __cnfn convert_uint16_sat_rtn(half16); uint16 __ovld __cnfn convert_uint16_sat_rtz(half16); ulong __ovld __cnfn convert_ulong(half); ulong __ovld __cnfn convert_ulong_rte(half); 
ulong __ovld __cnfn convert_ulong_rtp(half); ulong __ovld __cnfn convert_ulong_rtn(half); ulong __ovld __cnfn convert_ulong_rtz(half); ulong __ovld __cnfn convert_ulong_sat(half); ulong __ovld __cnfn convert_ulong_sat_rte(half); ulong __ovld __cnfn convert_ulong_sat_rtp(half); ulong __ovld __cnfn convert_ulong_sat_rtn(half); ulong __ovld __cnfn convert_ulong_sat_rtz(half); ulong2 __ovld __cnfn convert_ulong2(half2); ulong2 __ovld __cnfn convert_ulong2_rte(half2); ulong2 __ovld __cnfn convert_ulong2_rtp(half2); ulong2 __ovld __cnfn convert_ulong2_rtn(half2); ulong2 __ovld __cnfn convert_ulong2_rtz(half2); ulong2 __ovld __cnfn convert_ulong2_sat(half2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(half2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(half2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(half2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(half2); ulong3 __ovld __cnfn convert_ulong3(half3); ulong3 __ovld __cnfn convert_ulong3_rte(half3); ulong3 __ovld __cnfn convert_ulong3_rtp(half3); ulong3 __ovld __cnfn convert_ulong3_rtn(half3); ulong3 __ovld __cnfn convert_ulong3_rtz(half3); ulong3 __ovld __cnfn convert_ulong3_sat(half3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(half3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(half3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(half3); ulong3 __ovld __cnfn convert_ulong3_sat_rtz(half3); ulong4 __ovld __cnfn convert_ulong4(half4); ulong4 __ovld __cnfn convert_ulong4_rte(half4); ulong4 __ovld __cnfn convert_ulong4_rtp(half4); ulong4 __ovld __cnfn convert_ulong4_rtn(half4); ulong4 __ovld __cnfn convert_ulong4_rtz(half4); ulong4 __ovld __cnfn convert_ulong4_sat(half4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(half4); ulong4 __ovld __cnfn convert_ulong4_sat_rtp(half4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(half4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(half4); ulong8 __ovld __cnfn convert_ulong8(half8); ulong8 __ovld __cnfn convert_ulong8_rte(half8); ulong8 __ovld __cnfn convert_ulong8_rtp(half8); ulong8 __ovld __cnfn convert_ulong8_rtn(half8); ulong8 __ovld __cnfn convert_ulong8_rtz(half8); ulong8 __ovld __cnfn convert_ulong8_sat(half8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(half8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(half8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(half8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(half8); ulong16 __ovld __cnfn convert_ulong16(half16); ulong16 __ovld __cnfn convert_ulong16_rte(half16); ulong16 __ovld __cnfn convert_ulong16_rtp(half16); ulong16 __ovld __cnfn convert_ulong16_rtn(half16); ulong16 __ovld __cnfn convert_ulong16_rtz(half16); ulong16 __ovld __cnfn convert_ulong16_sat(half16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(half16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(half16); ulong16 __ovld __cnfn convert_ulong16_sat_rtn(half16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(half16); char __ovld __cnfn convert_char(half); char __ovld __cnfn convert_char_rte(half); char __ovld __cnfn convert_char_rtp(half); char __ovld __cnfn convert_char_rtn(half); char __ovld __cnfn convert_char_rtz(half); char __ovld __cnfn convert_char_sat(half); char __ovld __cnfn convert_char_sat_rte(half); char __ovld __cnfn convert_char_sat_rtp(half); char __ovld __cnfn convert_char_sat_rtn(half); char __ovld __cnfn convert_char_sat_rtz(half); char2 __ovld __cnfn convert_char2(half2); char2 __ovld __cnfn convert_char2_rte(half2); char2 __ovld __cnfn convert_char2_rtp(half2); char2 __ovld __cnfn convert_char2_rtn(half2); char2 __ovld __cnfn convert_char2_rtz(half2); char2 __ovld __cnfn 
convert_char2_sat(half2); char2 __ovld __cnfn convert_char2_sat_rte(half2); char2 __ovld __cnfn convert_char2_sat_rtp(half2); char2 __ovld __cnfn convert_char2_sat_rtn(half2); char2 __ovld __cnfn convert_char2_sat_rtz(half2); char3 __ovld __cnfn convert_char3(half3); char3 __ovld __cnfn convert_char3_rte(half3); char3 __ovld __cnfn convert_char3_rtp(half3); char3 __ovld __cnfn convert_char3_rtn(half3); char3 __ovld __cnfn convert_char3_rtz(half3); char3 __ovld __cnfn convert_char3_sat(half3); char3 __ovld __cnfn convert_char3_sat_rte(half3); char3 __ovld __cnfn convert_char3_sat_rtp(half3); char3 __ovld __cnfn convert_char3_sat_rtn(half3); char3 __ovld __cnfn convert_char3_sat_rtz(half3); char4 __ovld __cnfn convert_char4(half4); char4 __ovld __cnfn convert_char4_rte(half4); char4 __ovld __cnfn convert_char4_rtp(half4); char4 __ovld __cnfn convert_char4_rtn(half4); char4 __ovld __cnfn convert_char4_rtz(half4); char4 __ovld __cnfn convert_char4_sat(half4); char4 __ovld __cnfn convert_char4_sat_rte(half4); char4 __ovld __cnfn convert_char4_sat_rtp(half4); char4 __ovld __cnfn convert_char4_sat_rtn(half4); char4 __ovld __cnfn convert_char4_sat_rtz(half4); char8 __ovld __cnfn convert_char8(half8); char8 __ovld __cnfn convert_char8_rte(half8); char8 __ovld __cnfn convert_char8_rtp(half8); char8 __ovld __cnfn convert_char8_rtn(half8); char8 __ovld __cnfn convert_char8_rtz(half8); char8 __ovld __cnfn convert_char8_sat(half8); char8 __ovld __cnfn convert_char8_sat_rte(half8); char8 __ovld __cnfn convert_char8_sat_rtp(half8); char8 __ovld __cnfn convert_char8_sat_rtn(half8); char8 __ovld __cnfn convert_char8_sat_rtz(half8); char16 __ovld __cnfn convert_char16(half16); char16 __ovld __cnfn convert_char16_rte(half16); char16 __ovld __cnfn convert_char16_rtp(half16); char16 __ovld __cnfn convert_char16_rtn(half16); char16 __ovld __cnfn convert_char16_rtz(half16); char16 __ovld __cnfn convert_char16_sat(half16); char16 __ovld __cnfn convert_char16_sat_rte(half16); char16 __ovld __cnfn convert_char16_sat_rtp(half16); char16 __ovld __cnfn convert_char16_sat_rtn(half16); char16 __ovld __cnfn convert_char16_sat_rtz(half16); short __ovld __cnfn convert_short(half); short __ovld __cnfn convert_short_rte(half); short __ovld __cnfn convert_short_rtp(half); short __ovld __cnfn convert_short_rtn(half); short __ovld __cnfn convert_short_rtz(half); short __ovld __cnfn convert_short_sat(half); short __ovld __cnfn convert_short_sat_rte(half); short __ovld __cnfn convert_short_sat_rtp(half); short __ovld __cnfn convert_short_sat_rtn(half); short __ovld __cnfn convert_short_sat_rtz(half); short2 __ovld __cnfn convert_short2(half2); short2 __ovld __cnfn convert_short2_rte(half2); short2 __ovld __cnfn convert_short2_rtp(half2); short2 __ovld __cnfn convert_short2_rtn(half2); short2 __ovld __cnfn convert_short2_rtz(half2); short2 __ovld __cnfn convert_short2_sat(half2); short2 __ovld __cnfn convert_short2_sat_rte(half2); short2 __ovld __cnfn convert_short2_sat_rtp(half2); short2 __ovld __cnfn convert_short2_sat_rtn(half2); short2 __ovld __cnfn convert_short2_sat_rtz(half2); short3 __ovld __cnfn convert_short3(half3); short3 __ovld __cnfn convert_short3_rte(half3); short3 __ovld __cnfn convert_short3_rtp(half3); short3 __ovld __cnfn convert_short3_rtn(half3); short3 __ovld __cnfn convert_short3_rtz(half3); short3 __ovld __cnfn convert_short3_sat(half3); short3 __ovld __cnfn convert_short3_sat_rte(half3); short3 __ovld __cnfn convert_short3_sat_rtp(half3); short3 __ovld __cnfn convert_short3_sat_rtn(half3); short3 __ovld 
__cnfn convert_short3_sat_rtz(half3); short4 __ovld __cnfn convert_short4(half4); short4 __ovld __cnfn convert_short4_rte(half4); short4 __ovld __cnfn convert_short4_rtp(half4); short4 __ovld __cnfn convert_short4_rtn(half4); short4 __ovld __cnfn convert_short4_rtz(half4); short4 __ovld __cnfn convert_short4_sat(half4); short4 __ovld __cnfn convert_short4_sat_rte(half4); short4 __ovld __cnfn convert_short4_sat_rtp(half4); short4 __ovld __cnfn convert_short4_sat_rtn(half4); short4 __ovld __cnfn convert_short4_sat_rtz(half4); short8 __ovld __cnfn convert_short8(half8); short8 __ovld __cnfn convert_short8_rte(half8); short8 __ovld __cnfn convert_short8_rtp(half8); short8 __ovld __cnfn convert_short8_rtn(half8); short8 __ovld __cnfn convert_short8_rtz(half8); short8 __ovld __cnfn convert_short8_sat(half8); short8 __ovld __cnfn convert_short8_sat_rte(half8); short8 __ovld __cnfn convert_short8_sat_rtp(half8); short8 __ovld __cnfn convert_short8_sat_rtn(half8); short8 __ovld __cnfn convert_short8_sat_rtz(half8); short16 __ovld __cnfn convert_short16(half16); short16 __ovld __cnfn convert_short16_rte(half16); short16 __ovld __cnfn convert_short16_rtp(half16); short16 __ovld __cnfn convert_short16_rtn(half16); short16 __ovld __cnfn convert_short16_rtz(half16); short16 __ovld __cnfn convert_short16_sat(half16); short16 __ovld __cnfn convert_short16_sat_rte(half16); short16 __ovld __cnfn convert_short16_sat_rtp(half16); short16 __ovld __cnfn convert_short16_sat_rtn(half16); short16 __ovld __cnfn convert_short16_sat_rtz(half16); int __ovld __cnfn convert_int(half); int __ovld __cnfn convert_int_rte(half); int __ovld __cnfn convert_int_rtp(half); int __ovld __cnfn convert_int_rtn(half); int __ovld __cnfn convert_int_rtz(half); int __ovld __cnfn convert_int_sat(half); int __ovld __cnfn convert_int_sat_rte(half); int __ovld __cnfn convert_int_sat_rtp(half); int __ovld __cnfn convert_int_sat_rtn(half); int __ovld __cnfn convert_int_sat_rtz(half); int2 __ovld __cnfn convert_int2(half2); int2 __ovld __cnfn convert_int2_rte(half2); int2 __ovld __cnfn convert_int2_rtp(half2); int2 __ovld __cnfn convert_int2_rtn(half2); int2 __ovld __cnfn convert_int2_rtz(half2); int2 __ovld __cnfn convert_int2_sat(half2); int2 __ovld __cnfn convert_int2_sat_rte(half2); int2 __ovld __cnfn convert_int2_sat_rtp(half2); int2 __ovld __cnfn convert_int2_sat_rtn(half2); int2 __ovld __cnfn convert_int2_sat_rtz(half2); int3 __ovld __cnfn convert_int3(half3); int3 __ovld __cnfn convert_int3_rte(half3); int3 __ovld __cnfn convert_int3_rtp(half3); int3 __ovld __cnfn convert_int3_rtn(half3); int3 __ovld __cnfn convert_int3_rtz(half3); int3 __ovld __cnfn convert_int3_sat(half3); int3 __ovld __cnfn convert_int3_sat_rte(half3); int3 __ovld __cnfn convert_int3_sat_rtp(half3); int3 __ovld __cnfn convert_int3_sat_rtn(half3); int3 __ovld __cnfn convert_int3_sat_rtz(half3); int4 __ovld __cnfn convert_int4(half4); int4 __ovld __cnfn convert_int4_rte(half4); int4 __ovld __cnfn convert_int4_rtp(half4); int4 __ovld __cnfn convert_int4_rtn(half4); int4 __ovld __cnfn convert_int4_rtz(half4); int4 __ovld __cnfn convert_int4_sat(half4); int4 __ovld __cnfn convert_int4_sat_rte(half4); int4 __ovld __cnfn convert_int4_sat_rtp(half4); int4 __ovld __cnfn convert_int4_sat_rtn(half4); int4 __ovld __cnfn convert_int4_sat_rtz(half4); int8 __ovld __cnfn convert_int8(half8); int8 __ovld __cnfn convert_int8_rte(half8); int8 __ovld __cnfn convert_int8_rtp(half8); int8 __ovld __cnfn convert_int8_rtn(half8); int8 __ovld __cnfn convert_int8_rtz(half8); int8 __ovld 
__cnfn convert_int8_sat(half8); int8 __ovld __cnfn convert_int8_sat_rte(half8); int8 __ovld __cnfn convert_int8_sat_rtp(half8); int8 __ovld __cnfn convert_int8_sat_rtn(half8); int8 __ovld __cnfn convert_int8_sat_rtz(half8); int16 __ovld __cnfn convert_int16(half16); int16 __ovld __cnfn convert_int16_rte(half16); int16 __ovld __cnfn convert_int16_rtp(half16); int16 __ovld __cnfn convert_int16_rtn(half16); int16 __ovld __cnfn convert_int16_rtz(half16); int16 __ovld __cnfn convert_int16_sat(half16); int16 __ovld __cnfn convert_int16_sat_rte(half16); int16 __ovld __cnfn convert_int16_sat_rtp(half16); int16 __ovld __cnfn convert_int16_sat_rtn(half16); int16 __ovld __cnfn convert_int16_sat_rtz(half16); long __ovld __cnfn convert_long(half); long __ovld __cnfn convert_long_rte(half); long __ovld __cnfn convert_long_rtp(half); long __ovld __cnfn convert_long_rtn(half); long __ovld __cnfn convert_long_rtz(half); long __ovld __cnfn convert_long_sat(half); long __ovld __cnfn convert_long_sat_rte(half); long __ovld __cnfn convert_long_sat_rtp(half); long __ovld __cnfn convert_long_sat_rtn(half); long __ovld __cnfn convert_long_sat_rtz(half); long2 __ovld __cnfn convert_long2(half2); long2 __ovld __cnfn convert_long2_rte(half2); long2 __ovld __cnfn convert_long2_rtp(half2); long2 __ovld __cnfn convert_long2_rtn(half2); long2 __ovld __cnfn convert_long2_rtz(half2); long2 __ovld __cnfn convert_long2_sat(half2); long2 __ovld __cnfn convert_long2_sat_rte(half2); long2 __ovld __cnfn convert_long2_sat_rtp(half2); long2 __ovld __cnfn convert_long2_sat_rtn(half2); long2 __ovld __cnfn convert_long2_sat_rtz(half2); long3 __ovld __cnfn convert_long3(half3); long3 __ovld __cnfn convert_long3_rte(half3); long3 __ovld __cnfn convert_long3_rtp(half3); long3 __ovld __cnfn convert_long3_rtn(half3); long3 __ovld __cnfn convert_long3_rtz(half3); long3 __ovld __cnfn convert_long3_sat(half3); long3 __ovld __cnfn convert_long3_sat_rte(half3); long3 __ovld __cnfn convert_long3_sat_rtp(half3); long3 __ovld __cnfn convert_long3_sat_rtn(half3); long3 __ovld __cnfn convert_long3_sat_rtz(half3); long4 __ovld __cnfn convert_long4(half4); long4 __ovld __cnfn convert_long4_rte(half4); long4 __ovld __cnfn convert_long4_rtp(half4); long4 __ovld __cnfn convert_long4_rtn(half4); long4 __ovld __cnfn convert_long4_rtz(half4); long4 __ovld __cnfn convert_long4_sat(half4); long4 __ovld __cnfn convert_long4_sat_rte(half4); long4 __ovld __cnfn convert_long4_sat_rtp(half4); long4 __ovld __cnfn convert_long4_sat_rtn(half4); long4 __ovld __cnfn convert_long4_sat_rtz(half4); long8 __ovld __cnfn convert_long8(half8); long8 __ovld __cnfn convert_long8_rte(half8); long8 __ovld __cnfn convert_long8_rtp(half8); long8 __ovld __cnfn convert_long8_rtn(half8); long8 __ovld __cnfn convert_long8_rtz(half8); long8 __ovld __cnfn convert_long8_sat(half8); long8 __ovld __cnfn convert_long8_sat_rte(half8); long8 __ovld __cnfn convert_long8_sat_rtp(half8); long8 __ovld __cnfn convert_long8_sat_rtn(half8); long8 __ovld __cnfn convert_long8_sat_rtz(half8); long16 __ovld __cnfn convert_long16(half16); long16 __ovld __cnfn convert_long16_rte(half16); long16 __ovld __cnfn convert_long16_rtp(half16); long16 __ovld __cnfn convert_long16_rtn(half16); long16 __ovld __cnfn convert_long16_rtz(half16); long16 __ovld __cnfn convert_long16_sat(half16); long16 __ovld __cnfn convert_long16_sat_rte(half16); long16 __ovld __cnfn convert_long16_sat_rtp(half16); long16 __ovld __cnfn convert_long16_sat_rtn(half16); long16 __ovld __cnfn convert_long16_sat_rtz(half16); float __ovld 
__cnfn convert_float(half); float __ovld __cnfn convert_float_rte(half); float __ovld __cnfn convert_float_rtp(half); float __ovld __cnfn convert_float_rtn(half); float __ovld __cnfn convert_float_rtz(half); float2 __ovld __cnfn convert_float2(half2); float2 __ovld __cnfn convert_float2_rte(half2); float2 __ovld __cnfn convert_float2_rtp(half2); float2 __ovld __cnfn convert_float2_rtn(half2); float2 __ovld __cnfn convert_float2_rtz(half2); float3 __ovld __cnfn convert_float3(half3); float3 __ovld __cnfn convert_float3_rte(half3); float3 __ovld __cnfn convert_float3_rtp(half3); float3 __ovld __cnfn convert_float3_rtn(half3); float3 __ovld __cnfn convert_float3_rtz(half3); float4 __ovld __cnfn convert_float4(half4); float4 __ovld __cnfn convert_float4_rte(half4); float4 __ovld __cnfn convert_float4_rtp(half4); float4 __ovld __cnfn convert_float4_rtn(half4); float4 __ovld __cnfn convert_float4_rtz(half4); float8 __ovld __cnfn convert_float8(half8); float8 __ovld __cnfn convert_float8_rte(half8); float8 __ovld __cnfn convert_float8_rtp(half8); float8 __ovld __cnfn convert_float8_rtn(half8); float8 __ovld __cnfn convert_float8_rtz(half8); float16 __ovld __cnfn convert_float16(half16); float16 __ovld __cnfn convert_float16_rte(half16); float16 __ovld __cnfn convert_float16_rtp(half16); float16 __ovld __cnfn convert_float16_rtn(half16); float16 __ovld __cnfn convert_float16_rtz(half16); // Convert non-double types to half types. half __ovld __cnfn convert_half(uchar); half __ovld __cnfn convert_half(ushort); half __ovld __cnfn convert_half(uint); half __ovld __cnfn convert_half(ulong); half __ovld __cnfn convert_half(char); half __ovld __cnfn convert_half(short); half __ovld __cnfn convert_half(int); half __ovld __cnfn convert_half(long); half __ovld __cnfn convert_half(float); half __ovld __cnfn convert_half(half); half __ovld __cnfn convert_half_rte(uchar); half __ovld __cnfn convert_half_rte(ushort); half __ovld __cnfn convert_half_rte(uint); half __ovld __cnfn convert_half_rte(ulong); half __ovld __cnfn convert_half_rte(char); half __ovld __cnfn convert_half_rte(short); half __ovld __cnfn convert_half_rte(int); half __ovld __cnfn convert_half_rte(long); half __ovld __cnfn convert_half_rte(float); half __ovld __cnfn convert_half_rte(half); half __ovld __cnfn convert_half_rtp(uchar); half __ovld __cnfn convert_half_rtp(ushort); half __ovld __cnfn convert_half_rtp(uint); half __ovld __cnfn convert_half_rtp(ulong); half __ovld __cnfn convert_half_rtp(char); half __ovld __cnfn convert_half_rtp(short); half __ovld __cnfn convert_half_rtp(int); half __ovld __cnfn convert_half_rtp(long); half __ovld __cnfn convert_half_rtp(float); half __ovld __cnfn convert_half_rtp(half); half __ovld __cnfn convert_half_rtn(uchar); half __ovld __cnfn convert_half_rtn(ushort); half __ovld __cnfn convert_half_rtn(uint); half __ovld __cnfn convert_half_rtn(ulong); half __ovld __cnfn convert_half_rtn(char); half __ovld __cnfn convert_half_rtn(short); half __ovld __cnfn convert_half_rtn(int); half __ovld __cnfn convert_half_rtn(long); half __ovld __cnfn convert_half_rtn(float); half __ovld __cnfn convert_half_rtn(half); half __ovld __cnfn convert_half_rtz(uchar); half __ovld __cnfn convert_half_rtz(ushort); half __ovld __cnfn convert_half_rtz(uint); half __ovld __cnfn convert_half_rtz(ulong); half __ovld __cnfn convert_half_rtz(char); half __ovld __cnfn convert_half_rtz(short); half __ovld __cnfn convert_half_rtz(int); half __ovld __cnfn convert_half_rtz(long); half __ovld __cnfn convert_half_rtz(float); half __ovld 
__cnfn convert_half_rtz(half); half2 __ovld __cnfn convert_half2(char2); half2 __ovld __cnfn convert_half2(uchar2); half2 __ovld __cnfn convert_half2(short2); half2 __ovld __cnfn convert_half2(ushort2); half2 __ovld __cnfn convert_half2(int2); half2 __ovld __cnfn convert_half2(uint2); half2 __ovld __cnfn convert_half2(long2); half2 __ovld __cnfn convert_half2(ulong2); half2 __ovld __cnfn convert_half2(float2); half2 __ovld __cnfn convert_half2(half2); half2 __ovld __cnfn convert_half2_rte(char2); half2 __ovld __cnfn convert_half2_rte(uchar2); half2 __ovld __cnfn convert_half2_rte(short2); half2 __ovld __cnfn convert_half2_rte(ushort2); half2 __ovld __cnfn convert_half2_rte(int2); half2 __ovld __cnfn convert_half2_rte(uint2); half2 __ovld __cnfn convert_half2_rte(long2); half2 __ovld __cnfn convert_half2_rte(ulong2); half2 __ovld __cnfn convert_half2_rte(float2); half2 __ovld __cnfn convert_half2_rte(half2); half2 __ovld __cnfn convert_half2_rtp(char2); half2 __ovld __cnfn convert_half2_rtp(uchar2); half2 __ovld __cnfn convert_half2_rtp(short2); half2 __ovld __cnfn convert_half2_rtp(ushort2); half2 __ovld __cnfn convert_half2_rtp(int2); half2 __ovld __cnfn convert_half2_rtp(uint2); half2 __ovld __cnfn convert_half2_rtp(long2); half2 __ovld __cnfn convert_half2_rtp(ulong2); half2 __ovld __cnfn convert_half2_rtp(float2); half2 __ovld __cnfn convert_half2_rtp(half2); half2 __ovld __cnfn convert_half2_rtn(char2); half2 __ovld __cnfn convert_half2_rtn(uchar2); half2 __ovld __cnfn convert_half2_rtn(short2); half2 __ovld __cnfn convert_half2_rtn(ushort2); half2 __ovld __cnfn convert_half2_rtn(int2); half2 __ovld __cnfn convert_half2_rtn(uint2); half2 __ovld __cnfn convert_half2_rtn(long2); half2 __ovld __cnfn convert_half2_rtn(ulong2); half2 __ovld __cnfn convert_half2_rtn(float2); half2 __ovld __cnfn convert_half2_rtn(half2); half2 __ovld __cnfn convert_half2_rtz(char2); half2 __ovld __cnfn convert_half2_rtz(uchar2); half2 __ovld __cnfn convert_half2_rtz(short2); half2 __ovld __cnfn convert_half2_rtz(ushort2); half2 __ovld __cnfn convert_half2_rtz(int2); half2 __ovld __cnfn convert_half2_rtz(uint2); half2 __ovld __cnfn convert_half2_rtz(long2); half2 __ovld __cnfn convert_half2_rtz(ulong2); half2 __ovld __cnfn convert_half2_rtz(float2); half2 __ovld __cnfn convert_half2_rtz(half2); half3 __ovld __cnfn convert_half3(char3); half3 __ovld __cnfn convert_half3(uchar3); half3 __ovld __cnfn convert_half3(short3); half3 __ovld __cnfn convert_half3(ushort3); half3 __ovld __cnfn convert_half3(int3); half3 __ovld __cnfn convert_half3(uint3); half3 __ovld __cnfn convert_half3(long3); half3 __ovld __cnfn convert_half3(ulong3); half3 __ovld __cnfn convert_half3(float3); half3 __ovld __cnfn convert_half3(half3); half3 __ovld __cnfn convert_half3_rte(char3); half3 __ovld __cnfn convert_half3_rte(uchar3); half3 __ovld __cnfn convert_half3_rte(short3); half3 __ovld __cnfn convert_half3_rte(ushort3); half3 __ovld __cnfn convert_half3_rte(int3); half3 __ovld __cnfn convert_half3_rte(uint3); half3 __ovld __cnfn convert_half3_rte(long3); half3 __ovld __cnfn convert_half3_rte(ulong3); half3 __ovld __cnfn convert_half3_rte(float3); half3 __ovld __cnfn convert_half3_rte(half3); half3 __ovld __cnfn convert_half3_rtp(char3); half3 __ovld __cnfn convert_half3_rtp(uchar3); half3 __ovld __cnfn convert_half3_rtp(short3); half3 __ovld __cnfn convert_half3_rtp(ushort3); half3 __ovld __cnfn convert_half3_rtp(int3); half3 __ovld __cnfn convert_half3_rtp(uint3); half3 __ovld __cnfn convert_half3_rtp(long3); half3 __ovld __cnfn 
convert_half3_rtp(ulong3); half3 __ovld __cnfn convert_half3_rtp(float3); half3 __ovld __cnfn convert_half3_rtp(half3); half3 __ovld __cnfn convert_half3_rtn(char3); half3 __ovld __cnfn convert_half3_rtn(uchar3); half3 __ovld __cnfn convert_half3_rtn(short3); half3 __ovld __cnfn convert_half3_rtn(ushort3); half3 __ovld __cnfn convert_half3_rtn(int3); half3 __ovld __cnfn convert_half3_rtn(uint3); half3 __ovld __cnfn convert_half3_rtn(long3); half3 __ovld __cnfn convert_half3_rtn(ulong3); half3 __ovld __cnfn convert_half3_rtn(float3); half3 __ovld __cnfn convert_half3_rtn(half3); half3 __ovld __cnfn convert_half3_rtz(char3); half3 __ovld __cnfn convert_half3_rtz(uchar3); half3 __ovld __cnfn convert_half3_rtz(short3); half3 __ovld __cnfn convert_half3_rtz(ushort3); half3 __ovld __cnfn convert_half3_rtz(int3); half3 __ovld __cnfn convert_half3_rtz(uint3); half3 __ovld __cnfn convert_half3_rtz(long3); half3 __ovld __cnfn convert_half3_rtz(ulong3); half3 __ovld __cnfn convert_half3_rtz(float3); half3 __ovld __cnfn convert_half3_rtz(half3); half4 __ovld __cnfn convert_half4(char4); half4 __ovld __cnfn convert_half4(uchar4); half4 __ovld __cnfn convert_half4(short4); half4 __ovld __cnfn convert_half4(ushort4); half4 __ovld __cnfn convert_half4(int4); half4 __ovld __cnfn convert_half4(uint4); half4 __ovld __cnfn convert_half4(long4); half4 __ovld __cnfn convert_half4(ulong4); half4 __ovld __cnfn convert_half4(float4); half4 __ovld __cnfn convert_half4(half4); half4 __ovld __cnfn convert_half4_rte(char4); half4 __ovld __cnfn convert_half4_rte(uchar4); half4 __ovld __cnfn convert_half4_rte(short4); half4 __ovld __cnfn convert_half4_rte(ushort4); half4 __ovld __cnfn convert_half4_rte(int4); half4 __ovld __cnfn convert_half4_rte(uint4); half4 __ovld __cnfn convert_half4_rte(long4); half4 __ovld __cnfn convert_half4_rte(ulong4); half4 __ovld __cnfn convert_half4_rte(float4); half4 __ovld __cnfn convert_half4_rte(half4); half4 __ovld __cnfn convert_half4_rtp(char4); half4 __ovld __cnfn convert_half4_rtp(uchar4); half4 __ovld __cnfn convert_half4_rtp(short4); half4 __ovld __cnfn convert_half4_rtp(ushort4); half4 __ovld __cnfn convert_half4_rtp(int4); half4 __ovld __cnfn convert_half4_rtp(uint4); half4 __ovld __cnfn convert_half4_rtp(long4); half4 __ovld __cnfn convert_half4_rtp(ulong4); half4 __ovld __cnfn convert_half4_rtp(float4); half4 __ovld __cnfn convert_half4_rtp(half4); half4 __ovld __cnfn convert_half4_rtn(char4); half4 __ovld __cnfn convert_half4_rtn(uchar4); half4 __ovld __cnfn convert_half4_rtn(short4); half4 __ovld __cnfn convert_half4_rtn(ushort4); half4 __ovld __cnfn convert_half4_rtn(int4); half4 __ovld __cnfn convert_half4_rtn(uint4); half4 __ovld __cnfn convert_half4_rtn(long4); half4 __ovld __cnfn convert_half4_rtn(ulong4); half4 __ovld __cnfn convert_half4_rtn(float4); half4 __ovld __cnfn convert_half4_rtn(half4); half4 __ovld __cnfn convert_half4_rtz(char4); half4 __ovld __cnfn convert_half4_rtz(uchar4); half4 __ovld __cnfn convert_half4_rtz(short4); half4 __ovld __cnfn convert_half4_rtz(ushort4); half4 __ovld __cnfn convert_half4_rtz(int4); half4 __ovld __cnfn convert_half4_rtz(uint4); half4 __ovld __cnfn convert_half4_rtz(long4); half4 __ovld __cnfn convert_half4_rtz(ulong4); half4 __ovld __cnfn convert_half4_rtz(float4); half4 __ovld __cnfn convert_half4_rtz(half4); half8 __ovld __cnfn convert_half8(char8); half8 __ovld __cnfn convert_half8(uchar8); half8 __ovld __cnfn convert_half8(short8); half8 __ovld __cnfn convert_half8(ushort8); half8 __ovld __cnfn convert_half8(int8); half8 
__ovld __cnfn convert_half8(uint8); half8 __ovld __cnfn convert_half8(long8); half8 __ovld __cnfn convert_half8(ulong8); half8 __ovld __cnfn convert_half8(float8); half8 __ovld __cnfn convert_half8(half8); half8 __ovld __cnfn convert_half8_rte(char8); half8 __ovld __cnfn convert_half8_rte(uchar8); half8 __ovld __cnfn convert_half8_rte(short8); half8 __ovld __cnfn convert_half8_rte(ushort8); half8 __ovld __cnfn convert_half8_rte(int8); half8 __ovld __cnfn convert_half8_rte(uint8); half8 __ovld __cnfn convert_half8_rte(long8); half8 __ovld __cnfn convert_half8_rte(ulong8); half8 __ovld __cnfn convert_half8_rte(float8); half8 __ovld __cnfn convert_half8_rte(half8); half8 __ovld __cnfn convert_half8_rtp(char8); half8 __ovld __cnfn convert_half8_rtp(uchar8); half8 __ovld __cnfn convert_half8_rtp(short8); half8 __ovld __cnfn convert_half8_rtp(ushort8); half8 __ovld __cnfn convert_half8_rtp(int8); half8 __ovld __cnfn convert_half8_rtp(uint8); half8 __ovld __cnfn convert_half8_rtp(long8); half8 __ovld __cnfn convert_half8_rtp(ulong8); half8 __ovld __cnfn convert_half8_rtp(float8); half8 __ovld __cnfn convert_half8_rtp(half8); half8 __ovld __cnfn convert_half8_rtn(char8); half8 __ovld __cnfn convert_half8_rtn(uchar8); half8 __ovld __cnfn convert_half8_rtn(short8); half8 __ovld __cnfn convert_half8_rtn(ushort8); half8 __ovld __cnfn convert_half8_rtn(int8); half8 __ovld __cnfn convert_half8_rtn(uint8); half8 __ovld __cnfn convert_half8_rtn(long8); half8 __ovld __cnfn convert_half8_rtn(ulong8); half8 __ovld __cnfn convert_half8_rtn(float8); half8 __ovld __cnfn convert_half8_rtn(half8); half8 __ovld __cnfn convert_half8_rtz(char8); half8 __ovld __cnfn convert_half8_rtz(uchar8); half8 __ovld __cnfn convert_half8_rtz(short8); half8 __ovld __cnfn convert_half8_rtz(ushort8); half8 __ovld __cnfn convert_half8_rtz(int8); half8 __ovld __cnfn convert_half8_rtz(uint8); half8 __ovld __cnfn convert_half8_rtz(long8); half8 __ovld __cnfn convert_half8_rtz(ulong8); half8 __ovld __cnfn convert_half8_rtz(float8); half8 __ovld __cnfn convert_half8_rtz(half8); half16 __ovld __cnfn convert_half16(char16); half16 __ovld __cnfn convert_half16(uchar16); half16 __ovld __cnfn convert_half16(short16); half16 __ovld __cnfn convert_half16(ushort16); half16 __ovld __cnfn convert_half16(int16); half16 __ovld __cnfn convert_half16(uint16); half16 __ovld __cnfn convert_half16(long16); half16 __ovld __cnfn convert_half16(ulong16); half16 __ovld __cnfn convert_half16(float16); half16 __ovld __cnfn convert_half16(half16); half16 __ovld __cnfn convert_half16_rte(char16); half16 __ovld __cnfn convert_half16_rte(uchar16); half16 __ovld __cnfn convert_half16_rte(short16); half16 __ovld __cnfn convert_half16_rte(ushort16); half16 __ovld __cnfn convert_half16_rte(int16); half16 __ovld __cnfn convert_half16_rte(uint16); half16 __ovld __cnfn convert_half16_rte(long16); half16 __ovld __cnfn convert_half16_rte(ulong16); half16 __ovld __cnfn convert_half16_rte(float16); half16 __ovld __cnfn convert_half16_rte(half16); half16 __ovld __cnfn convert_half16_rtp(char16); half16 __ovld __cnfn convert_half16_rtp(uchar16); half16 __ovld __cnfn convert_half16_rtp(short16); half16 __ovld __cnfn convert_half16_rtp(ushort16); half16 __ovld __cnfn convert_half16_rtp(int16); half16 __ovld __cnfn convert_half16_rtp(uint16); half16 __ovld __cnfn convert_half16_rtp(long16); half16 __ovld __cnfn convert_half16_rtp(ulong16); half16 __ovld __cnfn convert_half16_rtp(float16); half16 __ovld __cnfn convert_half16_rtp(half16); half16 __ovld __cnfn 
convert_half16_rtn(char16); half16 __ovld __cnfn convert_half16_rtn(uchar16); half16 __ovld __cnfn convert_half16_rtn(short16); half16 __ovld __cnfn convert_half16_rtn(ushort16); half16 __ovld __cnfn convert_half16_rtn(int16); half16 __ovld __cnfn convert_half16_rtn(uint16); half16 __ovld __cnfn convert_half16_rtn(long16); half16 __ovld __cnfn convert_half16_rtn(ulong16); half16 __ovld __cnfn convert_half16_rtn(float16); half16 __ovld __cnfn convert_half16_rtn(half16); half16 __ovld __cnfn convert_half16_rtz(char16); half16 __ovld __cnfn convert_half16_rtz(uchar16); half16 __ovld __cnfn convert_half16_rtz(short16); half16 __ovld __cnfn convert_half16_rtz(ushort16); half16 __ovld __cnfn convert_half16_rtz(int16); half16 __ovld __cnfn convert_half16_rtz(uint16); half16 __ovld __cnfn convert_half16_rtz(long16); half16 __ovld __cnfn convert_half16_rtz(ulong16); half16 __ovld __cnfn convert_half16_rtz(float16); half16 __ovld __cnfn convert_half16_rtz(half16); // Convert half types to double types. #ifdef cl_khr_fp64 double __ovld __cnfn convert_double(half); double __ovld __cnfn convert_double_rte(half); double __ovld __cnfn convert_double_rtp(half); double __ovld __cnfn convert_double_rtn(half); double __ovld __cnfn convert_double_rtz(half); double2 __ovld __cnfn convert_double2(half2); double2 __ovld __cnfn convert_double2_rte(half2); double2 __ovld __cnfn convert_double2_rtp(half2); double2 __ovld __cnfn convert_double2_rtn(half2); double2 __ovld __cnfn convert_double2_rtz(half2); double3 __ovld __cnfn convert_double3(half3); double3 __ovld __cnfn convert_double3_rte(half3); double3 __ovld __cnfn convert_double3_rtp(half3); double3 __ovld __cnfn convert_double3_rtn(half3); double3 __ovld __cnfn convert_double3_rtz(half3); double4 __ovld __cnfn convert_double4(half4); double4 __ovld __cnfn convert_double4_rte(half4); double4 __ovld __cnfn convert_double4_rtp(half4); double4 __ovld __cnfn convert_double4_rtn(half4); double4 __ovld __cnfn convert_double4_rtz(half4); double8 __ovld __cnfn convert_double8(half8); double8 __ovld __cnfn convert_double8_rte(half8); double8 __ovld __cnfn convert_double8_rtp(half8); double8 __ovld __cnfn convert_double8_rtn(half8); double8 __ovld __cnfn convert_double8_rtz(half8); double16 __ovld __cnfn convert_double16(half16); double16 __ovld __cnfn convert_double16_rte(half16); double16 __ovld __cnfn convert_double16_rtp(half16); double16 __ovld __cnfn convert_double16_rtn(half16); double16 __ovld __cnfn convert_double16_rtz(half16); // Convert double types to half types. 
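// Illustrative usage sketch (editor's addition, not part of the original header):
// the _rte/_rtz/_rtp/_rtn suffix selects the rounding mode explicitly, e.g. with
// d a double and d4 a double4:
//   half  h  = convert_half_rtz(d);    // double -> half, round toward zero
//   half4 h4 = convert_half4_rte(d4);  // double4 -> half4, round to nearest even
// Without a suffix, conversions to floating-point types use the default
// round-to-nearest-even mode.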
half __ovld __cnfn convert_half(double); half __ovld __cnfn convert_half_rte(double); half __ovld __cnfn convert_half_rtp(double); half __ovld __cnfn convert_half_rtn(double); half __ovld __cnfn convert_half_rtz(double); half2 __ovld __cnfn convert_half2(double2); half2 __ovld __cnfn convert_half2_rte(double2); half2 __ovld __cnfn convert_half2_rtp(double2); half2 __ovld __cnfn convert_half2_rtn(double2); half2 __ovld __cnfn convert_half2_rtz(double2); half3 __ovld __cnfn convert_half3(double3); half3 __ovld __cnfn convert_half3_rte(double3); half3 __ovld __cnfn convert_half3_rtp(double3); half3 __ovld __cnfn convert_half3_rtn(double3); half3 __ovld __cnfn convert_half3_rtz(double3); half4 __ovld __cnfn convert_half4(double4); half4 __ovld __cnfn convert_half4_rte(double4); half4 __ovld __cnfn convert_half4_rtp(double4); half4 __ovld __cnfn convert_half4_rtn(double4); half4 __ovld __cnfn convert_half4_rtz(double4); half8 __ovld __cnfn convert_half8(double8); half8 __ovld __cnfn convert_half8_rte(double8); half8 __ovld __cnfn convert_half8_rtp(double8); half8 __ovld __cnfn convert_half8_rtn(double8); half8 __ovld __cnfn convert_half8_rtz(double8); half16 __ovld __cnfn convert_half16(double16); half16 __ovld __cnfn convert_half16_rte(double16); half16 __ovld __cnfn convert_half16_rtp(double16); half16 __ovld __cnfn convert_half16_rtn(double16); half16 __ovld __cnfn convert_half16_rtz(double16); #endif //cl_khr_fp64 #endif // cl_khr_fp16 // OpenCL v1.1 s6.11.1, v1.2 s6.12.1, v2.0 s6.13.1 - Work-item Functions /** * Returns the number of dimensions in use. This is the * value given to the work_dim argument specified in * clEnqueueNDRangeKernel. * For clEnqueueTask, this returns 1. */ uint __ovld __cnfn get_work_dim(void); /** * Returns the number of global work-items specified for * dimension identified by dimindx. This value is given by * the global_work_size argument to * clEnqueueNDRangeKernel. Valid values of dimindx * are 0 to get_work_dim() - 1. For other values of * dimindx, get_global_size() returns 1. * For clEnqueueTask, this always returns 1. */ size_t __ovld __cnfn get_global_size(uint); /** * Returns the unique global work-item ID value for * dimension identified by dimindx. The global work-item * ID specifies the work-item ID based on the number of * global work-items specified to execute the kernel. Valid * values of dimindx are 0 to get_work_dim() - 1. For * other values of dimindx, get_global_id() returns 0. * For clEnqueueTask, this returns 0. */ size_t __ovld __cnfn get_global_id(uint); /** * Returns the number of local work-items specified in * dimension identified by dimindx. This value is given by * the local_work_size argument to * clEnqueueNDRangeKernel if local_work_size is not * NULL; otherwise the OpenCL implementation chooses * an appropriate local_work_size value which is returned * by this function. Valid values of dimindx are 0 to * get_work_dim() - 1. For other values of dimindx, * get_local_size() returns 1. * For clEnqueueTask, this always returns 1. */ size_t __ovld __cnfn get_local_size(uint); /** * Returns the unique local work-item ID i.e. a work-item * within a specific work-group for dimension identified by * dimindx. Valid values of dimindx are 0 to * get_work_dim() - 1. For other values of dimindx, * get_local_id() returns 0. * For clEnqueueTask, this returns 0. */ size_t __ovld __cnfn get_local_id(uint); /** * Returns the number of work-groups that will execute a * kernel for dimension identified by dimindx. 
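* Illustrative sketch (editor's addition, not part of the original header): for a
* uniform ND-range the work-item queries are related by
*   get_global_size(dimindx) == get_num_groups(dimindx) * get_local_size(dimindx)
* and, ignoring any global offset,
*   get_global_id(dimindx) == get_group_id(dimindx) * get_local_size(dimindx) + get_local_id(dimindx)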
* Valid values of dimindx are 0 to get_work_dim() - 1. * For other values of dimindx, get_num_groups() returns 1. * For clEnqueueTask, this always returns 1. */ size_t __ovld __cnfn get_num_groups(uint); /** * get_group_id returns the work-group ID which is a * number from 0 .. get_num_groups(dimindx) - 1. * Valid values of dimindx are 0 to get_work_dim() - 1. * For other values, get_group_id() returns 0. * For clEnqueueTask, this returns 0. */ size_t __ovld __cnfn get_group_id(uint); /** * get_global_offset returns the offset values specified in * global_work_offset argument to * clEnqueueNDRangeKernel. * Valid values of dimindx are 0 to get_work_dim() - 1. * For other values, get_global_offset() returns 0. * For clEnqueueTask, this returns 0. */ size_t __ovld __cnfn get_global_offset(uint); #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) size_t __ovld get_enqueued_local_size(uint); size_t __ovld get_global_linear_id(void); size_t __ovld get_local_linear_id(void); #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // OpenCL v1.1 s6.11.2, v1.2 s6.12.2, v2.0 s6.13.2 - Math functions /** * Arc cosine function. */ float __ovld __cnfn acos(float); float2 __ovld __cnfn acos(float2); float3 __ovld __cnfn acos(float3); float4 __ovld __cnfn acos(float4); float8 __ovld __cnfn acos(float8); float16 __ovld __cnfn acos(float16); #ifdef cl_khr_fp64 double __ovld __cnfn acos(double); double2 __ovld __cnfn acos(double2); double3 __ovld __cnfn acos(double3); double4 __ovld __cnfn acos(double4); double8 __ovld __cnfn acos(double8); double16 __ovld __cnfn acos(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn acos(half); half2 __ovld __cnfn acos(half2); half3 __ovld __cnfn acos(half3); half4 __ovld __cnfn acos(half4); half8 __ovld __cnfn acos(half8); half16 __ovld __cnfn acos(half16); #endif //cl_khr_fp16 /** * Inverse hyperbolic cosine. */ float __ovld __cnfn acosh(float); float2 __ovld __cnfn acosh(float2); float3 __ovld __cnfn acosh(float3); float4 __ovld __cnfn acosh(float4); float8 __ovld __cnfn acosh(float8); float16 __ovld __cnfn acosh(float16); #ifdef cl_khr_fp64 double __ovld __cnfn acosh(double); double2 __ovld __cnfn acosh(double2); double3 __ovld __cnfn acosh(double3); double4 __ovld __cnfn acosh(double4); double8 __ovld __cnfn acosh(double8); double16 __ovld __cnfn acosh(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn acosh(half); half2 __ovld __cnfn acosh(half2); half3 __ovld __cnfn acosh(half3); half4 __ovld __cnfn acosh(half4); half8 __ovld __cnfn acosh(half8); half16 __ovld __cnfn acosh(half16); #endif //cl_khr_fp16 /** * Compute acos (x) / PI. */ float __ovld __cnfn acospi(float); float2 __ovld __cnfn acospi(float2); float3 __ovld __cnfn acospi(float3); float4 __ovld __cnfn acospi(float4); float8 __ovld __cnfn acospi(float8); float16 __ovld __cnfn acospi(float16); #ifdef cl_khr_fp64 double __ovld __cnfn acospi(double); double2 __ovld __cnfn acospi(double2); double3 __ovld __cnfn acospi(double3); double4 __ovld __cnfn acospi(double4); double8 __ovld __cnfn acospi(double8); double16 __ovld __cnfn acospi(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn acospi(half); half2 __ovld __cnfn acospi(half2); half3 __ovld __cnfn acospi(half3); half4 __ovld __cnfn acospi(half4); half8 __ovld __cnfn acospi(half8); half16 __ovld __cnfn acospi(half16); #endif //cl_khr_fp16 /** * Arc sine function. 
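* Illustrative sketch (editor's addition, not part of the original header): like the
* other math builtins here, asin is overloaded for scalar and vector arguments and
* operates component-wise on vectors, e.g.
*   float4 a = asin(clamp(x, -1.0f, 1.0f));  // x a float4; clamp keeps it in asin's domain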
*/ float __ovld __cnfn asin(float); float2 __ovld __cnfn asin(float2); float3 __ovld __cnfn asin(float3); float4 __ovld __cnfn asin(float4); float8 __ovld __cnfn asin(float8); float16 __ovld __cnfn asin(float16); #ifdef cl_khr_fp64 double __ovld __cnfn asin(double); double2 __ovld __cnfn asin(double2); double3 __ovld __cnfn asin(double3); double4 __ovld __cnfn asin(double4); double8 __ovld __cnfn asin(double8); double16 __ovld __cnfn asin(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn asin(half); half2 __ovld __cnfn asin(half2); half3 __ovld __cnfn asin(half3); half4 __ovld __cnfn asin(half4); half8 __ovld __cnfn asin(half8); half16 __ovld __cnfn asin(half16); #endif //cl_khr_fp16 /** * Inverse hyperbolic sine. */ float __ovld __cnfn asinh(float); float2 __ovld __cnfn asinh(float2); float3 __ovld __cnfn asinh(float3); float4 __ovld __cnfn asinh(float4); float8 __ovld __cnfn asinh(float8); float16 __ovld __cnfn asinh(float16); #ifdef cl_khr_fp64 double __ovld __cnfn asinh(double); double2 __ovld __cnfn asinh(double2); double3 __ovld __cnfn asinh(double3); double4 __ovld __cnfn asinh(double4); double8 __ovld __cnfn asinh(double8); double16 __ovld __cnfn asinh(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn asinh(half); half2 __ovld __cnfn asinh(half2); half3 __ovld __cnfn asinh(half3); half4 __ovld __cnfn asinh(half4); half8 __ovld __cnfn asinh(half8); half16 __ovld __cnfn asinh(half16); #endif //cl_khr_fp16 /** * Compute asin (x) / PI. */ float __ovld __cnfn asinpi(float); float2 __ovld __cnfn asinpi(float2); float3 __ovld __cnfn asinpi(float3); float4 __ovld __cnfn asinpi(float4); float8 __ovld __cnfn asinpi(float8); float16 __ovld __cnfn asinpi(float16); #ifdef cl_khr_fp64 double __ovld __cnfn asinpi(double); double2 __ovld __cnfn asinpi(double2); double3 __ovld __cnfn asinpi(double3); double4 __ovld __cnfn asinpi(double4); double8 __ovld __cnfn asinpi(double8); double16 __ovld __cnfn asinpi(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn asinpi(half); half2 __ovld __cnfn asinpi(half2); half3 __ovld __cnfn asinpi(half3); half4 __ovld __cnfn asinpi(half4); half8 __ovld __cnfn asinpi(half8); half16 __ovld __cnfn asinpi(half16); #endif //cl_khr_fp16 /** * Arc tangent function. */ float __ovld __cnfn atan(float); float2 __ovld __cnfn atan(float2); float3 __ovld __cnfn atan(float3); float4 __ovld __cnfn atan(float4); float8 __ovld __cnfn atan(float8); float16 __ovld __cnfn atan(float16); #ifdef cl_khr_fp64 double __ovld __cnfn atan(double); double2 __ovld __cnfn atan(double2); double3 __ovld __cnfn atan(double3); double4 __ovld __cnfn atan(double4); double8 __ovld __cnfn atan(double8); double16 __ovld __cnfn atan(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn atan(half); half2 __ovld __cnfn atan(half2); half3 __ovld __cnfn atan(half3); half4 __ovld __cnfn atan(half4); half8 __ovld __cnfn atan(half8); half16 __ovld __cnfn atan(half16); #endif //cl_khr_fp16 /** * Arc tangent of y / x. 
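* Illustrative sketch (editor's addition, not part of the original header): atan2 uses
* the signs of both arguments to return the quadrant-correct angle, e.g. for a
* Cartesian-to-polar conversion with a float2 p:
*   float ang = atan2(p.y, p.x);  // angle in [-pi, pi], unlike atan(p.y / p.x)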
*/ float __ovld __cnfn atan2(float, float); float2 __ovld __cnfn atan2(float2, float2); float3 __ovld __cnfn atan2(float3, float3); float4 __ovld __cnfn atan2(float4, float4); float8 __ovld __cnfn atan2(float8, float8); float16 __ovld __cnfn atan2(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn atan2(double, double); double2 __ovld __cnfn atan2(double2, double2); double3 __ovld __cnfn atan2(double3, double3); double4 __ovld __cnfn atan2(double4, double4); double8 __ovld __cnfn atan2(double8, double8); double16 __ovld __cnfn atan2(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn atan2(half, half); half2 __ovld __cnfn atan2(half2, half2); half3 __ovld __cnfn atan2(half3, half3); half4 __ovld __cnfn atan2(half4, half4); half8 __ovld __cnfn atan2(half8, half8); half16 __ovld __cnfn atan2(half16, half16); #endif //cl_khr_fp16 /** * Hyperbolic arc tangent. */ float __ovld __cnfn atanh(float); float2 __ovld __cnfn atanh(float2); float3 __ovld __cnfn atanh(float3); float4 __ovld __cnfn atanh(float4); float8 __ovld __cnfn atanh(float8); float16 __ovld __cnfn atanh(float16); #ifdef cl_khr_fp64 double __ovld __cnfn atanh(double); double2 __ovld __cnfn atanh(double2); double3 __ovld __cnfn atanh(double3); double4 __ovld __cnfn atanh(double4); double8 __ovld __cnfn atanh(double8); double16 __ovld __cnfn atanh(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn atanh(half); half2 __ovld __cnfn atanh(half2); half3 __ovld __cnfn atanh(half3); half4 __ovld __cnfn atanh(half4); half8 __ovld __cnfn atanh(half8); half16 __ovld __cnfn atanh(half16); #endif //cl_khr_fp16 /** * Compute atan (x) / PI. */ float __ovld __cnfn atanpi(float); float2 __ovld __cnfn atanpi(float2); float3 __ovld __cnfn atanpi(float3); float4 __ovld __cnfn atanpi(float4); float8 __ovld __cnfn atanpi(float8); float16 __ovld __cnfn atanpi(float16); #ifdef cl_khr_fp64 double __ovld __cnfn atanpi(double); double2 __ovld __cnfn atanpi(double2); double3 __ovld __cnfn atanpi(double3); double4 __ovld __cnfn atanpi(double4); double8 __ovld __cnfn atanpi(double8); double16 __ovld __cnfn atanpi(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn atanpi(half); half2 __ovld __cnfn atanpi(half2); half3 __ovld __cnfn atanpi(half3); half4 __ovld __cnfn atanpi(half4); half8 __ovld __cnfn atanpi(half8); half16 __ovld __cnfn atanpi(half16); #endif //cl_khr_fp16 /** * Compute atan2 (y, x) / PI. */ float __ovld __cnfn atan2pi(float, float); float2 __ovld __cnfn atan2pi(float2, float2); float3 __ovld __cnfn atan2pi(float3, float3); float4 __ovld __cnfn atan2pi(float4, float4); float8 __ovld __cnfn atan2pi(float8, float8); float16 __ovld __cnfn atan2pi(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn atan2pi(double, double); double2 __ovld __cnfn atan2pi(double2, double2); double3 __ovld __cnfn atan2pi(double3, double3); double4 __ovld __cnfn atan2pi(double4, double4); double8 __ovld __cnfn atan2pi(double8, double8); double16 __ovld __cnfn atan2pi(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn atan2pi(half, half); half2 __ovld __cnfn atan2pi(half2, half2); half3 __ovld __cnfn atan2pi(half3, half3); half4 __ovld __cnfn atan2pi(half4, half4); half8 __ovld __cnfn atan2pi(half8, half8); half16 __ovld __cnfn atan2pi(half16, half16); #endif //cl_khr_fp16 /** * Compute cube-root. 
*/ float __ovld __cnfn cbrt(float); float2 __ovld __cnfn cbrt(float2); float3 __ovld __cnfn cbrt(float3); float4 __ovld __cnfn cbrt(float4); float8 __ovld __cnfn cbrt(float8); float16 __ovld __cnfn cbrt(float16); #ifdef cl_khr_fp64 double __ovld __cnfn cbrt(double); double2 __ovld __cnfn cbrt(double2); double3 __ovld __cnfn cbrt(double3); double4 __ovld __cnfn cbrt(double4); double8 __ovld __cnfn cbrt(double8); double16 __ovld __cnfn cbrt(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn cbrt(half); half2 __ovld __cnfn cbrt(half2); half3 __ovld __cnfn cbrt(half3); half4 __ovld __cnfn cbrt(half4); half8 __ovld __cnfn cbrt(half8); half16 __ovld __cnfn cbrt(half16); #endif //cl_khr_fp16 /** * Round to integral value using the round to positive * infinity rounding mode. */ float __ovld __cnfn ceil(float); float2 __ovld __cnfn ceil(float2); float3 __ovld __cnfn ceil(float3); float4 __ovld __cnfn ceil(float4); float8 __ovld __cnfn ceil(float8); float16 __ovld __cnfn ceil(float16); #ifdef cl_khr_fp64 double __ovld __cnfn ceil(double); double2 __ovld __cnfn ceil(double2); double3 __ovld __cnfn ceil(double3); double4 __ovld __cnfn ceil(double4); double8 __ovld __cnfn ceil(double8); double16 __ovld __cnfn ceil(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn ceil(half); half2 __ovld __cnfn ceil(half2); half3 __ovld __cnfn ceil(half3); half4 __ovld __cnfn ceil(half4); half8 __ovld __cnfn ceil(half8); half16 __ovld __cnfn ceil(half16); #endif //cl_khr_fp16 /** * Returns x with its sign changed to match the sign of y. */ float __ovld __cnfn copysign(float, float); float2 __ovld __cnfn copysign(float2, float2); float3 __ovld __cnfn copysign(float3, float3); float4 __ovld __cnfn copysign(float4, float4); float8 __ovld __cnfn copysign(float8, float8); float16 __ovld __cnfn copysign(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn copysign(double, double); double2 __ovld __cnfn copysign(double2, double2); double3 __ovld __cnfn copysign(double3, double3); double4 __ovld __cnfn copysign(double4, double4); double8 __ovld __cnfn copysign(double8, double8); double16 __ovld __cnfn copysign(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn copysign(half, half); half2 __ovld __cnfn copysign(half2, half2); half3 __ovld __cnfn copysign(half3, half3); half4 __ovld __cnfn copysign(half4, half4); half8 __ovld __cnfn copysign(half8, half8); half16 __ovld __cnfn copysign(half16, half16); #endif //cl_khr_fp16 /** * Compute cosine. */ float __ovld __cnfn cos(float); float2 __ovld __cnfn cos(float2); float3 __ovld __cnfn cos(float3); float4 __ovld __cnfn cos(float4); float8 __ovld __cnfn cos(float8); float16 __ovld __cnfn cos(float16); #ifdef cl_khr_fp64 double __ovld __cnfn cos(double); double2 __ovld __cnfn cos(double2); double3 __ovld __cnfn cos(double3); double4 __ovld __cnfn cos(double4); double8 __ovld __cnfn cos(double8); double16 __ovld __cnfn cos(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn cos(half); half2 __ovld __cnfn cos(half2); half3 __ovld __cnfn cos(half3); half4 __ovld __cnfn cos(half4); half8 __ovld __cnfn cos(half8); half16 __ovld __cnfn cos(half16); #endif //cl_khr_fp16 /** * Compute hyperbolic cosine. 
*/ float __ovld __cnfn cosh(float); float2 __ovld __cnfn cosh(float2); float3 __ovld __cnfn cosh(float3); float4 __ovld __cnfn cosh(float4); float8 __ovld __cnfn cosh(float8); float16 __ovld __cnfn cosh(float16); #ifdef cl_khr_fp64 double __ovld __cnfn cosh(double); double2 __ovld __cnfn cosh(double2); double3 __ovld __cnfn cosh(double3); double4 __ovld __cnfn cosh(double4); double8 __ovld __cnfn cosh(double8); double16 __ovld __cnfn cosh(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn cosh(half); half2 __ovld __cnfn cosh(half2); half3 __ovld __cnfn cosh(half3); half4 __ovld __cnfn cosh(half4); half8 __ovld __cnfn cosh(half8); half16 __ovld __cnfn cosh(half16); #endif //cl_khr_fp16 /** * Compute cos (PI * x). */ float __ovld __cnfn cospi(float); float2 __ovld __cnfn cospi(float2); float3 __ovld __cnfn cospi(float3); float4 __ovld __cnfn cospi(float4); float8 __ovld __cnfn cospi(float8); float16 __ovld __cnfn cospi(float16); #ifdef cl_khr_fp64 double __ovld __cnfn cospi(double); double2 __ovld __cnfn cospi(double2); double3 __ovld __cnfn cospi(double3); double4 __ovld __cnfn cospi(double4); double8 __ovld __cnfn cospi(double8); double16 __ovld __cnfn cospi(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn cospi(half); half2 __ovld __cnfn cospi(half2); half3 __ovld __cnfn cospi(half3); half4 __ovld __cnfn cospi(half4); half8 __ovld __cnfn cospi(half8); half16 __ovld __cnfn cospi(half16); #endif //cl_khr_fp16 /** * Complementary error function. */ float __ovld __cnfn erfc(float); float2 __ovld __cnfn erfc(float2); float3 __ovld __cnfn erfc(float3); float4 __ovld __cnfn erfc(float4); float8 __ovld __cnfn erfc(float8); float16 __ovld __cnfn erfc(float16); #ifdef cl_khr_fp64 double __ovld __cnfn erfc(double); double2 __ovld __cnfn erfc(double2); double3 __ovld __cnfn erfc(double3); double4 __ovld __cnfn erfc(double4); double8 __ovld __cnfn erfc(double8); double16 __ovld __cnfn erfc(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn erfc(half); half2 __ovld __cnfn erfc(half2); half3 __ovld __cnfn erfc(half3); half4 __ovld __cnfn erfc(half4); half8 __ovld __cnfn erfc(half8); half16 __ovld __cnfn erfc(half16); #endif //cl_khr_fp16 /** * Error function encountered in integrating the * normal distribution. */ float __ovld __cnfn erf(float); float2 __ovld __cnfn erf(float2); float3 __ovld __cnfn erf(float3); float4 __ovld __cnfn erf(float4); float8 __ovld __cnfn erf(float8); float16 __ovld __cnfn erf(float16); #ifdef cl_khr_fp64 double __ovld __cnfn erf(double); double2 __ovld __cnfn erf(double2); double3 __ovld __cnfn erf(double3); double4 __ovld __cnfn erf(double4); double8 __ovld __cnfn erf(double8); double16 __ovld __cnfn erf(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn erf(half); half2 __ovld __cnfn erf(half2); half3 __ovld __cnfn erf(half3); half4 __ovld __cnfn erf(half4); half8 __ovld __cnfn erf(half8); half16 __ovld __cnfn erf(half16); #endif //cl_khr_fp16 /** * Compute the base e exponential function of x. 
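 *
 * Usage sketch (illustrative; the kernel name and sigma parameter are
 * hypothetical): a per-element Gaussian weight exp(-x^2 / (2*sigma^2)):
 *
 *   __kernel void gauss_weight(__global const float *x, __global float *w,
 *                              float sigma) {
 *       size_t gid = get_global_id(0);
 *       float t = x[gid] / sigma;
 *       w[gid] = exp(-0.5f * t * t);
 *   }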
*/ float __ovld __cnfn exp(float); float2 __ovld __cnfn exp(float2); float3 __ovld __cnfn exp(float3); float4 __ovld __cnfn exp(float4); float8 __ovld __cnfn exp(float8); float16 __ovld __cnfn exp(float16); #ifdef cl_khr_fp64 double __ovld __cnfn exp(double); double2 __ovld __cnfn exp(double2); double3 __ovld __cnfn exp(double3); double4 __ovld __cnfn exp(double4); double8 __ovld __cnfn exp(double8); double16 __ovld __cnfn exp(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn exp(half); half2 __ovld __cnfn exp(half2); half3 __ovld __cnfn exp(half3); half4 __ovld __cnfn exp(half4); half8 __ovld __cnfn exp(half8); half16 __ovld __cnfn exp(half16); #endif //cl_khr_fp16 /** * Exponential base 2 function. */ float __ovld __cnfn exp2(float); float2 __ovld __cnfn exp2(float2); float3 __ovld __cnfn exp2(float3); float4 __ovld __cnfn exp2(float4); float8 __ovld __cnfn exp2(float8); float16 __ovld __cnfn exp2(float16); #ifdef cl_khr_fp64 double __ovld __cnfn exp2(double); double2 __ovld __cnfn exp2(double2); double3 __ovld __cnfn exp2(double3); double4 __ovld __cnfn exp2(double4); double8 __ovld __cnfn exp2(double8); double16 __ovld __cnfn exp2(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn exp2(half); half2 __ovld __cnfn exp2(half2); half3 __ovld __cnfn exp2(half3); half4 __ovld __cnfn exp2(half4); half8 __ovld __cnfn exp2(half8); half16 __ovld __cnfn exp2(half16); #endif //cl_khr_fp16 /** * Exponential base 10 function. */ float __ovld __cnfn exp10(float); float2 __ovld __cnfn exp10(float2); float3 __ovld __cnfn exp10(float3); float4 __ovld __cnfn exp10(float4); float8 __ovld __cnfn exp10(float8); float16 __ovld __cnfn exp10(float16); #ifdef cl_khr_fp64 double __ovld __cnfn exp10(double); double2 __ovld __cnfn exp10(double2); double3 __ovld __cnfn exp10(double3); double4 __ovld __cnfn exp10(double4); double8 __ovld __cnfn exp10(double8); double16 __ovld __cnfn exp10(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn exp10(half); half2 __ovld __cnfn exp10(half2); half3 __ovld __cnfn exp10(half3); half4 __ovld __cnfn exp10(half4); half8 __ovld __cnfn exp10(half8); half16 __ovld __cnfn exp10(half16); #endif //cl_khr_fp16 /** * Compute e^x- 1.0. */ float __ovld __cnfn expm1(float); float2 __ovld __cnfn expm1(float2); float3 __ovld __cnfn expm1(float3); float4 __ovld __cnfn expm1(float4); float8 __ovld __cnfn expm1(float8); float16 __ovld __cnfn expm1(float16); #ifdef cl_khr_fp64 double __ovld __cnfn expm1(double); double2 __ovld __cnfn expm1(double2); double3 __ovld __cnfn expm1(double3); double4 __ovld __cnfn expm1(double4); double8 __ovld __cnfn expm1(double8); double16 __ovld __cnfn expm1(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn expm1(half); half2 __ovld __cnfn expm1(half2); half3 __ovld __cnfn expm1(half3); half4 __ovld __cnfn expm1(half4); half8 __ovld __cnfn expm1(half8); half16 __ovld __cnfn expm1(half16); #endif //cl_khr_fp16 /** * Compute absolute value of a floating-point number. 
*/ float __ovld __cnfn fabs(float); float2 __ovld __cnfn fabs(float2); float3 __ovld __cnfn fabs(float3); float4 __ovld __cnfn fabs(float4); float8 __ovld __cnfn fabs(float8); float16 __ovld __cnfn fabs(float16); #ifdef cl_khr_fp64 double __ovld __cnfn fabs(double); double2 __ovld __cnfn fabs(double2); double3 __ovld __cnfn fabs(double3); double4 __ovld __cnfn fabs(double4); double8 __ovld __cnfn fabs(double8); double16 __ovld __cnfn fabs(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn fabs(half); half2 __ovld __cnfn fabs(half2); half3 __ovld __cnfn fabs(half3); half4 __ovld __cnfn fabs(half4); half8 __ovld __cnfn fabs(half8); half16 __ovld __cnfn fabs(half16); #endif //cl_khr_fp16 /** * x - y if x > y, +0 if x is less than or equal to y. */ float __ovld __cnfn fdim(float, float); float2 __ovld __cnfn fdim(float2, float2); float3 __ovld __cnfn fdim(float3, float3); float4 __ovld __cnfn fdim(float4, float4); float8 __ovld __cnfn fdim(float8, float8); float16 __ovld __cnfn fdim(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn fdim(double, double); double2 __ovld __cnfn fdim(double2, double2); double3 __ovld __cnfn fdim(double3, double3); double4 __ovld __cnfn fdim(double4, double4); double8 __ovld __cnfn fdim(double8, double8); double16 __ovld __cnfn fdim(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn fdim(half, half); half2 __ovld __cnfn fdim(half2, half2); half3 __ovld __cnfn fdim(half3, half3); half4 __ovld __cnfn fdim(half4, half4); half8 __ovld __cnfn fdim(half8, half8); half16 __ovld __cnfn fdim(half16, half16); #endif //cl_khr_fp16 /** * Round to integral value using the round to -ve * infinity rounding mode. */ float __ovld __cnfn floor(float); float2 __ovld __cnfn floor(float2); float3 __ovld __cnfn floor(float3); float4 __ovld __cnfn floor(float4); float8 __ovld __cnfn floor(float8); float16 __ovld __cnfn floor(float16); #ifdef cl_khr_fp64 double __ovld __cnfn floor(double); double2 __ovld __cnfn floor(double2); double3 __ovld __cnfn floor(double3); double4 __ovld __cnfn floor(double4); double8 __ovld __cnfn floor(double8); double16 __ovld __cnfn floor(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn floor(half); half2 __ovld __cnfn floor(half2); half3 __ovld __cnfn floor(half3); half4 __ovld __cnfn floor(half4); half8 __ovld __cnfn floor(half8); half16 __ovld __cnfn floor(half16); #endif //cl_khr_fp16 /** * Returns the correctly rounded floating-point * representation of the sum of c with the infinitely * precise product of a and b. Rounding of * intermediate products shall not occur. Edge case * behavior is per the IEEE 754-2008 standard. 
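 *
 * Usage sketch (illustrative; the helper name is hypothetical): because fma
 * rounds only once, it is the usual building block for dot products and
 * polynomial evaluation where the precision of the unrounded product matters:
 *
 *   // Horner evaluation of c0 + c1*x + c2*x^2, one rounding per step.
 *   float horner3(float x, float c0, float c1, float c2) {
 *       return fma(fma(c2, x, c1), x, c0);
 *   }
 *
 * Where speed is preferred over this rounding guarantee, mad (declared later
 * in this file) is the less strictly specified alternative.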
*/ float __ovld __cnfn fma(float, float, float); float2 __ovld __cnfn fma(float2, float2, float2); float3 __ovld __cnfn fma(float3, float3, float3); float4 __ovld __cnfn fma(float4, float4, float4); float8 __ovld __cnfn fma(float8, float8, float8); float16 __ovld __cnfn fma(float16, float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn fma(double, double, double); double2 __ovld __cnfn fma(double2, double2, double2); double3 __ovld __cnfn fma(double3, double3, double3); double4 __ovld __cnfn fma(double4, double4, double4); double8 __ovld __cnfn fma(double8, double8, double8); double16 __ovld __cnfn fma(double16, double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn fma(half, half, half); half2 __ovld __cnfn fma(half2, half2, half2); half3 __ovld __cnfn fma(half3, half3, half3); half4 __ovld __cnfn fma(half4, half4, half4); half8 __ovld __cnfn fma(half8, half8, half8); half16 __ovld __cnfn fma(half16, half16, half16); #endif //cl_khr_fp16 /** * Returns y if x < y, otherwise it returns x. If one * argument is a NaN, fmax() returns the other * argument. If both arguments are NaNs, fmax() * returns a NaN. */ float __ovld __cnfn fmax(float, float); float2 __ovld __cnfn fmax(float2, float2); float3 __ovld __cnfn fmax(float3, float3); float4 __ovld __cnfn fmax(float4, float4); float8 __ovld __cnfn fmax(float8, float8); float16 __ovld __cnfn fmax(float16, float16); float2 __ovld __cnfn fmax(float2, float); float3 __ovld __cnfn fmax(float3, float); float4 __ovld __cnfn fmax(float4, float); float8 __ovld __cnfn fmax(float8, float); float16 __ovld __cnfn fmax(float16, float); #ifdef cl_khr_fp64 double __ovld __cnfn fmax(double, double); double2 __ovld __cnfn fmax(double2, double2); double3 __ovld __cnfn fmax(double3, double3); double4 __ovld __cnfn fmax(double4, double4); double8 __ovld __cnfn fmax(double8, double8); double16 __ovld __cnfn fmax(double16, double16); double2 __ovld __cnfn fmax(double2, double); double3 __ovld __cnfn fmax(double3, double); double4 __ovld __cnfn fmax(double4, double); double8 __ovld __cnfn fmax(double8, double); double16 __ovld __cnfn fmax(double16, double); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn fmax(half, half); half2 __ovld __cnfn fmax(half2, half2); half3 __ovld __cnfn fmax(half3, half3); half4 __ovld __cnfn fmax(half4, half4); half8 __ovld __cnfn fmax(half8, half8); half16 __ovld __cnfn fmax(half16, half16); half2 __ovld __cnfn fmax(half2, half); half3 __ovld __cnfn fmax(half3, half); half4 __ovld __cnfn fmax(half4, half); half8 __ovld __cnfn fmax(half8, half); half16 __ovld __cnfn fmax(half16, half); #endif //cl_khr_fp16 /** * Returns y if y < x, otherwise it returns x. If one * argument is a NaN, fmin() returns the other * argument. If both arguments are NaNs, fmin() * returns a NaN. 
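 *
 * Usage sketch (illustrative; the helper name is hypothetical): because fmin
 * and fmax return the non-NaN argument when exactly one argument is a NaN,
 * the composition below maps a NaN input to the lower bound instead of
 * propagating it:
 *
 *   float clamp_no_nan(float x, float lo, float hi) {
 *       return fmin(fmax(x, lo), hi);   // NaN x: fmax yields lo, result is lo
 *   }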
*/ float __ovld __cnfn fmin(float, float); float2 __ovld __cnfn fmin(float2, float2); float3 __ovld __cnfn fmin(float3, float3); float4 __ovld __cnfn fmin(float4, float4); float8 __ovld __cnfn fmin(float8, float8); float16 __ovld __cnfn fmin(float16, float16); float2 __ovld __cnfn fmin(float2, float); float3 __ovld __cnfn fmin(float3, float); float4 __ovld __cnfn fmin(float4, float); float8 __ovld __cnfn fmin(float8, float); float16 __ovld __cnfn fmin(float16, float); #ifdef cl_khr_fp64 double __ovld __cnfn fmin(double, double); double2 __ovld __cnfn fmin(double2, double2); double3 __ovld __cnfn fmin(double3, double3); double4 __ovld __cnfn fmin(double4, double4); double8 __ovld __cnfn fmin(double8, double8); double16 __ovld __cnfn fmin(double16, double16); double2 __ovld __cnfn fmin(double2, double); double3 __ovld __cnfn fmin(double3, double); double4 __ovld __cnfn fmin(double4, double); double8 __ovld __cnfn fmin(double8, double); double16 __ovld __cnfn fmin(double16, double); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn fmin(half, half); half2 __ovld __cnfn fmin(half2, half2); half3 __ovld __cnfn fmin(half3, half3); half4 __ovld __cnfn fmin(half4, half4); half8 __ovld __cnfn fmin(half8, half8); half16 __ovld __cnfn fmin(half16, half16); half2 __ovld __cnfn fmin(half2, half); half3 __ovld __cnfn fmin(half3, half); half4 __ovld __cnfn fmin(half4, half); half8 __ovld __cnfn fmin(half8, half); half16 __ovld __cnfn fmin(half16, half); #endif //cl_khr_fp16 /** * Modulus. Returns x - y * trunc (x/y). */ float __ovld __cnfn fmod(float, float); float2 __ovld __cnfn fmod(float2, float2); float3 __ovld __cnfn fmod(float3, float3); float4 __ovld __cnfn fmod(float4, float4); float8 __ovld __cnfn fmod(float8, float8); float16 __ovld __cnfn fmod(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn fmod(double, double); double2 __ovld __cnfn fmod(double2, double2); double3 __ovld __cnfn fmod(double3, double3); double4 __ovld __cnfn fmod(double4, double4); double8 __ovld __cnfn fmod(double8, double8); double16 __ovld __cnfn fmod(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn fmod(half, half); half2 __ovld __cnfn fmod(half2, half2); half3 __ovld __cnfn fmod(half3, half3); half4 __ovld __cnfn fmod(half4, half4); half8 __ovld __cnfn fmod(half8, half8); half16 __ovld __cnfn fmod(half16, half16); #endif //cl_khr_fp16 /** * Returns fmin(x - floor (x), 0x1.fffffep-1f ). * floor(x) is returned in iptr. 
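 *
 * Usage sketch (illustrative; kernel and argument names are hypothetical,
 * and it relies on one of the fract overload sets declared below being
 * available): splitting a coordinate into an integer cell index and the
 * fractional offset within that cell:
 *
 *   __kernel void split_coord(__global const float *u,
 *                             __global int *cell, __global float *t) {
 *       size_t gid = get_global_id(0);
 *       float ip;                        // receives floor(u[gid])
 *       t[gid] = fract(u[gid], &ip);     // fractional part in [0, 1)
 *       cell[gid] = (int)ip;
 *   }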
*/ #if defined(__opencl_c_generic_address_space) float __ovld fract(float, float *); float2 __ovld fract(float2, float2 *); float3 __ovld fract(float3, float3 *); float4 __ovld fract(float4, float4 *); float8 __ovld fract(float8, float8 *); float16 __ovld fract(float16, float16 *); #ifdef cl_khr_fp64 double __ovld fract(double, double *); double2 __ovld fract(double2, double2 *); double3 __ovld fract(double3, double3 *); double4 __ovld fract(double4, double4 *); double8 __ovld fract(double8, double8 *); double16 __ovld fract(double16, double16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld fract(half, half *); half2 __ovld fract(half2, half2 *); half3 __ovld fract(half3, half3 *); half4 __ovld fract(half4, half4 *); half8 __ovld fract(half8, half8 *); half16 __ovld fract(half16, half16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) float __ovld fract(float, __global float *); float2 __ovld fract(float2, __global float2 *); float3 __ovld fract(float3, __global float3 *); float4 __ovld fract(float4, __global float4 *); float8 __ovld fract(float8, __global float8 *); float16 __ovld fract(float16, __global float16 *); float __ovld fract(float, __local float *); float2 __ovld fract(float2, __local float2 *); float3 __ovld fract(float3, __local float3 *); float4 __ovld fract(float4, __local float4 *); float8 __ovld fract(float8, __local float8 *); float16 __ovld fract(float16, __local float16 *); float __ovld fract(float, __private float *); float2 __ovld fract(float2, __private float2 *); float3 __ovld fract(float3, __private float3 *); float4 __ovld fract(float4, __private float4 *); float8 __ovld fract(float8, __private float8 *); float16 __ovld fract(float16, __private float16 *); #ifdef cl_khr_fp64 double __ovld fract(double, __global double *); double2 __ovld fract(double2, __global double2 *); double3 __ovld fract(double3, __global double3 *); double4 __ovld fract(double4, __global double4 *); double8 __ovld fract(double8, __global double8 *); double16 __ovld fract(double16, __global double16 *); double __ovld fract(double, __local double *); double2 __ovld fract(double2, __local double2 *); double3 __ovld fract(double3, __local double3 *); double4 __ovld fract(double4, __local double4 *); double8 __ovld fract(double8, __local double8 *); double16 __ovld fract(double16, __local double16 *); double __ovld fract(double, __private double *); double2 __ovld fract(double2, __private double2 *); double3 __ovld fract(double3, __private double3 *); double4 __ovld fract(double4, __private double4 *); double8 __ovld fract(double8, __private double8 *); double16 __ovld fract(double16, __private double16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld fract(half, __global half *); half2 __ovld fract(half2, __global half2 *); half3 __ovld fract(half3, __global half3 *); half4 __ovld fract(half4, __global half4 *); half8 __ovld fract(half8, __global half8 *); half16 __ovld fract(half16, __global half16 *); half __ovld fract(half, __local half *); half2 __ovld fract(half2, __local half2 *); half3 __ovld fract(half3, __local half3 *); half4 __ovld fract(half4, __local half4 *); half8 __ovld fract(half8, __local half8 *); half16 __ovld fract(half16, __local half16 *); half __ovld fract(half, __private half *); half2 __ovld fract(half2, __private half2 *); half3 __ovld fract(half3, __private half3 *); half4 __ovld fract(half4, __private half4 *); half8 __ovld fract(half8, __private half8 *); half16 __ovld 
fract(half16, __private half16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_named_address_space_builtins) /** * Extract mantissa and exponent from x. For each * component the mantissa returned is a float with * magnitude in the interval [1/2, 1) or 0. Each * component of x equals mantissa returned * 2^exp. */ #if defined(__opencl_c_generic_address_space) float __ovld frexp(float, int *); float2 __ovld frexp(float2, int2 *); float3 __ovld frexp(float3, int3 *); float4 __ovld frexp(float4, int4 *); float8 __ovld frexp(float8, int8 *); float16 __ovld frexp(float16, int16 *); #ifdef cl_khr_fp64 double __ovld frexp(double, int *); double2 __ovld frexp(double2, int2 *); double3 __ovld frexp(double3, int3 *); double4 __ovld frexp(double4, int4 *); double8 __ovld frexp(double8, int8 *); double16 __ovld frexp(double16, int16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld frexp(half, int *); half2 __ovld frexp(half2, int2 *); half3 __ovld frexp(half3, int3 *); half4 __ovld frexp(half4, int4 *); half8 __ovld frexp(half8, int8 *); half16 __ovld frexp(half16, int16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) float __ovld frexp(float, __global int *); float2 __ovld frexp(float2, __global int2 *); float3 __ovld frexp(float3, __global int3 *); float4 __ovld frexp(float4, __global int4 *); float8 __ovld frexp(float8, __global int8 *); float16 __ovld frexp(float16, __global int16 *); float __ovld frexp(float, __local int *); float2 __ovld frexp(float2, __local int2 *); float3 __ovld frexp(float3, __local int3 *); float4 __ovld frexp(float4, __local int4 *); float8 __ovld frexp(float8, __local int8 *); float16 __ovld frexp(float16, __local int16 *); float __ovld frexp(float, __private int *); float2 __ovld frexp(float2, __private int2 *); float3 __ovld frexp(float3, __private int3 *); float4 __ovld frexp(float4, __private int4 *); float8 __ovld frexp(float8, __private int8 *); float16 __ovld frexp(float16, __private int16 *); #ifdef cl_khr_fp64 double __ovld frexp(double, __global int *); double2 __ovld frexp(double2, __global int2 *); double3 __ovld frexp(double3, __global int3 *); double4 __ovld frexp(double4, __global int4 *); double8 __ovld frexp(double8, __global int8 *); double16 __ovld frexp(double16, __global int16 *); double __ovld frexp(double, __local int *); double2 __ovld frexp(double2, __local int2 *); double3 __ovld frexp(double3, __local int3 *); double4 __ovld frexp(double4, __local int4 *); double8 __ovld frexp(double8, __local int8 *); double16 __ovld frexp(double16, __local int16 *); double __ovld frexp(double, __private int *); double2 __ovld frexp(double2, __private int2 *); double3 __ovld frexp(double3, __private int3 *); double4 __ovld frexp(double4, __private int4 *); double8 __ovld frexp(double8, __private int8 *); double16 __ovld frexp(double16, __private int16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld frexp(half, __global int *); half2 __ovld frexp(half2, __global int2 *); half3 __ovld frexp(half3, __global int3 *); half4 __ovld frexp(half4, __global int4 *); half8 __ovld frexp(half8, __global int8 *); half16 __ovld frexp(half16, __global int16 *); half __ovld frexp(half, __local int *); half2 __ovld frexp(half2, __local int2 *); half3 __ovld frexp(half3, __local int3 *); half4 __ovld frexp(half4, __local int4 *); half8 __ovld frexp(half8, __local int8 *); half16 __ovld frexp(half16, __local int16 *); half __ovld frexp(half, __private int *); half2 __ovld 
frexp(half2, __private int2 *); half3 __ovld frexp(half3, __private int3 *); half4 __ovld frexp(half4, __private int4 *); half8 __ovld frexp(half8, __private int8 *); half16 __ovld frexp(half16, __private int16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_named_address_space_builtins) /** * Compute the value of the square root of x^2 + y^2 * without undue overflow or underflow. */ float __ovld __cnfn hypot(float, float); float2 __ovld __cnfn hypot(float2, float2); float3 __ovld __cnfn hypot(float3, float3); float4 __ovld __cnfn hypot(float4, float4); float8 __ovld __cnfn hypot(float8, float8); float16 __ovld __cnfn hypot(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn hypot(double, double); double2 __ovld __cnfn hypot(double2, double2); double3 __ovld __cnfn hypot(double3, double3); double4 __ovld __cnfn hypot(double4, double4); double8 __ovld __cnfn hypot(double8, double8); double16 __ovld __cnfn hypot(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn hypot(half, half); half2 __ovld __cnfn hypot(half2, half2); half3 __ovld __cnfn hypot(half3, half3); half4 __ovld __cnfn hypot(half4, half4); half8 __ovld __cnfn hypot(half8, half8); half16 __ovld __cnfn hypot(half16, half16); #endif //cl_khr_fp16 /** * Return the exponent as an integer value. */ int __ovld __cnfn ilogb(float); int2 __ovld __cnfn ilogb(float2); int3 __ovld __cnfn ilogb(float3); int4 __ovld __cnfn ilogb(float4); int8 __ovld __cnfn ilogb(float8); int16 __ovld __cnfn ilogb(float16); #ifdef cl_khr_fp64 int __ovld __cnfn ilogb(double); int2 __ovld __cnfn ilogb(double2); int3 __ovld __cnfn ilogb(double3); int4 __ovld __cnfn ilogb(double4); int8 __ovld __cnfn ilogb(double8); int16 __ovld __cnfn ilogb(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn ilogb(half); int2 __ovld __cnfn ilogb(half2); int3 __ovld __cnfn ilogb(half3); int4 __ovld __cnfn ilogb(half4); int8 __ovld __cnfn ilogb(half8); int16 __ovld __cnfn ilogb(half16); #endif //cl_khr_fp16 /** * Multiply x by 2 to the power n. */ float __ovld __cnfn ldexp(float, int); float2 __ovld __cnfn ldexp(float2, int2); float3 __ovld __cnfn ldexp(float3, int3); float4 __ovld __cnfn ldexp(float4, int4); float8 __ovld __cnfn ldexp(float8, int8); float16 __ovld __cnfn ldexp(float16, int16); float2 __ovld __cnfn ldexp(float2, int); float3 __ovld __cnfn ldexp(float3, int); float4 __ovld __cnfn ldexp(float4, int); float8 __ovld __cnfn ldexp(float8, int); float16 __ovld __cnfn ldexp(float16, int); #ifdef cl_khr_fp64 double __ovld __cnfn ldexp(double, int); double2 __ovld __cnfn ldexp(double2, int2); double3 __ovld __cnfn ldexp(double3, int3); double4 __ovld __cnfn ldexp(double4, int4); double8 __ovld __cnfn ldexp(double8, int8); double16 __ovld __cnfn ldexp(double16, int16); double2 __ovld __cnfn ldexp(double2, int); double3 __ovld __cnfn ldexp(double3, int); double4 __ovld __cnfn ldexp(double4, int); double8 __ovld __cnfn ldexp(double8, int); double16 __ovld __cnfn ldexp(double16, int); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn ldexp(half, int); half2 __ovld __cnfn ldexp(half2, int2); half3 __ovld __cnfn ldexp(half3, int3); half4 __ovld __cnfn ldexp(half4, int4); half8 __ovld __cnfn ldexp(half8, int8); half16 __ovld __cnfn ldexp(half16, int16); half2 __ovld __cnfn ldexp(half2, int); half3 __ovld __cnfn ldexp(half3, int); half4 __ovld __cnfn ldexp(half4, int); half8 __ovld __cnfn ldexp(half8, int); half16 __ovld __cnfn ldexp(half16, int); #endif //cl_khr_fp16 /** * Log gamma function. 
Returns the natural * logarithm of the absolute value of the gamma * function. The sign of the gamma function is * returned in the signp argument of lgamma_r. */ float __ovld __cnfn lgamma(float); float2 __ovld __cnfn lgamma(float2); float3 __ovld __cnfn lgamma(float3); float4 __ovld __cnfn lgamma(float4); float8 __ovld __cnfn lgamma(float8); float16 __ovld __cnfn lgamma(float16); #ifdef cl_khr_fp64 double __ovld __cnfn lgamma(double); double2 __ovld __cnfn lgamma(double2); double3 __ovld __cnfn lgamma(double3); double4 __ovld __cnfn lgamma(double4); double8 __ovld __cnfn lgamma(double8); double16 __ovld __cnfn lgamma(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn lgamma(half); half2 __ovld __cnfn lgamma(half2); half3 __ovld __cnfn lgamma(half3); half4 __ovld __cnfn lgamma(half4); half8 __ovld __cnfn lgamma(half8); half16 __ovld __cnfn lgamma(half16); #endif //cl_khr_fp16 #if defined(__opencl_c_generic_address_space) float __ovld lgamma_r(float, int *); float2 __ovld lgamma_r(float2, int2 *); float3 __ovld lgamma_r(float3, int3 *); float4 __ovld lgamma_r(float4, int4 *); float8 __ovld lgamma_r(float8, int8 *); float16 __ovld lgamma_r(float16, int16 *); #ifdef cl_khr_fp64 double __ovld lgamma_r(double, int *); double2 __ovld lgamma_r(double2, int2 *); double3 __ovld lgamma_r(double3, int3 *); double4 __ovld lgamma_r(double4, int4 *); double8 __ovld lgamma_r(double8, int8 *); double16 __ovld lgamma_r(double16, int16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld lgamma_r(half, int *); half2 __ovld lgamma_r(half2, int2 *); half3 __ovld lgamma_r(half3, int3 *); half4 __ovld lgamma_r(half4, int4 *); half8 __ovld lgamma_r(half8, int8 *); half16 __ovld lgamma_r(half16, int16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) float __ovld lgamma_r(float, __global int *); float2 __ovld lgamma_r(float2, __global int2 *); float3 __ovld lgamma_r(float3, __global int3 *); float4 __ovld lgamma_r(float4, __global int4 *); float8 __ovld lgamma_r(float8, __global int8 *); float16 __ovld lgamma_r(float16, __global int16 *); float __ovld lgamma_r(float, __local int *); float2 __ovld lgamma_r(float2, __local int2 *); float3 __ovld lgamma_r(float3, __local int3 *); float4 __ovld lgamma_r(float4, __local int4 *); float8 __ovld lgamma_r(float8, __local int8 *); float16 __ovld lgamma_r(float16, __local int16 *); float __ovld lgamma_r(float, __private int *); float2 __ovld lgamma_r(float2, __private int2 *); float3 __ovld lgamma_r(float3, __private int3 *); float4 __ovld lgamma_r(float4, __private int4 *); float8 __ovld lgamma_r(float8, __private int8 *); float16 __ovld lgamma_r(float16, __private int16 *); #ifdef cl_khr_fp64 double __ovld lgamma_r(double, __global int *); double2 __ovld lgamma_r(double2, __global int2 *); double3 __ovld lgamma_r(double3, __global int3 *); double4 __ovld lgamma_r(double4, __global int4 *); double8 __ovld lgamma_r(double8, __global int8 *); double16 __ovld lgamma_r(double16, __global int16 *); double __ovld lgamma_r(double, __local int *); double2 __ovld lgamma_r(double2, __local int2 *); double3 __ovld lgamma_r(double3, __local int3 *); double4 __ovld lgamma_r(double4, __local int4 *); double8 __ovld lgamma_r(double8, __local int8 *); double16 __ovld lgamma_r(double16, __local int16 *); double __ovld lgamma_r(double, __private int *); double2 __ovld lgamma_r(double2, __private int2 *); double3 __ovld lgamma_r(double3, __private int3 *); double4 __ovld lgamma_r(double4, 
__private int4 *); double8 __ovld lgamma_r(double8, __private int8 *); double16 __ovld lgamma_r(double16, __private int16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld lgamma_r(half, __global int *); half2 __ovld lgamma_r(half2, __global int2 *); half3 __ovld lgamma_r(half3, __global int3 *); half4 __ovld lgamma_r(half4, __global int4 *); half8 __ovld lgamma_r(half8, __global int8 *); half16 __ovld lgamma_r(half16, __global int16 *); half __ovld lgamma_r(half, __local int *); half2 __ovld lgamma_r(half2, __local int2 *); half3 __ovld lgamma_r(half3, __local int3 *); half4 __ovld lgamma_r(half4, __local int4 *); half8 __ovld lgamma_r(half8, __local int8 *); half16 __ovld lgamma_r(half16, __local int16 *); half __ovld lgamma_r(half, __private int *); half2 __ovld lgamma_r(half2, __private int2 *); half3 __ovld lgamma_r(half3, __private int3 *); half4 __ovld lgamma_r(half4, __private int4 *); half8 __ovld lgamma_r(half8, __private int8 *); half16 __ovld lgamma_r(half16, __private int16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_named_address_space_builtins) /** * Compute natural logarithm. */ float __ovld __cnfn log(float); float2 __ovld __cnfn log(float2); float3 __ovld __cnfn log(float3); float4 __ovld __cnfn log(float4); float8 __ovld __cnfn log(float8); float16 __ovld __cnfn log(float16); #ifdef cl_khr_fp64 double __ovld __cnfn log(double); double2 __ovld __cnfn log(double2); double3 __ovld __cnfn log(double3); double4 __ovld __cnfn log(double4); double8 __ovld __cnfn log(double8); double16 __ovld __cnfn log(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn log(half); half2 __ovld __cnfn log(half2); half3 __ovld __cnfn log(half3); half4 __ovld __cnfn log(half4); half8 __ovld __cnfn log(half8); half16 __ovld __cnfn log(half16); #endif //cl_khr_fp16 /** * Compute a base 2 logarithm. */ float __ovld __cnfn log2(float); float2 __ovld __cnfn log2(float2); float3 __ovld __cnfn log2(float3); float4 __ovld __cnfn log2(float4); float8 __ovld __cnfn log2(float8); float16 __ovld __cnfn log2(float16); #ifdef cl_khr_fp64 double __ovld __cnfn log2(double); double2 __ovld __cnfn log2(double2); double3 __ovld __cnfn log2(double3); double4 __ovld __cnfn log2(double4); double8 __ovld __cnfn log2(double8); double16 __ovld __cnfn log2(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn log2(half); half2 __ovld __cnfn log2(half2); half3 __ovld __cnfn log2(half3); half4 __ovld __cnfn log2(half4); half8 __ovld __cnfn log2(half8); half16 __ovld __cnfn log2(half16); #endif //cl_khr_fp16 /** * Compute a base 10 logarithm. */ float __ovld __cnfn log10(float); float2 __ovld __cnfn log10(float2); float3 __ovld __cnfn log10(float3); float4 __ovld __cnfn log10(float4); float8 __ovld __cnfn log10(float8); float16 __ovld __cnfn log10(float16); #ifdef cl_khr_fp64 double __ovld __cnfn log10(double); double2 __ovld __cnfn log10(double2); double3 __ovld __cnfn log10(double3); double4 __ovld __cnfn log10(double4); double8 __ovld __cnfn log10(double8); double16 __ovld __cnfn log10(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn log10(half); half2 __ovld __cnfn log10(half2); half3 __ovld __cnfn log10(half3); half4 __ovld __cnfn log10(half4); half8 __ovld __cnfn log10(half8); half16 __ovld __cnfn log10(half16); #endif //cl_khr_fp16 /** * Compute a base e logarithm of (1.0 + x). 
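 *
 * Usage sketch (illustrative; kernel and argument names are hypothetical):
 * for |x| much smaller than 1, log(1.0f + x) loses most of x in the
 * addition, while log1p(x) keeps the small-value accuracy:
 *
 *   __kernel void log_return(__global const float *r, __global float *out) {
 *       size_t gid = get_global_id(0);
 *       out[gid] = log1p(r[gid]);   // accurate even for tiny r, e.g. 1e-7f
 *   }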
*/ float __ovld __cnfn log1p(float); float2 __ovld __cnfn log1p(float2); float3 __ovld __cnfn log1p(float3); float4 __ovld __cnfn log1p(float4); float8 __ovld __cnfn log1p(float8); float16 __ovld __cnfn log1p(float16); #ifdef cl_khr_fp64 double __ovld __cnfn log1p(double); double2 __ovld __cnfn log1p(double2); double3 __ovld __cnfn log1p(double3); double4 __ovld __cnfn log1p(double4); double8 __ovld __cnfn log1p(double8); double16 __ovld __cnfn log1p(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn log1p(half); half2 __ovld __cnfn log1p(half2); half3 __ovld __cnfn log1p(half3); half4 __ovld __cnfn log1p(half4); half8 __ovld __cnfn log1p(half8); half16 __ovld __cnfn log1p(half16); #endif //cl_khr_fp16 /** * Compute the exponent of x, which is the integral * part of logr | x |. */ float __ovld __cnfn logb(float); float2 __ovld __cnfn logb(float2); float3 __ovld __cnfn logb(float3); float4 __ovld __cnfn logb(float4); float8 __ovld __cnfn logb(float8); float16 __ovld __cnfn logb(float16); #ifdef cl_khr_fp64 double __ovld __cnfn logb(double); double2 __ovld __cnfn logb(double2); double3 __ovld __cnfn logb(double3); double4 __ovld __cnfn logb(double4); double8 __ovld __cnfn logb(double8); double16 __ovld __cnfn logb(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn logb(half); half2 __ovld __cnfn logb(half2); half3 __ovld __cnfn logb(half3); half4 __ovld __cnfn logb(half4); half8 __ovld __cnfn logb(half8); half16 __ovld __cnfn logb(half16); #endif //cl_khr_fp16 /** * mad approximates a * b + c. Whether or how the * product of a * b is rounded and how supernormal or * subnormal intermediate products are handled is not * defined. mad is intended to be used where speed is * preferred over accuracy. */ float __ovld __cnfn mad(float, float, float); float2 __ovld __cnfn mad(float2, float2, float2); float3 __ovld __cnfn mad(float3, float3, float3); float4 __ovld __cnfn mad(float4, float4, float4); float8 __ovld __cnfn mad(float8, float8, float8); float16 __ovld __cnfn mad(float16, float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn mad(double, double, double); double2 __ovld __cnfn mad(double2, double2, double2); double3 __ovld __cnfn mad(double3, double3, double3); double4 __ovld __cnfn mad(double4, double4, double4); double8 __ovld __cnfn mad(double8, double8, double8); double16 __ovld __cnfn mad(double16, double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn mad(half, half, half); half2 __ovld __cnfn mad(half2, half2, half2); half3 __ovld __cnfn mad(half3, half3, half3); half4 __ovld __cnfn mad(half4, half4, half4); half8 __ovld __cnfn mad(half8, half8, half8); half16 __ovld __cnfn mad(half16, half16, half16); #endif //cl_khr_fp16 /** * Returns x if | x | > | y |, y if | y | > | x |, otherwise * fmax(x, y). 
*/ float __ovld __cnfn maxmag(float, float); float2 __ovld __cnfn maxmag(float2, float2); float3 __ovld __cnfn maxmag(float3, float3); float4 __ovld __cnfn maxmag(float4, float4); float8 __ovld __cnfn maxmag(float8, float8); float16 __ovld __cnfn maxmag(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn maxmag(double, double); double2 __ovld __cnfn maxmag(double2, double2); double3 __ovld __cnfn maxmag(double3, double3); double4 __ovld __cnfn maxmag(double4, double4); double8 __ovld __cnfn maxmag(double8, double8); double16 __ovld __cnfn maxmag(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn maxmag(half, half); half2 __ovld __cnfn maxmag(half2, half2); half3 __ovld __cnfn maxmag(half3, half3); half4 __ovld __cnfn maxmag(half4, half4); half8 __ovld __cnfn maxmag(half8, half8); half16 __ovld __cnfn maxmag(half16, half16); #endif //cl_khr_fp16 /** * Returns x if | x | < | y |, y if | y | < | x |, otherwise * fmin(x, y). */ float __ovld __cnfn minmag(float, float); float2 __ovld __cnfn minmag(float2, float2); float3 __ovld __cnfn minmag(float3, float3); float4 __ovld __cnfn minmag(float4, float4); float8 __ovld __cnfn minmag(float8, float8); float16 __ovld __cnfn minmag(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn minmag(double, double); double2 __ovld __cnfn minmag(double2, double2); double3 __ovld __cnfn minmag(double3, double3); double4 __ovld __cnfn minmag(double4, double4); double8 __ovld __cnfn minmag(double8, double8); double16 __ovld __cnfn minmag(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn minmag(half, half); half2 __ovld __cnfn minmag(half2, half2); half3 __ovld __cnfn minmag(half3, half3); half4 __ovld __cnfn minmag(half4, half4); half8 __ovld __cnfn minmag(half8, half8); half16 __ovld __cnfn minmag(half16, half16); #endif //cl_khr_fp16 /** * Decompose a floating-point number. The modf * function breaks the argument x into integral and * fractional parts, each of which has the same sign as * the argument. It stores the integral part in the object * pointed to by iptr. 
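 *
 * Usage sketch (illustrative; kernel and argument names are hypothetical):
 * unlike fract, both parts produced by modf carry the sign of the input,
 * e.g. modf of -2.25f yields -0.25f with -2.0f stored through the pointer:
 *
 *   __kernel void split_signed(__global const float *x,
 *                              __global float *ipart, __global float *fpart) {
 *       size_t gid = get_global_id(0);
 *       float ip;
 *       fpart[gid] = modf(x[gid], &ip);
 *       ipart[gid] = ip;
 *   }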
*/ #if defined(__opencl_c_generic_address_space) float __ovld modf(float, float *); float2 __ovld modf(float2, float2 *); float3 __ovld modf(float3, float3 *); float4 __ovld modf(float4, float4 *); float8 __ovld modf(float8, float8 *); float16 __ovld modf(float16, float16 *); #ifdef cl_khr_fp64 double __ovld modf(double, double *); double2 __ovld modf(double2, double2 *); double3 __ovld modf(double3, double3 *); double4 __ovld modf(double4, double4 *); double8 __ovld modf(double8, double8 *); double16 __ovld modf(double16, double16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld modf(half, half *); half2 __ovld modf(half2, half2 *); half3 __ovld modf(half3, half3 *); half4 __ovld modf(half4, half4 *); half8 __ovld modf(half8, half8 *); half16 __ovld modf(half16, half16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) float __ovld modf(float, __global float *); float2 __ovld modf(float2, __global float2 *); float3 __ovld modf(float3, __global float3 *); float4 __ovld modf(float4, __global float4 *); float8 __ovld modf(float8, __global float8 *); float16 __ovld modf(float16, __global float16 *); float __ovld modf(float, __local float *); float2 __ovld modf(float2, __local float2 *); float3 __ovld modf(float3, __local float3 *); float4 __ovld modf(float4, __local float4 *); float8 __ovld modf(float8, __local float8 *); float16 __ovld modf(float16, __local float16 *); float __ovld modf(float, __private float *); float2 __ovld modf(float2, __private float2 *); float3 __ovld modf(float3, __private float3 *); float4 __ovld modf(float4, __private float4 *); float8 __ovld modf(float8, __private float8 *); float16 __ovld modf(float16, __private float16 *); #ifdef cl_khr_fp64 double __ovld modf(double, __global double *); double2 __ovld modf(double2, __global double2 *); double3 __ovld modf(double3, __global double3 *); double4 __ovld modf(double4, __global double4 *); double8 __ovld modf(double8, __global double8 *); double16 __ovld modf(double16, __global double16 *); double __ovld modf(double, __local double *); double2 __ovld modf(double2, __local double2 *); double3 __ovld modf(double3, __local double3 *); double4 __ovld modf(double4, __local double4 *); double8 __ovld modf(double8, __local double8 *); double16 __ovld modf(double16, __local double16 *); double __ovld modf(double, __private double *); double2 __ovld modf(double2, __private double2 *); double3 __ovld modf(double3, __private double3 *); double4 __ovld modf(double4, __private double4 *); double8 __ovld modf(double8, __private double8 *); double16 __ovld modf(double16, __private double16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld modf(half, __global half *); half2 __ovld modf(half2, __global half2 *); half3 __ovld modf(half3, __global half3 *); half4 __ovld modf(half4, __global half4 *); half8 __ovld modf(half8, __global half8 *); half16 __ovld modf(half16, __global half16 *); half __ovld modf(half, __local half *); half2 __ovld modf(half2, __local half2 *); half3 __ovld modf(half3, __local half3 *); half4 __ovld modf(half4, __local half4 *); half8 __ovld modf(half8, __local half8 *); half16 __ovld modf(half16, __local half16 *); half __ovld modf(half, __private half *); half2 __ovld modf(half2, __private half2 *); half3 __ovld modf(half3, __private half3 *); half4 __ovld modf(half4, __private half4 *); half8 __ovld modf(half8, __private half8 *); half16 __ovld modf(half16, __private half16 *); #endif //cl_khr_fp16 #endif 
//defined(__opencl_c_named_address_space_builtins) /** * Returns a quiet NaN. The nancode may be placed * in the significand of the resulting NaN. */ float __ovld __cnfn nan(uint); float2 __ovld __cnfn nan(uint2); float3 __ovld __cnfn nan(uint3); float4 __ovld __cnfn nan(uint4); float8 __ovld __cnfn nan(uint8); float16 __ovld __cnfn nan(uint16); #ifdef cl_khr_fp64 double __ovld __cnfn nan(ulong); double2 __ovld __cnfn nan(ulong2); double3 __ovld __cnfn nan(ulong3); double4 __ovld __cnfn nan(ulong4); double8 __ovld __cnfn nan(ulong8); double16 __ovld __cnfn nan(ulong16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn nan(ushort); half2 __ovld __cnfn nan(ushort2); half3 __ovld __cnfn nan(ushort3); half4 __ovld __cnfn nan(ushort4); half8 __ovld __cnfn nan(ushort8); half16 __ovld __cnfn nan(ushort16); #endif //cl_khr_fp16 /** * Computes the next representable single-precision * floating-point value following x in the direction of * y. Thus, if y is less than x, nextafter() returns the * largest representable floating-point number less * than x. */ float __ovld __cnfn nextafter(float, float); float2 __ovld __cnfn nextafter(float2, float2); float3 __ovld __cnfn nextafter(float3, float3); float4 __ovld __cnfn nextafter(float4, float4); float8 __ovld __cnfn nextafter(float8, float8); float16 __ovld __cnfn nextafter(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn nextafter(double, double); double2 __ovld __cnfn nextafter(double2, double2); double3 __ovld __cnfn nextafter(double3, double3); double4 __ovld __cnfn nextafter(double4, double4); double8 __ovld __cnfn nextafter(double8, double8); double16 __ovld __cnfn nextafter(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn nextafter(half, half); half2 __ovld __cnfn nextafter(half2, half2); half3 __ovld __cnfn nextafter(half3, half3); half4 __ovld __cnfn nextafter(half4, half4); half8 __ovld __cnfn nextafter(half8, half8); half16 __ovld __cnfn nextafter(half16, half16); #endif //cl_khr_fp16 /** * Compute x to the power y. */ float __ovld __cnfn pow(float, float); float2 __ovld __cnfn pow(float2, float2); float3 __ovld __cnfn pow(float3, float3); float4 __ovld __cnfn pow(float4, float4); float8 __ovld __cnfn pow(float8, float8); float16 __ovld __cnfn pow(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn pow(double, double); double2 __ovld __cnfn pow(double2, double2); double3 __ovld __cnfn pow(double3, double3); double4 __ovld __cnfn pow(double4, double4); double8 __ovld __cnfn pow(double8, double8); double16 __ovld __cnfn pow(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn pow(half, half); half2 __ovld __cnfn pow(half2, half2); half3 __ovld __cnfn pow(half3, half3); half4 __ovld __cnfn pow(half4, half4); half8 __ovld __cnfn pow(half8, half8); half16 __ovld __cnfn pow(half16, half16); #endif //cl_khr_fp16 /** * Compute x to the power y, where y is an integer. 
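 *
 * Usage sketch (illustrative; kernel and argument names are hypothetical):
 * pown takes the exponent as an int, so it is also well defined for negative
 * bases, where pow with a non-integral exponent would return NaN:
 *
 *   __kernel void cube(__global const float *x, __global float *out) {
 *       size_t gid = get_global_id(0);
 *       out[gid] = pown(x[gid], 3);   // x^3, valid for negative x as well
 *   }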
*/ float __ovld __cnfn pown(float, int); float2 __ovld __cnfn pown(float2, int2); float3 __ovld __cnfn pown(float3, int3); float4 __ovld __cnfn pown(float4, int4); float8 __ovld __cnfn pown(float8, int8); float16 __ovld __cnfn pown(float16, int16); #ifdef cl_khr_fp64 double __ovld __cnfn pown(double, int); double2 __ovld __cnfn pown(double2, int2); double3 __ovld __cnfn pown(double3, int3); double4 __ovld __cnfn pown(double4, int4); double8 __ovld __cnfn pown(double8, int8); double16 __ovld __cnfn pown(double16, int16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn pown(half, int); half2 __ovld __cnfn pown(half2, int2); half3 __ovld __cnfn pown(half3, int3); half4 __ovld __cnfn pown(half4, int4); half8 __ovld __cnfn pown(half8, int8); half16 __ovld __cnfn pown(half16, int16); #endif //cl_khr_fp16 /** * Compute x to the power y, where x is >= 0. */ float __ovld __cnfn powr(float, float); float2 __ovld __cnfn powr(float2, float2); float3 __ovld __cnfn powr(float3, float3); float4 __ovld __cnfn powr(float4, float4); float8 __ovld __cnfn powr(float8, float8); float16 __ovld __cnfn powr(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn powr(double, double); double2 __ovld __cnfn powr(double2, double2); double3 __ovld __cnfn powr(double3, double3); double4 __ovld __cnfn powr(double4, double4); double8 __ovld __cnfn powr(double8, double8); double16 __ovld __cnfn powr(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn powr(half, half); half2 __ovld __cnfn powr(half2, half2); half3 __ovld __cnfn powr(half3, half3); half4 __ovld __cnfn powr(half4, half4); half8 __ovld __cnfn powr(half8, half8); half16 __ovld __cnfn powr(half16, half16); #endif //cl_khr_fp16 /** * Compute the value r such that r = x - n*y, where n * is the integer nearest the exact value of x/y. If there * are two integers closest to x/y, n shall be the even * one. If r is zero, it is given the same sign as x. */ float __ovld __cnfn remainder(float, float); float2 __ovld __cnfn remainder(float2, float2); float3 __ovld __cnfn remainder(float3, float3); float4 __ovld __cnfn remainder(float4, float4); float8 __ovld __cnfn remainder(float8, float8); float16 __ovld __cnfn remainder(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn remainder(double, double); double2 __ovld __cnfn remainder(double2, double2); double3 __ovld __cnfn remainder(double3, double3); double4 __ovld __cnfn remainder(double4, double4); double8 __ovld __cnfn remainder(double8, double8); double16 __ovld __cnfn remainder(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn remainder(half, half); half2 __ovld __cnfn remainder(half2, half2); half3 __ovld __cnfn remainder(half3, half3); half4 __ovld __cnfn remainder(half4, half4); half8 __ovld __cnfn remainder(half8, half8); half16 __ovld __cnfn remainder(half16, half16); #endif //cl_khr_fp16 /** * The remquo function computes the value r such * that r = x - n*y, where n is the integer nearest the * exact value of x/y. If there are two integers closest * to x/y, n shall be the even one. If r is zero, it is * given the same sign as x. This is the same value * that is returned by the remainder function. * remquo also calculates the lower seven bits of the * integral quotient x/y, and gives that value the same * sign as x/y. It stores this signed value in the object * pointed to by quo. 
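 *
 * Usage sketch (illustrative; kernel and argument names are hypothetical):
 * a common use is trigonometric argument reduction, where the low quotient
 * bits select the quadrant and the remainder is the reduced angle:
 *
 *   __kernel void reduce_half_pi(__global const float *x,
 *                                __global float *r, __global int *quadrant) {
 *       size_t gid = get_global_id(0);
 *       int q;
 *       r[gid] = remquo(x[gid], M_PI_2_F, &q);   // x = n * (PI/2) + r
 *       quadrant[gid] = q & 3;                   // quadrant index modulo 4
 *   }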
*/ #if defined(__opencl_c_generic_address_space) float __ovld remquo(float, float, int *); float2 __ovld remquo(float2, float2, int2 *); float3 __ovld remquo(float3, float3, int3 *); float4 __ovld remquo(float4, float4, int4 *); float8 __ovld remquo(float8, float8, int8 *); float16 __ovld remquo(float16, float16, int16 *); #ifdef cl_khr_fp64 double __ovld remquo(double, double, int *); double2 __ovld remquo(double2, double2, int2 *); double3 __ovld remquo(double3, double3, int3 *); double4 __ovld remquo(double4, double4, int4 *); double8 __ovld remquo(double8, double8, int8 *); double16 __ovld remquo(double16, double16, int16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld remquo(half, half, int *); half2 __ovld remquo(half2, half2, int2 *); half3 __ovld remquo(half3, half3, int3 *); half4 __ovld remquo(half4, half4, int4 *); half8 __ovld remquo(half8, half8, int8 *); half16 __ovld remquo(half16, half16, int16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) float __ovld remquo(float, float, __global int *); float2 __ovld remquo(float2, float2, __global int2 *); float3 __ovld remquo(float3, float3, __global int3 *); float4 __ovld remquo(float4, float4, __global int4 *); float8 __ovld remquo(float8, float8, __global int8 *); float16 __ovld remquo(float16, float16, __global int16 *); float __ovld remquo(float, float, __local int *); float2 __ovld remquo(float2, float2, __local int2 *); float3 __ovld remquo(float3, float3, __local int3 *); float4 __ovld remquo(float4, float4, __local int4 *); float8 __ovld remquo(float8, float8, __local int8 *); float16 __ovld remquo(float16, float16, __local int16 *); float __ovld remquo(float, float, __private int *); float2 __ovld remquo(float2, float2, __private int2 *); float3 __ovld remquo(float3, float3, __private int3 *); float4 __ovld remquo(float4, float4, __private int4 *); float8 __ovld remquo(float8, float8, __private int8 *); float16 __ovld remquo(float16, float16, __private int16 *); #ifdef cl_khr_fp64 double __ovld remquo(double, double, __global int *); double2 __ovld remquo(double2, double2, __global int2 *); double3 __ovld remquo(double3, double3, __global int3 *); double4 __ovld remquo(double4, double4, __global int4 *); double8 __ovld remquo(double8, double8, __global int8 *); double16 __ovld remquo(double16, double16, __global int16 *); double __ovld remquo(double, double, __local int *); double2 __ovld remquo(double2, double2, __local int2 *); double3 __ovld remquo(double3, double3, __local int3 *); double4 __ovld remquo(double4, double4, __local int4 *); double8 __ovld remquo(double8, double8, __local int8 *); double16 __ovld remquo(double16, double16, __local int16 *); double __ovld remquo(double, double, __private int *); double2 __ovld remquo(double2, double2, __private int2 *); double3 __ovld remquo(double3, double3, __private int3 *); double4 __ovld remquo(double4, double4, __private int4 *); double8 __ovld remquo(double8, double8, __private int8 *); double16 __ovld remquo(double16, double16, __private int16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld remquo(half, half, __global int *); half2 __ovld remquo(half2, half2, __global int2 *); half3 __ovld remquo(half3, half3, __global int3 *); half4 __ovld remquo(half4, half4, __global int4 *); half8 __ovld remquo(half8, half8, __global int8 *); half16 __ovld remquo(half16, half16, __global int16 *); half __ovld remquo(half, half, __local int *); half2 __ovld remquo(half2, half2, __local 
int2 *); half3 __ovld remquo(half3, half3, __local int3 *); half4 __ovld remquo(half4, half4, __local int4 *); half8 __ovld remquo(half8, half8, __local int8 *); half16 __ovld remquo(half16, half16, __local int16 *); half __ovld remquo(half, half, __private int *); half2 __ovld remquo(half2, half2, __private int2 *); half3 __ovld remquo(half3, half3, __private int3 *); half4 __ovld remquo(half4, half4, __private int4 *); half8 __ovld remquo(half8, half8, __private int8 *); half16 __ovld remquo(half16, half16, __private int16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_named_address_space_builtins) /** * Round to integral value (using round to nearest * even rounding mode) in floating-point format. * Refer to section 7.1 for description of rounding * modes. */ float __ovld __cnfn rint(float); float2 __ovld __cnfn rint(float2); float3 __ovld __cnfn rint(float3); float4 __ovld __cnfn rint(float4); float8 __ovld __cnfn rint(float8); float16 __ovld __cnfn rint(float16); #ifdef cl_khr_fp64 double __ovld __cnfn rint(double); double2 __ovld __cnfn rint(double2); double3 __ovld __cnfn rint(double3); double4 __ovld __cnfn rint(double4); double8 __ovld __cnfn rint(double8); double16 __ovld __cnfn rint(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn rint(half); half2 __ovld __cnfn rint(half2); half3 __ovld __cnfn rint(half3); half4 __ovld __cnfn rint(half4); half8 __ovld __cnfn rint(half8); half16 __ovld __cnfn rint(half16); #endif //cl_khr_fp16 /** * Compute x to the power 1/y. */ float __ovld __cnfn rootn(float, int); float2 __ovld __cnfn rootn(float2, int2); float3 __ovld __cnfn rootn(float3, int3); float4 __ovld __cnfn rootn(float4, int4); float8 __ovld __cnfn rootn(float8, int8); float16 __ovld __cnfn rootn(float16, int16); #ifdef cl_khr_fp64 double __ovld __cnfn rootn(double, int); double2 __ovld __cnfn rootn(double2, int2); double3 __ovld __cnfn rootn(double3, int3); double4 __ovld __cnfn rootn(double4, int4); double8 __ovld __cnfn rootn(double8, int8); double16 __ovld __cnfn rootn(double16, int16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn rootn(half, int); half2 __ovld __cnfn rootn(half2, int2); half3 __ovld __cnfn rootn(half3, int3); half4 __ovld __cnfn rootn(half4, int4); half8 __ovld __cnfn rootn(half8, int8); half16 __ovld __cnfn rootn(half16, int16); #endif //cl_khr_fp16 /** * Return the integral value nearest to x rounding * halfway cases away from zero, regardless of the * current rounding direction. */ float __ovld __cnfn round(float); float2 __ovld __cnfn round(float2); float3 __ovld __cnfn round(float3); float4 __ovld __cnfn round(float4); float8 __ovld __cnfn round(float8); float16 __ovld __cnfn round(float16); #ifdef cl_khr_fp64 double __ovld __cnfn round(double); double2 __ovld __cnfn round(double2); double3 __ovld __cnfn round(double3); double4 __ovld __cnfn round(double4); double8 __ovld __cnfn round(double8); double16 __ovld __cnfn round(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn round(half); half2 __ovld __cnfn round(half2); half3 __ovld __cnfn round(half3); half4 __ovld __cnfn round(half4); half8 __ovld __cnfn round(half8); half16 __ovld __cnfn round(half16); #endif //cl_khr_fp16 /** * Compute inverse square root. 
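 *
 * Usage sketch (illustrative; the kernel name is hypothetical): normalizing
 * the xyz part of a float4 without a divide per component:
 *
 *   __kernel void normalize3(__global float4 *v) {
 *       size_t gid = get_global_id(0);
 *       float4 p = v[gid];
 *       float inv = rsqrt(p.x*p.x + p.y*p.y + p.z*p.z);
 *       v[gid] = (float4)(p.x*inv, p.y*inv, p.z*inv, p.w);
 *   }
 *
 * (The geometric built-ins declared elsewhere in this file provide
 * normalize() directly; rsqrt is convenient when the squared length is
 * already at hand.)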
*/ float __ovld __cnfn rsqrt(float); float2 __ovld __cnfn rsqrt(float2); float3 __ovld __cnfn rsqrt(float3); float4 __ovld __cnfn rsqrt(float4); float8 __ovld __cnfn rsqrt(float8); float16 __ovld __cnfn rsqrt(float16); #ifdef cl_khr_fp64 double __ovld __cnfn rsqrt(double); double2 __ovld __cnfn rsqrt(double2); double3 __ovld __cnfn rsqrt(double3); double4 __ovld __cnfn rsqrt(double4); double8 __ovld __cnfn rsqrt(double8); double16 __ovld __cnfn rsqrt(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn rsqrt(half); half2 __ovld __cnfn rsqrt(half2); half3 __ovld __cnfn rsqrt(half3); half4 __ovld __cnfn rsqrt(half4); half8 __ovld __cnfn rsqrt(half8); half16 __ovld __cnfn rsqrt(half16); #endif //cl_khr_fp16 /** * Compute sine. */ float __ovld __cnfn sin(float); float2 __ovld __cnfn sin(float2); float3 __ovld __cnfn sin(float3); float4 __ovld __cnfn sin(float4); float8 __ovld __cnfn sin(float8); float16 __ovld __cnfn sin(float16); #ifdef cl_khr_fp64 double __ovld __cnfn sin(double); double2 __ovld __cnfn sin(double2); double3 __ovld __cnfn sin(double3); double4 __ovld __cnfn sin(double4); double8 __ovld __cnfn sin(double8); double16 __ovld __cnfn sin(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn sin(half); half2 __ovld __cnfn sin(half2); half3 __ovld __cnfn sin(half3); half4 __ovld __cnfn sin(half4); half8 __ovld __cnfn sin(half8); half16 __ovld __cnfn sin(half16); #endif //cl_khr_fp16 /** * Compute sine and cosine of x. The computed sine * is the return value and computed cosine is returned * in cosval. */ #if defined(__opencl_c_generic_address_space) float __ovld sincos(float, float *); float2 __ovld sincos(float2, float2 *); float3 __ovld sincos(float3, float3 *); float4 __ovld sincos(float4, float4 *); float8 __ovld sincos(float8, float8 *); float16 __ovld sincos(float16, float16 *); #ifdef cl_khr_fp64 double __ovld sincos(double, double *); double2 __ovld sincos(double2, double2 *); double3 __ovld sincos(double3, double3 *); double4 __ovld sincos(double4, double4 *); double8 __ovld sincos(double8, double8 *); double16 __ovld sincos(double16, double16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld sincos(half, half *); half2 __ovld sincos(half2, half2 *); half3 __ovld sincos(half3, half3 *); half4 __ovld sincos(half4, half4 *); half8 __ovld sincos(half8, half8 *); half16 __ovld sincos(half16, half16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) float __ovld sincos(float, __global float *); float2 __ovld sincos(float2, __global float2 *); float3 __ovld sincos(float3, __global float3 *); float4 __ovld sincos(float4, __global float4 *); float8 __ovld sincos(float8, __global float8 *); float16 __ovld sincos(float16, __global float16 *); float __ovld sincos(float, __local float *); float2 __ovld sincos(float2, __local float2 *); float3 __ovld sincos(float3, __local float3 *); float4 __ovld sincos(float4, __local float4 *); float8 __ovld sincos(float8, __local float8 *); float16 __ovld sincos(float16, __local float16 *); float __ovld sincos(float, __private float *); float2 __ovld sincos(float2, __private float2 *); float3 __ovld sincos(float3, __private float3 *); float4 __ovld sincos(float4, __private float4 *); float8 __ovld sincos(float8, __private float8 *); float16 __ovld sincos(float16, __private float16 *); #ifdef cl_khr_fp64 double __ovld sincos(double, __global double *); double2 __ovld sincos(double2, __global double2 *); double3 __ovld sincos(double3, 
__global double3 *); double4 __ovld sincos(double4, __global double4 *); double8 __ovld sincos(double8, __global double8 *); double16 __ovld sincos(double16, __global double16 *); double __ovld sincos(double, __local double *); double2 __ovld sincos(double2, __local double2 *); double3 __ovld sincos(double3, __local double3 *); double4 __ovld sincos(double4, __local double4 *); double8 __ovld sincos(double8, __local double8 *); double16 __ovld sincos(double16, __local double16 *); double __ovld sincos(double, __private double *); double2 __ovld sincos(double2, __private double2 *); double3 __ovld sincos(double3, __private double3 *); double4 __ovld sincos(double4, __private double4 *); double8 __ovld sincos(double8, __private double8 *); double16 __ovld sincos(double16, __private double16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld sincos(half, __global half *); half2 __ovld sincos(half2, __global half2 *); half3 __ovld sincos(half3, __global half3 *); half4 __ovld sincos(half4, __global half4 *); half8 __ovld sincos(half8, __global half8 *); half16 __ovld sincos(half16, __global half16 *); half __ovld sincos(half, __local half *); half2 __ovld sincos(half2, __local half2 *); half3 __ovld sincos(half3, __local half3 *); half4 __ovld sincos(half4, __local half4 *); half8 __ovld sincos(half8, __local half8 *); half16 __ovld sincos(half16, __local half16 *); half __ovld sincos(half, __private half *); half2 __ovld sincos(half2, __private half2 *); half3 __ovld sincos(half3, __private half3 *); half4 __ovld sincos(half4, __private half4 *); half8 __ovld sincos(half8, __private half8 *); half16 __ovld sincos(half16, __private half16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_named_address_space_builtins) /** * Compute hyperbolic sine. */ float __ovld __cnfn sinh(float); float2 __ovld __cnfn sinh(float2); float3 __ovld __cnfn sinh(float3); float4 __ovld __cnfn sinh(float4); float8 __ovld __cnfn sinh(float8); float16 __ovld __cnfn sinh(float16); #ifdef cl_khr_fp64 double __ovld __cnfn sinh(double); double2 __ovld __cnfn sinh(double2); double3 __ovld __cnfn sinh(double3); double4 __ovld __cnfn sinh(double4); double8 __ovld __cnfn sinh(double8); double16 __ovld __cnfn sinh(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn sinh(half); half2 __ovld __cnfn sinh(half2); half3 __ovld __cnfn sinh(half3); half4 __ovld __cnfn sinh(half4); half8 __ovld __cnfn sinh(half8); half16 __ovld __cnfn sinh(half16); #endif //cl_khr_fp16 /** * Compute sin (PI * x). */ float __ovld __cnfn sinpi(float); float2 __ovld __cnfn sinpi(float2); float3 __ovld __cnfn sinpi(float3); float4 __ovld __cnfn sinpi(float4); float8 __ovld __cnfn sinpi(float8); float16 __ovld __cnfn sinpi(float16); #ifdef cl_khr_fp64 double __ovld __cnfn sinpi(double); double2 __ovld __cnfn sinpi(double2); double3 __ovld __cnfn sinpi(double3); double4 __ovld __cnfn sinpi(double4); double8 __ovld __cnfn sinpi(double8); double16 __ovld __cnfn sinpi(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn sinpi(half); half2 __ovld __cnfn sinpi(half2); half3 __ovld __cnfn sinpi(half3); half4 __ovld __cnfn sinpi(half4); half8 __ovld __cnfn sinpi(half8); half16 __ovld __cnfn sinpi(half16); #endif //cl_khr_fp16 /** * Compute square root. 
*/ float __ovld __cnfn sqrt(float); float2 __ovld __cnfn sqrt(float2); float3 __ovld __cnfn sqrt(float3); float4 __ovld __cnfn sqrt(float4); float8 __ovld __cnfn sqrt(float8); float16 __ovld __cnfn sqrt(float16); #ifdef cl_khr_fp64 double __ovld __cnfn sqrt(double); double2 __ovld __cnfn sqrt(double2); double3 __ovld __cnfn sqrt(double3); double4 __ovld __cnfn sqrt(double4); double8 __ovld __cnfn sqrt(double8); double16 __ovld __cnfn sqrt(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn sqrt(half); half2 __ovld __cnfn sqrt(half2); half3 __ovld __cnfn sqrt(half3); half4 __ovld __cnfn sqrt(half4); half8 __ovld __cnfn sqrt(half8); half16 __ovld __cnfn sqrt(half16); #endif //cl_khr_fp16 /** * Compute tangent. */ float __ovld __cnfn tan(float); float2 __ovld __cnfn tan(float2); float3 __ovld __cnfn tan(float3); float4 __ovld __cnfn tan(float4); float8 __ovld __cnfn tan(float8); float16 __ovld __cnfn tan(float16); #ifdef cl_khr_fp64 double __ovld __cnfn tan(double); double2 __ovld __cnfn tan(double2); double3 __ovld __cnfn tan(double3); double4 __ovld __cnfn tan(double4); double8 __ovld __cnfn tan(double8); double16 __ovld __cnfn tan(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn tan(half); half2 __ovld __cnfn tan(half2); half3 __ovld __cnfn tan(half3); half4 __ovld __cnfn tan(half4); half8 __ovld __cnfn tan(half8); half16 __ovld __cnfn tan(half16); #endif //cl_khr_fp16 /** * Compute hyperbolic tangent. */ float __ovld __cnfn tanh(float); float2 __ovld __cnfn tanh(float2); float3 __ovld __cnfn tanh(float3); float4 __ovld __cnfn tanh(float4); float8 __ovld __cnfn tanh(float8); float16 __ovld __cnfn tanh(float16); #ifdef cl_khr_fp64 double __ovld __cnfn tanh(double); double2 __ovld __cnfn tanh(double2); double3 __ovld __cnfn tanh(double3); double4 __ovld __cnfn tanh(double4); double8 __ovld __cnfn tanh(double8); double16 __ovld __cnfn tanh(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn tanh(half); half2 __ovld __cnfn tanh(half2); half3 __ovld __cnfn tanh(half3); half4 __ovld __cnfn tanh(half4); half8 __ovld __cnfn tanh(half8); half16 __ovld __cnfn tanh(half16); #endif //cl_khr_fp16 /** * Compute tan (PI * x). */ float __ovld __cnfn tanpi(float); float2 __ovld __cnfn tanpi(float2); float3 __ovld __cnfn tanpi(float3); float4 __ovld __cnfn tanpi(float4); float8 __ovld __cnfn tanpi(float8); float16 __ovld __cnfn tanpi(float16); #ifdef cl_khr_fp64 double __ovld __cnfn tanpi(double); double2 __ovld __cnfn tanpi(double2); double3 __ovld __cnfn tanpi(double3); double4 __ovld __cnfn tanpi(double4); double8 __ovld __cnfn tanpi(double8); double16 __ovld __cnfn tanpi(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn tanpi(half); half2 __ovld __cnfn tanpi(half2); half3 __ovld __cnfn tanpi(half3); half4 __ovld __cnfn tanpi(half4); half8 __ovld __cnfn tanpi(half8); half16 __ovld __cnfn tanpi(half16); #endif //cl_khr_fp16 /** * Compute the gamma function. 
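 * For instance (illustrative values, following from the identity
 * tgamma(n) == (n-1)! for positive integral n):
 *   tgamma(5.0f)   // == 24.0f, i.e. 4!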
*/ float __ovld __cnfn tgamma(float); float2 __ovld __cnfn tgamma(float2); float3 __ovld __cnfn tgamma(float3); float4 __ovld __cnfn tgamma(float4); float8 __ovld __cnfn tgamma(float8); float16 __ovld __cnfn tgamma(float16); #ifdef cl_khr_fp64 double __ovld __cnfn tgamma(double); double2 __ovld __cnfn tgamma(double2); double3 __ovld __cnfn tgamma(double3); double4 __ovld __cnfn tgamma(double4); double8 __ovld __cnfn tgamma(double8); double16 __ovld __cnfn tgamma(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn tgamma(half); half2 __ovld __cnfn tgamma(half2); half3 __ovld __cnfn tgamma(half3); half4 __ovld __cnfn tgamma(half4); half8 __ovld __cnfn tgamma(half8); half16 __ovld __cnfn tgamma(half16); #endif //cl_khr_fp16 /** * Round to integral value using the round to zero * rounding mode. */ float __ovld __cnfn trunc(float); float2 __ovld __cnfn trunc(float2); float3 __ovld __cnfn trunc(float3); float4 __ovld __cnfn trunc(float4); float8 __ovld __cnfn trunc(float8); float16 __ovld __cnfn trunc(float16); #ifdef cl_khr_fp64 double __ovld __cnfn trunc(double); double2 __ovld __cnfn trunc(double2); double3 __ovld __cnfn trunc(double3); double4 __ovld __cnfn trunc(double4); double8 __ovld __cnfn trunc(double8); double16 __ovld __cnfn trunc(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn trunc(half); half2 __ovld __cnfn trunc(half2); half3 __ovld __cnfn trunc(half3); half4 __ovld __cnfn trunc(half4); half8 __ovld __cnfn trunc(half8); half16 __ovld __cnfn trunc(half16); #endif //cl_khr_fp16 /** * Compute cosine. x must be in the range -2^16 ... +2^16. */ float __ovld __cnfn half_cos(float); float2 __ovld __cnfn half_cos(float2); float3 __ovld __cnfn half_cos(float3); float4 __ovld __cnfn half_cos(float4); float8 __ovld __cnfn half_cos(float8); float16 __ovld __cnfn half_cos(float16); /** * Compute x / y. */ float __ovld __cnfn half_divide(float, float); float2 __ovld __cnfn half_divide(float2, float2); float3 __ovld __cnfn half_divide(float3, float3); float4 __ovld __cnfn half_divide(float4, float4); float8 __ovld __cnfn half_divide(float8, float8); float16 __ovld __cnfn half_divide(float16, float16); /** * Compute the base- e exponential of x. */ float __ovld __cnfn half_exp(float); float2 __ovld __cnfn half_exp(float2); float3 __ovld __cnfn half_exp(float3); float4 __ovld __cnfn half_exp(float4); float8 __ovld __cnfn half_exp(float8); float16 __ovld __cnfn half_exp(float16); /** * Compute the base- 2 exponential of x. */ float __ovld __cnfn half_exp2(float); float2 __ovld __cnfn half_exp2(float2); float3 __ovld __cnfn half_exp2(float3); float4 __ovld __cnfn half_exp2(float4); float8 __ovld __cnfn half_exp2(float8); float16 __ovld __cnfn half_exp2(float16); /** * Compute the base- 10 exponential of x. */ float __ovld __cnfn half_exp10(float); float2 __ovld __cnfn half_exp10(float2); float3 __ovld __cnfn half_exp10(float3); float4 __ovld __cnfn half_exp10(float4); float8 __ovld __cnfn half_exp10(float8); float16 __ovld __cnfn half_exp10(float16); /** * Compute natural logarithm. */ float __ovld __cnfn half_log(float); float2 __ovld __cnfn half_log(float2); float3 __ovld __cnfn half_log(float3); float4 __ovld __cnfn half_log(float4); float8 __ovld __cnfn half_log(float8); float16 __ovld __cnfn half_log(float16); /** * Compute a base 2 logarithm. 
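 * A minimal usage sketch (illustrative; the half_ variants are the
 * reduced-accuracy, typically faster forms of the corresponding functions):
 *   half_log2(1024.0f)   // approximately 10.0f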
*/ float __ovld __cnfn half_log2(float); float2 __ovld __cnfn half_log2(float2); float3 __ovld __cnfn half_log2(float3); float4 __ovld __cnfn half_log2(float4); float8 __ovld __cnfn half_log2(float8); float16 __ovld __cnfn half_log2(float16); /** * Compute a base 10 logarithm. */ float __ovld __cnfn half_log10(float); float2 __ovld __cnfn half_log10(float2); float3 __ovld __cnfn half_log10(float3); float4 __ovld __cnfn half_log10(float4); float8 __ovld __cnfn half_log10(float8); float16 __ovld __cnfn half_log10(float16); /** * Compute x to the power y, where x is >= 0. */ float __ovld __cnfn half_powr(float, float); float2 __ovld __cnfn half_powr(float2, float2); float3 __ovld __cnfn half_powr(float3, float3); float4 __ovld __cnfn half_powr(float4, float4); float8 __ovld __cnfn half_powr(float8, float8); float16 __ovld __cnfn half_powr(float16, float16); /** * Compute reciprocal. */ float __ovld __cnfn half_recip(float); float2 __ovld __cnfn half_recip(float2); float3 __ovld __cnfn half_recip(float3); float4 __ovld __cnfn half_recip(float4); float8 __ovld __cnfn half_recip(float8); float16 __ovld __cnfn half_recip(float16); /** * Compute inverse square root. */ float __ovld __cnfn half_rsqrt(float); float2 __ovld __cnfn half_rsqrt(float2); float3 __ovld __cnfn half_rsqrt(float3); float4 __ovld __cnfn half_rsqrt(float4); float8 __ovld __cnfn half_rsqrt(float8); float16 __ovld __cnfn half_rsqrt(float16); /** * Compute sine. x must be in the range -2^16 ... +2^16. */ float __ovld __cnfn half_sin(float); float2 __ovld __cnfn half_sin(float2); float3 __ovld __cnfn half_sin(float3); float4 __ovld __cnfn half_sin(float4); float8 __ovld __cnfn half_sin(float8); float16 __ovld __cnfn half_sin(float16); /** * Compute square root. */ float __ovld __cnfn half_sqrt(float); float2 __ovld __cnfn half_sqrt(float2); float3 __ovld __cnfn half_sqrt(float3); float4 __ovld __cnfn half_sqrt(float4); float8 __ovld __cnfn half_sqrt(float8); float16 __ovld __cnfn half_sqrt(float16); /** * Compute tangent. x must be in the range -2^16 ... +2^16. */ float __ovld __cnfn half_tan(float); float2 __ovld __cnfn half_tan(float2); float3 __ovld __cnfn half_tan(float3); float4 __ovld __cnfn half_tan(float4); float8 __ovld __cnfn half_tan(float8); float16 __ovld __cnfn half_tan(float16); /** * Compute cosine over an implementation-defined range. * The maximum error is implementation-defined. */ float __ovld __cnfn native_cos(float); float2 __ovld __cnfn native_cos(float2); float3 __ovld __cnfn native_cos(float3); float4 __ovld __cnfn native_cos(float4); float8 __ovld __cnfn native_cos(float8); float16 __ovld __cnfn native_cos(float16); /** * Compute x / y over an implementation-defined range. * The maximum error is implementation-defined. */ float __ovld __cnfn native_divide(float, float); float2 __ovld __cnfn native_divide(float2, float2); float3 __ovld __cnfn native_divide(float3, float3); float4 __ovld __cnfn native_divide(float4, float4); float8 __ovld __cnfn native_divide(float8, float8); float16 __ovld __cnfn native_divide(float16, float16); /** * Compute the base- e exponential of x over an * implementation-defined range. The maximum error is * implementation-defined. */ float __ovld __cnfn native_exp(float); float2 __ovld __cnfn native_exp(float2); float3 __ovld __cnfn native_exp(float3); float4 __ovld __cnfn native_exp(float4); float8 __ovld __cnfn native_exp(float8); float16 __ovld __cnfn native_exp(float16); /** * Compute the base- 2 exponential of x over an * implementation-defined range.
The maximum error is * implementation-defined. */ float __ovld __cnfn native_exp2(float); float2 __ovld __cnfn native_exp2(float2); float3 __ovld __cnfn native_exp2(float3); float4 __ovld __cnfn native_exp2(float4); float8 __ovld __cnfn native_exp2(float8); float16 __ovld __cnfn native_exp2(float16); /** * Compute the base- 10 exponential of x over an * implementation-defined range. The maximum error is * implementation-defined. */ float __ovld __cnfn native_exp10(float); float2 __ovld __cnfn native_exp10(float2); float3 __ovld __cnfn native_exp10(float3); float4 __ovld __cnfn native_exp10(float4); float8 __ovld __cnfn native_exp10(float8); float16 __ovld __cnfn native_exp10(float16); /** * Compute natural logarithm over an implementation-defined * range. The maximum error is * implementation-defined. */ float __ovld __cnfn native_log(float); float2 __ovld __cnfn native_log(float2); float3 __ovld __cnfn native_log(float3); float4 __ovld __cnfn native_log(float4); float8 __ovld __cnfn native_log(float8); float16 __ovld __cnfn native_log(float16); /** * Compute a base 2 logarithm over an implementation-defined * range. The maximum error is implementation-defined. */ float __ovld __cnfn native_log2(float); float2 __ovld __cnfn native_log2(float2); float3 __ovld __cnfn native_log2(float3); float4 __ovld __cnfn native_log2(float4); float8 __ovld __cnfn native_log2(float8); float16 __ovld __cnfn native_log2(float16); /** * Compute a base 10 logarithm over an implementation-defined * range. The maximum error is implementation-defined. */ float __ovld __cnfn native_log10(float); float2 __ovld __cnfn native_log10(float2); float3 __ovld __cnfn native_log10(float3); float4 __ovld __cnfn native_log10(float4); float8 __ovld __cnfn native_log10(float8); float16 __ovld __cnfn native_log10(float16); /** * Compute x to the power y, where x is >= 0. The range of * x and y is implementation-defined. The maximum error * is implementation-defined. */ float __ovld __cnfn native_powr(float, float); float2 __ovld __cnfn native_powr(float2, float2); float3 __ovld __cnfn native_powr(float3, float3); float4 __ovld __cnfn native_powr(float4, float4); float8 __ovld __cnfn native_powr(float8, float8); float16 __ovld __cnfn native_powr(float16, float16); /** * Compute reciprocal over an implementation-defined * range. The maximum error is implementation-defined. */ float __ovld __cnfn native_recip(float); float2 __ovld __cnfn native_recip(float2); float3 __ovld __cnfn native_recip(float3); float4 __ovld __cnfn native_recip(float4); float8 __ovld __cnfn native_recip(float8); float16 __ovld __cnfn native_recip(float16); /** * Compute inverse square root over an implementation-defined * range. The maximum error is implementation-defined. */ float __ovld __cnfn native_rsqrt(float); float2 __ovld __cnfn native_rsqrt(float2); float3 __ovld __cnfn native_rsqrt(float3); float4 __ovld __cnfn native_rsqrt(float4); float8 __ovld __cnfn native_rsqrt(float8); float16 __ovld __cnfn native_rsqrt(float16); /** * Compute sine over an implementation-defined range. * The maximum error is implementation-defined. */ float __ovld __cnfn native_sin(float); float2 __ovld __cnfn native_sin(float2); float3 __ovld __cnfn native_sin(float3); float4 __ovld __cnfn native_sin(float4); float8 __ovld __cnfn native_sin(float8); float16 __ovld __cnfn native_sin(float16); /** * Compute square root over an implementation-defined * range. The maximum error is implementation-defined.
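 * Illustrative use (a sketch, assuming a float4 vector v; native_ variants
 * favour speed, so the result may differ slightly from sqrt):
 *   float len = native_sqrt(dot(v, v));   // fast vector length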
*/ float __ovld __cnfn native_sqrt(float); float2 __ovld __cnfn native_sqrt(float2); float3 __ovld __cnfn native_sqrt(float3); float4 __ovld __cnfn native_sqrt(float4); float8 __ovld __cnfn native_sqrt(float8); float16 __ovld __cnfn native_sqrt(float16); /** * Compute tangent over an implementation-defined range. * The maximum error is implementation-defined. */ float __ovld __cnfn native_tan(float); float2 __ovld __cnfn native_tan(float2); float3 __ovld __cnfn native_tan(float3); float4 __ovld __cnfn native_tan(float4); float8 __ovld __cnfn native_tan(float8); float16 __ovld __cnfn native_tan(float16); // OpenCL v1.1 s6.11.3, v1.2 s6.12.3, v2.0 s6.13.3 - Integer Functions /** * Returns | x |. */ uchar __ovld __cnfn abs(char); uchar __ovld __cnfn abs(uchar); uchar2 __ovld __cnfn abs(char2); uchar2 __ovld __cnfn abs(uchar2); uchar3 __ovld __cnfn abs(char3); uchar3 __ovld __cnfn abs(uchar3); uchar4 __ovld __cnfn abs(char4); uchar4 __ovld __cnfn abs(uchar4); uchar8 __ovld __cnfn abs(char8); uchar8 __ovld __cnfn abs(uchar8); uchar16 __ovld __cnfn abs(char16); uchar16 __ovld __cnfn abs(uchar16); ushort __ovld __cnfn abs(short); ushort __ovld __cnfn abs(ushort); ushort2 __ovld __cnfn abs(short2); ushort2 __ovld __cnfn abs(ushort2); ushort3 __ovld __cnfn abs(short3); ushort3 __ovld __cnfn abs(ushort3); ushort4 __ovld __cnfn abs(short4); ushort4 __ovld __cnfn abs(ushort4); ushort8 __ovld __cnfn abs(short8); ushort8 __ovld __cnfn abs(ushort8); ushort16 __ovld __cnfn abs(short16); ushort16 __ovld __cnfn abs(ushort16); uint __ovld __cnfn abs(int); uint __ovld __cnfn abs(uint); uint2 __ovld __cnfn abs(int2); uint2 __ovld __cnfn abs(uint2); uint3 __ovld __cnfn abs(int3); uint3 __ovld __cnfn abs(uint3); uint4 __ovld __cnfn abs(int4); uint4 __ovld __cnfn abs(uint4); uint8 __ovld __cnfn abs(int8); uint8 __ovld __cnfn abs(uint8); uint16 __ovld __cnfn abs(int16); uint16 __ovld __cnfn abs(uint16); ulong __ovld __cnfn abs(long); ulong __ovld __cnfn abs(ulong); ulong2 __ovld __cnfn abs(long2); ulong2 __ovld __cnfn abs(ulong2); ulong3 __ovld __cnfn abs(long3); ulong3 __ovld __cnfn abs(ulong3); ulong4 __ovld __cnfn abs(long4); ulong4 __ovld __cnfn abs(ulong4); ulong8 __ovld __cnfn abs(long8); ulong8 __ovld __cnfn abs(ulong8); ulong16 __ovld __cnfn abs(long16); ulong16 __ovld __cnfn abs(ulong16); /** * Returns | x - y | without modulo overflow. 
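 * Illustrative values, following directly from the definition above:
 *   abs_diff((uchar)10, (uchar)200)   // == 190, no modulo wrap-around
 *   abs_diff((char)-100, (char)100)   // == 200, representable in the unsigned result type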
*/ uchar __ovld __cnfn abs_diff(char, char); uchar __ovld __cnfn abs_diff(uchar, uchar); uchar2 __ovld __cnfn abs_diff(char2, char2); uchar2 __ovld __cnfn abs_diff(uchar2, uchar2); uchar3 __ovld __cnfn abs_diff(char3, char3); uchar3 __ovld __cnfn abs_diff(uchar3, uchar3); uchar4 __ovld __cnfn abs_diff(char4, char4); uchar4 __ovld __cnfn abs_diff(uchar4, uchar4); uchar8 __ovld __cnfn abs_diff(char8, char8); uchar8 __ovld __cnfn abs_diff(uchar8, uchar8); uchar16 __ovld __cnfn abs_diff(char16, char16); uchar16 __ovld __cnfn abs_diff(uchar16, uchar16); ushort __ovld __cnfn abs_diff(short, short); ushort __ovld __cnfn abs_diff(ushort, ushort); ushort2 __ovld __cnfn abs_diff(short2, short2); ushort2 __ovld __cnfn abs_diff(ushort2, ushort2); ushort3 __ovld __cnfn abs_diff(short3, short3); ushort3 __ovld __cnfn abs_diff(ushort3, ushort3); ushort4 __ovld __cnfn abs_diff(short4, short4); ushort4 __ovld __cnfn abs_diff(ushort4, ushort4); ushort8 __ovld __cnfn abs_diff(short8, short8); ushort8 __ovld __cnfn abs_diff(ushort8, ushort8); ushort16 __ovld __cnfn abs_diff(short16, short16); ushort16 __ovld __cnfn abs_diff(ushort16, ushort16); uint __ovld __cnfn abs_diff(int, int); uint __ovld __cnfn abs_diff(uint, uint); uint2 __ovld __cnfn abs_diff(int2, int2); uint2 __ovld __cnfn abs_diff(uint2, uint2); uint3 __ovld __cnfn abs_diff(int3, int3); uint3 __ovld __cnfn abs_diff(uint3, uint3); uint4 __ovld __cnfn abs_diff(int4, int4); uint4 __ovld __cnfn abs_diff(uint4, uint4); uint8 __ovld __cnfn abs_diff(int8, int8); uint8 __ovld __cnfn abs_diff(uint8, uint8); uint16 __ovld __cnfn abs_diff(int16, int16); uint16 __ovld __cnfn abs_diff(uint16, uint16); ulong __ovld __cnfn abs_diff(long, long); ulong __ovld __cnfn abs_diff(ulong, ulong); ulong2 __ovld __cnfn abs_diff(long2, long2); ulong2 __ovld __cnfn abs_diff(ulong2, ulong2); ulong3 __ovld __cnfn abs_diff(long3, long3); ulong3 __ovld __cnfn abs_diff(ulong3, ulong3); ulong4 __ovld __cnfn abs_diff(long4, long4); ulong4 __ovld __cnfn abs_diff(ulong4, ulong4); ulong8 __ovld __cnfn abs_diff(long8, long8); ulong8 __ovld __cnfn abs_diff(ulong8, ulong8); ulong16 __ovld __cnfn abs_diff(long16, long16); ulong16 __ovld __cnfn abs_diff(ulong16, ulong16); /** * Returns x + y and saturates the result. 
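 * Illustrative values, following directly from the definition above:
 *   add_sat((uchar)200, (uchar)100)   // == 255 (saturated; modular addition would give 44)
 *   add_sat((char)100, (char)100)     // == 127 (saturated)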
*/ char __ovld __cnfn add_sat(char, char); uchar __ovld __cnfn add_sat(uchar, uchar); char2 __ovld __cnfn add_sat(char2, char2); uchar2 __ovld __cnfn add_sat(uchar2, uchar2); char3 __ovld __cnfn add_sat(char3, char3); uchar3 __ovld __cnfn add_sat(uchar3, uchar3); char4 __ovld __cnfn add_sat(char4, char4); uchar4 __ovld __cnfn add_sat(uchar4, uchar4); char8 __ovld __cnfn add_sat(char8, char8); uchar8 __ovld __cnfn add_sat(uchar8, uchar8); char16 __ovld __cnfn add_sat(char16, char16); uchar16 __ovld __cnfn add_sat(uchar16, uchar16); short __ovld __cnfn add_sat(short, short); ushort __ovld __cnfn add_sat(ushort, ushort); short2 __ovld __cnfn add_sat(short2, short2); ushort2 __ovld __cnfn add_sat(ushort2, ushort2); short3 __ovld __cnfn add_sat(short3, short3); ushort3 __ovld __cnfn add_sat(ushort3, ushort3); short4 __ovld __cnfn add_sat(short4, short4); ushort4 __ovld __cnfn add_sat(ushort4, ushort4); short8 __ovld __cnfn add_sat(short8, short8); ushort8 __ovld __cnfn add_sat(ushort8, ushort8); short16 __ovld __cnfn add_sat(short16, short16); ushort16 __ovld __cnfn add_sat(ushort16, ushort16); int __ovld __cnfn add_sat(int, int); uint __ovld __cnfn add_sat(uint, uint); int2 __ovld __cnfn add_sat(int2, int2); uint2 __ovld __cnfn add_sat(uint2, uint2); int3 __ovld __cnfn add_sat(int3, int3); uint3 __ovld __cnfn add_sat(uint3, uint3); int4 __ovld __cnfn add_sat(int4, int4); uint4 __ovld __cnfn add_sat(uint4, uint4); int8 __ovld __cnfn add_sat(int8, int8); uint8 __ovld __cnfn add_sat(uint8, uint8); int16 __ovld __cnfn add_sat(int16, int16); uint16 __ovld __cnfn add_sat(uint16, uint16); long __ovld __cnfn add_sat(long, long); ulong __ovld __cnfn add_sat(ulong, ulong); long2 __ovld __cnfn add_sat(long2, long2); ulong2 __ovld __cnfn add_sat(ulong2, ulong2); long3 __ovld __cnfn add_sat(long3, long3); ulong3 __ovld __cnfn add_sat(ulong3, ulong3); long4 __ovld __cnfn add_sat(long4, long4); ulong4 __ovld __cnfn add_sat(ulong4, ulong4); long8 __ovld __cnfn add_sat(long8, long8); ulong8 __ovld __cnfn add_sat(ulong8, ulong8); long16 __ovld __cnfn add_sat(long16, long16); ulong16 __ovld __cnfn add_sat(ulong16, ulong16); /** * Returns (x + y) >> 1. The intermediate sum does * not modulo overflow. 
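 * Illustrative value, following directly from the definition above; the
 * carry of the intermediate sum is not lost:
 *   hadd(4000000000u, 4000000000u)   // == 4000000000u, although the plain
 *                                    // uint sum would overflow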
*/ char __ovld __cnfn hadd(char, char); uchar __ovld __cnfn hadd(uchar, uchar); char2 __ovld __cnfn hadd(char2, char2); uchar2 __ovld __cnfn hadd(uchar2, uchar2); char3 __ovld __cnfn hadd(char3, char3); uchar3 __ovld __cnfn hadd(uchar3, uchar3); char4 __ovld __cnfn hadd(char4, char4); uchar4 __ovld __cnfn hadd(uchar4, uchar4); char8 __ovld __cnfn hadd(char8, char8); uchar8 __ovld __cnfn hadd(uchar8, uchar8); char16 __ovld __cnfn hadd(char16, char16); uchar16 __ovld __cnfn hadd(uchar16, uchar16); short __ovld __cnfn hadd(short, short); ushort __ovld __cnfn hadd(ushort, ushort); short2 __ovld __cnfn hadd(short2, short2); ushort2 __ovld __cnfn hadd(ushort2, ushort2); short3 __ovld __cnfn hadd(short3, short3); ushort3 __ovld __cnfn hadd(ushort3, ushort3); short4 __ovld __cnfn hadd(short4, short4); ushort4 __ovld __cnfn hadd(ushort4, ushort4); short8 __ovld __cnfn hadd(short8, short8); ushort8 __ovld __cnfn hadd(ushort8, ushort8); short16 __ovld __cnfn hadd(short16, short16); ushort16 __ovld __cnfn hadd(ushort16, ushort16); int __ovld __cnfn hadd(int, int); uint __ovld __cnfn hadd(uint, uint); int2 __ovld __cnfn hadd(int2, int2); uint2 __ovld __cnfn hadd(uint2, uint2); int3 __ovld __cnfn hadd(int3, int3); uint3 __ovld __cnfn hadd(uint3, uint3); int4 __ovld __cnfn hadd(int4, int4); uint4 __ovld __cnfn hadd(uint4, uint4); int8 __ovld __cnfn hadd(int8, int8); uint8 __ovld __cnfn hadd(uint8, uint8); int16 __ovld __cnfn hadd(int16, int16); uint16 __ovld __cnfn hadd(uint16, uint16); long __ovld __cnfn hadd(long, long); ulong __ovld __cnfn hadd(ulong, ulong); long2 __ovld __cnfn hadd(long2, long2); ulong2 __ovld __cnfn hadd(ulong2, ulong2); long3 __ovld __cnfn hadd(long3, long3); ulong3 __ovld __cnfn hadd(ulong3, ulong3); long4 __ovld __cnfn hadd(long4, long4); ulong4 __ovld __cnfn hadd(ulong4, ulong4); long8 __ovld __cnfn hadd(long8, long8); ulong8 __ovld __cnfn hadd(ulong8, ulong8); long16 __ovld __cnfn hadd(long16, long16); ulong16 __ovld __cnfn hadd(ulong16, ulong16); /** * Returns (x + y + 1) >> 1. The intermediate sum * does not modulo overflow. 
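 * Illustrative value, following directly from the definition above:
 *   rhadd(3, 4)   // == 4 (the half is rounded up), whereas hadd(3, 4) == 3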
*/ char __ovld __cnfn rhadd(char, char); uchar __ovld __cnfn rhadd(uchar, uchar); char2 __ovld __cnfn rhadd(char2, char2); uchar2 __ovld __cnfn rhadd(uchar2, uchar2); char3 __ovld __cnfn rhadd(char3, char3); uchar3 __ovld __cnfn rhadd(uchar3, uchar3); char4 __ovld __cnfn rhadd(char4, char4); uchar4 __ovld __cnfn rhadd(uchar4, uchar4); char8 __ovld __cnfn rhadd(char8, char8); uchar8 __ovld __cnfn rhadd(uchar8, uchar8); char16 __ovld __cnfn rhadd(char16, char16); uchar16 __ovld __cnfn rhadd(uchar16, uchar16); short __ovld __cnfn rhadd(short, short); ushort __ovld __cnfn rhadd(ushort, ushort); short2 __ovld __cnfn rhadd(short2, short2); ushort2 __ovld __cnfn rhadd(ushort2, ushort2); short3 __ovld __cnfn rhadd(short3, short3); ushort3 __ovld __cnfn rhadd(ushort3, ushort3); short4 __ovld __cnfn rhadd(short4, short4); ushort4 __ovld __cnfn rhadd(ushort4, ushort4); short8 __ovld __cnfn rhadd(short8, short8); ushort8 __ovld __cnfn rhadd(ushort8, ushort8); short16 __ovld __cnfn rhadd(short16, short16); ushort16 __ovld __cnfn rhadd(ushort16, ushort16); int __ovld __cnfn rhadd(int, int); uint __ovld __cnfn rhadd(uint, uint); int2 __ovld __cnfn rhadd(int2, int2); uint2 __ovld __cnfn rhadd(uint2, uint2); int3 __ovld __cnfn rhadd(int3, int3); uint3 __ovld __cnfn rhadd(uint3, uint3); int4 __ovld __cnfn rhadd(int4, int4); uint4 __ovld __cnfn rhadd(uint4, uint4); int8 __ovld __cnfn rhadd(int8, int8); uint8 __ovld __cnfn rhadd(uint8, uint8); int16 __ovld __cnfn rhadd(int16, int16); uint16 __ovld __cnfn rhadd(uint16, uint16); long __ovld __cnfn rhadd(long, long); ulong __ovld __cnfn rhadd(ulong, ulong); long2 __ovld __cnfn rhadd(long2, long2); ulong2 __ovld __cnfn rhadd(ulong2, ulong2); long3 __ovld __cnfn rhadd(long3, long3); ulong3 __ovld __cnfn rhadd(ulong3, ulong3); long4 __ovld __cnfn rhadd(long4, long4); ulong4 __ovld __cnfn rhadd(ulong4, ulong4); long8 __ovld __cnfn rhadd(long8, long8); ulong8 __ovld __cnfn rhadd(ulong8, ulong8); long16 __ovld __cnfn rhadd(long16, long16); ulong16 __ovld __cnfn rhadd(ulong16, ulong16); /** * Returns min(max(x, minval), maxval). * Results are undefined if minval > maxval. 
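 * Illustrative values, following directly from the definition above:
 *   clamp(300, 0, 255)   // == 255
 *   clamp(-5, 0, 255)    // == 0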
*/ char __ovld __cnfn clamp(char, char, char); uchar __ovld __cnfn clamp(uchar, uchar, uchar); char2 __ovld __cnfn clamp(char2, char2, char2); uchar2 __ovld __cnfn clamp(uchar2, uchar2, uchar2); char3 __ovld __cnfn clamp(char3, char3, char3); uchar3 __ovld __cnfn clamp(uchar3, uchar3, uchar3); char4 __ovld __cnfn clamp(char4, char4, char4); uchar4 __ovld __cnfn clamp(uchar4, uchar4, uchar4); char8 __ovld __cnfn clamp(char8, char8, char8); uchar8 __ovld __cnfn clamp(uchar8, uchar8, uchar8); char16 __ovld __cnfn clamp(char16, char16, char16); uchar16 __ovld __cnfn clamp(uchar16, uchar16, uchar16); short __ovld __cnfn clamp(short, short, short); ushort __ovld __cnfn clamp(ushort, ushort, ushort); short2 __ovld __cnfn clamp(short2, short2, short2); ushort2 __ovld __cnfn clamp(ushort2, ushort2, ushort2); short3 __ovld __cnfn clamp(short3, short3, short3); ushort3 __ovld __cnfn clamp(ushort3, ushort3, ushort3); short4 __ovld __cnfn clamp(short4, short4, short4); ushort4 __ovld __cnfn clamp(ushort4, ushort4, ushort4); short8 __ovld __cnfn clamp(short8, short8, short8); ushort8 __ovld __cnfn clamp(ushort8, ushort8, ushort8); short16 __ovld __cnfn clamp(short16, short16, short16); ushort16 __ovld __cnfn clamp(ushort16, ushort16, ushort16); int __ovld __cnfn clamp(int, int, int); uint __ovld __cnfn clamp(uint, uint, uint); int2 __ovld __cnfn clamp(int2, int2, int2); uint2 __ovld __cnfn clamp(uint2, uint2, uint2); int3 __ovld __cnfn clamp(int3, int3, int3); uint3 __ovld __cnfn clamp(uint3, uint3, uint3); int4 __ovld __cnfn clamp(int4, int4, int4); uint4 __ovld __cnfn clamp(uint4, uint4, uint4); int8 __ovld __cnfn clamp(int8, int8, int8); uint8 __ovld __cnfn clamp(uint8, uint8, uint8); int16 __ovld __cnfn clamp(int16, int16, int16); uint16 __ovld __cnfn clamp(uint16, uint16, uint16); long __ovld __cnfn clamp(long, long, long); ulong __ovld __cnfn clamp(ulong, ulong, ulong); long2 __ovld __cnfn clamp(long2, long2, long2); ulong2 __ovld __cnfn clamp(ulong2, ulong2, ulong2); long3 __ovld __cnfn clamp(long3, long3, long3); ulong3 __ovld __cnfn clamp(ulong3, ulong3, ulong3); long4 __ovld __cnfn clamp(long4, long4, long4); ulong4 __ovld __cnfn clamp(ulong4, ulong4, ulong4); long8 __ovld __cnfn clamp(long8, long8, long8); ulong8 __ovld __cnfn clamp(ulong8, ulong8, ulong8); long16 __ovld __cnfn clamp(long16, long16, long16); ulong16 __ovld __cnfn clamp(ulong16, ulong16, ulong16); char2 __ovld __cnfn clamp(char2, char, char); uchar2 __ovld __cnfn clamp(uchar2, uchar, uchar); char3 __ovld __cnfn clamp(char3, char, char); uchar3 __ovld __cnfn clamp(uchar3, uchar, uchar); char4 __ovld __cnfn clamp(char4, char, char); uchar4 __ovld __cnfn clamp(uchar4, uchar, uchar); char8 __ovld __cnfn clamp(char8, char, char); uchar8 __ovld __cnfn clamp(uchar8, uchar, uchar); char16 __ovld __cnfn clamp(char16, char, char); uchar16 __ovld __cnfn clamp(uchar16, uchar, uchar); short2 __ovld __cnfn clamp(short2, short, short); ushort2 __ovld __cnfn clamp(ushort2, ushort, ushort); short3 __ovld __cnfn clamp(short3, short, short); ushort3 __ovld __cnfn clamp(ushort3, ushort, ushort); short4 __ovld __cnfn clamp(short4, short, short); ushort4 __ovld __cnfn clamp(ushort4, ushort, ushort); short8 __ovld __cnfn clamp(short8, short, short); ushort8 __ovld __cnfn clamp(ushort8, ushort, ushort); short16 __ovld __cnfn clamp(short16, short, short); ushort16 __ovld __cnfn clamp(ushort16, ushort, ushort); int2 __ovld __cnfn clamp(int2, int, int); uint2 __ovld __cnfn clamp(uint2, uint, uint); int3 __ovld __cnfn clamp(int3, int, int); uint3 __ovld 
__cnfn clamp(uint3, uint, uint); int4 __ovld __cnfn clamp(int4, int, int); uint4 __ovld __cnfn clamp(uint4, uint, uint); int8 __ovld __cnfn clamp(int8, int, int); uint8 __ovld __cnfn clamp(uint8, uint, uint); int16 __ovld __cnfn clamp(int16, int, int); uint16 __ovld __cnfn clamp(uint16, uint, uint); long2 __ovld __cnfn clamp(long2, long, long); ulong2 __ovld __cnfn clamp(ulong2, ulong, ulong); long3 __ovld __cnfn clamp(long3, long, long); ulong3 __ovld __cnfn clamp(ulong3, ulong, ulong); long4 __ovld __cnfn clamp(long4, long, long); ulong4 __ovld __cnfn clamp(ulong4, ulong, ulong); long8 __ovld __cnfn clamp(long8, long, long); ulong8 __ovld __cnfn clamp(ulong8, ulong, ulong); long16 __ovld __cnfn clamp(long16, long, long); ulong16 __ovld __cnfn clamp(ulong16, ulong, ulong); /** * Returns the number of leading 0-bits in x, starting * at the most significant bit position. */ char __ovld __cnfn clz(char); uchar __ovld __cnfn clz(uchar); char2 __ovld __cnfn clz(char2); uchar2 __ovld __cnfn clz(uchar2); char3 __ovld __cnfn clz(char3); uchar3 __ovld __cnfn clz(uchar3); char4 __ovld __cnfn clz(char4); uchar4 __ovld __cnfn clz(uchar4); char8 __ovld __cnfn clz(char8); uchar8 __ovld __cnfn clz(uchar8); char16 __ovld __cnfn clz(char16); uchar16 __ovld __cnfn clz(uchar16); short __ovld __cnfn clz(short); ushort __ovld __cnfn clz(ushort); short2 __ovld __cnfn clz(short2); ushort2 __ovld __cnfn clz(ushort2); short3 __ovld __cnfn clz(short3); ushort3 __ovld __cnfn clz(ushort3); short4 __ovld __cnfn clz(short4); ushort4 __ovld __cnfn clz(ushort4); short8 __ovld __cnfn clz(short8); ushort8 __ovld __cnfn clz(ushort8); short16 __ovld __cnfn clz(short16); ushort16 __ovld __cnfn clz(ushort16); int __ovld __cnfn clz(int); uint __ovld __cnfn clz(uint); int2 __ovld __cnfn clz(int2); uint2 __ovld __cnfn clz(uint2); int3 __ovld __cnfn clz(int3); uint3 __ovld __cnfn clz(uint3); int4 __ovld __cnfn clz(int4); uint4 __ovld __cnfn clz(uint4); int8 __ovld __cnfn clz(int8); uint8 __ovld __cnfn clz(uint8); int16 __ovld __cnfn clz(int16); uint16 __ovld __cnfn clz(uint16); long __ovld __cnfn clz(long); ulong __ovld __cnfn clz(ulong); long2 __ovld __cnfn clz(long2); ulong2 __ovld __cnfn clz(ulong2); long3 __ovld __cnfn clz(long3); ulong3 __ovld __cnfn clz(ulong3); long4 __ovld __cnfn clz(long4); ulong4 __ovld __cnfn clz(ulong4); long8 __ovld __cnfn clz(long8); ulong8 __ovld __cnfn clz(ulong8); long16 __ovld __cnfn clz(long16); ulong16 __ovld __cnfn clz(ulong16); /** * Returns the count of trailing 0-bits in x. If x is 0, * returns the size in bits of the type of x or * component type of x, if x is a vector. 
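 * Illustrative values, following directly from the definition above:
 *   ctz((uchar)0x10)   // == 4
 *   ctz((uchar)0)      // == 8, the width of uchar in bits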
*/ #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) char __ovld __cnfn ctz(char); uchar __ovld __cnfn ctz(uchar); char2 __ovld __cnfn ctz(char2); uchar2 __ovld __cnfn ctz(uchar2); char3 __ovld __cnfn ctz(char3); uchar3 __ovld __cnfn ctz(uchar3); char4 __ovld __cnfn ctz(char4); uchar4 __ovld __cnfn ctz(uchar4); char8 __ovld __cnfn ctz(char8); uchar8 __ovld __cnfn ctz(uchar8); char16 __ovld __cnfn ctz(char16); uchar16 __ovld __cnfn ctz(uchar16); short __ovld __cnfn ctz(short); ushort __ovld __cnfn ctz(ushort); short2 __ovld __cnfn ctz(short2); ushort2 __ovld __cnfn ctz(ushort2); short3 __ovld __cnfn ctz(short3); ushort3 __ovld __cnfn ctz(ushort3); short4 __ovld __cnfn ctz(short4); ushort4 __ovld __cnfn ctz(ushort4); short8 __ovld __cnfn ctz(short8); ushort8 __ovld __cnfn ctz(ushort8); short16 __ovld __cnfn ctz(short16); ushort16 __ovld __cnfn ctz(ushort16); int __ovld __cnfn ctz(int); uint __ovld __cnfn ctz(uint); int2 __ovld __cnfn ctz(int2); uint2 __ovld __cnfn ctz(uint2); int3 __ovld __cnfn ctz(int3); uint3 __ovld __cnfn ctz(uint3); int4 __ovld __cnfn ctz(int4); uint4 __ovld __cnfn ctz(uint4); int8 __ovld __cnfn ctz(int8); uint8 __ovld __cnfn ctz(uint8); int16 __ovld __cnfn ctz(int16); uint16 __ovld __cnfn ctz(uint16); long __ovld __cnfn ctz(long); ulong __ovld __cnfn ctz(ulong); long2 __ovld __cnfn ctz(long2); ulong2 __ovld __cnfn ctz(ulong2); long3 __ovld __cnfn ctz(long3); ulong3 __ovld __cnfn ctz(ulong3); long4 __ovld __cnfn ctz(long4); ulong4 __ovld __cnfn ctz(ulong4); long8 __ovld __cnfn ctz(long8); ulong8 __ovld __cnfn ctz(ulong8); long16 __ovld __cnfn ctz(long16); ulong16 __ovld __cnfn ctz(ulong16); #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Returns mul_hi(a, b) + c. 
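 * Illustrative value, following directly from the definition above
 * (mul_hi yields the upper half of the full-width product):
 *   mad_hi(0x80000000u, 4u, 1u)   // == 3u: the high 32 bits of 0x200000000 are 2, plus 1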
*/ char __ovld __cnfn mad_hi(char, char, char); uchar __ovld __cnfn mad_hi(uchar, uchar, uchar); char2 __ovld __cnfn mad_hi(char2, char2, char2); uchar2 __ovld __cnfn mad_hi(uchar2, uchar2, uchar2); char3 __ovld __cnfn mad_hi(char3, char3, char3); uchar3 __ovld __cnfn mad_hi(uchar3, uchar3, uchar3); char4 __ovld __cnfn mad_hi(char4, char4, char4); uchar4 __ovld __cnfn mad_hi(uchar4, uchar4, uchar4); char8 __ovld __cnfn mad_hi(char8, char8, char8); uchar8 __ovld __cnfn mad_hi(uchar8, uchar8, uchar8); char16 __ovld __cnfn mad_hi(char16, char16, char16); uchar16 __ovld __cnfn mad_hi(uchar16, uchar16, uchar16); short __ovld __cnfn mad_hi(short, short, short); ushort __ovld __cnfn mad_hi(ushort, ushort, ushort); short2 __ovld __cnfn mad_hi(short2, short2, short2); ushort2 __ovld __cnfn mad_hi(ushort2, ushort2, ushort2); short3 __ovld __cnfn mad_hi(short3, short3, short3); ushort3 __ovld __cnfn mad_hi(ushort3, ushort3, ushort3); short4 __ovld __cnfn mad_hi(short4, short4, short4); ushort4 __ovld __cnfn mad_hi(ushort4, ushort4, ushort4); short8 __ovld __cnfn mad_hi(short8, short8, short8); ushort8 __ovld __cnfn mad_hi(ushort8, ushort8, ushort8); short16 __ovld __cnfn mad_hi(short16, short16, short16); ushort16 __ovld __cnfn mad_hi(ushort16, ushort16, ushort16); int __ovld __cnfn mad_hi(int, int, int); uint __ovld __cnfn mad_hi(uint, uint, uint); int2 __ovld __cnfn mad_hi(int2, int2, int2); uint2 __ovld __cnfn mad_hi(uint2, uint2, uint2); int3 __ovld __cnfn mad_hi(int3, int3, int3); uint3 __ovld __cnfn mad_hi(uint3, uint3, uint3); int4 __ovld __cnfn mad_hi(int4, int4, int4); uint4 __ovld __cnfn mad_hi(uint4, uint4, uint4); int8 __ovld __cnfn mad_hi(int8, int8, int8); uint8 __ovld __cnfn mad_hi(uint8, uint8, uint8); int16 __ovld __cnfn mad_hi(int16, int16, int16); uint16 __ovld __cnfn mad_hi(uint16, uint16, uint16); long __ovld __cnfn mad_hi(long, long, long); ulong __ovld __cnfn mad_hi(ulong, ulong, ulong); long2 __ovld __cnfn mad_hi(long2, long2, long2); ulong2 __ovld __cnfn mad_hi(ulong2, ulong2, ulong2); long3 __ovld __cnfn mad_hi(long3, long3, long3); ulong3 __ovld __cnfn mad_hi(ulong3, ulong3, ulong3); long4 __ovld __cnfn mad_hi(long4, long4, long4); ulong4 __ovld __cnfn mad_hi(ulong4, ulong4, ulong4); long8 __ovld __cnfn mad_hi(long8, long8, long8); ulong8 __ovld __cnfn mad_hi(ulong8, ulong8, ulong8); long16 __ovld __cnfn mad_hi(long16, long16, long16); ulong16 __ovld __cnfn mad_hi(ulong16, ulong16, ulong16); /** * Returns a * b + c and saturates the result. 
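 * Illustrative value, following directly from the definition above:
 *   mad_sat((uchar)16, (uchar)16, (uchar)1)   // == 255 (16 * 16 + 1 == 257 saturates)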
*/ char __ovld __cnfn mad_sat(char, char, char); uchar __ovld __cnfn mad_sat(uchar, uchar, uchar); char2 __ovld __cnfn mad_sat(char2, char2, char2); uchar2 __ovld __cnfn mad_sat(uchar2, uchar2, uchar2); char3 __ovld __cnfn mad_sat(char3, char3, char3); uchar3 __ovld __cnfn mad_sat(uchar3, uchar3, uchar3); char4 __ovld __cnfn mad_sat(char4, char4, char4); uchar4 __ovld __cnfn mad_sat(uchar4, uchar4, uchar4); char8 __ovld __cnfn mad_sat(char8, char8, char8); uchar8 __ovld __cnfn mad_sat(uchar8, uchar8, uchar8); char16 __ovld __cnfn mad_sat(char16, char16, char16); uchar16 __ovld __cnfn mad_sat(uchar16, uchar16, uchar16); short __ovld __cnfn mad_sat(short, short, short); ushort __ovld __cnfn mad_sat(ushort, ushort, ushort); short2 __ovld __cnfn mad_sat(short2, short2, short2); ushort2 __ovld __cnfn mad_sat(ushort2, ushort2, ushort2); short3 __ovld __cnfn mad_sat(short3, short3, short3); ushort3 __ovld __cnfn mad_sat(ushort3, ushort3, ushort3); short4 __ovld __cnfn mad_sat(short4, short4, short4); ushort4 __ovld __cnfn mad_sat(ushort4, ushort4, ushort4); short8 __ovld __cnfn mad_sat(short8, short8, short8); ushort8 __ovld __cnfn mad_sat(ushort8, ushort8, ushort8); short16 __ovld __cnfn mad_sat(short16, short16, short16); ushort16 __ovld __cnfn mad_sat(ushort16, ushort16, ushort16); int __ovld __cnfn mad_sat(int, int, int); uint __ovld __cnfn mad_sat(uint, uint, uint); int2 __ovld __cnfn mad_sat(int2, int2, int2); uint2 __ovld __cnfn mad_sat(uint2, uint2, uint2); int3 __ovld __cnfn mad_sat(int3, int3, int3); uint3 __ovld __cnfn mad_sat(uint3, uint3, uint3); int4 __ovld __cnfn mad_sat(int4, int4, int4); uint4 __ovld __cnfn mad_sat(uint4, uint4, uint4); int8 __ovld __cnfn mad_sat(int8, int8, int8); uint8 __ovld __cnfn mad_sat(uint8, uint8, uint8); int16 __ovld __cnfn mad_sat(int16, int16, int16); uint16 __ovld __cnfn mad_sat(uint16, uint16, uint16); long __ovld __cnfn mad_sat(long, long, long); ulong __ovld __cnfn mad_sat(ulong, ulong, ulong); long2 __ovld __cnfn mad_sat(long2, long2, long2); ulong2 __ovld __cnfn mad_sat(ulong2, ulong2, ulong2); long3 __ovld __cnfn mad_sat(long3, long3, long3); ulong3 __ovld __cnfn mad_sat(ulong3, ulong3, ulong3); long4 __ovld __cnfn mad_sat(long4, long4, long4); ulong4 __ovld __cnfn mad_sat(ulong4, ulong4, ulong4); long8 __ovld __cnfn mad_sat(long8, long8, long8); ulong8 __ovld __cnfn mad_sat(ulong8, ulong8, ulong8); long16 __ovld __cnfn mad_sat(long16, long16, long16); ulong16 __ovld __cnfn mad_sat(ulong16, ulong16, ulong16); /** * Returns y if x < y, otherwise it returns x. 
*/ char __ovld __cnfn max(char, char); uchar __ovld __cnfn max(uchar, uchar); char2 __ovld __cnfn max(char2, char2); uchar2 __ovld __cnfn max(uchar2, uchar2); char3 __ovld __cnfn max(char3, char3); uchar3 __ovld __cnfn max(uchar3, uchar3); char4 __ovld __cnfn max(char4, char4); uchar4 __ovld __cnfn max(uchar4, uchar4); char8 __ovld __cnfn max(char8, char8); uchar8 __ovld __cnfn max(uchar8, uchar8); char16 __ovld __cnfn max(char16, char16); uchar16 __ovld __cnfn max(uchar16, uchar16); short __ovld __cnfn max(short, short); ushort __ovld __cnfn max(ushort, ushort); short2 __ovld __cnfn max(short2, short2); ushort2 __ovld __cnfn max(ushort2, ushort2); short3 __ovld __cnfn max(short3, short3); ushort3 __ovld __cnfn max(ushort3, ushort3); short4 __ovld __cnfn max(short4, short4); ushort4 __ovld __cnfn max(ushort4, ushort4); short8 __ovld __cnfn max(short8, short8); ushort8 __ovld __cnfn max(ushort8, ushort8); short16 __ovld __cnfn max(short16, short16); ushort16 __ovld __cnfn max(ushort16, ushort16); int __ovld __cnfn max(int, int); uint __ovld __cnfn max(uint, uint); int2 __ovld __cnfn max(int2, int2); uint2 __ovld __cnfn max(uint2, uint2); int3 __ovld __cnfn max(int3, int3); uint3 __ovld __cnfn max(uint3, uint3); int4 __ovld __cnfn max(int4, int4); uint4 __ovld __cnfn max(uint4, uint4); int8 __ovld __cnfn max(int8, int8); uint8 __ovld __cnfn max(uint8, uint8); int16 __ovld __cnfn max(int16, int16); uint16 __ovld __cnfn max(uint16, uint16); long __ovld __cnfn max(long, long); ulong __ovld __cnfn max(ulong, ulong); long2 __ovld __cnfn max(long2, long2); ulong2 __ovld __cnfn max(ulong2, ulong2); long3 __ovld __cnfn max(long3, long3); ulong3 __ovld __cnfn max(ulong3, ulong3); long4 __ovld __cnfn max(long4, long4); ulong4 __ovld __cnfn max(ulong4, ulong4); long8 __ovld __cnfn max(long8, long8); ulong8 __ovld __cnfn max(ulong8, ulong8); long16 __ovld __cnfn max(long16, long16); ulong16 __ovld __cnfn max(ulong16, ulong16); char2 __ovld __cnfn max(char2, char); uchar2 __ovld __cnfn max(uchar2, uchar); char3 __ovld __cnfn max(char3, char); uchar3 __ovld __cnfn max(uchar3, uchar); char4 __ovld __cnfn max(char4, char); uchar4 __ovld __cnfn max(uchar4, uchar); char8 __ovld __cnfn max(char8, char); uchar8 __ovld __cnfn max(uchar8, uchar); char16 __ovld __cnfn max(char16, char); uchar16 __ovld __cnfn max(uchar16, uchar); short2 __ovld __cnfn max(short2, short); ushort2 __ovld __cnfn max(ushort2, ushort); short3 __ovld __cnfn max(short3, short); ushort3 __ovld __cnfn max(ushort3, ushort); short4 __ovld __cnfn max(short4, short); ushort4 __ovld __cnfn max(ushort4, ushort); short8 __ovld __cnfn max(short8, short); ushort8 __ovld __cnfn max(ushort8, ushort); short16 __ovld __cnfn max(short16, short); ushort16 __ovld __cnfn max(ushort16, ushort); int2 __ovld __cnfn max(int2, int); uint2 __ovld __cnfn max(uint2, uint); int3 __ovld __cnfn max(int3, int); uint3 __ovld __cnfn max(uint3, uint); int4 __ovld __cnfn max(int4, int); uint4 __ovld __cnfn max(uint4, uint); int8 __ovld __cnfn max(int8, int); uint8 __ovld __cnfn max(uint8, uint); int16 __ovld __cnfn max(int16, int); uint16 __ovld __cnfn max(uint16, uint); long2 __ovld __cnfn max(long2, long); ulong2 __ovld __cnfn max(ulong2, ulong); long3 __ovld __cnfn max(long3, long); ulong3 __ovld __cnfn max(ulong3, ulong); long4 __ovld __cnfn max(long4, long); ulong4 __ovld __cnfn max(ulong4, ulong); long8 __ovld __cnfn max(long8, long); ulong8 __ovld __cnfn max(ulong8, ulong); long16 __ovld __cnfn max(long16, long); ulong16 __ovld __cnfn max(ulong16, ulong); /** * 
Returns y if y < x, otherwise it returns x. */ char __ovld __cnfn min(char, char); uchar __ovld __cnfn min(uchar, uchar); char2 __ovld __cnfn min(char2, char2); uchar2 __ovld __cnfn min(uchar2, uchar2); char3 __ovld __cnfn min(char3, char3); uchar3 __ovld __cnfn min(uchar3, uchar3); char4 __ovld __cnfn min(char4, char4); uchar4 __ovld __cnfn min(uchar4, uchar4); char8 __ovld __cnfn min(char8, char8); uchar8 __ovld __cnfn min(uchar8, uchar8); char16 __ovld __cnfn min(char16, char16); uchar16 __ovld __cnfn min(uchar16, uchar16); short __ovld __cnfn min(short, short); ushort __ovld __cnfn min(ushort, ushort); short2 __ovld __cnfn min(short2, short2); ushort2 __ovld __cnfn min(ushort2, ushort2); short3 __ovld __cnfn min(short3, short3); ushort3 __ovld __cnfn min(ushort3, ushort3); short4 __ovld __cnfn min(short4, short4); ushort4 __ovld __cnfn min(ushort4, ushort4); short8 __ovld __cnfn min(short8, short8); ushort8 __ovld __cnfn min(ushort8, ushort8); short16 __ovld __cnfn min(short16, short16); ushort16 __ovld __cnfn min(ushort16, ushort16); int __ovld __cnfn min(int, int); uint __ovld __cnfn min(uint, uint); int2 __ovld __cnfn min(int2, int2); uint2 __ovld __cnfn min(uint2, uint2); int3 __ovld __cnfn min(int3, int3); uint3 __ovld __cnfn min(uint3, uint3); int4 __ovld __cnfn min(int4, int4); uint4 __ovld __cnfn min(uint4, uint4); int8 __ovld __cnfn min(int8, int8); uint8 __ovld __cnfn min(uint8, uint8); int16 __ovld __cnfn min(int16, int16); uint16 __ovld __cnfn min(uint16, uint16); long __ovld __cnfn min(long, long); ulong __ovld __cnfn min(ulong, ulong); long2 __ovld __cnfn min(long2, long2); ulong2 __ovld __cnfn min(ulong2, ulong2); long3 __ovld __cnfn min(long3, long3); ulong3 __ovld __cnfn min(ulong3, ulong3); long4 __ovld __cnfn min(long4, long4); ulong4 __ovld __cnfn min(ulong4, ulong4); long8 __ovld __cnfn min(long8, long8); ulong8 __ovld __cnfn min(ulong8, ulong8); long16 __ovld __cnfn min(long16, long16); ulong16 __ovld __cnfn min(ulong16, ulong16); char2 __ovld __cnfn min(char2, char); uchar2 __ovld __cnfn min(uchar2, uchar); char3 __ovld __cnfn min(char3, char); uchar3 __ovld __cnfn min(uchar3, uchar); char4 __ovld __cnfn min(char4, char); uchar4 __ovld __cnfn min(uchar4, uchar); char8 __ovld __cnfn min(char8, char); uchar8 __ovld __cnfn min(uchar8, uchar); char16 __ovld __cnfn min(char16, char); uchar16 __ovld __cnfn min(uchar16, uchar); short2 __ovld __cnfn min(short2, short); ushort2 __ovld __cnfn min(ushort2, ushort); short3 __ovld __cnfn min(short3, short); ushort3 __ovld __cnfn min(ushort3, ushort); short4 __ovld __cnfn min(short4, short); ushort4 __ovld __cnfn min(ushort4, ushort); short8 __ovld __cnfn min(short8, short); ushort8 __ovld __cnfn min(ushort8, ushort); short16 __ovld __cnfn min(short16, short); ushort16 __ovld __cnfn min(ushort16, ushort); int2 __ovld __cnfn min(int2, int); uint2 __ovld __cnfn min(uint2, uint); int3 __ovld __cnfn min(int3, int); uint3 __ovld __cnfn min(uint3, uint); int4 __ovld __cnfn min(int4, int); uint4 __ovld __cnfn min(uint4, uint); int8 __ovld __cnfn min(int8, int); uint8 __ovld __cnfn min(uint8, uint); int16 __ovld __cnfn min(int16, int); uint16 __ovld __cnfn min(uint16, uint); long2 __ovld __cnfn min(long2, long); ulong2 __ovld __cnfn min(ulong2, ulong); long3 __ovld __cnfn min(long3, long); ulong3 __ovld __cnfn min(ulong3, ulong); long4 __ovld __cnfn min(long4, long); ulong4 __ovld __cnfn min(ulong4, ulong); long8 __ovld __cnfn min(long8, long); ulong8 __ovld __cnfn min(ulong8, ulong); long16 __ovld __cnfn min(long16, long); ulong16 
__ovld __cnfn min(ulong16, ulong); /** * Computes x * y and returns the high half of the * product of x and y. */ char __ovld __cnfn mul_hi(char, char); uchar __ovld __cnfn mul_hi(uchar, uchar); char2 __ovld __cnfn mul_hi(char2, char2); uchar2 __ovld __cnfn mul_hi(uchar2, uchar2); char3 __ovld __cnfn mul_hi(char3, char3); uchar3 __ovld __cnfn mul_hi(uchar3, uchar3); char4 __ovld __cnfn mul_hi(char4, char4); uchar4 __ovld __cnfn mul_hi(uchar4, uchar4); char8 __ovld __cnfn mul_hi(char8, char8); uchar8 __ovld __cnfn mul_hi(uchar8, uchar8); char16 __ovld __cnfn mul_hi(char16, char16); uchar16 __ovld __cnfn mul_hi(uchar16, uchar16); short __ovld __cnfn mul_hi(short, short); ushort __ovld __cnfn mul_hi(ushort, ushort); short2 __ovld __cnfn mul_hi(short2, short2); ushort2 __ovld __cnfn mul_hi(ushort2, ushort2); short3 __ovld __cnfn mul_hi(short3, short3); ushort3 __ovld __cnfn mul_hi(ushort3, ushort3); short4 __ovld __cnfn mul_hi(short4, short4); ushort4 __ovld __cnfn mul_hi(ushort4, ushort4); short8 __ovld __cnfn mul_hi(short8, short8); ushort8 __ovld __cnfn mul_hi(ushort8, ushort8); short16 __ovld __cnfn mul_hi(short16, short16); ushort16 __ovld __cnfn mul_hi(ushort16, ushort16); int __ovld __cnfn mul_hi(int, int); uint __ovld __cnfn mul_hi(uint, uint); int2 __ovld __cnfn mul_hi(int2, int2); uint2 __ovld __cnfn mul_hi(uint2, uint2); int3 __ovld __cnfn mul_hi(int3, int3); uint3 __ovld __cnfn mul_hi(uint3, uint3); int4 __ovld __cnfn mul_hi(int4, int4); uint4 __ovld __cnfn mul_hi(uint4, uint4); int8 __ovld __cnfn mul_hi(int8, int8); uint8 __ovld __cnfn mul_hi(uint8, uint8); int16 __ovld __cnfn mul_hi(int16, int16); uint16 __ovld __cnfn mul_hi(uint16, uint16); long __ovld __cnfn mul_hi(long, long); ulong __ovld __cnfn mul_hi(ulong, ulong); long2 __ovld __cnfn mul_hi(long2, long2); ulong2 __ovld __cnfn mul_hi(ulong2, ulong2); long3 __ovld __cnfn mul_hi(long3, long3); ulong3 __ovld __cnfn mul_hi(ulong3, ulong3); long4 __ovld __cnfn mul_hi(long4, long4); ulong4 __ovld __cnfn mul_hi(ulong4, ulong4); long8 __ovld __cnfn mul_hi(long8, long8); ulong8 __ovld __cnfn mul_hi(ulong8, ulong8); long16 __ovld __cnfn mul_hi(long16, long16); ulong16 __ovld __cnfn mul_hi(ulong16, ulong16); /** * For each element in v, the bits are shifted left by * the number of bits given by the corresponding * element in i (subject to usual shift modulo rules * described in section 6.3). Bits shifted off the left * side of the element are shifted back in from the * right. 
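 * Illustrative value, following directly from the definition above:
 *   rotate((uchar)0x81, (uchar)1)   // == 0x03: the top bit wraps around into bit 0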
*/ char __ovld __cnfn rotate(char, char); uchar __ovld __cnfn rotate(uchar, uchar); char2 __ovld __cnfn rotate(char2, char2); uchar2 __ovld __cnfn rotate(uchar2, uchar2); char3 __ovld __cnfn rotate(char3, char3); uchar3 __ovld __cnfn rotate(uchar3, uchar3); char4 __ovld __cnfn rotate(char4, char4); uchar4 __ovld __cnfn rotate(uchar4, uchar4); char8 __ovld __cnfn rotate(char8, char8); uchar8 __ovld __cnfn rotate(uchar8, uchar8); char16 __ovld __cnfn rotate(char16, char16); uchar16 __ovld __cnfn rotate(uchar16, uchar16); short __ovld __cnfn rotate(short, short); ushort __ovld __cnfn rotate(ushort, ushort); short2 __ovld __cnfn rotate(short2, short2); ushort2 __ovld __cnfn rotate(ushort2, ushort2); short3 __ovld __cnfn rotate(short3, short3); ushort3 __ovld __cnfn rotate(ushort3, ushort3); short4 __ovld __cnfn rotate(short4, short4); ushort4 __ovld __cnfn rotate(ushort4, ushort4); short8 __ovld __cnfn rotate(short8, short8); ushort8 __ovld __cnfn rotate(ushort8, ushort8); short16 __ovld __cnfn rotate(short16, short16); ushort16 __ovld __cnfn rotate(ushort16, ushort16); int __ovld __cnfn rotate(int, int); uint __ovld __cnfn rotate(uint, uint); int2 __ovld __cnfn rotate(int2, int2); uint2 __ovld __cnfn rotate(uint2, uint2); int3 __ovld __cnfn rotate(int3, int3); uint3 __ovld __cnfn rotate(uint3, uint3); int4 __ovld __cnfn rotate(int4, int4); uint4 __ovld __cnfn rotate(uint4, uint4); int8 __ovld __cnfn rotate(int8, int8); uint8 __ovld __cnfn rotate(uint8, uint8); int16 __ovld __cnfn rotate(int16, int16); uint16 __ovld __cnfn rotate(uint16, uint16); long __ovld __cnfn rotate(long, long); ulong __ovld __cnfn rotate(ulong, ulong); long2 __ovld __cnfn rotate(long2, long2); ulong2 __ovld __cnfn rotate(ulong2, ulong2); long3 __ovld __cnfn rotate(long3, long3); ulong3 __ovld __cnfn rotate(ulong3, ulong3); long4 __ovld __cnfn rotate(long4, long4); ulong4 __ovld __cnfn rotate(ulong4, ulong4); long8 __ovld __cnfn rotate(long8, long8); ulong8 __ovld __cnfn rotate(ulong8, ulong8); long16 __ovld __cnfn rotate(long16, long16); ulong16 __ovld __cnfn rotate(ulong16, ulong16); /** * Returns x - y and saturates the result. 
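 * Illustrative values, following directly from the definition above:
 *   sub_sat((uchar)10, (uchar)20)    // == 0 (saturated; modular subtraction would give 246)
 *   sub_sat((char)-100, (char)100)   // == -128 (saturated)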
*/ char __ovld __cnfn sub_sat(char, char); uchar __ovld __cnfn sub_sat(uchar, uchar); char2 __ovld __cnfn sub_sat(char2, char2); uchar2 __ovld __cnfn sub_sat(uchar2, uchar2); char3 __ovld __cnfn sub_sat(char3, char3); uchar3 __ovld __cnfn sub_sat(uchar3, uchar3); char4 __ovld __cnfn sub_sat(char4, char4); uchar4 __ovld __cnfn sub_sat(uchar4, uchar4); char8 __ovld __cnfn sub_sat(char8, char8); uchar8 __ovld __cnfn sub_sat(uchar8, uchar8); char16 __ovld __cnfn sub_sat(char16, char16); uchar16 __ovld __cnfn sub_sat(uchar16, uchar16); short __ovld __cnfn sub_sat(short, short); ushort __ovld __cnfn sub_sat(ushort, ushort); short2 __ovld __cnfn sub_sat(short2, short2); ushort2 __ovld __cnfn sub_sat(ushort2, ushort2); short3 __ovld __cnfn sub_sat(short3, short3); ushort3 __ovld __cnfn sub_sat(ushort3, ushort3); short4 __ovld __cnfn sub_sat(short4, short4); ushort4 __ovld __cnfn sub_sat(ushort4, ushort4); short8 __ovld __cnfn sub_sat(short8, short8); ushort8 __ovld __cnfn sub_sat(ushort8, ushort8); short16 __ovld __cnfn sub_sat(short16, short16); ushort16 __ovld __cnfn sub_sat(ushort16, ushort16); int __ovld __cnfn sub_sat(int, int); uint __ovld __cnfn sub_sat(uint, uint); int2 __ovld __cnfn sub_sat(int2, int2); uint2 __ovld __cnfn sub_sat(uint2, uint2); int3 __ovld __cnfn sub_sat(int3, int3); uint3 __ovld __cnfn sub_sat(uint3, uint3); int4 __ovld __cnfn sub_sat(int4, int4); uint4 __ovld __cnfn sub_sat(uint4, uint4); int8 __ovld __cnfn sub_sat(int8, int8); uint8 __ovld __cnfn sub_sat(uint8, uint8); int16 __ovld __cnfn sub_sat(int16, int16); uint16 __ovld __cnfn sub_sat(uint16, uint16); long __ovld __cnfn sub_sat(long, long); ulong __ovld __cnfn sub_sat(ulong, ulong); long2 __ovld __cnfn sub_sat(long2, long2); ulong2 __ovld __cnfn sub_sat(ulong2, ulong2); long3 __ovld __cnfn sub_sat(long3, long3); ulong3 __ovld __cnfn sub_sat(ulong3, ulong3); long4 __ovld __cnfn sub_sat(long4, long4); ulong4 __ovld __cnfn sub_sat(ulong4, ulong4); long8 __ovld __cnfn sub_sat(long8, long8); ulong8 __ovld __cnfn sub_sat(ulong8, ulong8); long16 __ovld __cnfn sub_sat(long16, long16); ulong16 __ovld __cnfn sub_sat(ulong16, ulong16); /** * result[i] = ((short)hi[i] << 8) | lo[i] * result[i] = ((ushort)hi[i] << 8) | lo[i] */ short __ovld __cnfn upsample(char, uchar); ushort __ovld __cnfn upsample(uchar, uchar); short2 __ovld __cnfn upsample(char2, uchar2); short3 __ovld __cnfn upsample(char3, uchar3); short4 __ovld __cnfn upsample(char4, uchar4); short8 __ovld __cnfn upsample(char8, uchar8); short16 __ovld __cnfn upsample(char16, uchar16); ushort2 __ovld __cnfn upsample(uchar2, uchar2); ushort3 __ovld __cnfn upsample(uchar3, uchar3); ushort4 __ovld __cnfn upsample(uchar4, uchar4); ushort8 __ovld __cnfn upsample(uchar8, uchar8); ushort16 __ovld __cnfn upsample(uchar16, uchar16); /** * result[i] = ((int)hi[i] << 16) | lo[i] * result[i] = ((uint)hi[i] << 16) | lo[i] */ int __ovld __cnfn upsample(short, ushort); uint __ovld __cnfn upsample(ushort, ushort); int2 __ovld __cnfn upsample(short2, ushort2); int3 __ovld __cnfn upsample(short3, ushort3); int4 __ovld __cnfn upsample(short4, ushort4); int8 __ovld __cnfn upsample(short8, ushort8); int16 __ovld __cnfn upsample(short16, ushort16); uint2 __ovld __cnfn upsample(ushort2, ushort2); uint3 __ovld __cnfn upsample(ushort3, ushort3); uint4 __ovld __cnfn upsample(ushort4, ushort4); uint8 __ovld __cnfn upsample(ushort8, ushort8); uint16 __ovld __cnfn upsample(ushort16, ushort16); /** * result[i] = ((long)hi[i] << 32) | lo[i] * result[i] = ((ulong)hi[i] << 32) | lo[i] */ long __ovld 
__cnfn upsample(int, uint); ulong __ovld __cnfn upsample(uint, uint); long2 __ovld __cnfn upsample(int2, uint2); long3 __ovld __cnfn upsample(int3, uint3); long4 __ovld __cnfn upsample(int4, uint4); long8 __ovld __cnfn upsample(int8, uint8); long16 __ovld __cnfn upsample(int16, uint16); ulong2 __ovld __cnfn upsample(uint2, uint2); ulong3 __ovld __cnfn upsample(uint3, uint3); ulong4 __ovld __cnfn upsample(uint4, uint4); ulong8 __ovld __cnfn upsample(uint8, uint8); ulong16 __ovld __cnfn upsample(uint16, uint16); /* * popcount(x): returns the number of set bit in x */ #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) char __ovld __cnfn popcount(char); uchar __ovld __cnfn popcount(uchar); char2 __ovld __cnfn popcount(char2); uchar2 __ovld __cnfn popcount(uchar2); char3 __ovld __cnfn popcount(char3); uchar3 __ovld __cnfn popcount(uchar3); char4 __ovld __cnfn popcount(char4); uchar4 __ovld __cnfn popcount(uchar4); char8 __ovld __cnfn popcount(char8); uchar8 __ovld __cnfn popcount(uchar8); char16 __ovld __cnfn popcount(char16); uchar16 __ovld __cnfn popcount(uchar16); short __ovld __cnfn popcount(short); ushort __ovld __cnfn popcount(ushort); short2 __ovld __cnfn popcount(short2); ushort2 __ovld __cnfn popcount(ushort2); short3 __ovld __cnfn popcount(short3); ushort3 __ovld __cnfn popcount(ushort3); short4 __ovld __cnfn popcount(short4); ushort4 __ovld __cnfn popcount(ushort4); short8 __ovld __cnfn popcount(short8); ushort8 __ovld __cnfn popcount(ushort8); short16 __ovld __cnfn popcount(short16); ushort16 __ovld __cnfn popcount(ushort16); int __ovld __cnfn popcount(int); uint __ovld __cnfn popcount(uint); int2 __ovld __cnfn popcount(int2); uint2 __ovld __cnfn popcount(uint2); int3 __ovld __cnfn popcount(int3); uint3 __ovld __cnfn popcount(uint3); int4 __ovld __cnfn popcount(int4); uint4 __ovld __cnfn popcount(uint4); int8 __ovld __cnfn popcount(int8); uint8 __ovld __cnfn popcount(uint8); int16 __ovld __cnfn popcount(int16); uint16 __ovld __cnfn popcount(uint16); long __ovld __cnfn popcount(long); ulong __ovld __cnfn popcount(ulong); long2 __ovld __cnfn popcount(long2); ulong2 __ovld __cnfn popcount(ulong2); long3 __ovld __cnfn popcount(long3); ulong3 __ovld __cnfn popcount(ulong3); long4 __ovld __cnfn popcount(long4); ulong4 __ovld __cnfn popcount(ulong4); long8 __ovld __cnfn popcount(long8); ulong8 __ovld __cnfn popcount(ulong8); long16 __ovld __cnfn popcount(long16); ulong16 __ovld __cnfn popcount(ulong16); #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) /** * Multiply two 24-bit integer values x and y and add * the 32-bit integer result to the 32-bit integer z. * Refer to definition of mul24 to see how the 24-bit * integer multiplication is performed. */ int __ovld __cnfn mad24(int, int, int); uint __ovld __cnfn mad24(uint, uint, uint); int2 __ovld __cnfn mad24(int2, int2, int2); uint2 __ovld __cnfn mad24(uint2, uint2, uint2); int3 __ovld __cnfn mad24(int3, int3, int3); uint3 __ovld __cnfn mad24(uint3, uint3, uint3); int4 __ovld __cnfn mad24(int4, int4, int4); uint4 __ovld __cnfn mad24(uint4, uint4, uint4); int8 __ovld __cnfn mad24(int8, int8, int8); uint8 __ovld __cnfn mad24(uint8, uint8, uint8); int16 __ovld __cnfn mad24(int16, int16, int16); uint16 __ovld __cnfn mad24(uint16, uint16, uint16); /** * Multiply two 24-bit integer values x and y. x and y * are 32-bit integers but only the low 24-bits are used * to perform the multiplication. 
mul24 should only * be used when values in x and y are in the range [- * 2^23, 2^23-1] if x and y are signed integers and in the * range [0, 2^24-1] if x and y are unsigned integers. If * x and y are not in this range, the multiplication * result is implementation-defined. */ int __ovld __cnfn mul24(int, int); uint __ovld __cnfn mul24(uint, uint); int2 __ovld __cnfn mul24(int2, int2); uint2 __ovld __cnfn mul24(uint2, uint2); int3 __ovld __cnfn mul24(int3, int3); uint3 __ovld __cnfn mul24(uint3, uint3); int4 __ovld __cnfn mul24(int4, int4); uint4 __ovld __cnfn mul24(uint4, uint4); int8 __ovld __cnfn mul24(int8, int8); uint8 __ovld __cnfn mul24(uint8, uint8); int16 __ovld __cnfn mul24(int16, int16); uint16 __ovld __cnfn mul24(uint16, uint16); // OpenCL v1.1 s6.11.4, v1.2 s6.12.4, v2.0 s6.13.4 - Common Functions /** * Returns fmin(fmax(x, minval), maxval). * Results are undefined if minval > maxval. */ float __ovld __cnfn clamp(float, float, float); float2 __ovld __cnfn clamp(float2, float2, float2); float3 __ovld __cnfn clamp(float3, float3, float3); float4 __ovld __cnfn clamp(float4, float4, float4); float8 __ovld __cnfn clamp(float8, float8, float8); float16 __ovld __cnfn clamp(float16, float16, float16); float2 __ovld __cnfn clamp(float2, float, float); float3 __ovld __cnfn clamp(float3, float, float); float4 __ovld __cnfn clamp(float4, float, float); float8 __ovld __cnfn clamp(float8, float, float); float16 __ovld __cnfn clamp(float16, float, float); #ifdef cl_khr_fp64 double __ovld __cnfn clamp(double, double, double); double2 __ovld __cnfn clamp(double2, double2, double2); double3 __ovld __cnfn clamp(double3, double3, double3); double4 __ovld __cnfn clamp(double4, double4, double4); double8 __ovld __cnfn clamp(double8, double8, double8); double16 __ovld __cnfn clamp(double16, double16, double16); double2 __ovld __cnfn clamp(double2, double, double); double3 __ovld __cnfn clamp(double3, double, double); double4 __ovld __cnfn clamp(double4, double, double); double8 __ovld __cnfn clamp(double8, double, double); double16 __ovld __cnfn clamp(double16, double, double); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn clamp(half, half, half); half2 __ovld __cnfn clamp(half2, half2, half2); half3 __ovld __cnfn clamp(half3, half3, half3); half4 __ovld __cnfn clamp(half4, half4, half4); half8 __ovld __cnfn clamp(half8, half8, half8); half16 __ovld __cnfn clamp(half16, half16, half16); half2 __ovld __cnfn clamp(half2, half, half); half3 __ovld __cnfn clamp(half3, half, half); half4 __ovld __cnfn clamp(half4, half, half); half8 __ovld __cnfn clamp(half8, half, half); half16 __ovld __cnfn clamp(half16, half, half); #endif //cl_khr_fp16 /** * Converts radians to degrees, i.e. (180 / PI) * * radians. 
*/ float __ovld __cnfn degrees(float); float2 __ovld __cnfn degrees(float2); float3 __ovld __cnfn degrees(float3); float4 __ovld __cnfn degrees(float4); float8 __ovld __cnfn degrees(float8); float16 __ovld __cnfn degrees(float16); #ifdef cl_khr_fp64 double __ovld __cnfn degrees(double); double2 __ovld __cnfn degrees(double2); double3 __ovld __cnfn degrees(double3); double4 __ovld __cnfn degrees(double4); double8 __ovld __cnfn degrees(double8); double16 __ovld __cnfn degrees(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn degrees(half); half2 __ovld __cnfn degrees(half2); half3 __ovld __cnfn degrees(half3); half4 __ovld __cnfn degrees(half4); half8 __ovld __cnfn degrees(half8); half16 __ovld __cnfn degrees(half16); #endif //cl_khr_fp16 /** * Returns y if x < y, otherwise it returns x. If x and y * are infinite or NaN, the return values are undefined. */ float __ovld __cnfn max(float, float); float2 __ovld __cnfn max(float2, float2); float3 __ovld __cnfn max(float3, float3); float4 __ovld __cnfn max(float4, float4); float8 __ovld __cnfn max(float8, float8); float16 __ovld __cnfn max(float16, float16); float2 __ovld __cnfn max(float2, float); float3 __ovld __cnfn max(float3, float); float4 __ovld __cnfn max(float4, float); float8 __ovld __cnfn max(float8, float); float16 __ovld __cnfn max(float16, float); #ifdef cl_khr_fp64 double __ovld __cnfn max(double, double); double2 __ovld __cnfn max(double2, double2); double3 __ovld __cnfn max(double3, double3); double4 __ovld __cnfn max(double4, double4); double8 __ovld __cnfn max(double8, double8); double16 __ovld __cnfn max(double16, double16); double2 __ovld __cnfn max(double2, double); double3 __ovld __cnfn max(double3, double); double4 __ovld __cnfn max(double4, double); double8 __ovld __cnfn max(double8, double); double16 __ovld __cnfn max(double16, double); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn max(half, half); half2 __ovld __cnfn max(half2, half2); half3 __ovld __cnfn max(half3, half3); half4 __ovld __cnfn max(half4, half4); half8 __ovld __cnfn max(half8, half8); half16 __ovld __cnfn max(half16, half16); half2 __ovld __cnfn max(half2, half); half3 __ovld __cnfn max(half3, half); half4 __ovld __cnfn max(half4, half); half8 __ovld __cnfn max(half8, half); half16 __ovld __cnfn max(half16, half); #endif //cl_khr_fp16 /** * Returns y if y < x, otherwise it returns x. If x and y * are infinite or NaN, the return values are undefined. 
*/ float __ovld __cnfn min(float, float); float2 __ovld __cnfn min(float2, float2); float3 __ovld __cnfn min(float3, float3); float4 __ovld __cnfn min(float4, float4); float8 __ovld __cnfn min(float8, float8); float16 __ovld __cnfn min(float16, float16); float2 __ovld __cnfn min(float2, float); float3 __ovld __cnfn min(float3, float); float4 __ovld __cnfn min(float4, float); float8 __ovld __cnfn min(float8, float); float16 __ovld __cnfn min(float16, float); #ifdef cl_khr_fp64 double __ovld __cnfn min(double, double); double2 __ovld __cnfn min(double2, double2); double3 __ovld __cnfn min(double3, double3); double4 __ovld __cnfn min(double4, double4); double8 __ovld __cnfn min(double8, double8); double16 __ovld __cnfn min(double16, double16); double2 __ovld __cnfn min(double2, double); double3 __ovld __cnfn min(double3, double); double4 __ovld __cnfn min(double4, double); double8 __ovld __cnfn min(double8, double); double16 __ovld __cnfn min(double16, double); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn min(half, half); half2 __ovld __cnfn min(half2, half2); half3 __ovld __cnfn min(half3, half3); half4 __ovld __cnfn min(half4, half4); half8 __ovld __cnfn min(half8, half8); half16 __ovld __cnfn min(half16, half16); half2 __ovld __cnfn min(half2, half); half3 __ovld __cnfn min(half3, half); half4 __ovld __cnfn min(half4, half); half8 __ovld __cnfn min(half8, half); half16 __ovld __cnfn min(half16, half); #endif //cl_khr_fp16 /** * Returns the linear blend of x & y implemented as: * x + (y - x) * a * a must be a value in the range 0.0 ... 1.0. If a is not * in the range 0.0 ... 1.0, the return values are * undefined. */ float __ovld __cnfn mix(float, float, float); float2 __ovld __cnfn mix(float2, float2, float2); float3 __ovld __cnfn mix(float3, float3, float3); float4 __ovld __cnfn mix(float4, float4, float4); float8 __ovld __cnfn mix(float8, float8, float8); float16 __ovld __cnfn mix(float16, float16, float16); float2 __ovld __cnfn mix(float2, float2, float); float3 __ovld __cnfn mix(float3, float3, float); float4 __ovld __cnfn mix(float4, float4, float); float8 __ovld __cnfn mix(float8, float8, float); float16 __ovld __cnfn mix(float16, float16, float); #ifdef cl_khr_fp64 double __ovld __cnfn mix(double, double, double); double2 __ovld __cnfn mix(double2, double2, double2); double3 __ovld __cnfn mix(double3, double3, double3); double4 __ovld __cnfn mix(double4, double4, double4); double8 __ovld __cnfn mix(double8, double8, double8); double16 __ovld __cnfn mix(double16, double16, double16); double2 __ovld __cnfn mix(double2, double2, double); double3 __ovld __cnfn mix(double3, double3, double); double4 __ovld __cnfn mix(double4, double4, double); double8 __ovld __cnfn mix(double8, double8, double); double16 __ovld __cnfn mix(double16, double16, double); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn mix(half, half, half); half2 __ovld __cnfn mix(half2, half2, half2); half3 __ovld __cnfn mix(half3, half3, half3); half4 __ovld __cnfn mix(half4, half4, half4); half8 __ovld __cnfn mix(half8, half8, half8); half16 __ovld __cnfn mix(half16, half16, half16); half2 __ovld __cnfn mix(half2, half2, half); half3 __ovld __cnfn mix(half3, half3, half); half4 __ovld __cnfn mix(half4, half4, half); half8 __ovld __cnfn mix(half8, half8, half); half16 __ovld __cnfn mix(half16, half16, half); #endif //cl_khr_fp16 /** * Converts degrees to radians, i.e. (PI / 180) * * degrees. 
*/ float __ovld __cnfn radians(float); float2 __ovld __cnfn radians(float2); float3 __ovld __cnfn radians(float3); float4 __ovld __cnfn radians(float4); float8 __ovld __cnfn radians(float8); float16 __ovld __cnfn radians(float16); #ifdef cl_khr_fp64 double __ovld __cnfn radians(double); double2 __ovld __cnfn radians(double2); double3 __ovld __cnfn radians(double3); double4 __ovld __cnfn radians(double4); double8 __ovld __cnfn radians(double8); double16 __ovld __cnfn radians(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn radians(half); half2 __ovld __cnfn radians(half2); half3 __ovld __cnfn radians(half3); half4 __ovld __cnfn radians(half4); half8 __ovld __cnfn radians(half8); half16 __ovld __cnfn radians(half16); #endif //cl_khr_fp16 /** * Returns 0.0 if x < edge, otherwise it returns 1.0. */ float __ovld __cnfn step(float, float); float2 __ovld __cnfn step(float2, float2); float3 __ovld __cnfn step(float3, float3); float4 __ovld __cnfn step(float4, float4); float8 __ovld __cnfn step(float8, float8); float16 __ovld __cnfn step(float16, float16); float2 __ovld __cnfn step(float, float2); float3 __ovld __cnfn step(float, float3); float4 __ovld __cnfn step(float, float4); float8 __ovld __cnfn step(float, float8); float16 __ovld __cnfn step(float, float16); #ifdef cl_khr_fp64 double __ovld __cnfn step(double, double); double2 __ovld __cnfn step(double2, double2); double3 __ovld __cnfn step(double3, double3); double4 __ovld __cnfn step(double4, double4); double8 __ovld __cnfn step(double8, double8); double16 __ovld __cnfn step(double16, double16); double2 __ovld __cnfn step(double, double2); double3 __ovld __cnfn step(double, double3); double4 __ovld __cnfn step(double, double4); double8 __ovld __cnfn step(double, double8); double16 __ovld __cnfn step(double, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn step(half, half); half2 __ovld __cnfn step(half2, half2); half3 __ovld __cnfn step(half3, half3); half4 __ovld __cnfn step(half4, half4); half8 __ovld __cnfn step(half8, half8); half16 __ovld __cnfn step(half16, half16); half2 __ovld __cnfn step(half, half2); half3 __ovld __cnfn step(half, half3); half4 __ovld __cnfn step(half, half4); half8 __ovld __cnfn step(half, half8); half16 __ovld __cnfn step(half, half16); #endif //cl_khr_fp16 /** * Returns 0.0 if x <= edge0 and 1.0 if x >= edge1 and * performs smooth Hermite interpolation between 0 * and 1when edge0 < x < edge1. This is useful in * cases where you would want a threshold function * with a smooth transition. * This is equivalent to: * gentype t; * t = clamp ((x - edge0) / (edge1 - edge0), 0, 1); * return t * t * (3 - 2 * t); * Results are undefined if edge0 >= edge1 or if x, * edge0 or edge1 is a NaN. 
*/ float __ovld __cnfn smoothstep(float, float, float); float2 __ovld __cnfn smoothstep(float2, float2, float2); float3 __ovld __cnfn smoothstep(float3, float3, float3); float4 __ovld __cnfn smoothstep(float4, float4, float4); float8 __ovld __cnfn smoothstep(float8, float8, float8); float16 __ovld __cnfn smoothstep(float16, float16, float16); float2 __ovld __cnfn smoothstep(float, float, float2); float3 __ovld __cnfn smoothstep(float, float, float3); float4 __ovld __cnfn smoothstep(float, float, float4); float8 __ovld __cnfn smoothstep(float, float, float8); float16 __ovld __cnfn smoothstep(float, float, float16); #ifdef cl_khr_fp64 double __ovld __cnfn smoothstep(double, double, double); double2 __ovld __cnfn smoothstep(double2, double2, double2); double3 __ovld __cnfn smoothstep(double3, double3, double3); double4 __ovld __cnfn smoothstep(double4, double4, double4); double8 __ovld __cnfn smoothstep(double8, double8, double8); double16 __ovld __cnfn smoothstep(double16, double16, double16); double2 __ovld __cnfn smoothstep(double, double, double2); double3 __ovld __cnfn smoothstep(double, double, double3); double4 __ovld __cnfn smoothstep(double, double, double4); double8 __ovld __cnfn smoothstep(double, double, double8); double16 __ovld __cnfn smoothstep(double, double, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn smoothstep(half, half, half); half2 __ovld __cnfn smoothstep(half2, half2, half2); half3 __ovld __cnfn smoothstep(half3, half3, half3); half4 __ovld __cnfn smoothstep(half4, half4, half4); half8 __ovld __cnfn smoothstep(half8, half8, half8); half16 __ovld __cnfn smoothstep(half16, half16, half16); half2 __ovld __cnfn smoothstep(half, half, half2); half3 __ovld __cnfn smoothstep(half, half, half3); half4 __ovld __cnfn smoothstep(half, half, half4); half8 __ovld __cnfn smoothstep(half, half, half8); half16 __ovld __cnfn smoothstep(half, half, half16); #endif //cl_khr_fp16 /** * Returns 1.0 if x > 0, -0.0 if x = -0.0, +0.0 if x = * +0.0, or -1.0 if x < 0. Returns 0.0 if x is a NaN. */ float __ovld __cnfn sign(float); float2 __ovld __cnfn sign(float2); float3 __ovld __cnfn sign(float3); float4 __ovld __cnfn sign(float4); float8 __ovld __cnfn sign(float8); float16 __ovld __cnfn sign(float16); #ifdef cl_khr_fp64 double __ovld __cnfn sign(double); double2 __ovld __cnfn sign(double2); double3 __ovld __cnfn sign(double3); double4 __ovld __cnfn sign(double4); double8 __ovld __cnfn sign(double8); double16 __ovld __cnfn sign(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn sign(half); half2 __ovld __cnfn sign(half2); half3 __ovld __cnfn sign(half3); half4 __ovld __cnfn sign(half4); half8 __ovld __cnfn sign(half8); half16 __ovld __cnfn sign(half16); #endif //cl_khr_fp16 // OpenCL v1.1 s6.11.5, v1.2 s6.12.5, v2.0 s6.13.5 - Geometric Functions /** * Returns the cross product of p0.xyz and p1.xyz. The * w component of float4 result returned will be 0.0. */ float4 __ovld __cnfn cross(float4, float4); float3 __ovld __cnfn cross(float3, float3); #ifdef cl_khr_fp64 double4 __ovld __cnfn cross(double4, double4); double3 __ovld __cnfn cross(double3, double3); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half4 __ovld __cnfn cross(half4, half4); half3 __ovld __cnfn cross(half3, half3); #endif //cl_khr_fp16 /** * Compute dot product. 
*/ float __ovld __cnfn dot(float, float); float __ovld __cnfn dot(float2, float2); float __ovld __cnfn dot(float3, float3); float __ovld __cnfn dot(float4, float4); #ifdef cl_khr_fp64 double __ovld __cnfn dot(double, double); double __ovld __cnfn dot(double2, double2); double __ovld __cnfn dot(double3, double3); double __ovld __cnfn dot(double4, double4); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn dot(half, half); half __ovld __cnfn dot(half2, half2); half __ovld __cnfn dot(half3, half3); half __ovld __cnfn dot(half4, half4); #endif //cl_khr_fp16 /** * Returns the distance between p0 and p1. This is * calculated as length(p0 - p1). */ float __ovld __cnfn distance(float, float); float __ovld __cnfn distance(float2, float2); float __ovld __cnfn distance(float3, float3); float __ovld __cnfn distance(float4, float4); #ifdef cl_khr_fp64 double __ovld __cnfn distance(double, double); double __ovld __cnfn distance(double2, double2); double __ovld __cnfn distance(double3, double3); double __ovld __cnfn distance(double4, double4); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn distance(half, half); half __ovld __cnfn distance(half2, half2); half __ovld __cnfn distance(half3, half3); half __ovld __cnfn distance(half4, half4); #endif //cl_khr_fp16 /** * Returns the length of vector p, i.e., * sqrt(p.x^2 + p.y^2 + ...) */ float __ovld __cnfn length(float); float __ovld __cnfn length(float2); float __ovld __cnfn length(float3); float __ovld __cnfn length(float4); #ifdef cl_khr_fp64 double __ovld __cnfn length(double); double __ovld __cnfn length(double2); double __ovld __cnfn length(double3); double __ovld __cnfn length(double4); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn length(half); half __ovld __cnfn length(half2); half __ovld __cnfn length(half3); half __ovld __cnfn length(half4); #endif //cl_khr_fp16 /** * Returns a vector in the same direction as p but with a * length of 1. */ float __ovld __cnfn normalize(float); float2 __ovld __cnfn normalize(float2); float3 __ovld __cnfn normalize(float3); float4 __ovld __cnfn normalize(float4); #ifdef cl_khr_fp64 double __ovld __cnfn normalize(double); double2 __ovld __cnfn normalize(double2); double3 __ovld __cnfn normalize(double3); double4 __ovld __cnfn normalize(double4); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn normalize(half); half2 __ovld __cnfn normalize(half2); half3 __ovld __cnfn normalize(half3); half4 __ovld __cnfn normalize(half4); #endif //cl_khr_fp16 /** * Returns fast_length(p0 - p1). */ float __ovld __cnfn fast_distance(float, float); float __ovld __cnfn fast_distance(float2, float2); float __ovld __cnfn fast_distance(float3, float3); float __ovld __cnfn fast_distance(float4, float4); /** * Returns the length of vector p computed as: * half_sqrt(p.x^2 + p.y^2 + ...) */ float __ovld __cnfn fast_length(float); float __ovld __cnfn fast_length(float2); float __ovld __cnfn fast_length(float3); float __ovld __cnfn fast_length(float4); /** * Returns a vector in the same direction as p but with a * length of 1. fast_normalize is computed as: * p * half_rsqrt (p.x^2 + p.y^2 + ... ) * The result shall be within 8192 ulps error from the * infinitely precise result of * if (all(p == 0.0f)) * result = p; * else * result = p / sqrt (p.x^2 + p.y^2 + ...); * with the following exceptions: * 1) If the sum of squares is greater than FLT_MAX * then the values of the floating-point components in the * result vector are undefined.
* 2) If the sum of squares is less than FLT_MIN then * the implementation may return back p. * 3) If the device is in "denorms are flushed to zero" * mode, individual operand elements with magnitude * less than sqrt(FLT_MIN) may be flushed to zero * before proceeding with the calculation. */ float __ovld __cnfn fast_normalize(float); float2 __ovld __cnfn fast_normalize(float2); float3 __ovld __cnfn fast_normalize(float3); float4 __ovld __cnfn fast_normalize(float4); // OpenCL v1.1 s6.11.6, v1.2 s6.12.6, v2.0 s6.13.6 - Relational Functions /** * intn isequal (floatn x, floatn y) * Returns the component-wise compare of x == y. */ int __ovld __cnfn isequal(float, float); int2 __ovld __cnfn isequal(float2, float2); int3 __ovld __cnfn isequal(float3, float3); int4 __ovld __cnfn isequal(float4, float4); int8 __ovld __cnfn isequal(float8, float8); int16 __ovld __cnfn isequal(float16, float16); #ifdef cl_khr_fp64 int __ovld __cnfn isequal(double, double); long2 __ovld __cnfn isequal(double2, double2); long3 __ovld __cnfn isequal(double3, double3); long4 __ovld __cnfn isequal(double4, double4); long8 __ovld __cnfn isequal(double8, double8); long16 __ovld __cnfn isequal(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isequal(half, half); short2 __ovld __cnfn isequal(half2, half2); short3 __ovld __cnfn isequal(half3, half3); short4 __ovld __cnfn isequal(half4, half4); short8 __ovld __cnfn isequal(half8, half8); short16 __ovld __cnfn isequal(half16, half16); #endif //cl_khr_fp16 /** * Returns the component-wise compare of x != y. */ int __ovld __cnfn isnotequal(float, float); int2 __ovld __cnfn isnotequal(float2, float2); int3 __ovld __cnfn isnotequal(float3, float3); int4 __ovld __cnfn isnotequal(float4, float4); int8 __ovld __cnfn isnotequal(float8, float8); int16 __ovld __cnfn isnotequal(float16, float16); #ifdef cl_khr_fp64 int __ovld __cnfn isnotequal(double, double); long2 __ovld __cnfn isnotequal(double2, double2); long3 __ovld __cnfn isnotequal(double3, double3); long4 __ovld __cnfn isnotequal(double4, double4); long8 __ovld __cnfn isnotequal(double8, double8); long16 __ovld __cnfn isnotequal(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isnotequal(half, half); short2 __ovld __cnfn isnotequal(half2, half2); short3 __ovld __cnfn isnotequal(half3, half3); short4 __ovld __cnfn isnotequal(half4, half4); short8 __ovld __cnfn isnotequal(half8, half8); short16 __ovld __cnfn isnotequal(half16, half16); #endif //cl_khr_fp16 /** * Returns the component-wise compare of x > y. */ int __ovld __cnfn isgreater(float, float); int2 __ovld __cnfn isgreater(float2, float2); int3 __ovld __cnfn isgreater(float3, float3); int4 __ovld __cnfn isgreater(float4, float4); int8 __ovld __cnfn isgreater(float8, float8); int16 __ovld __cnfn isgreater(float16, float16); #ifdef cl_khr_fp64 int __ovld __cnfn isgreater(double, double); long2 __ovld __cnfn isgreater(double2, double2); long3 __ovld __cnfn isgreater(double3, double3); long4 __ovld __cnfn isgreater(double4, double4); long8 __ovld __cnfn isgreater(double8, double8); long16 __ovld __cnfn isgreater(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isgreater(half, half); short2 __ovld __cnfn isgreater(half2, half2); short3 __ovld __cnfn isgreater(half3, half3); short4 __ovld __cnfn isgreater(half4, half4); short8 __ovld __cnfn isgreater(half8, half8); short16 __ovld __cnfn isgreater(half16, half16); #endif //cl_khr_fp16 /** * Returns the component-wise compare of x >= y. 
*/ int __ovld __cnfn isgreaterequal(float, float); int2 __ovld __cnfn isgreaterequal(float2, float2); int3 __ovld __cnfn isgreaterequal(float3, float3); int4 __ovld __cnfn isgreaterequal(float4, float4); int8 __ovld __cnfn isgreaterequal(float8, float8); int16 __ovld __cnfn isgreaterequal(float16, float16); #ifdef cl_khr_fp64 int __ovld __cnfn isgreaterequal(double, double); long2 __ovld __cnfn isgreaterequal(double2, double2); long3 __ovld __cnfn isgreaterequal(double3, double3); long4 __ovld __cnfn isgreaterequal(double4, double4); long8 __ovld __cnfn isgreaterequal(double8, double8); long16 __ovld __cnfn isgreaterequal(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isgreaterequal(half, half); short2 __ovld __cnfn isgreaterequal(half2, half2); short3 __ovld __cnfn isgreaterequal(half3, half3); short4 __ovld __cnfn isgreaterequal(half4, half4); short8 __ovld __cnfn isgreaterequal(half8, half8); short16 __ovld __cnfn isgreaterequal(half16, half16); #endif //cl_khr_fp16 /** * Returns the component-wise compare of x < y. */ int __ovld __cnfn isless(float, float); int2 __ovld __cnfn isless(float2, float2); int3 __ovld __cnfn isless(float3, float3); int4 __ovld __cnfn isless(float4, float4); int8 __ovld __cnfn isless(float8, float8); int16 __ovld __cnfn isless(float16, float16); #ifdef cl_khr_fp64 int __ovld __cnfn isless(double, double); long2 __ovld __cnfn isless(double2, double2); long3 __ovld __cnfn isless(double3, double3); long4 __ovld __cnfn isless(double4, double4); long8 __ovld __cnfn isless(double8, double8); long16 __ovld __cnfn isless(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isless(half, half); short2 __ovld __cnfn isless(half2, half2); short3 __ovld __cnfn isless(half3, half3); short4 __ovld __cnfn isless(half4, half4); short8 __ovld __cnfn isless(half8, half8); short16 __ovld __cnfn isless(half16, half16); #endif //cl_khr_fp16 /** * Returns the component-wise compare of x <= y. */ int __ovld __cnfn islessequal(float, float); int2 __ovld __cnfn islessequal(float2, float2); int3 __ovld __cnfn islessequal(float3, float3); int4 __ovld __cnfn islessequal(float4, float4); int8 __ovld __cnfn islessequal(float8, float8); int16 __ovld __cnfn islessequal(float16, float16); #ifdef cl_khr_fp64 int __ovld __cnfn islessequal(double, double); long2 __ovld __cnfn islessequal(double2, double2); long3 __ovld __cnfn islessequal(double3, double3); long4 __ovld __cnfn islessequal(double4, double4); long8 __ovld __cnfn islessequal(double8, double8); long16 __ovld __cnfn islessequal(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn islessequal(half, half); short2 __ovld __cnfn islessequal(half2, half2); short3 __ovld __cnfn islessequal(half3, half3); short4 __ovld __cnfn islessequal(half4, half4); short8 __ovld __cnfn islessequal(half8, half8); short16 __ovld __cnfn islessequal(half16, half16); #endif //cl_khr_fp16 /** * Returns the component-wise compare of * (x < y) || (x > y) . 
*/ int __ovld __cnfn islessgreater(float, float); int2 __ovld __cnfn islessgreater(float2, float2); int3 __ovld __cnfn islessgreater(float3, float3); int4 __ovld __cnfn islessgreater(float4, float4); int8 __ovld __cnfn islessgreater(float8, float8); int16 __ovld __cnfn islessgreater(float16, float16); #ifdef cl_khr_fp64 int __ovld __cnfn islessgreater(double, double); long2 __ovld __cnfn islessgreater(double2, double2); long3 __ovld __cnfn islessgreater(double3, double3); long4 __ovld __cnfn islessgreater(double4, double4); long8 __ovld __cnfn islessgreater(double8, double8); long16 __ovld __cnfn islessgreater(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn islessgreater(half, half); short2 __ovld __cnfn islessgreater(half2, half2); short3 __ovld __cnfn islessgreater(half3, half3); short4 __ovld __cnfn islessgreater(half4, half4); short8 __ovld __cnfn islessgreater(half8, half8); short16 __ovld __cnfn islessgreater(half16, half16); #endif //cl_khr_fp16 /** * Test for finite value. */ int __ovld __cnfn isfinite(float); int2 __ovld __cnfn isfinite(float2); int3 __ovld __cnfn isfinite(float3); int4 __ovld __cnfn isfinite(float4); int8 __ovld __cnfn isfinite(float8); int16 __ovld __cnfn isfinite(float16); #ifdef cl_khr_fp64 int __ovld __cnfn isfinite(double); long2 __ovld __cnfn isfinite(double2); long3 __ovld __cnfn isfinite(double3); long4 __ovld __cnfn isfinite(double4); long8 __ovld __cnfn isfinite(double8); long16 __ovld __cnfn isfinite(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isfinite(half); short2 __ovld __cnfn isfinite(half2); short3 __ovld __cnfn isfinite(half3); short4 __ovld __cnfn isfinite(half4); short8 __ovld __cnfn isfinite(half8); short16 __ovld __cnfn isfinite(half16); #endif //cl_khr_fp16 /** * Test for infinity value (+ve or -ve) . */ int __ovld __cnfn isinf(float); int2 __ovld __cnfn isinf(float2); int3 __ovld __cnfn isinf(float3); int4 __ovld __cnfn isinf(float4); int8 __ovld __cnfn isinf(float8); int16 __ovld __cnfn isinf(float16); #ifdef cl_khr_fp64 int __ovld __cnfn isinf(double); long2 __ovld __cnfn isinf(double2); long3 __ovld __cnfn isinf(double3); long4 __ovld __cnfn isinf(double4); long8 __ovld __cnfn isinf(double8); long16 __ovld __cnfn isinf(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isinf(half); short2 __ovld __cnfn isinf(half2); short3 __ovld __cnfn isinf(half3); short4 __ovld __cnfn isinf(half4); short8 __ovld __cnfn isinf(half8); short16 __ovld __cnfn isinf(half16); #endif //cl_khr_fp16 /** * Test for a NaN. */ int __ovld __cnfn isnan(float); int2 __ovld __cnfn isnan(float2); int3 __ovld __cnfn isnan(float3); int4 __ovld __cnfn isnan(float4); int8 __ovld __cnfn isnan(float8); int16 __ovld __cnfn isnan(float16); #ifdef cl_khr_fp64 int __ovld __cnfn isnan(double); long2 __ovld __cnfn isnan(double2); long3 __ovld __cnfn isnan(double3); long4 __ovld __cnfn isnan(double4); long8 __ovld __cnfn isnan(double8); long16 __ovld __cnfn isnan(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isnan(half); short2 __ovld __cnfn isnan(half2); short3 __ovld __cnfn isnan(half3); short4 __ovld __cnfn isnan(half4); short8 __ovld __cnfn isnan(half8); short16 __ovld __cnfn isnan(half16); #endif //cl_khr_fp16 /** * Test for a normal value. 
*/ int __ovld __cnfn isnormal(float); int2 __ovld __cnfn isnormal(float2); int3 __ovld __cnfn isnormal(float3); int4 __ovld __cnfn isnormal(float4); int8 __ovld __cnfn isnormal(float8); int16 __ovld __cnfn isnormal(float16); #ifdef cl_khr_fp64 int __ovld __cnfn isnormal(double); long2 __ovld __cnfn isnormal(double2); long3 __ovld __cnfn isnormal(double3); long4 __ovld __cnfn isnormal(double4); long8 __ovld __cnfn isnormal(double8); long16 __ovld __cnfn isnormal(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isnormal(half); short2 __ovld __cnfn isnormal(half2); short3 __ovld __cnfn isnormal(half3); short4 __ovld __cnfn isnormal(half4); short8 __ovld __cnfn isnormal(half8); short16 __ovld __cnfn isnormal(half16); #endif //cl_khr_fp16 /** * Test if arguments are ordered. isordered() takes * arguments x and y, and returns the result * isequal(x, x) && isequal(y, y). */ int __ovld __cnfn isordered(float, float); int2 __ovld __cnfn isordered(float2, float2); int3 __ovld __cnfn isordered(float3, float3); int4 __ovld __cnfn isordered(float4, float4); int8 __ovld __cnfn isordered(float8, float8); int16 __ovld __cnfn isordered(float16, float16); #ifdef cl_khr_fp64 int __ovld __cnfn isordered(double, double); long2 __ovld __cnfn isordered(double2, double2); long3 __ovld __cnfn isordered(double3, double3); long4 __ovld __cnfn isordered(double4, double4); long8 __ovld __cnfn isordered(double8, double8); long16 __ovld __cnfn isordered(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isordered(half, half); short2 __ovld __cnfn isordered(half2, half2); short3 __ovld __cnfn isordered(half3, half3); short4 __ovld __cnfn isordered(half4, half4); short8 __ovld __cnfn isordered(half8, half8); short16 __ovld __cnfn isordered(half16, half16); #endif //cl_khr_fp16 /** * Test if arguments are unordered. isunordered() * takes arguments x and y, returning non-zero if x or y * is NaN, and zero otherwise. */ int __ovld __cnfn isunordered(float, float); int2 __ovld __cnfn isunordered(float2, float2); int3 __ovld __cnfn isunordered(float3, float3); int4 __ovld __cnfn isunordered(float4, float4); int8 __ovld __cnfn isunordered(float8, float8); int16 __ovld __cnfn isunordered(float16, float16); #ifdef cl_khr_fp64 int __ovld __cnfn isunordered(double, double); long2 __ovld __cnfn isunordered(double2, double2); long3 __ovld __cnfn isunordered(double3, double3); long4 __ovld __cnfn isunordered(double4, double4); long8 __ovld __cnfn isunordered(double8, double8); long16 __ovld __cnfn isunordered(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isunordered(half, half); short2 __ovld __cnfn isunordered(half2, half2); short3 __ovld __cnfn isunordered(half3, half3); short4 __ovld __cnfn isunordered(half4, half4); short8 __ovld __cnfn isunordered(half8, half8); short16 __ovld __cnfn isunordered(half16, half16); #endif //cl_khr_fp16 /** * Test for sign bit. The scalar version of the function * returns a 1 if the sign bit in the float is set else returns * 0. The vector version of the function returns the * following for each component in floatn: a -1 if the * sign bit in the float is set else returns 0. 
*/ int __ovld __cnfn signbit(float); int2 __ovld __cnfn signbit(float2); int3 __ovld __cnfn signbit(float3); int4 __ovld __cnfn signbit(float4); int8 __ovld __cnfn signbit(float8); int16 __ovld __cnfn signbit(float16); #ifdef cl_khr_fp64 int __ovld __cnfn signbit(double); long2 __ovld __cnfn signbit(double2); long3 __ovld __cnfn signbit(double3); long4 __ovld __cnfn signbit(double4); long8 __ovld __cnfn signbit(double8); long16 __ovld __cnfn signbit(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn signbit(half); short2 __ovld __cnfn signbit(half2); short3 __ovld __cnfn signbit(half3); short4 __ovld __cnfn signbit(half4); short8 __ovld __cnfn signbit(half8); short16 __ovld __cnfn signbit(half16); #endif //cl_khr_fp16 /** * Returns 1 if the most significant bit in any component * of x is set; otherwise returns 0. */ int __ovld __cnfn any(char); int __ovld __cnfn any(char2); int __ovld __cnfn any(char3); int __ovld __cnfn any(char4); int __ovld __cnfn any(char8); int __ovld __cnfn any(char16); int __ovld __cnfn any(short); int __ovld __cnfn any(short2); int __ovld __cnfn any(short3); int __ovld __cnfn any(short4); int __ovld __cnfn any(short8); int __ovld __cnfn any(short16); int __ovld __cnfn any(int); int __ovld __cnfn any(int2); int __ovld __cnfn any(int3); int __ovld __cnfn any(int4); int __ovld __cnfn any(int8); int __ovld __cnfn any(int16); int __ovld __cnfn any(long); int __ovld __cnfn any(long2); int __ovld __cnfn any(long3); int __ovld __cnfn any(long4); int __ovld __cnfn any(long8); int __ovld __cnfn any(long16); /** * Returns 1 if the most significant bit in all components * of x is set; otherwise returns 0. */ int __ovld __cnfn all(char); int __ovld __cnfn all(char2); int __ovld __cnfn all(char3); int __ovld __cnfn all(char4); int __ovld __cnfn all(char8); int __ovld __cnfn all(char16); int __ovld __cnfn all(short); int __ovld __cnfn all(short2); int __ovld __cnfn all(short3); int __ovld __cnfn all(short4); int __ovld __cnfn all(short8); int __ovld __cnfn all(short16); int __ovld __cnfn all(int); int __ovld __cnfn all(int2); int __ovld __cnfn all(int3); int __ovld __cnfn all(int4); int __ovld __cnfn all(int8); int __ovld __cnfn all(int16); int __ovld __cnfn all(long); int __ovld __cnfn all(long2); int __ovld __cnfn all(long3); int __ovld __cnfn all(long4); int __ovld __cnfn all(long8); int __ovld __cnfn all(long16); /** * Each bit of the result is the corresponding bit of a if * the corresponding bit of c is 0. Otherwise it is the * corresponding bit of b. 
*/ char __ovld __cnfn bitselect(char, char, char); uchar __ovld __cnfn bitselect(uchar, uchar, uchar); char2 __ovld __cnfn bitselect(char2, char2, char2); uchar2 __ovld __cnfn bitselect(uchar2, uchar2, uchar2); char3 __ovld __cnfn bitselect(char3, char3, char3); uchar3 __ovld __cnfn bitselect(uchar3, uchar3, uchar3); char4 __ovld __cnfn bitselect(char4, char4, char4); uchar4 __ovld __cnfn bitselect(uchar4, uchar4, uchar4); char8 __ovld __cnfn bitselect(char8, char8, char8); uchar8 __ovld __cnfn bitselect(uchar8, uchar8, uchar8); char16 __ovld __cnfn bitselect(char16, char16, char16); uchar16 __ovld __cnfn bitselect(uchar16, uchar16, uchar16); short __ovld __cnfn bitselect(short, short, short); ushort __ovld __cnfn bitselect(ushort, ushort, ushort); short2 __ovld __cnfn bitselect(short2, short2, short2); ushort2 __ovld __cnfn bitselect(ushort2, ushort2, ushort2); short3 __ovld __cnfn bitselect(short3, short3, short3); ushort3 __ovld __cnfn bitselect(ushort3, ushort3, ushort3); short4 __ovld __cnfn bitselect(short4, short4, short4); ushort4 __ovld __cnfn bitselect(ushort4, ushort4, ushort4); short8 __ovld __cnfn bitselect(short8, short8, short8); ushort8 __ovld __cnfn bitselect(ushort8, ushort8, ushort8); short16 __ovld __cnfn bitselect(short16, short16, short16); ushort16 __ovld __cnfn bitselect(ushort16, ushort16, ushort16); int __ovld __cnfn bitselect(int, int, int); uint __ovld __cnfn bitselect(uint, uint, uint); int2 __ovld __cnfn bitselect(int2, int2, int2); uint2 __ovld __cnfn bitselect(uint2, uint2, uint2); int3 __ovld __cnfn bitselect(int3, int3, int3); uint3 __ovld __cnfn bitselect(uint3, uint3, uint3); int4 __ovld __cnfn bitselect(int4, int4, int4); uint4 __ovld __cnfn bitselect(uint4, uint4, uint4); int8 __ovld __cnfn bitselect(int8, int8, int8); uint8 __ovld __cnfn bitselect(uint8, uint8, uint8); int16 __ovld __cnfn bitselect(int16, int16, int16); uint16 __ovld __cnfn bitselect(uint16, uint16, uint16); long __ovld __cnfn bitselect(long, long, long); ulong __ovld __cnfn bitselect(ulong, ulong, ulong); long2 __ovld __cnfn bitselect(long2, long2, long2); ulong2 __ovld __cnfn bitselect(ulong2, ulong2, ulong2); long3 __ovld __cnfn bitselect(long3, long3, long3); ulong3 __ovld __cnfn bitselect(ulong3, ulong3, ulong3); long4 __ovld __cnfn bitselect(long4, long4, long4); ulong4 __ovld __cnfn bitselect(ulong4, ulong4, ulong4); long8 __ovld __cnfn bitselect(long8, long8, long8); ulong8 __ovld __cnfn bitselect(ulong8, ulong8, ulong8); long16 __ovld __cnfn bitselect(long16, long16, long16); ulong16 __ovld __cnfn bitselect(ulong16, ulong16, ulong16); float __ovld __cnfn bitselect(float, float, float); float2 __ovld __cnfn bitselect(float2, float2, float2); float3 __ovld __cnfn bitselect(float3, float3, float3); float4 __ovld __cnfn bitselect(float4, float4, float4); float8 __ovld __cnfn bitselect(float8, float8, float8); float16 __ovld __cnfn bitselect(float16, float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn bitselect(double, double, double); double2 __ovld __cnfn bitselect(double2, double2, double2); double3 __ovld __cnfn bitselect(double3, double3, double3); double4 __ovld __cnfn bitselect(double4, double4, double4); double8 __ovld __cnfn bitselect(double8, double8, double8); double16 __ovld __cnfn bitselect(double16, double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn bitselect(half, half, half); half2 __ovld __cnfn bitselect(half2, half2, half2); half3 __ovld __cnfn bitselect(half3, half3, half3); half4 __ovld __cnfn bitselect(half4, half4, half4); 
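/*
 * [Editor's note: illustrative sketch, not part of the original header.]
 * A minimal kernel showing the difference between bitselect() and select()
 * (select() is declared just below). The kernel name and buffer layout are
 * hypothetical. bitselect() merges individual bits of a and b according to the
 * bits of c; select() on vector types picks whole components based on the MSB
 * of each component of c.
 */
__kernel void blend_masks(__global const uint4 *a,
                          __global const uint4 *b,
                          __global const uint4 *mask,
                          __global uint4 *out) {
    size_t gid = get_global_id(0);
    /* Bit-level merge: each result bit comes from b where the mask bit is 1,
       and from a where it is 0. */
    uint4 bits = bitselect(a[gid], b[gid], mask[gid]);
    /* Component-level merge: a whole lane of b is chosen when the MSB of the
       corresponding mask lane is set; otherwise the lane of a is kept. */
    uint4 lanes = select(a[gid], b[gid], mask[gid]);
    out[gid] = bits ^ lanes; /* arbitrary combination, just to use both results */
}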
half8 __ovld __cnfn bitselect(half8, half8, half8); half16 __ovld __cnfn bitselect(half16, half16, half16); #endif //cl_khr_fp16 /** * For each component of a vector type, * result[i] = if MSB of c[i] is set ? b[i] : a[i]. * For a scalar type, result = c ? b : a. * b and a must have the same type. * c must have the same number of elements and bits as a. */ char __ovld __cnfn select(char, char, char); uchar __ovld __cnfn select(uchar, uchar, char); char2 __ovld __cnfn select(char2, char2, char2); uchar2 __ovld __cnfn select(uchar2, uchar2, char2); char3 __ovld __cnfn select(char3, char3, char3); uchar3 __ovld __cnfn select(uchar3, uchar3, char3); char4 __ovld __cnfn select(char4, char4, char4); uchar4 __ovld __cnfn select(uchar4, uchar4, char4); char8 __ovld __cnfn select(char8, char8, char8); uchar8 __ovld __cnfn select(uchar8, uchar8, char8); char16 __ovld __cnfn select(char16, char16, char16); uchar16 __ovld __cnfn select(uchar16, uchar16, char16); short __ovld __cnfn select(short, short, short); ushort __ovld __cnfn select(ushort, ushort, short); short2 __ovld __cnfn select(short2, short2, short2); ushort2 __ovld __cnfn select(ushort2, ushort2, short2); short3 __ovld __cnfn select(short3, short3, short3); ushort3 __ovld __cnfn select(ushort3, ushort3, short3); short4 __ovld __cnfn select(short4, short4, short4); ushort4 __ovld __cnfn select(ushort4, ushort4, short4); short8 __ovld __cnfn select(short8, short8, short8); ushort8 __ovld __cnfn select(ushort8, ushort8, short8); short16 __ovld __cnfn select(short16, short16, short16); ushort16 __ovld __cnfn select(ushort16, ushort16, short16); int __ovld __cnfn select(int, int, int); uint __ovld __cnfn select(uint, uint, int); int2 __ovld __cnfn select(int2, int2, int2); uint2 __ovld __cnfn select(uint2, uint2, int2); int3 __ovld __cnfn select(int3, int3, int3); uint3 __ovld __cnfn select(uint3, uint3, int3); int4 __ovld __cnfn select(int4, int4, int4); uint4 __ovld __cnfn select(uint4, uint4, int4); int8 __ovld __cnfn select(int8, int8, int8); uint8 __ovld __cnfn select(uint8, uint8, int8); int16 __ovld __cnfn select(int16, int16, int16); uint16 __ovld __cnfn select(uint16, uint16, int16); float __ovld __cnfn select(float, float, int); float2 __ovld __cnfn select(float2, float2, int2); float3 __ovld __cnfn select(float3, float3, int3); float4 __ovld __cnfn select(float4, float4, int4); float8 __ovld __cnfn select(float8, float8, int8); float16 __ovld __cnfn select(float16, float16, int16); long __ovld __cnfn select(long, long, long); ulong __ovld __cnfn select(ulong, ulong, long); long2 __ovld __cnfn select(long2, long2, long2); ulong2 __ovld __cnfn select(ulong2, ulong2, long2); long3 __ovld __cnfn select(long3, long3, long3); ulong3 __ovld __cnfn select(ulong3, ulong3, long3); long4 __ovld __cnfn select(long4, long4, long4); ulong4 __ovld __cnfn select(ulong4, ulong4, long4); long8 __ovld __cnfn select(long8, long8, long8); ulong8 __ovld __cnfn select(ulong8, ulong8, long8); long16 __ovld __cnfn select(long16, long16, long16); ulong16 __ovld __cnfn select(ulong16, ulong16, long16); char __ovld __cnfn select(char, char, uchar); uchar __ovld __cnfn select(uchar, uchar, uchar); char2 __ovld __cnfn select(char2, char2, uchar2); uchar2 __ovld __cnfn select(uchar2, uchar2, uchar2); char3 __ovld __cnfn select(char3, char3, uchar3); uchar3 __ovld __cnfn select(uchar3, uchar3, uchar3); char4 __ovld __cnfn select(char4, char4, uchar4); uchar4 __ovld __cnfn select(uchar4, uchar4, uchar4); char8 __ovld __cnfn select(char8, char8, uchar8); uchar8 
__ovld __cnfn select(uchar8, uchar8, uchar8); char16 __ovld __cnfn select(char16, char16, uchar16); uchar16 __ovld __cnfn select(uchar16, uchar16, uchar16); short __ovld __cnfn select(short, short, ushort); ushort __ovld __cnfn select(ushort, ushort, ushort); short2 __ovld __cnfn select(short2, short2, ushort2); ushort2 __ovld __cnfn select(ushort2, ushort2, ushort2); short3 __ovld __cnfn select(short3, short3, ushort3); ushort3 __ovld __cnfn select(ushort3, ushort3, ushort3); short4 __ovld __cnfn select(short4, short4, ushort4); ushort4 __ovld __cnfn select(ushort4, ushort4, ushort4); short8 __ovld __cnfn select(short8, short8, ushort8); ushort8 __ovld __cnfn select(ushort8, ushort8, ushort8); short16 __ovld __cnfn select(short16, short16, ushort16); ushort16 __ovld __cnfn select(ushort16, ushort16, ushort16); int __ovld __cnfn select(int, int, uint); uint __ovld __cnfn select(uint, uint, uint); int2 __ovld __cnfn select(int2, int2, uint2); uint2 __ovld __cnfn select(uint2, uint2, uint2); int3 __ovld __cnfn select(int3, int3, uint3); uint3 __ovld __cnfn select(uint3, uint3, uint3); int4 __ovld __cnfn select(int4, int4, uint4); uint4 __ovld __cnfn select(uint4, uint4, uint4); int8 __ovld __cnfn select(int8, int8, uint8); uint8 __ovld __cnfn select(uint8, uint8, uint8); int16 __ovld __cnfn select(int16, int16, uint16); uint16 __ovld __cnfn select(uint16, uint16, uint16); float __ovld __cnfn select(float, float, uint); float2 __ovld __cnfn select(float2, float2, uint2); float3 __ovld __cnfn select(float3, float3, uint3); float4 __ovld __cnfn select(float4, float4, uint4); float8 __ovld __cnfn select(float8, float8, uint8); float16 __ovld __cnfn select(float16, float16, uint16); long __ovld __cnfn select(long, long, ulong); ulong __ovld __cnfn select(ulong, ulong, ulong); long2 __ovld __cnfn select(long2, long2, ulong2); ulong2 __ovld __cnfn select(ulong2, ulong2, ulong2); long3 __ovld __cnfn select(long3, long3, ulong3); ulong3 __ovld __cnfn select(ulong3, ulong3, ulong3); long4 __ovld __cnfn select(long4, long4, ulong4); ulong4 __ovld __cnfn select(ulong4, ulong4, ulong4); long8 __ovld __cnfn select(long8, long8, ulong8); ulong8 __ovld __cnfn select(ulong8, ulong8, ulong8); long16 __ovld __cnfn select(long16, long16, ulong16); ulong16 __ovld __cnfn select(ulong16, ulong16, ulong16); #ifdef cl_khr_fp64 double __ovld __cnfn select(double, double, long); double2 __ovld __cnfn select(double2, double2, long2); double3 __ovld __cnfn select(double3, double3, long3); double4 __ovld __cnfn select(double4, double4, long4); double8 __ovld __cnfn select(double8, double8, long8); double16 __ovld __cnfn select(double16, double16, long16); double __ovld __cnfn select(double, double, ulong); double2 __ovld __cnfn select(double2, double2, ulong2); double3 __ovld __cnfn select(double3, double3, ulong3); double4 __ovld __cnfn select(double4, double4, ulong4); double8 __ovld __cnfn select(double8, double8, ulong8); double16 __ovld __cnfn select(double16, double16, ulong16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn select(half, half, short); half2 __ovld __cnfn select(half2, half2, short2); half3 __ovld __cnfn select(half3, half3, short3); half4 __ovld __cnfn select(half4, half4, short4); half8 __ovld __cnfn select(half8, half8, short8); half16 __ovld __cnfn select(half16, half16, short16); half __ovld __cnfn select(half, half, ushort); half2 __ovld __cnfn select(half2, half2, ushort2); half3 __ovld __cnfn select(half3, half3, ushort3); half4 __ovld __cnfn select(half4, half4, ushort4); half8 
__ovld __cnfn select(half8, half8, ushort8); half16 __ovld __cnfn select(half16, half16, ushort16); #endif //cl_khr_fp16 // OpenCL v1.1 s6.11.7, v1.2 s6.12.7, v2.0 s6.13.7 - Vector Data Load and Store Functions // OpenCL extensions v1.1 s9.6.6, v1.2 s9.5.6, v2.0 s9.4.6 - Vector Data Load and Store Functions for Half Type /** * Use generic type gentype to indicate the built-in data types * char, uchar, short, ushort, int, uint, long, ulong, float, * double or half. * * vloadn return sizeof (gentypen) bytes of data read from address (p + (offset * n)). * * vstoren write sizeof (gentypen) bytes given by data to address (p + (offset * n)). * * The address computed as (p + (offset * n)) must be * 8-bit aligned if gentype is char, uchar; * 16-bit aligned if gentype is short, ushort, half; * 32-bit aligned if gentype is int, uint, float; * 64-bit aligned if gentype is long, ulong, double. */ char2 __ovld __purefn vload2(size_t, const __constant char *); uchar2 __ovld __purefn vload2(size_t, const __constant uchar *); short2 __ovld __purefn vload2(size_t, const __constant short *); ushort2 __ovld __purefn vload2(size_t, const __constant ushort *); int2 __ovld __purefn vload2(size_t, const __constant int *); uint2 __ovld __purefn vload2(size_t, const __constant uint *); long2 __ovld __purefn vload2(size_t, const __constant long *); ulong2 __ovld __purefn vload2(size_t, const __constant ulong *); float2 __ovld __purefn vload2(size_t, const __constant float *); char3 __ovld __purefn vload3(size_t, const __constant char *); uchar3 __ovld __purefn vload3(size_t, const __constant uchar *); short3 __ovld __purefn vload3(size_t, const __constant short *); ushort3 __ovld __purefn vload3(size_t, const __constant ushort *); int3 __ovld __purefn vload3(size_t, const __constant int *); uint3 __ovld __purefn vload3(size_t, const __constant uint *); long3 __ovld __purefn vload3(size_t, const __constant long *); ulong3 __ovld __purefn vload3(size_t, const __constant ulong *); float3 __ovld __purefn vload3(size_t, const __constant float *); char4 __ovld __purefn vload4(size_t, const __constant char *); uchar4 __ovld __purefn vload4(size_t, const __constant uchar *); short4 __ovld __purefn vload4(size_t, const __constant short *); ushort4 __ovld __purefn vload4(size_t, const __constant ushort *); int4 __ovld __purefn vload4(size_t, const __constant int *); uint4 __ovld __purefn vload4(size_t, const __constant uint *); long4 __ovld __purefn vload4(size_t, const __constant long *); ulong4 __ovld __purefn vload4(size_t, const __constant ulong *); float4 __ovld __purefn vload4(size_t, const __constant float *); char8 __ovld __purefn vload8(size_t, const __constant char *); uchar8 __ovld __purefn vload8(size_t, const __constant uchar *); short8 __ovld __purefn vload8(size_t, const __constant short *); ushort8 __ovld __purefn vload8(size_t, const __constant ushort *); int8 __ovld __purefn vload8(size_t, const __constant int *); uint8 __ovld __purefn vload8(size_t, const __constant uint *); long8 __ovld __purefn vload8(size_t, const __constant long *); ulong8 __ovld __purefn vload8(size_t, const __constant ulong *); float8 __ovld __purefn vload8(size_t, const __constant float *); char16 __ovld __purefn vload16(size_t, const __constant char *); uchar16 __ovld __purefn vload16(size_t, const __constant uchar *); short16 __ovld __purefn vload16(size_t, const __constant short *); ushort16 __ovld __purefn vload16(size_t, const __constant ushort *); int16 __ovld __purefn vload16(size_t, const __constant int *); uint16 __ovld 
__purefn vload16(size_t, const __constant uint *); long16 __ovld __purefn vload16(size_t, const __constant long *); ulong16 __ovld __purefn vload16(size_t, const __constant ulong *); float16 __ovld __purefn vload16(size_t, const __constant float *); #ifdef cl_khr_fp64 double2 __ovld __purefn vload2(size_t, const __constant double *); double3 __ovld __purefn vload3(size_t, const __constant double *); double4 __ovld __purefn vload4(size_t, const __constant double *); double8 __ovld __purefn vload8(size_t, const __constant double *); double16 __ovld __purefn vload16(size_t, const __constant double *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half2 __ovld __purefn vload2(size_t, const __constant half *); half3 __ovld __purefn vload3(size_t, const __constant half *); half4 __ovld __purefn vload4(size_t, const __constant half *); half8 __ovld __purefn vload8(size_t, const __constant half *); half16 __ovld __purefn vload16(size_t, const __constant half *); #endif //cl_khr_fp16 #if defined(__opencl_c_generic_address_space) char2 __ovld __purefn vload2(size_t, const char *); uchar2 __ovld __purefn vload2(size_t, const uchar *); short2 __ovld __purefn vload2(size_t, const short *); ushort2 __ovld __purefn vload2(size_t, const ushort *); int2 __ovld __purefn vload2(size_t, const int *); uint2 __ovld __purefn vload2(size_t, const uint *); long2 __ovld __purefn vload2(size_t, const long *); ulong2 __ovld __purefn vload2(size_t, const ulong *); float2 __ovld __purefn vload2(size_t, const float *); char3 __ovld __purefn vload3(size_t, const char *); uchar3 __ovld __purefn vload3(size_t, const uchar *); short3 __ovld __purefn vload3(size_t, const short *); ushort3 __ovld __purefn vload3(size_t, const ushort *); int3 __ovld __purefn vload3(size_t, const int *); uint3 __ovld __purefn vload3(size_t, const uint *); long3 __ovld __purefn vload3(size_t, const long *); ulong3 __ovld __purefn vload3(size_t, const ulong *); float3 __ovld __purefn vload3(size_t, const float *); char4 __ovld __purefn vload4(size_t, const char *); uchar4 __ovld __purefn vload4(size_t, const uchar *); short4 __ovld __purefn vload4(size_t, const short *); ushort4 __ovld __purefn vload4(size_t, const ushort *); int4 __ovld __purefn vload4(size_t, const int *); uint4 __ovld __purefn vload4(size_t, const uint *); long4 __ovld __purefn vload4(size_t, const long *); ulong4 __ovld __purefn vload4(size_t, const ulong *); float4 __ovld __purefn vload4(size_t, const float *); char8 __ovld __purefn vload8(size_t, const char *); uchar8 __ovld __purefn vload8(size_t, const uchar *); short8 __ovld __purefn vload8(size_t, const short *); ushort8 __ovld __purefn vload8(size_t, const ushort *); int8 __ovld __purefn vload8(size_t, const int *); uint8 __ovld __purefn vload8(size_t, const uint *); long8 __ovld __purefn vload8(size_t, const long *); ulong8 __ovld __purefn vload8(size_t, const ulong *); float8 __ovld __purefn vload8(size_t, const float *); char16 __ovld __purefn vload16(size_t, const char *); uchar16 __ovld __purefn vload16(size_t, const uchar *); short16 __ovld __purefn vload16(size_t, const short *); ushort16 __ovld __purefn vload16(size_t, const ushort *); int16 __ovld __purefn vload16(size_t, const int *); uint16 __ovld __purefn vload16(size_t, const uint *); long16 __ovld __purefn vload16(size_t, const long *); ulong16 __ovld __purefn vload16(size_t, const ulong *); float16 __ovld __purefn vload16(size_t, const float *); #ifdef cl_khr_fp64 double2 __ovld __purefn vload2(size_t, const double *); double3 __ovld __purefn vload3(size_t, 
const double *); double4 __ovld __purefn vload4(size_t, const double *); double8 __ovld __purefn vload8(size_t, const double *); double16 __ovld __purefn vload16(size_t, const double *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half2 __ovld __purefn vload2(size_t, const half *); half3 __ovld __purefn vload3(size_t, const half *); half4 __ovld __purefn vload4(size_t, const half *); half8 __ovld __purefn vload8(size_t, const half *); half16 __ovld __purefn vload16(size_t, const half *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) char2 __ovld __purefn vload2(size_t, const __global char *); uchar2 __ovld __purefn vload2(size_t, const __global uchar *); short2 __ovld __purefn vload2(size_t, const __global short *); ushort2 __ovld __purefn vload2(size_t, const __global ushort *); int2 __ovld __purefn vload2(size_t, const __global int *); uint2 __ovld __purefn vload2(size_t, const __global uint *); long2 __ovld __purefn vload2(size_t, const __global long *); ulong2 __ovld __purefn vload2(size_t, const __global ulong *); float2 __ovld __purefn vload2(size_t, const __global float *); char3 __ovld __purefn vload3(size_t, const __global char *); uchar3 __ovld __purefn vload3(size_t, const __global uchar *); short3 __ovld __purefn vload3(size_t, const __global short *); ushort3 __ovld __purefn vload3(size_t, const __global ushort *); int3 __ovld __purefn vload3(size_t, const __global int *); uint3 __ovld __purefn vload3(size_t, const __global uint *); long3 __ovld __purefn vload3(size_t, const __global long *); ulong3 __ovld __purefn vload3(size_t, const __global ulong *); float3 __ovld __purefn vload3(size_t, const __global float *); char4 __ovld __purefn vload4(size_t, const __global char *); uchar4 __ovld __purefn vload4(size_t, const __global uchar *); short4 __ovld __purefn vload4(size_t, const __global short *); ushort4 __ovld __purefn vload4(size_t, const __global ushort *); int4 __ovld __purefn vload4(size_t, const __global int *); uint4 __ovld __purefn vload4(size_t, const __global uint *); long4 __ovld __purefn vload4(size_t, const __global long *); ulong4 __ovld __purefn vload4(size_t, const __global ulong *); float4 __ovld __purefn vload4(size_t, const __global float *); char8 __ovld __purefn vload8(size_t, const __global char *); uchar8 __ovld __purefn vload8(size_t, const __global uchar *); short8 __ovld __purefn vload8(size_t, const __global short *); ushort8 __ovld __purefn vload8(size_t, const __global ushort *); int8 __ovld __purefn vload8(size_t, const __global int *); uint8 __ovld __purefn vload8(size_t, const __global uint *); long8 __ovld __purefn vload8(size_t, const __global long *); ulong8 __ovld __purefn vload8(size_t, const __global ulong *); float8 __ovld __purefn vload8(size_t, const __global float *); char16 __ovld __purefn vload16(size_t, const __global char *); uchar16 __ovld __purefn vload16(size_t, const __global uchar *); short16 __ovld __purefn vload16(size_t, const __global short *); ushort16 __ovld __purefn vload16(size_t, const __global ushort *); int16 __ovld __purefn vload16(size_t, const __global int *); uint16 __ovld __purefn vload16(size_t, const __global uint *); long16 __ovld __purefn vload16(size_t, const __global long *); ulong16 __ovld __purefn vload16(size_t, const __global ulong *); float16 __ovld __purefn vload16(size_t, const __global float *); char2 __ovld __purefn vload2(size_t, const __local char *); uchar2 __ovld __purefn vload2(size_t, const __local uchar *); 
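/*
 * [Editor's note: illustrative sketch, not part of the original header.]
 * A minimal example of the vloadn/vstoren pattern described in the comment
 * above; the kernel name and arguments are hypothetical. The offset is an
 * element-group index, so vload4(i, p) reads the four floats starting at
 * p + 4*i, and the address only needs the alignment of the scalar element
 * type (32-bit for float), not of float4.
 */
__kernel void scale_packed(__global const float *src,
                           __global float *dst,
                           float factor) {
    size_t i = get_global_id(0);
    float4 v = vload4(i, src);   /* reads src[4*i .. 4*i+3] */
    vstore4(v * factor, i, dst); /* writes dst[4*i .. 4*i+3] */
}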
short2 __ovld __purefn vload2(size_t, const __local short *); ushort2 __ovld __purefn vload2(size_t, const __local ushort *); int2 __ovld __purefn vload2(size_t, const __local int *); uint2 __ovld __purefn vload2(size_t, const __local uint *); long2 __ovld __purefn vload2(size_t, const __local long *); ulong2 __ovld __purefn vload2(size_t, const __local ulong *); float2 __ovld __purefn vload2(size_t, const __local float *); char3 __ovld __purefn vload3(size_t, const __local char *); uchar3 __ovld __purefn vload3(size_t, const __local uchar *); short3 __ovld __purefn vload3(size_t, const __local short *); ushort3 __ovld __purefn vload3(size_t, const __local ushort *); int3 __ovld __purefn vload3(size_t, const __local int *); uint3 __ovld __purefn vload3(size_t, const __local uint *); long3 __ovld __purefn vload3(size_t, const __local long *); ulong3 __ovld __purefn vload3(size_t, const __local ulong *); float3 __ovld __purefn vload3(size_t, const __local float *); char4 __ovld __purefn vload4(size_t, const __local char *); uchar4 __ovld __purefn vload4(size_t, const __local uchar *); short4 __ovld __purefn vload4(size_t, const __local short *); ushort4 __ovld __purefn vload4(size_t, const __local ushort *); int4 __ovld __purefn vload4(size_t, const __local int *); uint4 __ovld __purefn vload4(size_t, const __local uint *); long4 __ovld __purefn vload4(size_t, const __local long *); ulong4 __ovld __purefn vload4(size_t, const __local ulong *); float4 __ovld __purefn vload4(size_t, const __local float *); char8 __ovld __purefn vload8(size_t, const __local char *); uchar8 __ovld __purefn vload8(size_t, const __local uchar *); short8 __ovld __purefn vload8(size_t, const __local short *); ushort8 __ovld __purefn vload8(size_t, const __local ushort *); int8 __ovld __purefn vload8(size_t, const __local int *); uint8 __ovld __purefn vload8(size_t, const __local uint *); long8 __ovld __purefn vload8(size_t, const __local long *); ulong8 __ovld __purefn vload8(size_t, const __local ulong *); float8 __ovld __purefn vload8(size_t, const __local float *); char16 __ovld __purefn vload16(size_t, const __local char *); uchar16 __ovld __purefn vload16(size_t, const __local uchar *); short16 __ovld __purefn vload16(size_t, const __local short *); ushort16 __ovld __purefn vload16(size_t, const __local ushort *); int16 __ovld __purefn vload16(size_t, const __local int *); uint16 __ovld __purefn vload16(size_t, const __local uint *); long16 __ovld __purefn vload16(size_t, const __local long *); ulong16 __ovld __purefn vload16(size_t, const __local ulong *); float16 __ovld __purefn vload16(size_t, const __local float *); char2 __ovld __purefn vload2(size_t, const __private char *); uchar2 __ovld __purefn vload2(size_t, const __private uchar *); short2 __ovld __purefn vload2(size_t, const __private short *); ushort2 __ovld __purefn vload2(size_t, const __private ushort *); int2 __ovld __purefn vload2(size_t, const __private int *); uint2 __ovld __purefn vload2(size_t, const __private uint *); long2 __ovld __purefn vload2(size_t, const __private long *); ulong2 __ovld __purefn vload2(size_t, const __private ulong *); float2 __ovld __purefn vload2(size_t, const __private float *); char3 __ovld __purefn vload3(size_t, const __private char *); uchar3 __ovld __purefn vload3(size_t, const __private uchar *); short3 __ovld __purefn vload3(size_t, const __private short *); ushort3 __ovld __purefn vload3(size_t, const __private ushort *); int3 __ovld __purefn vload3(size_t, const __private int *); uint3 __ovld __purefn 
vload3(size_t, const __private uint *); long3 __ovld __purefn vload3(size_t, const __private long *); ulong3 __ovld __purefn vload3(size_t, const __private ulong *); float3 __ovld __purefn vload3(size_t, const __private float *); char4 __ovld __purefn vload4(size_t, const __private char *); uchar4 __ovld __purefn vload4(size_t, const __private uchar *); short4 __ovld __purefn vload4(size_t, const __private short *); ushort4 __ovld __purefn vload4(size_t, const __private ushort *); int4 __ovld __purefn vload4(size_t, const __private int *); uint4 __ovld __purefn vload4(size_t, const __private uint *); long4 __ovld __purefn vload4(size_t, const __private long *); ulong4 __ovld __purefn vload4(size_t, const __private ulong *); float4 __ovld __purefn vload4(size_t, const __private float *); char8 __ovld __purefn vload8(size_t, const __private char *); uchar8 __ovld __purefn vload8(size_t, const __private uchar *); short8 __ovld __purefn vload8(size_t, const __private short *); ushort8 __ovld __purefn vload8(size_t, const __private ushort *); int8 __ovld __purefn vload8(size_t, const __private int *); uint8 __ovld __purefn vload8(size_t, const __private uint *); long8 __ovld __purefn vload8(size_t, const __private long *); ulong8 __ovld __purefn vload8(size_t, const __private ulong *); float8 __ovld __purefn vload8(size_t, const __private float *); char16 __ovld __purefn vload16(size_t, const __private char *); uchar16 __ovld __purefn vload16(size_t, const __private uchar *); short16 __ovld __purefn vload16(size_t, const __private short *); ushort16 __ovld __purefn vload16(size_t, const __private ushort *); int16 __ovld __purefn vload16(size_t, const __private int *); uint16 __ovld __purefn vload16(size_t, const __private uint *); long16 __ovld __purefn vload16(size_t, const __private long *); ulong16 __ovld __purefn vload16(size_t, const __private ulong *); float16 __ovld __purefn vload16(size_t, const __private float *); #ifdef cl_khr_fp64 double2 __ovld __purefn vload2(size_t, const __global double *); double3 __ovld __purefn vload3(size_t, const __global double *); double4 __ovld __purefn vload4(size_t, const __global double *); double8 __ovld __purefn vload8(size_t, const __global double *); double16 __ovld __purefn vload16(size_t, const __global double *); double2 __ovld __purefn vload2(size_t, const __local double *); double3 __ovld __purefn vload3(size_t, const __local double *); double4 __ovld __purefn vload4(size_t, const __local double *); double8 __ovld __purefn vload8(size_t, const __local double *); double16 __ovld __purefn vload16(size_t, const __local double *); double2 __ovld __purefn vload2(size_t, const __private double *); double3 __ovld __purefn vload3(size_t, const __private double *); double4 __ovld __purefn vload4(size_t, const __private double *); double8 __ovld __purefn vload8(size_t, const __private double *); double16 __ovld __purefn vload16(size_t, const __private double *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half2 __ovld __purefn vload2(size_t, const __global half *); half3 __ovld __purefn vload3(size_t, const __global half *); half4 __ovld __purefn vload4(size_t, const __global half *); half8 __ovld __purefn vload8(size_t, const __global half *); half16 __ovld __purefn vload16(size_t, const __global half *); half2 __ovld __purefn vload2(size_t, const __local half *); half3 __ovld __purefn vload3(size_t, const __local half *); half4 __ovld __purefn vload4(size_t, const __local half *); half8 __ovld __purefn vload8(size_t, const __local half *); half16 __ovld 
__purefn vload16(size_t, const __local half *); half2 __ovld __purefn vload2(size_t, const __private half *); half3 __ovld __purefn vload3(size_t, const __private half *); half4 __ovld __purefn vload4(size_t, const __private half *); half8 __ovld __purefn vload8(size_t, const __private half *); half16 __ovld __purefn vload16(size_t, const __private half *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_named_address_space_builtins) #if defined(__opencl_c_generic_address_space) void __ovld vstore2(char2, size_t, char *); void __ovld vstore2(uchar2, size_t, uchar *); void __ovld vstore2(short2, size_t, short *); void __ovld vstore2(ushort2, size_t, ushort *); void __ovld vstore2(int2, size_t, int *); void __ovld vstore2(uint2, size_t, uint *); void __ovld vstore2(long2, size_t, long *); void __ovld vstore2(ulong2, size_t, ulong *); void __ovld vstore2(float2, size_t, float *); void __ovld vstore3(char3, size_t, char *); void __ovld vstore3(uchar3, size_t, uchar *); void __ovld vstore3(short3, size_t, short *); void __ovld vstore3(ushort3, size_t, ushort *); void __ovld vstore3(int3, size_t, int *); void __ovld vstore3(uint3, size_t, uint *); void __ovld vstore3(long3, size_t, long *); void __ovld vstore3(ulong3, size_t, ulong *); void __ovld vstore3(float3, size_t, float *); void __ovld vstore4(char4, size_t, char *); void __ovld vstore4(uchar4, size_t, uchar *); void __ovld vstore4(short4, size_t, short *); void __ovld vstore4(ushort4, size_t, ushort *); void __ovld vstore4(int4, size_t, int *); void __ovld vstore4(uint4, size_t, uint *); void __ovld vstore4(long4, size_t, long *); void __ovld vstore4(ulong4, size_t, ulong *); void __ovld vstore4(float4, size_t, float *); void __ovld vstore8(char8, size_t, char *); void __ovld vstore8(uchar8, size_t, uchar *); void __ovld vstore8(short8, size_t, short *); void __ovld vstore8(ushort8, size_t, ushort *); void __ovld vstore8(int8, size_t, int *); void __ovld vstore8(uint8, size_t, uint *); void __ovld vstore8(long8, size_t, long *); void __ovld vstore8(ulong8, size_t, ulong *); void __ovld vstore8(float8, size_t, float *); void __ovld vstore16(char16, size_t, char *); void __ovld vstore16(uchar16, size_t, uchar *); void __ovld vstore16(short16, size_t, short *); void __ovld vstore16(ushort16, size_t, ushort *); void __ovld vstore16(int16, size_t, int *); void __ovld vstore16(uint16, size_t, uint *); void __ovld vstore16(long16, size_t, long *); void __ovld vstore16(ulong16, size_t, ulong *); void __ovld vstore16(float16, size_t, float *); #ifdef cl_khr_fp64 void __ovld vstore2(double2, size_t, double *); void __ovld vstore3(double3, size_t, double *); void __ovld vstore4(double4, size_t, double *); void __ovld vstore8(double8, size_t, double *); void __ovld vstore16(double16, size_t, double *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 void __ovld vstore2(half2, size_t, half *); void __ovld vstore3(half3, size_t, half *); void __ovld vstore4(half4, size_t, half *); void __ovld vstore8(half8, size_t, half *); void __ovld vstore16(half16, size_t, half *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) void __ovld vstore2(char2, size_t, __global char *); void __ovld vstore2(uchar2, size_t, __global uchar *); void __ovld vstore2(short2, size_t, __global short *); void __ovld vstore2(ushort2, size_t, __global ushort *); void __ovld vstore2(int2, size_t, __global int *); void __ovld vstore2(uint2, size_t, __global uint *); void __ovld vstore2(long2, size_t, __global long 
*); void __ovld vstore2(ulong2, size_t, __global ulong *); void __ovld vstore2(float2, size_t, __global float *); void __ovld vstore3(char3, size_t, __global char *); void __ovld vstore3(uchar3, size_t, __global uchar *); void __ovld vstore3(short3, size_t, __global short *); void __ovld vstore3(ushort3, size_t, __global ushort *); void __ovld vstore3(int3, size_t, __global int *); void __ovld vstore3(uint3, size_t, __global uint *); void __ovld vstore3(long3, size_t, __global long *); void __ovld vstore3(ulong3, size_t, __global ulong *); void __ovld vstore3(float3, size_t, __global float *); void __ovld vstore4(char4, size_t, __global char *); void __ovld vstore4(uchar4, size_t, __global uchar *); void __ovld vstore4(short4, size_t, __global short *); void __ovld vstore4(ushort4, size_t, __global ushort *); void __ovld vstore4(int4, size_t, __global int *); void __ovld vstore4(uint4, size_t, __global uint *); void __ovld vstore4(long4, size_t, __global long *); void __ovld vstore4(ulong4, size_t, __global ulong *); void __ovld vstore4(float4, size_t, __global float *); void __ovld vstore8(char8, size_t, __global char *); void __ovld vstore8(uchar8, size_t, __global uchar *); void __ovld vstore8(short8, size_t, __global short *); void __ovld vstore8(ushort8, size_t, __global ushort *); void __ovld vstore8(int8, size_t, __global int *); void __ovld vstore8(uint8, size_t, __global uint *); void __ovld vstore8(long8, size_t, __global long *); void __ovld vstore8(ulong8, size_t, __global ulong *); void __ovld vstore8(float8, size_t, __global float *); void __ovld vstore16(char16, size_t, __global char *); void __ovld vstore16(uchar16, size_t, __global uchar *); void __ovld vstore16(short16, size_t, __global short *); void __ovld vstore16(ushort16, size_t, __global ushort *); void __ovld vstore16(int16, size_t, __global int *); void __ovld vstore16(uint16, size_t, __global uint *); void __ovld vstore16(long16, size_t, __global long *); void __ovld vstore16(ulong16, size_t, __global ulong *); void __ovld vstore16(float16, size_t, __global float *); void __ovld vstore2(char2, size_t, __local char *); void __ovld vstore2(uchar2, size_t, __local uchar *); void __ovld vstore2(short2, size_t, __local short *); void __ovld vstore2(ushort2, size_t, __local ushort *); void __ovld vstore2(int2, size_t, __local int *); void __ovld vstore2(uint2, size_t, __local uint *); void __ovld vstore2(long2, size_t, __local long *); void __ovld vstore2(ulong2, size_t, __local ulong *); void __ovld vstore2(float2, size_t, __local float *); void __ovld vstore3(char3, size_t, __local char *); void __ovld vstore3(uchar3, size_t, __local uchar *); void __ovld vstore3(short3, size_t, __local short *); void __ovld vstore3(ushort3, size_t, __local ushort *); void __ovld vstore3(int3, size_t, __local int *); void __ovld vstore3(uint3, size_t, __local uint *); void __ovld vstore3(long3, size_t, __local long *); void __ovld vstore3(ulong3, size_t, __local ulong *); void __ovld vstore3(float3, size_t, __local float *); void __ovld vstore4(char4, size_t, __local char *); void __ovld vstore4(uchar4, size_t, __local uchar *); void __ovld vstore4(short4, size_t, __local short *); void __ovld vstore4(ushort4, size_t, __local ushort *); void __ovld vstore4(int4, size_t, __local int *); void __ovld vstore4(uint4, size_t, __local uint *); void __ovld vstore4(long4, size_t, __local long *); void __ovld vstore4(ulong4, size_t, __local ulong *); void __ovld vstore4(float4, size_t, __local float *); void __ovld vstore8(char8, size_t, 
__local char *); void __ovld vstore8(uchar8, size_t, __local uchar *); void __ovld vstore8(short8, size_t, __local short *); void __ovld vstore8(ushort8, size_t, __local ushort *); void __ovld vstore8(int8, size_t, __local int *); void __ovld vstore8(uint8, size_t, __local uint *); void __ovld vstore8(long8, size_t, __local long *); void __ovld vstore8(ulong8, size_t, __local ulong *); void __ovld vstore8(float8, size_t, __local float *); void __ovld vstore16(char16, size_t, __local char *); void __ovld vstore16(uchar16, size_t, __local uchar *); void __ovld vstore16(short16, size_t, __local short *); void __ovld vstore16(ushort16, size_t, __local ushort *); void __ovld vstore16(int16, size_t, __local int *); void __ovld vstore16(uint16, size_t, __local uint *); void __ovld vstore16(long16, size_t, __local long *); void __ovld vstore16(ulong16, size_t, __local ulong *); void __ovld vstore16(float16, size_t, __local float *); void __ovld vstore2(char2, size_t, __private char *); void __ovld vstore2(uchar2, size_t, __private uchar *); void __ovld vstore2(short2, size_t, __private short *); void __ovld vstore2(ushort2, size_t, __private ushort *); void __ovld vstore2(int2, size_t, __private int *); void __ovld vstore2(uint2, size_t, __private uint *); void __ovld vstore2(long2, size_t, __private long *); void __ovld vstore2(ulong2, size_t, __private ulong *); void __ovld vstore2(float2, size_t, __private float *); void __ovld vstore3(char3, size_t, __private char *); void __ovld vstore3(uchar3, size_t, __private uchar *); void __ovld vstore3(short3, size_t, __private short *); void __ovld vstore3(ushort3, size_t, __private ushort *); void __ovld vstore3(int3, size_t, __private int *); void __ovld vstore3(uint3, size_t, __private uint *); void __ovld vstore3(long3, size_t, __private long *); void __ovld vstore3(ulong3, size_t, __private ulong *); void __ovld vstore3(float3, size_t, __private float *); void __ovld vstore4(char4, size_t, __private char *); void __ovld vstore4(uchar4, size_t, __private uchar *); void __ovld vstore4(short4, size_t, __private short *); void __ovld vstore4(ushort4, size_t, __private ushort *); void __ovld vstore4(int4, size_t, __private int *); void __ovld vstore4(uint4, size_t, __private uint *); void __ovld vstore4(long4, size_t, __private long *); void __ovld vstore4(ulong4, size_t, __private ulong *); void __ovld vstore4(float4, size_t, __private float *); void __ovld vstore8(char8, size_t, __private char *); void __ovld vstore8(uchar8, size_t, __private uchar *); void __ovld vstore8(short8, size_t, __private short *); void __ovld vstore8(ushort8, size_t, __private ushort *); void __ovld vstore8(int8, size_t, __private int *); void __ovld vstore8(uint8, size_t, __private uint *); void __ovld vstore8(long8, size_t, __private long *); void __ovld vstore8(ulong8, size_t, __private ulong *); void __ovld vstore8(float8, size_t, __private float *); void __ovld vstore16(char16, size_t, __private char *); void __ovld vstore16(uchar16, size_t, __private uchar *); void __ovld vstore16(short16, size_t, __private short *); void __ovld vstore16(ushort16, size_t, __private ushort *); void __ovld vstore16(int16, size_t, __private int *); void __ovld vstore16(uint16, size_t, __private uint *); void __ovld vstore16(long16, size_t, __private long *); void __ovld vstore16(ulong16, size_t, __private ulong *); void __ovld vstore16(float16, size_t, __private float *); #ifdef cl_khr_fp64 void __ovld vstore2(double2, size_t, __global double *); void __ovld vstore3(double3, size_t, 
__global double *); void __ovld vstore4(double4, size_t, __global double *); void __ovld vstore8(double8, size_t, __global double *); void __ovld vstore16(double16, size_t, __global double *); void __ovld vstore2(double2, size_t, __local double *); void __ovld vstore3(double3, size_t, __local double *); void __ovld vstore4(double4, size_t, __local double *); void __ovld vstore8(double8, size_t, __local double *); void __ovld vstore16(double16, size_t, __local double *); void __ovld vstore2(double2, size_t, __private double *); void __ovld vstore3(double3, size_t, __private double *); void __ovld vstore4(double4, size_t, __private double *); void __ovld vstore8(double8, size_t, __private double *); void __ovld vstore16(double16, size_t, __private double *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 void __ovld vstore2(half2, size_t, __global half *); void __ovld vstore3(half3, size_t, __global half *); void __ovld vstore4(half4, size_t, __global half *); void __ovld vstore8(half8, size_t, __global half *); void __ovld vstore16(half16, size_t, __global half *); void __ovld vstore2(half2, size_t, __local half *); void __ovld vstore3(half3, size_t, __local half *); void __ovld vstore4(half4, size_t, __local half *); void __ovld vstore8(half8, size_t, __local half *); void __ovld vstore16(half16, size_t, __local half *); void __ovld vstore2(half2, size_t, __private half *); void __ovld vstore3(half3, size_t, __private half *); void __ovld vstore4(half4, size_t, __private half *); void __ovld vstore8(half8, size_t, __private half *); void __ovld vstore16(half16, size_t, __private half *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_named_address_space_builtins) /** * Read sizeof (half) bytes of data from address * (p + offset). The data read is interpreted as a * half value. The half value is converted to a * float value and the float value is returned. * The read address computed as (p + offset) * must be 16-bit aligned. */ float __ovld __purefn vload_half(size_t, const __constant half *); #if defined(__opencl_c_generic_address_space) float __ovld __purefn vload_half(size_t, const half *); #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) float __ovld __purefn vload_half(size_t, const __global half *); float __ovld __purefn vload_half(size_t, const __local half *); float __ovld __purefn vload_half(size_t, const __private half *); #endif //defined(__opencl_c_named_address_space_builtins) /** * Read sizeof (halfn) bytes of data from address * (p + (offset * n)). The data read is interpreted * as a halfn value. The halfn value read is * converted to a floatn value and the floatn * value is returned. The read address computed * as (p + (offset * n)) must be 16-bit aligned. 
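 *
 * Illustrative sketch (not part of the header): a hypothetical kernel in which
 * each work-item loads four halves, gets them back widened to a float4, and
 * stores the result as floats. Assumes src holds at least
 * 4 * get_global_size(0) half values.
 *
 *   __kernel void widen4(__global const half *src, __global float *dst) {
 *       size_t i = get_global_id(0);
 *       float4 v = vload_half4(i, src); // reads src[4*i .. 4*i+3]
 *       vstore4(v, i, dst);             // writes dst[4*i .. 4*i+3]
 *   }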
*/ float2 __ovld __purefn vload_half2(size_t, const __constant half *); float3 __ovld __purefn vload_half3(size_t, const __constant half *); float4 __ovld __purefn vload_half4(size_t, const __constant half *); float8 __ovld __purefn vload_half8(size_t, const __constant half *); float16 __ovld __purefn vload_half16(size_t, const __constant half *); #if defined(__opencl_c_generic_address_space) float2 __ovld __purefn vload_half2(size_t, const half *); float3 __ovld __purefn vload_half3(size_t, const half *); float4 __ovld __purefn vload_half4(size_t, const half *); float8 __ovld __purefn vload_half8(size_t, const half *); float16 __ovld __purefn vload_half16(size_t, const half *); #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) float2 __ovld __purefn vload_half2(size_t, const __global half *); float3 __ovld __purefn vload_half3(size_t, const __global half *); float4 __ovld __purefn vload_half4(size_t, const __global half *); float8 __ovld __purefn vload_half8(size_t, const __global half *); float16 __ovld __purefn vload_half16(size_t, const __global half *); float2 __ovld __purefn vload_half2(size_t, const __local half *); float3 __ovld __purefn vload_half3(size_t, const __local half *); float4 __ovld __purefn vload_half4(size_t, const __local half *); float8 __ovld __purefn vload_half8(size_t, const __local half *); float16 __ovld __purefn vload_half16(size_t, const __local half *); float2 __ovld __purefn vload_half2(size_t, const __private half *); float3 __ovld __purefn vload_half3(size_t, const __private half *); float4 __ovld __purefn vload_half4(size_t, const __private half *); float8 __ovld __purefn vload_half8(size_t, const __private half *); float16 __ovld __purefn vload_half16(size_t, const __private half *); #endif //defined(__opencl_c_named_address_space_builtins) /** * The float value given by data is first * converted to a half value using the appropriate * rounding mode. The half value is then written * to address computed as (p + offset). The * address computed as (p + offset) must be 16- * bit aligned. * vstore_half use the current rounding mode. * The default current rounding mode is round to * nearest even. 
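 *
 * Illustrative sketch (not part of the header): a hypothetical kernel that
 * narrows one float to half, once with the default rounding mode and once
 * explicitly rounding toward zero. Assumes dst holds at least
 * 2 * get_global_size(0) half values.
 *
 *   __kernel void narrow(__global const float *src, __global half *dst) {
 *       size_t i = get_global_id(0);
 *       vstore_half(src[i], 2 * i, dst);         // round to nearest even
 *       vstore_half_rtz(src[i], 2 * i + 1, dst); // round toward zero
 *   }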
*/ #if defined(__opencl_c_generic_address_space) void __ovld vstore_half(float, size_t, half *); void __ovld vstore_half_rte(float, size_t, half *); void __ovld vstore_half_rtz(float, size_t, half *); void __ovld vstore_half_rtp(float, size_t, half *); void __ovld vstore_half_rtn(float, size_t, half *); #ifdef cl_khr_fp64 void __ovld vstore_half(double, size_t, half *); void __ovld vstore_half_rte(double, size_t, half *); void __ovld vstore_half_rtz(double, size_t, half *); void __ovld vstore_half_rtp(double, size_t, half *); void __ovld vstore_half_rtn(double, size_t, half *); #endif //cl_khr_fp64 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) void __ovld vstore_half(float, size_t, __global half *); void __ovld vstore_half_rte(float, size_t, __global half *); void __ovld vstore_half_rtz(float, size_t, __global half *); void __ovld vstore_half_rtp(float, size_t, __global half *); void __ovld vstore_half_rtn(float, size_t, __global half *); void __ovld vstore_half(float, size_t, __local half *); void __ovld vstore_half_rte(float, size_t, __local half *); void __ovld vstore_half_rtz(float, size_t, __local half *); void __ovld vstore_half_rtp(float, size_t, __local half *); void __ovld vstore_half_rtn(float, size_t, __local half *); void __ovld vstore_half(float, size_t, __private half *); void __ovld vstore_half_rte(float, size_t, __private half *); void __ovld vstore_half_rtz(float, size_t, __private half *); void __ovld vstore_half_rtp(float, size_t, __private half *); void __ovld vstore_half_rtn(float, size_t, __private half *); #ifdef cl_khr_fp64 void __ovld vstore_half(double, size_t, __global half *); void __ovld vstore_half_rte(double, size_t, __global half *); void __ovld vstore_half_rtz(double, size_t, __global half *); void __ovld vstore_half_rtp(double, size_t, __global half *); void __ovld vstore_half_rtn(double, size_t, __global half *); void __ovld vstore_half(double, size_t, __local half *); void __ovld vstore_half_rte(double, size_t, __local half *); void __ovld vstore_half_rtz(double, size_t, __local half *); void __ovld vstore_half_rtp(double, size_t, __local half *); void __ovld vstore_half_rtn(double, size_t, __local half *); void __ovld vstore_half(double, size_t, __private half *); void __ovld vstore_half_rte(double, size_t, __private half *); void __ovld vstore_half_rtz(double, size_t, __private half *); void __ovld vstore_half_rtp(double, size_t, __private half *); void __ovld vstore_half_rtn(double, size_t, __private half *); #endif //cl_khr_fp64 #endif //defined(__opencl_c_named_address_space_builtins) /** * The floatn value given by data is converted to * a halfn value using the appropriate rounding * mode. The halfn value is then written to * address computed as (p + (offset * n)). The * address computed as (p + (offset * n)) must be * 16-bit aligned. * vstore_halfn uses the current rounding mode. * The default current rounding mode is round to * nearest even. 
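 *
 * Illustrative sketch (not part of the header): the vector forms follow the
 * same naming pattern, e.g. narrowing a float4 while rounding toward positive
 * infinity. Assumes dst holds at least 4 * get_global_size(0) half values.
 *
 *   __kernel void narrow4(__global const float *src, __global half *dst) {
 *       size_t i = get_global_id(0);
 *       vstore_half4_rtp(vload4(i, src), i, dst);
 *   }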
*/ #if defined(__opencl_c_generic_address_space) void __ovld vstore_half2(float2, size_t, half *); void __ovld vstore_half3(float3, size_t, half *); void __ovld vstore_half4(float4, size_t, half *); void __ovld vstore_half8(float8, size_t, half *); void __ovld vstore_half16(float16, size_t, half *); void __ovld vstore_half2_rte(float2, size_t, half *); void __ovld vstore_half3_rte(float3, size_t, half *); void __ovld vstore_half4_rte(float4, size_t, half *); void __ovld vstore_half8_rte(float8, size_t, half *); void __ovld vstore_half16_rte(float16, size_t, half *); void __ovld vstore_half2_rtz(float2, size_t, half *); void __ovld vstore_half3_rtz(float3, size_t, half *); void __ovld vstore_half4_rtz(float4, size_t, half *); void __ovld vstore_half8_rtz(float8, size_t, half *); void __ovld vstore_half16_rtz(float16, size_t, half *); void __ovld vstore_half2_rtp(float2, size_t, half *); void __ovld vstore_half3_rtp(float3, size_t, half *); void __ovld vstore_half4_rtp(float4, size_t, half *); void __ovld vstore_half8_rtp(float8, size_t, half *); void __ovld vstore_half16_rtp(float16, size_t, half *); void __ovld vstore_half2_rtn(float2, size_t, half *); void __ovld vstore_half3_rtn(float3, size_t, half *); void __ovld vstore_half4_rtn(float4, size_t, half *); void __ovld vstore_half8_rtn(float8, size_t, half *); void __ovld vstore_half16_rtn(float16, size_t, half *); #ifdef cl_khr_fp64 void __ovld vstore_half2(double2, size_t, half *); void __ovld vstore_half3(double3, size_t, half *); void __ovld vstore_half4(double4, size_t, half *); void __ovld vstore_half8(double8, size_t, half *); void __ovld vstore_half16(double16, size_t, half *); void __ovld vstore_half2_rte(double2, size_t, half *); void __ovld vstore_half3_rte(double3, size_t, half *); void __ovld vstore_half4_rte(double4, size_t, half *); void __ovld vstore_half8_rte(double8, size_t, half *); void __ovld vstore_half16_rte(double16, size_t, half *); void __ovld vstore_half2_rtz(double2, size_t, half *); void __ovld vstore_half3_rtz(double3, size_t, half *); void __ovld vstore_half4_rtz(double4, size_t, half *); void __ovld vstore_half8_rtz(double8, size_t, half *); void __ovld vstore_half16_rtz(double16, size_t, half *); void __ovld vstore_half2_rtp(double2, size_t, half *); void __ovld vstore_half3_rtp(double3, size_t, half *); void __ovld vstore_half4_rtp(double4, size_t, half *); void __ovld vstore_half8_rtp(double8, size_t, half *); void __ovld vstore_half16_rtp(double16, size_t, half *); void __ovld vstore_half2_rtn(double2, size_t, half *); void __ovld vstore_half3_rtn(double3, size_t, half *); void __ovld vstore_half4_rtn(double4, size_t, half *); void __ovld vstore_half8_rtn(double8, size_t, half *); void __ovld vstore_half16_rtn(double16, size_t, half *); #endif //cl_khr_fp64 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) void __ovld vstore_half2(float2, size_t, __global half *); void __ovld vstore_half3(float3, size_t, __global half *); void __ovld vstore_half4(float4, size_t, __global half *); void __ovld vstore_half8(float8, size_t, __global half *); void __ovld vstore_half16(float16, size_t, __global half *); void __ovld vstore_half2_rte(float2, size_t, __global half *); void __ovld vstore_half3_rte(float3, size_t, __global half *); void __ovld vstore_half4_rte(float4, size_t, __global half *); void __ovld vstore_half8_rte(float8, size_t, __global half *); void __ovld vstore_half16_rte(float16, size_t, __global half *); void __ovld vstore_half2_rtz(float2, 
size_t, __global half *); void __ovld vstore_half3_rtz(float3, size_t, __global half *); void __ovld vstore_half4_rtz(float4, size_t, __global half *); void __ovld vstore_half8_rtz(float8, size_t, __global half *); void __ovld vstore_half16_rtz(float16, size_t, __global half *); void __ovld vstore_half2_rtp(float2, size_t, __global half *); void __ovld vstore_half3_rtp(float3, size_t, __global half *); void __ovld vstore_half4_rtp(float4, size_t, __global half *); void __ovld vstore_half8_rtp(float8, size_t, __global half *); void __ovld vstore_half16_rtp(float16, size_t, __global half *); void __ovld vstore_half2_rtn(float2, size_t, __global half *); void __ovld vstore_half3_rtn(float3, size_t, __global half *); void __ovld vstore_half4_rtn(float4, size_t, __global half *); void __ovld vstore_half8_rtn(float8, size_t, __global half *); void __ovld vstore_half16_rtn(float16, size_t, __global half *); void __ovld vstore_half2(float2, size_t, __local half *); void __ovld vstore_half3(float3, size_t, __local half *); void __ovld vstore_half4(float4, size_t, __local half *); void __ovld vstore_half8(float8, size_t, __local half *); void __ovld vstore_half16(float16, size_t, __local half *); void __ovld vstore_half2_rte(float2, size_t, __local half *); void __ovld vstore_half3_rte(float3, size_t, __local half *); void __ovld vstore_half4_rte(float4, size_t, __local half *); void __ovld vstore_half8_rte(float8, size_t, __local half *); void __ovld vstore_half16_rte(float16, size_t, __local half *); void __ovld vstore_half2_rtz(float2, size_t, __local half *); void __ovld vstore_half3_rtz(float3, size_t, __local half *); void __ovld vstore_half4_rtz(float4, size_t, __local half *); void __ovld vstore_half8_rtz(float8, size_t, __local half *); void __ovld vstore_half16_rtz(float16, size_t, __local half *); void __ovld vstore_half2_rtp(float2, size_t, __local half *); void __ovld vstore_half3_rtp(float3, size_t, __local half *); void __ovld vstore_half4_rtp(float4, size_t, __local half *); void __ovld vstore_half8_rtp(float8, size_t, __local half *); void __ovld vstore_half16_rtp(float16, size_t, __local half *); void __ovld vstore_half2_rtn(float2, size_t, __local half *); void __ovld vstore_half3_rtn(float3, size_t, __local half *); void __ovld vstore_half4_rtn(float4, size_t, __local half *); void __ovld vstore_half8_rtn(float8, size_t, __local half *); void __ovld vstore_half16_rtn(float16, size_t, __local half *); void __ovld vstore_half2(float2, size_t, __private half *); void __ovld vstore_half3(float3, size_t, __private half *); void __ovld vstore_half4(float4, size_t, __private half *); void __ovld vstore_half8(float8, size_t, __private half *); void __ovld vstore_half16(float16, size_t, __private half *); void __ovld vstore_half2_rte(float2, size_t, __private half *); void __ovld vstore_half3_rte(float3, size_t, __private half *); void __ovld vstore_half4_rte(float4, size_t, __private half *); void __ovld vstore_half8_rte(float8, size_t, __private half *); void __ovld vstore_half16_rte(float16, size_t, __private half *); void __ovld vstore_half2_rtz(float2, size_t, __private half *); void __ovld vstore_half3_rtz(float3, size_t, __private half *); void __ovld vstore_half4_rtz(float4, size_t, __private half *); void __ovld vstore_half8_rtz(float8, size_t, __private half *); void __ovld vstore_half16_rtz(float16, size_t, __private half *); void __ovld vstore_half2_rtp(float2, size_t, __private half *); void __ovld vstore_half3_rtp(float3, size_t, __private half *); void __ovld 
vstore_half4_rtp(float4, size_t, __private half *); void __ovld vstore_half8_rtp(float8, size_t, __private half *); void __ovld vstore_half16_rtp(float16, size_t, __private half *); void __ovld vstore_half2_rtn(float2, size_t, __private half *); void __ovld vstore_half3_rtn(float3, size_t, __private half *); void __ovld vstore_half4_rtn(float4, size_t, __private half *); void __ovld vstore_half8_rtn(float8, size_t, __private half *); void __ovld vstore_half16_rtn(float16, size_t, __private half *); #ifdef cl_khr_fp64 void __ovld vstore_half2(double2, size_t, __global half *); void __ovld vstore_half3(double3, size_t, __global half *); void __ovld vstore_half4(double4, size_t, __global half *); void __ovld vstore_half8(double8, size_t, __global half *); void __ovld vstore_half16(double16, size_t, __global half *); void __ovld vstore_half2_rte(double2, size_t, __global half *); void __ovld vstore_half3_rte(double3, size_t, __global half *); void __ovld vstore_half4_rte(double4, size_t, __global half *); void __ovld vstore_half8_rte(double8, size_t, __global half *); void __ovld vstore_half16_rte(double16, size_t, __global half *); void __ovld vstore_half2_rtz(double2, size_t, __global half *); void __ovld vstore_half3_rtz(double3, size_t, __global half *); void __ovld vstore_half4_rtz(double4, size_t, __global half *); void __ovld vstore_half8_rtz(double8, size_t, __global half *); void __ovld vstore_half16_rtz(double16, size_t, __global half *); void __ovld vstore_half2_rtp(double2, size_t, __global half *); void __ovld vstore_half3_rtp(double3, size_t, __global half *); void __ovld vstore_half4_rtp(double4, size_t, __global half *); void __ovld vstore_half8_rtp(double8, size_t, __global half *); void __ovld vstore_half16_rtp(double16, size_t, __global half *); void __ovld vstore_half2_rtn(double2, size_t, __global half *); void __ovld vstore_half3_rtn(double3, size_t, __global half *); void __ovld vstore_half4_rtn(double4, size_t, __global half *); void __ovld vstore_half8_rtn(double8, size_t, __global half *); void __ovld vstore_half16_rtn(double16, size_t, __global half *); void __ovld vstore_half2(double2, size_t, __local half *); void __ovld vstore_half3(double3, size_t, __local half *); void __ovld vstore_half4(double4, size_t, __local half *); void __ovld vstore_half8(double8, size_t, __local half *); void __ovld vstore_half16(double16, size_t, __local half *); void __ovld vstore_half2_rte(double2, size_t, __local half *); void __ovld vstore_half3_rte(double3, size_t, __local half *); void __ovld vstore_half4_rte(double4, size_t, __local half *); void __ovld vstore_half8_rte(double8, size_t, __local half *); void __ovld vstore_half16_rte(double16, size_t, __local half *); void __ovld vstore_half2_rtz(double2, size_t, __local half *); void __ovld vstore_half3_rtz(double3, size_t, __local half *); void __ovld vstore_half4_rtz(double4, size_t, __local half *); void __ovld vstore_half8_rtz(double8, size_t, __local half *); void __ovld vstore_half16_rtz(double16, size_t, __local half *); void __ovld vstore_half2_rtp(double2, size_t, __local half *); void __ovld vstore_half3_rtp(double3, size_t, __local half *); void __ovld vstore_half4_rtp(double4, size_t, __local half *); void __ovld vstore_half8_rtp(double8, size_t, __local half *); void __ovld vstore_half16_rtp(double16, size_t, __local half *); void __ovld vstore_half2_rtn(double2, size_t, __local half *); void __ovld vstore_half3_rtn(double3, size_t, __local half *); void __ovld vstore_half4_rtn(double4, size_t, __local half *); void 
__ovld vstore_half8_rtn(double8, size_t, __local half *); void __ovld vstore_half16_rtn(double16, size_t, __local half *); void __ovld vstore_half2(double2, size_t, __private half *); void __ovld vstore_half3(double3, size_t, __private half *); void __ovld vstore_half4(double4, size_t, __private half *); void __ovld vstore_half8(double8, size_t, __private half *); void __ovld vstore_half16(double16, size_t, __private half *); void __ovld vstore_half2_rte(double2, size_t, __private half *); void __ovld vstore_half3_rte(double3, size_t, __private half *); void __ovld vstore_half4_rte(double4, size_t, __private half *); void __ovld vstore_half8_rte(double8, size_t, __private half *); void __ovld vstore_half16_rte(double16, size_t, __private half *); void __ovld vstore_half2_rtz(double2, size_t, __private half *); void __ovld vstore_half3_rtz(double3, size_t, __private half *); void __ovld vstore_half4_rtz(double4, size_t, __private half *); void __ovld vstore_half8_rtz(double8, size_t, __private half *); void __ovld vstore_half16_rtz(double16, size_t, __private half *); void __ovld vstore_half2_rtp(double2, size_t, __private half *); void __ovld vstore_half3_rtp(double3, size_t, __private half *); void __ovld vstore_half4_rtp(double4, size_t, __private half *); void __ovld vstore_half8_rtp(double8, size_t, __private half *); void __ovld vstore_half16_rtp(double16, size_t, __private half *); void __ovld vstore_half2_rtn(double2, size_t, __private half *); void __ovld vstore_half3_rtn(double3, size_t, __private half *); void __ovld vstore_half4_rtn(double4, size_t, __private half *); void __ovld vstore_half8_rtn(double8, size_t, __private half *); void __ovld vstore_half16_rtn(double16, size_t, __private half *); #endif //cl_khr_fp64 #endif //defined(__opencl_c_named_address_space_builtins) /** * For n = 1, 2, 4, 8 and 16 read sizeof (halfn) * bytes of data from address (p + (offset * n)). * The data read is interpreted as a halfn value. * The halfn value read is converted to a floatn * value and the floatn value is returned. * The address computed as (p + (offset * n)) * must be aligned to sizeof (halfn) bytes. * For n = 3, vloada_half3 reads a half3 from * address (p + (offset * 4)) and returns a float3. * The address computed as (p + (offset * 4)) * must be aligned to sizeof (half) * 4 bytes. 
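 *
 * Illustrative sketch (not part of the header) of the n = 3 difference:
 * vload_half3 reads three consecutive halves at (p + offset * 3), while
 * vloada_half3 reads from an array aligned and strided like half4, at
 * (p + offset * 4). Assumes src holds 4 * get_global_size(0) halves laid out
 * as x, y, z, pad per element.
 *
 *   __kernel void read_xyz(__global const half *src, __global float *dst) {
 *       size_t i = get_global_id(0);
 *       float3 v = vloada_half3(i, src);  // reads src[4*i .. 4*i+2]
 *       vstore3(v, i, dst);               // writes dst[3*i .. 3*i+2]
 *   }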
*/ float2 __ovld __purefn vloada_half2(size_t, const __constant half *); float3 __ovld __purefn vloada_half3(size_t, const __constant half *); float4 __ovld __purefn vloada_half4(size_t, const __constant half *); float8 __ovld __purefn vloada_half8(size_t, const __constant half *); float16 __ovld __purefn vloada_half16(size_t, const __constant half *); #if defined(__opencl_c_generic_address_space) float2 __ovld __purefn vloada_half2(size_t, const half *); float3 __ovld __purefn vloada_half3(size_t, const half *); float4 __ovld __purefn vloada_half4(size_t, const half *); float8 __ovld __purefn vloada_half8(size_t, const half *); float16 __ovld __purefn vloada_half16(size_t, const half *); #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) float2 __ovld __purefn vloada_half2(size_t, const __global half *); float3 __ovld __purefn vloada_half3(size_t, const __global half *); float4 __ovld __purefn vloada_half4(size_t, const __global half *); float8 __ovld __purefn vloada_half8(size_t, const __global half *); float16 __ovld __purefn vloada_half16(size_t, const __global half *); float2 __ovld __purefn vloada_half2(size_t, const __local half *); float3 __ovld __purefn vloada_half3(size_t, const __local half *); float4 __ovld __purefn vloada_half4(size_t, const __local half *); float8 __ovld __purefn vloada_half8(size_t, const __local half *); float16 __ovld __purefn vloada_half16(size_t, const __local half *); float2 __ovld __purefn vloada_half2(size_t, const __private half *); float3 __ovld __purefn vloada_half3(size_t, const __private half *); float4 __ovld __purefn vloada_half4(size_t, const __private half *); float8 __ovld __purefn vloada_half8(size_t, const __private half *); float16 __ovld __purefn vloada_half16(size_t, const __private half *); #endif //defined(__opencl_c_named_address_space_builtins) /** * The floatn value given by data is converted to * a halfn value using the appropriate rounding * mode. * For n = 1, 2, 4, 8 and 16, the halfn value is * written to the address computed as (p + (offset * * n)). The address computed as (p + (offset * * n)) must be aligned to sizeof (halfn) bytes. * For n = 3, the half3 value is written to the * address computed as (p + (offset * 4)). The * address computed as (p + (offset * 4)) must be * aligned to sizeof (half) * 4 bytes. * vstorea_halfn uses the current rounding * mode. The default current rounding mode is * round to nearest even. 
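 *
 * Illustrative sketch (not part of the header): the store-side counterpart of
 * the padded layout described for vloada_half3, writing a half3 at
 * (p + offset * 4) and leaving the fourth (padding) element untouched.
 * Assumes dst holds 4 * get_global_size(0) half values.
 *
 *   __kernel void write_xyz(__global const float *src, __global half *dst) {
 *       size_t i = get_global_id(0);
 *       vstorea_half3(vload3(i, src), i, dst);  // writes dst[4*i .. 4*i+2]
 *   }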
*/ #if defined(__opencl_c_generic_address_space) void __ovld vstorea_half2(float2, size_t, half *); void __ovld vstorea_half3(float3, size_t, half *); void __ovld vstorea_half4(float4, size_t, half *); void __ovld vstorea_half8(float8, size_t, half *); void __ovld vstorea_half16(float16, size_t, half *); void __ovld vstorea_half2_rte(float2, size_t, half *); void __ovld vstorea_half3_rte(float3, size_t, half *); void __ovld vstorea_half4_rte(float4, size_t, half *); void __ovld vstorea_half8_rte(float8, size_t, half *); void __ovld vstorea_half16_rte(float16, size_t, half *); void __ovld vstorea_half2_rtz(float2, size_t, half *); void __ovld vstorea_half3_rtz(float3, size_t, half *); void __ovld vstorea_half4_rtz(float4, size_t, half *); void __ovld vstorea_half8_rtz(float8, size_t, half *); void __ovld vstorea_half16_rtz(float16, size_t, half *); void __ovld vstorea_half2_rtp(float2, size_t, half *); void __ovld vstorea_half3_rtp(float3, size_t, half *); void __ovld vstorea_half4_rtp(float4, size_t, half *); void __ovld vstorea_half8_rtp(float8, size_t, half *); void __ovld vstorea_half16_rtp(float16, size_t, half *); void __ovld vstorea_half2_rtn(float2, size_t, half *); void __ovld vstorea_half3_rtn(float3, size_t, half *); void __ovld vstorea_half4_rtn(float4, size_t, half *); void __ovld vstorea_half8_rtn(float8, size_t, half *); void __ovld vstorea_half16_rtn(float16, size_t, half *); #ifdef cl_khr_fp64 void __ovld vstorea_half2(double2, size_t, half *); void __ovld vstorea_half3(double3, size_t, half *); void __ovld vstorea_half4(double4, size_t, half *); void __ovld vstorea_half8(double8, size_t, half *); void __ovld vstorea_half16(double16, size_t, half *); void __ovld vstorea_half2_rte(double2, size_t, half *); void __ovld vstorea_half3_rte(double3, size_t, half *); void __ovld vstorea_half4_rte(double4, size_t, half *); void __ovld vstorea_half8_rte(double8, size_t, half *); void __ovld vstorea_half16_rte(double16, size_t, half *); void __ovld vstorea_half2_rtz(double2, size_t, half *); void __ovld vstorea_half3_rtz(double3, size_t, half *); void __ovld vstorea_half4_rtz(double4, size_t, half *); void __ovld vstorea_half8_rtz(double8, size_t, half *); void __ovld vstorea_half16_rtz(double16, size_t, half *); void __ovld vstorea_half2_rtp(double2, size_t, half *); void __ovld vstorea_half3_rtp(double3, size_t, half *); void __ovld vstorea_half4_rtp(double4, size_t, half *); void __ovld vstorea_half8_rtp(double8, size_t, half *); void __ovld vstorea_half16_rtp(double16, size_t, half *); void __ovld vstorea_half2_rtn(double2, size_t, half *); void __ovld vstorea_half3_rtn(double3, size_t, half *); void __ovld vstorea_half4_rtn(double4, size_t, half *); void __ovld vstorea_half8_rtn(double8, size_t, half *); void __ovld vstorea_half16_rtn(double16, size_t, half *); #endif //cl_khr_fp64 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) void __ovld vstorea_half2(float2, size_t, __global half *); void __ovld vstorea_half3(float3, size_t, __global half *); void __ovld vstorea_half4(float4, size_t, __global half *); void __ovld vstorea_half8(float8, size_t, __global half *); void __ovld vstorea_half16(float16, size_t, __global half *); void __ovld vstorea_half2_rte(float2, size_t, __global half *); void __ovld vstorea_half3_rte(float3, size_t, __global half *); void __ovld vstorea_half4_rte(float4, size_t, __global half *); void __ovld vstorea_half8_rte(float8, size_t, __global half *); void __ovld vstorea_half16_rte(float16, size_t, 
__global half *); void __ovld vstorea_half2_rtz(float2, size_t, __global half *); void __ovld vstorea_half3_rtz(float3, size_t, __global half *); void __ovld vstorea_half4_rtz(float4, size_t, __global half *); void __ovld vstorea_half8_rtz(float8, size_t, __global half *); void __ovld vstorea_half16_rtz(float16, size_t, __global half *); void __ovld vstorea_half2_rtp(float2, size_t, __global half *); void __ovld vstorea_half3_rtp(float3, size_t, __global half *); void __ovld vstorea_half4_rtp(float4, size_t, __global half *); void __ovld vstorea_half8_rtp(float8, size_t, __global half *); void __ovld vstorea_half16_rtp(float16, size_t, __global half *); void __ovld vstorea_half2_rtn(float2, size_t, __global half *); void __ovld vstorea_half3_rtn(float3, size_t, __global half *); void __ovld vstorea_half4_rtn(float4, size_t, __global half *); void __ovld vstorea_half8_rtn(float8, size_t, __global half *); void __ovld vstorea_half16_rtn(float16, size_t, __global half *); void __ovld vstorea_half2(float2, size_t, __local half *); void __ovld vstorea_half3(float3, size_t, __local half *); void __ovld vstorea_half4(float4, size_t, __local half *); void __ovld vstorea_half8(float8, size_t, __local half *); void __ovld vstorea_half16(float16, size_t, __local half *); void __ovld vstorea_half2_rte(float2, size_t, __local half *); void __ovld vstorea_half3_rte(float3, size_t, __local half *); void __ovld vstorea_half4_rte(float4, size_t, __local half *); void __ovld vstorea_half8_rte(float8, size_t, __local half *); void __ovld vstorea_half16_rte(float16, size_t, __local half *); void __ovld vstorea_half2_rtz(float2, size_t, __local half *); void __ovld vstorea_half3_rtz(float3, size_t, __local half *); void __ovld vstorea_half4_rtz(float4, size_t, __local half *); void __ovld vstorea_half8_rtz(float8, size_t, __local half *); void __ovld vstorea_half16_rtz(float16, size_t, __local half *); void __ovld vstorea_half2_rtp(float2, size_t, __local half *); void __ovld vstorea_half3_rtp(float3, size_t, __local half *); void __ovld vstorea_half4_rtp(float4, size_t, __local half *); void __ovld vstorea_half8_rtp(float8, size_t, __local half *); void __ovld vstorea_half16_rtp(float16, size_t, __local half *); void __ovld vstorea_half2_rtn(float2, size_t, __local half *); void __ovld vstorea_half3_rtn(float3, size_t, __local half *); void __ovld vstorea_half4_rtn(float4, size_t, __local half *); void __ovld vstorea_half8_rtn(float8, size_t, __local half *); void __ovld vstorea_half16_rtn(float16, size_t, __local half *); void __ovld vstorea_half2(float2, size_t, __private half *); void __ovld vstorea_half3(float3, size_t, __private half *); void __ovld vstorea_half4(float4, size_t, __private half *); void __ovld vstorea_half8(float8, size_t, __private half *); void __ovld vstorea_half16(float16, size_t, __private half *); void __ovld vstorea_half2_rte(float2, size_t, __private half *); void __ovld vstorea_half3_rte(float3, size_t, __private half *); void __ovld vstorea_half4_rte(float4, size_t, __private half *); void __ovld vstorea_half8_rte(float8, size_t, __private half *); void __ovld vstorea_half16_rte(float16, size_t, __private half *); void __ovld vstorea_half2_rtz(float2, size_t, __private half *); void __ovld vstorea_half3_rtz(float3, size_t, __private half *); void __ovld vstorea_half4_rtz(float4, size_t, __private half *); void __ovld vstorea_half8_rtz(float8, size_t, __private half *); void __ovld vstorea_half16_rtz(float16, size_t, __private half *); void __ovld vstorea_half2_rtp(float2, 
size_t, __private half *); void __ovld vstorea_half3_rtp(float3, size_t, __private half *); void __ovld vstorea_half4_rtp(float4, size_t, __private half *); void __ovld vstorea_half8_rtp(float8, size_t, __private half *); void __ovld vstorea_half16_rtp(float16, size_t, __private half *); void __ovld vstorea_half2_rtn(float2, size_t, __private half *); void __ovld vstorea_half3_rtn(float3, size_t, __private half *); void __ovld vstorea_half4_rtn(float4, size_t, __private half *); void __ovld vstorea_half8_rtn(float8, size_t, __private half *); void __ovld vstorea_half16_rtn(float16, size_t, __private half *); #ifdef cl_khr_fp64 void __ovld vstorea_half2(double2, size_t, __global half *); void __ovld vstorea_half3(double3, size_t, __global half *); void __ovld vstorea_half4(double4, size_t, __global half *); void __ovld vstorea_half8(double8, size_t, __global half *); void __ovld vstorea_half16(double16, size_t, __global half *); void __ovld vstorea_half2_rte(double2, size_t, __global half *); void __ovld vstorea_half3_rte(double3, size_t, __global half *); void __ovld vstorea_half4_rte(double4, size_t, __global half *); void __ovld vstorea_half8_rte(double8, size_t, __global half *); void __ovld vstorea_half16_rte(double16, size_t, __global half *); void __ovld vstorea_half2_rtz(double2, size_t, __global half *); void __ovld vstorea_half3_rtz(double3, size_t, __global half *); void __ovld vstorea_half4_rtz(double4, size_t, __global half *); void __ovld vstorea_half8_rtz(double8, size_t, __global half *); void __ovld vstorea_half16_rtz(double16, size_t, __global half *); void __ovld vstorea_half2_rtp(double2, size_t, __global half *); void __ovld vstorea_half3_rtp(double3, size_t, __global half *); void __ovld vstorea_half4_rtp(double4, size_t, __global half *); void __ovld vstorea_half8_rtp(double8, size_t, __global half *); void __ovld vstorea_half16_rtp(double16, size_t, __global half *); void __ovld vstorea_half2_rtn(double2, size_t, __global half *); void __ovld vstorea_half3_rtn(double3, size_t, __global half *); void __ovld vstorea_half4_rtn(double4, size_t, __global half *); void __ovld vstorea_half8_rtn(double8, size_t, __global half *); void __ovld vstorea_half16_rtn(double16, size_t, __global half *); void __ovld vstorea_half2(double2, size_t, __local half *); void __ovld vstorea_half3(double3, size_t, __local half *); void __ovld vstorea_half4(double4, size_t, __local half *); void __ovld vstorea_half8(double8, size_t, __local half *); void __ovld vstorea_half16(double16, size_t, __local half *); void __ovld vstorea_half2_rte(double2, size_t, __local half *); void __ovld vstorea_half3_rte(double3, size_t, __local half *); void __ovld vstorea_half4_rte(double4, size_t, __local half *); void __ovld vstorea_half8_rte(double8, size_t, __local half *); void __ovld vstorea_half16_rte(double16, size_t, __local half *); void __ovld vstorea_half2_rtz(double2, size_t, __local half *); void __ovld vstorea_half3_rtz(double3, size_t, __local half *); void __ovld vstorea_half4_rtz(double4, size_t, __local half *); void __ovld vstorea_half8_rtz(double8, size_t, __local half *); void __ovld vstorea_half16_rtz(double16, size_t, __local half *); void __ovld vstorea_half2_rtp(double2, size_t, __local half *); void __ovld vstorea_half3_rtp(double3, size_t, __local half *); void __ovld vstorea_half4_rtp(double4, size_t, __local half *); void __ovld vstorea_half8_rtp(double8, size_t, __local half *); void __ovld vstorea_half16_rtp(double16, size_t, __local half *); void __ovld 
vstorea_half2_rtn(double2, size_t, __local half *);
void __ovld vstorea_half3_rtn(double3, size_t, __local half *);
void __ovld vstorea_half4_rtn(double4, size_t, __local half *);
void __ovld vstorea_half8_rtn(double8, size_t, __local half *);
void __ovld vstorea_half16_rtn(double16, size_t, __local half *);
void __ovld vstorea_half2(double2, size_t, __private half *);
void __ovld vstorea_half3(double3, size_t, __private half *);
void __ovld vstorea_half4(double4, size_t, __private half *);
void __ovld vstorea_half8(double8, size_t, __private half *);
void __ovld vstorea_half16(double16, size_t, __private half *);
void __ovld vstorea_half2_rte(double2, size_t, __private half *);
void __ovld vstorea_half3_rte(double3, size_t, __private half *);
void __ovld vstorea_half4_rte(double4, size_t, __private half *);
void __ovld vstorea_half8_rte(double8, size_t, __private half *);
void __ovld vstorea_half16_rte(double16, size_t, __private half *);
void __ovld vstorea_half2_rtz(double2, size_t, __private half *);
void __ovld vstorea_half3_rtz(double3, size_t, __private half *);
void __ovld vstorea_half4_rtz(double4, size_t, __private half *);
void __ovld vstorea_half8_rtz(double8, size_t, __private half *);
void __ovld vstorea_half16_rtz(double16, size_t, __private half *);
void __ovld vstorea_half2_rtp(double2, size_t, __private half *);
void __ovld vstorea_half3_rtp(double3, size_t, __private half *);
void __ovld vstorea_half4_rtp(double4, size_t, __private half *);
void __ovld vstorea_half8_rtp(double8, size_t, __private half *);
void __ovld vstorea_half16_rtp(double16, size_t, __private half *);
void __ovld vstorea_half2_rtn(double2, size_t, __private half *);
void __ovld vstorea_half3_rtn(double3, size_t, __private half *);
void __ovld vstorea_half4_rtn(double4, size_t, __private half *);
void __ovld vstorea_half8_rtn(double8, size_t, __private half *);
void __ovld vstorea_half16_rtn(double16, size_t, __private half *);
#endif //cl_khr_fp64
#endif //defined(__opencl_c_named_address_space_builtins)
// OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions
/**
 * All work-items in a work-group executing the kernel
 * on a processor must execute this function before any
 * are allowed to continue execution beyond the barrier.
 * This function must be encountered by all work-items in
 * a work-group executing the kernel.
 * If barrier is inside a conditional statement, then all
 * work-items must enter the conditional if any work-item
 * enters the conditional statement and executes the
 * barrier.
 * If barrier is inside a loop, all work-items must execute
 * the barrier for each iteration of the loop before any are
 * allowed to continue execution beyond the barrier.
 * The barrier function also queues a memory fence
 * (reads and writes) to ensure correct ordering of
 * memory operations to local or global memory.
 * The flags argument specifies the memory address space
 * and can be set to a combination of the following literal
 * values.
 * CLK_LOCAL_MEM_FENCE - The barrier function
 * will either flush any variables stored in local memory
 * or queue a memory fence to ensure correct ordering of
 * memory operations to local memory.
 * CLK_GLOBAL_MEM_FENCE - The barrier function
 * will queue a memory fence to ensure correct ordering
 * of memory operations to global memory. This can be
 * useful when work-items, for example, write to buffer or
 * image objects and then want to read the updated data.
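 *
 * Illustrative sketch (not part of the header): a hypothetical work-group sum
 * reduction in __local memory, where barrier(CLK_LOCAL_MEM_FENCE) separates
 * the phases so every work-item sees the partial sums written by the others.
 * Assumes the work-group size is a power of two.
 *
 *   __kernel void wg_sum(__global const float *in, __global float *out,
 *                        __local float *tmp) {
 *       size_t lid = get_local_id(0), n = get_local_size(0);
 *       tmp[lid] = in[get_global_id(0)];
 *       barrier(CLK_LOCAL_MEM_FENCE);
 *       for (size_t s = n / 2; s > 0; s /= 2) {
 *           if (lid < s)
 *               tmp[lid] += tmp[lid + s];
 *           barrier(CLK_LOCAL_MEM_FENCE); // reached by all work-items each iteration
 *       }
 *       if (lid == 0)
 *           out[get_group_id(0)] = tmp[0];
 *   }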
*/ void __ovld __conv barrier(cl_mem_fence_flags); #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) void __ovld __conv work_group_barrier(cl_mem_fence_flags, memory_scope); void __ovld __conv work_group_barrier(cl_mem_fence_flags); #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // OpenCL v1.1 s6.11.9, v1.2 s6.12.9 - Explicit Memory Fence Functions /** * Orders loads and stores of a work-item * executing a kernel. This means that loads * and stores preceding the mem_fence will * be committed to memory before any loads * and stores following the mem_fence. * The flags argument specifies the memory * address space and can be set to a * combination of the following literal * values: * CLK_LOCAL_MEM_FENCE * CLK_GLOBAL_MEM_FENCE. */ void __ovld mem_fence(cl_mem_fence_flags); /** * Read memory barrier that orders only * loads. * The flags argument specifies the memory * address space and can be set to a * combination of the following literal * values: * CLK_LOCAL_MEM_FENCE * CLK_GLOBAL_MEM_FENCE. */ void __ovld read_mem_fence(cl_mem_fence_flags); /** * Write memory barrier that orders only * stores. * The flags argument specifies the memory * address space and can be set to a * combination of the following literal * values: * CLK_LOCAL_MEM_FENCE * CLK_GLOBAL_MEM_FENCE. */ void __ovld write_mem_fence(cl_mem_fence_flags); // OpenCL v2.0 s6.13.9 - Address Space Qualifier Functions #if defined(__opencl_c_generic_address_space) cl_mem_fence_flags __ovld get_fence(const void *ptr); cl_mem_fence_flags __ovld get_fence(void *ptr); /** * Builtin functions to_global, to_local, and to_private need to be declared as Clang builtin functions * and checked in Sema since they should be declared as * addr gentype* to_addr (gentype*); * where gentype is builtin type or user defined type. */ #endif //defined(__opencl_c_generic_address_space) // OpenCL v1.1 s6.11.10, v1.2 s6.12.10, v2.0 s6.13.10 - Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch /** * event_t async_work_group_copy ( * __global gentype *dst, * const __local gentype *src, * size_t num_elements, * event_t event) * Perform an async copy of num_elements * gentype elements from src to dst. The async * copy is performed by all work-items in a workgroup * and this built-in function must therefore * be encountered by all work-items in a workgroup * executing the kernel with the same * argument values; otherwise the results are * undefined. * Returns an event object that can be used by * wait_group_events to wait for the async copy * to finish. The event argument can also be used * to associate the async_work_group_copy with * a previous async copy allowing an event to be * shared by multiple async copies; otherwise event * should be zero. * If event argument is non-zero, the event object * supplied in event argument will be returned. * This function does not perform any implicit * synchronization of source data such as using a * barrier before performing the copy. 
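 *
 * Illustrative sketch (not part of the header): a hypothetical kernel that
 * stages one tile per work-group into __local memory, waits for the copy to
 * finish, and then works on the local copy. Assumes src holds at least
 * get_local_size(0) elements per work-group and tile is at least that large.
 *
 *   __kernel void stage(__global const float *src, __global float *dst,
 *                       __local float *tile) {
 *       size_t n = get_local_size(0);
 *       event_t e = async_work_group_copy(tile, src + get_group_id(0) * n, n, 0);
 *       wait_group_events(1, &e);          // wait for the async copy
 *       size_t lid = get_local_id(0);
 *       dst[get_global_id(0)] = tile[lid] * 2.0f;
 *   }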
*/ event_t __ovld async_work_group_copy(__local char *, const __global char *, size_t, event_t); event_t __ovld async_work_group_copy(__local uchar *, const __global uchar *, size_t, event_t); event_t __ovld async_work_group_copy(__local short *, const __global short *, size_t, event_t); event_t __ovld async_work_group_copy(__local ushort *, const __global ushort *, size_t, event_t); event_t __ovld async_work_group_copy(__local int *, const __global int *, size_t, event_t); event_t __ovld async_work_group_copy(__local uint *, const __global uint *, size_t, event_t); event_t __ovld async_work_group_copy(__local long *, const __global long *, size_t, event_t); event_t __ovld async_work_group_copy(__local ulong *, const __global ulong *, size_t, event_t); event_t __ovld async_work_group_copy(__local float *, const __global float *, size_t, event_t); event_t __ovld async_work_group_copy(__local char2 *, const __global char2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local uchar2 *, const __global uchar2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local short2 *, const __global short2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local ushort2 *, const __global ushort2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local int2 *, const __global int2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local uint2 *, const __global uint2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local long2 *, const __global long2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local ulong2 *, const __global ulong2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local float2 *, const __global float2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local char3 *, const __global char3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local uchar3 *, const __global uchar3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local short3 *, const __global short3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local ushort3 *, const __global ushort3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local int3 *, const __global int3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local uint3 *, const __global uint3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local long3 *, const __global long3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local ulong3 *, const __global ulong3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local float3 *, const __global float3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local char4 *, const __global char4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local uchar4 *, const __global uchar4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local short4 *, const __global short4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local ushort4 *, const __global ushort4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local int4 *, const __global int4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local uint4 *, const __global uint4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local long4 *, const __global long4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local ulong4 *, const __global ulong4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local float4 *, const __global float4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local char8 *, const __global 
char8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local uchar8 *, const __global uchar8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local short8 *, const __global short8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local ushort8 *, const __global ushort8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local int8 *, const __global int8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local uint8 *, const __global uint8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local long8 *, const __global long8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local ulong8 *, const __global ulong8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local float8 *, const __global float8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local char16 *, const __global char16 *, size_t, event_t); event_t __ovld async_work_group_copy(__local uchar16 *, const __global uchar16 *, size_t, event_t); event_t __ovld async_work_group_copy(__local short16 *, const __global short16 *, size_t, event_t); event_t __ovld async_work_group_copy(__local ushort16 *, const __global ushort16 *, size_t, event_t); event_t __ovld async_work_group_copy(__local int16 *, const __global int16 *, size_t, event_t); event_t __ovld async_work_group_copy(__local uint16 *, const __global uint16 *, size_t, event_t); event_t __ovld async_work_group_copy(__local long16 *, const __global long16 *, size_t, event_t); event_t __ovld async_work_group_copy(__local ulong16 *, const __global ulong16 *, size_t, event_t); event_t __ovld async_work_group_copy(__local float16 *, const __global float16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global char *, const __local char *, size_t, event_t); event_t __ovld async_work_group_copy(__global uchar *, const __local uchar *, size_t, event_t); event_t __ovld async_work_group_copy(__global short *, const __local short *, size_t, event_t); event_t __ovld async_work_group_copy(__global ushort *, const __local ushort *, size_t, event_t); event_t __ovld async_work_group_copy(__global int *, const __local int *, size_t, event_t); event_t __ovld async_work_group_copy(__global uint *, const __local uint *, size_t, event_t); event_t __ovld async_work_group_copy(__global long *, const __local long *, size_t, event_t); event_t __ovld async_work_group_copy(__global ulong *, const __local ulong *, size_t, event_t); event_t __ovld async_work_group_copy(__global float *, const __local float *, size_t, event_t); event_t __ovld async_work_group_copy(__global char2 *, const __local char2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global uchar2 *, const __local uchar2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global short2 *, const __local short2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global ushort2 *, const __local ushort2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global int2 *, const __local int2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global uint2 *, const __local uint2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global long2 *, const __local long2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global ulong2 *, const __local ulong2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global float2 *, const __local float2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global char3 *, const __local char3 *, size_t, event_t); event_t __ovld 
async_work_group_copy(__global uchar3 *, const __local uchar3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global short3 *, const __local short3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global ushort3 *, const __local ushort3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global int3 *, const __local int3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global uint3 *, const __local uint3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global long3 *, const __local long3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global ulong3 *, const __local ulong3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global float3 *, const __local float3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global char4 *, const __local char4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global uchar4 *, const __local uchar4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global short4 *, const __local short4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global ushort4 *, const __local ushort4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global int4 *, const __local int4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global uint4 *, const __local uint4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global long4 *, const __local long4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global ulong4 *, const __local ulong4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global float4 *, const __local float4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global char8 *, const __local char8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global uchar8 *, const __local uchar8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global short8 *, const __local short8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global ushort8 *, const __local ushort8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global int8 *, const __local int8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global uint8 *, const __local uint8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global long8 *, const __local long8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global ulong8 *, const __local ulong8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global float8 *, const __local float8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global char16 *, const __local char16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global uchar16 *, const __local uchar16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global short16 *, const __local short16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global ushort16 *, const __local ushort16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global int16 *, const __local int16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global uint16 *, const __local uint16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global long16 *, const __local long16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global ulong16 *, const __local ulong16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global float16 *, const __local float16 *, size_t, event_t); #ifdef cl_khr_fp64 event_t __ovld async_work_group_copy(__local double *, const __global double *, size_t, event_t); event_t __ovld 
async_work_group_copy(__local double2 *, const __global double2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local double3 *, const __global double3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local double4 *, const __global double4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local double8 *, const __global double8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local double16 *, const __global double16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global double *, const __local double *, size_t, event_t); event_t __ovld async_work_group_copy(__global double2 *, const __local double2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global double3 *, const __local double3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global double4 *, const __local double4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global double8 *, const __local double8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global double16 *, const __local double16 *, size_t, event_t); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 event_t __ovld async_work_group_copy(__local half *, const __global half *, size_t, event_t); event_t __ovld async_work_group_copy(__local half2 *, const __global half2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local half3 *, const __global half3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local half4 *, const __global half4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local half8 *, const __global half8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local half16 *, const __global half16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global half *, const __local half *, size_t, event_t); event_t __ovld async_work_group_copy(__global half2 *, const __local half2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global half3 *, const __local half3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global half4 *, const __local half4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global half8 *, const __local half8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global half16 *, const __local half16 *, size_t, event_t); #endif //cl_khr_fp16 /** * Perform an async gather of num_elements * gentype elements from src to dst. The * src_stride is the stride in elements for each * gentype element read from src. The dst_stride * is the stride in elements for each gentype * element written to dst. The async gather is * performed by all work-items in a work-group. * This built-in function must therefore be * encountered by all work-items in a work-group * executing the kernel with the same argument * values; otherwise the results are undefined. * Returns an event object that can be used by * wait_group_events to wait for the async copy * to finish. The event argument can also be used * to associate the * async_work_group_strided_copy with a * previous async copy allowing an event to be * shared by multiple async copies; otherwise event * should be zero. * If event argument is non-zero, the event object * supplied in event argument will be returned. * This function does not perform any implicit * synchronization of source data such as using a * barrier before performing the copy. 
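 *
 * A minimal usage sketch (an illustrative kernel; the interleaving factor of
 * 4 is an assumption, not part of this header): gather one channel of an
 * interleaved global buffer into contiguous local memory, then write it out.
 *
 *   __kernel void gather_channel(__global const float *interleaved,
 *                                __global float *out, __local float *chan) {
 *     size_t lsz = get_local_size(0);
 *     size_t base = get_group_id(0) * lsz * 4;   // 4 interleaved channels
 *     event_t e = async_work_group_strided_copy(chan, interleaved + base,
 *                                               lsz, 4, 0);
 *     wait_group_events(1, &e);
 *     out[get_global_id(0)] = chan[get_local_id(0)];
 *   }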
*/ event_t __ovld async_work_group_strided_copy(__local char *, const __global char *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uchar *, const __global uchar *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local short *, const __global short *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ushort *, const __global ushort *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local int *, const __global int *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uint *, const __global uint *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local long *, const __global long *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ulong *, const __global ulong *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local float *, const __global float *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local char2 *, const __global char2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uchar2 *, const __global uchar2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local short2 *, const __global short2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ushort2 *, const __global ushort2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local int2 *, const __global int2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uint2 *, const __global uint2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local long2 *, const __global long2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ulong2 *, const __global ulong2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local float2 *, const __global float2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local char3 *, const __global char3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uchar3 *, const __global uchar3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local short3 *, const __global short3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ushort3 *, const __global ushort3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local int3 *, const __global int3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uint3 *, const __global uint3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local long3 *, const __global long3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ulong3 *, const __global ulong3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local float3 *, const __global float3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local char4 *, const __global char4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uchar4 *, const __global uchar4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local short4 *, const __global short4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ushort4 *, const __global ushort4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local int4 
*, const __global int4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uint4 *, const __global uint4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local long4 *, const __global long4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ulong4 *, const __global ulong4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local float4 *, const __global float4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local char8 *, const __global char8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uchar8 *, const __global uchar8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local short8 *, const __global short8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ushort8 *, const __global ushort8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local int8 *, const __global int8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uint8 *, const __global uint8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local long8 *, const __global long8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ulong8 *, const __global ulong8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local float8 *, const __global float8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local char16 *, const __global char16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uchar16 *, const __global uchar16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local short16 *, const __global short16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ushort16 *, const __global ushort16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local int16 *, const __global int16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uint16 *, const __global uint16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local long16 *, const __global long16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ulong16 *, const __global ulong16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local float16 *, const __global float16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global char *, const __local char *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uchar *, const __local uchar *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global short *, const __local short *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ushort *, const __local ushort *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global int *, const __local int *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uint *, const __local uint *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global long *, const __local long *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ulong *, const __local ulong *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global float *, const __local float *, size_t, size_t, 
event_t); event_t __ovld async_work_group_strided_copy(__global char2 *, const __local char2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uchar2 *, const __local uchar2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global short2 *, const __local short2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ushort2 *, const __local ushort2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global int2 *, const __local int2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uint2 *, const __local uint2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global long2 *, const __local long2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ulong2 *, const __local ulong2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global float2 *, const __local float2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global char3 *, const __local char3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uchar3 *, const __local uchar3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global short3 *, const __local short3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ushort3 *, const __local ushort3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global int3 *, const __local int3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uint3 *, const __local uint3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global long3 *, const __local long3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ulong3 *, const __local ulong3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global float3 *, const __local float3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global char4 *, const __local char4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uchar4 *, const __local uchar4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global short4 *, const __local short4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ushort4 *, const __local ushort4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global int4 *, const __local int4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uint4 *, const __local uint4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global long4 *, const __local long4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ulong4 *, const __local ulong4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global float4 *, const __local float4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global char8 *, const __local char8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uchar8 *, const __local uchar8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global short8 *, const __local short8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ushort8 *, const __local ushort8 *, size_t, size_t, event_t); event_t __ovld 
async_work_group_strided_copy(__global int8 *, const __local int8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uint8 *, const __local uint8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global long8 *, const __local long8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ulong8 *, const __local ulong8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global float8 *, const __local float8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global char16 *, const __local char16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uchar16 *, const __local uchar16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global short16 *, const __local short16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ushort16 *, const __local ushort16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global int16 *, const __local int16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uint16 *, const __local uint16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global long16 *, const __local long16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ulong16 *, const __local ulong16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global float16 *, const __local float16 *, size_t, size_t, event_t); #ifdef cl_khr_fp64 event_t __ovld async_work_group_strided_copy(__local double *, const __global double *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local double2 *, const __global double2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local double3 *, const __global double3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local double4 *, const __global double4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local double8 *, const __global double8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local double16 *, const __global double16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global double *, const __local double *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global double2 *, const __local double2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global double3 *, const __local double3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global double4 *, const __local double4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global double8 *, const __local double8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global double16 *, const __local double16 *, size_t, size_t, event_t); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 event_t __ovld async_work_group_strided_copy(__local half *, const __global half *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local half2 *, const __global half2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local half3 *, const __global half3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local half4 *, const __global half4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local half8 *, const 
__global half8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local half16 *, const __global half16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global half *, const __local half *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global half2 *, const __local half2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global half3 *, const __local half3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global half4 *, const __local half4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global half8 *, const __local half8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global half16 *, const __local half16 *, size_t, size_t, event_t); #endif //cl_khr_fp16 /** * Wait for events that identify the * async_work_group_copy operations to * complete. The event objects specified in * event_list will be released after the wait is * performed. * This function must be encountered by all workitems * in a work-group executing the kernel with * the same num_events and event objects specified * in event_list; otherwise the results are undefined. */ void __ovld wait_group_events(int, event_t *); /** * Prefetch num_elements * sizeof(gentype) * bytes into the global cache. The prefetch * instruction is applied to a work-item in a workgroup * and does not affect the functional * behavior of the kernel. */ void __ovld prefetch(const __global char *, size_t); void __ovld prefetch(const __global uchar *, size_t); void __ovld prefetch(const __global short *, size_t); void __ovld prefetch(const __global ushort *, size_t); void __ovld prefetch(const __global int *, size_t); void __ovld prefetch(const __global uint *, size_t); void __ovld prefetch(const __global long *, size_t); void __ovld prefetch(const __global ulong *, size_t); void __ovld prefetch(const __global float *, size_t); void __ovld prefetch(const __global char2 *, size_t); void __ovld prefetch(const __global uchar2 *, size_t); void __ovld prefetch(const __global short2 *, size_t); void __ovld prefetch(const __global ushort2 *, size_t); void __ovld prefetch(const __global int2 *, size_t); void __ovld prefetch(const __global uint2 *, size_t); void __ovld prefetch(const __global long2 *, size_t); void __ovld prefetch(const __global ulong2 *, size_t); void __ovld prefetch(const __global float2 *, size_t); void __ovld prefetch(const __global char3 *, size_t); void __ovld prefetch(const __global uchar3 *, size_t); void __ovld prefetch(const __global short3 *, size_t); void __ovld prefetch(const __global ushort3 *, size_t); void __ovld prefetch(const __global int3 *, size_t); void __ovld prefetch(const __global uint3 *, size_t); void __ovld prefetch(const __global long3 *, size_t); void __ovld prefetch(const __global ulong3 *, size_t); void __ovld prefetch(const __global float3 *, size_t); void __ovld prefetch(const __global char4 *, size_t); void __ovld prefetch(const __global uchar4 *, size_t); void __ovld prefetch(const __global short4 *, size_t); void __ovld prefetch(const __global ushort4 *, size_t); void __ovld prefetch(const __global int4 *, size_t); void __ovld prefetch(const __global uint4 *, size_t); void __ovld prefetch(const __global long4 *, size_t); void __ovld prefetch(const __global ulong4 *, size_t); void __ovld prefetch(const __global float4 *, size_t); void __ovld prefetch(const __global char8 *, size_t); void __ovld prefetch(const __global uchar8 *, 
size_t); void __ovld prefetch(const __global short8 *, size_t); void __ovld prefetch(const __global ushort8 *, size_t); void __ovld prefetch(const __global int8 *, size_t); void __ovld prefetch(const __global uint8 *, size_t); void __ovld prefetch(const __global long8 *, size_t); void __ovld prefetch(const __global ulong8 *, size_t); void __ovld prefetch(const __global float8 *, size_t); void __ovld prefetch(const __global char16 *, size_t); void __ovld prefetch(const __global uchar16 *, size_t); void __ovld prefetch(const __global short16 *, size_t); void __ovld prefetch(const __global ushort16 *, size_t); void __ovld prefetch(const __global int16 *, size_t); void __ovld prefetch(const __global uint16 *, size_t); void __ovld prefetch(const __global long16 *, size_t); void __ovld prefetch(const __global ulong16 *, size_t); void __ovld prefetch(const __global float16 *, size_t); #ifdef cl_khr_fp64 void __ovld prefetch(const __global double *, size_t); void __ovld prefetch(const __global double2 *, size_t); void __ovld prefetch(const __global double3 *, size_t); void __ovld prefetch(const __global double4 *, size_t); void __ovld prefetch(const __global double8 *, size_t); void __ovld prefetch(const __global double16 *, size_t); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 void __ovld prefetch(const __global half *, size_t); void __ovld prefetch(const __global half2 *, size_t); void __ovld prefetch(const __global half3 *, size_t); void __ovld prefetch(const __global half4 *, size_t); void __ovld prefetch(const __global half8 *, size_t); void __ovld prefetch(const __global half16 *, size_t); #endif // cl_khr_fp16 // OpenCL v1.1 s6.11.1, v1.2 s6.12.11 - Atomic Functions #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable #pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable #endif /** * Read the 32-bit value (referred to as old) * stored at location pointed by p. Compute * (old + val) and store result at location * pointed by p. The function returns old. */ int __ovld atomic_add(volatile __global int *, int); uint __ovld atomic_add(volatile __global uint *, uint); int __ovld atomic_add(volatile __local int *, int); uint __ovld atomic_add(volatile __local uint *, uint); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_add(volatile int *, int); uint __ovld atomic_add(volatile uint *, uint); #endif #if defined(cl_khr_global_int32_base_atomics) int __ovld atom_add(volatile __global int *, int); uint __ovld atom_add(volatile __global uint *, uint); #endif #if defined(cl_khr_local_int32_base_atomics) int __ovld atom_add(volatile __local int *, int); uint __ovld atom_add(volatile __local uint *, uint); #endif #if defined(cl_khr_int64_base_atomics) long __ovld atom_add(volatile __global long *, long); ulong __ovld atom_add(volatile __global ulong *, ulong); long __ovld atom_add(volatile __local long *, long); ulong __ovld atom_add(volatile __local ulong *, ulong); #endif /** * Read the 32-bit value (referred to as old) stored at location pointed by p. * Compute (old - val) and store result at location pointed by p. The function * returns old. 
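 *
 * A minimal usage sketch (an illustrative kernel, not part of this header):
 * the returned old value tells each work-item whether a slot was still
 * available before its decrement.
 *
 *   __kernel void claim_slots(volatile __global int *remaining,
 *                             __global int *claimed) {
 *     int old = atomic_sub(remaining, 1);   // value before the decrement
 *     claimed[get_global_id(0)] = (old > 0);
 *   }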
*/ int __ovld atomic_sub(volatile __global int *, int); uint __ovld atomic_sub(volatile __global uint *, uint); int __ovld atomic_sub(volatile __local int *, int); uint __ovld atomic_sub(volatile __local uint *, uint); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_sub(volatile int *, int); uint __ovld atomic_sub(volatile uint *, uint); #endif #if defined(cl_khr_global_int32_base_atomics) int __ovld atom_sub(volatile __global int *, int); uint __ovld atom_sub(volatile __global uint *, uint); #endif #if defined(cl_khr_local_int32_base_atomics) int __ovld atom_sub(volatile __local int *, int); uint __ovld atom_sub(volatile __local uint *, uint); #endif #if defined(cl_khr_int64_base_atomics) long __ovld atom_sub(volatile __global long *, long); ulong __ovld atom_sub(volatile __global ulong *, ulong); long __ovld atom_sub(volatile __local long *, long); ulong __ovld atom_sub(volatile __local ulong *, ulong); #endif /** * Swaps the old value stored at location p * with new value given by val. Returns old * value. */ int __ovld atomic_xchg(volatile __global int *, int); uint __ovld atomic_xchg(volatile __global uint *, uint); int __ovld atomic_xchg(volatile __local int *, int); uint __ovld atomic_xchg(volatile __local uint *, uint); float __ovld atomic_xchg(volatile __global float *, float); float __ovld atomic_xchg(volatile __local float *, float); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_xchg(volatile int *, int); uint __ovld atomic_xchg(volatile uint *, uint); float __ovld atomic_xchg(volatile float *, float); #endif #if defined(cl_khr_global_int32_base_atomics) int __ovld atom_xchg(volatile __global int *, int); uint __ovld atom_xchg(volatile __global uint *, uint); #endif #if defined(cl_khr_local_int32_base_atomics) int __ovld atom_xchg(volatile __local int *, int); uint __ovld atom_xchg(volatile __local uint *, uint); #endif #if defined(cl_khr_int64_base_atomics) long __ovld atom_xchg(volatile __global long *, long); long __ovld atom_xchg(volatile __local long *, long); ulong __ovld atom_xchg(volatile __global ulong *, ulong); ulong __ovld atom_xchg(volatile __local ulong *, ulong); #endif /** * Read the 32-bit value (referred to as old) * stored at location pointed by p. Compute * (old + 1) and store result at location * pointed by p. The function returns old. */ int __ovld atomic_inc(volatile __global int *); uint __ovld atomic_inc(volatile __global uint *); int __ovld atomic_inc(volatile __local int *); uint __ovld atomic_inc(volatile __local uint *); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_inc(volatile int *); uint __ovld atomic_inc(volatile uint *); #endif #if defined(cl_khr_global_int32_base_atomics) int __ovld atom_inc(volatile __global int *); uint __ovld atom_inc(volatile __global uint *); #endif #if defined(cl_khr_local_int32_base_atomics) int __ovld atom_inc(volatile __local int *); uint __ovld atom_inc(volatile __local uint *); #endif #if defined(cl_khr_int64_base_atomics) long __ovld atom_inc(volatile __global long *); ulong __ovld atom_inc(volatile __global ulong *); long __ovld atom_inc(volatile __local long *); ulong __ovld atom_inc(volatile __local ulong *); #endif /** * Read the 32-bit value (referred to as old) * stored at location pointed by p. Compute * (old - 1) and store result at location * pointed by p. The function returns old. 
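 *
 * A minimal usage sketch (an illustrative kernel, not part of this header;
 * assumes the counter was initialized to the number of work-groups): one
 * work-item per group decrements the shared counter, and the group that
 * observes the old value 1 knows it finished last.
 *
 *   __kernel void mark_last_group(volatile __global int *groups_left,
 *                                 __global int *last_flag) {
 *     if (get_local_id(0) == 0 && atomic_dec(groups_left) == 1)
 *       *last_flag = 1;
 *   }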
*/ int __ovld atomic_dec(volatile __global int *); uint __ovld atomic_dec(volatile __global uint *); int __ovld atomic_dec(volatile __local int *); uint __ovld atomic_dec(volatile __local uint *); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_dec(volatile int *); uint __ovld atomic_dec(volatile uint *); #endif #if defined(cl_khr_global_int32_base_atomics) int __ovld atom_dec(volatile __global int *); uint __ovld atom_dec(volatile __global uint *); #endif #if defined(cl_khr_local_int32_base_atomics) int __ovld atom_dec(volatile __local int *); uint __ovld atom_dec(volatile __local uint *); #endif #if defined(cl_khr_int64_base_atomics) long __ovld atom_dec(volatile __global long *); ulong __ovld atom_dec(volatile __global ulong *); long __ovld atom_dec(volatile __local long *); ulong __ovld atom_dec(volatile __local ulong *); #endif /** * Read the 32-bit value (referred to as old) * stored at location pointed by p. Compute * (old == cmp) ? val : old and store result at * location pointed by p. The function * returns old. */ int __ovld atomic_cmpxchg(volatile __global int *, int, int); uint __ovld atomic_cmpxchg(volatile __global uint *, uint, uint); int __ovld atomic_cmpxchg(volatile __local int *, int, int); uint __ovld atomic_cmpxchg(volatile __local uint *, uint, uint); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_cmpxchg(volatile int *, int, int); uint __ovld atomic_cmpxchg(volatile uint *, uint, uint); #endif #if defined(cl_khr_global_int32_base_atomics) int __ovld atom_cmpxchg(volatile __global int *, int, int); uint __ovld atom_cmpxchg(volatile __global uint *, uint, uint); #endif #if defined(cl_khr_local_int32_base_atomics) int __ovld atom_cmpxchg(volatile __local int *, int, int); uint __ovld atom_cmpxchg(volatile __local uint *, uint, uint); #endif #if defined(cl_khr_int64_base_atomics) long __ovld atom_cmpxchg(volatile __global long *, long, long); ulong __ovld atom_cmpxchg(volatile __global ulong *, ulong, ulong); long __ovld atom_cmpxchg(volatile __local long *, long, long); ulong __ovld atom_cmpxchg(volatile __local ulong *, ulong, ulong); #endif /** * Read the 32-bit value (referred to as old) * stored at location pointed by p. Compute * min(old, val) and store minimum value at * location pointed by p. The function * returns old. */ int __ovld atomic_min(volatile __global int *, int); uint __ovld atomic_min(volatile __global uint *, uint); int __ovld atomic_min(volatile __local int *, int); uint __ovld atomic_min(volatile __local uint *, uint); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_min(volatile int *, int); uint __ovld atomic_min(volatile uint *, uint); #endif #if defined(cl_khr_global_int32_extended_atomics) int __ovld atom_min(volatile __global int *, int); uint __ovld atom_min(volatile __global uint *, uint); #endif #if defined(cl_khr_local_int32_extended_atomics) int __ovld atom_min(volatile __local int *, int); uint __ovld atom_min(volatile __local uint *, uint); #endif #if defined(cl_khr_int64_extended_atomics) long __ovld atom_min(volatile __global long *, long); ulong __ovld atom_min(volatile __global ulong *, ulong); long __ovld atom_min(volatile __local long *, long); ulong __ovld atom_min(volatile __local ulong *, ulong); #endif /** * Read the 32-bit value (referred to as old) * stored at location pointed by p. Compute * max(old, val) and store maximum value at * location pointed by p. The function * returns old. 
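 *
 * A minimal usage sketch (an illustrative kernel, not part of this header):
 * every work-item folds its sample into a single shared peak value.
 *
 *   __kernel void track_peak(__global const int *samples,
 *                            volatile __global int *peak) {
 *     atomic_max(peak, samples[get_global_id(0)]);
 *   }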
*/ int __ovld atomic_max(volatile __global int *, int); uint __ovld atomic_max(volatile __global uint *, uint); int __ovld atomic_max(volatile __local int *, int); uint __ovld atomic_max(volatile __local uint *, uint); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_max(volatile int *, int); uint __ovld atomic_max(volatile uint *, uint); #endif #if defined(cl_khr_global_int32_extended_atomics) int __ovld atom_max(volatile __global int *, int); uint __ovld atom_max(volatile __global uint *, uint); #endif #if defined(cl_khr_local_int32_extended_atomics) int __ovld atom_max(volatile __local int *, int); uint __ovld atom_max(volatile __local uint *, uint); #endif #if defined(cl_khr_int64_extended_atomics) long __ovld atom_max(volatile __global long *, long); ulong __ovld atom_max(volatile __global ulong *, ulong); long __ovld atom_max(volatile __local long *, long); ulong __ovld atom_max(volatile __local ulong *, ulong); #endif /** * Read the 32-bit value (referred to as old) * stored at location pointed by p. Compute * (old & val) and store result at location * pointed by p. The function returns old. */ int __ovld atomic_and(volatile __global int *, int); uint __ovld atomic_and(volatile __global uint *, uint); int __ovld atomic_and(volatile __local int *, int); uint __ovld atomic_and(volatile __local uint *, uint); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_and(volatile int *, int); uint __ovld atomic_and(volatile uint *, uint); #endif #if defined(cl_khr_global_int32_extended_atomics) int __ovld atom_and(volatile __global int *, int); uint __ovld atom_and(volatile __global uint *, uint); #endif #if defined(cl_khr_local_int32_extended_atomics) int __ovld atom_and(volatile __local int *, int); uint __ovld atom_and(volatile __local uint *, uint); #endif #if defined(cl_khr_int64_extended_atomics) long __ovld atom_and(volatile __global long *, long); ulong __ovld atom_and(volatile __global ulong *, ulong); long __ovld atom_and(volatile __local long *, long); ulong __ovld atom_and(volatile __local ulong *, ulong); #endif /** * Read the 32-bit value (referred to as old) * stored at location pointed by p. Compute * (old | val) and store result at location * pointed by p. The function returns old. */ int __ovld atomic_or(volatile __global int *, int); uint __ovld atomic_or(volatile __global uint *, uint); int __ovld atomic_or(volatile __local int *, int); uint __ovld atomic_or(volatile __local uint *, uint); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_or(volatile int *, int); uint __ovld atomic_or(volatile uint *, uint); #endif #if defined(cl_khr_global_int32_extended_atomics) int __ovld atom_or(volatile __global int *, int); uint __ovld atom_or(volatile __global uint *, uint); #endif #if defined(cl_khr_local_int32_extended_atomics) int __ovld atom_or(volatile __local int *, int); uint __ovld atom_or(volatile __local uint *, uint); #endif #if defined(cl_khr_int64_extended_atomics) long __ovld atom_or(volatile __global long *, long); ulong __ovld atom_or(volatile __global ulong *, ulong); long __ovld atom_or(volatile __local long *, long); ulong __ovld atom_or(volatile __local ulong *, ulong); #endif /** * Read the 32-bit value (referred to as old) * stored at location pointed by p. Compute * (old ^ val) and store result at location * pointed by p. The function returns old. 
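 *
 * A minimal usage sketch (an illustrative kernel, not part of this header):
 * work-items toggle individual bits of a shared mask without losing each
 * other's updates.
 *
 *   __kernel void toggle_flags(volatile __global uint *mask,
 *                              __global const uint *bit) {
 *     atomic_xor(mask, 1u << (bit[get_global_id(0)] & 31u));
 *   }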
*/ int __ovld atomic_xor(volatile __global int *, int); uint __ovld atomic_xor(volatile __global uint *, uint); int __ovld atomic_xor(volatile __local int *, int); uint __ovld atomic_xor(volatile __local uint *, uint); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_xor(volatile int *, int); uint __ovld atomic_xor(volatile uint *, uint); #endif #if defined(cl_khr_global_int32_extended_atomics) int __ovld atom_xor(volatile __global int *, int); uint __ovld atom_xor(volatile __global uint *, uint); #endif #if defined(cl_khr_local_int32_extended_atomics) int __ovld atom_xor(volatile __local int *, int); uint __ovld atom_xor(volatile __local uint *, uint); #endif #if defined(cl_khr_int64_extended_atomics) long __ovld atom_xor(volatile __global long *, long); ulong __ovld atom_xor(volatile __global ulong *, ulong); long __ovld atom_xor(volatile __local long *, long); ulong __ovld atom_xor(volatile __local ulong *, ulong); #endif #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : disable #pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : disable #endif // OpenCL v2.0 s6.13.11 - Atomics Functions #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // double atomics support requires extensions cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable #pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable #endif // atomic_init() #if defined(__opencl_c_generic_address_space) void __ovld atomic_init(volatile atomic_int *, int); void __ovld atomic_init(volatile atomic_uint *, uint); void __ovld atomic_init(volatile atomic_float *, float); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) void __ovld atomic_init(volatile atomic_long *, long); void __ovld atomic_init(volatile atomic_ulong *, ulong); #ifdef cl_khr_fp64 void __ovld atomic_init(volatile atomic_double *, double); #endif //cl_khr_fp64 #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) void __ovld atomic_init(volatile __global atomic_int *, int); void __ovld atomic_init(volatile __local atomic_int *, int); void __ovld atomic_init(volatile __global atomic_uint *, uint); void __ovld atomic_init(volatile __local atomic_uint *, uint); void __ovld atomic_init(volatile __global atomic_float *, float); void __ovld atomic_init(volatile __local atomic_float *, float); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) void __ovld atomic_init(volatile __global atomic_long *, long); void __ovld atomic_init(volatile __local atomic_long *, long); void __ovld atomic_init(volatile __global atomic_ulong *, ulong); void __ovld atomic_init(volatile __local atomic_ulong *, ulong); #ifdef cl_khr_fp64 void __ovld atomic_init(volatile __global atomic_double *, double); void __ovld atomic_init(volatile __local atomic_double *, double); #endif //cl_khr_fp64 #endif #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) // atomic_work_item_fence() void __ovld atomic_work_item_fence(cl_mem_fence_flags, memory_order, memory_scope); // atomic_fetch() // OpenCL v2.0 s6.13.11.7.5: // add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument can be ptrdiff_t. 
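/*
 * A minimal usage sketch for the OpenCL 2.0 fetch-and-op builtins declared
 * below (an illustrative kernel, not part of this header; the counter is
 * assumed to have been initialized with atomic_init or from the host):
 *
 *   __kernel void count_nonzero(__global const int *data,
 *                               volatile __global atomic_int *counter) {
 *     if (data[get_global_id(0)] != 0)
 *       atomic_fetch_add(counter, 1);   // seq_cst order, device scope
 *   }
 */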
#if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) int __ovld atomic_fetch_add(volatile atomic_int *, int); uint __ovld atomic_fetch_add(volatile atomic_uint *, uint); int __ovld atomic_fetch_sub(volatile atomic_int *, int); uint __ovld atomic_fetch_sub(volatile atomic_uint *, uint); int __ovld atomic_fetch_or(volatile atomic_int *, int); uint __ovld atomic_fetch_or(volatile atomic_uint *, uint); int __ovld atomic_fetch_xor(volatile atomic_int *, int); uint __ovld atomic_fetch_xor(volatile atomic_uint *, uint); int __ovld atomic_fetch_and(volatile atomic_int *, int); uint __ovld atomic_fetch_and(volatile atomic_uint *, uint); int __ovld atomic_fetch_min(volatile atomic_int *, int); uint __ovld atomic_fetch_min(volatile atomic_uint *, uint); int __ovld atomic_fetch_max(volatile atomic_int *, int); uint __ovld atomic_fetch_max(volatile atomic_uint *, uint); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) long __ovld atomic_fetch_add(volatile atomic_long *, long); ulong __ovld atomic_fetch_add(volatile atomic_ulong *, ulong); long __ovld atomic_fetch_sub(volatile atomic_long *, long); ulong __ovld atomic_fetch_sub(volatile atomic_ulong *, ulong); long __ovld atomic_fetch_or(volatile atomic_long *, long); ulong __ovld atomic_fetch_or(volatile atomic_ulong *, ulong); long __ovld atomic_fetch_xor(volatile atomic_long *, long); ulong __ovld atomic_fetch_xor(volatile atomic_ulong *, ulong); long __ovld atomic_fetch_and(volatile atomic_long *, long); ulong __ovld atomic_fetch_and(volatile atomic_ulong *, ulong); long __ovld atomic_fetch_min(volatile atomic_long *, long); ulong __ovld atomic_fetch_min(volatile atomic_ulong *, ulong); long __ovld atomic_fetch_max(volatile atomic_long *, long); ulong __ovld atomic_fetch_max(volatile atomic_ulong *, ulong); uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *, ptrdiff_t); uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *, ptrdiff_t); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) int __ovld atomic_fetch_add(volatile __global atomic_int *, int); int __ovld atomic_fetch_add(volatile __local atomic_int *, int); uint __ovld atomic_fetch_add(volatile __global atomic_uint *, uint); uint __ovld atomic_fetch_add(volatile __local atomic_uint *, uint); int __ovld atomic_fetch_sub(volatile __global atomic_int *, int); int __ovld atomic_fetch_sub(volatile __local atomic_int *, int); uint __ovld atomic_fetch_sub(volatile __global atomic_uint *, uint); uint __ovld atomic_fetch_sub(volatile __local atomic_uint *, uint); int __ovld atomic_fetch_or(volatile __global atomic_int *, int); int __ovld atomic_fetch_or(volatile __local atomic_int *, int); uint __ovld atomic_fetch_or(volatile __global atomic_uint *, uint); uint __ovld atomic_fetch_or(volatile __local atomic_uint *, uint); int __ovld atomic_fetch_xor(volatile __global atomic_int *, int); int __ovld atomic_fetch_xor(volatile __local atomic_int *, int); uint __ovld atomic_fetch_xor(volatile __global atomic_uint *, uint); uint __ovld atomic_fetch_xor(volatile __local atomic_uint *, uint); int __ovld atomic_fetch_and(volatile __global atomic_int *, int); int __ovld atomic_fetch_and(volatile __local atomic_int *, int); uint __ovld atomic_fetch_and(volatile __global atomic_uint *, uint); uint __ovld 
atomic_fetch_and(volatile __local atomic_uint *, uint); int __ovld atomic_fetch_min(volatile __global atomic_int *, int); int __ovld atomic_fetch_min(volatile __local atomic_int *, int); uint __ovld atomic_fetch_min(volatile __global atomic_uint *, uint); uint __ovld atomic_fetch_min(volatile __local atomic_uint *, uint); int __ovld atomic_fetch_max(volatile __global atomic_int *, int); int __ovld atomic_fetch_max(volatile __local atomic_int *, int); uint __ovld atomic_fetch_max(volatile __global atomic_uint *, uint); uint __ovld atomic_fetch_max(volatile __local atomic_uint *, uint); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) long __ovld atomic_fetch_add(volatile __global atomic_long *, long); long __ovld atomic_fetch_add(volatile __local atomic_long *, long); ulong __ovld atomic_fetch_add(volatile __global atomic_ulong *, ulong); ulong __ovld atomic_fetch_add(volatile __local atomic_ulong *, ulong); uintptr_t __ovld atomic_fetch_add(volatile __global atomic_uintptr_t *, ptrdiff_t); uintptr_t __ovld atomic_fetch_add(volatile __local atomic_uintptr_t *, ptrdiff_t); long __ovld atomic_fetch_sub(volatile __global atomic_long *, long); long __ovld atomic_fetch_sub(volatile __local atomic_long *, long); ulong __ovld atomic_fetch_sub(volatile __global atomic_ulong *, ulong); ulong __ovld atomic_fetch_sub(volatile __local atomic_ulong *, ulong); uintptr_t __ovld atomic_fetch_sub(volatile __global atomic_uintptr_t *, ptrdiff_t); uintptr_t __ovld atomic_fetch_sub(volatile __local atomic_uintptr_t *, ptrdiff_t); long __ovld atomic_fetch_or(volatile __global atomic_long *, long); long __ovld atomic_fetch_or(volatile __local atomic_long *, long); ulong __ovld atomic_fetch_or(volatile __global atomic_ulong *, ulong); ulong __ovld atomic_fetch_or(volatile __local atomic_ulong *, ulong); uintptr_t __ovld atomic_fetch_or(volatile __global atomic_uintptr_t *, intptr_t); uintptr_t __ovld atomic_fetch_or(volatile __local atomic_uintptr_t *, intptr_t); intptr_t __ovld atomic_fetch_or(volatile __global atomic_intptr_t *, uintptr_t); intptr_t __ovld atomic_fetch_or(volatile __local atomic_intptr_t *, uintptr_t); long __ovld atomic_fetch_xor(volatile __global atomic_long *, long); long __ovld atomic_fetch_xor(volatile __local atomic_long *, long); ulong __ovld atomic_fetch_xor(volatile __global atomic_ulong *, ulong); ulong __ovld atomic_fetch_xor(volatile __local atomic_ulong *, ulong); uintptr_t __ovld atomic_fetch_xor(volatile __global atomic_uintptr_t *, intptr_t); uintptr_t __ovld atomic_fetch_xor(volatile __local atomic_uintptr_t *, intptr_t); intptr_t __ovld atomic_fetch_xor(volatile __global atomic_intptr_t *, uintptr_t); intptr_t __ovld atomic_fetch_xor(volatile __local atomic_intptr_t *, uintptr_t); long __ovld atomic_fetch_and(volatile __global atomic_long *, long); long __ovld atomic_fetch_and(volatile __local atomic_long *, long); ulong __ovld atomic_fetch_and(volatile __global atomic_ulong *, ulong); ulong __ovld atomic_fetch_and(volatile __local atomic_ulong *, ulong); uintptr_t __ovld atomic_fetch_and(volatile __global atomic_uintptr_t *, intptr_t); uintptr_t __ovld atomic_fetch_and(volatile __local atomic_uintptr_t *, intptr_t); intptr_t __ovld atomic_fetch_and(volatile __global atomic_intptr_t *, uintptr_t); intptr_t __ovld atomic_fetch_and(volatile __local atomic_intptr_t *, uintptr_t); long __ovld atomic_fetch_min(volatile __global atomic_long *, long); long __ovld atomic_fetch_min(volatile __local atomic_long *, long); ulong __ovld 
atomic_fetch_min(volatile __global atomic_ulong *, ulong); ulong __ovld atomic_fetch_min(volatile __local atomic_ulong *, ulong); uintptr_t __ovld atomic_fetch_min(volatile __global atomic_uintptr_t *, intptr_t); uintptr_t __ovld atomic_fetch_min(volatile __local atomic_uintptr_t *, intptr_t); intptr_t __ovld atomic_fetch_min(volatile __global atomic_intptr_t *, uintptr_t); intptr_t __ovld atomic_fetch_min(volatile __local atomic_intptr_t *, uintptr_t); long __ovld atomic_fetch_max(volatile __global atomic_long *, long); long __ovld atomic_fetch_max(volatile __local atomic_long *, long); ulong __ovld atomic_fetch_max(volatile __global atomic_ulong *, ulong); ulong __ovld atomic_fetch_max(volatile __local atomic_ulong *, ulong); uintptr_t __ovld atomic_fetch_max(volatile __global atomic_uintptr_t *, uintptr_t); uintptr_t __ovld atomic_fetch_max(volatile __local atomic_uintptr_t *, uintptr_t); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) int __ovld atomic_fetch_add_explicit(volatile atomic_int *, int, memory_order); uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *, uint, memory_order); int __ovld atomic_fetch_sub_explicit(volatile atomic_int *, int, memory_order); uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *, uint, memory_order); int __ovld atomic_fetch_or_explicit(volatile atomic_int *, int, memory_order); uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *, uint, memory_order); int __ovld atomic_fetch_xor_explicit(volatile atomic_int *, int, memory_order); uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *, uint, memory_order); int __ovld atomic_fetch_and_explicit(volatile atomic_int *, int, memory_order); uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *, uint, memory_order); int __ovld atomic_fetch_min_explicit(volatile atomic_int *, int, memory_order); uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *, uint, memory_order); int __ovld atomic_fetch_max_explicit(volatile atomic_int *, int, memory_order); uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *, uint, memory_order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) long __ovld atomic_fetch_add_explicit(volatile atomic_long *, long, memory_order); ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *, ulong, memory_order); long __ovld atomic_fetch_sub_explicit(volatile atomic_long *, long, memory_order); ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *, ulong, memory_order); long __ovld atomic_fetch_or_explicit(volatile atomic_long *, long, memory_order); ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *, ulong, memory_order); long __ovld atomic_fetch_xor_explicit(volatile atomic_long *, long, memory_order); ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *, ulong, memory_order); long __ovld atomic_fetch_and_explicit(volatile atomic_long *, long, memory_order); ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *, ulong, memory_order); long __ovld atomic_fetch_min_explicit(volatile atomic_long *, long, memory_order); ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *, ulong, memory_order); long __ovld atomic_fetch_max_explicit(volatile atomic_long *, long, memory_order); ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *, 
ulong, memory_order); uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *, ptrdiff_t, memory_order); uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *, ptrdiff_t, memory_order); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) int __ovld atomic_fetch_add_explicit(volatile __global atomic_int *, int, memory_order); int __ovld atomic_fetch_add_explicit(volatile __local atomic_int *, int, memory_order); uint __ovld atomic_fetch_add_explicit(volatile __global atomic_uint *, uint, memory_order); uint __ovld atomic_fetch_add_explicit(volatile __local atomic_uint *, uint, memory_order); int __ovld atomic_fetch_sub_explicit(volatile __global atomic_int *, int, memory_order); int __ovld atomic_fetch_sub_explicit(volatile __local atomic_int *, int, memory_order); uint __ovld atomic_fetch_sub_explicit(volatile __global atomic_uint *, uint, memory_order); uint __ovld atomic_fetch_sub_explicit(volatile __local atomic_uint *, uint, memory_order); int __ovld atomic_fetch_or_explicit(volatile __global atomic_int *, int, memory_order); int __ovld atomic_fetch_or_explicit(volatile __local atomic_int *, int, memory_order); uint __ovld atomic_fetch_or_explicit(volatile __global atomic_uint *, uint, memory_order); uint __ovld atomic_fetch_or_explicit(volatile __local atomic_uint *, uint, memory_order); int __ovld atomic_fetch_xor_explicit(volatile __global atomic_int *, int, memory_order); int __ovld atomic_fetch_xor_explicit(volatile __local atomic_int *, int, memory_order); uint __ovld atomic_fetch_xor_explicit(volatile __global atomic_uint *, uint, memory_order); uint __ovld atomic_fetch_xor_explicit(volatile __local atomic_uint *, uint, memory_order); int __ovld atomic_fetch_and_explicit(volatile __global atomic_int *, int, memory_order); int __ovld atomic_fetch_and_explicit(volatile __local atomic_int *, int, memory_order); uint __ovld atomic_fetch_and_explicit(volatile __global atomic_uint *, uint, memory_order); uint __ovld atomic_fetch_and_explicit(volatile __local atomic_uint *, uint, memory_order); int __ovld atomic_fetch_min_explicit(volatile __global atomic_int *, int, memory_order); int __ovld atomic_fetch_min_explicit(volatile __local atomic_int *, int, memory_order); uint __ovld atomic_fetch_min_explicit(volatile __global atomic_uint *, uint, memory_order); uint __ovld atomic_fetch_min_explicit(volatile __local atomic_uint *, uint, memory_order); int __ovld atomic_fetch_max_explicit(volatile __global atomic_int *, int, memory_order); int __ovld atomic_fetch_max_explicit(volatile __local atomic_int *, int, memory_order); uint __ovld atomic_fetch_max_explicit(volatile __global atomic_uint *, uint, memory_order); uint __ovld atomic_fetch_max_explicit(volatile __local atomic_uint *, uint, memory_order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) long __ovld atomic_fetch_add_explicit(volatile __global atomic_long *, long, memory_order); long __ovld atomic_fetch_add_explicit(volatile __local atomic_long *, long, memory_order); ulong __ovld atomic_fetch_add_explicit(volatile __global atomic_ulong *, ulong, memory_order); ulong __ovld atomic_fetch_add_explicit(volatile __local atomic_ulong *, ulong, memory_order); uintptr_t __ovld atomic_fetch_add_explicit(volatile __global atomic_uintptr_t *, ptrdiff_t, memory_order); uintptr_t __ovld 
atomic_fetch_add_explicit(volatile __local atomic_uintptr_t *, ptrdiff_t, memory_order); long __ovld atomic_fetch_sub_explicit(volatile __global atomic_long *, long, memory_order); long __ovld atomic_fetch_sub_explicit(volatile __local atomic_long *, long, memory_order); ulong __ovld atomic_fetch_sub_explicit(volatile __global atomic_ulong *, ulong, memory_order); ulong __ovld atomic_fetch_sub_explicit(volatile __local atomic_ulong *, ulong, memory_order); uintptr_t __ovld atomic_fetch_sub_explicit(volatile __global atomic_uintptr_t *, ptrdiff_t, memory_order); uintptr_t __ovld atomic_fetch_sub_explicit(volatile __local atomic_uintptr_t *, ptrdiff_t, memory_order); long __ovld atomic_fetch_or_explicit(volatile __global atomic_long *, long, memory_order); long __ovld atomic_fetch_or_explicit(volatile __local atomic_long *, long, memory_order); ulong __ovld atomic_fetch_or_explicit(volatile __global atomic_ulong *, ulong, memory_order); ulong __ovld atomic_fetch_or_explicit(volatile __local atomic_ulong *, ulong, memory_order); uintptr_t __ovld atomic_fetch_or_explicit(volatile __global atomic_uintptr_t *, intptr_t, memory_order); uintptr_t __ovld atomic_fetch_or_explicit(volatile __local atomic_uintptr_t *, intptr_t, memory_order); intptr_t __ovld atomic_fetch_or_explicit(volatile __global atomic_intptr_t *, uintptr_t, memory_order); intptr_t __ovld atomic_fetch_or_explicit(volatile __local atomic_intptr_t *, uintptr_t, memory_order); long __ovld atomic_fetch_xor_explicit(volatile __global atomic_long *, long, memory_order); long __ovld atomic_fetch_xor_explicit(volatile __local atomic_long *, long, memory_order); ulong __ovld atomic_fetch_xor_explicit(volatile __global atomic_ulong *, ulong, memory_order); ulong __ovld atomic_fetch_xor_explicit(volatile __local atomic_ulong *, ulong, memory_order); uintptr_t __ovld atomic_fetch_xor_explicit(volatile __global atomic_uintptr_t *, intptr_t, memory_order); uintptr_t __ovld atomic_fetch_xor_explicit(volatile __local atomic_uintptr_t *, intptr_t, memory_order); intptr_t __ovld atomic_fetch_xor_explicit(volatile __global atomic_intptr_t *, uintptr_t, memory_order); intptr_t __ovld atomic_fetch_xor_explicit(volatile __local atomic_intptr_t *, uintptr_t, memory_order); long __ovld atomic_fetch_and_explicit(volatile __global atomic_long *, long, memory_order); long __ovld atomic_fetch_and_explicit(volatile __local atomic_long *, long, memory_order); ulong __ovld atomic_fetch_and_explicit(volatile __global atomic_ulong *, ulong, memory_order); ulong __ovld atomic_fetch_and_explicit(volatile __local atomic_ulong *, ulong, memory_order); uintptr_t __ovld atomic_fetch_and_explicit(volatile __global atomic_uintptr_t *, intptr_t, memory_order); uintptr_t __ovld atomic_fetch_and_explicit(volatile __local atomic_uintptr_t *, intptr_t, memory_order); intptr_t __ovld atomic_fetch_and_explicit(volatile __global atomic_intptr_t *, uintptr_t, memory_order); intptr_t __ovld atomic_fetch_and_explicit(volatile __local atomic_intptr_t *, uintptr_t, memory_order); long __ovld atomic_fetch_min_explicit(volatile __global atomic_long *, long, memory_order); long __ovld atomic_fetch_min_explicit(volatile __local atomic_long *, long, memory_order); ulong __ovld atomic_fetch_min_explicit(volatile __global atomic_ulong *, ulong, memory_order); ulong __ovld atomic_fetch_min_explicit(volatile __local atomic_ulong *, ulong, memory_order); uintptr_t __ovld atomic_fetch_min_explicit(volatile __global atomic_uintptr_t *, intptr_t, memory_order); uintptr_t __ovld 
atomic_fetch_min_explicit(volatile __local atomic_uintptr_t *, intptr_t, memory_order); intptr_t __ovld atomic_fetch_min_explicit(volatile __global atomic_intptr_t *, uintptr_t, memory_order); intptr_t __ovld atomic_fetch_min_explicit(volatile __local atomic_intptr_t *, uintptr_t, memory_order); long __ovld atomic_fetch_max_explicit(volatile __global atomic_long *, long, memory_order); long __ovld atomic_fetch_max_explicit(volatile __local atomic_long *, long, memory_order); ulong __ovld atomic_fetch_max_explicit(volatile __global atomic_ulong *, ulong, memory_order); ulong __ovld atomic_fetch_max_explicit(volatile __local atomic_ulong *, ulong, memory_order); uintptr_t __ovld atomic_fetch_max_explicit(volatile __global atomic_uintptr_t *, uintptr_t, memory_order); uintptr_t __ovld atomic_fetch_max_explicit(volatile __local atomic_uintptr_t *, uintptr_t, memory_order); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_generic_address_space) int __ovld atomic_fetch_add_explicit(volatile atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_sub_explicit(volatile atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_or_explicit(volatile atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_xor_explicit(volatile atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_and_explicit(volatile atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_min_explicit(volatile atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_max_explicit(volatile atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *, uint, memory_order, memory_scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) long __ovld atomic_fetch_add_explicit(volatile atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *, ulong, memory_order, memory_scope); long __ovld atomic_fetch_sub_explicit(volatile atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *, ulong, memory_order, memory_scope); long __ovld atomic_fetch_or_explicit(volatile atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *, ulong, memory_order, memory_scope); long __ovld atomic_fetch_xor_explicit(volatile atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *, ulong, memory_order, memory_scope); long __ovld atomic_fetch_and_explicit(volatile atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *, ulong, memory_order, memory_scope); long __ovld atomic_fetch_min_explicit(volatile 
atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *, ulong, memory_order, memory_scope); long __ovld atomic_fetch_max_explicit(volatile atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *, ulong, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *, ptrdiff_t, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *, ptrdiff_t, memory_order, memory_scope); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) int __ovld atomic_fetch_add_explicit(volatile __global atomic_int *, int, memory_order, memory_scope); int __ovld atomic_fetch_add_explicit(volatile __local atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_add_explicit(volatile __global atomic_uint *, uint, memory_order, memory_scope); uint __ovld atomic_fetch_add_explicit(volatile __local atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_sub_explicit(volatile __global atomic_int *, int, memory_order, memory_scope); int __ovld atomic_fetch_sub_explicit(volatile __local atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_sub_explicit(volatile __global atomic_uint *, uint, memory_order, memory_scope); uint __ovld atomic_fetch_sub_explicit(volatile __local atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_or_explicit(volatile __global atomic_int *, int, memory_order, memory_scope); int __ovld atomic_fetch_or_explicit(volatile __local atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_or_explicit(volatile __global atomic_uint *, uint, memory_order, memory_scope); uint __ovld atomic_fetch_or_explicit(volatile __local atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_xor_explicit(volatile __global atomic_int *, int, memory_order, memory_scope); int __ovld atomic_fetch_xor_explicit(volatile __local atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_xor_explicit(volatile __global atomic_uint *, uint, memory_order, memory_scope); uint __ovld atomic_fetch_xor_explicit(volatile __local atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_and_explicit(volatile __global atomic_int *, int, memory_order, memory_scope); int __ovld atomic_fetch_and_explicit(volatile __local atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_and_explicit(volatile __global atomic_uint *, uint, memory_order, memory_scope); uint __ovld atomic_fetch_and_explicit(volatile __local atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_min_explicit(volatile __global atomic_int *, int, memory_order, memory_scope); int __ovld atomic_fetch_min_explicit(volatile __local atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_min_explicit(volatile __global atomic_uint *, uint, memory_order, memory_scope); uint __ovld atomic_fetch_min_explicit(volatile __local atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_max_explicit(volatile __global atomic_int *, int, memory_order, memory_scope); int __ovld atomic_fetch_max_explicit(volatile __local atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_max_explicit(volatile __global atomic_uint *, uint, memory_order, memory_scope); uint __ovld 
atomic_fetch_max_explicit(volatile __local atomic_uint *, uint, memory_order, memory_scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) long __ovld atomic_fetch_add_explicit(volatile __global atomic_long *, long, memory_order, memory_scope); long __ovld atomic_fetch_add_explicit(volatile __local atomic_long *, long, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_add_explicit(volatile __global atomic_uintptr_t *, ptrdiff_t, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_add_explicit(volatile __local atomic_uintptr_t *, ptrdiff_t, memory_order, memory_scope); ulong __ovld atomic_fetch_add_explicit(volatile __global atomic_ulong *, ulong, memory_order, memory_scope); ulong __ovld atomic_fetch_add_explicit(volatile __local atomic_ulong *, ulong, memory_order, memory_scope); long __ovld atomic_fetch_sub_explicit(volatile __global atomic_long *, long, memory_order, memory_scope); long __ovld atomic_fetch_sub_explicit(volatile __local atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_sub_explicit(volatile __global atomic_ulong *, ulong, memory_order, memory_scope); ulong __ovld atomic_fetch_sub_explicit(volatile __local atomic_ulong *, ulong, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_sub_explicit(volatile __global atomic_uintptr_t *, ptrdiff_t, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_sub_explicit(volatile __local atomic_uintptr_t *, ptrdiff_t, memory_order, memory_scope); long __ovld atomic_fetch_or_explicit(volatile __global atomic_long *, long, memory_order, memory_scope); long __ovld atomic_fetch_or_explicit(volatile __local atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_or_explicit(volatile __global atomic_ulong *, ulong, memory_order, memory_scope); ulong __ovld atomic_fetch_or_explicit(volatile __local atomic_ulong *, ulong, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_or_explicit(volatile __global atomic_uintptr_t *, intptr_t, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_or_explicit(volatile __local atomic_uintptr_t *, intptr_t, memory_order, memory_scope); intptr_t __ovld atomic_fetch_or_explicit(volatile __global atomic_intptr_t *, uintptr_t, memory_order, memory_scope); intptr_t __ovld atomic_fetch_or_explicit(volatile __local atomic_intptr_t *, uintptr_t, memory_order, memory_scope); long __ovld atomic_fetch_xor_explicit(volatile __global atomic_long *, long, memory_order, memory_scope); long __ovld atomic_fetch_xor_explicit(volatile __local atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_xor_explicit(volatile __global atomic_ulong *, ulong, memory_order, memory_scope); ulong __ovld atomic_fetch_xor_explicit(volatile __local atomic_ulong *, ulong, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_xor_explicit(volatile __global atomic_uintptr_t *, intptr_t, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_xor_explicit(volatile __local atomic_uintptr_t *, intptr_t, memory_order, memory_scope); intptr_t __ovld atomic_fetch_xor_explicit(volatile __global atomic_intptr_t *, uintptr_t, memory_order, memory_scope); intptr_t __ovld atomic_fetch_xor_explicit(volatile __local atomic_intptr_t *, uintptr_t, memory_order, memory_scope); long __ovld atomic_fetch_and_explicit(volatile __global atomic_long *, long, memory_order, memory_scope); long __ovld atomic_fetch_and_explicit(volatile __local atomic_long *, long, memory_order, memory_scope); ulong __ovld 
atomic_fetch_and_explicit(volatile __global atomic_ulong *, ulong, memory_order, memory_scope); ulong __ovld atomic_fetch_and_explicit(volatile __local atomic_ulong *, ulong, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_and_explicit(volatile __global atomic_uintptr_t *, intptr_t, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_and_explicit(volatile __local atomic_uintptr_t *, intptr_t, memory_order, memory_scope); intptr_t __ovld atomic_fetch_and_explicit(volatile __global atomic_intptr_t *, uintptr_t, memory_order, memory_scope); intptr_t __ovld atomic_fetch_and_explicit(volatile __local atomic_intptr_t *, uintptr_t, memory_order, memory_scope); long __ovld atomic_fetch_min_explicit(volatile __global atomic_long *, long, memory_order, memory_scope); long __ovld atomic_fetch_min_explicit(volatile __local atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_min_explicit(volatile __global atomic_ulong *, ulong, memory_order, memory_scope); ulong __ovld atomic_fetch_min_explicit(volatile __local atomic_ulong *, ulong, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_min_explicit(volatile __global atomic_uintptr_t *, intptr_t, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_min_explicit(volatile __local atomic_uintptr_t *, intptr_t, memory_order, memory_scope); intptr_t __ovld atomic_fetch_min_explicit(volatile __global atomic_intptr_t *, uintptr_t, memory_order, memory_scope); intptr_t __ovld atomic_fetch_min_explicit(volatile __local atomic_intptr_t *, uintptr_t, memory_order, memory_scope); long __ovld atomic_fetch_max_explicit(volatile __global atomic_long *, long, memory_order, memory_scope); long __ovld atomic_fetch_max_explicit(volatile __local atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_max_explicit(volatile __global atomic_ulong *, ulong, memory_order, memory_scope); ulong __ovld atomic_fetch_max_explicit(volatile __local atomic_ulong *, ulong, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_max_explicit(volatile __global atomic_uintptr_t *, uintptr_t, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_max_explicit(volatile __local atomic_uintptr_t *, uintptr_t, memory_order, memory_scope); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) // The functionality added by cl_ext_float_atomics extension #if defined(cl_ext_float_atomics) #if defined(__opencl_c_ext_fp16_global_atomic_load_store) void __ovld atomic_store(volatile __global atomic_half *, half); void __ovld atomic_store_explicit(volatile __global atomic_half *, half, memory_order); void __ovld atomic_store_explicit(volatile __global atomic_half *, half, memory_order, memory_scope); half __ovld atomic_load(volatile __global atomic_half *); half __ovld atomic_load_explicit(volatile __global atomic_half *, memory_order); half __ovld atomic_load_explicit(volatile __global atomic_half *, memory_order, memory_scope); half __ovld atomic_exchange(volatile __global atomic_half *, half); half __ovld atomic_exchange_explicit(volatile __global atomic_half *, half, memory_order); half __ovld atomic_exchange_explicit(volatile __global atomic_half *, half, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp16_global_atomic_load_store) #if defined(__opencl_c_ext_fp16_local_atomic_load_store) void __ovld atomic_store(volatile __local atomic_half *, half); void __ovld 
atomic_store_explicit(volatile __local atomic_half *, half, memory_order); void __ovld atomic_store_explicit(volatile __local atomic_half *, half, memory_order, memory_scope); half __ovld atomic_load(volatile __local atomic_half *); half __ovld atomic_load_explicit(volatile __local atomic_half *, memory_order); half __ovld atomic_load_explicit(volatile __local atomic_half *, memory_order, memory_scope); half __ovld atomic_exchange(volatile __local atomic_half *, half); half __ovld atomic_exchange_explicit(volatile __local atomic_half *, half, memory_order); half __ovld atomic_exchange_explicit(volatile __local atomic_half *, half, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp16_local_atomic_load_store) #if defined(__opencl_c_ext_fp16_global_atomic_load_store) && \ defined(__opencl_c_ext_fp16_local_atomic_load_store) void __ovld atomic_store(volatile atomic_half *, half); void __ovld atomic_store_explicit(volatile atomic_half *, half, memory_order); void __ovld atomic_store_explicit(volatile atomic_half *, half, memory_order, memory_scope); half __ovld atomic_load(volatile atomic_half *); half __ovld atomic_load_explicit(volatile atomic_half *, memory_order); half __ovld atomic_load_explicit(volatile atomic_half *, memory_order, memory_scope); half __ovld atomic_exchange(volatile atomic_half *, half); half __ovld atomic_exchange_explicit(volatile atomic_half *, half, memory_order); half __ovld atomic_exchange_explicit(volatile atomic_half *, half, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp16_global_atomic_load_store) && // defined(__opencl_c_ext_fp16_local_atomic_load_store) #if defined(__opencl_c_ext_fp16_global_atomic_min_max) half __ovld atomic_fetch_min(volatile __global atomic_half *, half); half __ovld atomic_fetch_max(volatile __global atomic_half *, half); half __ovld atomic_fetch_min_explicit(volatile __global atomic_half *, half, memory_order); half __ovld atomic_fetch_max_explicit(volatile __global atomic_half *, half, memory_order); half __ovld atomic_fetch_min_explicit(volatile __global atomic_half *, half, memory_order, memory_scope); half __ovld atomic_fetch_max_explicit(volatile __global atomic_half *, half, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp16_global_atomic_min_max) #if defined(__opencl_c_ext_fp16_local_atomic_min_max) half __ovld atomic_fetch_min(volatile __local atomic_half *, half); half __ovld atomic_fetch_max(volatile __local atomic_half *, half); half __ovld atomic_fetch_min_explicit(volatile __local atomic_half *, half, memory_order); half __ovld atomic_fetch_max_explicit(volatile __local atomic_half *, half, memory_order); half __ovld atomic_fetch_min_explicit(volatile __local atomic_half *, half, memory_order, memory_scope); half __ovld atomic_fetch_max_explicit(volatile __local atomic_half *, half, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp16_local_atomic_min_max) #if defined(__opencl_c_ext_fp16_global_atomic_min_max) && \ defined(__opencl_c_ext_fp16_local_atomic_min_max) half __ovld atomic_fetch_min(volatile atomic_half *, half); half __ovld atomic_fetch_max(volatile atomic_half *, half); half __ovld atomic_fetch_min_explicit(volatile atomic_half *, half, memory_order); half __ovld atomic_fetch_max_explicit(volatile atomic_half *, half, memory_order); half __ovld atomic_fetch_min_explicit(volatile atomic_half *, half, memory_order, memory_scope); half __ovld atomic_fetch_max_explicit(volatile atomic_half *, half, memory_order, memory_scope); #endif // 
defined(__opencl_c_ext_fp16_global_atomic_min_max) && \ defined(__opencl_c_ext_fp16_local_atomic_min_max) #if defined(__opencl_c_ext_fp32_global_atomic_min_max) float __ovld atomic_fetch_min(volatile __global atomic_float *, float); float __ovld atomic_fetch_max(volatile __global atomic_float *, float); float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *, float, memory_order); float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *, float, memory_order); float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *, float, memory_order, memory_scope); float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *, float, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) #if defined(__opencl_c_ext_fp32_local_atomic_min_max) float __ovld atomic_fetch_min(volatile __local atomic_float *, float); float __ovld atomic_fetch_max(volatile __local atomic_float *, float); float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *, float, memory_order); float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *, float, memory_order); float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *, float, memory_order, memory_scope); float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *, float, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp32_local_atomic_min_max) #if defined(__opencl_c_ext_fp32_global_atomic_min_max) && \ defined(__opencl_c_ext_fp32_local_atomic_min_max) float __ovld atomic_fetch_min(volatile atomic_float *, float); float __ovld atomic_fetch_max(volatile atomic_float *, float); float __ovld atomic_fetch_min_explicit(volatile atomic_float *, float, memory_order); float __ovld atomic_fetch_max_explicit(volatile atomic_float *, float, memory_order); float __ovld atomic_fetch_min_explicit(volatile atomic_float *, float, memory_order, memory_scope); float __ovld atomic_fetch_max_explicit(volatile atomic_float *, float, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) && \ defined(__opencl_c_ext_fp32_local_atomic_min_max) #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #if defined(__opencl_c_ext_fp64_global_atomic_min_max) double __ovld atomic_fetch_min(volatile __global atomic_double *, double); double __ovld atomic_fetch_max(volatile __global atomic_double *, double); double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *, double, memory_order); double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *, double, memory_order); double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *, double, memory_order, memory_scope); double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *, double, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) #if defined(__opencl_c_ext_fp64_local_atomic_min_max) double __ovld atomic_fetch_min(volatile __local atomic_double *, double); double __ovld atomic_fetch_max(volatile __local atomic_double *, double); double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *, double, memory_order); double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *, double, memory_order); double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *, double, memory_order, memory_scope); double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *, double, memory_order, memory_scope); #endif // 
defined(__opencl_c_ext_fp64_local_atomic_min_max) #if defined(__opencl_c_ext_fp64_global_atomic_min_max) && \ defined(__opencl_c_ext_fp64_local_atomic_min_max) double __ovld atomic_fetch_min(volatile atomic_double *, double); double __ovld atomic_fetch_max(volatile atomic_double *, double); double __ovld atomic_fetch_min_explicit(volatile atomic_double *, double, memory_order); double __ovld atomic_fetch_max_explicit(volatile atomic_double *, double, memory_order); double __ovld atomic_fetch_min_explicit(volatile atomic_double *, double, memory_order, memory_scope); double __ovld atomic_fetch_max_explicit(volatile atomic_double *, double, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) && \ defined(__opencl_c_ext_fp64_local_atomic_min_max) #endif // defined(cl_khr_int64_base_atomics) && \ defined(cl_khr_int64_extended_atomics) #if defined(__opencl_c_ext_fp16_global_atomic_add) half __ovld atomic_fetch_add(volatile __global atomic_half *, half); half __ovld atomic_fetch_sub(volatile __global atomic_half *, half); half __ovld atomic_fetch_add_explicit(volatile __global atomic_half *, half, memory_order); half __ovld atomic_fetch_sub_explicit(volatile __global atomic_half *, half, memory_order); half __ovld atomic_fetch_add_explicit(volatile __global atomic_half *, half, memory_order, memory_scope); half __ovld atomic_fetch_sub_explicit(volatile __global atomic_half *, half, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp16_global_atomic_add) #if defined(__opencl_c_ext_fp16_local_atomic_add) half __ovld atomic_fetch_add(volatile __local atomic_half *, half); half __ovld atomic_fetch_sub(volatile __local atomic_half *, half); half __ovld atomic_fetch_add_explicit(volatile __local atomic_half *, half, memory_order); half __ovld atomic_fetch_sub_explicit(volatile __local atomic_half *, half, memory_order); half __ovld atomic_fetch_add_explicit(volatile __local atomic_half *, half, memory_order, memory_scope); half __ovld atomic_fetch_sub_explicit(volatile __local atomic_half *, half, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp16_local_atomic_add) #if defined(__opencl_c_ext_fp16_global_atomic_add) && \ defined(__opencl_c_ext_fp16_local_atomic_add) half __ovld atomic_fetch_add(volatile atomic_half *, half); half __ovld atomic_fetch_sub(volatile atomic_half *, half); half __ovld atomic_fetch_add_explicit(volatile atomic_half *, half, memory_order); half __ovld atomic_fetch_sub_explicit(volatile atomic_half *, half, memory_order); half __ovld atomic_fetch_add_explicit(volatile atomic_half *, half, memory_order, memory_scope); half __ovld atomic_fetch_sub_explicit(volatile atomic_half *, half, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp16_global_atomic_add) && \ defined(__opencl_c_ext_fp16_local_atomic_add) #if defined(__opencl_c_ext_fp32_global_atomic_add) float __ovld atomic_fetch_add(volatile __global atomic_float *, float); float __ovld atomic_fetch_sub(volatile __global atomic_float *, float); float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *, float, memory_order); float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *, float, memory_order); float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *, float, memory_order, memory_scope); float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *, float, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp32_global_atomic_add) #if 
defined(__opencl_c_ext_fp32_local_atomic_add) float __ovld atomic_fetch_add(volatile __local atomic_float *, float); float __ovld atomic_fetch_sub(volatile __local atomic_float *, float); float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *, float, memory_order); float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *, float, memory_order); float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *, float, memory_order, memory_scope); float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *, float, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp32_local_atomic_add) #if defined(__opencl_c_ext_fp32_global_atomic_add) && \ defined(__opencl_c_ext_fp32_local_atomic_add) float __ovld atomic_fetch_add(volatile atomic_float *, float); float __ovld atomic_fetch_sub(volatile atomic_float *, float); float __ovld atomic_fetch_add_explicit(volatile atomic_float *, float, memory_order); float __ovld atomic_fetch_sub_explicit(volatile atomic_float *, float, memory_order); float __ovld atomic_fetch_add_explicit(volatile atomic_float *, float, memory_order, memory_scope); float __ovld atomic_fetch_sub_explicit(volatile atomic_float *, float, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp32_global_atomic_add) && \ defined(__opencl_c_ext_fp32_local_atomic_add) #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #if defined(__opencl_c_ext_fp64_global_atomic_add) double __ovld atomic_fetch_add(volatile __global atomic_double *, double); double __ovld atomic_fetch_sub(volatile __global atomic_double *, double); double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *, double, memory_order); double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *, double, memory_order); double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *, double, memory_order, memory_scope); double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *, double, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp64_global_atomic_add) #if defined(__opencl_c_ext_fp64_local_atomic_add) double __ovld atomic_fetch_add(volatile __local atomic_double *, double); double __ovld atomic_fetch_sub(volatile __local atomic_double *, double); double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *, double, memory_order); double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *, double, memory_order); double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *, double, memory_order, memory_scope); double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *, double, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp64_local_atomic_add) #if defined(__opencl_c_ext_fp64_global_atomic_add) && \ defined(__opencl_c_ext_fp64_local_atomic_add) double __ovld atomic_fetch_add(volatile atomic_double *, double); double __ovld atomic_fetch_sub(volatile atomic_double *, double); double __ovld atomic_fetch_add_explicit(volatile atomic_double *, double, memory_order); double __ovld atomic_fetch_sub_explicit(volatile atomic_double *, double, memory_order); double __ovld atomic_fetch_add_explicit(volatile atomic_double *, double, memory_order, memory_scope); double __ovld atomic_fetch_sub_explicit(volatile atomic_double *, double, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp64_global_atomic_add) && \ defined(__opencl_c_ext_fp64_local_atomic_add) #endif // 
defined(cl_khr_int64_base_atomics) && \ defined(cl_khr_int64_extended_atomics) #endif // cl_ext_float_atomics // atomic_store() #if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) void __ovld atomic_store(volatile atomic_int *, int); void __ovld atomic_store(volatile atomic_uint *, uint); void __ovld atomic_store(volatile atomic_float *, float); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 void __ovld atomic_store(volatile atomic_double *, double); #endif //cl_khr_fp64 void __ovld atomic_store(volatile atomic_long *, long); void __ovld atomic_store(volatile atomic_ulong *, ulong); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) void __ovld atomic_store(volatile __global atomic_int *, int); void __ovld atomic_store(volatile __local atomic_int *, int); void __ovld atomic_store(volatile __global atomic_uint *, uint); void __ovld atomic_store(volatile __local atomic_uint *, uint); void __ovld atomic_store(volatile __global atomic_float *, float); void __ovld atomic_store(volatile __local atomic_float *, float); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 void __ovld atomic_store(volatile __global atomic_double *, double); void __ovld atomic_store(volatile __local atomic_double *, double); #endif //cl_khr_fp64 void __ovld atomic_store(volatile __global atomic_long *, long); void __ovld atomic_store(volatile __local atomic_long *, long); void __ovld atomic_store(volatile __global atomic_ulong *, ulong); void __ovld atomic_store(volatile __local atomic_ulong *, ulong); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) void __ovld atomic_store_explicit(volatile atomic_int *, int, memory_order); void __ovld atomic_store_explicit(volatile atomic_uint *, uint, memory_order); void __ovld atomic_store_explicit(volatile atomic_float *, float, memory_order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 void __ovld atomic_store_explicit(volatile atomic_double *, double, memory_order); #endif //cl_khr_fp64 void __ovld atomic_store_explicit(volatile atomic_long *, long, memory_order); void __ovld atomic_store_explicit(volatile atomic_ulong *, ulong, memory_order); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) void __ovld atomic_store_explicit(volatile __global atomic_int *, int, memory_order); void __ovld atomic_store_explicit(volatile __local atomic_int *, int, memory_order); void __ovld atomic_store_explicit(volatile __global atomic_uint *, uint, memory_order); void __ovld atomic_store_explicit(volatile __local atomic_uint *, uint, memory_order); void __ovld atomic_store_explicit(volatile __global atomic_float *, float, memory_order); void __ovld atomic_store_explicit(volatile __local atomic_float *, float, memory_order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 void __ovld atomic_store_explicit(volatile __global atomic_double *, double, memory_order); void __ovld atomic_store_explicit(volatile __local 
atomic_double *, double, memory_order); #endif void __ovld atomic_store_explicit(volatile __global atomic_long *, long, memory_order); void __ovld atomic_store_explicit(volatile __local atomic_long *, long, memory_order); void __ovld atomic_store_explicit(volatile __global atomic_ulong *, ulong, memory_order); void __ovld atomic_store_explicit(volatile __local atomic_ulong *, ulong, memory_order); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_generic_address_space) void __ovld atomic_store_explicit(volatile atomic_int *, int, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile atomic_uint *, uint, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile atomic_float *, float, memory_order, memory_scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 void __ovld atomic_store_explicit(volatile atomic_double *, double, memory_order, memory_scope); #endif //cl_khr_fp64 void __ovld atomic_store_explicit(volatile atomic_long *, long, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile atomic_ulong *, ulong, memory_order, memory_scope); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) void __ovld atomic_store_explicit(volatile __global atomic_int *, int, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile __local atomic_int *, int, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile __global atomic_uint *, uint, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile __local atomic_uint *, uint, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile __global atomic_float *, float, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile __local atomic_float *, float, memory_order, memory_scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 void __ovld atomic_store_explicit(volatile __global atomic_double *, double, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile __local atomic_double *, double, memory_order, memory_scope); #endif //cl_khr_fp64 void __ovld atomic_store_explicit(volatile __global atomic_long *, long, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile __local atomic_long *, long, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile __global atomic_ulong *, ulong, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile __local atomic_ulong *, ulong, memory_order, memory_scope); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) // atomic_load() #if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) int __ovld atomic_load(volatile atomic_int *); uint __ovld atomic_load(volatile atomic_uint *); float __ovld atomic_load(volatile atomic_float *); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_load(volatile atomic_double *); #endif //cl_khr_fp64 long __ovld atomic_load(volatile atomic_long *); ulong __ovld atomic_load(volatile atomic_ulong *); #endif 
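// Illustrative usage sketch for the seq_cst, device-scope overloads declared above.
// The kernel below is not part of this header; the kernel name, argument names, and
// the single-counter reduction pattern are assumptions made purely for illustration,
// and it presumes __opencl_c_generic_address_space so the unqualified atomic_*
// overloads apply. It is guarded out so it has no effect on translation.
#if 0
kernel void sum_positive(global const int *data, global atomic_int *sum, uint n) {
  size_t gid = get_global_id(0);
  if (gid < n && data[gid] > 0) {
    // Read-modify-write with the default (memory_order_seq_cst, device scope) semantics.
    atomic_fetch_add(sum, data[gid]);
  }
  if (gid == 0) {
    // Plain seq_cst load; may observe a partial sum while other work-items still add.
    int observed = atomic_load(sum);
    (void)observed;
  }
}
#endif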
#endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) int __ovld atomic_load(volatile __global atomic_int *); int __ovld atomic_load(volatile __local atomic_int *); uint __ovld atomic_load(volatile __global atomic_uint *); uint __ovld atomic_load(volatile __local atomic_uint *); float __ovld atomic_load(volatile __global atomic_float *); float __ovld atomic_load(volatile __local atomic_float *); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_load(volatile __global atomic_double *); double __ovld atomic_load(volatile __local atomic_double *); #endif //cl_khr_fp64 long __ovld atomic_load(volatile __global atomic_long *); long __ovld atomic_load(volatile __local atomic_long *); ulong __ovld atomic_load(volatile __global atomic_ulong *); ulong __ovld atomic_load(volatile __local atomic_ulong *); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) int __ovld atomic_load_explicit(volatile atomic_int *, memory_order); uint __ovld atomic_load_explicit(volatile atomic_uint *, memory_order); float __ovld atomic_load_explicit(volatile atomic_float *, memory_order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_load_explicit(volatile atomic_double *, memory_order); #endif //cl_khr_fp64 long __ovld atomic_load_explicit(volatile atomic_long *, memory_order); ulong __ovld atomic_load_explicit(volatile atomic_ulong *, memory_order); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) int __ovld atomic_load_explicit(volatile __global atomic_int *, memory_order); int __ovld atomic_load_explicit(volatile __local atomic_int *, memory_order); uint __ovld atomic_load_explicit(volatile __global atomic_uint *, memory_order); uint __ovld atomic_load_explicit(volatile __local atomic_uint *, memory_order); float __ovld atomic_load_explicit(volatile __global atomic_float *, memory_order); float __ovld atomic_load_explicit(volatile __local atomic_float *, memory_order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_load_explicit(volatile __global atomic_double *, memory_order); double __ovld atomic_load_explicit(volatile __local atomic_double *, memory_order); #endif //cl_khr_fp64 long __ovld atomic_load_explicit(volatile __global atomic_long *, memory_order); long __ovld atomic_load_explicit(volatile __local atomic_long *, memory_order); ulong __ovld atomic_load_explicit(volatile __global atomic_ulong *, memory_order); ulong __ovld atomic_load_explicit(volatile __local atomic_ulong *, memory_order); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_generic_address_space) int __ovld atomic_load_explicit(volatile atomic_int *, memory_order, memory_scope); uint __ovld atomic_load_explicit(volatile atomic_uint *, memory_order, memory_scope); float __ovld atomic_load_explicit(volatile atomic_float *, memory_order, memory_scope); #if defined(cl_khr_int64_base_atomics) && 
defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_load_explicit(volatile atomic_double *, memory_order, memory_scope); #endif //cl_khr_fp64 long __ovld atomic_load_explicit(volatile atomic_long *, memory_order, memory_scope); ulong __ovld atomic_load_explicit(volatile atomic_ulong *, memory_order, memory_scope); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) int __ovld atomic_load_explicit(volatile __global atomic_int *, memory_order, memory_scope); int __ovld atomic_load_explicit(volatile __local atomic_int *, memory_order, memory_scope); uint __ovld atomic_load_explicit(volatile __global atomic_uint *, memory_order, memory_scope); uint __ovld atomic_load_explicit(volatile __local atomic_uint *, memory_order, memory_scope); float __ovld atomic_load_explicit(volatile __global atomic_float *, memory_order, memory_scope); float __ovld atomic_load_explicit(volatile __local atomic_float *, memory_order, memory_scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_load_explicit(volatile __global atomic_double *, memory_order, memory_scope); double __ovld atomic_load_explicit(volatile __local atomic_double *, memory_order, memory_scope); #endif long __ovld atomic_load_explicit(volatile __global atomic_long *, memory_order, memory_scope); long __ovld atomic_load_explicit(volatile __local atomic_long *, memory_order, memory_scope); ulong __ovld atomic_load_explicit(volatile __global atomic_ulong *, memory_order, memory_scope); ulong __ovld atomic_load_explicit(volatile __local atomic_ulong *, memory_order, memory_scope); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) // atomic_exchange() #if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) int __ovld atomic_exchange(volatile atomic_int *, int); uint __ovld atomic_exchange(volatile atomic_uint *, uint); float __ovld atomic_exchange(volatile atomic_float *, float); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_exchange(volatile atomic_double *, double); #endif //cl_khr_fp64 long __ovld atomic_exchange(volatile atomic_long *, long); ulong __ovld atomic_exchange(volatile atomic_ulong *, ulong); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) int __ovld atomic_exchange(volatile __global atomic_int *, int); int __ovld atomic_exchange(volatile __local atomic_int *, int); uint __ovld atomic_exchange(volatile __global atomic_uint *, uint); uint __ovld atomic_exchange(volatile __local atomic_uint *, uint); float __ovld atomic_exchange(volatile __global atomic_float *, float); float __ovld atomic_exchange(volatile __local atomic_float *, float); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_exchange(volatile __global atomic_double *, double); double __ovld atomic_exchange(volatile __local atomic_double *, double); #endif //cl_khr_fp64 long __ovld atomic_exchange(volatile __global atomic_long *, long); long __ovld atomic_exchange(volatile __local atomic_long *, long); ulong __ovld atomic_exchange(volatile __global atomic_ulong 
*, ulong); ulong __ovld atomic_exchange(volatile __local atomic_ulong *, ulong); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) int __ovld atomic_exchange_explicit(volatile atomic_int *, int, memory_order); uint __ovld atomic_exchange_explicit(volatile atomic_uint *, uint, memory_order); float __ovld atomic_exchange_explicit(volatile atomic_float *, float, memory_order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_exchange_explicit(volatile atomic_double *, double, memory_order); #endif //cl_khr_fp64 long __ovld atomic_exchange_explicit(volatile atomic_long *, long, memory_order); ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *, ulong, memory_order); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) int __ovld atomic_exchange_explicit(volatile __global atomic_int *, int, memory_order); int __ovld atomic_exchange_explicit(volatile __local atomic_int *, int, memory_order); uint __ovld atomic_exchange_explicit(volatile __global atomic_uint *, uint, memory_order); uint __ovld atomic_exchange_explicit(volatile __local atomic_uint *, uint, memory_order); float __ovld atomic_exchange_explicit(volatile __global atomic_float *, float, memory_order); float __ovld atomic_exchange_explicit(volatile __local atomic_float *, float, memory_order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_exchange_explicit(volatile __global atomic_double *, double, memory_order); double __ovld atomic_exchange_explicit(volatile __local atomic_double *, double, memory_order); #endif //cl_khr_fp64 long __ovld atomic_exchange_explicit(volatile __global atomic_long *, long, memory_order); long __ovld atomic_exchange_explicit(volatile __local atomic_long *, long, memory_order); ulong __ovld atomic_exchange_explicit(volatile __global atomic_ulong *, ulong, memory_order); ulong __ovld atomic_exchange_explicit(volatile __local atomic_ulong *, ulong, memory_order); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_generic_address_space) int __ovld atomic_exchange_explicit(volatile atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_exchange_explicit(volatile atomic_uint *, uint, memory_order, memory_scope); float __ovld atomic_exchange_explicit(volatile atomic_float *, float, memory_order, memory_scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_exchange_explicit(volatile atomic_double *, double, memory_order, memory_scope); #endif //cl_khr_fp64 long __ovld atomic_exchange_explicit(volatile atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *, ulong, memory_order, memory_scope); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) int __ovld atomic_exchange_explicit(volatile __global atomic_int *, int, memory_order, memory_scope); int __ovld
atomic_exchange_explicit(volatile __local atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_exchange_explicit(volatile __global atomic_uint *, uint, memory_order, memory_scope); uint __ovld atomic_exchange_explicit(volatile __local atomic_uint *, uint, memory_order, memory_scope); float __ovld atomic_exchange_explicit(volatile __global atomic_float *, float, memory_order, memory_scope); float __ovld atomic_exchange_explicit(volatile __local atomic_float *, float, memory_order, memory_scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_exchange_explicit(volatile __global atomic_double *, double, memory_order, memory_scope); double __ovld atomic_exchange_explicit(volatile __local atomic_double *, double, memory_order, memory_scope); #endif //cl_khr_fp64 long __ovld atomic_exchange_explicit(volatile __global atomic_long *, long, memory_order, memory_scope); long __ovld atomic_exchange_explicit(volatile __local atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_exchange_explicit(volatile __global atomic_ulong *, ulong, memory_order, memory_scope); ulong __ovld atomic_exchange_explicit(volatile __local atomic_ulong *, ulong, memory_order, memory_scope); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) // atomic_compare_exchange_strong() and atomic_compare_exchange_weak() #if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) bool __ovld atomic_compare_exchange_strong(volatile atomic_int *, int *, int); bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *, uint *, uint); bool __ovld atomic_compare_exchange_weak(volatile atomic_int *, int *, int); bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *, uint *, uint); bool __ovld atomic_compare_exchange_strong(volatile atomic_float *, float *, float); bool __ovld atomic_compare_exchange_weak(volatile atomic_float *, float *, float); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 bool __ovld atomic_compare_exchange_strong(volatile atomic_double *, double *, double); bool __ovld atomic_compare_exchange_weak(volatile atomic_double *, double *, double); #endif //cl_khr_fp64 bool __ovld atomic_compare_exchange_strong(volatile atomic_long *, long *, long); bool __ovld atomic_compare_exchange_weak(volatile atomic_long *, long *, long); bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *, ulong *, ulong); bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *, ulong *, ulong); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) bool __ovld atomic_compare_exchange_strong(volatile __global atomic_int *, __global int *, int); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_int *, __local int *, int); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_int *, __private int *, int); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_int *, __global int *, int); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_int *, __local int *, int); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_int *, __private int *, int); bool __ovld atomic_compare_exchange_strong(volatile __global 
atomic_uint *, __global uint *, uint); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_uint *, __local uint *, uint); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_uint *, __private uint *, uint); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_uint *, __global uint *, uint); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_uint *, __local uint *, uint); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_uint *, __private uint *, uint); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_float *, __global float *, float); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_float *, __local float *, float); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_float *, __private float *, float); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_float *, __global float *, float); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_float *, __local float *, float); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_float *, __private float *, float); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_int *, __global int *, int); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_int *, __local int *, int); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_int *, __private int *, int); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_int *, __global int *, int); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_int *, __local int *, int); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_int *, __private int *, int); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_uint *, __global uint *, uint); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_uint *, __local uint *, uint); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_uint *, __private uint *, uint); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_uint *, __global uint *, uint); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_uint *, __local uint *, uint); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_uint *, __private uint *, uint); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_float *, __global float *, float); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_float *, __local float *, float); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_float *, __private float *, float); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_float *, __global float *, float); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_float *, __local float *, float); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_float *, __private float *, float); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 bool __ovld atomic_compare_exchange_strong(volatile __global atomic_double *, __global double *, double); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_double *, __local double *, double); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_double *, __private double *, double); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_double *, __global double *, double); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_double *, __local double *, double); bool __ovld 
atomic_compare_exchange_strong(volatile __local atomic_double *, __private double *, double); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_double *, __global double *, double); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_double *, __local double *, double); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_double *, __private double *, double); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_double *, __global double *, double); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_double *, __local double *, double); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_double *, __private double *, double); #endif //cl_khr_fp64 bool __ovld atomic_compare_exchange_strong(volatile __global atomic_long *, __global long *, long); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_long *, __local long *, long); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_long *, __private long *, long); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_long *, __global long *, long); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_long *, __local long *, long); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_long *, __private long *, long); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_ulong *, __global ulong *, ulong); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_ulong *, __local ulong *, ulong); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_ulong *, __private ulong *, ulong); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_ulong *, __global ulong *, ulong); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_ulong *, __local ulong *, ulong); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_ulong *, __private ulong *, ulong); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_long *, __global long *, long); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_long *, __local long *, long); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_long *, __private long *, long); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_long *, __global long *, long); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_long *, __local long *, long); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_long *, __private long *, long); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_ulong *, __global ulong *, ulong); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_ulong *, __local ulong *, ulong); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_ulong *, __private ulong *, ulong); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_ulong *, __global ulong *, ulong); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_ulong *, __local ulong *, ulong); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_ulong *, __private ulong *, ulong); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *, int *, int, memory_order, memory_order); bool __ovld 
atomic_compare_exchange_strong_explicit(volatile atomic_uint *, uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *, int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *, uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *, float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *, float *, float, memory_order, memory_order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *, double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *, double *, double, memory_order, memory_order); #endif //cl_khr_fp64 bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *, long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *, long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *, ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *, ulong *, ulong, memory_order, memory_order); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_int *, __global int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_int *, __local int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_int *, __private int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_int *, __global int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_int *, __local int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_int *, __private int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_uint *, __global uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_uint *, __local uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_uint *, __private uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_uint *, __global uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_uint *, __local uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_uint *, __private uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_float *, __global float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_float *, __local float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_float *, 
__private float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_float *, __global float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_float *, __local float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_float *, __private float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_int *, __global int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_int *, __local int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_int *, __private int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_int *, __global int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_int *, __local int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_int *, __private int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_uint *, __global uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_uint *, __local uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_uint *, __private uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_uint *, __global uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_uint *, __local uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_uint *, __private uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_float *, __global float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_float *, __local float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_float *, __private float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_float *, __global float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_float *, __local float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_float *, __private float *, float, memory_order, memory_order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_double *, __global double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_double *, __local double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_double *, __private double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_double *, __global double *, double, memory_order, 
memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_double *, __local double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_double *, __private double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_double *, __global double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_double *, __local double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_double *, __private double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_double *, __global double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_double *, __local double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_double *, __private double *, double, memory_order, memory_order); #endif //cl_khr_fp64 bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_long *, __global long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_long *, __local long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_long *, __private long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_long *, __global long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_long *, __local long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_long *, __private long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_ulong *, __global ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_ulong *, __local ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_ulong *, __private ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_ulong *, __global ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_ulong *, __local ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_ulong *, __private ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_long *, __global long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_long *, __local long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_long *, __private long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_long *, __global long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_long *, __local long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile 
__local atomic_long *, __private long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_ulong *, __global ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_ulong *, __local ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_ulong *, __private ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_ulong *, __global ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_ulong *, __local ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_ulong *, __private ulong *, ulong, memory_order, memory_order); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif //defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *, int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *, uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *, int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *, uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *, float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *, float *, float, memory_order, memory_order, memory_scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *, double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *, double *, double, memory_order, memory_order, memory_scope); #endif //cl_khr_fp64 bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *, long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *, long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *, ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *, ulong *, ulong, memory_order, memory_order, memory_scope); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_int *, __global int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_int *, __local int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_int *, __private int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_int 
*, __global int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_int *, __local int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_int *, __private int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_uint *, __global uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_uint *, __local uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_uint *, __private uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_uint *, __global uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_uint *, __local uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_uint *, __private uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_float *, __global float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_float *, __local float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_float *, __private float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_float *, __global float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_float *, __local float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_float *, __private float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_int *, __global int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_int *, __local int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_int *, __private int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_int *, __global int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_int *, __local int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_int *, __private int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_uint *, __global uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_uint *, __local uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_uint *, __private uint *, uint, memory_order, memory_order, memory_scope); bool __ovld 
atomic_compare_exchange_weak_explicit(volatile __local atomic_uint *, __global uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_uint *, __local uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_uint *, __private uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_float *, __global float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_float *, __local float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_float *, __private float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_float *, __global float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_float *, __local float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_float *, __private float *, float, memory_order, memory_order, memory_scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_double *, __global double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_double *, __local double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_double *, __private double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_double *, __global double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_double *, __local double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_double *, __private double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_double *, __global double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_double *, __local double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_double *, __private double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_double *, __global double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_double *, __local double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_double *, __private double *, double, memory_order, memory_order, memory_scope); #endif //cl_khr_fp64 bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_long *, __global long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global 
atomic_long *, __local long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_long *, __private long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_long *, __global long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_long *, __local long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_long *, __private long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_ulong *, __global ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_ulong *, __local ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_ulong *, __private ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_ulong *, __global ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_ulong *, __local ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_ulong *, __private ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_long *, __global long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_long *, __local long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_long *, __private long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_long *, __global long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_long *, __local long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_long *, __private long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_ulong *, __global ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_ulong *, __local ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_ulong *, __private ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_ulong *, __global ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_ulong *, __local ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_ulong *, __private ulong *, ulong, memory_order, memory_order, memory_scope); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || 
__OPENCL_CPP_VERSION__ >= 202100) // atomic_flag_test_and_set() and atomic_flag_clear() #if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) bool __ovld atomic_flag_test_and_set(volatile atomic_flag *); void __ovld atomic_flag_clear(volatile atomic_flag *); #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) bool __ovld atomic_flag_test_and_set(volatile __global atomic_flag *); bool __ovld atomic_flag_test_and_set(volatile __local atomic_flag *); void __ovld atomic_flag_clear(volatile __global atomic_flag *); void __ovld atomic_flag_clear(volatile __local atomic_flag *); #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order); void __ovld atomic_flag_clear_explicit(volatile atomic_flag *, memory_order); #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) bool __ovld atomic_flag_test_and_set_explicit(volatile __global atomic_flag *, memory_order); bool __ovld atomic_flag_test_and_set_explicit(volatile __local atomic_flag *, memory_order); void __ovld atomic_flag_clear_explicit(volatile __global atomic_flag *, memory_order); void __ovld atomic_flag_clear_explicit(volatile __local atomic_flag *, memory_order); #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_generic_address_space) bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order, memory_scope); void __ovld atomic_flag_clear_explicit(volatile atomic_flag *, memory_order, memory_scope); #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) bool __ovld atomic_flag_test_and_set_explicit(volatile __global atomic_flag *, memory_order, memory_scope); bool __ovld atomic_flag_test_and_set_explicit(volatile __local atomic_flag *, memory_order, memory_scope); void __ovld atomic_flag_clear_explicit(volatile __global atomic_flag *, memory_order, memory_scope); void __ovld atomic_flag_clear_explicit(volatile __local atomic_flag *, memory_order, memory_scope); #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // OpenCL v1.1 s6.11.12, v1.2 s6.12.12, v2.0 s6.13.12 - Miscellaneous Vector Functions /** * The shuffle and shuffle2 built-in functions construct * a permutation of elements from one or two input * vectors respectively that are of the same type, * returning a vector with the same element type as the * input and length that is the same as the shuffle mask. * The size of each element in the mask must match the * size of each element in the result. For shuffle, only * the ilogb(2m-1) least significant bits of each mask * element are considered. For shuffle2, only the * ilogb(2m-1)+1 least significant bits of each mask * element are considered. Other bits in the mask shall * be ignored. * The elements of the input vectors are numbered from * left to right across one or both of the vectors. For this * purpose, the number of elements in a vector is given * by vec_step(gentypem). 
The shuffle mask operand * specifies, for each element of the result vector, which * element of the one or two input vectors the result * element gets. * Examples: * uint4 mask = (uint4)(3, 2, * 1, 0); * float4 a; * float4 r = shuffle(a, mask); * // r.s0123 = a.wzyx * uint8 mask = (uint8)(0, 1, 2, 3, * 4, 5, 6, 7); * float4 a, b; * float8 r = shuffle2(a, b, mask); * // r.s0123 = a.xyzw * // r.s4567 = b.xyzw * uint4 mask; * float8 a; * float4 b; * b = shuffle(a, mask); * Examples that are not valid are: * uint8 mask; * short16 a; * short8 b; * b = shuffle(a, mask); <- not valid */ char2 __ovld __cnfn shuffle(char2, uchar2); char2 __ovld __cnfn shuffle(char4, uchar2); char2 __ovld __cnfn shuffle(char8, uchar2); char2 __ovld __cnfn shuffle(char16, uchar2); uchar2 __ovld __cnfn shuffle(uchar2, uchar2); uchar2 __ovld __cnfn shuffle(uchar4, uchar2); uchar2 __ovld __cnfn shuffle(uchar8, uchar2); uchar2 __ovld __cnfn shuffle(uchar16, uchar2); short2 __ovld __cnfn shuffle(short2, ushort2); short2 __ovld __cnfn shuffle(short4, ushort2); short2 __ovld __cnfn shuffle(short8, ushort2); short2 __ovld __cnfn shuffle(short16, ushort2); ushort2 __ovld __cnfn shuffle(ushort2, ushort2); ushort2 __ovld __cnfn shuffle(ushort4, ushort2); ushort2 __ovld __cnfn shuffle(ushort8, ushort2); ushort2 __ovld __cnfn shuffle(ushort16, ushort2); int2 __ovld __cnfn shuffle(int2, uint2); int2 __ovld __cnfn shuffle(int4, uint2); int2 __ovld __cnfn shuffle(int8, uint2); int2 __ovld __cnfn shuffle(int16, uint2); uint2 __ovld __cnfn shuffle(uint2, uint2); uint2 __ovld __cnfn shuffle(uint4, uint2); uint2 __ovld __cnfn shuffle(uint8, uint2); uint2 __ovld __cnfn shuffle(uint16, uint2); long2 __ovld __cnfn shuffle(long2, ulong2); long2 __ovld __cnfn shuffle(long4, ulong2); long2 __ovld __cnfn shuffle(long8, ulong2); long2 __ovld __cnfn shuffle(long16, ulong2); ulong2 __ovld __cnfn shuffle(ulong2, ulong2); ulong2 __ovld __cnfn shuffle(ulong4, ulong2); ulong2 __ovld __cnfn shuffle(ulong8, ulong2); ulong2 __ovld __cnfn shuffle(ulong16, ulong2); float2 __ovld __cnfn shuffle(float2, uint2); float2 __ovld __cnfn shuffle(float4, uint2); float2 __ovld __cnfn shuffle(float8, uint2); float2 __ovld __cnfn shuffle(float16, uint2); char4 __ovld __cnfn shuffle(char2, uchar4); char4 __ovld __cnfn shuffle(char4, uchar4); char4 __ovld __cnfn shuffle(char8, uchar4); char4 __ovld __cnfn shuffle(char16, uchar4); uchar4 __ovld __cnfn shuffle(uchar2, uchar4); uchar4 __ovld __cnfn shuffle(uchar4, uchar4); uchar4 __ovld __cnfn shuffle(uchar8, uchar4); uchar4 __ovld __cnfn shuffle(uchar16, uchar4); short4 __ovld __cnfn shuffle(short2, ushort4); short4 __ovld __cnfn shuffle(short4, ushort4); short4 __ovld __cnfn shuffle(short8, ushort4); short4 __ovld __cnfn shuffle(short16, ushort4); ushort4 __ovld __cnfn shuffle(ushort2, ushort4); ushort4 __ovld __cnfn shuffle(ushort4, ushort4); ushort4 __ovld __cnfn shuffle(ushort8, ushort4); ushort4 __ovld __cnfn shuffle(ushort16, ushort4); int4 __ovld __cnfn shuffle(int2, uint4); int4 __ovld __cnfn shuffle(int4, uint4); int4 __ovld __cnfn shuffle(int8, uint4); int4 __ovld __cnfn shuffle(int16, uint4); uint4 __ovld __cnfn shuffle(uint2, uint4); uint4 __ovld __cnfn shuffle(uint4, uint4); uint4 __ovld __cnfn shuffle(uint8, uint4); uint4 __ovld __cnfn shuffle(uint16, uint4); long4 __ovld __cnfn shuffle(long2, ulong4); long4 __ovld __cnfn shuffle(long4, ulong4); long4 __ovld __cnfn shuffle(long8, ulong4); long4 __ovld __cnfn shuffle(long16, ulong4); ulong4 __ovld __cnfn shuffle(ulong2, ulong4); ulong4 __ovld __cnfn 
shuffle(ulong4, ulong4); ulong4 __ovld __cnfn shuffle(ulong8, ulong4); ulong4 __ovld __cnfn shuffle(ulong16, ulong4); float4 __ovld __cnfn shuffle(float2, uint4); float4 __ovld __cnfn shuffle(float4, uint4); float4 __ovld __cnfn shuffle(float8, uint4); float4 __ovld __cnfn shuffle(float16, uint4); char8 __ovld __cnfn shuffle(char2, uchar8); char8 __ovld __cnfn shuffle(char4, uchar8); char8 __ovld __cnfn shuffle(char8, uchar8); char8 __ovld __cnfn shuffle(char16, uchar8); uchar8 __ovld __cnfn shuffle(uchar2, uchar8); uchar8 __ovld __cnfn shuffle(uchar4, uchar8); uchar8 __ovld __cnfn shuffle(uchar8, uchar8); uchar8 __ovld __cnfn shuffle(uchar16, uchar8); short8 __ovld __cnfn shuffle(short2, ushort8); short8 __ovld __cnfn shuffle(short4, ushort8); short8 __ovld __cnfn shuffle(short8, ushort8); short8 __ovld __cnfn shuffle(short16, ushort8); ushort8 __ovld __cnfn shuffle(ushort2, ushort8); ushort8 __ovld __cnfn shuffle(ushort4, ushort8); ushort8 __ovld __cnfn shuffle(ushort8, ushort8); ushort8 __ovld __cnfn shuffle(ushort16, ushort8); int8 __ovld __cnfn shuffle(int2, uint8); int8 __ovld __cnfn shuffle(int4, uint8); int8 __ovld __cnfn shuffle(int8, uint8); int8 __ovld __cnfn shuffle(int16, uint8); uint8 __ovld __cnfn shuffle(uint2, uint8); uint8 __ovld __cnfn shuffle(uint4, uint8); uint8 __ovld __cnfn shuffle(uint8, uint8); uint8 __ovld __cnfn shuffle(uint16, uint8); long8 __ovld __cnfn shuffle(long2, ulong8); long8 __ovld __cnfn shuffle(long4, ulong8); long8 __ovld __cnfn shuffle(long8, ulong8); long8 __ovld __cnfn shuffle(long16, ulong8); ulong8 __ovld __cnfn shuffle(ulong2, ulong8); ulong8 __ovld __cnfn shuffle(ulong4, ulong8); ulong8 __ovld __cnfn shuffle(ulong8, ulong8); ulong8 __ovld __cnfn shuffle(ulong16, ulong8); float8 __ovld __cnfn shuffle(float2, uint8); float8 __ovld __cnfn shuffle(float4, uint8); float8 __ovld __cnfn shuffle(float8, uint8); float8 __ovld __cnfn shuffle(float16, uint8); char16 __ovld __cnfn shuffle(char2, uchar16); char16 __ovld __cnfn shuffle(char4, uchar16); char16 __ovld __cnfn shuffle(char8, uchar16); char16 __ovld __cnfn shuffle(char16, uchar16); uchar16 __ovld __cnfn shuffle(uchar2, uchar16); uchar16 __ovld __cnfn shuffle(uchar4, uchar16); uchar16 __ovld __cnfn shuffle(uchar8, uchar16); uchar16 __ovld __cnfn shuffle(uchar16, uchar16); short16 __ovld __cnfn shuffle(short2, ushort16); short16 __ovld __cnfn shuffle(short4, ushort16); short16 __ovld __cnfn shuffle(short8, ushort16); short16 __ovld __cnfn shuffle(short16, ushort16); ushort16 __ovld __cnfn shuffle(ushort2, ushort16); ushort16 __ovld __cnfn shuffle(ushort4, ushort16); ushort16 __ovld __cnfn shuffle(ushort8, ushort16); ushort16 __ovld __cnfn shuffle(ushort16, ushort16); int16 __ovld __cnfn shuffle(int2, uint16); int16 __ovld __cnfn shuffle(int4, uint16); int16 __ovld __cnfn shuffle(int8, uint16); int16 __ovld __cnfn shuffle(int16, uint16); uint16 __ovld __cnfn shuffle(uint2, uint16); uint16 __ovld __cnfn shuffle(uint4, uint16); uint16 __ovld __cnfn shuffle(uint8, uint16); uint16 __ovld __cnfn shuffle(uint16, uint16); long16 __ovld __cnfn shuffle(long2, ulong16); long16 __ovld __cnfn shuffle(long4, ulong16); long16 __ovld __cnfn shuffle(long8, ulong16); long16 __ovld __cnfn shuffle(long16, ulong16); ulong16 __ovld __cnfn shuffle(ulong2, ulong16); ulong16 __ovld __cnfn shuffle(ulong4, ulong16); ulong16 __ovld __cnfn shuffle(ulong8, ulong16); ulong16 __ovld __cnfn shuffle(ulong16, ulong16); float16 __ovld __cnfn shuffle(float2, uint16); float16 __ovld __cnfn shuffle(float4, uint16); float16 __ovld 
__cnfn shuffle(float8, uint16); float16 __ovld __cnfn shuffle(float16, uint16); #ifdef cl_khr_fp64 double2 __ovld __cnfn shuffle(double2, ulong2); double2 __ovld __cnfn shuffle(double4, ulong2); double2 __ovld __cnfn shuffle(double8, ulong2); double2 __ovld __cnfn shuffle(double16, ulong2); double4 __ovld __cnfn shuffle(double2, ulong4); double4 __ovld __cnfn shuffle(double4, ulong4); double4 __ovld __cnfn shuffle(double8, ulong4); double4 __ovld __cnfn shuffle(double16, ulong4); double8 __ovld __cnfn shuffle(double2, ulong8); double8 __ovld __cnfn shuffle(double4, ulong8); double8 __ovld __cnfn shuffle(double8, ulong8); double8 __ovld __cnfn shuffle(double16, ulong8); double16 __ovld __cnfn shuffle(double2, ulong16); double16 __ovld __cnfn shuffle(double4, ulong16); double16 __ovld __cnfn shuffle(double8, ulong16); double16 __ovld __cnfn shuffle(double16, ulong16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half2 __ovld __cnfn shuffle(half2, ushort2); half2 __ovld __cnfn shuffle(half4, ushort2); half2 __ovld __cnfn shuffle(half8, ushort2); half2 __ovld __cnfn shuffle(half16, ushort2); half4 __ovld __cnfn shuffle(half2, ushort4); half4 __ovld __cnfn shuffle(half4, ushort4); half4 __ovld __cnfn shuffle(half8, ushort4); half4 __ovld __cnfn shuffle(half16, ushort4); half8 __ovld __cnfn shuffle(half2, ushort8); half8 __ovld __cnfn shuffle(half4, ushort8); half8 __ovld __cnfn shuffle(half8, ushort8); half8 __ovld __cnfn shuffle(half16, ushort8); half16 __ovld __cnfn shuffle(half2, ushort16); half16 __ovld __cnfn shuffle(half4, ushort16); half16 __ovld __cnfn shuffle(half8, ushort16); half16 __ovld __cnfn shuffle(half16, ushort16); #endif //cl_khr_fp16 char2 __ovld __cnfn shuffle2(char2, char2, uchar2); char2 __ovld __cnfn shuffle2(char4, char4, uchar2); char2 __ovld __cnfn shuffle2(char8, char8, uchar2); char2 __ovld __cnfn shuffle2(char16, char16, uchar2); uchar2 __ovld __cnfn shuffle2(uchar2, uchar2, uchar2); uchar2 __ovld __cnfn shuffle2(uchar4, uchar4, uchar2); uchar2 __ovld __cnfn shuffle2(uchar8, uchar8, uchar2); uchar2 __ovld __cnfn shuffle2(uchar16, uchar16, uchar2); short2 __ovld __cnfn shuffle2(short2, short2, ushort2); short2 __ovld __cnfn shuffle2(short4, short4, ushort2); short2 __ovld __cnfn shuffle2(short8, short8, ushort2); short2 __ovld __cnfn shuffle2(short16, short16, ushort2); ushort2 __ovld __cnfn shuffle2(ushort2, ushort2, ushort2); ushort2 __ovld __cnfn shuffle2(ushort4, ushort4, ushort2); ushort2 __ovld __cnfn shuffle2(ushort8, ushort8, ushort2); ushort2 __ovld __cnfn shuffle2(ushort16, ushort16, ushort2); int2 __ovld __cnfn shuffle2(int2, int2, uint2); int2 __ovld __cnfn shuffle2(int4, int4, uint2); int2 __ovld __cnfn shuffle2(int8, int8, uint2); int2 __ovld __cnfn shuffle2(int16, int16, uint2); uint2 __ovld __cnfn shuffle2(uint2, uint2, uint2); uint2 __ovld __cnfn shuffle2(uint4, uint4, uint2); uint2 __ovld __cnfn shuffle2(uint8, uint8, uint2); uint2 __ovld __cnfn shuffle2(uint16, uint16, uint2); long2 __ovld __cnfn shuffle2(long2, long2, ulong2); long2 __ovld __cnfn shuffle2(long4, long4, ulong2); long2 __ovld __cnfn shuffle2(long8, long8, ulong2); long2 __ovld __cnfn shuffle2(long16, long16, ulong2); ulong2 __ovld __cnfn shuffle2(ulong2, ulong2, ulong2); ulong2 __ovld __cnfn shuffle2(ulong4, ulong4, ulong2); ulong2 __ovld __cnfn shuffle2(ulong8, ulong8, ulong2); ulong2 __ovld __cnfn shuffle2(ulong16, ulong16, ulong2); float2 __ovld __cnfn shuffle2(float2, float2, uint2); float2 __ovld __cnfn shuffle2(float4, float4, uint2); float2 __ovld __cnfn shuffle2(float8, float8, 
uint2); float2 __ovld __cnfn shuffle2(float16, float16, uint2); char4 __ovld __cnfn shuffle2(char2, char2, uchar4); char4 __ovld __cnfn shuffle2(char4, char4, uchar4); char4 __ovld __cnfn shuffle2(char8, char8, uchar4); char4 __ovld __cnfn shuffle2(char16, char16, uchar4); uchar4 __ovld __cnfn shuffle2(uchar2, uchar2, uchar4); uchar4 __ovld __cnfn shuffle2(uchar4, uchar4, uchar4); uchar4 __ovld __cnfn shuffle2(uchar8, uchar8, uchar4); uchar4 __ovld __cnfn shuffle2(uchar16, uchar16, uchar4); short4 __ovld __cnfn shuffle2(short2, short2, ushort4); short4 __ovld __cnfn shuffle2(short4, short4, ushort4); short4 __ovld __cnfn shuffle2(short8, short8, ushort4); short4 __ovld __cnfn shuffle2(short16, short16, ushort4); ushort4 __ovld __cnfn shuffle2(ushort2, ushort2, ushort4); ushort4 __ovld __cnfn shuffle2(ushort4, ushort4, ushort4); ushort4 __ovld __cnfn shuffle2(ushort8, ushort8, ushort4); ushort4 __ovld __cnfn shuffle2(ushort16, ushort16, ushort4); int4 __ovld __cnfn shuffle2(int2, int2, uint4); int4 __ovld __cnfn shuffle2(int4, int4, uint4); int4 __ovld __cnfn shuffle2(int8, int8, uint4); int4 __ovld __cnfn shuffle2(int16, int16, uint4); uint4 __ovld __cnfn shuffle2(uint2, uint2, uint4); uint4 __ovld __cnfn shuffle2(uint4, uint4, uint4); uint4 __ovld __cnfn shuffle2(uint8, uint8, uint4); uint4 __ovld __cnfn shuffle2(uint16, uint16, uint4); long4 __ovld __cnfn shuffle2(long2, long2, ulong4); long4 __ovld __cnfn shuffle2(long4, long4, ulong4); long4 __ovld __cnfn shuffle2(long8, long8, ulong4); long4 __ovld __cnfn shuffle2(long16, long16, ulong4); ulong4 __ovld __cnfn shuffle2(ulong2, ulong2, ulong4); ulong4 __ovld __cnfn shuffle2(ulong4, ulong4, ulong4); ulong4 __ovld __cnfn shuffle2(ulong8, ulong8, ulong4); ulong4 __ovld __cnfn shuffle2(ulong16, ulong16, ulong4); float4 __ovld __cnfn shuffle2(float2, float2, uint4); float4 __ovld __cnfn shuffle2(float4, float4, uint4); float4 __ovld __cnfn shuffle2(float8, float8, uint4); float4 __ovld __cnfn shuffle2(float16, float16, uint4); char8 __ovld __cnfn shuffle2(char2, char2, uchar8); char8 __ovld __cnfn shuffle2(char4, char4, uchar8); char8 __ovld __cnfn shuffle2(char8, char8, uchar8); char8 __ovld __cnfn shuffle2(char16, char16, uchar8); uchar8 __ovld __cnfn shuffle2(uchar2, uchar2, uchar8); uchar8 __ovld __cnfn shuffle2(uchar4, uchar4, uchar8); uchar8 __ovld __cnfn shuffle2(uchar8, uchar8, uchar8); uchar8 __ovld __cnfn shuffle2(uchar16, uchar16, uchar8); short8 __ovld __cnfn shuffle2(short2, short2, ushort8); short8 __ovld __cnfn shuffle2(short4, short4, ushort8); short8 __ovld __cnfn shuffle2(short8, short8, ushort8); short8 __ovld __cnfn shuffle2(short16, short16, ushort8); ushort8 __ovld __cnfn shuffle2(ushort2, ushort2, ushort8); ushort8 __ovld __cnfn shuffle2(ushort4, ushort4, ushort8); ushort8 __ovld __cnfn shuffle2(ushort8, ushort8, ushort8); ushort8 __ovld __cnfn shuffle2(ushort16, ushort16, ushort8); int8 __ovld __cnfn shuffle2(int2, int2, uint8); int8 __ovld __cnfn shuffle2(int4, int4, uint8); int8 __ovld __cnfn shuffle2(int8, int8, uint8); int8 __ovld __cnfn shuffle2(int16, int16, uint8); uint8 __ovld __cnfn shuffle2(uint2, uint2, uint8); uint8 __ovld __cnfn shuffle2(uint4, uint4, uint8); uint8 __ovld __cnfn shuffle2(uint8, uint8, uint8); uint8 __ovld __cnfn shuffle2(uint16, uint16, uint8); long8 __ovld __cnfn shuffle2(long2, long2, ulong8); long8 __ovld __cnfn shuffle2(long4, long4, ulong8); long8 __ovld __cnfn shuffle2(long8, long8, ulong8); long8 __ovld __cnfn shuffle2(long16, long16, ulong8); ulong8 __ovld __cnfn shuffle2(ulong2, 
ulong2, ulong8); ulong8 __ovld __cnfn shuffle2(ulong4, ulong4, ulong8); ulong8 __ovld __cnfn shuffle2(ulong8, ulong8, ulong8); ulong8 __ovld __cnfn shuffle2(ulong16, ulong16, ulong8); float8 __ovld __cnfn shuffle2(float2, float2, uint8); float8 __ovld __cnfn shuffle2(float4, float4, uint8); float8 __ovld __cnfn shuffle2(float8, float8, uint8); float8 __ovld __cnfn shuffle2(float16, float16, uint8); char16 __ovld __cnfn shuffle2(char2, char2, uchar16); char16 __ovld __cnfn shuffle2(char4, char4, uchar16); char16 __ovld __cnfn shuffle2(char8, char8, uchar16); char16 __ovld __cnfn shuffle2(char16, char16, uchar16); uchar16 __ovld __cnfn shuffle2(uchar2, uchar2, uchar16); uchar16 __ovld __cnfn shuffle2(uchar4, uchar4, uchar16); uchar16 __ovld __cnfn shuffle2(uchar8, uchar8, uchar16); uchar16 __ovld __cnfn shuffle2(uchar16, uchar16, uchar16); short16 __ovld __cnfn shuffle2(short2, short2, ushort16); short16 __ovld __cnfn shuffle2(short4, short4, ushort16); short16 __ovld __cnfn shuffle2(short8, short8, ushort16); short16 __ovld __cnfn shuffle2(short16, short16, ushort16); ushort16 __ovld __cnfn shuffle2(ushort2, ushort2, ushort16); ushort16 __ovld __cnfn shuffle2(ushort4, ushort4, ushort16); ushort16 __ovld __cnfn shuffle2(ushort8, ushort8, ushort16); ushort16 __ovld __cnfn shuffle2(ushort16, ushort16, ushort16); int16 __ovld __cnfn shuffle2(int2, int2, uint16); int16 __ovld __cnfn shuffle2(int4, int4, uint16); int16 __ovld __cnfn shuffle2(int8, int8, uint16); int16 __ovld __cnfn shuffle2(int16, int16, uint16); uint16 __ovld __cnfn shuffle2(uint2, uint2, uint16); uint16 __ovld __cnfn shuffle2(uint4, uint4, uint16); uint16 __ovld __cnfn shuffle2(uint8, uint8, uint16); uint16 __ovld __cnfn shuffle2(uint16, uint16, uint16); long16 __ovld __cnfn shuffle2(long2, long2, ulong16); long16 __ovld __cnfn shuffle2(long4, long4, ulong16); long16 __ovld __cnfn shuffle2(long8, long8, ulong16); long16 __ovld __cnfn shuffle2(long16, long16, ulong16); ulong16 __ovld __cnfn shuffle2(ulong2, ulong2, ulong16); ulong16 __ovld __cnfn shuffle2(ulong4, ulong4, ulong16); ulong16 __ovld __cnfn shuffle2(ulong8, ulong8, ulong16); ulong16 __ovld __cnfn shuffle2(ulong16, ulong16, ulong16); float16 __ovld __cnfn shuffle2(float2, float2, uint16); float16 __ovld __cnfn shuffle2(float4, float4, uint16); float16 __ovld __cnfn shuffle2(float8, float8, uint16); float16 __ovld __cnfn shuffle2(float16, float16, uint16); #ifdef cl_khr_fp64 double2 __ovld __cnfn shuffle2(double2, double2, ulong2); double2 __ovld __cnfn shuffle2(double4, double4, ulong2); double2 __ovld __cnfn shuffle2(double8, double8, ulong2); double2 __ovld __cnfn shuffle2(double16, double16, ulong2); double4 __ovld __cnfn shuffle2(double2, double2, ulong4); double4 __ovld __cnfn shuffle2(double4, double4, ulong4); double4 __ovld __cnfn shuffle2(double8, double8, ulong4); double4 __ovld __cnfn shuffle2(double16, double16, ulong4); double8 __ovld __cnfn shuffle2(double2, double2, ulong8); double8 __ovld __cnfn shuffle2(double4, double4, ulong8); double8 __ovld __cnfn shuffle2(double8, double8, ulong8); double8 __ovld __cnfn shuffle2(double16, double16, ulong8); double16 __ovld __cnfn shuffle2(double2, double2, ulong16); double16 __ovld __cnfn shuffle2(double4, double4, ulong16); double16 __ovld __cnfn shuffle2(double8, double8, ulong16); double16 __ovld __cnfn shuffle2(double16, double16, ulong16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half2 __ovld __cnfn shuffle2(half2, half2, ushort2); half2 __ovld __cnfn shuffle2(half4, half4, ushort2); half2 __ovld __cnfn 
shuffle2(half8, half8, ushort2); half2 __ovld __cnfn shuffle2(half16, half16, ushort2); half4 __ovld __cnfn shuffle2(half2, half2, ushort4); half4 __ovld __cnfn shuffle2(half4, half4, ushort4); half4 __ovld __cnfn shuffle2(half8, half8, ushort4); half4 __ovld __cnfn shuffle2(half16, half16, ushort4); half8 __ovld __cnfn shuffle2(half2, half2, ushort8); half8 __ovld __cnfn shuffle2(half4, half4, ushort8); half8 __ovld __cnfn shuffle2(half8, half8, ushort8); half8 __ovld __cnfn shuffle2(half16, half16, ushort8); half16 __ovld __cnfn shuffle2(half2, half2, ushort16); half16 __ovld __cnfn shuffle2(half4, half4, ushort16); half16 __ovld __cnfn shuffle2(half8, half8, ushort16); half16 __ovld __cnfn shuffle2(half16, half16, ushort16); #endif //cl_khr_fp16 // OpenCL v1.1 s6.11.13, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions #ifdef cl_khr_gl_msaa_sharing #pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable #endif //cl_khr_gl_msaa_sharing /** * Use the coordinate (coord.xy) to do an element lookup in * the 2D image object specified by image. * * Use the coordinate (coord.x, coord.y, coord.z) to do * an element lookup in the 3D image object specified * by image. coord.w is ignored. * * Use the coordinate (coord.z) to index into the * 2D image array object specified by image_array * and (coord.x, coord.y) to do an element lookup in * the 2D image object specified by image. * * Use the coordinate (x) to do an element lookup in * the 1D image object specified by image. * * Use the coordinate (coord.y) to index into the * 1D image array object specified by image_array * and (coord.x) to do an element lookup in * the 1D image object specified by image. * * Use the coordinate (coord.xy) and sample to do an * element lookup in the 2D multi-sample image specified * by image. * * Use coord.xy and sample to do an element * lookup in the 2D multi-sample image layer * identified by index coord.z in the 2D multi-sample * image array specified by image. * * For mipmap images, use the mip-level specified by * the Level-of-Detail (lod) or use gradients for LOD * computation. * * read_imagef returns floating-point values in the * range [0.0 ... 1.0] for image objects created with * image_channel_data_type set to one of the predefined * packed formats or CL_UNORM_INT8, or * CL_UNORM_INT16. * * read_imagef returns floating-point values in the * range [-1.0 ... 1.0] for image objects created with * image_channel_data_type set to CL_SNORM_INT8, * or CL_SNORM_INT16. * * read_imagef returns floating-point values for image * objects created with image_channel_data_type set to * CL_HALF_FLOAT or CL_FLOAT. * * read_imagei and read_imageui return * unnormalized signed integer and unsigned integer * values respectively. Each channel will be stored in a * 32-bit integer. * * read_imagei can only be used with image objects * created with image_channel_data_type set to one of * the following values: * CL_SIGNED_INT8, * CL_SIGNED_INT16 and * CL_SIGNED_INT32. * If the image_channel_data_type is not one of the * above values, the values returned by read_imagei * are undefined. * * read_imageui can only be used with image objects * created with image_channel_data_type set to one of * the following values: * CL_UNSIGNED_INT8, * CL_UNSIGNED_INT16 and * CL_UNSIGNED_INT32. * If the image_channel_data_type is not one of the * above values, the values returned by read_imageui * are undefined. * * The read_image{i|ui} calls support a nearest filter * only.
The filter_mode specified in sampler * must be set to CLK_FILTER_NEAREST; otherwise * the values returned are undefined. * The read_image{f|i|ui} calls that take * integer coordinates must use a sampler with * normalized coordinates set to * CLK_NORMALIZED_COORDS_FALSE and * addressing mode set to * CLK_ADDRESS_CLAMP_TO_EDGE, * CLK_ADDRESS_CLAMP or CLK_ADDRESS_NONE; * otherwise the values returned are undefined. * * Values returned by read_imagef for image objects * with image_channel_data_type values not specified * in the description above are undefined. */ float4 __ovld __purefn read_imagef(read_only image2d_t, sampler_t, int2); float4 __ovld __purefn read_imagef(read_only image2d_t, sampler_t, float2); int4 __ovld __purefn read_imagei(read_only image2d_t, sampler_t, int2); int4 __ovld __purefn read_imagei(read_only image2d_t, sampler_t, float2); uint4 __ovld __purefn read_imageui(read_only image2d_t, sampler_t, int2); uint4 __ovld __purefn read_imageui(read_only image2d_t, sampler_t, float2); float4 __ovld __purefn read_imagef(read_only image3d_t, sampler_t, int4); float4 __ovld __purefn read_imagef(read_only image3d_t, sampler_t, float4); int4 __ovld __purefn read_imagei(read_only image3d_t, sampler_t, int4); int4 __ovld __purefn read_imagei(read_only image3d_t, sampler_t, float4); uint4 __ovld __purefn read_imageui(read_only image3d_t, sampler_t, int4); uint4 __ovld __purefn read_imageui(read_only image3d_t, sampler_t, float4); #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) float4 __ovld __purefn read_imagef(read_only image2d_array_t, sampler_t, int4); float4 __ovld __purefn read_imagef(read_only image2d_array_t, sampler_t, float4); int4 __ovld __purefn read_imagei(read_only image2d_array_t, sampler_t, int4); int4 __ovld __purefn read_imagei(read_only image2d_array_t, sampler_t, float4); uint4 __ovld __purefn read_imageui(read_only image2d_array_t, sampler_t, int4); uint4 __ovld __purefn read_imageui(read_only image2d_array_t, sampler_t, float4); #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) float4 __ovld __purefn read_imagef(read_only image1d_t, sampler_t, int); float4 __ovld __purefn read_imagef(read_only image1d_t, sampler_t, float); int4 __ovld __purefn read_imagei(read_only image1d_t, sampler_t, int); int4 __ovld __purefn read_imagei(read_only image1d_t, sampler_t, float); uint4 __ovld __purefn read_imageui(read_only image1d_t, sampler_t, int); uint4 __ovld __purefn read_imageui(read_only image1d_t, sampler_t, float); #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) float4 __ovld __purefn read_imagef(read_only image1d_array_t, sampler_t, int2); float4 __ovld __purefn read_imagef(read_only image1d_array_t, sampler_t, float2); int4 __ovld __purefn read_imagei(read_only image1d_array_t, sampler_t, int2); int4 __ovld __purefn read_imagei(read_only image1d_array_t, sampler_t, float2); uint4 __ovld __purefn read_imageui(read_only image1d_array_t, sampler_t, int2); uint4 __ovld __purefn read_imageui(read_only image1d_array_t, sampler_t, float2); #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) #ifdef cl_khr_depth_images float __ovld __purefn read_imagef(read_only image2d_depth_t, sampler_t, float2); float __ovld __purefn read_imagef(read_only image2d_depth_t, sampler_t, int2); float __ovld __purefn read_imagef(read_only image2d_array_depth_t, sampler_t, float4); float __ovld __purefn read_imagef(read_only image2d_array_depth_t, sampler_t, int4); 
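// Illustrative usage sketch (editorial addition, not part of the original
// header): reading a depth image with the sampler-based overloads declared
// just above. The kernel, sampler, and argument names are hypothetical.
// Per the documentation above, integer-coordinate reads require a sampler
// with unnormalized coordinates, a clamping address mode, and a nearest
// filter. Guarded with #if 0 so it does not affect compilation.
#if 0
__constant sampler_t depth_sampler = CLK_NORMALIZED_COORDS_FALSE |
                                     CLK_ADDRESS_CLAMP_TO_EDGE |
                                     CLK_FILTER_NEAREST;

// One work-item per pixel: copy a read-only depth image into a flat buffer.
// read_imagef on image2d_depth_t returns a single float, not a float4.
__kernel void copy_depth(read_only image2d_depth_t src,
                         __global float *dst,
                         int width)
{
  const int x = (int)get_global_id(0);
  const int y = (int)get_global_id(1);
  const float d = read_imagef(src, depth_sampler, (int2)(x, y));
  dst[y * width + x] = d;
}
#endif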
#endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) float4 __ovld __purefn read_imagef(read_only image2d_msaa_t, int2, int); int4 __ovld __purefn read_imagei(read_only image2d_msaa_t, int2, int); uint4 __ovld __purefn read_imageui(read_only image2d_msaa_t, int2, int); float __ovld __purefn read_imagef(read_only image2d_msaa_depth_t, int2, int); float4 __ovld __purefn read_imagef(read_only image2d_array_msaa_t, int4, int); int4 __ovld __purefn read_imagei(read_only image2d_array_msaa_t, int4, int); uint4 __ovld __purefn read_imageui(read_only image2d_array_msaa_t, int4, int); float __ovld __purefn read_imagef(read_only image2d_array_msaa_depth_t, int4, int); #endif //cl_khr_gl_msaa_sharing // OpenCL Extension v2.0 s9.18 - Mipmaps #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #ifdef cl_khr_mipmap_image float4 __ovld __purefn read_imagef(read_only image1d_t, sampler_t, float, float); int4 __ovld __purefn read_imagei(read_only image1d_t, sampler_t, float, float); uint4 __ovld __purefn read_imageui(read_only image1d_t, sampler_t, float, float); float4 __ovld __purefn read_imagef(read_only image1d_array_t, sampler_t, float2, float); int4 __ovld __purefn read_imagei(read_only image1d_array_t, sampler_t, float2, float); uint4 __ovld __purefn read_imageui(read_only image1d_array_t, sampler_t, float2, float); float4 __ovld __purefn read_imagef(read_only image2d_t, sampler_t, float2, float); int4 __ovld __purefn read_imagei(read_only image2d_t, sampler_t, float2, float); uint4 __ovld __purefn read_imageui(read_only image2d_t, sampler_t, float2, float); #ifdef cl_khr_depth_images float __ovld __purefn read_imagef(read_only image2d_depth_t, sampler_t, float2, float); #endif // cl_khr_depth_images float4 __ovld __purefn read_imagef(read_only image2d_array_t, sampler_t, float4, float); int4 __ovld __purefn read_imagei(read_only image2d_array_t, sampler_t, float4, float); uint4 __ovld __purefn read_imageui(read_only image2d_array_t, sampler_t, float4, float); #ifdef cl_khr_depth_images float __ovld __purefn read_imagef(read_only image2d_array_depth_t, sampler_t, float4, float); #endif // cl_khr_depth_images float4 __ovld __purefn read_imagef(read_only image3d_t, sampler_t, float4, float); int4 __ovld __purefn read_imagei(read_only image3d_t, sampler_t, float4, float); uint4 __ovld __purefn read_imageui(read_only image3d_t, sampler_t, float4, float); float4 __ovld __purefn read_imagef(read_only image1d_t, sampler_t, float, float, float); int4 __ovld __purefn read_imagei(read_only image1d_t, sampler_t, float, float, float); uint4 __ovld __purefn read_imageui(read_only image1d_t, sampler_t, float, float, float); float4 __ovld __purefn read_imagef(read_only image1d_array_t, sampler_t, float2, float, float); int4 __ovld __purefn read_imagei(read_only image1d_array_t, sampler_t, float2, float, float); uint4 __ovld __purefn read_imageui(read_only image1d_array_t, sampler_t, float2, float, float); float4 __ovld __purefn read_imagef(read_only image2d_t, sampler_t, float2, float2, float2); int4 __ovld __purefn read_imagei(read_only image2d_t, sampler_t, float2, float2, float2); uint4 __ovld __purefn read_imageui(read_only image2d_t, sampler_t, float2, float2, float2); #ifdef cl_khr_depth_images float __ovld __purefn read_imagef(read_only image2d_depth_t, sampler_t, float2, float2, float2); #endif // cl_khr_depth_images float4 __ovld __purefn read_imagef(read_only image2d_array_t, sampler_t, float4, float2, float2); int4 __ovld __purefn read_imagei(read_only 
image2d_array_t, sampler_t, float4, float2, float2); uint4 __ovld __purefn read_imageui(read_only image2d_array_t, sampler_t, float4, float2, float2); #ifdef cl_khr_depth_images float __ovld __purefn read_imagef(read_only image2d_array_depth_t, sampler_t, float4, float2, float2); #endif // cl_khr_depth_images float4 __ovld __purefn read_imagef(read_only image3d_t, sampler_t, float4, float4, float4); int4 __ovld __purefn read_imagei(read_only image3d_t, sampler_t, float4, float4, float4); uint4 __ovld __purefn read_imageui(read_only image3d_t, sampler_t, float4, float4, float4); #endif //cl_khr_mipmap_image #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) /** * Sampler-less Image Access */ float4 __ovld __purefn read_imagef(read_only image1d_t, int); int4 __ovld __purefn read_imagei(read_only image1d_t, int); uint4 __ovld __purefn read_imageui(read_only image1d_t, int); float4 __ovld __purefn read_imagef(read_only image1d_buffer_t, int); int4 __ovld __purefn read_imagei(read_only image1d_buffer_t, int); uint4 __ovld __purefn read_imageui(read_only image1d_buffer_t, int); float4 __ovld __purefn read_imagef(read_only image1d_array_t, int2); int4 __ovld __purefn read_imagei(read_only image1d_array_t, int2); uint4 __ovld __purefn read_imageui(read_only image1d_array_t, int2); float4 __ovld __purefn read_imagef(read_only image2d_t, int2); int4 __ovld __purefn read_imagei(read_only image2d_t, int2); uint4 __ovld __purefn read_imageui(read_only image2d_t, int2); float4 __ovld __purefn read_imagef(read_only image2d_array_t, int4); int4 __ovld __purefn read_imagei(read_only image2d_array_t, int4); uint4 __ovld __purefn read_imageui(read_only image2d_array_t, int4); #ifdef cl_khr_depth_images float __ovld __purefn read_imagef(read_only image2d_depth_t, int2); float __ovld __purefn read_imagef(read_only image2d_array_depth_t, int4); #endif //cl_khr_depth_images float4 __ovld __purefn read_imagef(read_only image3d_t, int4); int4 __ovld __purefn read_imagei(read_only image3d_t, int4); uint4 __ovld __purefn read_imageui(read_only image3d_t, int4); #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) // Image read functions returning half4 type #ifdef cl_khr_fp16 half4 __ovld __purefn read_imageh(read_only image1d_t, sampler_t, int); half4 __ovld __purefn read_imageh(read_only image1d_t, sampler_t, float); half4 __ovld __purefn read_imageh(read_only image2d_t, sampler_t, int2); half4 __ovld __purefn read_imageh(read_only image2d_t, sampler_t, float2); half4 __ovld __purefn read_imageh(read_only image3d_t, sampler_t, int4); half4 __ovld __purefn read_imageh(read_only image3d_t, sampler_t, float4); #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) half4 __ovld __purefn read_imageh(read_only image1d_array_t, sampler_t, int2); half4 __ovld __purefn read_imageh(read_only image1d_array_t, sampler_t, float2); half4 __ovld __purefn read_imageh(read_only image2d_array_t, sampler_t, int4); half4 __ovld __purefn read_imageh(read_only image2d_array_t, sampler_t, float4); /** * Sampler-less Image Access */ half4 __ovld __purefn read_imageh(read_only image1d_t, int); half4 __ovld __purefn read_imageh(read_only image2d_t, int2); half4 __ovld __purefn read_imageh(read_only image3d_t, int4); half4 __ovld __purefn read_imageh(read_only image1d_array_t, int2); half4 __ovld __purefn read_imageh(read_only image2d_array_t, int4); half4 __ovld __purefn 
read_imageh(read_only image1d_buffer_t, int); #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) #endif //cl_khr_fp16 // Image read functions for read_write images #if defined(__opencl_c_read_write_images) float4 __ovld __purefn read_imagef(read_write image1d_t, int); int4 __ovld __purefn read_imagei(read_write image1d_t, int); uint4 __ovld __purefn read_imageui(read_write image1d_t, int); float4 __ovld __purefn read_imagef(read_write image1d_buffer_t, int); int4 __ovld __purefn read_imagei(read_write image1d_buffer_t, int); uint4 __ovld __purefn read_imageui(read_write image1d_buffer_t, int); float4 __ovld __purefn read_imagef(read_write image1d_array_t, int2); int4 __ovld __purefn read_imagei(read_write image1d_array_t, int2); uint4 __ovld __purefn read_imageui(read_write image1d_array_t, int2); float4 __ovld __purefn read_imagef(read_write image2d_t, int2); int4 __ovld __purefn read_imagei(read_write image2d_t, int2); uint4 __ovld __purefn read_imageui(read_write image2d_t, int2); float4 __ovld __purefn read_imagef(read_write image2d_array_t, int4); int4 __ovld __purefn read_imagei(read_write image2d_array_t, int4); uint4 __ovld __purefn read_imageui(read_write image2d_array_t, int4); #ifdef cl_khr_3d_image_writes float4 __ovld __purefn read_imagef(read_write image3d_t, int4); int4 __ovld __purefn read_imagei(read_write image3d_t, int4); uint4 __ovld __purefn read_imageui(read_write image3d_t, int4); #endif // cl_khr_3d_image_writes #ifdef cl_khr_depth_images float __ovld __purefn read_imagef(read_write image2d_depth_t, int2); float __ovld __purefn read_imagef(read_write image2d_array_depth_t, int4); #endif //cl_khr_depth_images #if cl_khr_gl_msaa_sharing float4 __ovld __purefn read_imagef(read_write image2d_msaa_t, int2, int); int4 __ovld __purefn read_imagei(read_write image2d_msaa_t, int2, int); uint4 __ovld __purefn read_imageui(read_write image2d_msaa_t, int2, int); float4 __ovld __purefn read_imagef(read_write image2d_array_msaa_t, int4, int); int4 __ovld __purefn read_imagei(read_write image2d_array_msaa_t, int4, int); uint4 __ovld __purefn read_imageui(read_write image2d_array_msaa_t, int4, int); float __ovld __purefn read_imagef(read_write image2d_msaa_depth_t, int2, int); float __ovld __purefn read_imagef(read_write image2d_array_msaa_depth_t, int4, int); #endif //cl_khr_gl_msaa_sharing #ifdef cl_khr_mipmap_image float4 __ovld __purefn read_imagef(read_write image1d_t, sampler_t, float, float); int4 __ovld __purefn read_imagei(read_write image1d_t, sampler_t, float, float); uint4 __ovld __purefn read_imageui(read_write image1d_t, sampler_t, float, float); float4 __ovld __purefn read_imagef(read_write image1d_array_t, sampler_t, float2, float); int4 __ovld __purefn read_imagei(read_write image1d_array_t, sampler_t, float2, float); uint4 __ovld __purefn read_imageui(read_write image1d_array_t, sampler_t, float2, float); float4 __ovld __purefn read_imagef(read_write image2d_t, sampler_t, float2, float); int4 __ovld __purefn read_imagei(read_write image2d_t, sampler_t, float2, float); uint4 __ovld __purefn read_imageui(read_write image2d_t, sampler_t, float2, float); float __ovld __purefn read_imagef(read_write image2d_depth_t, sampler_t, float2, float); float4 __ovld __purefn read_imagef(read_write image2d_array_t, sampler_t, float4, float); int4 __ovld __purefn read_imagei(read_write image2d_array_t, sampler_t, float4, float); uint4 __ovld __purefn read_imageui(read_write image2d_array_t, sampler_t, float4, float); float __ovld __purefn 
read_imagef(read_write image2d_array_depth_t, sampler_t, float4, float); #ifdef cl_khr_3d_image_writes float4 __ovld __purefn read_imagef(read_write image3d_t, sampler_t, float4, float); int4 __ovld __purefn read_imagei(read_write image3d_t, sampler_t, float4, float); uint4 __ovld __purefn read_imageui(read_write image3d_t, sampler_t, float4, float); #endif // cl_khr_3d_image_writes float4 __ovld __purefn read_imagef(read_write image1d_t, sampler_t, float, float, float); int4 __ovld __purefn read_imagei(read_write image1d_t, sampler_t, float, float, float); uint4 __ovld __purefn read_imageui(read_write image1d_t, sampler_t, float, float, float); float4 __ovld __purefn read_imagef(read_write image1d_array_t, sampler_t, float2, float, float); int4 __ovld __purefn read_imagei(read_write image1d_array_t, sampler_t, float2, float, float); uint4 __ovld __purefn read_imageui(read_write image1d_array_t, sampler_t, float2, float, float); float4 __ovld __purefn read_imagef(read_write image2d_t, sampler_t, float2, float2, float2); int4 __ovld __purefn read_imagei(read_write image2d_t, sampler_t, float2, float2, float2); uint4 __ovld __purefn read_imageui(read_write image2d_t, sampler_t, float2, float2, float2); float __ovld __purefn read_imagef(read_write image2d_depth_t, sampler_t, float2, float2, float2); float4 __ovld __purefn read_imagef(read_write image2d_array_t, sampler_t, float4, float2, float2); int4 __ovld __purefn read_imagei(read_write image2d_array_t, sampler_t, float4, float2, float2); uint4 __ovld __purefn read_imageui(read_write image2d_array_t, sampler_t, float4, float2, float2); float __ovld __purefn read_imagef(read_write image2d_array_depth_t, sampler_t, float4, float2, float2); #ifdef cl_khr_3d_image_writes float4 __ovld __purefn read_imagef(read_write image3d_t, sampler_t, float4, float4, float4); int4 __ovld __purefn read_imagei(read_write image3d_t, sampler_t, float4, float4, float4); uint4 __ovld __purefn read_imageui(read_write image3d_t, sampler_t, float4, float4, float4); #endif // cl_khr_3d_image_writes #endif //cl_khr_mipmap_image // Image read functions returning half4 type #ifdef cl_khr_fp16 half4 __ovld __purefn read_imageh(read_write image1d_t, int); half4 __ovld __purefn read_imageh(read_write image2d_t, int2); #ifdef cl_khr_3d_image_writes half4 __ovld __purefn read_imageh(read_write image3d_t, int4); #endif // cl_khr_3d_image_writes half4 __ovld __purefn read_imageh(read_write image1d_array_t, int2); half4 __ovld __purefn read_imageh(read_write image2d_array_t, int4); half4 __ovld __purefn read_imageh(read_write image1d_buffer_t, int); #endif //cl_khr_fp16 #endif //defined(__opencl_c_read_write_images) /** * Write color value to location specified by coordinate * (coord.x, coord.y) in the 2D image object specified by image. * (coord.x, coord.y) are considered to be unnormalized coordinates * and must be in the range 0 ... image width - 1, and 0 * ... image height - 1. * Write color value to location specified by coordinate * (coord.x, coord.y) in the 2D image object specified by index * (coord.z) of the 2D image array object image_array. * (coord.x, coord.y) are considered to be unnormalized * coordinates and must be in the range 0 ... image width * - 1. * * Write color value to location specified by coordinate * (coord) in the 1D image (buffer) object specified by image. * coord is considered to be unnormalized coordinates * and must be in the range 0 ... image width - 1. 
 *
 * Write color value to location specified by coordinate
 * (coord.x) in the 1D image object specified by index
 * (coord.y) of the 1D image array object image_array.
 * coord.x is considered to be an unnormalized coordinate
 * and must be in the range 0 ... image width - 1.
 *
 * Write color value to location specified by coordinate
 * (coord.x, coord.y, coord.z) in the 3D image object specified by image.
 * coord.x, coord.y and coord.z are considered to be unnormalized coordinates
 * and must be in the range 0 ... image width - 1, 0 ... image height - 1
 * and 0 ... image depth - 1.
 *
 * For mipmap images, use the mip level specified by lod.
 *
 * Appropriate data format conversion to the specified
 * image format is done before writing the color value.
 *
 * write_imagef can only be used with image objects
 * created with image_channel_data_type set to one of
 * the pre-defined packed formats or set to
 * CL_SNORM_INT8, CL_UNORM_INT8,
 * CL_SNORM_INT16, CL_UNORM_INT16,
 * CL_HALF_FLOAT or CL_FLOAT. Appropriate data
 * format conversion will be done to convert channel
 * data from a floating-point value to the actual data format
 * in which the channels are stored.
 *
 * write_imagei can only be used with image objects
 * created with image_channel_data_type set to one of
 * the following values:
 * CL_SIGNED_INT8,
 * CL_SIGNED_INT16 and
 * CL_SIGNED_INT32.
 *
 * write_imageui can only be used with image objects
 * created with image_channel_data_type set to one of
 * the following values:
 * CL_UNSIGNED_INT8,
 * CL_UNSIGNED_INT16 and
 * CL_UNSIGNED_INT32.
 *
 * The behavior of write_imagef, write_imagei and
 * write_imageui for image objects created with
 * image_channel_data_type values not specified in
 * the description above, or with (x, y) coordinate
 * values that are not in the range (0 ... image width - 1,
 * 0 ... image height - 1), respectively, is undefined.
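 *
 * Illustrative usage (a minimal sketch, not part of the standard header; the
 * kernel and argument names are hypothetical): one work-item writes one pixel
 * of a 2D image using unnormalized integer coordinates.
 *
 *   __kernel void fill_gradient(write_only image2d_t dst) {
 *     int2 coord = (int2)((int)get_global_id(0), (int)get_global_id(1));
 *     // Scale the coordinate into [0, 1] to produce a simple gradient.
 *     float4 color = (float4)((float)coord.x / (float)get_image_width(dst),
 *                             (float)coord.y / (float)get_image_height(dst),
 *                             0.0f, 1.0f);
 *     write_imagef(dst, coord, color);
 *   }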
*/ void __ovld write_imagef(write_only image2d_t, int2, float4); void __ovld write_imagei(write_only image2d_t, int2, int4); void __ovld write_imageui(write_only image2d_t, int2, uint4); void __ovld write_imagef(write_only image2d_array_t, int4, float4); void __ovld write_imagei(write_only image2d_array_t, int4, int4); void __ovld write_imageui(write_only image2d_array_t, int4, uint4); void __ovld write_imagef(write_only image1d_t, int, float4); void __ovld write_imagei(write_only image1d_t, int, int4); void __ovld write_imageui(write_only image1d_t, int, uint4); void __ovld write_imagef(write_only image1d_buffer_t, int, float4); void __ovld write_imagei(write_only image1d_buffer_t, int, int4); void __ovld write_imageui(write_only image1d_buffer_t, int, uint4); void __ovld write_imagef(write_only image1d_array_t, int2, float4); void __ovld write_imagei(write_only image1d_array_t, int2, int4); void __ovld write_imageui(write_only image1d_array_t, int2, uint4); #ifdef cl_khr_3d_image_writes void __ovld write_imagef(write_only image3d_t, int4, float4); void __ovld write_imagei(write_only image3d_t, int4, int4); void __ovld write_imageui(write_only image3d_t, int4, uint4); #endif #ifdef cl_khr_depth_images void __ovld write_imagef(write_only image2d_depth_t, int2, float); void __ovld write_imagef(write_only image2d_array_depth_t, int4, float); #endif //cl_khr_depth_images // OpenCL Extension v2.0 s9.18 - Mipmaps #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #if defined(cl_khr_mipmap_image_writes) void __ovld write_imagef(write_only image1d_t, int, int, float4); void __ovld write_imagei(write_only image1d_t, int, int, int4); void __ovld write_imageui(write_only image1d_t, int, int, uint4); void __ovld write_imagef(write_only image1d_array_t, int2, int, float4); void __ovld write_imagei(write_only image1d_array_t, int2, int, int4); void __ovld write_imageui(write_only image1d_array_t, int2, int, uint4); void __ovld write_imagef(write_only image2d_t, int2, int, float4); void __ovld write_imagei(write_only image2d_t, int2, int, int4); void __ovld write_imageui(write_only image2d_t, int2, int, uint4); void __ovld write_imagef(write_only image2d_array_t, int4, int, float4); void __ovld write_imagei(write_only image2d_array_t, int4, int, int4); void __ovld write_imageui(write_only image2d_array_t, int4, int, uint4); void __ovld write_imagef(write_only image2d_depth_t, int2, int, float); void __ovld write_imagef(write_only image2d_array_depth_t, int4, int, float); #ifdef cl_khr_3d_image_writes void __ovld write_imagef(write_only image3d_t, int4, int, float4); void __ovld write_imagei(write_only image3d_t, int4, int, int4); void __ovld write_imageui(write_only image3d_t, int4, int, uint4); #endif //cl_khr_3d_image_writes #endif //defined(cl_khr_mipmap_image_writes) #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // Image write functions for half4 type #ifdef cl_khr_fp16 void __ovld write_imageh(write_only image1d_t, int, half4); void __ovld write_imageh(write_only image2d_t, int2, half4); #ifdef cl_khr_3d_image_writes void __ovld write_imageh(write_only image3d_t, int4, half4); #endif void __ovld write_imageh(write_only image1d_array_t, int2, half4); void __ovld write_imageh(write_only image2d_array_t, int4, half4); void __ovld write_imageh(write_only image1d_buffer_t, int, half4); #endif //cl_khr_fp16 // Image write functions for read_write images #if defined(__opencl_c_read_write_images) void __ovld write_imagef(read_write 
image2d_t, int2, float4); void __ovld write_imagei(read_write image2d_t, int2, int4); void __ovld write_imageui(read_write image2d_t, int2, uint4); void __ovld write_imagef(read_write image2d_array_t, int4, float4); void __ovld write_imagei(read_write image2d_array_t, int4, int4); void __ovld write_imageui(read_write image2d_array_t, int4, uint4); void __ovld write_imagef(read_write image1d_t, int, float4); void __ovld write_imagei(read_write image1d_t, int, int4); void __ovld write_imageui(read_write image1d_t, int, uint4); void __ovld write_imagef(read_write image1d_buffer_t, int, float4); void __ovld write_imagei(read_write image1d_buffer_t, int, int4); void __ovld write_imageui(read_write image1d_buffer_t, int, uint4); void __ovld write_imagef(read_write image1d_array_t, int2, float4); void __ovld write_imagei(read_write image1d_array_t, int2, int4); void __ovld write_imageui(read_write image1d_array_t, int2, uint4); #ifdef cl_khr_3d_image_writes void __ovld write_imagef(read_write image3d_t, int4, float4); void __ovld write_imagei(read_write image3d_t, int4, int4); void __ovld write_imageui(read_write image3d_t, int4, uint4); #endif #ifdef cl_khr_depth_images void __ovld write_imagef(read_write image2d_depth_t, int2, float); void __ovld write_imagef(read_write image2d_array_depth_t, int4, float); #endif //cl_khr_depth_images #if defined(cl_khr_mipmap_image_writes) void __ovld write_imagef(read_write image1d_t, int, int, float4); void __ovld write_imagei(read_write image1d_t, int, int, int4); void __ovld write_imageui(read_write image1d_t, int, int, uint4); void __ovld write_imagef(read_write image1d_array_t, int2, int, float4); void __ovld write_imagei(read_write image1d_array_t, int2, int, int4); void __ovld write_imageui(read_write image1d_array_t, int2, int, uint4); void __ovld write_imagef(read_write image2d_t, int2, int, float4); void __ovld write_imagei(read_write image2d_t, int2, int, int4); void __ovld write_imageui(read_write image2d_t, int2, int, uint4); void __ovld write_imagef(read_write image2d_array_t, int4, int, float4); void __ovld write_imagei(read_write image2d_array_t, int4, int, int4); void __ovld write_imageui(read_write image2d_array_t, int4, int, uint4); void __ovld write_imagef(read_write image2d_depth_t, int2, int, float); void __ovld write_imagef(read_write image2d_array_depth_t, int4, int, float); #ifdef cl_khr_3d_image_writes void __ovld write_imagef(read_write image3d_t, int4, int, float4); void __ovld write_imagei(read_write image3d_t, int4, int, int4); void __ovld write_imageui(read_write image3d_t, int4, int, uint4); #endif //cl_khr_3d_image_writes #endif //cl_khr_mipmap_image_writes // Image write functions for half4 type #ifdef cl_khr_fp16 void __ovld write_imageh(read_write image1d_t, int, half4); void __ovld write_imageh(read_write image2d_t, int2, half4); #ifdef cl_khr_3d_image_writes void __ovld write_imageh(read_write image3d_t, int4, half4); #endif void __ovld write_imageh(read_write image1d_array_t, int2, half4); void __ovld write_imageh(read_write image2d_array_t, int4, half4); void __ovld write_imageh(read_write image1d_buffer_t, int, half4); #endif //cl_khr_fp16 #endif //defined(__opencl_c_read_write_images) // Note: In OpenCL v1.0/1.1/1.2, image argument of image query builtin functions does not have // access qualifier, which by default assume read_only access qualifier. Image query builtin // functions with write_only image argument should also be declared. /** * Return the image width in pixels. 
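 *
 * Illustrative usage (a minimal sketch; names are hypothetical, and the
 * sampler-less reads assume the OpenCL 1.2 declarations above): clamping a
 * 1D buffer-image access to the queried width.
 *
 *   __kernel void box3(read_only image1d_buffer_t src, __global float4 *dst) {
 *     int i  = (int)get_global_id(0);
 *     int w  = get_image_width(src);
 *     int lo = max(i - 1, 0);
 *     int hi = min(i + 1, w - 1);
 *     dst[i] = (read_imagef(src, lo) + read_imagef(src, i) +
 *               read_imagef(src, hi)) / 3.0f;
 *   }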
* */ int __ovld __cnfn get_image_width(read_only image1d_t); int __ovld __cnfn get_image_width(read_only image1d_buffer_t); int __ovld __cnfn get_image_width(read_only image2d_t); int __ovld __cnfn get_image_width(read_only image3d_t); int __ovld __cnfn get_image_width(read_only image1d_array_t); int __ovld __cnfn get_image_width(read_only image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_width(read_only image2d_depth_t); int __ovld __cnfn get_image_width(read_only image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_width(read_only image2d_msaa_t); int __ovld __cnfn get_image_width(read_only image2d_msaa_depth_t); int __ovld __cnfn get_image_width(read_only image2d_array_msaa_t); int __ovld __cnfn get_image_width(read_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing int __ovld __cnfn get_image_width(write_only image1d_t); int __ovld __cnfn get_image_width(write_only image1d_buffer_t); int __ovld __cnfn get_image_width(write_only image2d_t); #ifdef cl_khr_3d_image_writes int __ovld __cnfn get_image_width(write_only image3d_t); #endif int __ovld __cnfn get_image_width(write_only image1d_array_t); int __ovld __cnfn get_image_width(write_only image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_width(write_only image2d_depth_t); int __ovld __cnfn get_image_width(write_only image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_width(write_only image2d_msaa_t); int __ovld __cnfn get_image_width(write_only image2d_msaa_depth_t); int __ovld __cnfn get_image_width(write_only image2d_array_msaa_t); int __ovld __cnfn get_image_width(write_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #if defined(__opencl_c_read_write_images) int __ovld __cnfn get_image_width(read_write image1d_t); int __ovld __cnfn get_image_width(read_write image1d_buffer_t); int __ovld __cnfn get_image_width(read_write image2d_t); #ifdef cl_khr_3d_image_writes int __ovld __cnfn get_image_width(read_write image3d_t); #endif // cl_khr_3d_image_writes int __ovld __cnfn get_image_width(read_write image1d_array_t); int __ovld __cnfn get_image_width(read_write image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_width(read_write image2d_depth_t); int __ovld __cnfn get_image_width(read_write image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_width(read_write image2d_msaa_t); int __ovld __cnfn get_image_width(read_write image2d_msaa_depth_t); int __ovld __cnfn get_image_width(read_write image2d_array_msaa_t); int __ovld __cnfn get_image_width(read_write image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #endif //defined(__opencl_c_read_write_images) /** * Return the image height in pixels. 
*/ int __ovld __cnfn get_image_height(read_only image2d_t); int __ovld __cnfn get_image_height(read_only image3d_t); int __ovld __cnfn get_image_height(read_only image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_height(read_only image2d_depth_t); int __ovld __cnfn get_image_height(read_only image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_height(read_only image2d_msaa_t); int __ovld __cnfn get_image_height(read_only image2d_msaa_depth_t); int __ovld __cnfn get_image_height(read_only image2d_array_msaa_t); int __ovld __cnfn get_image_height(read_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing int __ovld __cnfn get_image_height(write_only image2d_t); #ifdef cl_khr_3d_image_writes int __ovld __cnfn get_image_height(write_only image3d_t); #endif int __ovld __cnfn get_image_height(write_only image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_height(write_only image2d_depth_t); int __ovld __cnfn get_image_height(write_only image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_height(write_only image2d_msaa_t); int __ovld __cnfn get_image_height(write_only image2d_msaa_depth_t); int __ovld __cnfn get_image_height(write_only image2d_array_msaa_t); int __ovld __cnfn get_image_height(write_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #if defined(__opencl_c_read_write_images) int __ovld __cnfn get_image_height(read_write image2d_t); #ifdef cl_khr_3d_image_writes int __ovld __cnfn get_image_height(read_write image3d_t); #endif // cl_khr_3d_image_writes int __ovld __cnfn get_image_height(read_write image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_height(read_write image2d_depth_t); int __ovld __cnfn get_image_height(read_write image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_height(read_write image2d_msaa_t); int __ovld __cnfn get_image_height(read_write image2d_msaa_depth_t); int __ovld __cnfn get_image_height(read_write image2d_array_msaa_t); int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #endif //defined(__opencl_c_read_write_images) /** * Return the image depth in pixels. */ int __ovld __cnfn get_image_depth(read_only image3d_t); #ifdef cl_khr_3d_image_writes int __ovld __cnfn get_image_depth(write_only image3d_t); #if defined(__opencl_c_read_write_images) int __ovld __cnfn get_image_depth(read_write image3d_t); #endif //defined(__opencl_c_read_write_images) #endif // cl_khr_3d_image_writes // OpenCL Extension v2.0 s9.18 - Mipmaps #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #ifdef cl_khr_mipmap_image /** * Return the image miplevels. 
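 *
 * Illustrative usage (a minimal sketch assuming cl_khr_mipmap_image; the
 * kernel, sampler and argument names are hypothetical): sampling the
 * coarsest mip level of a 2D image.
 *
 *   __constant sampler_t lin = CLK_NORMALIZED_COORDS_TRUE |
 *                              CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
 *
 *   __kernel void sample_coarsest(read_only image2d_t src, __global float4 *out) {
 *     // The highest valid lod is num_mip_levels - 1.
 *     float lod = (float)(get_image_num_mip_levels(src) - 1);
 *     out[0] = read_imagef(src, lin, (float2)(0.5f, 0.5f), lod);
 *   }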
*/ int __ovld get_image_num_mip_levels(read_only image1d_t); int __ovld get_image_num_mip_levels(read_only image2d_t); int __ovld get_image_num_mip_levels(read_only image3d_t); int __ovld get_image_num_mip_levels(write_only image1d_t); int __ovld get_image_num_mip_levels(write_only image2d_t); #ifdef cl_khr_3d_image_writes int __ovld get_image_num_mip_levels(write_only image3d_t); #endif #if defined(__opencl_c_read_write_images) int __ovld get_image_num_mip_levels(read_write image1d_t); int __ovld get_image_num_mip_levels(read_write image2d_t); #ifdef cl_khr_3d_image_writes int __ovld get_image_num_mip_levels(read_write image3d_t); #endif // cl_khr_3d_image_writes #endif //defined(__opencl_c_read_write_images) int __ovld get_image_num_mip_levels(read_only image1d_array_t); int __ovld get_image_num_mip_levels(read_only image2d_array_t); #ifdef cl_khr_depth_images int __ovld get_image_num_mip_levels(read_only image2d_array_depth_t); int __ovld get_image_num_mip_levels(read_only image2d_depth_t); #endif // cl_khr_depth_images int __ovld get_image_num_mip_levels(write_only image1d_array_t); int __ovld get_image_num_mip_levels(write_only image2d_array_t); #ifdef cl_khr_depth_images int __ovld get_image_num_mip_levels(write_only image2d_array_depth_t); int __ovld get_image_num_mip_levels(write_only image2d_depth_t); #endif // cl_khr_depth_images #if defined(__opencl_c_read_write_images) int __ovld get_image_num_mip_levels(read_write image1d_array_t); int __ovld get_image_num_mip_levels(read_write image2d_array_t); #ifdef cl_khr_depth_images int __ovld get_image_num_mip_levels(read_write image2d_array_depth_t); int __ovld get_image_num_mip_levels(read_write image2d_depth_t); #endif // cl_khr_depth_images #endif //defined(__opencl_c_read_write_images) #endif //cl_khr_mipmap_image #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Return the channel data type. 
Valid values are: * CLK_SNORM_INT8 * CLK_SNORM_INT16 * CLK_UNORM_INT8 * CLK_UNORM_INT16 * CLK_UNORM_SHORT_565 * CLK_UNORM_SHORT_555 * CLK_UNORM_SHORT_101010 * CLK_SIGNED_INT8 * CLK_SIGNED_INT16 * CLK_SIGNED_INT32 * CLK_UNSIGNED_INT8 * CLK_UNSIGNED_INT16 * CLK_UNSIGNED_INT32 * CLK_HALF_FLOAT * CLK_FLOAT */ int __ovld __cnfn get_image_channel_data_type(read_only image1d_t); int __ovld __cnfn get_image_channel_data_type(read_only image1d_buffer_t); int __ovld __cnfn get_image_channel_data_type(read_only image2d_t); int __ovld __cnfn get_image_channel_data_type(read_only image3d_t); int __ovld __cnfn get_image_channel_data_type(read_only image1d_array_t); int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_channel_data_type(read_only image2d_depth_t); int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_channel_data_type(read_only image2d_msaa_t); int __ovld __cnfn get_image_channel_data_type(read_only image2d_msaa_depth_t); int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_t); int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing int __ovld __cnfn get_image_channel_data_type(write_only image1d_t); int __ovld __cnfn get_image_channel_data_type(write_only image1d_buffer_t); int __ovld __cnfn get_image_channel_data_type(write_only image2d_t); #ifdef cl_khr_3d_image_writes int __ovld __cnfn get_image_channel_data_type(write_only image3d_t); #endif int __ovld __cnfn get_image_channel_data_type(write_only image1d_array_t); int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_channel_data_type(write_only image2d_depth_t); int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_channel_data_type(write_only image2d_msaa_t); int __ovld __cnfn get_image_channel_data_type(write_only image2d_msaa_depth_t); int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_t); int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #if defined(__opencl_c_read_write_images) int __ovld __cnfn get_image_channel_data_type(read_write image1d_t); int __ovld __cnfn get_image_channel_data_type(read_write image1d_buffer_t); int __ovld __cnfn get_image_channel_data_type(read_write image2d_t); #ifdef cl_khr_3d_image_writes int __ovld __cnfn get_image_channel_data_type(read_write image3d_t); #endif // cl_khr_3d_image_writes int __ovld __cnfn get_image_channel_data_type(read_write image1d_array_t); int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_channel_data_type(read_write image2d_depth_t); int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_t); int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_depth_t); int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_t); int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #endif 
//defined(__opencl_c_read_write_images) /** * Return the image channel order. Valid values are: * CLK_A * CLK_R * CLK_Rx * CLK_RG * CLK_RGx * CLK_RA * CLK_RGB * CLK_RGBx * CLK_RGBA * CLK_ARGB * CLK_BGRA * CLK_INTENSITY * CLK_LUMINANCE */ int __ovld __cnfn get_image_channel_order(read_only image1d_t); int __ovld __cnfn get_image_channel_order(read_only image1d_buffer_t); int __ovld __cnfn get_image_channel_order(read_only image2d_t); int __ovld __cnfn get_image_channel_order(read_only image3d_t); int __ovld __cnfn get_image_channel_order(read_only image1d_array_t); int __ovld __cnfn get_image_channel_order(read_only image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_channel_order(read_only image2d_depth_t); int __ovld __cnfn get_image_channel_order(read_only image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_channel_order(read_only image2d_msaa_t); int __ovld __cnfn get_image_channel_order(read_only image2d_msaa_depth_t); int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_t); int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing int __ovld __cnfn get_image_channel_order(write_only image1d_t); int __ovld __cnfn get_image_channel_order(write_only image1d_buffer_t); int __ovld __cnfn get_image_channel_order(write_only image2d_t); #ifdef cl_khr_3d_image_writes int __ovld __cnfn get_image_channel_order(write_only image3d_t); #endif int __ovld __cnfn get_image_channel_order(write_only image1d_array_t); int __ovld __cnfn get_image_channel_order(write_only image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_channel_order(write_only image2d_depth_t); int __ovld __cnfn get_image_channel_order(write_only image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_channel_order(write_only image2d_msaa_t); int __ovld __cnfn get_image_channel_order(write_only image2d_msaa_depth_t); int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_t); int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #if defined(__opencl_c_read_write_images) int __ovld __cnfn get_image_channel_order(read_write image1d_t); int __ovld __cnfn get_image_channel_order(read_write image1d_buffer_t); int __ovld __cnfn get_image_channel_order(read_write image2d_t); #ifdef cl_khr_3d_image_writes int __ovld __cnfn get_image_channel_order(read_write image3d_t); #endif // cl_khr_3d_image_writes int __ovld __cnfn get_image_channel_order(read_write image1d_array_t); int __ovld __cnfn get_image_channel_order(read_write image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_channel_order(read_write image2d_depth_t); int __ovld __cnfn get_image_channel_order(read_write image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_t); int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_depth_t); int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_t); int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #endif //defined(__opencl_c_read_write_images) /** * Return the 2D image width and height as an int2 * type. The width is returned in the x component, and * the height in the y component. 
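 *
 * Illustrative usage (a minimal sketch; names are hypothetical, and the
 * sampler-less read assumes OpenCL 1.2 or later): using the packed
 * dimensions to guard against out-of-range work-items.
 *
 *   __kernel void invert(read_only image2d_t src, write_only image2d_t dst) {
 *     int2 coord = (int2)((int)get_global_id(0), (int)get_global_id(1));
 *     int2 dim = get_image_dim(src);
 *     if (coord.x < dim.x && coord.y < dim.y) {
 *       float4 p = read_imagef(src, coord);  // sampler-less read
 *       write_imagef(dst, coord, (float4)(1.0f) - p);
 *     }
 *   }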
*/ int2 __ovld __cnfn get_image_dim(read_only image2d_t); int2 __ovld __cnfn get_image_dim(read_only image2d_array_t); #ifdef cl_khr_depth_images int2 __ovld __cnfn get_image_dim(read_only image2d_array_depth_t); int2 __ovld __cnfn get_image_dim(read_only image2d_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int2 __ovld __cnfn get_image_dim(read_only image2d_msaa_t); int2 __ovld __cnfn get_image_dim(read_only image2d_msaa_depth_t); int2 __ovld __cnfn get_image_dim(read_only image2d_array_msaa_t); int2 __ovld __cnfn get_image_dim(read_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing int2 __ovld __cnfn get_image_dim(write_only image2d_t); int2 __ovld __cnfn get_image_dim(write_only image2d_array_t); #ifdef cl_khr_depth_images int2 __ovld __cnfn get_image_dim(write_only image2d_array_depth_t); int2 __ovld __cnfn get_image_dim(write_only image2d_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int2 __ovld __cnfn get_image_dim(write_only image2d_msaa_t); int2 __ovld __cnfn get_image_dim(write_only image2d_msaa_depth_t); int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_t); int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #if defined(__opencl_c_read_write_images) int2 __ovld __cnfn get_image_dim(read_write image2d_t); int2 __ovld __cnfn get_image_dim(read_write image2d_array_t); #ifdef cl_khr_depth_images int2 __ovld __cnfn get_image_dim(read_write image2d_array_depth_t); int2 __ovld __cnfn get_image_dim(read_write image2d_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_t); int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_depth_t); int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_t); int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #endif //defined(__opencl_c_read_write_images) /** * Return the 3D image width, height, and depth as an * int4 type. The width is returned in the x * component, height in the y component, depth in the z * component and the w component is 0. */ int4 __ovld __cnfn get_image_dim(read_only image3d_t); #ifdef cl_khr_3d_image_writes int4 __ovld __cnfn get_image_dim(write_only image3d_t); #if defined(__opencl_c_read_write_images) int4 __ovld __cnfn get_image_dim(read_write image3d_t); #endif //defined(__opencl_c_read_write_images) #endif // cl_khr_3d_image_writes /** * Return the image array size. 
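 *
 * Illustrative usage (a minimal sketch; names are hypothetical, and the
 * sampler-less read assumes OpenCL 1.2 or later): accumulating the same
 * pixel across every layer of a 2D image array.
 *
 *   __kernel void sum_layers(read_only image2d_array_t arr, __global float4 *out) {
 *     int2 xy = (int2)((int)get_global_id(0), (int)get_global_id(1));
 *     float4 acc = (float4)(0.0f);
 *     for (int layer = 0; layer < (int)get_image_array_size(arr); ++layer)
 *       acc += read_imagef(arr, (int4)(xy.x, xy.y, layer, 0));
 *     out[xy.y * get_image_width(arr) + xy.x] = acc;
 *   }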
*/ size_t __ovld __cnfn get_image_array_size(read_only image1d_array_t); size_t __ovld __cnfn get_image_array_size(read_only image2d_array_t); #ifdef cl_khr_depth_images size_t __ovld __cnfn get_image_array_size(read_only image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) size_t __ovld __cnfn get_image_array_size(read_only image2d_array_msaa_t); size_t __ovld __cnfn get_image_array_size(read_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing size_t __ovld __cnfn get_image_array_size(write_only image1d_array_t); size_t __ovld __cnfn get_image_array_size(write_only image2d_array_t); #ifdef cl_khr_depth_images size_t __ovld __cnfn get_image_array_size(write_only image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_t); size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #if defined(__opencl_c_read_write_images) size_t __ovld __cnfn get_image_array_size(read_write image1d_array_t); size_t __ovld __cnfn get_image_array_size(read_write image2d_array_t); #ifdef cl_khr_depth_images size_t __ovld __cnfn get_image_array_size(read_write image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_t); size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #endif //defined(__opencl_c_read_write_images) /** * Return the number of samples associated with image */ #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_num_samples(read_only image2d_msaa_t); int __ovld __cnfn get_image_num_samples(read_only image2d_msaa_depth_t); int __ovld __cnfn get_image_num_samples(read_only image2d_array_msaa_t); int __ovld __cnfn get_image_num_samples(read_only image2d_array_msaa_depth_t); int __ovld __cnfn get_image_num_samples(write_only image2d_msaa_t); int __ovld __cnfn get_image_num_samples(write_only image2d_msaa_depth_t); int __ovld __cnfn get_image_num_samples(write_only image2d_array_msaa_t); int __ovld __cnfn get_image_num_samples(write_only image2d_array_msaa_depth_t); #if defined(__opencl_c_read_write_images) int __ovld __cnfn get_image_num_samples(read_write image2d_msaa_t); int __ovld __cnfn get_image_num_samples(read_write image2d_msaa_depth_t); int __ovld __cnfn get_image_num_samples(read_write image2d_array_msaa_t); int __ovld __cnfn get_image_num_samples(read_write image2d_array_msaa_depth_t); #endif //defined(__opencl_c_read_write_images) #endif // OpenCL v2.0 s6.13.15 - Work-group Functions #if defined(__opencl_c_work_group_collective_functions) int __ovld __conv work_group_all(int predicate); int __ovld __conv work_group_any(int predicate); #ifdef cl_khr_fp16 half __ovld __conv work_group_broadcast(half, size_t local_id); half __ovld __conv work_group_broadcast(half, size_t, size_t); half __ovld __conv work_group_broadcast(half, size_t, size_t, size_t); #endif int __ovld __conv work_group_broadcast(int, size_t local_id); int __ovld __conv work_group_broadcast(int, size_t, size_t); int __ovld __conv work_group_broadcast(int, size_t, size_t, size_t); uint __ovld __conv work_group_broadcast(uint, size_t local_id); uint __ovld __conv work_group_broadcast(uint, size_t, size_t); uint __ovld __conv work_group_broadcast(uint, size_t, size_t, size_t); long __ovld __conv work_group_broadcast(long, size_t local_id); long 
__ovld __conv work_group_broadcast(long, size_t, size_t); long __ovld __conv work_group_broadcast(long, size_t, size_t, size_t); ulong __ovld __conv work_group_broadcast(ulong, size_t local_id); ulong __ovld __conv work_group_broadcast(ulong, size_t, size_t); ulong __ovld __conv work_group_broadcast(ulong, size_t, size_t, size_t); float __ovld __conv work_group_broadcast(float, size_t local_id); float __ovld __conv work_group_broadcast(float, size_t, size_t); float __ovld __conv work_group_broadcast(float, size_t, size_t, size_t); #ifdef cl_khr_fp64 double __ovld __conv work_group_broadcast(double, size_t local_id); double __ovld __conv work_group_broadcast(double, size_t, size_t); double __ovld __conv work_group_broadcast(double, size_t, size_t, size_t); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __conv work_group_reduce_add(half); half __ovld __conv work_group_reduce_min(half); half __ovld __conv work_group_reduce_max(half); half __ovld __conv work_group_scan_exclusive_add(half); half __ovld __conv work_group_scan_exclusive_min(half); half __ovld __conv work_group_scan_exclusive_max(half); half __ovld __conv work_group_scan_inclusive_add(half); half __ovld __conv work_group_scan_inclusive_min(half); half __ovld __conv work_group_scan_inclusive_max(half); #endif int __ovld __conv work_group_reduce_add(int); int __ovld __conv work_group_reduce_min(int); int __ovld __conv work_group_reduce_max(int); int __ovld __conv work_group_scan_exclusive_add(int); int __ovld __conv work_group_scan_exclusive_min(int); int __ovld __conv work_group_scan_exclusive_max(int); int __ovld __conv work_group_scan_inclusive_add(int); int __ovld __conv work_group_scan_inclusive_min(int); int __ovld __conv work_group_scan_inclusive_max(int); uint __ovld __conv work_group_reduce_add(uint); uint __ovld __conv work_group_reduce_min(uint); uint __ovld __conv work_group_reduce_max(uint); uint __ovld __conv work_group_scan_exclusive_add(uint); uint __ovld __conv work_group_scan_exclusive_min(uint); uint __ovld __conv work_group_scan_exclusive_max(uint); uint __ovld __conv work_group_scan_inclusive_add(uint); uint __ovld __conv work_group_scan_inclusive_min(uint); uint __ovld __conv work_group_scan_inclusive_max(uint); long __ovld __conv work_group_reduce_add(long); long __ovld __conv work_group_reduce_min(long); long __ovld __conv work_group_reduce_max(long); long __ovld __conv work_group_scan_exclusive_add(long); long __ovld __conv work_group_scan_exclusive_min(long); long __ovld __conv work_group_scan_exclusive_max(long); long __ovld __conv work_group_scan_inclusive_add(long); long __ovld __conv work_group_scan_inclusive_min(long); long __ovld __conv work_group_scan_inclusive_max(long); ulong __ovld __conv work_group_reduce_add(ulong); ulong __ovld __conv work_group_reduce_min(ulong); ulong __ovld __conv work_group_reduce_max(ulong); ulong __ovld __conv work_group_scan_exclusive_add(ulong); ulong __ovld __conv work_group_scan_exclusive_min(ulong); ulong __ovld __conv work_group_scan_exclusive_max(ulong); ulong __ovld __conv work_group_scan_inclusive_add(ulong); ulong __ovld __conv work_group_scan_inclusive_min(ulong); ulong __ovld __conv work_group_scan_inclusive_max(ulong); float __ovld __conv work_group_reduce_add(float); float __ovld __conv work_group_reduce_min(float); float __ovld __conv work_group_reduce_max(float); float __ovld __conv work_group_scan_exclusive_add(float); float __ovld __conv work_group_scan_exclusive_min(float); float __ovld __conv work_group_scan_exclusive_max(float); float __ovld 
__conv work_group_scan_inclusive_add(float); float __ovld __conv work_group_scan_inclusive_min(float); float __ovld __conv work_group_scan_inclusive_max(float); #ifdef cl_khr_fp64 double __ovld __conv work_group_reduce_add(double); double __ovld __conv work_group_reduce_min(double); double __ovld __conv work_group_reduce_max(double); double __ovld __conv work_group_scan_exclusive_add(double); double __ovld __conv work_group_scan_exclusive_min(double); double __ovld __conv work_group_scan_exclusive_max(double); double __ovld __conv work_group_scan_inclusive_add(double); double __ovld __conv work_group_scan_inclusive_min(double); double __ovld __conv work_group_scan_inclusive_max(double); #endif //cl_khr_fp64 #endif //defined(__opencl_c_work_group_collective_functions) // OpenCL v2.0 s6.13.16 - Pipe Functions #if defined(__opencl_c_pipes) bool __ovld is_valid_reserve_id(reserve_id_t reserve_id); #endif //defined(__opencl_c_pipes) // OpenCL v2.0 s6.13.17 - Enqueue Kernels #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #ifdef __opencl_c_device_enqueue ndrange_t __ovld ndrange_1D(size_t); ndrange_t __ovld ndrange_1D(size_t, size_t); ndrange_t __ovld ndrange_1D(size_t, size_t, size_t); ndrange_t __ovld ndrange_2D(const size_t[2]); ndrange_t __ovld ndrange_2D(const size_t[2], const size_t[2]); ndrange_t __ovld ndrange_2D(const size_t[2], const size_t[2], const size_t[2]); ndrange_t __ovld ndrange_3D(const size_t[3]); ndrange_t __ovld ndrange_3D(const size_t[3], const size_t[3]); ndrange_t __ovld ndrange_3D(const size_t[3], const size_t[3], const size_t[3]); int __ovld enqueue_marker(queue_t, uint, const clk_event_t*, clk_event_t*); void __ovld retain_event(clk_event_t); void __ovld release_event(clk_event_t); clk_event_t __ovld create_user_event(void); void __ovld set_user_event_status(clk_event_t e, int state); bool __ovld is_valid_event (clk_event_t event); void __ovld capture_event_profiling_info(clk_event_t, clk_profiling_info, __global void*); queue_t __ovld get_default_queue(void); #endif //__opencl_c_device_enqueue #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // OpenCL Extension v2.0 s9.17 - Sub-groups #if defined(__opencl_subgroup_builtins) // Shared Sub Group Functions uint __ovld get_sub_group_size(void); uint __ovld get_max_sub_group_size(void); uint __ovld get_num_sub_groups(void); #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) uint __ovld get_enqueued_num_sub_groups(void); #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) uint __ovld get_sub_group_id(void); uint __ovld get_sub_group_local_id(void); void __ovld __conv sub_group_barrier(cl_mem_fence_flags); #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) void __ovld __conv sub_group_barrier(cl_mem_fence_flags, memory_scope); #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) int __ovld __conv sub_group_all(int predicate); int __ovld __conv sub_group_any(int predicate); int __ovld __conv sub_group_broadcast(int , uint sub_group_local_id); uint __ovld __conv sub_group_broadcast(uint , uint sub_group_local_id); long __ovld __conv sub_group_broadcast(long , uint sub_group_local_id); ulong __ovld __conv sub_group_broadcast(ulong, uint sub_group_local_id); float __ovld __conv sub_group_broadcast(float, uint sub_group_local_id); int __ovld __conv sub_group_reduce_add(int ); uint __ovld __conv sub_group_reduce_add(uint ); long __ovld __conv 
sub_group_reduce_add(long ); ulong __ovld __conv sub_group_reduce_add(ulong); float __ovld __conv sub_group_reduce_add(float); int __ovld __conv sub_group_reduce_min(int ); uint __ovld __conv sub_group_reduce_min(uint ); long __ovld __conv sub_group_reduce_min(long ); ulong __ovld __conv sub_group_reduce_min(ulong); float __ovld __conv sub_group_reduce_min(float); int __ovld __conv sub_group_reduce_max(int ); uint __ovld __conv sub_group_reduce_max(uint ); long __ovld __conv sub_group_reduce_max(long ); ulong __ovld __conv sub_group_reduce_max(ulong); float __ovld __conv sub_group_reduce_max(float); int __ovld __conv sub_group_scan_exclusive_add(int ); uint __ovld __conv sub_group_scan_exclusive_add(uint ); long __ovld __conv sub_group_scan_exclusive_add(long ); ulong __ovld __conv sub_group_scan_exclusive_add(ulong); float __ovld __conv sub_group_scan_exclusive_add(float); int __ovld __conv sub_group_scan_exclusive_min(int ); uint __ovld __conv sub_group_scan_exclusive_min(uint ); long __ovld __conv sub_group_scan_exclusive_min(long ); ulong __ovld __conv sub_group_scan_exclusive_min(ulong); float __ovld __conv sub_group_scan_exclusive_min(float); int __ovld __conv sub_group_scan_exclusive_max(int ); uint __ovld __conv sub_group_scan_exclusive_max(uint ); long __ovld __conv sub_group_scan_exclusive_max(long ); ulong __ovld __conv sub_group_scan_exclusive_max(ulong); float __ovld __conv sub_group_scan_exclusive_max(float); int __ovld __conv sub_group_scan_inclusive_add(int ); uint __ovld __conv sub_group_scan_inclusive_add(uint ); long __ovld __conv sub_group_scan_inclusive_add(long ); ulong __ovld __conv sub_group_scan_inclusive_add(ulong); float __ovld __conv sub_group_scan_inclusive_add(float); int __ovld __conv sub_group_scan_inclusive_min(int ); uint __ovld __conv sub_group_scan_inclusive_min(uint ); long __ovld __conv sub_group_scan_inclusive_min(long ); ulong __ovld __conv sub_group_scan_inclusive_min(ulong); float __ovld __conv sub_group_scan_inclusive_min(float); int __ovld __conv sub_group_scan_inclusive_max(int ); uint __ovld __conv sub_group_scan_inclusive_max(uint ); long __ovld __conv sub_group_scan_inclusive_max(long ); ulong __ovld __conv sub_group_scan_inclusive_max(ulong); float __ovld __conv sub_group_scan_inclusive_max(float); #ifdef cl_khr_fp16 half __ovld __conv sub_group_broadcast(half, uint sub_group_local_id); half __ovld __conv sub_group_reduce_add(half); half __ovld __conv sub_group_reduce_min(half); half __ovld __conv sub_group_reduce_max(half); half __ovld __conv sub_group_scan_exclusive_add(half); half __ovld __conv sub_group_scan_exclusive_min(half); half __ovld __conv sub_group_scan_exclusive_max(half); half __ovld __conv sub_group_scan_inclusive_add(half); half __ovld __conv sub_group_scan_inclusive_min(half); half __ovld __conv sub_group_scan_inclusive_max(half); #endif //cl_khr_fp16 #ifdef cl_khr_fp64 double __ovld __conv sub_group_broadcast(double, uint sub_group_local_id); double __ovld __conv sub_group_reduce_add(double); double __ovld __conv sub_group_reduce_min(double); double __ovld __conv sub_group_reduce_max(double); double __ovld __conv sub_group_scan_exclusive_add(double); double __ovld __conv sub_group_scan_exclusive_min(double); double __ovld __conv sub_group_scan_exclusive_max(double); double __ovld __conv sub_group_scan_inclusive_add(double); double __ovld __conv sub_group_scan_inclusive_min(double); double __ovld __conv sub_group_scan_inclusive_max(double); #endif //cl_khr_fp64 #endif // __opencl_subgroup_builtins #if 
defined(cl_khr_subgroup_extended_types) char __ovld __conv sub_group_broadcast( char value, uint index ); char2 __ovld __conv sub_group_broadcast( char2 value, uint index ); char3 __ovld __conv sub_group_broadcast( char3 value, uint index ); char4 __ovld __conv sub_group_broadcast( char4 value, uint index ); char8 __ovld __conv sub_group_broadcast( char8 value, uint index ); char16 __ovld __conv sub_group_broadcast( char16 value, uint index ); uchar __ovld __conv sub_group_broadcast( uchar value, uint index ); uchar2 __ovld __conv sub_group_broadcast( uchar2 value, uint index ); uchar3 __ovld __conv sub_group_broadcast( uchar3 value, uint index ); uchar4 __ovld __conv sub_group_broadcast( uchar4 value, uint index ); uchar8 __ovld __conv sub_group_broadcast( uchar8 value, uint index ); uchar16 __ovld __conv sub_group_broadcast( uchar16 value, uint index ); short __ovld __conv sub_group_broadcast( short value, uint index ); short2 __ovld __conv sub_group_broadcast( short2 value, uint index ); short3 __ovld __conv sub_group_broadcast( short3 value, uint index ); short4 __ovld __conv sub_group_broadcast( short4 value, uint index ); short8 __ovld __conv sub_group_broadcast( short8 value, uint index ); short16 __ovld __conv sub_group_broadcast( short16 value, uint index ); ushort __ovld __conv sub_group_broadcast( ushort value, uint index ); ushort2 __ovld __conv sub_group_broadcast( ushort2 value, uint index ); ushort3 __ovld __conv sub_group_broadcast( ushort3 value, uint index ); ushort4 __ovld __conv sub_group_broadcast( ushort4 value, uint index ); ushort8 __ovld __conv sub_group_broadcast( ushort8 value, uint index ); ushort16 __ovld __conv sub_group_broadcast( ushort16 value, uint index ); // scalar int broadcast is part of cl_khr_subgroups int2 __ovld __conv sub_group_broadcast( int2 value, uint index ); int3 __ovld __conv sub_group_broadcast( int3 value, uint index ); int4 __ovld __conv sub_group_broadcast( int4 value, uint index ); int8 __ovld __conv sub_group_broadcast( int8 value, uint index ); int16 __ovld __conv sub_group_broadcast( int16 value, uint index ); // scalar uint broadcast is part of cl_khr_subgroups uint2 __ovld __conv sub_group_broadcast( uint2 value, uint index ); uint3 __ovld __conv sub_group_broadcast( uint3 value, uint index ); uint4 __ovld __conv sub_group_broadcast( uint4 value, uint index ); uint8 __ovld __conv sub_group_broadcast( uint8 value, uint index ); uint16 __ovld __conv sub_group_broadcast( uint16 value, uint index ); // scalar long broadcast is part of cl_khr_subgroups long2 __ovld __conv sub_group_broadcast( long2 value, uint index ); long3 __ovld __conv sub_group_broadcast( long3 value, uint index ); long4 __ovld __conv sub_group_broadcast( long4 value, uint index ); long8 __ovld __conv sub_group_broadcast( long8 value, uint index ); long16 __ovld __conv sub_group_broadcast( long16 value, uint index ); // scalar ulong broadcast is part of cl_khr_subgroups ulong2 __ovld __conv sub_group_broadcast( ulong2 value, uint index ); ulong3 __ovld __conv sub_group_broadcast( ulong3 value, uint index ); ulong4 __ovld __conv sub_group_broadcast( ulong4 value, uint index ); ulong8 __ovld __conv sub_group_broadcast( ulong8 value, uint index ); ulong16 __ovld __conv sub_group_broadcast( ulong16 value, uint index ); // scalar float broadcast is part of cl_khr_subgroups float2 __ovld __conv sub_group_broadcast( float2 value, uint index ); float3 __ovld __conv sub_group_broadcast( float3 value, uint index ); float4 __ovld __conv sub_group_broadcast( float4 value, uint 
index ); float8 __ovld __conv sub_group_broadcast( float8 value, uint index ); float16 __ovld __conv sub_group_broadcast( float16 value, uint index ); char __ovld __conv sub_group_reduce_add( char value ); uchar __ovld __conv sub_group_reduce_add( uchar value ); short __ovld __conv sub_group_reduce_add( short value ); ushort __ovld __conv sub_group_reduce_add( ushort value ); char __ovld __conv sub_group_reduce_min( char value ); uchar __ovld __conv sub_group_reduce_min( uchar value ); short __ovld __conv sub_group_reduce_min( short value ); ushort __ovld __conv sub_group_reduce_min( ushort value ); char __ovld __conv sub_group_reduce_max( char value ); uchar __ovld __conv sub_group_reduce_max( uchar value ); short __ovld __conv sub_group_reduce_max( short value ); ushort __ovld __conv sub_group_reduce_max( ushort value ); char __ovld __conv sub_group_scan_inclusive_add( char value ); uchar __ovld __conv sub_group_scan_inclusive_add( uchar value ); short __ovld __conv sub_group_scan_inclusive_add( short value ); ushort __ovld __conv sub_group_scan_inclusive_add( ushort value ); char __ovld __conv sub_group_scan_inclusive_min( char value ); uchar __ovld __conv sub_group_scan_inclusive_min( uchar value ); short __ovld __conv sub_group_scan_inclusive_min( short value ); ushort __ovld __conv sub_group_scan_inclusive_min( ushort value ); char __ovld __conv sub_group_scan_inclusive_max( char value ); uchar __ovld __conv sub_group_scan_inclusive_max( uchar value ); short __ovld __conv sub_group_scan_inclusive_max( short value ); ushort __ovld __conv sub_group_scan_inclusive_max( ushort value ); char __ovld __conv sub_group_scan_exclusive_add( char value ); uchar __ovld __conv sub_group_scan_exclusive_add( uchar value ); short __ovld __conv sub_group_scan_exclusive_add( short value ); ushort __ovld __conv sub_group_scan_exclusive_add( ushort value ); char __ovld __conv sub_group_scan_exclusive_min( char value ); uchar __ovld __conv sub_group_scan_exclusive_min( uchar value ); short __ovld __conv sub_group_scan_exclusive_min( short value ); ushort __ovld __conv sub_group_scan_exclusive_min( ushort value ); char __ovld __conv sub_group_scan_exclusive_max( char value ); uchar __ovld __conv sub_group_scan_exclusive_max( uchar value ); short __ovld __conv sub_group_scan_exclusive_max( short value ); ushort __ovld __conv sub_group_scan_exclusive_max( ushort value ); #if defined(cl_khr_fp16) // scalar half broadcast is part of cl_khr_subgroups half2 __ovld __conv sub_group_broadcast( half2 value, uint index ); half3 __ovld __conv sub_group_broadcast( half3 value, uint index ); half4 __ovld __conv sub_group_broadcast( half4 value, uint index ); half8 __ovld __conv sub_group_broadcast( half8 value, uint index ); half16 __ovld __conv sub_group_broadcast( half16 value, uint index ); #endif // cl_khr_fp16 #if defined(cl_khr_fp64) // scalar double broadcast is part of cl_khr_subgroups double2 __ovld __conv sub_group_broadcast( double2 value, uint index ); double3 __ovld __conv sub_group_broadcast( double3 value, uint index ); double4 __ovld __conv sub_group_broadcast( double4 value, uint index ); double8 __ovld __conv sub_group_broadcast( double8 value, uint index ); double16 __ovld __conv sub_group_broadcast( double16 value, uint index ); #endif // cl_khr_fp64 #endif // cl_khr_subgroup_extended_types #if defined(cl_khr_subgroup_non_uniform_vote) int __ovld sub_group_elect(void); int __ovld sub_group_non_uniform_all( int predicate ); int __ovld sub_group_non_uniform_any( int predicate ); int __ovld 
sub_group_non_uniform_all_equal( char value ); int __ovld sub_group_non_uniform_all_equal( uchar value ); int __ovld sub_group_non_uniform_all_equal( short value ); int __ovld sub_group_non_uniform_all_equal( ushort value ); int __ovld sub_group_non_uniform_all_equal( int value ); int __ovld sub_group_non_uniform_all_equal( uint value ); int __ovld sub_group_non_uniform_all_equal( long value ); int __ovld sub_group_non_uniform_all_equal( ulong value ); int __ovld sub_group_non_uniform_all_equal( float value ); #if defined(cl_khr_fp16) int __ovld sub_group_non_uniform_all_equal( half value ); #endif // cl_khr_fp16 #if defined(cl_khr_fp64) int __ovld sub_group_non_uniform_all_equal( double value ); #endif // cl_khr_fp64 #endif // cl_khr_subgroup_non_uniform_vote #if defined(cl_khr_subgroup_ballot) char __ovld sub_group_non_uniform_broadcast( char value, uint index ); char2 __ovld sub_group_non_uniform_broadcast( char2 value, uint index ); char3 __ovld sub_group_non_uniform_broadcast( char3 value, uint index ); char4 __ovld sub_group_non_uniform_broadcast( char4 value, uint index ); char8 __ovld sub_group_non_uniform_broadcast( char8 value, uint index ); char16 __ovld sub_group_non_uniform_broadcast( char16 value, uint index ); uchar __ovld sub_group_non_uniform_broadcast( uchar value, uint index ); uchar2 __ovld sub_group_non_uniform_broadcast( uchar2 value, uint index ); uchar3 __ovld sub_group_non_uniform_broadcast( uchar3 value, uint index ); uchar4 __ovld sub_group_non_uniform_broadcast( uchar4 value, uint index ); uchar8 __ovld sub_group_non_uniform_broadcast( uchar8 value, uint index ); uchar16 __ovld sub_group_non_uniform_broadcast( uchar16 value, uint index ); short __ovld sub_group_non_uniform_broadcast( short value, uint index ); short2 __ovld sub_group_non_uniform_broadcast( short2 value, uint index ); short3 __ovld sub_group_non_uniform_broadcast( short3 value, uint index ); short4 __ovld sub_group_non_uniform_broadcast( short4 value, uint index ); short8 __ovld sub_group_non_uniform_broadcast( short8 value, uint index ); short16 __ovld sub_group_non_uniform_broadcast( short16 value, uint index ); ushort __ovld sub_group_non_uniform_broadcast( ushort value, uint index ); ushort2 __ovld sub_group_non_uniform_broadcast( ushort2 value, uint index ); ushort3 __ovld sub_group_non_uniform_broadcast( ushort3 value, uint index ); ushort4 __ovld sub_group_non_uniform_broadcast( ushort4 value, uint index ); ushort8 __ovld sub_group_non_uniform_broadcast( ushort8 value, uint index ); ushort16 __ovld sub_group_non_uniform_broadcast( ushort16 value, uint index ); int __ovld sub_group_non_uniform_broadcast( int value, uint index ); int2 __ovld sub_group_non_uniform_broadcast( int2 value, uint index ); int3 __ovld sub_group_non_uniform_broadcast( int3 value, uint index ); int4 __ovld sub_group_non_uniform_broadcast( int4 value, uint index ); int8 __ovld sub_group_non_uniform_broadcast( int8 value, uint index ); int16 __ovld sub_group_non_uniform_broadcast( int16 value, uint index ); uint __ovld sub_group_non_uniform_broadcast( uint value, uint index ); uint2 __ovld sub_group_non_uniform_broadcast( uint2 value, uint index ); uint3 __ovld sub_group_non_uniform_broadcast( uint3 value, uint index ); uint4 __ovld sub_group_non_uniform_broadcast( uint4 value, uint index ); uint8 __ovld sub_group_non_uniform_broadcast( uint8 value, uint index ); uint16 __ovld sub_group_non_uniform_broadcast( uint16 value, uint index ); long __ovld sub_group_non_uniform_broadcast( long value, uint index ); long2 __ovld 
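/* Usage sketch for cl_khr_subgroup_non_uniform_vote (illustrative; names are
   hypothetical).  Only the lanes that reach the call participate:

   kernel void vote_sketch(global const int *in, global int *out) {
       size_t gid = get_global_id(0);
       int v = in[gid];
       if (v != 0) {                        // possibly divergent branch
           // Non-zero only if every active lane holds the same value.
           int uniform_v = sub_group_non_uniform_all_equal(v);
           // Exactly one active lane sees a non-zero result from elect().
           int leader = sub_group_elect();
           out[gid] = 2 * uniform_v + leader;
       }
   }
*/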
sub_group_non_uniform_broadcast( long2 value, uint index ); long3 __ovld sub_group_non_uniform_broadcast( long3 value, uint index ); long4 __ovld sub_group_non_uniform_broadcast( long4 value, uint index ); long8 __ovld sub_group_non_uniform_broadcast( long8 value, uint index ); long16 __ovld sub_group_non_uniform_broadcast( long16 value, uint index ); ulong __ovld sub_group_non_uniform_broadcast( ulong value, uint index ); ulong2 __ovld sub_group_non_uniform_broadcast( ulong2 value, uint index ); ulong3 __ovld sub_group_non_uniform_broadcast( ulong3 value, uint index ); ulong4 __ovld sub_group_non_uniform_broadcast( ulong4 value, uint index ); ulong8 __ovld sub_group_non_uniform_broadcast( ulong8 value, uint index ); ulong16 __ovld sub_group_non_uniform_broadcast( ulong16 value, uint index ); float __ovld sub_group_non_uniform_broadcast( float value, uint index ); float2 __ovld sub_group_non_uniform_broadcast( float2 value, uint index ); float3 __ovld sub_group_non_uniform_broadcast( float3 value, uint index ); float4 __ovld sub_group_non_uniform_broadcast( float4 value, uint index ); float8 __ovld sub_group_non_uniform_broadcast( float8 value, uint index ); float16 __ovld sub_group_non_uniform_broadcast( float16 value, uint index ); char __ovld sub_group_broadcast_first( char value ); uchar __ovld sub_group_broadcast_first( uchar value ); short __ovld sub_group_broadcast_first( short value ); ushort __ovld sub_group_broadcast_first( ushort value ); int __ovld sub_group_broadcast_first( int value ); uint __ovld sub_group_broadcast_first( uint value ); long __ovld sub_group_broadcast_first( long value ); ulong __ovld sub_group_broadcast_first( ulong value ); float __ovld sub_group_broadcast_first( float value ); uint4 __ovld sub_group_ballot( int predicate ); int __ovld __cnfn sub_group_inverse_ballot( uint4 value ); int __ovld __cnfn sub_group_ballot_bit_extract( uint4 value, uint index ); uint __ovld __cnfn sub_group_ballot_bit_count( uint4 value ); uint __ovld sub_group_ballot_inclusive_scan( uint4 value ); uint __ovld sub_group_ballot_exclusive_scan( uint4 value ); uint __ovld sub_group_ballot_find_lsb( uint4 value ); uint __ovld sub_group_ballot_find_msb( uint4 value ); uint4 __ovld __cnfn get_sub_group_eq_mask(void); uint4 __ovld __cnfn get_sub_group_ge_mask(void); uint4 __ovld __cnfn get_sub_group_gt_mask(void); uint4 __ovld __cnfn get_sub_group_le_mask(void); uint4 __ovld __cnfn get_sub_group_lt_mask(void); #if defined(cl_khr_fp16) half __ovld sub_group_non_uniform_broadcast( half value, uint index ); half2 __ovld sub_group_non_uniform_broadcast( half2 value, uint index ); half3 __ovld sub_group_non_uniform_broadcast( half3 value, uint index ); half4 __ovld sub_group_non_uniform_broadcast( half4 value, uint index ); half8 __ovld sub_group_non_uniform_broadcast( half8 value, uint index ); half16 __ovld sub_group_non_uniform_broadcast( half16 value, uint index ); half __ovld sub_group_broadcast_first( half value ); #endif // cl_khr_fp16 #if defined(cl_khr_fp64) double __ovld sub_group_non_uniform_broadcast( double value, uint index ); double2 __ovld sub_group_non_uniform_broadcast( double2 value, uint index ); double3 __ovld sub_group_non_uniform_broadcast( double3 value, uint index ); double4 __ovld sub_group_non_uniform_broadcast( double4 value, uint index ); double8 __ovld sub_group_non_uniform_broadcast( double8 value, uint index ); double16 __ovld sub_group_non_uniform_broadcast( double16 value, uint index ); double __ovld sub_group_broadcast_first( double value ); #endif // 
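/* Usage sketch for cl_khr_subgroup_ballot (illustrative; names are
   hypothetical):

   kernel void ballot_sketch(global const float *in, global uint *popcount,
                             global int *lowest) {
       size_t gid = get_global_id(0);
       int pred = in[gid] > 0.0f;
       // One bit per sub-group lane, packed into a uint4.
       uint4 b = sub_group_ballot(pred);
       // Number of lanes whose predicate was true.
       popcount[gid] = sub_group_ballot_bit_count(b);
       // Predicate value held by the lowest active lane.
       lowest[gid] = sub_group_broadcast_first(pred);
   }
*/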
cl_khr_fp64 #endif // cl_khr_subgroup_ballot #if defined(cl_khr_subgroup_non_uniform_arithmetic) char __ovld sub_group_non_uniform_reduce_add( char value ); uchar __ovld sub_group_non_uniform_reduce_add( uchar value ); short __ovld sub_group_non_uniform_reduce_add( short value ); ushort __ovld sub_group_non_uniform_reduce_add( ushort value ); int __ovld sub_group_non_uniform_reduce_add( int value ); uint __ovld sub_group_non_uniform_reduce_add( uint value ); long __ovld sub_group_non_uniform_reduce_add( long value ); ulong __ovld sub_group_non_uniform_reduce_add( ulong value ); float __ovld sub_group_non_uniform_reduce_add( float value ); char __ovld sub_group_non_uniform_reduce_mul( char value ); uchar __ovld sub_group_non_uniform_reduce_mul( uchar value ); short __ovld sub_group_non_uniform_reduce_mul( short value ); ushort __ovld sub_group_non_uniform_reduce_mul( ushort value ); int __ovld sub_group_non_uniform_reduce_mul( int value ); uint __ovld sub_group_non_uniform_reduce_mul( uint value ); long __ovld sub_group_non_uniform_reduce_mul( long value ); ulong __ovld sub_group_non_uniform_reduce_mul( ulong value ); float __ovld sub_group_non_uniform_reduce_mul( float value ); char __ovld sub_group_non_uniform_reduce_min( char value ); uchar __ovld sub_group_non_uniform_reduce_min( uchar value ); short __ovld sub_group_non_uniform_reduce_min( short value ); ushort __ovld sub_group_non_uniform_reduce_min( ushort value ); int __ovld sub_group_non_uniform_reduce_min( int value ); uint __ovld sub_group_non_uniform_reduce_min( uint value ); long __ovld sub_group_non_uniform_reduce_min( long value ); ulong __ovld sub_group_non_uniform_reduce_min( ulong value ); float __ovld sub_group_non_uniform_reduce_min( float value ); char __ovld sub_group_non_uniform_reduce_max( char value ); uchar __ovld sub_group_non_uniform_reduce_max( uchar value ); short __ovld sub_group_non_uniform_reduce_max( short value ); ushort __ovld sub_group_non_uniform_reduce_max( ushort value ); int __ovld sub_group_non_uniform_reduce_max( int value ); uint __ovld sub_group_non_uniform_reduce_max( uint value ); long __ovld sub_group_non_uniform_reduce_max( long value ); ulong __ovld sub_group_non_uniform_reduce_max( ulong value ); float __ovld sub_group_non_uniform_reduce_max( float value ); char __ovld sub_group_non_uniform_scan_inclusive_add( char value ); uchar __ovld sub_group_non_uniform_scan_inclusive_add( uchar value ); short __ovld sub_group_non_uniform_scan_inclusive_add( short value ); ushort __ovld sub_group_non_uniform_scan_inclusive_add( ushort value ); int __ovld sub_group_non_uniform_scan_inclusive_add( int value ); uint __ovld sub_group_non_uniform_scan_inclusive_add( uint value ); long __ovld sub_group_non_uniform_scan_inclusive_add( long value ); ulong __ovld sub_group_non_uniform_scan_inclusive_add( ulong value ); float __ovld sub_group_non_uniform_scan_inclusive_add( float value ); char __ovld sub_group_non_uniform_scan_inclusive_mul( char value ); uchar __ovld sub_group_non_uniform_scan_inclusive_mul( uchar value ); short __ovld sub_group_non_uniform_scan_inclusive_mul( short value ); ushort __ovld sub_group_non_uniform_scan_inclusive_mul( ushort value ); int __ovld sub_group_non_uniform_scan_inclusive_mul( int value ); uint __ovld sub_group_non_uniform_scan_inclusive_mul( uint value ); long __ovld sub_group_non_uniform_scan_inclusive_mul( long value ); ulong __ovld sub_group_non_uniform_scan_inclusive_mul( ulong value ); float __ovld sub_group_non_uniform_scan_inclusive_mul( float value ); char __ovld 
sub_group_non_uniform_scan_inclusive_min( char value ); uchar __ovld sub_group_non_uniform_scan_inclusive_min( uchar value ); short __ovld sub_group_non_uniform_scan_inclusive_min( short value ); ushort __ovld sub_group_non_uniform_scan_inclusive_min( ushort value ); int __ovld sub_group_non_uniform_scan_inclusive_min( int value ); uint __ovld sub_group_non_uniform_scan_inclusive_min( uint value ); long __ovld sub_group_non_uniform_scan_inclusive_min( long value ); ulong __ovld sub_group_non_uniform_scan_inclusive_min( ulong value ); float __ovld sub_group_non_uniform_scan_inclusive_min( float value ); char __ovld sub_group_non_uniform_scan_inclusive_max( char value ); uchar __ovld sub_group_non_uniform_scan_inclusive_max( uchar value ); short __ovld sub_group_non_uniform_scan_inclusive_max( short value ); ushort __ovld sub_group_non_uniform_scan_inclusive_max( ushort value ); int __ovld sub_group_non_uniform_scan_inclusive_max( int value ); uint __ovld sub_group_non_uniform_scan_inclusive_max( uint value ); long __ovld sub_group_non_uniform_scan_inclusive_max( long value ); ulong __ovld sub_group_non_uniform_scan_inclusive_max( ulong value ); float __ovld sub_group_non_uniform_scan_inclusive_max( float value ); char __ovld sub_group_non_uniform_scan_exclusive_add( char value ); uchar __ovld sub_group_non_uniform_scan_exclusive_add( uchar value ); short __ovld sub_group_non_uniform_scan_exclusive_add( short value ); ushort __ovld sub_group_non_uniform_scan_exclusive_add( ushort value ); int __ovld sub_group_non_uniform_scan_exclusive_add( int value ); uint __ovld sub_group_non_uniform_scan_exclusive_add( uint value ); long __ovld sub_group_non_uniform_scan_exclusive_add( long value ); ulong __ovld sub_group_non_uniform_scan_exclusive_add( ulong value ); float __ovld sub_group_non_uniform_scan_exclusive_add( float value ); char __ovld sub_group_non_uniform_scan_exclusive_mul( char value ); uchar __ovld sub_group_non_uniform_scan_exclusive_mul( uchar value ); short __ovld sub_group_non_uniform_scan_exclusive_mul( short value ); ushort __ovld sub_group_non_uniform_scan_exclusive_mul( ushort value ); int __ovld sub_group_non_uniform_scan_exclusive_mul( int value ); uint __ovld sub_group_non_uniform_scan_exclusive_mul( uint value ); long __ovld sub_group_non_uniform_scan_exclusive_mul( long value ); ulong __ovld sub_group_non_uniform_scan_exclusive_mul( ulong value ); float __ovld sub_group_non_uniform_scan_exclusive_mul( float value ); char __ovld sub_group_non_uniform_scan_exclusive_min( char value ); uchar __ovld sub_group_non_uniform_scan_exclusive_min( uchar value ); short __ovld sub_group_non_uniform_scan_exclusive_min( short value ); ushort __ovld sub_group_non_uniform_scan_exclusive_min( ushort value ); int __ovld sub_group_non_uniform_scan_exclusive_min( int value ); uint __ovld sub_group_non_uniform_scan_exclusive_min( uint value ); long __ovld sub_group_non_uniform_scan_exclusive_min( long value ); ulong __ovld sub_group_non_uniform_scan_exclusive_min( ulong value ); float __ovld sub_group_non_uniform_scan_exclusive_min( float value ); char __ovld sub_group_non_uniform_scan_exclusive_max( char value ); uchar __ovld sub_group_non_uniform_scan_exclusive_max( uchar value ); short __ovld sub_group_non_uniform_scan_exclusive_max( short value ); ushort __ovld sub_group_non_uniform_scan_exclusive_max( ushort value ); int __ovld sub_group_non_uniform_scan_exclusive_max( int value ); uint __ovld sub_group_non_uniform_scan_exclusive_max( uint value ); long __ovld 
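/* Usage sketch for cl_khr_subgroup_non_uniform_arithmetic (illustrative; names
   are hypothetical).  The reductions and scans operate over the currently
   active lanes only:

   kernel void active_lane_scan(global const float *in, global float *prefix,
                                global float *peak) {
       size_t gid = get_global_id(0);
       float v = in[gid];
       if (v >= 0.0f) {                     // divergent: negative lanes drop out
           // Exclusive prefix sum over the active lanes.
           prefix[gid] = sub_group_non_uniform_scan_exclusive_add(v);
           // Maximum over the active lanes.
           peak[gid] = sub_group_non_uniform_reduce_max(v);
       }
   }
*/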
sub_group_non_uniform_scan_exclusive_max( long value ); ulong __ovld sub_group_non_uniform_scan_exclusive_max( ulong value ); float __ovld sub_group_non_uniform_scan_exclusive_max( float value ); char __ovld sub_group_non_uniform_reduce_and( char value ); uchar __ovld sub_group_non_uniform_reduce_and( uchar value ); short __ovld sub_group_non_uniform_reduce_and( short value ); ushort __ovld sub_group_non_uniform_reduce_and( ushort value ); int __ovld sub_group_non_uniform_reduce_and( int value ); uint __ovld sub_group_non_uniform_reduce_and( uint value ); long __ovld sub_group_non_uniform_reduce_and( long value ); ulong __ovld sub_group_non_uniform_reduce_and( ulong value ); char __ovld sub_group_non_uniform_reduce_or( char value ); uchar __ovld sub_group_non_uniform_reduce_or( uchar value ); short __ovld sub_group_non_uniform_reduce_or( short value ); ushort __ovld sub_group_non_uniform_reduce_or( ushort value ); int __ovld sub_group_non_uniform_reduce_or( int value ); uint __ovld sub_group_non_uniform_reduce_or( uint value ); long __ovld sub_group_non_uniform_reduce_or( long value ); ulong __ovld sub_group_non_uniform_reduce_or( ulong value ); char __ovld sub_group_non_uniform_reduce_xor( char value ); uchar __ovld sub_group_non_uniform_reduce_xor( uchar value ); short __ovld sub_group_non_uniform_reduce_xor( short value ); ushort __ovld sub_group_non_uniform_reduce_xor( ushort value ); int __ovld sub_group_non_uniform_reduce_xor( int value ); uint __ovld sub_group_non_uniform_reduce_xor( uint value ); long __ovld sub_group_non_uniform_reduce_xor( long value ); ulong __ovld sub_group_non_uniform_reduce_xor( ulong value ); char __ovld sub_group_non_uniform_scan_inclusive_and( char value ); uchar __ovld sub_group_non_uniform_scan_inclusive_and( uchar value ); short __ovld sub_group_non_uniform_scan_inclusive_and( short value ); ushort __ovld sub_group_non_uniform_scan_inclusive_and( ushort value ); int __ovld sub_group_non_uniform_scan_inclusive_and( int value ); uint __ovld sub_group_non_uniform_scan_inclusive_and( uint value ); long __ovld sub_group_non_uniform_scan_inclusive_and( long value ); ulong __ovld sub_group_non_uniform_scan_inclusive_and( ulong value ); char __ovld sub_group_non_uniform_scan_inclusive_or( char value ); uchar __ovld sub_group_non_uniform_scan_inclusive_or( uchar value ); short __ovld sub_group_non_uniform_scan_inclusive_or( short value ); ushort __ovld sub_group_non_uniform_scan_inclusive_or( ushort value ); int __ovld sub_group_non_uniform_scan_inclusive_or( int value ); uint __ovld sub_group_non_uniform_scan_inclusive_or( uint value ); long __ovld sub_group_non_uniform_scan_inclusive_or( long value ); ulong __ovld sub_group_non_uniform_scan_inclusive_or( ulong value ); char __ovld sub_group_non_uniform_scan_inclusive_xor( char value ); uchar __ovld sub_group_non_uniform_scan_inclusive_xor( uchar value ); short __ovld sub_group_non_uniform_scan_inclusive_xor( short value ); ushort __ovld sub_group_non_uniform_scan_inclusive_xor( ushort value ); int __ovld sub_group_non_uniform_scan_inclusive_xor( int value ); uint __ovld sub_group_non_uniform_scan_inclusive_xor( uint value ); long __ovld sub_group_non_uniform_scan_inclusive_xor( long value ); ulong __ovld sub_group_non_uniform_scan_inclusive_xor( ulong value ); char __ovld sub_group_non_uniform_scan_exclusive_and( char value ); uchar __ovld sub_group_non_uniform_scan_exclusive_and( uchar value ); short __ovld sub_group_non_uniform_scan_exclusive_and( short value ); ushort __ovld 
sub_group_non_uniform_scan_exclusive_and( ushort value ); int __ovld sub_group_non_uniform_scan_exclusive_and( int value ); uint __ovld sub_group_non_uniform_scan_exclusive_and( uint value ); long __ovld sub_group_non_uniform_scan_exclusive_and( long value ); ulong __ovld sub_group_non_uniform_scan_exclusive_and( ulong value ); char __ovld sub_group_non_uniform_scan_exclusive_or( char value ); uchar __ovld sub_group_non_uniform_scan_exclusive_or( uchar value ); short __ovld sub_group_non_uniform_scan_exclusive_or( short value ); ushort __ovld sub_group_non_uniform_scan_exclusive_or( ushort value ); int __ovld sub_group_non_uniform_scan_exclusive_or( int value ); uint __ovld sub_group_non_uniform_scan_exclusive_or( uint value ); long __ovld sub_group_non_uniform_scan_exclusive_or( long value ); ulong __ovld sub_group_non_uniform_scan_exclusive_or( ulong value ); char __ovld sub_group_non_uniform_scan_exclusive_xor( char value ); uchar __ovld sub_group_non_uniform_scan_exclusive_xor( uchar value ); short __ovld sub_group_non_uniform_scan_exclusive_xor( short value ); ushort __ovld sub_group_non_uniform_scan_exclusive_xor( ushort value ); int __ovld sub_group_non_uniform_scan_exclusive_xor( int value ); uint __ovld sub_group_non_uniform_scan_exclusive_xor( uint value ); long __ovld sub_group_non_uniform_scan_exclusive_xor( long value ); ulong __ovld sub_group_non_uniform_scan_exclusive_xor( ulong value ); int __ovld sub_group_non_uniform_reduce_logical_and( int predicate ); int __ovld sub_group_non_uniform_reduce_logical_or( int predicate ); int __ovld sub_group_non_uniform_reduce_logical_xor( int predicate ); int __ovld sub_group_non_uniform_scan_inclusive_logical_and( int predicate ); int __ovld sub_group_non_uniform_scan_inclusive_logical_or( int predicate ); int __ovld sub_group_non_uniform_scan_inclusive_logical_xor( int predicate ); int __ovld sub_group_non_uniform_scan_exclusive_logical_and( int predicate ); int __ovld sub_group_non_uniform_scan_exclusive_logical_or( int predicate ); int __ovld sub_group_non_uniform_scan_exclusive_logical_xor( int predicate ); #if defined(cl_khr_fp16) half __ovld sub_group_non_uniform_reduce_add( half value ); half __ovld sub_group_non_uniform_reduce_mul( half value ); half __ovld sub_group_non_uniform_reduce_min( half value ); half __ovld sub_group_non_uniform_reduce_max( half value ); half __ovld sub_group_non_uniform_scan_inclusive_add( half value ); half __ovld sub_group_non_uniform_scan_inclusive_mul( half value ); half __ovld sub_group_non_uniform_scan_inclusive_min( half value ); half __ovld sub_group_non_uniform_scan_inclusive_max( half value ); half __ovld sub_group_non_uniform_scan_exclusive_add( half value ); half __ovld sub_group_non_uniform_scan_exclusive_mul( half value ); half __ovld sub_group_non_uniform_scan_exclusive_min( half value ); half __ovld sub_group_non_uniform_scan_exclusive_max( half value ); #endif // cl_khr_fp16 #if defined(cl_khr_fp64) double __ovld sub_group_non_uniform_reduce_add( double value ); double __ovld sub_group_non_uniform_reduce_mul( double value ); double __ovld sub_group_non_uniform_reduce_min( double value ); double __ovld sub_group_non_uniform_reduce_max( double value ); double __ovld sub_group_non_uniform_scan_inclusive_add( double value ); double __ovld sub_group_non_uniform_scan_inclusive_mul( double value ); double __ovld sub_group_non_uniform_scan_inclusive_min( double value ); double __ovld sub_group_non_uniform_scan_inclusive_max( double value ); double __ovld 
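/* Usage sketch for the bitwise and logical variants above (illustrative; names
   are hypothetical):

   kernel void flag_combine(global const uint *flags, global uint *merged,
                            global int *any_set) {
       size_t gid = get_global_id(0);
       uint f = flags[gid];
       // Bitwise OR of the flag words across the active lanes.
       merged[gid] = sub_group_non_uniform_reduce_or(f);
       // Logical variant: each lane contributes a boolean predicate.
       any_set[gid] = sub_group_non_uniform_reduce_logical_or(f != 0u);
   }
*/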
sub_group_non_uniform_scan_exclusive_add( double value ); double __ovld sub_group_non_uniform_scan_exclusive_mul( double value ); double __ovld sub_group_non_uniform_scan_exclusive_min( double value ); double __ovld sub_group_non_uniform_scan_exclusive_max( double value ); #endif // cl_khr_fp64 #endif // cl_khr_subgroup_non_uniform_arithmetic #if defined(cl_khr_subgroup_shuffle) char __ovld sub_group_shuffle( char value, uint index ); uchar __ovld sub_group_shuffle( uchar value, uint index ); short __ovld sub_group_shuffle( short value, uint index ); ushort __ovld sub_group_shuffle( ushort value, uint index ); int __ovld sub_group_shuffle( int value, uint index ); uint __ovld sub_group_shuffle( uint value, uint index ); long __ovld sub_group_shuffle( long value, uint index ); ulong __ovld sub_group_shuffle( ulong value, uint index ); float __ovld sub_group_shuffle( float value, uint index ); char __ovld sub_group_shuffle_xor( char value, uint mask ); uchar __ovld sub_group_shuffle_xor( uchar value, uint mask ); short __ovld sub_group_shuffle_xor( short value, uint mask ); ushort __ovld sub_group_shuffle_xor( ushort value, uint mask ); int __ovld sub_group_shuffle_xor( int value, uint mask ); uint __ovld sub_group_shuffle_xor( uint value, uint mask ); long __ovld sub_group_shuffle_xor( long value, uint mask ); ulong __ovld sub_group_shuffle_xor( ulong value, uint mask ); float __ovld sub_group_shuffle_xor( float value, uint mask ); #if defined(cl_khr_fp16) half __ovld sub_group_shuffle( half value, uint index ); half __ovld sub_group_shuffle_xor( half value, uint mask ); #endif // cl_khr_fp16 #if defined(cl_khr_fp64) double __ovld sub_group_shuffle( double value, uint index ); double __ovld sub_group_shuffle_xor( double value, uint mask ); #endif // cl_khr_fp64 #endif // cl_khr_subgroup_shuffle #if defined(cl_khr_subgroup_shuffle_relative) char __ovld sub_group_shuffle_up( char value, uint delta ); uchar __ovld sub_group_shuffle_up( uchar value, uint delta ); short __ovld sub_group_shuffle_up( short value, uint delta ); ushort __ovld sub_group_shuffle_up( ushort value, uint delta ); int __ovld sub_group_shuffle_up( int value, uint delta ); uint __ovld sub_group_shuffle_up( uint value, uint delta ); long __ovld sub_group_shuffle_up( long value, uint delta ); ulong __ovld sub_group_shuffle_up( ulong value, uint delta ); float __ovld sub_group_shuffle_up( float value, uint delta ); char __ovld sub_group_shuffle_down( char value, uint delta ); uchar __ovld sub_group_shuffle_down( uchar value, uint delta ); short __ovld sub_group_shuffle_down( short value, uint delta ); ushort __ovld sub_group_shuffle_down( ushort value, uint delta ); int __ovld sub_group_shuffle_down( int value, uint delta ); uint __ovld sub_group_shuffle_down( uint value, uint delta ); long __ovld sub_group_shuffle_down( long value, uint delta ); ulong __ovld sub_group_shuffle_down( ulong value, uint delta ); float __ovld sub_group_shuffle_down( float value, uint delta ); #if defined(cl_khr_fp16) half __ovld sub_group_shuffle_up( half value, uint delta ); half __ovld sub_group_shuffle_down( half value, uint delta ); #endif // cl_khr_fp16 #if defined(cl_khr_fp64) double __ovld sub_group_shuffle_up( double value, uint delta ); double __ovld sub_group_shuffle_down( double value, uint delta ); #endif // cl_khr_fp64 #endif // cl_khr_subgroup_shuffle_relative #if defined(cl_khr_subgroup_clustered_reduce) char __ovld sub_group_clustered_reduce_add( char value, uint clustersize ); uchar __ovld sub_group_clustered_reduce_add( uchar 
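/* Usage sketch for cl_khr_subgroup_shuffle and cl_khr_subgroup_shuffle_relative
   (illustrative; names are hypothetical).  Values sourced from lanes outside
   the sub-group are undefined:

   kernel void shuffle_sketch(global const float *in, global float *out) {
       size_t gid = get_global_id(0);
       float v = in[gid];
       // Exchange with the lane whose index differs in bit 0 (pairwise swap).
       float partner = sub_group_shuffle_xor(v, 1u);
       // Value from the lane one below the current lane.
       float below = sub_group_shuffle_up(v, 1u);
       out[gid] = v + partner + below;
   }
*/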
value, uint clustersize ); short __ovld sub_group_clustered_reduce_add( short value, uint clustersize ); ushort __ovld sub_group_clustered_reduce_add( ushort value, uint clustersize ); int __ovld sub_group_clustered_reduce_add( int value, uint clustersize ); uint __ovld sub_group_clustered_reduce_add( uint value, uint clustersize ); long __ovld sub_group_clustered_reduce_add( long value, uint clustersize ); ulong __ovld sub_group_clustered_reduce_add( ulong value, uint clustersize ); float __ovld sub_group_clustered_reduce_add( float value, uint clustersize ); char __ovld sub_group_clustered_reduce_mul( char value, uint clustersize ); uchar __ovld sub_group_clustered_reduce_mul( uchar value, uint clustersize ); short __ovld sub_group_clustered_reduce_mul( short value, uint clustersize ); ushort __ovld sub_group_clustered_reduce_mul( ushort value, uint clustersize ); int __ovld sub_group_clustered_reduce_mul( int value, uint clustersize ); uint __ovld sub_group_clustered_reduce_mul( uint value, uint clustersize ); long __ovld sub_group_clustered_reduce_mul( long value, uint clustersize ); ulong __ovld sub_group_clustered_reduce_mul( ulong value, uint clustersize ); float __ovld sub_group_clustered_reduce_mul( float value, uint clustersize ); char __ovld sub_group_clustered_reduce_min( char value, uint clustersize ); uchar __ovld sub_group_clustered_reduce_min( uchar value, uint clustersize ); short __ovld sub_group_clustered_reduce_min( short value, uint clustersize ); ushort __ovld sub_group_clustered_reduce_min( ushort value, uint clustersize ); int __ovld sub_group_clustered_reduce_min( int value, uint clustersize ); uint __ovld sub_group_clustered_reduce_min( uint value, uint clustersize ); long __ovld sub_group_clustered_reduce_min( long value, uint clustersize ); ulong __ovld sub_group_clustered_reduce_min( ulong value, uint clustersize ); float __ovld sub_group_clustered_reduce_min( float value, uint clustersize ); char __ovld sub_group_clustered_reduce_max( char value, uint clustersize ); uchar __ovld sub_group_clustered_reduce_max( uchar value, uint clustersize ); short __ovld sub_group_clustered_reduce_max( short value, uint clustersize ); ushort __ovld sub_group_clustered_reduce_max( ushort value, uint clustersize ); int __ovld sub_group_clustered_reduce_max( int value, uint clustersize ); uint __ovld sub_group_clustered_reduce_max( uint value, uint clustersize ); long __ovld sub_group_clustered_reduce_max( long value, uint clustersize ); ulong __ovld sub_group_clustered_reduce_max( ulong value, uint clustersize ); float __ovld sub_group_clustered_reduce_max( float value, uint clustersize ); char __ovld sub_group_clustered_reduce_and( char value, uint clustersize ); uchar __ovld sub_group_clustered_reduce_and( uchar value, uint clustersize ); short __ovld sub_group_clustered_reduce_and( short value, uint clustersize ); ushort __ovld sub_group_clustered_reduce_and( ushort value, uint clustersize ); int __ovld sub_group_clustered_reduce_and( int value, uint clustersize ); uint __ovld sub_group_clustered_reduce_and( uint value, uint clustersize ); long __ovld sub_group_clustered_reduce_and( long value, uint clustersize ); ulong __ovld sub_group_clustered_reduce_and( ulong value, uint clustersize ); char __ovld sub_group_clustered_reduce_or( char value, uint clustersize ); uchar __ovld sub_group_clustered_reduce_or( uchar value, uint clustersize ); short __ovld sub_group_clustered_reduce_or( short value, uint clustersize ); ushort __ovld sub_group_clustered_reduce_or( ushort value, 
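/* Usage sketch for cl_khr_subgroup_clustered_reduce (illustrative; names are
   hypothetical).  The cluster size must be a power of two; 4 is an arbitrary
   example value here:

   kernel void clustered_sums(global const float *in, global float *out) {
       size_t gid = get_global_id(0);
       // Sum over each cluster of 4 consecutive sub-group lanes; every lane in
       // a cluster receives that cluster's result.
       out[gid] = sub_group_clustered_reduce_add(in[gid], 4u);
   }
*/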
uint clustersize ); int __ovld sub_group_clustered_reduce_or( int value, uint clustersize ); uint __ovld sub_group_clustered_reduce_or( uint value, uint clustersize ); long __ovld sub_group_clustered_reduce_or( long value, uint clustersize ); ulong __ovld sub_group_clustered_reduce_or( ulong value, uint clustersize ); char __ovld sub_group_clustered_reduce_xor( char value, uint clustersize ); uchar __ovld sub_group_clustered_reduce_xor( uchar value, uint clustersize ); short __ovld sub_group_clustered_reduce_xor( short value, uint clustersize ); ushort __ovld sub_group_clustered_reduce_xor( ushort value, uint clustersize ); int __ovld sub_group_clustered_reduce_xor( int value, uint clustersize ); uint __ovld sub_group_clustered_reduce_xor( uint value, uint clustersize ); long __ovld sub_group_clustered_reduce_xor( long value, uint clustersize ); ulong __ovld sub_group_clustered_reduce_xor( ulong value, uint clustersize ); int __ovld sub_group_clustered_reduce_logical_and( int predicate, uint clustersize ); int __ovld sub_group_clustered_reduce_logical_or( int predicate, uint clustersize ); int __ovld sub_group_clustered_reduce_logical_xor( int predicate, uint clustersize ); #if defined(cl_khr_fp16) half __ovld sub_group_clustered_reduce_add( half value, uint clustersize ); half __ovld sub_group_clustered_reduce_mul( half value, uint clustersize ); half __ovld sub_group_clustered_reduce_min( half value, uint clustersize ); half __ovld sub_group_clustered_reduce_max( half value, uint clustersize ); #endif // cl_khr_fp16 #if defined(cl_khr_fp64) double __ovld sub_group_clustered_reduce_add( double value, uint clustersize ); double __ovld sub_group_clustered_reduce_mul( double value, uint clustersize ); double __ovld sub_group_clustered_reduce_min( double value, uint clustersize ); double __ovld sub_group_clustered_reduce_max( double value, uint clustersize ); #endif // cl_khr_fp64 #endif // cl_khr_subgroup_clustered_reduce #if defined(cl_khr_extended_bit_ops) char __ovld __cnfn bitfield_insert(char, char, uint, uint); uchar __ovld __cnfn bitfield_insert(uchar, uchar, uint, uint); short __ovld __cnfn bitfield_insert(short, short, uint, uint); ushort __ovld __cnfn bitfield_insert(ushort, ushort, uint, uint); int __ovld __cnfn bitfield_insert(int, int, uint, uint); uint __ovld __cnfn bitfield_insert(uint, uint, uint, uint); long __ovld __cnfn bitfield_insert(long, long, uint, uint); ulong __ovld __cnfn bitfield_insert(ulong, ulong, uint, uint); char2 __ovld __cnfn bitfield_insert(char2, char2, uint, uint); uchar2 __ovld __cnfn bitfield_insert(uchar2, uchar2, uint, uint); short2 __ovld __cnfn bitfield_insert(short2, short2, uint, uint); ushort2 __ovld __cnfn bitfield_insert(ushort2, ushort2, uint, uint); int2 __ovld __cnfn bitfield_insert(int2, int2, uint, uint); uint2 __ovld __cnfn bitfield_insert(uint2, uint2, uint, uint); long2 __ovld __cnfn bitfield_insert(long2, long2, uint, uint); ulong2 __ovld __cnfn bitfield_insert(ulong2, ulong2, uint, uint); char3 __ovld __cnfn bitfield_insert(char3, char3, uint, uint); uchar3 __ovld __cnfn bitfield_insert(uchar3, uchar3, uint, uint); short3 __ovld __cnfn bitfield_insert(short3, short3, uint, uint); ushort3 __ovld __cnfn bitfield_insert(ushort3, ushort3, uint, uint); int3 __ovld __cnfn bitfield_insert(int3, int3, uint, uint); uint3 __ovld __cnfn bitfield_insert(uint3, uint3, uint, uint); long3 __ovld __cnfn bitfield_insert(long3, long3, uint, uint); ulong3 __ovld __cnfn bitfield_insert(ulong3, ulong3, uint, uint); char4 __ovld __cnfn 
bitfield_insert(char4, char4, uint, uint); uchar4 __ovld __cnfn bitfield_insert(uchar4, uchar4, uint, uint); short4 __ovld __cnfn bitfield_insert(short4, short4, uint, uint); ushort4 __ovld __cnfn bitfield_insert(ushort4, ushort4, uint, uint); int4 __ovld __cnfn bitfield_insert(int4, int4, uint, uint); uint4 __ovld __cnfn bitfield_insert(uint4, uint4, uint, uint); long4 __ovld __cnfn bitfield_insert(long4, long4, uint, uint); ulong4 __ovld __cnfn bitfield_insert(ulong4, ulong4, uint, uint); char8 __ovld __cnfn bitfield_insert(char8, char8, uint, uint); uchar8 __ovld __cnfn bitfield_insert(uchar8, uchar8, uint, uint); short8 __ovld __cnfn bitfield_insert(short8, short8, uint, uint); ushort8 __ovld __cnfn bitfield_insert(ushort8, ushort8, uint, uint); int8 __ovld __cnfn bitfield_insert(int8, int8, uint, uint); uint8 __ovld __cnfn bitfield_insert(uint8, uint8, uint, uint); long8 __ovld __cnfn bitfield_insert(long8, long8, uint, uint); ulong8 __ovld __cnfn bitfield_insert(ulong8, ulong8, uint, uint); char16 __ovld __cnfn bitfield_insert(char16, char16, uint, uint); uchar16 __ovld __cnfn bitfield_insert(uchar16, uchar16, uint, uint); short16 __ovld __cnfn bitfield_insert(short16, short16, uint, uint); ushort16 __ovld __cnfn bitfield_insert(ushort16, ushort16, uint, uint); int16 __ovld __cnfn bitfield_insert(int16, int16, uint, uint); uint16 __ovld __cnfn bitfield_insert(uint16, uint16, uint, uint); long16 __ovld __cnfn bitfield_insert(long16, long16, uint, uint); ulong16 __ovld __cnfn bitfield_insert(ulong16, ulong16, uint, uint); char __ovld __cnfn bitfield_extract_signed(char, uint, uint); short __ovld __cnfn bitfield_extract_signed(short, uint, uint); int __ovld __cnfn bitfield_extract_signed(int, uint, uint); long __ovld __cnfn bitfield_extract_signed(long, uint, uint); char2 __ovld __cnfn bitfield_extract_signed(char2, uint, uint); short2 __ovld __cnfn bitfield_extract_signed(short2, uint, uint); int2 __ovld __cnfn bitfield_extract_signed(int2, uint, uint); long2 __ovld __cnfn bitfield_extract_signed(long2, uint, uint); char3 __ovld __cnfn bitfield_extract_signed(char3, uint, uint); short3 __ovld __cnfn bitfield_extract_signed(short3, uint, uint); int3 __ovld __cnfn bitfield_extract_signed(int3, uint, uint); long3 __ovld __cnfn bitfield_extract_signed(long3, uint, uint); char4 __ovld __cnfn bitfield_extract_signed(char4, uint, uint); short4 __ovld __cnfn bitfield_extract_signed(short4, uint, uint); int4 __ovld __cnfn bitfield_extract_signed(int4, uint, uint); long4 __ovld __cnfn bitfield_extract_signed(long4, uint, uint); char8 __ovld __cnfn bitfield_extract_signed(char8, uint, uint); short8 __ovld __cnfn bitfield_extract_signed(short8, uint, uint); int8 __ovld __cnfn bitfield_extract_signed(int8, uint, uint); long8 __ovld __cnfn bitfield_extract_signed(long8, uint, uint); char16 __ovld __cnfn bitfield_extract_signed(char16, uint, uint); short16 __ovld __cnfn bitfield_extract_signed(short16, uint, uint); int16 __ovld __cnfn bitfield_extract_signed(int16, uint, uint); long16 __ovld __cnfn bitfield_extract_signed(long16, uint, uint); char __ovld __cnfn bitfield_extract_signed(uchar, uint, uint); short __ovld __cnfn bitfield_extract_signed(ushort, uint, uint); int __ovld __cnfn bitfield_extract_signed(uint, uint, uint); long __ovld __cnfn bitfield_extract_signed(ulong, uint, uint); char2 __ovld __cnfn bitfield_extract_signed(uchar2, uint, uint); short2 __ovld __cnfn bitfield_extract_signed(ushort2, uint, uint); int2 __ovld __cnfn bitfield_extract_signed(uint2, uint, uint); long2 __ovld __cnfn 
bitfield_extract_signed(ulong2, uint, uint); char3 __ovld __cnfn bitfield_extract_signed(uchar3, uint, uint); short3 __ovld __cnfn bitfield_extract_signed(ushort3, uint, uint); int3 __ovld __cnfn bitfield_extract_signed(uint3, uint, uint); long3 __ovld __cnfn bitfield_extract_signed(ulong3, uint, uint); char4 __ovld __cnfn bitfield_extract_signed(uchar4, uint, uint); short4 __ovld __cnfn bitfield_extract_signed(ushort4, uint, uint); int4 __ovld __cnfn bitfield_extract_signed(uint4, uint, uint); long4 __ovld __cnfn bitfield_extract_signed(ulong4, uint, uint); char8 __ovld __cnfn bitfield_extract_signed(uchar8, uint, uint); short8 __ovld __cnfn bitfield_extract_signed(ushort8, uint, uint); int8 __ovld __cnfn bitfield_extract_signed(uint8, uint, uint); long8 __ovld __cnfn bitfield_extract_signed(ulong8, uint, uint); char16 __ovld __cnfn bitfield_extract_signed(uchar16, uint, uint); short16 __ovld __cnfn bitfield_extract_signed(ushort16, uint, uint); int16 __ovld __cnfn bitfield_extract_signed(uint16, uint, uint); long16 __ovld __cnfn bitfield_extract_signed(ulong16, uint, uint); uchar __ovld __cnfn bitfield_extract_unsigned(char, uint, uint); ushort __ovld __cnfn bitfield_extract_unsigned(short, uint, uint); uint __ovld __cnfn bitfield_extract_unsigned(int, uint, uint); ulong __ovld __cnfn bitfield_extract_unsigned(long, uint, uint); uchar2 __ovld __cnfn bitfield_extract_unsigned(char2, uint, uint); ushort2 __ovld __cnfn bitfield_extract_unsigned(short2, uint, uint); uint2 __ovld __cnfn bitfield_extract_unsigned(int2, uint, uint); ulong2 __ovld __cnfn bitfield_extract_unsigned(long2, uint, uint); uchar3 __ovld __cnfn bitfield_extract_unsigned(char3, uint, uint); ushort3 __ovld __cnfn bitfield_extract_unsigned(short3, uint, uint); uint3 __ovld __cnfn bitfield_extract_unsigned(int3, uint, uint); ulong3 __ovld __cnfn bitfield_extract_unsigned(long3, uint, uint); uchar4 __ovld __cnfn bitfield_extract_unsigned(char4, uint, uint); ushort4 __ovld __cnfn bitfield_extract_unsigned(short4, uint, uint); uint4 __ovld __cnfn bitfield_extract_unsigned(int4, uint, uint); ulong4 __ovld __cnfn bitfield_extract_unsigned(long4, uint, uint); uchar8 __ovld __cnfn bitfield_extract_unsigned(char8, uint, uint); ushort8 __ovld __cnfn bitfield_extract_unsigned(short8, uint, uint); uint8 __ovld __cnfn bitfield_extract_unsigned(int8, uint, uint); ulong8 __ovld __cnfn bitfield_extract_unsigned(long8, uint, uint); uchar16 __ovld __cnfn bitfield_extract_unsigned(char16, uint, uint); ushort16 __ovld __cnfn bitfield_extract_unsigned(short16, uint, uint); uint16 __ovld __cnfn bitfield_extract_unsigned(int16, uint, uint); ulong16 __ovld __cnfn bitfield_extract_unsigned(long16, uint, uint); uchar __ovld __cnfn bitfield_extract_unsigned(uchar, uint, uint); ushort __ovld __cnfn bitfield_extract_unsigned(ushort, uint, uint); uint __ovld __cnfn bitfield_extract_unsigned(uint, uint, uint); ulong __ovld __cnfn bitfield_extract_unsigned(ulong, uint, uint); uchar2 __ovld __cnfn bitfield_extract_unsigned(uchar2, uint, uint); ushort2 __ovld __cnfn bitfield_extract_unsigned(ushort2, uint, uint); uint2 __ovld __cnfn bitfield_extract_unsigned(uint2, uint, uint); ulong2 __ovld __cnfn bitfield_extract_unsigned(ulong2, uint, uint); uchar3 __ovld __cnfn bitfield_extract_unsigned(uchar3, uint, uint); ushort3 __ovld __cnfn bitfield_extract_unsigned(ushort3, uint, uint); uint3 __ovld __cnfn bitfield_extract_unsigned(uint3, uint, uint); ulong3 __ovld __cnfn bitfield_extract_unsigned(ulong3, uint, uint); uchar4 __ovld __cnfn 
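/* Usage sketch for cl_khr_extended_bit_ops (illustrative; names and the
   (offset, count) values are hypothetical):

   kernel void bitfield_sketch(global const uint *in, global uint *packed,
                               global int *field) {
       size_t gid = get_global_id(0);
       uint v = in[gid];
       // Overwrite bits [8..15] of v with the low 8 bits of 0xAB.
       packed[gid] = bitfield_insert(v, 0xABu, 8u, 8u);
       // Extract bits [4..9] of v and sign-extend them.
       field[gid] = bitfield_extract_signed(v, 4u, 6u);
   }
*/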
bitfield_extract_unsigned(uchar4, uint, uint); ushort4 __ovld __cnfn bitfield_extract_unsigned(ushort4, uint, uint); uint4 __ovld __cnfn bitfield_extract_unsigned(uint4, uint, uint); ulong4 __ovld __cnfn bitfield_extract_unsigned(ulong4, uint, uint); uchar8 __ovld __cnfn bitfield_extract_unsigned(uchar8, uint, uint); ushort8 __ovld __cnfn bitfield_extract_unsigned(ushort8, uint, uint); uint8 __ovld __cnfn bitfield_extract_unsigned(uint8, uint, uint); ulong8 __ovld __cnfn bitfield_extract_unsigned(ulong8, uint, uint); uchar16 __ovld __cnfn bitfield_extract_unsigned(uchar16, uint, uint); ushort16 __ovld __cnfn bitfield_extract_unsigned(ushort16, uint, uint); uint16 __ovld __cnfn bitfield_extract_unsigned(uint16, uint, uint); ulong16 __ovld __cnfn bitfield_extract_unsigned(ulong16, uint, uint); char __ovld __cnfn bit_reverse(char); uchar __ovld __cnfn bit_reverse(uchar); short __ovld __cnfn bit_reverse(short); ushort __ovld __cnfn bit_reverse(ushort); int __ovld __cnfn bit_reverse(int); uint __ovld __cnfn bit_reverse(uint); long __ovld __cnfn bit_reverse(long); ulong __ovld __cnfn bit_reverse(ulong); char2 __ovld __cnfn bit_reverse(char2); uchar2 __ovld __cnfn bit_reverse(uchar2); short2 __ovld __cnfn bit_reverse(short2); ushort2 __ovld __cnfn bit_reverse(ushort2); int2 __ovld __cnfn bit_reverse(int2); uint2 __ovld __cnfn bit_reverse(uint2); long2 __ovld __cnfn bit_reverse(long2); ulong2 __ovld __cnfn bit_reverse(ulong2); char3 __ovld __cnfn bit_reverse(char3); uchar3 __ovld __cnfn bit_reverse(uchar3); short3 __ovld __cnfn bit_reverse(short3); ushort3 __ovld __cnfn bit_reverse(ushort3); int3 __ovld __cnfn bit_reverse(int3); uint3 __ovld __cnfn bit_reverse(uint3); long3 __ovld __cnfn bit_reverse(long3); ulong3 __ovld __cnfn bit_reverse(ulong3); char4 __ovld __cnfn bit_reverse(char4); uchar4 __ovld __cnfn bit_reverse(uchar4); short4 __ovld __cnfn bit_reverse(short4); ushort4 __ovld __cnfn bit_reverse(ushort4); int4 __ovld __cnfn bit_reverse(int4); uint4 __ovld __cnfn bit_reverse(uint4); long4 __ovld __cnfn bit_reverse(long4); ulong4 __ovld __cnfn bit_reverse(ulong4); char8 __ovld __cnfn bit_reverse(char8); uchar8 __ovld __cnfn bit_reverse(uchar8); short8 __ovld __cnfn bit_reverse(short8); ushort8 __ovld __cnfn bit_reverse(ushort8); int8 __ovld __cnfn bit_reverse(int8); uint8 __ovld __cnfn bit_reverse(uint8); long8 __ovld __cnfn bit_reverse(long8); ulong8 __ovld __cnfn bit_reverse(ulong8); char16 __ovld __cnfn bit_reverse(char16); uchar16 __ovld __cnfn bit_reverse(uchar16); short16 __ovld __cnfn bit_reverse(short16); ushort16 __ovld __cnfn bit_reverse(ushort16); int16 __ovld __cnfn bit_reverse(int16); uint16 __ovld __cnfn bit_reverse(uint16); long16 __ovld __cnfn bit_reverse(long16); ulong16 __ovld __cnfn bit_reverse(ulong16); #endif // cl_khr_extended_bit_ops #if defined(__opencl_c_integer_dot_product_input_4x8bit) uint __ovld __cnfn dot(uchar4, uchar4); int __ovld __cnfn dot(char4, char4); int __ovld __cnfn dot(uchar4, char4); int __ovld __cnfn dot(char4, uchar4); uint __ovld __cnfn dot_acc_sat(uchar4, uchar4, uint); int __ovld __cnfn dot_acc_sat(char4, char4, int); int __ovld __cnfn dot_acc_sat(uchar4, char4, int); int __ovld __cnfn dot_acc_sat(char4, uchar4, int); #endif // __opencl_c_integer_dot_product_input_4x8bit #if defined(__opencl_c_integer_dot_product_input_4x8bit_packed) uint __ovld __cnfn dot_4x8packed_uu_uint(uint, uint); int __ovld __cnfn dot_4x8packed_ss_int(uint, uint); int __ovld __cnfn dot_4x8packed_us_int(uint, uint); int __ovld __cnfn dot_4x8packed_su_int(uint, uint); uint 
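/* Usage sketch for __opencl_c_integer_dot_product_input_4x8bit (illustrative;
   names are hypothetical):

   kernel void dot8_sketch(global const char4 *a, global const uchar4 *b,
                           global int *acc) {
       size_t gid = get_global_id(0);
       // Mixed-signedness 4x8-bit dot product, accumulated with saturation.
       acc[gid] = dot_acc_sat(a[gid], b[gid], acc[gid]);
   }
*/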
__ovld __cnfn dot_acc_sat_4x8packed_uu_uint(uint, uint, uint); int __ovld __cnfn dot_acc_sat_4x8packed_ss_int(uint, uint, int); int __ovld __cnfn dot_acc_sat_4x8packed_us_int(uint, uint, int); int __ovld __cnfn dot_acc_sat_4x8packed_su_int(uint, uint, int); #endif // __opencl_c_integer_dot_product_input_4x8bit_packed #if defined(cl_khr_subgroup_rotate) char __ovld __conv sub_group_rotate(char, int); uchar __ovld __conv sub_group_rotate(uchar, int); short __ovld __conv sub_group_rotate(short, int); ushort __ovld __conv sub_group_rotate(ushort, int); int __ovld __conv sub_group_rotate(int, int); uint __ovld __conv sub_group_rotate(uint, int); long __ovld __conv sub_group_rotate(long, int); ulong __ovld __conv sub_group_rotate(ulong, int); float __ovld __conv sub_group_rotate(float, int); #if defined(cl_khr_fp64) double __ovld __conv sub_group_rotate(double, int); #endif // cl_khr_fp64 #if defined(cl_khr_fp16) half __ovld __conv sub_group_rotate(half, int); #endif // cl_khr_fp16 char __ovld __conv sub_group_clustered_rotate(char, int, uint); uchar __ovld __conv sub_group_clustered_rotate(uchar, int, uint); short __ovld __conv sub_group_clustered_rotate(short, int, uint); ushort __ovld __conv sub_group_clustered_rotate(ushort, int, uint); int __ovld __conv sub_group_clustered_rotate(int, int, uint); uint __ovld __conv sub_group_clustered_rotate(uint, int, uint); long __ovld __conv sub_group_clustered_rotate(long, int, uint); ulong __ovld __conv sub_group_clustered_rotate(ulong, int, uint); float __ovld __conv sub_group_clustered_rotate(float, int, uint); #if defined(cl_khr_fp64) double __ovld __conv sub_group_clustered_rotate(double, int, uint); #endif // cl_khr_fp64 #if defined(cl_khr_fp16) half __ovld __conv sub_group_clustered_rotate(half, int, uint); #endif // cl_khr_fp16 #endif // cl_khr_subgroup_rotate #if defined(cl_intel_subgroups) // Intel-Specific Sub Group Functions float __ovld __conv intel_sub_group_shuffle( float , uint ); float2 __ovld __conv intel_sub_group_shuffle( float2, uint ); float3 __ovld __conv intel_sub_group_shuffle( float3, uint ); float4 __ovld __conv intel_sub_group_shuffle( float4, uint ); float8 __ovld __conv intel_sub_group_shuffle( float8, uint ); float16 __ovld __conv intel_sub_group_shuffle( float16, uint ); int __ovld __conv intel_sub_group_shuffle( int , uint ); int2 __ovld __conv intel_sub_group_shuffle( int2, uint ); int3 __ovld __conv intel_sub_group_shuffle( int3, uint ); int4 __ovld __conv intel_sub_group_shuffle( int4, uint ); int8 __ovld __conv intel_sub_group_shuffle( int8, uint ); int16 __ovld __conv intel_sub_group_shuffle( int16, uint ); uint __ovld __conv intel_sub_group_shuffle( uint , uint ); uint2 __ovld __conv intel_sub_group_shuffle( uint2, uint ); uint3 __ovld __conv intel_sub_group_shuffle( uint3, uint ); uint4 __ovld __conv intel_sub_group_shuffle( uint4, uint ); uint8 __ovld __conv intel_sub_group_shuffle( uint8, uint ); uint16 __ovld __conv intel_sub_group_shuffle( uint16, uint ); long __ovld __conv intel_sub_group_shuffle( long, uint ); ulong __ovld __conv intel_sub_group_shuffle( ulong, uint ); float __ovld __conv intel_sub_group_shuffle_down( float cur, float next, uint ); float2 __ovld __conv intel_sub_group_shuffle_down( float2 cur, float2 next, uint ); float3 __ovld __conv intel_sub_group_shuffle_down( float3 cur, float3 next, uint ); float4 __ovld __conv intel_sub_group_shuffle_down( float4 cur, float4 next, uint ); float8 __ovld __conv intel_sub_group_shuffle_down( float8 cur, float8 next, uint ); float16 __ovld __conv 
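/* Usage sketch for cl_khr_subgroup_rotate (illustrative; names are
   hypothetical):

   kernel void rotate_sketch(global const int *in, global int *out) {
       size_t gid = get_global_id(0);
       // Each lane receives the value held by the lane whose local id is
       // (current id + 1) modulo the sub-group size.
       out[gid] = sub_group_rotate(in[gid], 1);
   }
*/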
intel_sub_group_shuffle_down( float16 cur, float16 next, uint ); int __ovld __conv intel_sub_group_shuffle_down( int cur, int next, uint ); int2 __ovld __conv intel_sub_group_shuffle_down( int2 cur, int2 next, uint ); int3 __ovld __conv intel_sub_group_shuffle_down( int3 cur, int3 next, uint ); int4 __ovld __conv intel_sub_group_shuffle_down( int4 cur, int4 next, uint ); int8 __ovld __conv intel_sub_group_shuffle_down( int8 cur, int8 next, uint ); int16 __ovld __conv intel_sub_group_shuffle_down( int16 cur, int16 next, uint ); uint __ovld __conv intel_sub_group_shuffle_down( uint cur, uint next, uint ); uint2 __ovld __conv intel_sub_group_shuffle_down( uint2 cur, uint2 next, uint ); uint3 __ovld __conv intel_sub_group_shuffle_down( uint3 cur, uint3 next, uint ); uint4 __ovld __conv intel_sub_group_shuffle_down( uint4 cur, uint4 next, uint ); uint8 __ovld __conv intel_sub_group_shuffle_down( uint8 cur, uint8 next, uint ); uint16 __ovld __conv intel_sub_group_shuffle_down( uint16 cur, uint16 next, uint ); long __ovld __conv intel_sub_group_shuffle_down( long prev, long cur, uint ); ulong __ovld __conv intel_sub_group_shuffle_down( ulong prev, ulong cur, uint ); float __ovld __conv intel_sub_group_shuffle_up( float prev, float cur, uint ); float2 __ovld __conv intel_sub_group_shuffle_up( float2 prev, float2 cur, uint ); float3 __ovld __conv intel_sub_group_shuffle_up( float3 prev, float3 cur, uint ); float4 __ovld __conv intel_sub_group_shuffle_up( float4 prev, float4 cur, uint ); float8 __ovld __conv intel_sub_group_shuffle_up( float8 prev, float8 cur, uint ); float16 __ovld __conv intel_sub_group_shuffle_up( float16 prev, float16 cur, uint ); int __ovld __conv intel_sub_group_shuffle_up( int prev, int cur, uint ); int2 __ovld __conv intel_sub_group_shuffle_up( int2 prev, int2 cur, uint ); int3 __ovld __conv intel_sub_group_shuffle_up( int3 prev, int3 cur, uint ); int4 __ovld __conv intel_sub_group_shuffle_up( int4 prev, int4 cur, uint ); int8 __ovld __conv intel_sub_group_shuffle_up( int8 prev, int8 cur, uint ); int16 __ovld __conv intel_sub_group_shuffle_up( int16 prev, int16 cur, uint ); uint __ovld __conv intel_sub_group_shuffle_up( uint prev, uint cur, uint ); uint2 __ovld __conv intel_sub_group_shuffle_up( uint2 prev, uint2 cur, uint ); uint3 __ovld __conv intel_sub_group_shuffle_up( uint3 prev, uint3 cur, uint ); uint4 __ovld __conv intel_sub_group_shuffle_up( uint4 prev, uint4 cur, uint ); uint8 __ovld __conv intel_sub_group_shuffle_up( uint8 prev, uint8 cur, uint ); uint16 __ovld __conv intel_sub_group_shuffle_up( uint16 prev, uint16 cur, uint ); long __ovld __conv intel_sub_group_shuffle_up( long prev, long cur, uint ); ulong __ovld __conv intel_sub_group_shuffle_up( ulong prev, ulong cur, uint ); float __ovld __conv intel_sub_group_shuffle_xor( float , uint ); float2 __ovld __conv intel_sub_group_shuffle_xor( float2, uint ); float3 __ovld __conv intel_sub_group_shuffle_xor( float3, uint ); float4 __ovld __conv intel_sub_group_shuffle_xor( float4, uint ); float8 __ovld __conv intel_sub_group_shuffle_xor( float8, uint ); float16 __ovld __conv intel_sub_group_shuffle_xor( float16, uint ); int __ovld __conv intel_sub_group_shuffle_xor( int , uint ); int2 __ovld __conv intel_sub_group_shuffle_xor( int2, uint ); int3 __ovld __conv intel_sub_group_shuffle_xor( int3, uint ); int4 __ovld __conv intel_sub_group_shuffle_xor( int4, uint ); int8 __ovld __conv intel_sub_group_shuffle_xor( int8, uint ); int16 __ovld __conv intel_sub_group_shuffle_xor( int16, uint ); uint __ovld __conv 
intel_sub_group_shuffle_xor( uint , uint ); uint2 __ovld __conv intel_sub_group_shuffle_xor( uint2, uint ); uint3 __ovld __conv intel_sub_group_shuffle_xor( uint3, uint ); uint4 __ovld __conv intel_sub_group_shuffle_xor( uint4, uint ); uint8 __ovld __conv intel_sub_group_shuffle_xor( uint8, uint ); uint16 __ovld __conv intel_sub_group_shuffle_xor( uint16, uint ); long __ovld __conv intel_sub_group_shuffle_xor( long, uint ); ulong __ovld __conv intel_sub_group_shuffle_xor( ulong, uint ); #if defined(__opencl_c_images) uint __ovld __conv intel_sub_group_block_read(read_only image2d_t, int2); uint2 __ovld __conv intel_sub_group_block_read2(read_only image2d_t, int2); uint4 __ovld __conv intel_sub_group_block_read4(read_only image2d_t, int2); uint8 __ovld __conv intel_sub_group_block_read8(read_only image2d_t, int2); #endif #if defined(__opencl_c_read_write_images) uint __ovld __conv intel_sub_group_block_read(read_write image2d_t, int2); uint2 __ovld __conv intel_sub_group_block_read2(read_write image2d_t, int2); uint4 __ovld __conv intel_sub_group_block_read4(read_write image2d_t, int2); uint8 __ovld __conv intel_sub_group_block_read8(read_write image2d_t, int2); #endif // defined(__opencl_c_read_write_images) uint __ovld __conv intel_sub_group_block_read( const __global uint* p ); uint2 __ovld __conv intel_sub_group_block_read2( const __global uint* p ); uint4 __ovld __conv intel_sub_group_block_read4( const __global uint* p ); uint8 __ovld __conv intel_sub_group_block_read8( const __global uint* p ); #if defined(__opencl_c_images) void __ovld __conv intel_sub_group_block_write(write_only image2d_t, int2, uint); void __ovld __conv intel_sub_group_block_write2(write_only image2d_t, int2, uint2); void __ovld __conv intel_sub_group_block_write4(write_only image2d_t, int2, uint4); void __ovld __conv intel_sub_group_block_write8(write_only image2d_t, int2, uint8); #endif // defined(__opencl_c_images) #if defined(__opencl_c_read_write_images) void __ovld __conv intel_sub_group_block_write(read_write image2d_t, int2, uint); void __ovld __conv intel_sub_group_block_write2(read_write image2d_t, int2, uint2); void __ovld __conv intel_sub_group_block_write4(read_write image2d_t, int2, uint4); void __ovld __conv intel_sub_group_block_write8(read_write image2d_t, int2, uint8); #endif // defined(__opencl_c_read_write_images) void __ovld __conv intel_sub_group_block_write( __global uint* p, uint data ); void __ovld __conv intel_sub_group_block_write2( __global uint* p, uint2 data ); void __ovld __conv intel_sub_group_block_write4( __global uint* p, uint4 data ); void __ovld __conv intel_sub_group_block_write8( __global uint* p, uint8 data ); #ifdef cl_khr_fp16 half __ovld __conv intel_sub_group_shuffle( half, uint ); half __ovld __conv intel_sub_group_shuffle_down( half prev, half cur, uint ); half __ovld __conv intel_sub_group_shuffle_up( half prev, half cur, uint ); half __ovld __conv intel_sub_group_shuffle_xor( half, uint ); #endif #if defined(cl_khr_fp64) double __ovld __conv intel_sub_group_shuffle( double, uint ); double __ovld __conv intel_sub_group_shuffle_down( double prev, double cur, uint ); double __ovld __conv intel_sub_group_shuffle_up( double prev, double cur, uint ); double __ovld __conv intel_sub_group_shuffle_xor( double, uint ); #endif #endif //cl_intel_subgroups #if defined(cl_intel_subgroups_short) short __ovld __conv intel_sub_group_broadcast( short , uint sub_group_local_id ); short2 __ovld __conv intel_sub_group_broadcast( short2, uint sub_group_local_id ); short3 __ovld __conv 
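/* Usage sketch for the cl_intel_subgroups block reads/writes on global memory
   (illustrative; names are hypothetical, and one sub-group per work-group is
   assumed so that the block base address is uniform across the sub-group):

   kernel void block_copy(const global uint *src, global uint *dst) {
       size_t base = get_group_id(0) * get_local_size(0);
       // The sub-group cooperatively reads a contiguous block of uints
       // starting at src + base, one element per lane.
       uint v = intel_sub_group_block_read(src + base);
       intel_sub_group_block_write(dst + base, v);
   }
*/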
intel_sub_group_broadcast( short3, uint sub_group_local_id ); short4 __ovld __conv intel_sub_group_broadcast( short4, uint sub_group_local_id ); short8 __ovld __conv intel_sub_group_broadcast( short8, uint sub_group_local_id ); ushort __ovld __conv intel_sub_group_broadcast( ushort , uint sub_group_local_id ); ushort2 __ovld __conv intel_sub_group_broadcast( ushort2, uint sub_group_local_id ); ushort3 __ovld __conv intel_sub_group_broadcast( ushort3, uint sub_group_local_id ); ushort4 __ovld __conv intel_sub_group_broadcast( ushort4, uint sub_group_local_id ); ushort8 __ovld __conv intel_sub_group_broadcast( ushort8, uint sub_group_local_id ); short __ovld __conv intel_sub_group_shuffle( short , uint ); short2 __ovld __conv intel_sub_group_shuffle( short2 , uint ); short3 __ovld __conv intel_sub_group_shuffle( short3 , uint ); short4 __ovld __conv intel_sub_group_shuffle( short4 , uint ); short8 __ovld __conv intel_sub_group_shuffle( short8 , uint ); short16 __ovld __conv intel_sub_group_shuffle( short16, uint); ushort __ovld __conv intel_sub_group_shuffle( ushort , uint ); ushort2 __ovld __conv intel_sub_group_shuffle( ushort2 , uint ); ushort3 __ovld __conv intel_sub_group_shuffle( ushort3 , uint ); ushort4 __ovld __conv intel_sub_group_shuffle( ushort4 , uint ); ushort8 __ovld __conv intel_sub_group_shuffle( ushort8 , uint ); ushort16 __ovld __conv intel_sub_group_shuffle( ushort16, uint ); short __ovld __conv intel_sub_group_shuffle_down( short cur, short next, uint ); short2 __ovld __conv intel_sub_group_shuffle_down( short2 cur, short2 next, uint ); short3 __ovld __conv intel_sub_group_shuffle_down( short3 cur, short3 next, uint ); short4 __ovld __conv intel_sub_group_shuffle_down( short4 cur, short4 next, uint ); short8 __ovld __conv intel_sub_group_shuffle_down( short8 cur, short8 next, uint ); short16 __ovld __conv intel_sub_group_shuffle_down( short16 cur, short16 next, uint ); ushort __ovld __conv intel_sub_group_shuffle_down( ushort cur, ushort next, uint ); ushort2 __ovld __conv intel_sub_group_shuffle_down( ushort2 cur, ushort2 next, uint ); ushort3 __ovld __conv intel_sub_group_shuffle_down( ushort3 cur, ushort3 next, uint ); ushort4 __ovld __conv intel_sub_group_shuffle_down( ushort4 cur, ushort4 next, uint ); ushort8 __ovld __conv intel_sub_group_shuffle_down( ushort8 cur, ushort8 next, uint ); ushort16 __ovld __conv intel_sub_group_shuffle_down( ushort16 cur, ushort16 next, uint ); short __ovld __conv intel_sub_group_shuffle_up( short cur, short next, uint ); short2 __ovld __conv intel_sub_group_shuffle_up( short2 cur, short2 next, uint ); short3 __ovld __conv intel_sub_group_shuffle_up( short3 cur, short3 next, uint ); short4 __ovld __conv intel_sub_group_shuffle_up( short4 cur, short4 next, uint ); short8 __ovld __conv intel_sub_group_shuffle_up( short8 cur, short8 next, uint ); short16 __ovld __conv intel_sub_group_shuffle_up( short16 cur, short16 next, uint ); ushort __ovld __conv intel_sub_group_shuffle_up( ushort cur, ushort next, uint ); ushort2 __ovld __conv intel_sub_group_shuffle_up( ushort2 cur, ushort2 next, uint ); ushort3 __ovld __conv intel_sub_group_shuffle_up( ushort3 cur, ushort3 next, uint ); ushort4 __ovld __conv intel_sub_group_shuffle_up( ushort4 cur, ushort4 next, uint ); ushort8 __ovld __conv intel_sub_group_shuffle_up( ushort8 cur, ushort8 next, uint ); ushort16 __ovld __conv intel_sub_group_shuffle_up( ushort16 cur, ushort16 next, uint ); short __ovld __conv intel_sub_group_shuffle_xor( short , uint ); short2 __ovld __conv 
intel_sub_group_shuffle_xor( short2 , uint ); short3 __ovld __conv intel_sub_group_shuffle_xor( short3 , uint ); short4 __ovld __conv intel_sub_group_shuffle_xor( short4 , uint ); short8 __ovld __conv intel_sub_group_shuffle_xor( short8 , uint ); short16 __ovld __conv intel_sub_group_shuffle_xor( short16, uint ); ushort __ovld __conv intel_sub_group_shuffle_xor( ushort , uint ); ushort2 __ovld __conv intel_sub_group_shuffle_xor( ushort2 , uint ); ushort3 __ovld __conv intel_sub_group_shuffle_xor( ushort3 , uint ); ushort4 __ovld __conv intel_sub_group_shuffle_xor( ushort4 , uint ); ushort8 __ovld __conv intel_sub_group_shuffle_xor( ushort8 , uint ); ushort16 __ovld __conv intel_sub_group_shuffle_xor( ushort16, uint ); short __ovld __conv intel_sub_group_reduce_add( short x ); ushort __ovld __conv intel_sub_group_reduce_add( ushort x ); short __ovld __conv intel_sub_group_reduce_min( short x ); ushort __ovld __conv intel_sub_group_reduce_min( ushort x ); short __ovld __conv intel_sub_group_reduce_max( short x ); ushort __ovld __conv intel_sub_group_reduce_max( ushort x ); short __ovld __conv intel_sub_group_scan_exclusive_add( short x ); ushort __ovld __conv intel_sub_group_scan_exclusive_add( ushort x ); short __ovld __conv intel_sub_group_scan_exclusive_min( short x ); ushort __ovld __conv intel_sub_group_scan_exclusive_min( ushort x ); short __ovld __conv intel_sub_group_scan_exclusive_max( short x ); ushort __ovld __conv intel_sub_group_scan_exclusive_max( ushort x ); short __ovld __conv intel_sub_group_scan_inclusive_add( short x ); ushort __ovld __conv intel_sub_group_scan_inclusive_add( ushort x ); short __ovld __conv intel_sub_group_scan_inclusive_min( short x ); ushort __ovld __conv intel_sub_group_scan_inclusive_min( ushort x ); short __ovld __conv intel_sub_group_scan_inclusive_max( short x ); ushort __ovld __conv intel_sub_group_scan_inclusive_max( ushort x ); #if defined(__opencl_c_images) uint __ovld __conv intel_sub_group_block_read_ui(read_only image2d_t, int2); uint2 __ovld __conv intel_sub_group_block_read_ui2(read_only image2d_t, int2); uint4 __ovld __conv intel_sub_group_block_read_ui4(read_only image2d_t, int2); uint8 __ovld __conv intel_sub_group_block_read_ui8(read_only image2d_t, int2); #endif // defined(__opencl_c_images) #if defined(__opencl_c_read_write_images) uint __ovld __conv intel_sub_group_block_read_ui(read_write image2d_t, int2); uint2 __ovld __conv intel_sub_group_block_read_ui2(read_write image2d_t, int2); uint4 __ovld __conv intel_sub_group_block_read_ui4(read_write image2d_t, int2); uint8 __ovld __conv intel_sub_group_block_read_ui8(read_write image2d_t, int2); #endif // defined(__opencl_c_read_write_images) uint __ovld __conv intel_sub_group_block_read_ui( const __global uint* p ); uint2 __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p ); uint4 __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p ); uint8 __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p ); #if defined(__opencl_c_images) void __ovld __conv intel_sub_group_block_write_ui(write_only image2d_t, int2, uint); void __ovld __conv intel_sub_group_block_write_ui2(write_only image2d_t, int2, uint2); void __ovld __conv intel_sub_group_block_write_ui4(write_only image2d_t, int2, uint4); void __ovld __conv intel_sub_group_block_write_ui8(write_only image2d_t, int2, uint8); #endif //defined(__opencl_c_images) #if defined(__opencl_c_read_write_images) void __ovld __conv intel_sub_group_block_write_ui(read_write image2d_t, int2, uint); void __ovld 
__conv intel_sub_group_block_write_ui2(read_write image2d_t, int2, uint2); void __ovld __conv intel_sub_group_block_write_ui4(read_write image2d_t, int2, uint4); void __ovld __conv intel_sub_group_block_write_ui8(read_write image2d_t, int2, uint8); #endif // defined(__opencl_c_read_write_images) void __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data ); void __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data ); void __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data ); void __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data ); #if defined(__opencl_c_images) ushort __ovld __conv intel_sub_group_block_read_us(read_only image2d_t, int2); ushort2 __ovld __conv intel_sub_group_block_read_us2(read_only image2d_t, int2); ushort4 __ovld __conv intel_sub_group_block_read_us4(read_only image2d_t, int2); ushort8 __ovld __conv intel_sub_group_block_read_us8(read_only image2d_t, int2); #endif // defined(__opencl_c_images) #if defined(__opencl_c_read_write_images) ushort __ovld __conv intel_sub_group_block_read_us(read_write image2d_t, int2); ushort2 __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t, int2); ushort4 __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t, int2); ushort8 __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t, int2); #endif // defined(__opencl_c_read_write_images) ushort __ovld __conv intel_sub_group_block_read_us( const __global ushort* p ); ushort2 __ovld __conv intel_sub_group_block_read_us2( const __global ushort* p ); ushort4 __ovld __conv intel_sub_group_block_read_us4( const __global ushort* p ); ushort8 __ovld __conv intel_sub_group_block_read_us8( const __global ushort* p ); #if defined(__opencl_c_images) void __ovld __conv intel_sub_group_block_write_us(write_only image2d_t, int2, ushort); void __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t, int2, ushort2); void __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t, int2, ushort4); void __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t, int2, ushort8); #endif // defined(__opencl_c_images) #if defined(__opencl_c_read_write_images) void __ovld __conv intel_sub_group_block_write_us(read_write image2d_t, int2, ushort); void __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t, int2, ushort2); void __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t, int2, ushort4); void __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t, int2, ushort8); #endif // defined(__opencl_c_read_write_images) void __ovld __conv intel_sub_group_block_write_us( __global ushort* p, ushort data ); void __ovld __conv intel_sub_group_block_write_us2( __global ushort* p, ushort2 data ); void __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, ushort4 data ); void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data ); #endif // cl_intel_subgroups_short #ifdef cl_intel_device_side_avc_motion_estimation #pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : begin // MCE built-in functions uchar __ovld intel_sub_group_avc_mce_get_default_inter_base_multi_reference_penalty( uchar slice_type, uchar qp); ulong __ovld intel_sub_group_avc_mce_get_default_inter_shape_penalty( uchar slice_type, uchar qp); uchar __ovld intel_sub_group_avc_mce_get_default_inter_direction_penalty( uchar slice_type, uchar qp); uint __ovld 
intel_sub_group_avc_mce_get_default_intra_luma_shape_penalty( uchar slice_type, uchar qp); uint2 __ovld intel_sub_group_avc_mce_get_default_inter_motion_vector_cost_table( uchar slice_type, uchar qp); uchar __ovld intel_sub_group_avc_mce_get_default_intra_luma_mode_penalty( uchar slice_type, uchar qp); uint2 __ovld intel_sub_group_avc_mce_get_default_high_penalty_cost_table(); uint2 __ovld intel_sub_group_avc_mce_get_default_medium_penalty_cost_table(); uint2 __ovld intel_sub_group_avc_mce_get_default_low_penalty_cost_table(); uint __ovld intel_sub_group_avc_mce_get_default_non_dc_luma_intra_penalty(); uchar __ovld intel_sub_group_avc_mce_get_default_intra_chroma_mode_base_penalty(); intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_mce_set_inter_base_multi_reference_penalty( uchar reference_base_penalty, intel_sub_group_avc_mce_payload_t payload); intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_mce_set_inter_shape_penalty( ulong packed_shape_penalty, intel_sub_group_avc_mce_payload_t payload); intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_mce_set_inter_direction_penalty( uchar direction_cost, intel_sub_group_avc_mce_payload_t payload); intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_mce_set_motion_vector_cost_function( ulong packed_cost_center_delta, uint2 packed_cost_table, uchar cost_precision, intel_sub_group_avc_mce_payload_t payload); intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_mce_set_ac_only_haar( intel_sub_group_avc_mce_payload_t payload); intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_mce_set_source_interlaced_field_polarity( uchar src_field_polarity, intel_sub_group_avc_mce_payload_t payload); intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_mce_set_single_reference_interlaced_field_polarity( uchar ref_field_polarity, intel_sub_group_avc_mce_payload_t payload); intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_mce_set_dual_reference_interlaced_field_polarities( uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity, intel_sub_group_avc_mce_payload_t payload); ulong __ovld intel_sub_group_avc_mce_get_motion_vectors( intel_sub_group_avc_mce_result_t result); ushort __ovld intel_sub_group_avc_mce_get_inter_distortions( intel_sub_group_avc_mce_result_t result); ushort __ovld intel_sub_group_avc_mce_get_best_inter_distortion( intel_sub_group_avc_mce_result_t result); uchar __ovld intel_sub_group_avc_mce_get_inter_major_shape( intel_sub_group_avc_mce_result_t result); uchar __ovld intel_sub_group_avc_mce_get_inter_minor_shapes( intel_sub_group_avc_mce_result_t result); uchar __ovld intel_sub_group_avc_mce_get_inter_directions( intel_sub_group_avc_mce_result_t result); uchar __ovld intel_sub_group_avc_mce_get_inter_motion_vector_count( intel_sub_group_avc_mce_result_t result); uint __ovld intel_sub_group_avc_mce_get_inter_reference_ids( intel_sub_group_avc_mce_result_t result); uchar __ovld intel_sub_group_avc_mce_get_inter_reference_interlaced_field_polarities( uint packed_reference_ids, uint packed_reference_parameter_field_polarities, intel_sub_group_avc_mce_result_t result); // IME built-in functions intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_initialize( ushort2 src_coord, uchar partition_mask, uchar sad_adjustment); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_single_reference( short2 ref_offset, uchar search_window_config, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld 
intel_sub_group_avc_ime_set_dual_reference( short2 fwd_ref_offset, short2 bwd_ref_offset, uchar search_window_config, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_max_motion_vector_count( uchar max_motion_vector_count, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_unidirectional_mix_disable( intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_early_search_termination_threshold( uchar threshold, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_weighted_sad( uint packed_sad_weights, intel_sub_group_avc_ime_payload_t payload); __attribute__((deprecated("If you use the latest Intel driver, please use " "intel_sub_group_avc_ime_ref_window_size instead", "intel_sub_group_avc_ime_ref_window_size"))) ushort2 __ovld intel_sub_group_ime_ref_window_size(uchar search_window_config, char dual_ref); ushort2 __ovld intel_sub_group_avc_ime_ref_window_size( uchar search_window_config, char dual_ref); short2 __ovld intel_sub_group_avc_ime_adjust_ref_offset( short2 ref_offset, ushort2 src_coord, ushort2 ref_window_size, ushort2 image_size); #if defined(__opencl_c_images) intel_sub_group_avc_ime_result_t __ovld intel_sub_group_avc_ime_evaluate_with_single_reference( read_only image2d_t src_image, read_only image2d_t ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ime_result_t __ovld intel_sub_group_avc_ime_evaluate_with_dual_reference( read_only image2d_t src_image, read_only image2d_t fwd_ref_image, read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ime_result_single_reference_streamout_t __ovld intel_sub_group_avc_ime_evaluate_with_single_reference_streamout( read_only image2d_t src_image, read_only image2d_t ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ime_result_dual_reference_streamout_t __ovld intel_sub_group_avc_ime_evaluate_with_dual_reference_streamout( read_only image2d_t src_image, read_only image2d_t fwd_ref_image, read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ime_result_t __ovld intel_sub_group_avc_ime_evaluate_with_single_reference_streamin( read_only image2d_t src_image, read_only image2d_t ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload, intel_sub_group_avc_ime_single_reference_streamin_t streamin_components); intel_sub_group_avc_ime_result_t __ovld intel_sub_group_avc_ime_evaluate_with_dual_reference_streamin( read_only image2d_t src_image, read_only image2d_t fwd_ref_image, read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload, intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components); intel_sub_group_avc_ime_result_single_reference_streamout_t __ovld intel_sub_group_avc_ime_evaluate_with_single_reference_streaminout( read_only image2d_t src_image, read_only image2d_t ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload, intel_sub_group_avc_ime_single_reference_streamin_t streamin_components); intel_sub_group_avc_ime_result_dual_reference_streamout_t __ovld intel_sub_group_avc_ime_evaluate_with_dual_reference_streaminout( read_only image2d_t 
src_image, read_only image2d_t fwd_ref_image, read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload, intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components); #endif intel_sub_group_avc_ime_single_reference_streamin_t __ovld intel_sub_group_avc_ime_get_single_reference_streamin( intel_sub_group_avc_ime_result_single_reference_streamout_t result); intel_sub_group_avc_ime_dual_reference_streamin_t __ovld intel_sub_group_avc_ime_get_dual_reference_streamin( intel_sub_group_avc_ime_result_dual_reference_streamout_t result); intel_sub_group_avc_ime_result_t __ovld intel_sub_group_avc_ime_strip_single_reference_streamout( intel_sub_group_avc_ime_result_single_reference_streamout_t result); intel_sub_group_avc_ime_result_t __ovld intel_sub_group_avc_ime_strip_dual_reference_streamout( intel_sub_group_avc_ime_result_dual_reference_streamout_t result); uint __ovld intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors( intel_sub_group_avc_ime_result_single_reference_streamout_t result, uchar major_shape); ushort __ovld intel_sub_group_avc_ime_get_streamout_major_shape_distortions( intel_sub_group_avc_ime_result_single_reference_streamout_t result, uchar major_shape); uchar __ovld intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids( intel_sub_group_avc_ime_result_single_reference_streamout_t result, uchar major_shape); uint __ovld intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors( intel_sub_group_avc_ime_result_dual_reference_streamout_t result, uchar major_shape, uchar direction); ushort __ovld intel_sub_group_avc_ime_get_streamout_major_shape_distortions( intel_sub_group_avc_ime_result_dual_reference_streamout_t result, uchar major_shape, uchar direction); uchar __ovld intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids( intel_sub_group_avc_ime_result_dual_reference_streamout_t result, uchar major_shape, uchar direction); uchar __ovld intel_sub_group_avc_ime_get_border_reached( uchar image_select, intel_sub_group_avc_ime_result_t result); uchar __ovld intel_sub_group_avc_ime_get_truncated_search_indication( intel_sub_group_avc_ime_result_t result); uchar __ovld intel_sub_group_avc_ime_get_unidirectional_early_search_termination( intel_sub_group_avc_ime_result_t result); uint __ovld intel_sub_group_avc_ime_get_weighting_pattern_minimum_motion_vector( intel_sub_group_avc_ime_result_t result); ushort __ovld intel_sub_group_avc_ime_get_weighting_pattern_minimum_distortion( intel_sub_group_avc_ime_result_t result); // REF built-in functions intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_fme_initialize( ushort2 src_coord, ulong motion_vectors, uchar major_shapes, uchar minor_shapes, uchar directions, uchar pixel_resolution, uchar sad_adjustment); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_bme_initialize( ushort2 src_coord, ulong motion_vectors, uchar major_shapes, uchar minor_shapes, uchar directions, uchar pixel_resolution, uchar bidirectional_weight, uchar sad_adjustment); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_ref_set_bidirectional_mix_disable( intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_ref_set_bilinear_filter_enable( intel_sub_group_avc_ref_payload_t payload); #if defined(__opencl_c_images) intel_sub_group_avc_ref_result_t __ovld intel_sub_group_avc_ref_evaluate_with_single_reference( read_only image2d_t src_image, read_only image2d_t ref_image, sampler_t 
vme_media_sampler, intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_ref_result_t __ovld intel_sub_group_avc_ref_evaluate_with_dual_reference( read_only image2d_t src_image, read_only image2d_t fwd_ref_image, read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_ref_result_t __ovld intel_sub_group_avc_ref_evaluate_with_multi_reference( read_only image2d_t src_image, uint packed_reference_ids, sampler_t vme_media_sampler, intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_ref_result_t __ovld intel_sub_group_avc_ref_evaluate_with_multi_reference( read_only image2d_t src_image, uint packed_reference_ids, uchar packed_reference_field_polarities, sampler_t vme_media_sampler, intel_sub_group_avc_ref_payload_t payload); #endif //defined(__opencl_c_images) // SIC built-in functions intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_initialize( ushort2 src_coord); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_configure_skc( uint skip_block_partition_type, uint skip_motion_vector_mask, ulong motion_vectors, uchar bidirectional_weight, uchar skip_sad_adjustment, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_configure_ipe( uchar luma_intra_partition_mask, uchar intra_neighbour_availability, uchar left_edge_luma_pixels, uchar upper_left_corner_luma_pixel, uchar upper_edge_luma_pixels, uchar upper_right_edge_luma_pixels, uchar intra_sad_adjustment, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_configure_ipe( uchar luma_intra_partition_mask, uchar intra_neighbour_availability, uchar left_edge_luma_pixels, uchar upper_left_corner_luma_pixel, uchar upper_edge_luma_pixels, uchar upper_right_edge_luma_pixels, ushort left_edge_chroma_pixels, ushort upper_left_corner_chroma_pixel, ushort upper_edge_chroma_pixels, uchar intra_sad_adjustment, intel_sub_group_avc_sic_payload_t payload); uint __ovld intel_sub_group_avc_sic_get_motion_vector_mask( uint skip_block_partition_type, uchar direction); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_intra_luma_shape_penalty( uint packed_shape_cost, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_intra_luma_mode_cost_function( uchar luma_mode_penalty, uint luma_packed_neighbor_modes, uint luma_packed_non_dc_penalty, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_intra_chroma_mode_cost_function( uchar chroma_mode_penalty, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_skc_bilinear_filter_enable( intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_skc_forward_transform_enable( ulong packed_sad_coefficients, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_block_based_raw_skip_sad( uchar block_based_skip_type, intel_sub_group_avc_sic_payload_t payload); #if defined(__opencl_c_images) intel_sub_group_avc_sic_result_t __ovld intel_sub_group_avc_sic_evaluate_ipe( read_only image2d_t src_image, sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_result_t __ovld intel_sub_group_avc_sic_evaluate_with_single_reference( read_only image2d_t 
src_image, read_only image2d_t ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_result_t __ovld intel_sub_group_avc_sic_evaluate_with_dual_reference( read_only image2d_t src_image, read_only image2d_t fwd_ref_image, read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_result_t __ovld intel_sub_group_avc_sic_evaluate_with_multi_reference( read_only image2d_t src_image, uint packed_reference_ids, sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_result_t __ovld intel_sub_group_avc_sic_evaluate_with_multi_reference( read_only image2d_t src_image, uint packed_reference_ids, uchar packed_reference_field_polarities, sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload); #endif //defined(__opencl_c_images) uchar __ovld intel_sub_group_avc_sic_get_ipe_luma_shape( intel_sub_group_avc_sic_result_t result); ushort __ovld intel_sub_group_avc_sic_get_best_ipe_luma_distortion( intel_sub_group_avc_sic_result_t result); ushort __ovld intel_sub_group_avc_sic_get_best_ipe_chroma_distortion( intel_sub_group_avc_sic_result_t result); ulong __ovld intel_sub_group_avc_sic_get_packed_ipe_luma_modes( intel_sub_group_avc_sic_result_t result); uchar __ovld intel_sub_group_avc_sic_get_ipe_chroma_mode( intel_sub_group_avc_sic_result_t result); uint __ovld intel_sub_group_avc_sic_get_packed_skc_luma_count_threshold( intel_sub_group_avc_sic_result_t result); ulong __ovld intel_sub_group_avc_sic_get_packed_skc_luma_sum_threshold( intel_sub_group_avc_sic_result_t result); ushort __ovld intel_sub_group_avc_sic_get_inter_raw_sads( intel_sub_group_avc_sic_result_t result); // Wrappers intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_inter_base_multi_reference_penalty( uchar reference_base_penalty, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_ref_set_inter_base_multi_reference_penalty( uchar reference_base_penalty, intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_inter_base_multi_reference_penalty( uchar reference_base_penalty, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_inter_shape_penalty( ulong packed_shape_cost, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_ref_set_inter_shape_penalty( ulong packed_shape_cost, intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_inter_shape_penalty( ulong packed_shape_cost, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_inter_direction_penalty( uchar direction_cost, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_ref_set_inter_direction_penalty( uchar direction_cost, intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_inter_direction_penalty( uchar direction_cost, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_motion_vector_cost_function( ulong packed_cost_center_delta, uint2 packed_cost_table, uchar cost_precision, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ref_payload_t __ovld 
intel_sub_group_avc_ref_set_motion_vector_cost_function( ulong packed_cost_center_delta, uint2 packed_cost_table, uchar cost_precision, intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_motion_vector_cost_function( ulong packed_cost_center_delta, uint2 packed_cost_table, uchar cost_precision, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_source_interlaced_field_polarity( uchar src_field_polarity, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_ref_set_source_interlaced_field_polarity( uchar src_field_polarity, intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_source_interlaced_field_polarity( uchar src_field_polarity, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_single_reference_interlaced_field_polarity( uchar ref_field_polarity, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_ref_set_single_reference_interlaced_field_polarity( uchar ref_field_polarity, intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_single_reference_interlaced_field_polarity( uchar ref_field_polarity, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_dual_reference_interlaced_field_polarities( uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_ref_set_dual_reference_interlaced_field_polarities( uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity, intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_dual_reference_interlaced_field_polarities( uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_ac_only_haar( intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_ref_set_ac_only_haar( intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_ac_only_haar( intel_sub_group_avc_sic_payload_t payload); ulong __ovld intel_sub_group_avc_ime_get_motion_vectors( intel_sub_group_avc_ime_result_t result); ulong __ovld intel_sub_group_avc_ref_get_motion_vectors( intel_sub_group_avc_ref_result_t result); ushort __ovld intel_sub_group_avc_ime_get_inter_distortions( intel_sub_group_avc_ime_result_t result); ushort __ovld intel_sub_group_avc_ref_get_inter_distortions( intel_sub_group_avc_ref_result_t result); ushort __ovld intel_sub_group_avc_sic_get_inter_distortions( intel_sub_group_avc_sic_result_t result); ushort __ovld intel_sub_group_avc_ime_get_best_inter_distortion( intel_sub_group_avc_ime_result_t result); ushort __ovld intel_sub_group_avc_ref_get_best_inter_distortion( intel_sub_group_avc_ref_result_t result); uchar __ovld intel_sub_group_avc_ime_get_inter_major_shape( intel_sub_group_avc_ime_result_t result); uchar __ovld intel_sub_group_avc_ref_get_inter_major_shape( intel_sub_group_avc_ref_result_t result); uchar __ovld intel_sub_group_avc_ime_get_inter_minor_shapes( intel_sub_group_avc_ime_result_t result); uchar __ovld 
intel_sub_group_avc_ref_get_inter_minor_shapes( intel_sub_group_avc_ref_result_t result); uchar __ovld intel_sub_group_avc_ime_get_inter_directions( intel_sub_group_avc_ime_result_t result); uchar __ovld intel_sub_group_avc_ref_get_inter_directions( intel_sub_group_avc_ref_result_t result); uchar __ovld intel_sub_group_avc_ime_get_inter_motion_vector_count( intel_sub_group_avc_ime_result_t result); uchar __ovld intel_sub_group_avc_ref_get_inter_motion_vector_count( intel_sub_group_avc_ref_result_t result); uint __ovld intel_sub_group_avc_ime_get_inter_reference_ids( intel_sub_group_avc_ime_result_t result); uint __ovld intel_sub_group_avc_ref_get_inter_reference_ids( intel_sub_group_avc_ref_result_t result); uchar __ovld intel_sub_group_avc_ime_get_inter_reference_interlaced_field_polarities( uint packed_reference_ids, uint packed_reference_parameter_field_polarities, intel_sub_group_avc_ime_result_t result); uchar __ovld intel_sub_group_avc_ref_get_inter_reference_interlaced_field_polarities( uint packed_reference_ids, uint packed_reference_parameter_field_polarities, intel_sub_group_avc_ref_result_t result); // Type conversion functions intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_ime_convert_to_mce_payload( intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_mce_convert_to_ime_payload( intel_sub_group_avc_mce_payload_t payload); intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_ref_convert_to_mce_payload( intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_mce_convert_to_ref_payload( intel_sub_group_avc_mce_payload_t payload); intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_sic_convert_to_mce_payload( intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_mce_convert_to_sic_payload( intel_sub_group_avc_mce_payload_t payload); intel_sub_group_avc_mce_result_t __ovld intel_sub_group_avc_ime_convert_to_mce_result( intel_sub_group_avc_ime_result_t result); intel_sub_group_avc_ime_result_t __ovld intel_sub_group_avc_mce_convert_to_ime_result( intel_sub_group_avc_mce_result_t result); intel_sub_group_avc_mce_result_t __ovld intel_sub_group_avc_ref_convert_to_mce_result( intel_sub_group_avc_ref_result_t result); intel_sub_group_avc_ref_result_t __ovld intel_sub_group_avc_mce_convert_to_ref_result( intel_sub_group_avc_mce_result_t result); intel_sub_group_avc_mce_result_t __ovld intel_sub_group_avc_sic_convert_to_mce_result( intel_sub_group_avc_sic_result_t result); intel_sub_group_avc_sic_result_t __ovld intel_sub_group_avc_mce_convert_to_sic_result( intel_sub_group_avc_mce_result_t result); #pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : end #endif // cl_intel_device_side_avc_motion_estimation #ifdef cl_amd_media_ops uint __ovld amd_bitalign(uint, uint, uint); uint2 __ovld amd_bitalign(uint2, uint2, uint2); uint3 __ovld amd_bitalign(uint3, uint3, uint3); uint4 __ovld amd_bitalign(uint4, uint4, uint4); uint8 __ovld amd_bitalign(uint8, uint8, uint8); uint16 __ovld amd_bitalign(uint16, uint16, uint16); uint __ovld amd_bytealign(uint, uint, uint); uint2 __ovld amd_bytealign(uint2, uint2, uint2); uint3 __ovld amd_bytealign(uint3, uint3, uint3); uint4 __ovld amd_bytealign(uint4, uint4, uint4); uint8 __ovld amd_bytealign(uint8, uint8, uint8); uint16 __ovld amd_bytealign(uint16, uint16, uint16); uint __ovld amd_lerp(uint, uint, uint); uint2 __ovld amd_lerp(uint2, uint2, uint2); uint3 
__ovld amd_lerp(uint3, uint3, uint3); uint4 __ovld amd_lerp(uint4, uint4, uint4); uint8 __ovld amd_lerp(uint8, uint8, uint8); uint16 __ovld amd_lerp(uint16, uint16, uint16); uint __ovld amd_pack(float4 v); uint __ovld amd_sad4(uint4, uint4, uint); uint __ovld amd_sadhi(uint, uint, uint); uint2 __ovld amd_sadhi(uint2, uint2, uint2); uint3 __ovld amd_sadhi(uint3, uint3, uint3); uint4 __ovld amd_sadhi(uint4, uint4, uint4); uint8 __ovld amd_sadhi(uint8, uint8, uint8); uint16 __ovld amd_sadhi(uint16, uint16, uint16); uint __ovld amd_sad(uint, uint, uint); uint2 __ovld amd_sad(uint2, uint2, uint2); uint3 __ovld amd_sad(uint3, uint3, uint3); uint4 __ovld amd_sad(uint4, uint4, uint4); uint8 __ovld amd_sad(uint8, uint8, uint8); uint16 __ovld amd_sad(uint16, uint16, uint16); float __ovld amd_unpack0(uint); float2 __ovld amd_unpack0(uint2); float3 __ovld amd_unpack0(uint3); float4 __ovld amd_unpack0(uint4); float8 __ovld amd_unpack0(uint8); float16 __ovld amd_unpack0(uint16); float __ovld amd_unpack1(uint); float2 __ovld amd_unpack1(uint2); float3 __ovld amd_unpack1(uint3); float4 __ovld amd_unpack1(uint4); float8 __ovld amd_unpack1(uint8); float16 __ovld amd_unpack1(uint16); float __ovld amd_unpack2(uint); float2 __ovld amd_unpack2(uint2); float3 __ovld amd_unpack2(uint3); float4 __ovld amd_unpack2(uint4); float8 __ovld amd_unpack2(uint8); float16 __ovld amd_unpack2(uint16); float __ovld amd_unpack3(uint); float2 __ovld amd_unpack3(uint2); float3 __ovld amd_unpack3(uint3); float4 __ovld amd_unpack3(uint4); float8 __ovld amd_unpack3(uint8); float16 __ovld amd_unpack3(uint16); #endif // cl_amd_media_ops #ifdef cl_amd_media_ops2 int __ovld amd_bfe(int src0, uint src1, uint src2); int2 __ovld amd_bfe(int2 src0, uint2 src1, uint2 src2); int3 __ovld amd_bfe(int3 src0, uint3 src1, uint3 src2); int4 __ovld amd_bfe(int4 src0, uint4 src1, uint4 src2); int8 __ovld amd_bfe(int8 src0, uint8 src1, uint8 src2); int16 __ovld amd_bfe(int16 src0, uint16 src1, uint16 src2); uint __ovld amd_bfe(uint src0, uint src1, uint src2); uint2 __ovld amd_bfe(uint2 src0, uint2 src1, uint2 src2); uint3 __ovld amd_bfe(uint3 src0, uint3 src1, uint3 src2); uint4 __ovld amd_bfe(uint4 src0, uint4 src1, uint4 src2); uint8 __ovld amd_bfe(uint8 src0, uint8 src1, uint8 src2); uint16 __ovld amd_bfe(uint16 src0, uint16 src1, uint16 src2); uint __ovld amd_bfm(uint src0, uint src1); uint2 __ovld amd_bfm(uint2 src0, uint2 src1); uint3 __ovld amd_bfm(uint3 src0, uint3 src1); uint4 __ovld amd_bfm(uint4 src0, uint4 src1); uint8 __ovld amd_bfm(uint8 src0, uint8 src1); uint16 __ovld amd_bfm(uint16 src0, uint16 src1); float __ovld amd_max3(float src0, float src1, float src2); float2 __ovld amd_max3(float2 src0, float2 src1, float2 src2); float3 __ovld amd_max3(float3 src0, float3 src1, float3 src2); float4 __ovld amd_max3(float4 src0, float4 src1, float4 src2); float8 __ovld amd_max3(float8 src0, float8 src1, float8 src2); float16 __ovld amd_max3(float16 src0, float16 src1, float16 src2); int __ovld amd_max3(int src0, int src1, int src2); int2 __ovld amd_max3(int2 src0, int2 src1, int2 src2); int3 __ovld amd_max3(int3 src0, int3 src1, int3 src2); int4 __ovld amd_max3(int4 src0, int4 src1, int4 src2); int8 __ovld amd_max3(int8 src0, int8 src1, int8 src2); int16 __ovld amd_max3(int16 src0, int16 src1, int16 src2); uint __ovld amd_max3(uint src0, uint src1, uint src2); uint2 __ovld amd_max3(uint2 src0, uint2 src1, uint2 src2); uint3 __ovld amd_max3(uint3 src0, uint3 src1, uint3 src2); uint4 __ovld amd_max3(uint4 src0, uint4 src1, uint4 src2); uint8 
__ovld amd_max3(uint8 src0, uint8 src1, uint8 src2); uint16 __ovld amd_max3(uint16 src0, uint16 src1, uint16 src2); float __ovld amd_median3(float src0, float src1, float src2); float2 __ovld amd_median3(float2 src0, float2 src1, float2 src2); float3 __ovld amd_median3(float3 src0, float3 src1, float3 src2); float4 __ovld amd_median3(float4 src0, float4 src1, float4 src2); float8 __ovld amd_median3(float8 src0, float8 src1, float8 src2); float16 __ovld amd_median3(float16 src0, float16 src1, float16 src2); int __ovld amd_median3(int src0, int src1, int src2); int2 __ovld amd_median3(int2 src0, int2 src1, int2 src2); int3 __ovld amd_median3(int3 src0, int3 src1, int3 src2); int4 __ovld amd_median3(int4 src0, int4 src1, int4 src2); int8 __ovld amd_median3(int8 src0, int8 src1, int8 src2); int16 __ovld amd_median3(int16 src0, int16 src1, int16 src2); uint __ovld amd_median3(uint src0, uint src1, uint src2); uint2 __ovld amd_median3(uint2 src0, uint2 src1, uint2 src2); uint3 __ovld amd_median3(uint3 src0, uint3 src1, uint3 src2); uint4 __ovld amd_median3(uint4 src0, uint4 src1, uint4 src2); uint8 __ovld amd_median3(uint8 src0, uint8 src1, uint8 src2); uint16 __ovld amd_median3(uint16 src0, uint16 src1, uint16 src2); float __ovld amd_min3(float src0, float src1, float src); float2 __ovld amd_min3(float2 src0, float2 src1, float2 src); float3 __ovld amd_min3(float3 src0, float3 src1, float3 src); float4 __ovld amd_min3(float4 src0, float4 src1, float4 src); float8 __ovld amd_min3(float8 src0, float8 src1, float8 src); float16 __ovld amd_min3(float16 src0, float16 src1, float16 src); int __ovld amd_min3(int src0, int src1, int src2); int2 __ovld amd_min3(int2 src0, int2 src1, int2 src2); int3 __ovld amd_min3(int3 src0, int3 src1, int3 src2); int4 __ovld amd_min3(int4 src0, int4 src1, int4 src2); int8 __ovld amd_min3(int8 src0, int8 src1, int8 src2); int16 __ovld amd_min3(int16 src0, int16 src1, int16 src2); uint __ovld amd_min3(uint src0, uint src1, uint src2); uint2 __ovld amd_min3(uint2 src0, uint2 src1, uint2 src2); uint3 __ovld amd_min3(uint3 src0, uint3 src1, uint3 src2); uint4 __ovld amd_min3(uint4 src0, uint4 src1, uint4 src2); uint8 __ovld amd_min3(uint8 src0, uint8 src1, uint8 src2); uint16 __ovld amd_min3(uint16 src0, uint16 src1, uint16 src2); ulong __ovld amd_mqsad(ulong src0, uint src1, ulong src2); ulong2 __ovld amd_mqsad(ulong2 src0, uint2 src1, ulong2 src2); ulong3 __ovld amd_mqsad(ulong3 src0, uint3 src1, ulong3 src2); ulong4 __ovld amd_mqsad(ulong4 src0, uint4 src1, ulong4 src2); ulong8 __ovld amd_mqsad(ulong8 src0, uint8 src1, ulong8 src2); ulong16 __ovld amd_mqsad(ulong16 src0, uint16 src1, ulong16 src2); ulong __ovld amd_qsad(ulong src0, uint src1, ulong src2); ulong2 __ovld amd_qsad(ulong2 src0, uint2 src1, ulong2 src2); ulong3 __ovld amd_qsad(ulong3 src0, uint3 src1, ulong3 src2); ulong4 __ovld amd_qsad(ulong4 src0, uint4 src1, ulong4 src2); ulong8 __ovld amd_qsad(ulong8 src0, uint8 src1, ulong8 src2); ulong16 __ovld amd_qsad(ulong16 src0, uint16 src1, ulong16 src2); uint __ovld amd_msad(uint src0, uint src1, uint src2); uint2 __ovld amd_msad(uint2 src0, uint2 src1, uint2 src2); uint3 __ovld amd_msad(uint3 src0, uint3 src1, uint3 src2); uint4 __ovld amd_msad(uint4 src0, uint4 src1, uint4 src2); uint8 __ovld amd_msad(uint8 src0, uint8 src1, uint8 src2); uint16 __ovld amd_msad(uint16 src0, uint16 src1, uint16 src2); uint __ovld amd_sadd(uint src0, uint src1, uint src2); uint2 __ovld amd_sadd(uint2 src0, uint2 src1, uint2 src2); uint3 __ovld amd_sadd(uint3 src0, uint3 src1, 
uint3 src2); uint4 __ovld amd_sadd(uint4 src0, uint4 src1, uint4 src2); uint8 __ovld amd_sadd(uint8 src0, uint8 src1, uint8 src2); uint16 __ovld amd_sadd(uint16 src0, uint16 src1, uint16 src2); uint __ovld amd_sadw(uint src0, uint src1, uint src2); uint2 __ovld amd_sadw(uint2 src0, uint2 src1, uint2 src2); uint3 __ovld amd_sadw(uint3 src0, uint3 src1, uint3 src2); uint4 __ovld amd_sadw(uint4 src0, uint4 src1, uint4 src2); uint8 __ovld amd_sadw(uint8 src0, uint8 src1, uint8 src2); uint16 __ovld amd_sadw(uint16 src0, uint16 src1, uint16 src2); #endif // cl_amd_media_ops2 #if defined(cl_arm_integer_dot_product_int8) uint __ovld arm_dot(uchar4, uchar4); int __ovld arm_dot(char4, char4); #endif // defined(cl_arm_integer_dot_product_int8) #if defined(cl_arm_integer_dot_product_accumulate_int8) uint __ovld arm_dot_acc(uchar4, uchar4, uint); int __ovld arm_dot_acc(char4, char4, int); #endif // defined(cl_arm_integer_dot_product_accumulate_int8) #if defined(cl_arm_integer_dot_product_accumulate_int16) uint __ovld arm_dot_acc(ushort2, ushort2, uint); int __ovld arm_dot_acc(short2, short2, int); #endif // defined(cl_arm_integer_dot_product_accumulate_int16) #if defined(cl_arm_integer_dot_product_accumulate_saturate_int8) uint __ovld arm_dot_acc_sat(uchar4, uchar4, uint); int __ovld arm_dot_acc_sat(char4, char4, int); #endif // defined(cl_arm_integer_dot_product_accumulate_saturate_int8) // Disable any extensions we may have enabled previously. #pragma OPENCL EXTENSION all : disable #undef __opencl_c_named_address_space_builtins #undef __cnfn #undef __ovld #endif //_OPENCL_H_ /*===------------ ptwriteintrin.h - PTWRITE intrinsic --------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use <ptwriteintrin.h> directly; include <x86intrin.h> instead." #endif #ifndef __PTWRITEINTRIN_H #define __PTWRITEINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("ptwrite"))) static __inline__ void __DEFAULT_FN_ATTRS _ptwrite32(unsigned int __value) { __builtin_ia32_ptwrite32(__value); } #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS _ptwrite64(unsigned long long __value) { __builtin_ia32_ptwrite64(__value); } #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS #endif /* __PTWRITEINTRIN_H */ /*===---- riscv_vector.h - RISC-V V-extension RVVIntrinsics -------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __RISCV_VECTOR_H #define __RISCV_VECTOR_H #include <stdint.h> #include <stddef.h> #ifndef __riscv_vector #error "Vector intrinsics require the vector extension."
#endif #ifdef __cplusplus extern "C" { #endif #pragma clang riscv intrinsic vector enum __RISCV_FRM { __RISCV_FRM_RNE = 0, __RISCV_FRM_RTZ = 1, __RISCV_FRM_RDN = 2, __RISCV_FRM_RUP = 3, __RISCV_FRM_RMM = 4, }; #define __riscv_vlenb() __builtin_rvv_vlenb() #define __riscv_vsetvl_e8mf4(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 6) #define __riscv_vsetvl_e8mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 7) #define __riscv_vsetvl_e8m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 0) #define __riscv_vsetvl_e8m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 1) #define __riscv_vsetvl_e8m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 2) #define __riscv_vsetvl_e8m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 3) #define __riscv_vsetvl_e16mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 7) #define __riscv_vsetvl_e16m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 0) #define __riscv_vsetvl_e16m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 1) #define __riscv_vsetvl_e16m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 2) #define __riscv_vsetvl_e16m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 3) #define __riscv_vsetvl_e32m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 0) #define __riscv_vsetvl_e32m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 1) #define __riscv_vsetvl_e32m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 2) #define __riscv_vsetvl_e32m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 3) #if __riscv_v_elen >= 64 #define __riscv_vsetvl_e8mf8(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 5) #define __riscv_vsetvl_e16mf4(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 6) #define __riscv_vsetvl_e32mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 7) #define __riscv_vsetvl_e64m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 0) #define __riscv_vsetvl_e64m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 1) #define __riscv_vsetvl_e64m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 2) #define __riscv_vsetvl_e64m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 3) #endif #define __riscv_vsetvlmax_e8mf4() __builtin_rvv_vsetvlimax(0, 6) #define __riscv_vsetvlmax_e8mf2() __builtin_rvv_vsetvlimax(0, 7) #define __riscv_vsetvlmax_e8m1() __builtin_rvv_vsetvlimax(0, 0) #define __riscv_vsetvlmax_e8m2() __builtin_rvv_vsetvlimax(0, 1) #define __riscv_vsetvlmax_e8m4() __builtin_rvv_vsetvlimax(0, 2) #define __riscv_vsetvlmax_e8m8() __builtin_rvv_vsetvlimax(0, 3) #define __riscv_vsetvlmax_e16mf2() __builtin_rvv_vsetvlimax(1, 7) #define __riscv_vsetvlmax_e16m1() __builtin_rvv_vsetvlimax(1, 0) #define __riscv_vsetvlmax_e16m2() __builtin_rvv_vsetvlimax(1, 1) #define __riscv_vsetvlmax_e16m4() __builtin_rvv_vsetvlimax(1, 2) #define __riscv_vsetvlmax_e16m8() __builtin_rvv_vsetvlimax(1, 3) #define __riscv_vsetvlmax_e32m1() __builtin_rvv_vsetvlimax(2, 0) #define __riscv_vsetvlmax_e32m2() __builtin_rvv_vsetvlimax(2, 1) #define __riscv_vsetvlmax_e32m4() __builtin_rvv_vsetvlimax(2, 2) #define __riscv_vsetvlmax_e32m8() __builtin_rvv_vsetvlimax(2, 3) #if __riscv_v_elen >= 64 #define __riscv_vsetvlmax_e8mf8() __builtin_rvv_vsetvlimax(0, 5) #define __riscv_vsetvlmax_e16mf4() __builtin_rvv_vsetvlimax(1, 6) #define __riscv_vsetvlmax_e32mf2() __builtin_rvv_vsetvlimax(2, 7) #define __riscv_vsetvlmax_e64m1() __builtin_rvv_vsetvlimax(3, 0) #define __riscv_vsetvlmax_e64m2() __builtin_rvv_vsetvlimax(3, 1) #define __riscv_vsetvlmax_e64m4() __builtin_rvv_vsetvlimax(3, 2) #define __riscv_vsetvlmax_e64m8() __builtin_rvv_vsetvlimax(3, 3) #endif enum __RISCV_VXRM { __RISCV_VXRM_RNU = 0, __RISCV_VXRM_RNE = 1, __RISCV_VXRM_RDN = 2, __RISCV_VXRM_ROD = 3, }; 
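/*
 * Illustrative usage sketch (not part of the header): the __riscv_vsetvl_e*m* and
 * __riscv_vsetvlmax_e*m* macros defined above are typically used to drive a
 * strip-mined loop, requesting a vector length for the elements that remain and
 * advancing by whatever length the hardware grants. Only the macros defined above
 * plus size_t (from <stddef.h>) are used; the per-chunk body is left abstract, and
 * the function name rvv_process_in_chunks is a hypothetical example name.
 */
static inline void rvv_process_in_chunks(size_t n) {
  size_t vlmax = __riscv_vsetvlmax_e32m1(); /* largest vl for SEW=32, LMUL=1 */
  (void)vlmax;                              /* informational only in this sketch */
  size_t i = 0;
  while (i < n) {
    /* vl is non-zero while elements remain and never exceeds min(n - i, vlmax),
       so the loop always terminates. */
    size_t vl = __riscv_vsetvl_e32m1(n - i);
    /* ... load, compute on, and store vl elements starting at index i,
       e.g. with the vint32m1_t / vfloat32m1_t types defined below ... */
    i += vl;
  }
}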
typedef __rvv_bool64_t vbool64_t; typedef __rvv_bool32_t vbool32_t; typedef __rvv_bool16_t vbool16_t; typedef __rvv_bool8_t vbool8_t; typedef __rvv_bool4_t vbool4_t; typedef __rvv_bool2_t vbool2_t; typedef __rvv_bool1_t vbool1_t; typedef __rvv_int8mf8_t vint8mf8_t; typedef __rvv_uint8mf8_t vuint8mf8_t; typedef __rvv_int8mf8x2_t vint8mf8x2_t; typedef __rvv_uint8mf8x2_t vuint8mf8x2_t; typedef __rvv_int8mf8x3_t vint8mf8x3_t; typedef __rvv_uint8mf8x3_t vuint8mf8x3_t; typedef __rvv_int8mf8x4_t vint8mf8x4_t; typedef __rvv_uint8mf8x4_t vuint8mf8x4_t; typedef __rvv_int8mf8x5_t vint8mf8x5_t; typedef __rvv_uint8mf8x5_t vuint8mf8x5_t; typedef __rvv_int8mf8x6_t vint8mf8x6_t; typedef __rvv_uint8mf8x6_t vuint8mf8x6_t; typedef __rvv_int8mf8x7_t vint8mf8x7_t; typedef __rvv_uint8mf8x7_t vuint8mf8x7_t; typedef __rvv_int8mf8x8_t vint8mf8x8_t; typedef __rvv_uint8mf8x8_t vuint8mf8x8_t; typedef __rvv_int8mf4_t vint8mf4_t; typedef __rvv_uint8mf4_t vuint8mf4_t; typedef __rvv_int8mf4x2_t vint8mf4x2_t; typedef __rvv_uint8mf4x2_t vuint8mf4x2_t; typedef __rvv_int8mf4x3_t vint8mf4x3_t; typedef __rvv_uint8mf4x3_t vuint8mf4x3_t; typedef __rvv_int8mf4x4_t vint8mf4x4_t; typedef __rvv_uint8mf4x4_t vuint8mf4x4_t; typedef __rvv_int8mf4x5_t vint8mf4x5_t; typedef __rvv_uint8mf4x5_t vuint8mf4x5_t; typedef __rvv_int8mf4x6_t vint8mf4x6_t; typedef __rvv_uint8mf4x6_t vuint8mf4x6_t; typedef __rvv_int8mf4x7_t vint8mf4x7_t; typedef __rvv_uint8mf4x7_t vuint8mf4x7_t; typedef __rvv_int8mf4x8_t vint8mf4x8_t; typedef __rvv_uint8mf4x8_t vuint8mf4x8_t; typedef __rvv_int8mf2_t vint8mf2_t; typedef __rvv_uint8mf2_t vuint8mf2_t; typedef __rvv_int8mf2x2_t vint8mf2x2_t; typedef __rvv_uint8mf2x2_t vuint8mf2x2_t; typedef __rvv_int8mf2x3_t vint8mf2x3_t; typedef __rvv_uint8mf2x3_t vuint8mf2x3_t; typedef __rvv_int8mf2x4_t vint8mf2x4_t; typedef __rvv_uint8mf2x4_t vuint8mf2x4_t; typedef __rvv_int8mf2x5_t vint8mf2x5_t; typedef __rvv_uint8mf2x5_t vuint8mf2x5_t; typedef __rvv_int8mf2x6_t vint8mf2x6_t; typedef __rvv_uint8mf2x6_t vuint8mf2x6_t; typedef __rvv_int8mf2x7_t vint8mf2x7_t; typedef __rvv_uint8mf2x7_t vuint8mf2x7_t; typedef __rvv_int8mf2x8_t vint8mf2x8_t; typedef __rvv_uint8mf2x8_t vuint8mf2x8_t; typedef __rvv_int8m1_t vint8m1_t; typedef __rvv_uint8m1_t vuint8m1_t; typedef __rvv_int8m1x2_t vint8m1x2_t; typedef __rvv_uint8m1x2_t vuint8m1x2_t; typedef __rvv_int8m1x3_t vint8m1x3_t; typedef __rvv_uint8m1x3_t vuint8m1x3_t; typedef __rvv_int8m1x4_t vint8m1x4_t; typedef __rvv_uint8m1x4_t vuint8m1x4_t; typedef __rvv_int8m1x5_t vint8m1x5_t; typedef __rvv_uint8m1x5_t vuint8m1x5_t; typedef __rvv_int8m1x6_t vint8m1x6_t; typedef __rvv_uint8m1x6_t vuint8m1x6_t; typedef __rvv_int8m1x7_t vint8m1x7_t; typedef __rvv_uint8m1x7_t vuint8m1x7_t; typedef __rvv_int8m1x8_t vint8m1x8_t; typedef __rvv_uint8m1x8_t vuint8m1x8_t; typedef __rvv_int8m2_t vint8m2_t; typedef __rvv_uint8m2_t vuint8m2_t; typedef __rvv_int8m2x2_t vint8m2x2_t; typedef __rvv_uint8m2x2_t vuint8m2x2_t; typedef __rvv_int8m2x3_t vint8m2x3_t; typedef __rvv_uint8m2x3_t vuint8m2x3_t; typedef __rvv_int8m2x4_t vint8m2x4_t; typedef __rvv_uint8m2x4_t vuint8m2x4_t; typedef __rvv_int8m4_t vint8m4_t; typedef __rvv_uint8m4_t vuint8m4_t; typedef __rvv_int8m4x2_t vint8m4x2_t; typedef __rvv_uint8m4x2_t vuint8m4x2_t; typedef __rvv_int8m8_t vint8m8_t; typedef __rvv_uint8m8_t vuint8m8_t; typedef __rvv_int16mf4_t vint16mf4_t; typedef __rvv_uint16mf4_t vuint16mf4_t; typedef __rvv_int16mf4x2_t vint16mf4x2_t; typedef __rvv_uint16mf4x2_t vuint16mf4x2_t; typedef __rvv_int16mf4x3_t vint16mf4x3_t; typedef __rvv_uint16mf4x3_t 
vuint16mf4x3_t; typedef __rvv_int16mf4x4_t vint16mf4x4_t; typedef __rvv_uint16mf4x4_t vuint16mf4x4_t; typedef __rvv_int16mf4x5_t vint16mf4x5_t; typedef __rvv_uint16mf4x5_t vuint16mf4x5_t; typedef __rvv_int16mf4x6_t vint16mf4x6_t; typedef __rvv_uint16mf4x6_t vuint16mf4x6_t; typedef __rvv_int16mf4x7_t vint16mf4x7_t; typedef __rvv_uint16mf4x7_t vuint16mf4x7_t; typedef __rvv_int16mf4x8_t vint16mf4x8_t; typedef __rvv_uint16mf4x8_t vuint16mf4x8_t; typedef __rvv_int16mf2_t vint16mf2_t; typedef __rvv_uint16mf2_t vuint16mf2_t; typedef __rvv_int16mf2x2_t vint16mf2x2_t; typedef __rvv_uint16mf2x2_t vuint16mf2x2_t; typedef __rvv_int16mf2x3_t vint16mf2x3_t; typedef __rvv_uint16mf2x3_t vuint16mf2x3_t; typedef __rvv_int16mf2x4_t vint16mf2x4_t; typedef __rvv_uint16mf2x4_t vuint16mf2x4_t; typedef __rvv_int16mf2x5_t vint16mf2x5_t; typedef __rvv_uint16mf2x5_t vuint16mf2x5_t; typedef __rvv_int16mf2x6_t vint16mf2x6_t; typedef __rvv_uint16mf2x6_t vuint16mf2x6_t; typedef __rvv_int16mf2x7_t vint16mf2x7_t; typedef __rvv_uint16mf2x7_t vuint16mf2x7_t; typedef __rvv_int16mf2x8_t vint16mf2x8_t; typedef __rvv_uint16mf2x8_t vuint16mf2x8_t; typedef __rvv_int16m1_t vint16m1_t; typedef __rvv_uint16m1_t vuint16m1_t; typedef __rvv_int16m1x2_t vint16m1x2_t; typedef __rvv_uint16m1x2_t vuint16m1x2_t; typedef __rvv_int16m1x3_t vint16m1x3_t; typedef __rvv_uint16m1x3_t vuint16m1x3_t; typedef __rvv_int16m1x4_t vint16m1x4_t; typedef __rvv_uint16m1x4_t vuint16m1x4_t; typedef __rvv_int16m1x5_t vint16m1x5_t; typedef __rvv_uint16m1x5_t vuint16m1x5_t; typedef __rvv_int16m1x6_t vint16m1x6_t; typedef __rvv_uint16m1x6_t vuint16m1x6_t; typedef __rvv_int16m1x7_t vint16m1x7_t; typedef __rvv_uint16m1x7_t vuint16m1x7_t; typedef __rvv_int16m1x8_t vint16m1x8_t; typedef __rvv_uint16m1x8_t vuint16m1x8_t; typedef __rvv_int16m2_t vint16m2_t; typedef __rvv_uint16m2_t vuint16m2_t; typedef __rvv_int16m2x2_t vint16m2x2_t; typedef __rvv_uint16m2x2_t vuint16m2x2_t; typedef __rvv_int16m2x3_t vint16m2x3_t; typedef __rvv_uint16m2x3_t vuint16m2x3_t; typedef __rvv_int16m2x4_t vint16m2x4_t; typedef __rvv_uint16m2x4_t vuint16m2x4_t; typedef __rvv_int16m4_t vint16m4_t; typedef __rvv_uint16m4_t vuint16m4_t; typedef __rvv_int16m4x2_t vint16m4x2_t; typedef __rvv_uint16m4x2_t vuint16m4x2_t; typedef __rvv_int16m8_t vint16m8_t; typedef __rvv_uint16m8_t vuint16m8_t; typedef __rvv_int32mf2_t vint32mf2_t; typedef __rvv_uint32mf2_t vuint32mf2_t; typedef __rvv_int32mf2x2_t vint32mf2x2_t; typedef __rvv_uint32mf2x2_t vuint32mf2x2_t; typedef __rvv_int32mf2x3_t vint32mf2x3_t; typedef __rvv_uint32mf2x3_t vuint32mf2x3_t; typedef __rvv_int32mf2x4_t vint32mf2x4_t; typedef __rvv_uint32mf2x4_t vuint32mf2x4_t; typedef __rvv_int32mf2x5_t vint32mf2x5_t; typedef __rvv_uint32mf2x5_t vuint32mf2x5_t; typedef __rvv_int32mf2x6_t vint32mf2x6_t; typedef __rvv_uint32mf2x6_t vuint32mf2x6_t; typedef __rvv_int32mf2x7_t vint32mf2x7_t; typedef __rvv_uint32mf2x7_t vuint32mf2x7_t; typedef __rvv_int32mf2x8_t vint32mf2x8_t; typedef __rvv_uint32mf2x8_t vuint32mf2x8_t; typedef __rvv_int32m1_t vint32m1_t; typedef __rvv_uint32m1_t vuint32m1_t; typedef __rvv_int32m1x2_t vint32m1x2_t; typedef __rvv_uint32m1x2_t vuint32m1x2_t; typedef __rvv_int32m1x3_t vint32m1x3_t; typedef __rvv_uint32m1x3_t vuint32m1x3_t; typedef __rvv_int32m1x4_t vint32m1x4_t; typedef __rvv_uint32m1x4_t vuint32m1x4_t; typedef __rvv_int32m1x5_t vint32m1x5_t; typedef __rvv_uint32m1x5_t vuint32m1x5_t; typedef __rvv_int32m1x6_t vint32m1x6_t; typedef __rvv_uint32m1x6_t vuint32m1x6_t; typedef __rvv_int32m1x7_t vint32m1x7_t; typedef __rvv_uint32m1x7_t 
vuint32m1x7_t; typedef __rvv_int32m1x8_t vint32m1x8_t; typedef __rvv_uint32m1x8_t vuint32m1x8_t; typedef __rvv_int32m2_t vint32m2_t; typedef __rvv_uint32m2_t vuint32m2_t; typedef __rvv_int32m2x2_t vint32m2x2_t; typedef __rvv_uint32m2x2_t vuint32m2x2_t; typedef __rvv_int32m2x3_t vint32m2x3_t; typedef __rvv_uint32m2x3_t vuint32m2x3_t; typedef __rvv_int32m2x4_t vint32m2x4_t; typedef __rvv_uint32m2x4_t vuint32m2x4_t; typedef __rvv_int32m4_t vint32m4_t; typedef __rvv_uint32m4_t vuint32m4_t; typedef __rvv_int32m4x2_t vint32m4x2_t; typedef __rvv_uint32m4x2_t vuint32m4x2_t; typedef __rvv_int32m8_t vint32m8_t; typedef __rvv_uint32m8_t vuint32m8_t; typedef __rvv_int64m1_t vint64m1_t; typedef __rvv_uint64m1_t vuint64m1_t; typedef __rvv_int64m1x2_t vint64m1x2_t; typedef __rvv_uint64m1x2_t vuint64m1x2_t; typedef __rvv_int64m1x3_t vint64m1x3_t; typedef __rvv_uint64m1x3_t vuint64m1x3_t; typedef __rvv_int64m1x4_t vint64m1x4_t; typedef __rvv_uint64m1x4_t vuint64m1x4_t; typedef __rvv_int64m1x5_t vint64m1x5_t; typedef __rvv_uint64m1x5_t vuint64m1x5_t; typedef __rvv_int64m1x6_t vint64m1x6_t; typedef __rvv_uint64m1x6_t vuint64m1x6_t; typedef __rvv_int64m1x7_t vint64m1x7_t; typedef __rvv_uint64m1x7_t vuint64m1x7_t; typedef __rvv_int64m1x8_t vint64m1x8_t; typedef __rvv_uint64m1x8_t vuint64m1x8_t; typedef __rvv_int64m2_t vint64m2_t; typedef __rvv_uint64m2_t vuint64m2_t; typedef __rvv_int64m2x2_t vint64m2x2_t; typedef __rvv_uint64m2x2_t vuint64m2x2_t; typedef __rvv_int64m2x3_t vint64m2x3_t; typedef __rvv_uint64m2x3_t vuint64m2x3_t; typedef __rvv_int64m2x4_t vint64m2x4_t; typedef __rvv_uint64m2x4_t vuint64m2x4_t; typedef __rvv_int64m4_t vint64m4_t; typedef __rvv_uint64m4_t vuint64m4_t; typedef __rvv_int64m4x2_t vint64m4x2_t; typedef __rvv_uint64m4x2_t vuint64m4x2_t; typedef __rvv_int64m8_t vint64m8_t; typedef __rvv_uint64m8_t vuint64m8_t; typedef __rvv_float16mf4_t vfloat16mf4_t; typedef __rvv_float16mf4x2_t vfloat16mf4x2_t; typedef __rvv_float16mf4x3_t vfloat16mf4x3_t; typedef __rvv_float16mf4x4_t vfloat16mf4x4_t; typedef __rvv_float16mf4x5_t vfloat16mf4x5_t; typedef __rvv_float16mf4x6_t vfloat16mf4x6_t; typedef __rvv_float16mf4x7_t vfloat16mf4x7_t; typedef __rvv_float16mf4x8_t vfloat16mf4x8_t; typedef __rvv_float16mf2_t vfloat16mf2_t; typedef __rvv_float16mf2x2_t vfloat16mf2x2_t; typedef __rvv_float16mf2x3_t vfloat16mf2x3_t; typedef __rvv_float16mf2x4_t vfloat16mf2x4_t; typedef __rvv_float16mf2x5_t vfloat16mf2x5_t; typedef __rvv_float16mf2x6_t vfloat16mf2x6_t; typedef __rvv_float16mf2x7_t vfloat16mf2x7_t; typedef __rvv_float16mf2x8_t vfloat16mf2x8_t; typedef __rvv_float16m1_t vfloat16m1_t; typedef __rvv_float16m1x2_t vfloat16m1x2_t; typedef __rvv_float16m1x3_t vfloat16m1x3_t; typedef __rvv_float16m1x4_t vfloat16m1x4_t; typedef __rvv_float16m1x5_t vfloat16m1x5_t; typedef __rvv_float16m1x6_t vfloat16m1x6_t; typedef __rvv_float16m1x7_t vfloat16m1x7_t; typedef __rvv_float16m1x8_t vfloat16m1x8_t; typedef __rvv_float16m2_t vfloat16m2_t; typedef __rvv_float16m2x2_t vfloat16m2x2_t; typedef __rvv_float16m2x3_t vfloat16m2x3_t; typedef __rvv_float16m2x4_t vfloat16m2x4_t; typedef __rvv_float16m4_t vfloat16m4_t; typedef __rvv_float16m4x2_t vfloat16m4x2_t; typedef __rvv_float16m8_t vfloat16m8_t; typedef __rvv_float32mf2_t vfloat32mf2_t; typedef __rvv_float32mf2x2_t vfloat32mf2x2_t; typedef __rvv_float32mf2x3_t vfloat32mf2x3_t; typedef __rvv_float32mf2x4_t vfloat32mf2x4_t; typedef __rvv_float32mf2x5_t vfloat32mf2x5_t; typedef __rvv_float32mf2x6_t vfloat32mf2x6_t; typedef __rvv_float32mf2x7_t vfloat32mf2x7_t; typedef 
__rvv_float32mf2x8_t vfloat32mf2x8_t; typedef __rvv_float32m1_t vfloat32m1_t; typedef __rvv_float32m1x2_t vfloat32m1x2_t; typedef __rvv_float32m1x3_t vfloat32m1x3_t; typedef __rvv_float32m1x4_t vfloat32m1x4_t; typedef __rvv_float32m1x5_t vfloat32m1x5_t; typedef __rvv_float32m1x6_t vfloat32m1x6_t; typedef __rvv_float32m1x7_t vfloat32m1x7_t; typedef __rvv_float32m1x8_t vfloat32m1x8_t; typedef __rvv_float32m2_t vfloat32m2_t; typedef __rvv_float32m2x2_t vfloat32m2x2_t; typedef __rvv_float32m2x3_t vfloat32m2x3_t; typedef __rvv_float32m2x4_t vfloat32m2x4_t; typedef __rvv_float32m4_t vfloat32m4_t; typedef __rvv_float32m4x2_t vfloat32m4x2_t; typedef __rvv_float32m8_t vfloat32m8_t; typedef __rvv_float64m1_t vfloat64m1_t; typedef __rvv_float64m1x2_t vfloat64m1x2_t; typedef __rvv_float64m1x3_t vfloat64m1x3_t; typedef __rvv_float64m1x4_t vfloat64m1x4_t; typedef __rvv_float64m1x5_t vfloat64m1x5_t; typedef __rvv_float64m1x6_t vfloat64m1x6_t; typedef __rvv_float64m1x7_t vfloat64m1x7_t; typedef __rvv_float64m1x8_t vfloat64m1x8_t; typedef __rvv_float64m2_t vfloat64m2_t; typedef __rvv_float64m2x2_t vfloat64m2x2_t; typedef __rvv_float64m2x3_t vfloat64m2x3_t; typedef __rvv_float64m2x4_t vfloat64m2x4_t; typedef __rvv_float64m4_t vfloat64m4_t; typedef __rvv_float64m4x2_t vfloat64m4x2_t; typedef __rvv_float64m8_t vfloat64m8_t; typedef __rvv_bfloat16mf4_t vbfloat16mf4_t; typedef __rvv_bfloat16mf4x2_t vbfloat16mf4x2_t; typedef __rvv_bfloat16mf4x3_t vbfloat16mf4x3_t; typedef __rvv_bfloat16mf4x4_t vbfloat16mf4x4_t; typedef __rvv_bfloat16mf4x5_t vbfloat16mf4x5_t; typedef __rvv_bfloat16mf4x6_t vbfloat16mf4x6_t; typedef __rvv_bfloat16mf4x7_t vbfloat16mf4x7_t; typedef __rvv_bfloat16mf4x8_t vbfloat16mf4x8_t; typedef __rvv_bfloat16mf2_t vbfloat16mf2_t; typedef __rvv_bfloat16mf2x2_t vbfloat16mf2x2_t; typedef __rvv_bfloat16mf2x3_t vbfloat16mf2x3_t; typedef __rvv_bfloat16mf2x4_t vbfloat16mf2x4_t; typedef __rvv_bfloat16mf2x5_t vbfloat16mf2x5_t; typedef __rvv_bfloat16mf2x6_t vbfloat16mf2x6_t; typedef __rvv_bfloat16mf2x7_t vbfloat16mf2x7_t; typedef __rvv_bfloat16mf2x8_t vbfloat16mf2x8_t; typedef __rvv_bfloat16m1_t vbfloat16m1_t; typedef __rvv_bfloat16m1x2_t vbfloat16m1x2_t; typedef __rvv_bfloat16m1x3_t vbfloat16m1x3_t; typedef __rvv_bfloat16m1x4_t vbfloat16m1x4_t; typedef __rvv_bfloat16m1x5_t vbfloat16m1x5_t; typedef __rvv_bfloat16m1x6_t vbfloat16m1x6_t; typedef __rvv_bfloat16m1x7_t vbfloat16m1x7_t; typedef __rvv_bfloat16m1x8_t vbfloat16m1x8_t; typedef __rvv_bfloat16m2_t vbfloat16m2_t; typedef __rvv_bfloat16m2x2_t vbfloat16m2x2_t; typedef __rvv_bfloat16m2x3_t vbfloat16m2x3_t; typedef __rvv_bfloat16m2x4_t vbfloat16m2x4_t; typedef __rvv_bfloat16m4_t vbfloat16m4_t; typedef __rvv_bfloat16m4x2_t vbfloat16m4x2_t; typedef __rvv_bfloat16m8_t vbfloat16m8_t; #define __riscv_v_intrinsic_overloading 1 #ifdef __cplusplus } #endif // __cplusplus #endif // __RISCV_VECTOR_H /*===---- smmintrin.h - SSE4 intrinsics ------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __SMMINTRIN_H #define __SMMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif #include /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("sse4.1,no-evex512"), __min_vector_width__(128))) /* SSE4 Rounding macros. */ #define _MM_FROUND_TO_NEAREST_INT 0x00 #define _MM_FROUND_TO_NEG_INF 0x01 #define _MM_FROUND_TO_POS_INF 0x02 #define _MM_FROUND_TO_ZERO 0x03 #define _MM_FROUND_CUR_DIRECTION 0x04 #define _MM_FROUND_RAISE_EXC 0x00 #define _MM_FROUND_NO_EXC 0x08 #define _MM_FROUND_NINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT) #define _MM_FROUND_FLOOR (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF) #define _MM_FROUND_CEIL (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF) #define _MM_FROUND_TRUNC (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO) #define _MM_FROUND_RINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION) #define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION) /// Rounds up each element of the 128-bit vector of [4 x float] to an /// integer and returns the rounded values in a 128-bit vector of /// [4 x float]. /// /// \headerfile /// /// \code /// __m128 _mm_ceil_ps(__m128 X); /// \endcode /// /// This intrinsic corresponds to the VROUNDPS / ROUNDPS instruction. /// /// \param X /// A 128-bit vector of [4 x float] values to be rounded up. /// \returns A 128-bit vector of [4 x float] containing the rounded values. #define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL) /// Rounds up each element of the 128-bit vector of [2 x double] to an /// integer and returns the rounded values in a 128-bit vector of /// [2 x double]. /// /// \headerfile /// /// \code /// __m128d _mm_ceil_pd(__m128d X); /// \endcode /// /// This intrinsic corresponds to the VROUNDPD / ROUNDPD instruction. /// /// \param X /// A 128-bit vector of [2 x double] values to be rounded up. /// \returns A 128-bit vector of [2 x double] containing the rounded values. #define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL) /// Copies three upper elements of the first 128-bit vector operand to /// the corresponding three upper elements of the 128-bit result vector of /// [4 x float]. Rounds up the lowest element of the second 128-bit vector /// operand to an integer and copies it to the lowest element of the 128-bit /// result vector of [4 x float]. /// /// \headerfile /// /// \code /// __m128 _mm_ceil_ss(__m128 X, __m128 Y); /// \endcode /// /// This intrinsic corresponds to the VROUNDSS / ROUNDSS instruction. /// /// \param X /// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are /// copied to the corresponding bits of the result. /// \param Y /// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is /// rounded up to the nearest integer and copied to the corresponding bits /// of the result. /// \returns A 128-bit vector of [4 x float] containing the copied and rounded /// values. #define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL) /// Copies the upper element of the first 128-bit vector operand to the /// corresponding upper element of the 128-bit result vector of [2 x double]. /// Rounds up the lower element of the second 128-bit vector operand to an /// integer and copies it to the lower element of the 128-bit result vector /// of [2 x double]. /// /// \headerfile /// /// \code /// __m128d _mm_ceil_sd(__m128d X, __m128d Y); /// \endcode /// /// This intrinsic corresponds to the VROUNDSD / ROUNDSD instruction. /// /// \param X /// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is /// copied to the corresponding bits of the result. 
/// \param Y /// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is /// rounded up to the nearest integer and copied to the corresponding bits /// of the result. /// \returns A 128-bit vector of [2 x double] containing the copied and rounded /// values. #define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL) /// Rounds down each element of the 128-bit vector of [4 x float] to an /// an integer and returns the rounded values in a 128-bit vector of /// [4 x float]. /// /// \headerfile /// /// \code /// __m128 _mm_floor_ps(__m128 X); /// \endcode /// /// This intrinsic corresponds to the VROUNDPS / ROUNDPS instruction. /// /// \param X /// A 128-bit vector of [4 x float] values to be rounded down. /// \returns A 128-bit vector of [4 x float] containing the rounded values. #define _mm_floor_ps(X) _mm_round_ps((X), _MM_FROUND_FLOOR) /// Rounds down each element of the 128-bit vector of [2 x double] to an /// integer and returns the rounded values in a 128-bit vector of /// [2 x double]. /// /// \headerfile /// /// \code /// __m128d _mm_floor_pd(__m128d X); /// \endcode /// /// This intrinsic corresponds to the VROUNDPD / ROUNDPD instruction. /// /// \param X /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector of [2 x double] containing the rounded values. #define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR) /// Copies three upper elements of the first 128-bit vector operand to /// the corresponding three upper elements of the 128-bit result vector of /// [4 x float]. Rounds down the lowest element of the second 128-bit vector /// operand to an integer and copies it to the lowest element of the 128-bit /// result vector of [4 x float]. /// /// \headerfile /// /// \code /// __m128 _mm_floor_ss(__m128 X, __m128 Y); /// \endcode /// /// This intrinsic corresponds to the VROUNDSS / ROUNDSS instruction. /// /// \param X /// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are /// copied to the corresponding bits of the result. /// \param Y /// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is /// rounded down to the nearest integer and copied to the corresponding bits /// of the result. /// \returns A 128-bit vector of [4 x float] containing the copied and rounded /// values. #define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR) /// Copies the upper element of the first 128-bit vector operand to the /// corresponding upper element of the 128-bit result vector of [2 x double]. /// Rounds down the lower element of the second 128-bit vector operand to an /// integer and copies it to the lower element of the 128-bit result vector /// of [2 x double]. /// /// \headerfile /// /// \code /// __m128d _mm_floor_sd(__m128d X, __m128d Y); /// \endcode /// /// This intrinsic corresponds to the VROUNDSD / ROUNDSD instruction. /// /// \param X /// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is /// copied to the corresponding bits of the result. /// \param Y /// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is /// rounded down to the nearest integer and copied to the corresponding bits /// of the result. /// \returns A 128-bit vector of [2 x double] containing the copied and rounded /// values. 
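/* Illustrative usage sketch (not part of the original header): exercising the
 * packed ceil/floor wrappers documented above from hypothetical user code.
 * Assumes a translation unit built with SSE4.1 enabled (e.g. -msse4.1); the
 * example_* helper name is illustrative only. */
#include <smmintrin.h>

static float example_ceil_minus_floor_lane0(void) {
  __m128 v = _mm_set_ps(2.5f, -1.25f, 0.75f, -3.5f); /* lane 0 holds -3.5f */
  __m128 up = _mm_ceil_ps(v);    /* every lane rounded toward +infinity */
  __m128 down = _mm_floor_ps(v); /* every lane rounded toward -infinity */
  /* For lane 0: ceil(-3.5f) - floor(-3.5f) = -3.0f - (-4.0f) = 1.0f. */
  return _mm_cvtss_f32(up) - _mm_cvtss_f32(down);
}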
#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR) /// Rounds each element of the 128-bit vector of [4 x float] to an /// integer value according to the rounding control specified by the second /// argument and returns the rounded values in a 128-bit vector of /// [4 x float]. /// /// \headerfile /// /// \code /// __m128 _mm_round_ps(__m128 X, const int M); /// \endcode /// /// This intrinsic corresponds to the VROUNDPS / ROUNDPS instruction. /// /// \param X /// A 128-bit vector of [4 x float]. /// \param M /// An integer value that specifies the rounding operation. \n /// Bits [7:4] are reserved. \n /// Bit [3] is a precision exception value: \n /// 0: A normal PE exception is used \n /// 1: The PE field is not updated \n /// Bit [2] is the rounding control source: \n /// 0: Use bits [1:0] of \a M \n /// 1: Use the current MXCSR setting \n /// Bits [1:0] contain the rounding control definition: \n /// 00: Nearest \n /// 01: Downward (toward negative infinity) \n /// 10: Upward (toward positive infinity) \n /// 11: Truncated /// \returns A 128-bit vector of [4 x float] containing the rounded values. #define _mm_round_ps(X, M) \ ((__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M))) /// Copies three upper elements of the first 128-bit vector operand to /// the corresponding three upper elements of the 128-bit result vector of /// [4 x float]. Rounds the lowest element of the second 128-bit vector /// operand to an integer value according to the rounding control specified /// by the third argument and copies it to the lowest element of the 128-bit /// result vector of [4 x float]. /// /// \headerfile /// /// \code /// __m128 _mm_round_ss(__m128 X, __m128 Y, const int M); /// \endcode /// /// This intrinsic corresponds to the VROUNDSS / ROUNDSS instruction. /// /// \param X /// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are /// copied to the corresponding bits of the result. /// \param Y /// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is /// rounded to the nearest integer using the specified rounding control and /// copied to the corresponding bits of the result. /// \param M /// An integer value that specifies the rounding operation. \n /// Bits [7:4] are reserved. \n /// Bit [3] is a precision exception value: \n /// 0: A normal PE exception is used \n /// 1: The PE field is not updated \n /// Bit [2] is the rounding control source: \n /// 0: Use bits [1:0] of \a M \n /// 1: Use the current MXCSR setting \n /// Bits [1:0] contain the rounding control definition: \n /// 00: Nearest \n /// 01: Downward (toward negative infinity) \n /// 10: Upward (toward positive infinity) \n /// 11: Truncated /// \returns A 128-bit vector of [4 x float] containing the copied and rounded /// values. #define _mm_round_ss(X, Y, M) \ ((__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \ (M))) /// Rounds each element of the 128-bit vector of [2 x double] to an /// integer value according to the rounding control specified by the second /// argument and returns the rounded values in a 128-bit vector of /// [2 x double]. /// /// \headerfile /// /// \code /// __m128d _mm_round_pd(__m128d X, const int M); /// \endcode /// /// This intrinsic corresponds to the VROUNDPD / ROUNDPD instruction. /// /// \param X /// A 128-bit vector of [2 x double]. /// \param M /// An integer value that specifies the rounding operation. \n /// Bits [7:4] are reserved. 
\n /// Bit [3] is a precision exception value: \n /// 0: A normal PE exception is used \n /// 1: The PE field is not updated \n /// Bit [2] is the rounding control source: \n /// 0: Use bits [1:0] of \a M \n /// 1: Use the current MXCSR setting \n /// Bits [1:0] contain the rounding control definition: \n /// 00: Nearest \n /// 01: Downward (toward negative infinity) \n /// 10: Upward (toward positive infinity) \n /// 11: Truncated /// \returns A 128-bit vector of [2 x double] containing the rounded values. #define _mm_round_pd(X, M) \ ((__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M))) /// Copies the upper element of the first 128-bit vector operand to the /// corresponding upper element of the 128-bit result vector of [2 x double]. /// Rounds the lower element of the second 128-bit vector operand to an /// integer value according to the rounding control specified by the third /// argument and copies it to the lower element of the 128-bit result vector /// of [2 x double]. /// /// \headerfile /// /// \code /// __m128d _mm_round_sd(__m128d X, __m128d Y, const int M); /// \endcode /// /// This intrinsic corresponds to the VROUNDSD / ROUNDSD instruction. /// /// \param X /// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is /// copied to the corresponding bits of the result. /// \param Y /// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is /// rounded to the nearest integer using the specified rounding control and /// copied to the corresponding bits of the result. /// \param M /// An integer value that specifies the rounding operation. \n /// Bits [7:4] are reserved. \n /// Bit [3] is a precision exception value: \n /// 0: A normal PE exception is used \n /// 1: The PE field is not updated \n /// Bit [2] is the rounding control source: \n /// 0: Use bits [1:0] of \a M \n /// 1: Use the current MXCSR setting \n /// Bits [1:0] contain the rounding control definition: \n /// 00: Nearest \n /// 01: Downward (toward negative infinity) \n /// 10: Upward (toward positive infinity) \n /// 11: Truncated /// \returns A 128-bit vector of [2 x double] containing the copied and rounded /// values. #define _mm_round_sd(X, Y, M) \ ((__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \ (M))) /* SSE4 Packed Blending Intrinsics. */ /// Returns a 128-bit vector of [2 x double] where the values are /// selected from either the first or second operand as specified by the /// third operand, the control mask. /// /// \headerfile /// /// \code /// __m128d _mm_blend_pd(__m128d V1, __m128d V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VBLENDPD / BLENDPD instruction. /// /// \param V1 /// A 128-bit vector of [2 x double]. /// \param V2 /// A 128-bit vector of [2 x double]. /// \param M /// An immediate integer operand, with mask bits [1:0] specifying how the /// values are to be copied. The position of the mask bit corresponds to the /// index of a copied value. When a mask bit is 0, the corresponding 64-bit /// element in operand \a V1 is copied to the same position in the result. /// When a mask bit is 1, the corresponding 64-bit element in operand \a V2 /// is copied to the same position in the result. /// \returns A 128-bit vector of [2 x double] containing the copied values. 
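/* Illustrative usage sketch (not part of the original header): building an
 * explicit rounding-control immediate for _mm_round_pd from the _MM_FROUND_*
 * macros documented above. Assumes -msse4.1; the example_* helper name is
 * illustrative only. */
#include <smmintrin.h>

static __m128d example_truncate_pd(__m128d v) {
  /* Bits [1:0] = _MM_FROUND_TO_ZERO select truncation; bit [3] =
   * _MM_FROUND_NO_EXC keeps the precision-exception (PE) field from being
   * updated, per the bit layout described above. */
  return _mm_round_pd(v, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}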
#define _mm_blend_pd(V1, V2, M) \ ((__m128d)__builtin_ia32_blendpd((__v2df)(__m128d)(V1), \ (__v2df)(__m128d)(V2), (int)(M))) /// Returns a 128-bit vector of [4 x float] where the values are selected /// from either the first or second operand as specified by the third /// operand, the control mask. /// /// \headerfile /// /// \code /// __m128 _mm_blend_ps(__m128 V1, __m128 V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VBLENDPS / BLENDPS instruction. /// /// \param V1 /// A 128-bit vector of [4 x float]. /// \param V2 /// A 128-bit vector of [4 x float]. /// \param M /// An immediate integer operand, with mask bits [3:0] specifying how the /// values are to be copied. The position of the mask bit corresponds to the /// index of a copied value. When a mask bit is 0, the corresponding 32-bit /// element in operand \a V1 is copied to the same position in the result. /// When a mask bit is 1, the corresponding 32-bit element in operand \a V2 /// is copied to the same position in the result. /// \returns A 128-bit vector of [4 x float] containing the copied values. #define _mm_blend_ps(V1, V2, M) \ ((__m128)__builtin_ia32_blendps((__v4sf)(__m128)(V1), (__v4sf)(__m128)(V2), \ (int)(M))) /// Returns a 128-bit vector of [2 x double] where the values are /// selected from either the first or second operand as specified by the /// third operand, the control mask. /// /// \headerfile /// /// This intrinsic corresponds to the VBLENDVPD / BLENDVPD instruction. /// /// \param __V1 /// A 128-bit vector of [2 x double]. /// \param __V2 /// A 128-bit vector of [2 x double]. /// \param __M /// A 128-bit vector operand, with mask bits 127 and 63 specifying how the /// values are to be copied. The position of the mask bit corresponds to the /// most significant bit of a copied value. When a mask bit is 0, the /// corresponding 64-bit element in operand \a __V1 is copied to the same /// position in the result. When a mask bit is 1, the corresponding 64-bit /// element in operand \a __V2 is copied to the same position in the result. /// \returns A 128-bit vector of [2 x double] containing the copied values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_blendv_pd(__m128d __V1, __m128d __V2, __m128d __M) { return (__m128d)__builtin_ia32_blendvpd((__v2df)__V1, (__v2df)__V2, (__v2df)__M); } /// Returns a 128-bit vector of [4 x float] where the values are /// selected from either the first or second operand as specified by the /// third operand, the control mask. /// /// \headerfile /// /// This intrinsic corresponds to the VBLENDVPS / BLENDVPS instruction. /// /// \param __V1 /// A 128-bit vector of [4 x float]. /// \param __V2 /// A 128-bit vector of [4 x float]. /// \param __M /// A 128-bit vector operand, with mask bits 127, 95, 63, and 31 specifying /// how the values are to be copied. The position of the mask bit corresponds /// to the most significant bit of a copied value. When a mask bit is 0, the /// corresponding 32-bit element in operand \a __V1 is copied to the same /// position in the result. When a mask bit is 1, the corresponding 32-bit /// element in operand \a __V2 is copied to the same position in the result. /// \returns A 128-bit vector of [4 x float] containing the copied values. 
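/* Illustrative usage sketch (not part of the original header): selecting
 * lanes with the immediate-mask blend documented above. Assumes -msse4.1;
 * the example_* helper name is illustrative only. */
#include <smmintrin.h>

static __m128 example_blend_low_two(__m128 a, __m128 b) {
  /* Mask 0x3 = 0b0011: lanes 0 and 1 are taken from b (mask bit set),
   * lanes 2 and 3 from a (mask bit clear). */
  return _mm_blend_ps(a, b, 0x3);
}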
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_blendv_ps(__m128 __V1, __m128 __V2, __m128 __M) { return (__m128)__builtin_ia32_blendvps((__v4sf)__V1, (__v4sf)__V2, (__v4sf)__M); } /// Returns a 128-bit vector of [16 x i8] where the values are selected /// from either of the first or second operand as specified by the third /// operand, the control mask. /// /// \headerfile /// /// This intrinsic corresponds to the VPBLENDVB / PBLENDVB instruction. /// /// \param __V1 /// A 128-bit vector of [16 x i8]. /// \param __V2 /// A 128-bit vector of [16 x i8]. /// \param __M /// A 128-bit vector operand, with mask bits 127, 119, 111...7 specifying /// how the values are to be copied. The position of the mask bit corresponds /// to the most significant bit of a copied value. When a mask bit is 0, the /// corresponding 8-bit element in operand \a __V1 is copied to the same /// position in the result. When a mask bit is 1, the corresponding 8-bit /// element in operand \a __V2 is copied to the same position in the result. /// \returns A 128-bit vector of [16 x i8] containing the copied values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_blendv_epi8(__m128i __V1, __m128i __V2, __m128i __M) { return (__m128i)__builtin_ia32_pblendvb128((__v16qi)__V1, (__v16qi)__V2, (__v16qi)__M); } /// Returns a 128-bit vector of [8 x i16] where the values are selected /// from either of the first or second operand as specified by the third /// operand, the control mask. /// /// \headerfile /// /// \code /// __m128i _mm_blend_epi16(__m128i V1, __m128i V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VPBLENDW / PBLENDW instruction. /// /// \param V1 /// A 128-bit vector of [8 x i16]. /// \param V2 /// A 128-bit vector of [8 x i16]. /// \param M /// An immediate integer operand, with mask bits [7:0] specifying how the /// values are to be copied. The position of the mask bit corresponds to the /// index of a copied value. When a mask bit is 0, the corresponding 16-bit /// element in operand \a V1 is copied to the same position in the result. /// When a mask bit is 1, the corresponding 16-bit element in operand \a V2 /// is copied to the same position in the result. /// \returns A 128-bit vector of [8 x i16] containing the copied values. #define _mm_blend_epi16(V1, V2, M) \ ((__m128i)__builtin_ia32_pblendw128((__v8hi)(__m128i)(V1), \ (__v8hi)(__m128i)(V2), (int)(M))) /* SSE4 Dword Multiply Instructions. */ /// Multiples corresponding elements of two 128-bit vectors of [4 x i32] /// and returns the lower 32 bits of the each product in a 128-bit vector of /// [4 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the VPMULLD / PMULLD instruction. /// /// \param __V1 /// A 128-bit integer vector. /// \param __V2 /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the products of both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1, __m128i __V2) { return (__m128i)((__v4su)__V1 * (__v4su)__V2); } /// Multiplies corresponding even-indexed elements of two 128-bit /// vectors of [4 x i32] and returns a 128-bit vector of [2 x i64] /// containing the products. /// /// \headerfile /// /// This intrinsic corresponds to the VPMULDQ / PMULDQ instruction. /// /// \param __V1 /// A 128-bit vector of [4 x i32]. /// \param __V2 /// A 128-bit vector of [4 x i32]. /// \returns A 128-bit vector of [2 x i64] containing the products of both /// operands. 
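/* Illustrative usage sketch (not part of the original header): the common
 * compare-then-blend idiom with _mm_blendv_epi8, which consults only the
 * most significant bit of each mask byte. Assumes -msse4.1; the example_*
 * helper name is illustrative only. */
#include <smmintrin.h>

static __m128i example_select_greater_epi8(__m128i a, __m128i b) {
  __m128i gt = _mm_cmpgt_epi8(b, a); /* 0xFF where b > a (signed), else 0 */
  return _mm_blendv_epi8(a, b, gt);  /* per-byte signed maximum of a and b */
}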
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_ia32_pmuldq128((__v4si)__V1, (__v4si)__V2); } /* SSE4 Floating Point Dot Product Instructions. */ /// Computes the dot product of the two 128-bit vectors of [4 x float] /// and returns it in the elements of the 128-bit result vector of /// [4 x float]. /// /// The immediate integer operand controls which input elements /// will contribute to the dot product, and where the final results are /// returned. /// /// \headerfile /// /// \code /// __m128 _mm_dp_ps(__m128 X, __m128 Y, const int M); /// \endcode /// /// This intrinsic corresponds to the VDPPS / DPPS instruction. /// /// \param X /// A 128-bit vector of [4 x float]. /// \param Y /// A 128-bit vector of [4 x float]. /// \param M /// An immediate integer operand. Mask bits [7:4] determine which elements /// of the input vectors are used, with bit [4] corresponding to the lowest /// element and bit [7] corresponding to the highest element of each [4 x /// float] vector. If a bit is set, the corresponding elements from the two /// input vectors are used as an input for dot product; otherwise that input /// is treated as zero. Bits [3:0] determine which elements of the result /// will receive a copy of the final dot product, with bit [0] corresponding /// to the lowest element and bit [3] corresponding to the highest element of /// each [4 x float] subvector. If a bit is set, the dot product is returned /// in the corresponding element; otherwise that element is set to zero. /// \returns A 128-bit vector of [4 x float] containing the dot product. #define _mm_dp_ps(X, Y, M) \ ((__m128)__builtin_ia32_dpps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), (M))) /// Computes the dot product of the two 128-bit vectors of [2 x double] /// and returns it in the elements of the 128-bit result vector of /// [2 x double]. /// /// The immediate integer operand controls which input /// elements will contribute to the dot product, and where the final results /// are returned. /// /// \headerfile /// /// \code /// __m128d _mm_dp_pd(__m128d X, __m128d Y, const int M); /// \endcode /// /// This intrinsic corresponds to the VDPPD / DPPD instruction. /// /// \param X /// A 128-bit vector of [2 x double]. /// \param Y /// A 128-bit vector of [2 x double]. /// \param M /// An immediate integer operand. Mask bits [5:4] determine which elements /// of the input vectors are used, with bit [4] corresponding to the lowest /// element and bit [5] corresponding to the highest element of each of [2 x /// double] vector. If a bit is set, the corresponding elements from the two /// input vectors are used as an input for dot product; otherwise that input /// is treated as zero. Bits [1:0] determine which elements of the result /// will receive a copy of the final dot product, with bit [0] corresponding /// to the lowest element and bit [1] corresponding to the highest element of /// each [2 x double] vector. If a bit is set, the dot product is returned in /// the corresponding element; otherwise that element is set to zero. #define _mm_dp_pd(X, Y, M) \ ((__m128d)__builtin_ia32_dppd((__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \ (M))) /* SSE4 Streaming Load Hint Instruction. */ /// Loads integer values from a 128-bit aligned memory location to a /// 128-bit integer vector. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVNTDQA / MOVNTDQA instruction. 
/// /// \param __V /// A pointer to a 128-bit aligned memory location that contains the integer /// values. /// \returns A 128-bit integer vector containing the data stored at the /// specified memory location. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_stream_load_si128(const void *__V) { return (__m128i)__builtin_nontemporal_load((const __v2di *)__V); } /* SSE4 Packed Integer Min/Max Instructions. */ /// Compares the corresponding elements of two 128-bit vectors of /// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the lesser /// of the two values. /// /// \headerfile /// /// This intrinsic corresponds to the VPMINSB / PMINSB instruction. /// /// \param __V1 /// A 128-bit vector of [16 x i8]. /// \param __V2 /// A 128-bit vector of [16 x i8] /// \returns A 128-bit vector of [16 x i8] containing the lesser values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi8(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_min((__v16qs)__V1, (__v16qs)__V2); } /// Compares the corresponding elements of two 128-bit vectors of /// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the /// greater value of the two. /// /// \headerfile /// /// This intrinsic corresponds to the VPMAXSB / PMAXSB instruction. /// /// \param __V1 /// A 128-bit vector of [16 x i8]. /// \param __V2 /// A 128-bit vector of [16 x i8]. /// \returns A 128-bit vector of [16 x i8] containing the greater values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi8(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_max((__v16qs)__V1, (__v16qs)__V2); } /// Compares the corresponding elements of two 128-bit vectors of /// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the lesser /// value of the two. /// /// \headerfile /// /// This intrinsic corresponds to the VPMINUW / PMINUW instruction. /// /// \param __V1 /// A 128-bit vector of [8 x u16]. /// \param __V2 /// A 128-bit vector of [8 x u16]. /// \returns A 128-bit vector of [8 x u16] containing the lesser values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu16(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_min((__v8hu)__V1, (__v8hu)__V2); } /// Compares the corresponding elements of two 128-bit vectors of /// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the /// greater value of the two. /// /// \headerfile /// /// This intrinsic corresponds to the VPMAXUW / PMAXUW instruction. /// /// \param __V1 /// A 128-bit vector of [8 x u16]. /// \param __V2 /// A 128-bit vector of [8 x u16]. /// \returns A 128-bit vector of [8 x u16] containing the greater values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu16(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_max((__v8hu)__V1, (__v8hu)__V2); } /// Compares the corresponding elements of two 128-bit vectors of /// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the lesser /// value of the two. /// /// \headerfile /// /// This intrinsic corresponds to the VPMINSD / PMINSD instruction. /// /// \param __V1 /// A 128-bit vector of [4 x i32]. /// \param __V2 /// A 128-bit vector of [4 x i32]. /// \returns A 128-bit vector of [4 x i32] containing the lesser values. 
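/* Illustrative usage sketch (not part of the original header): a full
 * 4-element dot product with _mm_dp_ps, documented above. The immediate
 * 0xF1 sets bits [7:4] so all four input lanes participate and sets bit [0]
 * so only lane 0 of the result receives the sum. Assumes -msse4.1; the
 * example_* helper name is illustrative only. */
#include <smmintrin.h>

static float example_dot4(__m128 x, __m128 y) {
  __m128 dp = _mm_dp_ps(x, y, 0xF1);
  return _mm_cvtss_f32(dp); /* lane 0 holds x[0]*y[0] + ... + x[3]*y[3] */
}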
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_min((__v4si)__V1, (__v4si)__V2); } /// Compares the corresponding elements of two 128-bit vectors of /// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the /// greater value of the two. /// /// \headerfile /// /// This intrinsic corresponds to the VPMAXSD / PMAXSD instruction. /// /// \param __V1 /// A 128-bit vector of [4 x i32]. /// \param __V2 /// A 128-bit vector of [4 x i32]. /// \returns A 128-bit vector of [4 x i32] containing the greater values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_max((__v4si)__V1, (__v4si)__V2); } /// Compares the corresponding elements of two 128-bit vectors of /// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the lesser /// value of the two. /// /// \headerfile /// /// This intrinsic corresponds to the VPMINUD / PMINUD instruction. /// /// \param __V1 /// A 128-bit vector of [4 x u32]. /// \param __V2 /// A 128-bit vector of [4 x u32]. /// \returns A 128-bit vector of [4 x u32] containing the lesser values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_min((__v4su)__V1, (__v4su)__V2); } /// Compares the corresponding elements of two 128-bit vectors of /// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the /// greater value of the two. /// /// \headerfile /// /// This intrinsic corresponds to the VPMAXUD / PMAXUD instruction. /// /// \param __V1 /// A 128-bit vector of [4 x u32]. /// \param __V2 /// A 128-bit vector of [4 x u32]. /// \returns A 128-bit vector of [4 x u32] containing the greater values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu32(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_max((__v4su)__V1, (__v4su)__V2); } /* SSE4 Insertion and Extraction from XMM Register Instructions. */ /// Takes the first argument \a X and inserts an element from the second /// argument \a Y as selected by the third argument \a N. That result then /// has elements zeroed out also as selected by the third argument \a N. The /// resulting 128-bit vector of [4 x float] is then returned. /// /// \headerfile /// /// \code /// __m128 _mm_insert_ps(__m128 X, __m128 Y, const int N); /// \endcode /// /// This intrinsic corresponds to the VINSERTPS instruction. /// /// \param X /// A 128-bit vector source operand of [4 x float]. With the exception of /// those bits in the result copied from parameter \a Y and zeroed by bits /// [3:0] of \a N, all bits from this parameter are copied to the result. /// \param Y /// A 128-bit vector source operand of [4 x float]. One single-precision /// floating-point element from this source, as determined by the immediate /// parameter, is copied to the result. /// \param N /// Specifies which bits from operand \a Y will be copied, which bits in the /// result they will be copied to, and which bits in the result will be /// cleared. The following assignments are made: \n /// Bits [7:6] specify the bits to copy from operand \a Y: \n /// 00: Selects bits [31:0] from operand \a Y. \n /// 01: Selects bits [63:32] from operand \a Y. \n /// 10: Selects bits [95:64] from operand \a Y. \n /// 11: Selects bits [127:96] from operand \a Y. 
\n /// Bits [5:4] specify the bits in the result to which the selected bits /// from operand \a Y are copied: \n /// 00: Copies the selected bits from \a Y to result bits [31:0]. \n /// 01: Copies the selected bits from \a Y to result bits [63:32]. \n /// 10: Copies the selected bits from \a Y to result bits [95:64]. \n /// 11: Copies the selected bits from \a Y to result bits [127:96]. \n /// Bits[3:0]: If any of these bits are set, the corresponding result /// element is cleared. /// \returns A 128-bit vector of [4 x float] containing the copied /// single-precision floating point elements from the operands. #define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N)) /// Extracts a 32-bit integer from a 128-bit vector of [4 x float] and /// returns it, using the immediate value parameter \a N as a selector. /// /// \headerfile /// /// \code /// int _mm_extract_ps(__m128 X, const int N); /// \endcode /// /// This intrinsic corresponds to the VEXTRACTPS / EXTRACTPS /// instruction. /// /// \param X /// A 128-bit vector of [4 x float]. /// \param N /// An immediate value. Bits [1:0] determines which bits from the argument /// \a X are extracted and returned: \n /// 00: Bits [31:0] of parameter \a X are returned. \n /// 01: Bits [63:32] of parameter \a X are returned. \n /// 10: Bits [95:64] of parameter \a X are returned. \n /// 11: Bits [127:96] of parameter \a X are returned. /// \returns A 32-bit integer containing the extracted 32 bits of float data. #define _mm_extract_ps(X, N) \ __builtin_bit_cast( \ int, __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N))) /* Miscellaneous insert and extract macros. */ /* Extract a single-precision float from X at index N into D. */ #define _MM_EXTRACT_FLOAT(D, X, N) \ do { \ (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); \ } while (0) /* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create an index suitable for _mm_insert_ps. */ #define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z)) /* Extract a float from X at index N into the first index of the return. */ #define _MM_PICK_OUT_PS(X, N) \ _mm_insert_ps(_mm_setzero_ps(), (X), _MM_MK_INSERTPS_NDX((N), 0, 0x0e)) /* Insert int into packed integer array at index. */ /// Constructs a 128-bit vector of [16 x i8] by first making a copy of /// the 128-bit integer vector parameter, and then inserting the lower 8 bits /// of an integer parameter \a I into an offset specified by the immediate /// value parameter \a N. /// /// \headerfile /// /// \code /// __m128i _mm_insert_epi8(__m128i X, int I, const int N); /// \endcode /// /// This intrinsic corresponds to the VPINSRB / PINSRB instruction. /// /// \param X /// A 128-bit integer vector of [16 x i8]. This vector is copied to the /// result and then one of the sixteen elements in the result vector is /// replaced by the lower 8 bits of \a I. /// \param I /// An integer. The lower 8 bits of this operand are written to the result /// beginning at the offset specified by \a N. /// \param N /// An immediate value. Bits [3:0] specify the bit offset in the result at /// which the lower 8 bits of \a I are written. \n /// 0000: Bits [7:0] of the result are used for insertion. \n /// 0001: Bits [15:8] of the result are used for insertion. \n /// 0010: Bits [23:16] of the result are used for insertion. \n /// 0011: Bits [31:24] of the result are used for insertion. \n /// 0100: Bits [39:32] of the result are used for insertion. \n /// 0101: Bits [47:40] of the result are used for insertion. 
\n /// 0110: Bits [55:48] of the result are used for insertion. \n /// 0111: Bits [63:56] of the result are used for insertion. \n /// 1000: Bits [71:64] of the result are used for insertion. \n /// 1001: Bits [79:72] of the result are used for insertion. \n /// 1010: Bits [87:80] of the result are used for insertion. \n /// 1011: Bits [95:88] of the result are used for insertion. \n /// 1100: Bits [103:96] of the result are used for insertion. \n /// 1101: Bits [111:104] of the result are used for insertion. \n /// 1110: Bits [119:112] of the result are used for insertion. \n /// 1111: Bits [127:120] of the result are used for insertion. /// \returns A 128-bit integer vector containing the constructed values. #define _mm_insert_epi8(X, I, N) \ ((__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)(__m128i)(X), (int)(I), \ (int)(N))) /// Constructs a 128-bit vector of [4 x i32] by first making a copy of /// the 128-bit integer vector parameter, and then inserting the 32-bit /// integer parameter \a I at the offset specified by the immediate value /// parameter \a N. /// /// \headerfile /// /// \code /// __m128i _mm_insert_epi32(__m128i X, int I, const int N); /// \endcode /// /// This intrinsic corresponds to the VPINSRD / PINSRD instruction. /// /// \param X /// A 128-bit integer vector of [4 x i32]. This vector is copied to the /// result and then one of the four elements in the result vector is /// replaced by \a I. /// \param I /// A 32-bit integer that is written to the result beginning at the offset /// specified by \a N. /// \param N /// An immediate value. Bits [1:0] specify the bit offset in the result at /// which the integer \a I is written. \n /// 00: Bits [31:0] of the result are used for insertion. \n /// 01: Bits [63:32] of the result are used for insertion. \n /// 10: Bits [95:64] of the result are used for insertion. \n /// 11: Bits [127:96] of the result are used for insertion. /// \returns A 128-bit integer vector containing the constructed values. #define _mm_insert_epi32(X, I, N) \ ((__m128i)__builtin_ia32_vec_set_v4si((__v4si)(__m128i)(X), (int)(I), \ (int)(N))) #ifdef __x86_64__ /// Constructs a 128-bit vector of [2 x i64] by first making a copy of /// the 128-bit integer vector parameter, and then inserting the 64-bit /// integer parameter \a I, using the immediate value parameter \a N as an /// insertion location selector. /// /// \headerfile /// /// \code /// __m128i _mm_insert_epi64(__m128i X, long long I, const int N); /// \endcode /// /// This intrinsic corresponds to the VPINSRQ / PINSRQ instruction. /// /// \param X /// A 128-bit integer vector of [2 x i64]. This vector is copied to the /// result and then one of the two elements in the result vector is replaced /// by \a I. /// \param I /// A 64-bit integer that is written to the result beginning at the offset /// specified by \a N. /// \param N /// An immediate value. Bit [0] specifies the bit offset in the result at /// which the integer \a I is written. \n /// 0: Bits [63:0] of the result are used for insertion. \n /// 1: Bits [127:64] of the result are used for insertion. \n /// \returns A 128-bit integer vector containing the constructed values. #define _mm_insert_epi64(X, I, N) \ ((__m128i)__builtin_ia32_vec_set_v2di((__v2di)(__m128i)(X), (long long)(I), \ (int)(N))) #endif /* __x86_64__ */ /* Extract int from packed integer array at index. This returns the element * as a zero extended value, so it is unsigned. 
*/ /// Extracts an 8-bit element from the 128-bit integer vector of /// [16 x i8], using the immediate value parameter \a N as a selector. /// /// \headerfile /// /// \code /// int _mm_extract_epi8(__m128i X, const int N); /// \endcode /// /// This intrinsic corresponds to the VPEXTRB / PEXTRB instruction. /// /// \param X /// A 128-bit integer vector. /// \param N /// An immediate value. Bits [3:0] specify which 8-bit vector element from /// the argument \a X to extract and copy to the result. \n /// 0000: Bits [7:0] of parameter \a X are extracted. \n /// 0001: Bits [15:8] of the parameter \a X are extracted. \n /// 0010: Bits [23:16] of the parameter \a X are extracted. \n /// 0011: Bits [31:24] of the parameter \a X are extracted. \n /// 0100: Bits [39:32] of the parameter \a X are extracted. \n /// 0101: Bits [47:40] of the parameter \a X are extracted. \n /// 0110: Bits [55:48] of the parameter \a X are extracted. \n /// 0111: Bits [63:56] of the parameter \a X are extracted. \n /// 1000: Bits [71:64] of the parameter \a X are extracted. \n /// 1001: Bits [79:72] of the parameter \a X are extracted. \n /// 1010: Bits [87:80] of the parameter \a X are extracted. \n /// 1011: Bits [95:88] of the parameter \a X are extracted. \n /// 1100: Bits [103:96] of the parameter \a X are extracted. \n /// 1101: Bits [111:104] of the parameter \a X are extracted. \n /// 1110: Bits [119:112] of the parameter \a X are extracted. \n /// 1111: Bits [127:120] of the parameter \a X are extracted. /// \returns An unsigned integer, whose lower 8 bits are selected from the /// 128-bit integer vector parameter and the remaining bits are assigned /// zeros. #define _mm_extract_epi8(X, N) \ ((int)(unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)(__m128i)(X), \ (int)(N))) /// Extracts a 32-bit element from the 128-bit integer vector of /// [4 x i32], using the immediate value parameter \a N as a selector. /// /// \headerfile /// /// \code /// int _mm_extract_epi32(__m128i X, const int N); /// \endcode /// /// This intrinsic corresponds to the VPEXTRD / PEXTRD instruction. /// /// \param X /// A 128-bit integer vector. /// \param N /// An immediate value. Bits [1:0] specify which 32-bit vector element from /// the argument \a X to extract and copy to the result. \n /// 00: Bits [31:0] of the parameter \a X are extracted. \n /// 01: Bits [63:32] of the parameter \a X are extracted. \n /// 10: Bits [95:64] of the parameter \a X are extracted. \n /// 11: Bits [127:96] of the parameter \a X are exracted. /// \returns An integer, whose lower 32 bits are selected from the 128-bit /// integer vector parameter and the remaining bits are assigned zeros. #define _mm_extract_epi32(X, N) \ ((int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(X), (int)(N))) /// Extracts a 64-bit element from the 128-bit integer vector of /// [2 x i64], using the immediate value parameter \a N as a selector. /// /// \headerfile /// /// \code /// long long _mm_extract_epi64(__m128i X, const int N); /// \endcode /// /// This intrinsic corresponds to the VPEXTRQ / PEXTRQ instruction /// in 64-bit mode. /// /// \param X /// A 128-bit integer vector. /// \param N /// An immediate value. Bit [0] specifies which 64-bit vector element from /// the argument \a X to return. \n /// 0: Bits [63:0] are returned. \n /// 1: Bits [127:64] are returned. \n /// \returns A 64-bit integer. #define _mm_extract_epi64(X, N) \ ((long long)__builtin_ia32_vec_ext_v2di((__v2di)(__m128i)(X), (int)(N))) /* SSE4 128-bit Packed Integer Comparisons. 
*/ /// Tests whether the specified bits in a 128-bit integer vector are all /// zeros. /// /// \headerfile /// /// This intrinsic corresponds to the VPTEST / PTEST instruction. /// /// \param __M /// A 128-bit integer vector containing the bits to be tested. /// \param __V /// A 128-bit integer vector selecting which bits to test in operand \a __M. /// \returns TRUE if the specified bits are all zeros; FALSE otherwise. static __inline__ int __DEFAULT_FN_ATTRS _mm_testz_si128(__m128i __M, __m128i __V) { return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V); } /// Tests whether the specified bits in a 128-bit integer vector are all /// ones. /// /// \headerfile /// /// This intrinsic corresponds to the VPTEST / PTEST instruction. /// /// \param __M /// A 128-bit integer vector containing the bits to be tested. /// \param __V /// A 128-bit integer vector selecting which bits to test in operand \a __M. /// \returns TRUE if the specified bits are all ones; FALSE otherwise. static __inline__ int __DEFAULT_FN_ATTRS _mm_testc_si128(__m128i __M, __m128i __V) { return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V); } /// Tests whether the specified bits in a 128-bit integer vector are /// neither all zeros nor all ones. /// /// \headerfile /// /// This intrinsic corresponds to the VPTEST / PTEST instruction. /// /// \param __M /// A 128-bit integer vector containing the bits to be tested. /// \param __V /// A 128-bit integer vector selecting which bits to test in operand \a __M. /// \returns TRUE if the specified bits are neither all zeros nor all ones; /// FALSE otherwise. static __inline__ int __DEFAULT_FN_ATTRS _mm_testnzc_si128(__m128i __M, __m128i __V) { return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V); } /// Tests whether the specified bits in a 128-bit integer vector are all /// ones. /// /// \headerfile /// /// \code /// int _mm_test_all_ones(__m128i V); /// \endcode /// /// This intrinsic corresponds to the VPTEST / PTEST instruction. /// /// \param V /// A 128-bit integer vector containing the bits to be tested. /// \returns TRUE if the bits specified in the operand are all set to 1; FALSE /// otherwise. #define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_set1_epi32(-1)) /// Tests whether the specified bits in a 128-bit integer vector are /// neither all zeros nor all ones. /// /// \headerfile /// /// \code /// int _mm_test_mix_ones_zeros(__m128i M, __m128i V); /// \endcode /// /// This intrinsic corresponds to the VPTEST / PTEST instruction. /// /// \param M /// A 128-bit integer vector containing the bits to be tested. /// \param V /// A 128-bit integer vector selecting which bits to test in operand \a M. /// \returns TRUE if the specified bits are neither all zeros nor all ones; /// FALSE otherwise. #define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V)) /// Tests whether the specified bits in a 128-bit integer vector are all /// zeros. /// /// \headerfile /// /// \code /// int _mm_test_all_zeros(__m128i M, __m128i V); /// \endcode /// /// This intrinsic corresponds to the VPTEST / PTEST instruction. /// /// \param M /// A 128-bit integer vector containing the bits to be tested. /// \param V /// A 128-bit integer vector selecting which bits to test in operand \a M. /// \returns TRUE if the specified bits are all zeros; FALSE otherwise. #define _mm_test_all_zeros(M, V) _mm_testz_si128((M), (V)) /* SSE4 64-bit Packed Integer Comparisons. */ /// Compares each of the corresponding 64-bit values of the 128-bit /// integer vectors for equality. 
/// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPEQQ / PCMPEQQ instruction. /// /// \param __V1 /// A 128-bit integer vector. /// \param __V2 /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi64(__m128i __V1, __m128i __V2) { return (__m128i)((__v2di)__V1 == (__v2di)__V2); } /* SSE4 Packed Integer Sign-Extension. */ /// Sign-extends each of the lower eight 8-bit integer elements of a /// 128-bit vector of [16 x i8] to 16-bit values and returns them in a /// 128-bit vector of [8 x i16]. The upper eight elements of the input vector /// are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVSXBW / PMOVSXBW instruction. /// /// \param __V /// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are /// sign-extended to 16-bit values. /// \returns A 128-bit vector of [8 x i16] containing the sign-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi); } /// Sign-extends each of the lower four 8-bit integer elements of a /// 128-bit vector of [16 x i8] to 32-bit values and returns them in a /// 128-bit vector of [4 x i32]. The upper twelve elements of the input /// vector are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVSXBD / PMOVSXBD instruction. /// /// \param __V /// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are /// sign-extended to 32-bit values. /// \returns A 128-bit vector of [4 x i32] containing the sign-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi32(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si); } /// Sign-extends each of the lower two 8-bit integer elements of a /// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in /// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input /// vector are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVSXBQ / PMOVSXBQ instruction. /// /// \param __V /// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are /// sign-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi64(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di); } /// Sign-extends each of the lower four 16-bit integer elements of a /// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in /// a 128-bit vector of [4 x i32]. The upper four elements of the input /// vector are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVSXWD / PMOVSXWD instruction. /// /// \param __V /// A 128-bit vector of [8 x i16]. 
The lower four 16-bit elements are /// sign-extended to 32-bit values. /// \returns A 128-bit vector of [4 x i32] containing the sign-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi32(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si); } /// Sign-extends each of the lower two 16-bit integer elements of a /// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in /// a 128-bit vector of [2 x i64]. The upper six elements of the input /// vector are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVSXWQ / PMOVSXWQ instruction. /// /// \param __V /// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are /// sign-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi64(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di); } /// Sign-extends each of the lower two 32-bit integer elements of a /// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in /// a 128-bit vector of [2 x i64]. The upper two elements of the input vector /// are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVSXDQ / PMOVSXDQ instruction. /// /// \param __V /// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are /// sign-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi64(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v4si)__V, (__v4si)__V, 0, 1), __v2di); } /* SSE4 Packed Integer Zero-Extension. */ /// Zero-extends each of the lower eight 8-bit integer elements of a /// 128-bit vector of [16 x i8] to 16-bit values and returns them in a /// 128-bit vector of [8 x i16]. The upper eight elements of the input vector /// are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVZXBW / PMOVZXBW instruction. /// /// \param __V /// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are /// zero-extended to 16-bit values. /// \returns A 128-bit vector of [8 x i16] containing the zero-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi16(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi); } /// Zero-extends each of the lower four 8-bit integer elements of a /// 128-bit vector of [16 x i8] to 32-bit values and returns them in a /// 128-bit vector of [4 x i32]. The upper twelve elements of the input /// vector are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVZXBD / PMOVZXBD instruction. /// /// \param __V /// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are /// zero-extended to 32-bit values. /// \returns A 128-bit vector of [4 x i32] containing the zero-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi32(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si); } /// Zero-extends each of the lower two 8-bit integer elements of a /// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in /// a 128-bit vector of [2 x i64]. 
The upper fourteen elements of the input /// vector are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVZXBQ / PMOVZXBQ instruction. /// /// \param __V /// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are /// zero-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi64(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di); } /// Zero-extends each of the lower four 16-bit integer elements of a /// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in /// a 128-bit vector of [4 x i32]. The upper four elements of the input /// vector are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVZXWD / PMOVZXWD instruction. /// /// \param __V /// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are /// zero-extended to 32-bit values. /// \returns A 128-bit vector of [4 x i32] containing the zero-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi32(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si); } /// Zero-extends each of the lower two 16-bit integer elements of a /// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in /// a 128-bit vector of [2 x i64]. The upper six elements of the input vector /// are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVZXWQ / PMOVZXWQ instruction. /// /// \param __V /// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are /// zero-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi64(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di); } /// Zero-extends each of the lower two 32-bit integer elements of a /// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in /// a 128-bit vector of [2 x i64]. The upper two elements of the input vector /// are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVZXDQ / PMOVZXDQ instruction. /// /// \param __V /// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are /// zero-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v4su)__V, (__v4su)__V, 0, 1), __v2di); } /* SSE4 Pack with Unsigned Saturation. */ /// Converts, with saturation, 32-bit signed integers from both 128-bit integer /// vector operands into 16-bit unsigned integers, and returns the packed /// result. /// /// Values greater than 0xFFFF are saturated to 0xFFFF. Values less than /// 0x0000 are saturated to 0x0000. /// /// \headerfile /// /// This intrinsic corresponds to the VPACKUSDW / PACKUSDW instruction. /// /// \param __V1 /// A 128-bit vector of [4 x i32]. The converted [4 x i16] values are /// written to the lower 64 bits of the result. /// \param __V2 /// A 128-bit vector of [4 x i32]. The converted [4 x i16] values are /// written to the higher 64 bits of the result. /// \returns A 128-bit vector of [8 x i16] containing the converted values. 
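/* Illustrative usage sketch (not part of the original header): widening the
 * low eight unsigned bytes of a loaded vector to 16-bit lanes with
 * _mm_cvtepu8_epi16, a typical first step before wider integer arithmetic.
 * Assumes -msse4.1 and that p points to at least 16 readable bytes; the
 * example_* helper name is illustrative only. */
#include <smmintrin.h>

static __m128i example_widen_low_bytes(const unsigned char *p) {
  __m128i bytes = _mm_loadu_si128((const __m128i *)p); /* unaligned 16-byte load */
  return _mm_cvtepu8_epi16(bytes); /* low 8 bytes -> [8 x i16], zero-extended */
}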
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi32(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2); } /* SSE4 Multiple Packed Sums of Absolute Difference. */ /// Subtracts 8-bit unsigned integer values and computes the absolute /// values of the differences to the corresponding bits in the destination. /// Then sums of the absolute differences are returned according to the bit /// fields in the immediate operand. /// /// \headerfile /// /// \code /// __m128i _mm_mpsadbw_epu8(__m128i X, __m128i Y, const int M); /// \endcode /// /// This intrinsic corresponds to the VMPSADBW / MPSADBW instruction. /// /// \param X /// A 128-bit vector of [16 x i8]. /// \param Y /// A 128-bit vector of [16 x i8]. /// \param M /// An 8-bit immediate operand specifying how the absolute differences are to /// be calculated, according to the following algorithm: /// \code /// // M2 represents bit 2 of the immediate operand /// // M10 represents bits [1:0] of the immediate operand /// i = M2 * 4; /// j = M10 * 4; /// for (k = 0; k < 8; k = k + 1) { /// d0 = abs(X[i + k + 0] - Y[j + 0]); /// d1 = abs(X[i + k + 1] - Y[j + 1]); /// d2 = abs(X[i + k + 2] - Y[j + 2]); /// d3 = abs(X[i + k + 3] - Y[j + 3]); /// r[k] = d0 + d1 + d2 + d3; /// } /// \endcode /// \returns A 128-bit integer vector containing the sums of the sets of /// absolute differences between both operands. #define _mm_mpsadbw_epu8(X, Y, M) \ ((__m128i)__builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \ (__v16qi)(__m128i)(Y), (M))) /// Finds the minimum unsigned 16-bit element in the input 128-bit /// vector of [8 x u16] and returns it and along with its index. /// /// \headerfile /// /// This intrinsic corresponds to the VPHMINPOSUW / PHMINPOSUW /// instruction. /// /// \param __V /// A 128-bit vector of [8 x u16]. /// \returns A 128-bit value where bits [15:0] contain the minimum value found /// in parameter \a __V, bits [18:16] contain the index of the minimum value /// and the remaining bits are set to 0. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_minpos_epu16(__m128i __V) { return (__m128i)__builtin_ia32_phminposuw128((__v8hi)__V); } /* Handle the sse4.2 definitions here. */ /* These definitions are normally in nmmintrin.h, but gcc puts them in here so we'll do the same. */ #undef __DEFAULT_FN_ATTRS #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("sse4.2"))) /* These specify the type of data that we're comparing. */ #define _SIDD_UBYTE_OPS 0x00 #define _SIDD_UWORD_OPS 0x01 #define _SIDD_SBYTE_OPS 0x02 #define _SIDD_SWORD_OPS 0x03 /* These specify the type of comparison operation. */ #define _SIDD_CMP_EQUAL_ANY 0x00 #define _SIDD_CMP_RANGES 0x04 #define _SIDD_CMP_EQUAL_EACH 0x08 #define _SIDD_CMP_EQUAL_ORDERED 0x0c /* These macros specify the polarity of the operation. */ #define _SIDD_POSITIVE_POLARITY 0x00 #define _SIDD_NEGATIVE_POLARITY 0x10 #define _SIDD_MASKED_POSITIVE_POLARITY 0x20 #define _SIDD_MASKED_NEGATIVE_POLARITY 0x30 /* These macros are used in _mm_cmpXstri() to specify the return. */ #define _SIDD_LEAST_SIGNIFICANT 0x00 #define _SIDD_MOST_SIGNIFICANT 0x40 /* These macros are used in _mm_cmpXstri() to specify the return. */ #define _SIDD_BIT_MASK 0x00 #define _SIDD_UNIT_MASK 0x40 /* SSE4.2 Packed Comparison Intrinsics. */ /// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. 
Returns a 128-bit integer vector representing the result /// mask of the comparison. /// /// \headerfile /// /// \code /// __m128i _mm_cmpistrm(__m128i A, __m128i B, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPISTRM / PCMPISTRM /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words, the type of comparison to perform, and the format of the return /// value. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search \a B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. \n /// Bit [6]: Determines whether the result is zero-extended or expanded to 16 /// bytes. \n /// 0: The result is zero-extended to 16 bytes. \n /// 1: The result is expanded to 16 bytes (this expansion is performed by /// repeating each bit 8 or 16 times). /// \returns Returns a 128-bit integer vector representing the result mask of /// the comparison. #define _mm_cmpistrm(A, B, M) \ ((__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. Returns an integer representing the result index of the /// comparison. /// /// \headerfile /// /// \code /// int _mm_cmpistri(__m128i A, __m128i B, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPISTRI / PCMPISTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words, the type of comparison to perform, and the format of the return /// value. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. 
\n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. \n /// Bit [6]: Determines whether the index of the lowest set bit or the /// highest set bit is returned. \n /// 0: The index of the least significant set bit. \n /// 1: The index of the most significant set bit. \n /// \returns Returns an integer representing the result index of the comparison. #define _mm_cmpistri(A, B, M) \ ((int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. Returns a 128-bit integer vector representing the result /// mask of the comparison. /// /// \headerfile /// /// \code /// __m128i _mm_cmpestrm(__m128i A, int LA, __m128i B, int LB, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPESTRM / PCMPESTRM /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LA /// An integer that specifies the length of the string in \a A. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LB /// An integer that specifies the length of the string in \a B. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words, the type of comparison to perform, and the format of the return /// value. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search \a B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. \n /// Bit [6]: Determines whether the result is zero-extended or expanded to 16 /// bytes. \n /// 0: The result is zero-extended to 16 bytes. \n /// 1: The result is expanded to 16 bytes (this expansion is performed by /// repeating each bit 8 or 16 times). \n /// \returns Returns a 128-bit integer vector representing the result mask of /// the comparison. 
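/* Editor's illustrative sketch (not part of the original header): one way the
 * implicit-length intrinsics documented above can be combined. The helper name
 * first_vowel_index and the character set are assumptions chosen for this
 * example; the caller is assumed to provide at least 16 readable bytes at
 * `text`, and the code must be built with SSE4.2 enabled (e.g. -msse4.2). */
#include <nmmintrin.h>
/* Returns the index (0..15) of the first byte in the 16-byte chunk at `text`
 * that matches any character of the set, or 16 if no byte matches before the
 * terminating NUL. */
static int first_vowel_index(const char *text) {
  const __m128i set = _mm_setr_epi8('a', 'e', 'i', 'o', 'u', 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0);
  const __m128i chunk = _mm_loadu_si128((const __m128i *)text);
  /* Bits [1:0] = 00 (_SIDD_UBYTE_OPS): 16 unsigned bytes.
   * Bits [3:2] = 00 (_SIDD_CMP_EQUAL_ANY): each byte of `chunk` is compared
   * for equality against every byte of `set`.
   * Bit [6] = 0 (_SIDD_LEAST_SIGNIFICANT): index of the lowest matching byte. */
  return _mm_cmpistri(set, chunk,
                      _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY |
                      _SIDD_LEAST_SIGNIFICANT);
}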
#define _mm_cmpestrm(A, LA, B, LB, M) \ ((__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \ (__v16qi)(__m128i)(B), (int)(LB), \ (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. Returns an integer representing the result index of the /// comparison. /// /// \headerfile /// /// \code /// int _mm_cmpestri(__m128i A, int LA, __m128i B, int LB, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPESTRI / PCMPESTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LA /// An integer that specifies the length of the string in \a A. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LB /// An integer that specifies the length of the string in \a B. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words, the type of comparison to perform, and the format of the return /// value. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. \n /// Bit [6]: Determines whether the index of the lowest set bit or the /// highest set bit is returned. \n /// 0: The index of the least significant set bit. \n /// 1: The index of the most significant set bit. \n /// \returns Returns an integer representing the result index of the comparison. #define _mm_cmpestri(A, LA, B, LB, M) \ ((int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \ (__v16qi)(__m128i)(B), (int)(LB), \ (int)(M))) /* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */ /// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the bit mask is zero and the length of the /// string in \a B is the maximum, otherwise, returns 0. /// /// \headerfile /// /// \code /// int _mm_cmpistra(__m128i A, __m128i B, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPISTRI / PCMPISTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words and the type of comparison to perform. \n /// Bits [1:0]: Determine source data format. 
\n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search \a B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. \n /// \returns Returns 1 if the bit mask is zero and the length of the string in /// \a B is the maximum; otherwise, returns 0. #define _mm_cmpistra(A, B, M) \ ((int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the bit mask is non-zero, otherwise, returns /// 0. /// /// \headerfile /// /// \code /// int _mm_cmpistrc(__m128i A, __m128i B, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPISTRI / PCMPISTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words and the type of comparison to perform. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. /// \returns Returns 1 if the bit mask is non-zero, otherwise, returns 0. #define _mm_cmpistrc(A, B, M) \ ((int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. Returns bit 0 of the resulting bit mask. 
/// /// \headerfile /// /// \code /// int _mm_cmpistro(__m128i A, __m128i B, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPISTRI / PCMPISTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words and the type of comparison to perform. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. \n /// \returns Returns bit 0 of the resulting bit mask. #define _mm_cmpistro(A, B, M) \ ((int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the length of the string in \a A is less than /// the maximum, otherwise, returns 0. /// /// \headerfile /// /// \code /// int _mm_cmpistrs(__m128i A, __m128i B, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPISTRI / PCMPISTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words and the type of comparison to perform. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search \a B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. 
\n /// \returns Returns 1 if the length of the string in \a A is less than the /// maximum, otherwise, returns 0. #define _mm_cmpistrs(A, B, M) \ ((int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the length of the string in \a B is less than /// the maximum, otherwise, returns 0. /// /// \headerfile /// /// \code /// int _mm_cmpistrz(__m128i A, __m128i B, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPISTRI / PCMPISTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words and the type of comparison to perform. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search \a B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. /// \returns Returns 1 if the length of the string in \a B is less than the /// maximum, otherwise, returns 0. #define _mm_cmpistrz(A, B, M) \ ((int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the bit mask is zero and the length of the /// string in \a B is the maximum, otherwise, returns 0. /// /// \headerfile /// /// \code /// int _mm_cmpestra(__m128i A, int LA, __m128i B, int LB, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPESTRI / PCMPESTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LA /// An integer that specifies the length of the string in \a A. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LB /// An integer that specifies the length of the string in \a B. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words and the type of comparison to perform. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. 
\n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search \a B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. /// \returns Returns 1 if the bit mask is zero and the length of the string in /// \a B is the maximum, otherwise, returns 0. #define _mm_cmpestra(A, LA, B, LB, M) \ ((int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA), \ (__v16qi)(__m128i)(B), (int)(LB), \ (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the resulting mask is non-zero, otherwise, /// returns 0. /// /// \headerfile /// /// \code /// int _mm_cmpestrc(__m128i A, int LA, __m128i B, int LB, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPESTRI / PCMPESTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LA /// An integer that specifies the length of the string in \a A. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LB /// An integer that specifies the length of the string in \a B. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words and the type of comparison to perform. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search \a B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. \n /// \returns Returns 1 if the resulting mask is non-zero, otherwise, returns 0. #define _mm_cmpestrc(A, LA, B, LB, M) \ ((int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \ (__v16qi)(__m128i)(B), (int)(LB), \ (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. Returns bit 0 of the resulting bit mask. 
/// /// \headerfile /// /// \code /// int _mm_cmpestro(__m128i A, int LA, __m128i B, int LB, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPESTRI / PCMPESTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LA /// An integer that specifies the length of the string in \a A. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LB /// An integer that specifies the length of the string in \a B. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words and the type of comparison to perform. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search \a B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. /// \returns Returns bit 0 of the resulting bit mask. #define _mm_cmpestro(A, LA, B, LB, M) \ ((int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA), \ (__v16qi)(__m128i)(B), (int)(LB), \ (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the length of the string in \a A is less than /// the maximum, otherwise, returns 0. /// /// \headerfile /// /// \code /// int _mm_cmpestrs(__m128i A, int LA, __m128i B, int LB, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPESTRI / PCMPESTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LA /// An integer that specifies the length of the string in \a A. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LB /// An integer that specifies the length of the string in \a B. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words and the type of comparison to perform. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. 
\n /// 11: Substring: Search \a B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. \n /// \returns Returns 1 if the length of the string in \a A is less than the /// maximum, otherwise, returns 0. #define _mm_cmpestrs(A, LA, B, LB, M) \ ((int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA), \ (__v16qi)(__m128i)(B), (int)(LB), \ (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the length of the string in \a B is less than /// the maximum, otherwise, returns 0. /// /// \headerfile /// /// \code /// int _mm_cmpestrz(__m128i A, int LA, __m128i B, int LB, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPESTRI / PCMPESTRI instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LA /// An integer that specifies the length of the string in \a A. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LB /// An integer that specifies the length of the string in \a B. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words and the type of comparison to perform. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search \a B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. /// \returns Returns 1 if the length of the string in \a B is less than the /// maximum, otherwise, returns 0. #define _mm_cmpestrz(A, LA, B, LB, M) \ ((int)__builtin_ia32_pcmpestriz128((__v16qi)(__m128i)(A), (int)(LA), \ (__v16qi)(__m128i)(B), (int)(LB), \ (int)(M))) /* SSE4.2 Compare Packed Data -- Greater Than. */ /// Compares each of the corresponding 64-bit values of the 128-bit /// integer vectors to determine if the values in the first operand are /// greater than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPGTQ / PCMPGTQ instruction. /// /// \param __V1 /// A 128-bit integer vector. /// \param __V2 /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. 
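/* Editor's illustrative sketch (not part of the original header): the
 * explicit-length form documented above used for a bounded substring search.
 * The helper name find_in_chunk is an assumption; both pointers are assumed
 * to have 16 readable bytes, nlen and hlen are the valid lengths (<= 16), and
 * the code must be built with SSE4.2 enabled. */
#include <nmmintrin.h>
/* Returns the offset (0..15) within `chunk` at which `needle` (or a prefix of
 * it that reaches the end of the valid data) starts to match, or 16 if there
 * is no such offset. */
static int find_in_chunk(const char *needle, int nlen,
                         const char *chunk, int hlen) {
  const __m128i a = _mm_loadu_si128((const __m128i *)needle);
  const __m128i b = _mm_loadu_si128((const __m128i *)chunk);
  /* Bits [1:0] = 00 (_SIDD_UBYTE_OPS): 16 unsigned bytes.
   * Bits [3:2] = 11 (_SIDD_CMP_EQUAL_ORDERED): search B for substring matches
   * of A.
   * Bit [6] = 0 (_SIDD_LEAST_SIGNIFICANT): lowest matching offset. */
  return _mm_cmpestri(a, nlen, b, hlen,
                      _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ORDERED |
                      _SIDD_LEAST_SIGNIFICANT);
}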
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi64(__m128i __V1, __m128i __V2) { return (__m128i)((__v2di)__V1 > (__v2di)__V2); } #undef __DEFAULT_FN_ATTRS #include #include #endif /* __SMMINTRIN_H */ /// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated /// by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory /// at \a __p, and returns the 8-bit carry-out (carry flag). /// /// This intrinsic corresponds to the \c ADC instruction. /// /// \param __cf /// The 8-bit unsigned carry flag; any non-zero value indicates carry. /// \param __x /// A 32-bit unsigned addend. /// \param __y /// A 32-bit unsigned addend. /// \param __p /// Pointer to memory for storing the sum. /// \returns The 8-bit unsigned carry-out value. __INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __cf, unsigned int __x, unsigned int __y, unsigned int *__p) { return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p); } /// Adds unsigned 32-bit integer \a __y to 0 or 1 as indicated by the carry /// flag \a __cf, and subtracts the result from unsigned 32-bit integer /// \a __x. Stores the unsigned 32-bit difference in the memory at \a __p, /// and returns the 8-bit carry-out (carry or overflow flag). /// /// \code{.operation} /// temp := (__cf == 0) ? 0 : 1 /// Store32(__p, __x - (__y + temp)) /// result := CF /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c SBB instruction. /// /// \param __cf /// The 8-bit unsigned carry flag; any non-zero value indicates carry. /// \param __x /// The 32-bit unsigned minuend. /// \param __y /// The 32-bit unsigned subtrahend. /// \param __p /// Pointer to memory for storing the difference. /// \returns The 8-bit unsigned carry-out value. __INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u32(unsigned char __cf, unsigned int __x, unsigned int __y, unsigned int *__p) { return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p); } #ifdef __x86_64__ /// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated /// by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory /// at \a __p, and returns the 8-bit carry-out (carry flag). /// /// \code{.operation} /// temp := (__cf == 0) ? 0 : 1 /// Store64(__p, __x + __y + temp) /// result := CF /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c ADC instruction. /// /// \param __cf /// The 8-bit unsigned carry flag; any non-zero value indicates carry. /// \param __x /// A 64-bit unsigned addend. /// \param __y /// A 64-bit unsigned addend. /// \param __p /// Pointer to memory for storing the sum. /// \returns The 8-bit unsigned carry-out value. 
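/* Editor's illustrative sketch (not part of the original header): chaining the
 * 32-bit carry intrinsics documented above to add two little-endian
 * multi-word integers. The helper name add_multiword_u32 and its parameters
 * are assumptions made for this example; include <immintrin.h>, which
 * provides these intrinsics on x86 targets. */
#include <immintrin.h>
/* dst[i] = a[i] + b[i] + carry for i = 0..n-1, propagating the carry from one
 * word to the next; returns the final carry-out. */
static unsigned char add_multiword_u32(unsigned int *dst,
                                       const unsigned int *a,
                                       const unsigned int *b,
                                       unsigned int n) {
  unsigned char carry = 0;
  for (unsigned int i = 0; i < n; ++i)
    carry = _addcarry_u32(carry, a[i], b[i], &dst[i]);
  return carry;
}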
__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u64(unsigned char __cf, unsigned long long __x, unsigned long long __y, unsigned long long *__p) { return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p); } /// Adds unsigned 64-bit integer \a __y to 0 or 1 as indicated by the carry /// flag \a __cf, and subtracts the result from unsigned 64-bit integer /// \a __x. Stores the unsigned 64-bit difference in the memory at \a __p, /// and returns the 8-bit carry-out (carry or overflow flag). /// /// \code{.operation} /// temp := (__cf == 0) ? 0 : 1 /// Store64(__p, __x - (__y + temp)) /// result := CF /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c SBB instruction. /// /// \param __cf /// The 8-bit unsigned carry flag; any non-zero value indicates carry. /// \param __x /// The 64-bit unsigned minuend. /// \param __y /// The 64-bit unsigned subtrahend. /// \param __p /// Pointer to memory for storing the difference. /// \returns The 8-bit unsigned carry-out value. __INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u64(unsigned char __cf, unsigned long long __x, unsigned long long __y, unsigned long long *__p) { return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p); } #endif #if defined(__cplusplus) } #endif #undef __INLINE #undef __DEFAULT_FN_ATTRS #endif /* __ADCINTRIN_H */ /*===---- arm_sve.h - ARM SVE intrinsics -----------------------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __ARM_SVE_H #define __ARM_SVE_H #if !defined(__LITTLE_ENDIAN__) #error "Big endian is currently not supported for arm_sve.h" #endif #include #ifdef __cplusplus extern "C" { #else #include #endif typedef __fp16 float16_t; typedef float float32_t; typedef double float64_t; typedef __SVInt8_t svint8_t; typedef __SVInt16_t svint16_t; typedef __SVInt32_t svint32_t; typedef __SVInt64_t svint64_t; typedef __SVUint8_t svuint8_t; typedef __SVUint16_t svuint16_t; typedef __SVUint32_t svuint32_t; typedef __SVUint64_t svuint64_t; typedef __SVFloat16_t svfloat16_t; typedef __SVBfloat16_t svbfloat16_t; #include #include typedef __SVFloat32_t svfloat32_t; typedef __SVFloat64_t svfloat64_t; typedef __clang_svint8x2_t svint8x2_t; typedef __clang_svint16x2_t svint16x2_t; typedef __clang_svint32x2_t svint32x2_t; typedef __clang_svint64x2_t svint64x2_t; typedef __clang_svuint8x2_t svuint8x2_t; typedef __clang_svuint16x2_t svuint16x2_t; typedef __clang_svuint32x2_t svuint32x2_t; typedef __clang_svuint64x2_t svuint64x2_t; typedef __clang_svfloat16x2_t svfloat16x2_t; typedef __clang_svfloat32x2_t svfloat32x2_t; typedef __clang_svfloat64x2_t svfloat64x2_t; typedef __clang_svint8x3_t svint8x3_t; typedef __clang_svint16x3_t svint16x3_t; typedef __clang_svint32x3_t svint32x3_t; typedef __clang_svint64x3_t svint64x3_t; typedef __clang_svuint8x3_t svuint8x3_t; typedef __clang_svuint16x3_t svuint16x3_t; typedef __clang_svuint32x3_t svuint32x3_t; typedef __clang_svuint64x3_t svuint64x3_t; typedef __clang_svfloat16x3_t svfloat16x3_t; typedef __clang_svfloat32x3_t svfloat32x3_t; typedef __clang_svfloat64x3_t svfloat64x3_t; typedef __clang_svint8x4_t svint8x4_t; typedef __clang_svint16x4_t svint16x4_t; typedef __clang_svint32x4_t svint32x4_t; typedef __clang_svint64x4_t svint64x4_t; typedef __clang_svuint8x4_t svuint8x4_t; typedef __clang_svuint16x4_t 
svuint16x4_t; typedef __clang_svuint32x4_t svuint32x4_t; typedef __clang_svuint64x4_t svuint64x4_t; typedef __clang_svfloat16x4_t svfloat16x4_t; typedef __clang_svfloat32x4_t svfloat32x4_t; typedef __clang_svfloat64x4_t svfloat64x4_t; typedef __SVBool_t svbool_t; typedef __clang_svboolx2_t svboolx2_t; typedef __clang_svboolx4_t svboolx4_t; typedef __clang_svbfloat16x2_t svbfloat16x2_t; typedef __clang_svbfloat16x3_t svbfloat16x3_t; typedef __clang_svbfloat16x4_t svbfloat16x4_t; typedef __SVCount_t svcount_t; enum svpattern { SV_POW2 = 0, SV_VL1 = 1, SV_VL2 = 2, SV_VL3 = 3, SV_VL4 = 4, SV_VL5 = 5, SV_VL6 = 6, SV_VL7 = 7, SV_VL8 = 8, SV_VL16 = 9, SV_VL32 = 10, SV_VL64 = 11, SV_VL128 = 12, SV_VL256 = 13, SV_MUL4 = 29, SV_MUL3 = 30, SV_ALL = 31 }; enum svprfop { SV_PLDL1KEEP = 0, SV_PLDL1STRM = 1, SV_PLDL2KEEP = 2, SV_PLDL2STRM = 3, SV_PLDL3KEEP = 4, SV_PLDL3STRM = 5, SV_PSTL1KEEP = 8, SV_PSTL1STRM = 9, SV_PSTL2KEEP = 10, SV_PSTL2STRM = 11, SV_PSTL3KEEP = 12, SV_PSTL3STRM = 13 }; /* Function attributes */ #define __ai static __inline__ __attribute__((__always_inline__, __nodebug__)) #define __aio static __inline__ __attribute__((__always_inline__, __nodebug__, __overloadable__)) #define svreinterpret_s8_s8(...) __builtin_sve_reinterpret_s8_s8(__VA_ARGS__) #define svreinterpret_s8_u8(...) __builtin_sve_reinterpret_s8_u8(__VA_ARGS__) #define svreinterpret_s8_s16(...) __builtin_sve_reinterpret_s8_s16(__VA_ARGS__) #define svreinterpret_s8_u16(...) __builtin_sve_reinterpret_s8_u16(__VA_ARGS__) #define svreinterpret_s8_s32(...) __builtin_sve_reinterpret_s8_s32(__VA_ARGS__) #define svreinterpret_s8_u32(...) __builtin_sve_reinterpret_s8_u32(__VA_ARGS__) #define svreinterpret_s8_s64(...) __builtin_sve_reinterpret_s8_s64(__VA_ARGS__) #define svreinterpret_s8_u64(...) __builtin_sve_reinterpret_s8_u64(__VA_ARGS__) #define svreinterpret_s8_f16(...) __builtin_sve_reinterpret_s8_f16(__VA_ARGS__) #define svreinterpret_s8_bf16(...) __builtin_sve_reinterpret_s8_bf16(__VA_ARGS__) #define svreinterpret_s8_f32(...) __builtin_sve_reinterpret_s8_f32(__VA_ARGS__) #define svreinterpret_s8_f64(...) __builtin_sve_reinterpret_s8_f64(__VA_ARGS__) #define svreinterpret_u8_s8(...) __builtin_sve_reinterpret_u8_s8(__VA_ARGS__) #define svreinterpret_u8_u8(...) __builtin_sve_reinterpret_u8_u8(__VA_ARGS__) #define svreinterpret_u8_s16(...) __builtin_sve_reinterpret_u8_s16(__VA_ARGS__) #define svreinterpret_u8_u16(...) __builtin_sve_reinterpret_u8_u16(__VA_ARGS__) #define svreinterpret_u8_s32(...) __builtin_sve_reinterpret_u8_s32(__VA_ARGS__) #define svreinterpret_u8_u32(...) __builtin_sve_reinterpret_u8_u32(__VA_ARGS__) #define svreinterpret_u8_s64(...) __builtin_sve_reinterpret_u8_s64(__VA_ARGS__) #define svreinterpret_u8_u64(...) __builtin_sve_reinterpret_u8_u64(__VA_ARGS__) #define svreinterpret_u8_f16(...) __builtin_sve_reinterpret_u8_f16(__VA_ARGS__) #define svreinterpret_u8_bf16(...) __builtin_sve_reinterpret_u8_bf16(__VA_ARGS__) #define svreinterpret_u8_f32(...) __builtin_sve_reinterpret_u8_f32(__VA_ARGS__) #define svreinterpret_u8_f64(...) __builtin_sve_reinterpret_u8_f64(__VA_ARGS__) #define svreinterpret_s16_s8(...) __builtin_sve_reinterpret_s16_s8(__VA_ARGS__) #define svreinterpret_s16_u8(...) __builtin_sve_reinterpret_s16_u8(__VA_ARGS__) #define svreinterpret_s16_s16(...) __builtin_sve_reinterpret_s16_s16(__VA_ARGS__) #define svreinterpret_s16_u16(...) __builtin_sve_reinterpret_s16_u16(__VA_ARGS__) #define svreinterpret_s16_s32(...) __builtin_sve_reinterpret_s16_s32(__VA_ARGS__) #define svreinterpret_s16_u32(...) 
__builtin_sve_reinterpret_s16_u32(__VA_ARGS__) #define svreinterpret_s16_s64(...) __builtin_sve_reinterpret_s16_s64(__VA_ARGS__) #define svreinterpret_s16_u64(...) __builtin_sve_reinterpret_s16_u64(__VA_ARGS__) #define svreinterpret_s16_f16(...) __builtin_sve_reinterpret_s16_f16(__VA_ARGS__) #define svreinterpret_s16_bf16(...) __builtin_sve_reinterpret_s16_bf16(__VA_ARGS__) #define svreinterpret_s16_f32(...) __builtin_sve_reinterpret_s16_f32(__VA_ARGS__) #define svreinterpret_s16_f64(...) __builtin_sve_reinterpret_s16_f64(__VA_ARGS__) #define svreinterpret_u16_s8(...) __builtin_sve_reinterpret_u16_s8(__VA_ARGS__) #define svreinterpret_u16_u8(...) __builtin_sve_reinterpret_u16_u8(__VA_ARGS__) #define svreinterpret_u16_s16(...) __builtin_sve_reinterpret_u16_s16(__VA_ARGS__) #define svreinterpret_u16_u16(...) __builtin_sve_reinterpret_u16_u16(__VA_ARGS__) #define svreinterpret_u16_s32(...) __builtin_sve_reinterpret_u16_s32(__VA_ARGS__) #define svreinterpret_u16_u32(...) __builtin_sve_reinterpret_u16_u32(__VA_ARGS__) #define svreinterpret_u16_s64(...) __builtin_sve_reinterpret_u16_s64(__VA_ARGS__) #define svreinterpret_u16_u64(...) __builtin_sve_reinterpret_u16_u64(__VA_ARGS__) #define svreinterpret_u16_f16(...) __builtin_sve_reinterpret_u16_f16(__VA_ARGS__) #define svreinterpret_u16_bf16(...) __builtin_sve_reinterpret_u16_bf16(__VA_ARGS__) #define svreinterpret_u16_f32(...) __builtin_sve_reinterpret_u16_f32(__VA_ARGS__) #define svreinterpret_u16_f64(...) __builtin_sve_reinterpret_u16_f64(__VA_ARGS__) #define svreinterpret_s32_s8(...) __builtin_sve_reinterpret_s32_s8(__VA_ARGS__) #define svreinterpret_s32_u8(...) __builtin_sve_reinterpret_s32_u8(__VA_ARGS__) #define svreinterpret_s32_s16(...) __builtin_sve_reinterpret_s32_s16(__VA_ARGS__) #define svreinterpret_s32_u16(...) __builtin_sve_reinterpret_s32_u16(__VA_ARGS__) #define svreinterpret_s32_s32(...) __builtin_sve_reinterpret_s32_s32(__VA_ARGS__) #define svreinterpret_s32_u32(...) __builtin_sve_reinterpret_s32_u32(__VA_ARGS__) #define svreinterpret_s32_s64(...) __builtin_sve_reinterpret_s32_s64(__VA_ARGS__) #define svreinterpret_s32_u64(...) __builtin_sve_reinterpret_s32_u64(__VA_ARGS__) #define svreinterpret_s32_f16(...) __builtin_sve_reinterpret_s32_f16(__VA_ARGS__) #define svreinterpret_s32_bf16(...) __builtin_sve_reinterpret_s32_bf16(__VA_ARGS__) #define svreinterpret_s32_f32(...) __builtin_sve_reinterpret_s32_f32(__VA_ARGS__) #define svreinterpret_s32_f64(...) __builtin_sve_reinterpret_s32_f64(__VA_ARGS__) #define svreinterpret_u32_s8(...) __builtin_sve_reinterpret_u32_s8(__VA_ARGS__) #define svreinterpret_u32_u8(...) __builtin_sve_reinterpret_u32_u8(__VA_ARGS__) #define svreinterpret_u32_s16(...) __builtin_sve_reinterpret_u32_s16(__VA_ARGS__) #define svreinterpret_u32_u16(...) __builtin_sve_reinterpret_u32_u16(__VA_ARGS__) #define svreinterpret_u32_s32(...) __builtin_sve_reinterpret_u32_s32(__VA_ARGS__) #define svreinterpret_u32_u32(...) __builtin_sve_reinterpret_u32_u32(__VA_ARGS__) #define svreinterpret_u32_s64(...) __builtin_sve_reinterpret_u32_s64(__VA_ARGS__) #define svreinterpret_u32_u64(...) __builtin_sve_reinterpret_u32_u64(__VA_ARGS__) #define svreinterpret_u32_f16(...) __builtin_sve_reinterpret_u32_f16(__VA_ARGS__) #define svreinterpret_u32_bf16(...) __builtin_sve_reinterpret_u32_bf16(__VA_ARGS__) #define svreinterpret_u32_f32(...) __builtin_sve_reinterpret_u32_f32(__VA_ARGS__) #define svreinterpret_u32_f64(...) __builtin_sve_reinterpret_u32_f64(__VA_ARGS__) #define svreinterpret_s64_s8(...) 
__builtin_sve_reinterpret_s64_s8(__VA_ARGS__) #define svreinterpret_s64_u8(...) __builtin_sve_reinterpret_s64_u8(__VA_ARGS__) #define svreinterpret_s64_s16(...) __builtin_sve_reinterpret_s64_s16(__VA_ARGS__) #define svreinterpret_s64_u16(...) __builtin_sve_reinterpret_s64_u16(__VA_ARGS__) #define svreinterpret_s64_s32(...) __builtin_sve_reinterpret_s64_s32(__VA_ARGS__) #define svreinterpret_s64_u32(...) __builtin_sve_reinterpret_s64_u32(__VA_ARGS__) #define svreinterpret_s64_s64(...) __builtin_sve_reinterpret_s64_s64(__VA_ARGS__) #define svreinterpret_s64_u64(...) __builtin_sve_reinterpret_s64_u64(__VA_ARGS__) #define svreinterpret_s64_f16(...) __builtin_sve_reinterpret_s64_f16(__VA_ARGS__) #define svreinterpret_s64_bf16(...) __builtin_sve_reinterpret_s64_bf16(__VA_ARGS__) #define svreinterpret_s64_f32(...) __builtin_sve_reinterpret_s64_f32(__VA_ARGS__) #define svreinterpret_s64_f64(...) __builtin_sve_reinterpret_s64_f64(__VA_ARGS__) #define svreinterpret_u64_s8(...) __builtin_sve_reinterpret_u64_s8(__VA_ARGS__) #define svreinterpret_u64_u8(...) __builtin_sve_reinterpret_u64_u8(__VA_ARGS__) #define svreinterpret_u64_s16(...) __builtin_sve_reinterpret_u64_s16(__VA_ARGS__) #define svreinterpret_u64_u16(...) __builtin_sve_reinterpret_u64_u16(__VA_ARGS__) #define svreinterpret_u64_s32(...) __builtin_sve_reinterpret_u64_s32(__VA_ARGS__) #define svreinterpret_u64_u32(...) __builtin_sve_reinterpret_u64_u32(__VA_ARGS__) #define svreinterpret_u64_s64(...) __builtin_sve_reinterpret_u64_s64(__VA_ARGS__) #define svreinterpret_u64_u64(...) __builtin_sve_reinterpret_u64_u64(__VA_ARGS__) #define svreinterpret_u64_f16(...) __builtin_sve_reinterpret_u64_f16(__VA_ARGS__) #define svreinterpret_u64_bf16(...) __builtin_sve_reinterpret_u64_bf16(__VA_ARGS__) #define svreinterpret_u64_f32(...) __builtin_sve_reinterpret_u64_f32(__VA_ARGS__) #define svreinterpret_u64_f64(...) __builtin_sve_reinterpret_u64_f64(__VA_ARGS__) #define svreinterpret_f16_s8(...) __builtin_sve_reinterpret_f16_s8(__VA_ARGS__) #define svreinterpret_f16_u8(...) __builtin_sve_reinterpret_f16_u8(__VA_ARGS__) #define svreinterpret_f16_s16(...) __builtin_sve_reinterpret_f16_s16(__VA_ARGS__) #define svreinterpret_f16_u16(...) __builtin_sve_reinterpret_f16_u16(__VA_ARGS__) #define svreinterpret_f16_s32(...) __builtin_sve_reinterpret_f16_s32(__VA_ARGS__) #define svreinterpret_f16_u32(...) __builtin_sve_reinterpret_f16_u32(__VA_ARGS__) #define svreinterpret_f16_s64(...) __builtin_sve_reinterpret_f16_s64(__VA_ARGS__) #define svreinterpret_f16_u64(...) __builtin_sve_reinterpret_f16_u64(__VA_ARGS__) #define svreinterpret_f16_f16(...) __builtin_sve_reinterpret_f16_f16(__VA_ARGS__) #define svreinterpret_f16_bf16(...) __builtin_sve_reinterpret_f16_bf16(__VA_ARGS__) #define svreinterpret_f16_f32(...) __builtin_sve_reinterpret_f16_f32(__VA_ARGS__) #define svreinterpret_f16_f64(...) __builtin_sve_reinterpret_f16_f64(__VA_ARGS__) #define svreinterpret_bf16_s8(...) __builtin_sve_reinterpret_bf16_s8(__VA_ARGS__) #define svreinterpret_bf16_u8(...) __builtin_sve_reinterpret_bf16_u8(__VA_ARGS__) #define svreinterpret_bf16_s16(...) __builtin_sve_reinterpret_bf16_s16(__VA_ARGS__) #define svreinterpret_bf16_u16(...) __builtin_sve_reinterpret_bf16_u16(__VA_ARGS__) #define svreinterpret_bf16_s32(...) __builtin_sve_reinterpret_bf16_s32(__VA_ARGS__) #define svreinterpret_bf16_u32(...) __builtin_sve_reinterpret_bf16_u32(__VA_ARGS__) #define svreinterpret_bf16_s64(...) __builtin_sve_reinterpret_bf16_s64(__VA_ARGS__) #define svreinterpret_bf16_u64(...) 
__builtin_sve_reinterpret_bf16_u64(__VA_ARGS__) #define svreinterpret_bf16_f16(...) __builtin_sve_reinterpret_bf16_f16(__VA_ARGS__) #define svreinterpret_bf16_bf16(...) __builtin_sve_reinterpret_bf16_bf16(__VA_ARGS__) #define svreinterpret_bf16_f32(...) __builtin_sve_reinterpret_bf16_f32(__VA_ARGS__) #define svreinterpret_bf16_f64(...) __builtin_sve_reinterpret_bf16_f64(__VA_ARGS__) #define svreinterpret_f32_s8(...) __builtin_sve_reinterpret_f32_s8(__VA_ARGS__) #define svreinterpret_f32_u8(...) __builtin_sve_reinterpret_f32_u8(__VA_ARGS__) #define svreinterpret_f32_s16(...) __builtin_sve_reinterpret_f32_s16(__VA_ARGS__) #define svreinterpret_f32_u16(...) __builtin_sve_reinterpret_f32_u16(__VA_ARGS__) #define svreinterpret_f32_s32(...) __builtin_sve_reinterpret_f32_s32(__VA_ARGS__) #define svreinterpret_f32_u32(...) __builtin_sve_reinterpret_f32_u32(__VA_ARGS__) #define svreinterpret_f32_s64(...) __builtin_sve_reinterpret_f32_s64(__VA_ARGS__) #define svreinterpret_f32_u64(...) __builtin_sve_reinterpret_f32_u64(__VA_ARGS__) #define svreinterpret_f32_f16(...) __builtin_sve_reinterpret_f32_f16(__VA_ARGS__) #define svreinterpret_f32_bf16(...) __builtin_sve_reinterpret_f32_bf16(__VA_ARGS__) #define svreinterpret_f32_f32(...) __builtin_sve_reinterpret_f32_f32(__VA_ARGS__) #define svreinterpret_f32_f64(...) __builtin_sve_reinterpret_f32_f64(__VA_ARGS__) #define svreinterpret_f64_s8(...) __builtin_sve_reinterpret_f64_s8(__VA_ARGS__) #define svreinterpret_f64_u8(...) __builtin_sve_reinterpret_f64_u8(__VA_ARGS__) #define svreinterpret_f64_s16(...) __builtin_sve_reinterpret_f64_s16(__VA_ARGS__) #define svreinterpret_f64_u16(...) __builtin_sve_reinterpret_f64_u16(__VA_ARGS__) #define svreinterpret_f64_s32(...) __builtin_sve_reinterpret_f64_s32(__VA_ARGS__) #define svreinterpret_f64_u32(...) __builtin_sve_reinterpret_f64_u32(__VA_ARGS__) #define svreinterpret_f64_s64(...) __builtin_sve_reinterpret_f64_s64(__VA_ARGS__) #define svreinterpret_f64_u64(...) __builtin_sve_reinterpret_f64_u64(__VA_ARGS__) #define svreinterpret_f64_f16(...) __builtin_sve_reinterpret_f64_f16(__VA_ARGS__) #define svreinterpret_f64_bf16(...) __builtin_sve_reinterpret_f64_bf16(__VA_ARGS__) #define svreinterpret_f64_f32(...) __builtin_sve_reinterpret_f64_f32(__VA_ARGS__) #define svreinterpret_f64_f64(...) 
__builtin_sve_reinterpret_f64_f64(__VA_ARGS__) __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_s8(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_u8(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_s16(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_u16(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_s32(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_u32(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_s64(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_u64(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_f16(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_bf16(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svfloat32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_f32(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svfloat64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_f64(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_s8(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_u8(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_s16(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_u16(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_s32(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_u32(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_s64(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_u64(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_f16(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_bf16(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svfloat32_t op) __arm_streaming_compatible { return 
__builtin_sve_reinterpret_u8_f32(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svfloat64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_f64(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_s8(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_u8(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_s16(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_u16(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_s32(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_u32(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_s64(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_u64(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_f16(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_bf16(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svfloat32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_f32(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svfloat64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_f64(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_s8(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_u8(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_s16(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_u16(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_s32(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_u32(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_s64(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_u64(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_f16(op); } __aio __attribute__((target("sve"))) svuint16_t 
svreinterpret_u16(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_bf16(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svfloat32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_f32(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svfloat64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_f64(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_s8(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_u8(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_s16(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_u16(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_s32(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_u32(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_s64(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_u64(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_f16(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_bf16(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svfloat32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_f32(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svfloat64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_f64(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_s8(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_u8(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_s16(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_u16(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_s32(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_u32(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_s64(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint64_t op) __arm_streaming_compatible { return 
__builtin_sve_reinterpret_u32_u64(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_f16(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_bf16(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svfloat32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_f32(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svfloat64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_f64(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_s8(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_u8(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_s16(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_u16(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_s32(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_u32(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_s64(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_u64(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_f16(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_bf16(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svfloat32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_f32(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svfloat64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_f64(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_s8(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_u8(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_s16(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_u16(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_s32(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_u32(op); } __aio __attribute__((target("sve"))) svuint64_t 
svreinterpret_u64(svint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_s64(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_u64(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_f16(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_bf16(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svfloat32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_f32(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svfloat64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_f64(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_s8(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_u8(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_s16(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_u16(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_s32(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_u32(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_s64(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_u64(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_f16(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_bf16(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svfloat32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_f32(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svfloat64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_f64(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_s8(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_u8(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_s16(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_u16(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint32_t op) 
__arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_s32(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_u32(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_s64(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_u64(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_f16(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_bf16(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svfloat32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_f32(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svfloat64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_f64(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_s8(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_u8(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_s16(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_u16(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_s32(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_u32(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_s64(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_u64(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_f16(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_bf16(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svfloat32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_f32(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svfloat64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_f64(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_s8(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_u8(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint16_t op) __arm_streaming_compatible { return 
__builtin_sve_reinterpret_f64_s16(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_u16(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_s32(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_u32(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_s64(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_u64(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_f16(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_bf16(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svfloat32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_f32(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svfloat64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_f64(op); } #define svreinterpret_s8_s8_x2(...) __builtin_sve_reinterpret_s8_s8_x2(__VA_ARGS__) #define svreinterpret_s8_u8_x2(...) __builtin_sve_reinterpret_s8_u8_x2(__VA_ARGS__) #define svreinterpret_s8_s16_x2(...) __builtin_sve_reinterpret_s8_s16_x2(__VA_ARGS__) #define svreinterpret_s8_u16_x2(...) __builtin_sve_reinterpret_s8_u16_x2(__VA_ARGS__) #define svreinterpret_s8_s32_x2(...) __builtin_sve_reinterpret_s8_s32_x2(__VA_ARGS__) #define svreinterpret_s8_u32_x2(...) __builtin_sve_reinterpret_s8_u32_x2(__VA_ARGS__) #define svreinterpret_s8_s64_x2(...) __builtin_sve_reinterpret_s8_s64_x2(__VA_ARGS__) #define svreinterpret_s8_u64_x2(...) __builtin_sve_reinterpret_s8_u64_x2(__VA_ARGS__) #define svreinterpret_s8_f16_x2(...) __builtin_sve_reinterpret_s8_f16_x2(__VA_ARGS__) #define svreinterpret_s8_bf16_x2(...) __builtin_sve_reinterpret_s8_bf16_x2(__VA_ARGS__) #define svreinterpret_s8_f32_x2(...) __builtin_sve_reinterpret_s8_f32_x2(__VA_ARGS__) #define svreinterpret_s8_f64_x2(...) __builtin_sve_reinterpret_s8_f64_x2(__VA_ARGS__) #define svreinterpret_u8_s8_x2(...) __builtin_sve_reinterpret_u8_s8_x2(__VA_ARGS__) #define svreinterpret_u8_u8_x2(...) __builtin_sve_reinterpret_u8_u8_x2(__VA_ARGS__) #define svreinterpret_u8_s16_x2(...) __builtin_sve_reinterpret_u8_s16_x2(__VA_ARGS__) #define svreinterpret_u8_u16_x2(...) __builtin_sve_reinterpret_u8_u16_x2(__VA_ARGS__) #define svreinterpret_u8_s32_x2(...) __builtin_sve_reinterpret_u8_s32_x2(__VA_ARGS__) #define svreinterpret_u8_u32_x2(...) __builtin_sve_reinterpret_u8_u32_x2(__VA_ARGS__) #define svreinterpret_u8_s64_x2(...) __builtin_sve_reinterpret_u8_s64_x2(__VA_ARGS__) #define svreinterpret_u8_u64_x2(...) __builtin_sve_reinterpret_u8_u64_x2(__VA_ARGS__) #define svreinterpret_u8_f16_x2(...) __builtin_sve_reinterpret_u8_f16_x2(__VA_ARGS__) #define svreinterpret_u8_bf16_x2(...) __builtin_sve_reinterpret_u8_bf16_x2(__VA_ARGS__) #define svreinterpret_u8_f32_x2(...) __builtin_sve_reinterpret_u8_f32_x2(__VA_ARGS__) #define svreinterpret_u8_f64_x2(...) 
__builtin_sve_reinterpret_u8_f64_x2(__VA_ARGS__) #define svreinterpret_s16_s8_x2(...) __builtin_sve_reinterpret_s16_s8_x2(__VA_ARGS__) #define svreinterpret_s16_u8_x2(...) __builtin_sve_reinterpret_s16_u8_x2(__VA_ARGS__) #define svreinterpret_s16_s16_x2(...) __builtin_sve_reinterpret_s16_s16_x2(__VA_ARGS__) #define svreinterpret_s16_u16_x2(...) __builtin_sve_reinterpret_s16_u16_x2(__VA_ARGS__) #define svreinterpret_s16_s32_x2(...) __builtin_sve_reinterpret_s16_s32_x2(__VA_ARGS__) #define svreinterpret_s16_u32_x2(...) __builtin_sve_reinterpret_s16_u32_x2(__VA_ARGS__) #define svreinterpret_s16_s64_x2(...) __builtin_sve_reinterpret_s16_s64_x2(__VA_ARGS__) #define svreinterpret_s16_u64_x2(...) __builtin_sve_reinterpret_s16_u64_x2(__VA_ARGS__) #define svreinterpret_s16_f16_x2(...) __builtin_sve_reinterpret_s16_f16_x2(__VA_ARGS__) #define svreinterpret_s16_bf16_x2(...) __builtin_sve_reinterpret_s16_bf16_x2(__VA_ARGS__) #define svreinterpret_s16_f32_x2(...) __builtin_sve_reinterpret_s16_f32_x2(__VA_ARGS__) #define svreinterpret_s16_f64_x2(...) __builtin_sve_reinterpret_s16_f64_x2(__VA_ARGS__) #define svreinterpret_u16_s8_x2(...) __builtin_sve_reinterpret_u16_s8_x2(__VA_ARGS__) #define svreinterpret_u16_u8_x2(...) __builtin_sve_reinterpret_u16_u8_x2(__VA_ARGS__) #define svreinterpret_u16_s16_x2(...) __builtin_sve_reinterpret_u16_s16_x2(__VA_ARGS__) #define svreinterpret_u16_u16_x2(...) __builtin_sve_reinterpret_u16_u16_x2(__VA_ARGS__) #define svreinterpret_u16_s32_x2(...) __builtin_sve_reinterpret_u16_s32_x2(__VA_ARGS__) #define svreinterpret_u16_u32_x2(...) __builtin_sve_reinterpret_u16_u32_x2(__VA_ARGS__) #define svreinterpret_u16_s64_x2(...) __builtin_sve_reinterpret_u16_s64_x2(__VA_ARGS__) #define svreinterpret_u16_u64_x2(...) __builtin_sve_reinterpret_u16_u64_x2(__VA_ARGS__) #define svreinterpret_u16_f16_x2(...) __builtin_sve_reinterpret_u16_f16_x2(__VA_ARGS__) #define svreinterpret_u16_bf16_x2(...) __builtin_sve_reinterpret_u16_bf16_x2(__VA_ARGS__) #define svreinterpret_u16_f32_x2(...) __builtin_sve_reinterpret_u16_f32_x2(__VA_ARGS__) #define svreinterpret_u16_f64_x2(...) __builtin_sve_reinterpret_u16_f64_x2(__VA_ARGS__) #define svreinterpret_s32_s8_x2(...) __builtin_sve_reinterpret_s32_s8_x2(__VA_ARGS__) #define svreinterpret_s32_u8_x2(...) __builtin_sve_reinterpret_s32_u8_x2(__VA_ARGS__) #define svreinterpret_s32_s16_x2(...) __builtin_sve_reinterpret_s32_s16_x2(__VA_ARGS__) #define svreinterpret_s32_u16_x2(...) __builtin_sve_reinterpret_s32_u16_x2(__VA_ARGS__) #define svreinterpret_s32_s32_x2(...) __builtin_sve_reinterpret_s32_s32_x2(__VA_ARGS__) #define svreinterpret_s32_u32_x2(...) __builtin_sve_reinterpret_s32_u32_x2(__VA_ARGS__) #define svreinterpret_s32_s64_x2(...) __builtin_sve_reinterpret_s32_s64_x2(__VA_ARGS__) #define svreinterpret_s32_u64_x2(...) __builtin_sve_reinterpret_s32_u64_x2(__VA_ARGS__) #define svreinterpret_s32_f16_x2(...) __builtin_sve_reinterpret_s32_f16_x2(__VA_ARGS__) #define svreinterpret_s32_bf16_x2(...) __builtin_sve_reinterpret_s32_bf16_x2(__VA_ARGS__) #define svreinterpret_s32_f32_x2(...) __builtin_sve_reinterpret_s32_f32_x2(__VA_ARGS__) #define svreinterpret_s32_f64_x2(...) __builtin_sve_reinterpret_s32_f64_x2(__VA_ARGS__) #define svreinterpret_u32_s8_x2(...) __builtin_sve_reinterpret_u32_s8_x2(__VA_ARGS__) #define svreinterpret_u32_u8_x2(...) __builtin_sve_reinterpret_u32_u8_x2(__VA_ARGS__) #define svreinterpret_u32_s16_x2(...) __builtin_sve_reinterpret_u32_s16_x2(__VA_ARGS__) #define svreinterpret_u32_u16_x2(...) 
__builtin_sve_reinterpret_u32_u16_x2(__VA_ARGS__) #define svreinterpret_u32_s32_x2(...) __builtin_sve_reinterpret_u32_s32_x2(__VA_ARGS__) #define svreinterpret_u32_u32_x2(...) __builtin_sve_reinterpret_u32_u32_x2(__VA_ARGS__) #define svreinterpret_u32_s64_x2(...) __builtin_sve_reinterpret_u32_s64_x2(__VA_ARGS__) #define svreinterpret_u32_u64_x2(...) __builtin_sve_reinterpret_u32_u64_x2(__VA_ARGS__) #define svreinterpret_u32_f16_x2(...) __builtin_sve_reinterpret_u32_f16_x2(__VA_ARGS__) #define svreinterpret_u32_bf16_x2(...) __builtin_sve_reinterpret_u32_bf16_x2(__VA_ARGS__) #define svreinterpret_u32_f32_x2(...) __builtin_sve_reinterpret_u32_f32_x2(__VA_ARGS__) #define svreinterpret_u32_f64_x2(...) __builtin_sve_reinterpret_u32_f64_x2(__VA_ARGS__) #define svreinterpret_s64_s8_x2(...) __builtin_sve_reinterpret_s64_s8_x2(__VA_ARGS__) #define svreinterpret_s64_u8_x2(...) __builtin_sve_reinterpret_s64_u8_x2(__VA_ARGS__) #define svreinterpret_s64_s16_x2(...) __builtin_sve_reinterpret_s64_s16_x2(__VA_ARGS__) #define svreinterpret_s64_u16_x2(...) __builtin_sve_reinterpret_s64_u16_x2(__VA_ARGS__) #define svreinterpret_s64_s32_x2(...) __builtin_sve_reinterpret_s64_s32_x2(__VA_ARGS__) #define svreinterpret_s64_u32_x2(...) __builtin_sve_reinterpret_s64_u32_x2(__VA_ARGS__) #define svreinterpret_s64_s64_x2(...) __builtin_sve_reinterpret_s64_s64_x2(__VA_ARGS__) #define svreinterpret_s64_u64_x2(...) __builtin_sve_reinterpret_s64_u64_x2(__VA_ARGS__) #define svreinterpret_s64_f16_x2(...) __builtin_sve_reinterpret_s64_f16_x2(__VA_ARGS__) #define svreinterpret_s64_bf16_x2(...) __builtin_sve_reinterpret_s64_bf16_x2(__VA_ARGS__) #define svreinterpret_s64_f32_x2(...) __builtin_sve_reinterpret_s64_f32_x2(__VA_ARGS__) #define svreinterpret_s64_f64_x2(...) __builtin_sve_reinterpret_s64_f64_x2(__VA_ARGS__) #define svreinterpret_u64_s8_x2(...) __builtin_sve_reinterpret_u64_s8_x2(__VA_ARGS__) #define svreinterpret_u64_u8_x2(...) __builtin_sve_reinterpret_u64_u8_x2(__VA_ARGS__) #define svreinterpret_u64_s16_x2(...) __builtin_sve_reinterpret_u64_s16_x2(__VA_ARGS__) #define svreinterpret_u64_u16_x2(...) __builtin_sve_reinterpret_u64_u16_x2(__VA_ARGS__) #define svreinterpret_u64_s32_x2(...) __builtin_sve_reinterpret_u64_s32_x2(__VA_ARGS__) #define svreinterpret_u64_u32_x2(...) __builtin_sve_reinterpret_u64_u32_x2(__VA_ARGS__) #define svreinterpret_u64_s64_x2(...) __builtin_sve_reinterpret_u64_s64_x2(__VA_ARGS__) #define svreinterpret_u64_u64_x2(...) __builtin_sve_reinterpret_u64_u64_x2(__VA_ARGS__) #define svreinterpret_u64_f16_x2(...) __builtin_sve_reinterpret_u64_f16_x2(__VA_ARGS__) #define svreinterpret_u64_bf16_x2(...) __builtin_sve_reinterpret_u64_bf16_x2(__VA_ARGS__) #define svreinterpret_u64_f32_x2(...) __builtin_sve_reinterpret_u64_f32_x2(__VA_ARGS__) #define svreinterpret_u64_f64_x2(...) __builtin_sve_reinterpret_u64_f64_x2(__VA_ARGS__) #define svreinterpret_f16_s8_x2(...) __builtin_sve_reinterpret_f16_s8_x2(__VA_ARGS__) #define svreinterpret_f16_u8_x2(...) __builtin_sve_reinterpret_f16_u8_x2(__VA_ARGS__) #define svreinterpret_f16_s16_x2(...) __builtin_sve_reinterpret_f16_s16_x2(__VA_ARGS__) #define svreinterpret_f16_u16_x2(...) __builtin_sve_reinterpret_f16_u16_x2(__VA_ARGS__) #define svreinterpret_f16_s32_x2(...) __builtin_sve_reinterpret_f16_s32_x2(__VA_ARGS__) #define svreinterpret_f16_u32_x2(...) __builtin_sve_reinterpret_f16_u32_x2(__VA_ARGS__) #define svreinterpret_f16_s64_x2(...) __builtin_sve_reinterpret_f16_s64_x2(__VA_ARGS__) #define svreinterpret_f16_u64_x2(...) 
__builtin_sve_reinterpret_f16_u64_x2(__VA_ARGS__) #define svreinterpret_f16_f16_x2(...) __builtin_sve_reinterpret_f16_f16_x2(__VA_ARGS__) #define svreinterpret_f16_bf16_x2(...) __builtin_sve_reinterpret_f16_bf16_x2(__VA_ARGS__) #define svreinterpret_f16_f32_x2(...) __builtin_sve_reinterpret_f16_f32_x2(__VA_ARGS__) #define svreinterpret_f16_f64_x2(...) __builtin_sve_reinterpret_f16_f64_x2(__VA_ARGS__) #define svreinterpret_bf16_s8_x2(...) __builtin_sve_reinterpret_bf16_s8_x2(__VA_ARGS__) #define svreinterpret_bf16_u8_x2(...) __builtin_sve_reinterpret_bf16_u8_x2(__VA_ARGS__) #define svreinterpret_bf16_s16_x2(...) __builtin_sve_reinterpret_bf16_s16_x2(__VA_ARGS__) #define svreinterpret_bf16_u16_x2(...) __builtin_sve_reinterpret_bf16_u16_x2(__VA_ARGS__) #define svreinterpret_bf16_s32_x2(...) __builtin_sve_reinterpret_bf16_s32_x2(__VA_ARGS__) #define svreinterpret_bf16_u32_x2(...) __builtin_sve_reinterpret_bf16_u32_x2(__VA_ARGS__) #define svreinterpret_bf16_s64_x2(...) __builtin_sve_reinterpret_bf16_s64_x2(__VA_ARGS__) #define svreinterpret_bf16_u64_x2(...) __builtin_sve_reinterpret_bf16_u64_x2(__VA_ARGS__) #define svreinterpret_bf16_f16_x2(...) __builtin_sve_reinterpret_bf16_f16_x2(__VA_ARGS__) #define svreinterpret_bf16_bf16_x2(...) __builtin_sve_reinterpret_bf16_bf16_x2(__VA_ARGS__) #define svreinterpret_bf16_f32_x2(...) __builtin_sve_reinterpret_bf16_f32_x2(__VA_ARGS__) #define svreinterpret_bf16_f64_x2(...) __builtin_sve_reinterpret_bf16_f64_x2(__VA_ARGS__) #define svreinterpret_f32_s8_x2(...) __builtin_sve_reinterpret_f32_s8_x2(__VA_ARGS__) #define svreinterpret_f32_u8_x2(...) __builtin_sve_reinterpret_f32_u8_x2(__VA_ARGS__) #define svreinterpret_f32_s16_x2(...) __builtin_sve_reinterpret_f32_s16_x2(__VA_ARGS__) #define svreinterpret_f32_u16_x2(...) __builtin_sve_reinterpret_f32_u16_x2(__VA_ARGS__) #define svreinterpret_f32_s32_x2(...) __builtin_sve_reinterpret_f32_s32_x2(__VA_ARGS__) #define svreinterpret_f32_u32_x2(...) __builtin_sve_reinterpret_f32_u32_x2(__VA_ARGS__) #define svreinterpret_f32_s64_x2(...) __builtin_sve_reinterpret_f32_s64_x2(__VA_ARGS__) #define svreinterpret_f32_u64_x2(...) __builtin_sve_reinterpret_f32_u64_x2(__VA_ARGS__) #define svreinterpret_f32_f16_x2(...) __builtin_sve_reinterpret_f32_f16_x2(__VA_ARGS__) #define svreinterpret_f32_bf16_x2(...) __builtin_sve_reinterpret_f32_bf16_x2(__VA_ARGS__) #define svreinterpret_f32_f32_x2(...) __builtin_sve_reinterpret_f32_f32_x2(__VA_ARGS__) #define svreinterpret_f32_f64_x2(...) __builtin_sve_reinterpret_f32_f64_x2(__VA_ARGS__) #define svreinterpret_f64_s8_x2(...) __builtin_sve_reinterpret_f64_s8_x2(__VA_ARGS__) #define svreinterpret_f64_u8_x2(...) __builtin_sve_reinterpret_f64_u8_x2(__VA_ARGS__) #define svreinterpret_f64_s16_x2(...) __builtin_sve_reinterpret_f64_s16_x2(__VA_ARGS__) #define svreinterpret_f64_u16_x2(...) __builtin_sve_reinterpret_f64_u16_x2(__VA_ARGS__) #define svreinterpret_f64_s32_x2(...) __builtin_sve_reinterpret_f64_s32_x2(__VA_ARGS__) #define svreinterpret_f64_u32_x2(...) __builtin_sve_reinterpret_f64_u32_x2(__VA_ARGS__) #define svreinterpret_f64_s64_x2(...) __builtin_sve_reinterpret_f64_s64_x2(__VA_ARGS__) #define svreinterpret_f64_u64_x2(...) __builtin_sve_reinterpret_f64_u64_x2(__VA_ARGS__) #define svreinterpret_f64_f16_x2(...) __builtin_sve_reinterpret_f64_f16_x2(__VA_ARGS__) #define svreinterpret_f64_bf16_x2(...) __builtin_sve_reinterpret_f64_bf16_x2(__VA_ARGS__) #define svreinterpret_f64_f32_x2(...) __builtin_sve_reinterpret_f64_f32_x2(__VA_ARGS__) #define svreinterpret_f64_f64_x2(...) 
__builtin_sve_reinterpret_f64_f64_x2(__VA_ARGS__) __aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_s8_x2(op); } __aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svuint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_u8_x2(op); } __aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_s16_x2(op); } __aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svuint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_u16_x2(op); } __aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_s32_x2(op); } __aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svuint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_u32_x2(op); } __aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_s64_x2(op); } __aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svuint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_u64_x2(op); } __aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_f16_x2(op); } __aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svbfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_bf16_x2(op); } __aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svfloat32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_f32_x2(op); } __aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svfloat64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_f64_x2(op); } __aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_s8_x2(op); } __aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svuint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_u8_x2(op); } __aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_s16_x2(op); } __aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svuint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_u16_x2(op); } __aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_s32_x2(op); } __aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svuint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_u32_x2(op); } __aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_s64_x2(op); } __aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svuint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_u64_x2(op); } __aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_f16_x2(op); } __aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svbfloat16x2_t op) __arm_streaming_compatible { return 
__builtin_sve_reinterpret_u8_bf16_x2(op); } __aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svfloat32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_f32_x2(op); } __aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svfloat64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_f64_x2(op); } __aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_s8_x2(op); } __aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svuint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_u8_x2(op); } __aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_s16_x2(op); } __aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svuint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_u16_x2(op); } __aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_s32_x2(op); } __aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svuint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_u32_x2(op); } __aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_s64_x2(op); } __aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svuint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_u64_x2(op); } __aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_f16_x2(op); } __aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svbfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_bf16_x2(op); } __aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svfloat32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_f32_x2(op); } __aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svfloat64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_f64_x2(op); } __aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_s8_x2(op); } __aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svuint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_u8_x2(op); } __aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_s16_x2(op); } __aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svuint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_u16_x2(op); } __aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_s32_x2(op); } __aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svuint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_u32_x2(op); } __aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_s64_x2(op); } __aio __attribute__((target("sve"))) svuint16x2_t 
svreinterpret_u16(svuint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_u64_x2(op); } __aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_f16_x2(op); } __aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svbfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_bf16_x2(op); } __aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svfloat32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_f32_x2(op); } __aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svfloat64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_f64_x2(op); } __aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_s8_x2(op); } __aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svuint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_u8_x2(op); } __aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_s16_x2(op); } __aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svuint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_u16_x2(op); } __aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_s32_x2(op); } __aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svuint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_u32_x2(op); } __aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_s64_x2(op); } __aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svuint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_u64_x2(op); } __aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_f16_x2(op); } __aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svbfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_bf16_x2(op); } __aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svfloat32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_f32_x2(op); } __aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svfloat64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_f64_x2(op); } __aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_s8_x2(op); } __aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svuint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_u8_x2(op); } __aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_s16_x2(op); } __aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svuint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_u16_x2(op); } __aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svint32x2_t op) __arm_streaming_compatible { return 
__builtin_sve_reinterpret_u32_s32_x2(op); } __aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svuint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_u32_x2(op); } __aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_s64_x2(op); } __aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svuint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_u64_x2(op); } __aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_f16_x2(op); } __aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svbfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_bf16_x2(op); } __aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svfloat32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_f32_x2(op); } __aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svfloat64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_f64_x2(op); } __aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_s8_x2(op); } __aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svuint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_u8_x2(op); } __aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_s16_x2(op); } __aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svuint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_u16_x2(op); } __aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_s32_x2(op); } __aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svuint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_u32_x2(op); } __aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_s64_x2(op); } __aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svuint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_u64_x2(op); } __aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_f16_x2(op); } __aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svbfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_bf16_x2(op); } __aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svfloat32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_f32_x2(op); } __aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svfloat64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_f64_x2(op); } __aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_s8_x2(op); } __aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svuint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_u8_x2(op); } __aio __attribute__((target("sve"))) svuint64x2_t 
svreinterpret_u64(svint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_s16_x2(op); } __aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svuint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_u16_x2(op); } __aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_s32_x2(op); } __aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svuint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_u32_x2(op); } __aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_s64_x2(op); } __aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svuint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_u64_x2(op); } __aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_f16_x2(op); } __aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svbfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_bf16_x2(op); } __aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svfloat32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_f32_x2(op); } __aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svfloat64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_f64_x2(op); } __aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_s8_x2(op); } __aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svuint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_u8_x2(op); } __aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_s16_x2(op); } __aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svuint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_u16_x2(op); } __aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_s32_x2(op); } __aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svuint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_u32_x2(op); } __aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_s64_x2(op); } __aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svuint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_u64_x2(op); } __aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_f16_x2(op); } __aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svbfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_bf16_x2(op); } __aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svfloat32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_f32_x2(op); } __aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svfloat64x2_t op) __arm_streaming_compatible { return 
__builtin_sve_reinterpret_f16_f64_x2(op); } __aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_s8_x2(op); } __aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svuint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_u8_x2(op); } __aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_s16_x2(op); } __aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svuint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_u16_x2(op); } __aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_s32_x2(op); } __aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svuint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_u32_x2(op); } __aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_s64_x2(op); } __aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svuint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_u64_x2(op); } __aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_f16_x2(op); } __aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svbfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_bf16_x2(op); } __aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svfloat32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_f32_x2(op); } __aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svfloat64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_f64_x2(op); } __aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_s8_x2(op); } __aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svuint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_u8_x2(op); } __aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_s16_x2(op); } __aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svuint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_u16_x2(op); } __aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_s32_x2(op); } __aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svuint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_u32_x2(op); } __aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_s64_x2(op); } __aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svuint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_u64_x2(op); } __aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_f16_x2(op); 
} __aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svbfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_bf16_x2(op); } __aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svfloat32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_f32_x2(op); } __aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svfloat64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_f64_x2(op); } __aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_s8_x2(op); } __aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svuint8x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_u8_x2(op); } __aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_s16_x2(op); } __aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svuint16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_u16_x2(op); } __aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_s32_x2(op); } __aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svuint32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_u32_x2(op); } __aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_s64_x2(op); } __aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svuint64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_u64_x2(op); } __aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_f16_x2(op); } __aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svbfloat16x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_bf16_x2(op); } __aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svfloat32x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_f32_x2(op); } __aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svfloat64x2_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_f64_x2(op); } #define svreinterpret_s8_s8_x3(...) __builtin_sve_reinterpret_s8_s8_x3(__VA_ARGS__) #define svreinterpret_s8_u8_x3(...) __builtin_sve_reinterpret_s8_u8_x3(__VA_ARGS__) #define svreinterpret_s8_s16_x3(...) __builtin_sve_reinterpret_s8_s16_x3(__VA_ARGS__) #define svreinterpret_s8_u16_x3(...) __builtin_sve_reinterpret_s8_u16_x3(__VA_ARGS__) #define svreinterpret_s8_s32_x3(...) __builtin_sve_reinterpret_s8_s32_x3(__VA_ARGS__) #define svreinterpret_s8_u32_x3(...) __builtin_sve_reinterpret_s8_u32_x3(__VA_ARGS__) #define svreinterpret_s8_s64_x3(...) __builtin_sve_reinterpret_s8_s64_x3(__VA_ARGS__) #define svreinterpret_s8_u64_x3(...) __builtin_sve_reinterpret_s8_u64_x3(__VA_ARGS__) #define svreinterpret_s8_f16_x3(...) __builtin_sve_reinterpret_s8_f16_x3(__VA_ARGS__) #define svreinterpret_s8_bf16_x3(...) __builtin_sve_reinterpret_s8_bf16_x3(__VA_ARGS__) #define svreinterpret_s8_f32_x3(...) __builtin_sve_reinterpret_s8_f32_x3(__VA_ARGS__) #define svreinterpret_s8_f64_x3(...) 
__builtin_sve_reinterpret_s8_f64_x3(__VA_ARGS__) #define svreinterpret_u8_s8_x3(...) __builtin_sve_reinterpret_u8_s8_x3(__VA_ARGS__) #define svreinterpret_u8_u8_x3(...) __builtin_sve_reinterpret_u8_u8_x3(__VA_ARGS__) #define svreinterpret_u8_s16_x3(...) __builtin_sve_reinterpret_u8_s16_x3(__VA_ARGS__) #define svreinterpret_u8_u16_x3(...) __builtin_sve_reinterpret_u8_u16_x3(__VA_ARGS__) #define svreinterpret_u8_s32_x3(...) __builtin_sve_reinterpret_u8_s32_x3(__VA_ARGS__) #define svreinterpret_u8_u32_x3(...) __builtin_sve_reinterpret_u8_u32_x3(__VA_ARGS__) #define svreinterpret_u8_s64_x3(...) __builtin_sve_reinterpret_u8_s64_x3(__VA_ARGS__) #define svreinterpret_u8_u64_x3(...) __builtin_sve_reinterpret_u8_u64_x3(__VA_ARGS__) #define svreinterpret_u8_f16_x3(...) __builtin_sve_reinterpret_u8_f16_x3(__VA_ARGS__) #define svreinterpret_u8_bf16_x3(...) __builtin_sve_reinterpret_u8_bf16_x3(__VA_ARGS__) #define svreinterpret_u8_f32_x3(...) __builtin_sve_reinterpret_u8_f32_x3(__VA_ARGS__) #define svreinterpret_u8_f64_x3(...) __builtin_sve_reinterpret_u8_f64_x3(__VA_ARGS__) #define svreinterpret_s16_s8_x3(...) __builtin_sve_reinterpret_s16_s8_x3(__VA_ARGS__) #define svreinterpret_s16_u8_x3(...) __builtin_sve_reinterpret_s16_u8_x3(__VA_ARGS__) #define svreinterpret_s16_s16_x3(...) __builtin_sve_reinterpret_s16_s16_x3(__VA_ARGS__) #define svreinterpret_s16_u16_x3(...) __builtin_sve_reinterpret_s16_u16_x3(__VA_ARGS__) #define svreinterpret_s16_s32_x3(...) __builtin_sve_reinterpret_s16_s32_x3(__VA_ARGS__) #define svreinterpret_s16_u32_x3(...) __builtin_sve_reinterpret_s16_u32_x3(__VA_ARGS__) #define svreinterpret_s16_s64_x3(...) __builtin_sve_reinterpret_s16_s64_x3(__VA_ARGS__) #define svreinterpret_s16_u64_x3(...) __builtin_sve_reinterpret_s16_u64_x3(__VA_ARGS__) #define svreinterpret_s16_f16_x3(...) __builtin_sve_reinterpret_s16_f16_x3(__VA_ARGS__) #define svreinterpret_s16_bf16_x3(...) __builtin_sve_reinterpret_s16_bf16_x3(__VA_ARGS__) #define svreinterpret_s16_f32_x3(...) __builtin_sve_reinterpret_s16_f32_x3(__VA_ARGS__) #define svreinterpret_s16_f64_x3(...) __builtin_sve_reinterpret_s16_f64_x3(__VA_ARGS__) #define svreinterpret_u16_s8_x3(...) __builtin_sve_reinterpret_u16_s8_x3(__VA_ARGS__) #define svreinterpret_u16_u8_x3(...) __builtin_sve_reinterpret_u16_u8_x3(__VA_ARGS__) #define svreinterpret_u16_s16_x3(...) __builtin_sve_reinterpret_u16_s16_x3(__VA_ARGS__) #define svreinterpret_u16_u16_x3(...) __builtin_sve_reinterpret_u16_u16_x3(__VA_ARGS__) #define svreinterpret_u16_s32_x3(...) __builtin_sve_reinterpret_u16_s32_x3(__VA_ARGS__) #define svreinterpret_u16_u32_x3(...) __builtin_sve_reinterpret_u16_u32_x3(__VA_ARGS__) #define svreinterpret_u16_s64_x3(...) __builtin_sve_reinterpret_u16_s64_x3(__VA_ARGS__) #define svreinterpret_u16_u64_x3(...) __builtin_sve_reinterpret_u16_u64_x3(__VA_ARGS__) #define svreinterpret_u16_f16_x3(...) __builtin_sve_reinterpret_u16_f16_x3(__VA_ARGS__) #define svreinterpret_u16_bf16_x3(...) __builtin_sve_reinterpret_u16_bf16_x3(__VA_ARGS__) #define svreinterpret_u16_f32_x3(...) __builtin_sve_reinterpret_u16_f32_x3(__VA_ARGS__) #define svreinterpret_u16_f64_x3(...) __builtin_sve_reinterpret_u16_f64_x3(__VA_ARGS__) #define svreinterpret_s32_s8_x3(...) __builtin_sve_reinterpret_s32_s8_x3(__VA_ARGS__) #define svreinterpret_s32_u8_x3(...) __builtin_sve_reinterpret_s32_u8_x3(__VA_ARGS__) #define svreinterpret_s32_s16_x3(...) __builtin_sve_reinterpret_s32_s16_x3(__VA_ARGS__) #define svreinterpret_s32_u16_x3(...) 
__builtin_sve_reinterpret_s32_u16_x3(__VA_ARGS__) #define svreinterpret_s32_s32_x3(...) __builtin_sve_reinterpret_s32_s32_x3(__VA_ARGS__) #define svreinterpret_s32_u32_x3(...) __builtin_sve_reinterpret_s32_u32_x3(__VA_ARGS__) #define svreinterpret_s32_s64_x3(...) __builtin_sve_reinterpret_s32_s64_x3(__VA_ARGS__) #define svreinterpret_s32_u64_x3(...) __builtin_sve_reinterpret_s32_u64_x3(__VA_ARGS__) #define svreinterpret_s32_f16_x3(...) __builtin_sve_reinterpret_s32_f16_x3(__VA_ARGS__) #define svreinterpret_s32_bf16_x3(...) __builtin_sve_reinterpret_s32_bf16_x3(__VA_ARGS__) #define svreinterpret_s32_f32_x3(...) __builtin_sve_reinterpret_s32_f32_x3(__VA_ARGS__) #define svreinterpret_s32_f64_x3(...) __builtin_sve_reinterpret_s32_f64_x3(__VA_ARGS__) #define svreinterpret_u32_s8_x3(...) __builtin_sve_reinterpret_u32_s8_x3(__VA_ARGS__) #define svreinterpret_u32_u8_x3(...) __builtin_sve_reinterpret_u32_u8_x3(__VA_ARGS__) #define svreinterpret_u32_s16_x3(...) __builtin_sve_reinterpret_u32_s16_x3(__VA_ARGS__) #define svreinterpret_u32_u16_x3(...) __builtin_sve_reinterpret_u32_u16_x3(__VA_ARGS__) #define svreinterpret_u32_s32_x3(...) __builtin_sve_reinterpret_u32_s32_x3(__VA_ARGS__) #define svreinterpret_u32_u32_x3(...) __builtin_sve_reinterpret_u32_u32_x3(__VA_ARGS__) #define svreinterpret_u32_s64_x3(...) __builtin_sve_reinterpret_u32_s64_x3(__VA_ARGS__) #define svreinterpret_u32_u64_x3(...) __builtin_sve_reinterpret_u32_u64_x3(__VA_ARGS__) #define svreinterpret_u32_f16_x3(...) __builtin_sve_reinterpret_u32_f16_x3(__VA_ARGS__) #define svreinterpret_u32_bf16_x3(...) __builtin_sve_reinterpret_u32_bf16_x3(__VA_ARGS__) #define svreinterpret_u32_f32_x3(...) __builtin_sve_reinterpret_u32_f32_x3(__VA_ARGS__) #define svreinterpret_u32_f64_x3(...) __builtin_sve_reinterpret_u32_f64_x3(__VA_ARGS__) #define svreinterpret_s64_s8_x3(...) __builtin_sve_reinterpret_s64_s8_x3(__VA_ARGS__) #define svreinterpret_s64_u8_x3(...) __builtin_sve_reinterpret_s64_u8_x3(__VA_ARGS__) #define svreinterpret_s64_s16_x3(...) __builtin_sve_reinterpret_s64_s16_x3(__VA_ARGS__) #define svreinterpret_s64_u16_x3(...) __builtin_sve_reinterpret_s64_u16_x3(__VA_ARGS__) #define svreinterpret_s64_s32_x3(...) __builtin_sve_reinterpret_s64_s32_x3(__VA_ARGS__) #define svreinterpret_s64_u32_x3(...) __builtin_sve_reinterpret_s64_u32_x3(__VA_ARGS__) #define svreinterpret_s64_s64_x3(...) __builtin_sve_reinterpret_s64_s64_x3(__VA_ARGS__) #define svreinterpret_s64_u64_x3(...) __builtin_sve_reinterpret_s64_u64_x3(__VA_ARGS__) #define svreinterpret_s64_f16_x3(...) __builtin_sve_reinterpret_s64_f16_x3(__VA_ARGS__) #define svreinterpret_s64_bf16_x3(...) __builtin_sve_reinterpret_s64_bf16_x3(__VA_ARGS__) #define svreinterpret_s64_f32_x3(...) __builtin_sve_reinterpret_s64_f32_x3(__VA_ARGS__) #define svreinterpret_s64_f64_x3(...) __builtin_sve_reinterpret_s64_f64_x3(__VA_ARGS__) #define svreinterpret_u64_s8_x3(...) __builtin_sve_reinterpret_u64_s8_x3(__VA_ARGS__) #define svreinterpret_u64_u8_x3(...) __builtin_sve_reinterpret_u64_u8_x3(__VA_ARGS__) #define svreinterpret_u64_s16_x3(...) __builtin_sve_reinterpret_u64_s16_x3(__VA_ARGS__) #define svreinterpret_u64_u16_x3(...) __builtin_sve_reinterpret_u64_u16_x3(__VA_ARGS__) #define svreinterpret_u64_s32_x3(...) __builtin_sve_reinterpret_u64_s32_x3(__VA_ARGS__) #define svreinterpret_u64_u32_x3(...) __builtin_sve_reinterpret_u64_u32_x3(__VA_ARGS__) #define svreinterpret_u64_s64_x3(...) __builtin_sve_reinterpret_u64_s64_x3(__VA_ARGS__) #define svreinterpret_u64_u64_x3(...) 
__builtin_sve_reinterpret_u64_u64_x3(__VA_ARGS__) #define svreinterpret_u64_f16_x3(...) __builtin_sve_reinterpret_u64_f16_x3(__VA_ARGS__) #define svreinterpret_u64_bf16_x3(...) __builtin_sve_reinterpret_u64_bf16_x3(__VA_ARGS__) #define svreinterpret_u64_f32_x3(...) __builtin_sve_reinterpret_u64_f32_x3(__VA_ARGS__) #define svreinterpret_u64_f64_x3(...) __builtin_sve_reinterpret_u64_f64_x3(__VA_ARGS__) #define svreinterpret_f16_s8_x3(...) __builtin_sve_reinterpret_f16_s8_x3(__VA_ARGS__) #define svreinterpret_f16_u8_x3(...) __builtin_sve_reinterpret_f16_u8_x3(__VA_ARGS__) #define svreinterpret_f16_s16_x3(...) __builtin_sve_reinterpret_f16_s16_x3(__VA_ARGS__) #define svreinterpret_f16_u16_x3(...) __builtin_sve_reinterpret_f16_u16_x3(__VA_ARGS__) #define svreinterpret_f16_s32_x3(...) __builtin_sve_reinterpret_f16_s32_x3(__VA_ARGS__) #define svreinterpret_f16_u32_x3(...) __builtin_sve_reinterpret_f16_u32_x3(__VA_ARGS__) #define svreinterpret_f16_s64_x3(...) __builtin_sve_reinterpret_f16_s64_x3(__VA_ARGS__) #define svreinterpret_f16_u64_x3(...) __builtin_sve_reinterpret_f16_u64_x3(__VA_ARGS__) #define svreinterpret_f16_f16_x3(...) __builtin_sve_reinterpret_f16_f16_x3(__VA_ARGS__) #define svreinterpret_f16_bf16_x3(...) __builtin_sve_reinterpret_f16_bf16_x3(__VA_ARGS__) #define svreinterpret_f16_f32_x3(...) __builtin_sve_reinterpret_f16_f32_x3(__VA_ARGS__) #define svreinterpret_f16_f64_x3(...) __builtin_sve_reinterpret_f16_f64_x3(__VA_ARGS__) #define svreinterpret_bf16_s8_x3(...) __builtin_sve_reinterpret_bf16_s8_x3(__VA_ARGS__) #define svreinterpret_bf16_u8_x3(...) __builtin_sve_reinterpret_bf16_u8_x3(__VA_ARGS__) #define svreinterpret_bf16_s16_x3(...) __builtin_sve_reinterpret_bf16_s16_x3(__VA_ARGS__) #define svreinterpret_bf16_u16_x3(...) __builtin_sve_reinterpret_bf16_u16_x3(__VA_ARGS__) #define svreinterpret_bf16_s32_x3(...) __builtin_sve_reinterpret_bf16_s32_x3(__VA_ARGS__) #define svreinterpret_bf16_u32_x3(...) __builtin_sve_reinterpret_bf16_u32_x3(__VA_ARGS__) #define svreinterpret_bf16_s64_x3(...) __builtin_sve_reinterpret_bf16_s64_x3(__VA_ARGS__) #define svreinterpret_bf16_u64_x3(...) __builtin_sve_reinterpret_bf16_u64_x3(__VA_ARGS__) #define svreinterpret_bf16_f16_x3(...) __builtin_sve_reinterpret_bf16_f16_x3(__VA_ARGS__) #define svreinterpret_bf16_bf16_x3(...) __builtin_sve_reinterpret_bf16_bf16_x3(__VA_ARGS__) #define svreinterpret_bf16_f32_x3(...) __builtin_sve_reinterpret_bf16_f32_x3(__VA_ARGS__) #define svreinterpret_bf16_f64_x3(...) __builtin_sve_reinterpret_bf16_f64_x3(__VA_ARGS__) #define svreinterpret_f32_s8_x3(...) __builtin_sve_reinterpret_f32_s8_x3(__VA_ARGS__) #define svreinterpret_f32_u8_x3(...) __builtin_sve_reinterpret_f32_u8_x3(__VA_ARGS__) #define svreinterpret_f32_s16_x3(...) __builtin_sve_reinterpret_f32_s16_x3(__VA_ARGS__) #define svreinterpret_f32_u16_x3(...) __builtin_sve_reinterpret_f32_u16_x3(__VA_ARGS__) #define svreinterpret_f32_s32_x3(...) __builtin_sve_reinterpret_f32_s32_x3(__VA_ARGS__) #define svreinterpret_f32_u32_x3(...) __builtin_sve_reinterpret_f32_u32_x3(__VA_ARGS__) #define svreinterpret_f32_s64_x3(...) __builtin_sve_reinterpret_f32_s64_x3(__VA_ARGS__) #define svreinterpret_f32_u64_x3(...) __builtin_sve_reinterpret_f32_u64_x3(__VA_ARGS__) #define svreinterpret_f32_f16_x3(...) __builtin_sve_reinterpret_f32_f16_x3(__VA_ARGS__) #define svreinterpret_f32_bf16_x3(...) __builtin_sve_reinterpret_f32_bf16_x3(__VA_ARGS__) #define svreinterpret_f32_f32_x3(...) __builtin_sve_reinterpret_f32_f32_x3(__VA_ARGS__) #define svreinterpret_f32_f64_x3(...) 
__builtin_sve_reinterpret_f32_f64_x3(__VA_ARGS__) #define svreinterpret_f64_s8_x3(...) __builtin_sve_reinterpret_f64_s8_x3(__VA_ARGS__) #define svreinterpret_f64_u8_x3(...) __builtin_sve_reinterpret_f64_u8_x3(__VA_ARGS__) #define svreinterpret_f64_s16_x3(...) __builtin_sve_reinterpret_f64_s16_x3(__VA_ARGS__) #define svreinterpret_f64_u16_x3(...) __builtin_sve_reinterpret_f64_u16_x3(__VA_ARGS__) #define svreinterpret_f64_s32_x3(...) __builtin_sve_reinterpret_f64_s32_x3(__VA_ARGS__) #define svreinterpret_f64_u32_x3(...) __builtin_sve_reinterpret_f64_u32_x3(__VA_ARGS__) #define svreinterpret_f64_s64_x3(...) __builtin_sve_reinterpret_f64_s64_x3(__VA_ARGS__) #define svreinterpret_f64_u64_x3(...) __builtin_sve_reinterpret_f64_u64_x3(__VA_ARGS__) #define svreinterpret_f64_f16_x3(...) __builtin_sve_reinterpret_f64_f16_x3(__VA_ARGS__) #define svreinterpret_f64_bf16_x3(...) __builtin_sve_reinterpret_f64_bf16_x3(__VA_ARGS__) #define svreinterpret_f64_f32_x3(...) __builtin_sve_reinterpret_f64_f32_x3(__VA_ARGS__) #define svreinterpret_f64_f64_x3(...) __builtin_sve_reinterpret_f64_f64_x3(__VA_ARGS__) __aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_s8_x3(op); } __aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svuint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_u8_x3(op); } __aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_s16_x3(op); } __aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svuint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_u16_x3(op); } __aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_s32_x3(op); } __aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svuint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_u32_x3(op); } __aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_s64_x3(op); } __aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svuint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_u64_x3(op); } __aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_f16_x3(op); } __aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svbfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_bf16_x3(op); } __aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svfloat32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_f32_x3(op); } __aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svfloat64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_f64_x3(op); } __aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_s8_x3(op); } __aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svuint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_u8_x3(op); } __aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_s16_x3(op); } __aio __attribute__((target("sve"))) 
svuint8x3_t svreinterpret_u8(svuint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_u16_x3(op); } __aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_s32_x3(op); } __aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svuint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_u32_x3(op); } __aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_s64_x3(op); } __aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svuint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_u64_x3(op); } __aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_f16_x3(op); } __aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svbfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_bf16_x3(op); } __aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svfloat32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_f32_x3(op); } __aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svfloat64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_f64_x3(op); } __aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_s8_x3(op); } __aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svuint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_u8_x3(op); } __aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_s16_x3(op); } __aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svuint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_u16_x3(op); } __aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_s32_x3(op); } __aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svuint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_u32_x3(op); } __aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_s64_x3(op); } __aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svuint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_u64_x3(op); } __aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_f16_x3(op); } __aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svbfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_bf16_x3(op); } __aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svfloat32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_f32_x3(op); } __aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svfloat64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_f64_x3(op); } __aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_s8_x3(op); } 
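/* A minimal usage sketch, assuming <arm_sve.h> is included and the target supports SVE
 * (the helper name view_bytes_as_signed is illustrative only, not part of this header):
 * the svreinterpret overloads defined here perform a pure bitwise reinterpretation of a
 * vector tuple -- no lane values are converted, only the static element type changes. */
__attribute__((target("sve")))
static inline svint8x3_t view_bytes_as_signed(svuint8x3_t bytes) {
  /* Overload resolution picks the svuint8x3_t -> svint8x3_t variant; the explicit
   * spelling svreinterpret_s8_u8_x3(bytes) names the same operation directly. */
  return svreinterpret_s8(bytes);
}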
__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svuint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_u8_x3(op); } __aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_s16_x3(op); } __aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svuint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_u16_x3(op); } __aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_s32_x3(op); } __aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svuint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_u32_x3(op); } __aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_s64_x3(op); } __aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svuint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_u64_x3(op); } __aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_f16_x3(op); } __aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svbfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_bf16_x3(op); } __aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svfloat32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_f32_x3(op); } __aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svfloat64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_f64_x3(op); } __aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_s8_x3(op); } __aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svuint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_u8_x3(op); } __aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_s16_x3(op); } __aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svuint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_u16_x3(op); } __aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_s32_x3(op); } __aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svuint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_u32_x3(op); } __aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_s64_x3(op); } __aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svuint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_u64_x3(op); } __aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_f16_x3(op); } __aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svbfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_bf16_x3(op); } __aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svfloat32x3_t op) 
__arm_streaming_compatible { return __builtin_sve_reinterpret_s32_f32_x3(op); } __aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svfloat64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_f64_x3(op); } __aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_s8_x3(op); } __aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svuint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_u8_x3(op); } __aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_s16_x3(op); } __aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svuint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_u16_x3(op); } __aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_s32_x3(op); } __aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svuint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_u32_x3(op); } __aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_s64_x3(op); } __aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svuint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_u64_x3(op); } __aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_f16_x3(op); } __aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svbfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_bf16_x3(op); } __aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svfloat32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_f32_x3(op); } __aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svfloat64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_f64_x3(op); } __aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_s8_x3(op); } __aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svuint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_u8_x3(op); } __aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_s16_x3(op); } __aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svuint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_u16_x3(op); } __aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_s32_x3(op); } __aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svuint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_u32_x3(op); } __aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_s64_x3(op); } __aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svuint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_u64_x3(op); } __aio 
__attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_f16_x3(op); } __aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svbfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_bf16_x3(op); } __aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svfloat32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_f32_x3(op); } __aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svfloat64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_f64_x3(op); } __aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_s8_x3(op); } __aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svuint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_u8_x3(op); } __aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_s16_x3(op); } __aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svuint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_u16_x3(op); } __aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_s32_x3(op); } __aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svuint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_u32_x3(op); } __aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_s64_x3(op); } __aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svuint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_u64_x3(op); } __aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_f16_x3(op); } __aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svbfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_bf16_x3(op); } __aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svfloat32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_f32_x3(op); } __aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svfloat64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_f64_x3(op); } __aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_s8_x3(op); } __aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svuint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_u8_x3(op); } __aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_s16_x3(op); } __aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svuint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_u16_x3(op); } __aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_s32_x3(op); } __aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svuint32x3_t op) 
__arm_streaming_compatible { return __builtin_sve_reinterpret_f16_u32_x3(op); } __aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_s64_x3(op); } __aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svuint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_u64_x3(op); } __aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_f16_x3(op); } __aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svbfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_bf16_x3(op); } __aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svfloat32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_f32_x3(op); } __aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svfloat64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_f64_x3(op); } __aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_s8_x3(op); } __aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svuint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_u8_x3(op); } __aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_s16_x3(op); } __aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svuint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_u16_x3(op); } __aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_s32_x3(op); } __aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svuint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_u32_x3(op); } __aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_s64_x3(op); } __aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svuint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_u64_x3(op); } __aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_f16_x3(op); } __aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svbfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_bf16_x3(op); } __aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svfloat32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_f32_x3(op); } __aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svfloat64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_f64_x3(op); } __aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_s8_x3(op); } __aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svuint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_u8_x3(op); } __aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svint16x3_t op) __arm_streaming_compatible { return 
__builtin_sve_reinterpret_f32_s16_x3(op); } __aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svuint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_u16_x3(op); } __aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_s32_x3(op); } __aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svuint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_u32_x3(op); } __aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_s64_x3(op); } __aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svuint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_u64_x3(op); } __aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_f16_x3(op); } __aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svbfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_bf16_x3(op); } __aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svfloat32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_f32_x3(op); } __aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svfloat64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_f64_x3(op); } __aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_s8_x3(op); } __aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svuint8x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_u8_x3(op); } __aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_s16_x3(op); } __aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svuint16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_u16_x3(op); } __aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_s32_x3(op); } __aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svuint32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_u32_x3(op); } __aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_s64_x3(op); } __aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svuint64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_u64_x3(op); } __aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_f16_x3(op); } __aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svbfloat16x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_bf16_x3(op); } __aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svfloat32x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_f32_x3(op); } __aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svfloat64x3_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_f64_x3(op); } #define 
svreinterpret_s8_s8_x4(...) __builtin_sve_reinterpret_s8_s8_x4(__VA_ARGS__) #define svreinterpret_s8_u8_x4(...) __builtin_sve_reinterpret_s8_u8_x4(__VA_ARGS__) #define svreinterpret_s8_s16_x4(...) __builtin_sve_reinterpret_s8_s16_x4(__VA_ARGS__) #define svreinterpret_s8_u16_x4(...) __builtin_sve_reinterpret_s8_u16_x4(__VA_ARGS__) #define svreinterpret_s8_s32_x4(...) __builtin_sve_reinterpret_s8_s32_x4(__VA_ARGS__) #define svreinterpret_s8_u32_x4(...) __builtin_sve_reinterpret_s8_u32_x4(__VA_ARGS__) #define svreinterpret_s8_s64_x4(...) __builtin_sve_reinterpret_s8_s64_x4(__VA_ARGS__) #define svreinterpret_s8_u64_x4(...) __builtin_sve_reinterpret_s8_u64_x4(__VA_ARGS__) #define svreinterpret_s8_f16_x4(...) __builtin_sve_reinterpret_s8_f16_x4(__VA_ARGS__) #define svreinterpret_s8_bf16_x4(...) __builtin_sve_reinterpret_s8_bf16_x4(__VA_ARGS__) #define svreinterpret_s8_f32_x4(...) __builtin_sve_reinterpret_s8_f32_x4(__VA_ARGS__) #define svreinterpret_s8_f64_x4(...) __builtin_sve_reinterpret_s8_f64_x4(__VA_ARGS__) #define svreinterpret_u8_s8_x4(...) __builtin_sve_reinterpret_u8_s8_x4(__VA_ARGS__) #define svreinterpret_u8_u8_x4(...) __builtin_sve_reinterpret_u8_u8_x4(__VA_ARGS__) #define svreinterpret_u8_s16_x4(...) __builtin_sve_reinterpret_u8_s16_x4(__VA_ARGS__) #define svreinterpret_u8_u16_x4(...) __builtin_sve_reinterpret_u8_u16_x4(__VA_ARGS__) #define svreinterpret_u8_s32_x4(...) __builtin_sve_reinterpret_u8_s32_x4(__VA_ARGS__) #define svreinterpret_u8_u32_x4(...) __builtin_sve_reinterpret_u8_u32_x4(__VA_ARGS__) #define svreinterpret_u8_s64_x4(...) __builtin_sve_reinterpret_u8_s64_x4(__VA_ARGS__) #define svreinterpret_u8_u64_x4(...) __builtin_sve_reinterpret_u8_u64_x4(__VA_ARGS__) #define svreinterpret_u8_f16_x4(...) __builtin_sve_reinterpret_u8_f16_x4(__VA_ARGS__) #define svreinterpret_u8_bf16_x4(...) __builtin_sve_reinterpret_u8_bf16_x4(__VA_ARGS__) #define svreinterpret_u8_f32_x4(...) __builtin_sve_reinterpret_u8_f32_x4(__VA_ARGS__) #define svreinterpret_u8_f64_x4(...) __builtin_sve_reinterpret_u8_f64_x4(__VA_ARGS__) #define svreinterpret_s16_s8_x4(...) __builtin_sve_reinterpret_s16_s8_x4(__VA_ARGS__) #define svreinterpret_s16_u8_x4(...) __builtin_sve_reinterpret_s16_u8_x4(__VA_ARGS__) #define svreinterpret_s16_s16_x4(...) __builtin_sve_reinterpret_s16_s16_x4(__VA_ARGS__) #define svreinterpret_s16_u16_x4(...) __builtin_sve_reinterpret_s16_u16_x4(__VA_ARGS__) #define svreinterpret_s16_s32_x4(...) __builtin_sve_reinterpret_s16_s32_x4(__VA_ARGS__) #define svreinterpret_s16_u32_x4(...) __builtin_sve_reinterpret_s16_u32_x4(__VA_ARGS__) #define svreinterpret_s16_s64_x4(...) __builtin_sve_reinterpret_s16_s64_x4(__VA_ARGS__) #define svreinterpret_s16_u64_x4(...) __builtin_sve_reinterpret_s16_u64_x4(__VA_ARGS__) #define svreinterpret_s16_f16_x4(...) __builtin_sve_reinterpret_s16_f16_x4(__VA_ARGS__) #define svreinterpret_s16_bf16_x4(...) __builtin_sve_reinterpret_s16_bf16_x4(__VA_ARGS__) #define svreinterpret_s16_f32_x4(...) __builtin_sve_reinterpret_s16_f32_x4(__VA_ARGS__) #define svreinterpret_s16_f64_x4(...) __builtin_sve_reinterpret_s16_f64_x4(__VA_ARGS__) #define svreinterpret_u16_s8_x4(...) __builtin_sve_reinterpret_u16_s8_x4(__VA_ARGS__) #define svreinterpret_u16_u8_x4(...) __builtin_sve_reinterpret_u16_u8_x4(__VA_ARGS__) #define svreinterpret_u16_s16_x4(...) __builtin_sve_reinterpret_u16_s16_x4(__VA_ARGS__) #define svreinterpret_u16_u16_x4(...) __builtin_sve_reinterpret_u16_u16_x4(__VA_ARGS__) #define svreinterpret_u16_s32_x4(...) 
__builtin_sve_reinterpret_u16_s32_x4(__VA_ARGS__) #define svreinterpret_u16_u32_x4(...) __builtin_sve_reinterpret_u16_u32_x4(__VA_ARGS__) #define svreinterpret_u16_s64_x4(...) __builtin_sve_reinterpret_u16_s64_x4(__VA_ARGS__) #define svreinterpret_u16_u64_x4(...) __builtin_sve_reinterpret_u16_u64_x4(__VA_ARGS__) #define svreinterpret_u16_f16_x4(...) __builtin_sve_reinterpret_u16_f16_x4(__VA_ARGS__) #define svreinterpret_u16_bf16_x4(...) __builtin_sve_reinterpret_u16_bf16_x4(__VA_ARGS__) #define svreinterpret_u16_f32_x4(...) __builtin_sve_reinterpret_u16_f32_x4(__VA_ARGS__) #define svreinterpret_u16_f64_x4(...) __builtin_sve_reinterpret_u16_f64_x4(__VA_ARGS__) #define svreinterpret_s32_s8_x4(...) __builtin_sve_reinterpret_s32_s8_x4(__VA_ARGS__) #define svreinterpret_s32_u8_x4(...) __builtin_sve_reinterpret_s32_u8_x4(__VA_ARGS__) #define svreinterpret_s32_s16_x4(...) __builtin_sve_reinterpret_s32_s16_x4(__VA_ARGS__) #define svreinterpret_s32_u16_x4(...) __builtin_sve_reinterpret_s32_u16_x4(__VA_ARGS__) #define svreinterpret_s32_s32_x4(...) __builtin_sve_reinterpret_s32_s32_x4(__VA_ARGS__) #define svreinterpret_s32_u32_x4(...) __builtin_sve_reinterpret_s32_u32_x4(__VA_ARGS__) #define svreinterpret_s32_s64_x4(...) __builtin_sve_reinterpret_s32_s64_x4(__VA_ARGS__) #define svreinterpret_s32_u64_x4(...) __builtin_sve_reinterpret_s32_u64_x4(__VA_ARGS__) #define svreinterpret_s32_f16_x4(...) __builtin_sve_reinterpret_s32_f16_x4(__VA_ARGS__) #define svreinterpret_s32_bf16_x4(...) __builtin_sve_reinterpret_s32_bf16_x4(__VA_ARGS__) #define svreinterpret_s32_f32_x4(...) __builtin_sve_reinterpret_s32_f32_x4(__VA_ARGS__) #define svreinterpret_s32_f64_x4(...) __builtin_sve_reinterpret_s32_f64_x4(__VA_ARGS__) #define svreinterpret_u32_s8_x4(...) __builtin_sve_reinterpret_u32_s8_x4(__VA_ARGS__) #define svreinterpret_u32_u8_x4(...) __builtin_sve_reinterpret_u32_u8_x4(__VA_ARGS__) #define svreinterpret_u32_s16_x4(...) __builtin_sve_reinterpret_u32_s16_x4(__VA_ARGS__) #define svreinterpret_u32_u16_x4(...) __builtin_sve_reinterpret_u32_u16_x4(__VA_ARGS__) #define svreinterpret_u32_s32_x4(...) __builtin_sve_reinterpret_u32_s32_x4(__VA_ARGS__) #define svreinterpret_u32_u32_x4(...) __builtin_sve_reinterpret_u32_u32_x4(__VA_ARGS__) #define svreinterpret_u32_s64_x4(...) __builtin_sve_reinterpret_u32_s64_x4(__VA_ARGS__) #define svreinterpret_u32_u64_x4(...) __builtin_sve_reinterpret_u32_u64_x4(__VA_ARGS__) #define svreinterpret_u32_f16_x4(...) __builtin_sve_reinterpret_u32_f16_x4(__VA_ARGS__) #define svreinterpret_u32_bf16_x4(...) __builtin_sve_reinterpret_u32_bf16_x4(__VA_ARGS__) #define svreinterpret_u32_f32_x4(...) __builtin_sve_reinterpret_u32_f32_x4(__VA_ARGS__) #define svreinterpret_u32_f64_x4(...) __builtin_sve_reinterpret_u32_f64_x4(__VA_ARGS__) #define svreinterpret_s64_s8_x4(...) __builtin_sve_reinterpret_s64_s8_x4(__VA_ARGS__) #define svreinterpret_s64_u8_x4(...) __builtin_sve_reinterpret_s64_u8_x4(__VA_ARGS__) #define svreinterpret_s64_s16_x4(...) __builtin_sve_reinterpret_s64_s16_x4(__VA_ARGS__) #define svreinterpret_s64_u16_x4(...) __builtin_sve_reinterpret_s64_u16_x4(__VA_ARGS__) #define svreinterpret_s64_s32_x4(...) __builtin_sve_reinterpret_s64_s32_x4(__VA_ARGS__) #define svreinterpret_s64_u32_x4(...) __builtin_sve_reinterpret_s64_u32_x4(__VA_ARGS__) #define svreinterpret_s64_s64_x4(...) __builtin_sve_reinterpret_s64_s64_x4(__VA_ARGS__) #define svreinterpret_s64_u64_x4(...) __builtin_sve_reinterpret_s64_u64_x4(__VA_ARGS__) #define svreinterpret_s64_f16_x4(...) 
__builtin_sve_reinterpret_s64_f16_x4(__VA_ARGS__) #define svreinterpret_s64_bf16_x4(...) __builtin_sve_reinterpret_s64_bf16_x4(__VA_ARGS__) #define svreinterpret_s64_f32_x4(...) __builtin_sve_reinterpret_s64_f32_x4(__VA_ARGS__) #define svreinterpret_s64_f64_x4(...) __builtin_sve_reinterpret_s64_f64_x4(__VA_ARGS__) #define svreinterpret_u64_s8_x4(...) __builtin_sve_reinterpret_u64_s8_x4(__VA_ARGS__) #define svreinterpret_u64_u8_x4(...) __builtin_sve_reinterpret_u64_u8_x4(__VA_ARGS__) #define svreinterpret_u64_s16_x4(...) __builtin_sve_reinterpret_u64_s16_x4(__VA_ARGS__) #define svreinterpret_u64_u16_x4(...) __builtin_sve_reinterpret_u64_u16_x4(__VA_ARGS__) #define svreinterpret_u64_s32_x4(...) __builtin_sve_reinterpret_u64_s32_x4(__VA_ARGS__) #define svreinterpret_u64_u32_x4(...) __builtin_sve_reinterpret_u64_u32_x4(__VA_ARGS__) #define svreinterpret_u64_s64_x4(...) __builtin_sve_reinterpret_u64_s64_x4(__VA_ARGS__) #define svreinterpret_u64_u64_x4(...) __builtin_sve_reinterpret_u64_u64_x4(__VA_ARGS__) #define svreinterpret_u64_f16_x4(...) __builtin_sve_reinterpret_u64_f16_x4(__VA_ARGS__) #define svreinterpret_u64_bf16_x4(...) __builtin_sve_reinterpret_u64_bf16_x4(__VA_ARGS__) #define svreinterpret_u64_f32_x4(...) __builtin_sve_reinterpret_u64_f32_x4(__VA_ARGS__) #define svreinterpret_u64_f64_x4(...) __builtin_sve_reinterpret_u64_f64_x4(__VA_ARGS__) #define svreinterpret_f16_s8_x4(...) __builtin_sve_reinterpret_f16_s8_x4(__VA_ARGS__) #define svreinterpret_f16_u8_x4(...) __builtin_sve_reinterpret_f16_u8_x4(__VA_ARGS__) #define svreinterpret_f16_s16_x4(...) __builtin_sve_reinterpret_f16_s16_x4(__VA_ARGS__) #define svreinterpret_f16_u16_x4(...) __builtin_sve_reinterpret_f16_u16_x4(__VA_ARGS__) #define svreinterpret_f16_s32_x4(...) __builtin_sve_reinterpret_f16_s32_x4(__VA_ARGS__) #define svreinterpret_f16_u32_x4(...) __builtin_sve_reinterpret_f16_u32_x4(__VA_ARGS__) #define svreinterpret_f16_s64_x4(...) __builtin_sve_reinterpret_f16_s64_x4(__VA_ARGS__) #define svreinterpret_f16_u64_x4(...) __builtin_sve_reinterpret_f16_u64_x4(__VA_ARGS__) #define svreinterpret_f16_f16_x4(...) __builtin_sve_reinterpret_f16_f16_x4(__VA_ARGS__) #define svreinterpret_f16_bf16_x4(...) __builtin_sve_reinterpret_f16_bf16_x4(__VA_ARGS__) #define svreinterpret_f16_f32_x4(...) __builtin_sve_reinterpret_f16_f32_x4(__VA_ARGS__) #define svreinterpret_f16_f64_x4(...) __builtin_sve_reinterpret_f16_f64_x4(__VA_ARGS__) #define svreinterpret_bf16_s8_x4(...) __builtin_sve_reinterpret_bf16_s8_x4(__VA_ARGS__) #define svreinterpret_bf16_u8_x4(...) __builtin_sve_reinterpret_bf16_u8_x4(__VA_ARGS__) #define svreinterpret_bf16_s16_x4(...) __builtin_sve_reinterpret_bf16_s16_x4(__VA_ARGS__) #define svreinterpret_bf16_u16_x4(...) __builtin_sve_reinterpret_bf16_u16_x4(__VA_ARGS__) #define svreinterpret_bf16_s32_x4(...) __builtin_sve_reinterpret_bf16_s32_x4(__VA_ARGS__) #define svreinterpret_bf16_u32_x4(...) __builtin_sve_reinterpret_bf16_u32_x4(__VA_ARGS__) #define svreinterpret_bf16_s64_x4(...) __builtin_sve_reinterpret_bf16_s64_x4(__VA_ARGS__) #define svreinterpret_bf16_u64_x4(...) __builtin_sve_reinterpret_bf16_u64_x4(__VA_ARGS__) #define svreinterpret_bf16_f16_x4(...) __builtin_sve_reinterpret_bf16_f16_x4(__VA_ARGS__) #define svreinterpret_bf16_bf16_x4(...) __builtin_sve_reinterpret_bf16_bf16_x4(__VA_ARGS__) #define svreinterpret_bf16_f32_x4(...) __builtin_sve_reinterpret_bf16_f32_x4(__VA_ARGS__) #define svreinterpret_bf16_f64_x4(...) __builtin_sve_reinterpret_bf16_f64_x4(__VA_ARGS__) #define svreinterpret_f32_s8_x4(...) 
__builtin_sve_reinterpret_f32_s8_x4(__VA_ARGS__) #define svreinterpret_f32_u8_x4(...) __builtin_sve_reinterpret_f32_u8_x4(__VA_ARGS__) #define svreinterpret_f32_s16_x4(...) __builtin_sve_reinterpret_f32_s16_x4(__VA_ARGS__) #define svreinterpret_f32_u16_x4(...) __builtin_sve_reinterpret_f32_u16_x4(__VA_ARGS__) #define svreinterpret_f32_s32_x4(...) __builtin_sve_reinterpret_f32_s32_x4(__VA_ARGS__) #define svreinterpret_f32_u32_x4(...) __builtin_sve_reinterpret_f32_u32_x4(__VA_ARGS__) #define svreinterpret_f32_s64_x4(...) __builtin_sve_reinterpret_f32_s64_x4(__VA_ARGS__) #define svreinterpret_f32_u64_x4(...) __builtin_sve_reinterpret_f32_u64_x4(__VA_ARGS__) #define svreinterpret_f32_f16_x4(...) __builtin_sve_reinterpret_f32_f16_x4(__VA_ARGS__) #define svreinterpret_f32_bf16_x4(...) __builtin_sve_reinterpret_f32_bf16_x4(__VA_ARGS__) #define svreinterpret_f32_f32_x4(...) __builtin_sve_reinterpret_f32_f32_x4(__VA_ARGS__) #define svreinterpret_f32_f64_x4(...) __builtin_sve_reinterpret_f32_f64_x4(__VA_ARGS__) #define svreinterpret_f64_s8_x4(...) __builtin_sve_reinterpret_f64_s8_x4(__VA_ARGS__) #define svreinterpret_f64_u8_x4(...) __builtin_sve_reinterpret_f64_u8_x4(__VA_ARGS__) #define svreinterpret_f64_s16_x4(...) __builtin_sve_reinterpret_f64_s16_x4(__VA_ARGS__) #define svreinterpret_f64_u16_x4(...) __builtin_sve_reinterpret_f64_u16_x4(__VA_ARGS__) #define svreinterpret_f64_s32_x4(...) __builtin_sve_reinterpret_f64_s32_x4(__VA_ARGS__) #define svreinterpret_f64_u32_x4(...) __builtin_sve_reinterpret_f64_u32_x4(__VA_ARGS__) #define svreinterpret_f64_s64_x4(...) __builtin_sve_reinterpret_f64_s64_x4(__VA_ARGS__) #define svreinterpret_f64_u64_x4(...) __builtin_sve_reinterpret_f64_u64_x4(__VA_ARGS__) #define svreinterpret_f64_f16_x4(...) __builtin_sve_reinterpret_f64_f16_x4(__VA_ARGS__) #define svreinterpret_f64_bf16_x4(...) __builtin_sve_reinterpret_f64_bf16_x4(__VA_ARGS__) #define svreinterpret_f64_f32_x4(...) __builtin_sve_reinterpret_f64_f32_x4(__VA_ARGS__) #define svreinterpret_f64_f64_x4(...) 
__builtin_sve_reinterpret_f64_f64_x4(__VA_ARGS__) __aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_s8_x4(op); } __aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svuint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_u8_x4(op); } __aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_s16_x4(op); } __aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svuint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_u16_x4(op); } __aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_s32_x4(op); } __aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svuint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_u32_x4(op); } __aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_s64_x4(op); } __aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svuint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_u64_x4(op); } __aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_f16_x4(op); } __aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svbfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_bf16_x4(op); } __aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svfloat32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_f32_x4(op); } __aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svfloat64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_f64_x4(op); } __aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_s8_x4(op); } __aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svuint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_u8_x4(op); } __aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_s16_x4(op); } __aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svuint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_u16_x4(op); } __aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_s32_x4(op); } __aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svuint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_u32_x4(op); } __aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_s64_x4(op); } __aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svuint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_u64_x4(op); } __aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_f16_x4(op); } __aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svbfloat16x4_t op) __arm_streaming_compatible { return 
__builtin_sve_reinterpret_u8_bf16_x4(op); } __aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svfloat32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_f32_x4(op); } __aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svfloat64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_f64_x4(op); } __aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_s8_x4(op); } __aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svuint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_u8_x4(op); } __aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_s16_x4(op); } __aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svuint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_u16_x4(op); } __aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_s32_x4(op); } __aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svuint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_u32_x4(op); } __aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_s64_x4(op); } __aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svuint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_u64_x4(op); } __aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_f16_x4(op); } __aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svbfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_bf16_x4(op); } __aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svfloat32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_f32_x4(op); } __aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svfloat64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s16_f64_x4(op); } __aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_s8_x4(op); } __aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svuint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_u8_x4(op); } __aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_s16_x4(op); } __aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svuint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_u16_x4(op); } __aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_s32_x4(op); } __aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svuint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_u32_x4(op); } __aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_s64_x4(op); } __aio __attribute__((target("sve"))) svuint16x4_t 
svreinterpret_u16(svuint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_u64_x4(op); } __aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_f16_x4(op); } __aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svbfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_bf16_x4(op); } __aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svfloat32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_f32_x4(op); } __aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svfloat64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_f64_x4(op); } __aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_s8_x4(op); } __aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svuint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_u8_x4(op); } __aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_s16_x4(op); } __aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svuint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_u16_x4(op); } __aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_s32_x4(op); } __aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svuint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_u32_x4(op); } __aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_s64_x4(op); } __aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svuint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_u64_x4(op); } __aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_f16_x4(op); } __aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svbfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_bf16_x4(op); } __aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svfloat32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_f32_x4(op); } __aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svfloat64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s32_f64_x4(op); } __aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_s8_x4(op); } __aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svuint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_u8_x4(op); } __aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_s16_x4(op); } __aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svuint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_u16_x4(op); } __aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svint32x4_t op) __arm_streaming_compatible { return 
__builtin_sve_reinterpret_u32_s32_x4(op); } __aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svuint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_u32_x4(op); } __aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_s64_x4(op); } __aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svuint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_u64_x4(op); } __aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_f16_x4(op); } __aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svbfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_bf16_x4(op); } __aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svfloat32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_f32_x4(op); } __aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svfloat64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_f64_x4(op); } __aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_s8_x4(op); } __aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svuint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_u8_x4(op); } __aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_s16_x4(op); } __aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svuint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_u16_x4(op); } __aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_s32_x4(op); } __aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svuint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_u32_x4(op); } __aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_s64_x4(op); } __aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svuint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_u64_x4(op); } __aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_f16_x4(op); } __aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svbfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_bf16_x4(op); } __aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svfloat32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_f32_x4(op); } __aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svfloat64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s64_f64_x4(op); } __aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_s8_x4(op); } __aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svuint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_u8_x4(op); } __aio __attribute__((target("sve"))) svuint64x4_t 
svreinterpret_u64(svint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_s16_x4(op); } __aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svuint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_u16_x4(op); } __aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_s32_x4(op); } __aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svuint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_u32_x4(op); } __aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_s64_x4(op); } __aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svuint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_u64_x4(op); } __aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_f16_x4(op); } __aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svbfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_bf16_x4(op); } __aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svfloat32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_f32_x4(op); } __aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svfloat64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_f64_x4(op); } __aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_s8_x4(op); } __aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svuint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_u8_x4(op); } __aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_s16_x4(op); } __aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svuint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_u16_x4(op); } __aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_s32_x4(op); } __aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svuint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_u32_x4(op); } __aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_s64_x4(op); } __aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svuint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_u64_x4(op); } __aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_f16_x4(op); } __aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svbfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_bf16_x4(op); } __aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svfloat32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_f32_x4(op); } __aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svfloat64x4_t op) __arm_streaming_compatible { return 
__builtin_sve_reinterpret_f16_f64_x4(op); } __aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_s8_x4(op); } __aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svuint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_u8_x4(op); } __aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_s16_x4(op); } __aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svuint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_u16_x4(op); } __aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_s32_x4(op); } __aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svuint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_u32_x4(op); } __aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_s64_x4(op); } __aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svuint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_u64_x4(op); } __aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_f16_x4(op); } __aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svbfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_bf16_x4(op); } __aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svfloat32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_f32_x4(op); } __aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svfloat64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_f64_x4(op); } __aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_s8_x4(op); } __aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svuint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_u8_x4(op); } __aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_s16_x4(op); } __aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svuint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_u16_x4(op); } __aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_s32_x4(op); } __aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svuint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_u32_x4(op); } __aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_s64_x4(op); } __aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svuint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_u64_x4(op); } __aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_f16_x4(op); 
} __aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svbfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_bf16_x4(op); } __aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svfloat32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_f32_x4(op); } __aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svfloat64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_f64_x4(op); } __aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_s8_x4(op); } __aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svuint8x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_u8_x4(op); } __aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_s16_x4(op); } __aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svuint16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_u16_x4(op); } __aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_s32_x4(op); } __aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svuint32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_u32_x4(op); } __aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_s64_x4(op); } __aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svuint64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_u64_x4(op); } __aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_f16_x4(op); } __aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svbfloat16x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_bf16_x4(op); } __aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svfloat32x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_f32_x4(op); } __aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svfloat64x4_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_f64_x4(op); } __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_m))) svbfloat16_t svadd_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_x))) svbfloat16_t svadd_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_z))) svbfloat16_t svadd_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_m))) svbfloat16_t svadd_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_x))) svbfloat16_t svadd_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_z))) svbfloat16_t svadd_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_bf16))) svbfloat16_t svclamp_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t); __ai 
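// Usage sketch for the tuple svreinterpret_* overloads declared above. The helper
// name cast_to_bf16 is illustrative only; it assumes the rest of arm_sve.h is in
// scope and that the translation unit is built with SVE enabled.
//
//   static inline svbfloat16x4_t cast_to_bf16(svfloat32x4_t v) {
//       return svreinterpret_bf16(v);   // bit-for-bit reinterpretation of all four vectors; no value conversion
//   }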
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_m))) svbfloat16_t svmax_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_x))) svbfloat16_t svmax_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_z))) svbfloat16_t svmax_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_m))) svbfloat16_t svmax_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_x))) svbfloat16_t svmax_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_z))) svbfloat16_t svmax_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_m))) svbfloat16_t svmaxnm_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_x))) svbfloat16_t svmaxnm_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_z))) svbfloat16_t svmaxnm_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_m))) svbfloat16_t svmaxnm_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_x))) svbfloat16_t svmaxnm_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_z))) svbfloat16_t svmaxnm_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_m))) svbfloat16_t svmin_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_x))) svbfloat16_t svmin_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_z))) svbfloat16_t svmin_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_m))) svbfloat16_t svmin_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_x))) svbfloat16_t svmin_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_z))) svbfloat16_t svmin_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_m))) svbfloat16_t svminnm_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_x))) svbfloat16_t svminnm_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_z))) svbfloat16_t svminnm_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_m))) svbfloat16_t svminnm_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_x))) svbfloat16_t svminnm_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_z))) svbfloat16_t svminnm_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_m))) svbfloat16_t 
svmla_n_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_x))) svbfloat16_t svmla_n_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_z))) svbfloat16_t svmla_n_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_m))) svbfloat16_t svmla_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_x))) svbfloat16_t svmla_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_z))) svbfloat16_t svmla_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_bf16))) svbfloat16_t svmla_lane_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_m))) svbfloat16_t svmls_n_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_x))) svbfloat16_t svmls_n_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_z))) svbfloat16_t svmls_n_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_m))) svbfloat16_t svmls_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_x))) svbfloat16_t svmls_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_z))) svbfloat16_t svmls_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_bf16))) svbfloat16_t svmls_lane_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_m))) svbfloat16_t svmul_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_x))) svbfloat16_t svmul_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_z))) svbfloat16_t svmul_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_m))) svbfloat16_t svmul_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x))) svbfloat16_t svmul_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_z))) svbfloat16_t svmul_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_bf16))) svbfloat16_t svmul_lane_bf16(svbfloat16_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_m))) svbfloat16_t svsub_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_x))) svbfloat16_t svsub_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_z))) svbfloat16_t svsub_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); __ai 
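// Usage sketch for the predicated bfloat16 arithmetic declared above (svadd_bf16_m,
// svmla_lane_bf16, and friends). bf16_step is an illustrative name; svptrue_b16()
// comes from the wider arm_sve.h API, and these bf16 forms assume the SVE bfloat16
// arithmetic extension is available on the target.
//
//   static inline svbfloat16_t bf16_step(svbfloat16_t acc, svbfloat16_t a, svbfloat16_t b) {
//       svbool_t pg = svptrue_b16();                   // all lanes active
//       svbfloat16_t sum = svadd_bf16_m(pg, acc, a);   // merging form: inactive lanes would keep acc
//       return svmla_lane_bf16(sum, a, b, 0);          // sum + a * b[lane 0], unpredicated
//   }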
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_m))) svbfloat16_t svsub_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_x))) svbfloat16_t svsub_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_z))) svbfloat16_t svsub_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_m))) svbfloat16_t svadd_m(svbool_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_x))) svbfloat16_t svadd_x(svbool_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_z))) svbfloat16_t svadd_z(svbool_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_m))) svbfloat16_t svadd_m(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_x))) svbfloat16_t svadd_x(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_z))) svbfloat16_t svadd_z(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_bf16))) svbfloat16_t svclamp(svbfloat16_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_m))) svbfloat16_t svmax_m(svbool_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_x))) svbfloat16_t svmax_x(svbool_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_z))) svbfloat16_t svmax_z(svbool_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_m))) svbfloat16_t svmax_m(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_x))) svbfloat16_t svmax_x(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_z))) svbfloat16_t svmax_z(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_m))) svbfloat16_t svmaxnm_m(svbool_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_x))) svbfloat16_t svmaxnm_x(svbool_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_z))) svbfloat16_t svmaxnm_z(svbool_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_m))) svbfloat16_t svmaxnm_m(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_x))) svbfloat16_t svmaxnm_x(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_z))) svbfloat16_t svmaxnm_z(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_m))) svbfloat16_t svmin_m(svbool_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_x))) svbfloat16_t svmin_x(svbool_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_z))) svbfloat16_t svmin_z(svbool_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_m))) svbfloat16_t 
svmin_m(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_x))) svbfloat16_t svmin_x(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_z))) svbfloat16_t svmin_z(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_m))) svbfloat16_t svminnm_m(svbool_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_x))) svbfloat16_t svminnm_x(svbool_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_z))) svbfloat16_t svminnm_z(svbool_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_m))) svbfloat16_t svminnm_m(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_x))) svbfloat16_t svminnm_x(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_z))) svbfloat16_t svminnm_z(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_m))) svbfloat16_t svmla_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_x))) svbfloat16_t svmla_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_z))) svbfloat16_t svmla_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_m))) svbfloat16_t svmla_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_x))) svbfloat16_t svmla_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_z))) svbfloat16_t svmla_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_bf16))) svbfloat16_t svmla_lane(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_m))) svbfloat16_t svmls_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_x))) svbfloat16_t svmls_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_z))) svbfloat16_t svmls_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_m))) svbfloat16_t svmls_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_x))) svbfloat16_t svmls_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_z))) svbfloat16_t svmls_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_bf16))) svbfloat16_t svmls_lane(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_m))) svbfloat16_t svmul_m(svbool_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_x))) svbfloat16_t svmul_x(svbool_t, svbfloat16_t, 
bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_z))) svbfloat16_t svmul_z(svbool_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_m))) svbfloat16_t svmul_m(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x))) svbfloat16_t svmul_x(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_z))) svbfloat16_t svmul_z(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_bf16))) svbfloat16_t svmul_lane(svbfloat16_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_m))) svbfloat16_t svsub_m(svbool_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_x))) svbfloat16_t svsub_x(svbool_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_z))) svbfloat16_t svsub_z(svbool_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_m))) svbfloat16_t svsub_m(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_x))) svbfloat16_t svsub_x(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_z))) svbfloat16_t svsub_z(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_bf16))) svbfloat16_t svdup_laneq_bf16(svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_bf16))) svbfloat16_t svdup_laneq(svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u8_x2))) svuint8x2_t svadd_single_u8_x2(svuint8x2_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u32_x2))) svuint32x2_t svadd_single_u32_x2(svuint32x2_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u64_x2))) svuint64x2_t svadd_single_u64_x2(svuint64x2_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u16_x2))) svuint16x2_t svadd_single_u16_x2(svuint16x2_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s8_x2))) svint8x2_t svadd_single_s8_x2(svint8x2_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s32_x2))) svint32x2_t svadd_single_s32_x2(svint32x2_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s64_x2))) svint64x2_t svadd_single_s64_x2(svint64x2_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s16_x2))) svint16x2_t svadd_single_s16_x2(svint16x2_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u8_x4))) svuint8x4_t svadd_single_u8_x4(svuint8x4_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u32_x4))) svuint32x4_t svadd_single_u32_x4(svuint32x4_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u64_x4))) svuint64x4_t svadd_single_u64_x4(svuint64x4_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u16_x4))) svuint16x4_t svadd_single_u16_x4(svuint16x4_t, svuint16_t); __ai 
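// Usage sketch for the "single" multi-vector adds above (svadd_single_*_x2/_x4): one
// single vector is added to every vector of the tuple. add_bias is an illustrative
// name, and the sketch assumes a streaming-mode caller, which these SME2 tuple
// intrinsics generally require.
//
//   static inline svint32x2_t add_bias(svint32x2_t acc, svint32_t bias) __arm_streaming {
//       return svadd_single_s32_x2(acc, bias);   // bias is added to both vectors of the pair
//   }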
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s8_x4))) svint8x4_t svadd_single_s8_x4(svint8x4_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s32_x4))) svint32x4_t svadd_single_s32_x4(svint32x4_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s64_x4))) svint64x4_t svadd_single_s64_x4(svint64x4_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s16_x4))) svint16x4_t svadd_single_s16_x4(svint16x4_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f64_x2))) svfloat64x2_t svclamp_single_f64_x2(svfloat64x2_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f32_x2))) svfloat32x2_t svclamp_single_f32_x2(svfloat32x2_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f16_x2))) svfloat16x2_t svclamp_single_f16_x2(svfloat16x2_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s8_x2))) svint8x2_t svclamp_single_s8_x2(svint8x2_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s32_x2))) svint32x2_t svclamp_single_s32_x2(svint32x2_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s64_x2))) svint64x2_t svclamp_single_s64_x2(svint64x2_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s16_x2))) svint16x2_t svclamp_single_s16_x2(svint16x2_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u8_x2))) svuint8x2_t svclamp_single_u8_x2(svuint8x2_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u32_x2))) svuint32x2_t svclamp_single_u32_x2(svuint32x2_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u64_x2))) svuint64x2_t svclamp_single_u64_x2(svuint64x2_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u16_x2))) svuint16x2_t svclamp_single_u16_x2(svuint16x2_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f64_x4))) svfloat64x4_t svclamp_single_f64_x4(svfloat64x4_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f32_x4))) svfloat32x4_t svclamp_single_f32_x4(svfloat32x4_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f16_x4))) svfloat16x4_t svclamp_single_f16_x4(svfloat16x4_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s8_x4))) svint8x4_t svclamp_single_s8_x4(svint8x4_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s32_x4))) svint32x4_t svclamp_single_s32_x4(svint32x4_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s64_x4))) svint64x4_t svclamp_single_s64_x4(svint64x4_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s16_x4))) svint16x4_t svclamp_single_s16_x4(svint16x4_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u8_x4))) svuint8x4_t svclamp_single_u8_x4(svuint8x4_t, svuint8_t, svuint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u32_x4))) svuint32x4_t svclamp_single_u32_x4(svuint32x4_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u64_x4))) svuint64x4_t svclamp_single_u64_x4(svuint64x4_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u16_x4))) svuint16x4_t svclamp_single_u16_x4(svuint16x4_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_x2))) svbfloat16_t svcvt_bf16_f32_x2(svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f32_x2))) svfloat16_t svcvt_f16_f32_x2(svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_x2))) svint32x2_t svcvt_s32_f32_x2(svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_x2))) svuint32x2_t svcvt_u32_f32_x2(svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_x4))) svint32x4_t svcvt_s32_f32_x4(svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_x4))) svuint32x4_t svcvt_u32_f32_x4(svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_x2))) svfloat32x2_t svcvt_f32_s32_x2(svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_x4))) svfloat32x4_t svcvt_f32_s32_x4(svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_x2))) svfloat32x2_t svcvt_f32_u32_x2(svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_x4))) svfloat32x4_t svcvt_f32_u32_x4(svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_bf16_f32_x2))) svbfloat16_t svcvtn_bf16_f32_x2(svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_f16_f32_x2))) svfloat16_t svcvtn_f16_f32_x2(svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f64_x2))) svfloat64x2_t svmax_single_f64_x2(svfloat64x2_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f32_x2))) svfloat32x2_t svmax_single_f32_x2(svfloat32x2_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f16_x2))) svfloat16x2_t svmax_single_f16_x2(svfloat16x2_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s8_x2))) svint8x2_t svmax_single_s8_x2(svint8x2_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s32_x2))) svint32x2_t svmax_single_s32_x2(svint32x2_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s64_x2))) svint64x2_t svmax_single_s64_x2(svint64x2_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s16_x2))) svint16x2_t svmax_single_s16_x2(svint16x2_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u8_x2))) svuint8x2_t svmax_single_u8_x2(svuint8x2_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u32_x2))) svuint32x2_t svmax_single_u32_x2(svuint32x2_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u64_x2))) svuint64x2_t svmax_single_u64_x2(svuint64x2_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u16_x2))) svuint16x2_t svmax_single_u16_x2(svuint16x2_t, svuint16_t); __ai 
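// Usage sketch for the multi-vector converts above. to_bf16 is an illustrative name;
// the conversion intrinsics take or return vector tuples and, like the other tuple
// forms here, are assumed to be called from streaming code.
//
//   static inline svbfloat16_t to_bf16(svint32x2_t i) __arm_streaming {
//       svfloat32x2_t f = svcvt_f32_s32_x2(i);   // convert each of the two int32 vectors to float32
//       return svcvt_bf16_f32_x2(f);             // narrow the two float32 vectors into one bfloat16 vector
//   }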
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f64_x4))) svfloat64x4_t svmax_single_f64_x4(svfloat64x4_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f32_x4))) svfloat32x4_t svmax_single_f32_x4(svfloat32x4_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f16_x4))) svfloat16x4_t svmax_single_f16_x4(svfloat16x4_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s8_x4))) svint8x4_t svmax_single_s8_x4(svint8x4_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s32_x4))) svint32x4_t svmax_single_s32_x4(svint32x4_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s64_x4))) svint64x4_t svmax_single_s64_x4(svint64x4_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s16_x4))) svint16x4_t svmax_single_s16_x4(svint16x4_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u8_x4))) svuint8x4_t svmax_single_u8_x4(svuint8x4_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u32_x4))) svuint32x4_t svmax_single_u32_x4(svuint32x4_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u64_x4))) svuint64x4_t svmax_single_u64_x4(svuint64x4_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u16_x4))) svuint16x4_t svmax_single_u16_x4(svuint16x4_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_x2))) svfloat64x2_t svmax_f64_x2(svfloat64x2_t, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_x2))) svfloat32x2_t svmax_f32_x2(svfloat32x2_t, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_x2))) svfloat16x2_t svmax_f16_x2(svfloat16x2_t, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_x2))) svint8x2_t svmax_s8_x2(svint8x2_t, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_x2))) svint32x2_t svmax_s32_x2(svint32x2_t, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_x2))) svint64x2_t svmax_s64_x2(svint64x2_t, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_x2))) svint16x2_t svmax_s16_x2(svint16x2_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_x2))) svuint8x2_t svmax_u8_x2(svuint8x2_t, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_x2))) svuint32x2_t svmax_u32_x2(svuint32x2_t, svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_x2))) svuint64x2_t svmax_u64_x2(svuint64x2_t, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_x2))) svuint16x2_t svmax_u16_x2(svuint16x2_t, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_x4))) svfloat64x4_t svmax_f64_x4(svfloat64x4_t, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_x4))) svfloat32x4_t svmax_f32_x4(svfloat32x4_t, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_x4))) svfloat16x4_t svmax_f16_x4(svfloat16x4_t, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_x4))) svint8x4_t svmax_s8_x4(svint8x4_t, svint8x4_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_x4))) svint32x4_t svmax_s32_x4(svint32x4_t, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_x4))) svint64x4_t svmax_s64_x4(svint64x4_t, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_x4))) svint16x4_t svmax_s16_x4(svint16x4_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_x4))) svuint8x4_t svmax_u8_x4(svuint8x4_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_x4))) svuint32x4_t svmax_u32_x4(svuint32x4_t, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_x4))) svuint64x4_t svmax_u64_x4(svuint64x4_t, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_x4))) svuint16x4_t svmax_u16_x4(svuint16x4_t, svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f64_x2))) svfloat64x2_t svmaxnm_single_f64_x2(svfloat64x2_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f32_x2))) svfloat32x2_t svmaxnm_single_f32_x2(svfloat32x2_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f16_x2))) svfloat16x2_t svmaxnm_single_f16_x2(svfloat16x2_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f64_x4))) svfloat64x4_t svmaxnm_single_f64_x4(svfloat64x4_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f32_x4))) svfloat32x4_t svmaxnm_single_f32_x4(svfloat32x4_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f16_x4))) svfloat16x4_t svmaxnm_single_f16_x4(svfloat16x4_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_x2))) svfloat64x2_t svmaxnm_f64_x2(svfloat64x2_t, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_x2))) svfloat32x2_t svmaxnm_f32_x2(svfloat32x2_t, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_x2))) svfloat16x2_t svmaxnm_f16_x2(svfloat16x2_t, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_x4))) svfloat64x4_t svmaxnm_f64_x4(svfloat64x4_t, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_x4))) svfloat32x4_t svmaxnm_f32_x4(svfloat32x4_t, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_x4))) svfloat16x4_t svmaxnm_f16_x4(svfloat16x4_t, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f64_x2))) svfloat64x2_t svmin_single_f64_x2(svfloat64x2_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f32_x2))) svfloat32x2_t svmin_single_f32_x2(svfloat32x2_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f16_x2))) svfloat16x2_t svmin_single_f16_x2(svfloat16x2_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s8_x2))) svint8x2_t svmin_single_s8_x2(svint8x2_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s32_x2))) svint32x2_t svmin_single_s32_x2(svint32x2_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s64_x2))) svint64x2_t svmin_single_s64_x2(svint64x2_t, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s16_x2))) svint16x2_t svmin_single_s16_x2(svint16x2_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u8_x2))) svuint8x2_t svmin_single_u8_x2(svuint8x2_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u32_x2))) svuint32x2_t svmin_single_u32_x2(svuint32x2_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u64_x2))) svuint64x2_t svmin_single_u64_x2(svuint64x2_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u16_x2))) svuint16x2_t svmin_single_u16_x2(svuint16x2_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f64_x4))) svfloat64x4_t svmin_single_f64_x4(svfloat64x4_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f32_x4))) svfloat32x4_t svmin_single_f32_x4(svfloat32x4_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f16_x4))) svfloat16x4_t svmin_single_f16_x4(svfloat16x4_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s8_x4))) svint8x4_t svmin_single_s8_x4(svint8x4_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s32_x4))) svint32x4_t svmin_single_s32_x4(svint32x4_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s64_x4))) svint64x4_t svmin_single_s64_x4(svint64x4_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s16_x4))) svint16x4_t svmin_single_s16_x4(svint16x4_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u8_x4))) svuint8x4_t svmin_single_u8_x4(svuint8x4_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u32_x4))) svuint32x4_t svmin_single_u32_x4(svuint32x4_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u64_x4))) svuint64x4_t svmin_single_u64_x4(svuint64x4_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u16_x4))) svuint16x4_t svmin_single_u16_x4(svuint16x4_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_x2))) svfloat64x2_t svmin_f64_x2(svfloat64x2_t, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_x2))) svfloat32x2_t svmin_f32_x2(svfloat32x2_t, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_x2))) svfloat16x2_t svmin_f16_x2(svfloat16x2_t, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_x2))) svint8x2_t svmin_s8_x2(svint8x2_t, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_x2))) svint32x2_t svmin_s32_x2(svint32x2_t, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_x2))) svint64x2_t svmin_s64_x2(svint64x2_t, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_x2))) svint16x2_t svmin_s16_x2(svint16x2_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_x2))) svuint8x2_t svmin_u8_x2(svuint8x2_t, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_x2))) svuint32x2_t svmin_u32_x2(svuint32x2_t, svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_x2))) svuint64x2_t svmin_u64_x2(svuint64x2_t, 
svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_x2))) svuint16x2_t svmin_u16_x2(svuint16x2_t, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_x4))) svfloat64x4_t svmin_f64_x4(svfloat64x4_t, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_x4))) svfloat32x4_t svmin_f32_x4(svfloat32x4_t, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_x4))) svfloat16x4_t svmin_f16_x4(svfloat16x4_t, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_x4))) svint8x4_t svmin_s8_x4(svint8x4_t, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_x4))) svint32x4_t svmin_s32_x4(svint32x4_t, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_x4))) svint64x4_t svmin_s64_x4(svint64x4_t, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_x4))) svint16x4_t svmin_s16_x4(svint16x4_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_x4))) svuint8x4_t svmin_u8_x4(svuint8x4_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_x4))) svuint32x4_t svmin_u32_x4(svuint32x4_t, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_x4))) svuint64x4_t svmin_u64_x4(svuint64x4_t, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_x4))) svuint16x4_t svmin_u16_x4(svuint16x4_t, svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f64_x2))) svfloat64x2_t svminnm_single_f64_x2(svfloat64x2_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f32_x2))) svfloat32x2_t svminnm_single_f32_x2(svfloat32x2_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f16_x2))) svfloat16x2_t svminnm_single_f16_x2(svfloat16x2_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f64_x4))) svfloat64x4_t svminnm_single_f64_x4(svfloat64x4_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f32_x4))) svfloat32x4_t svminnm_single_f32_x4(svfloat32x4_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f16_x4))) svfloat16x4_t svminnm_single_f16_x4(svfloat16x4_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_x2))) svfloat64x2_t svminnm_f64_x2(svfloat64x2_t, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_x2))) svfloat32x2_t svminnm_f32_x2(svfloat32x2_t, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_x2))) svfloat16x2_t svminnm_f16_x2(svfloat16x2_t, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_x4))) svfloat64x4_t svminnm_f64_x4(svfloat64x4_t, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_x4))) svfloat32x4_t svminnm_f32_x4(svfloat32x4_t, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_x4))) svfloat16x4_t svminnm_f16_x4(svfloat16x4_t, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_s16_s32_x2))) svint16_t svqcvt_s16_s32_x2(svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_s16_s64_x4))) svint16_t svqcvt_s16_s64_x4(svint64x4_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_s8_s32_x4))) svint8_t svqcvt_s8_s32_x4(svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_s32_x2))) svuint16_t svqcvt_u16_s32_x2(svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_u32_x2))) svuint16_t svqcvt_u16_u32_x2(svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_s64_x4))) svuint16_t svqcvt_u16_s64_x4(svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_u64_x4))) svuint16_t svqcvt_u16_u64_x4(svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u8_s32_x4))) svuint8_t svqcvt_u8_s32_x4(svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u8_u32_x4))) svuint8_t svqcvt_u8_u32_x4(svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s16_s64_x4))) svint16_t svqcvtn_s16_s64_x4(svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s8_s32_x4))) svint8_t svqcvtn_s8_s32_x4(svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_s64_x4))) svuint16_t svqcvtn_u16_s64_x4(svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_u64_x4))) svuint16_t svqcvtn_u16_u64_x4(svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u8_s32_x4))) svuint8_t svqcvtn_u8_s32_x4(svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u8_u32_x4))) svuint8_t svqcvtn_u8_u32_x4(svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s8_x2))) svint8x2_t svqdmulh_single_s8_x2(svint8x2_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s32_x2))) svint32x2_t svqdmulh_single_s32_x2(svint32x2_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s64_x2))) svint64x2_t svqdmulh_single_s64_x2(svint64x2_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s16_x2))) svint16x2_t svqdmulh_single_s16_x2(svint16x2_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s8_x4))) svint8x4_t svqdmulh_single_s8_x4(svint8x4_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s32_x4))) svint32x4_t svqdmulh_single_s32_x4(svint32x4_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s64_x4))) svint64x4_t svqdmulh_single_s64_x4(svint64x4_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s16_x4))) svint16x4_t svqdmulh_single_s16_x4(svint16x4_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s8_x2))) svint8x2_t svqdmulh_s8_x2(svint8x2_t, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s32_x2))) svint32x2_t svqdmulh_s32_x2(svint32x2_t, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s64_x2))) svint64x2_t svqdmulh_s64_x2(svint64x2_t, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s16_x2))) svint16x2_t svqdmulh_s16_x2(svint16x2_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s8_x4))) svint8x4_t svqdmulh_s8_x4(svint8x4_t, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s32_x4))) svint32x4_t svqdmulh_s32_x4(svint32x4_t, svint32x4_t); 
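// Usage sketch combining the saturating doubling multiply-high above with the
// saturating narrowing converts declared earlier. fixed_mul_narrow is an illustrative
// name; as with the other tuple operations, a streaming-mode caller is assumed.
//
//   static inline svint16_t fixed_mul_narrow(svint32x2_t x, svint32x2_t y) __arm_streaming {
//       svint32x2_t p = svqdmulh_s32_x2(x, y);   // per-vector saturating doubling multiply returning the high half
//       return svqcvt_s16_s32_x2(p);             // saturate and narrow the two int32 vectors into one int16 vector
//   }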
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s64_x4))) svint64x4_t svqdmulh_s64_x4(svint64x4_t, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s16_x4))) svint16x4_t svqdmulh_s16_x4(svint16x4_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_s16_s32_x2))) svint16_t svqrshr_n_s16_s32_x2(svint32x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_u16_u32_x2))) svuint16_t svqrshr_n_u16_u32_x2(svuint32x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_s8_s32_x4))) svint8_t svqrshr_n_s8_s32_x4(svint32x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_s16_s64_x4))) svint16_t svqrshr_n_s16_s64_x4(svint64x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_u8_u32_x4))) svuint8_t svqrshr_n_u8_u32_x4(svuint32x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_u16_u64_x4))) svuint16_t svqrshr_n_u16_u64_x4(svuint64x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s8_s32_x4))) svint8_t svqrshrn_n_s8_s32_x4(svint32x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s16_s64_x4))) svint16_t svqrshrn_n_s16_s64_x4(svint64x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u8_u32_x4))) svuint8_t svqrshrn_n_u8_u32_x4(svuint32x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u16_u64_x4))) svuint16_t svqrshrn_n_u16_u64_x4(svuint64x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshru_n_u16_s32_x2))) svuint16_t svqrshru_n_u16_s32_x2(svint32x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshru_n_u8_s32_x4))) svuint8_t svqrshru_n_u8_s32_x4(svint32x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshru_n_u16_s64_x4))) svuint16_t svqrshru_n_u16_s64_x4(svint64x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u8_s32_x4))) svuint8_t svqrshrun_n_u8_s32_x4(svint32x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u16_s64_x4))) svuint16_t svqrshrun_n_u16_s64_x4(svint64x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_b))) svbool_t svreinterpret_b(svcount_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_c))) svcount_t svreinterpret_c(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_x2))) svfloat32x2_t svrinta_f32_x2(svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_x4))) svfloat32x4_t svrinta_f32_x4(svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_x2))) svfloat32x2_t svrintm_f32_x2(svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_x4))) svfloat32x4_t svrintm_f32_x4(svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_x2))) svfloat32x2_t svrintn_f32_x2(svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_x4))) svfloat32x4_t svrintn_f32_x4(svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_x2))) svfloat32x2_t svrintp_f32_x2(svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_x4))) 
svfloat32x4_t svrintp_f32_x4(svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s8_x2))) svint8x2_t svrshl_single_s8_x2(svint8x2_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s32_x2))) svint32x2_t svrshl_single_s32_x2(svint32x2_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s64_x2))) svint64x2_t svrshl_single_s64_x2(svint64x2_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s16_x2))) svint16x2_t svrshl_single_s16_x2(svint16x2_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u8_x2))) svuint8x2_t svrshl_single_u8_x2(svuint8x2_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u32_x2))) svuint32x2_t svrshl_single_u32_x2(svuint32x2_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u64_x2))) svuint64x2_t svrshl_single_u64_x2(svuint64x2_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u16_x2))) svuint16x2_t svrshl_single_u16_x2(svuint16x2_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s8_x4))) svint8x4_t svrshl_single_s8_x4(svint8x4_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s32_x4))) svint32x4_t svrshl_single_s32_x4(svint32x4_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s64_x4))) svint64x4_t svrshl_single_s64_x4(svint64x4_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s16_x4))) svint16x4_t svrshl_single_s16_x4(svint16x4_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u8_x4))) svuint8x4_t svrshl_single_u8_x4(svuint8x4_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u32_x4))) svuint32x4_t svrshl_single_u32_x4(svuint32x4_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u64_x4))) svuint64x4_t svrshl_single_u64_x4(svuint64x4_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u16_x4))) svuint16x4_t svrshl_single_u16_x4(svuint16x4_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_x2))) svint8x2_t svrshl_s8_x2(svint8x2_t, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_x2))) svint32x2_t svrshl_s32_x2(svint32x2_t, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_x2))) svint64x2_t svrshl_s64_x2(svint64x2_t, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_x2))) svint16x2_t svrshl_s16_x2(svint16x2_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_x2))) svuint8x2_t svrshl_u8_x2(svuint8x2_t, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_x2))) svuint32x2_t svrshl_u32_x2(svuint32x2_t, svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_x2))) svuint64x2_t svrshl_u64_x2(svuint64x2_t, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_x2))) svuint16x2_t svrshl_u16_x2(svuint16x2_t, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_x4))) svint8x4_t svrshl_s8_x4(svint8x4_t, svint8x4_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_x4))) svint32x4_t svrshl_s32_x4(svint32x4_t, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_x4))) svint64x4_t svrshl_s64_x4(svint64x4_t, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_x4))) svint16x4_t svrshl_s16_x4(svint16x4_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_x4))) svuint8x4_t svrshl_u8_x4(svuint8x4_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_x4))) svuint32x4_t svrshl_u32_x4(svuint32x4_t, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_x4))) svuint64x4_t svrshl_u64_x4(svuint64x4_t, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_x4))) svuint16x4_t svrshl_u16_x4(svuint16x4_t, svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u8_x2))) svuint8x2_t svsel_u8_x2(svcount_t, svuint8x2_t, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u32_x2))) svuint32x2_t svsel_u32_x2(svcount_t, svuint32x2_t, svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u64_x2))) svuint64x2_t svsel_u64_x2(svcount_t, svuint64x2_t, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u16_x2))) svuint16x2_t svsel_u16_x2(svcount_t, svuint16x2_t, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_bf16_x2))) svbfloat16x2_t svsel_bf16_x2(svcount_t, svbfloat16x2_t, svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s8_x2))) svint8x2_t svsel_s8_x2(svcount_t, svint8x2_t, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f64_x2))) svfloat64x2_t svsel_f64_x2(svcount_t, svfloat64x2_t, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f32_x2))) svfloat32x2_t svsel_f32_x2(svcount_t, svfloat32x2_t, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f16_x2))) svfloat16x2_t svsel_f16_x2(svcount_t, svfloat16x2_t, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s32_x2))) svint32x2_t svsel_s32_x2(svcount_t, svint32x2_t, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s64_x2))) svint64x2_t svsel_s64_x2(svcount_t, svint64x2_t, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s16_x2))) svint16x2_t svsel_s16_x2(svcount_t, svint16x2_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u8_x4))) svuint8x4_t svsel_u8_x4(svcount_t, svuint8x4_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u32_x4))) svuint32x4_t svsel_u32_x4(svcount_t, svuint32x4_t, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u64_x4))) svuint64x4_t svsel_u64_x4(svcount_t, svuint64x4_t, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u16_x4))) svuint16x4_t svsel_u16_x4(svcount_t, svuint16x4_t, svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_bf16_x4))) svbfloat16x4_t svsel_bf16_x4(svcount_t, svbfloat16x4_t, svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s8_x4))) svint8x4_t svsel_s8_x4(svcount_t, svint8x4_t, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f64_x4))) svfloat64x4_t 
svsel_f64_x4(svcount_t, svfloat64x4_t, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f32_x4))) svfloat32x4_t svsel_f32_x4(svcount_t, svfloat32x4_t, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f16_x4))) svfloat16x4_t svsel_f16_x4(svcount_t, svfloat16x4_t, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s32_x4))) svint32x4_t svsel_s32_x4(svcount_t, svint32x4_t, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s64_x4))) svint64x4_t svsel_s64_x4(svcount_t, svint64x4_t, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s16_x4))) svint16x4_t svsel_s16_x4(svcount_t, svint16x4_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s32_s16_x2))) svint32x2_t svunpk_s32_s16_x2(svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s64_s32_x2))) svint64x2_t svunpk_s64_s32_x2(svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s16_s8_x2))) svint16x2_t svunpk_s16_s8_x2(svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u32_u16_x2))) svuint32x2_t svunpk_u32_u16_x2(svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u64_u32_x2))) svuint64x2_t svunpk_u64_u32_x2(svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u16_u8_x2))) svuint16x2_t svunpk_u16_u8_x2(svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s32_s16_x4))) svint32x4_t svunpk_s32_s16_x4(svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s64_s32_x4))) svint64x4_t svunpk_s64_s32_x4(svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s16_s8_x4))) svint16x4_t svunpk_s16_s8_x4(svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u32_u16_x4))) svuint32x4_t svunpk_u32_u16_x4(svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u64_u32_x4))) svuint64x4_t svunpk_u64_u32_x4(svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u16_u8_x4))) svuint16x4_t svunpk_u16_u8_x4(svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u8_x2))) svuint8x2_t svuzp_u8_x2(svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u32_x2))) svuint32x2_t svuzp_u32_x2(svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u64_x2))) svuint64x2_t svuzp_u64_x2(svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u16_x2))) svuint16x2_t svuzp_u16_x2(svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_bf16_x2))) svbfloat16x2_t svuzp_bf16_x2(svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s8_x2))) svint8x2_t svuzp_s8_x2(svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f64_x2))) svfloat64x2_t svuzp_f64_x2(svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f32_x2))) svfloat32x2_t svuzp_f32_x2(svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f16_x2))) svfloat16x2_t svuzp_f16_x2(svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s32_x2))) svint32x2_t svuzp_s32_x2(svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s64_x2))) svint64x2_t svuzp_s64_x2(svint64x2_t); __ai 
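// Usage sketch for the widening unpack and de-interleave tuple operations above.
// widen_split is an illustrative name, and a streaming-mode caller is assumed.
//
//   static inline svint32x2_t widen_split(svint16_t h) __arm_streaming {
//       svint32x2_t w = svunpk_s32_s16_x2(h);   // sign-extend one int16 vector into a pair of int32 vectors
//       return svuzp_s32_x2(w);                 // de-interleave across the pair
//   }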
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s16_x2))) svint16x2_t svuzp_s16_x2(svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u8_x4))) svuint8x4_t svuzp_u8_x4(svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u32_x4))) svuint32x4_t svuzp_u32_x4(svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u64_x4))) svuint64x4_t svuzp_u64_x4(svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u16_x4))) svuint16x4_t svuzp_u16_x4(svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_bf16_x4))) svbfloat16x4_t svuzp_bf16_x4(svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s8_x4))) svint8x4_t svuzp_s8_x4(svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f64_x4))) svfloat64x4_t svuzp_f64_x4(svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f32_x4))) svfloat32x4_t svuzp_f32_x4(svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f16_x4))) svfloat16x4_t svuzp_f16_x4(svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s32_x4))) svint32x4_t svuzp_s32_x4(svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s64_x4))) svint64x4_t svuzp_s64_x4(svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s16_x4))) svint16x4_t svuzp_s16_x4(svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u8_x2))) svuint8x2_t svuzpq_u8_x2(svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u32_x2))) svuint32x2_t svuzpq_u32_x2(svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u64_x2))) svuint64x2_t svuzpq_u64_x2(svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u16_x2))) svuint16x2_t svuzpq_u16_x2(svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_bf16_x2))) svbfloat16x2_t svuzpq_bf16_x2(svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s8_x2))) svint8x2_t svuzpq_s8_x2(svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f64_x2))) svfloat64x2_t svuzpq_f64_x2(svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f32_x2))) svfloat32x2_t svuzpq_f32_x2(svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f16_x2))) svfloat16x2_t svuzpq_f16_x2(svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s32_x2))) svint32x2_t svuzpq_s32_x2(svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s64_x2))) svint64x2_t svuzpq_s64_x2(svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s16_x2))) svint16x2_t svuzpq_s16_x2(svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u8_x4))) svuint8x4_t svuzpq_u8_x4(svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u32_x4))) svuint32x4_t svuzpq_u32_x4(svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u64_x4))) svuint64x4_t svuzpq_u64_x4(svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u16_x4))) svuint16x4_t svuzpq_u16_x4(svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_bf16_x4))) svbfloat16x4_t svuzpq_bf16_x4(svbfloat16x4_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s8_x4))) svint8x4_t svuzpq_s8_x4(svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f64_x4))) svfloat64x4_t svuzpq_f64_x4(svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f32_x4))) svfloat32x4_t svuzpq_f32_x4(svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f16_x4))) svfloat16x4_t svuzpq_f16_x4(svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s32_x4))) svint32x4_t svuzpq_s32_x4(svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s64_x4))) svint64x4_t svuzpq_s64_x4(svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s16_x4))) svint16x4_t svuzpq_s16_x4(svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u8_x2))) svuint8x2_t svzip_u8_x2(svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u32_x2))) svuint32x2_t svzip_u32_x2(svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u64_x2))) svuint64x2_t svzip_u64_x2(svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u16_x2))) svuint16x2_t svzip_u16_x2(svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_bf16_x2))) svbfloat16x2_t svzip_bf16_x2(svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s8_x2))) svint8x2_t svzip_s8_x2(svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f64_x2))) svfloat64x2_t svzip_f64_x2(svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f32_x2))) svfloat32x2_t svzip_f32_x2(svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f16_x2))) svfloat16x2_t svzip_f16_x2(svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s32_x2))) svint32x2_t svzip_s32_x2(svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s64_x2))) svint64x2_t svzip_s64_x2(svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s16_x2))) svint16x2_t svzip_s16_x2(svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u8_x4))) svuint8x4_t svzip_u8_x4(svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u32_x4))) svuint32x4_t svzip_u32_x4(svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u64_x4))) svuint64x4_t svzip_u64_x4(svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u16_x4))) svuint16x4_t svzip_u16_x4(svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_bf16_x4))) svbfloat16x4_t svzip_bf16_x4(svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s8_x4))) svint8x4_t svzip_s8_x4(svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f64_x4))) svfloat64x4_t svzip_f64_x4(svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f32_x4))) svfloat32x4_t svzip_f32_x4(svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f16_x4))) svfloat16x4_t svzip_f16_x4(svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s32_x4))) svint32x4_t svzip_s32_x4(svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s64_x4))) svint64x4_t svzip_s64_x4(svint64x4_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s16_x4))) svint16x4_t svzip_s16_x4(svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u8_x2))) svuint8x2_t svzipq_u8_x2(svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u32_x2))) svuint32x2_t svzipq_u32_x2(svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u64_x2))) svuint64x2_t svzipq_u64_x2(svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u16_x2))) svuint16x2_t svzipq_u16_x2(svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_bf16_x2))) svbfloat16x2_t svzipq_bf16_x2(svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s8_x2))) svint8x2_t svzipq_s8_x2(svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f64_x2))) svfloat64x2_t svzipq_f64_x2(svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f32_x2))) svfloat32x2_t svzipq_f32_x2(svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f16_x2))) svfloat16x2_t svzipq_f16_x2(svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s32_x2))) svint32x2_t svzipq_s32_x2(svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s64_x2))) svint64x2_t svzipq_s64_x2(svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s16_x2))) svint16x2_t svzipq_s16_x2(svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u8_x4))) svuint8x4_t svzipq_u8_x4(svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u32_x4))) svuint32x4_t svzipq_u32_x4(svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u64_x4))) svuint64x4_t svzipq_u64_x4(svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u16_x4))) svuint16x4_t svzipq_u16_x4(svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_bf16_x4))) svbfloat16x4_t svzipq_bf16_x4(svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s8_x4))) svint8x4_t svzipq_s8_x4(svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f64_x4))) svfloat64x4_t svzipq_f64_x4(svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f32_x4))) svfloat32x4_t svzipq_f32_x4(svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f16_x4))) svfloat16x4_t svzipq_f16_x4(svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s32_x4))) svint32x4_t svzipq_s32_x4(svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s64_x4))) svint64x4_t svzipq_s64_x4(svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s16_x4))) svint16x4_t svzipq_s16_x4(svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u8_x2))) svuint8x2_t svadd(svuint8x2_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u32_x2))) svuint32x2_t svadd(svuint32x2_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u64_x2))) svuint64x2_t svadd(svuint64x2_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u16_x2))) svuint16x2_t svadd(svuint16x2_t, svuint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s8_x2))) svint8x2_t svadd(svint8x2_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s32_x2))) svint32x2_t svadd(svint32x2_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s64_x2))) svint64x2_t svadd(svint64x2_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s16_x2))) svint16x2_t svadd(svint16x2_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u8_x4))) svuint8x4_t svadd(svuint8x4_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u32_x4))) svuint32x4_t svadd(svuint32x4_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u64_x4))) svuint64x4_t svadd(svuint64x4_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u16_x4))) svuint16x4_t svadd(svuint16x4_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s8_x4))) svint8x4_t svadd(svint8x4_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s32_x4))) svint32x4_t svadd(svint32x4_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s64_x4))) svint64x4_t svadd(svint64x4_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s16_x4))) svint16x4_t svadd(svint16x4_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f64_x2))) svfloat64x2_t svclamp(svfloat64x2_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f32_x2))) svfloat32x2_t svclamp(svfloat32x2_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f16_x2))) svfloat16x2_t svclamp(svfloat16x2_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s8_x2))) svint8x2_t svclamp(svint8x2_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s32_x2))) svint32x2_t svclamp(svint32x2_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s64_x2))) svint64x2_t svclamp(svint64x2_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s16_x2))) svint16x2_t svclamp(svint16x2_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u8_x2))) svuint8x2_t svclamp(svuint8x2_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u32_x2))) svuint32x2_t svclamp(svuint32x2_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u64_x2))) svuint64x2_t svclamp(svuint64x2_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u16_x2))) svuint16x2_t svclamp(svuint16x2_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f64_x4))) svfloat64x4_t svclamp(svfloat64x4_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f32_x4))) svfloat32x4_t svclamp(svfloat32x4_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f16_x4))) svfloat16x4_t svclamp(svfloat16x4_t, svfloat16_t, 
svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s8_x4))) svint8x4_t svclamp(svint8x4_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s32_x4))) svint32x4_t svclamp(svint32x4_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s64_x4))) svint64x4_t svclamp(svint64x4_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s16_x4))) svint16x4_t svclamp(svint16x4_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u8_x4))) svuint8x4_t svclamp(svuint8x4_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u32_x4))) svuint32x4_t svclamp(svuint32x4_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u64_x4))) svuint64x4_t svclamp(svuint64x4_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u16_x4))) svuint16x4_t svclamp(svuint16x4_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_x2))) svbfloat16_t svcvt_bf16(svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f32_x2))) svfloat16_t svcvt_f16(svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_x2))) svint32x2_t svcvt_s32(svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_x2))) svuint32x2_t svcvt_u32(svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_x4))) svint32x4_t svcvt_s32(svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_x4))) svuint32x4_t svcvt_u32(svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_x2))) svfloat32x2_t svcvt_f32(svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_x4))) svfloat32x4_t svcvt_f32(svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_x2))) svfloat32x2_t svcvt_f32(svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_x4))) svfloat32x4_t svcvt_f32(svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_bf16_f32_x2))) svbfloat16_t svcvtn_bf16(svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_f16_f32_x2))) svfloat16_t svcvtn_f16(svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f64_x2))) svfloat64x2_t svmax(svfloat64x2_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f32_x2))) svfloat32x2_t svmax(svfloat32x2_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f16_x2))) svfloat16x2_t svmax(svfloat16x2_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s8_x2))) svint8x2_t svmax(svint8x2_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s32_x2))) svint32x2_t svmax(svint32x2_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s64_x2))) svint64x2_t svmax(svint64x2_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s16_x2))) svint16x2_t svmax(svint16x2_t, svint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u8_x2))) svuint8x2_t svmax(svuint8x2_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u32_x2))) svuint32x2_t svmax(svuint32x2_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u64_x2))) svuint64x2_t svmax(svuint64x2_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u16_x2))) svuint16x2_t svmax(svuint16x2_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f64_x4))) svfloat64x4_t svmax(svfloat64x4_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f32_x4))) svfloat32x4_t svmax(svfloat32x4_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f16_x4))) svfloat16x4_t svmax(svfloat16x4_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s8_x4))) svint8x4_t svmax(svint8x4_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s32_x4))) svint32x4_t svmax(svint32x4_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s64_x4))) svint64x4_t svmax(svint64x4_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s16_x4))) svint16x4_t svmax(svint16x4_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u8_x4))) svuint8x4_t svmax(svuint8x4_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u32_x4))) svuint32x4_t svmax(svuint32x4_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u64_x4))) svuint64x4_t svmax(svuint64x4_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u16_x4))) svuint16x4_t svmax(svuint16x4_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_x2))) svfloat64x2_t svmax(svfloat64x2_t, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_x2))) svfloat32x2_t svmax(svfloat32x2_t, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_x2))) svfloat16x2_t svmax(svfloat16x2_t, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_x2))) svint8x2_t svmax(svint8x2_t, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_x2))) svint32x2_t svmax(svint32x2_t, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_x2))) svint64x2_t svmax(svint64x2_t, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_x2))) svint16x2_t svmax(svint16x2_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_x2))) svuint8x2_t svmax(svuint8x2_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_x2))) svuint32x2_t svmax(svuint32x2_t, svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_x2))) svuint64x2_t svmax(svuint64x2_t, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_x2))) svuint16x2_t svmax(svuint16x2_t, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_x4))) svfloat64x4_t svmax(svfloat64x4_t, svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_x4))) svfloat32x4_t svmax(svfloat32x4_t, svfloat32x4_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_x4))) svfloat16x4_t svmax(svfloat16x4_t, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_x4))) svint8x4_t svmax(svint8x4_t, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_x4))) svint32x4_t svmax(svint32x4_t, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_x4))) svint64x4_t svmax(svint64x4_t, svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_x4))) svint16x4_t svmax(svint16x4_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_x4))) svuint8x4_t svmax(svuint8x4_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_x4))) svuint32x4_t svmax(svuint32x4_t, svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_x4))) svuint64x4_t svmax(svuint64x4_t, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_x4))) svuint16x4_t svmax(svuint16x4_t, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f64_x2))) svfloat64x2_t svmaxnm(svfloat64x2_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f32_x2))) svfloat32x2_t svmaxnm(svfloat32x2_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f16_x2))) svfloat16x2_t svmaxnm(svfloat16x2_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f64_x4))) svfloat64x4_t svmaxnm(svfloat64x4_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f32_x4))) svfloat32x4_t svmaxnm(svfloat32x4_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f16_x4))) svfloat16x4_t svmaxnm(svfloat16x4_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_x2))) svfloat64x2_t svmaxnm(svfloat64x2_t, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_x2))) svfloat32x2_t svmaxnm(svfloat32x2_t, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_x2))) svfloat16x2_t svmaxnm(svfloat16x2_t, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_x4))) svfloat64x4_t svmaxnm(svfloat64x4_t, svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_x4))) svfloat32x4_t svmaxnm(svfloat32x4_t, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_x4))) svfloat16x4_t svmaxnm(svfloat16x4_t, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f64_x2))) svfloat64x2_t svmin(svfloat64x2_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f32_x2))) svfloat32x2_t svmin(svfloat32x2_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f16_x2))) svfloat16x2_t svmin(svfloat16x2_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s8_x2))) svint8x2_t svmin(svint8x2_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s32_x2))) svint32x2_t svmin(svint32x2_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s64_x2))) svint64x2_t svmin(svint64x2_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s16_x2))) svint16x2_t svmin(svint16x2_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u8_x2))) svuint8x2_t svmin(svuint8x2_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u32_x2))) svuint32x2_t svmin(svuint32x2_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u64_x2))) svuint64x2_t svmin(svuint64x2_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u16_x2))) svuint16x2_t svmin(svuint16x2_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f64_x4))) svfloat64x4_t svmin(svfloat64x4_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f32_x4))) svfloat32x4_t svmin(svfloat32x4_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f16_x4))) svfloat16x4_t svmin(svfloat16x4_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s8_x4))) svint8x4_t svmin(svint8x4_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s32_x4))) svint32x4_t svmin(svint32x4_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s64_x4))) svint64x4_t svmin(svint64x4_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s16_x4))) svint16x4_t svmin(svint16x4_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u8_x4))) svuint8x4_t svmin(svuint8x4_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u32_x4))) svuint32x4_t svmin(svuint32x4_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u64_x4))) svuint64x4_t svmin(svuint64x4_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u16_x4))) svuint16x4_t svmin(svuint16x4_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_x2))) svfloat64x2_t svmin(svfloat64x2_t, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_x2))) svfloat32x2_t svmin(svfloat32x2_t, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_x2))) svfloat16x2_t svmin(svfloat16x2_t, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_x2))) svint8x2_t svmin(svint8x2_t, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_x2))) svint32x2_t svmin(svint32x2_t, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_x2))) svint64x2_t svmin(svint64x2_t, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_x2))) svint16x2_t svmin(svint16x2_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_x2))) svuint8x2_t svmin(svuint8x2_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_x2))) svuint32x2_t svmin(svuint32x2_t, svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_x2))) svuint64x2_t svmin(svuint64x2_t, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_x2))) svuint16x2_t svmin(svuint16x2_t, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_x4))) svfloat64x4_t svmin(svfloat64x4_t, svfloat64x4_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_x4))) svfloat32x4_t svmin(svfloat32x4_t, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_x4))) svfloat16x4_t svmin(svfloat16x4_t, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_x4))) svint8x4_t svmin(svint8x4_t, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_x4))) svint32x4_t svmin(svint32x4_t, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_x4))) svint64x4_t svmin(svint64x4_t, svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_x4))) svint16x4_t svmin(svint16x4_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_x4))) svuint8x4_t svmin(svuint8x4_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_x4))) svuint32x4_t svmin(svuint32x4_t, svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_x4))) svuint64x4_t svmin(svuint64x4_t, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_x4))) svuint16x4_t svmin(svuint16x4_t, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f64_x2))) svfloat64x2_t svminnm(svfloat64x2_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f32_x2))) svfloat32x2_t svminnm(svfloat32x2_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f16_x2))) svfloat16x2_t svminnm(svfloat16x2_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f64_x4))) svfloat64x4_t svminnm(svfloat64x4_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f32_x4))) svfloat32x4_t svminnm(svfloat32x4_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f16_x4))) svfloat16x4_t svminnm(svfloat16x4_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_x2))) svfloat64x2_t svminnm(svfloat64x2_t, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_x2))) svfloat32x2_t svminnm(svfloat32x2_t, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_x2))) svfloat16x2_t svminnm(svfloat16x2_t, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_x4))) svfloat64x4_t svminnm(svfloat64x4_t, svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_x4))) svfloat32x4_t svminnm(svfloat32x4_t, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_x4))) svfloat16x4_t svminnm(svfloat16x4_t, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_s16_s32_x2))) svint16_t svqcvt_s16(svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_s16_s64_x4))) svint16_t svqcvt_s16(svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_s8_s32_x4))) svint8_t svqcvt_s8(svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_s32_x2))) svuint16_t svqcvt_u16(svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_u32_x2))) svuint16_t svqcvt_u16(svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_s64_x4))) svuint16_t svqcvt_u16(svint64x4_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_u64_x4))) svuint16_t svqcvt_u16(svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u8_s32_x4))) svuint8_t svqcvt_u8(svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u8_u32_x4))) svuint8_t svqcvt_u8(svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s16_s64_x4))) svint16_t svqcvtn_s16(svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s8_s32_x4))) svint8_t svqcvtn_s8(svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_s64_x4))) svuint16_t svqcvtn_u16(svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_u64_x4))) svuint16_t svqcvtn_u16(svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u8_s32_x4))) svuint8_t svqcvtn_u8(svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u8_u32_x4))) svuint8_t svqcvtn_u8(svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s8_x2))) svint8x2_t svqdmulh(svint8x2_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s32_x2))) svint32x2_t svqdmulh(svint32x2_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s64_x2))) svint64x2_t svqdmulh(svint64x2_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s16_x2))) svint16x2_t svqdmulh(svint16x2_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s8_x4))) svint8x4_t svqdmulh(svint8x4_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s32_x4))) svint32x4_t svqdmulh(svint32x4_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s64_x4))) svint64x4_t svqdmulh(svint64x4_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s16_x4))) svint16x4_t svqdmulh(svint16x4_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s8_x2))) svint8x2_t svqdmulh(svint8x2_t, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s32_x2))) svint32x2_t svqdmulh(svint32x2_t, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s64_x2))) svint64x2_t svqdmulh(svint64x2_t, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s16_x2))) svint16x2_t svqdmulh(svint16x2_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s8_x4))) svint8x4_t svqdmulh(svint8x4_t, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s32_x4))) svint32x4_t svqdmulh(svint32x4_t, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s64_x4))) svint64x4_t svqdmulh(svint64x4_t, svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s16_x4))) svint16x4_t svqdmulh(svint16x4_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_s16_s32_x2))) svint16_t svqrshr_s16(svint32x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_u16_u32_x2))) svuint16_t svqrshr_u16(svuint32x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_s8_s32_x4))) svint8_t svqrshr_s8(svint32x4_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_s16_s64_x4))) svint16_t svqrshr_s16(svint64x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_u8_u32_x4))) svuint8_t svqrshr_u8(svuint32x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_u16_u64_x4))) svuint16_t svqrshr_u16(svuint64x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s8_s32_x4))) svint8_t svqrshrn_s8(svint32x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s16_s64_x4))) svint16_t svqrshrn_s16(svint64x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u8_u32_x4))) svuint8_t svqrshrn_u8(svuint32x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u16_u64_x4))) svuint16_t svqrshrn_u16(svuint64x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshru_n_u16_s32_x2))) svuint16_t svqrshru_u16(svint32x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshru_n_u8_s32_x4))) svuint8_t svqrshru_u8(svint32x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshru_n_u16_s64_x4))) svuint16_t svqrshru_u16(svint64x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u8_s32_x4))) svuint8_t svqrshrun_u8(svint32x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u16_s64_x4))) svuint16_t svqrshrun_u16(svint64x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_b))) svbool_t svreinterpret(svcount_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_c))) svcount_t svreinterpret(svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_x2))) svfloat32x2_t svrinta(svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_x4))) svfloat32x4_t svrinta(svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_x2))) svfloat32x2_t svrintm(svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_x4))) svfloat32x4_t svrintm(svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_x2))) svfloat32x2_t svrintn(svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_x4))) svfloat32x4_t svrintn(svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_x2))) svfloat32x2_t svrintp(svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_x4))) svfloat32x4_t svrintp(svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s8_x2))) svint8x2_t svrshl(svint8x2_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s32_x2))) svint32x2_t svrshl(svint32x2_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s64_x2))) svint64x2_t svrshl(svint64x2_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s16_x2))) svint16x2_t svrshl(svint16x2_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u8_x2))) svuint8x2_t svrshl(svuint8x2_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u32_x2))) svuint32x2_t svrshl(svuint32x2_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u64_x2))) svuint64x2_t svrshl(svuint64x2_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u16_x2))) svuint16x2_t svrshl(svuint16x2_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s8_x4))) svint8x4_t svrshl(svint8x4_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s32_x4))) svint32x4_t svrshl(svint32x4_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s64_x4))) svint64x4_t svrshl(svint64x4_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s16_x4))) svint16x4_t svrshl(svint16x4_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u8_x4))) svuint8x4_t svrshl(svuint8x4_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u32_x4))) svuint32x4_t svrshl(svuint32x4_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u64_x4))) svuint64x4_t svrshl(svuint64x4_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u16_x4))) svuint16x4_t svrshl(svuint16x4_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_x2))) svint8x2_t svrshl(svint8x2_t, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_x2))) svint32x2_t svrshl(svint32x2_t, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_x2))) svint64x2_t svrshl(svint64x2_t, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_x2))) svint16x2_t svrshl(svint16x2_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_x2))) svuint8x2_t svrshl(svuint8x2_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_x2))) svuint32x2_t svrshl(svuint32x2_t, svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_x2))) svuint64x2_t svrshl(svuint64x2_t, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_x2))) svuint16x2_t svrshl(svuint16x2_t, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_x4))) svint8x4_t svrshl(svint8x4_t, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_x4))) svint32x4_t svrshl(svint32x4_t, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_x4))) svint64x4_t svrshl(svint64x4_t, svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_x4))) svint16x4_t svrshl(svint16x4_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_x4))) svuint8x4_t svrshl(svuint8x4_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_x4))) svuint32x4_t svrshl(svuint32x4_t, svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_x4))) svuint64x4_t svrshl(svuint64x4_t, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_x4))) svuint16x4_t svrshl(svuint16x4_t, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u8_x2))) svuint8x2_t svsel(svcount_t, svuint8x2_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u32_x2))) svuint32x2_t svsel(svcount_t, svuint32x2_t, svuint32x2_t); 
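/*
 * Illustrative usage sketch (editor addition, not part of the original header
 * dump). The overloaded forms declared in this region, e.g. svclamp() on a
 * two-vector tuple, resolve through the __clang_arm_builtin_alias attributes
 * to the matching __builtin_sve_* builtins. This is a minimal sketch assuming
 * a Clang with SME2 support and a streaming caller (e.g. built with
 * -march=armv9.4-a+sme2); the function name clamp_pair_s32 is hypothetical.
 */
#include <arm_sve.h>

static svint32x2_t clamp_pair_s32(svint32x2_t v, svint32_t lo, svint32_t hi)
    __arm_streaming {
  /* Resolves to __builtin_sve_svclamp_single_s32_x2 via the alias above. */
  return svclamp(v, lo, hi);
}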
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u64_x2))) svuint64x2_t svsel(svcount_t, svuint64x2_t, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u16_x2))) svuint16x2_t svsel(svcount_t, svuint16x2_t, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_bf16_x2))) svbfloat16x2_t svsel(svcount_t, svbfloat16x2_t, svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s8_x2))) svint8x2_t svsel(svcount_t, svint8x2_t, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f64_x2))) svfloat64x2_t svsel(svcount_t, svfloat64x2_t, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f32_x2))) svfloat32x2_t svsel(svcount_t, svfloat32x2_t, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f16_x2))) svfloat16x2_t svsel(svcount_t, svfloat16x2_t, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s32_x2))) svint32x2_t svsel(svcount_t, svint32x2_t, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s64_x2))) svint64x2_t svsel(svcount_t, svint64x2_t, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s16_x2))) svint16x2_t svsel(svcount_t, svint16x2_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u8_x4))) svuint8x4_t svsel(svcount_t, svuint8x4_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u32_x4))) svuint32x4_t svsel(svcount_t, svuint32x4_t, svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u64_x4))) svuint64x4_t svsel(svcount_t, svuint64x4_t, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u16_x4))) svuint16x4_t svsel(svcount_t, svuint16x4_t, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_bf16_x4))) svbfloat16x4_t svsel(svcount_t, svbfloat16x4_t, svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s8_x4))) svint8x4_t svsel(svcount_t, svint8x4_t, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f64_x4))) svfloat64x4_t svsel(svcount_t, svfloat64x4_t, svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f32_x4))) svfloat32x4_t svsel(svcount_t, svfloat32x4_t, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f16_x4))) svfloat16x4_t svsel(svcount_t, svfloat16x4_t, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s32_x4))) svint32x4_t svsel(svcount_t, svint32x4_t, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s64_x4))) svint64x4_t svsel(svcount_t, svint64x4_t, svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s16_x4))) svint16x4_t svsel(svcount_t, svint16x4_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s32_s16_x2))) svint32x2_t svunpk_s32(svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s64_s32_x2))) svint64x2_t svunpk_s64(svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s16_s8_x2))) svint16x2_t svunpk_s16(svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u32_u16_x2))) svuint32x2_t svunpk_u32(svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u64_u32_x2))) svuint64x2_t 
svunpk_u64(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u16_u8_x2))) svuint16x2_t svunpk_u16(svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s32_s16_x4))) svint32x4_t svunpk_s32(svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s64_s32_x4))) svint64x4_t svunpk_s64(svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s16_s8_x4))) svint16x4_t svunpk_s16(svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u32_u16_x4))) svuint32x4_t svunpk_u32(svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u64_u32_x4))) svuint64x4_t svunpk_u64(svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u16_u8_x4))) svuint16x4_t svunpk_u16(svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u8_x2))) svuint8x2_t svuzp(svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u32_x2))) svuint32x2_t svuzp(svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u64_x2))) svuint64x2_t svuzp(svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u16_x2))) svuint16x2_t svuzp(svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_bf16_x2))) svbfloat16x2_t svuzp(svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s8_x2))) svint8x2_t svuzp(svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f64_x2))) svfloat64x2_t svuzp(svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f32_x2))) svfloat32x2_t svuzp(svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f16_x2))) svfloat16x2_t svuzp(svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s32_x2))) svint32x2_t svuzp(svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s64_x2))) svint64x2_t svuzp(svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s16_x2))) svint16x2_t svuzp(svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u8_x4))) svuint8x4_t svuzp(svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u32_x4))) svuint32x4_t svuzp(svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u64_x4))) svuint64x4_t svuzp(svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u16_x4))) svuint16x4_t svuzp(svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_bf16_x4))) svbfloat16x4_t svuzp(svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s8_x4))) svint8x4_t svuzp(svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f64_x4))) svfloat64x4_t svuzp(svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f32_x4))) svfloat32x4_t svuzp(svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f16_x4))) svfloat16x4_t svuzp(svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s32_x4))) svint32x4_t svuzp(svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s64_x4))) svint64x4_t svuzp(svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s16_x4))) svint16x4_t svuzp(svint16x4_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u8_x2))) svuint8x2_t svuzpq(svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u32_x2))) svuint32x2_t svuzpq(svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u64_x2))) svuint64x2_t svuzpq(svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u16_x2))) svuint16x2_t svuzpq(svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_bf16_x2))) svbfloat16x2_t svuzpq(svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s8_x2))) svint8x2_t svuzpq(svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f64_x2))) svfloat64x2_t svuzpq(svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f32_x2))) svfloat32x2_t svuzpq(svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f16_x2))) svfloat16x2_t svuzpq(svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s32_x2))) svint32x2_t svuzpq(svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s64_x2))) svint64x2_t svuzpq(svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s16_x2))) svint16x2_t svuzpq(svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u8_x4))) svuint8x4_t svuzpq(svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u32_x4))) svuint32x4_t svuzpq(svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u64_x4))) svuint64x4_t svuzpq(svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u16_x4))) svuint16x4_t svuzpq(svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_bf16_x4))) svbfloat16x4_t svuzpq(svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s8_x4))) svint8x4_t svuzpq(svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f64_x4))) svfloat64x4_t svuzpq(svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f32_x4))) svfloat32x4_t svuzpq(svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f16_x4))) svfloat16x4_t svuzpq(svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s32_x4))) svint32x4_t svuzpq(svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s64_x4))) svint64x4_t svuzpq(svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s16_x4))) svint16x4_t svuzpq(svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u8_x2))) svuint8x2_t svzip(svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u32_x2))) svuint32x2_t svzip(svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u64_x2))) svuint64x2_t svzip(svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u16_x2))) svuint16x2_t svzip(svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_bf16_x2))) svbfloat16x2_t svzip(svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s8_x2))) svint8x2_t svzip(svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f64_x2))) svfloat64x2_t svzip(svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f32_x2))) 
svfloat32x2_t svzip(svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f16_x2))) svfloat16x2_t svzip(svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s32_x2))) svint32x2_t svzip(svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s64_x2))) svint64x2_t svzip(svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s16_x2))) svint16x2_t svzip(svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u8_x4))) svuint8x4_t svzip(svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u32_x4))) svuint32x4_t svzip(svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u64_x4))) svuint64x4_t svzip(svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u16_x4))) svuint16x4_t svzip(svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_bf16_x4))) svbfloat16x4_t svzip(svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s8_x4))) svint8x4_t svzip(svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f64_x4))) svfloat64x4_t svzip(svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f32_x4))) svfloat32x4_t svzip(svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f16_x4))) svfloat16x4_t svzip(svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s32_x4))) svint32x4_t svzip(svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s64_x4))) svint64x4_t svzip(svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s16_x4))) svint16x4_t svzip(svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u8_x2))) svuint8x2_t svzipq(svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u32_x2))) svuint32x2_t svzipq(svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u64_x2))) svuint64x2_t svzipq(svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u16_x2))) svuint16x2_t svzipq(svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_bf16_x2))) svbfloat16x2_t svzipq(svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s8_x2))) svint8x2_t svzipq(svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f64_x2))) svfloat64x2_t svzipq(svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f32_x2))) svfloat32x2_t svzipq(svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f16_x2))) svfloat16x2_t svzipq(svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s32_x2))) svint32x2_t svzipq(svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s64_x2))) svint64x2_t svzipq(svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s16_x2))) svint16x2_t svzipq(svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u8_x4))) svuint8x4_t svzipq(svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u32_x4))) svuint32x4_t svzipq(svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u64_x4))) svuint64x4_t svzipq(svuint64x4_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u16_x4))) svuint16x4_t svzipq(svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_bf16_x4))) svbfloat16x4_t svzipq(svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s8_x4))) svint8x4_t svzipq(svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f64_x4))) svfloat64x4_t svzipq(svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f32_x4))) svfloat32x4_t svzipq(svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f16_x4))) svfloat16x4_t svzipq(svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s32_x4))) svint32x4_t svzipq(svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s64_x4))) svint64x4_t svzipq(svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s16_x4))) svint16x4_t svzipq(svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s16_s32_x2))) svint16_t svqcvtn_s16_s32_x2(svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_s32_x2))) svuint16_t svqcvtn_u16_s32_x2(svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_u32_x2))) svuint16_t svqcvtn_u16_u32_x2(svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s16_s32_x2))) svint16_t svqcvtn_s16(svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_s32_x2))) svuint16_t svqcvtn_u16(svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_u32_x2))) svuint16_t svqcvtn_u16(svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_m))) svfloat64_t svabd_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f32_m))) svfloat32_t svabd_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f16_m))) svfloat16_t svabd_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_x))) svfloat64_t svabd_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f32_x))) svfloat32_t svabd_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f16_x))) svfloat16_t svabd_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_z))) svfloat64_t svabd_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f32_z))) svfloat32_t svabd_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f16_z))) svfloat16_t svabd_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s8_m))) svint8_t svabd_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s32_m))) svint32_t svabd_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s64_m))) svint64_t svabd_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s16_m))) svint16_t svabd_n_s16_m(svbool_t, svint16_t, int16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s8_x))) svint8_t svabd_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s32_x))) svint32_t svabd_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s64_x))) svint64_t svabd_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s16_x))) svint16_t svabd_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s8_z))) svint8_t svabd_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s32_z))) svint32_t svabd_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s64_z))) svint64_t svabd_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s16_z))) svint16_t svabd_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u8_m))) svuint8_t svabd_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u32_m))) svuint32_t svabd_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u64_m))) svuint64_t svabd_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u16_m))) svuint16_t svabd_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u8_x))) svuint8_t svabd_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u32_x))) svuint32_t svabd_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u64_x))) svuint64_t svabd_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u16_x))) svuint16_t svabd_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u8_z))) svuint8_t svabd_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u32_z))) svuint32_t svabd_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u64_z))) svuint64_t svabd_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u16_z))) svuint16_t svabd_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f64_m))) svfloat64_t svabd_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f32_m))) svfloat32_t svabd_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f16_m))) svfloat16_t svabd_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f64_x))) svfloat64_t svabd_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f32_x))) svfloat32_t svabd_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f16_x))) svfloat16_t svabd_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f64_z))) svfloat64_t 
svabd_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f32_z))) svfloat32_t svabd_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f16_z))) svfloat16_t svabd_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s8_m))) svint8_t svabd_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s32_m))) svint32_t svabd_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s64_m))) svint64_t svabd_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s16_m))) svint16_t svabd_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s8_x))) svint8_t svabd_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s32_x))) svint32_t svabd_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s64_x))) svint64_t svabd_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s16_x))) svint16_t svabd_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s8_z))) svint8_t svabd_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s32_z))) svint32_t svabd_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s64_z))) svint64_t svabd_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s16_z))) svint16_t svabd_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u8_m))) svuint8_t svabd_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u32_m))) svuint32_t svabd_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u64_m))) svuint64_t svabd_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u16_m))) svuint16_t svabd_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u8_x))) svuint8_t svabd_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u32_x))) svuint32_t svabd_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u64_x))) svuint64_t svabd_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u16_x))) svuint16_t svabd_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u8_z))) svuint8_t svabd_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u32_z))) svuint32_t svabd_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u64_z))) svuint64_t svabd_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u16_z))) svuint16_t svabd_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f64_m))) svfloat64_t 
svabs_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f32_m))) svfloat32_t svabs_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f16_m))) svfloat16_t svabs_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f64_x))) svfloat64_t svabs_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f32_x))) svfloat32_t svabs_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f16_x))) svfloat16_t svabs_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f64_z))) svfloat64_t svabs_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f32_z))) svfloat32_t svabs_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f16_z))) svfloat16_t svabs_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s8_m))) svint8_t svabs_s8_m(svint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s32_m))) svint32_t svabs_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s64_m))) svint64_t svabs_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s16_m))) svint16_t svabs_s16_m(svint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s8_x))) svint8_t svabs_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s32_x))) svint32_t svabs_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s64_x))) svint64_t svabs_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s16_x))) svint16_t svabs_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s8_z))) svint8_t svabs_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s32_z))) svint32_t svabs_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s64_z))) svint64_t svabs_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s16_z))) svint16_t svabs_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_n_f64))) svbool_t svacge_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_n_f32))) svbool_t svacge_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_n_f16))) svbool_t svacge_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_f64))) svbool_t svacge_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_f32))) svbool_t svacge_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_f16))) svbool_t svacge_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_n_f64))) svbool_t svacgt_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_n_f32))) svbool_t svacgt_n_f32(svbool_t, 
svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_n_f16))) svbool_t svacgt_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_f64))) svbool_t svacgt_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_f32))) svbool_t svacgt_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_f16))) svbool_t svacgt_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_n_f64))) svbool_t svacle_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_n_f32))) svbool_t svacle_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_n_f16))) svbool_t svacle_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_f64))) svbool_t svacle_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_f32))) svbool_t svacle_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_f16))) svbool_t svacle_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_n_f64))) svbool_t svaclt_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_n_f32))) svbool_t svaclt_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_n_f16))) svbool_t svaclt_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_f64))) svbool_t svaclt_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_f32))) svbool_t svaclt_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_f16))) svbool_t svaclt_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f64_m))) svfloat64_t svadd_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f32_m))) svfloat32_t svadd_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f16_m))) svfloat16_t svadd_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f64_x))) svfloat64_t svadd_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f32_x))) svfloat32_t svadd_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f16_x))) svfloat16_t svadd_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f64_z))) svfloat64_t svadd_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f32_z))) svfloat32_t svadd_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f16_z))) svfloat16_t svadd_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u8_m))) svuint8_t svadd_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u32_m))) svuint32_t svadd_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u64_m))) svuint64_t svadd_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u16_m))) svuint16_t svadd_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s8_m))) svint8_t svadd_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s32_m))) svint32_t svadd_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s64_m))) svint64_t svadd_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s16_m))) svint16_t svadd_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u8_x))) svuint8_t svadd_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u32_x))) svuint32_t svadd_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u64_x))) svuint64_t svadd_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u16_x))) svuint16_t svadd_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s8_x))) svint8_t svadd_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s32_x))) svint32_t svadd_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s64_x))) svint64_t svadd_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s16_x))) svint16_t svadd_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u8_z))) svuint8_t svadd_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u32_z))) svuint32_t svadd_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u64_z))) svuint64_t svadd_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u16_z))) svuint16_t svadd_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s8_z))) svint8_t svadd_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s32_z))) svint32_t svadd_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s64_z))) svint64_t svadd_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s16_z))) svint16_t svadd_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f64_m))) svfloat64_t svadd_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f32_m))) svfloat32_t svadd_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f16_m))) svfloat16_t svadd_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f64_x))) svfloat64_t 
svadd_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f32_x))) svfloat32_t svadd_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f16_x))) svfloat16_t svadd_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f64_z))) svfloat64_t svadd_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f32_z))) svfloat32_t svadd_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f16_z))) svfloat16_t svadd_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u8_m))) svuint8_t svadd_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u32_m))) svuint32_t svadd_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u64_m))) svuint64_t svadd_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u16_m))) svuint16_t svadd_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s8_m))) svint8_t svadd_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s32_m))) svint32_t svadd_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s64_m))) svint64_t svadd_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s16_m))) svint16_t svadd_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u8_x))) svuint8_t svadd_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u32_x))) svuint32_t svadd_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u64_x))) svuint64_t svadd_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u16_x))) svuint16_t svadd_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s8_x))) svint8_t svadd_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s32_x))) svint32_t svadd_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s64_x))) svint64_t svadd_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s16_x))) svint16_t svadd_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u8_z))) svuint8_t svadd_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u32_z))) svuint32_t svadd_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u64_z))) svuint64_t svadd_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u16_z))) svuint16_t svadd_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s8_z))) svint8_t svadd_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s32_z))) svint32_t 
svadd_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s64_z))) svint64_t svadd_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s16_z))) svint16_t svadd_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f64))) float64_t svadda_f64(svbool_t, float64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f32))) float32_t svadda_f32(svbool_t, float32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f16))) float16_t svadda_f16(svbool_t, float16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_s8))) int64_t svaddv_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_s32))) int64_t svaddv_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_s64))) int64_t svaddv_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_s16))) int64_t svaddv_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_u8))) uint64_t svaddv_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_u32))) uint64_t svaddv_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_u64))) uint64_t svaddv_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_u16))) uint64_t svaddv_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_f64))) float64_t svaddv_f64(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_f32))) float32_t svaddv_f32(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_f16))) float16_t svaddv_f16(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrb_u32base_u32offset))) svuint32_t svadrb_u32base_u32offset(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrb_u64base_u64offset))) svuint64_t svadrb_u64base_u64offset(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrb_u32base_s32offset))) svuint32_t svadrb_u32base_s32offset(svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrb_u64base_s64offset))) svuint64_t svadrb_u64base_s64offset(svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrd_u32base_u32index))) svuint32_t svadrd_u32base_u32index(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrd_u64base_u64index))) svuint64_t svadrd_u64base_u64index(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrd_u32base_s32index))) svuint32_t svadrd_u32base_s32index(svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrd_u64base_s64index))) svuint64_t svadrd_u64base_s64index(svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrh_u32base_u32index))) svuint32_t svadrh_u32base_u32index(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrh_u64base_u64index))) svuint64_t svadrh_u64base_u64index(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrh_u32base_s32index))) svuint32_t 
svadrh_u32base_s32index(svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrh_u64base_s64index))) svuint64_t svadrh_u64base_s64index(svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u32base_u32index))) svuint32_t svadrw_u32base_u32index(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u64base_u64index))) svuint64_t svadrw_u64base_u64index(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u32base_s32index))) svuint32_t svadrw_u32base_s32index(svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u64base_s64index))) svuint64_t svadrw_u64base_s64index(svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_b_z))) svbool_t svand_b_z(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u8_m))) svuint8_t svand_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u32_m))) svuint32_t svand_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u64_m))) svuint64_t svand_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u16_m))) svuint16_t svand_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s8_m))) svint8_t svand_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s32_m))) svint32_t svand_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s64_m))) svint64_t svand_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s16_m))) svint16_t svand_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u8_x))) svuint8_t svand_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u32_x))) svuint32_t svand_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u64_x))) svuint64_t svand_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u16_x))) svuint16_t svand_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s8_x))) svint8_t svand_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s32_x))) svint32_t svand_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s64_x))) svint64_t svand_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s16_x))) svint16_t svand_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u8_z))) svuint8_t svand_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u32_z))) svuint32_t svand_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u64_z))) svuint64_t svand_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u16_z))) svuint16_t svand_n_u16_z(svbool_t, svuint16_t, uint16_t); 
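/* Illustrative usage sketch (editorial addition, not part of the original header).
   It shows the _m/_x/_z predication convention used by the predicated intrinsics
   declared above, plus a simple strip-mined loop. The helper names below are
   hypothetical; svcntw, svwhilelt_b32_s64 and svld1_f32 come from the same
   <arm_sve.h> interface but are declared elsewhere in it. Assumes an AArch64
   compiler with SVE enabled (e.g. -march=armv8-a+sve). */
#include <arm_sve.h>

/* _m: inactive lanes keep the corresponding element of the first vector operand.
   _z: inactive lanes are cleared to zero.
   _x: inactive lanes take an unspecified value (typically the cheapest form). */
static inline void predication_demo(svbool_t pg, svfloat32_t a, svfloat32_t b,
                                    svfloat32_t *m, svfloat32_t *z, svfloat32_t *x) {
    *m = svadd_f32_m(pg, a, b);   /* declared above */
    *z = svadd_f32_z(pg, a, b);   /* declared above */
    *x = svadd_f32_x(pg, a, b);   /* declared above */
}

/* Sum of absolute differences over two float arrays: svwhilelt_b32_s64 builds the
   governing predicate (including the tail), svabd_f32_x computes |a - b| per lane,
   and svaddv_f32 (declared above) reduces the active lanes to a scalar. */
static inline float sum_abs_diff(const float *a, const float *b, int64_t n) {
    float total = 0.0f;
    for (int64_t i = 0; i < n; i += (int64_t)svcntw()) {
        svbool_t pg = svwhilelt_b32_s64(i, n);
        svfloat32_t va = svld1_f32(pg, a + i);
        svfloat32_t vb = svld1_f32(pg, b + i);
        total += svaddv_f32(pg, svabd_f32_x(pg, va, vb));
    }
    return total;
}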
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s8_z))) svint8_t svand_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s32_z))) svint32_t svand_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s64_z))) svint64_t svand_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s16_z))) svint16_t svand_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u8_m))) svuint8_t svand_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u32_m))) svuint32_t svand_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u64_m))) svuint64_t svand_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u16_m))) svuint16_t svand_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s8_m))) svint8_t svand_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s32_m))) svint32_t svand_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s64_m))) svint64_t svand_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s16_m))) svint16_t svand_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u8_x))) svuint8_t svand_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u32_x))) svuint32_t svand_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u64_x))) svuint64_t svand_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u16_x))) svuint16_t svand_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s8_x))) svint8_t svand_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s32_x))) svint32_t svand_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s64_x))) svint64_t svand_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s16_x))) svint16_t svand_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u8_z))) svuint8_t svand_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u32_z))) svuint32_t svand_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u64_z))) svuint64_t svand_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u16_z))) svuint16_t svand_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s8_z))) svint8_t svand_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s32_z))) svint32_t svand_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s64_z))) svint64_t svand_s64_z(svbool_t, svint64_t, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s16_z))) svint16_t svand_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_u8))) uint8_t svandv_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_u32))) uint32_t svandv_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_u64))) uint64_t svandv_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_u16))) uint16_t svandv_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_s8))) int8_t svandv_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_s32))) int32_t svandv_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_s64))) int64_t svandv_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_s16))) int16_t svandv_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s8_m))) svint8_t svasr_n_s8_m(svbool_t, svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s32_m))) svint32_t svasr_n_s32_m(svbool_t, svint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s64_m))) svint64_t svasr_n_s64_m(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s16_m))) svint16_t svasr_n_s16_m(svbool_t, svint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s8_x))) svint8_t svasr_n_s8_x(svbool_t, svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s32_x))) svint32_t svasr_n_s32_x(svbool_t, svint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s64_x))) svint64_t svasr_n_s64_x(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s16_x))) svint16_t svasr_n_s16_x(svbool_t, svint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s8_z))) svint8_t svasr_n_s8_z(svbool_t, svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s32_z))) svint32_t svasr_n_s32_z(svbool_t, svint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s64_z))) svint64_t svasr_n_s64_z(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s16_z))) svint16_t svasr_n_s16_z(svbool_t, svint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s8_m))) svint8_t svasr_s8_m(svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s32_m))) svint32_t svasr_s32_m(svbool_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s64_m))) svint64_t svasr_s64_m(svbool_t, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s16_m))) svint16_t svasr_s16_m(svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s8_x))) svint8_t svasr_s8_x(svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s32_x))) svint32_t svasr_s32_x(svbool_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s64_x))) svint64_t svasr_s64_x(svbool_t, svint64_t, svuint64_t); __ai 
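/* Note (editorial): within the svasr family above, the _n_ forms take a scalar shift
   amount whose type matches the element width (uint8_t ... uint64_t), the plain forms
   take a vector of per-element shift counts (svuint8_t etc.), and the svasr_wide_*
   forms that follow shift 8/16/32-bit elements by 64-bit counts. */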
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s16_x))) svint16_t svasr_s16_x(svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s8_z))) svint8_t svasr_s8_z(svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s32_z))) svint32_t svasr_s32_z(svbool_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s64_z))) svint64_t svasr_s64_z(svbool_t, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s16_z))) svint16_t svasr_s16_z(svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s8_m))) svint8_t svasr_wide_n_s8_m(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s32_m))) svint32_t svasr_wide_n_s32_m(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s16_m))) svint16_t svasr_wide_n_s16_m(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s8_x))) svint8_t svasr_wide_n_s8_x(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s32_x))) svint32_t svasr_wide_n_s32_x(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s16_x))) svint16_t svasr_wide_n_s16_x(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s8_z))) svint8_t svasr_wide_n_s8_z(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s32_z))) svint32_t svasr_wide_n_s32_z(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s16_z))) svint16_t svasr_wide_n_s16_z(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s8_m))) svint8_t svasr_wide_s8_m(svbool_t, svint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s32_m))) svint32_t svasr_wide_s32_m(svbool_t, svint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s16_m))) svint16_t svasr_wide_s16_m(svbool_t, svint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s8_x))) svint8_t svasr_wide_s8_x(svbool_t, svint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s32_x))) svint32_t svasr_wide_s32_x(svbool_t, svint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s16_x))) svint16_t svasr_wide_s16_x(svbool_t, svint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s8_z))) svint8_t svasr_wide_s8_z(svbool_t, svint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s32_z))) svint32_t svasr_wide_s32_z(svbool_t, svint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s16_z))) svint16_t svasr_wide_s16_z(svbool_t, svint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s8_m))) svint8_t svasrd_n_s8_m(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s32_m))) svint32_t svasrd_n_s32_m(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s64_m))) svint64_t 
svasrd_n_s64_m(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s16_m))) svint16_t svasrd_n_s16_m(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s8_x))) svint8_t svasrd_n_s8_x(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s32_x))) svint32_t svasrd_n_s32_x(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s64_x))) svint64_t svasrd_n_s64_x(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s16_x))) svint16_t svasrd_n_s16_x(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s8_z))) svint8_t svasrd_n_s8_z(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s32_z))) svint32_t svasrd_n_s32_z(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s64_z))) svint64_t svasrd_n_s64_z(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s16_z))) svint16_t svasrd_n_s16_z(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_b_z))) svbool_t svbic_b_z(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u8_m))) svuint8_t svbic_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u32_m))) svuint32_t svbic_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u64_m))) svuint64_t svbic_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u16_m))) svuint16_t svbic_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s8_m))) svint8_t svbic_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s32_m))) svint32_t svbic_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s64_m))) svint64_t svbic_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s16_m))) svint16_t svbic_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u8_x))) svuint8_t svbic_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u32_x))) svuint32_t svbic_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u64_x))) svuint64_t svbic_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u16_x))) svuint16_t svbic_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s8_x))) svint8_t svbic_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s32_x))) svint32_t svbic_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s64_x))) svint64_t svbic_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s16_x))) svint16_t svbic_n_s16_x(svbool_t, svint16_t, int16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u8_z))) svuint8_t svbic_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u32_z))) svuint32_t svbic_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u64_z))) svuint64_t svbic_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u16_z))) svuint16_t svbic_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s8_z))) svint8_t svbic_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s32_z))) svint32_t svbic_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s64_z))) svint64_t svbic_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s16_z))) svint16_t svbic_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u8_m))) svuint8_t svbic_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u32_m))) svuint32_t svbic_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u64_m))) svuint64_t svbic_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u16_m))) svuint16_t svbic_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s8_m))) svint8_t svbic_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s32_m))) svint32_t svbic_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s64_m))) svint64_t svbic_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s16_m))) svint16_t svbic_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u8_x))) svuint8_t svbic_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u32_x))) svuint32_t svbic_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u64_x))) svuint64_t svbic_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u16_x))) svuint16_t svbic_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s8_x))) svint8_t svbic_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s32_x))) svint32_t svbic_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s64_x))) svint64_t svbic_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s16_x))) svint16_t svbic_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u8_z))) svuint8_t svbic_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u32_z))) svuint32_t svbic_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u64_z))) svuint64_t svbic_u64_z(svbool_t, svuint64_t, svuint64_t); __ai 
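/* Note (editorial): svbic_* is the bit-clear counterpart of svand_*: it ANDs the first
   data operand with the bitwise complement of the second, and it offers the same
   _m/_x/_z predication variants and _n_ scalar-operand forms as the svand_* group. */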
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u16_z))) svuint16_t svbic_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s8_z))) svint8_t svbic_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s32_z))) svint32_t svbic_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s64_z))) svint64_t svbic_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s16_z))) svint16_t svbic_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrka_b_m))) svbool_t svbrka_b_m(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrka_b_z))) svbool_t svbrka_b_z(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrkb_b_m))) svbool_t svbrkb_b_m(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrkb_b_z))) svbool_t svbrkb_b_z(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrkn_b_z))) svbool_t svbrkn_b_z(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrkpa_b_z))) svbool_t svbrkpa_b_z(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrkpb_b_z))) svbool_t svbrkpb_b_z(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f64_m))) svfloat64_t svcadd_f64_m(svbool_t, svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f32_m))) svfloat32_t svcadd_f32_m(svbool_t, svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f16_m))) svfloat16_t svcadd_f16_m(svbool_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f64_x))) svfloat64_t svcadd_f64_x(svbool_t, svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f32_x))) svfloat32_t svcadd_f32_x(svbool_t, svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f16_x))) svfloat16_t svcadd_f16_x(svbool_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f64_z))) svfloat64_t svcadd_f64_z(svbool_t, svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f32_z))) svfloat32_t svcadd_f32_z(svbool_t, svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f16_z))) svfloat16_t svcadd_f16_z(svbool_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_u8))) uint8_t svclasta_n_u8(svbool_t, uint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_u32))) uint32_t svclasta_n_u32(svbool_t, uint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_u64))) uint64_t svclasta_n_u64(svbool_t, uint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_u16))) uint16_t svclasta_n_u16(svbool_t, uint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_s8))) int8_t svclasta_n_s8(svbool_t, int8_t, svint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_f64))) float64_t svclasta_n_f64(svbool_t, float64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_f32))) float32_t svclasta_n_f32(svbool_t, float32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_f16))) float16_t svclasta_n_f16(svbool_t, float16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_s32))) int32_t svclasta_n_s32(svbool_t, int32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_s64))) int64_t svclasta_n_s64(svbool_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_s16))) int16_t svclasta_n_s16(svbool_t, int16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_u8))) svuint8_t svclasta_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_u32))) svuint32_t svclasta_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_u64))) svuint64_t svclasta_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_u16))) svuint16_t svclasta_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_s8))) svint8_t svclasta_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_f64))) svfloat64_t svclasta_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_f32))) svfloat32_t svclasta_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_f16))) svfloat16_t svclasta_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_s32))) svint32_t svclasta_s32(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_s64))) svint64_t svclasta_s64(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_s16))) svint16_t svclasta_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_u8))) uint8_t svclastb_n_u8(svbool_t, uint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_u32))) uint32_t svclastb_n_u32(svbool_t, uint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_u64))) uint64_t svclastb_n_u64(svbool_t, uint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_u16))) uint16_t svclastb_n_u16(svbool_t, uint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_s8))) int8_t svclastb_n_s8(svbool_t, int8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_f64))) float64_t svclastb_n_f64(svbool_t, float64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_f32))) float32_t svclastb_n_f32(svbool_t, float32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_f16))) float16_t svclastb_n_f16(svbool_t, float16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_s32))) int32_t svclastb_n_s32(svbool_t, int32_t, svint32_t); __ai 
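/* Note (editorial, per the ACLE naming): the svclastb_* intrinsics extract the element
   at the last active lane of the vector operand, while svclasta_* extract the element
   after it; when no lane is active, the scalar (_n_) or first-vector fallback argument
   is returned instead. */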
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_s64))) int64_t svclastb_n_s64(svbool_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_s16))) int16_t svclastb_n_s16(svbool_t, int16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_u8))) svuint8_t svclastb_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_u32))) svuint32_t svclastb_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_u64))) svuint64_t svclastb_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_u16))) svuint16_t svclastb_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_s8))) svint8_t svclastb_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_f64))) svfloat64_t svclastb_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_f32))) svfloat32_t svclastb_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_f16))) svfloat16_t svclastb_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_s32))) svint32_t svclastb_s32(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_s64))) svint64_t svclastb_s64(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_s16))) svint16_t svclastb_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s8_m))) svuint8_t svcls_s8_m(svuint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s32_m))) svuint32_t svcls_s32_m(svuint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s64_m))) svuint64_t svcls_s64_m(svuint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s16_m))) svuint16_t svcls_s16_m(svuint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s8_x))) svuint8_t svcls_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s32_x))) svuint32_t svcls_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s64_x))) svuint64_t svcls_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s16_x))) svuint16_t svcls_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s8_z))) svuint8_t svcls_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s32_z))) svuint32_t svcls_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s64_z))) svuint64_t svcls_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s16_z))) svuint16_t svcls_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u8_m))) svuint8_t svclz_u8_m(svuint8_t, svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u32_m))) svuint32_t svclz_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u64_m))) svuint64_t 
svclz_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u16_m))) svuint16_t svclz_u16_m(svuint16_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s8_m))) svuint8_t svclz_s8_m(svuint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s32_m))) svuint32_t svclz_s32_m(svuint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s64_m))) svuint64_t svclz_s64_m(svuint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s16_m))) svuint16_t svclz_s16_m(svuint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u8_x))) svuint8_t svclz_u8_x(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u32_x))) svuint32_t svclz_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u64_x))) svuint64_t svclz_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u16_x))) svuint16_t svclz_u16_x(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s8_x))) svuint8_t svclz_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s32_x))) svuint32_t svclz_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s64_x))) svuint64_t svclz_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s16_x))) svuint16_t svclz_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u8_z))) svuint8_t svclz_u8_z(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u32_z))) svuint32_t svclz_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u64_z))) svuint64_t svclz_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u16_z))) svuint16_t svclz_u16_z(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s8_z))) svuint8_t svclz_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s32_z))) svuint32_t svclz_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s64_z))) svuint64_t svclz_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s16_z))) svuint16_t svclz_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f64_m))) svfloat64_t svcmla_f64_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f32_m))) svfloat32_t svcmla_f32_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f16_m))) svfloat16_t svcmla_f16_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f64_x))) svfloat64_t svcmla_f64_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f32_x))) svfloat32_t svcmla_f32_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f16_x))) svfloat16_t svcmla_f16_x(svbool_t, svfloat16_t, 
svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f64_z))) svfloat64_t svcmla_f64_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f32_z))) svfloat32_t svcmla_f32_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f16_z))) svfloat16_t svcmla_f16_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_f32))) svfloat32_t svcmla_lane_f32(svfloat32_t, svfloat32_t, svfloat32_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_f16))) svfloat16_t svcmla_lane_f16(svfloat16_t, svfloat16_t, svfloat16_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_f64))) svbool_t svcmpeq_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_f32))) svbool_t svcmpeq_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_f16))) svbool_t svcmpeq_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_u8))) svbool_t svcmpeq_n_u8(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_u32))) svbool_t svcmpeq_n_u32(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_u64))) svbool_t svcmpeq_n_u64(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_u16))) svbool_t svcmpeq_n_u16(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_s8))) svbool_t svcmpeq_n_s8(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_s32))) svbool_t svcmpeq_n_s32(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_s64))) svbool_t svcmpeq_n_s64(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_s16))) svbool_t svcmpeq_n_s16(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_u8))) svbool_t svcmpeq_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_u32))) svbool_t svcmpeq_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_u64))) svbool_t svcmpeq_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_u16))) svbool_t svcmpeq_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_s8))) svbool_t svcmpeq_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_s32))) svbool_t svcmpeq_s32(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_s64))) svbool_t svcmpeq_s64(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_s16))) svbool_t svcmpeq_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_f64))) svbool_t svcmpeq_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_f32))) 
svbool_t svcmpeq_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_f16))) svbool_t svcmpeq_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_n_s8))) svbool_t svcmpeq_wide_n_s8(svbool_t, svint8_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_n_s32))) svbool_t svcmpeq_wide_n_s32(svbool_t, svint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_n_s16))) svbool_t svcmpeq_wide_n_s16(svbool_t, svint16_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_s8))) svbool_t svcmpeq_wide_s8(svbool_t, svint8_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_s32))) svbool_t svcmpeq_wide_s32(svbool_t, svint32_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_s16))) svbool_t svcmpeq_wide_s16(svbool_t, svint16_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_f64))) svbool_t svcmpge_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_f32))) svbool_t svcmpge_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_f16))) svbool_t svcmpge_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_s8))) svbool_t svcmpge_n_s8(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_s32))) svbool_t svcmpge_n_s32(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_s64))) svbool_t svcmpge_n_s64(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_s16))) svbool_t svcmpge_n_s16(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_u8))) svbool_t svcmpge_n_u8(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_u32))) svbool_t svcmpge_n_u32(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_u64))) svbool_t svcmpge_n_u64(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_u16))) svbool_t svcmpge_n_u16(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_s8))) svbool_t svcmpge_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_s32))) svbool_t svcmpge_s32(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_s64))) svbool_t svcmpge_s64(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_s16))) svbool_t svcmpge_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_f64))) svbool_t svcmpge_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_f32))) svbool_t svcmpge_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_f16))) svbool_t svcmpge_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_u8))) svbool_t svcmpge_u8(svbool_t, svuint8_t, svuint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_u32))) svbool_t svcmpge_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_u64))) svbool_t svcmpge_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_u16))) svbool_t svcmpge_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_s8))) svbool_t svcmpge_wide_n_s8(svbool_t, svint8_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_s32))) svbool_t svcmpge_wide_n_s32(svbool_t, svint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_s16))) svbool_t svcmpge_wide_n_s16(svbool_t, svint16_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_u8))) svbool_t svcmpge_wide_n_u8(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_u32))) svbool_t svcmpge_wide_n_u32(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_u16))) svbool_t svcmpge_wide_n_u16(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_s8))) svbool_t svcmpge_wide_s8(svbool_t, svint8_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_s32))) svbool_t svcmpge_wide_s32(svbool_t, svint32_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_s16))) svbool_t svcmpge_wide_s16(svbool_t, svint16_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_u8))) svbool_t svcmpge_wide_u8(svbool_t, svuint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_u32))) svbool_t svcmpge_wide_u32(svbool_t, svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_u16))) svbool_t svcmpge_wide_u16(svbool_t, svuint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_f64))) svbool_t svcmpgt_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_f32))) svbool_t svcmpgt_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_f16))) svbool_t svcmpgt_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_s8))) svbool_t svcmpgt_n_s8(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_s32))) svbool_t svcmpgt_n_s32(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_s64))) svbool_t svcmpgt_n_s64(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_s16))) svbool_t svcmpgt_n_s16(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_u8))) svbool_t svcmpgt_n_u8(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_u32))) svbool_t svcmpgt_n_u32(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_u64))) svbool_t svcmpgt_n_u64(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_u16))) svbool_t svcmpgt_n_u16(svbool_t, svuint16_t, uint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_s8))) svbool_t svcmpgt_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_s32))) svbool_t svcmpgt_s32(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_s64))) svbool_t svcmpgt_s64(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_s16))) svbool_t svcmpgt_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_f64))) svbool_t svcmpgt_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_f32))) svbool_t svcmpgt_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_f16))) svbool_t svcmpgt_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_u8))) svbool_t svcmpgt_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_u32))) svbool_t svcmpgt_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_u64))) svbool_t svcmpgt_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_u16))) svbool_t svcmpgt_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_s8))) svbool_t svcmpgt_wide_n_s8(svbool_t, svint8_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_s32))) svbool_t svcmpgt_wide_n_s32(svbool_t, svint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_s16))) svbool_t svcmpgt_wide_n_s16(svbool_t, svint16_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_u8))) svbool_t svcmpgt_wide_n_u8(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_u32))) svbool_t svcmpgt_wide_n_u32(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_u16))) svbool_t svcmpgt_wide_n_u16(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_s8))) svbool_t svcmpgt_wide_s8(svbool_t, svint8_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_s32))) svbool_t svcmpgt_wide_s32(svbool_t, svint32_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_s16))) svbool_t svcmpgt_wide_s16(svbool_t, svint16_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_u8))) svbool_t svcmpgt_wide_u8(svbool_t, svuint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_u32))) svbool_t svcmpgt_wide_u32(svbool_t, svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_u16))) svbool_t svcmpgt_wide_u16(svbool_t, svuint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_f64))) svbool_t svcmple_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_f32))) svbool_t svcmple_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_f16))) svbool_t svcmple_n_f16(svbool_t, svfloat16_t, float16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_s8))) svbool_t svcmple_n_s8(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_s32))) svbool_t svcmple_n_s32(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_s64))) svbool_t svcmple_n_s64(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_s16))) svbool_t svcmple_n_s16(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_u8))) svbool_t svcmple_n_u8(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_u32))) svbool_t svcmple_n_u32(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_u64))) svbool_t svcmple_n_u64(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_u16))) svbool_t svcmple_n_u16(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_s8))) svbool_t svcmple_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_s32))) svbool_t svcmple_s32(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_s64))) svbool_t svcmple_s64(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_s16))) svbool_t svcmple_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_f64))) svbool_t svcmple_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_f32))) svbool_t svcmple_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_f16))) svbool_t svcmple_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_u8))) svbool_t svcmple_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_u32))) svbool_t svcmple_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_u64))) svbool_t svcmple_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_u16))) svbool_t svcmple_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_s8))) svbool_t svcmple_wide_n_s8(svbool_t, svint8_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_s32))) svbool_t svcmple_wide_n_s32(svbool_t, svint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_s16))) svbool_t svcmple_wide_n_s16(svbool_t, svint16_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_u8))) svbool_t svcmple_wide_n_u8(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_u32))) svbool_t svcmple_wide_n_u32(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_u16))) svbool_t svcmple_wide_n_u16(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_s8))) svbool_t svcmple_wide_s8(svbool_t, svint8_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_s32))) svbool_t 
svcmple_wide_s32(svbool_t, svint32_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_s16))) svbool_t svcmple_wide_s16(svbool_t, svint16_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_u8))) svbool_t svcmple_wide_u8(svbool_t, svuint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_u32))) svbool_t svcmple_wide_u32(svbool_t, svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_u16))) svbool_t svcmple_wide_u16(svbool_t, svuint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_u8))) svbool_t svcmplt_n_u8(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_u32))) svbool_t svcmplt_n_u32(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_u64))) svbool_t svcmplt_n_u64(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_u16))) svbool_t svcmplt_n_u16(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_f64))) svbool_t svcmplt_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_f32))) svbool_t svcmplt_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_f16))) svbool_t svcmplt_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_s8))) svbool_t svcmplt_n_s8(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_s32))) svbool_t svcmplt_n_s32(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_s64))) svbool_t svcmplt_n_s64(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_s16))) svbool_t svcmplt_n_s16(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_u8))) svbool_t svcmplt_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_u32))) svbool_t svcmplt_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_u64))) svbool_t svcmplt_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_u16))) svbool_t svcmplt_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_s8))) svbool_t svcmplt_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_s32))) svbool_t svcmplt_s32(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_s64))) svbool_t svcmplt_s64(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_s16))) svbool_t svcmplt_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_f64))) svbool_t svcmplt_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_f32))) svbool_t svcmplt_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_f16))) svbool_t svcmplt_f16(svbool_t, svfloat16_t, svfloat16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_u8))) svbool_t svcmplt_wide_n_u8(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_u32))) svbool_t svcmplt_wide_n_u32(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_u16))) svbool_t svcmplt_wide_n_u16(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_s8))) svbool_t svcmplt_wide_n_s8(svbool_t, svint8_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_s32))) svbool_t svcmplt_wide_n_s32(svbool_t, svint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_s16))) svbool_t svcmplt_wide_n_s16(svbool_t, svint16_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_u8))) svbool_t svcmplt_wide_u8(svbool_t, svuint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_u32))) svbool_t svcmplt_wide_u32(svbool_t, svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_u16))) svbool_t svcmplt_wide_u16(svbool_t, svuint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_s8))) svbool_t svcmplt_wide_s8(svbool_t, svint8_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_s32))) svbool_t svcmplt_wide_s32(svbool_t, svint32_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_s16))) svbool_t svcmplt_wide_s16(svbool_t, svint16_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_f64))) svbool_t svcmpne_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_f32))) svbool_t svcmpne_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_f16))) svbool_t svcmpne_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_u8))) svbool_t svcmpne_n_u8(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_u32))) svbool_t svcmpne_n_u32(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_u64))) svbool_t svcmpne_n_u64(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_u16))) svbool_t svcmpne_n_u16(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_s8))) svbool_t svcmpne_n_s8(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_s32))) svbool_t svcmpne_n_s32(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_s64))) svbool_t svcmpne_n_s64(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_s16))) svbool_t svcmpne_n_s16(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_u8))) svbool_t svcmpne_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_u32))) svbool_t svcmpne_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_u64))) svbool_t svcmpne_u64(svbool_t, svuint64_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_u16))) svbool_t svcmpne_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_s8))) svbool_t svcmpne_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_s32))) svbool_t svcmpne_s32(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_s64))) svbool_t svcmpne_s64(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_s16))) svbool_t svcmpne_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_f64))) svbool_t svcmpne_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_f32))) svbool_t svcmpne_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_f16))) svbool_t svcmpne_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_n_s8))) svbool_t svcmpne_wide_n_s8(svbool_t, svint8_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_n_s32))) svbool_t svcmpne_wide_n_s32(svbool_t, svint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_n_s16))) svbool_t svcmpne_wide_n_s16(svbool_t, svint16_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_s8))) svbool_t svcmpne_wide_s8(svbool_t, svint8_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_s32))) svbool_t svcmpne_wide_s32(svbool_t, svint32_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_s16))) svbool_t svcmpne_wide_s16(svbool_t, svint16_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_n_f64))) svbool_t svcmpuo_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_n_f32))) svbool_t svcmpuo_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_n_f16))) svbool_t svcmpuo_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_f64))) svbool_t svcmpuo_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_f32))) svbool_t svcmpuo_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_f16))) svbool_t svcmpuo_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u8_m))) svuint8_t svcnot_u8_m(svuint8_t, svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u32_m))) svuint32_t svcnot_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u64_m))) svuint64_t svcnot_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u16_m))) svuint16_t svcnot_u16_m(svuint16_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s8_m))) svint8_t svcnot_s8_m(svint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s32_m))) svint32_t svcnot_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s64_m))) svint64_t 
svcnot_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s16_m))) svint16_t svcnot_s16_m(svint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u8_x))) svuint8_t svcnot_u8_x(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u32_x))) svuint32_t svcnot_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u64_x))) svuint64_t svcnot_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u16_x))) svuint16_t svcnot_u16_x(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s8_x))) svint8_t svcnot_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s32_x))) svint32_t svcnot_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s64_x))) svint64_t svcnot_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s16_x))) svint16_t svcnot_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u8_z))) svuint8_t svcnot_u8_z(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u32_z))) svuint32_t svcnot_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u64_z))) svuint64_t svcnot_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u16_z))) svuint16_t svcnot_u16_z(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s8_z))) svint8_t svcnot_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s32_z))) svint32_t svcnot_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s64_z))) svint64_t svcnot_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s16_z))) svint16_t svcnot_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u8_m))) svuint8_t svcnt_u8_m(svuint8_t, svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u32_m))) svuint32_t svcnt_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u64_m))) svuint64_t svcnt_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u16_m))) svuint16_t svcnt_u16_m(svuint16_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s8_m))) svuint8_t svcnt_s8_m(svuint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f64_m))) svuint64_t svcnt_f64_m(svuint64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f32_m))) svuint32_t svcnt_f32_m(svuint32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f16_m))) svuint16_t svcnt_f16_m(svuint16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s32_m))) svuint32_t svcnt_s32_m(svuint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s64_m))) svuint64_t svcnt_s64_m(svuint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s16_m))) svuint16_t svcnt_s16_m(svuint16_t, svbool_t, 
svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u8_x))) svuint8_t svcnt_u8_x(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u32_x))) svuint32_t svcnt_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u64_x))) svuint64_t svcnt_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u16_x))) svuint16_t svcnt_u16_x(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s8_x))) svuint8_t svcnt_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f64_x))) svuint64_t svcnt_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f32_x))) svuint32_t svcnt_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f16_x))) svuint16_t svcnt_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s32_x))) svuint32_t svcnt_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s64_x))) svuint64_t svcnt_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s16_x))) svuint16_t svcnt_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u8_z))) svuint8_t svcnt_u8_z(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u32_z))) svuint32_t svcnt_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u64_z))) svuint64_t svcnt_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u16_z))) svuint16_t svcnt_u16_z(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s8_z))) svuint8_t svcnt_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f64_z))) svuint64_t svcnt_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f32_z))) svuint32_t svcnt_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f16_z))) svuint16_t svcnt_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s32_z))) svuint32_t svcnt_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s64_z))) svuint64_t svcnt_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s16_z))) svuint16_t svcnt_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntb))) uint64_t svcntb(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntb_pat))) uint64_t svcntb_pat(enum svpattern); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntd))) uint64_t svcntd(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntd_pat))) uint64_t svcntd_pat(enum svpattern); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnth))) uint64_t svcnth(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnth_pat))) uint64_t svcnth_pat(enum svpattern); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_b8))) uint64_t svcntp_b8(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_b32))) uint64_t svcntp_b32(svbool_t, svbool_t); __ai 
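/* Editorial aside (not part of the embedded arm_sve.h text): a minimal sketch of
 * how the predicated compare declarations above combine with svcntp_b32. It
 * assumes an SVE-enabled target and svptrue_b32(), which is declared elsewhere
 * in the header; count_below is an illustrative name, not an ACLE function. */
static inline uint64_t count_below(svint32_t v, int32_t limit) {
  svbool_t pg = svptrue_b32();               /* all-true governing predicate   */
  svbool_t lt = svcmplt_n_s32(pg, v, limit); /* lane-wise test: v[i] < limit   */
  return svcntp_b32(pg, lt);                 /* number of active true lanes    */
}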
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_b64))) uint64_t svcntp_b64(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_b16))) uint64_t svcntp_b16(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntw))) uint64_t svcntw(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntw_pat))) uint64_t svcntw_pat(enum svpattern); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u32))) svuint32_t svcompact_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u64))) svuint64_t svcompact_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f64))) svfloat64_t svcompact_f64(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f32))) svfloat32_t svcompact_f32(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s32))) svint32_t svcompact_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s64))) svint64_t svcompact_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_u8))) svuint8x2_t svcreate2_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_u32))) svuint32x2_t svcreate2_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_u64))) svuint64x2_t svcreate2_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_u16))) svuint16x2_t svcreate2_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s8))) svint8x2_t svcreate2_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_f64))) svfloat64x2_t svcreate2_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_f32))) svfloat32x2_t svcreate2_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_f16))) svfloat16x2_t svcreate2_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s32))) svint32x2_t svcreate2_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s64))) svint64x2_t svcreate2_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s16))) svint16x2_t svcreate2_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u8))) svuint8x3_t svcreate3_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u32))) svuint32x3_t svcreate3_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u64))) svuint64x3_t svcreate3_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u16))) svuint16x3_t svcreate3_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s8))) svint8x3_t svcreate3_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_f64))) svfloat64x3_t svcreate3_f64(svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_f32))) svfloat32x3_t svcreate3_f32(svfloat32_t, svfloat32_t, 
svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_f16))) svfloat16x3_t svcreate3_f16(svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s32))) svint32x3_t svcreate3_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s64))) svint64x3_t svcreate3_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s16))) svint16x3_t svcreate3_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u8))) svuint8x4_t svcreate4_u8(svuint8_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u32))) svuint32x4_t svcreate4_u32(svuint32_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u64))) svuint64x4_t svcreate4_u64(svuint64_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u16))) svuint16x4_t svcreate4_u16(svuint16_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s8))) svint8x4_t svcreate4_s8(svint8_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_f64))) svfloat64x4_t svcreate4_f64(svfloat64_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_f32))) svfloat32x4_t svcreate4_f32(svfloat32_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_f16))) svfloat16x4_t svcreate4_f16(svfloat16_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s32))) svint32x4_t svcreate4_s32(svint32_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s64))) svint64x4_t svcreate4_s64(svint64_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s16))) svint16x4_t svcreate4_s16(svint16_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f32_m))) svfloat16_t svcvt_f16_f32_m(svfloat16_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f32_x))) svfloat16_t svcvt_f16_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f32_z))) svfloat16_t svcvt_f16_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f64_m))) svfloat16_t svcvt_f16_f64_m(svfloat16_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f64_x))) svfloat16_t svcvt_f16_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f64_z))) svfloat16_t svcvt_f16_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s16_m))) svfloat16_t svcvt_f16_s16_m(svfloat16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s16_x))) svfloat16_t svcvt_f16_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s16_z))) svfloat16_t svcvt_f16_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s32_m))) svfloat16_t 
svcvt_f16_s32_m(svfloat16_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s32_x))) svfloat16_t svcvt_f16_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s32_z))) svfloat16_t svcvt_f16_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s64_m))) svfloat16_t svcvt_f16_s64_m(svfloat16_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s64_x))) svfloat16_t svcvt_f16_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s64_z))) svfloat16_t svcvt_f16_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u16_m))) svfloat16_t svcvt_f16_u16_m(svfloat16_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u16_x))) svfloat16_t svcvt_f16_u16_x(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u16_z))) svfloat16_t svcvt_f16_u16_z(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u32_m))) svfloat16_t svcvt_f16_u32_m(svfloat16_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u32_x))) svfloat16_t svcvt_f16_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u32_z))) svfloat16_t svcvt_f16_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u64_m))) svfloat16_t svcvt_f16_u64_m(svfloat16_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u64_x))) svfloat16_t svcvt_f16_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u64_z))) svfloat16_t svcvt_f16_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f16_m))) svfloat32_t svcvt_f32_f16_m(svfloat32_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f16_x))) svfloat32_t svcvt_f32_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f16_z))) svfloat32_t svcvt_f32_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f64_m))) svfloat32_t svcvt_f32_f64_m(svfloat32_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f64_x))) svfloat32_t svcvt_f32_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f64_z))) svfloat32_t svcvt_f32_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_m))) svfloat32_t svcvt_f32_s32_m(svfloat32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_x))) svfloat32_t svcvt_f32_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_z))) svfloat32_t svcvt_f32_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s64_m))) svfloat32_t svcvt_f32_s64_m(svfloat32_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s64_x))) svfloat32_t svcvt_f32_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s64_z))) svfloat32_t svcvt_f32_s64_z(svbool_t, svint64_t); __ai 
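/* Editorial aside: a hedged sketch of the svcvt_* naming used above — the
 * destination element type comes first, the source type second, and the
 * _m/_x/_z suffix selects the treatment of inactive lanes. Assumes
 * svptrue_b32() from elsewhere in the header; widen_to_f32 is illustrative. */
static inline svfloat32_t widen_to_f32(svint32_t v) {
  svbool_t pg = svptrue_b32();     /* operate on every lane                    */
  return svcvt_f32_s32_x(pg, v);   /* int32 -> float32; _x leaves inactive
                                      lanes unspecified ("don't care")         */
}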
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_m))) svfloat32_t svcvt_f32_u32_m(svfloat32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_x))) svfloat32_t svcvt_f32_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_z))) svfloat32_t svcvt_f32_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u64_m))) svfloat32_t svcvt_f32_u64_m(svfloat32_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u64_x))) svfloat32_t svcvt_f32_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u64_z))) svfloat32_t svcvt_f32_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f16_m))) svfloat64_t svcvt_f64_f16_m(svfloat64_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f16_x))) svfloat64_t svcvt_f64_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f16_z))) svfloat64_t svcvt_f64_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f32_m))) svfloat64_t svcvt_f64_f32_m(svfloat64_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f32_x))) svfloat64_t svcvt_f64_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f32_z))) svfloat64_t svcvt_f64_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s32_m))) svfloat64_t svcvt_f64_s32_m(svfloat64_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s32_x))) svfloat64_t svcvt_f64_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s32_z))) svfloat64_t svcvt_f64_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s64_m))) svfloat64_t svcvt_f64_s64_m(svfloat64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s64_x))) svfloat64_t svcvt_f64_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s64_z))) svfloat64_t svcvt_f64_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u32_m))) svfloat64_t svcvt_f64_u32_m(svfloat64_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u32_x))) svfloat64_t svcvt_f64_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u32_z))) svfloat64_t svcvt_f64_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u64_m))) svfloat64_t svcvt_f64_u64_m(svfloat64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u64_x))) svfloat64_t svcvt_f64_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u64_z))) svfloat64_t svcvt_f64_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s16_f16_m))) svint16_t svcvt_s16_f16_m(svint16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s16_f16_x))) svint16_t svcvt_s16_f16_x(svbool_t, svfloat16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s16_f16_z))) svint16_t svcvt_s16_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f16_m))) svint32_t svcvt_s32_f16_m(svint32_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f16_x))) svint32_t svcvt_s32_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f16_z))) svint32_t svcvt_s32_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_m))) svint32_t svcvt_s32_f32_m(svint32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_x))) svint32_t svcvt_s32_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_z))) svint32_t svcvt_s32_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f64_m))) svint32_t svcvt_s32_f64_m(svint32_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f64_x))) svint32_t svcvt_s32_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f64_z))) svint32_t svcvt_s32_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f16_m))) svint64_t svcvt_s64_f16_m(svint64_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f16_x))) svint64_t svcvt_s64_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f16_z))) svint64_t svcvt_s64_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f32_m))) svint64_t svcvt_s64_f32_m(svint64_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f32_x))) svint64_t svcvt_s64_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f32_z))) svint64_t svcvt_s64_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f64_m))) svint64_t svcvt_s64_f64_m(svint64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f64_x))) svint64_t svcvt_s64_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f64_z))) svint64_t svcvt_s64_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u16_f16_m))) svuint16_t svcvt_u16_f16_m(svuint16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u16_f16_x))) svuint16_t svcvt_u16_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u16_f16_z))) svuint16_t svcvt_u16_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f16_m))) svuint32_t svcvt_u32_f16_m(svuint32_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f16_x))) svuint32_t svcvt_u32_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f16_z))) svuint32_t svcvt_u32_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_m))) svuint32_t svcvt_u32_f32_m(svuint32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_x))) svuint32_t 
svcvt_u32_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_z))) svuint32_t svcvt_u32_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f64_m))) svuint32_t svcvt_u32_f64_m(svuint32_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f64_x))) svuint32_t svcvt_u32_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f64_z))) svuint32_t svcvt_u32_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f16_m))) svuint64_t svcvt_u64_f16_m(svuint64_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f16_x))) svuint64_t svcvt_u64_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f16_z))) svuint64_t svcvt_u64_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f32_m))) svuint64_t svcvt_u64_f32_m(svuint64_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f32_x))) svuint64_t svcvt_u64_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f32_z))) svuint64_t svcvt_u64_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f64_m))) svuint64_t svcvt_u64_f64_m(svuint64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f64_x))) svuint64_t svcvt_u64_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f64_z))) svuint64_t svcvt_u64_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f64_m))) svfloat64_t svdiv_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f32_m))) svfloat32_t svdiv_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f16_m))) svfloat16_t svdiv_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f64_x))) svfloat64_t svdiv_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f32_x))) svfloat32_t svdiv_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f16_x))) svfloat16_t svdiv_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f64_z))) svfloat64_t svdiv_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f32_z))) svfloat32_t svdiv_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f16_z))) svfloat16_t svdiv_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s32_m))) svint32_t svdiv_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s64_m))) svint64_t svdiv_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s32_x))) svint32_t svdiv_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s64_x))) svint64_t svdiv_n_s64_x(svbool_t, svint64_t, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s32_z))) svint32_t svdiv_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s64_z))) svint64_t svdiv_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u32_m))) svuint32_t svdiv_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u64_m))) svuint64_t svdiv_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u32_x))) svuint32_t svdiv_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u64_x))) svuint64_t svdiv_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u32_z))) svuint32_t svdiv_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u64_z))) svuint64_t svdiv_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f64_m))) svfloat64_t svdiv_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f32_m))) svfloat32_t svdiv_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f16_m))) svfloat16_t svdiv_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f64_x))) svfloat64_t svdiv_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f32_x))) svfloat32_t svdiv_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f16_x))) svfloat16_t svdiv_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f64_z))) svfloat64_t svdiv_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f32_z))) svfloat32_t svdiv_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f16_z))) svfloat16_t svdiv_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s32_m))) svint32_t svdiv_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s64_m))) svint64_t svdiv_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s32_x))) svint32_t svdiv_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s64_x))) svint64_t svdiv_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s32_z))) svint32_t svdiv_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s64_z))) svint64_t svdiv_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u32_m))) svuint32_t svdiv_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u64_m))) svuint64_t svdiv_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u32_x))) svuint32_t svdiv_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u64_x))) 
svuint64_t svdiv_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u32_z))) svuint32_t svdiv_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u64_z))) svuint64_t svdiv_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f64_m))) svfloat64_t svdivr_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f32_m))) svfloat32_t svdivr_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f16_m))) svfloat16_t svdivr_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f64_x))) svfloat64_t svdivr_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f32_x))) svfloat32_t svdivr_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f16_x))) svfloat16_t svdivr_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f64_z))) svfloat64_t svdivr_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f32_z))) svfloat32_t svdivr_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f16_z))) svfloat16_t svdivr_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s32_m))) svint32_t svdivr_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s64_m))) svint64_t svdivr_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s32_x))) svint32_t svdivr_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s64_x))) svint64_t svdivr_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s32_z))) svint32_t svdivr_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s64_z))) svint64_t svdivr_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u32_m))) svuint32_t svdivr_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u64_m))) svuint64_t svdivr_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u32_x))) svuint32_t svdivr_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u64_x))) svuint64_t svdivr_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u32_z))) svuint32_t svdivr_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u64_z))) svuint64_t svdivr_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f64_m))) svfloat64_t svdivr_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f32_m))) svfloat32_t svdivr_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai 
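/* Editor's sketch (not from the original header): svdiv_* divides lane-wise under a
 * predicate; the _n_ forms broadcast a scalar divisor, and the svdivr_* forms swap
 * the operand roles so the vector argument becomes the divisor.  Uses only the
 * declarations shown above: */
#include <arm_sve.h>
svfloat32_t ratio_keep_num(svbool_t pg, svfloat32_t num, svfloat32_t den) {
    return svdiv_f32_m(pg, num, den);   /* _m: inactive lanes keep 'num' */
}
svfloat32_t halve_active(svbool_t pg, svfloat32_t v) {
    return svdiv_n_f32_x(pg, v, 2.0f);  /* scalar 2.0f broadcast to every lane */
}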
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f16_m))) svfloat16_t svdivr_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f64_x))) svfloat64_t svdivr_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f32_x))) svfloat32_t svdivr_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f16_x))) svfloat16_t svdivr_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f64_z))) svfloat64_t svdivr_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f32_z))) svfloat32_t svdivr_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f16_z))) svfloat16_t svdivr_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s32_m))) svint32_t svdivr_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s64_m))) svint64_t svdivr_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s32_x))) svint32_t svdivr_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s64_x))) svint64_t svdivr_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s32_z))) svint32_t svdivr_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s64_z))) svint64_t svdivr_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u32_m))) svuint32_t svdivr_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u64_m))) svuint64_t svdivr_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u32_x))) svuint32_t svdivr_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u64_x))) svuint64_t svdivr_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u32_z))) svuint32_t svdivr_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u64_z))) svuint64_t svdivr_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_s32))) svint32_t svdot_n_s32(svint32_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_s64))) svint64_t svdot_n_s64(svint64_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_u32))) svuint32_t svdot_n_u32(svuint32_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_u64))) svuint64_t svdot_n_u64(svuint64_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_s32))) svint32_t svdot_s32(svint32_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_s64))) svint64_t svdot_s64(svint64_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_u32))) svuint32_t svdot_u32(svuint32_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_u64))) svuint64_t 
svdot_u64(svuint64_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_s32))) svint32_t svdot_lane_s32(svint32_t, svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_s64))) svint64_t svdot_lane_s64(svint64_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_u32))) svuint32_t svdot_lane_u32(svuint32_t, svuint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_u64))) svuint64_t svdot_lane_u64(svuint64_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u8))) svuint8_t svdup_n_u8(uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u32))) svuint32_t svdup_n_u32(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64))) svuint64_t svdup_n_u64(uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16))) svuint16_t svdup_n_u16(uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8))) svint8_t svdup_n_s8(int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64))) svfloat64_t svdup_n_f64(float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f32))) svfloat32_t svdup_n_f32(float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f16))) svfloat16_t svdup_n_f16(float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s32))) svint32_t svdup_n_s32(int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s64))) svint64_t svdup_n_s64(int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s16))) svint16_t svdup_n_s16(int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u8_m))) svuint8_t svdup_n_u8_m(svuint8_t, svbool_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u32_m))) svuint32_t svdup_n_u32_m(svuint32_t, svbool_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64_m))) svuint64_t svdup_n_u64_m(svuint64_t, svbool_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16_m))) svuint16_t svdup_n_u16_m(svuint16_t, svbool_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8_m))) svint8_t svdup_n_s8_m(svint8_t, svbool_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64_m))) svfloat64_t svdup_n_f64_m(svfloat64_t, svbool_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f32_m))) svfloat32_t svdup_n_f32_m(svfloat32_t, svbool_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f16_m))) svfloat16_t svdup_n_f16_m(svfloat16_t, svbool_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s32_m))) svint32_t svdup_n_s32_m(svint32_t, svbool_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s64_m))) svint64_t svdup_n_s64_m(svint64_t, svbool_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s16_m))) svint16_t svdup_n_s16_m(svint16_t, svbool_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_b8))) svbool_t svdup_n_b8(bool); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_b32))) svbool_t svdup_n_b32(bool); __ai 
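/* Editor's sketch (not from the original header): svdot_s32 adds, into each 32-bit
 * accumulator lane, the dot product of the four corresponding signed 8-bit lanes of
 * its two vector operands; svdot_lane_s32 instead reuses, within each 128-bit block
 * of the second operand, the four-byte group selected by the constant index. */
#include <arm_sve.h>
svint32_t dot_accumulate(svint32_t acc, svint8_t a, svint8_t b) {
    return svdot_s32(acc, a, b);            /* acc[i] += sum of four int8 products */
}
svint32_t dot_broadcast_group(svint32_t acc, svint8_t a, svint8_t b) {
    return svdot_lane_s32(acc, a, b, 0);    /* index must be a compile-time constant */
}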
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_b64))) svbool_t svdup_n_b64(bool); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_b16))) svbool_t svdup_n_b16(bool); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u8_x))) svuint8_t svdup_n_u8_x(svbool_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u32_x))) svuint32_t svdup_n_u32_x(svbool_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64_x))) svuint64_t svdup_n_u64_x(svbool_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16_x))) svuint16_t svdup_n_u16_x(svbool_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8_x))) svint8_t svdup_n_s8_x(svbool_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64_x))) svfloat64_t svdup_n_f64_x(svbool_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f32_x))) svfloat32_t svdup_n_f32_x(svbool_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f16_x))) svfloat16_t svdup_n_f16_x(svbool_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s32_x))) svint32_t svdup_n_s32_x(svbool_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s64_x))) svint64_t svdup_n_s64_x(svbool_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s16_x))) svint16_t svdup_n_s16_x(svbool_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u8_z))) svuint8_t svdup_n_u8_z(svbool_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u32_z))) svuint32_t svdup_n_u32_z(svbool_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64_z))) svuint64_t svdup_n_u64_z(svbool_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16_z))) svuint16_t svdup_n_u16_z(svbool_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8_z))) svint8_t svdup_n_s8_z(svbool_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64_z))) svfloat64_t svdup_n_f64_z(svbool_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f32_z))) svfloat32_t svdup_n_f32_z(svbool_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f16_z))) svfloat16_t svdup_n_f16_z(svbool_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s32_z))) svint32_t svdup_n_s32_z(svbool_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s64_z))) svint64_t svdup_n_s64_z(svbool_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s16_z))) svint16_t svdup_n_s16_z(svbool_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_u8))) svuint8_t svdup_lane_u8(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_u32))) svuint32_t svdup_lane_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_u64))) svuint64_t svdup_lane_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_u16))) svuint16_t svdup_lane_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_s8))) svint8_t svdup_lane_s8(svint8_t, uint8_t); __ai 
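/* Editor's sketch (not from the original header): svdup_n_* broadcasts a scalar into
 * every lane; the _m/_x/_z variants additionally take a predicate and treat inactive
 * lanes as described earlier, and svdup_lane_* broadcasts the element at a given
 * index of an existing vector. */
#include <arm_sve.h>
svint32_t all_sevens(void) {
    return svdup_n_s32(7);            /* 7 in every 32-bit lane */
}
svint32_t sevens_where_active(svbool_t pg) {
    return svdup_n_s32_z(pg, 7);      /* 7 in active lanes, 0 elsewhere */
}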
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_f64))) svfloat64_t svdup_lane_f64(svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_f32))) svfloat32_t svdup_lane_f32(svfloat32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_f16))) svfloat16_t svdup_lane_f16(svfloat16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_s32))) svint32_t svdup_lane_s32(svint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_s64))) svint64_t svdup_lane_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_s16))) svint16_t svdup_lane_s16(svint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_u8))) svuint8_t svdupq_n_u8(uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_s8))) svint8_t svdupq_n_s8(int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_u16))) svuint16_t svdupq_n_u16(uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_f16))) svfloat16_t svdupq_n_f16(float16_t, float16_t, float16_t, float16_t, float16_t, float16_t, float16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_s16))) svint16_t svdupq_n_s16(int16_t, int16_t, int16_t, int16_t, int16_t, int16_t, int16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_u32))) svuint32_t svdupq_n_u32(uint32_t, uint32_t, uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_f32))) svfloat32_t svdupq_n_f32(float32_t, float32_t, float32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_s32))) svint32_t svdupq_n_s32(int32_t, int32_t, int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_u64))) svuint64_t svdupq_n_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_f64))) svfloat64_t svdupq_n_f64(float64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_s64))) svint64_t svdupq_n_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_b8))) svbool_t svdupq_n_b8(bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_b16))) svbool_t svdupq_n_b16(bool, bool, bool, bool, bool, bool, bool, bool); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_b32))) svbool_t svdupq_n_b32(bool, bool, bool, bool); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_b64))) svbool_t svdupq_n_b64(bool, bool); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_u8))) svuint8_t svdupq_lane_u8(svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_u32))) svuint32_t svdupq_lane_u32(svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_u64))) svuint64_t svdupq_lane_u64(svuint64_t, uint64_t); __ai 
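/* Editor's sketch (not from the original header): svdupq_n_* builds one 128-bit
 * quadword from its scalar arguments and replicates it across the whole
 * (implementation-defined length) vector; svdupq_lane_* replicates a chosen 128-bit
 * block of an existing vector instead. */
#include <arm_sve.h>
svfloat32_t repeating_pattern(void) {
    return svdupq_n_f32(1.0f, 2.0f, 3.0f, 4.0f);   /* lanes: 1,2,3,4,1,2,3,4,... */
}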
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_u16))) svuint16_t svdupq_lane_u16(svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_s8))) svint8_t svdupq_lane_s8(svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_f64))) svfloat64_t svdupq_lane_f64(svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_f32))) svfloat32_t svdupq_lane_f32(svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_f16))) svfloat16_t svdupq_lane_f16(svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_s32))) svint32_t svdupq_lane_s32(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_s64))) svint64_t svdupq_lane_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_s16))) svint16_t svdupq_lane_s16(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_b_z))) svbool_t sveor_b_z(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u8_m))) svuint8_t sveor_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u32_m))) svuint32_t sveor_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u64_m))) svuint64_t sveor_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u16_m))) svuint16_t sveor_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s8_m))) svint8_t sveor_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s32_m))) svint32_t sveor_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s64_m))) svint64_t sveor_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s16_m))) svint16_t sveor_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u8_x))) svuint8_t sveor_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u32_x))) svuint32_t sveor_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u64_x))) svuint64_t sveor_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u16_x))) svuint16_t sveor_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s8_x))) svint8_t sveor_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s32_x))) svint32_t sveor_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s64_x))) svint64_t sveor_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s16_x))) svint16_t sveor_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u8_z))) svuint8_t sveor_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u32_z))) svuint32_t sveor_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u64_z))) svuint64_t sveor_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u16_z))) svuint16_t sveor_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s8_z))) svint8_t sveor_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s32_z))) svint32_t sveor_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s64_z))) svint64_t sveor_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s16_z))) svint16_t sveor_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u8_m))) svuint8_t sveor_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u32_m))) svuint32_t sveor_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u64_m))) svuint64_t sveor_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u16_m))) svuint16_t sveor_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s8_m))) svint8_t sveor_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s32_m))) svint32_t sveor_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s64_m))) svint64_t sveor_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s16_m))) svint16_t sveor_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u8_x))) svuint8_t sveor_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u32_x))) svuint32_t sveor_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u64_x))) svuint64_t sveor_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u16_x))) svuint16_t sveor_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s8_x))) svint8_t sveor_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s32_x))) svint32_t sveor_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s64_x))) svint64_t sveor_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s16_x))) svint16_t sveor_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u8_z))) svuint8_t sveor_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u32_z))) svuint32_t sveor_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u64_z))) svuint64_t sveor_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u16_z))) svuint16_t sveor_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s8_z))) svint8_t sveor_s8_z(svbool_t, svint8_t, svint8_t); __ai 
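/* Editor's sketch (not from the original header): sveor_* is a lane-wise exclusive
 * OR (vector/vector, or vector/scalar in the _n_ forms), and the sveorv_* functions
 * declared just below reduce a vector to one scalar by XOR-ing its active lanes. */
#include <arm_sve.h>
svuint32_t invert_active_lanes(svbool_t pg, svuint32_t v) {
    return sveor_n_u32_x(pg, v, 0xFFFFFFFFu);      /* bitwise NOT of active lanes */
}
uint32_t xor_reduce(svbool_t pg, svuint32_t v) {
    return sveorv_u32(pg, v);                      /* XOR of all active lanes */
}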
__attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s32_z))) svint32_t sveor_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s64_z))) svint64_t sveor_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s16_z))) svint16_t sveor_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_u8))) uint8_t sveorv_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_u32))) uint32_t sveorv_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_u64))) uint64_t sveorv_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_u16))) uint16_t sveorv_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_s8))) int8_t sveorv_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_s32))) int32_t sveorv_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_s64))) int64_t sveorv_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_s16))) int16_t sveorv_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f64))) svfloat64_t svexpa_f64(svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f32))) svfloat32_t svexpa_f32(svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f16))) svfloat16_t svexpa_f16(svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_u8))) svuint8_t svext_u8(svuint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_u32))) svuint32_t svext_u32(svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_u64))) svuint64_t svext_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_u16))) svuint16_t svext_u16(svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_s8))) svint8_t svext_s8(svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_f64))) svfloat64_t svext_f64(svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_f32))) svfloat32_t svext_f32(svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_f16))) svfloat16_t svext_f16(svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_s32))) svint32_t svext_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_s64))) svint64_t svext_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_s16))) svint16_t svext_s16(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s32_m))) svint32_t svextb_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s64_m))) svint64_t svextb_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s16_m))) svint16_t svextb_s16_m(svint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s32_x))) svint32_t svextb_s32_x(svbool_t, 
svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s64_x))) svint64_t svextb_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s16_x))) svint16_t svextb_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s32_z))) svint32_t svextb_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s64_z))) svint64_t svextb_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s16_z))) svint16_t svextb_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u32_m))) svuint32_t svextb_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u64_m))) svuint64_t svextb_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u16_m))) svuint16_t svextb_u16_m(svuint16_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u32_x))) svuint32_t svextb_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u64_x))) svuint64_t svextb_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u16_x))) svuint16_t svextb_u16_x(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u32_z))) svuint32_t svextb_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u64_z))) svuint64_t svextb_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u16_z))) svuint16_t svextb_u16_z(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s32_m))) svint32_t svexth_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s64_m))) svint64_t svexth_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s32_x))) svint32_t svexth_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s64_x))) svint64_t svexth_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s32_z))) svint32_t svexth_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s64_z))) svint64_t svexth_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u32_m))) svuint32_t svexth_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u64_m))) svuint64_t svexth_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u32_x))) svuint32_t svexth_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u64_x))) svuint64_t svexth_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u32_z))) svuint32_t svexth_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u64_z))) svuint64_t svexth_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_s64_m))) svint64_t svextw_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_s64_x))) svint64_t svextw_s64_x(svbool_t, svint64_t); __ai 
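/* Editor's sketch (not from the original header): svextb/svexth/svextw sign-extend
 * (for the _s types) or zero-extend (for the _u types) the low 8, 16 or 32 bits of
 * each element in place, leaving the element width unchanged.  They are distinct
 * from svext_* above, which concatenates two vectors and extracts a window starting
 * at a constant element index. */
#include <arm_sve.h>
svint32_t bytes_to_s32_lanes(svbool_t pg, svint32_t packed) {
    return svextb_s32_x(pg, packed);   /* sign-extend bits 7:0 of each 32-bit lane */
}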
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_s64_z))) svint64_t svextw_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_u64_m))) svuint64_t svextw_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_u64_x))) svuint64_t svextw_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_u64_z))) svuint64_t svextw_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_u8))) svuint8_t svget2_u8(svuint8x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_u32))) svuint32_t svget2_u32(svuint32x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_u64))) svuint64_t svget2_u64(svuint64x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_u16))) svuint16_t svget2_u16(svuint16x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s8))) svint8_t svget2_s8(svint8x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_f64))) svfloat64_t svget2_f64(svfloat64x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_f32))) svfloat32_t svget2_f32(svfloat32x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_f16))) svfloat16_t svget2_f16(svfloat16x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s32))) svint32_t svget2_s32(svint32x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s64))) svint64_t svget2_s64(svint64x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s16))) svint16_t svget2_s16(svint16x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u8))) svuint8_t svget3_u8(svuint8x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u32))) svuint32_t svget3_u32(svuint32x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u64))) svuint64_t svget3_u64(svuint64x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u16))) svuint16_t svget3_u16(svuint16x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s8))) svint8_t svget3_s8(svint8x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_f64))) svfloat64_t svget3_f64(svfloat64x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_f32))) svfloat32_t svget3_f32(svfloat32x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_f16))) svfloat16_t svget3_f16(svfloat16x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s32))) svint32_t svget3_s32(svint32x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s64))) svint64_t svget3_s64(svint64x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s16))) svint16_t svget3_s16(svint16x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u8))) svuint8_t svget4_u8(svuint8x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u32))) svuint32_t svget4_u32(svuint32x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u64))) svuint64_t svget4_u64(svuint64x4_t, uint64_t); __ai 
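/* Editor's sketch (not from the original header): the svget2/svget3/svget4 families
 * pull one vector out of a two-, three- or four-vector tuple such as svfloat32x2_t;
 * the index must be a compile-time constant within range. */
#include <arm_sve.h>
svfloat32_t first_of_pair(svfloat32x2_t pair) {
    return svget2_f32(pair, 0);        /* valid indices here are 0 and 1 */
}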
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u16))) svuint16_t svget4_u16(svuint16x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s8))) svint8_t svget4_s8(svint8x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_f64))) svfloat64_t svget4_f64(svfloat64x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_f32))) svfloat32_t svget4_f32(svfloat32x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_f16))) svfloat16_t svget4_f16(svfloat16x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s32))) svint32_t svget4_s32(svint32x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s64))) svint64_t svget4_s64(svint64x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s16))) svint16_t svget4_s16(svint16x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svindex_u8))) svuint8_t svindex_u8(uint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svindex_u32))) svuint32_t svindex_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svindex_u64))) svuint64_t svindex_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svindex_u16))) svuint16_t svindex_u16(uint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svindex_s8))) svint8_t svindex_s8(int8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svindex_s32))) svint32_t svindex_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svindex_s64))) svint64_t svindex_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svindex_s16))) svint16_t svindex_s16(int16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u8))) svuint8_t svinsr_n_u8(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u32))) svuint32_t svinsr_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u64))) svuint64_t svinsr_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u16))) svuint16_t svinsr_n_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_s8))) svint8_t svinsr_n_s8(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_f64))) svfloat64_t svinsr_n_f64(svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_f32))) svfloat32_t svinsr_n_f32(svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_f16))) svfloat16_t svinsr_n_f16(svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_s32))) svint32_t svinsr_n_s32(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_s64))) svint64_t svinsr_n_s64(svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_s16))) svint16_t svinsr_n_s16(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_u8))) uint8_t svlasta_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_u32))) uint32_t svlasta_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_u64))) uint64_t 
svlasta_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_u16))) uint16_t svlasta_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_s8))) int8_t svlasta_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_f64))) float64_t svlasta_f64(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_f32))) float32_t svlasta_f32(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_f16))) float16_t svlasta_f16(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_s32))) int32_t svlasta_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_s64))) int64_t svlasta_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_s16))) int16_t svlasta_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_u8))) uint8_t svlastb_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_u32))) uint32_t svlastb_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_u64))) uint64_t svlastb_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_u16))) uint16_t svlastb_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_s8))) int8_t svlastb_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_f64))) float64_t svlastb_f64(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_f32))) float32_t svlastb_f32(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_f16))) float16_t svlastb_f16(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_s32))) int32_t svlastb_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_s64))) int64_t svlastb_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_s16))) int16_t svlastb_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8))) svuint8_t svld1_u8(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32))) svuint32_t svld1_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64))) svuint64_t svld1_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16))) svuint16_t svld1_u16(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8))) svint8_t svld1_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64))) svfloat64_t svld1_f64(svbool_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32))) svfloat32_t svld1_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16))) svfloat16_t svld1_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32))) svint32_t svld1_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64))) svint64_t svld1_s64(svbool_t, int64_t const *); __ai 
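/* Editor's sketch (not from the original header): svld1_* performs a predicated
 * contiguous load (inactive result lanes are zero), and svlasta_*/svlastb_* extract
 * the element after / at the last active lane.  The loop below additionally assumes
 * the standard svcntw() and svwhilelt_b32_s32() helpers declared elsewhere in this
 * header. */
#include <arm_sve.h>
float last_element_loaded(const float *src, int n) {
    float out = 0.0f;
    for (int i = 0; i < n; i += (int)svcntw()) {
        svbool_t pg = svwhilelt_b32_s32(i, n);    /* lanes covering i..n-1 active */
        svfloat32_t v = svld1_f32(pg, src + i);   /* predicated contiguous load */
        out = svlastb_f32(pg, v);                 /* value of the last active lane */
    }
    return out;
}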
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16))) svint16_t svld1_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_index_u32))) svuint32_t svld1_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_index_u64))) svuint64_t svld1_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_index_f64))) svfloat64_t svld1_gather_u64base_index_f64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_index_f32))) svfloat32_t svld1_gather_u32base_index_f32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_index_s32))) svint32_t svld1_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_index_s64))) svint64_t svld1_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_offset_u32))) svuint32_t svld1_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_offset_u64))) svuint64_t svld1_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_offset_f64))) svfloat64_t svld1_gather_u64base_offset_f64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_offset_f32))) svfloat32_t svld1_gather_u32base_offset_f32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_offset_s32))) svint32_t svld1_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_offset_s64))) svint64_t svld1_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_u32))) svuint32_t svld1_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_u64))) svuint64_t svld1_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_f64))) svfloat64_t svld1_gather_u64base_f64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_f32))) svfloat32_t svld1_gather_u32base_f32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_s32))) svint32_t svld1_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_s64))) svint64_t svld1_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32index_u32))) svuint32_t svld1_gather_s32index_u32(svbool_t, uint32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32index_f32))) svfloat32_t svld1_gather_s32index_f32(svbool_t, float32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32index_s32))) svint32_t svld1_gather_s32index_s32(svbool_t, int32_t const *, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32index_u32))) svuint32_t svld1_gather_u32index_u32(svbool_t, uint32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32index_f32))) svfloat32_t svld1_gather_u32index_f32(svbool_t, float32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32index_s32))) svint32_t svld1_gather_u32index_s32(svbool_t, int32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64index_u64))) svuint64_t svld1_gather_s64index_u64(svbool_t, uint64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64index_f64))) svfloat64_t svld1_gather_s64index_f64(svbool_t, float64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64index_s64))) svint64_t svld1_gather_s64index_s64(svbool_t, int64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64index_u64))) svuint64_t svld1_gather_u64index_u64(svbool_t, uint64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64index_f64))) svfloat64_t svld1_gather_u64index_f64(svbool_t, float64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64index_s64))) svint64_t svld1_gather_u64index_s64(svbool_t, int64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32offset_u32))) svuint32_t svld1_gather_s32offset_u32(svbool_t, uint32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32offset_f32))) svfloat32_t svld1_gather_s32offset_f32(svbool_t, float32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32offset_s32))) svint32_t svld1_gather_s32offset_s32(svbool_t, int32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32offset_u32))) svuint32_t svld1_gather_u32offset_u32(svbool_t, uint32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32offset_f32))) svfloat32_t svld1_gather_u32offset_f32(svbool_t, float32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32offset_s32))) svint32_t svld1_gather_u32offset_s32(svbool_t, int32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64offset_u64))) svuint64_t svld1_gather_s64offset_u64(svbool_t, uint64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64offset_f64))) svfloat64_t svld1_gather_s64offset_f64(svbool_t, float64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64offset_s64))) svint64_t svld1_gather_s64offset_s64(svbool_t, int64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64offset_u64))) svuint64_t svld1_gather_u64offset_u64(svbool_t, uint64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64offset_f64))) svfloat64_t svld1_gather_u64offset_f64(svbool_t, float64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64offset_s64))) svint64_t svld1_gather_u64offset_s64(svbool_t, int64_t const *, svuint64_t); __ai 
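/* Editor's sketch (not from the original header): in the gather loads above, the
 * "offset" forms treat the per-lane values as raw byte offsets, the "index" forms
 * scale them by the element size, and the "base" forms keep the per-lane addresses
 * in a vector and add a scalar offset or index. */
#include <arm_sve.h>
svfloat32_t gather_f32(svbool_t pg, const float *base, svint32_t idx) {
    /* each active lane i loads base[idx[i]] */
    return svld1_gather_s32index_f32(pg, base, idx);
}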
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8))) svuint8_t svld1_vnum_u8(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32))) svuint32_t svld1_vnum_u32(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64))) svuint64_t svld1_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16))) svuint16_t svld1_vnum_u16(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8))) svint8_t svld1_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64))) svfloat64_t svld1_vnum_f64(svbool_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32))) svfloat32_t svld1_vnum_f32(svbool_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16))) svfloat16_t svld1_vnum_f16(svbool_t, float16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32))) svint32_t svld1_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64))) svint64_t svld1_vnum_s64(svbool_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16))) svint16_t svld1_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u8))) svuint8_t svld1rq_u8(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u32))) svuint32_t svld1rq_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u64))) svuint64_t svld1rq_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u16))) svuint16_t svld1rq_u16(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s8))) svint8_t svld1rq_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_f64))) svfloat64_t svld1rq_f64(svbool_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_f32))) svfloat32_t svld1rq_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_f16))) svfloat16_t svld1rq_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s32))) svint32_t svld1rq_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s64))) svint64_t svld1rq_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s16))) svint16_t svld1rq_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32base_offset_u32))) svuint32_t svld1sb_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64base_offset_u64))) svuint64_t svld1sb_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32base_offset_s32))) svint32_t svld1sb_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64base_offset_s64))) 
svint64_t svld1sb_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32base_u32))) svuint32_t svld1sb_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64base_u64))) svuint64_t svld1sb_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32base_s32))) svint32_t svld1sb_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64base_s64))) svint64_t svld1sb_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_s32offset_u32))) svuint32_t svld1sb_gather_s32offset_u32(svbool_t, int8_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_s32offset_s32))) svint32_t svld1sb_gather_s32offset_s32(svbool_t, int8_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32offset_u32))) svuint32_t svld1sb_gather_u32offset_u32(svbool_t, int8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32offset_s32))) svint32_t svld1sb_gather_u32offset_s32(svbool_t, int8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_s64offset_u64))) svuint64_t svld1sb_gather_s64offset_u64(svbool_t, int8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_s64offset_s64))) svint64_t svld1sb_gather_s64offset_s64(svbool_t, int8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64offset_u64))) svuint64_t svld1sb_gather_u64offset_u64(svbool_t, int8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64offset_s64))) svint64_t svld1sb_gather_u64offset_s64(svbool_t, int8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_vnum_u32))) svuint32_t svld1sb_vnum_u32(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_vnum_u64))) svuint64_t svld1sb_vnum_u64(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_vnum_u16))) svuint16_t svld1sb_vnum_u16(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_vnum_s32))) svint32_t svld1sb_vnum_s32(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_vnum_s64))) svint64_t svld1sb_vnum_s64(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_vnum_s16))) svint16_t svld1sb_vnum_s16(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_u32))) svuint32_t svld1sb_u32(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_u64))) svuint64_t svld1sb_u64(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_u16))) svuint16_t svld1sb_u16(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_s32))) svint32_t svld1sb_s32(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_s64))) svint64_t svld1sb_s64(svbool_t, int8_t const *); __ai 
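/* Editor's sketch (not from the original header): the svld1sb_* loads read signed
 * 8-bit data from memory and sign-extend each element to the wider lane type named
 * in the suffix; the _vnum forms address the data in whole vector-sized blocks, and
 * the _gather forms combine this with per-lane offsets. */
#include <arm_sve.h>
svint32_t load_s8_as_s32(svbool_t pg, const int8_t *src) {
    return svld1sb_s32(pg, src);   /* one int8_t consumed per active 32-bit lane */
}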
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_s16))) svint16_t svld1sb_s16(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_index_u32))) svuint32_t svld1sh_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_index_u64))) svuint64_t svld1sh_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_index_s32))) svint32_t svld1sh_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_index_s64))) svint64_t svld1sh_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_offset_u32))) svuint32_t svld1sh_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_offset_u64))) svuint64_t svld1sh_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_offset_s32))) svint32_t svld1sh_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_offset_s64))) svint64_t svld1sh_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_u32))) svuint32_t svld1sh_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_u64))) svuint64_t svld1sh_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_s32))) svint32_t svld1sh_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_s64))) svint64_t svld1sh_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s32index_u32))) svuint32_t svld1sh_gather_s32index_u32(svbool_t, int16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s32index_s32))) svint32_t svld1sh_gather_s32index_s32(svbool_t, int16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32index_u32))) svuint32_t svld1sh_gather_u32index_u32(svbool_t, int16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32index_s32))) svint32_t svld1sh_gather_u32index_s32(svbool_t, int16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s64index_u64))) svuint64_t svld1sh_gather_s64index_u64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s64index_s64))) svint64_t svld1sh_gather_s64index_s64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64index_u64))) svuint64_t svld1sh_gather_u64index_u64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64index_s64))) svint64_t svld1sh_gather_u64index_s64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s32offset_u32))) 
svuint32_t svld1sh_gather_s32offset_u32(svbool_t, int16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s32offset_s32))) svint32_t svld1sh_gather_s32offset_s32(svbool_t, int16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32offset_u32))) svuint32_t svld1sh_gather_u32offset_u32(svbool_t, int16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32offset_s32))) svint32_t svld1sh_gather_u32offset_s32(svbool_t, int16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s64offset_u64))) svuint64_t svld1sh_gather_s64offset_u64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s64offset_s64))) svint64_t svld1sh_gather_s64offset_s64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64offset_u64))) svuint64_t svld1sh_gather_u64offset_u64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64offset_s64))) svint64_t svld1sh_gather_u64offset_s64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_vnum_u32))) svuint32_t svld1sh_vnum_u32(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_vnum_u64))) svuint64_t svld1sh_vnum_u64(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_vnum_s32))) svint32_t svld1sh_vnum_s32(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_vnum_s64))) svint64_t svld1sh_vnum_s64(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_u32))) svuint32_t svld1sh_u32(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_u64))) svuint64_t svld1sh_u64(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_s32))) svint32_t svld1sh_s32(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_s64))) svint64_t svld1sh_s64(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_index_u64))) svuint64_t svld1sw_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_index_s64))) svint64_t svld1sw_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_offset_u64))) svuint64_t svld1sw_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_offset_s64))) svint64_t svld1sw_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_u64))) svuint64_t svld1sw_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_s64))) svint64_t svld1sw_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_s64index_u64))) svuint64_t svld1sw_gather_s64index_u64(svbool_t, int32_t const *, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_s64index_s64))) svint64_t svld1sw_gather_s64index_s64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64index_u64))) svuint64_t svld1sw_gather_u64index_u64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64index_s64))) svint64_t svld1sw_gather_u64index_s64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_s64offset_u64))) svuint64_t svld1sw_gather_s64offset_u64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_s64offset_s64))) svint64_t svld1sw_gather_s64offset_s64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64offset_u64))) svuint64_t svld1sw_gather_u64offset_u64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64offset_s64))) svint64_t svld1sw_gather_u64offset_s64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_vnum_u64))) svuint64_t svld1sw_vnum_u64(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_vnum_s64))) svint64_t svld1sw_vnum_s64(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_u64))) svuint64_t svld1sw_u64(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_s64))) svint64_t svld1sw_s64(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32base_offset_u32))) svuint32_t svld1ub_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64base_offset_u64))) svuint64_t svld1ub_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32base_offset_s32))) svint32_t svld1ub_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64base_offset_s64))) svint64_t svld1ub_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32base_u32))) svuint32_t svld1ub_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64base_u64))) svuint64_t svld1ub_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32base_s32))) svint32_t svld1ub_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64base_s64))) svint64_t svld1ub_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_s32offset_u32))) svuint32_t svld1ub_gather_s32offset_u32(svbool_t, uint8_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_s32offset_s32))) svint32_t svld1ub_gather_s32offset_s32(svbool_t, uint8_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32offset_u32))) svuint32_t svld1ub_gather_u32offset_u32(svbool_t, uint8_t const *, svuint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32offset_s32))) svint32_t svld1ub_gather_u32offset_s32(svbool_t, uint8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_s64offset_u64))) svuint64_t svld1ub_gather_s64offset_u64(svbool_t, uint8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_s64offset_s64))) svint64_t svld1ub_gather_s64offset_s64(svbool_t, uint8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64offset_u64))) svuint64_t svld1ub_gather_u64offset_u64(svbool_t, uint8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64offset_s64))) svint64_t svld1ub_gather_u64offset_s64(svbool_t, uint8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_vnum_u32))) svuint32_t svld1ub_vnum_u32(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_vnum_u64))) svuint64_t svld1ub_vnum_u64(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_vnum_u16))) svuint16_t svld1ub_vnum_u16(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_vnum_s32))) svint32_t svld1ub_vnum_s32(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_vnum_s64))) svint64_t svld1ub_vnum_s64(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_vnum_s16))) svint16_t svld1ub_vnum_s16(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_u32))) svuint32_t svld1ub_u32(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_u64))) svuint64_t svld1ub_u64(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_u16))) svuint16_t svld1ub_u16(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_s32))) svint32_t svld1ub_s32(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_s64))) svint64_t svld1ub_s64(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_s16))) svint16_t svld1ub_s16(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_index_u32))) svuint32_t svld1uh_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_index_u64))) svuint64_t svld1uh_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_index_s32))) svint32_t svld1uh_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_index_s64))) svint64_t svld1uh_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_offset_u32))) svuint32_t svld1uh_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_offset_u64))) svuint64_t svld1uh_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_offset_s32))) svint32_t svld1uh_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_offset_s64))) svint64_t svld1uh_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_u32))) svuint32_t svld1uh_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_u64))) svuint64_t svld1uh_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_s32))) svint32_t svld1uh_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_s64))) svint64_t svld1uh_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s32index_u32))) svuint32_t svld1uh_gather_s32index_u32(svbool_t, uint16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s32index_s32))) svint32_t svld1uh_gather_s32index_s32(svbool_t, uint16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32index_u32))) svuint32_t svld1uh_gather_u32index_u32(svbool_t, uint16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32index_s32))) svint32_t svld1uh_gather_u32index_s32(svbool_t, uint16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s64index_u64))) svuint64_t svld1uh_gather_s64index_u64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s64index_s64))) svint64_t svld1uh_gather_s64index_s64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64index_u64))) svuint64_t svld1uh_gather_u64index_u64(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64index_s64))) svint64_t svld1uh_gather_u64index_s64(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s32offset_u32))) svuint32_t svld1uh_gather_s32offset_u32(svbool_t, uint16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s32offset_s32))) svint32_t svld1uh_gather_s32offset_s32(svbool_t, uint16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32offset_u32))) svuint32_t svld1uh_gather_u32offset_u32(svbool_t, uint16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32offset_s32))) svint32_t svld1uh_gather_u32offset_s32(svbool_t, uint16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s64offset_u64))) svuint64_t svld1uh_gather_s64offset_u64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s64offset_s64))) svint64_t svld1uh_gather_s64offset_s64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64offset_u64))) svuint64_t svld1uh_gather_u64offset_u64(svbool_t, uint16_t const *, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64offset_s64))) svint64_t svld1uh_gather_u64offset_s64(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_vnum_u32))) svuint32_t svld1uh_vnum_u32(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_vnum_u64))) svuint64_t svld1uh_vnum_u64(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_vnum_s32))) svint32_t svld1uh_vnum_s32(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_vnum_s64))) svint64_t svld1uh_vnum_s64(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_u32))) svuint32_t svld1uh_u32(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_u64))) svuint64_t svld1uh_u64(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_s32))) svint32_t svld1uh_s32(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_s64))) svint64_t svld1uh_s64(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_index_u64))) svuint64_t svld1uw_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_index_s64))) svint64_t svld1uw_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_offset_u64))) svuint64_t svld1uw_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_offset_s64))) svint64_t svld1uw_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_u64))) svuint64_t svld1uw_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_s64))) svint64_t svld1uw_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_s64index_u64))) svuint64_t svld1uw_gather_s64index_u64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_s64index_s64))) svint64_t svld1uw_gather_s64index_s64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64index_u64))) svuint64_t svld1uw_gather_u64index_u64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64index_s64))) svint64_t svld1uw_gather_u64index_s64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_s64offset_u64))) svuint64_t svld1uw_gather_s64offset_u64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_s64offset_s64))) svint64_t svld1uw_gather_s64offset_s64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64offset_u64))) svuint64_t svld1uw_gather_u64offset_u64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64offset_s64))) svint64_t 
svld1uw_gather_u64offset_s64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_vnum_u64))) svuint64_t svld1uw_vnum_u64(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_vnum_s64))) svint64_t svld1uw_vnum_s64(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_u64))) svuint64_t svld1uw_u64(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_s64))) svint64_t svld1uw_s64(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u8))) svuint8x2_t svld2_u8(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u32))) svuint32x2_t svld2_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u64))) svuint64x2_t svld2_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u16))) svuint16x2_t svld2_u16(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s8))) svint8x2_t svld2_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_f64))) svfloat64x2_t svld2_f64(svbool_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_f32))) svfloat32x2_t svld2_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_f16))) svfloat16x2_t svld2_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s32))) svint32x2_t svld2_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s64))) svint64x2_t svld2_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s16))) svint16x2_t svld2_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u8))) svuint8x2_t svld2_vnum_u8(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u32))) svuint32x2_t svld2_vnum_u32(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u64))) svuint64x2_t svld2_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u16))) svuint16x2_t svld2_vnum_u16(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s8))) svint8x2_t svld2_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_f64))) svfloat64x2_t svld2_vnum_f64(svbool_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_f32))) svfloat32x2_t svld2_vnum_f32(svbool_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_f16))) svfloat16x2_t svld2_vnum_f16(svbool_t, float16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s32))) svint32x2_t svld2_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s64))) svint64x2_t svld2_vnum_s64(svbool_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s16))) svint16x2_t svld2_vnum_s16(svbool_t, int16_t const *, int64_t); 
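/*
 * Illustrative usage sketch, not part of the generated header text above: the
 * svld1sb/svld1sh/svld1sw/svld1ub/svld1uh/svld1uw declarations are predicated
 * widening loads (read a narrow element, sign- or zero-extend it into a wider
 * lane), and the _gather variants take a vector of offsets or indices instead
 * of a single contiguous pointer. The helper names sum_bytes_widened and
 * gather_samples below are hypothetical; the sketch assumes a separate
 * translation unit compiled with SVE enabled (for example -march=armv8-a+sve).
 */
#include <arm_sve.h>
#include <stddef.h>
#include <stdint.h>

/* Sum signed bytes into a 64-bit total: svld1sb_s32 loads svcntw() bytes per
 * iteration and sign-extends each byte into a 32-bit lane under predicate pg. */
static int64_t sum_bytes_widened(const int8_t *src, size_t n) {
    int64_t total = 0;
    for (size_t i = 0; i < n; i += svcntw()) {
        svbool_t pg = svwhilelt_b32((uint64_t)i, (uint64_t)n);
        svint32_t v = svld1sb_s32(pg, src + i);   /* int8 -> int32 widening load */
        total += svaddv_s32(pg, v);               /* horizontal add of active lanes */
    }
    return total;
}

/* Gather 16-bit samples through a table of 32-bit indices:
 * svld1sh_gather_s32index_s32 reads table[idx[lane]] for each active lane and
 * sign-extends it to a 32-bit element. */
static svint32_t gather_samples(svbool_t pg, const int16_t *table, svint32_t idx) {
    return svld1sh_gather_s32index_s32(pg, table, idx);
}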
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u8))) svuint8x3_t svld3_u8(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u32))) svuint32x3_t svld3_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u64))) svuint64x3_t svld3_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u16))) svuint16x3_t svld3_u16(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s8))) svint8x3_t svld3_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_f64))) svfloat64x3_t svld3_f64(svbool_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_f32))) svfloat32x3_t svld3_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_f16))) svfloat16x3_t svld3_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s32))) svint32x3_t svld3_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s64))) svint64x3_t svld3_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s16))) svint16x3_t svld3_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u8))) svuint8x3_t svld3_vnum_u8(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u32))) svuint32x3_t svld3_vnum_u32(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u64))) svuint64x3_t svld3_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u16))) svuint16x3_t svld3_vnum_u16(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s8))) svint8x3_t svld3_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_f64))) svfloat64x3_t svld3_vnum_f64(svbool_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_f32))) svfloat32x3_t svld3_vnum_f32(svbool_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_f16))) svfloat16x3_t svld3_vnum_f16(svbool_t, float16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s32))) svint32x3_t svld3_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s64))) svint64x3_t svld3_vnum_s64(svbool_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s16))) svint16x3_t svld3_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u8))) svuint8x4_t svld4_u8(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u32))) svuint32x4_t svld4_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u64))) svuint64x4_t svld4_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u16))) svuint16x4_t svld4_u16(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s8))) svint8x4_t svld4_s8(svbool_t, int8_t const *); 
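/*
 * Illustrative usage sketch, not part of the header: svld2/svld3/svld4 are
 * de-interleaving structure loads that return a tuple of 2, 3 or 4 vectors.
 * Below, interleaved stereo int16 samples and packed RGB bytes are split into
 * separate planes. The helper names split_stereo and split_rgb are
 * hypothetical; svst1_s16/svst1_u8 are the matching contiguous stores declared
 * elsewhere in this header. Assumes a unit compiled with SVE enabled.
 */
#include <arm_sve.h>
#include <stddef.h>
#include <stdint.h>

/* Interleaved L/R samples -> two planar channels using svld2_s16. */
static void split_stereo(const int16_t *lr, int16_t *l, int16_t *r, size_t frames) {
    for (size_t i = 0; i < frames; i += svcnth()) {
        svbool_t pg = svwhilelt_b16((uint64_t)i, (uint64_t)frames);
        svint16x2_t ch = svld2_s16(pg, lr + 2 * i);  /* de-interleaves into {L, R} */
        svst1_s16(pg, l + i, svget2_s16(ch, 0));
        svst1_s16(pg, r + i, svget2_s16(ch, 1));
    }
}

/* Packed RGB bytes -> three planes using svld3_u8. */
static void split_rgb(const uint8_t *rgb, uint8_t *rp, uint8_t *gp, uint8_t *bp, size_t px) {
    for (size_t i = 0; i < px; i += svcntb()) {
        svbool_t pg = svwhilelt_b8((uint64_t)i, (uint64_t)px);
        svuint8x3_t c = svld3_u8(pg, rgb + 3 * i);   /* de-interleaves into {R, G, B} */
        svst1_u8(pg, rp + i, svget3_u8(c, 0));
        svst1_u8(pg, gp + i, svget3_u8(c, 1));
        svst1_u8(pg, bp + i, svget3_u8(c, 2));
    }
}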
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_f64))) svfloat64x4_t svld4_f64(svbool_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_f32))) svfloat32x4_t svld4_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_f16))) svfloat16x4_t svld4_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s32))) svint32x4_t svld4_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s64))) svint64x4_t svld4_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s16))) svint16x4_t svld4_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u8))) svuint8x4_t svld4_vnum_u8(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u32))) svuint32x4_t svld4_vnum_u32(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u64))) svuint64x4_t svld4_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u16))) svuint16x4_t svld4_vnum_u16(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s8))) svint8x4_t svld4_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_f64))) svfloat64x4_t svld4_vnum_f64(svbool_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_f32))) svfloat32x4_t svld4_vnum_f32(svbool_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_f16))) svfloat16x4_t svld4_vnum_f16(svbool_t, float16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s32))) svint32x4_t svld4_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s64))) svint64x4_t svld4_vnum_s64(svbool_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s16))) svint16x4_t svld4_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_u8))) svuint8_t svldff1_u8(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_u32))) svuint32_t svldff1_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_u64))) svuint64_t svldff1_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_u16))) svuint16_t svldff1_u16(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s8))) svint8_t svldff1_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_f64))) svfloat64_t svldff1_f64(svbool_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_f32))) svfloat32_t svldff1_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_f16))) svfloat16_t svldff1_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s32))) svint32_t svldff1_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s64))) svint64_t 
svldff1_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s16))) svint16_t svldff1_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_index_u32))) svuint32_t svldff1_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_index_u64))) svuint64_t svldff1_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_index_f64))) svfloat64_t svldff1_gather_u64base_index_f64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_index_f32))) svfloat32_t svldff1_gather_u32base_index_f32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_index_s32))) svint32_t svldff1_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_index_s64))) svint64_t svldff1_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_offset_u32))) svuint32_t svldff1_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_offset_u64))) svuint64_t svldff1_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_offset_f64))) svfloat64_t svldff1_gather_u64base_offset_f64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_offset_f32))) svfloat32_t svldff1_gather_u32base_offset_f32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_offset_s32))) svint32_t svldff1_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_offset_s64))) svint64_t svldff1_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_u32))) svuint32_t svldff1_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_u64))) svuint64_t svldff1_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_f64))) svfloat64_t svldff1_gather_u64base_f64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_f32))) svfloat32_t svldff1_gather_u32base_f32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_s32))) svint32_t svldff1_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_s64))) svint64_t svldff1_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32index_u32))) svuint32_t svldff1_gather_s32index_u32(svbool_t, uint32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32index_f32))) svfloat32_t svldff1_gather_s32index_f32(svbool_t, float32_t const *, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32index_s32))) svint32_t svldff1_gather_s32index_s32(svbool_t, int32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32index_u32))) svuint32_t svldff1_gather_u32index_u32(svbool_t, uint32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32index_f32))) svfloat32_t svldff1_gather_u32index_f32(svbool_t, float32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32index_s32))) svint32_t svldff1_gather_u32index_s32(svbool_t, int32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64index_u64))) svuint64_t svldff1_gather_s64index_u64(svbool_t, uint64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64index_f64))) svfloat64_t svldff1_gather_s64index_f64(svbool_t, float64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64index_s64))) svint64_t svldff1_gather_s64index_s64(svbool_t, int64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64index_u64))) svuint64_t svldff1_gather_u64index_u64(svbool_t, uint64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64index_f64))) svfloat64_t svldff1_gather_u64index_f64(svbool_t, float64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64index_s64))) svint64_t svldff1_gather_u64index_s64(svbool_t, int64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32offset_u32))) svuint32_t svldff1_gather_s32offset_u32(svbool_t, uint32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32offset_f32))) svfloat32_t svldff1_gather_s32offset_f32(svbool_t, float32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32offset_s32))) svint32_t svldff1_gather_s32offset_s32(svbool_t, int32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32offset_u32))) svuint32_t svldff1_gather_u32offset_u32(svbool_t, uint32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32offset_f32))) svfloat32_t svldff1_gather_u32offset_f32(svbool_t, float32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32offset_s32))) svint32_t svldff1_gather_u32offset_s32(svbool_t, int32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64offset_u64))) svuint64_t svldff1_gather_s64offset_u64(svbool_t, uint64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64offset_f64))) svfloat64_t svldff1_gather_s64offset_f64(svbool_t, float64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64offset_s64))) svint64_t svldff1_gather_s64offset_s64(svbool_t, int64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64offset_u64))) svuint64_t svldff1_gather_u64offset_u64(svbool_t, uint64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64offset_f64))) svfloat64_t 
svldff1_gather_u64offset_f64(svbool_t, float64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64offset_s64))) svint64_t svldff1_gather_u64offset_s64(svbool_t, int64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_u8))) svuint8_t svldff1_vnum_u8(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_u32))) svuint32_t svldff1_vnum_u32(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_u64))) svuint64_t svldff1_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_u16))) svuint16_t svldff1_vnum_u16(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s8))) svint8_t svldff1_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_f64))) svfloat64_t svldff1_vnum_f64(svbool_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_f32))) svfloat32_t svldff1_vnum_f32(svbool_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_f16))) svfloat16_t svldff1_vnum_f16(svbool_t, float16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s32))) svint32_t svldff1_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s64))) svint64_t svldff1_vnum_s64(svbool_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s16))) svint16_t svldff1_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32base_offset_u32))) svuint32_t svldff1sb_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64base_offset_u64))) svuint64_t svldff1sb_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32base_offset_s32))) svint32_t svldff1sb_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64base_offset_s64))) svint64_t svldff1sb_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32base_u32))) svuint32_t svldff1sb_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64base_u64))) svuint64_t svldff1sb_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32base_s32))) svint32_t svldff1sb_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64base_s64))) svint64_t svldff1sb_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_s32offset_u32))) svuint32_t svldff1sb_gather_s32offset_u32(svbool_t, int8_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_s32offset_s32))) svint32_t svldff1sb_gather_s32offset_s32(svbool_t, int8_t const *, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32offset_u32))) svuint32_t svldff1sb_gather_u32offset_u32(svbool_t, int8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32offset_s32))) svint32_t svldff1sb_gather_u32offset_s32(svbool_t, int8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_s64offset_u64))) svuint64_t svldff1sb_gather_s64offset_u64(svbool_t, int8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_s64offset_s64))) svint64_t svldff1sb_gather_s64offset_s64(svbool_t, int8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64offset_u64))) svuint64_t svldff1sb_gather_u64offset_u64(svbool_t, int8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64offset_s64))) svint64_t svldff1sb_gather_u64offset_s64(svbool_t, int8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_vnum_u32))) svuint32_t svldff1sb_vnum_u32(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_vnum_u64))) svuint64_t svldff1sb_vnum_u64(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_vnum_u16))) svuint16_t svldff1sb_vnum_u16(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_vnum_s32))) svint32_t svldff1sb_vnum_s32(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_vnum_s64))) svint64_t svldff1sb_vnum_s64(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_vnum_s16))) svint16_t svldff1sb_vnum_s16(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_u32))) svuint32_t svldff1sb_u32(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_u64))) svuint64_t svldff1sb_u64(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_u16))) svuint16_t svldff1sb_u16(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_s32))) svint32_t svldff1sb_s32(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_s64))) svint64_t svldff1sb_s64(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_s16))) svint16_t svldff1sb_s16(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_index_u32))) svuint32_t svldff1sh_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_index_u64))) svuint64_t svldff1sh_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_index_s32))) svint32_t svldff1sh_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_index_s64))) svint64_t svldff1sh_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_offset_u32))) svuint32_t svldff1sh_gather_u32base_offset_u32(svbool_t, 
svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_offset_u64))) svuint64_t svldff1sh_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_offset_s32))) svint32_t svldff1sh_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_offset_s64))) svint64_t svldff1sh_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_u32))) svuint32_t svldff1sh_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_u64))) svuint64_t svldff1sh_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_s32))) svint32_t svldff1sh_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_s64))) svint64_t svldff1sh_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s32index_u32))) svuint32_t svldff1sh_gather_s32index_u32(svbool_t, int16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s32index_s32))) svint32_t svldff1sh_gather_s32index_s32(svbool_t, int16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32index_u32))) svuint32_t svldff1sh_gather_u32index_u32(svbool_t, int16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32index_s32))) svint32_t svldff1sh_gather_u32index_s32(svbool_t, int16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s64index_u64))) svuint64_t svldff1sh_gather_s64index_u64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s64index_s64))) svint64_t svldff1sh_gather_s64index_s64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64index_u64))) svuint64_t svldff1sh_gather_u64index_u64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64index_s64))) svint64_t svldff1sh_gather_u64index_s64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s32offset_u32))) svuint32_t svldff1sh_gather_s32offset_u32(svbool_t, int16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s32offset_s32))) svint32_t svldff1sh_gather_s32offset_s32(svbool_t, int16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32offset_u32))) svuint32_t svldff1sh_gather_u32offset_u32(svbool_t, int16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32offset_s32))) svint32_t svldff1sh_gather_u32offset_s32(svbool_t, int16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s64offset_u64))) svuint64_t svldff1sh_gather_s64offset_u64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s64offset_s64))) svint64_t 
svldff1sh_gather_s64offset_s64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64offset_u64))) svuint64_t svldff1sh_gather_u64offset_u64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64offset_s64))) svint64_t svldff1sh_gather_u64offset_s64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_vnum_u32))) svuint32_t svldff1sh_vnum_u32(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_vnum_u64))) svuint64_t svldff1sh_vnum_u64(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_vnum_s32))) svint32_t svldff1sh_vnum_s32(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_vnum_s64))) svint64_t svldff1sh_vnum_s64(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_u32))) svuint32_t svldff1sh_u32(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_u64))) svuint64_t svldff1sh_u64(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_s32))) svint32_t svldff1sh_s32(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_s64))) svint64_t svldff1sh_s64(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_index_u64))) svuint64_t svldff1sw_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_index_s64))) svint64_t svldff1sw_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_offset_u64))) svuint64_t svldff1sw_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_offset_s64))) svint64_t svldff1sw_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_u64))) svuint64_t svldff1sw_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_s64))) svint64_t svldff1sw_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_s64index_u64))) svuint64_t svldff1sw_gather_s64index_u64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_s64index_s64))) svint64_t svldff1sw_gather_s64index_s64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64index_u64))) svuint64_t svldff1sw_gather_u64index_u64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64index_s64))) svint64_t svldff1sw_gather_u64index_s64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_s64offset_u64))) svuint64_t svldff1sw_gather_s64offset_u64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_s64offset_s64))) svint64_t 
svldff1sw_gather_s64offset_s64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64offset_u64))) svuint64_t svldff1sw_gather_u64offset_u64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64offset_s64))) svint64_t svldff1sw_gather_u64offset_s64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_vnum_u64))) svuint64_t svldff1sw_vnum_u64(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_vnum_s64))) svint64_t svldff1sw_vnum_s64(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_u64))) svuint64_t svldff1sw_u64(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_s64))) svint64_t svldff1sw_s64(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32base_offset_u32))) svuint32_t svldff1ub_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64base_offset_u64))) svuint64_t svldff1ub_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32base_offset_s32))) svint32_t svldff1ub_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64base_offset_s64))) svint64_t svldff1ub_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32base_u32))) svuint32_t svldff1ub_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64base_u64))) svuint64_t svldff1ub_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32base_s32))) svint32_t svldff1ub_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64base_s64))) svint64_t svldff1ub_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_s32offset_u32))) svuint32_t svldff1ub_gather_s32offset_u32(svbool_t, uint8_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_s32offset_s32))) svint32_t svldff1ub_gather_s32offset_s32(svbool_t, uint8_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32offset_u32))) svuint32_t svldff1ub_gather_u32offset_u32(svbool_t, uint8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32offset_s32))) svint32_t svldff1ub_gather_u32offset_s32(svbool_t, uint8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_s64offset_u64))) svuint64_t svldff1ub_gather_s64offset_u64(svbool_t, uint8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_s64offset_s64))) svint64_t svldff1ub_gather_s64offset_s64(svbool_t, uint8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64offset_u64))) svuint64_t svldff1ub_gather_u64offset_u64(svbool_t, uint8_t const *, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64offset_s64))) svint64_t svldff1ub_gather_u64offset_s64(svbool_t, uint8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_vnum_u32))) svuint32_t svldff1ub_vnum_u32(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_vnum_u64))) svuint64_t svldff1ub_vnum_u64(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_vnum_u16))) svuint16_t svldff1ub_vnum_u16(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_vnum_s32))) svint32_t svldff1ub_vnum_s32(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_vnum_s64))) svint64_t svldff1ub_vnum_s64(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_vnum_s16))) svint16_t svldff1ub_vnum_s16(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_u32))) svuint32_t svldff1ub_u32(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_u64))) svuint64_t svldff1ub_u64(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_u16))) svuint16_t svldff1ub_u16(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_s32))) svint32_t svldff1ub_s32(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_s64))) svint64_t svldff1ub_s64(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_s16))) svint16_t svldff1ub_s16(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_index_u32))) svuint32_t svldff1uh_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_index_u64))) svuint64_t svldff1uh_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_index_s32))) svint32_t svldff1uh_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_index_s64))) svint64_t svldff1uh_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_offset_u32))) svuint32_t svldff1uh_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_offset_u64))) svuint64_t svldff1uh_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_offset_s32))) svint32_t svldff1uh_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_offset_s64))) svint64_t svldff1uh_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_u32))) svuint32_t svldff1uh_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_u64))) svuint64_t svldff1uh_gather_u64base_u64(svbool_t, 
svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_s32))) svint32_t svldff1uh_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_s64))) svint64_t svldff1uh_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s32index_u32))) svuint32_t svldff1uh_gather_s32index_u32(svbool_t, uint16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s32index_s32))) svint32_t svldff1uh_gather_s32index_s32(svbool_t, uint16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32index_u32))) svuint32_t svldff1uh_gather_u32index_u32(svbool_t, uint16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32index_s32))) svint32_t svldff1uh_gather_u32index_s32(svbool_t, uint16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s64index_u64))) svuint64_t svldff1uh_gather_s64index_u64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s64index_s64))) svint64_t svldff1uh_gather_s64index_s64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64index_u64))) svuint64_t svldff1uh_gather_u64index_u64(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64index_s64))) svint64_t svldff1uh_gather_u64index_s64(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s32offset_u32))) svuint32_t svldff1uh_gather_s32offset_u32(svbool_t, uint16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s32offset_s32))) svint32_t svldff1uh_gather_s32offset_s32(svbool_t, uint16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32offset_u32))) svuint32_t svldff1uh_gather_u32offset_u32(svbool_t, uint16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32offset_s32))) svint32_t svldff1uh_gather_u32offset_s32(svbool_t, uint16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s64offset_u64))) svuint64_t svldff1uh_gather_s64offset_u64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s64offset_s64))) svint64_t svldff1uh_gather_s64offset_s64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64offset_u64))) svuint64_t svldff1uh_gather_u64offset_u64(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64offset_s64))) svint64_t svldff1uh_gather_u64offset_s64(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_vnum_u32))) svuint32_t svldff1uh_vnum_u32(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_vnum_u64))) svuint64_t svldff1uh_vnum_u64(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_vnum_s32))) svint32_t svldff1uh_vnum_s32(svbool_t, uint16_t 
const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_vnum_s64))) svint64_t svldff1uh_vnum_s64(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_u32))) svuint32_t svldff1uh_u32(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_u64))) svuint64_t svldff1uh_u64(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_s32))) svint32_t svldff1uh_s32(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_s64))) svint64_t svldff1uh_s64(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_index_u64))) svuint64_t svldff1uw_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_index_s64))) svint64_t svldff1uw_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_offset_u64))) svuint64_t svldff1uw_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_offset_s64))) svint64_t svldff1uw_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_u64))) svuint64_t svldff1uw_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_s64))) svint64_t svldff1uw_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_s64index_u64))) svuint64_t svldff1uw_gather_s64index_u64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_s64index_s64))) svint64_t svldff1uw_gather_s64index_s64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64index_u64))) svuint64_t svldff1uw_gather_u64index_u64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64index_s64))) svint64_t svldff1uw_gather_u64index_s64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_s64offset_u64))) svuint64_t svldff1uw_gather_s64offset_u64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_s64offset_s64))) svint64_t svldff1uw_gather_s64offset_s64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64offset_u64))) svuint64_t svldff1uw_gather_u64offset_u64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64offset_s64))) svint64_t svldff1uw_gather_u64offset_s64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_vnum_u64))) svuint64_t svldff1uw_vnum_u64(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_vnum_s64))) svint64_t svldff1uw_vnum_s64(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_u64))) svuint64_t svldff1uw_u64(svbool_t, uint32_t const *); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_s64))) svint64_t svldff1uw_s64(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_u8))) svuint8_t svldnf1_u8(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_u32))) svuint32_t svldnf1_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_u64))) svuint64_t svldnf1_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_u16))) svuint16_t svldnf1_u16(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s8))) svint8_t svldnf1_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_f64))) svfloat64_t svldnf1_f64(svbool_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_f32))) svfloat32_t svldnf1_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_f16))) svfloat16_t svldnf1_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s32))) svint32_t svldnf1_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s64))) svint64_t svldnf1_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s16))) svint16_t svldnf1_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u8))) svuint8_t svldnf1_vnum_u8(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u32))) svuint32_t svldnf1_vnum_u32(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u64))) svuint64_t svldnf1_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u16))) svuint16_t svldnf1_vnum_u16(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s8))) svint8_t svldnf1_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_f64))) svfloat64_t svldnf1_vnum_f64(svbool_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_f32))) svfloat32_t svldnf1_vnum_f32(svbool_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_f16))) svfloat16_t svldnf1_vnum_f16(svbool_t, float16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s32))) svint32_t svldnf1_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s64))) svint64_t svldnf1_vnum_s64(svbool_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s16))) svint16_t svldnf1_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_vnum_u32))) svuint32_t svldnf1sb_vnum_u32(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_vnum_u64))) svuint64_t svldnf1sb_vnum_u64(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_vnum_u16))) svuint16_t svldnf1sb_vnum_u16(svbool_t, int8_t const *, int64_t); 
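/*
 * Editor's note -- illustrative sketch, not part of the original header.
 * The svldff1* declarations above are the first-faulting loads and the
 * svldnf1* declarations are the non-faulting loads: only the first active
 * element (ldff1) or no element at all (ldnf1) may raise a fault; elements
 * that could not be loaded instead clear the corresponding bits of the
 * first-fault register (FFR). A typical speculative-loop pattern, assuming a
 * standalone translation unit built with SVE enabled (e.g. -march=armv8-a+sve)
 * and the FFR helpers svsetffr()/svrdffr_z() declared elsewhere in this header:
 *
 *   #include <arm_sve.h>
 *
 *   // Sum up to n int32 values from p, stopping early if the speculative
 *   // load runs into unreadable memory.
 *   int64_t sum_ffr(const int32_t *p, int64_t n) {
 *       int64_t i = 0, acc = 0;
 *       while (i < n) {
 *           svbool_t pg = svwhilelt_b32_s64(i, n);
 *           svsetffr();                           // all-true FFR before the load
 *           svint32_t v  = svldff1_s32(pg, p + i);
 *           svbool_t  ok = svrdffr_z(pg);         // lanes that actually loaded
 *           acc += svaddv_s32(ok, v);             // reduce only the valid lanes
 *           i   += (int64_t)svcntp_b32(pg, ok);   // advance past them
 *           if (!svptest_any(pg, ok)) break;      // defensive: nothing loaded
 *       }
 *       return acc;
 *   }
 */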
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_vnum_s32))) svint32_t svldnf1sb_vnum_s32(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_vnum_s64))) svint64_t svldnf1sb_vnum_s64(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_vnum_s16))) svint16_t svldnf1sb_vnum_s16(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_u32))) svuint32_t svldnf1sb_u32(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_u64))) svuint64_t svldnf1sb_u64(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_u16))) svuint16_t svldnf1sb_u16(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_s32))) svint32_t svldnf1sb_s32(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_s64))) svint64_t svldnf1sb_s64(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_s16))) svint16_t svldnf1sb_s16(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sh_vnum_u32))) svuint32_t svldnf1sh_vnum_u32(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sh_vnum_u64))) svuint64_t svldnf1sh_vnum_u64(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sh_vnum_s32))) svint32_t svldnf1sh_vnum_s32(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sh_vnum_s64))) svint64_t svldnf1sh_vnum_s64(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sh_u32))) svuint32_t svldnf1sh_u32(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sh_u64))) svuint64_t svldnf1sh_u64(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sh_s32))) svint32_t svldnf1sh_s32(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sh_s64))) svint64_t svldnf1sh_s64(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sw_vnum_u64))) svuint64_t svldnf1sw_vnum_u64(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sw_vnum_s64))) svint64_t svldnf1sw_vnum_s64(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sw_u64))) svuint64_t svldnf1sw_u64(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sw_s64))) svint64_t svldnf1sw_s64(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_vnum_u32))) svuint32_t svldnf1ub_vnum_u32(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_vnum_u64))) svuint64_t svldnf1ub_vnum_u64(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_vnum_u16))) svuint16_t svldnf1ub_vnum_u16(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_vnum_s32))) svint32_t svldnf1ub_vnum_s32(svbool_t, uint8_t const *, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_vnum_s64))) svint64_t svldnf1ub_vnum_s64(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_vnum_s16))) svint16_t svldnf1ub_vnum_s16(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_u32))) svuint32_t svldnf1ub_u32(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_u64))) svuint64_t svldnf1ub_u64(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_u16))) svuint16_t svldnf1ub_u16(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_s32))) svint32_t svldnf1ub_s32(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_s64))) svint64_t svldnf1ub_s64(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_s16))) svint16_t svldnf1ub_s16(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uh_vnum_u32))) svuint32_t svldnf1uh_vnum_u32(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uh_vnum_u64))) svuint64_t svldnf1uh_vnum_u64(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uh_vnum_s32))) svint32_t svldnf1uh_vnum_s32(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uh_vnum_s64))) svint64_t svldnf1uh_vnum_s64(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uh_u32))) svuint32_t svldnf1uh_u32(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uh_u64))) svuint64_t svldnf1uh_u64(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uh_s32))) svint32_t svldnf1uh_s32(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uh_s64))) svint64_t svldnf1uh_s64(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uw_vnum_u64))) svuint64_t svldnf1uw_vnum_u64(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uw_vnum_s64))) svint64_t svldnf1uw_vnum_s64(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uw_u64))) svuint64_t svldnf1uw_u64(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uw_s64))) svint64_t svldnf1uw_s64(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8))) svuint8_t svldnt1_u8(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32))) svuint32_t svldnt1_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64))) svuint64_t svldnt1_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16))) svuint16_t svldnt1_u16(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8))) svint8_t svldnt1_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64))) svfloat64_t svldnt1_f64(svbool_t, float64_t const *); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32))) svfloat32_t svldnt1_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16))) svfloat16_t svldnt1_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32))) svint32_t svldnt1_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64))) svint64_t svldnt1_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16))) svint16_t svldnt1_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8))) svuint8_t svldnt1_vnum_u8(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32))) svuint32_t svldnt1_vnum_u32(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64))) svuint64_t svldnt1_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16))) svuint16_t svldnt1_vnum_u16(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8))) svint8_t svldnt1_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64))) svfloat64_t svldnt1_vnum_f64(svbool_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32))) svfloat32_t svldnt1_vnum_f32(svbool_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16))) svfloat16_t svldnt1_vnum_f16(svbool_t, float16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32))) svint32_t svldnt1_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64))) svint64_t svldnt1_vnum_s64(svbool_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16))) svint16_t svldnt1_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u8))) uint64_t svlen_u8(svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u32))) uint64_t svlen_u32(svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u64))) uint64_t svlen_u64(svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u16))) uint64_t svlen_u16(svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_s8))) uint64_t svlen_s8(svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_f64))) uint64_t svlen_f64(svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_f32))) uint64_t svlen_f32(svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_f16))) uint64_t svlen_f16(svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_s32))) uint64_t svlen_s32(svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_s64))) uint64_t svlen_s64(svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_s16))) uint64_t svlen_s16(svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u8_m))) svuint8_t svlsl_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai 
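/*
 * Editor's note -- illustrative sketch, not part of the original header.
 * These declarations follow the ACLE naming scheme used throughout this
 * header: the element type is encoded in the suffix (_u32, _s16, _f64, ...),
 * an _n infix means the last data operand is a scalar broadcast to every
 * lane, _vnum forms address base + vnum whole vectors, and the trailing
 * predication letter selects what happens to inactive lanes: _m merges them
 * from the first data operand, _z zeroes them, _x leaves them unspecified
 * (cheapest when the caller does not care). svlen_* simply returns the number
 * of lanes of the given element type in one vector. A small contrast,
 * assuming a translation unit compiled with SVE enabled:
 *
 *   #include <arm_sve.h>
 *
 *   void shift_demo(svbool_t pg, svuint32_t v) {
 *       svuint32_t a = svlsl_n_u32_m(pg, v, 3);  // inactive lanes keep v
 *       svuint32_t b = svlsl_n_u32_z(pg, v, 3);  // inactive lanes become 0
 *       svuint32_t c = svlsl_n_u32_x(pg, v, 3);  // inactive lanes unspecified
 *       uint64_t lanes = svlen_u32(v);           // e.g. 8 on a 256-bit machine
 *       (void)a; (void)b; (void)c; (void)lanes;
 *   }
 */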
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u32_m))) svuint32_t svlsl_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u64_m))) svuint64_t svlsl_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u16_m))) svuint16_t svlsl_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s8_m))) svint8_t svlsl_n_s8_m(svbool_t, svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s32_m))) svint32_t svlsl_n_s32_m(svbool_t, svint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s64_m))) svint64_t svlsl_n_s64_m(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s16_m))) svint16_t svlsl_n_s16_m(svbool_t, svint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u8_x))) svuint8_t svlsl_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u32_x))) svuint32_t svlsl_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u64_x))) svuint64_t svlsl_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u16_x))) svuint16_t svlsl_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s8_x))) svint8_t svlsl_n_s8_x(svbool_t, svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s32_x))) svint32_t svlsl_n_s32_x(svbool_t, svint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s64_x))) svint64_t svlsl_n_s64_x(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s16_x))) svint16_t svlsl_n_s16_x(svbool_t, svint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u8_z))) svuint8_t svlsl_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u32_z))) svuint32_t svlsl_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u64_z))) svuint64_t svlsl_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u16_z))) svuint16_t svlsl_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s8_z))) svint8_t svlsl_n_s8_z(svbool_t, svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s32_z))) svint32_t svlsl_n_s32_z(svbool_t, svint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s64_z))) svint64_t svlsl_n_s64_z(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s16_z))) svint16_t svlsl_n_s16_z(svbool_t, svint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u8_m))) svuint8_t svlsl_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u32_m))) svuint32_t svlsl_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u64_m))) svuint64_t svlsl_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u16_m))) svuint16_t svlsl_u16_m(svbool_t, 
svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s8_m))) svint8_t svlsl_s8_m(svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s32_m))) svint32_t svlsl_s32_m(svbool_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s64_m))) svint64_t svlsl_s64_m(svbool_t, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s16_m))) svint16_t svlsl_s16_m(svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u8_x))) svuint8_t svlsl_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u32_x))) svuint32_t svlsl_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u64_x))) svuint64_t svlsl_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u16_x))) svuint16_t svlsl_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s8_x))) svint8_t svlsl_s8_x(svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s32_x))) svint32_t svlsl_s32_x(svbool_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s64_x))) svint64_t svlsl_s64_x(svbool_t, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s16_x))) svint16_t svlsl_s16_x(svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u8_z))) svuint8_t svlsl_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u32_z))) svuint32_t svlsl_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u64_z))) svuint64_t svlsl_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u16_z))) svuint16_t svlsl_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s8_z))) svint8_t svlsl_s8_z(svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s32_z))) svint32_t svlsl_s32_z(svbool_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s64_z))) svint64_t svlsl_s64_z(svbool_t, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s16_z))) svint16_t svlsl_s16_z(svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u8_m))) svuint8_t svlsl_wide_n_u8_m(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u32_m))) svuint32_t svlsl_wide_n_u32_m(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u16_m))) svuint16_t svlsl_wide_n_u16_m(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s8_m))) svint8_t svlsl_wide_n_s8_m(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s32_m))) svint32_t svlsl_wide_n_s32_m(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s16_m))) svint16_t svlsl_wide_n_s16_m(svbool_t, svint16_t, uint64_t); __ai 
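/*
 * Editor's note -- illustrative sketch, not part of the original header.
 * The svlsl_wide_* / svlsr_wide_* forms take their shift amounts as 64-bit
 * elements (svuint64_t, or a single uint64_t for the _n variants): each
 * 64-bit shift amount is applied to all of the narrower lanes sharing that
 * doubleword, mirroring the wide-element form of the underlying LSL/LSR
 * instructions. There is no 64-bit element _wide variant, since the ordinary
 * svlsl_u64_* forms already cover that case. For example, assuming SVE is
 * enabled:
 *
 *   #include <arm_sve.h>
 *
 *   svuint16_t scale_u16(svbool_t pg, svuint16_t v) {
 *       // Shift every active 16-bit lane left by 4 bits.
 *       return svlsl_wide_n_u16_x(pg, v, 4);
 *   }
 */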
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u8_x))) svuint8_t svlsl_wide_n_u8_x(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u32_x))) svuint32_t svlsl_wide_n_u32_x(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u16_x))) svuint16_t svlsl_wide_n_u16_x(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s8_x))) svint8_t svlsl_wide_n_s8_x(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s32_x))) svint32_t svlsl_wide_n_s32_x(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s16_x))) svint16_t svlsl_wide_n_s16_x(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u8_z))) svuint8_t svlsl_wide_n_u8_z(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u32_z))) svuint32_t svlsl_wide_n_u32_z(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u16_z))) svuint16_t svlsl_wide_n_u16_z(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s8_z))) svint8_t svlsl_wide_n_s8_z(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s32_z))) svint32_t svlsl_wide_n_s32_z(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s16_z))) svint16_t svlsl_wide_n_s16_z(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u8_m))) svuint8_t svlsl_wide_u8_m(svbool_t, svuint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u32_m))) svuint32_t svlsl_wide_u32_m(svbool_t, svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u16_m))) svuint16_t svlsl_wide_u16_m(svbool_t, svuint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s8_m))) svint8_t svlsl_wide_s8_m(svbool_t, svint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s32_m))) svint32_t svlsl_wide_s32_m(svbool_t, svint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s16_m))) svint16_t svlsl_wide_s16_m(svbool_t, svint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u8_x))) svuint8_t svlsl_wide_u8_x(svbool_t, svuint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u32_x))) svuint32_t svlsl_wide_u32_x(svbool_t, svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u16_x))) svuint16_t svlsl_wide_u16_x(svbool_t, svuint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s8_x))) svint8_t svlsl_wide_s8_x(svbool_t, svint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s32_x))) svint32_t svlsl_wide_s32_x(svbool_t, svint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s16_x))) svint16_t svlsl_wide_s16_x(svbool_t, svint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u8_z))) svuint8_t svlsl_wide_u8_z(svbool_t, svuint8_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u32_z))) svuint32_t svlsl_wide_u32_z(svbool_t, svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u16_z))) svuint16_t svlsl_wide_u16_z(svbool_t, svuint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s8_z))) svint8_t svlsl_wide_s8_z(svbool_t, svint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s32_z))) svint32_t svlsl_wide_s32_z(svbool_t, svint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s16_z))) svint16_t svlsl_wide_s16_z(svbool_t, svint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u8_m))) svuint8_t svlsr_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u32_m))) svuint32_t svlsr_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u64_m))) svuint64_t svlsr_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u16_m))) svuint16_t svlsr_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u8_x))) svuint8_t svlsr_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u32_x))) svuint32_t svlsr_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u64_x))) svuint64_t svlsr_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u16_x))) svuint16_t svlsr_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u8_z))) svuint8_t svlsr_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u32_z))) svuint32_t svlsr_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u64_z))) svuint64_t svlsr_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u16_z))) svuint16_t svlsr_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u8_m))) svuint8_t svlsr_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u32_m))) svuint32_t svlsr_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u64_m))) svuint64_t svlsr_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u16_m))) svuint16_t svlsr_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u8_x))) svuint8_t svlsr_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u32_x))) svuint32_t svlsr_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u64_x))) svuint64_t svlsr_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u16_x))) svuint16_t svlsr_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u8_z))) svuint8_t svlsr_u8_z(svbool_t, svuint8_t, svuint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u32_z))) svuint32_t svlsr_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u64_z))) svuint64_t svlsr_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u16_z))) svuint16_t svlsr_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u8_m))) svuint8_t svlsr_wide_n_u8_m(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u32_m))) svuint32_t svlsr_wide_n_u32_m(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u16_m))) svuint16_t svlsr_wide_n_u16_m(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u8_x))) svuint8_t svlsr_wide_n_u8_x(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u32_x))) svuint32_t svlsr_wide_n_u32_x(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u16_x))) svuint16_t svlsr_wide_n_u16_x(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u8_z))) svuint8_t svlsr_wide_n_u8_z(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u32_z))) svuint32_t svlsr_wide_n_u32_z(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u16_z))) svuint16_t svlsr_wide_n_u16_z(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u8_m))) svuint8_t svlsr_wide_u8_m(svbool_t, svuint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u32_m))) svuint32_t svlsr_wide_u32_m(svbool_t, svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u16_m))) svuint16_t svlsr_wide_u16_m(svbool_t, svuint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u8_x))) svuint8_t svlsr_wide_u8_x(svbool_t, svuint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u32_x))) svuint32_t svlsr_wide_u32_x(svbool_t, svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u16_x))) svuint16_t svlsr_wide_u16_x(svbool_t, svuint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u8_z))) svuint8_t svlsr_wide_u8_z(svbool_t, svuint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u32_z))) svuint32_t svlsr_wide_u32_z(svbool_t, svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u16_z))) svuint16_t svlsr_wide_u16_z(svbool_t, svuint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f64_m))) svfloat64_t svmad_n_f64_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f32_m))) svfloat32_t svmad_n_f32_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f16_m))) svfloat16_t svmad_n_f16_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f64_x))) svfloat64_t svmad_n_f64_x(svbool_t, svfloat64_t, svfloat64_t, 
float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f32_x))) svfloat32_t svmad_n_f32_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f16_x))) svfloat16_t svmad_n_f16_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f64_z))) svfloat64_t svmad_n_f64_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f32_z))) svfloat32_t svmad_n_f32_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f16_z))) svfloat16_t svmad_n_f16_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u8_m))) svuint8_t svmad_n_u8_m(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u32_m))) svuint32_t svmad_n_u32_m(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u64_m))) svuint64_t svmad_n_u64_m(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u16_m))) svuint16_t svmad_n_u16_m(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s8_m))) svint8_t svmad_n_s8_m(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s32_m))) svint32_t svmad_n_s32_m(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s64_m))) svint64_t svmad_n_s64_m(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s16_m))) svint16_t svmad_n_s16_m(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u8_x))) svuint8_t svmad_n_u8_x(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u32_x))) svuint32_t svmad_n_u32_x(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u64_x))) svuint64_t svmad_n_u64_x(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u16_x))) svuint16_t svmad_n_u16_x(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s8_x))) svint8_t svmad_n_s8_x(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s32_x))) svint32_t svmad_n_s32_x(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s64_x))) svint64_t svmad_n_s64_x(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s16_x))) svint16_t svmad_n_s16_x(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u8_z))) svuint8_t svmad_n_u8_z(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u32_z))) svuint32_t svmad_n_u32_z(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u64_z))) svuint64_t svmad_n_u64_z(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai 
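/*
 * Editor's note -- illustrative sketch, not part of the original header.
 * The svmad_* family is the fused multiply-add in multiplicand-first form:
 * the first two data operands are multiplied and the third is added
 * (result = op1 * op2 + op3), with the usual _m/_x/_z predication and _n
 * scalar-broadcast variants. A minimal use, assuming SVE is enabled:
 *
 *   #include <arm_sve.h>
 *
 *   // Evaluate a*x + b lane-wise for 32-bit floats.
 *   svfloat32_t axpb(svbool_t pg, svfloat32_t a, svfloat32_t x, svfloat32_t b) {
 *       return svmad_f32_x(pg, a, x, b);  // a * x + b, inactive lanes unspecified
 *   }
 */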
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u16_z))) svuint16_t svmad_n_u16_z(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s8_z))) svint8_t svmad_n_s8_z(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s32_z))) svint32_t svmad_n_s32_z(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s64_z))) svint64_t svmad_n_s64_z(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s16_z))) svint16_t svmad_n_s16_z(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f64_m))) svfloat64_t svmad_f64_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f32_m))) svfloat32_t svmad_f32_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f16_m))) svfloat16_t svmad_f16_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f64_x))) svfloat64_t svmad_f64_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f32_x))) svfloat32_t svmad_f32_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f16_x))) svfloat16_t svmad_f16_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f64_z))) svfloat64_t svmad_f64_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f32_z))) svfloat32_t svmad_f32_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f16_z))) svfloat16_t svmad_f16_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u8_m))) svuint8_t svmad_u8_m(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u32_m))) svuint32_t svmad_u32_m(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u64_m))) svuint64_t svmad_u64_m(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u16_m))) svuint16_t svmad_u16_m(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s8_m))) svint8_t svmad_s8_m(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s32_m))) svint32_t svmad_s32_m(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s64_m))) svint64_t svmad_s64_m(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s16_m))) svint16_t svmad_s16_m(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u8_x))) svuint8_t svmad_u8_x(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u32_x))) svuint32_t svmad_u32_x(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u64_x))) svuint64_t 
svmad_u64_x(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u16_x))) svuint16_t svmad_u16_x(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s8_x))) svint8_t svmad_s8_x(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s32_x))) svint32_t svmad_s32_x(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s64_x))) svint64_t svmad_s64_x(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s16_x))) svint16_t svmad_s16_x(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u8_z))) svuint8_t svmad_u8_z(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u32_z))) svuint32_t svmad_u32_z(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u64_z))) svuint64_t svmad_u64_z(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u16_z))) svuint16_t svmad_u16_z(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s8_z))) svint8_t svmad_s8_z(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s32_z))) svint32_t svmad_s32_z(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s64_z))) svint64_t svmad_s64_z(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s16_z))) svint16_t svmad_s16_z(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f64_m))) svfloat64_t svmax_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f32_m))) svfloat32_t svmax_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f16_m))) svfloat16_t svmax_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f64_x))) svfloat64_t svmax_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f32_x))) svfloat32_t svmax_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f16_x))) svfloat16_t svmax_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f64_z))) svfloat64_t svmax_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f32_z))) svfloat32_t svmax_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f16_z))) svfloat16_t svmax_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s8_m))) svint8_t svmax_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s32_m))) svint32_t svmax_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s64_m))) svint64_t svmax_n_s64_m(svbool_t, svint64_t, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s16_m))) svint16_t svmax_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s8_x))) svint8_t svmax_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s32_x))) svint32_t svmax_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s64_x))) svint64_t svmax_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s16_x))) svint16_t svmax_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s8_z))) svint8_t svmax_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s32_z))) svint32_t svmax_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s64_z))) svint64_t svmax_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s16_z))) svint16_t svmax_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u8_m))) svuint8_t svmax_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u32_m))) svuint32_t svmax_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u64_m))) svuint64_t svmax_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u16_m))) svuint16_t svmax_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u8_x))) svuint8_t svmax_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u32_x))) svuint32_t svmax_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u64_x))) svuint64_t svmax_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u16_x))) svuint16_t svmax_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u8_z))) svuint8_t svmax_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u32_z))) svuint32_t svmax_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u64_z))) svuint64_t svmax_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u16_z))) svuint16_t svmax_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_m))) svfloat64_t svmax_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_m))) svfloat32_t svmax_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_m))) svfloat16_t svmax_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_x))) svfloat64_t svmax_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_x))) svfloat32_t svmax_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_x))) svfloat16_t 
svmax_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_z))) svfloat64_t svmax_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_z))) svfloat32_t svmax_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_z))) svfloat16_t svmax_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_m))) svint8_t svmax_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_m))) svint32_t svmax_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_m))) svint64_t svmax_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_m))) svint16_t svmax_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_x))) svint8_t svmax_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_x))) svint32_t svmax_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_x))) svint64_t svmax_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_x))) svint16_t svmax_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_z))) svint8_t svmax_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_z))) svint32_t svmax_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_z))) svint64_t svmax_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_z))) svint16_t svmax_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_m))) svuint8_t svmax_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_m))) svuint32_t svmax_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_m))) svuint64_t svmax_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_m))) svuint16_t svmax_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_x))) svuint8_t svmax_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_x))) svuint32_t svmax_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_x))) svuint64_t svmax_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_x))) svuint16_t svmax_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_z))) svuint8_t svmax_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_z))) svuint32_t svmax_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_z))) svuint64_t svmax_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_z))) svuint16_t 
svmax_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f64_m))) svfloat64_t svmaxnm_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f32_m))) svfloat32_t svmaxnm_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f16_m))) svfloat16_t svmaxnm_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f64_x))) svfloat64_t svmaxnm_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f32_x))) svfloat32_t svmaxnm_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f16_x))) svfloat16_t svmaxnm_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f64_z))) svfloat64_t svmaxnm_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f32_z))) svfloat32_t svmaxnm_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f16_z))) svfloat16_t svmaxnm_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_m))) svfloat64_t svmaxnm_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_m))) svfloat32_t svmaxnm_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_m))) svfloat16_t svmaxnm_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_x))) svfloat64_t svmaxnm_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_x))) svfloat32_t svmaxnm_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_x))) svfloat16_t svmaxnm_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_z))) svfloat64_t svmaxnm_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_z))) svfloat32_t svmaxnm_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_z))) svfloat16_t svmaxnm_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmv_f64))) float64_t svmaxnmv_f64(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmv_f32))) float32_t svmaxnmv_f32(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmv_f16))) float16_t svmaxnmv_f16(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_f64))) float64_t svmaxv_f64(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_f32))) float32_t svmaxv_f32(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_f16))) float16_t svmaxv_f16(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_s8))) int8_t svmaxv_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_s32))) int32_t svmaxv_s32(svbool_t, svint32_t); __ai 
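/*
 * Editor's note -- illustrative sketch, not part of the original header.
 * For floating point there are two maximum families: svmax_* follows the
 * FMAX rule (a NaN operand propagates), while svmaxnm_* follows FMAXNM (if
 * exactly one operand is NaN, the numeric operand is returned). The sv*v
 * forms (svmaxv_*, svmaxnmv_*, and the later svminv_*) are horizontal
 * reductions that fold the active lanes of one vector down to a scalar.
 * A short sketch, assuming SVE is enabled:
 *
 *   #include <arm_sve.h>
 *   #include <math.h>   // for INFINITY
 *
 *   // NaN-tolerant maximum of n floats; tail handling via svwhilelt.
 *   // Returns -INFINITY when n == 0.
 *   float32_t max_of(const float32_t *p, int64_t n) {
 *       svfloat32_t best = svdup_n_f32(-INFINITY);
 *       for (int64_t i = 0; i < n; i += (int64_t)svcntw()) {
 *           svbool_t pg = svwhilelt_b32_s64(i, n);
 *           best = svmaxnm_f32_m(pg, best, svld1_f32(pg, p + i));
 *       }
 *       return svmaxnmv_f32(svptrue_b32(), best);
 *   }
 */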
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_s64))) int64_t svmaxv_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_s16))) int16_t svmaxv_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_u8))) uint8_t svmaxv_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_u32))) uint32_t svmaxv_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_u64))) uint64_t svmaxv_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_u16))) uint16_t svmaxv_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f64_m))) svfloat64_t svmin_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f32_m))) svfloat32_t svmin_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f16_m))) svfloat16_t svmin_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f64_x))) svfloat64_t svmin_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f32_x))) svfloat32_t svmin_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f16_x))) svfloat16_t svmin_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f64_z))) svfloat64_t svmin_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f32_z))) svfloat32_t svmin_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f16_z))) svfloat16_t svmin_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s8_m))) svint8_t svmin_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s32_m))) svint32_t svmin_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s64_m))) svint64_t svmin_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s16_m))) svint16_t svmin_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s8_x))) svint8_t svmin_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s32_x))) svint32_t svmin_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s64_x))) svint64_t svmin_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s16_x))) svint16_t svmin_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s8_z))) svint8_t svmin_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s32_z))) svint32_t svmin_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s64_z))) svint64_t svmin_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s16_z))) svint16_t svmin_n_s16_z(svbool_t, svint16_t, int16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u8_m))) svuint8_t svmin_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u32_m))) svuint32_t svmin_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u64_m))) svuint64_t svmin_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u16_m))) svuint16_t svmin_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u8_x))) svuint8_t svmin_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u32_x))) svuint32_t svmin_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u64_x))) svuint64_t svmin_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u16_x))) svuint16_t svmin_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u8_z))) svuint8_t svmin_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u32_z))) svuint32_t svmin_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u64_z))) svuint64_t svmin_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u16_z))) svuint16_t svmin_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_m))) svfloat64_t svmin_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_m))) svfloat32_t svmin_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_m))) svfloat16_t svmin_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_x))) svfloat64_t svmin_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_x))) svfloat32_t svmin_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_x))) svfloat16_t svmin_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_z))) svfloat64_t svmin_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_z))) svfloat32_t svmin_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_z))) svfloat16_t svmin_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_m))) svint8_t svmin_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_m))) svint32_t svmin_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_m))) svint64_t svmin_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_m))) svint16_t svmin_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_x))) svint8_t svmin_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_x))) svint32_t 
svmin_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_x))) svint64_t svmin_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_x))) svint16_t svmin_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_z))) svint8_t svmin_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_z))) svint32_t svmin_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_z))) svint64_t svmin_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_z))) svint16_t svmin_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_m))) svuint8_t svmin_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_m))) svuint32_t svmin_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_m))) svuint64_t svmin_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_m))) svuint16_t svmin_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_x))) svuint8_t svmin_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_x))) svuint32_t svmin_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_x))) svuint64_t svmin_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_x))) svuint16_t svmin_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_z))) svuint8_t svmin_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_z))) svuint32_t svmin_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_z))) svuint64_t svmin_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_z))) svuint16_t svmin_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f64_m))) svfloat64_t svminnm_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f32_m))) svfloat32_t svminnm_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f16_m))) svfloat16_t svminnm_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f64_x))) svfloat64_t svminnm_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f32_x))) svfloat32_t svminnm_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f16_x))) svfloat16_t svminnm_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f64_z))) svfloat64_t svminnm_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f32_z))) svfloat32_t svminnm_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f16_z))) svfloat16_t svminnm_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_m))) svfloat64_t svminnm_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_m))) svfloat32_t svminnm_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_m))) svfloat16_t svminnm_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_x))) svfloat64_t svminnm_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_x))) svfloat32_t svminnm_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_x))) svfloat16_t svminnm_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_z))) svfloat64_t svminnm_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_z))) svfloat32_t svminnm_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_z))) svfloat16_t svminnm_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmv_f64))) float64_t svminnmv_f64(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmv_f32))) float32_t svminnmv_f32(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmv_f16))) float16_t svminnmv_f16(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_f64))) float64_t svminv_f64(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_f32))) float32_t svminv_f32(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_f16))) float16_t svminv_f16(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_s8))) int8_t svminv_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_s32))) int32_t svminv_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_s64))) int64_t svminv_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_s16))) int16_t svminv_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_u8))) uint8_t svminv_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_u32))) uint32_t svminv_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_u64))) uint64_t svminv_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_u16))) uint16_t svminv_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f64_m))) svfloat64_t svmla_n_f64_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f32_m))) svfloat32_t svmla_n_f32_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f16_m))) svfloat16_t svmla_n_f16_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f64_x))) svfloat64_t svmla_n_f64_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f32_x))) svfloat32_t svmla_n_f32_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f16_x))) svfloat16_t svmla_n_f16_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f64_z))) svfloat64_t svmla_n_f64_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f32_z))) svfloat32_t svmla_n_f32_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f16_z))) svfloat16_t svmla_n_f16_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u8_m))) svuint8_t svmla_n_u8_m(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u32_m))) svuint32_t svmla_n_u32_m(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u64_m))) svuint64_t svmla_n_u64_m(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u16_m))) svuint16_t svmla_n_u16_m(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s8_m))) svint8_t svmla_n_s8_m(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s32_m))) svint32_t svmla_n_s32_m(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s64_m))) svint64_t svmla_n_s64_m(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s16_m))) svint16_t svmla_n_s16_m(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u8_x))) svuint8_t svmla_n_u8_x(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u32_x))) svuint32_t svmla_n_u32_x(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u64_x))) svuint64_t svmla_n_u64_x(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u16_x))) svuint16_t svmla_n_u16_x(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s8_x))) svint8_t svmla_n_s8_x(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s32_x))) svint32_t svmla_n_s32_x(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s64_x))) svint64_t svmla_n_s64_x(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s16_x))) svint16_t svmla_n_s16_x(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u8_z))) svuint8_t svmla_n_u8_z(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u32_z))) svuint32_t svmla_n_u32_z(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u64_z))) svuint64_t svmla_n_u64_z(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u16_z))) svuint16_t svmla_n_u16_z(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s8_z))) svint8_t svmla_n_s8_z(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s32_z))) svint32_t svmla_n_s32_z(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s64_z))) svint64_t svmla_n_s64_z(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s16_z))) svint16_t svmla_n_s16_z(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f64_m))) svfloat64_t svmla_f64_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f32_m))) svfloat32_t svmla_f32_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f16_m))) svfloat16_t svmla_f16_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f64_x))) svfloat64_t svmla_f64_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f32_x))) svfloat32_t svmla_f32_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f16_x))) svfloat16_t svmla_f16_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f64_z))) svfloat64_t svmla_f64_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f32_z))) svfloat32_t svmla_f32_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f16_z))) svfloat16_t svmla_f16_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u8_m))) svuint8_t svmla_u8_m(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u32_m))) svuint32_t svmla_u32_m(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u64_m))) svuint64_t svmla_u64_m(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u16_m))) svuint16_t svmla_u16_m(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s8_m))) svint8_t svmla_s8_m(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s32_m))) svint32_t svmla_s32_m(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s64_m))) svint64_t svmla_s64_m(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s16_m))) svint16_t svmla_s16_m(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u8_x))) svuint8_t svmla_u8_x(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u32_x))) svuint32_t 
svmla_u32_x(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u64_x))) svuint64_t svmla_u64_x(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u16_x))) svuint16_t svmla_u16_x(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s8_x))) svint8_t svmla_s8_x(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s32_x))) svint32_t svmla_s32_x(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s64_x))) svint64_t svmla_s64_x(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s16_x))) svint16_t svmla_s16_x(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u8_z))) svuint8_t svmla_u8_z(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u32_z))) svuint32_t svmla_u32_z(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u64_z))) svuint64_t svmla_u64_z(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u16_z))) svuint16_t svmla_u16_z(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s8_z))) svint8_t svmla_s8_z(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s32_z))) svint32_t svmla_s32_z(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s64_z))) svint64_t svmla_s64_z(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s16_z))) svint16_t svmla_s16_z(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_f64))) svfloat64_t svmla_lane_f64(svfloat64_t, svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_f32))) svfloat32_t svmla_lane_f32(svfloat32_t, svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_f16))) svfloat16_t svmla_lane_f16(svfloat16_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f64_m))) svfloat64_t svmls_n_f64_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f32_m))) svfloat32_t svmls_n_f32_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f16_m))) svfloat16_t svmls_n_f16_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f64_x))) svfloat64_t svmls_n_f64_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f32_x))) svfloat32_t svmls_n_f32_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f16_x))) svfloat16_t svmls_n_f16_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f64_z))) svfloat64_t svmls_n_f64_z(svbool_t, svfloat64_t, svfloat64_t, 
float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f32_z))) svfloat32_t svmls_n_f32_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f16_z))) svfloat16_t svmls_n_f16_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u8_m))) svuint8_t svmls_n_u8_m(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u32_m))) svuint32_t svmls_n_u32_m(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u64_m))) svuint64_t svmls_n_u64_m(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u16_m))) svuint16_t svmls_n_u16_m(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s8_m))) svint8_t svmls_n_s8_m(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s32_m))) svint32_t svmls_n_s32_m(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s64_m))) svint64_t svmls_n_s64_m(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s16_m))) svint16_t svmls_n_s16_m(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u8_x))) svuint8_t svmls_n_u8_x(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u32_x))) svuint32_t svmls_n_u32_x(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u64_x))) svuint64_t svmls_n_u64_x(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u16_x))) svuint16_t svmls_n_u16_x(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s8_x))) svint8_t svmls_n_s8_x(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s32_x))) svint32_t svmls_n_s32_x(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s64_x))) svint64_t svmls_n_s64_x(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s16_x))) svint16_t svmls_n_s16_x(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u8_z))) svuint8_t svmls_n_u8_z(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u32_z))) svuint32_t svmls_n_u32_z(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u64_z))) svuint64_t svmls_n_u64_z(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u16_z))) svuint16_t svmls_n_u16_z(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s8_z))) svint8_t svmls_n_s8_z(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s32_z))) svint32_t svmls_n_s32_z(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s64_z))) 
svint64_t svmls_n_s64_z(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s16_z))) svint16_t svmls_n_s16_z(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f64_m))) svfloat64_t svmls_f64_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f32_m))) svfloat32_t svmls_f32_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f16_m))) svfloat16_t svmls_f16_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f64_x))) svfloat64_t svmls_f64_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f32_x))) svfloat32_t svmls_f32_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f16_x))) svfloat16_t svmls_f16_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f64_z))) svfloat64_t svmls_f64_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f32_z))) svfloat32_t svmls_f32_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f16_z))) svfloat16_t svmls_f16_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u8_m))) svuint8_t svmls_u8_m(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u32_m))) svuint32_t svmls_u32_m(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u64_m))) svuint64_t svmls_u64_m(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u16_m))) svuint16_t svmls_u16_m(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s8_m))) svint8_t svmls_s8_m(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s32_m))) svint32_t svmls_s32_m(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s64_m))) svint64_t svmls_s64_m(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s16_m))) svint16_t svmls_s16_m(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u8_x))) svuint8_t svmls_u8_x(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u32_x))) svuint32_t svmls_u32_x(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u64_x))) svuint64_t svmls_u64_x(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u16_x))) svuint16_t svmls_u16_x(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s8_x))) svint8_t svmls_s8_x(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s32_x))) svint32_t svmls_s32_x(svbool_t, svint32_t, svint32_t, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s64_x))) svint64_t svmls_s64_x(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s16_x))) svint16_t svmls_s16_x(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u8_z))) svuint8_t svmls_u8_z(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u32_z))) svuint32_t svmls_u32_z(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u64_z))) svuint64_t svmls_u64_z(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u16_z))) svuint16_t svmls_u16_z(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s8_z))) svint8_t svmls_s8_z(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s32_z))) svint32_t svmls_s32_z(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s64_z))) svint64_t svmls_s64_z(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s16_z))) svint16_t svmls_s16_z(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_f64))) svfloat64_t svmls_lane_f64(svfloat64_t, svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_f32))) svfloat32_t svmls_lane_f32(svfloat32_t, svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_f16))) svfloat16_t svmls_lane_f16(svfloat16_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmov_b_z))) svbool_t svmov_b_z(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f64_m))) svfloat64_t svmsb_n_f64_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f32_m))) svfloat32_t svmsb_n_f32_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f16_m))) svfloat16_t svmsb_n_f16_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f64_x))) svfloat64_t svmsb_n_f64_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f32_x))) svfloat32_t svmsb_n_f32_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f16_x))) svfloat16_t svmsb_n_f16_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f64_z))) svfloat64_t svmsb_n_f64_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f32_z))) svfloat32_t svmsb_n_f32_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f16_z))) svfloat16_t svmsb_n_f16_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u8_m))) svuint8_t svmsb_n_u8_m(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u32_m))) svuint32_t svmsb_n_u32_m(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u64_m))) svuint64_t svmsb_n_u64_m(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u16_m))) svuint16_t svmsb_n_u16_m(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s8_m))) svint8_t svmsb_n_s8_m(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s32_m))) svint32_t svmsb_n_s32_m(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s64_m))) svint64_t svmsb_n_s64_m(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s16_m))) svint16_t svmsb_n_s16_m(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u8_x))) svuint8_t svmsb_n_u8_x(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u32_x))) svuint32_t svmsb_n_u32_x(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u64_x))) svuint64_t svmsb_n_u64_x(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u16_x))) svuint16_t svmsb_n_u16_x(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s8_x))) svint8_t svmsb_n_s8_x(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s32_x))) svint32_t svmsb_n_s32_x(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s64_x))) svint64_t svmsb_n_s64_x(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s16_x))) svint16_t svmsb_n_s16_x(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u8_z))) svuint8_t svmsb_n_u8_z(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u32_z))) svuint32_t svmsb_n_u32_z(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u64_z))) svuint64_t svmsb_n_u64_z(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u16_z))) svuint16_t svmsb_n_u16_z(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s8_z))) svint8_t svmsb_n_s8_z(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s32_z))) svint32_t svmsb_n_s32_z(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s64_z))) svint64_t svmsb_n_s64_z(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s16_z))) svint16_t svmsb_n_s16_z(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f64_m))) svfloat64_t svmsb_f64_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f32_m))) svfloat32_t 
svmsb_f32_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f16_m))) svfloat16_t svmsb_f16_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f64_x))) svfloat64_t svmsb_f64_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f32_x))) svfloat32_t svmsb_f32_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f16_x))) svfloat16_t svmsb_f16_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f64_z))) svfloat64_t svmsb_f64_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f32_z))) svfloat32_t svmsb_f32_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f16_z))) svfloat16_t svmsb_f16_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u8_m))) svuint8_t svmsb_u8_m(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u32_m))) svuint32_t svmsb_u32_m(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u64_m))) svuint64_t svmsb_u64_m(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u16_m))) svuint16_t svmsb_u16_m(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s8_m))) svint8_t svmsb_s8_m(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s32_m))) svint32_t svmsb_s32_m(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s64_m))) svint64_t svmsb_s64_m(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s16_m))) svint16_t svmsb_s16_m(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u8_x))) svuint8_t svmsb_u8_x(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u32_x))) svuint32_t svmsb_u32_x(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u64_x))) svuint64_t svmsb_u64_x(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u16_x))) svuint16_t svmsb_u16_x(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s8_x))) svint8_t svmsb_s8_x(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s32_x))) svint32_t svmsb_s32_x(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s64_x))) svint64_t svmsb_s64_x(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s16_x))) svint16_t svmsb_s16_x(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u8_z))) svuint8_t svmsb_u8_z(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u32_z))) svuint32_t svmsb_u32_z(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u64_z))) svuint64_t svmsb_u64_z(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u16_z))) svuint16_t svmsb_u16_z(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s8_z))) svint8_t svmsb_s8_z(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s32_z))) svint32_t svmsb_s32_z(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s64_z))) svint64_t svmsb_s64_z(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s16_z))) svint16_t svmsb_s16_z(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f64_m))) svfloat64_t svmul_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f32_m))) svfloat32_t svmul_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f16_m))) svfloat16_t svmul_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f64_x))) svfloat64_t svmul_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f32_x))) svfloat32_t svmul_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f16_x))) svfloat16_t svmul_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f64_z))) svfloat64_t svmul_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f32_z))) svfloat32_t svmul_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f16_z))) svfloat16_t svmul_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u8_m))) svuint8_t svmul_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u32_m))) svuint32_t svmul_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u64_m))) svuint64_t svmul_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u16_m))) svuint16_t svmul_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s8_m))) svint8_t svmul_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s32_m))) svint32_t svmul_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s64_m))) svint64_t svmul_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s16_m))) svint16_t svmul_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u8_x))) svuint8_t svmul_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u32_x))) svuint32_t svmul_n_u32_x(svbool_t, svuint32_t, uint32_t); 
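The prototypes running through this region are the Arm C Language Extensions (ACLE) SVE intrinsics embedded in the binary from Clang's arm_sve.h: each `__ai`-prefixed declaration is aliased onto a compiler builtin through the `__clang_arm_builtin_alias` attribute visible in the text. The name suffixes encode predication: `_m` merges inactive lanes from the first data operand, `_z` zeroes them, `_x` leaves them unspecified, and the `_n_` forms take a scalar as the last operand. The sketch below is purely illustrative usage of the multiply/multiply-accumulate intrinsics declared here (svmul_f32_x, svmla_n_f32_m, plus the standard load/store/predicate helpers from the same header); it assumes only <arm_sve.h> and an AArch64 compiler with SVE enabled, and the function name and parameters are hypothetical, not taken from this binary.

#include <arm_sve.h>
#include <stdint.h>

/* Illustrative only: out[i] += scale * (a[i] * b[i]) for i in [0, n). */
void scaled_mla_f32(float *out, const float *a, const float *b,
                    float scale, int64_t n) {
  for (int64_t i = 0; i < n; i += (int64_t)svcntw()) {
    svbool_t pg = svwhilelt_b32_s64(i, n);        /* predicate covering the tail */
    svfloat32_t va = svld1_f32(pg, a + i);
    svfloat32_t vb = svld1_f32(pg, b + i);
    svfloat32_t vo = svld1_f32(pg, out + i);
    svfloat32_t prod = svmul_f32_x(pg, va, vb);   /* "_x": inactive lanes unspecified */
    /* svmla_n_f32_m(pg, acc, x, s) yields acc + x*s in active lanes and keeps
       acc unchanged in inactive lanes (the "_m" merging form declared above). */
    svfloat32_t vr = svmla_n_f32_m(pg, vo, prod, scale);
    svst1_f32(pg, out + i, vr);
  }
}

The same pattern carries over to the integer variants and to the `_z`/`_x` forms declared in this region; only the element-type suffix and the predication suffix change.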
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u64_x))) svuint64_t svmul_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u16_x))) svuint16_t svmul_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s8_x))) svint8_t svmul_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s32_x))) svint32_t svmul_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s64_x))) svint64_t svmul_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s16_x))) svint16_t svmul_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u8_z))) svuint8_t svmul_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u32_z))) svuint32_t svmul_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u64_z))) svuint64_t svmul_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u16_z))) svuint16_t svmul_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s8_z))) svint8_t svmul_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s32_z))) svint32_t svmul_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s64_z))) svint64_t svmul_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s16_z))) svint16_t svmul_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f64_m))) svfloat64_t svmul_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f32_m))) svfloat32_t svmul_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f16_m))) svfloat16_t svmul_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f64_x))) svfloat64_t svmul_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f32_x))) svfloat32_t svmul_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f16_x))) svfloat16_t svmul_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f64_z))) svfloat64_t svmul_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f32_z))) svfloat32_t svmul_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f16_z))) svfloat16_t svmul_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u8_m))) svuint8_t svmul_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u32_m))) svuint32_t svmul_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u64_m))) svuint64_t svmul_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u16_m))) svuint16_t 
svmul_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s8_m))) svint8_t svmul_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s32_m))) svint32_t svmul_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s64_m))) svint64_t svmul_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s16_m))) svint16_t svmul_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u8_x))) svuint8_t svmul_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u32_x))) svuint32_t svmul_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u64_x))) svuint64_t svmul_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u16_x))) svuint16_t svmul_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s8_x))) svint8_t svmul_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s32_x))) svint32_t svmul_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s64_x))) svint64_t svmul_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s16_x))) svint16_t svmul_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u8_z))) svuint8_t svmul_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u32_z))) svuint32_t svmul_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u64_z))) svuint64_t svmul_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u16_z))) svuint16_t svmul_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s8_z))) svint8_t svmul_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s32_z))) svint32_t svmul_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s64_z))) svint64_t svmul_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s16_z))) svint16_t svmul_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_f64))) svfloat64_t svmul_lane_f64(svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_f32))) svfloat32_t svmul_lane_f32(svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_f16))) svfloat16_t svmul_lane_f16(svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s8_m))) svint8_t svmulh_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s32_m))) svint32_t svmulh_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s64_m))) svint64_t svmulh_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s16_m))) 
svint16_t svmulh_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s8_x))) svint8_t svmulh_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s32_x))) svint32_t svmulh_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s64_x))) svint64_t svmulh_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s16_x))) svint16_t svmulh_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s8_z))) svint8_t svmulh_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s32_z))) svint32_t svmulh_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s64_z))) svint64_t svmulh_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s16_z))) svint16_t svmulh_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u8_m))) svuint8_t svmulh_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u32_m))) svuint32_t svmulh_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u64_m))) svuint64_t svmulh_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u16_m))) svuint16_t svmulh_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u8_x))) svuint8_t svmulh_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u32_x))) svuint32_t svmulh_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u64_x))) svuint64_t svmulh_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u16_x))) svuint16_t svmulh_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u8_z))) svuint8_t svmulh_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u32_z))) svuint32_t svmulh_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u64_z))) svuint64_t svmulh_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u16_z))) svuint16_t svmulh_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s8_m))) svint8_t svmulh_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s32_m))) svint32_t svmulh_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s64_m))) svint64_t svmulh_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s16_m))) svint16_t svmulh_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s8_x))) svint8_t svmulh_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s32_x))) svint32_t svmulh_s32_x(svbool_t, svint32_t, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s64_x))) svint64_t svmulh_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s16_x))) svint16_t svmulh_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s8_z))) svint8_t svmulh_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s32_z))) svint32_t svmulh_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s64_z))) svint64_t svmulh_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s16_z))) svint16_t svmulh_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u8_m))) svuint8_t svmulh_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u32_m))) svuint32_t svmulh_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u64_m))) svuint64_t svmulh_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u16_m))) svuint16_t svmulh_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u8_x))) svuint8_t svmulh_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u32_x))) svuint32_t svmulh_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u64_x))) svuint64_t svmulh_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u16_x))) svuint16_t svmulh_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u8_z))) svuint8_t svmulh_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u32_z))) svuint32_t svmulh_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u64_z))) svuint64_t svmulh_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u16_z))) svuint16_t svmulh_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f64_m))) svfloat64_t svmulx_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f32_m))) svfloat32_t svmulx_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f16_m))) svfloat16_t svmulx_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f64_x))) svfloat64_t svmulx_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f32_x))) svfloat32_t svmulx_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f16_x))) svfloat16_t svmulx_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f64_z))) svfloat64_t svmulx_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f32_z))) svfloat32_t svmulx_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f16_z))) svfloat16_t svmulx_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f64_m))) svfloat64_t svmulx_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f32_m))) svfloat32_t svmulx_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f16_m))) svfloat16_t svmulx_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f64_x))) svfloat64_t svmulx_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f32_x))) svfloat32_t svmulx_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f16_x))) svfloat16_t svmulx_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f64_z))) svfloat64_t svmulx_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f32_z))) svfloat32_t svmulx_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f16_z))) svfloat16_t svmulx_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnand_b_z))) svbool_t svnand_b_z(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f64_m))) svfloat64_t svneg_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f32_m))) svfloat32_t svneg_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f16_m))) svfloat16_t svneg_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f64_x))) svfloat64_t svneg_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f32_x))) svfloat32_t svneg_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f16_x))) svfloat16_t svneg_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f64_z))) svfloat64_t svneg_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f32_z))) svfloat32_t svneg_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f16_z))) svfloat16_t svneg_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s8_m))) svint8_t svneg_s8_m(svint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s32_m))) svint32_t svneg_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s64_m))) svint64_t svneg_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s16_m))) svint16_t svneg_s16_m(svint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s8_x))) svint8_t svneg_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s32_x))) svint32_t svneg_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s64_x))) svint64_t svneg_s64_x(svbool_t, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s16_x))) svint16_t svneg_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s8_z))) svint8_t svneg_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s32_z))) svint32_t svneg_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s64_z))) svint64_t svneg_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s16_z))) svint16_t svneg_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f64_m))) svfloat64_t svnmad_n_f64_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f32_m))) svfloat32_t svnmad_n_f32_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f16_m))) svfloat16_t svnmad_n_f16_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f64_x))) svfloat64_t svnmad_n_f64_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f32_x))) svfloat32_t svnmad_n_f32_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f16_x))) svfloat16_t svnmad_n_f16_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f64_z))) svfloat64_t svnmad_n_f64_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f32_z))) svfloat32_t svnmad_n_f32_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f16_z))) svfloat16_t svnmad_n_f16_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f64_m))) svfloat64_t svnmad_f64_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f32_m))) svfloat32_t svnmad_f32_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f16_m))) svfloat16_t svnmad_f16_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f64_x))) svfloat64_t svnmad_f64_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f32_x))) svfloat32_t svnmad_f32_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f16_x))) svfloat16_t svnmad_f16_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f64_z))) svfloat64_t svnmad_f64_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f32_z))) svfloat32_t svnmad_f32_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f16_z))) svfloat16_t svnmad_f16_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f64_m))) svfloat64_t svnmla_n_f64_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f32_m))) 
svfloat32_t svnmla_n_f32_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f16_m))) svfloat16_t svnmla_n_f16_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f64_x))) svfloat64_t svnmla_n_f64_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f32_x))) svfloat32_t svnmla_n_f32_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f16_x))) svfloat16_t svnmla_n_f16_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f64_z))) svfloat64_t svnmla_n_f64_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f32_z))) svfloat32_t svnmla_n_f32_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f16_z))) svfloat16_t svnmla_n_f16_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f64_m))) svfloat64_t svnmla_f64_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f32_m))) svfloat32_t svnmla_f32_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f16_m))) svfloat16_t svnmla_f16_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f64_x))) svfloat64_t svnmla_f64_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f32_x))) svfloat32_t svnmla_f32_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f16_x))) svfloat16_t svnmla_f16_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f64_z))) svfloat64_t svnmla_f64_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f32_z))) svfloat32_t svnmla_f32_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f16_z))) svfloat16_t svnmla_f16_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f64_m))) svfloat64_t svnmls_n_f64_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f32_m))) svfloat32_t svnmls_n_f32_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f16_m))) svfloat16_t svnmls_n_f16_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f64_x))) svfloat64_t svnmls_n_f64_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f32_x))) svfloat32_t svnmls_n_f32_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f16_x))) svfloat16_t svnmls_n_f16_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f64_z))) svfloat64_t svnmls_n_f64_z(svbool_t, svfloat64_t, 
svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f32_z))) svfloat32_t svnmls_n_f32_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f16_z))) svfloat16_t svnmls_n_f16_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f64_m))) svfloat64_t svnmls_f64_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f32_m))) svfloat32_t svnmls_f32_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f16_m))) svfloat16_t svnmls_f16_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f64_x))) svfloat64_t svnmls_f64_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f32_x))) svfloat32_t svnmls_f32_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f16_x))) svfloat16_t svnmls_f16_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f64_z))) svfloat64_t svnmls_f64_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f32_z))) svfloat32_t svnmls_f32_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f16_z))) svfloat16_t svnmls_f16_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f64_m))) svfloat64_t svnmsb_n_f64_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f32_m))) svfloat32_t svnmsb_n_f32_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f16_m))) svfloat16_t svnmsb_n_f16_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f64_x))) svfloat64_t svnmsb_n_f64_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f32_x))) svfloat32_t svnmsb_n_f32_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f16_x))) svfloat16_t svnmsb_n_f16_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f64_z))) svfloat64_t svnmsb_n_f64_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f32_z))) svfloat32_t svnmsb_n_f32_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f16_z))) svfloat16_t svnmsb_n_f16_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f64_m))) svfloat64_t svnmsb_f64_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f32_m))) svfloat32_t svnmsb_f32_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f16_m))) svfloat16_t svnmsb_f16_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai 
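/* Sketch of the negated fused multiply-accumulate forms. Per the FNMLA/FNMLS
   definitions, svnmla computes -(acc + x*y) and svnmls computes x*y - acc on the
   active lanes; the _x forms leave inactive lanes unspecified. Operand roles are
   as in the ACLE description of these intrinsics. */
svfloat32_t neg_fma(svbool_t pg, svfloat32_t acc, svfloat32_t x, svfloat32_t y) {
    return svnmla_f32_x(pg, acc, x, y);   /* -(acc + x*y), fused */
}
svfloat32_t fms_rev(svbool_t pg, svfloat32_t acc, svfloat32_t x, svfloat32_t y) {
    return svnmls_f32_x(pg, acc, x, y);   /* x*y - acc, fused */
}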
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f64_x))) svfloat64_t svnmsb_f64_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f32_x))) svfloat32_t svnmsb_f32_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f16_x))) svfloat16_t svnmsb_f16_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f64_z))) svfloat64_t svnmsb_f64_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f32_z))) svfloat32_t svnmsb_f32_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f16_z))) svfloat16_t svnmsb_f16_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnor_b_z))) svbool_t svnor_b_z(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_b_z))) svbool_t svnot_b_z(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u8_m))) svuint8_t svnot_u8_m(svuint8_t, svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u32_m))) svuint32_t svnot_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u64_m))) svuint64_t svnot_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u16_m))) svuint16_t svnot_u16_m(svuint16_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s8_m))) svint8_t svnot_s8_m(svint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s32_m))) svint32_t svnot_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s64_m))) svint64_t svnot_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s16_m))) svint16_t svnot_s16_m(svint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u8_x))) svuint8_t svnot_u8_x(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u32_x))) svuint32_t svnot_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u64_x))) svuint64_t svnot_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u16_x))) svuint16_t svnot_u16_x(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s8_x))) svint8_t svnot_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s32_x))) svint32_t svnot_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s64_x))) svint64_t svnot_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s16_x))) svint16_t svnot_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u8_z))) svuint8_t svnot_u8_z(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u32_z))) svuint32_t svnot_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u64_z))) svuint64_t svnot_u64_z(svbool_t, svuint64_t); __ai 
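/* Sketch of the predicate and bitwise NOT forms declared here. svnot_b_z and
   svnor_b_z operate on predicates, forcing lanes outside the governing predicate
   to false; svnot_u32_x is the plain lane-wise complement with don't-care
   predication. */
svbool_t invert_mask(svbool_t pg, svbool_t p) {
    return svnot_b_z(pg, p);             /* complement p within pg */
}
svbool_t neither(svbool_t pg, svbool_t p, svbool_t q) {
    return svnor_b_z(pg, p, q);          /* true where neither p nor q is true */
}
svuint32_t bitwise_not(svbool_t pg, svuint32_t v) {
    return svnot_u32_x(pg, v);           /* inactive lanes unspecified */
}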
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u16_z))) svuint16_t svnot_u16_z(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s8_z))) svint8_t svnot_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s32_z))) svint32_t svnot_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s64_z))) svint64_t svnot_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s16_z))) svint16_t svnot_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorn_b_z))) svbool_t svorn_b_z(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_b_z))) svbool_t svorr_b_z(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u8_m))) svuint8_t svorr_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u32_m))) svuint32_t svorr_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u64_m))) svuint64_t svorr_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u16_m))) svuint16_t svorr_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s8_m))) svint8_t svorr_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s32_m))) svint32_t svorr_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s64_m))) svint64_t svorr_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s16_m))) svint16_t svorr_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u8_x))) svuint8_t svorr_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u32_x))) svuint32_t svorr_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u64_x))) svuint64_t svorr_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u16_x))) svuint16_t svorr_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s8_x))) svint8_t svorr_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s32_x))) svint32_t svorr_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s64_x))) svint64_t svorr_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s16_x))) svint16_t svorr_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u8_z))) svuint8_t svorr_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u32_z))) svuint32_t svorr_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u64_z))) svuint64_t svorr_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u16_z))) svuint16_t svorr_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai 
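/* Sketch of the scalar-operand OR forms: the merging (_m) variant ORs the scalar
   into the active lanes only and leaves inactive lanes exactly as they were in v;
   svorr_n_u32_z would instead zero the inactive lanes. */
svuint32_t set_flag_on_active(svbool_t pg, svuint32_t v) {
    return svorr_n_u32_m(pg, v, 0x80000000u);
}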
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s8_z))) svint8_t svorr_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s32_z))) svint32_t svorr_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s64_z))) svint64_t svorr_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s16_z))) svint16_t svorr_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u8_m))) svuint8_t svorr_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u32_m))) svuint32_t svorr_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u64_m))) svuint64_t svorr_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u16_m))) svuint16_t svorr_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s8_m))) svint8_t svorr_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s32_m))) svint32_t svorr_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s64_m))) svint64_t svorr_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s16_m))) svint16_t svorr_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u8_x))) svuint8_t svorr_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u32_x))) svuint32_t svorr_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u64_x))) svuint64_t svorr_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u16_x))) svuint16_t svorr_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s8_x))) svint8_t svorr_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s32_x))) svint32_t svorr_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s64_x))) svint64_t svorr_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s16_x))) svint16_t svorr_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u8_z))) svuint8_t svorr_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u32_z))) svuint32_t svorr_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u64_z))) svuint64_t svorr_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u16_z))) svuint16_t svorr_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s8_z))) svint8_t svorr_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s32_z))) svint32_t svorr_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s64_z))) svint64_t svorr_s64_z(svbool_t, svint64_t, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s16_z))) svint16_t svorr_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_u8))) uint8_t svorv_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_u32))) uint32_t svorv_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_u64))) uint64_t svorv_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_u16))) uint16_t svorv_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_s8))) int8_t svorv_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_s32))) int32_t svorv_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_s64))) int64_t svorv_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_s16))) int16_t svorv_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpfalse_b))) svbool_t svpfalse_b(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpfirst_b))) svbool_t svpfirst_b(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpnext_b8))) svbool_t svpnext_b8(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpnext_b32))) svbool_t svpnext_b32(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpnext_b64))) svbool_t svpnext_b64(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpnext_b16))) svbool_t svpnext_b16(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb))) void svprfb(svbool_t, void const *, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u32base))) void svprfb_gather_u32base(svbool_t, svuint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u64base))) void svprfb_gather_u64base(svbool_t, svuint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u32base_offset))) void svprfb_gather_u32base_offset(svbool_t, svuint32_t, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u64base_offset))) void svprfb_gather_u64base_offset(svbool_t, svuint64_t, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_s32offset))) void svprfb_gather_s32offset(svbool_t, void const *, svint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u32offset))) void svprfb_gather_u32offset(svbool_t, void const *, svuint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_s64offset))) void svprfb_gather_s64offset(svbool_t, void const *, svint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u64offset))) void svprfb_gather_u64offset(svbool_t, void const *, svuint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_vnum))) void svprfb_vnum(svbool_t, void const *, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd))) void svprfd(svbool_t, void const *, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u32base))) void svprfd_gather_u32base(svbool_t, svuint32_t, enum svprfop); __ai 
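/* Sketch of the OR-reduction and prefetch forms declared here. svorv_u32 ORs the
   active lanes down to one scalar, a convenient "did any lane flag an error"
   check. svprfb_vnum issues a contiguous byte prefetch one vector length past
   ptr; SV_PLDL1KEEP is assumed to be one of the enum svprfop values this header
   defines. */
uint32_t any_flags(svbool_t pg, svuint32_t flags) {
    return svorv_u32(pg, flags);
}
void prefetch_next_vector(svbool_t pg, const void *ptr) {
    svprfb_vnum(pg, ptr, 1, SV_PLDL1KEEP);
}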
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u64base))) void svprfd_gather_u64base(svbool_t, svuint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u32base_index))) void svprfd_gather_u32base_index(svbool_t, svuint32_t, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u64base_index))) void svprfd_gather_u64base_index(svbool_t, svuint64_t, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_s32index))) void svprfd_gather_s32index(svbool_t, void const *, svint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u32index))) void svprfd_gather_u32index(svbool_t, void const *, svuint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_s64index))) void svprfd_gather_s64index(svbool_t, void const *, svint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u64index))) void svprfd_gather_u64index(svbool_t, void const *, svuint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_vnum))) void svprfd_vnum(svbool_t, void const *, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh))) void svprfh(svbool_t, void const *, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u32base))) void svprfh_gather_u32base(svbool_t, svuint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u64base))) void svprfh_gather_u64base(svbool_t, svuint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u32base_index))) void svprfh_gather_u32base_index(svbool_t, svuint32_t, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u64base_index))) void svprfh_gather_u64base_index(svbool_t, svuint64_t, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_s32index))) void svprfh_gather_s32index(svbool_t, void const *, svint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u32index))) void svprfh_gather_u32index(svbool_t, void const *, svuint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_s64index))) void svprfh_gather_s64index(svbool_t, void const *, svint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u64index))) void svprfh_gather_u64index(svbool_t, void const *, svuint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_vnum))) void svprfh_vnum(svbool_t, void const *, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw))) void svprfw(svbool_t, void const *, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u32base))) void svprfw_gather_u32base(svbool_t, svuint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u64base))) void svprfw_gather_u64base(svbool_t, svuint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u32base_index))) void svprfw_gather_u32base_index(svbool_t, svuint32_t, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u64base_index))) void 
svprfw_gather_u64base_index(svbool_t, svuint64_t, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_s32index))) void svprfw_gather_s32index(svbool_t, void const *, svint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u32index))) void svprfw_gather_u32index(svbool_t, void const *, svuint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_s64index))) void svprfw_gather_s64index(svbool_t, void const *, svint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u64index))) void svprfw_gather_u64index(svbool_t, void const *, svuint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_vnum))) void svprfw_vnum(svbool_t, void const *, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptest_any))) bool svptest_any(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptest_first))) bool svptest_first(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptest_last))) bool svptest_last(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_pat_b8))) svbool_t svptrue_pat_b8(enum svpattern); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_pat_b32))) svbool_t svptrue_pat_b32(enum svpattern); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_pat_b64))) svbool_t svptrue_pat_b64(enum svpattern); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_pat_b16))) svbool_t svptrue_pat_b16(enum svpattern); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_b8))) svbool_t svptrue_b8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_b32))) svbool_t svptrue_b32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_b64))) svbool_t svptrue_b64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_b16))) svbool_t svptrue_b16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s8))) svint8_t svqadd_n_s8(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s32))) svint32_t svqadd_n_s32(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s64))) svint64_t svqadd_n_s64(svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s16))) svint16_t svqadd_n_s16(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u8))) svuint8_t svqadd_n_u8(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u32))) svuint32_t svqadd_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u64))) svuint64_t svqadd_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u16))) svuint16_t svqadd_n_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s8))) svint8_t svqadd_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s32))) svint32_t svqadd_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s64))) svint64_t svqadd_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s16))) svint16_t svqadd_s16(svint16_t, svint16_t); __ai 
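/* Sketch of a strip-mined loop using the saturating add declared here. svqadd_s16
   is unpredicated, so the governing predicate only gates the loads and the store.
   svwhilelt_b16_s64, svld1_s16, svst1_s16 and svcnth are assumed to come from
   other parts of this header. */
void sat_add_s16(int16_t *dst, const int16_t *a, const int16_t *b, int64_t n) {
    for (int64_t i = 0; i < n; i += (int64_t)svcnth()) {
        svbool_t pg = svwhilelt_b16_s64(i, n);        /* active while i + lane < n */
        svint16_t va = svld1_s16(pg, a + i);
        svint16_t vb = svld1_s16(pg, b + i);
        svst1_s16(pg, dst + i, svqadd_s16(va, vb));   /* clamps instead of wrapping */
    }
}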
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u8))) svuint8_t svqadd_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u32))) svuint32_t svqadd_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u64))) svuint64_t svqadd_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u16))) svuint16_t svqadd_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_n_s32))) int32_t svqdecb_n_s32(int32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_n_s64))) int64_t svqdecb_n_s64(int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_n_u32))) uint32_t svqdecb_n_u32(uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_n_u64))) uint64_t svqdecb_n_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_pat_n_s32))) int32_t svqdecb_pat_n_s32(int32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_pat_n_s64))) int64_t svqdecb_pat_n_s64(int64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_pat_n_u32))) uint32_t svqdecb_pat_n_u32(uint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_pat_n_u64))) uint64_t svqdecb_pat_n_u64(uint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_n_s32))) int32_t svqdecd_n_s32(int32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_n_s64))) int64_t svqdecd_n_s64(int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_n_u32))) uint32_t svqdecd_n_u32(uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_n_u64))) uint64_t svqdecd_n_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_s64))) svint64_t svqdecd_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_u64))) svuint64_t svqdecd_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_n_s32))) int32_t svqdecd_pat_n_s32(int32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_n_s64))) int64_t svqdecd_pat_n_s64(int64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_n_u32))) uint32_t svqdecd_pat_n_u32(uint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_n_u64))) uint64_t svqdecd_pat_n_u64(uint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_s64))) svint64_t svqdecd_pat_s64(svint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_u64))) svuint64_t svqdecd_pat_u64(svuint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_n_s32))) int32_t svqdech_n_s32(int32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_n_s64))) int64_t svqdech_n_s64(int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_n_u32))) uint32_t svqdech_n_u32(uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_n_u64))) 
uint64_t svqdech_n_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_s16))) svint16_t svqdech_s16(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_u16))) svuint16_t svqdech_u16(svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_n_s32))) int32_t svqdech_pat_n_s32(int32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_n_s64))) int64_t svqdech_pat_n_s64(int64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_n_u32))) uint32_t svqdech_pat_n_u32(uint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_n_u64))) uint64_t svqdech_pat_n_u64(uint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_s16))) svint16_t svqdech_pat_s16(svint16_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_u16))) svuint16_t svqdech_pat_u16(svuint16_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s32_b8))) int32_t svqdecp_n_s32_b8(int32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s32_b32))) int32_t svqdecp_n_s32_b32(int32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s32_b64))) int32_t svqdecp_n_s32_b64(int32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s32_b16))) int32_t svqdecp_n_s32_b16(int32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s64_b8))) int64_t svqdecp_n_s64_b8(int64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s64_b32))) int64_t svqdecp_n_s64_b32(int64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s64_b64))) int64_t svqdecp_n_s64_b64(int64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s64_b16))) int64_t svqdecp_n_s64_b16(int64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u32_b8))) uint32_t svqdecp_n_u32_b8(uint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u32_b32))) uint32_t svqdecp_n_u32_b32(uint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u32_b64))) uint32_t svqdecp_n_u32_b64(uint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u32_b16))) uint32_t svqdecp_n_u32_b16(uint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u64_b8))) uint64_t svqdecp_n_u64_b8(uint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u64_b32))) uint64_t svqdecp_n_u64_b32(uint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u64_b64))) uint64_t svqdecp_n_u64_b64(uint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u64_b16))) uint64_t svqdecp_n_u64_b16(uint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_s32))) svint32_t svqdecp_s32(svint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_s64))) svint64_t svqdecp_s64(svint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_s16))) svint16_t svqdecp_s16(svint16_t, svbool_t); __ai 
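/* Sketch of the saturating decrement forms: svqdecp subtracts the number of active
   lanes of a predicate, svqdecb subtracts a multiple of the vector length in bytes,
   and both clamp at the limits of the scalar type instead of wrapping. */
int64_t consume_active(int64_t remaining, svbool_t pg) {
    return svqdecp_n_s64_b32(remaining, pg);   /* minus the active 32-bit lanes */
}
int64_t drop_two_vectors_of_bytes(int64_t remaining) {
    return svqdecb_n_s64(remaining, 2);        /* minus 2 * (bytes per vector) */
}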
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_u32))) svuint32_t svqdecp_u32(svuint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_u64))) svuint64_t svqdecp_u64(svuint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_u16))) svuint16_t svqdecp_u16(svuint16_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_n_s32))) int32_t svqdecw_n_s32(int32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_n_s64))) int64_t svqdecw_n_s64(int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_n_u32))) uint32_t svqdecw_n_u32(uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_n_u64))) uint64_t svqdecw_n_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_s32))) svint32_t svqdecw_s32(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_u32))) svuint32_t svqdecw_u32(svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_n_s32))) int32_t svqdecw_pat_n_s32(int32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_n_s64))) int64_t svqdecw_pat_n_s64(int64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_n_u32))) uint32_t svqdecw_pat_n_u32(uint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_n_u64))) uint64_t svqdecw_pat_n_u64(uint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_s32))) svint32_t svqdecw_pat_s32(svint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_u32))) svuint32_t svqdecw_pat_u32(svuint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_n_s32))) int32_t svqincb_n_s32(int32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_n_s64))) int64_t svqincb_n_s64(int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_n_u32))) uint32_t svqincb_n_u32(uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_n_u64))) uint64_t svqincb_n_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_pat_n_s32))) int32_t svqincb_pat_n_s32(int32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_pat_n_s64))) int64_t svqincb_pat_n_s64(int64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_pat_n_u32))) uint32_t svqincb_pat_n_u32(uint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_pat_n_u64))) uint64_t svqincb_pat_n_u64(uint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_n_s32))) int32_t svqincd_n_s32(int32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_n_s64))) int64_t svqincd_n_s64(int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_n_u32))) uint32_t svqincd_n_u32(uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_n_u64))) uint64_t svqincd_n_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_s64))) 
svint64_t svqincd_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_u64))) svuint64_t svqincd_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_n_s32))) int32_t svqincd_pat_n_s32(int32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_n_s64))) int64_t svqincd_pat_n_s64(int64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_n_u32))) uint32_t svqincd_pat_n_u32(uint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_n_u64))) uint64_t svqincd_pat_n_u64(uint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_s64))) svint64_t svqincd_pat_s64(svint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_u64))) svuint64_t svqincd_pat_u64(svuint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_n_s32))) int32_t svqinch_n_s32(int32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_n_s64))) int64_t svqinch_n_s64(int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_n_u32))) uint32_t svqinch_n_u32(uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_n_u64))) uint64_t svqinch_n_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_s16))) svint16_t svqinch_s16(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_u16))) svuint16_t svqinch_u16(svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_n_s32))) int32_t svqinch_pat_n_s32(int32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_n_s64))) int64_t svqinch_pat_n_s64(int64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_n_u32))) uint32_t svqinch_pat_n_u32(uint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_n_u64))) uint64_t svqinch_pat_n_u64(uint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_s16))) svint16_t svqinch_pat_s16(svint16_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_u16))) svuint16_t svqinch_pat_u16(svuint16_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s32_b8))) int32_t svqincp_n_s32_b8(int32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s32_b32))) int32_t svqincp_n_s32_b32(int32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s32_b64))) int32_t svqincp_n_s32_b64(int32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s32_b16))) int32_t svqincp_n_s32_b16(int32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s64_b8))) int64_t svqincp_n_s64_b8(int64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s64_b32))) int64_t svqincp_n_s64_b32(int64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s64_b64))) int64_t svqincp_n_s64_b64(int64_t, svbool_t); __ai 
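/* Sketch of the matching saturating increments: svqinch advances by a multiple of
   the 16-bit lane count per vector, svqincp adds the number of active lanes of a
   predicate, both clamping instead of wrapping. */
int64_t next_halfword_block(int64_t i) {
    return svqinch_n_s64(i, 1);                /* plus one vector of 16-bit lanes */
}
int64_t count_active_words(int64_t total, svbool_t pg) {
    return svqincp_n_s64_b32(total, pg);       /* plus the active 32-bit lanes */
}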
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s64_b16))) int64_t svqincp_n_s64_b16(int64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u32_b8))) uint32_t svqincp_n_u32_b8(uint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u32_b32))) uint32_t svqincp_n_u32_b32(uint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u32_b64))) uint32_t svqincp_n_u32_b64(uint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u32_b16))) uint32_t svqincp_n_u32_b16(uint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u64_b8))) uint64_t svqincp_n_u64_b8(uint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u64_b32))) uint64_t svqincp_n_u64_b32(uint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u64_b64))) uint64_t svqincp_n_u64_b64(uint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u64_b16))) uint64_t svqincp_n_u64_b16(uint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_s32))) svint32_t svqincp_s32(svint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_s64))) svint64_t svqincp_s64(svint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_s16))) svint16_t svqincp_s16(svint16_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_u32))) svuint32_t svqincp_u32(svuint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_u64))) svuint64_t svqincp_u64(svuint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_u16))) svuint16_t svqincp_u16(svuint16_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_n_s32))) int32_t svqincw_n_s32(int32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_n_s64))) int64_t svqincw_n_s64(int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_n_u32))) uint32_t svqincw_n_u32(uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_n_u64))) uint64_t svqincw_n_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_s32))) svint32_t svqincw_s32(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_u32))) svuint32_t svqincw_u32(svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_n_s32))) int32_t svqincw_pat_n_s32(int32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_n_s64))) int64_t svqincw_pat_n_s64(int64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_n_u32))) uint32_t svqincw_pat_n_u32(uint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_n_u64))) uint64_t svqincw_pat_n_u64(uint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_s32))) svint32_t svqincw_pat_s32(svint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_u32))) svuint32_t svqincw_pat_u32(svuint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s8))) svint8_t 
svqsub_n_s8(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s32))) svint32_t svqsub_n_s32(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s64))) svint64_t svqsub_n_s64(svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s16))) svint16_t svqsub_n_s16(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u8))) svuint8_t svqsub_n_u8(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u32))) svuint32_t svqsub_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u64))) svuint64_t svqsub_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u16))) svuint16_t svqsub_n_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s8))) svint8_t svqsub_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s32))) svint32_t svqsub_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s64))) svint64_t svqsub_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s16))) svint16_t svqsub_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u8))) svuint8_t svqsub_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u32))) svuint32_t svqsub_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u64))) svuint64_t svqsub_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u16))) svuint16_t svqsub_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u8_m))) svuint8_t svrbit_u8_m(svuint8_t, svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u32_m))) svuint32_t svrbit_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u64_m))) svuint64_t svrbit_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u16_m))) svuint16_t svrbit_u16_m(svuint16_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s8_m))) svint8_t svrbit_s8_m(svint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s32_m))) svint32_t svrbit_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s64_m))) svint64_t svrbit_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s16_m))) svint16_t svrbit_s16_m(svint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u8_x))) svuint8_t svrbit_u8_x(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u32_x))) svuint32_t svrbit_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u64_x))) svuint64_t svrbit_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u16_x))) svuint16_t svrbit_u16_x(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s8_x))) svint8_t svrbit_s8_x(svbool_t, svint8_t); __ai 
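/* Sketch of the bit-reverse and saturating-subtract forms declared here. svrbit
   reverses the bit order inside each element; the example flips one vector's worth
   of bytes. svld1_u8 and svst1_u8 are assumed from other parts of this header;
   svptrue_b8 is declared above. */
void bit_reverse_one_vector(uint8_t *dst, const uint8_t *src) {
    svbool_t pg = svptrue_b8();
    svst1_u8(pg, dst, svrbit_u8_x(pg, svld1_u8(pg, src)));
}
svuint8_t sub_clamp_at_zero(svuint8_t a, svuint8_t b) {
    return svqsub_u8(a, b);                    /* unsigned lanes clamp at 0 */
}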
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s32_x))) svint32_t svrbit_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s64_x))) svint64_t svrbit_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s16_x))) svint16_t svrbit_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u8_z))) svuint8_t svrbit_u8_z(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u32_z))) svuint32_t svrbit_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u64_z))) svuint64_t svrbit_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u16_z))) svuint16_t svrbit_u16_z(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s8_z))) svint8_t svrbit_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s32_z))) svint32_t svrbit_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s64_z))) svint64_t svrbit_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s16_z))) svint16_t svrbit_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrdffr))) svbool_t svrdffr(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrdffr_z))) svbool_t svrdffr_z(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_f64))) svfloat64_t svrecpe_f64(svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_f32))) svfloat32_t svrecpe_f32(svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_f16))) svfloat16_t svrecpe_f16(svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecps_f64))) svfloat64_t svrecps_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecps_f32))) svfloat32_t svrecps_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecps_f16))) svfloat16_t svrecps_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f64_m))) svfloat64_t svrecpx_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f32_m))) svfloat32_t svrecpx_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f16_m))) svfloat16_t svrecpx_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f64_x))) svfloat64_t svrecpx_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f32_x))) svfloat32_t svrecpx_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f16_x))) svfloat16_t svrecpx_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f64_z))) svfloat64_t svrecpx_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f32_z))) svfloat32_t svrecpx_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f16_z))) svfloat16_t svrecpx_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_u8))) svuint8_t svrev_u8(svuint8_t); __ai 
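/* Sketch of a reciprocal refinement built from the estimate and step intrinsics
   declared here: svrecps_f32(x, e) returns 2 - x*e, so e * svrecps(x, e) is one
   Newton-Raphson iteration toward 1/x. svmul_f32_x is assumed from earlier in
   this header. */
svfloat32_t approx_recip(svbool_t pg, svfloat32_t x) {
    svfloat32_t e = svrecpe_f32(x);                    /* coarse estimate */
    e = svmul_f32_x(pg, e, svrecps_f32(x, e));         /* one refinement step */
    return e;   /* repeat the svrecps/svmul pair for more accuracy */
}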
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_u32))) svuint32_t svrev_u32(svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_u64))) svuint64_t svrev_u64(svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_u16))) svuint16_t svrev_u16(svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_s8))) svint8_t svrev_s8(svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_f64))) svfloat64_t svrev_f64(svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_f32))) svfloat32_t svrev_f32(svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_f16))) svfloat16_t svrev_f16(svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_s32))) svint32_t svrev_s32(svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_s64))) svint64_t svrev_s64(svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_s16))) svint16_t svrev_s16(svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_b16))) svbool_t svrev_b16(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_b32))) svbool_t svrev_b32(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_b64))) svbool_t svrev_b64(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_b8))) svbool_t svrev_b8(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u32_m))) svuint32_t svrevb_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u64_m))) svuint64_t svrevb_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u16_m))) svuint16_t svrevb_u16_m(svuint16_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s32_m))) svint32_t svrevb_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s64_m))) svint64_t svrevb_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s16_m))) svint16_t svrevb_s16_m(svint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u32_x))) svuint32_t svrevb_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u64_x))) svuint64_t svrevb_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u16_x))) svuint16_t svrevb_u16_x(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s32_x))) svint32_t svrevb_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s64_x))) svint64_t svrevb_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s16_x))) svint16_t svrevb_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u32_z))) svuint32_t svrevb_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u64_z))) svuint64_t svrevb_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u16_z))) svuint16_t svrevb_u16_z(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s32_z))) svint32_t svrevb_s32_z(svbool_t, svint32_t); __ai 
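/* Sketch of the element and byte reversal forms declared here: svrevb_u32 swaps
   the bytes inside each 32-bit lane (an endianness conversion), while svrev_u32
   reverses the order of the lanes across the vector. svptrue_b32 is declared
   above. */
svuint32_t bswap_each_lane(svuint32_t v) {
    svbool_t pg = svptrue_b32();
    return svrevb_u32_x(pg, v);
}
svuint32_t reverse_lane_order(svuint32_t v) {
    return svrev_u32(v);
}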
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s64_z))) svint64_t svrevb_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s16_z))) svint16_t svrevb_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u32_m))) svuint32_t svrevh_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u64_m))) svuint64_t svrevh_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s32_m))) svint32_t svrevh_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s64_m))) svint64_t svrevh_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u32_x))) svuint32_t svrevh_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u64_x))) svuint64_t svrevh_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s32_x))) svint32_t svrevh_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s64_x))) svint64_t svrevh_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u32_z))) svuint32_t svrevh_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u64_z))) svuint64_t svrevh_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s32_z))) svint32_t svrevh_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s64_z))) svint64_t svrevh_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_u64_m))) svuint64_t svrevw_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_s64_m))) svint64_t svrevw_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_u64_x))) svuint64_t svrevw_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_s64_x))) svint64_t svrevw_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_u64_z))) svuint64_t svrevw_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_s64_z))) svint64_t svrevw_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f64_m))) svfloat64_t svrinta_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_m))) svfloat32_t svrinta_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f16_m))) svfloat16_t svrinta_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f64_x))) svfloat64_t svrinta_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_x))) svfloat32_t svrinta_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f16_x))) svfloat16_t svrinta_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f64_z))) svfloat64_t svrinta_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_z))) svfloat32_t svrinta_f32_z(svbool_t, 
svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f16_z))) svfloat16_t svrinta_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f64_m))) svfloat64_t svrinti_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f32_m))) svfloat32_t svrinti_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f16_m))) svfloat16_t svrinti_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f64_x))) svfloat64_t svrinti_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f32_x))) svfloat32_t svrinti_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f16_x))) svfloat16_t svrinti_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f64_z))) svfloat64_t svrinti_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f32_z))) svfloat32_t svrinti_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f16_z))) svfloat16_t svrinti_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f64_m))) svfloat64_t svrintm_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_m))) svfloat32_t svrintm_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f16_m))) svfloat16_t svrintm_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f64_x))) svfloat64_t svrintm_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_x))) svfloat32_t svrintm_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f16_x))) svfloat16_t svrintm_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f64_z))) svfloat64_t svrintm_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_z))) svfloat32_t svrintm_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f16_z))) svfloat16_t svrintm_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f64_m))) svfloat64_t svrintn_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_m))) svfloat32_t svrintn_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f16_m))) svfloat16_t svrintn_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f64_x))) svfloat64_t svrintn_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_x))) svfloat32_t svrintn_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f16_x))) svfloat16_t svrintn_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f64_z))) svfloat64_t svrintn_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_z))) svfloat32_t svrintn_f32_z(svbool_t, 
svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f16_z))) svfloat16_t svrintn_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f64_m))) svfloat64_t svrintp_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_m))) svfloat32_t svrintp_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f16_m))) svfloat16_t svrintp_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f64_x))) svfloat64_t svrintp_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_x))) svfloat32_t svrintp_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f16_x))) svfloat16_t svrintp_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f64_z))) svfloat64_t svrintp_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_z))) svfloat32_t svrintp_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f16_z))) svfloat16_t svrintp_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f64_m))) svfloat64_t svrintx_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f32_m))) svfloat32_t svrintx_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f16_m))) svfloat16_t svrintx_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f64_x))) svfloat64_t svrintx_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f32_x))) svfloat32_t svrintx_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f16_x))) svfloat16_t svrintx_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f64_z))) svfloat64_t svrintx_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f32_z))) svfloat32_t svrintx_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f16_z))) svfloat16_t svrintx_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f64_m))) svfloat64_t svrintz_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f32_m))) svfloat32_t svrintz_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f16_m))) svfloat16_t svrintz_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f64_x))) svfloat64_t svrintz_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f32_x))) svfloat32_t svrintz_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f16_x))) svfloat16_t svrintz_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f64_z))) svfloat64_t svrintz_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f32_z))) svfloat32_t svrintz_f32_z(svbool_t, 
svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f16_z))) svfloat16_t svrintz_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_f64))) svfloat64_t svrsqrte_f64(svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_f32))) svfloat32_t svrsqrte_f32(svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_f16))) svfloat16_t svrsqrte_f16(svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrts_f64))) svfloat64_t svrsqrts_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrts_f32))) svfloat32_t svrsqrts_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrts_f16))) svfloat16_t svrsqrts_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f64_m))) svfloat64_t svscale_n_f64_m(svbool_t, svfloat64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f32_m))) svfloat32_t svscale_n_f32_m(svbool_t, svfloat32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f16_m))) svfloat16_t svscale_n_f16_m(svbool_t, svfloat16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f64_x))) svfloat64_t svscale_n_f64_x(svbool_t, svfloat64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f32_x))) svfloat32_t svscale_n_f32_x(svbool_t, svfloat32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f16_x))) svfloat16_t svscale_n_f16_x(svbool_t, svfloat16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f64_z))) svfloat64_t svscale_n_f64_z(svbool_t, svfloat64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f32_z))) svfloat32_t svscale_n_f32_z(svbool_t, svfloat32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f16_z))) svfloat16_t svscale_n_f16_z(svbool_t, svfloat16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f64_m))) svfloat64_t svscale_f64_m(svbool_t, svfloat64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f32_m))) svfloat32_t svscale_f32_m(svbool_t, svfloat32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f16_m))) svfloat16_t svscale_f16_m(svbool_t, svfloat16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f64_x))) svfloat64_t svscale_f64_x(svbool_t, svfloat64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f32_x))) svfloat32_t svscale_f32_x(svbool_t, svfloat32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f16_x))) svfloat16_t svscale_f16_x(svbool_t, svfloat16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f64_z))) svfloat64_t svscale_f64_z(svbool_t, svfloat64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f32_z))) svfloat32_t svscale_f32_z(svbool_t, svfloat32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f16_z))) svfloat16_t svscale_f16_z(svbool_t, svfloat16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_b))) svbool_t svsel_b(svbool_t, svbool_t, svbool_t); __ai 
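/*
 * Illustrative sketch of the svrint* rounding family declared above (they
 * mirror the FRINTA/FRINTI/FRINTM/FRINTN/FRINTP/FRINTX/FRINTZ instructions):
 * svrintm rounds toward minus infinity, svrintp toward plus infinity, svrintz
 * toward zero, svrintn to nearest with ties to even, svrinta to nearest with
 * ties away from zero, and svrinti/svrintx use the current rounding mode.
 * Assumes AArch64 + SVE with <arm_sve.h>; helper names are hypothetical.
 *
 *   static svfloat32_t floor_active(svbool_t pg, svfloat32_t v) {
 *       return svrintm_f32_m(v, pg, v);   // floor on active lanes, merge elsewhere
 *   }
 *   static svfloat32_t round_like_roundf(svbool_t pg, svfloat32_t v) {
 *       return svrinta_f32_x(pg, v);      // ties away from zero; inactive lanes unspecified
 *   }
 */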
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u8))) svuint8_t svsel_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u32))) svuint32_t svsel_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u64))) svuint64_t svsel_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u16))) svuint16_t svsel_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s8))) svint8_t svsel_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f64))) svfloat64_t svsel_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f32))) svfloat32_t svsel_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f16))) svfloat16_t svsel_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s32))) svint32_t svsel_s32(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s64))) svint64_t svsel_s64(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s16))) svint16_t svsel_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_u8))) svuint8x2_t svset2_u8(svuint8x2_t, uint64_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_u32))) svuint32x2_t svset2_u32(svuint32x2_t, uint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_u64))) svuint64x2_t svset2_u64(svuint64x2_t, uint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_u16))) svuint16x2_t svset2_u16(svuint16x2_t, uint64_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s8))) svint8x2_t svset2_s8(svint8x2_t, uint64_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_f64))) svfloat64x2_t svset2_f64(svfloat64x2_t, uint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_f32))) svfloat32x2_t svset2_f32(svfloat32x2_t, uint64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_f16))) svfloat16x2_t svset2_f16(svfloat16x2_t, uint64_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s32))) svint32x2_t svset2_s32(svint32x2_t, uint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s64))) svint64x2_t svset2_s64(svint64x2_t, uint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s16))) svint16x2_t svset2_s16(svint16x2_t, uint64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u8))) svuint8x3_t svset3_u8(svuint8x3_t, uint64_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u32))) svuint32x3_t svset3_u32(svuint32x3_t, uint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u64))) svuint64x3_t svset3_u64(svuint64x3_t, uint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u16))) svuint16x3_t svset3_u16(svuint16x3_t, uint64_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s8))) svint8x3_t svset3_s8(svint8x3_t, uint64_t, svint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_f64))) svfloat64x3_t svset3_f64(svfloat64x3_t, uint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_f32))) svfloat32x3_t svset3_f32(svfloat32x3_t, uint64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_f16))) svfloat16x3_t svset3_f16(svfloat16x3_t, uint64_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s32))) svint32x3_t svset3_s32(svint32x3_t, uint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s64))) svint64x3_t svset3_s64(svint64x3_t, uint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s16))) svint16x3_t svset3_s16(svint16x3_t, uint64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u8))) svuint8x4_t svset4_u8(svuint8x4_t, uint64_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u32))) svuint32x4_t svset4_u32(svuint32x4_t, uint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u64))) svuint64x4_t svset4_u64(svuint64x4_t, uint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u16))) svuint16x4_t svset4_u16(svuint16x4_t, uint64_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s8))) svint8x4_t svset4_s8(svint8x4_t, uint64_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_f64))) svfloat64x4_t svset4_f64(svfloat64x4_t, uint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_f32))) svfloat32x4_t svset4_f32(svfloat32x4_t, uint64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_f16))) svfloat16x4_t svset4_f16(svfloat16x4_t, uint64_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s32))) svint32x4_t svset4_s32(svint32x4_t, uint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s64))) svint64x4_t svset4_s64(svint64x4_t, uint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s16))) svint16x4_t svset4_s16(svint16x4_t, uint64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsetffr))) void svsetffr(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u8))) svuint8_t svsplice_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u32))) svuint32_t svsplice_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u64))) svuint64_t svsplice_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u16))) svuint16_t svsplice_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_s8))) svint8_t svsplice_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_f64))) svfloat64_t svsplice_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_f32))) svfloat32_t svsplice_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_f16))) svfloat16_t svsplice_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_s32))) svint32_t svsplice_s32(svbool_t, 
svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_s64))) svint64_t svsplice_s64(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_s16))) svint16_t svsplice_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f64_m))) svfloat64_t svsqrt_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f32_m))) svfloat32_t svsqrt_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f16_m))) svfloat16_t svsqrt_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f64_x))) svfloat64_t svsqrt_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f32_x))) svfloat32_t svsqrt_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f16_x))) svfloat16_t svsqrt_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f64_z))) svfloat64_t svsqrt_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f32_z))) svfloat32_t svsqrt_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f16_z))) svfloat16_t svsqrt_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8))) void svst1_u8(svbool_t, uint8_t *, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32))) void svst1_u32(svbool_t, uint32_t *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64))) void svst1_u64(svbool_t, uint64_t *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16))) void svst1_u16(svbool_t, uint16_t *, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8))) void svst1_s8(svbool_t, int8_t *, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64))) void svst1_f64(svbool_t, float64_t *, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32))) void svst1_f32(svbool_t, float32_t *, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16))) void svst1_f16(svbool_t, float16_t *, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32))) void svst1_s32(svbool_t, int32_t *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64))) void svst1_s64(svbool_t, int64_t *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16))) void svst1_s16(svbool_t, int16_t *, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_index_u32))) void svst1_scatter_u32base_index_u32(svbool_t, svuint32_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_index_u64))) void svst1_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_index_f64))) void svst1_scatter_u64base_index_f64(svbool_t, svuint64_t, int64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_index_f32))) void svst1_scatter_u32base_index_f32(svbool_t, svuint32_t, int64_t, svfloat32_t); __ai 
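/*
 * Illustrative sketch of a typical predicated loop built from the contiguous
 * svst1_* stores and svsqrt_* declared above.  svld1_f32, svwhilelt_b32_s32
 * and svcntw are assumed to be in scope from elsewhere in this header; the
 * helper name vector_sqrt is hypothetical.  Requires AArch64 with SVE
 * (e.g. -march=armv8-a+sve).
 *
 *   static void vector_sqrt(float32_t *dst, const float32_t *src, int n) {
 *       for (int i = 0; i < n; i += (int)svcntw()) {
 *           svbool_t    pg = svwhilelt_b32_s32(i, n);      // lanes with i + lane < n
 *           svfloat32_t x  = svld1_f32(pg, src + i);       // predicated load
 *           svst1_f32(pg, dst + i, svsqrt_f32_x(pg, x));   // predicated store
 *       }
 *   }
 */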
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_index_s32))) void svst1_scatter_u32base_index_s32(svbool_t, svuint32_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_index_s64))) void svst1_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_offset_u32))) void svst1_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_offset_u64))) void svst1_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_offset_f64))) void svst1_scatter_u64base_offset_f64(svbool_t, svuint64_t, int64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_offset_f32))) void svst1_scatter_u32base_offset_f32(svbool_t, svuint32_t, int64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_offset_s32))) void svst1_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_offset_s64))) void svst1_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_u32))) void svst1_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_u64))) void svst1_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_f64))) void svst1_scatter_u64base_f64(svbool_t, svuint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_f32))) void svst1_scatter_u32base_f32(svbool_t, svuint32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_s32))) void svst1_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_s64))) void svst1_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32index_u32))) void svst1_scatter_s32index_u32(svbool_t, uint32_t *, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32index_f32))) void svst1_scatter_s32index_f32(svbool_t, float32_t *, svint32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32index_s32))) void svst1_scatter_s32index_s32(svbool_t, int32_t *, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32index_u32))) void svst1_scatter_u32index_u32(svbool_t, uint32_t *, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32index_f32))) void svst1_scatter_u32index_f32(svbool_t, float32_t *, svuint32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32index_s32))) void svst1_scatter_u32index_s32(svbool_t, int32_t *, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64index_u64))) void svst1_scatter_s64index_u64(svbool_t, uint64_t *, svint64_t, svuint64_t); __ai 
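/*
 * Illustrative sketch of the scatter-store forms: the *_index variants scale
 * each per-lane index by the element size, while the *_offset variants take
 * raw byte offsets.  svindex_u32 is assumed in scope from elsewhere in this
 * header; scatter_even is a hypothetical helper.
 *
 *   // Write vals[i] to dst[2*i] for every active lane.
 *   static void scatter_even(float32_t *dst, svbool_t pg, svfloat32_t vals) {
 *       svuint32_t idx = svindex_u32(0, 2);               // 0, 2, 4, ...
 *       svst1_scatter_u32index_f32(pg, dst, idx, vals);   // dst[idx[i]] = vals[i]
 *   }
 */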
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64index_f64))) void svst1_scatter_s64index_f64(svbool_t, float64_t *, svint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64index_s64))) void svst1_scatter_s64index_s64(svbool_t, int64_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64index_u64))) void svst1_scatter_u64index_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64index_f64))) void svst1_scatter_u64index_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64index_s64))) void svst1_scatter_u64index_s64(svbool_t, int64_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32offset_u32))) void svst1_scatter_s32offset_u32(svbool_t, uint32_t *, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32offset_f32))) void svst1_scatter_s32offset_f32(svbool_t, float32_t *, svint32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32offset_s32))) void svst1_scatter_s32offset_s32(svbool_t, int32_t *, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32offset_u32))) void svst1_scatter_u32offset_u32(svbool_t, uint32_t *, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32offset_f32))) void svst1_scatter_u32offset_f32(svbool_t, float32_t *, svuint32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32offset_s32))) void svst1_scatter_u32offset_s32(svbool_t, int32_t *, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64offset_u64))) void svst1_scatter_s64offset_u64(svbool_t, uint64_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64offset_f64))) void svst1_scatter_s64offset_f64(svbool_t, float64_t *, svint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64offset_s64))) void svst1_scatter_s64offset_s64(svbool_t, int64_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64offset_u64))) void svst1_scatter_u64offset_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64offset_f64))) void svst1_scatter_u64offset_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64offset_s64))) void svst1_scatter_u64offset_s64(svbool_t, int64_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8))) void svst1_vnum_u8(svbool_t, uint8_t *, int64_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32))) void svst1_vnum_u32(svbool_t, uint32_t *, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64))) void svst1_vnum_u64(svbool_t, uint64_t *, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16))) void svst1_vnum_u16(svbool_t, uint16_t *, int64_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8))) void svst1_vnum_s8(svbool_t, 
int8_t *, int64_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64))) void svst1_vnum_f64(svbool_t, float64_t *, int64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32))) void svst1_vnum_f32(svbool_t, float32_t *, int64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16))) void svst1_vnum_f16(svbool_t, float16_t *, int64_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32))) void svst1_vnum_s32(svbool_t, int32_t *, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64))) void svst1_vnum_s64(svbool_t, int64_t *, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16))) void svst1_vnum_s16(svbool_t, int16_t *, int64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_s32))) void svst1b_s32(svbool_t, int8_t *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_s64))) void svst1b_s64(svbool_t, int8_t *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_s16))) void svst1b_s16(svbool_t, int8_t *, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_u32))) void svst1b_u32(svbool_t, uint8_t *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_u64))) void svst1b_u64(svbool_t, uint8_t *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_u16))) void svst1b_u16(svbool_t, uint8_t *, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32base_offset_u32))) void svst1b_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64base_offset_u64))) void svst1b_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32base_offset_s32))) void svst1b_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64base_offset_s64))) void svst1b_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32base_u32))) void svst1b_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64base_u64))) void svst1b_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32base_s32))) void svst1b_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64base_s64))) void svst1b_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_s32offset_s32))) void svst1b_scatter_s32offset_s32(svbool_t, int8_t *, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_s32offset_u32))) void svst1b_scatter_s32offset_u32(svbool_t, uint8_t *, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32offset_s32))) void svst1b_scatter_u32offset_s32(svbool_t, int8_t *, svuint32_t, svint32_t); __ai 
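/*
 * Illustrative sketch of the truncating stores: svst1b_* writes only the low
 * 8 bits of each active element, svst1h_* the low 16 bits, and svst1w_* (declared
 * further below) the low 32 bits.  Helper name is hypothetical; assumes
 * AArch64 + SVE with <arm_sve.h>.
 *
 *   static void store_low_bytes(int8_t *dst, svbool_t pg, svint32_t v) {
 *       svst1b_s32(pg, dst, v);    // dst[i] = (int8_t)v[i] for active lanes
 *   }
 */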
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32offset_u32))) void svst1b_scatter_u32offset_u32(svbool_t, uint8_t *, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_s64offset_s64))) void svst1b_scatter_s64offset_s64(svbool_t, int8_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_s64offset_u64))) void svst1b_scatter_s64offset_u64(svbool_t, uint8_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64offset_s64))) void svst1b_scatter_u64offset_s64(svbool_t, int8_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64offset_u64))) void svst1b_scatter_u64offset_u64(svbool_t, uint8_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_s32))) void svst1b_vnum_s32(svbool_t, int8_t *, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_s64))) void svst1b_vnum_s64(svbool_t, int8_t *, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_s16))) void svst1b_vnum_s16(svbool_t, int8_t *, int64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_u32))) void svst1b_vnum_u32(svbool_t, uint8_t *, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_u64))) void svst1b_vnum_u64(svbool_t, uint8_t *, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_u16))) void svst1b_vnum_u16(svbool_t, uint8_t *, int64_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_s32))) void svst1h_s32(svbool_t, int16_t *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_s64))) void svst1h_s64(svbool_t, int16_t *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_u32))) void svst1h_u32(svbool_t, uint16_t *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_u64))) void svst1h_u64(svbool_t, uint16_t *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_index_u32))) void svst1h_scatter_u32base_index_u32(svbool_t, svuint32_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_index_u64))) void svst1h_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_index_s32))) void svst1h_scatter_u32base_index_s32(svbool_t, svuint32_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_index_s64))) void svst1h_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_offset_u32))) void svst1h_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_offset_u64))) void svst1h_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_offset_s32))) void svst1h_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_offset_s64))) void 
svst1h_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_u32))) void svst1h_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_u64))) void svst1h_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_s32))) void svst1h_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_s64))) void svst1h_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s32index_s32))) void svst1h_scatter_s32index_s32(svbool_t, int16_t *, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s32index_u32))) void svst1h_scatter_s32index_u32(svbool_t, uint16_t *, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32index_s32))) void svst1h_scatter_u32index_s32(svbool_t, int16_t *, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32index_u32))) void svst1h_scatter_u32index_u32(svbool_t, uint16_t *, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s64index_s64))) void svst1h_scatter_s64index_s64(svbool_t, int16_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s64index_u64))) void svst1h_scatter_s64index_u64(svbool_t, uint16_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64index_s64))) void svst1h_scatter_u64index_s64(svbool_t, int16_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64index_u64))) void svst1h_scatter_u64index_u64(svbool_t, uint16_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s32offset_s32))) void svst1h_scatter_s32offset_s32(svbool_t, int16_t *, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s32offset_u32))) void svst1h_scatter_s32offset_u32(svbool_t, uint16_t *, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32offset_s32))) void svst1h_scatter_u32offset_s32(svbool_t, int16_t *, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32offset_u32))) void svst1h_scatter_u32offset_u32(svbool_t, uint16_t *, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s64offset_s64))) void svst1h_scatter_s64offset_s64(svbool_t, int16_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s64offset_u64))) void svst1h_scatter_s64offset_u64(svbool_t, uint16_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64offset_s64))) void svst1h_scatter_u64offset_s64(svbool_t, int16_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64offset_u64))) void svst1h_scatter_u64offset_u64(svbool_t, uint16_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_vnum_s32))) void svst1h_vnum_s32(svbool_t, int16_t *, 
int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_vnum_s64))) void svst1h_vnum_s64(svbool_t, int16_t *, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_vnum_u32))) void svst1h_vnum_u32(svbool_t, uint16_t *, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_vnum_u64))) void svst1h_vnum_u64(svbool_t, uint16_t *, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_s64))) void svst1w_s64(svbool_t, int32_t *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_u64))) void svst1w_u64(svbool_t, uint32_t *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_index_u64))) void svst1w_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_index_s64))) void svst1w_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_offset_u64))) void svst1w_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_offset_s64))) void svst1w_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_u64))) void svst1w_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_s64))) void svst1w_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_s64index_s64))) void svst1w_scatter_s64index_s64(svbool_t, int32_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_s64index_u64))) void svst1w_scatter_s64index_u64(svbool_t, uint32_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64index_s64))) void svst1w_scatter_u64index_s64(svbool_t, int32_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64index_u64))) void svst1w_scatter_u64index_u64(svbool_t, uint32_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_s64offset_s64))) void svst1w_scatter_s64offset_s64(svbool_t, int32_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_s64offset_u64))) void svst1w_scatter_s64offset_u64(svbool_t, uint32_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64offset_s64))) void svst1w_scatter_u64offset_s64(svbool_t, int32_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64offset_u64))) void svst1w_scatter_u64offset_u64(svbool_t, uint32_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_vnum_s64))) void svst1w_vnum_s64(svbool_t, int32_t *, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_vnum_u64))) void svst1w_vnum_u64(svbool_t, uint32_t *, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_u8))) void svst2_u8(svbool_t, uint8_t *, svuint8x2_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_u32))) void svst2_u32(svbool_t, uint32_t *, svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_u64))) void svst2_u64(svbool_t, uint64_t *, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_u16))) void svst2_u16(svbool_t, uint16_t *, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s8))) void svst2_s8(svbool_t, int8_t *, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_f64))) void svst2_f64(svbool_t, float64_t *, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_f32))) void svst2_f32(svbool_t, float32_t *, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_f16))) void svst2_f16(svbool_t, float16_t *, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s32))) void svst2_s32(svbool_t, int32_t *, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s64))) void svst2_s64(svbool_t, int64_t *, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s16))) void svst2_s16(svbool_t, int16_t *, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u8))) void svst2_vnum_u8(svbool_t, uint8_t *, int64_t, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u32))) void svst2_vnum_u32(svbool_t, uint32_t *, int64_t, svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u64))) void svst2_vnum_u64(svbool_t, uint64_t *, int64_t, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u16))) void svst2_vnum_u16(svbool_t, uint16_t *, int64_t, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s8))) void svst2_vnum_s8(svbool_t, int8_t *, int64_t, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_f64))) void svst2_vnum_f64(svbool_t, float64_t *, int64_t, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_f32))) void svst2_vnum_f32(svbool_t, float32_t *, int64_t, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_f16))) void svst2_vnum_f16(svbool_t, float16_t *, int64_t, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s32))) void svst2_vnum_s32(svbool_t, int32_t *, int64_t, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s64))) void svst2_vnum_s64(svbool_t, int64_t *, int64_t, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s16))) void svst2_vnum_s16(svbool_t, int16_t *, int64_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u8))) void svst3_u8(svbool_t, uint8_t *, svuint8x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u32))) void svst3_u32(svbool_t, uint32_t *, svuint32x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u64))) void svst3_u64(svbool_t, uint64_t *, svuint64x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u16))) void svst3_u16(svbool_t, uint16_t *, svuint16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s8))) void svst3_s8(svbool_t, int8_t *, svint8x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_f64))) void svst3_f64(svbool_t, float64_t *, svfloat64x3_t); 
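/*
 * Illustrative sketch of the structured stores: svst2_* takes a two-vector
 * tuple and writes it interleaved (v0[0], v1[0], v0[1], v1[1], ...); the
 * svst3_* and svst4_* forms do the same for three- and four-vector tuples.
 * svcreate2_f32 is assumed in scope from elsewhere in this header;
 * store_complex_interleaved is a hypothetical helper.  Requires AArch64 with SVE.
 */
static void store_complex_interleaved(float32_t *dst, svbool_t pg,
                                      svfloat32_t re, svfloat32_t im) {
  svfloat32x2_t pair = svcreate2_f32(re, im);  /* tuple {re, im} */
  svst2_f32(pg, dst, pair);                    /* dst: re0, im0, re1, im1, ... */
}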
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_f32))) void svst3_f32(svbool_t, float32_t *, svfloat32x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_f16))) void svst3_f16(svbool_t, float16_t *, svfloat16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s32))) void svst3_s32(svbool_t, int32_t *, svint32x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s64))) void svst3_s64(svbool_t, int64_t *, svint64x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s16))) void svst3_s16(svbool_t, int16_t *, svint16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u8))) void svst3_vnum_u8(svbool_t, uint8_t *, int64_t, svuint8x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u32))) void svst3_vnum_u32(svbool_t, uint32_t *, int64_t, svuint32x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u64))) void svst3_vnum_u64(svbool_t, uint64_t *, int64_t, svuint64x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u16))) void svst3_vnum_u16(svbool_t, uint16_t *, int64_t, svuint16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s8))) void svst3_vnum_s8(svbool_t, int8_t *, int64_t, svint8x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_f64))) void svst3_vnum_f64(svbool_t, float64_t *, int64_t, svfloat64x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_f32))) void svst3_vnum_f32(svbool_t, float32_t *, int64_t, svfloat32x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_f16))) void svst3_vnum_f16(svbool_t, float16_t *, int64_t, svfloat16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s32))) void svst3_vnum_s32(svbool_t, int32_t *, int64_t, svint32x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s64))) void svst3_vnum_s64(svbool_t, int64_t *, int64_t, svint64x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s16))) void svst3_vnum_s16(svbool_t, int16_t *, int64_t, svint16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u8))) void svst4_u8(svbool_t, uint8_t *, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u32))) void svst4_u32(svbool_t, uint32_t *, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u64))) void svst4_u64(svbool_t, uint64_t *, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u16))) void svst4_u16(svbool_t, uint16_t *, svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s8))) void svst4_s8(svbool_t, int8_t *, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_f64))) void svst4_f64(svbool_t, float64_t *, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_f32))) void svst4_f32(svbool_t, float32_t *, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_f16))) void svst4_f16(svbool_t, float16_t *, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s32))) void svst4_s32(svbool_t, int32_t *, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s64))) void svst4_s64(svbool_t, int64_t *, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s16))) void svst4_s16(svbool_t, int16_t *, 
svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u8))) void svst4_vnum_u8(svbool_t, uint8_t *, int64_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u32))) void svst4_vnum_u32(svbool_t, uint32_t *, int64_t, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u64))) void svst4_vnum_u64(svbool_t, uint64_t *, int64_t, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u16))) void svst4_vnum_u16(svbool_t, uint16_t *, int64_t, svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s8))) void svst4_vnum_s8(svbool_t, int8_t *, int64_t, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_f64))) void svst4_vnum_f64(svbool_t, float64_t *, int64_t, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_f32))) void svst4_vnum_f32(svbool_t, float32_t *, int64_t, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_f16))) void svst4_vnum_f16(svbool_t, float16_t *, int64_t, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s32))) void svst4_vnum_s32(svbool_t, int32_t *, int64_t, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s64))) void svst4_vnum_s64(svbool_t, int64_t *, int64_t, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s16))) void svst4_vnum_s16(svbool_t, int16_t *, int64_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8))) void svstnt1_u8(svbool_t, uint8_t *, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32))) void svstnt1_u32(svbool_t, uint32_t *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64))) void svstnt1_u64(svbool_t, uint64_t *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16))) void svstnt1_u16(svbool_t, uint16_t *, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8))) void svstnt1_s8(svbool_t, int8_t *, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64))) void svstnt1_f64(svbool_t, float64_t *, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32))) void svstnt1_f32(svbool_t, float32_t *, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16))) void svstnt1_f16(svbool_t, float16_t *, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32))) void svstnt1_s32(svbool_t, int32_t *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64))) void svstnt1_s64(svbool_t, int64_t *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16))) void svstnt1_s16(svbool_t, int16_t *, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8))) void svstnt1_vnum_u8(svbool_t, uint8_t *, int64_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32))) void svstnt1_vnum_u32(svbool_t, uint32_t *, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64))) void svstnt1_vnum_u64(svbool_t, uint64_t *, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16))) void svstnt1_vnum_u16(svbool_t, uint16_t *, int64_t, svuint16_t); 
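/*
 * Illustrative sketch of the non-temporal stores: svstnt1_* behaves like
 * svst1_* but hints that the data will not be reused soon, so the cache need
 * not retain it.  svld1_f64, svwhilelt_b64_s64 and svcntd are assumed in scope
 * from elsewhere in this header; copy_streaming is a hypothetical helper.
 * Requires AArch64 with SVE.
 */
static void copy_streaming(float64_t *dst, const float64_t *src, int64_t n) {
  for (int64_t i = 0; i < n; i += (int64_t)svcntd()) {
    svbool_t pg = svwhilelt_b64_s64(i, n);              /* lanes with i + lane < n */
    svstnt1_f64(pg, dst + i, svld1_f64(pg, src + i));   /* non-temporal store */
  }
}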
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8))) void svstnt1_vnum_s8(svbool_t, int8_t *, int64_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64))) void svstnt1_vnum_f64(svbool_t, float64_t *, int64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32))) void svstnt1_vnum_f32(svbool_t, float32_t *, int64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16))) void svstnt1_vnum_f16(svbool_t, float16_t *, int64_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32))) void svstnt1_vnum_s32(svbool_t, int32_t *, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64))) void svstnt1_vnum_s64(svbool_t, int64_t *, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16))) void svstnt1_vnum_s16(svbool_t, int16_t *, int64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f64_m))) svfloat64_t svsub_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f32_m))) svfloat32_t svsub_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f16_m))) svfloat16_t svsub_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f64_x))) svfloat64_t svsub_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f32_x))) svfloat32_t svsub_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f16_x))) svfloat16_t svsub_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f64_z))) svfloat64_t svsub_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f32_z))) svfloat32_t svsub_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f16_z))) svfloat16_t svsub_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u8_m))) svuint8_t svsub_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u32_m))) svuint32_t svsub_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u64_m))) svuint64_t svsub_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u16_m))) svuint16_t svsub_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s8_m))) svint8_t svsub_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s32_m))) svint32_t svsub_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s64_m))) svint64_t svsub_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s16_m))) svint16_t svsub_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u8_x))) svuint8_t svsub_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u32_x))) svuint32_t 
svsub_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u64_x))) svuint64_t svsub_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u16_x))) svuint16_t svsub_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s8_x))) svint8_t svsub_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s32_x))) svint32_t svsub_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s64_x))) svint64_t svsub_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s16_x))) svint16_t svsub_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u8_z))) svuint8_t svsub_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u32_z))) svuint32_t svsub_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u64_z))) svuint64_t svsub_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u16_z))) svuint16_t svsub_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s8_z))) svint8_t svsub_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s32_z))) svint32_t svsub_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s64_z))) svint64_t svsub_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s16_z))) svint16_t svsub_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f64_m))) svfloat64_t svsub_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f32_m))) svfloat32_t svsub_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f16_m))) svfloat16_t svsub_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f64_x))) svfloat64_t svsub_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f32_x))) svfloat32_t svsub_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f16_x))) svfloat16_t svsub_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f64_z))) svfloat64_t svsub_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f32_z))) svfloat32_t svsub_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f16_z))) svfloat16_t svsub_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u8_m))) svuint8_t svsub_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u32_m))) svuint32_t svsub_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u64_m))) svuint64_t svsub_u64_m(svbool_t, svuint64_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u16_m))) svuint16_t svsub_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s8_m))) svint8_t svsub_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s32_m))) svint32_t svsub_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s64_m))) svint64_t svsub_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s16_m))) svint16_t svsub_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u8_x))) svuint8_t svsub_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u32_x))) svuint32_t svsub_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u64_x))) svuint64_t svsub_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u16_x))) svuint16_t svsub_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s8_x))) svint8_t svsub_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s32_x))) svint32_t svsub_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s64_x))) svint64_t svsub_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s16_x))) svint16_t svsub_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u8_z))) svuint8_t svsub_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u32_z))) svuint32_t svsub_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u64_z))) svuint64_t svsub_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u16_z))) svuint16_t svsub_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s8_z))) svint8_t svsub_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s32_z))) svint32_t svsub_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s64_z))) svint64_t svsub_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s16_z))) svint16_t svsub_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f64_m))) svfloat64_t svsubr_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f32_m))) svfloat32_t svsubr_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f16_m))) svfloat16_t svsubr_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f64_x))) svfloat64_t svsubr_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f32_x))) svfloat32_t svsubr_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f16_x))) svfloat16_t svsubr_n_f16_x(svbool_t, svfloat16_t, 
float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f64_z))) svfloat64_t svsubr_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f32_z))) svfloat32_t svsubr_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f16_z))) svfloat16_t svsubr_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u8_m))) svuint8_t svsubr_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u32_m))) svuint32_t svsubr_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u64_m))) svuint64_t svsubr_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u16_m))) svuint16_t svsubr_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s8_m))) svint8_t svsubr_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s32_m))) svint32_t svsubr_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s64_m))) svint64_t svsubr_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s16_m))) svint16_t svsubr_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u8_x))) svuint8_t svsubr_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u32_x))) svuint32_t svsubr_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u64_x))) svuint64_t svsubr_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u16_x))) svuint16_t svsubr_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s8_x))) svint8_t svsubr_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s32_x))) svint32_t svsubr_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s64_x))) svint64_t svsubr_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s16_x))) svint16_t svsubr_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u8_z))) svuint8_t svsubr_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u32_z))) svuint32_t svsubr_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u64_z))) svuint64_t svsubr_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u16_z))) svuint16_t svsubr_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s8_z))) svint8_t svsubr_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s32_z))) svint32_t svsubr_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s64_z))) svint64_t svsubr_n_s64_z(svbool_t, svint64_t, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s16_z))) svint16_t svsubr_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f64_m))) svfloat64_t svsubr_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f32_m))) svfloat32_t svsubr_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f16_m))) svfloat16_t svsubr_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f64_x))) svfloat64_t svsubr_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f32_x))) svfloat32_t svsubr_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f16_x))) svfloat16_t svsubr_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f64_z))) svfloat64_t svsubr_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f32_z))) svfloat32_t svsubr_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f16_z))) svfloat16_t svsubr_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u8_m))) svuint8_t svsubr_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u32_m))) svuint32_t svsubr_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u64_m))) svuint64_t svsubr_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u16_m))) svuint16_t svsubr_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s8_m))) svint8_t svsubr_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s32_m))) svint32_t svsubr_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s64_m))) svint64_t svsubr_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s16_m))) svint16_t svsubr_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u8_x))) svuint8_t svsubr_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u32_x))) svuint32_t svsubr_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u64_x))) svuint64_t svsubr_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u16_x))) svuint16_t svsubr_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s8_x))) svint8_t svsubr_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s32_x))) svint32_t svsubr_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s64_x))) svint64_t svsubr_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s16_x))) svint16_t svsubr_s16_x(svbool_t, svint16_t, svint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u8_z))) svuint8_t svsubr_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u32_z))) svuint32_t svsubr_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u64_z))) svuint64_t svsubr_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u16_z))) svuint16_t svsubr_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s8_z))) svint8_t svsubr_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s32_z))) svint32_t svsubr_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s64_z))) svint64_t svsubr_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s16_z))) svint16_t svsubr_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_u8))) svuint8_t svtbl_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_u32))) svuint32_t svtbl_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_u64))) svuint64_t svtbl_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_u16))) svuint16_t svtbl_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_s8))) svint8_t svtbl_s8(svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_f64))) svfloat64_t svtbl_f64(svfloat64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_f32))) svfloat32_t svtbl_f32(svfloat32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_f16))) svfloat16_t svtbl_f16(svfloat16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_s32))) svint32_t svtbl_s32(svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_s64))) svint64_t svtbl_s64(svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_s16))) svint16_t svtbl_s16(svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtmad_f64))) svfloat64_t svtmad_f64(svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtmad_f32))) svfloat32_t svtmad_f32(svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtmad_f16))) svfloat16_t svtmad_f16(svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_u8))) svuint8_t svtrn1_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_u32))) svuint32_t svtrn1_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_u64))) svuint64_t svtrn1_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_u16))) svuint16_t svtrn1_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_s8))) svint8_t svtrn1_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_f64))) svfloat64_t svtrn1_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_f32))) svfloat32_t 
svtrn1_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_f16))) svfloat16_t svtrn1_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_s32))) svint32_t svtrn1_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_s64))) svint64_t svtrn1_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_s16))) svint16_t svtrn1_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_b16))) svbool_t svtrn1_b16(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_b32))) svbool_t svtrn1_b32(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_b64))) svbool_t svtrn1_b64(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_b8))) svbool_t svtrn1_b8(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_u8))) svuint8_t svtrn2_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_u32))) svuint32_t svtrn2_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_u64))) svuint64_t svtrn2_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_u16))) svuint16_t svtrn2_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_s8))) svint8_t svtrn2_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_f64))) svfloat64_t svtrn2_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_f32))) svfloat32_t svtrn2_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_f16))) svfloat16_t svtrn2_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_s32))) svint32_t svtrn2_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_s64))) svint64_t svtrn2_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_s16))) svint16_t svtrn2_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_b16))) svbool_t svtrn2_b16(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_b32))) svbool_t svtrn2_b32(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_b64))) svbool_t svtrn2_b64(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_b8))) svbool_t svtrn2_b8(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtsmul_f64))) svfloat64_t svtsmul_f64(svfloat64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtsmul_f32))) svfloat32_t svtsmul_f32(svfloat32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtsmul_f16))) svfloat16_t svtsmul_f16(svfloat16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtssel_f64))) svfloat64_t svtssel_f64(svfloat64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtssel_f32))) svfloat32_t svtssel_f32(svfloat32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtssel_f16))) svfloat16_t svtssel_f16(svfloat16_t, svuint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_u8))) svuint8x2_t svundef2_u8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_u32))) svuint32x2_t svundef2_u32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_u64))) svuint64x2_t svundef2_u64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_u16))) svuint16x2_t svundef2_u16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_s8))) svint8x2_t svundef2_s8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_f64))) svfloat64x2_t svundef2_f64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_f32))) svfloat32x2_t svundef2_f32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_f16))) svfloat16x2_t svundef2_f16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_s32))) svint32x2_t svundef2_s32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_s64))) svint64x2_t svundef2_s64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_s16))) svint16x2_t svundef2_s16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_u8))) svuint8x3_t svundef3_u8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_u32))) svuint32x3_t svundef3_u32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_u64))) svuint64x3_t svundef3_u64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_u16))) svuint16x3_t svundef3_u16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_s8))) svint8x3_t svundef3_s8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_f64))) svfloat64x3_t svundef3_f64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_f32))) svfloat32x3_t svundef3_f32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_f16))) svfloat16x3_t svundef3_f16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_s32))) svint32x3_t svundef3_s32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_s64))) svint64x3_t svundef3_s64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_s16))) svint16x3_t svundef3_s16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_u8))) svuint8x4_t svundef4_u8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_u32))) svuint32x4_t svundef4_u32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_u64))) svuint64x4_t svundef4_u64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_u16))) svuint16x4_t svundef4_u16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_s8))) svint8x4_t svundef4_s8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_f64))) svfloat64x4_t svundef4_f64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_f32))) svfloat32x4_t svundef4_f32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_f16))) svfloat16x4_t svundef4_f16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_s32))) svint32x4_t svundef4_s32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_s64))) svint64x4_t svundef4_s64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_s16))) svint16x4_t 
svundef4_s16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_u8))) svuint8_t svundef_u8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_u32))) svuint32_t svundef_u32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_u64))) svuint64_t svundef_u64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_u16))) svuint16_t svundef_u16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_s8))) svint8_t svundef_s8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_f64))) svfloat64_t svundef_f64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_f32))) svfloat32_t svundef_f32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_f16))) svfloat16_t svundef_f16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_s32))) svint32_t svundef_s32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_s64))) svint64_t svundef_s64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_s16))) svint16_t svundef_s16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_b))) svbool_t svunpkhi_b(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_s32))) svint32_t svunpkhi_s32(svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_s64))) svint64_t svunpkhi_s64(svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_s16))) svint16_t svunpkhi_s16(svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_u32))) svuint32_t svunpkhi_u32(svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_u64))) svuint64_t svunpkhi_u64(svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_u16))) svuint16_t svunpkhi_u16(svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_b))) svbool_t svunpklo_b(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_s32))) svint32_t svunpklo_s32(svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_s64))) svint64_t svunpklo_s64(svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_s16))) svint16_t svunpklo_s16(svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_u32))) svuint32_t svunpklo_u32(svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_u64))) svuint64_t svunpklo_u64(svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_u16))) svuint16_t svunpklo_u16(svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_u8))) svuint8_t svuzp1_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_u32))) svuint32_t svuzp1_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_u64))) svuint64_t svuzp1_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_u16))) svuint16_t svuzp1_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_s8))) svint8_t svuzp1_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_f64))) svfloat64_t svuzp1_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_f32))) svfloat32_t svuzp1_f32(svfloat32_t, 
svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_f16))) svfloat16_t svuzp1_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_s32))) svint32_t svuzp1_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_s64))) svint64_t svuzp1_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_s16))) svint16_t svuzp1_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_b16))) svbool_t svuzp1_b16(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_b32))) svbool_t svuzp1_b32(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_b64))) svbool_t svuzp1_b64(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_b8))) svbool_t svuzp1_b8(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_u8))) svuint8_t svuzp2_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_u32))) svuint32_t svuzp2_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_u64))) svuint64_t svuzp2_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_u16))) svuint16_t svuzp2_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_s8))) svint8_t svuzp2_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_f64))) svfloat64_t svuzp2_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_f32))) svfloat32_t svuzp2_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_f16))) svfloat16_t svuzp2_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_s32))) svint32_t svuzp2_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_s64))) svint64_t svuzp2_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_s16))) svint16_t svuzp2_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_b16))) svbool_t svuzp2_b16(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_b32))) svbool_t svuzp2_b32(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_b64))) svbool_t svuzp2_b64(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_b8))) svbool_t svuzp2_b8(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_s32))) svbool_t svwhilele_b8_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_s32))) svbool_t svwhilele_b32_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_s32))) svbool_t svwhilele_b64_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_s32))) svbool_t svwhilele_b16_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_s64))) svbool_t svwhilele_b8_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_s64))) svbool_t svwhilele_b32_s64(int64_t, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_s64))) svbool_t svwhilele_b64_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_s64))) svbool_t svwhilele_b16_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_u32))) svbool_t svwhilele_b8_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_u32))) svbool_t svwhilele_b32_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_u32))) svbool_t svwhilele_b64_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_u32))) svbool_t svwhilele_b16_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_u64))) svbool_t svwhilele_b8_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_u64))) svbool_t svwhilele_b32_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_u64))) svbool_t svwhilele_b64_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_u64))) svbool_t svwhilele_b16_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_u32))) svbool_t svwhilelt_b8_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_u32))) svbool_t svwhilelt_b32_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_u32))) svbool_t svwhilelt_b64_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_u32))) svbool_t svwhilelt_b16_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_u64))) svbool_t svwhilelt_b8_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_u64))) svbool_t svwhilelt_b32_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_u64))) svbool_t svwhilelt_b64_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_u64))) svbool_t svwhilelt_b16_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_s32))) svbool_t svwhilelt_b8_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_s32))) svbool_t svwhilelt_b32_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_s32))) svbool_t svwhilelt_b64_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_s32))) svbool_t svwhilelt_b16_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_s64))) svbool_t svwhilelt_b8_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_s64))) svbool_t svwhilelt_b32_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_s64))) svbool_t svwhilelt_b64_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_s64))) svbool_t svwhilelt_b16_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwrffr))) void svwrffr(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_u8))) svuint8_t svzip1_u8(svuint8_t, svuint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_u32))) svuint32_t svzip1_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_u64))) svuint64_t svzip1_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_u16))) svuint16_t svzip1_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_s8))) svint8_t svzip1_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_f64))) svfloat64_t svzip1_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_f32))) svfloat32_t svzip1_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_f16))) svfloat16_t svzip1_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_s32))) svint32_t svzip1_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_s64))) svint64_t svzip1_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_s16))) svint16_t svzip1_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_b16))) svbool_t svzip1_b16(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_b32))) svbool_t svzip1_b32(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_b64))) svbool_t svzip1_b64(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_b8))) svbool_t svzip1_b8(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_u8))) svuint8_t svzip2_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_u32))) svuint32_t svzip2_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_u64))) svuint64_t svzip2_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_u16))) svuint16_t svzip2_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_s8))) svint8_t svzip2_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_f64))) svfloat64_t svzip2_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_f32))) svfloat32_t svzip2_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_f16))) svfloat16_t svzip2_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_s32))) svint32_t svzip2_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_s64))) svint64_t svzip2_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_s16))) svint16_t svzip2_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_b16))) svbool_t svzip2_b16(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_b32))) svbool_t svzip2_b32(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_b64))) svbool_t svzip2_b64(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_b8))) svbool_t svzip2_b8(svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_m))) svfloat64_t svabd_m(svbool_t, svfloat64_t, 
float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f32_m))) svfloat32_t svabd_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f16_m))) svfloat16_t svabd_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_x))) svfloat64_t svabd_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f32_x))) svfloat32_t svabd_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f16_x))) svfloat16_t svabd_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_z))) svfloat64_t svabd_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f32_z))) svfloat32_t svabd_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f16_z))) svfloat16_t svabd_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s8_m))) svint8_t svabd_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s32_m))) svint32_t svabd_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s64_m))) svint64_t svabd_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s16_m))) svint16_t svabd_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s8_x))) svint8_t svabd_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s32_x))) svint32_t svabd_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s64_x))) svint64_t svabd_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s16_x))) svint16_t svabd_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s8_z))) svint8_t svabd_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s32_z))) svint32_t svabd_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s64_z))) svint64_t svabd_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s16_z))) svint16_t svabd_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u8_m))) svuint8_t svabd_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u32_m))) svuint32_t svabd_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u64_m))) svuint64_t svabd_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u16_m))) svuint16_t svabd_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u8_x))) svuint8_t svabd_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u32_x))) svuint32_t svabd_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u64_x))) svuint64_t svabd_x(svbool_t, svuint64_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u16_x))) svuint16_t svabd_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u8_z))) svuint8_t svabd_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u32_z))) svuint32_t svabd_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u64_z))) svuint64_t svabd_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u16_z))) svuint16_t svabd_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f64_m))) svfloat64_t svabd_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f32_m))) svfloat32_t svabd_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f16_m))) svfloat16_t svabd_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f64_x))) svfloat64_t svabd_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f32_x))) svfloat32_t svabd_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f16_x))) svfloat16_t svabd_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f64_z))) svfloat64_t svabd_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f32_z))) svfloat32_t svabd_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f16_z))) svfloat16_t svabd_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s8_m))) svint8_t svabd_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s32_m))) svint32_t svabd_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s64_m))) svint64_t svabd_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s16_m))) svint16_t svabd_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s8_x))) svint8_t svabd_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s32_x))) svint32_t svabd_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s64_x))) svint64_t svabd_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s16_x))) svint16_t svabd_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s8_z))) svint8_t svabd_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s32_z))) svint32_t svabd_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s64_z))) svint64_t svabd_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s16_z))) svint16_t svabd_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u8_m))) svuint8_t svabd_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u32_m))) 
svuint32_t svabd_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u64_m))) svuint64_t svabd_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u16_m))) svuint16_t svabd_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u8_x))) svuint8_t svabd_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u32_x))) svuint32_t svabd_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u64_x))) svuint64_t svabd_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u16_x))) svuint16_t svabd_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u8_z))) svuint8_t svabd_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u32_z))) svuint32_t svabd_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u64_z))) svuint64_t svabd_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u16_z))) svuint16_t svabd_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f64_m))) svfloat64_t svabs_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f32_m))) svfloat32_t svabs_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f16_m))) svfloat16_t svabs_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f64_x))) svfloat64_t svabs_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f32_x))) svfloat32_t svabs_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f16_x))) svfloat16_t svabs_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f64_z))) svfloat64_t svabs_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f32_z))) svfloat32_t svabs_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f16_z))) svfloat16_t svabs_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s8_m))) svint8_t svabs_m(svint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s32_m))) svint32_t svabs_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s64_m))) svint64_t svabs_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s16_m))) svint16_t svabs_m(svint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s8_x))) svint8_t svabs_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s32_x))) svint32_t svabs_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s64_x))) svint64_t svabs_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s16_x))) svint16_t svabs_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s8_z))) svint8_t svabs_z(svbool_t, svint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s32_z))) svint32_t svabs_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s64_z))) svint64_t svabs_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s16_z))) svint16_t svabs_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_n_f64))) svbool_t svacge(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_n_f32))) svbool_t svacge(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_n_f16))) svbool_t svacge(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_f64))) svbool_t svacge(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_f32))) svbool_t svacge(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_f16))) svbool_t svacge(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_n_f64))) svbool_t svacgt(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_n_f32))) svbool_t svacgt(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_n_f16))) svbool_t svacgt(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_f64))) svbool_t svacgt(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_f32))) svbool_t svacgt(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_f16))) svbool_t svacgt(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_n_f64))) svbool_t svacle(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_n_f32))) svbool_t svacle(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_n_f16))) svbool_t svacle(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_f64))) svbool_t svacle(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_f32))) svbool_t svacle(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_f16))) svbool_t svacle(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_n_f64))) svbool_t svaclt(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_n_f32))) svbool_t svaclt(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_n_f16))) svbool_t svaclt(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_f64))) svbool_t svaclt(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_f32))) svbool_t svaclt(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_f16))) svbool_t svaclt(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f64_m))) svfloat64_t svadd_m(svbool_t, svfloat64_t, float64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f32_m))) svfloat32_t svadd_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f16_m))) svfloat16_t svadd_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f64_x))) svfloat64_t svadd_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f32_x))) svfloat32_t svadd_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f16_x))) svfloat16_t svadd_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f64_z))) svfloat64_t svadd_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f32_z))) svfloat32_t svadd_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f16_z))) svfloat16_t svadd_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u8_m))) svuint8_t svadd_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u32_m))) svuint32_t svadd_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u64_m))) svuint64_t svadd_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u16_m))) svuint16_t svadd_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s8_m))) svint8_t svadd_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s32_m))) svint32_t svadd_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s64_m))) svint64_t svadd_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s16_m))) svint16_t svadd_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u8_x))) svuint8_t svadd_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u32_x))) svuint32_t svadd_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u64_x))) svuint64_t svadd_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u16_x))) svuint16_t svadd_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s8_x))) svint8_t svadd_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s32_x))) svint32_t svadd_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s64_x))) svint64_t svadd_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s16_x))) svint16_t svadd_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u8_z))) svuint8_t svadd_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u32_z))) svuint32_t svadd_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u64_z))) svuint64_t svadd_z(svbool_t, svuint64_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u16_z))) svuint16_t svadd_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s8_z))) svint8_t svadd_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s32_z))) svint32_t svadd_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s64_z))) svint64_t svadd_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s16_z))) svint16_t svadd_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f64_m))) svfloat64_t svadd_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f32_m))) svfloat32_t svadd_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f16_m))) svfloat16_t svadd_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f64_x))) svfloat64_t svadd_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f32_x))) svfloat32_t svadd_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f16_x))) svfloat16_t svadd_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f64_z))) svfloat64_t svadd_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f32_z))) svfloat32_t svadd_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f16_z))) svfloat16_t svadd_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u8_m))) svuint8_t svadd_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u32_m))) svuint32_t svadd_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u64_m))) svuint64_t svadd_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u16_m))) svuint16_t svadd_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s8_m))) svint8_t svadd_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s32_m))) svint32_t svadd_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s64_m))) svint64_t svadd_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s16_m))) svint16_t svadd_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u8_x))) svuint8_t svadd_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u32_x))) svuint32_t svadd_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u64_x))) svuint64_t svadd_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u16_x))) svuint16_t svadd_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s8_x))) svint8_t svadd_x(svbool_t, svint8_t, svint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s32_x))) svint32_t svadd_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s64_x))) svint64_t svadd_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s16_x))) svint16_t svadd_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u8_z))) svuint8_t svadd_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u32_z))) svuint32_t svadd_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u64_z))) svuint64_t svadd_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u16_z))) svuint16_t svadd_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s8_z))) svint8_t svadd_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s32_z))) svint32_t svadd_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s64_z))) svint64_t svadd_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s16_z))) svint16_t svadd_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f64))) float64_t svadda(svbool_t, float64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f32))) float32_t svadda(svbool_t, float32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f16))) float16_t svadda(svbool_t, float16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_s8))) int64_t svaddv(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_s32))) int64_t svaddv(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_s64))) int64_t svaddv(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_s16))) int64_t svaddv(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_u8))) uint64_t svaddv(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_u32))) uint64_t svaddv(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_u64))) uint64_t svaddv(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_u16))) uint64_t svaddv(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_f64))) float64_t svaddv(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_f32))) float32_t svaddv(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_f16))) float16_t svaddv(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrb_u32base_u32offset))) svuint32_t svadrb_offset(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrb_u64base_u64offset))) svuint64_t svadrb_offset(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrb_u32base_s32offset))) svuint32_t svadrb_offset(svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrb_u64base_s64offset))) svuint64_t 
svadrb_offset(svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrd_u32base_u32index))) svuint32_t svadrd_index(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrd_u64base_u64index))) svuint64_t svadrd_index(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrd_u32base_s32index))) svuint32_t svadrd_index(svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrd_u64base_s64index))) svuint64_t svadrd_index(svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrh_u32base_u32index))) svuint32_t svadrh_index(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrh_u64base_u64index))) svuint64_t svadrh_index(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrh_u32base_s32index))) svuint32_t svadrh_index(svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrh_u64base_s64index))) svuint64_t svadrh_index(svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u32base_u32index))) svuint32_t svadrw_index(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u64base_u64index))) svuint64_t svadrw_index(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u32base_s32index))) svuint32_t svadrw_index(svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u64base_s64index))) svuint64_t svadrw_index(svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_b_z))) svbool_t svand_z(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u8_m))) svuint8_t svand_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u32_m))) svuint32_t svand_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u64_m))) svuint64_t svand_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u16_m))) svuint16_t svand_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s8_m))) svint8_t svand_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s32_m))) svint32_t svand_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s64_m))) svint64_t svand_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s16_m))) svint16_t svand_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u8_x))) svuint8_t svand_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u32_x))) svuint32_t svand_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u64_x))) svuint64_t svand_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u16_x))) svuint16_t svand_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s8_x))) svint8_t svand_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s32_x))) svint32_t 
svand_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s64_x))) svint64_t svand_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s16_x))) svint16_t svand_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u8_z))) svuint8_t svand_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u32_z))) svuint32_t svand_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u64_z))) svuint64_t svand_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u16_z))) svuint16_t svand_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s8_z))) svint8_t svand_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s32_z))) svint32_t svand_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s64_z))) svint64_t svand_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s16_z))) svint16_t svand_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u8_m))) svuint8_t svand_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u32_m))) svuint32_t svand_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u64_m))) svuint64_t svand_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u16_m))) svuint16_t svand_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s8_m))) svint8_t svand_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s32_m))) svint32_t svand_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s64_m))) svint64_t svand_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s16_m))) svint16_t svand_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u8_x))) svuint8_t svand_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u32_x))) svuint32_t svand_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u64_x))) svuint64_t svand_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u16_x))) svuint16_t svand_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s8_x))) svint8_t svand_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s32_x))) svint32_t svand_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s64_x))) svint64_t svand_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s16_x))) svint16_t svand_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u8_z))) svuint8_t svand_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u32_z))) 
svuint32_t svand_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u64_z))) svuint64_t svand_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u16_z))) svuint16_t svand_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s8_z))) svint8_t svand_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s32_z))) svint32_t svand_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s64_z))) svint64_t svand_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s16_z))) svint16_t svand_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_u8))) uint8_t svandv(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_u32))) uint32_t svandv(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_u64))) uint64_t svandv(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_u16))) uint16_t svandv(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_s8))) int8_t svandv(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_s32))) int32_t svandv(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_s64))) int64_t svandv(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_s16))) int16_t svandv(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s8_m))) svint8_t svasr_m(svbool_t, svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s32_m))) svint32_t svasr_m(svbool_t, svint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s64_m))) svint64_t svasr_m(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s16_m))) svint16_t svasr_m(svbool_t, svint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s8_x))) svint8_t svasr_x(svbool_t, svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s32_x))) svint32_t svasr_x(svbool_t, svint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s64_x))) svint64_t svasr_x(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s16_x))) svint16_t svasr_x(svbool_t, svint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s8_z))) svint8_t svasr_z(svbool_t, svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s32_z))) svint32_t svasr_z(svbool_t, svint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s64_z))) svint64_t svasr_z(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s16_z))) svint16_t svasr_z(svbool_t, svint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s8_m))) svint8_t svasr_m(svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s32_m))) svint32_t svasr_m(svbool_t, svint32_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s64_m))) svint64_t svasr_m(svbool_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s16_m))) svint16_t svasr_m(svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s8_x))) svint8_t svasr_x(svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s32_x))) svint32_t svasr_x(svbool_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s64_x))) svint64_t svasr_x(svbool_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s16_x))) svint16_t svasr_x(svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s8_z))) svint8_t svasr_z(svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s32_z))) svint32_t svasr_z(svbool_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s64_z))) svint64_t svasr_z(svbool_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s16_z))) svint16_t svasr_z(svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s8_m))) svint8_t svasr_wide_m(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s32_m))) svint32_t svasr_wide_m(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s16_m))) svint16_t svasr_wide_m(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s8_x))) svint8_t svasr_wide_x(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s32_x))) svint32_t svasr_wide_x(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s16_x))) svint16_t svasr_wide_x(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s8_z))) svint8_t svasr_wide_z(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s32_z))) svint32_t svasr_wide_z(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s16_z))) svint16_t svasr_wide_z(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s8_m))) svint8_t svasr_wide_m(svbool_t, svint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s32_m))) svint32_t svasr_wide_m(svbool_t, svint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s16_m))) svint16_t svasr_wide_m(svbool_t, svint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s8_x))) svint8_t svasr_wide_x(svbool_t, svint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s32_x))) svint32_t svasr_wide_x(svbool_t, svint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s16_x))) svint16_t svasr_wide_x(svbool_t, svint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s8_z))) svint8_t svasr_wide_z(svbool_t, svint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s32_z))) svint32_t 
svasr_wide_z(svbool_t, svint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s16_z))) svint16_t svasr_wide_z(svbool_t, svint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s8_m))) svint8_t svasrd_m(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s32_m))) svint32_t svasrd_m(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s64_m))) svint64_t svasrd_m(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s16_m))) svint16_t svasrd_m(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s8_x))) svint8_t svasrd_x(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s32_x))) svint32_t svasrd_x(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s64_x))) svint64_t svasrd_x(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s16_x))) svint16_t svasrd_x(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s8_z))) svint8_t svasrd_z(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s32_z))) svint32_t svasrd_z(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s64_z))) svint64_t svasrd_z(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s16_z))) svint16_t svasrd_z(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_b_z))) svbool_t svbic_z(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u8_m))) svuint8_t svbic_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u32_m))) svuint32_t svbic_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u64_m))) svuint64_t svbic_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u16_m))) svuint16_t svbic_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s8_m))) svint8_t svbic_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s32_m))) svint32_t svbic_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s64_m))) svint64_t svbic_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s16_m))) svint16_t svbic_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u8_x))) svuint8_t svbic_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u32_x))) svuint32_t svbic_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u64_x))) svuint64_t svbic_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u16_x))) svuint16_t svbic_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s8_x))) svint8_t svbic_x(svbool_t, svint8_t, int8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s32_x))) svint32_t svbic_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s64_x))) svint64_t svbic_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s16_x))) svint16_t svbic_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u8_z))) svuint8_t svbic_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u32_z))) svuint32_t svbic_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u64_z))) svuint64_t svbic_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u16_z))) svuint16_t svbic_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s8_z))) svint8_t svbic_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s32_z))) svint32_t svbic_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s64_z))) svint64_t svbic_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s16_z))) svint16_t svbic_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u8_m))) svuint8_t svbic_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u32_m))) svuint32_t svbic_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u64_m))) svuint64_t svbic_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u16_m))) svuint16_t svbic_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s8_m))) svint8_t svbic_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s32_m))) svint32_t svbic_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s64_m))) svint64_t svbic_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s16_m))) svint16_t svbic_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u8_x))) svuint8_t svbic_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u32_x))) svuint32_t svbic_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u64_x))) svuint64_t svbic_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u16_x))) svuint16_t svbic_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s8_x))) svint8_t svbic_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s32_x))) svint32_t svbic_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s64_x))) svint64_t svbic_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s16_x))) svint16_t svbic_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u8_z))) svuint8_t svbic_z(svbool_t, svuint8_t, 
svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u32_z))) svuint32_t svbic_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u64_z))) svuint64_t svbic_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u16_z))) svuint16_t svbic_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s8_z))) svint8_t svbic_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s32_z))) svint32_t svbic_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s64_z))) svint64_t svbic_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s16_z))) svint16_t svbic_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrka_b_m))) svbool_t svbrka_m(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrka_b_z))) svbool_t svbrka_z(svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrkb_b_m))) svbool_t svbrkb_m(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrkb_b_z))) svbool_t svbrkb_z(svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrkn_b_z))) svbool_t svbrkn_z(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrkpa_b_z))) svbool_t svbrkpa_z(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrkpb_b_z))) svbool_t svbrkpb_z(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f64_m))) svfloat64_t svcadd_m(svbool_t, svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f32_m))) svfloat32_t svcadd_m(svbool_t, svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f16_m))) svfloat16_t svcadd_m(svbool_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f64_x))) svfloat64_t svcadd_x(svbool_t, svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f32_x))) svfloat32_t svcadd_x(svbool_t, svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f16_x))) svfloat16_t svcadd_x(svbool_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f64_z))) svfloat64_t svcadd_z(svbool_t, svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f32_z))) svfloat32_t svcadd_z(svbool_t, svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f16_z))) svfloat16_t svcadd_z(svbool_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_u8))) uint8_t svclasta(svbool_t, uint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_u32))) uint32_t svclasta(svbool_t, uint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_u64))) uint64_t svclasta(svbool_t, uint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_u16))) uint16_t 
svclasta(svbool_t, uint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_s8))) int8_t svclasta(svbool_t, int8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_f64))) float64_t svclasta(svbool_t, float64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_f32))) float32_t svclasta(svbool_t, float32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_f16))) float16_t svclasta(svbool_t, float16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_s32))) int32_t svclasta(svbool_t, int32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_s64))) int64_t svclasta(svbool_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_s16))) int16_t svclasta(svbool_t, int16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_u8))) svuint8_t svclasta(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_u32))) svuint32_t svclasta(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_u64))) svuint64_t svclasta(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_u16))) svuint16_t svclasta(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_s8))) svint8_t svclasta(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_f64))) svfloat64_t svclasta(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_f32))) svfloat32_t svclasta(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_f16))) svfloat16_t svclasta(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_s32))) svint32_t svclasta(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_s64))) svint64_t svclasta(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_s16))) svint16_t svclasta(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_u8))) uint8_t svclastb(svbool_t, uint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_u32))) uint32_t svclastb(svbool_t, uint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_u64))) uint64_t svclastb(svbool_t, uint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_u16))) uint16_t svclastb(svbool_t, uint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_s8))) int8_t svclastb(svbool_t, int8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_f64))) float64_t svclastb(svbool_t, float64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_f32))) float32_t svclastb(svbool_t, float32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_f16))) float16_t svclastb(svbool_t, float16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_s32))) int32_t svclastb(svbool_t, int32_t, svint32_t); 
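/*
 * Illustrative sketch, not part of the dumped header: the __aio overloads
 * above map the generic names (svadd, svaddv, svand, svclasta, svclastb, ...)
 * onto type- and predication-specific builtins.  The _m/_x/_z suffixes choose
 * what happens to inactive lanes: _m keeps the lane from the first data
 * operand, _z zeroes it, and _x leaves it unspecified.  A minimal usage
 * example for a separate translation unit, assuming <arm_sve.h> and an
 * SVE-enabled target (e.g. -march=armv8-a+sve); sum_relu and its local
 * names are hypothetical and are not defined by this header.
 */
#include <arm_sve.h>

/* Sums max(a[i], 0.0f) over n floats using the overloaded, predicated forms. */
static inline float sum_relu(const float *a, int n) {
    float total = 0.0f;
    for (int i = 0; i < n; i += (int)svcntw()) {      /* svcntw(): 32-bit lanes per vector */
        svbool_t    pg  = svwhilelt_b32_s32(i, n);    /* predicate covering the remaining tail */
        svfloat32_t va  = svld1(pg, a + i);           /* overloaded load -> svld1_f32 */
        svbool_t    pos = svcmpgt(pg, va, 0.0f);      /* overloaded compare -> svcmpgt_n_f32 */
        /* _z form: lanes where pos is false become 0, so they drop out of the sum. */
        svfloat32_t relu = svadd_z(pos, va, 0.0f);    /* -> svadd_n_f32_z */
        total += svaddv(pg, relu);                    /* horizontal add of active lanes -> svaddv_f32 */
    }
    return total;
}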
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_s64))) int64_t svclastb(svbool_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_s16))) int16_t svclastb(svbool_t, int16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_u8))) svuint8_t svclastb(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_u32))) svuint32_t svclastb(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_u64))) svuint64_t svclastb(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_u16))) svuint16_t svclastb(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_s8))) svint8_t svclastb(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_f64))) svfloat64_t svclastb(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_f32))) svfloat32_t svclastb(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_f16))) svfloat16_t svclastb(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_s32))) svint32_t svclastb(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_s64))) svint64_t svclastb(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_s16))) svint16_t svclastb(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s8_m))) svuint8_t svcls_m(svuint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s32_m))) svuint32_t svcls_m(svuint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s64_m))) svuint64_t svcls_m(svuint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s16_m))) svuint16_t svcls_m(svuint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s8_x))) svuint8_t svcls_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s32_x))) svuint32_t svcls_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s64_x))) svuint64_t svcls_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s16_x))) svuint16_t svcls_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s8_z))) svuint8_t svcls_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s32_z))) svuint32_t svcls_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s64_z))) svuint64_t svcls_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s16_z))) svuint16_t svcls_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u8_m))) svuint8_t svclz_m(svuint8_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u32_m))) svuint32_t svclz_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u64_m))) svuint64_t svclz_m(svuint64_t, svbool_t, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u16_m))) svuint16_t svclz_m(svuint16_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s8_m))) svuint8_t svclz_m(svuint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s32_m))) svuint32_t svclz_m(svuint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s64_m))) svuint64_t svclz_m(svuint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s16_m))) svuint16_t svclz_m(svuint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u8_x))) svuint8_t svclz_x(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u32_x))) svuint32_t svclz_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u64_x))) svuint64_t svclz_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u16_x))) svuint16_t svclz_x(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s8_x))) svuint8_t svclz_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s32_x))) svuint32_t svclz_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s64_x))) svuint64_t svclz_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s16_x))) svuint16_t svclz_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u8_z))) svuint8_t svclz_z(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u32_z))) svuint32_t svclz_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u64_z))) svuint64_t svclz_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u16_z))) svuint16_t svclz_z(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s8_z))) svuint8_t svclz_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s32_z))) svuint32_t svclz_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s64_z))) svuint64_t svclz_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s16_z))) svuint16_t svclz_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f64_m))) svfloat64_t svcmla_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f32_m))) svfloat32_t svcmla_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f16_m))) svfloat16_t svcmla_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f64_x))) svfloat64_t svcmla_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f32_x))) svfloat32_t svcmla_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f16_x))) svfloat16_t svcmla_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f64_z))) svfloat64_t 
svcmla_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f32_z))) svfloat32_t svcmla_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f16_z))) svfloat16_t svcmla_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_f32))) svfloat32_t svcmla_lane(svfloat32_t, svfloat32_t, svfloat32_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_f16))) svfloat16_t svcmla_lane(svfloat16_t, svfloat16_t, svfloat16_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_f64))) svbool_t svcmpeq(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_f32))) svbool_t svcmpeq(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_f16))) svbool_t svcmpeq(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_u8))) svbool_t svcmpeq(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_u32))) svbool_t svcmpeq(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_u64))) svbool_t svcmpeq(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_u16))) svbool_t svcmpeq(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_s8))) svbool_t svcmpeq(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_s32))) svbool_t svcmpeq(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_s64))) svbool_t svcmpeq(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_s16))) svbool_t svcmpeq(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_u8))) svbool_t svcmpeq(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_u32))) svbool_t svcmpeq(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_u64))) svbool_t svcmpeq(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_u16))) svbool_t svcmpeq(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_s8))) svbool_t svcmpeq(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_s32))) svbool_t svcmpeq(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_s64))) svbool_t svcmpeq(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_s16))) svbool_t svcmpeq(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_f64))) svbool_t svcmpeq(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_f32))) svbool_t svcmpeq(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_f16))) svbool_t svcmpeq(svbool_t, svfloat16_t, svfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_n_s8))) svbool_t svcmpeq_wide(svbool_t, svint8_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_n_s32))) svbool_t svcmpeq_wide(svbool_t, svint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_n_s16))) svbool_t svcmpeq_wide(svbool_t, svint16_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_s8))) svbool_t svcmpeq_wide(svbool_t, svint8_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_s32))) svbool_t svcmpeq_wide(svbool_t, svint32_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_s16))) svbool_t svcmpeq_wide(svbool_t, svint16_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_f64))) svbool_t svcmpge(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_f32))) svbool_t svcmpge(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_f16))) svbool_t svcmpge(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_s8))) svbool_t svcmpge(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_s32))) svbool_t svcmpge(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_s64))) svbool_t svcmpge(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_s16))) svbool_t svcmpge(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_u8))) svbool_t svcmpge(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_u32))) svbool_t svcmpge(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_u64))) svbool_t svcmpge(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_u16))) svbool_t svcmpge(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_s8))) svbool_t svcmpge(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_s32))) svbool_t svcmpge(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_s64))) svbool_t svcmpge(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_s16))) svbool_t svcmpge(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_f64))) svbool_t svcmpge(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_f32))) svbool_t svcmpge(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_f16))) svbool_t svcmpge(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_u8))) svbool_t svcmpge(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_u32))) svbool_t svcmpge(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_u64))) svbool_t svcmpge(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_u16))) 
svbool_t svcmpge(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_s8))) svbool_t svcmpge_wide(svbool_t, svint8_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_s32))) svbool_t svcmpge_wide(svbool_t, svint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_s16))) svbool_t svcmpge_wide(svbool_t, svint16_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_u8))) svbool_t svcmpge_wide(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_u32))) svbool_t svcmpge_wide(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_u16))) svbool_t svcmpge_wide(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_s8))) svbool_t svcmpge_wide(svbool_t, svint8_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_s32))) svbool_t svcmpge_wide(svbool_t, svint32_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_s16))) svbool_t svcmpge_wide(svbool_t, svint16_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_u8))) svbool_t svcmpge_wide(svbool_t, svuint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_u32))) svbool_t svcmpge_wide(svbool_t, svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_u16))) svbool_t svcmpge_wide(svbool_t, svuint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_f64))) svbool_t svcmpgt(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_f32))) svbool_t svcmpgt(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_f16))) svbool_t svcmpgt(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_s8))) svbool_t svcmpgt(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_s32))) svbool_t svcmpgt(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_s64))) svbool_t svcmpgt(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_s16))) svbool_t svcmpgt(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_u8))) svbool_t svcmpgt(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_u32))) svbool_t svcmpgt(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_u64))) svbool_t svcmpgt(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_u16))) svbool_t svcmpgt(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_s8))) svbool_t svcmpgt(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_s32))) svbool_t svcmpgt(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_s64))) svbool_t svcmpgt(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_s16))) svbool_t 
svcmpgt(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_f64))) svbool_t svcmpgt(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_f32))) svbool_t svcmpgt(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_f16))) svbool_t svcmpgt(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_u8))) svbool_t svcmpgt(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_u32))) svbool_t svcmpgt(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_u64))) svbool_t svcmpgt(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_u16))) svbool_t svcmpgt(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_s8))) svbool_t svcmpgt_wide(svbool_t, svint8_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_s32))) svbool_t svcmpgt_wide(svbool_t, svint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_s16))) svbool_t svcmpgt_wide(svbool_t, svint16_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_u8))) svbool_t svcmpgt_wide(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_u32))) svbool_t svcmpgt_wide(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_u16))) svbool_t svcmpgt_wide(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_s8))) svbool_t svcmpgt_wide(svbool_t, svint8_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_s32))) svbool_t svcmpgt_wide(svbool_t, svint32_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_s16))) svbool_t svcmpgt_wide(svbool_t, svint16_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_u8))) svbool_t svcmpgt_wide(svbool_t, svuint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_u32))) svbool_t svcmpgt_wide(svbool_t, svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_u16))) svbool_t svcmpgt_wide(svbool_t, svuint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_f64))) svbool_t svcmple(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_f32))) svbool_t svcmple(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_f16))) svbool_t svcmple(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_s8))) svbool_t svcmple(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_s32))) svbool_t svcmple(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_s64))) svbool_t svcmple(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_s16))) svbool_t svcmple(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_u8))) svbool_t 
svcmple(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_u32))) svbool_t svcmple(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_u64))) svbool_t svcmple(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_u16))) svbool_t svcmple(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_s8))) svbool_t svcmple(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_s32))) svbool_t svcmple(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_s64))) svbool_t svcmple(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_s16))) svbool_t svcmple(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_f64))) svbool_t svcmple(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_f32))) svbool_t svcmple(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_f16))) svbool_t svcmple(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_u8))) svbool_t svcmple(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_u32))) svbool_t svcmple(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_u64))) svbool_t svcmple(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_u16))) svbool_t svcmple(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_s8))) svbool_t svcmple_wide(svbool_t, svint8_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_s32))) svbool_t svcmple_wide(svbool_t, svint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_s16))) svbool_t svcmple_wide(svbool_t, svint16_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_u8))) svbool_t svcmple_wide(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_u32))) svbool_t svcmple_wide(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_u16))) svbool_t svcmple_wide(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_s8))) svbool_t svcmple_wide(svbool_t, svint8_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_s32))) svbool_t svcmple_wide(svbool_t, svint32_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_s16))) svbool_t svcmple_wide(svbool_t, svint16_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_u8))) svbool_t svcmple_wide(svbool_t, svuint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_u32))) svbool_t svcmple_wide(svbool_t, svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_u16))) svbool_t svcmple_wide(svbool_t, svuint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_u8))) svbool_t 
svcmplt(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_u32))) svbool_t svcmplt(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_u64))) svbool_t svcmplt(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_u16))) svbool_t svcmplt(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_f64))) svbool_t svcmplt(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_f32))) svbool_t svcmplt(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_f16))) svbool_t svcmplt(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_s8))) svbool_t svcmplt(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_s32))) svbool_t svcmplt(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_s64))) svbool_t svcmplt(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_s16))) svbool_t svcmplt(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_u8))) svbool_t svcmplt(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_u32))) svbool_t svcmplt(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_u64))) svbool_t svcmplt(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_u16))) svbool_t svcmplt(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_s8))) svbool_t svcmplt(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_s32))) svbool_t svcmplt(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_s64))) svbool_t svcmplt(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_s16))) svbool_t svcmplt(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_f64))) svbool_t svcmplt(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_f32))) svbool_t svcmplt(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_f16))) svbool_t svcmplt(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_u8))) svbool_t svcmplt_wide(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_u32))) svbool_t svcmplt_wide(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_u16))) svbool_t svcmplt_wide(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_s8))) svbool_t svcmplt_wide(svbool_t, svint8_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_s32))) svbool_t svcmplt_wide(svbool_t, svint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_s16))) svbool_t svcmplt_wide(svbool_t, svint16_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_u8))) svbool_t svcmplt_wide(svbool_t, svuint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_u32))) svbool_t svcmplt_wide(svbool_t, svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_u16))) svbool_t svcmplt_wide(svbool_t, svuint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_s8))) svbool_t svcmplt_wide(svbool_t, svint8_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_s32))) svbool_t svcmplt_wide(svbool_t, svint32_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_s16))) svbool_t svcmplt_wide(svbool_t, svint16_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_f64))) svbool_t svcmpne(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_f32))) svbool_t svcmpne(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_f16))) svbool_t svcmpne(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_u8))) svbool_t svcmpne(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_u32))) svbool_t svcmpne(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_u64))) svbool_t svcmpne(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_u16))) svbool_t svcmpne(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_s8))) svbool_t svcmpne(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_s32))) svbool_t svcmpne(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_s64))) svbool_t svcmpne(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_s16))) svbool_t svcmpne(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_u8))) svbool_t svcmpne(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_u32))) svbool_t svcmpne(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_u64))) svbool_t svcmpne(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_u16))) svbool_t svcmpne(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_s8))) svbool_t svcmpne(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_s32))) svbool_t svcmpne(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_s64))) svbool_t svcmpne(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_s16))) svbool_t svcmpne(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_f64))) svbool_t svcmpne(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_f32))) svbool_t svcmpne(svbool_t, svfloat32_t, svfloat32_t); __aio 
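/*
 * Usage sketch for the svcmpne overloads above: whole-array equality test.  Any
 * active lane where the two inputs differ makes svptest_any() return true, so the
 * function can bail out early.  arrays_equal is an illustrative name; svptest_any
 * is a standard ACLE intrinsic declared elsewhere in arm_sve.h.
 */
#include <arm_sve.h>
#include <stdbool.h>
#include <stdint.h>

static bool arrays_equal(const int32_t *a, const int32_t *b, int64_t n) {
    for (int64_t i = 0; i < n; i += (int64_t)svcntw()) {
        svbool_t pg = svwhilelt_b32(i, n);
        svint32_t va = svld1(pg, a + i);
        svint32_t vb = svld1(pg, b + i);
        svbool_t ne = svcmpne(pg, va, vb);   /* lanes where a != b */
        if (svptest_any(pg, ne))
            return false;
    }
    return true;
}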
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_f16))) svbool_t svcmpne(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_n_s8))) svbool_t svcmpne_wide(svbool_t, svint8_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_n_s32))) svbool_t svcmpne_wide(svbool_t, svint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_n_s16))) svbool_t svcmpne_wide(svbool_t, svint16_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_s8))) svbool_t svcmpne_wide(svbool_t, svint8_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_s32))) svbool_t svcmpne_wide(svbool_t, svint32_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_s16))) svbool_t svcmpne_wide(svbool_t, svint16_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_n_f64))) svbool_t svcmpuo(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_n_f32))) svbool_t svcmpuo(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_n_f16))) svbool_t svcmpuo(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_f64))) svbool_t svcmpuo(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_f32))) svbool_t svcmpuo(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_f16))) svbool_t svcmpuo(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u8_m))) svuint8_t svcnot_m(svuint8_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u32_m))) svuint32_t svcnot_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u64_m))) svuint64_t svcnot_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u16_m))) svuint16_t svcnot_m(svuint16_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s8_m))) svint8_t svcnot_m(svint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s32_m))) svint32_t svcnot_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s64_m))) svint64_t svcnot_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s16_m))) svint16_t svcnot_m(svint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u8_x))) svuint8_t svcnot_x(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u32_x))) svuint32_t svcnot_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u64_x))) svuint64_t svcnot_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u16_x))) svuint16_t svcnot_x(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s8_x))) svint8_t svcnot_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s32_x))) svint32_t svcnot_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s64_x))) svint64_t 
svcnot_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s16_x))) svint16_t svcnot_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u8_z))) svuint8_t svcnot_z(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u32_z))) svuint32_t svcnot_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u64_z))) svuint64_t svcnot_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u16_z))) svuint16_t svcnot_z(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s8_z))) svint8_t svcnot_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s32_z))) svint32_t svcnot_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s64_z))) svint64_t svcnot_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s16_z))) svint16_t svcnot_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u8_m))) svuint8_t svcnt_m(svuint8_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u32_m))) svuint32_t svcnt_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u64_m))) svuint64_t svcnt_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u16_m))) svuint16_t svcnt_m(svuint16_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s8_m))) svuint8_t svcnt_m(svuint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f64_m))) svuint64_t svcnt_m(svuint64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f32_m))) svuint32_t svcnt_m(svuint32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f16_m))) svuint16_t svcnt_m(svuint16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s32_m))) svuint32_t svcnt_m(svuint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s64_m))) svuint64_t svcnt_m(svuint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s16_m))) svuint16_t svcnt_m(svuint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u8_x))) svuint8_t svcnt_x(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u32_x))) svuint32_t svcnt_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u64_x))) svuint64_t svcnt_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u16_x))) svuint16_t svcnt_x(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s8_x))) svuint8_t svcnt_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f64_x))) svuint64_t svcnt_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f32_x))) svuint32_t svcnt_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f16_x))) svuint16_t svcnt_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s32_x))) svuint32_t 
svcnt_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s64_x))) svuint64_t svcnt_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s16_x))) svuint16_t svcnt_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u8_z))) svuint8_t svcnt_z(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u32_z))) svuint32_t svcnt_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u64_z))) svuint64_t svcnt_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u16_z))) svuint16_t svcnt_z(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s8_z))) svuint8_t svcnt_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f64_z))) svuint64_t svcnt_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f32_z))) svuint32_t svcnt_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f16_z))) svuint16_t svcnt_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s32_z))) svuint32_t svcnt_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s64_z))) svuint64_t svcnt_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s16_z))) svuint16_t svcnt_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u32))) svuint32_t svcompact(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u64))) svuint64_t svcompact(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f64))) svfloat64_t svcompact(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f32))) svfloat32_t svcompact(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s32))) svint32_t svcompact(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s64))) svint64_t svcompact(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_u8))) svuint8x2_t svcreate2(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_u32))) svuint32x2_t svcreate2(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_u64))) svuint64x2_t svcreate2(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_u16))) svuint16x2_t svcreate2(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s8))) svint8x2_t svcreate2(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_f64))) svfloat64x2_t svcreate2(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_f32))) svfloat32x2_t svcreate2(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_f16))) svfloat16x2_t svcreate2(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s32))) svint32x2_t svcreate2(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s64))) svint64x2_t svcreate2(svint64_t, svint64_t); __aio 
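/*
 * Usage sketch for svcompact (declared above): it moves the active elements of a
 * vector into the lowest-numbered lanes, which gives a simple stream-compaction
 * (filter) loop.  keep_positive is an illustrative name; svcmpgt, svcntp_b32 and
 * svst1 are standard ACLE intrinsics declared elsewhere in arm_sve.h.
 */
#include <arm_sve.h>
#include <stdint.h>

static int64_t keep_positive(const float32_t *in, float32_t *out, int64_t n) {
    int64_t written = 0;
    for (int64_t i = 0; i < n; i += (int64_t)svcntw()) {
        svbool_t pg = svwhilelt_b32(i, n);
        svfloat32_t v = svld1(pg, in + i);
        svbool_t pos = svcmpgt(pg, v, 0.0f);      /* lanes with v > 0 */
        svfloat32_t packed = svcompact(pos, v);   /* kept values packed to the front */
        int64_t k = (int64_t)svcntp_b32(pg, pos); /* how many were kept */
        svst1(svwhilelt_b32((int64_t)0, k), out + written, packed);
        written += k;
    }
    return written;
}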
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s16))) svint16x2_t svcreate2(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u8))) svuint8x3_t svcreate3(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u32))) svuint32x3_t svcreate3(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u64))) svuint64x3_t svcreate3(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u16))) svuint16x3_t svcreate3(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s8))) svint8x3_t svcreate3(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_f64))) svfloat64x3_t svcreate3(svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_f32))) svfloat32x3_t svcreate3(svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_f16))) svfloat16x3_t svcreate3(svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s32))) svint32x3_t svcreate3(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s64))) svint64x3_t svcreate3(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s16))) svint16x3_t svcreate3(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u8))) svuint8x4_t svcreate4(svuint8_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u32))) svuint32x4_t svcreate4(svuint32_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u64))) svuint64x4_t svcreate4(svuint64_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u16))) svuint16x4_t svcreate4(svuint16_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s8))) svint8x4_t svcreate4(svint8_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_f64))) svfloat64x4_t svcreate4(svfloat64_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_f32))) svfloat32x4_t svcreate4(svfloat32_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_f16))) svfloat16x4_t svcreate4(svfloat16_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s32))) svint32x4_t svcreate4(svint32_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s64))) svint64x4_t svcreate4(svint64_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s16))) svint16x4_t svcreate4(svint16_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f32_m))) svfloat16_t svcvt_f16_m(svfloat16_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f32_x))) svfloat16_t svcvt_f16_x(svbool_t, svfloat32_t); __aio 
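/*
 * Usage sketch for the svcreate2/3/4 overloads above: they build the tuple types
 * (svfloat32x2_t and friends) consumed by the structured store intrinsics.  Here
 * two planar arrays are interleaved into an array of (x, y) pairs.  interleave_xy
 * is an illustrative name; svst2 is the matching structured store declared
 * elsewhere in arm_sve.h.
 */
#include <arm_sve.h>
#include <stdint.h>

static void interleave_xy(const float32_t *x, const float32_t *y,
                          float32_t *xy, int64_t n) {
    for (int64_t i = 0; i < n; i += (int64_t)svcntw()) {
        svbool_t pg = svwhilelt_b32(i, n);
        svfloat32_t vx = svld1(pg, x + i);
        svfloat32_t vy = svld1(pg, y + i);
        svfloat32x2_t pair = svcreate2(vx, vy);   /* tuple {x, y} */
        svst2(pg, xy + 2 * i, pair);              /* stores x0 y0 x1 y1 ... */
    }
}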
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f32_z))) svfloat16_t svcvt_f16_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f64_m))) svfloat16_t svcvt_f16_m(svfloat16_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f64_x))) svfloat16_t svcvt_f16_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f64_z))) svfloat16_t svcvt_f16_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s16_m))) svfloat16_t svcvt_f16_m(svfloat16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s16_x))) svfloat16_t svcvt_f16_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s16_z))) svfloat16_t svcvt_f16_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s32_m))) svfloat16_t svcvt_f16_m(svfloat16_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s32_x))) svfloat16_t svcvt_f16_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s32_z))) svfloat16_t svcvt_f16_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s64_m))) svfloat16_t svcvt_f16_m(svfloat16_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s64_x))) svfloat16_t svcvt_f16_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s64_z))) svfloat16_t svcvt_f16_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u16_m))) svfloat16_t svcvt_f16_m(svfloat16_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u16_x))) svfloat16_t svcvt_f16_x(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u16_z))) svfloat16_t svcvt_f16_z(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u32_m))) svfloat16_t svcvt_f16_m(svfloat16_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u32_x))) svfloat16_t svcvt_f16_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u32_z))) svfloat16_t svcvt_f16_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u64_m))) svfloat16_t svcvt_f16_m(svfloat16_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u64_x))) svfloat16_t svcvt_f16_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u64_z))) svfloat16_t svcvt_f16_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f16_m))) svfloat32_t svcvt_f32_m(svfloat32_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f16_x))) svfloat32_t svcvt_f32_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f16_z))) svfloat32_t svcvt_f32_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f64_m))) svfloat32_t svcvt_f32_m(svfloat32_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f64_x))) svfloat32_t svcvt_f32_x(svbool_t, svfloat64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f64_z))) svfloat32_t svcvt_f32_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_m))) svfloat32_t svcvt_f32_m(svfloat32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_x))) svfloat32_t svcvt_f32_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_z))) svfloat32_t svcvt_f32_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s64_m))) svfloat32_t svcvt_f32_m(svfloat32_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s64_x))) svfloat32_t svcvt_f32_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s64_z))) svfloat32_t svcvt_f32_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_m))) svfloat32_t svcvt_f32_m(svfloat32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_x))) svfloat32_t svcvt_f32_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_z))) svfloat32_t svcvt_f32_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u64_m))) svfloat32_t svcvt_f32_m(svfloat32_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u64_x))) svfloat32_t svcvt_f32_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u64_z))) svfloat32_t svcvt_f32_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f16_m))) svfloat64_t svcvt_f64_m(svfloat64_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f16_x))) svfloat64_t svcvt_f64_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f16_z))) svfloat64_t svcvt_f64_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f32_m))) svfloat64_t svcvt_f64_m(svfloat64_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f32_x))) svfloat64_t svcvt_f64_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f32_z))) svfloat64_t svcvt_f64_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s32_m))) svfloat64_t svcvt_f64_m(svfloat64_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s32_x))) svfloat64_t svcvt_f64_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s32_z))) svfloat64_t svcvt_f64_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s64_m))) svfloat64_t svcvt_f64_m(svfloat64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s64_x))) svfloat64_t svcvt_f64_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s64_z))) svfloat64_t svcvt_f64_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u32_m))) svfloat64_t svcvt_f64_m(svfloat64_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u32_x))) svfloat64_t svcvt_f64_x(svbool_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u32_z))) svfloat64_t svcvt_f64_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u64_m))) svfloat64_t svcvt_f64_m(svfloat64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u64_x))) svfloat64_t svcvt_f64_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u64_z))) svfloat64_t svcvt_f64_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s16_f16_m))) svint16_t svcvt_s16_m(svint16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s16_f16_x))) svint16_t svcvt_s16_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s16_f16_z))) svint16_t svcvt_s16_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f16_m))) svint32_t svcvt_s32_m(svint32_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f16_x))) svint32_t svcvt_s32_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f16_z))) svint32_t svcvt_s32_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_m))) svint32_t svcvt_s32_m(svint32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_x))) svint32_t svcvt_s32_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_z))) svint32_t svcvt_s32_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f64_m))) svint32_t svcvt_s32_m(svint32_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f64_x))) svint32_t svcvt_s32_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f64_z))) svint32_t svcvt_s32_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f16_m))) svint64_t svcvt_s64_m(svint64_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f16_x))) svint64_t svcvt_s64_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f16_z))) svint64_t svcvt_s64_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f32_m))) svint64_t svcvt_s64_m(svint64_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f32_x))) svint64_t svcvt_s64_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f32_z))) svint64_t svcvt_s64_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f64_m))) svint64_t svcvt_s64_m(svint64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f64_x))) svint64_t svcvt_s64_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f64_z))) svint64_t svcvt_s64_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u16_f16_m))) svuint16_t svcvt_u16_m(svuint16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u16_f16_x))) svuint16_t svcvt_u16_x(svbool_t, svfloat16_t); __aio 
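/*
 * Usage sketch for the svcvt_* overloads above: conversions run under a predicate,
 * with _m merging into the first operand, _z zeroing inactive lanes and _x leaving
 * them unspecified.  Same-width int32 -> float32 conversion; to_float is an
 * illustrative name.
 */
#include <arm_sve.h>
#include <stdint.h>

static void to_float(const int32_t *in, float32_t *out, int64_t n) {
    for (int64_t i = 0; i < n; i += (int64_t)svcntw()) {
        svbool_t pg = svwhilelt_b32(i, n);
        svint32_t v = svld1(pg, in + i);
        svfloat32_t f = svcvt_f32_x(pg, v);   /* _x: inactive lanes unspecified */
        svst1(pg, out + i, f);                /* but inactive lanes are not stored */
    }
}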
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u16_f16_z))) svuint16_t svcvt_u16_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f16_m))) svuint32_t svcvt_u32_m(svuint32_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f16_x))) svuint32_t svcvt_u32_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f16_z))) svuint32_t svcvt_u32_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_m))) svuint32_t svcvt_u32_m(svuint32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_x))) svuint32_t svcvt_u32_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_z))) svuint32_t svcvt_u32_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f64_m))) svuint32_t svcvt_u32_m(svuint32_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f64_x))) svuint32_t svcvt_u32_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f64_z))) svuint32_t svcvt_u32_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f16_m))) svuint64_t svcvt_u64_m(svuint64_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f16_x))) svuint64_t svcvt_u64_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f16_z))) svuint64_t svcvt_u64_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f32_m))) svuint64_t svcvt_u64_m(svuint64_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f32_x))) svuint64_t svcvt_u64_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f32_z))) svuint64_t svcvt_u64_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f64_m))) svuint64_t svcvt_u64_m(svuint64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f64_x))) svuint64_t svcvt_u64_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f64_z))) svuint64_t svcvt_u64_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f64_m))) svfloat64_t svdiv_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f32_m))) svfloat32_t svdiv_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f16_m))) svfloat16_t svdiv_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f64_x))) svfloat64_t svdiv_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f32_x))) svfloat32_t svdiv_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f16_x))) svfloat16_t svdiv_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f64_z))) svfloat64_t svdiv_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f32_z))) svfloat32_t svdiv_z(svbool_t, svfloat32_t, float32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f16_z))) svfloat16_t svdiv_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s32_m))) svint32_t svdiv_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s64_m))) svint64_t svdiv_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s32_x))) svint32_t svdiv_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s64_x))) svint64_t svdiv_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s32_z))) svint32_t svdiv_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s64_z))) svint64_t svdiv_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u32_m))) svuint32_t svdiv_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u64_m))) svuint64_t svdiv_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u32_x))) svuint32_t svdiv_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u64_x))) svuint64_t svdiv_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u32_z))) svuint32_t svdiv_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u64_z))) svuint64_t svdiv_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f64_m))) svfloat64_t svdiv_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f32_m))) svfloat32_t svdiv_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f16_m))) svfloat16_t svdiv_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f64_x))) svfloat64_t svdiv_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f32_x))) svfloat32_t svdiv_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f16_x))) svfloat16_t svdiv_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f64_z))) svfloat64_t svdiv_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f32_z))) svfloat32_t svdiv_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f16_z))) svfloat16_t svdiv_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s32_m))) svint32_t svdiv_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s64_m))) svint64_t svdiv_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s32_x))) svint32_t svdiv_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s64_x))) svint64_t svdiv_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s32_z))) svint32_t svdiv_z(svbool_t, svint32_t, svint32_t); __aio 
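/*
 * Usage sketch for the svdiv overloads above, also showing how the _m suffix
 * behaves: the merging form takes inactive-lane results from its first vector
 * operand, so dividing under a "divisor is non-zero" predicate leaves the dividend
 * unchanged wherever the divisor is zero.  safe_div is an illustrative name.
 */
#include <arm_sve.h>
#include <stdint.h>

static void safe_div(const float32_t *a, const float32_t *b,
                     float32_t *out, int64_t n) {
    for (int64_t i = 0; i < n; i += (int64_t)svcntw()) {
        svbool_t pg = svwhilelt_b32(i, n);
        svfloat32_t va = svld1(pg, a + i);
        svfloat32_t vb = svld1(pg, b + i);
        svbool_t nz = svcmpne(pg, vb, 0.0f);   /* lanes with b != 0 */
        svfloat32_t q = svdiv_m(nz, va, vb);   /* a/b where b != 0, else a */
        svst1(pg, out + i, q);
    }
}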
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s64_z))) svint64_t svdiv_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u32_m))) svuint32_t svdiv_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u64_m))) svuint64_t svdiv_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u32_x))) svuint32_t svdiv_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u64_x))) svuint64_t svdiv_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u32_z))) svuint32_t svdiv_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u64_z))) svuint64_t svdiv_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f64_m))) svfloat64_t svdivr_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f32_m))) svfloat32_t svdivr_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f16_m))) svfloat16_t svdivr_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f64_x))) svfloat64_t svdivr_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f32_x))) svfloat32_t svdivr_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f16_x))) svfloat16_t svdivr_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f64_z))) svfloat64_t svdivr_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f32_z))) svfloat32_t svdivr_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f16_z))) svfloat16_t svdivr_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s32_m))) svint32_t svdivr_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s64_m))) svint64_t svdivr_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s32_x))) svint32_t svdivr_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s64_x))) svint64_t svdivr_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s32_z))) svint32_t svdivr_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s64_z))) svint64_t svdivr_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u32_m))) svuint32_t svdivr_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u64_m))) svuint64_t svdivr_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u32_x))) svuint32_t svdivr_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u64_x))) svuint64_t svdivr_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u32_z))) svuint32_t svdivr_z(svbool_t, svuint32_t, uint32_t); 
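/*
 * svdivr reverses the operand order of svdiv (it computes op2 / op1), so the
 * scalar forms above give a convenient per-lane reciprocal.  reciprocal is an
 * illustrative name; zero inputs follow the usual IEEE rules and yield an
 * infinite quotient.
 */
#include <arm_sve.h>
#include <stdint.h>

static void reciprocal(const float32_t *in, float32_t *out, int64_t n) {
    for (int64_t i = 0; i < n; i += (int64_t)svcntw()) {
        svbool_t pg = svwhilelt_b32(i, n);
        svfloat32_t v = svld1(pg, in + i);
        svst1(pg, out + i, svdivr_x(pg, v, 1.0f));   /* 1.0f / v per active lane */
    }
}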
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u64_z))) svuint64_t svdivr_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f64_m))) svfloat64_t svdivr_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f32_m))) svfloat32_t svdivr_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f16_m))) svfloat16_t svdivr_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f64_x))) svfloat64_t svdivr_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f32_x))) svfloat32_t svdivr_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f16_x))) svfloat16_t svdivr_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f64_z))) svfloat64_t svdivr_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f32_z))) svfloat32_t svdivr_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f16_z))) svfloat16_t svdivr_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s32_m))) svint32_t svdivr_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s64_m))) svint64_t svdivr_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s32_x))) svint32_t svdivr_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s64_x))) svint64_t svdivr_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s32_z))) svint32_t svdivr_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s64_z))) svint64_t svdivr_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u32_m))) svuint32_t svdivr_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u64_m))) svuint64_t svdivr_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u32_x))) svuint32_t svdivr_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u64_x))) svuint64_t svdivr_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u32_z))) svuint32_t svdivr_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u64_z))) svuint64_t svdivr_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_s32))) svint32_t svdot(svint32_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_s64))) svint64_t svdot(svint64_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_u32))) svuint32_t svdot(svuint32_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_u64))) svuint64_t svdot(svuint64_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_s32))) svint32_t svdot(svint32_t, svint8_t, svint8_t); __aio 
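/*
 * Usage sketch for svdot: each 32-bit accumulator lane receives the sum of four
 * adjacent 8-bit products (and each 64-bit lane the sum of four 16-bit products).
 * dot_s8 is an illustrative name and assumes n is a multiple of the vector byte
 * count svcntb(); svaddv, svptrue_b8 and svptrue_b32 are standard ACLE intrinsics
 * declared elsewhere in arm_sve.h.
 */
#include <arm_sve.h>
#include <stdint.h>

static int64_t dot_s8(const int8_t *a, const int8_t *b, int64_t n) {
    svbool_t all = svptrue_b8();
    svint32_t acc = svdup_s32(0);
    for (int64_t i = 0; i < n; i += (int64_t)svcntb()) {
        svint8_t va = svld1(all, a + i);
        svint8_t vb = svld1(all, b + i);
        acc = svdot(acc, va, vb);          /* acc[j] += a[4j]*b[4j] + ... + a[4j+3]*b[4j+3] */
    }
    return svaddv(svptrue_b32(), acc);     /* horizontal sum of the 32-bit lanes */
}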
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_s64))) svint64_t svdot(svint64_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_u32))) svuint32_t svdot(svuint32_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_u64))) svuint64_t svdot(svuint64_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_s32))) svint32_t svdot_lane(svint32_t, svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_s64))) svint64_t svdot_lane(svint64_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_u32))) svuint32_t svdot_lane(svuint32_t, svuint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_u64))) svuint64_t svdot_lane(svuint64_t, svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u8))) svuint8_t svdup_u8(uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u32))) svuint32_t svdup_u32(uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64))) svuint64_t svdup_u64(uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16))) svuint16_t svdup_u16(uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8))) svint8_t svdup_s8(int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64))) svfloat64_t svdup_f64(float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f32))) svfloat32_t svdup_f32(float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f16))) svfloat16_t svdup_f16(float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s32))) svint32_t svdup_s32(int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s64))) svint64_t svdup_s64(int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s16))) svint16_t svdup_s16(int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u8_m))) svuint8_t svdup_u8_m(svuint8_t, svbool_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u32_m))) svuint32_t svdup_u32_m(svuint32_t, svbool_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64_m))) svuint64_t svdup_u64_m(svuint64_t, svbool_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16_m))) svuint16_t svdup_u16_m(svuint16_t, svbool_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8_m))) svint8_t svdup_s8_m(svint8_t, svbool_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64_m))) svfloat64_t svdup_f64_m(svfloat64_t, svbool_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f32_m))) svfloat32_t svdup_f32_m(svfloat32_t, svbool_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f16_m))) svfloat16_t svdup_f16_m(svfloat16_t, svbool_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s32_m))) svint32_t svdup_s32_m(svint32_t, svbool_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s64_m))) svint64_t svdup_s64_m(svint64_t, svbool_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s16_m))) svint16_t 
svdup_s16_m(svint16_t, svbool_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_b8))) svbool_t svdup_b8(bool); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_b32))) svbool_t svdup_b32(bool); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_b64))) svbool_t svdup_b64(bool); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_b16))) svbool_t svdup_b16(bool); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u8_x))) svuint8_t svdup_u8_x(svbool_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u32_x))) svuint32_t svdup_u32_x(svbool_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64_x))) svuint64_t svdup_u64_x(svbool_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16_x))) svuint16_t svdup_u16_x(svbool_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8_x))) svint8_t svdup_s8_x(svbool_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64_x))) svfloat64_t svdup_f64_x(svbool_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f32_x))) svfloat32_t svdup_f32_x(svbool_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f16_x))) svfloat16_t svdup_f16_x(svbool_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s32_x))) svint32_t svdup_s32_x(svbool_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s64_x))) svint64_t svdup_s64_x(svbool_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s16_x))) svint16_t svdup_s16_x(svbool_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u8_z))) svuint8_t svdup_u8_z(svbool_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u32_z))) svuint32_t svdup_u32_z(svbool_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64_z))) svuint64_t svdup_u64_z(svbool_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16_z))) svuint16_t svdup_u16_z(svbool_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8_z))) svint8_t svdup_s8_z(svbool_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64_z))) svfloat64_t svdup_f64_z(svbool_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f32_z))) svfloat32_t svdup_f32_z(svbool_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f16_z))) svfloat16_t svdup_f16_z(svbool_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s32_z))) svint32_t svdup_s32_z(svbool_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s64_z))) svint64_t svdup_s64_z(svbool_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s16_z))) svint16_t svdup_s16_z(svbool_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_u8))) svuint8_t svdup_lane(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_u32))) svuint32_t svdup_lane(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_u64))) svuint64_t svdup_lane(svuint64_t, uint64_t); __aio 
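/*
 * Usage sketch for the svdup_* overloads above: broadcast a scalar into every lane
 * (the _m/_x/_z forms do the same under a predicate).  fill_f32 is an illustrative
 * name.
 */
#include <arm_sve.h>
#include <stdint.h>

static void fill_f32(float32_t *dst, float32_t value, int64_t n) {
    svfloat32_t v = svdup_f32(value);           /* the same value in every lane */
    for (int64_t i = 0; i < n; i += (int64_t)svcntw()) {
        svbool_t pg = svwhilelt_b32(i, n);
        svst1(pg, dst + i, v);
    }
}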
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_u16))) svuint16_t svdup_lane(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_s8))) svint8_t svdup_lane(svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_f64))) svfloat64_t svdup_lane(svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_f32))) svfloat32_t svdup_lane(svfloat32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_f16))) svfloat16_t svdup_lane(svfloat16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_s32))) svint32_t svdup_lane(svint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_s64))) svint64_t svdup_lane(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_s16))) svint16_t svdup_lane(svint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_u8))) svuint8_t svdupq_u8(uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_s8))) svint8_t svdupq_s8(int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_u16))) svuint16_t svdupq_u16(uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_f16))) svfloat16_t svdupq_f16(float16_t, float16_t, float16_t, float16_t, float16_t, float16_t, float16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_s16))) svint16_t svdupq_s16(int16_t, int16_t, int16_t, int16_t, int16_t, int16_t, int16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_u32))) svuint32_t svdupq_u32(uint32_t, uint32_t, uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_f32))) svfloat32_t svdupq_f32(float32_t, float32_t, float32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_s32))) svint32_t svdupq_s32(int32_t, int32_t, int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_u64))) svuint64_t svdupq_u64(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_f64))) svfloat64_t svdupq_f64(float64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_s64))) svint64_t svdupq_s64(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_b8))) svbool_t svdupq_b8(bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_b16))) svbool_t svdupq_b16(bool, bool, bool, bool, bool, bool, bool, bool); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_b32))) svbool_t svdupq_b32(bool, bool, bool, bool); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_b64))) svbool_t svdupq_b64(bool, bool); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_u8))) svuint8_t svdupq_lane(svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_u32))) svuint32_t 
svdupq_lane(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_u64))) svuint64_t svdupq_lane(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_u16))) svuint16_t svdupq_lane(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_s8))) svint8_t svdupq_lane(svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_f64))) svfloat64_t svdupq_lane(svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_f32))) svfloat32_t svdupq_lane(svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_f16))) svfloat16_t svdupq_lane(svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_s32))) svint32_t svdupq_lane(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_s64))) svint64_t svdupq_lane(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_s16))) svint16_t svdupq_lane(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_b_z))) svbool_t sveor_z(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u8_m))) svuint8_t sveor_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u32_m))) svuint32_t sveor_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u64_m))) svuint64_t sveor_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u16_m))) svuint16_t sveor_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s8_m))) svint8_t sveor_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s32_m))) svint32_t sveor_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s64_m))) svint64_t sveor_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s16_m))) svint16_t sveor_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u8_x))) svuint8_t sveor_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u32_x))) svuint32_t sveor_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u64_x))) svuint64_t sveor_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u16_x))) svuint16_t sveor_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s8_x))) svint8_t sveor_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s32_x))) svint32_t sveor_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s64_x))) svint64_t sveor_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s16_x))) svint16_t sveor_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u8_z))) svuint8_t sveor_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u32_z))) svuint32_t 
sveor_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u64_z))) svuint64_t sveor_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u16_z))) svuint16_t sveor_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s8_z))) svint8_t sveor_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s32_z))) svint32_t sveor_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s64_z))) svint64_t sveor_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s16_z))) svint16_t sveor_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u8_m))) svuint8_t sveor_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u32_m))) svuint32_t sveor_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u64_m))) svuint64_t sveor_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u16_m))) svuint16_t sveor_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s8_m))) svint8_t sveor_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s32_m))) svint32_t sveor_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s64_m))) svint64_t sveor_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s16_m))) svint16_t sveor_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u8_x))) svuint8_t sveor_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u32_x))) svuint32_t sveor_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u64_x))) svuint64_t sveor_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u16_x))) svuint16_t sveor_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s8_x))) svint8_t sveor_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s32_x))) svint32_t sveor_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s64_x))) svint64_t sveor_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s16_x))) svint16_t sveor_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u8_z))) svuint8_t sveor_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u32_z))) svuint32_t sveor_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u64_z))) svuint64_t sveor_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u16_z))) svuint16_t sveor_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s8_z))) svint8_t sveor_z(svbool_t, svint8_t, svint8_t); __aio 
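/*
 * Usage sketch for the sveor overloads above: XOR every byte of a buffer with a
 * fixed key, in place.  xor_mask is an illustrative name; svcntb, svwhilelt_b8,
 * svld1 and svst1 are standard ACLE intrinsics declared elsewhere in arm_sve.h.
 */
#include <arm_sve.h>
#include <stdint.h>

static void xor_mask(uint8_t *buf, uint8_t key, int64_t n) {
    for (int64_t i = 0; i < n; i += (int64_t)svcntb()) {
        svbool_t pg = svwhilelt_b8(i, n);
        svuint8_t v = svld1(pg, buf + i);
        svst1(pg, buf + i, sveor_x(pg, v, key));   /* v ^ key in each active lane */
    }
}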
__attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s32_z))) svint32_t sveor_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s64_z))) svint64_t sveor_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s16_z))) svint16_t sveor_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_u8))) uint8_t sveorv(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_u32))) uint32_t sveorv(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_u64))) uint64_t sveorv(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_u16))) uint16_t sveorv(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_s8))) int8_t sveorv(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_s32))) int32_t sveorv(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_s64))) int64_t sveorv(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_s16))) int16_t sveorv(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f64))) svfloat64_t svexpa(svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f32))) svfloat32_t svexpa(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f16))) svfloat16_t svexpa(svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_u8))) svuint8_t svext(svuint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_u32))) svuint32_t svext(svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_u64))) svuint64_t svext(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_u16))) svuint16_t svext(svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_s8))) svint8_t svext(svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_f64))) svfloat64_t svext(svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_f32))) svfloat32_t svext(svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_f16))) svfloat16_t svext(svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_s32))) svint32_t svext(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_s64))) svint64_t svext(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_s16))) svint16_t svext(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s32_m))) svint32_t svextb_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s64_m))) svint64_t svextb_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s16_m))) svint16_t svextb_m(svint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s32_x))) svint32_t svextb_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s64_x))) 
svint64_t svextb_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s16_x))) svint16_t svextb_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s32_z))) svint32_t svextb_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s64_z))) svint64_t svextb_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s16_z))) svint16_t svextb_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u32_m))) svuint32_t svextb_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u64_m))) svuint64_t svextb_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u16_m))) svuint16_t svextb_m(svuint16_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u32_x))) svuint32_t svextb_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u64_x))) svuint64_t svextb_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u16_x))) svuint16_t svextb_x(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u32_z))) svuint32_t svextb_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u64_z))) svuint64_t svextb_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u16_z))) svuint16_t svextb_z(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s32_m))) svint32_t svexth_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s64_m))) svint64_t svexth_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s32_x))) svint32_t svexth_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s64_x))) svint64_t svexth_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s32_z))) svint32_t svexth_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s64_z))) svint64_t svexth_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u32_m))) svuint32_t svexth_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u64_m))) svuint64_t svexth_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u32_x))) svuint32_t svexth_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u64_x))) svuint64_t svexth_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u32_z))) svuint32_t svexth_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u64_z))) svuint64_t svexth_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_s64_m))) svint64_t svextw_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_s64_x))) svint64_t svextw_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_s64_z))) svint64_t svextw_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_u64_m))) 
svuint64_t svextw_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_u64_x))) svuint64_t svextw_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_u64_z))) svuint64_t svextw_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_u8))) svuint8_t svget2(svuint8x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_u32))) svuint32_t svget2(svuint32x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_u64))) svuint64_t svget2(svuint64x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_u16))) svuint16_t svget2(svuint16x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s8))) svint8_t svget2(svint8x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_f64))) svfloat64_t svget2(svfloat64x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_f32))) svfloat32_t svget2(svfloat32x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_f16))) svfloat16_t svget2(svfloat16x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s32))) svint32_t svget2(svint32x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s64))) svint64_t svget2(svint64x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s16))) svint16_t svget2(svint16x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u8))) svuint8_t svget3(svuint8x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u32))) svuint32_t svget3(svuint32x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u64))) svuint64_t svget3(svuint64x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u16))) svuint16_t svget3(svuint16x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s8))) svint8_t svget3(svint8x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_f64))) svfloat64_t svget3(svfloat64x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_f32))) svfloat32_t svget3(svfloat32x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_f16))) svfloat16_t svget3(svfloat16x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s32))) svint32_t svget3(svint32x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s64))) svint64_t svget3(svint64x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s16))) svint16_t svget3(svint16x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u8))) svuint8_t svget4(svuint8x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u32))) svuint32_t svget4(svuint32x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u64))) svuint64_t svget4(svuint64x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u16))) svuint16_t svget4(svuint16x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s8))) svint8_t svget4(svint8x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_f64))) svfloat64_t 
svget4(svfloat64x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_f32))) svfloat32_t svget4(svfloat32x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_f16))) svfloat16_t svget4(svfloat16x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s32))) svint32_t svget4(svint32x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s64))) svint64_t svget4(svint64x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s16))) svint16_t svget4(svint16x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u8))) svuint8_t svinsr(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u32))) svuint32_t svinsr(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u64))) svuint64_t svinsr(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u16))) svuint16_t svinsr(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_s8))) svint8_t svinsr(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_f64))) svfloat64_t svinsr(svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_f32))) svfloat32_t svinsr(svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_f16))) svfloat16_t svinsr(svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_s32))) svint32_t svinsr(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_s64))) svint64_t svinsr(svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_s16))) svint16_t svinsr(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_u8))) uint8_t svlasta(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_u32))) uint32_t svlasta(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_u64))) uint64_t svlasta(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_u16))) uint16_t svlasta(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_s8))) int8_t svlasta(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_f64))) float64_t svlasta(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_f32))) float32_t svlasta(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_f16))) float16_t svlasta(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_s32))) int32_t svlasta(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_s64))) int64_t svlasta(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_s16))) int16_t svlasta(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_u8))) uint8_t svlastb(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_u32))) uint32_t svlastb(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_u64))) uint64_t svlastb(svbool_t, svuint64_t); __aio 
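/*
 * Illustrative sketch, not part of the embedded header: svlastb() reads the
 * element in the last active lane of the governing predicate, a common way to
 * carry a value out of a vectorised loop.  The function name and `n` are
 * hypothetical; svwhilelt_b32 and svindex_u32 are standard ACLE intrinsics
 * that do not appear in this excerpt.
 */
#include <arm_sve.h>

static inline uint32_t last_processed_value(uint32_t n) {
    svbool_t   pg  = svwhilelt_b32((uint32_t)0, n);  /* lanes 0..n-1 active  */
    svuint32_t seq = svindex_u32(0, 1);              /* 0, 1, 2, ... per lane */
    return svlastb(pg, seq);                         /* last active element   */
}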
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_u16))) uint16_t svlastb(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_s8))) int8_t svlastb(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_f64))) float64_t svlastb(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_f32))) float32_t svlastb(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_f16))) float16_t svlastb(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_s32))) int32_t svlastb(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_s64))) int64_t svlastb(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_s16))) int16_t svlastb(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8))) svuint8_t svld1(svbool_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32))) svuint32_t svld1(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64))) svuint64_t svld1(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16))) svuint16_t svld1(svbool_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8))) svint8_t svld1(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64))) svfloat64_t svld1(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32))) svfloat32_t svld1(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16))) svfloat16_t svld1(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32))) svint32_t svld1(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64))) svint64_t svld1(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16))) svint16_t svld1(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_index_u32))) svuint32_t svld1_gather_index_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_index_u64))) svuint64_t svld1_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_index_f64))) svfloat64_t svld1_gather_index_f64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_index_f32))) svfloat32_t svld1_gather_index_f32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_index_s32))) svint32_t svld1_gather_index_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_index_s64))) svint64_t svld1_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_offset_u32))) svuint32_t svld1_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_offset_u64))) svuint64_t svld1_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_offset_f64))) svfloat64_t svld1_gather_offset_f64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_offset_f32))) svfloat32_t svld1_gather_offset_f32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_offset_s32))) svint32_t svld1_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_offset_s64))) svint64_t svld1_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_u32))) svuint32_t svld1_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_u64))) svuint64_t svld1_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_f64))) svfloat64_t svld1_gather_f64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_f32))) svfloat32_t svld1_gather_f32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_s32))) svint32_t svld1_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_s64))) svint64_t svld1_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32index_u32))) svuint32_t svld1_gather_index(svbool_t, uint32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32index_f32))) svfloat32_t svld1_gather_index(svbool_t, float32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32index_s32))) svint32_t svld1_gather_index(svbool_t, int32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32index_u32))) svuint32_t svld1_gather_index(svbool_t, uint32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32index_f32))) svfloat32_t svld1_gather_index(svbool_t, float32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32index_s32))) svint32_t svld1_gather_index(svbool_t, int32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64index_u64))) svuint64_t svld1_gather_index(svbool_t, uint64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64index_f64))) svfloat64_t svld1_gather_index(svbool_t, float64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64index_s64))) svint64_t svld1_gather_index(svbool_t, int64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64index_u64))) svuint64_t svld1_gather_index(svbool_t, uint64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64index_f64))) svfloat64_t svld1_gather_index(svbool_t, float64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64index_s64))) svint64_t svld1_gather_index(svbool_t, int64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32offset_u32))) svuint32_t svld1_gather_offset(svbool_t, uint32_t 
const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32offset_f32))) svfloat32_t svld1_gather_offset(svbool_t, float32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32offset_s32))) svint32_t svld1_gather_offset(svbool_t, int32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32offset_u32))) svuint32_t svld1_gather_offset(svbool_t, uint32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32offset_f32))) svfloat32_t svld1_gather_offset(svbool_t, float32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32offset_s32))) svint32_t svld1_gather_offset(svbool_t, int32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64offset_u64))) svuint64_t svld1_gather_offset(svbool_t, uint64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64offset_f64))) svfloat64_t svld1_gather_offset(svbool_t, float64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64offset_s64))) svint64_t svld1_gather_offset(svbool_t, int64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64offset_u64))) svuint64_t svld1_gather_offset(svbool_t, uint64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64offset_f64))) svfloat64_t svld1_gather_offset(svbool_t, float64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64offset_s64))) svint64_t svld1_gather_offset(svbool_t, int64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8))) svuint8_t svld1_vnum(svbool_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32))) svuint32_t svld1_vnum(svbool_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64))) svuint64_t svld1_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16))) svuint16_t svld1_vnum(svbool_t, uint16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8))) svint8_t svld1_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64))) svfloat64_t svld1_vnum(svbool_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32))) svfloat32_t svld1_vnum(svbool_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16))) svfloat16_t svld1_vnum(svbool_t, float16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32))) svint32_t svld1_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64))) svint64_t svld1_vnum(svbool_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16))) svint16_t svld1_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u8))) svuint8_t svld1rq(svbool_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u32))) svuint32_t 
svld1rq(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u64))) svuint64_t svld1rq(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u16))) svuint16_t svld1rq(svbool_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s8))) svint8_t svld1rq(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_f64))) svfloat64_t svld1rq(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_f32))) svfloat32_t svld1rq(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_f16))) svfloat16_t svld1rq(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s32))) svint32_t svld1rq(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s64))) svint64_t svld1rq(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s16))) svint16_t svld1rq(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32base_offset_u32))) svuint32_t svld1sb_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64base_offset_u64))) svuint64_t svld1sb_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32base_offset_s32))) svint32_t svld1sb_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64base_offset_s64))) svint64_t svld1sb_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32base_u32))) svuint32_t svld1sb_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64base_u64))) svuint64_t svld1sb_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32base_s32))) svint32_t svld1sb_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64base_s64))) svint64_t svld1sb_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_s32offset_u32))) svuint32_t svld1sb_gather_offset_u32(svbool_t, int8_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_s32offset_s32))) svint32_t svld1sb_gather_offset_s32(svbool_t, int8_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32offset_u32))) svuint32_t svld1sb_gather_offset_u32(svbool_t, int8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32offset_s32))) svint32_t svld1sb_gather_offset_s32(svbool_t, int8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_s64offset_u64))) svuint64_t svld1sb_gather_offset_u64(svbool_t, int8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_s64offset_s64))) svint64_t svld1sb_gather_offset_s64(svbool_t, int8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64offset_u64))) svuint64_t svld1sb_gather_offset_u64(svbool_t, int8_t const *, 
svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64offset_s64))) svint64_t svld1sb_gather_offset_s64(svbool_t, int8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_index_u32))) svuint32_t svld1sh_gather_index_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_index_u64))) svuint64_t svld1sh_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_index_s32))) svint32_t svld1sh_gather_index_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_index_s64))) svint64_t svld1sh_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_offset_u32))) svuint32_t svld1sh_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_offset_u64))) svuint64_t svld1sh_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_offset_s32))) svint32_t svld1sh_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_offset_s64))) svint64_t svld1sh_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_u32))) svuint32_t svld1sh_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_u64))) svuint64_t svld1sh_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_s32))) svint32_t svld1sh_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_s64))) svint64_t svld1sh_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s32index_u32))) svuint32_t svld1sh_gather_index_u32(svbool_t, int16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s32index_s32))) svint32_t svld1sh_gather_index_s32(svbool_t, int16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32index_u32))) svuint32_t svld1sh_gather_index_u32(svbool_t, int16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32index_s32))) svint32_t svld1sh_gather_index_s32(svbool_t, int16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s64index_u64))) svuint64_t svld1sh_gather_index_u64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s64index_s64))) svint64_t svld1sh_gather_index_s64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64index_u64))) svuint64_t svld1sh_gather_index_u64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64index_s64))) svint64_t svld1sh_gather_index_s64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s32offset_u32))) svuint32_t svld1sh_gather_offset_u32(svbool_t, 
int16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s32offset_s32))) svint32_t svld1sh_gather_offset_s32(svbool_t, int16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32offset_u32))) svuint32_t svld1sh_gather_offset_u32(svbool_t, int16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32offset_s32))) svint32_t svld1sh_gather_offset_s32(svbool_t, int16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s64offset_u64))) svuint64_t svld1sh_gather_offset_u64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s64offset_s64))) svint64_t svld1sh_gather_offset_s64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64offset_u64))) svuint64_t svld1sh_gather_offset_u64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64offset_s64))) svint64_t svld1sh_gather_offset_s64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_index_u64))) svuint64_t svld1sw_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_index_s64))) svint64_t svld1sw_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_offset_u64))) svuint64_t svld1sw_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_offset_s64))) svint64_t svld1sw_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_u64))) svuint64_t svld1sw_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_s64))) svint64_t svld1sw_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_s64index_u64))) svuint64_t svld1sw_gather_index_u64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_s64index_s64))) svint64_t svld1sw_gather_index_s64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64index_u64))) svuint64_t svld1sw_gather_index_u64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64index_s64))) svint64_t svld1sw_gather_index_s64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_s64offset_u64))) svuint64_t svld1sw_gather_offset_u64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_s64offset_s64))) svint64_t svld1sw_gather_offset_s64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64offset_u64))) svuint64_t svld1sw_gather_offset_u64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64offset_s64))) svint64_t svld1sw_gather_offset_s64(svbool_t, int32_t const *, svuint64_t); __aio 
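/*
 * Illustrative sketch, not part of the embedded header: the ..._gather_index
 * overloads above load table[idx[i]] for each active lane.  The names `table`,
 * `idx`, `n` and the summation loop are hypothetical; svcntw, svwhilelt_b32
 * and svaddv are standard ACLE intrinsics not declared in this excerpt.
 */
#include <arm_sve.h>

static inline float32_t gather_sum(const float32_t *table,
                                   const int32_t *idx, int32_t n) {
    float32_t total = 0.0f;
    for (int32_t i = 0; i < n; i += (int32_t)svcntw()) {
        svbool_t    pg   = svwhilelt_b32(i, n);
        svint32_t   vidx = svld1(pg, idx + i);                  /* contiguous load */
        svfloat32_t vals = svld1_gather_index(pg, table, vidx); /* table[idx[.]]   */
        total += svaddv(pg, vals);                              /* horizontal sum  */
    }
    return total;
}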
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32base_offset_u32))) svuint32_t svld1ub_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64base_offset_u64))) svuint64_t svld1ub_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32base_offset_s32))) svint32_t svld1ub_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64base_offset_s64))) svint64_t svld1ub_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32base_u32))) svuint32_t svld1ub_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64base_u64))) svuint64_t svld1ub_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32base_s32))) svint32_t svld1ub_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64base_s64))) svint64_t svld1ub_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_s32offset_u32))) svuint32_t svld1ub_gather_offset_u32(svbool_t, uint8_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_s32offset_s32))) svint32_t svld1ub_gather_offset_s32(svbool_t, uint8_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32offset_u32))) svuint32_t svld1ub_gather_offset_u32(svbool_t, uint8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32offset_s32))) svint32_t svld1ub_gather_offset_s32(svbool_t, uint8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_s64offset_u64))) svuint64_t svld1ub_gather_offset_u64(svbool_t, uint8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_s64offset_s64))) svint64_t svld1ub_gather_offset_s64(svbool_t, uint8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64offset_u64))) svuint64_t svld1ub_gather_offset_u64(svbool_t, uint8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64offset_s64))) svint64_t svld1ub_gather_offset_s64(svbool_t, uint8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_index_u32))) svuint32_t svld1uh_gather_index_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_index_u64))) svuint64_t svld1uh_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_index_s32))) svint32_t svld1uh_gather_index_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_index_s64))) svint64_t svld1uh_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_offset_u32))) svuint32_t svld1uh_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_offset_u64))) svuint64_t 
svld1uh_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_offset_s32))) svint32_t svld1uh_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_offset_s64))) svint64_t svld1uh_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_u32))) svuint32_t svld1uh_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_u64))) svuint64_t svld1uh_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_s32))) svint32_t svld1uh_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_s64))) svint64_t svld1uh_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s32index_u32))) svuint32_t svld1uh_gather_index_u32(svbool_t, uint16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s32index_s32))) svint32_t svld1uh_gather_index_s32(svbool_t, uint16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32index_u32))) svuint32_t svld1uh_gather_index_u32(svbool_t, uint16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32index_s32))) svint32_t svld1uh_gather_index_s32(svbool_t, uint16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s64index_u64))) svuint64_t svld1uh_gather_index_u64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s64index_s64))) svint64_t svld1uh_gather_index_s64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64index_u64))) svuint64_t svld1uh_gather_index_u64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64index_s64))) svint64_t svld1uh_gather_index_s64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s32offset_u32))) svuint32_t svld1uh_gather_offset_u32(svbool_t, uint16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s32offset_s32))) svint32_t svld1uh_gather_offset_s32(svbool_t, uint16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32offset_u32))) svuint32_t svld1uh_gather_offset_u32(svbool_t, uint16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32offset_s32))) svint32_t svld1uh_gather_offset_s32(svbool_t, uint16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s64offset_u64))) svuint64_t svld1uh_gather_offset_u64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s64offset_s64))) svint64_t svld1uh_gather_offset_s64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64offset_u64))) svuint64_t svld1uh_gather_offset_u64(svbool_t, uint16_t const *, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64offset_s64))) svint64_t svld1uh_gather_offset_s64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_index_u64))) svuint64_t svld1uw_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_index_s64))) svint64_t svld1uw_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_offset_u64))) svuint64_t svld1uw_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_offset_s64))) svint64_t svld1uw_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_u64))) svuint64_t svld1uw_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_s64))) svint64_t svld1uw_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_s64index_u64))) svuint64_t svld1uw_gather_index_u64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_s64index_s64))) svint64_t svld1uw_gather_index_s64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64index_u64))) svuint64_t svld1uw_gather_index_u64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64index_s64))) svint64_t svld1uw_gather_index_s64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_s64offset_u64))) svuint64_t svld1uw_gather_offset_u64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_s64offset_s64))) svint64_t svld1uw_gather_offset_s64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64offset_u64))) svuint64_t svld1uw_gather_offset_u64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64offset_s64))) svint64_t svld1uw_gather_offset_s64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u8))) svuint8x2_t svld2(svbool_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u32))) svuint32x2_t svld2(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u64))) svuint64x2_t svld2(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u16))) svuint16x2_t svld2(svbool_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s8))) svint8x2_t svld2(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_f64))) svfloat64x2_t svld2(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_f32))) svfloat32x2_t svld2(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_f16))) svfloat16x2_t svld2(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s32))) svint32x2_t svld2(svbool_t, int32_t const 
*); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s64))) svint64x2_t svld2(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s16))) svint16x2_t svld2(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u8))) svuint8x2_t svld2_vnum(svbool_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u32))) svuint32x2_t svld2_vnum(svbool_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u64))) svuint64x2_t svld2_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u16))) svuint16x2_t svld2_vnum(svbool_t, uint16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s8))) svint8x2_t svld2_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_f64))) svfloat64x2_t svld2_vnum(svbool_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_f32))) svfloat32x2_t svld2_vnum(svbool_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_f16))) svfloat16x2_t svld2_vnum(svbool_t, float16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s32))) svint32x2_t svld2_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s64))) svint64x2_t svld2_vnum(svbool_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s16))) svint16x2_t svld2_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u8))) svuint8x3_t svld3(svbool_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u32))) svuint32x3_t svld3(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u64))) svuint64x3_t svld3(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u16))) svuint16x3_t svld3(svbool_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s8))) svint8x3_t svld3(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_f64))) svfloat64x3_t svld3(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_f32))) svfloat32x3_t svld3(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_f16))) svfloat16x3_t svld3(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s32))) svint32x3_t svld3(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s64))) svint64x3_t svld3(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s16))) svint16x3_t svld3(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u8))) svuint8x3_t svld3_vnum(svbool_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u32))) svuint32x3_t svld3_vnum(svbool_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u64))) svuint64x3_t svld3_vnum(svbool_t, uint64_t const *, int64_t); __aio 
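/*
 * Illustrative sketch, not part of the embedded header: svld2() is a
 * de-interleaving load, so a stream of {re, im} pairs comes back as a
 * two-vector tuple and svget2() selects one half.  The complex-magnitude use
 * case and names are hypothetical; svmul_x, svadd_x, svst1, svcntw and
 * svwhilelt_b32 are standard ACLE intrinsics not shown in this excerpt.
 */
#include <arm_sve.h>

static inline void complex_sq_magnitude(const float32_t *interleaved,
                                        float32_t *out, int32_t n) {
    for (int32_t i = 0; i < n; i += (int32_t)svcntw()) {
        svbool_t      pg = svwhilelt_b32(i, n);            /* one lane per pair */
        svfloat32x2_t ri = svld2(pg, interleaved + 2 * i); /* de-interleave     */
        svfloat32_t   re = svget2(ri, 0);
        svfloat32_t   im = svget2(ri, 1);
        svfloat32_t   sq = svadd_x(pg, svmul_x(pg, re, re),
                                       svmul_x(pg, im, im));
        svst1(pg, out + i, sq);                            /* |z|^2 per element */
    }
}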
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u16))) svuint16x3_t svld3_vnum(svbool_t, uint16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s8))) svint8x3_t svld3_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_f64))) svfloat64x3_t svld3_vnum(svbool_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_f32))) svfloat32x3_t svld3_vnum(svbool_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_f16))) svfloat16x3_t svld3_vnum(svbool_t, float16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s32))) svint32x3_t svld3_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s64))) svint64x3_t svld3_vnum(svbool_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s16))) svint16x3_t svld3_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u8))) svuint8x4_t svld4(svbool_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u32))) svuint32x4_t svld4(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u64))) svuint64x4_t svld4(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u16))) svuint16x4_t svld4(svbool_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s8))) svint8x4_t svld4(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_f64))) svfloat64x4_t svld4(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_f32))) svfloat32x4_t svld4(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_f16))) svfloat16x4_t svld4(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s32))) svint32x4_t svld4(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s64))) svint64x4_t svld4(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s16))) svint16x4_t svld4(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u8))) svuint8x4_t svld4_vnum(svbool_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u32))) svuint32x4_t svld4_vnum(svbool_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u64))) svuint64x4_t svld4_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u16))) svuint16x4_t svld4_vnum(svbool_t, uint16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s8))) svint8x4_t svld4_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_f64))) svfloat64x4_t svld4_vnum(svbool_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_f32))) svfloat32x4_t svld4_vnum(svbool_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_f16))) svfloat16x4_t svld4_vnum(svbool_t, 
float16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s32))) svint32x4_t svld4_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s64))) svint64x4_t svld4_vnum(svbool_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s16))) svint16x4_t svld4_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_u8))) svuint8_t svldff1(svbool_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_u32))) svuint32_t svldff1(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_u64))) svuint64_t svldff1(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_u16))) svuint16_t svldff1(svbool_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s8))) svint8_t svldff1(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_f64))) svfloat64_t svldff1(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_f32))) svfloat32_t svldff1(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_f16))) svfloat16_t svldff1(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s32))) svint32_t svldff1(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s64))) svint64_t svldff1(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s16))) svint16_t svldff1(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_index_u32))) svuint32_t svldff1_gather_index_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_index_u64))) svuint64_t svldff1_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_index_f64))) svfloat64_t svldff1_gather_index_f64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_index_f32))) svfloat32_t svldff1_gather_index_f32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_index_s32))) svint32_t svldff1_gather_index_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_index_s64))) svint64_t svldff1_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_offset_u32))) svuint32_t svldff1_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_offset_u64))) svuint64_t svldff1_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_offset_f64))) svfloat64_t svldff1_gather_offset_f64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_offset_f32))) svfloat32_t svldff1_gather_offset_f32(svbool_t, svuint32_t, int64_t); __aio 
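/*
 * Illustrative sketch, not part of the embedded header: svldff1() is a
 * first-faulting load, so a vector read may run past the end of valid memory
 * without trapping; svrdffr() then reports which lanes actually loaded.  This
 * is the usual building block for strlen-style scans.  The function name is
 * hypothetical; svsetffr, svrdffr, svptrue_b8, svcmpeq, svptest_any,
 * svbrkb_b_z and svcntp_b8 are standard ACLE intrinsics not in this excerpt.
 */
#include <stddef.h>
#include <arm_sve.h>

static inline size_t sve_strlen(const char *s) {
    size_t   len = 0;
    svbool_t all = svptrue_b8();
    for (;;) {
        svsetffr();                                        /* clear the FFR          */
        svuint8_t chunk  = svldff1(all, (const uint8_t *)s + len);
        svbool_t  loaded = svrdffr();                      /* lanes that loaded      */
        svbool_t  is_nul = svcmpeq(loaded, chunk, (uint8_t)0);
        if (svptest_any(loaded, is_nul)) {
            svbool_t before = svbrkb_b_z(loaded, is_nul);  /* lanes before first NUL */
            return len + svcntp_b8(loaded, before);
        }
        len += svcntp_b8(all, loaded);                     /* skip the loaded lanes  */
    }
}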
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_offset_s32))) svint32_t svldff1_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_offset_s64))) svint64_t svldff1_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_u32))) svuint32_t svldff1_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_u64))) svuint64_t svldff1_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_f64))) svfloat64_t svldff1_gather_f64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_f32))) svfloat32_t svldff1_gather_f32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_s32))) svint32_t svldff1_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_s64))) svint64_t svldff1_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32index_u32))) svuint32_t svldff1_gather_index(svbool_t, uint32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32index_f32))) svfloat32_t svldff1_gather_index(svbool_t, float32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32index_s32))) svint32_t svldff1_gather_index(svbool_t, int32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32index_u32))) svuint32_t svldff1_gather_index(svbool_t, uint32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32index_f32))) svfloat32_t svldff1_gather_index(svbool_t, float32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32index_s32))) svint32_t svldff1_gather_index(svbool_t, int32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64index_u64))) svuint64_t svldff1_gather_index(svbool_t, uint64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64index_f64))) svfloat64_t svldff1_gather_index(svbool_t, float64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64index_s64))) svint64_t svldff1_gather_index(svbool_t, int64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64index_u64))) svuint64_t svldff1_gather_index(svbool_t, uint64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64index_f64))) svfloat64_t svldff1_gather_index(svbool_t, float64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64index_s64))) svint64_t svldff1_gather_index(svbool_t, int64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32offset_u32))) svuint32_t svldff1_gather_offset(svbool_t, uint32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32offset_f32))) svfloat32_t svldff1_gather_offset(svbool_t, float32_t const *, svint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32offset_s32))) svint32_t svldff1_gather_offset(svbool_t, int32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32offset_u32))) svuint32_t svldff1_gather_offset(svbool_t, uint32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32offset_f32))) svfloat32_t svldff1_gather_offset(svbool_t, float32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32offset_s32))) svint32_t svldff1_gather_offset(svbool_t, int32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64offset_u64))) svuint64_t svldff1_gather_offset(svbool_t, uint64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64offset_f64))) svfloat64_t svldff1_gather_offset(svbool_t, float64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64offset_s64))) svint64_t svldff1_gather_offset(svbool_t, int64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64offset_u64))) svuint64_t svldff1_gather_offset(svbool_t, uint64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64offset_f64))) svfloat64_t svldff1_gather_offset(svbool_t, float64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64offset_s64))) svint64_t svldff1_gather_offset(svbool_t, int64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_u8))) svuint8_t svldff1_vnum(svbool_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_u32))) svuint32_t svldff1_vnum(svbool_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_u64))) svuint64_t svldff1_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_u16))) svuint16_t svldff1_vnum(svbool_t, uint16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s8))) svint8_t svldff1_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_f64))) svfloat64_t svldff1_vnum(svbool_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_f32))) svfloat32_t svldff1_vnum(svbool_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_f16))) svfloat16_t svldff1_vnum(svbool_t, float16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s32))) svint32_t svldff1_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s64))) svint64_t svldff1_vnum(svbool_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s16))) svint16_t svldff1_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32base_offset_u32))) svuint32_t svldff1sb_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64base_offset_u64))) svuint64_t svldff1sb_gather_offset_u64(svbool_t, svuint64_t, 
int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32base_offset_s32))) svint32_t svldff1sb_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64base_offset_s64))) svint64_t svldff1sb_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32base_u32))) svuint32_t svldff1sb_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64base_u64))) svuint64_t svldff1sb_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32base_s32))) svint32_t svldff1sb_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64base_s64))) svint64_t svldff1sb_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_s32offset_u32))) svuint32_t svldff1sb_gather_offset_u32(svbool_t, int8_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_s32offset_s32))) svint32_t svldff1sb_gather_offset_s32(svbool_t, int8_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32offset_u32))) svuint32_t svldff1sb_gather_offset_u32(svbool_t, int8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32offset_s32))) svint32_t svldff1sb_gather_offset_s32(svbool_t, int8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_s64offset_u64))) svuint64_t svldff1sb_gather_offset_u64(svbool_t, int8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_s64offset_s64))) svint64_t svldff1sb_gather_offset_s64(svbool_t, int8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64offset_u64))) svuint64_t svldff1sb_gather_offset_u64(svbool_t, int8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64offset_s64))) svint64_t svldff1sb_gather_offset_s64(svbool_t, int8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_index_u32))) svuint32_t svldff1sh_gather_index_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_index_u64))) svuint64_t svldff1sh_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_index_s32))) svint32_t svldff1sh_gather_index_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_index_s64))) svint64_t svldff1sh_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_offset_u32))) svuint32_t svldff1sh_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_offset_u64))) svuint64_t svldff1sh_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_offset_s32))) svint32_t svldff1sh_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_offset_s64))) svint64_t svldff1sh_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_u32))) svuint32_t svldff1sh_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_u64))) svuint64_t svldff1sh_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_s32))) svint32_t svldff1sh_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_s64))) svint64_t svldff1sh_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s32index_u32))) svuint32_t svldff1sh_gather_index_u32(svbool_t, int16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s32index_s32))) svint32_t svldff1sh_gather_index_s32(svbool_t, int16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32index_u32))) svuint32_t svldff1sh_gather_index_u32(svbool_t, int16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32index_s32))) svint32_t svldff1sh_gather_index_s32(svbool_t, int16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s64index_u64))) svuint64_t svldff1sh_gather_index_u64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s64index_s64))) svint64_t svldff1sh_gather_index_s64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64index_u64))) svuint64_t svldff1sh_gather_index_u64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64index_s64))) svint64_t svldff1sh_gather_index_s64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s32offset_u32))) svuint32_t svldff1sh_gather_offset_u32(svbool_t, int16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s32offset_s32))) svint32_t svldff1sh_gather_offset_s32(svbool_t, int16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32offset_u32))) svuint32_t svldff1sh_gather_offset_u32(svbool_t, int16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32offset_s32))) svint32_t svldff1sh_gather_offset_s32(svbool_t, int16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s64offset_u64))) svuint64_t svldff1sh_gather_offset_u64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s64offset_s64))) svint64_t svldff1sh_gather_offset_s64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64offset_u64))) svuint64_t svldff1sh_gather_offset_u64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64offset_s64))) svint64_t svldff1sh_gather_offset_s64(svbool_t, int16_t const *, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_index_u64))) svuint64_t svldff1sw_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_index_s64))) svint64_t svldff1sw_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_offset_u64))) svuint64_t svldff1sw_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_offset_s64))) svint64_t svldff1sw_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_u64))) svuint64_t svldff1sw_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_s64))) svint64_t svldff1sw_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_s64index_u64))) svuint64_t svldff1sw_gather_index_u64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_s64index_s64))) svint64_t svldff1sw_gather_index_s64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64index_u64))) svuint64_t svldff1sw_gather_index_u64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64index_s64))) svint64_t svldff1sw_gather_index_s64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_s64offset_u64))) svuint64_t svldff1sw_gather_offset_u64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_s64offset_s64))) svint64_t svldff1sw_gather_offset_s64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64offset_u64))) svuint64_t svldff1sw_gather_offset_u64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64offset_s64))) svint64_t svldff1sw_gather_offset_s64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32base_offset_u32))) svuint32_t svldff1ub_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64base_offset_u64))) svuint64_t svldff1ub_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32base_offset_s32))) svint32_t svldff1ub_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64base_offset_s64))) svint64_t svldff1ub_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32base_u32))) svuint32_t svldff1ub_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64base_u64))) svuint64_t svldff1ub_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32base_s32))) svint32_t svldff1ub_gather_s32(svbool_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64base_s64))) svint64_t svldff1ub_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_s32offset_u32))) svuint32_t svldff1ub_gather_offset_u32(svbool_t, uint8_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_s32offset_s32))) svint32_t svldff1ub_gather_offset_s32(svbool_t, uint8_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32offset_u32))) svuint32_t svldff1ub_gather_offset_u32(svbool_t, uint8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32offset_s32))) svint32_t svldff1ub_gather_offset_s32(svbool_t, uint8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_s64offset_u64))) svuint64_t svldff1ub_gather_offset_u64(svbool_t, uint8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_s64offset_s64))) svint64_t svldff1ub_gather_offset_s64(svbool_t, uint8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64offset_u64))) svuint64_t svldff1ub_gather_offset_u64(svbool_t, uint8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64offset_s64))) svint64_t svldff1ub_gather_offset_s64(svbool_t, uint8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_index_u32))) svuint32_t svldff1uh_gather_index_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_index_u64))) svuint64_t svldff1uh_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_index_s32))) svint32_t svldff1uh_gather_index_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_index_s64))) svint64_t svldff1uh_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_offset_u32))) svuint32_t svldff1uh_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_offset_u64))) svuint64_t svldff1uh_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_offset_s32))) svint32_t svldff1uh_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_offset_s64))) svint64_t svldff1uh_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_u32))) svuint32_t svldff1uh_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_u64))) svuint64_t svldff1uh_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_s32))) svint32_t svldff1uh_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_s64))) svint64_t svldff1uh_gather_s64(svbool_t, svuint64_t); __aio 
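/* Illustrative sketch (not part of the header): a widening gather built on the
 * svldff1ub_gather_offset_u32 overload declared above.  It fetches 8-bit table
 * entries at per-lane byte offsets and zero-extends them to 32 bits.
 * svsetffr() is the standard ACLE helper for resetting the first-fault
 * register and is assumed from the same header; the function name is made up. */
#include <arm_sve.h>
#include <stdint.h>

static svuint32_t gather_bytes_u32(svbool_t pg, const uint8_t *table,
                                   svuint32_t offsets) {
    svsetffr();                                              /* fresh fault state       */
    return svldff1ub_gather_offset_u32(pg, table, offsets);  /* u8 -> u32, zero-extended */
}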
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s32index_u32))) svuint32_t svldff1uh_gather_index_u32(svbool_t, uint16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s32index_s32))) svint32_t svldff1uh_gather_index_s32(svbool_t, uint16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32index_u32))) svuint32_t svldff1uh_gather_index_u32(svbool_t, uint16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32index_s32))) svint32_t svldff1uh_gather_index_s32(svbool_t, uint16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s64index_u64))) svuint64_t svldff1uh_gather_index_u64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s64index_s64))) svint64_t svldff1uh_gather_index_s64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64index_u64))) svuint64_t svldff1uh_gather_index_u64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64index_s64))) svint64_t svldff1uh_gather_index_s64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s32offset_u32))) svuint32_t svldff1uh_gather_offset_u32(svbool_t, uint16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s32offset_s32))) svint32_t svldff1uh_gather_offset_s32(svbool_t, uint16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32offset_u32))) svuint32_t svldff1uh_gather_offset_u32(svbool_t, uint16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32offset_s32))) svint32_t svldff1uh_gather_offset_s32(svbool_t, uint16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s64offset_u64))) svuint64_t svldff1uh_gather_offset_u64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s64offset_s64))) svint64_t svldff1uh_gather_offset_s64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64offset_u64))) svuint64_t svldff1uh_gather_offset_u64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64offset_s64))) svint64_t svldff1uh_gather_offset_s64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_index_u64))) svuint64_t svldff1uw_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_index_s64))) svint64_t svldff1uw_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_offset_u64))) svuint64_t svldff1uw_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_offset_s64))) svint64_t svldff1uw_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_u64))) svuint64_t 
svldff1uw_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_s64))) svint64_t svldff1uw_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_s64index_u64))) svuint64_t svldff1uw_gather_index_u64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_s64index_s64))) svint64_t svldff1uw_gather_index_s64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64index_u64))) svuint64_t svldff1uw_gather_index_u64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64index_s64))) svint64_t svldff1uw_gather_index_s64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_s64offset_u64))) svuint64_t svldff1uw_gather_offset_u64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_s64offset_s64))) svint64_t svldff1uw_gather_offset_s64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64offset_u64))) svuint64_t svldff1uw_gather_offset_u64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64offset_s64))) svint64_t svldff1uw_gather_offset_s64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_u8))) svuint8_t svldnf1(svbool_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_u32))) svuint32_t svldnf1(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_u64))) svuint64_t svldnf1(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_u16))) svuint16_t svldnf1(svbool_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s8))) svint8_t svldnf1(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_f64))) svfloat64_t svldnf1(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_f32))) svfloat32_t svldnf1(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_f16))) svfloat16_t svldnf1(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s32))) svint32_t svldnf1(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s64))) svint64_t svldnf1(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s16))) svint16_t svldnf1(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u8))) svuint8_t svldnf1_vnum(svbool_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u32))) svuint32_t svldnf1_vnum(svbool_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u64))) svuint64_t svldnf1_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u16))) svuint16_t svldnf1_vnum(svbool_t, uint16_t const *, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s8))) svint8_t svldnf1_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_f64))) svfloat64_t svldnf1_vnum(svbool_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_f32))) svfloat32_t svldnf1_vnum(svbool_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_f16))) svfloat16_t svldnf1_vnum(svbool_t, float16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s32))) svint32_t svldnf1_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s64))) svint64_t svldnf1_vnum(svbool_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s16))) svint16_t svldnf1_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8))) svuint8_t svldnt1(svbool_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32))) svuint32_t svldnt1(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64))) svuint64_t svldnt1(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16))) svuint16_t svldnt1(svbool_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8))) svint8_t svldnt1(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64))) svfloat64_t svldnt1(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32))) svfloat32_t svldnt1(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16))) svfloat16_t svldnt1(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32))) svint32_t svldnt1(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64))) svint64_t svldnt1(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16))) svint16_t svldnt1(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8))) svuint8_t svldnt1_vnum(svbool_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32))) svuint32_t svldnt1_vnum(svbool_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64))) svuint64_t svldnt1_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16))) svuint16_t svldnt1_vnum(svbool_t, uint16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8))) svint8_t svldnt1_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64))) svfloat64_t svldnt1_vnum(svbool_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32))) svfloat32_t svldnt1_vnum(svbool_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16))) svfloat16_t svldnt1_vnum(svbool_t, float16_t const *, int64_t); __aio 
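/* Illustrative sketch (not part of the header): a streaming copy built on the
 * non-temporal svldnt1 loads declared above, which hint that the data should
 * not displace useful cache lines.  svcntw(), svwhilelt_b32() and svst1() are
 * standard ACLE intrinsics assumed from the same header; the function name is
 * made up. */
#include <arm_sve.h>
#include <stddef.h>
#include <stdint.h>

static void copy_stream(float *dst, const float *src, size_t n) {
    for (size_t i = 0; i < n; i += svcntw()) {                 /* step = 32-bit lanes per vector */
        svbool_t pg = svwhilelt_b32((uint64_t)i, (uint64_t)n); /* tail-safe predicate            */
        svfloat32_t v = svldnt1(pg, src + i);                  /* non-temporal contiguous load   */
        svst1(pg, dst + i, v);                                 /* ordinary contiguous store      */
    }
}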
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32))) svint32_t svldnt1_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64))) svint64_t svldnt1_vnum(svbool_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16))) svint16_t svldnt1_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u8))) uint64_t svlen(svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u32))) uint64_t svlen(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u64))) uint64_t svlen(svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u16))) uint64_t svlen(svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_s8))) uint64_t svlen(svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_f64))) uint64_t svlen(svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_f32))) uint64_t svlen(svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_f16))) uint64_t svlen(svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_s32))) uint64_t svlen(svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_s64))) uint64_t svlen(svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_s16))) uint64_t svlen(svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u8_m))) svuint8_t svlsl_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u32_m))) svuint32_t svlsl_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u64_m))) svuint64_t svlsl_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u16_m))) svuint16_t svlsl_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s8_m))) svint8_t svlsl_m(svbool_t, svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s32_m))) svint32_t svlsl_m(svbool_t, svint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s64_m))) svint64_t svlsl_m(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s16_m))) svint16_t svlsl_m(svbool_t, svint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u8_x))) svuint8_t svlsl_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u32_x))) svuint32_t svlsl_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u64_x))) svuint64_t svlsl_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u16_x))) svuint16_t svlsl_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s8_x))) svint8_t svlsl_x(svbool_t, svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s32_x))) svint32_t svlsl_x(svbool_t, svint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s64_x))) svint64_t svlsl_x(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s16_x))) svint16_t 
svlsl_x(svbool_t, svint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u8_z))) svuint8_t svlsl_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u32_z))) svuint32_t svlsl_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u64_z))) svuint64_t svlsl_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u16_z))) svuint16_t svlsl_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s8_z))) svint8_t svlsl_z(svbool_t, svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s32_z))) svint32_t svlsl_z(svbool_t, svint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s64_z))) svint64_t svlsl_z(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s16_z))) svint16_t svlsl_z(svbool_t, svint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u8_m))) svuint8_t svlsl_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u32_m))) svuint32_t svlsl_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u64_m))) svuint64_t svlsl_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u16_m))) svuint16_t svlsl_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s8_m))) svint8_t svlsl_m(svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s32_m))) svint32_t svlsl_m(svbool_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s64_m))) svint64_t svlsl_m(svbool_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s16_m))) svint16_t svlsl_m(svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u8_x))) svuint8_t svlsl_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u32_x))) svuint32_t svlsl_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u64_x))) svuint64_t svlsl_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u16_x))) svuint16_t svlsl_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s8_x))) svint8_t svlsl_x(svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s32_x))) svint32_t svlsl_x(svbool_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s64_x))) svint64_t svlsl_x(svbool_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s16_x))) svint16_t svlsl_x(svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u8_z))) svuint8_t svlsl_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u32_z))) svuint32_t svlsl_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u64_z))) svuint64_t svlsl_z(svbool_t, svuint64_t, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u16_z))) svuint16_t svlsl_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s8_z))) svint8_t svlsl_z(svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s32_z))) svint32_t svlsl_z(svbool_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s64_z))) svint64_t svlsl_z(svbool_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s16_z))) svint16_t svlsl_z(svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u8_m))) svuint8_t svlsl_wide_m(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u32_m))) svuint32_t svlsl_wide_m(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u16_m))) svuint16_t svlsl_wide_m(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s8_m))) svint8_t svlsl_wide_m(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s32_m))) svint32_t svlsl_wide_m(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s16_m))) svint16_t svlsl_wide_m(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u8_x))) svuint8_t svlsl_wide_x(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u32_x))) svuint32_t svlsl_wide_x(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u16_x))) svuint16_t svlsl_wide_x(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s8_x))) svint8_t svlsl_wide_x(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s32_x))) svint32_t svlsl_wide_x(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s16_x))) svint16_t svlsl_wide_x(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u8_z))) svuint8_t svlsl_wide_z(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u32_z))) svuint32_t svlsl_wide_z(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u16_z))) svuint16_t svlsl_wide_z(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s8_z))) svint8_t svlsl_wide_z(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s32_z))) svint32_t svlsl_wide_z(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s16_z))) svint16_t svlsl_wide_z(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u8_m))) svuint8_t svlsl_wide_m(svbool_t, svuint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u32_m))) svuint32_t svlsl_wide_m(svbool_t, svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u16_m))) svuint16_t svlsl_wide_m(svbool_t, svuint16_t, svuint64_t); __aio 
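/* Illustrative sketch (not part of the header): the _m/_x/_z suffixes on the
 * svlsl overloads above control what happens to inactive lanes: _m keeps the
 * first data operand's value, _z zeroes them, _x leaves them unspecified.
 * svadd_x() is a standard ACLE intrinsic assumed from the same header; the
 * function name is made up. */
#include <arm_sve.h>

static svuint32_t shift_flavours(svbool_t pg, svuint32_t v) {
    svuint32_t m = svlsl_m(pg, v, 1u);  /* inactive lanes keep v          */
    svuint32_t z = svlsl_z(pg, v, 1u);  /* inactive lanes become 0        */
    svuint32_t x = svlsl_x(pg, v, 1u);  /* inactive lanes are unspecified */
    return svadd_x(pg, svadd_x(pg, m, z), x);
}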
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s8_m))) svint8_t svlsl_wide_m(svbool_t, svint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s32_m))) svint32_t svlsl_wide_m(svbool_t, svint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s16_m))) svint16_t svlsl_wide_m(svbool_t, svint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u8_x))) svuint8_t svlsl_wide_x(svbool_t, svuint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u32_x))) svuint32_t svlsl_wide_x(svbool_t, svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u16_x))) svuint16_t svlsl_wide_x(svbool_t, svuint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s8_x))) svint8_t svlsl_wide_x(svbool_t, svint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s32_x))) svint32_t svlsl_wide_x(svbool_t, svint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s16_x))) svint16_t svlsl_wide_x(svbool_t, svint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u8_z))) svuint8_t svlsl_wide_z(svbool_t, svuint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u32_z))) svuint32_t svlsl_wide_z(svbool_t, svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u16_z))) svuint16_t svlsl_wide_z(svbool_t, svuint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s8_z))) svint8_t svlsl_wide_z(svbool_t, svint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s32_z))) svint32_t svlsl_wide_z(svbool_t, svint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s16_z))) svint16_t svlsl_wide_z(svbool_t, svint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u8_m))) svuint8_t svlsr_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u32_m))) svuint32_t svlsr_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u64_m))) svuint64_t svlsr_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u16_m))) svuint16_t svlsr_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u8_x))) svuint8_t svlsr_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u32_x))) svuint32_t svlsr_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u64_x))) svuint64_t svlsr_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u16_x))) svuint16_t svlsr_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u8_z))) svuint8_t svlsr_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u32_z))) svuint32_t svlsr_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u64_z))) svuint64_t svlsr_z(svbool_t, svuint64_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u16_z))) svuint16_t svlsr_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u8_m))) svuint8_t svlsr_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u32_m))) svuint32_t svlsr_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u64_m))) svuint64_t svlsr_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u16_m))) svuint16_t svlsr_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u8_x))) svuint8_t svlsr_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u32_x))) svuint32_t svlsr_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u64_x))) svuint64_t svlsr_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u16_x))) svuint16_t svlsr_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u8_z))) svuint8_t svlsr_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u32_z))) svuint32_t svlsr_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u64_z))) svuint64_t svlsr_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u16_z))) svuint16_t svlsr_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u8_m))) svuint8_t svlsr_wide_m(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u32_m))) svuint32_t svlsr_wide_m(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u16_m))) svuint16_t svlsr_wide_m(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u8_x))) svuint8_t svlsr_wide_x(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u32_x))) svuint32_t svlsr_wide_x(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u16_x))) svuint16_t svlsr_wide_x(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u8_z))) svuint8_t svlsr_wide_z(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u32_z))) svuint32_t svlsr_wide_z(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u16_z))) svuint16_t svlsr_wide_z(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u8_m))) svuint8_t svlsr_wide_m(svbool_t, svuint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u32_m))) svuint32_t svlsr_wide_m(svbool_t, svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u16_m))) svuint16_t svlsr_wide_m(svbool_t, svuint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u8_x))) svuint8_t svlsr_wide_x(svbool_t, svuint8_t, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u32_x))) svuint32_t svlsr_wide_x(svbool_t, svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u16_x))) svuint16_t svlsr_wide_x(svbool_t, svuint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u8_z))) svuint8_t svlsr_wide_z(svbool_t, svuint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u32_z))) svuint32_t svlsr_wide_z(svbool_t, svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u16_z))) svuint16_t svlsr_wide_z(svbool_t, svuint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f64_m))) svfloat64_t svmad_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f32_m))) svfloat32_t svmad_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f16_m))) svfloat16_t svmad_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f64_x))) svfloat64_t svmad_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f32_x))) svfloat32_t svmad_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f16_x))) svfloat16_t svmad_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f64_z))) svfloat64_t svmad_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f32_z))) svfloat32_t svmad_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f16_z))) svfloat16_t svmad_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u8_m))) svuint8_t svmad_m(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u32_m))) svuint32_t svmad_m(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u64_m))) svuint64_t svmad_m(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u16_m))) svuint16_t svmad_m(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s8_m))) svint8_t svmad_m(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s32_m))) svint32_t svmad_m(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s64_m))) svint64_t svmad_m(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s16_m))) svint16_t svmad_m(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u8_x))) svuint8_t svmad_x(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u32_x))) svuint32_t svmad_x(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u64_x))) svuint64_t svmad_x(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u16_x))) svuint16_t svmad_x(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s8_x))) svint8_t svmad_x(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s32_x))) svint32_t svmad_x(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s64_x))) svint64_t svmad_x(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s16_x))) svint16_t svmad_x(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u8_z))) svuint8_t svmad_z(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u32_z))) svuint32_t svmad_z(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u64_z))) svuint64_t svmad_z(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u16_z))) svuint16_t svmad_z(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s8_z))) svint8_t svmad_z(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s32_z))) svint32_t svmad_z(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s64_z))) svint64_t svmad_z(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s16_z))) svint16_t svmad_z(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f64_m))) svfloat64_t svmad_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f32_m))) svfloat32_t svmad_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f16_m))) svfloat16_t svmad_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f64_x))) svfloat64_t svmad_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f32_x))) svfloat32_t svmad_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f16_x))) svfloat16_t svmad_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f64_z))) svfloat64_t svmad_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f32_z))) svfloat32_t svmad_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f16_z))) svfloat16_t svmad_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u8_m))) svuint8_t svmad_m(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u32_m))) svuint32_t svmad_m(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u64_m))) svuint64_t svmad_m(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio 
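/* Illustrative sketch (not part of the header): svmad multiplies its first two
 * data operands and adds the third, so the scalar overload declared above
 * gives a per-lane fused multiply-add with a broadcast addend.  The function
 * name is made up for illustration. */
#include <arm_sve.h>

static svfloat64_t fma_lanes(svbool_t pg, svfloat64_t x, svfloat64_t y, double a) {
    return svmad_x(pg, x, y, a);   /* per lane: x * y + a */
}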
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u16_m))) svuint16_t svmad_m(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s8_m))) svint8_t svmad_m(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s32_m))) svint32_t svmad_m(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s64_m))) svint64_t svmad_m(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s16_m))) svint16_t svmad_m(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u8_x))) svuint8_t svmad_x(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u32_x))) svuint32_t svmad_x(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u64_x))) svuint64_t svmad_x(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u16_x))) svuint16_t svmad_x(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s8_x))) svint8_t svmad_x(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s32_x))) svint32_t svmad_x(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s64_x))) svint64_t svmad_x(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s16_x))) svint16_t svmad_x(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u8_z))) svuint8_t svmad_z(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u32_z))) svuint32_t svmad_z(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u64_z))) svuint64_t svmad_z(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u16_z))) svuint16_t svmad_z(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s8_z))) svint8_t svmad_z(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s32_z))) svint32_t svmad_z(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s64_z))) svint64_t svmad_z(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s16_z))) svint16_t svmad_z(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f64_m))) svfloat64_t svmax_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f32_m))) svfloat32_t svmax_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f16_m))) svfloat16_t svmax_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f64_x))) svfloat64_t svmax_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f32_x))) svfloat32_t svmax_x(svbool_t, 
svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f16_x))) svfloat16_t svmax_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f64_z))) svfloat64_t svmax_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f32_z))) svfloat32_t svmax_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f16_z))) svfloat16_t svmax_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s8_m))) svint8_t svmax_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s32_m))) svint32_t svmax_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s64_m))) svint64_t svmax_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s16_m))) svint16_t svmax_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s8_x))) svint8_t svmax_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s32_x))) svint32_t svmax_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s64_x))) svint64_t svmax_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s16_x))) svint16_t svmax_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s8_z))) svint8_t svmax_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s32_z))) svint32_t svmax_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s64_z))) svint64_t svmax_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s16_z))) svint16_t svmax_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u8_m))) svuint8_t svmax_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u32_m))) svuint32_t svmax_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u64_m))) svuint64_t svmax_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u16_m))) svuint16_t svmax_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u8_x))) svuint8_t svmax_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u32_x))) svuint32_t svmax_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u64_x))) svuint64_t svmax_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u16_x))) svuint16_t svmax_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u8_z))) svuint8_t svmax_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u32_z))) svuint32_t svmax_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u64_z))) svuint64_t svmax_z(svbool_t, svuint64_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u16_z))) svuint16_t svmax_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_m))) svfloat64_t svmax_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_m))) svfloat32_t svmax_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_m))) svfloat16_t svmax_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_x))) svfloat64_t svmax_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_x))) svfloat32_t svmax_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_x))) svfloat16_t svmax_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_z))) svfloat64_t svmax_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_z))) svfloat32_t svmax_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_z))) svfloat16_t svmax_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_m))) svint8_t svmax_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_m))) svint32_t svmax_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_m))) svint64_t svmax_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_m))) svint16_t svmax_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_x))) svint8_t svmax_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_x))) svint32_t svmax_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_x))) svint64_t svmax_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_x))) svint16_t svmax_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_z))) svint8_t svmax_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_z))) svint32_t svmax_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_z))) svint64_t svmax_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_z))) svint16_t svmax_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_m))) svuint8_t svmax_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_m))) svuint32_t svmax_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_m))) svuint64_t svmax_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_m))) svuint16_t svmax_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_x))) svuint8_t svmax_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_x))) 
svuint32_t svmax_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_x))) svuint64_t svmax_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_x))) svuint16_t svmax_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_z))) svuint8_t svmax_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_z))) svuint32_t svmax_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_z))) svuint64_t svmax_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_z))) svuint16_t svmax_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f64_m))) svfloat64_t svmaxnm_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f32_m))) svfloat32_t svmaxnm_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f16_m))) svfloat16_t svmaxnm_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f64_x))) svfloat64_t svmaxnm_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f32_x))) svfloat32_t svmaxnm_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f16_x))) svfloat16_t svmaxnm_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f64_z))) svfloat64_t svmaxnm_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f32_z))) svfloat32_t svmaxnm_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f16_z))) svfloat16_t svmaxnm_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_m))) svfloat64_t svmaxnm_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_m))) svfloat32_t svmaxnm_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_m))) svfloat16_t svmaxnm_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_x))) svfloat64_t svmaxnm_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_x))) svfloat32_t svmaxnm_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_x))) svfloat16_t svmaxnm_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_z))) svfloat64_t svmaxnm_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_z))) svfloat32_t svmaxnm_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_z))) svfloat16_t svmaxnm_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmv_f64))) float64_t svmaxnmv(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmv_f32))) float32_t svmaxnmv(svbool_t, svfloat32_t); __aio 
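The svmax_*/svmaxnm_* declarations above are the overloaded ACLE entry points for predicated maximum: the _n_ variants take a scalar second operand, the _m/_x/_z suffixes choose how inactive lanes are handled (merged from the first vector operand, left unspecified, or zeroed), svmaxnm applies the IEEE maxNum rule to NaNs, and svmaxv/svmaxnmv are horizontal reductions returning a scalar. A minimal usage sketch follows; the function name and scenario are hypothetical, and it assumes <arm_sve.h> with SVE code generation enabled (e.g. -march=armv8-a+sve):

#include <arm_sve.h>

/* Hypothetical helper: clamp each element to at least floor_val, then return
 * the largest value seen. Uses the scalar (_n_) overload of svmax_m and the
 * svmaxv reduction declared above. */
float clamp_and_reduce_max(const float *data, int64_t n, float floor_val) {
    float result = floor_val;
    for (int64_t i = 0; i < n; i += svcntw()) {
        svbool_t pg = svwhilelt_b32(i, n);          /* predicate covering the tail */
        svfloat32_t v = svld1(pg, data + i);        /* inactive lanes load as zero */
        svfloat32_t m = svmax_m(pg, v, floor_val);  /* per-lane max with a scalar */
        float lane_max = svmaxv(pg, m);             /* reduce active lanes to one float */
        result = lane_max > result ? lane_max : result;
    }
    return result;
}

The svmin_*, svminnm_* and svminv/svminnmv families listed below follow the same pattern with minimum semantics.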
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmv_f16))) float16_t svmaxnmv(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_f64))) float64_t svmaxv(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_f32))) float32_t svmaxv(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_f16))) float16_t svmaxv(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_s8))) int8_t svmaxv(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_s32))) int32_t svmaxv(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_s64))) int64_t svmaxv(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_s16))) int16_t svmaxv(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_u8))) uint8_t svmaxv(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_u32))) uint32_t svmaxv(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_u64))) uint64_t svmaxv(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_u16))) uint16_t svmaxv(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f64_m))) svfloat64_t svmin_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f32_m))) svfloat32_t svmin_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f16_m))) svfloat16_t svmin_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f64_x))) svfloat64_t svmin_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f32_x))) svfloat32_t svmin_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f16_x))) svfloat16_t svmin_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f64_z))) svfloat64_t svmin_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f32_z))) svfloat32_t svmin_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f16_z))) svfloat16_t svmin_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s8_m))) svint8_t svmin_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s32_m))) svint32_t svmin_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s64_m))) svint64_t svmin_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s16_m))) svint16_t svmin_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s8_x))) svint8_t svmin_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s32_x))) svint32_t svmin_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s64_x))) svint64_t svmin_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s16_x))) svint16_t svmin_x(svbool_t, svint16_t, 
int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s8_z))) svint8_t svmin_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s32_z))) svint32_t svmin_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s64_z))) svint64_t svmin_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s16_z))) svint16_t svmin_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u8_m))) svuint8_t svmin_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u32_m))) svuint32_t svmin_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u64_m))) svuint64_t svmin_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u16_m))) svuint16_t svmin_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u8_x))) svuint8_t svmin_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u32_x))) svuint32_t svmin_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u64_x))) svuint64_t svmin_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u16_x))) svuint16_t svmin_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u8_z))) svuint8_t svmin_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u32_z))) svuint32_t svmin_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u64_z))) svuint64_t svmin_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u16_z))) svuint16_t svmin_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_m))) svfloat64_t svmin_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_m))) svfloat32_t svmin_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_m))) svfloat16_t svmin_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_x))) svfloat64_t svmin_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_x))) svfloat32_t svmin_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_x))) svfloat16_t svmin_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_z))) svfloat64_t svmin_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_z))) svfloat32_t svmin_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_z))) svfloat16_t svmin_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_m))) svint8_t svmin_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_m))) svint32_t svmin_m(svbool_t, svint32_t, svint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_m))) svint64_t svmin_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_m))) svint16_t svmin_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_x))) svint8_t svmin_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_x))) svint32_t svmin_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_x))) svint64_t svmin_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_x))) svint16_t svmin_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_z))) svint8_t svmin_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_z))) svint32_t svmin_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_z))) svint64_t svmin_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_z))) svint16_t svmin_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_m))) svuint8_t svmin_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_m))) svuint32_t svmin_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_m))) svuint64_t svmin_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_m))) svuint16_t svmin_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_x))) svuint8_t svmin_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_x))) svuint32_t svmin_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_x))) svuint64_t svmin_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_x))) svuint16_t svmin_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_z))) svuint8_t svmin_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_z))) svuint32_t svmin_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_z))) svuint64_t svmin_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_z))) svuint16_t svmin_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f64_m))) svfloat64_t svminnm_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f32_m))) svfloat32_t svminnm_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f16_m))) svfloat16_t svminnm_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f64_x))) svfloat64_t svminnm_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f32_x))) svfloat32_t svminnm_x(svbool_t, svfloat32_t, float32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f16_x))) svfloat16_t svminnm_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f64_z))) svfloat64_t svminnm_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f32_z))) svfloat32_t svminnm_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f16_z))) svfloat16_t svminnm_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_m))) svfloat64_t svminnm_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_m))) svfloat32_t svminnm_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_m))) svfloat16_t svminnm_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_x))) svfloat64_t svminnm_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_x))) svfloat32_t svminnm_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_x))) svfloat16_t svminnm_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_z))) svfloat64_t svminnm_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_z))) svfloat32_t svminnm_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_z))) svfloat16_t svminnm_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmv_f64))) float64_t svminnmv(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmv_f32))) float32_t svminnmv(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmv_f16))) float16_t svminnmv(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_f64))) float64_t svminv(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_f32))) float32_t svminv(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_f16))) float16_t svminv(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_s8))) int8_t svminv(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_s32))) int32_t svminv(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_s64))) int64_t svminv(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_s16))) int16_t svminv(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_u8))) uint8_t svminv(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_u32))) uint32_t svminv(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_u64))) uint64_t svminv(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_u16))) uint16_t svminv(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f64_m))) svfloat64_t svmla_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f32_m))) svfloat32_t svmla_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f16_m))) svfloat16_t svmla_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f64_x))) svfloat64_t svmla_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f32_x))) svfloat32_t svmla_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f16_x))) svfloat16_t svmla_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f64_z))) svfloat64_t svmla_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f32_z))) svfloat32_t svmla_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f16_z))) svfloat16_t svmla_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u8_m))) svuint8_t svmla_m(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u32_m))) svuint32_t svmla_m(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u64_m))) svuint64_t svmla_m(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u16_m))) svuint16_t svmla_m(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s8_m))) svint8_t svmla_m(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s32_m))) svint32_t svmla_m(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s64_m))) svint64_t svmla_m(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s16_m))) svint16_t svmla_m(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u8_x))) svuint8_t svmla_x(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u32_x))) svuint32_t svmla_x(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u64_x))) svuint64_t svmla_x(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u16_x))) svuint16_t svmla_x(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s8_x))) svint8_t svmla_x(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s32_x))) svint32_t svmla_x(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s64_x))) svint64_t svmla_x(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s16_x))) svint16_t svmla_x(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u8_z))) svuint8_t svmla_z(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u32_z))) svuint32_t svmla_z(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u64_z))) svuint64_t svmla_z(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u16_z))) svuint16_t svmla_z(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s8_z))) svint8_t svmla_z(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s32_z))) svint32_t svmla_z(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s64_z))) svint64_t svmla_z(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s16_z))) svint16_t svmla_z(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f64_m))) svfloat64_t svmla_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f32_m))) svfloat32_t svmla_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f16_m))) svfloat16_t svmla_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f64_x))) svfloat64_t svmla_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f32_x))) svfloat32_t svmla_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f16_x))) svfloat16_t svmla_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f64_z))) svfloat64_t svmla_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f32_z))) svfloat32_t svmla_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f16_z))) svfloat16_t svmla_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u8_m))) svuint8_t svmla_m(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u32_m))) svuint32_t svmla_m(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u64_m))) svuint64_t svmla_m(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u16_m))) svuint16_t svmla_m(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s8_m))) svint8_t svmla_m(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s32_m))) svint32_t svmla_m(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s64_m))) svint64_t svmla_m(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s16_m))) svint16_t svmla_m(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u8_x))) svuint8_t svmla_x(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u32_x))) svuint32_t svmla_x(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u64_x))) svuint64_t svmla_x(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u16_x))) svuint16_t svmla_x(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s8_x))) svint8_t svmla_x(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s32_x))) svint32_t svmla_x(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s64_x))) svint64_t svmla_x(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s16_x))) svint16_t svmla_x(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u8_z))) svuint8_t svmla_z(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u32_z))) svuint32_t svmla_z(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u64_z))) svuint64_t svmla_z(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u16_z))) svuint16_t svmla_z(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s8_z))) svint8_t svmla_z(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s32_z))) svint32_t svmla_z(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s64_z))) svint64_t svmla_z(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s16_z))) svint16_t svmla_z(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_f64))) svfloat64_t svmla_lane(svfloat64_t, svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_f32))) svfloat32_t svmla_lane(svfloat32_t, svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_f16))) svfloat16_t svmla_lane(svfloat16_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f64_m))) svfloat64_t svmls_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f32_m))) svfloat32_t svmls_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f16_m))) svfloat16_t svmls_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f64_x))) svfloat64_t svmls_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f32_x))) svfloat32_t svmls_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f16_x))) svfloat16_t svmls_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f64_z))) svfloat64_t svmls_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); 
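svmla and svmls above are the fused multiply-accumulate entry points: svmla(pg, op1, op2, op3) computes op1 + op2*op3 and svmls computes op1 - op2*op3, the _n_ overloads take a scalar final operand, and svmla_lane/svmls_lane are the unpredicated by-element forms typically used in matrix kernels. A short sketch of the common streaming pattern; the kernel name is hypothetical and the same <arm_sve.h>/SVE-target assumptions apply:

#include <arm_sve.h>

/* Hypothetical accumulate kernel: acc[i] += a[i] * b[i]. The _x form leaves
 * inactive lanes unspecified so the compiler may pick the cheapest destructive
 * encoding; _m would keep them from the accumulator and _z would zero them. */
void fma_accumulate(float *acc, const float *a, const float *b, int64_t n) {
    for (int64_t i = 0; i < n; i += svcntw()) {
        svbool_t pg = svwhilelt_b32(i, n);
        svfloat32_t vacc = svld1(pg, acc + i);
        svfloat32_t va   = svld1(pg, a + i);
        svfloat32_t vb   = svld1(pg, b + i);
        vacc = svmla_x(pg, vacc, va, vb);   /* vacc + va * vb */
        svst1(pg, acc + i, vacc);
    }
}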
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f32_z))) svfloat32_t svmls_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f16_z))) svfloat16_t svmls_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u8_m))) svuint8_t svmls_m(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u32_m))) svuint32_t svmls_m(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u64_m))) svuint64_t svmls_m(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u16_m))) svuint16_t svmls_m(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s8_m))) svint8_t svmls_m(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s32_m))) svint32_t svmls_m(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s64_m))) svint64_t svmls_m(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s16_m))) svint16_t svmls_m(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u8_x))) svuint8_t svmls_x(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u32_x))) svuint32_t svmls_x(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u64_x))) svuint64_t svmls_x(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u16_x))) svuint16_t svmls_x(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s8_x))) svint8_t svmls_x(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s32_x))) svint32_t svmls_x(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s64_x))) svint64_t svmls_x(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s16_x))) svint16_t svmls_x(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u8_z))) svuint8_t svmls_z(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u32_z))) svuint32_t svmls_z(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u64_z))) svuint64_t svmls_z(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u16_z))) svuint16_t svmls_z(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s8_z))) svint8_t svmls_z(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s32_z))) svint32_t svmls_z(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s64_z))) svint64_t svmls_z(svbool_t, svint64_t, svint64_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s16_z))) svint16_t svmls_z(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f64_m))) svfloat64_t svmls_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f32_m))) svfloat32_t svmls_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f16_m))) svfloat16_t svmls_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f64_x))) svfloat64_t svmls_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f32_x))) svfloat32_t svmls_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f16_x))) svfloat16_t svmls_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f64_z))) svfloat64_t svmls_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f32_z))) svfloat32_t svmls_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f16_z))) svfloat16_t svmls_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u8_m))) svuint8_t svmls_m(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u32_m))) svuint32_t svmls_m(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u64_m))) svuint64_t svmls_m(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u16_m))) svuint16_t svmls_m(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s8_m))) svint8_t svmls_m(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s32_m))) svint32_t svmls_m(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s64_m))) svint64_t svmls_m(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s16_m))) svint16_t svmls_m(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u8_x))) svuint8_t svmls_x(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u32_x))) svuint32_t svmls_x(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u64_x))) svuint64_t svmls_x(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u16_x))) svuint16_t svmls_x(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s8_x))) svint8_t svmls_x(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s32_x))) svint32_t svmls_x(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s64_x))) svint64_t svmls_x(svbool_t, svint64_t, svint64_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s16_x))) svint16_t svmls_x(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u8_z))) svuint8_t svmls_z(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u32_z))) svuint32_t svmls_z(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u64_z))) svuint64_t svmls_z(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u16_z))) svuint16_t svmls_z(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s8_z))) svint8_t svmls_z(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s32_z))) svint32_t svmls_z(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s64_z))) svint64_t svmls_z(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s16_z))) svint16_t svmls_z(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_f64))) svfloat64_t svmls_lane(svfloat64_t, svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_f32))) svfloat32_t svmls_lane(svfloat32_t, svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_f16))) svfloat16_t svmls_lane(svfloat16_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmov_b_z))) svbool_t svmov_z(svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f64_m))) svfloat64_t svmsb_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f32_m))) svfloat32_t svmsb_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f16_m))) svfloat16_t svmsb_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f64_x))) svfloat64_t svmsb_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f32_x))) svfloat32_t svmsb_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f16_x))) svfloat16_t svmsb_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f64_z))) svfloat64_t svmsb_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f32_z))) svfloat32_t svmsb_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f16_z))) svfloat16_t svmsb_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u8_m))) svuint8_t svmsb_m(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u32_m))) svuint32_t svmsb_m(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u64_m))) svuint64_t svmsb_m(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u16_m))) svuint16_t svmsb_m(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s8_m))) svint8_t svmsb_m(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s32_m))) svint32_t svmsb_m(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s64_m))) svint64_t svmsb_m(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s16_m))) svint16_t svmsb_m(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u8_x))) svuint8_t svmsb_x(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u32_x))) svuint32_t svmsb_x(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u64_x))) svuint64_t svmsb_x(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u16_x))) svuint16_t svmsb_x(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s8_x))) svint8_t svmsb_x(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s32_x))) svint32_t svmsb_x(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s64_x))) svint64_t svmsb_x(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s16_x))) svint16_t svmsb_x(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u8_z))) svuint8_t svmsb_z(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u32_z))) svuint32_t svmsb_z(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u64_z))) svuint64_t svmsb_z(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u16_z))) svuint16_t svmsb_z(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s8_z))) svint8_t svmsb_z(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s32_z))) svint32_t svmsb_z(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s64_z))) svint64_t svmsb_z(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s16_z))) svint16_t svmsb_z(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f64_m))) svfloat64_t svmsb_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f32_m))) svfloat32_t svmsb_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f16_m))) svfloat16_t svmsb_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f64_x))) svfloat64_t svmsb_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f32_x))) svfloat32_t svmsb_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f16_x))) svfloat16_t svmsb_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f64_z))) svfloat64_t svmsb_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f32_z))) svfloat32_t svmsb_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f16_z))) svfloat16_t svmsb_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u8_m))) svuint8_t svmsb_m(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u32_m))) svuint32_t svmsb_m(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u64_m))) svuint64_t svmsb_m(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u16_m))) svuint16_t svmsb_m(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s8_m))) svint8_t svmsb_m(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s32_m))) svint32_t svmsb_m(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s64_m))) svint64_t svmsb_m(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s16_m))) svint16_t svmsb_m(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u8_x))) svuint8_t svmsb_x(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u32_x))) svuint32_t svmsb_x(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u64_x))) svuint64_t svmsb_x(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u16_x))) svuint16_t svmsb_x(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s8_x))) svint8_t svmsb_x(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s32_x))) svint32_t svmsb_x(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s64_x))) svint64_t svmsb_x(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s16_x))) svint16_t svmsb_x(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u8_z))) svuint8_t svmsb_z(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u32_z))) svuint32_t svmsb_z(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u64_z))) svuint64_t svmsb_z(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u16_z))) svuint16_t svmsb_z(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s8_z))) svint8_t svmsb_z(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s32_z))) svint32_t svmsb_z(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s64_z))) svint64_t svmsb_z(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s16_z))) svint16_t svmsb_z(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f64_m))) svfloat64_t svmul_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f32_m))) svfloat32_t svmul_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f16_m))) svfloat16_t svmul_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f64_x))) svfloat64_t svmul_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f32_x))) svfloat32_t svmul_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f16_x))) svfloat16_t svmul_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f64_z))) svfloat64_t svmul_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f32_z))) svfloat32_t svmul_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f16_z))) svfloat16_t svmul_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u8_m))) svuint8_t svmul_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u32_m))) svuint32_t svmul_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u64_m))) svuint64_t svmul_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u16_m))) svuint16_t svmul_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s8_m))) svint8_t svmul_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s32_m))) svint32_t svmul_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s64_m))) svint64_t svmul_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s16_m))) svint16_t svmul_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u8_x))) svuint8_t svmul_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u32_x))) svuint32_t svmul_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u64_x))) svuint64_t svmul_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u16_x))) svuint16_t svmul_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s8_x))) svint8_t svmul_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s32_x))) svint32_t svmul_x(svbool_t, svint32_t, int32_t); __aio 
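The svmsb_* forms above mirror svmad earlier in this listing: the addend is the last operand rather than the first, so svmsb(pg, op1, op2, op3) computes op3 - op1*op2 while svmls(pg, op1, op2, op3) computes op1 - op2*op3 (matching the MSB and MLS instructions). A one-liner to make the equivalence concrete; the helper name is hypothetical:

#include <arm_sve.h>

/* Hypothetical residual helper: y - w*x per lane (inactive lanes unspecified).
 * svmsb_x(pg, w, x, y) would produce the same value with the operands reordered. */
svfloat32_t residual(svbool_t pg, svfloat32_t y, svfloat32_t w, svfloat32_t x) {
    return svmls_x(pg, y, w, x);
}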
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s64_x))) svint64_t svmul_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s16_x))) svint16_t svmul_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u8_z))) svuint8_t svmul_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u32_z))) svuint32_t svmul_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u64_z))) svuint64_t svmul_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u16_z))) svuint16_t svmul_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s8_z))) svint8_t svmul_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s32_z))) svint32_t svmul_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s64_z))) svint64_t svmul_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s16_z))) svint16_t svmul_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f64_m))) svfloat64_t svmul_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f32_m))) svfloat32_t svmul_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f16_m))) svfloat16_t svmul_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f64_x))) svfloat64_t svmul_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f32_x))) svfloat32_t svmul_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f16_x))) svfloat16_t svmul_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f64_z))) svfloat64_t svmul_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f32_z))) svfloat32_t svmul_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f16_z))) svfloat16_t svmul_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u8_m))) svuint8_t svmul_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u32_m))) svuint32_t svmul_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u64_m))) svuint64_t svmul_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u16_m))) svuint16_t svmul_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s8_m))) svint8_t svmul_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s32_m))) svint32_t svmul_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s64_m))) svint64_t svmul_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s16_m))) svint16_t svmul_m(svbool_t, svint16_t, svint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u8_x))) svuint8_t svmul_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u32_x))) svuint32_t svmul_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u64_x))) svuint64_t svmul_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u16_x))) svuint16_t svmul_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s8_x))) svint8_t svmul_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s32_x))) svint32_t svmul_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s64_x))) svint64_t svmul_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s16_x))) svint16_t svmul_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u8_z))) svuint8_t svmul_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u32_z))) svuint32_t svmul_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u64_z))) svuint64_t svmul_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u16_z))) svuint16_t svmul_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s8_z))) svint8_t svmul_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s32_z))) svint32_t svmul_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s64_z))) svint64_t svmul_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s16_z))) svint16_t svmul_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_f64))) svfloat64_t svmul_lane(svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_f32))) svfloat32_t svmul_lane(svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_f16))) svfloat16_t svmul_lane(svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s8_m))) svint8_t svmulh_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s32_m))) svint32_t svmulh_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s64_m))) svint64_t svmulh_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s16_m))) svint16_t svmulh_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s8_x))) svint8_t svmulh_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s32_x))) svint32_t svmulh_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s64_x))) svint64_t svmulh_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s16_x))) svint16_t svmulh_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s8_z))) 
svint8_t svmulh_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s32_z))) svint32_t svmulh_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s64_z))) svint64_t svmulh_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s16_z))) svint16_t svmulh_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u8_m))) svuint8_t svmulh_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u32_m))) svuint32_t svmulh_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u64_m))) svuint64_t svmulh_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u16_m))) svuint16_t svmulh_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u8_x))) svuint8_t svmulh_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u32_x))) svuint32_t svmulh_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u64_x))) svuint64_t svmulh_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u16_x))) svuint16_t svmulh_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u8_z))) svuint8_t svmulh_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u32_z))) svuint32_t svmulh_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u64_z))) svuint64_t svmulh_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u16_z))) svuint16_t svmulh_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s8_m))) svint8_t svmulh_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s32_m))) svint32_t svmulh_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s64_m))) svint64_t svmulh_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s16_m))) svint16_t svmulh_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s8_x))) svint8_t svmulh_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s32_x))) svint32_t svmulh_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s64_x))) svint64_t svmulh_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s16_x))) svint16_t svmulh_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s8_z))) svint8_t svmulh_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s32_z))) svint32_t svmulh_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s64_z))) svint64_t svmulh_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s16_z))) svint16_t svmulh_z(svbool_t, svint16_t, svint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u8_m))) svuint8_t svmulh_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u32_m))) svuint32_t svmulh_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u64_m))) svuint64_t svmulh_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u16_m))) svuint16_t svmulh_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u8_x))) svuint8_t svmulh_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u32_x))) svuint32_t svmulh_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u64_x))) svuint64_t svmulh_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u16_x))) svuint16_t svmulh_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u8_z))) svuint8_t svmulh_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u32_z))) svuint32_t svmulh_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u64_z))) svuint64_t svmulh_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u16_z))) svuint16_t svmulh_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f64_m))) svfloat64_t svmulx_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f32_m))) svfloat32_t svmulx_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f16_m))) svfloat16_t svmulx_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f64_x))) svfloat64_t svmulx_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f32_x))) svfloat32_t svmulx_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f16_x))) svfloat16_t svmulx_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f64_z))) svfloat64_t svmulx_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f32_z))) svfloat32_t svmulx_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f16_z))) svfloat16_t svmulx_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f64_m))) svfloat64_t svmulx_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f32_m))) svfloat32_t svmulx_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f16_m))) svfloat16_t svmulx_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f64_x))) svfloat64_t svmulx_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f32_x))) svfloat32_t svmulx_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f16_x))) svfloat16_t 
svmulx_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f64_z))) svfloat64_t svmulx_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f32_z))) svfloat32_t svmulx_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f16_z))) svfloat16_t svmulx_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnand_b_z))) svbool_t svnand_z(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f64_m))) svfloat64_t svneg_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f32_m))) svfloat32_t svneg_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f16_m))) svfloat16_t svneg_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f64_x))) svfloat64_t svneg_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f32_x))) svfloat32_t svneg_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f16_x))) svfloat16_t svneg_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f64_z))) svfloat64_t svneg_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f32_z))) svfloat32_t svneg_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f16_z))) svfloat16_t svneg_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s8_m))) svint8_t svneg_m(svint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s32_m))) svint32_t svneg_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s64_m))) svint64_t svneg_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s16_m))) svint16_t svneg_m(svint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s8_x))) svint8_t svneg_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s32_x))) svint32_t svneg_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s64_x))) svint64_t svneg_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s16_x))) svint16_t svneg_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s8_z))) svint8_t svneg_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s32_z))) svint32_t svneg_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s64_z))) svint64_t svneg_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s16_z))) svint16_t svneg_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f64_m))) svfloat64_t svnmad_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f32_m))) svfloat32_t svnmad_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f16_m))) svfloat16_t svnmad_m(svbool_t, svfloat16_t, svfloat16_t, 
float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f64_x))) svfloat64_t svnmad_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f32_x))) svfloat32_t svnmad_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f16_x))) svfloat16_t svnmad_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f64_z))) svfloat64_t svnmad_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f32_z))) svfloat32_t svnmad_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f16_z))) svfloat16_t svnmad_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f64_m))) svfloat64_t svnmad_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f32_m))) svfloat32_t svnmad_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f16_m))) svfloat16_t svnmad_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f64_x))) svfloat64_t svnmad_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f32_x))) svfloat32_t svnmad_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f16_x))) svfloat16_t svnmad_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f64_z))) svfloat64_t svnmad_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f32_z))) svfloat32_t svnmad_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f16_z))) svfloat16_t svnmad_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f64_m))) svfloat64_t svnmla_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f32_m))) svfloat32_t svnmla_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f16_m))) svfloat16_t svnmla_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f64_x))) svfloat64_t svnmla_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f32_x))) svfloat32_t svnmla_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f16_x))) svfloat16_t svnmla_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f64_z))) svfloat64_t svnmla_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f32_z))) svfloat32_t svnmla_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f16_z))) svfloat16_t svnmla_z(svbool_t, svfloat16_t, svfloat16_t, 
float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f64_m))) svfloat64_t svnmla_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f32_m))) svfloat32_t svnmla_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f16_m))) svfloat16_t svnmla_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f64_x))) svfloat64_t svnmla_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f32_x))) svfloat32_t svnmla_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f16_x))) svfloat16_t svnmla_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f64_z))) svfloat64_t svnmla_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f32_z))) svfloat32_t svnmla_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f16_z))) svfloat16_t svnmla_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f64_m))) svfloat64_t svnmls_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f32_m))) svfloat32_t svnmls_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f16_m))) svfloat16_t svnmls_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f64_x))) svfloat64_t svnmls_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f32_x))) svfloat32_t svnmls_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f16_x))) svfloat16_t svnmls_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f64_z))) svfloat64_t svnmls_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f32_z))) svfloat32_t svnmls_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f16_z))) svfloat16_t svnmls_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f64_m))) svfloat64_t svnmls_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f32_m))) svfloat32_t svnmls_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f16_m))) svfloat16_t svnmls_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f64_x))) svfloat64_t svnmls_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f32_x))) svfloat32_t svnmls_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f16_x))) svfloat16_t svnmls_x(svbool_t, svfloat16_t, svfloat16_t, 
svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f64_z))) svfloat64_t svnmls_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f32_z))) svfloat32_t svnmls_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f16_z))) svfloat16_t svnmls_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f64_m))) svfloat64_t svnmsb_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f32_m))) svfloat32_t svnmsb_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f16_m))) svfloat16_t svnmsb_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f64_x))) svfloat64_t svnmsb_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f32_x))) svfloat32_t svnmsb_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f16_x))) svfloat16_t svnmsb_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f64_z))) svfloat64_t svnmsb_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f32_z))) svfloat32_t svnmsb_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f16_z))) svfloat16_t svnmsb_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f64_m))) svfloat64_t svnmsb_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f32_m))) svfloat32_t svnmsb_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f16_m))) svfloat16_t svnmsb_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f64_x))) svfloat64_t svnmsb_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f32_x))) svfloat32_t svnmsb_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f16_x))) svfloat16_t svnmsb_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f64_z))) svfloat64_t svnmsb_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f32_z))) svfloat32_t svnmsb_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f16_z))) svfloat16_t svnmsb_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnor_b_z))) svbool_t svnor_z(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_b_z))) svbool_t svnot_z(svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u8_m))) svuint8_t svnot_m(svuint8_t, svbool_t, svuint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u32_m))) svuint32_t svnot_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u64_m))) svuint64_t svnot_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u16_m))) svuint16_t svnot_m(svuint16_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s8_m))) svint8_t svnot_m(svint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s32_m))) svint32_t svnot_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s64_m))) svint64_t svnot_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s16_m))) svint16_t svnot_m(svint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u8_x))) svuint8_t svnot_x(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u32_x))) svuint32_t svnot_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u64_x))) svuint64_t svnot_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u16_x))) svuint16_t svnot_x(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s8_x))) svint8_t svnot_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s32_x))) svint32_t svnot_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s64_x))) svint64_t svnot_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s16_x))) svint16_t svnot_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u8_z))) svuint8_t svnot_z(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u32_z))) svuint32_t svnot_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u64_z))) svuint64_t svnot_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u16_z))) svuint16_t svnot_z(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s8_z))) svint8_t svnot_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s32_z))) svint32_t svnot_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s64_z))) svint64_t svnot_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s16_z))) svint16_t svnot_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorn_b_z))) svbool_t svorn_z(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_b_z))) svbool_t svorr_z(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u8_m))) svuint8_t svorr_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u32_m))) svuint32_t svorr_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u64_m))) svuint64_t svorr_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u16_m))) svuint16_t svorr_m(svbool_t, svuint16_t, uint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s8_m))) svint8_t svorr_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s32_m))) svint32_t svorr_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s64_m))) svint64_t svorr_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s16_m))) svint16_t svorr_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u8_x))) svuint8_t svorr_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u32_x))) svuint32_t svorr_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u64_x))) svuint64_t svorr_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u16_x))) svuint16_t svorr_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s8_x))) svint8_t svorr_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s32_x))) svint32_t svorr_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s64_x))) svint64_t svorr_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s16_x))) svint16_t svorr_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u8_z))) svuint8_t svorr_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u32_z))) svuint32_t svorr_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u64_z))) svuint64_t svorr_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u16_z))) svuint16_t svorr_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s8_z))) svint8_t svorr_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s32_z))) svint32_t svorr_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s64_z))) svint64_t svorr_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s16_z))) svint16_t svorr_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u8_m))) svuint8_t svorr_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u32_m))) svuint32_t svorr_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u64_m))) svuint64_t svorr_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u16_m))) svuint16_t svorr_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s8_m))) svint8_t svorr_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s32_m))) svint32_t svorr_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s64_m))) svint64_t svorr_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s16_m))) svint16_t svorr_m(svbool_t, svint16_t, 
svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u8_x))) svuint8_t svorr_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u32_x))) svuint32_t svorr_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u64_x))) svuint64_t svorr_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u16_x))) svuint16_t svorr_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s8_x))) svint8_t svorr_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s32_x))) svint32_t svorr_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s64_x))) svint64_t svorr_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s16_x))) svint16_t svorr_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u8_z))) svuint8_t svorr_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u32_z))) svuint32_t svorr_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u64_z))) svuint64_t svorr_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u16_z))) svuint16_t svorr_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s8_z))) svint8_t svorr_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s32_z))) svint32_t svorr_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s64_z))) svint64_t svorr_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s16_z))) svint16_t svorr_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_u8))) uint8_t svorv(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_u32))) uint32_t svorv(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_u64))) uint64_t svorv(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_u16))) uint16_t svorv(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_s8))) int8_t svorv(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_s32))) int32_t svorv(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_s64))) int64_t svorv(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_s16))) int16_t svorv(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpfalse_b))) svbool_t svpfalse(void); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpfirst_b))) svbool_t svpfirst(svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u32base))) void svprfb_gather(svbool_t, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u64base))) void svprfb_gather(svbool_t, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u32base_offset))) void 
svprfb_gather_offset(svbool_t, svuint32_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u64base_offset))) void svprfb_gather_offset(svbool_t, svuint64_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_s32offset))) void svprfb_gather_offset(svbool_t, void const *, svint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u32offset))) void svprfb_gather_offset(svbool_t, void const *, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_s64offset))) void svprfb_gather_offset(svbool_t, void const *, svint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u64offset))) void svprfb_gather_offset(svbool_t, void const *, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u32base))) void svprfd_gather(svbool_t, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u64base))) void svprfd_gather(svbool_t, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u32base_index))) void svprfd_gather_index(svbool_t, svuint32_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u64base_index))) void svprfd_gather_index(svbool_t, svuint64_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_s32index))) void svprfd_gather_index(svbool_t, void const *, svint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u32index))) void svprfd_gather_index(svbool_t, void const *, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_s64index))) void svprfd_gather_index(svbool_t, void const *, svint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u64index))) void svprfd_gather_index(svbool_t, void const *, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u32base))) void svprfh_gather(svbool_t, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u64base))) void svprfh_gather(svbool_t, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u32base_index))) void svprfh_gather_index(svbool_t, svuint32_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u64base_index))) void svprfh_gather_index(svbool_t, svuint64_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_s32index))) void svprfh_gather_index(svbool_t, void const *, svint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u32index))) void svprfh_gather_index(svbool_t, void const *, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_s64index))) void svprfh_gather_index(svbool_t, void const *, svint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u64index))) void svprfh_gather_index(svbool_t, void const *, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u32base))) void svprfw_gather(svbool_t, svuint32_t, enum svprfop); 
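/*
 * Illustrative example (editor's addition, not part of the generated header):
 * a minimal sketch of how the overloaded gather-prefetch forms declared above
 * might be called. The function name, the all-true predicate and the choice of
 * SV_PLDL1KEEP are assumptions for illustration only; the types and the
 * svprfd_gather_index overload come from the surrounding declarations.
 */
static inline void example_prefetch_gathered_doubles(const double *base,
                                                     svint64_t indices) {
  svbool_t pg = svptrue_b64();                 /* all lanes active */
  /* Hint that base[indices[i]] will soon be loaded; resolves to the
   * svprfd_gather_index(svbool_t, void const *, svint64_t, enum svprfop)
   * overload declared above. */
  svprfd_gather_index(pg, base, indices, SV_PLDL1KEEP);
}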
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u64base))) void svprfw_gather(svbool_t, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u32base_index))) void svprfw_gather_index(svbool_t, svuint32_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u64base_index))) void svprfw_gather_index(svbool_t, svuint64_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_s32index))) void svprfw_gather_index(svbool_t, void const *, svint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u32index))) void svprfw_gather_index(svbool_t, void const *, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_s64index))) void svprfw_gather_index(svbool_t, void const *, svint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u64index))) void svprfw_gather_index(svbool_t, void const *, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s8))) svint8_t svqadd(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s32))) svint32_t svqadd(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s64))) svint64_t svqadd(svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s16))) svint16_t svqadd(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u8))) svuint8_t svqadd(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u32))) svuint32_t svqadd(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u64))) svuint64_t svqadd(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u16))) svuint16_t svqadd(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s8))) svint8_t svqadd(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s32))) svint32_t svqadd(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s64))) svint64_t svqadd(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s16))) svint16_t svqadd(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u8))) svuint8_t svqadd(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u32))) svuint32_t svqadd(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u64))) svuint64_t svqadd(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u16))) svuint16_t svqadd(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_n_s32))) int32_t svqdecb(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_n_s64))) int64_t svqdecb(int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_n_u32))) uint32_t svqdecb(uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_n_u64))) uint64_t svqdecb(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_pat_n_s32))) int32_t svqdecb_pat(int32_t, enum svpattern, 
uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_pat_n_s64))) int64_t svqdecb_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_pat_n_u32))) uint32_t svqdecb_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_pat_n_u64))) uint64_t svqdecb_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_n_s32))) int32_t svqdecd(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_n_s64))) int64_t svqdecd(int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_n_u32))) uint32_t svqdecd(uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_n_u64))) uint64_t svqdecd(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_s64))) svint64_t svqdecd(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_u64))) svuint64_t svqdecd(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_n_s32))) int32_t svqdecd_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_n_s64))) int64_t svqdecd_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_n_u32))) uint32_t svqdecd_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_n_u64))) uint64_t svqdecd_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_s64))) svint64_t svqdecd_pat(svint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_u64))) svuint64_t svqdecd_pat(svuint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_n_s32))) int32_t svqdech(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_n_s64))) int64_t svqdech(int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_n_u32))) uint32_t svqdech(uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_n_u64))) uint64_t svqdech(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_s16))) svint16_t svqdech(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_u16))) svuint16_t svqdech(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_n_s32))) int32_t svqdech_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_n_s64))) int64_t svqdech_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_n_u32))) uint32_t svqdech_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_n_u64))) uint64_t svqdech_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_s16))) svint16_t svqdech_pat(svint16_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_u16))) svuint16_t svqdech_pat(svuint16_t, enum svpattern, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s32_b8))) int32_t svqdecp_b8(int32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s32_b32))) int32_t svqdecp_b32(int32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s32_b64))) int32_t svqdecp_b64(int32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s32_b16))) int32_t svqdecp_b16(int32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s64_b8))) int64_t svqdecp_b8(int64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s64_b32))) int64_t svqdecp_b32(int64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s64_b64))) int64_t svqdecp_b64(int64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s64_b16))) int64_t svqdecp_b16(int64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u32_b8))) uint32_t svqdecp_b8(uint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u32_b32))) uint32_t svqdecp_b32(uint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u32_b64))) uint32_t svqdecp_b64(uint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u32_b16))) uint32_t svqdecp_b16(uint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u64_b8))) uint64_t svqdecp_b8(uint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u64_b32))) uint64_t svqdecp_b32(uint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u64_b64))) uint64_t svqdecp_b64(uint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u64_b16))) uint64_t svqdecp_b16(uint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_s32))) svint32_t svqdecp(svint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_s64))) svint64_t svqdecp(svint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_s16))) svint16_t svqdecp(svint16_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_u32))) svuint32_t svqdecp(svuint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_u64))) svuint64_t svqdecp(svuint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_u16))) svuint16_t svqdecp(svuint16_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_n_s32))) int32_t svqdecw(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_n_s64))) int64_t svqdecw(int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_n_u32))) uint32_t svqdecw(uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_n_u64))) uint64_t svqdecw(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_s32))) svint32_t svqdecw(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_u32))) svuint32_t svqdecw(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_n_s32))) int32_t svqdecw_pat(int32_t, enum svpattern, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_n_s64))) int64_t svqdecw_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_n_u32))) uint32_t svqdecw_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_n_u64))) uint64_t svqdecw_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_s32))) svint32_t svqdecw_pat(svint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_u32))) svuint32_t svqdecw_pat(svuint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_n_s32))) int32_t svqincb(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_n_s64))) int64_t svqincb(int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_n_u32))) uint32_t svqincb(uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_n_u64))) uint64_t svqincb(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_pat_n_s32))) int32_t svqincb_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_pat_n_s64))) int64_t svqincb_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_pat_n_u32))) uint32_t svqincb_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_pat_n_u64))) uint64_t svqincb_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_n_s32))) int32_t svqincd(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_n_s64))) int64_t svqincd(int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_n_u32))) uint32_t svqincd(uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_n_u64))) uint64_t svqincd(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_s64))) svint64_t svqincd(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_u64))) svuint64_t svqincd(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_n_s32))) int32_t svqincd_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_n_s64))) int64_t svqincd_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_n_u32))) uint32_t svqincd_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_n_u64))) uint64_t svqincd_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_s64))) svint64_t svqincd_pat(svint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_u64))) svuint64_t svqincd_pat(svuint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_n_s32))) int32_t svqinch(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_n_s64))) int64_t svqinch(int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_n_u32))) 
uint32_t svqinch(uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_n_u64))) uint64_t svqinch(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_s16))) svint16_t svqinch(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_u16))) svuint16_t svqinch(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_n_s32))) int32_t svqinch_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_n_s64))) int64_t svqinch_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_n_u32))) uint32_t svqinch_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_n_u64))) uint64_t svqinch_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_s16))) svint16_t svqinch_pat(svint16_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_u16))) svuint16_t svqinch_pat(svuint16_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s32_b8))) int32_t svqincp_b8(int32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s32_b32))) int32_t svqincp_b32(int32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s32_b64))) int32_t svqincp_b64(int32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s32_b16))) int32_t svqincp_b16(int32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s64_b8))) int64_t svqincp_b8(int64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s64_b32))) int64_t svqincp_b32(int64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s64_b64))) int64_t svqincp_b64(int64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s64_b16))) int64_t svqincp_b16(int64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u32_b8))) uint32_t svqincp_b8(uint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u32_b32))) uint32_t svqincp_b32(uint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u32_b64))) uint32_t svqincp_b64(uint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u32_b16))) uint32_t svqincp_b16(uint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u64_b8))) uint64_t svqincp_b8(uint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u64_b32))) uint64_t svqincp_b32(uint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u64_b64))) uint64_t svqincp_b64(uint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u64_b16))) uint64_t svqincp_b16(uint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_s32))) svint32_t svqincp(svint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_s64))) svint64_t svqincp(svint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_s16))) svint16_t svqincp(svint16_t, svbool_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_u32))) svuint32_t svqincp(svuint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_u64))) svuint64_t svqincp(svuint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_u16))) svuint16_t svqincp(svuint16_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_n_s32))) int32_t svqincw(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_n_s64))) int64_t svqincw(int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_n_u32))) uint32_t svqincw(uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_n_u64))) uint64_t svqincw(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_s32))) svint32_t svqincw(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_u32))) svuint32_t svqincw(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_n_s32))) int32_t svqincw_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_n_s64))) int64_t svqincw_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_n_u32))) uint32_t svqincw_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_n_u64))) uint64_t svqincw_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_s32))) svint32_t svqincw_pat(svint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_u32))) svuint32_t svqincw_pat(svuint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s8))) svint8_t svqsub(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s32))) svint32_t svqsub(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s64))) svint64_t svqsub(svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s16))) svint16_t svqsub(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u8))) svuint8_t svqsub(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u32))) svuint32_t svqsub(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u64))) svuint64_t svqsub(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u16))) svuint16_t svqsub(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s8))) svint8_t svqsub(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s32))) svint32_t svqsub(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s64))) svint64_t svqsub(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s16))) svint16_t svqsub(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u8))) svuint8_t svqsub(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u32))) svuint32_t svqsub(svuint32_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u64))) svuint64_t svqsub(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u16))) svuint16_t svqsub(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u8_m))) svuint8_t svrbit_m(svuint8_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u32_m))) svuint32_t svrbit_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u64_m))) svuint64_t svrbit_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u16_m))) svuint16_t svrbit_m(svuint16_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s8_m))) svint8_t svrbit_m(svint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s32_m))) svint32_t svrbit_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s64_m))) svint64_t svrbit_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s16_m))) svint16_t svrbit_m(svint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u8_x))) svuint8_t svrbit_x(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u32_x))) svuint32_t svrbit_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u64_x))) svuint64_t svrbit_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u16_x))) svuint16_t svrbit_x(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s8_x))) svint8_t svrbit_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s32_x))) svint32_t svrbit_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s64_x))) svint64_t svrbit_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s16_x))) svint16_t svrbit_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u8_z))) svuint8_t svrbit_z(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u32_z))) svuint32_t svrbit_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u64_z))) svuint64_t svrbit_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u16_z))) svuint16_t svrbit_z(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s8_z))) svint8_t svrbit_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s32_z))) svint32_t svrbit_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s64_z))) svint64_t svrbit_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s16_z))) svint16_t svrbit_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_f64))) svfloat64_t svrecpe(svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_f32))) svfloat32_t svrecpe(svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_f16))) svfloat16_t svrecpe(svfloat16_t); __aio 
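/*
 * Editor's note (illustrative, not part of the generated header): svrecpe
 * above returns only a coarse initial reciprocal estimate; it is normally
 * refined with the svrecps Newton-Raphson step declared just below, e.g.:
 *
 *   svfloat32_t r = svrecpe(x);               // initial estimate of 1/x
 *   r = svmul_x(pg, r, svrecps(x, r));        // one refinement step
 *   r = svmul_x(pg, r, svrecps(x, r));        // further step for more precision
 *
 * The predicate pg and the number of refinement steps are assumptions for
 * illustration only.
 */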
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecps_f64))) svfloat64_t svrecps(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecps_f32))) svfloat32_t svrecps(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecps_f16))) svfloat16_t svrecps(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f64_m))) svfloat64_t svrecpx_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f32_m))) svfloat32_t svrecpx_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f16_m))) svfloat16_t svrecpx_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f64_x))) svfloat64_t svrecpx_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f32_x))) svfloat32_t svrecpx_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f16_x))) svfloat16_t svrecpx_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f64_z))) svfloat64_t svrecpx_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f32_z))) svfloat32_t svrecpx_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f16_z))) svfloat16_t svrecpx_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_u8))) svuint8_t svrev(svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_u32))) svuint32_t svrev(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_u64))) svuint64_t svrev(svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_u16))) svuint16_t svrev(svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_s8))) svint8_t svrev(svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_f64))) svfloat64_t svrev(svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_f32))) svfloat32_t svrev(svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_f16))) svfloat16_t svrev(svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_s32))) svint32_t svrev(svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_s64))) svint64_t svrev(svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_s16))) svint16_t svrev(svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u32_m))) svuint32_t svrevb_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u64_m))) svuint64_t svrevb_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u16_m))) svuint16_t svrevb_m(svuint16_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s32_m))) svint32_t svrevb_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s64_m))) svint64_t svrevb_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s16_m))) svint16_t svrevb_m(svint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u32_x))) svuint32_t svrevb_x(svbool_t, 
svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u64_x))) svuint64_t svrevb_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u16_x))) svuint16_t svrevb_x(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s32_x))) svint32_t svrevb_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s64_x))) svint64_t svrevb_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s16_x))) svint16_t svrevb_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u32_z))) svuint32_t svrevb_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u64_z))) svuint64_t svrevb_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u16_z))) svuint16_t svrevb_z(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s32_z))) svint32_t svrevb_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s64_z))) svint64_t svrevb_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s16_z))) svint16_t svrevb_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u32_m))) svuint32_t svrevh_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u64_m))) svuint64_t svrevh_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s32_m))) svint32_t svrevh_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s64_m))) svint64_t svrevh_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u32_x))) svuint32_t svrevh_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u64_x))) svuint64_t svrevh_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s32_x))) svint32_t svrevh_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s64_x))) svint64_t svrevh_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u32_z))) svuint32_t svrevh_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u64_z))) svuint64_t svrevh_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s32_z))) svint32_t svrevh_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s64_z))) svint64_t svrevh_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_u64_m))) svuint64_t svrevw_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_s64_m))) svint64_t svrevw_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_u64_x))) svuint64_t svrevw_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_s64_x))) svint64_t svrevw_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_u64_z))) svuint64_t svrevw_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_s64_z))) svint64_t svrevw_z(svbool_t, svint64_t); __aio 
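/* Sketch: svrevb_x reverses the bytes within each 32-bit element, i.e. a
   per-lane endianness swap. svptrue_b32 (declared elsewhere in this header)
   makes every lane active, so the don't-care _x form is the cheapest choice. */
static inline svuint32_t bswap32_lanes(svuint32_t v) {
  return svrevb_x(svptrue_b32(), v);
}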
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f64_m))) svfloat64_t svrinta_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_m))) svfloat32_t svrinta_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f16_m))) svfloat16_t svrinta_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f64_x))) svfloat64_t svrinta_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_x))) svfloat32_t svrinta_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f16_x))) svfloat16_t svrinta_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f64_z))) svfloat64_t svrinta_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_z))) svfloat32_t svrinta_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f16_z))) svfloat16_t svrinta_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f64_m))) svfloat64_t svrinti_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f32_m))) svfloat32_t svrinti_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f16_m))) svfloat16_t svrinti_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f64_x))) svfloat64_t svrinti_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f32_x))) svfloat32_t svrinti_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f16_x))) svfloat16_t svrinti_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f64_z))) svfloat64_t svrinti_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f32_z))) svfloat32_t svrinti_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f16_z))) svfloat16_t svrinti_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f64_m))) svfloat64_t svrintm_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_m))) svfloat32_t svrintm_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f16_m))) svfloat16_t svrintm_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f64_x))) svfloat64_t svrintm_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_x))) svfloat32_t svrintm_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f16_x))) svfloat16_t svrintm_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f64_z))) svfloat64_t svrintm_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_z))) svfloat32_t svrintm_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f16_z))) svfloat16_t svrintm_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f64_m))) svfloat64_t 
svrintn_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_m))) svfloat32_t svrintn_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f16_m))) svfloat16_t svrintn_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f64_x))) svfloat64_t svrintn_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_x))) svfloat32_t svrintn_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f16_x))) svfloat16_t svrintn_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f64_z))) svfloat64_t svrintn_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_z))) svfloat32_t svrintn_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f16_z))) svfloat16_t svrintn_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f64_m))) svfloat64_t svrintp_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_m))) svfloat32_t svrintp_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f16_m))) svfloat16_t svrintp_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f64_x))) svfloat64_t svrintp_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_x))) svfloat32_t svrintp_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f16_x))) svfloat16_t svrintp_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f64_z))) svfloat64_t svrintp_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_z))) svfloat32_t svrintp_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f16_z))) svfloat16_t svrintp_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f64_m))) svfloat64_t svrintx_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f32_m))) svfloat32_t svrintx_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f16_m))) svfloat16_t svrintx_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f64_x))) svfloat64_t svrintx_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f32_x))) svfloat32_t svrintx_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f16_x))) svfloat16_t svrintx_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f64_z))) svfloat64_t svrintx_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f32_z))) svfloat32_t svrintx_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f16_z))) svfloat16_t svrintx_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f64_m))) svfloat64_t svrintz_m(svfloat64_t, svbool_t, svfloat64_t); __aio 
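/* Sketch: the svrint* families differ only in rounding mode; the _m forms merge
   inactive lanes from the first operand, _z zeroes them, and _x leaves them
   unspecified. svrintn rounds to nearest with ties to even. Helper name is
   illustrative only. */
static inline svfloat32_t round_nearest_even(svfloat32_t x) {
  return svrintn_x(svptrue_b32(), x);
}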
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f32_m))) svfloat32_t svrintz_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f16_m))) svfloat16_t svrintz_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f64_x))) svfloat64_t svrintz_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f32_x))) svfloat32_t svrintz_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f16_x))) svfloat16_t svrintz_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f64_z))) svfloat64_t svrintz_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f32_z))) svfloat32_t svrintz_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f16_z))) svfloat16_t svrintz_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_f64))) svfloat64_t svrsqrte(svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_f32))) svfloat32_t svrsqrte(svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_f16))) svfloat16_t svrsqrte(svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrts_f64))) svfloat64_t svrsqrts(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrts_f32))) svfloat32_t svrsqrts(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrts_f16))) svfloat16_t svrsqrts(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f64_m))) svfloat64_t svscale_m(svbool_t, svfloat64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f32_m))) svfloat32_t svscale_m(svbool_t, svfloat32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f16_m))) svfloat16_t svscale_m(svbool_t, svfloat16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f64_x))) svfloat64_t svscale_x(svbool_t, svfloat64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f32_x))) svfloat32_t svscale_x(svbool_t, svfloat32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f16_x))) svfloat16_t svscale_x(svbool_t, svfloat16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f64_z))) svfloat64_t svscale_z(svbool_t, svfloat64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f32_z))) svfloat32_t svscale_z(svbool_t, svfloat32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f16_z))) svfloat16_t svscale_z(svbool_t, svfloat16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f64_m))) svfloat64_t svscale_m(svbool_t, svfloat64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f32_m))) svfloat32_t svscale_m(svbool_t, svfloat32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f16_m))) svfloat16_t svscale_m(svbool_t, svfloat16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f64_x))) svfloat64_t svscale_x(svbool_t, svfloat64_t, svint64_t); __aio 
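/* Sketch: one Newton-Raphson refinement of the reciprocal square root built from
   the estimate/step pair declared above. svmul_x is the ordinary predicated
   multiply declared elsewhere in this header; svrsqrts computes (3 - x*y*y)/2,
   so multiplying the estimate by that step improves its precision. */
static inline svfloat32_t rsqrt_refined(svfloat32_t x) {
  svbool_t pg = svptrue_b32();
  svfloat32_t y = svrsqrte(x);                           /* initial estimate    */
  y = svmul_x(pg, y, svrsqrts(x, svmul_x(pg, y, y)));    /* one refinement step */
  return y;
}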
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f32_x))) svfloat32_t svscale_x(svbool_t, svfloat32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f16_x))) svfloat16_t svscale_x(svbool_t, svfloat16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f64_z))) svfloat64_t svscale_z(svbool_t, svfloat64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f32_z))) svfloat32_t svscale_z(svbool_t, svfloat32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f16_z))) svfloat16_t svscale_z(svbool_t, svfloat16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_b))) svbool_t svsel(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u8))) svuint8_t svsel(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u32))) svuint32_t svsel(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u64))) svuint64_t svsel(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u16))) svuint16_t svsel(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s8))) svint8_t svsel(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f64))) svfloat64_t svsel(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f32))) svfloat32_t svsel(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f16))) svfloat16_t svsel(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s32))) svint32_t svsel(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s64))) svint64_t svsel(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s16))) svint16_t svsel(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_u8))) svuint8x2_t svset2(svuint8x2_t, uint64_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_u32))) svuint32x2_t svset2(svuint32x2_t, uint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_u64))) svuint64x2_t svset2(svuint64x2_t, uint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_u16))) svuint16x2_t svset2(svuint16x2_t, uint64_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s8))) svint8x2_t svset2(svint8x2_t, uint64_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_f64))) svfloat64x2_t svset2(svfloat64x2_t, uint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_f32))) svfloat32x2_t svset2(svfloat32x2_t, uint64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_f16))) svfloat16x2_t svset2(svfloat16x2_t, uint64_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s32))) svint32x2_t svset2(svint32x2_t, uint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s64))) svint64x2_t svset2(svint64x2_t, uint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s16))) 
svint16x2_t svset2(svint16x2_t, uint64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u8))) svuint8x3_t svset3(svuint8x3_t, uint64_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u32))) svuint32x3_t svset3(svuint32x3_t, uint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u64))) svuint64x3_t svset3(svuint64x3_t, uint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u16))) svuint16x3_t svset3(svuint16x3_t, uint64_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s8))) svint8x3_t svset3(svint8x3_t, uint64_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_f64))) svfloat64x3_t svset3(svfloat64x3_t, uint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_f32))) svfloat32x3_t svset3(svfloat32x3_t, uint64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_f16))) svfloat16x3_t svset3(svfloat16x3_t, uint64_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s32))) svint32x3_t svset3(svint32x3_t, uint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s64))) svint64x3_t svset3(svint64x3_t, uint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s16))) svint16x3_t svset3(svint16x3_t, uint64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u8))) svuint8x4_t svset4(svuint8x4_t, uint64_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u32))) svuint32x4_t svset4(svuint32x4_t, uint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u64))) svuint64x4_t svset4(svuint64x4_t, uint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u16))) svuint16x4_t svset4(svuint16x4_t, uint64_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s8))) svint8x4_t svset4(svint8x4_t, uint64_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_f64))) svfloat64x4_t svset4(svfloat64x4_t, uint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_f32))) svfloat32x4_t svset4(svfloat32x4_t, uint64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_f16))) svfloat16x4_t svset4(svfloat16x4_t, uint64_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s32))) svint32x4_t svset4(svint32x4_t, uint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s64))) svint64x4_t svset4(svint64x4_t, uint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s16))) svint16x4_t svset4(svint16x4_t, uint64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u8))) svuint8_t svsplice(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u32))) svuint32_t svsplice(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u64))) svuint64_t svsplice(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u16))) svuint16_t svsplice(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_s8))) svint8_t 
svsplice(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_f64))) svfloat64_t svsplice(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_f32))) svfloat32_t svsplice(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_f16))) svfloat16_t svsplice(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_s32))) svint32_t svsplice(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_s64))) svint64_t svsplice(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_s16))) svint16_t svsplice(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f64_m))) svfloat64_t svsqrt_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f32_m))) svfloat32_t svsqrt_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f16_m))) svfloat16_t svsqrt_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f64_x))) svfloat64_t svsqrt_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f32_x))) svfloat32_t svsqrt_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f16_x))) svfloat16_t svsqrt_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f64_z))) svfloat64_t svsqrt_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f32_z))) svfloat32_t svsqrt_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f16_z))) svfloat16_t svsqrt_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8))) void svst1(svbool_t, uint8_t *, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32))) void svst1(svbool_t, uint32_t *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64))) void svst1(svbool_t, uint64_t *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16))) void svst1(svbool_t, uint16_t *, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8))) void svst1(svbool_t, int8_t *, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64))) void svst1(svbool_t, float64_t *, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32))) void svst1(svbool_t, float32_t *, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16))) void svst1(svbool_t, float16_t *, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32))) void svst1(svbool_t, int32_t *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64))) void svst1(svbool_t, int64_t *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16))) void svst1(svbool_t, int16_t *, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_index_u32))) void svst1_scatter_index(svbool_t, svuint32_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_index_u64))) void 
svst1_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_index_f64))) void svst1_scatter_index(svbool_t, svuint64_t, int64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_index_f32))) void svst1_scatter_index(svbool_t, svuint32_t, int64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_index_s32))) void svst1_scatter_index(svbool_t, svuint32_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_index_s64))) void svst1_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_offset_u32))) void svst1_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_offset_u64))) void svst1_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_offset_f64))) void svst1_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_offset_f32))) void svst1_scatter_offset(svbool_t, svuint32_t, int64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_offset_s32))) void svst1_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_offset_s64))) void svst1_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_u32))) void svst1_scatter(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_u64))) void svst1_scatter(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_f64))) void svst1_scatter(svbool_t, svuint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_f32))) void svst1_scatter(svbool_t, svuint32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_s32))) void svst1_scatter(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_s64))) void svst1_scatter(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32index_u32))) void svst1_scatter_index(svbool_t, uint32_t *, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32index_f32))) void svst1_scatter_index(svbool_t, float32_t *, svint32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32index_s32))) void svst1_scatter_index(svbool_t, int32_t *, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32index_u32))) void svst1_scatter_index(svbool_t, uint32_t *, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32index_f32))) void svst1_scatter_index(svbool_t, float32_t *, svuint32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32index_s32))) void svst1_scatter_index(svbool_t, int32_t *, svuint32_t, 
svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64index_u64))) void svst1_scatter_index(svbool_t, uint64_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64index_f64))) void svst1_scatter_index(svbool_t, float64_t *, svint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64index_s64))) void svst1_scatter_index(svbool_t, int64_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64index_u64))) void svst1_scatter_index(svbool_t, uint64_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64index_f64))) void svst1_scatter_index(svbool_t, float64_t *, svuint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64index_s64))) void svst1_scatter_index(svbool_t, int64_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32offset_u32))) void svst1_scatter_offset(svbool_t, uint32_t *, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32offset_f32))) void svst1_scatter_offset(svbool_t, float32_t *, svint32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32offset_s32))) void svst1_scatter_offset(svbool_t, int32_t *, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32offset_u32))) void svst1_scatter_offset(svbool_t, uint32_t *, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32offset_f32))) void svst1_scatter_offset(svbool_t, float32_t *, svuint32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32offset_s32))) void svst1_scatter_offset(svbool_t, int32_t *, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64offset_u64))) void svst1_scatter_offset(svbool_t, uint64_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64offset_f64))) void svst1_scatter_offset(svbool_t, float64_t *, svint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64offset_s64))) void svst1_scatter_offset(svbool_t, int64_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64offset_u64))) void svst1_scatter_offset(svbool_t, uint64_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64offset_f64))) void svst1_scatter_offset(svbool_t, float64_t *, svuint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64offset_s64))) void svst1_scatter_offset(svbool_t, int64_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8))) void svst1_vnum(svbool_t, uint8_t *, int64_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32))) void svst1_vnum(svbool_t, uint32_t *, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64))) void svst1_vnum(svbool_t, uint64_t *, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16))) void svst1_vnum(svbool_t, uint16_t *, int64_t, svuint16_t); __aio 
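/* Sketch combining the overloads above: svsel takes lanes from 'a' where the
   predicate is true and from 'b' elsewhere, and svst1 writes the result through
   a float32_t pointer. Assumes dst has room for at least one full vector. */
static inline void select_and_store(svbool_t pg, svfloat32_t a, svfloat32_t b,
                                    float32_t *dst) {
  svst1(svptrue_b32(), dst, svsel(pg, a, b));
}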
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8))) void svst1_vnum(svbool_t, int8_t *, int64_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64))) void svst1_vnum(svbool_t, float64_t *, int64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32))) void svst1_vnum(svbool_t, float32_t *, int64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16))) void svst1_vnum(svbool_t, float16_t *, int64_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32))) void svst1_vnum(svbool_t, int32_t *, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64))) void svst1_vnum(svbool_t, int64_t *, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16))) void svst1_vnum(svbool_t, int16_t *, int64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_s32))) void svst1b(svbool_t, int8_t *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_s64))) void svst1b(svbool_t, int8_t *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_s16))) void svst1b(svbool_t, int8_t *, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_u32))) void svst1b(svbool_t, uint8_t *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_u64))) void svst1b(svbool_t, uint8_t *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_u16))) void svst1b(svbool_t, uint8_t *, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32base_offset_u32))) void svst1b_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64base_offset_u64))) void svst1b_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32base_offset_s32))) void svst1b_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64base_offset_s64))) void svst1b_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32base_u32))) void svst1b_scatter(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64base_u64))) void svst1b_scatter(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32base_s32))) void svst1b_scatter(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64base_s64))) void svst1b_scatter(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_s32offset_s32))) void svst1b_scatter_offset(svbool_t, int8_t *, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_s32offset_u32))) void svst1b_scatter_offset(svbool_t, uint8_t *, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32offset_s32))) void svst1b_scatter_offset(svbool_t, int8_t *, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32offset_u32))) void 
svst1b_scatter_offset(svbool_t, uint8_t *, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_s64offset_s64))) void svst1b_scatter_offset(svbool_t, int8_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_s64offset_u64))) void svst1b_scatter_offset(svbool_t, uint8_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64offset_s64))) void svst1b_scatter_offset(svbool_t, int8_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64offset_u64))) void svst1b_scatter_offset(svbool_t, uint8_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_s32))) void svst1b_vnum(svbool_t, int8_t *, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_s64))) void svst1b_vnum(svbool_t, int8_t *, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_s16))) void svst1b_vnum(svbool_t, int8_t *, int64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_u32))) void svst1b_vnum(svbool_t, uint8_t *, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_u64))) void svst1b_vnum(svbool_t, uint8_t *, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_u16))) void svst1b_vnum(svbool_t, uint8_t *, int64_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_s32))) void svst1h(svbool_t, int16_t *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_s64))) void svst1h(svbool_t, int16_t *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_u32))) void svst1h(svbool_t, uint16_t *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_u64))) void svst1h(svbool_t, uint16_t *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_index_u32))) void svst1h_scatter_index(svbool_t, svuint32_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_index_u64))) void svst1h_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_index_s32))) void svst1h_scatter_index(svbool_t, svuint32_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_index_s64))) void svst1h_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_offset_u32))) void svst1h_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_offset_u64))) void svst1h_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_offset_s32))) void svst1h_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_offset_s64))) void svst1h_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_u32))) void svst1h_scatter(svbool_t, svuint32_t, svuint32_t); __aio 
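/* Sketch: svst1b narrows each active 32-bit lane to its low byte on the way out,
   so small values held in an svint32_t can be written straight into an int8_t
   buffer without a separate pack/extract step. */
static inline void store_low_bytes(svbool_t pg, int8_t *dst, svint32_t v) {
  svst1b(pg, dst, v);
}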
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_u64))) void svst1h_scatter(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_s32))) void svst1h_scatter(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_s64))) void svst1h_scatter(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s32index_s32))) void svst1h_scatter_index(svbool_t, int16_t *, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s32index_u32))) void svst1h_scatter_index(svbool_t, uint16_t *, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32index_s32))) void svst1h_scatter_index(svbool_t, int16_t *, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32index_u32))) void svst1h_scatter_index(svbool_t, uint16_t *, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s64index_s64))) void svst1h_scatter_index(svbool_t, int16_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s64index_u64))) void svst1h_scatter_index(svbool_t, uint16_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64index_s64))) void svst1h_scatter_index(svbool_t, int16_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64index_u64))) void svst1h_scatter_index(svbool_t, uint16_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s32offset_s32))) void svst1h_scatter_offset(svbool_t, int16_t *, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s32offset_u32))) void svst1h_scatter_offset(svbool_t, uint16_t *, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32offset_s32))) void svst1h_scatter_offset(svbool_t, int16_t *, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32offset_u32))) void svst1h_scatter_offset(svbool_t, uint16_t *, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s64offset_s64))) void svst1h_scatter_offset(svbool_t, int16_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s64offset_u64))) void svst1h_scatter_offset(svbool_t, uint16_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64offset_s64))) void svst1h_scatter_offset(svbool_t, int16_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64offset_u64))) void svst1h_scatter_offset(svbool_t, uint16_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_vnum_s32))) void svst1h_vnum(svbool_t, int16_t *, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_vnum_s64))) void svst1h_vnum(svbool_t, int16_t *, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_vnum_u32))) void svst1h_vnum(svbool_t, uint16_t *, int64_t, svuint32_t); __aio 
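/* Sketch: a truncating scatter store. Each active 32-bit lane of 'vals' is
   narrowed to 16 bits and written at a byte offset taken from 'offsets'; the
   _offset overloads take byte offsets, while the _index overloads scale by the
   element size. Helper name is illustrative. */
static inline void scatter_halfwords(svbool_t pg, uint16_t *base,
                                     svuint32_t offsets, svuint32_t vals) {
  svst1h_scatter_offset(pg, base, offsets, vals);
}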
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_vnum_u64))) void svst1h_vnum(svbool_t, uint16_t *, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_s64))) void svst1w(svbool_t, int32_t *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_u64))) void svst1w(svbool_t, uint32_t *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_index_u64))) void svst1w_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_index_s64))) void svst1w_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_offset_u64))) void svst1w_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_offset_s64))) void svst1w_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_u64))) void svst1w_scatter(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_s64))) void svst1w_scatter(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_s64index_s64))) void svst1w_scatter_index(svbool_t, int32_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_s64index_u64))) void svst1w_scatter_index(svbool_t, uint32_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64index_s64))) void svst1w_scatter_index(svbool_t, int32_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64index_u64))) void svst1w_scatter_index(svbool_t, uint32_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_s64offset_s64))) void svst1w_scatter_offset(svbool_t, int32_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_s64offset_u64))) void svst1w_scatter_offset(svbool_t, uint32_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64offset_s64))) void svst1w_scatter_offset(svbool_t, int32_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64offset_u64))) void svst1w_scatter_offset(svbool_t, uint32_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_vnum_s64))) void svst1w_vnum(svbool_t, int32_t *, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_vnum_u64))) void svst1w_vnum(svbool_t, uint32_t *, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_u8))) void svst2(svbool_t, uint8_t *, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_u32))) void svst2(svbool_t, uint32_t *, svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_u64))) void svst2(svbool_t, uint64_t *, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_u16))) void svst2(svbool_t, uint16_t *, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s8))) void svst2(svbool_t, int8_t *, svint8x2_t); __aio 
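/* Sketch: svst1w is the 64-to-32-bit truncating store, a common way to write
   back indices that were widened to svint64_t for address arithmetic. */
static inline void store_as_words(svbool_t pg, int32_t *dst, svint64_t v) {
  svst1w(pg, dst, v);
}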
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_f64))) void svst2(svbool_t, float64_t *, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_f32))) void svst2(svbool_t, float32_t *, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_f16))) void svst2(svbool_t, float16_t *, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s32))) void svst2(svbool_t, int32_t *, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s64))) void svst2(svbool_t, int64_t *, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s16))) void svst2(svbool_t, int16_t *, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u8))) void svst2_vnum(svbool_t, uint8_t *, int64_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u32))) void svst2_vnum(svbool_t, uint32_t *, int64_t, svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u64))) void svst2_vnum(svbool_t, uint64_t *, int64_t, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u16))) void svst2_vnum(svbool_t, uint16_t *, int64_t, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s8))) void svst2_vnum(svbool_t, int8_t *, int64_t, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_f64))) void svst2_vnum(svbool_t, float64_t *, int64_t, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_f32))) void svst2_vnum(svbool_t, float32_t *, int64_t, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_f16))) void svst2_vnum(svbool_t, float16_t *, int64_t, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s32))) void svst2_vnum(svbool_t, int32_t *, int64_t, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s64))) void svst2_vnum(svbool_t, int64_t *, int64_t, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s16))) void svst2_vnum(svbool_t, int16_t *, int64_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u8))) void svst3(svbool_t, uint8_t *, svuint8x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u32))) void svst3(svbool_t, uint32_t *, svuint32x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u64))) void svst3(svbool_t, uint64_t *, svuint64x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u16))) void svst3(svbool_t, uint16_t *, svuint16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s8))) void svst3(svbool_t, int8_t *, svint8x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_f64))) void svst3(svbool_t, float64_t *, svfloat64x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_f32))) void svst3(svbool_t, float32_t *, svfloat32x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_f16))) void svst3(svbool_t, float16_t *, svfloat16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s32))) void svst3(svbool_t, int32_t *, svint32x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s64))) void svst3(svbool_t, int64_t *, svint64x3_t); __aio 
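/* Sketch: svst2 stores a two-vector tuple with 2-way element interleaving, e.g.
   separate real/imaginary vectors written out as an interleaved complex array.
   svcreate2, declared elsewhere in this header, builds the x2 tuple. */
static inline void store_complex_interleaved(svbool_t pg, float32_t *dst,
                                             svfloat32_t re, svfloat32_t im) {
  svst2(pg, dst, svcreate2(re, im));
}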
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s16))) void svst3(svbool_t, int16_t *, svint16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u8))) void svst3_vnum(svbool_t, uint8_t *, int64_t, svuint8x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u32))) void svst3_vnum(svbool_t, uint32_t *, int64_t, svuint32x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u64))) void svst3_vnum(svbool_t, uint64_t *, int64_t, svuint64x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u16))) void svst3_vnum(svbool_t, uint16_t *, int64_t, svuint16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s8))) void svst3_vnum(svbool_t, int8_t *, int64_t, svint8x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_f64))) void svst3_vnum(svbool_t, float64_t *, int64_t, svfloat64x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_f32))) void svst3_vnum(svbool_t, float32_t *, int64_t, svfloat32x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_f16))) void svst3_vnum(svbool_t, float16_t *, int64_t, svfloat16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s32))) void svst3_vnum(svbool_t, int32_t *, int64_t, svint32x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s64))) void svst3_vnum(svbool_t, int64_t *, int64_t, svint64x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s16))) void svst3_vnum(svbool_t, int16_t *, int64_t, svint16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u8))) void svst4(svbool_t, uint8_t *, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u32))) void svst4(svbool_t, uint32_t *, svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u64))) void svst4(svbool_t, uint64_t *, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u16))) void svst4(svbool_t, uint16_t *, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s8))) void svst4(svbool_t, int8_t *, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_f64))) void svst4(svbool_t, float64_t *, svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_f32))) void svst4(svbool_t, float32_t *, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_f16))) void svst4(svbool_t, float16_t *, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s32))) void svst4(svbool_t, int32_t *, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s64))) void svst4(svbool_t, int64_t *, svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s16))) void svst4(svbool_t, int16_t *, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u8))) void svst4_vnum(svbool_t, uint8_t *, int64_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u32))) void svst4_vnum(svbool_t, uint32_t *, int64_t, svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u64))) void svst4_vnum(svbool_t, uint64_t *, int64_t, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u16))) void svst4_vnum(svbool_t, uint16_t *, int64_t, svuint16x4_t); 
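/* Sketch: svset4 returns a copy of a four-vector tuple with one element replaced
   (the index must be a constant in 0..3); paired with svst4 it assembles planes
   for a 4-way interleaved store such as RGBA pixels. Helper name is illustrative. */
static inline void store_rgba(svbool_t pg, uint8_t *dst,
                              svuint8x4_t px, svuint8_t new_alpha) {
  px = svset4(px, 3, new_alpha);   /* replace the alpha plane */
  svst4(pg, dst, px);
}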
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s8))) void svst4_vnum(svbool_t, int8_t *, int64_t, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_f64))) void svst4_vnum(svbool_t, float64_t *, int64_t, svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_f32))) void svst4_vnum(svbool_t, float32_t *, int64_t, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_f16))) void svst4_vnum(svbool_t, float16_t *, int64_t, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s32))) void svst4_vnum(svbool_t, int32_t *, int64_t, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s64))) void svst4_vnum(svbool_t, int64_t *, int64_t, svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s16))) void svst4_vnum(svbool_t, int16_t *, int64_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8))) void svstnt1(svbool_t, uint8_t *, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32))) void svstnt1(svbool_t, uint32_t *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64))) void svstnt1(svbool_t, uint64_t *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16))) void svstnt1(svbool_t, uint16_t *, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8))) void svstnt1(svbool_t, int8_t *, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64))) void svstnt1(svbool_t, float64_t *, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32))) void svstnt1(svbool_t, float32_t *, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16))) void svstnt1(svbool_t, float16_t *, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32))) void svstnt1(svbool_t, int32_t *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64))) void svstnt1(svbool_t, int64_t *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16))) void svstnt1(svbool_t, int16_t *, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8))) void svstnt1_vnum(svbool_t, uint8_t *, int64_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32))) void svstnt1_vnum(svbool_t, uint32_t *, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64))) void svstnt1_vnum(svbool_t, uint64_t *, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16))) void svstnt1_vnum(svbool_t, uint16_t *, int64_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8))) void svstnt1_vnum(svbool_t, int8_t *, int64_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64))) void svstnt1_vnum(svbool_t, float64_t *, int64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32))) void svstnt1_vnum(svbool_t, float32_t *, int64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16))) void svstnt1_vnum(svbool_t, float16_t *, int64_t, svfloat16_t); __aio 
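/* Sketch: svstnt1 has the same shape as svst1 but is non-temporal, a hint that
   the stored data will not be re-read soon, letting large streaming writes
   bypass the caches. */
static inline void stream_out(svbool_t pg, float32_t *dst, svfloat32_t v) {
  svstnt1(pg, dst, v);
}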
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32))) void svstnt1_vnum(svbool_t, int32_t *, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64))) void svstnt1_vnum(svbool_t, int64_t *, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16))) void svstnt1_vnum(svbool_t, int16_t *, int64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f64_m))) svfloat64_t svsub_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f32_m))) svfloat32_t svsub_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f16_m))) svfloat16_t svsub_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f64_x))) svfloat64_t svsub_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f32_x))) svfloat32_t svsub_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f16_x))) svfloat16_t svsub_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f64_z))) svfloat64_t svsub_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f32_z))) svfloat32_t svsub_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f16_z))) svfloat16_t svsub_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u8_m))) svuint8_t svsub_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u32_m))) svuint32_t svsub_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u64_m))) svuint64_t svsub_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u16_m))) svuint16_t svsub_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s8_m))) svint8_t svsub_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s32_m))) svint32_t svsub_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s64_m))) svint64_t svsub_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s16_m))) svint16_t svsub_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u8_x))) svuint8_t svsub_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u32_x))) svuint32_t svsub_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u64_x))) svuint64_t svsub_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u16_x))) svuint16_t svsub_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s8_x))) svint8_t svsub_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s32_x))) svint32_t svsub_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s64_x))) svint64_t svsub_x(svbool_t, svint64_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s16_x))) svint16_t svsub_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u8_z))) svuint8_t svsub_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u32_z))) svuint32_t svsub_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u64_z))) svuint64_t svsub_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u16_z))) svuint16_t svsub_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s8_z))) svint8_t svsub_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s32_z))) svint32_t svsub_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s64_z))) svint64_t svsub_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s16_z))) svint16_t svsub_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f64_m))) svfloat64_t svsub_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f32_m))) svfloat32_t svsub_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f16_m))) svfloat16_t svsub_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f64_x))) svfloat64_t svsub_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f32_x))) svfloat32_t svsub_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f16_x))) svfloat16_t svsub_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f64_z))) svfloat64_t svsub_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f32_z))) svfloat32_t svsub_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f16_z))) svfloat16_t svsub_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u8_m))) svuint8_t svsub_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u32_m))) svuint32_t svsub_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u64_m))) svuint64_t svsub_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u16_m))) svuint16_t svsub_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s8_m))) svint8_t svsub_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s32_m))) svint32_t svsub_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s64_m))) svint64_t svsub_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s16_m))) svint16_t svsub_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u8_x))) svuint8_t svsub_x(svbool_t, svuint8_t, svuint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u32_x))) svuint32_t svsub_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u64_x))) svuint64_t svsub_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u16_x))) svuint16_t svsub_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s8_x))) svint8_t svsub_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s32_x))) svint32_t svsub_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s64_x))) svint64_t svsub_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s16_x))) svint16_t svsub_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u8_z))) svuint8_t svsub_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u32_z))) svuint32_t svsub_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u64_z))) svuint64_t svsub_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u16_z))) svuint16_t svsub_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s8_z))) svint8_t svsub_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s32_z))) svint32_t svsub_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s64_z))) svint64_t svsub_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s16_z))) svint16_t svsub_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f64_m))) svfloat64_t svsubr_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f32_m))) svfloat32_t svsubr_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f16_m))) svfloat16_t svsubr_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f64_x))) svfloat64_t svsubr_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f32_x))) svfloat32_t svsubr_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f16_x))) svfloat16_t svsubr_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f64_z))) svfloat64_t svsubr_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f32_z))) svfloat32_t svsubr_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f16_z))) svfloat16_t svsubr_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u8_m))) svuint8_t svsubr_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u32_m))) svuint32_t svsubr_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u64_m))) svuint64_t svsubr_m(svbool_t, svuint64_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u16_m))) svuint16_t svsubr_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s8_m))) svint8_t svsubr_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s32_m))) svint32_t svsubr_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s64_m))) svint64_t svsubr_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s16_m))) svint16_t svsubr_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u8_x))) svuint8_t svsubr_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u32_x))) svuint32_t svsubr_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u64_x))) svuint64_t svsubr_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u16_x))) svuint16_t svsubr_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s8_x))) svint8_t svsubr_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s32_x))) svint32_t svsubr_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s64_x))) svint64_t svsubr_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s16_x))) svint16_t svsubr_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u8_z))) svuint8_t svsubr_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u32_z))) svuint32_t svsubr_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u64_z))) svuint64_t svsubr_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u16_z))) svuint16_t svsubr_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s8_z))) svint8_t svsubr_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s32_z))) svint32_t svsubr_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s64_z))) svint64_t svsubr_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s16_z))) svint16_t svsubr_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f64_m))) svfloat64_t svsubr_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f32_m))) svfloat32_t svsubr_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f16_m))) svfloat16_t svsubr_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f64_x))) svfloat64_t svsubr_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f32_x))) svfloat32_t svsubr_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f16_x))) svfloat16_t svsubr_x(svbool_t, svfloat16_t, svfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f64_z))) svfloat64_t svsubr_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f32_z))) svfloat32_t svsubr_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f16_z))) svfloat16_t svsubr_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u8_m))) svuint8_t svsubr_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u32_m))) svuint32_t svsubr_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u64_m))) svuint64_t svsubr_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u16_m))) svuint16_t svsubr_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s8_m))) svint8_t svsubr_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s32_m))) svint32_t svsubr_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s64_m))) svint64_t svsubr_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s16_m))) svint16_t svsubr_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u8_x))) svuint8_t svsubr_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u32_x))) svuint32_t svsubr_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u64_x))) svuint64_t svsubr_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u16_x))) svuint16_t svsubr_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s8_x))) svint8_t svsubr_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s32_x))) svint32_t svsubr_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s64_x))) svint64_t svsubr_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s16_x))) svint16_t svsubr_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u8_z))) svuint8_t svsubr_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u32_z))) svuint32_t svsubr_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u64_z))) svuint64_t svsubr_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u16_z))) svuint16_t svsubr_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s8_z))) svint8_t svsubr_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s32_z))) svint32_t svsubr_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s64_z))) svint64_t svsubr_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s16_z))) svint16_t svsubr_z(svbool_t, svint16_t, svint16_t); __aio 
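/*
 * Editorial note (not part of the generated header): the _m/_x/_z suffixes on
 * the svsub and svsubr overloads above select how inactive predicate lanes are
 * handled: _m keeps the first vector operand, _z zeroes the lane, _x leaves it
 * unspecified; svsubr is reversed subtraction (second operand minus first).
 * A minimal sketch; sub_variants_f32 is a hypothetical name.
 *
 *   svfloat32_t sub_variants_f32(svbool_t pg, svfloat32_t a, svfloat32_t b) {
 *       svfloat32_t m = svsub_m(pg, a, b);      // inactive lanes keep a
 *       svfloat32_t z = svsub_z(pg, a, b);      // inactive lanes are zero
 *       svfloat32_t x = svsub_x(pg, a, b);      // inactive lanes unspecified
 *       svfloat32_t r = svsubr_m(pg, a, 1.0f);  // active lanes compute 1.0f - a
 *       (void)z; (void)x; (void)r;
 *       return m;
 *   }
 */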
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_u8))) svuint8_t svtbl(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_u32))) svuint32_t svtbl(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_u64))) svuint64_t svtbl(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_u16))) svuint16_t svtbl(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_s8))) svint8_t svtbl(svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_f64))) svfloat64_t svtbl(svfloat64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_f32))) svfloat32_t svtbl(svfloat32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_f16))) svfloat16_t svtbl(svfloat16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_s32))) svint32_t svtbl(svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_s64))) svint64_t svtbl(svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_s16))) svint16_t svtbl(svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtmad_f64))) svfloat64_t svtmad(svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtmad_f32))) svfloat32_t svtmad(svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtmad_f16))) svfloat16_t svtmad(svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_u8))) svuint8_t svtrn1(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_u32))) svuint32_t svtrn1(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_u64))) svuint64_t svtrn1(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_u16))) svuint16_t svtrn1(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_s8))) svint8_t svtrn1(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_f64))) svfloat64_t svtrn1(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_f32))) svfloat32_t svtrn1(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_f16))) svfloat16_t svtrn1(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_s32))) svint32_t svtrn1(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_s64))) svint64_t svtrn1(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_s16))) svint16_t svtrn1(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_u8))) svuint8_t svtrn2(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_u32))) svuint32_t svtrn2(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_u64))) svuint64_t svtrn2(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_u16))) svuint16_t svtrn2(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_s8))) svint8_t svtrn2(svint8_t, svint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_f64))) svfloat64_t svtrn2(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_f32))) svfloat32_t svtrn2(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_f16))) svfloat16_t svtrn2(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_s32))) svint32_t svtrn2(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_s64))) svint64_t svtrn2(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_s16))) svint16_t svtrn2(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtsmul_f64))) svfloat64_t svtsmul(svfloat64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtsmul_f32))) svfloat32_t svtsmul(svfloat32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtsmul_f16))) svfloat16_t svtsmul(svfloat16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtssel_f64))) svfloat64_t svtssel(svfloat64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtssel_f32))) svfloat32_t svtssel(svfloat32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtssel_f16))) svfloat16_t svtssel(svfloat16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_b))) svbool_t svunpkhi(svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_s32))) svint32_t svunpkhi(svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_s64))) svint64_t svunpkhi(svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_s16))) svint16_t svunpkhi(svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_u32))) svuint32_t svunpkhi(svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_u64))) svuint64_t svunpkhi(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_u16))) svuint16_t svunpkhi(svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_b))) svbool_t svunpklo(svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_s32))) svint32_t svunpklo(svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_s64))) svint64_t svunpklo(svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_s16))) svint16_t svunpklo(svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_u32))) svuint32_t svunpklo(svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_u64))) svuint64_t svunpklo(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_u16))) svuint16_t svunpklo(svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_u8))) svuint8_t svuzp1(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_u32))) svuint32_t svuzp1(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_u64))) svuint64_t svuzp1(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_u16))) svuint16_t svuzp1(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_s8))) svint8_t svuzp1(svint8_t, svint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_f64))) svfloat64_t svuzp1(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_f32))) svfloat32_t svuzp1(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_f16))) svfloat16_t svuzp1(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_s32))) svint32_t svuzp1(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_s64))) svint64_t svuzp1(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_s16))) svint16_t svuzp1(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_u8))) svuint8_t svuzp2(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_u32))) svuint32_t svuzp2(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_u64))) svuint64_t svuzp2(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_u16))) svuint16_t svuzp2(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_s8))) svint8_t svuzp2(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_f64))) svfloat64_t svuzp2(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_f32))) svfloat32_t svuzp2(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_f16))) svfloat16_t svuzp2(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_s32))) svint32_t svuzp2(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_s64))) svint64_t svuzp2(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_s16))) svint16_t svuzp2(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_s32))) svbool_t svwhilele_b8(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_s32))) svbool_t svwhilele_b32(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_s32))) svbool_t svwhilele_b64(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_s32))) svbool_t svwhilele_b16(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_s64))) svbool_t svwhilele_b8(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_s64))) svbool_t svwhilele_b32(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_s64))) svbool_t svwhilele_b64(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_s64))) svbool_t svwhilele_b16(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_u32))) svbool_t svwhilele_b8(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_u32))) svbool_t svwhilele_b32(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_u32))) svbool_t svwhilele_b64(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_u32))) svbool_t svwhilele_b16(uint32_t, uint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_u64))) svbool_t svwhilele_b8(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_u64))) svbool_t svwhilele_b32(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_u64))) svbool_t svwhilele_b64(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_u64))) svbool_t svwhilele_b16(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_u32))) svbool_t svwhilelt_b8(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_u32))) svbool_t svwhilelt_b32(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_u32))) svbool_t svwhilelt_b64(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_u32))) svbool_t svwhilelt_b16(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_u64))) svbool_t svwhilelt_b8(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_u64))) svbool_t svwhilelt_b32(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_u64))) svbool_t svwhilelt_b64(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_u64))) svbool_t svwhilelt_b16(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_s32))) svbool_t svwhilelt_b8(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_s32))) svbool_t svwhilelt_b32(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_s32))) svbool_t svwhilelt_b64(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_s32))) svbool_t svwhilelt_b16(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_s64))) svbool_t svwhilelt_b8(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_s64))) svbool_t svwhilelt_b32(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_s64))) svbool_t svwhilelt_b64(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_s64))) svbool_t svwhilelt_b16(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_u8))) svuint8_t svzip1(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_u32))) svuint32_t svzip1(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_u64))) svuint64_t svzip1(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_u16))) svuint16_t svzip1(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_s8))) svint8_t svzip1(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_f64))) svfloat64_t svzip1(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_f32))) svfloat32_t svzip1(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_f16))) svfloat16_t svzip1(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_s32))) svint32_t svzip1(svint32_t, svint32_t); __aio 
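/*
 * Editorial usage sketch (not part of the generated header): the svwhilelt_b32
 * overloads declared above build the loop predicate for a strip-mined SVE
 * loop. A minimal sketch, assuming svcntw, svld1, svmul_x and svst1 from
 * elsewhere in this header; scale_f32 is a hypothetical name.
 *
 *   void scale_f32(float32_t *dst, const float32_t *src, float32_t s, int64_t n) {
 *       for (int64_t i = 0; i < n; i += svcntw()) {   // svcntw(): 32-bit lanes per vector
 *           svbool_t pg = svwhilelt_b32(i, n);        // lane k active iff i + k < n
 *           svfloat32_t v = svld1(pg, src + i);       // inactive lanes load as zero
 *           svst1(pg, dst + i, svmul_x(pg, v, s));    // only active lanes are stored
 *       }
 *   }
 */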
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_s64))) svint64_t svzip1(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_s16))) svint16_t svzip1(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_u8))) svuint8_t svzip2(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_u32))) svuint32_t svzip2(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_u64))) svuint64_t svzip2(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_u16))) svuint16_t svzip2(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_s8))) svint8_t svzip2(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_f64))) svfloat64_t svzip2(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_f32))) svfloat32_t svzip2(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_f16))) svfloat16_t svzip2(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_s32))) svint32_t svzip2(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_s64))) svint64_t svzip2(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_s16))) svint16_t svzip2(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_n_f32))) svfloat32_t svbfdot_n_f32(svfloat32_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_f32))) svfloat32_t svbfdot_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_lane_f32))) svfloat32_t svbfdot_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_n_f32))) svfloat32_t svbfmlalb_n_f32(svfloat32_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_f32))) svfloat32_t svbfmlalb_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_lane_f32))) svfloat32_t svbfmlalb_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_n_f32))) svfloat32_t svbfmlalt_n_f32(svfloat32_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_f32))) svfloat32_t svbfmlalt_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_lane_f32))) svfloat32_t svbfmlalt_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmmla_f32))) svfloat32_t svbfmmla_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_bf16))) bfloat16_t svclasta_n_bf16(svbool_t, bfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_bf16))) svbfloat16_t svclasta_bf16(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_bf16))) bfloat16_t svclastb_n_bf16(svbool_t, bfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_bf16))) svbfloat16_t 
svclastb_bf16(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_m))) svuint16_t svcnt_bf16_m(svuint16_t, svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_x))) svuint16_t svcnt_bf16_x(svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_z))) svuint16_t svcnt_bf16_z(svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_bf16))) svbfloat16x2_t svcreate2_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_bf16))) svbfloat16x3_t svcreate3_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_bf16))) svbfloat16x4_t svcreate4_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_m))) svbfloat16_t svcvt_bf16_f32_m(svbfloat16_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_x))) svbfloat16_t svcvt_bf16_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_z))) svbfloat16_t svcvt_bf16_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_bf16_f32_m))) svbfloat16_t svcvtnt_bf16_f32_m(svbfloat16_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16))) svbfloat16_t svdup_n_bf16(bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_m))) svbfloat16_t svdup_n_bf16_m(svbfloat16_t, svbool_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_x))) svbfloat16_t svdup_n_bf16_x(svbool_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_z))) svbfloat16_t svdup_n_bf16_z(svbool_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_bf16))) svbfloat16_t svdup_lane_bf16(svbfloat16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_bf16))) svbfloat16_t svdupq_n_bf16(bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_bf16))) svbfloat16_t svdupq_lane_bf16(svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_bf16))) svbfloat16_t svext_bf16(svbfloat16_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_bf16))) svbfloat16_t svget2_bf16(svbfloat16x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_bf16))) svbfloat16_t svget3_bf16(svbfloat16x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_bf16))) svbfloat16_t svget4_bf16(svbfloat16x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_bf16))) svbfloat16_t svinsr_n_bf16(svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_bf16))) bfloat16_t svlasta_bf16(svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_bf16))) bfloat16_t svlastb_bf16(svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16))) svbfloat16_t svld1_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16))) 
svbfloat16_t svld1_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_bf16))) svbfloat16_t svld1rq_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_bf16))) svbfloat16x2_t svld2_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_bf16))) svbfloat16x2_t svld2_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_bf16))) svbfloat16x3_t svld3_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_bf16))) svbfloat16x3_t svld3_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_bf16))) svbfloat16x4_t svld4_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_bf16))) svbfloat16x4_t svld4_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_bf16))) svbfloat16_t svldff1_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_bf16))) svbfloat16_t svldff1_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_bf16))) svbfloat16_t svldnf1_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_bf16))) svbfloat16_t svldnf1_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16))) svbfloat16_t svldnt1_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16))) svbfloat16_t svldnt1_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_bf16))) uint64_t svlen_bf16(svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_bf16))) svbfloat16_t svrev_bf16(svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_bf16))) svbfloat16_t svsel_bf16(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_bf16))) svbfloat16x2_t svset2_bf16(svbfloat16x2_t, uint64_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_bf16))) svbfloat16x3_t svset3_bf16(svbfloat16x3_t, uint64_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_bf16))) svbfloat16x4_t svset4_bf16(svbfloat16x4_t, uint64_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_bf16))) svbfloat16_t svsplice_bf16(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16))) void svst1_bf16(svbool_t, bfloat16_t *, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16))) void svst1_vnum_bf16(svbool_t, bfloat16_t *, int64_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_bf16))) void svst2_bf16(svbool_t, bfloat16_t *, svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_bf16))) void svst2_vnum_bf16(svbool_t, bfloat16_t *, int64_t, svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_bf16))) void svst3_bf16(svbool_t, bfloat16_t *, svbfloat16x3_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_bf16))) void svst3_vnum_bf16(svbool_t, bfloat16_t *, int64_t, svbfloat16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_bf16))) void svst4_bf16(svbool_t, bfloat16_t *, svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_bf16))) void svst4_vnum_bf16(svbool_t, bfloat16_t *, int64_t, svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16))) void svstnt1_bf16(svbool_t, bfloat16_t *, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16))) void svstnt1_vnum_bf16(svbool_t, bfloat16_t *, int64_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_bf16))) svbfloat16_t svtbl_bf16(svbfloat16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_bf16))) svbfloat16_t svtrn1_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_bf16))) svbfloat16_t svtrn2_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_bf16))) svbfloat16x2_t svundef2_bf16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_bf16))) svbfloat16x3_t svundef3_bf16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_bf16))) svbfloat16x4_t svundef4_bf16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_bf16))) svbfloat16_t svundef_bf16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_bf16))) svbfloat16_t svuzp1_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_bf16))) svbfloat16_t svuzp2_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_bf16))) svbfloat16_t svzip1_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_bf16))) svbfloat16_t svzip2_bf16(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_n_f32))) svfloat32_t svbfdot(svfloat32_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_f32))) svfloat32_t svbfdot(svfloat32_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_lane_f32))) svfloat32_t svbfdot_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_n_f32))) svfloat32_t svbfmlalb(svfloat32_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_f32))) svfloat32_t svbfmlalb(svfloat32_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_lane_f32))) svfloat32_t svbfmlalb_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_n_f32))) svfloat32_t svbfmlalt(svfloat32_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_f32))) svfloat32_t svbfmlalt(svfloat32_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_lane_f32))) svfloat32_t svbfmlalt_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmmla_f32))) svfloat32_t svbfmmla(svfloat32_t, svbfloat16_t, svbfloat16_t); __aio 
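/*
 * Editorial usage sketch (not part of the generated header): the svbfdot
 * overload declared above accumulates, into each float32 lane, the dot product
 * of the corresponding pair of bfloat16 elements. A minimal sketch, assuming
 * the SVE BF16 extension is available on the target; bf16_dot_step is a
 * hypothetical name.
 *
 *   svfloat32_t bf16_dot_step(svfloat32_t acc, svbfloat16_t a, svbfloat16_t b) {
 *       return svbfdot(acc, a, b);   // acc[i] += a[2i]*b[2i] + a[2i+1]*b[2i+1]
 *   }
 */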
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_bf16))) bfloat16_t svclasta(svbool_t, bfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_bf16))) svbfloat16_t svclasta(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_bf16))) bfloat16_t svclastb(svbool_t, bfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_bf16))) svbfloat16_t svclastb(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_m))) svuint16_t svcnt_m(svuint16_t, svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_x))) svuint16_t svcnt_x(svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_z))) svuint16_t svcnt_z(svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_bf16))) svbfloat16x2_t svcreate2(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_bf16))) svbfloat16x3_t svcreate3(svbfloat16_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_bf16))) svbfloat16x4_t svcreate4(svbfloat16_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_m))) svbfloat16_t svcvt_bf16_m(svbfloat16_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_x))) svbfloat16_t svcvt_bf16_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_z))) svbfloat16_t svcvt_bf16_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_bf16_f32_m))) svbfloat16_t svcvtnt_bf16_m(svbfloat16_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16))) svbfloat16_t svdup_bf16(bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_m))) svbfloat16_t svdup_bf16_m(svbfloat16_t, svbool_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_x))) svbfloat16_t svdup_bf16_x(svbool_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_z))) svbfloat16_t svdup_bf16_z(svbool_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_bf16))) svbfloat16_t svdup_lane(svbfloat16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_bf16))) svbfloat16_t svdupq_bf16(bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_bf16))) svbfloat16_t svdupq_lane(svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_bf16))) svbfloat16_t svext(svbfloat16_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_bf16))) svbfloat16_t svget2(svbfloat16x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_bf16))) svbfloat16_t svget3(svbfloat16x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_bf16))) svbfloat16_t svget4(svbfloat16x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_bf16))) svbfloat16_t svinsr(svbfloat16_t, bfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_bf16))) bfloat16_t svlasta(svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_bf16))) bfloat16_t svlastb(svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16))) svbfloat16_t svld1(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16))) svbfloat16_t svld1_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_bf16))) svbfloat16_t svld1rq(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_bf16))) svbfloat16x2_t svld2(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_bf16))) svbfloat16x2_t svld2_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_bf16))) svbfloat16x3_t svld3(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_bf16))) svbfloat16x3_t svld3_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_bf16))) svbfloat16x4_t svld4(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_bf16))) svbfloat16x4_t svld4_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_bf16))) svbfloat16_t svldff1(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_bf16))) svbfloat16_t svldff1_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_bf16))) svbfloat16_t svldnf1(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_bf16))) svbfloat16_t svldnf1_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16))) svbfloat16_t svldnt1(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16))) svbfloat16_t svldnt1_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_bf16))) uint64_t svlen(svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_bf16))) svbfloat16_t svrev(svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_bf16))) svbfloat16_t svsel(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_bf16))) svbfloat16x2_t svset2(svbfloat16x2_t, uint64_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_bf16))) svbfloat16x3_t svset3(svbfloat16x3_t, uint64_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_bf16))) svbfloat16x4_t svset4(svbfloat16x4_t, uint64_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_bf16))) svbfloat16_t svsplice(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16))) void svst1(svbool_t, bfloat16_t *, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16))) void svst1_vnum(svbool_t, bfloat16_t *, int64_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_bf16))) void svst2(svbool_t, 
bfloat16_t *, svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_bf16))) void svst2_vnum(svbool_t, bfloat16_t *, int64_t, svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_bf16))) void svst3(svbool_t, bfloat16_t *, svbfloat16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_bf16))) void svst3_vnum(svbool_t, bfloat16_t *, int64_t, svbfloat16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_bf16))) void svst4(svbool_t, bfloat16_t *, svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_bf16))) void svst4_vnum(svbool_t, bfloat16_t *, int64_t, svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16))) void svstnt1(svbool_t, bfloat16_t *, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16))) void svstnt1_vnum(svbool_t, bfloat16_t *, int64_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_bf16))) svbfloat16_t svtbl(svbfloat16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_bf16))) svbfloat16_t svtrn1(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_bf16))) svbfloat16_t svtrn2(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_bf16))) svbfloat16_t svuzp1(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_bf16))) svbfloat16_t svuzp2(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_bf16))) svbfloat16_t svzip1(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_bf16))) svbfloat16_t svzip2(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_bf16))) svbfloat16_t svtrn1q_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_bf16))) svbfloat16_t svtrn2q_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_bf16))) svbfloat16_t svuzp1q_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_bf16))) svbfloat16_t svuzp2q_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_bf16))) svbfloat16_t svzip1q_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_bf16))) svbfloat16_t svzip2q_bf16(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_bf16))) svbfloat16_t svtrn1q(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_bf16))) svbfloat16_t svtrn2q(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_bf16))) svbfloat16_t svuzp1q(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_bf16))) svbfloat16_t svuzp2q(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_bf16))) svbfloat16_t svzip1q(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_bf16))) svbfloat16_t svzip2q(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32))) svfloat32_t svmmla_f32(svfloat32_t, svfloat32_t, 
svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32))) svfloat32_t svmmla(svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u8))) svuint8_t svld1ro_u8(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u32))) svuint32_t svld1ro_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u64))) svuint64_t svld1ro_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u16))) svuint16_t svld1ro_u16(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s8))) svint8_t svld1ro_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f64))) svfloat64_t svld1ro_f64(svbool_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f32))) svfloat32_t svld1ro_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f16))) svfloat16_t svld1ro_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s32))) svint32_t svld1ro_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s64))) svint64_t svld1ro_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s16))) svint16_t svld1ro_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f64))) svfloat64_t svmmla_f64(svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u8))) svuint8_t svtrn1q_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u32))) svuint32_t svtrn1q_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u64))) svuint64_t svtrn1q_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u16))) svuint16_t svtrn1q_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s8))) svint8_t svtrn1q_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f64))) svfloat64_t svtrn1q_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f32))) svfloat32_t svtrn1q_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f16))) svfloat16_t svtrn1q_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s32))) svint32_t svtrn1q_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s64))) svint64_t svtrn1q_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s16))) svint16_t svtrn1q_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u8))) svuint8_t svtrn2q_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u32))) svuint32_t svtrn2q_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u64))) svuint64_t svtrn2q_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u16))) svuint16_t svtrn2q_u16(svuint16_t, svuint16_t); __ai 
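/*
 * Editorial usage sketch (not part of the generated header): svld1ro_f64 and
 * svmmla_f64 above belong to the FP64 matrix-multiply extension; svld1ro loads
 * a 256-bit block and replicates it across the vector, and svmmla_f64 performs
 * a 2x2 float64 matrix multiply-accumulate per 256-bit segment. A minimal
 * sketch, assuming that extension is available; f64_mmla_tile is a
 * hypothetical name.
 *
 *   svfloat64_t f64_mmla_tile(svbool_t pg, svfloat64_t acc,
 *                             const float64_t *a, const float64_t *b) {
 *       svfloat64_t va = svld1ro_f64(pg, a);  // load 256 bits of a, replicated
 *       svfloat64_t vb = svld1ro_f64(pg, b);  // load 256 bits of b, replicated
 *       return svmmla_f64(acc, va, vb);       // per-segment 2x2 multiply-accumulate
 *   }
 */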
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s8))) svint8_t svtrn2q_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f64))) svfloat64_t svtrn2q_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f32))) svfloat32_t svtrn2q_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f16))) svfloat16_t svtrn2q_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s32))) svint32_t svtrn2q_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s64))) svint64_t svtrn2q_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s16))) svint16_t svtrn2q_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u8))) svuint8_t svuzp1q_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u32))) svuint32_t svuzp1q_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u64))) svuint64_t svuzp1q_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u16))) svuint16_t svuzp1q_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s8))) svint8_t svuzp1q_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f64))) svfloat64_t svuzp1q_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f32))) svfloat32_t svuzp1q_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f16))) svfloat16_t svuzp1q_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s32))) svint32_t svuzp1q_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s64))) svint64_t svuzp1q_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s16))) svint16_t svuzp1q_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u8))) svuint8_t svuzp2q_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u32))) svuint32_t svuzp2q_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u64))) svuint64_t svuzp2q_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u16))) svuint16_t svuzp2q_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s8))) svint8_t svuzp2q_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f64))) svfloat64_t svuzp2q_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f32))) svfloat32_t svuzp2q_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f16))) svfloat16_t svuzp2q_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s32))) svint32_t svuzp2q_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s64))) svint64_t svuzp2q_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s16))) svint16_t svuzp2q_s16(svint16_t, svint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u8))) svuint8_t svzip1q_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u32))) svuint32_t svzip1q_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u64))) svuint64_t svzip1q_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u16))) svuint16_t svzip1q_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s8))) svint8_t svzip1q_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f64))) svfloat64_t svzip1q_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f32))) svfloat32_t svzip1q_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f16))) svfloat16_t svzip1q_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s32))) svint32_t svzip1q_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s64))) svint64_t svzip1q_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s16))) svint16_t svzip1q_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u8))) svuint8_t svzip2q_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u32))) svuint32_t svzip2q_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u64))) svuint64_t svzip2q_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u16))) svuint16_t svzip2q_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s8))) svint8_t svzip2q_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f64))) svfloat64_t svzip2q_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f32))) svfloat32_t svzip2q_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f16))) svfloat16_t svzip2q_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s32))) svint32_t svzip2q_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s64))) svint64_t svzip2q_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s16))) svint16_t svzip2q_s16(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u8))) svuint8_t svld1ro(svbool_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u32))) svuint32_t svld1ro(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u64))) svuint64_t svld1ro(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u16))) svuint16_t svld1ro(svbool_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s8))) svint8_t svld1ro(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f64))) svfloat64_t svld1ro(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f32))) svfloat32_t svld1ro(svbool_t, float32_t const *); __aio 
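/*
 * Illustrative sketch, not part of the header: interleaving whole 128-bit
 * segments with the svzip1q_f64/svzip2q_f64 declarations above.  The helper
 * name segment_interleave_f64 is hypothetical.
 *
 *   #include <arm_sve.h>
 *
 *   static void segment_interleave_f64(svfloat64_t a, svfloat64_t b,
 *                                      svfloat64_t *lo, svfloat64_t *hi) {
 *     *lo = svzip1q_f64(a, b);   // 128-bit segments from the low halves of a and b
 *     *hi = svzip2q_f64(a, b);   // 128-bit segments from the high halves
 *   }
 */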
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f16))) svfloat16_t svld1ro(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s32))) svint32_t svld1ro(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s64))) svint64_t svld1ro(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s16))) svint16_t svld1ro(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f64))) svfloat64_t svmmla(svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u8))) svuint8_t svtrn1q(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u32))) svuint32_t svtrn1q(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u64))) svuint64_t svtrn1q(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u16))) svuint16_t svtrn1q(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s8))) svint8_t svtrn1q(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f64))) svfloat64_t svtrn1q(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f32))) svfloat32_t svtrn1q(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f16))) svfloat16_t svtrn1q(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s32))) svint32_t svtrn1q(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s64))) svint64_t svtrn1q(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s16))) svint16_t svtrn1q(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u8))) svuint8_t svtrn2q(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u32))) svuint32_t svtrn2q(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u64))) svuint64_t svtrn2q(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u16))) svuint16_t svtrn2q(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s8))) svint8_t svtrn2q(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f64))) svfloat64_t svtrn2q(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f32))) svfloat32_t svtrn2q(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f16))) svfloat16_t svtrn2q(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s32))) svint32_t svtrn2q(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s64))) svint64_t svtrn2q(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s16))) svint16_t svtrn2q(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u8))) svuint8_t svuzp1q(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u32))) svuint32_t svuzp1q(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u64))) 
svuint64_t svuzp1q(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u16))) svuint16_t svuzp1q(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s8))) svint8_t svuzp1q(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f64))) svfloat64_t svuzp1q(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f32))) svfloat32_t svuzp1q(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f16))) svfloat16_t svuzp1q(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s32))) svint32_t svuzp1q(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s64))) svint64_t svuzp1q(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s16))) svint16_t svuzp1q(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u8))) svuint8_t svuzp2q(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u32))) svuint32_t svuzp2q(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u64))) svuint64_t svuzp2q(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u16))) svuint16_t svuzp2q(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s8))) svint8_t svuzp2q(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f64))) svfloat64_t svuzp2q(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f32))) svfloat32_t svuzp2q(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f16))) svfloat16_t svuzp2q(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s32))) svint32_t svuzp2q(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s64))) svint64_t svuzp2q(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s16))) svint16_t svuzp2q(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u8))) svuint8_t svzip1q(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u32))) svuint32_t svzip1q(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u64))) svuint64_t svzip1q(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u16))) svuint16_t svzip1q(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s8))) svint8_t svzip1q(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f64))) svfloat64_t svzip1q(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f32))) svfloat32_t svzip1q(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f16))) svfloat16_t svzip1q(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s32))) svint32_t svzip1q(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s64))) svint64_t svzip1q(svint64_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s16))) svint16_t svzip1q(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u8))) svuint8_t svzip2q(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u32))) svuint32_t svzip2q(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u64))) svuint64_t svzip2q(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u16))) svuint16_t svzip2q(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s8))) svint8_t svzip2q(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f64))) svfloat64_t svzip2q(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f32))) svfloat32_t svzip2q(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f16))) svfloat16_t svzip2q(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s32))) svint32_t svzip2q(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s64))) svint64_t svzip2q(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s16))) svint16_t svzip2q(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_bf16))) svbfloat16_t svld1ro_bf16(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_bf16))) svbfloat16_t svld1ro(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_s32))) svint32_t svmmla_s32(svint32_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_u32))) svuint32_t svmmla_u32(svuint32_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_n_s32))) svint32_t svsudot_n_s32(svint32_t, svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_s32))) svint32_t svsudot_s32(svint32_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_lane_s32))) svint32_t svsudot_lane_s32(svint32_t, svint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_n_s32))) svint32_t svusdot_n_s32(svint32_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_s32))) svint32_t svusdot_s32(svint32_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_lane_s32))) svint32_t svusdot_lane_s32(svint32_t, svuint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusmmla_s32))) svint32_t svusmmla_s32(svint32_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_s32))) svint32_t svmmla(svint32_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_u32))) svuint32_t svmmla(svuint32_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_n_s32))) svint32_t svsudot(svint32_t, svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_s32))) svint32_t svsudot(svint32_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_lane_s32))) svint32_t svsudot_lane(svint32_t, svint8_t, svuint8_t, uint64_t); __aio 
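/*
 * Illustrative sketch, not part of the header: a mixed-sign 8-bit dot product
 * using the svusdot_s32 declaration above (unsigned-by-signed, accumulating
 * into 32-bit lanes).  The helper name dot_accumulate_us is hypothetical.
 *
 *   #include <arm_sve.h>
 *
 *   static svint32_t dot_accumulate_us(svint32_t acc, svuint8_t u, svint8_t s) {
 *     // Each int32 lane accumulates a four-way dot product of u and s bytes.
 *     return svusdot_s32(acc, u, s);
 *   }
 */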
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_n_s32))) svint32_t svusdot(svint32_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_s32))) svint32_t svusdot(svint32_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_lane_s32))) svint32_t svusdot_lane(svint32_t, svuint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusmmla_s32))) svint32_t svusmmla(svint32_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s8))) svint8_t svaba_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s32))) svint32_t svaba_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s64))) svint64_t svaba_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s16))) svint16_t svaba_n_s16(svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_u8))) svuint8_t svaba_n_u8(svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_u32))) svuint32_t svaba_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_u64))) svuint64_t svaba_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_u16))) svuint16_t svaba_n_u16(svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_s8))) svint8_t svaba_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_s32))) svint32_t svaba_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_s64))) svint64_t svaba_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_s16))) svint16_t svaba_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_u8))) svuint8_t svaba_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_u32))) svuint32_t svaba_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_u64))) svuint64_t svaba_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_u16))) svuint16_t svaba_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_s32))) svint32_t svabalb_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_s64))) svint64_t svabalb_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_s16))) svint16_t svabalb_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_u32))) svuint32_t svabalb_n_u32(svuint32_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_u64))) svuint64_t svabalb_n_u64(svuint64_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_u16))) svuint16_t svabalb_n_u16(svuint16_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_s32))) svint32_t svabalb_s32(svint32_t, svint16_t, svint16_t); __ai 
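/*
 * Illustrative sketch, not part of the header: absolute-difference-and-
 * accumulate with the svaba_u8 declaration above.  The helper name sad_step
 * is hypothetical.
 *
 *   #include <arm_sve.h>
 *
 *   static svuint8_t sad_step(svuint8_t acc, svuint8_t a, svuint8_t b) {
 *     return svaba_u8(acc, a, b);   // acc + |a - b|, lane by lane
 *   }
 */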
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_s64))) svint64_t svabalb_s64(svint64_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_s16))) svint16_t svabalb_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_u32))) svuint32_t svabalb_u32(svuint32_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_u64))) svuint64_t svabalb_u64(svuint64_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_u16))) svuint16_t svabalb_u16(svuint16_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_s32))) svint32_t svabalt_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_s64))) svint64_t svabalt_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_s16))) svint16_t svabalt_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_u32))) svuint32_t svabalt_n_u32(svuint32_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_u64))) svuint64_t svabalt_n_u64(svuint64_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_u16))) svuint16_t svabalt_n_u16(svuint16_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_s32))) svint32_t svabalt_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_s64))) svint64_t svabalt_s64(svint64_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_s16))) svint16_t svabalt_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_u32))) svuint32_t svabalt_u32(svuint32_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_u64))) svuint64_t svabalt_u64(svuint64_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_u16))) svuint16_t svabalt_u16(svuint16_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_s32))) svint32_t svabdlb_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_s64))) svint64_t svabdlb_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_s16))) svint16_t svabdlb_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_u32))) svuint32_t svabdlb_n_u32(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_u64))) svuint64_t svabdlb_n_u64(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_u16))) svuint16_t svabdlb_n_u16(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_s32))) svint32_t svabdlb_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_s64))) svint64_t svabdlb_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_s16))) svint16_t svabdlb_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_u32))) svuint32_t svabdlb_u32(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_u64))) 
svuint64_t svabdlb_u64(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_u16))) svuint16_t svabdlb_u16(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_s32))) svint32_t svabdlt_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_s64))) svint64_t svabdlt_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_s16))) svint16_t svabdlt_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_u32))) svuint32_t svabdlt_n_u32(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_u64))) svuint64_t svabdlt_n_u64(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_u16))) svuint16_t svabdlt_n_u16(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_s32))) svint32_t svabdlt_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_s64))) svint64_t svabdlt_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_s16))) svint16_t svabdlt_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_u32))) svuint32_t svabdlt_u32(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_u64))) svuint64_t svabdlt_u64(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_u16))) svuint16_t svabdlt_u16(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s32_m))) svint32_t svadalp_s32_m(svbool_t, svint32_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s64_m))) svint64_t svadalp_s64_m(svbool_t, svint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s16_m))) svint16_t svadalp_s16_m(svbool_t, svint16_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s32_x))) svint32_t svadalp_s32_x(svbool_t, svint32_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s64_x))) svint64_t svadalp_s64_x(svbool_t, svint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s16_x))) svint16_t svadalp_s16_x(svbool_t, svint16_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s32_z))) svint32_t svadalp_s32_z(svbool_t, svint32_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s64_z))) svint64_t svadalp_s64_z(svbool_t, svint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s16_z))) svint16_t svadalp_s16_z(svbool_t, svint16_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u32_m))) svuint32_t svadalp_u32_m(svbool_t, svuint32_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u64_m))) svuint64_t svadalp_u64_m(svbool_t, svuint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u16_m))) svuint16_t svadalp_u16_m(svbool_t, svuint16_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u32_x))) svuint32_t svadalp_u32_x(svbool_t, svuint32_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u64_x))) svuint64_t svadalp_u64_x(svbool_t, svuint64_t, svuint32_t); __ai 
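/*
 * Illustrative sketch, not part of the header: widening pairwise accumulation
 * with the svadalp_u32_m declaration above.  Under predicate pg, each pair of
 * adjacent uint16 lanes of x is summed and added into a uint32 accumulator
 * lane.  The helper name pairwise_accumulate_u32 is hypothetical.
 *
 *   #include <arm_sve.h>
 *
 *   static svuint32_t pairwise_accumulate_u32(svbool_t pg, svuint32_t acc,
 *                                             svuint16_t x) {
 *     return svadalp_u32_m(pg, acc, x);
 *   }
 */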
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u16_x))) svuint16_t svadalp_u16_x(svbool_t, svuint16_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u32_z))) svuint32_t svadalp_u32_z(svbool_t, svuint32_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u64_z))) svuint64_t svadalp_u64_z(svbool_t, svuint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u16_z))) svuint16_t svadalp_u16_z(svbool_t, svuint16_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclb_n_u32))) svuint32_t svadclb_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclb_n_u64))) svuint64_t svadclb_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclb_u32))) svuint32_t svadclb_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclb_u64))) svuint64_t svadclb_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclt_n_u32))) svuint32_t svadclt_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclt_n_u64))) svuint64_t svadclt_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclt_u32))) svuint32_t svadclt_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclt_u64))) svuint64_t svadclt_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_u32))) svuint16_t svaddhnb_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_u64))) svuint32_t svaddhnb_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_u16))) svuint8_t svaddhnb_n_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_s32))) svint16_t svaddhnb_n_s32(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_s64))) svint32_t svaddhnb_n_s64(svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_s16))) svint8_t svaddhnb_n_s16(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_u32))) svuint16_t svaddhnb_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_u64))) svuint32_t svaddhnb_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_u16))) svuint8_t svaddhnb_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_s32))) svint16_t svaddhnb_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_s64))) svint32_t svaddhnb_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_s16))) svint8_t svaddhnb_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_u32))) svuint16_t svaddhnt_n_u32(svuint16_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_u64))) svuint32_t svaddhnt_n_u64(svuint32_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_u16))) svuint8_t svaddhnt_n_u16(svuint8_t, svuint16_t, uint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_s32))) svint16_t svaddhnt_n_s32(svint16_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_s64))) svint32_t svaddhnt_n_s64(svint32_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_s16))) svint8_t svaddhnt_n_s16(svint8_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_u32))) svuint16_t svaddhnt_u32(svuint16_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_u64))) svuint32_t svaddhnt_u64(svuint32_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_u16))) svuint8_t svaddhnt_u16(svuint8_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_s32))) svint16_t svaddhnt_s32(svint16_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_s64))) svint32_t svaddhnt_s64(svint32_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_s16))) svint8_t svaddhnt_s16(svint8_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_s32))) svint32_t svaddlb_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_s64))) svint64_t svaddlb_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_s16))) svint16_t svaddlb_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_u32))) svuint32_t svaddlb_n_u32(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_u64))) svuint64_t svaddlb_n_u64(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_u16))) svuint16_t svaddlb_n_u16(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_s32))) svint32_t svaddlb_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_s64))) svint64_t svaddlb_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_s16))) svint16_t svaddlb_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_u32))) svuint32_t svaddlb_u32(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_u64))) svuint64_t svaddlb_u64(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_u16))) svuint16_t svaddlb_u16(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_n_s32))) svint32_t svaddlbt_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_n_s64))) svint64_t svaddlbt_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_n_s16))) svint16_t svaddlbt_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_s32))) svint32_t svaddlbt_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_s64))) svint64_t svaddlbt_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_s16))) svint16_t svaddlbt_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_s32))) svint32_t svaddlt_n_s32(svint16_t, int16_t); __ai 
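/*
 * Illustrative sketch, not part of the header: widening addition of two int16
 * vectors with svaddlb_s32 (declared above) and svaddlt_s32 (declared just
 * below), which handle the even-numbered (bottom) and odd-numbered (top)
 * lanes respectively.  The helper name widen_add_s16 is hypothetical.
 *
 *   #include <arm_sve.h>
 *
 *   static void widen_add_s16(svint16_t a, svint16_t b,
 *                             svint32_t *bottom, svint32_t *top) {
 *     *bottom = svaddlb_s32(a, b);   // even lanes, widened to 32 bits
 *     *top    = svaddlt_s32(a, b);   // odd lanes, widened to 32 bits
 *   }
 */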
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_s64))) svint64_t svaddlt_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_s16))) svint16_t svaddlt_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_u32))) svuint32_t svaddlt_n_u32(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_u64))) svuint64_t svaddlt_n_u64(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_u16))) svuint16_t svaddlt_n_u16(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_s32))) svint32_t svaddlt_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_s64))) svint64_t svaddlt_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_s16))) svint16_t svaddlt_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_u32))) svuint32_t svaddlt_u32(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_u64))) svuint64_t svaddlt_u64(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_u16))) svuint16_t svaddlt_u16(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f64_m))) svfloat64_t svaddp_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f32_m))) svfloat32_t svaddp_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f16_m))) svfloat16_t svaddp_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f64_x))) svfloat64_t svaddp_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f32_x))) svfloat32_t svaddp_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f16_x))) svfloat16_t svaddp_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u8_m))) svuint8_t svaddp_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u32_m))) svuint32_t svaddp_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u64_m))) svuint64_t svaddp_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u16_m))) svuint16_t svaddp_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s8_m))) svint8_t svaddp_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s32_m))) svint32_t svaddp_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s64_m))) svint64_t svaddp_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s16_m))) svint16_t svaddp_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u8_x))) svuint8_t svaddp_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u32_x))) svuint32_t svaddp_u32_x(svbool_t, svuint32_t, svuint32_t); __ai 
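/*
 * Illustrative sketch, not part of the header: predicated pairwise addition
 * with the svaddp_f32_x declaration above; sums of adjacent lanes of the two
 * operands are combined into a single result vector (see the ACLE for the
 * exact lane arrangement).  The helper name pairwise_sum_f32 is hypothetical.
 *
 *   #include <arm_sve.h>
 *
 *   static svfloat32_t pairwise_sum_f32(svbool_t pg, svfloat32_t a,
 *                                       svfloat32_t b) {
 *     return svaddp_f32_x(pg, a, b);
 *   }
 */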
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u64_x))) svuint64_t svaddp_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u16_x))) svuint16_t svaddp_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s8_x))) svint8_t svaddp_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s32_x))) svint32_t svaddp_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s64_x))) svint64_t svaddp_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s16_x))) svint16_t svaddp_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_s32))) svint32_t svaddwb_n_s32(svint32_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_s64))) svint64_t svaddwb_n_s64(svint64_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_s16))) svint16_t svaddwb_n_s16(svint16_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_u32))) svuint32_t svaddwb_n_u32(svuint32_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_u64))) svuint64_t svaddwb_n_u64(svuint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_u16))) svuint16_t svaddwb_n_u16(svuint16_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_s32))) svint32_t svaddwb_s32(svint32_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_s64))) svint64_t svaddwb_s64(svint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_s16))) svint16_t svaddwb_s16(svint16_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_u32))) svuint32_t svaddwb_u32(svuint32_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_u64))) svuint64_t svaddwb_u64(svuint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_u16))) svuint16_t svaddwb_u16(svuint16_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_s32))) svint32_t svaddwt_n_s32(svint32_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_s64))) svint64_t svaddwt_n_s64(svint64_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_s16))) svint16_t svaddwt_n_s16(svint16_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_u32))) svuint32_t svaddwt_n_u32(svuint32_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_u64))) svuint64_t svaddwt_n_u64(svuint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_u16))) svuint16_t svaddwt_n_u16(svuint16_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_s32))) svint32_t svaddwt_s32(svint32_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_s64))) svint64_t svaddwt_s64(svint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_s16))) svint16_t svaddwt_s16(svint16_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_u32))) svuint32_t svaddwt_u32(svuint32_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_u64))) 
svuint64_t svaddwt_u64(svuint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_u16))) svuint16_t svaddwt_u16(svuint16_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_u8))) svuint8_t svbcax_n_u8(svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_u32))) svuint32_t svbcax_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_u64))) svuint64_t svbcax_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_u16))) svuint16_t svbcax_n_u16(svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_s8))) svint8_t svbcax_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_s32))) svint32_t svbcax_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_s64))) svint64_t svbcax_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_s16))) svint16_t svbcax_n_s16(svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_u8))) svuint8_t svbcax_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_u32))) svuint32_t svbcax_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_u64))) svuint64_t svbcax_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_u16))) svuint16_t svbcax_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_s8))) svint8_t svbcax_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_s32))) svint32_t svbcax_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_s64))) svint64_t svbcax_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_s16))) svint16_t svbcax_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_u8))) svuint8_t svbsl1n_n_u8(svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_u32))) svuint32_t svbsl1n_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_u64))) svuint64_t svbsl1n_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_u16))) svuint16_t svbsl1n_n_u16(svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_s8))) svint8_t svbsl1n_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_s32))) svint32_t svbsl1n_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_s64))) svint64_t svbsl1n_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_s16))) svint16_t svbsl1n_n_s16(svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_u8))) svuint8_t svbsl1n_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_u32))) svuint32_t 
svbsl1n_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_u64))) svuint64_t svbsl1n_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_u16))) svuint16_t svbsl1n_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_s8))) svint8_t svbsl1n_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_s32))) svint32_t svbsl1n_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_s64))) svint64_t svbsl1n_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_s16))) svint16_t svbsl1n_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_u8))) svuint8_t svbsl2n_n_u8(svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_u32))) svuint32_t svbsl2n_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_u64))) svuint64_t svbsl2n_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_u16))) svuint16_t svbsl2n_n_u16(svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_s8))) svint8_t svbsl2n_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_s32))) svint32_t svbsl2n_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_s64))) svint64_t svbsl2n_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_s16))) svint16_t svbsl2n_n_s16(svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_u8))) svuint8_t svbsl2n_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_u32))) svuint32_t svbsl2n_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_u64))) svuint64_t svbsl2n_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_u16))) svuint16_t svbsl2n_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_s8))) svint8_t svbsl2n_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_s32))) svint32_t svbsl2n_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_s64))) svint64_t svbsl2n_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_s16))) svint16_t svbsl2n_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_u8))) svuint8_t svbsl_n_u8(svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_u32))) svuint32_t svbsl_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_u64))) svuint64_t svbsl_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_u16))) svuint16_t svbsl_n_u16(svuint16_t, svuint16_t, uint16_t); __ai 
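/*
 * Illustrative sketch, not part of the header: bitwise select with the
 * svbsl_n_u8 declaration above.  Per the ACLE, bits that are set in the third
 * operand take their value from the first operand and cleared bits take
 * theirs from the second.  The helper name select_bits_u8 is hypothetical.
 *
 *   #include <arm_sve.h>
 *
 *   static svuint8_t select_bits_u8(svuint8_t a, svuint8_t b, uint8_t mask) {
 *     return svbsl_n_u8(a, b, mask);
 *   }
 */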
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_s8))) svint8_t svbsl_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_s32))) svint32_t svbsl_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_s64))) svint64_t svbsl_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_s16))) svint16_t svbsl_n_s16(svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_u8))) svuint8_t svbsl_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_u32))) svuint32_t svbsl_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_u64))) svuint64_t svbsl_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_u16))) svuint16_t svbsl_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_s8))) svint8_t svbsl_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_s32))) svint32_t svbsl_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_s64))) svint64_t svbsl_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_s16))) svint16_t svbsl_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_u8))) svuint8_t svcadd_u8(svuint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_u32))) svuint32_t svcadd_u32(svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_u64))) svuint64_t svcadd_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_u16))) svuint16_t svcadd_u16(svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_s8))) svint8_t svcadd_s8(svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_s32))) svint32_t svcadd_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_s64))) svint64_t svcadd_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_s16))) svint16_t svcadd_s16(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcdot_s32))) svint32_t svcdot_s32(svint32_t, svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcdot_s64))) svint64_t svcdot_s64(svint64_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcdot_lane_s32))) svint32_t svcdot_lane_s32(svint32_t, svint8_t, svint8_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcdot_lane_s64))) svint64_t svcdot_lane_s64(svint64_t, svint16_t, svint16_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_u8))) svuint8_t svcmla_u8(svuint8_t, svuint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_u32))) svuint32_t svcmla_u32(svuint32_t, svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_u64))) svuint64_t svcmla_u64(svuint64_t, svuint64_t, svuint64_t, 
uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_u16))) svuint16_t svcmla_u16(svuint16_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_s8))) svint8_t svcmla_s8(svint8_t, svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_s32))) svint32_t svcmla_s32(svint32_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_s64))) svint64_t svcmla_s64(svint64_t, svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_s16))) svint16_t svcmla_s16(svint16_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_u32))) svuint32_t svcmla_lane_u32(svuint32_t, svuint32_t, svuint32_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_u16))) svuint16_t svcmla_lane_u16(svuint16_t, svuint16_t, svuint16_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_s32))) svint32_t svcmla_lane_s32(svint32_t, svint32_t, svint32_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_s16))) svint16_t svcmla_lane_s16(svint16_t, svint16_t, svint16_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f32_f16_m))) svfloat32_t svcvtlt_f32_f16_m(svfloat32_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f32_f16_x))) svfloat32_t svcvtlt_f32_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f64_f32_m))) svfloat64_t svcvtlt_f64_f32_m(svfloat64_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f64_f32_x))) svfloat64_t svcvtlt_f64_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_f16_f32_m))) svfloat16_t svcvtnt_f16_f32_m(svfloat16_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_f32_f64_m))) svfloat32_t svcvtnt_f32_f64_m(svfloat32_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtx_f32_f64_m))) svfloat32_t svcvtx_f32_f64_m(svfloat32_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtx_f32_f64_x))) svfloat32_t svcvtx_f32_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtx_f32_f64_z))) svfloat32_t svcvtx_f32_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtxnt_f32_f64_m))) svfloat32_t svcvtxnt_f32_f64_m(svfloat32_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_u8))) svuint8_t sveor3_n_u8(svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_u32))) svuint32_t sveor3_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_u64))) svuint64_t sveor3_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_u16))) svuint16_t sveor3_n_u16(svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_s8))) svint8_t sveor3_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_s32))) svint32_t sveor3_n_s32(svint32_t, svint32_t, 
int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_s64))) svint64_t sveor3_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_s16))) svint16_t sveor3_n_s16(svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_u8))) svuint8_t sveor3_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_u32))) svuint32_t sveor3_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_u64))) svuint64_t sveor3_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_u16))) svuint16_t sveor3_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_s8))) svint8_t sveor3_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_s32))) svint32_t sveor3_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_s64))) svint64_t sveor3_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_s16))) svint16_t sveor3_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_u8))) svuint8_t sveorbt_n_u8(svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_u32))) svuint32_t sveorbt_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_u64))) svuint64_t sveorbt_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_u16))) svuint16_t sveorbt_n_u16(svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_s8))) svint8_t sveorbt_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_s32))) svint32_t sveorbt_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_s64))) svint64_t sveorbt_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_s16))) svint16_t sveorbt_n_s16(svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_u8))) svuint8_t sveorbt_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_u32))) svuint32_t sveorbt_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_u64))) svuint64_t sveorbt_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_u16))) svuint16_t sveorbt_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_s8))) svint8_t sveorbt_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_s32))) svint32_t sveorbt_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_s64))) svint64_t sveorbt_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_s16))) svint16_t sveorbt_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_u8))) svuint8_t sveortb_n_u8(svuint8_t, svuint8_t, 
uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_u32))) svuint32_t sveortb_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_u64))) svuint64_t sveortb_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_u16))) svuint16_t sveortb_n_u16(svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_s8))) svint8_t sveortb_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_s32))) svint32_t sveortb_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_s64))) svint64_t sveortb_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_s16))) svint16_t sveortb_n_s16(svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_u8))) svuint8_t sveortb_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_u32))) svuint32_t sveortb_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_u64))) svuint64_t sveortb_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_u16))) svuint16_t sveortb_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_s8))) svint8_t sveortb_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_s32))) svint32_t sveortb_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_s64))) svint64_t sveortb_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_s16))) svint16_t sveortb_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s8_m))) svint8_t svhadd_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s32_m))) svint32_t svhadd_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s64_m))) svint64_t svhadd_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s16_m))) svint16_t svhadd_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s8_x))) svint8_t svhadd_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s32_x))) svint32_t svhadd_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s64_x))) svint64_t svhadd_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s16_x))) svint16_t svhadd_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s8_z))) svint8_t svhadd_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s32_z))) svint32_t svhadd_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s64_z))) svint64_t svhadd_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s16_z))) svint16_t 
svhadd_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u8_m))) svuint8_t svhadd_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u32_m))) svuint32_t svhadd_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u64_m))) svuint64_t svhadd_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u16_m))) svuint16_t svhadd_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u8_x))) svuint8_t svhadd_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u32_x))) svuint32_t svhadd_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u64_x))) svuint64_t svhadd_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u16_x))) svuint16_t svhadd_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u8_z))) svuint8_t svhadd_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u32_z))) svuint32_t svhadd_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u64_z))) svuint64_t svhadd_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u16_z))) svuint16_t svhadd_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s8_m))) svint8_t svhadd_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s32_m))) svint32_t svhadd_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s64_m))) svint64_t svhadd_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s16_m))) svint16_t svhadd_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s8_x))) svint8_t svhadd_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s32_x))) svint32_t svhadd_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s64_x))) svint64_t svhadd_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s16_x))) svint16_t svhadd_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s8_z))) svint8_t svhadd_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s32_z))) svint32_t svhadd_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s64_z))) svint64_t svhadd_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s16_z))) svint16_t svhadd_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u8_m))) svuint8_t svhadd_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u32_m))) svuint32_t svhadd_u32_m(svbool_t, svuint32_t, svuint32_t); __ai 
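/* Illustrative sketch, not part of the dumped header above: how the svhadd
 * halving-add intrinsics declared here might be used. The _m/_x/_z suffixes
 * differ only in how inactive predicate lanes are handled (merged from the
 * first operand, left unspecified, or zeroed). Function and buffer names are
 * hypothetical; only the intrinsics whose declarations appear in this text
 * (plus base SVE loads/stores) are used. */
#if defined(__ARM_FEATURE_SVE2)
#include <arm_sve.h>
#include <stdint.h>
#include <stddef.h>

void halving_average_s32(const int32_t *a, const int32_t *b, int32_t *out, size_t n) {
    size_t step = svcntw();                  /* 32-bit lanes per vector */
    for (size_t i = 0; i + step <= n; i += step) {
        svbool_t pg = svptrue_b32();         /* all lanes active for full vectors */
        svint32_t va = svld1_s32(pg, a + i);
        svint32_t vb = svld1_s32(pg, b + i);
        /* per-lane (va + vb) >> 1 without intermediate overflow; the _x form is
         * fine here because every lane is active, so the inactive-lane policy
         * does not matter. */
        svint32_t avg = svhadd_s32_x(pg, va, vb);
        svst1_s32(pg, out + i, avg);
    }
    /* A tail loop (or a whilelt predicate) would handle the remaining
     * n % step elements; omitted to keep the sketch short. */
}
#endif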
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u64_m))) svuint64_t svhadd_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u16_m))) svuint16_t svhadd_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u8_x))) svuint8_t svhadd_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u32_x))) svuint32_t svhadd_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u64_x))) svuint64_t svhadd_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u16_x))) svuint16_t svhadd_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u8_z))) svuint8_t svhadd_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u32_z))) svuint32_t svhadd_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u64_z))) svuint64_t svhadd_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u16_z))) svuint16_t svhadd_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u32_z))) svuint32_t svhistcnt_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u64_z))) svuint64_t svhistcnt_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s32_z))) svuint32_t svhistcnt_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s64_z))) svuint64_t svhistcnt_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_u8))) svuint8_t svhistseg_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_s8))) svuint8_t svhistseg_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s8_m))) svint8_t svhsub_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s32_m))) svint32_t svhsub_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s64_m))) svint64_t svhsub_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s16_m))) svint16_t svhsub_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s8_x))) svint8_t svhsub_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s32_x))) svint32_t svhsub_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s64_x))) svint64_t svhsub_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s16_x))) svint16_t svhsub_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s8_z))) svint8_t svhsub_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s32_z))) svint32_t svhsub_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s64_z))) svint64_t 
svhsub_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s16_z))) svint16_t svhsub_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u8_m))) svuint8_t svhsub_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u32_m))) svuint32_t svhsub_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u64_m))) svuint64_t svhsub_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u16_m))) svuint16_t svhsub_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u8_x))) svuint8_t svhsub_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u32_x))) svuint32_t svhsub_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u64_x))) svuint64_t svhsub_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u16_x))) svuint16_t svhsub_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u8_z))) svuint8_t svhsub_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u32_z))) svuint32_t svhsub_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u64_z))) svuint64_t svhsub_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u16_z))) svuint16_t svhsub_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s8_m))) svint8_t svhsub_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s32_m))) svint32_t svhsub_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s64_m))) svint64_t svhsub_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s16_m))) svint16_t svhsub_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s8_x))) svint8_t svhsub_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s32_x))) svint32_t svhsub_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s64_x))) svint64_t svhsub_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s16_x))) svint16_t svhsub_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s8_z))) svint8_t svhsub_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s32_z))) svint32_t svhsub_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s64_z))) svint64_t svhsub_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s16_z))) svint16_t svhsub_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u8_m))) svuint8_t svhsub_u8_m(svbool_t, svuint8_t, svuint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u32_m))) svuint32_t svhsub_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u64_m))) svuint64_t svhsub_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u16_m))) svuint16_t svhsub_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u8_x))) svuint8_t svhsub_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u32_x))) svuint32_t svhsub_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u64_x))) svuint64_t svhsub_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u16_x))) svuint16_t svhsub_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u8_z))) svuint8_t svhsub_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u32_z))) svuint32_t svhsub_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u64_z))) svuint64_t svhsub_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u16_z))) svuint16_t svhsub_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s8_m))) svint8_t svhsubr_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s32_m))) svint32_t svhsubr_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s64_m))) svint64_t svhsubr_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s16_m))) svint16_t svhsubr_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s8_x))) svint8_t svhsubr_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s32_x))) svint32_t svhsubr_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s64_x))) svint64_t svhsubr_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s16_x))) svint16_t svhsubr_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s8_z))) svint8_t svhsubr_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s32_z))) svint32_t svhsubr_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s64_z))) svint64_t svhsubr_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s16_z))) svint16_t svhsubr_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u8_m))) svuint8_t svhsubr_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u32_m))) svuint32_t svhsubr_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u64_m))) svuint64_t svhsubr_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u16_m))) svuint16_t svhsubr_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u8_x))) svuint8_t svhsubr_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u32_x))) svuint32_t svhsubr_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u64_x))) svuint64_t svhsubr_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u16_x))) svuint16_t svhsubr_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u8_z))) svuint8_t svhsubr_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u32_z))) svuint32_t svhsubr_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u64_z))) svuint64_t svhsubr_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u16_z))) svuint16_t svhsubr_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s8_m))) svint8_t svhsubr_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s32_m))) svint32_t svhsubr_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s64_m))) svint64_t svhsubr_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s16_m))) svint16_t svhsubr_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s8_x))) svint8_t svhsubr_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s32_x))) svint32_t svhsubr_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s64_x))) svint64_t svhsubr_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s16_x))) svint16_t svhsubr_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s8_z))) svint8_t svhsubr_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s32_z))) svint32_t svhsubr_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s64_z))) svint64_t svhsubr_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s16_z))) svint16_t svhsubr_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u8_m))) svuint8_t svhsubr_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u32_m))) svuint32_t svhsubr_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u64_m))) svuint64_t svhsubr_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u16_m))) svuint16_t svhsubr_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u8_x))) svuint8_t svhsubr_u8_x(svbool_t, svuint8_t, svuint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u32_x))) svuint32_t svhsubr_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u64_x))) svuint64_t svhsubr_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u16_x))) svuint16_t svhsubr_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u8_z))) svuint8_t svhsubr_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u32_z))) svuint32_t svhsubr_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u64_z))) svuint64_t svhsubr_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u16_z))) svuint16_t svhsubr_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_u32))) svuint32_t svldnt1_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_u64))) svuint64_t svldnt1_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_f64))) svfloat64_t svldnt1_gather_u64base_index_f64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_f32))) svfloat32_t svldnt1_gather_u32base_index_f32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_s32))) svint32_t svldnt1_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_s64))) svint64_t svldnt1_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_u32))) svuint32_t svldnt1_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_u64))) svuint64_t svldnt1_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_f64))) svfloat64_t svldnt1_gather_u64base_offset_f64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_f32))) svfloat32_t svldnt1_gather_u32base_offset_f32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_s32))) svint32_t svldnt1_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_s64))) svint64_t svldnt1_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_u32))) svuint32_t svldnt1_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_u64))) svuint64_t svldnt1_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_f64))) svfloat64_t svldnt1_gather_u64base_f64(svbool_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_f32))) svfloat32_t svldnt1_gather_u32base_f32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_s32))) svint32_t svldnt1_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_s64))) svint64_t svldnt1_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_u64))) svuint64_t svldnt1_gather_s64index_u64(svbool_t, uint64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_f64))) svfloat64_t svldnt1_gather_s64index_f64(svbool_t, float64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_s64))) svint64_t svldnt1_gather_s64index_s64(svbool_t, int64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_u64))) svuint64_t svldnt1_gather_u64index_u64(svbool_t, uint64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_f64))) svfloat64_t svldnt1_gather_u64index_f64(svbool_t, float64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_s64))) svint64_t svldnt1_gather_u64index_s64(svbool_t, int64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_u32))) svuint32_t svldnt1_gather_u32offset_u32(svbool_t, uint32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_f32))) svfloat32_t svldnt1_gather_u32offset_f32(svbool_t, float32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_s32))) svint32_t svldnt1_gather_u32offset_s32(svbool_t, int32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_u64))) svuint64_t svldnt1_gather_s64offset_u64(svbool_t, uint64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_f64))) svfloat64_t svldnt1_gather_s64offset_f64(svbool_t, float64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_s64))) svint64_t svldnt1_gather_s64offset_s64(svbool_t, int64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_u64))) svuint64_t svldnt1_gather_u64offset_u64(svbool_t, uint64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_f64))) svfloat64_t svldnt1_gather_u64offset_f64(svbool_t, float64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_s64))) svint64_t svldnt1_gather_u64offset_s64(svbool_t, int64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_u32))) svuint32_t svldnt1sb_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_u64))) svuint64_t svldnt1sb_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_s32))) svint32_t svldnt1sb_gather_u32base_offset_s32(svbool_t, svuint32_t, 
int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_s64))) svint64_t svldnt1sb_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_u32))) svuint32_t svldnt1sb_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_u64))) svuint64_t svldnt1sb_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_s32))) svint32_t svldnt1sb_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_s64))) svint64_t svldnt1sb_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_u32))) svuint32_t svldnt1sb_gather_u32offset_u32(svbool_t, int8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_s32))) svint32_t svldnt1sb_gather_u32offset_s32(svbool_t, int8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_u64))) svuint64_t svldnt1sb_gather_s64offset_u64(svbool_t, int8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_s64))) svint64_t svldnt1sb_gather_s64offset_s64(svbool_t, int8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_u64))) svuint64_t svldnt1sb_gather_u64offset_u64(svbool_t, int8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_s64))) svint64_t svldnt1sb_gather_u64offset_s64(svbool_t, int8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_u32))) svuint32_t svldnt1sh_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_u64))) svuint64_t svldnt1sh_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_s32))) svint32_t svldnt1sh_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_s64))) svint64_t svldnt1sh_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_u32))) svuint32_t svldnt1sh_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_u64))) svuint64_t svldnt1sh_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_s32))) svint32_t svldnt1sh_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_s64))) svint64_t svldnt1sh_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_u32))) svuint32_t svldnt1sh_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_u64))) svuint64_t 
svldnt1sh_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_s32))) svint32_t svldnt1sh_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_s64))) svint64_t svldnt1sh_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_u64))) svuint64_t svldnt1sh_gather_s64index_u64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_s64))) svint64_t svldnt1sh_gather_s64index_s64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_u64))) svuint64_t svldnt1sh_gather_u64index_u64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_s64))) svint64_t svldnt1sh_gather_u64index_s64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_u32))) svuint32_t svldnt1sh_gather_u32offset_u32(svbool_t, int16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_s32))) svint32_t svldnt1sh_gather_u32offset_s32(svbool_t, int16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_u64))) svuint64_t svldnt1sh_gather_s64offset_u64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_s64))) svint64_t svldnt1sh_gather_s64offset_s64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_u64))) svuint64_t svldnt1sh_gather_u64offset_u64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_s64))) svint64_t svldnt1sh_gather_u64offset_s64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_u64))) svuint64_t svldnt1sw_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_s64))) svint64_t svldnt1sw_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_u64))) svuint64_t svldnt1sw_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_s64))) svint64_t svldnt1sw_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_u64))) svuint64_t svldnt1sw_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_s64))) svint64_t svldnt1sw_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_u64))) svuint64_t svldnt1sw_gather_s64index_u64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_s64))) svint64_t svldnt1sw_gather_s64index_s64(svbool_t, int32_t const *, svint64_t); __ai 
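/* Illustrative sketch, not part of the dumped header above: one way the
 * non-temporal gather-load intrinsics declared here could be used. It gathers
 * strided int16_t samples and sign-extends them to 64-bit lanes via
 * svldnt1sh_gather_s64index_s64 (declared in this text). The "index" variants
 * scale each vector index by the element size in memory (2 bytes here).
 * The stride value and buffer names are hypothetical. */
#if defined(__ARM_FEATURE_SVE2)
#include <arm_sve.h>
#include <stdint.h>

void gather_stride_s16_to_s64(const int16_t *src, int64_t stride, int64_t *dst) {
    svbool_t pg = svptrue_b64();
    /* element indices 0, stride, 2*stride, ... */
    svint64_t idx = svindex_s64(0, stride);
    /* non-temporal gather: load int16_t at src[idx[i]], sign-extend to int64_t */
    svint64_t v = svldnt1sh_gather_s64index_s64(pg, src, idx);
    svst1_s64(pg, dst, v);                   /* writes svcntd() elements */
}
#endif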
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_u64))) svuint64_t svldnt1sw_gather_u64index_u64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_s64))) svint64_t svldnt1sw_gather_u64index_s64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_u64))) svuint64_t svldnt1sw_gather_s64offset_u64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_s64))) svint64_t svldnt1sw_gather_s64offset_s64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_u64))) svuint64_t svldnt1sw_gather_u64offset_u64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_s64))) svint64_t svldnt1sw_gather_u64offset_s64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_u32))) svuint32_t svldnt1ub_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_u64))) svuint64_t svldnt1ub_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_s32))) svint32_t svldnt1ub_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_s64))) svint64_t svldnt1ub_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_u32))) svuint32_t svldnt1ub_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_u64))) svuint64_t svldnt1ub_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_s32))) svint32_t svldnt1ub_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_s64))) svint64_t svldnt1ub_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_u32))) svuint32_t svldnt1ub_gather_u32offset_u32(svbool_t, uint8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_s32))) svint32_t svldnt1ub_gather_u32offset_s32(svbool_t, uint8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_u64))) svuint64_t svldnt1ub_gather_s64offset_u64(svbool_t, uint8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_s64))) svint64_t svldnt1ub_gather_s64offset_s64(svbool_t, uint8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_u64))) svuint64_t svldnt1ub_gather_u64offset_u64(svbool_t, uint8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_s64))) svint64_t svldnt1ub_gather_u64offset_s64(svbool_t, uint8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_u32))) svuint32_t 
svldnt1uh_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_u64))) svuint64_t svldnt1uh_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_s32))) svint32_t svldnt1uh_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_s64))) svint64_t svldnt1uh_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_u32))) svuint32_t svldnt1uh_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_u64))) svuint64_t svldnt1uh_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_s32))) svint32_t svldnt1uh_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_s64))) svint64_t svldnt1uh_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_u32))) svuint32_t svldnt1uh_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_u64))) svuint64_t svldnt1uh_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_s32))) svint32_t svldnt1uh_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_s64))) svint64_t svldnt1uh_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_u64))) svuint64_t svldnt1uh_gather_s64index_u64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_s64))) svint64_t svldnt1uh_gather_s64index_s64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_u64))) svuint64_t svldnt1uh_gather_u64index_u64(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_s64))) svint64_t svldnt1uh_gather_u64index_s64(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_u32))) svuint32_t svldnt1uh_gather_u32offset_u32(svbool_t, uint16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_s32))) svint32_t svldnt1uh_gather_u32offset_s32(svbool_t, uint16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_u64))) svuint64_t svldnt1uh_gather_s64offset_u64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_s64))) svint64_t svldnt1uh_gather_s64offset_s64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_u64))) svuint64_t svldnt1uh_gather_u64offset_u64(svbool_t, uint16_t const *, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_s64))) svint64_t svldnt1uh_gather_u64offset_s64(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_u64))) svuint64_t svldnt1uw_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_s64))) svint64_t svldnt1uw_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_u64))) svuint64_t svldnt1uw_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_s64))) svint64_t svldnt1uw_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_u64))) svuint64_t svldnt1uw_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_s64))) svint64_t svldnt1uw_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_u64))) svuint64_t svldnt1uw_gather_s64index_u64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_s64))) svint64_t svldnt1uw_gather_s64index_s64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_u64))) svuint64_t svldnt1uw_gather_u64index_u64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_s64))) svint64_t svldnt1uw_gather_u64index_s64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_u64))) svuint64_t svldnt1uw_gather_s64offset_u64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_s64))) svint64_t svldnt1uw_gather_s64offset_s64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_u64))) svuint64_t svldnt1uw_gather_u64offset_u64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_s64))) svint64_t svldnt1uw_gather_u64offset_s64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f64_m))) svint64_t svlogb_f64_m(svint64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f32_m))) svint32_t svlogb_f32_m(svint32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f16_m))) svint16_t svlogb_f16_m(svint16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f64_x))) svint64_t svlogb_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f32_x))) svint32_t svlogb_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f16_x))) svint16_t svlogb_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f64_z))) svint64_t svlogb_f64_z(svbool_t, svfloat64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f32_z))) svint32_t svlogb_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f16_z))) svint16_t svlogb_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u8))) svbool_t svmatch_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u16))) svbool_t svmatch_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s8))) svbool_t svmatch_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s16))) svbool_t svmatch_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f64_m))) svfloat64_t svmaxnmp_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f32_m))) svfloat32_t svmaxnmp_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f16_m))) svfloat16_t svmaxnmp_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f64_x))) svfloat64_t svmaxnmp_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f32_x))) svfloat32_t svmaxnmp_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f16_x))) svfloat16_t svmaxnmp_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f64_m))) svfloat64_t svmaxp_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f32_m))) svfloat32_t svmaxp_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f16_m))) svfloat16_t svmaxp_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f64_x))) svfloat64_t svmaxp_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f32_x))) svfloat32_t svmaxp_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f16_x))) svfloat16_t svmaxp_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s8_m))) svint8_t svmaxp_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s32_m))) svint32_t svmaxp_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s64_m))) svint64_t svmaxp_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s16_m))) svint16_t svmaxp_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s8_x))) svint8_t svmaxp_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s32_x))) svint32_t svmaxp_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s64_x))) svint64_t svmaxp_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s16_x))) svint16_t svmaxp_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u8_m))) 
svuint8_t svmaxp_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u32_m))) svuint32_t svmaxp_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u64_m))) svuint64_t svmaxp_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u16_m))) svuint16_t svmaxp_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u8_x))) svuint8_t svmaxp_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u32_x))) svuint32_t svmaxp_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u64_x))) svuint64_t svmaxp_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u16_x))) svuint16_t svmaxp_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f64_m))) svfloat64_t svminnmp_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f32_m))) svfloat32_t svminnmp_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f16_m))) svfloat16_t svminnmp_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f64_x))) svfloat64_t svminnmp_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f32_x))) svfloat32_t svminnmp_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f16_x))) svfloat16_t svminnmp_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f64_m))) svfloat64_t svminp_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f32_m))) svfloat32_t svminp_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f16_m))) svfloat16_t svminp_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f64_x))) svfloat64_t svminp_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f32_x))) svfloat32_t svminp_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f16_x))) svfloat16_t svminp_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s8_m))) svint8_t svminp_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s32_m))) svint32_t svminp_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s64_m))) svint64_t svminp_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s16_m))) svint16_t svminp_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s8_x))) svint8_t svminp_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s32_x))) svint32_t svminp_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s64_x))) svint64_t 
svminp_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s16_x))) svint16_t svminp_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u8_m))) svuint8_t svminp_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u32_m))) svuint32_t svminp_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u64_m))) svuint64_t svminp_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u16_m))) svuint16_t svminp_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u8_x))) svuint8_t svminp_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u32_x))) svuint32_t svminp_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u64_x))) svuint64_t svminp_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u16_x))) svuint16_t svminp_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_u32))) svuint32_t svmla_lane_u32(svuint32_t, svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_u64))) svuint64_t svmla_lane_u64(svuint64_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_u16))) svuint16_t svmla_lane_u16(svuint16_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_s32))) svint32_t svmla_lane_s32(svint32_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_s64))) svint64_t svmla_lane_s64(svint64_t, svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_s16))) svint16_t svmla_lane_s16(svint16_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_f32))) svfloat32_t svmlalb_n_f32(svfloat32_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_s32))) svint32_t svmlalb_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_s64))) svint64_t svmlalb_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_s16))) svint16_t svmlalb_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_u32))) svuint32_t svmlalb_n_u32(svuint32_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_u64))) svuint64_t svmlalb_n_u64(svuint64_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_u16))) svuint16_t svmlalb_n_u16(svuint16_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_f32))) svfloat32_t svmlalb_f32(svfloat32_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_s32))) svint32_t svmlalb_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_s64))) svint64_t svmlalb_s64(svint64_t, svint32_t, svint32_t); __ai 
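/* Illustrative sketch, not part of the dumped header above: using the widening
 * multiply-accumulate pair svmlalb_s32 / svmlalt_s32 declared around here to
 * accumulate int16_t products into 32-bit lanes. svmlalb consumes the even
 * ("bottom") 16-bit elements, svmlalt the odd ("top") ones, so together they
 * cover every element of the inputs. Function names are hypothetical and the
 * sketch assumes the 32-bit partial sums do not overflow. */
#if defined(__ARM_FEATURE_SVE2)
#include <arm_sve.h>
#include <stdint.h>
#include <stddef.h>

int64_t dot_product_s16(const int16_t *a, const int16_t *b, size_t n) {
    svint32_t acc = svdup_n_s32(0);
    size_t step = svcnth();                  /* 16-bit lanes per vector */
    svbool_t pg = svptrue_b16();
    for (size_t i = 0; i + step <= n; i += step) {
        svint16_t va = svld1_s16(pg, a + i);
        svint16_t vb = svld1_s16(pg, b + i);
        acc = svmlalb_s32(acc, va, vb);      /* acc += even(va) * even(vb) */
        acc = svmlalt_s32(acc, va, vb);      /* acc += odd(va)  * odd(vb)  */
    }
    /* horizontal sum of the 32-bit partial sums; tail elements omitted */
    return svaddv_s32(svptrue_b32(), acc);
}
#endif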
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_s16))) svint16_t svmlalb_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_u32))) svuint32_t svmlalb_u32(svuint32_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_u64))) svuint64_t svmlalb_u64(svuint64_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_u16))) svuint16_t svmlalb_u16(svuint16_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_f32))) svfloat32_t svmlalb_lane_f32(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_s32))) svint32_t svmlalb_lane_s32(svint32_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_s64))) svint64_t svmlalb_lane_s64(svint64_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_u32))) svuint32_t svmlalb_lane_u32(svuint32_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_u64))) svuint64_t svmlalb_lane_u64(svuint64_t, svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_f32))) svfloat32_t svmlalt_n_f32(svfloat32_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_s32))) svint32_t svmlalt_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_s64))) svint64_t svmlalt_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_s16))) svint16_t svmlalt_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_u32))) svuint32_t svmlalt_n_u32(svuint32_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_u64))) svuint64_t svmlalt_n_u64(svuint64_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_u16))) svuint16_t svmlalt_n_u16(svuint16_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_f32))) svfloat32_t svmlalt_f32(svfloat32_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_s32))) svint32_t svmlalt_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_s64))) svint64_t svmlalt_s64(svint64_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_s16))) svint16_t svmlalt_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_u32))) svuint32_t svmlalt_u32(svuint32_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_u64))) svuint64_t svmlalt_u64(svuint64_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_u16))) svuint16_t svmlalt_u16(svuint16_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_f32))) svfloat32_t svmlalt_lane_f32(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_s32))) svint32_t svmlalt_lane_s32(svint32_t, svint16_t, svint16_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_s64))) svint64_t svmlalt_lane_s64(svint64_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_u32))) svuint32_t svmlalt_lane_u32(svuint32_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_u64))) svuint64_t svmlalt_lane_u64(svuint64_t, svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_u32))) svuint32_t svmls_lane_u32(svuint32_t, svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_u64))) svuint64_t svmls_lane_u64(svuint64_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_u16))) svuint16_t svmls_lane_u16(svuint16_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_s32))) svint32_t svmls_lane_s32(svint32_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_s64))) svint64_t svmls_lane_s64(svint64_t, svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_s16))) svint16_t svmls_lane_s16(svint16_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_f32))) svfloat32_t svmlslb_n_f32(svfloat32_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_s32))) svint32_t svmlslb_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_s64))) svint64_t svmlslb_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_s16))) svint16_t svmlslb_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_u32))) svuint32_t svmlslb_n_u32(svuint32_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_u64))) svuint64_t svmlslb_n_u64(svuint64_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_u16))) svuint16_t svmlslb_n_u16(svuint16_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_f32))) svfloat32_t svmlslb_f32(svfloat32_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_s32))) svint32_t svmlslb_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_s64))) svint64_t svmlslb_s64(svint64_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_s16))) svint16_t svmlslb_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_u32))) svuint32_t svmlslb_u32(svuint32_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_u64))) svuint64_t svmlslb_u64(svuint64_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_u16))) svuint16_t svmlslb_u16(svuint16_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_lane_f32))) svfloat32_t svmlslb_lane_f32(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_lane_s32))) svint32_t svmlslb_lane_s32(svint32_t, svint16_t, svint16_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_lane_s64))) svint64_t svmlslb_lane_s64(svint64_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_lane_u32))) svuint32_t svmlslb_lane_u32(svuint32_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_lane_u64))) svuint64_t svmlslb_lane_u64(svuint64_t, svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_f32))) svfloat32_t svmlslt_n_f32(svfloat32_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_s32))) svint32_t svmlslt_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_s64))) svint64_t svmlslt_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_s16))) svint16_t svmlslt_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_u32))) svuint32_t svmlslt_n_u32(svuint32_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_u64))) svuint64_t svmlslt_n_u64(svuint64_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_u16))) svuint16_t svmlslt_n_u16(svuint16_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_f32))) svfloat32_t svmlslt_f32(svfloat32_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_s32))) svint32_t svmlslt_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_s64))) svint64_t svmlslt_s64(svint64_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_s16))) svint16_t svmlslt_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_u32))) svuint32_t svmlslt_u32(svuint32_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_u64))) svuint64_t svmlslt_u64(svuint64_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_u16))) svuint16_t svmlslt_u16(svuint16_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_lane_f32))) svfloat32_t svmlslt_lane_f32(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_lane_s32))) svint32_t svmlslt_lane_s32(svint32_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_lane_s64))) svint64_t svmlslt_lane_s64(svint64_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_lane_u32))) svuint32_t svmlslt_lane_u32(svuint32_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_lane_u64))) svuint64_t svmlslt_lane_u64(svuint64_t, svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_s32))) svint32_t svmovlb_s32(svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_s64))) svint64_t svmovlb_s64(svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_s16))) svint16_t svmovlb_s16(svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_u32))) svuint32_t svmovlb_u32(svuint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_u64))) svuint64_t svmovlb_u64(svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_u16))) svuint16_t svmovlb_u16(svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_s32))) svint32_t svmovlt_s32(svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_s64))) svint64_t svmovlt_s64(svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_s16))) svint16_t svmovlt_s16(svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_u32))) svuint32_t svmovlt_u32(svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_u64))) svuint64_t svmovlt_u64(svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_u16))) svuint16_t svmovlt_u16(svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_u32))) svuint32_t svmul_lane_u32(svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_u64))) svuint64_t svmul_lane_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_u16))) svuint16_t svmul_lane_u16(svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_s32))) svint32_t svmul_lane_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_s64))) svint64_t svmul_lane_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_s16))) svint16_t svmul_lane_s16(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_s32))) svint32_t svmullb_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_s64))) svint64_t svmullb_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_s16))) svint16_t svmullb_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_u32))) svuint32_t svmullb_n_u32(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_u64))) svuint64_t svmullb_n_u64(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_u16))) svuint16_t svmullb_n_u16(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_s32))) svint32_t svmullb_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_s64))) svint64_t svmullb_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_s16))) svint16_t svmullb_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_u32))) svuint32_t svmullb_u32(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_u64))) svuint64_t svmullb_u64(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_u16))) svuint16_t svmullb_u16(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_lane_s32))) svint32_t svmullb_lane_s32(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_lane_s64))) svint64_t svmullb_lane_s64(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_lane_u32))) svuint32_t 
svmullb_lane_u32(svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_lane_u64))) svuint64_t svmullb_lane_u64(svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_s32))) svint32_t svmullt_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_s64))) svint64_t svmullt_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_s16))) svint16_t svmullt_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_u32))) svuint32_t svmullt_n_u32(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_u64))) svuint64_t svmullt_n_u64(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_u16))) svuint16_t svmullt_n_u16(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_s32))) svint32_t svmullt_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_s64))) svint64_t svmullt_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_s16))) svint16_t svmullt_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_u32))) svuint32_t svmullt_u32(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_u64))) svuint64_t svmullt_u64(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_u16))) svuint16_t svmullt_u16(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_lane_s32))) svint32_t svmullt_lane_s32(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_lane_s64))) svint64_t svmullt_lane_s64(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_lane_u32))) svuint32_t svmullt_lane_u32(svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_lane_u64))) svuint64_t svmullt_lane_u64(svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_u8))) svuint8_t svnbsl_n_u8(svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_u32))) svuint32_t svnbsl_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_u64))) svuint64_t svnbsl_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_u16))) svuint16_t svnbsl_n_u16(svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_s8))) svint8_t svnbsl_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_s32))) svint32_t svnbsl_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_s64))) svint64_t svnbsl_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_s16))) svint16_t svnbsl_n_s16(svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_u8))) svuint8_t svnbsl_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_u32))) svuint32_t svnbsl_u32(svuint32_t, svuint32_t, svuint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_u64))) svuint64_t svnbsl_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_u16))) svuint16_t svnbsl_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_s8))) svint8_t svnbsl_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_s32))) svint32_t svnbsl_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_s64))) svint64_t svnbsl_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_s16))) svint16_t svnbsl_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u8))) svbool_t svnmatch_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u16))) svbool_t svnmatch_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s8))) svbool_t svnmatch_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s16))) svbool_t svnmatch_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmul_n_u8))) svuint8_t svpmul_n_u8(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmul_u8))) svuint8_t svpmul_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_n_u64))) svuint64_t svpmullb_n_u64(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_n_u16))) svuint16_t svpmullb_n_u16(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_u64))) svuint64_t svpmullb_u64(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_u16))) svuint16_t svpmullb_u16(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u8))) svuint8_t svpmullb_pair_n_u8(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u32))) svuint32_t svpmullb_pair_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u8))) svuint8_t svpmullb_pair_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u32))) svuint32_t svpmullb_pair_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_n_u64))) svuint64_t svpmullt_n_u64(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_n_u16))) svuint16_t svpmullt_n_u16(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_u64))) svuint64_t svpmullt_u64(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_u16))) svuint16_t svpmullt_u16(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u8))) svuint8_t svpmullt_pair_n_u8(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u32))) svuint32_t svpmullt_pair_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u8))) svuint8_t svpmullt_pair_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u32))) svuint32_t 
svpmullt_pair_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s8_m))) svint8_t svqabs_s8_m(svint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s32_m))) svint32_t svqabs_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s64_m))) svint64_t svqabs_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s16_m))) svint16_t svqabs_s16_m(svint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s8_x))) svint8_t svqabs_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s32_x))) svint32_t svqabs_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s64_x))) svint64_t svqabs_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s16_x))) svint16_t svqabs_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s8_z))) svint8_t svqabs_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s32_z))) svint32_t svqabs_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s64_z))) svint64_t svqabs_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s16_z))) svint16_t svqabs_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s8_m))) svint8_t svqadd_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s32_m))) svint32_t svqadd_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s64_m))) svint64_t svqadd_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s16_m))) svint16_t svqadd_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s8_x))) svint8_t svqadd_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s32_x))) svint32_t svqadd_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s64_x))) svint64_t svqadd_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s16_x))) svint16_t svqadd_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s8_z))) svint8_t svqadd_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s32_z))) svint32_t svqadd_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s64_z))) svint64_t svqadd_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s16_z))) svint16_t svqadd_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u8_m))) svuint8_t svqadd_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u32_m))) svuint32_t svqadd_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u64_m))) svuint64_t svqadd_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u16_m))) svuint16_t svqadd_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u8_x))) svuint8_t svqadd_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u32_x))) svuint32_t svqadd_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u64_x))) svuint64_t svqadd_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u16_x))) svuint16_t svqadd_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u8_z))) svuint8_t svqadd_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u32_z))) svuint32_t svqadd_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u64_z))) svuint64_t svqadd_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u16_z))) svuint16_t svqadd_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s8_m))) svint8_t svqadd_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s32_m))) svint32_t svqadd_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s64_m))) svint64_t svqadd_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s16_m))) svint16_t svqadd_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s8_x))) svint8_t svqadd_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s32_x))) svint32_t svqadd_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s64_x))) svint64_t svqadd_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s16_x))) svint16_t svqadd_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s8_z))) svint8_t svqadd_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s32_z))) svint32_t svqadd_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s64_z))) svint64_t svqadd_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s16_z))) svint16_t svqadd_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u8_m))) svuint8_t svqadd_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u32_m))) svuint32_t svqadd_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u64_m))) svuint64_t svqadd_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u16_m))) svuint16_t svqadd_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u8_x))) svuint8_t svqadd_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u32_x))) svuint32_t 
svqadd_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u64_x))) svuint64_t svqadd_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u16_x))) svuint16_t svqadd_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u8_z))) svuint8_t svqadd_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u32_z))) svuint32_t svqadd_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u64_z))) svuint64_t svqadd_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u16_z))) svuint16_t svqadd_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcadd_s8))) svint8_t svqcadd_s8(svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcadd_s32))) svint32_t svqcadd_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcadd_s64))) svint64_t svqcadd_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcadd_s16))) svint16_t svqcadd_s16(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_n_s32))) svint32_t svqdmlalb_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_n_s64))) svint64_t svqdmlalb_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_n_s16))) svint16_t svqdmlalb_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_s32))) svint32_t svqdmlalb_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_s64))) svint64_t svqdmlalb_s64(svint64_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_s16))) svint16_t svqdmlalb_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_lane_s32))) svint32_t svqdmlalb_lane_s32(svint32_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_lane_s64))) svint64_t svqdmlalb_lane_s64(svint64_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_n_s32))) svint32_t svqdmlalbt_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_n_s64))) svint64_t svqdmlalbt_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_n_s16))) svint16_t svqdmlalbt_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_s32))) svint32_t svqdmlalbt_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_s64))) svint64_t svqdmlalbt_s64(svint64_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_s16))) svint16_t svqdmlalbt_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_n_s32))) svint32_t svqdmlalt_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_n_s64))) svint64_t 
svqdmlalt_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_n_s16))) svint16_t svqdmlalt_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_s32))) svint32_t svqdmlalt_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_s64))) svint64_t svqdmlalt_s64(svint64_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_s16))) svint16_t svqdmlalt_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_lane_s32))) svint32_t svqdmlalt_lane_s32(svint32_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_lane_s64))) svint64_t svqdmlalt_lane_s64(svint64_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_n_s32))) svint32_t svqdmlslb_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_n_s64))) svint64_t svqdmlslb_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_n_s16))) svint16_t svqdmlslb_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_s32))) svint32_t svqdmlslb_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_s64))) svint64_t svqdmlslb_s64(svint64_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_s16))) svint16_t svqdmlslb_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_lane_s32))) svint32_t svqdmlslb_lane_s32(svint32_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_lane_s64))) svint64_t svqdmlslb_lane_s64(svint64_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_n_s32))) svint32_t svqdmlslbt_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_n_s64))) svint64_t svqdmlslbt_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_n_s16))) svint16_t svqdmlslbt_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_s32))) svint32_t svqdmlslbt_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_s64))) svint64_t svqdmlslbt_s64(svint64_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_s16))) svint16_t svqdmlslbt_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_n_s32))) svint32_t svqdmlslt_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_n_s64))) svint64_t svqdmlslt_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_n_s16))) svint16_t svqdmlslt_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_s32))) svint32_t svqdmlslt_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_s64))) svint64_t svqdmlslt_s64(svint64_t, svint32_t, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_s16))) svint16_t svqdmlslt_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_lane_s32))) svint32_t svqdmlslt_lane_s32(svint32_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_lane_s64))) svint64_t svqdmlslt_lane_s64(svint64_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_n_s8))) svint8_t svqdmulh_n_s8(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_n_s32))) svint32_t svqdmulh_n_s32(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_n_s64))) svint64_t svqdmulh_n_s64(svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_n_s16))) svint16_t svqdmulh_n_s16(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s8))) svint8_t svqdmulh_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s32))) svint32_t svqdmulh_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s64))) svint64_t svqdmulh_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s16))) svint16_t svqdmulh_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_lane_s32))) svint32_t svqdmulh_lane_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_lane_s64))) svint64_t svqdmulh_lane_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_lane_s16))) svint16_t svqdmulh_lane_s16(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_n_s32))) svint32_t svqdmullb_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_n_s64))) svint64_t svqdmullb_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_n_s16))) svint16_t svqdmullb_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_s32))) svint32_t svqdmullb_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_s64))) svint64_t svqdmullb_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_s16))) svint16_t svqdmullb_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_lane_s32))) svint32_t svqdmullb_lane_s32(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_lane_s64))) svint64_t svqdmullb_lane_s64(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_n_s32))) svint32_t svqdmullt_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_n_s64))) svint64_t svqdmullt_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_n_s16))) svint16_t svqdmullt_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_s32))) svint32_t svqdmullt_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_s64))) svint64_t svqdmullt_s64(svint32_t, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_s16))) svint16_t svqdmullt_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_lane_s32))) svint32_t svqdmullt_lane_s32(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_lane_s64))) svint64_t svqdmullt_lane_s64(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s8_m))) svint8_t svqneg_s8_m(svint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s32_m))) svint32_t svqneg_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s64_m))) svint64_t svqneg_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s16_m))) svint16_t svqneg_s16_m(svint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s8_x))) svint8_t svqneg_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s32_x))) svint32_t svqneg_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s64_x))) svint64_t svqneg_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s16_x))) svint16_t svqneg_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s8_z))) svint8_t svqneg_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s32_z))) svint32_t svqneg_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s64_z))) svint64_t svqneg_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s16_z))) svint16_t svqneg_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_s8))) svint8_t svqrdcmlah_s8(svint8_t, svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_s32))) svint32_t svqrdcmlah_s32(svint32_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_s64))) svint64_t svqrdcmlah_s64(svint64_t, svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_s16))) svint16_t svqrdcmlah_s16(svint16_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_lane_s32))) svint32_t svqrdcmlah_lane_s32(svint32_t, svint32_t, svint32_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_lane_s16))) svint16_t svqrdcmlah_lane_s16(svint16_t, svint16_t, svint16_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_n_s8))) svint8_t svqrdmlah_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_n_s32))) svint32_t svqrdmlah_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_n_s64))) svint64_t svqrdmlah_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_n_s16))) svint16_t svqrdmlah_n_s16(svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_s8))) svint8_t svqrdmlah_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_s32))) 
svint32_t svqrdmlah_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_s64))) svint64_t svqrdmlah_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_s16))) svint16_t svqrdmlah_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_lane_s32))) svint32_t svqrdmlah_lane_s32(svint32_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_lane_s64))) svint64_t svqrdmlah_lane_s64(svint64_t, svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_lane_s16))) svint16_t svqrdmlah_lane_s16(svint16_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_n_s8))) svint8_t svqrdmlsh_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_n_s32))) svint32_t svqrdmlsh_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_n_s64))) svint64_t svqrdmlsh_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_n_s16))) svint16_t svqrdmlsh_n_s16(svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_s8))) svint8_t svqrdmlsh_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_s32))) svint32_t svqrdmlsh_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_s64))) svint64_t svqrdmlsh_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_s16))) svint16_t svqrdmlsh_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_lane_s32))) svint32_t svqrdmlsh_lane_s32(svint32_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_lane_s64))) svint64_t svqrdmlsh_lane_s64(svint64_t, svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_lane_s16))) svint16_t svqrdmlsh_lane_s16(svint16_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_n_s8))) svint8_t svqrdmulh_n_s8(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_n_s32))) svint32_t svqrdmulh_n_s32(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_n_s64))) svint64_t svqrdmulh_n_s64(svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_n_s16))) svint16_t svqrdmulh_n_s16(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_s8))) svint8_t svqrdmulh_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_s32))) svint32_t svqrdmulh_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_s64))) svint64_t svqrdmulh_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_s16))) svint16_t svqrdmulh_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_lane_s32))) svint32_t svqrdmulh_lane_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_lane_s64))) 
svint64_t svqrdmulh_lane_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_lane_s16))) svint16_t svqrdmulh_lane_s16(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s8_m))) svint8_t svqrshl_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s32_m))) svint32_t svqrshl_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s64_m))) svint64_t svqrshl_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s16_m))) svint16_t svqrshl_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s8_x))) svint8_t svqrshl_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s32_x))) svint32_t svqrshl_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s64_x))) svint64_t svqrshl_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s16_x))) svint16_t svqrshl_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s8_z))) svint8_t svqrshl_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s32_z))) svint32_t svqrshl_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s64_z))) svint64_t svqrshl_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s16_z))) svint16_t svqrshl_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u8_m))) svuint8_t svqrshl_n_u8_m(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u32_m))) svuint32_t svqrshl_n_u32_m(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u64_m))) svuint64_t svqrshl_n_u64_m(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u16_m))) svuint16_t svqrshl_n_u16_m(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u8_x))) svuint8_t svqrshl_n_u8_x(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u32_x))) svuint32_t svqrshl_n_u32_x(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u64_x))) svuint64_t svqrshl_n_u64_x(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u16_x))) svuint16_t svqrshl_n_u16_x(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u8_z))) svuint8_t svqrshl_n_u8_z(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u32_z))) svuint32_t svqrshl_n_u32_z(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u64_z))) svuint64_t svqrshl_n_u64_z(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u16_z))) svuint16_t svqrshl_n_u16_z(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s8_m))) svint8_t 
svqrshl_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s32_m))) svint32_t svqrshl_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s64_m))) svint64_t svqrshl_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s16_m))) svint16_t svqrshl_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s8_x))) svint8_t svqrshl_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s32_x))) svint32_t svqrshl_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s64_x))) svint64_t svqrshl_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s16_x))) svint16_t svqrshl_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s8_z))) svint8_t svqrshl_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s32_z))) svint32_t svqrshl_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s64_z))) svint64_t svqrshl_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s16_z))) svint16_t svqrshl_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u8_m))) svuint8_t svqrshl_u8_m(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u32_m))) svuint32_t svqrshl_u32_m(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u64_m))) svuint64_t svqrshl_u64_m(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u16_m))) svuint16_t svqrshl_u16_m(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u8_x))) svuint8_t svqrshl_u8_x(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u32_x))) svuint32_t svqrshl_u32_x(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u64_x))) svuint64_t svqrshl_u64_x(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u16_x))) svuint16_t svqrshl_u16_x(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u8_z))) svuint8_t svqrshl_u8_z(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u32_z))) svuint32_t svqrshl_u32_z(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u64_z))) svuint64_t svqrshl_u64_z(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u16_z))) svuint16_t svqrshl_u16_z(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_s32))) svint16_t svqrshrnb_n_s32(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_s64))) svint32_t svqrshrnb_n_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_s16))) svint8_t svqrshrnb_n_s16(svint16_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_u32))) svuint16_t svqrshrnb_n_u32(svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_u64))) svuint32_t svqrshrnb_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_u16))) svuint8_t svqrshrnb_n_u16(svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_s32))) svint16_t svqrshrnt_n_s32(svint16_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_s64))) svint32_t svqrshrnt_n_s64(svint32_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_s16))) svint8_t svqrshrnt_n_s16(svint8_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_u32))) svuint16_t svqrshrnt_n_u32(svuint16_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_u64))) svuint32_t svqrshrnt_n_u64(svuint32_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_u16))) svuint8_t svqrshrnt_n_u16(svuint8_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunb_n_s32))) svuint16_t svqrshrunb_n_s32(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunb_n_s64))) svuint32_t svqrshrunb_n_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunb_n_s16))) svuint8_t svqrshrunb_n_s16(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunt_n_s32))) svuint16_t svqrshrunt_n_s32(svuint16_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunt_n_s64))) svuint32_t svqrshrunt_n_s64(svuint32_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunt_n_s16))) svuint8_t svqrshrunt_n_s16(svuint8_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s8_m))) svint8_t svqshl_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s32_m))) svint32_t svqshl_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s64_m))) svint64_t svqshl_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s16_m))) svint16_t svqshl_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s8_x))) svint8_t svqshl_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s32_x))) svint32_t svqshl_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s64_x))) svint64_t svqshl_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s16_x))) svint16_t svqshl_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s8_z))) svint8_t svqshl_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s32_z))) svint32_t svqshl_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s64_z))) svint64_t svqshl_n_s64_z(svbool_t, svint64_t, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s16_z))) svint16_t svqshl_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u8_m))) svuint8_t svqshl_n_u8_m(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u32_m))) svuint32_t svqshl_n_u32_m(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u64_m))) svuint64_t svqshl_n_u64_m(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u16_m))) svuint16_t svqshl_n_u16_m(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u8_x))) svuint8_t svqshl_n_u8_x(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u32_x))) svuint32_t svqshl_n_u32_x(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u64_x))) svuint64_t svqshl_n_u64_x(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u16_x))) svuint16_t svqshl_n_u16_x(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u8_z))) svuint8_t svqshl_n_u8_z(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u32_z))) svuint32_t svqshl_n_u32_z(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u64_z))) svuint64_t svqshl_n_u64_z(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u16_z))) svuint16_t svqshl_n_u16_z(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s8_m))) svint8_t svqshl_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s32_m))) svint32_t svqshl_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s64_m))) svint64_t svqshl_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s16_m))) svint16_t svqshl_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s8_x))) svint8_t svqshl_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s32_x))) svint32_t svqshl_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s64_x))) svint64_t svqshl_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s16_x))) svint16_t svqshl_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s8_z))) svint8_t svqshl_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s32_z))) svint32_t svqshl_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s64_z))) svint64_t svqshl_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s16_z))) svint16_t svqshl_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u8_m))) svuint8_t svqshl_u8_m(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u32_m))) svuint32_t svqshl_u32_m(svbool_t, 
svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u64_m))) svuint64_t svqshl_u64_m(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u16_m))) svuint16_t svqshl_u16_m(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u8_x))) svuint8_t svqshl_u8_x(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u32_x))) svuint32_t svqshl_u32_x(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u64_x))) svuint64_t svqshl_u64_x(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u16_x))) svuint16_t svqshl_u16_x(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u8_z))) svuint8_t svqshl_u8_z(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u32_z))) svuint32_t svqshl_u32_z(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u64_z))) svuint64_t svqshl_u64_z(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u16_z))) svuint16_t svqshl_u16_z(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s8_m))) svuint8_t svqshlu_n_s8_m(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s32_m))) svuint32_t svqshlu_n_s32_m(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s64_m))) svuint64_t svqshlu_n_s64_m(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s16_m))) svuint16_t svqshlu_n_s16_m(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s8_x))) svuint8_t svqshlu_n_s8_x(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s32_x))) svuint32_t svqshlu_n_s32_x(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s64_x))) svuint64_t svqshlu_n_s64_x(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s16_x))) svuint16_t svqshlu_n_s16_x(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s8_z))) svuint8_t svqshlu_n_s8_z(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s32_z))) svuint32_t svqshlu_n_s32_z(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s64_z))) svuint64_t svqshlu_n_s64_z(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s16_z))) svuint16_t svqshlu_n_s16_z(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_s32))) svint16_t svqshrnb_n_s32(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_s64))) svint32_t svqshrnb_n_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_s16))) svint8_t svqshrnb_n_s16(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_u32))) svuint16_t svqshrnb_n_u32(svuint32_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_u64))) svuint32_t svqshrnb_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_u16))) svuint8_t svqshrnb_n_u16(svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_s32))) svint16_t svqshrnt_n_s32(svint16_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_s64))) svint32_t svqshrnt_n_s64(svint32_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_s16))) svint8_t svqshrnt_n_s16(svint8_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_u32))) svuint16_t svqshrnt_n_u32(svuint16_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_u64))) svuint32_t svqshrnt_n_u64(svuint32_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_u16))) svuint8_t svqshrnt_n_u16(svuint8_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunb_n_s32))) svuint16_t svqshrunb_n_s32(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunb_n_s64))) svuint32_t svqshrunb_n_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunb_n_s16))) svuint8_t svqshrunb_n_s16(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunt_n_s32))) svuint16_t svqshrunt_n_s32(svuint16_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunt_n_s64))) svuint32_t svqshrunt_n_s64(svuint32_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunt_n_s16))) svuint8_t svqshrunt_n_s16(svuint8_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s8_m))) svint8_t svqsub_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s32_m))) svint32_t svqsub_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s64_m))) svint64_t svqsub_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s16_m))) svint16_t svqsub_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s8_x))) svint8_t svqsub_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s32_x))) svint32_t svqsub_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s64_x))) svint64_t svqsub_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s16_x))) svint16_t svqsub_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s8_z))) svint8_t svqsub_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s32_z))) svint32_t svqsub_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s64_z))) svint64_t svqsub_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s16_z))) svint16_t svqsub_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u8_m))) svuint8_t 
svqsub_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u32_m))) svuint32_t svqsub_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u64_m))) svuint64_t svqsub_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u16_m))) svuint16_t svqsub_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u8_x))) svuint8_t svqsub_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u32_x))) svuint32_t svqsub_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u64_x))) svuint64_t svqsub_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u16_x))) svuint16_t svqsub_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u8_z))) svuint8_t svqsub_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u32_z))) svuint32_t svqsub_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u64_z))) svuint64_t svqsub_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u16_z))) svuint16_t svqsub_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s8_m))) svint8_t svqsub_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s32_m))) svint32_t svqsub_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s64_m))) svint64_t svqsub_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s16_m))) svint16_t svqsub_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s8_x))) svint8_t svqsub_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s32_x))) svint32_t svqsub_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s64_x))) svint64_t svqsub_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s16_x))) svint16_t svqsub_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s8_z))) svint8_t svqsub_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s32_z))) svint32_t svqsub_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s64_z))) svint64_t svqsub_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s16_z))) svint16_t svqsub_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u8_m))) svuint8_t svqsub_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u32_m))) svuint32_t svqsub_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u64_m))) svuint64_t svqsub_u64_m(svbool_t, svuint64_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u16_m))) svuint16_t svqsub_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u8_x))) svuint8_t svqsub_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u32_x))) svuint32_t svqsub_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u64_x))) svuint64_t svqsub_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u16_x))) svuint16_t svqsub_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u8_z))) svuint8_t svqsub_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u32_z))) svuint32_t svqsub_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u64_z))) svuint64_t svqsub_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u16_z))) svuint16_t svqsub_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s8_m))) svint8_t svqsubr_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s32_m))) svint32_t svqsubr_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s64_m))) svint64_t svqsubr_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s16_m))) svint16_t svqsubr_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s8_x))) svint8_t svqsubr_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s32_x))) svint32_t svqsubr_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s64_x))) svint64_t svqsubr_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s16_x))) svint16_t svqsubr_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s8_z))) svint8_t svqsubr_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s32_z))) svint32_t svqsubr_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s64_z))) svint64_t svqsubr_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s16_z))) svint16_t svqsubr_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u8_m))) svuint8_t svqsubr_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u32_m))) svuint32_t svqsubr_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u64_m))) svuint64_t svqsubr_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u16_m))) svuint16_t svqsubr_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u8_x))) svuint8_t svqsubr_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u32_x))) svuint32_t svqsubr_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u64_x))) svuint64_t svqsubr_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u16_x))) svuint16_t svqsubr_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u8_z))) svuint8_t svqsubr_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u32_z))) svuint32_t svqsubr_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u64_z))) svuint64_t svqsubr_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u16_z))) svuint16_t svqsubr_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s8_m))) svint8_t svqsubr_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s32_m))) svint32_t svqsubr_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s64_m))) svint64_t svqsubr_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s16_m))) svint16_t svqsubr_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s8_x))) svint8_t svqsubr_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s32_x))) svint32_t svqsubr_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s64_x))) svint64_t svqsubr_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s16_x))) svint16_t svqsubr_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s8_z))) svint8_t svqsubr_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s32_z))) svint32_t svqsubr_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s64_z))) svint64_t svqsubr_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s16_z))) svint16_t svqsubr_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u8_m))) svuint8_t svqsubr_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u32_m))) svuint32_t svqsubr_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u64_m))) svuint64_t svqsubr_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u16_m))) svuint16_t svqsubr_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u8_x))) svuint8_t svqsubr_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u32_x))) svuint32_t svqsubr_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u64_x))) svuint64_t svqsubr_u64_x(svbool_t, svuint64_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u16_x))) svuint16_t svqsubr_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u8_z))) svuint8_t svqsubr_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u32_z))) svuint32_t svqsubr_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u64_z))) svuint64_t svqsubr_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u16_z))) svuint16_t svqsubr_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_s32))) svint16_t svqxtnb_s32(svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_s64))) svint32_t svqxtnb_s64(svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_s16))) svint8_t svqxtnb_s16(svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_u32))) svuint16_t svqxtnb_u32(svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_u64))) svuint32_t svqxtnb_u64(svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_u16))) svuint8_t svqxtnb_u16(svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_s32))) svint16_t svqxtnt_s32(svint16_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_s64))) svint32_t svqxtnt_s64(svint32_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_s16))) svint8_t svqxtnt_s16(svint8_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_u32))) svuint16_t svqxtnt_u32(svuint16_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_u64))) svuint32_t svqxtnt_u64(svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_u16))) svuint8_t svqxtnt_u16(svuint8_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunb_s32))) svuint16_t svqxtunb_s32(svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunb_s64))) svuint32_t svqxtunb_s64(svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunb_s16))) svuint8_t svqxtunb_s16(svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunt_s32))) svuint16_t svqxtunt_s32(svuint16_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunt_s64))) svuint32_t svqxtunt_s64(svuint32_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunt_s16))) svuint8_t svqxtunt_s16(svuint8_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_u32))) svuint16_t svraddhnb_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_u64))) svuint32_t svraddhnb_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_u16))) svuint8_t svraddhnb_n_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_s32))) svint16_t svraddhnb_n_s32(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_s64))) svint32_t svraddhnb_n_s64(svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_s16))) svint8_t svraddhnb_n_s16(svint16_t, int16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_u32))) svuint16_t svraddhnb_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_u64))) svuint32_t svraddhnb_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_u16))) svuint8_t svraddhnb_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_s32))) svint16_t svraddhnb_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_s64))) svint32_t svraddhnb_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_s16))) svint8_t svraddhnb_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_u32))) svuint16_t svraddhnt_n_u32(svuint16_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_u64))) svuint32_t svraddhnt_n_u64(svuint32_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_u16))) svuint8_t svraddhnt_n_u16(svuint8_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_s32))) svint16_t svraddhnt_n_s32(svint16_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_s64))) svint32_t svraddhnt_n_s64(svint32_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_s16))) svint8_t svraddhnt_n_s16(svint8_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_u32))) svuint16_t svraddhnt_u32(svuint16_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_u64))) svuint32_t svraddhnt_u64(svuint32_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_u16))) svuint8_t svraddhnt_u16(svuint8_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_s32))) svint16_t svraddhnt_s32(svint16_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_s64))) svint32_t svraddhnt_s64(svint32_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_s16))) svint8_t svraddhnt_s16(svint8_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_u32_m))) svuint32_t svrecpe_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_u32_x))) svuint32_t svrecpe_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_u32_z))) svuint32_t svrecpe_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s8_m))) svint8_t svrhadd_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s32_m))) svint32_t svrhadd_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s64_m))) svint64_t svrhadd_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s16_m))) svint16_t svrhadd_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s8_x))) svint8_t svrhadd_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s32_x))) svint32_t 
svrhadd_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s64_x))) svint64_t svrhadd_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s16_x))) svint16_t svrhadd_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s8_z))) svint8_t svrhadd_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s32_z))) svint32_t svrhadd_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s64_z))) svint64_t svrhadd_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s16_z))) svint16_t svrhadd_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u8_m))) svuint8_t svrhadd_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u32_m))) svuint32_t svrhadd_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u64_m))) svuint64_t svrhadd_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u16_m))) svuint16_t svrhadd_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u8_x))) svuint8_t svrhadd_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u32_x))) svuint32_t svrhadd_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u64_x))) svuint64_t svrhadd_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u16_x))) svuint16_t svrhadd_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u8_z))) svuint8_t svrhadd_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u32_z))) svuint32_t svrhadd_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u64_z))) svuint64_t svrhadd_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u16_z))) svuint16_t svrhadd_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s8_m))) svint8_t svrhadd_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s32_m))) svint32_t svrhadd_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s64_m))) svint64_t svrhadd_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s16_m))) svint16_t svrhadd_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s8_x))) svint8_t svrhadd_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s32_x))) svint32_t svrhadd_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s64_x))) svint64_t svrhadd_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s16_x))) svint16_t svrhadd_s16_x(svbool_t, 
svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s8_z))) svint8_t svrhadd_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s32_z))) svint32_t svrhadd_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s64_z))) svint64_t svrhadd_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s16_z))) svint16_t svrhadd_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u8_m))) svuint8_t svrhadd_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u32_m))) svuint32_t svrhadd_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u64_m))) svuint64_t svrhadd_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u16_m))) svuint16_t svrhadd_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u8_x))) svuint8_t svrhadd_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u32_x))) svuint32_t svrhadd_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u64_x))) svuint64_t svrhadd_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u16_x))) svuint16_t svrhadd_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u8_z))) svuint8_t svrhadd_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u32_z))) svuint32_t svrhadd_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u64_z))) svuint64_t svrhadd_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u16_z))) svuint16_t svrhadd_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s8_m))) svint8_t svrshl_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s32_m))) svint32_t svrshl_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s64_m))) svint64_t svrshl_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s16_m))) svint16_t svrshl_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s8_x))) svint8_t svrshl_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s32_x))) svint32_t svrshl_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s64_x))) svint64_t svrshl_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s16_x))) svint16_t svrshl_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s8_z))) svint8_t svrshl_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s32_z))) svint32_t svrshl_n_s32_z(svbool_t, svint32_t, int32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s64_z))) svint64_t svrshl_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s16_z))) svint16_t svrshl_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u8_m))) svuint8_t svrshl_n_u8_m(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u32_m))) svuint32_t svrshl_n_u32_m(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u64_m))) svuint64_t svrshl_n_u64_m(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u16_m))) svuint16_t svrshl_n_u16_m(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u8_x))) svuint8_t svrshl_n_u8_x(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u32_x))) svuint32_t svrshl_n_u32_x(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u64_x))) svuint64_t svrshl_n_u64_x(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u16_x))) svuint16_t svrshl_n_u16_x(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u8_z))) svuint8_t svrshl_n_u8_z(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u32_z))) svuint32_t svrshl_n_u32_z(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u64_z))) svuint64_t svrshl_n_u64_z(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u16_z))) svuint16_t svrshl_n_u16_z(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_m))) svint8_t svrshl_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_m))) svint32_t svrshl_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_m))) svint64_t svrshl_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_m))) svint16_t svrshl_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_x))) svint8_t svrshl_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_x))) svint32_t svrshl_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_x))) svint64_t svrshl_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_x))) svint16_t svrshl_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_z))) svint8_t svrshl_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_z))) svint32_t svrshl_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_z))) svint64_t svrshl_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_z))) svint16_t svrshl_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_m))) svuint8_t 
svrshl_u8_m(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_m))) svuint32_t svrshl_u32_m(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_m))) svuint64_t svrshl_u64_m(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_m))) svuint16_t svrshl_u16_m(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_x))) svuint8_t svrshl_u8_x(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_x))) svuint32_t svrshl_u32_x(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_x))) svuint64_t svrshl_u64_x(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_x))) svuint16_t svrshl_u16_x(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_z))) svuint8_t svrshl_u8_z(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_z))) svuint32_t svrshl_u32_z(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_z))) svuint64_t svrshl_u64_z(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_z))) svuint16_t svrshl_u16_z(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s8_m))) svint8_t svrshr_n_s8_m(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s32_m))) svint32_t svrshr_n_s32_m(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s64_m))) svint64_t svrshr_n_s64_m(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s16_m))) svint16_t svrshr_n_s16_m(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u8_m))) svuint8_t svrshr_n_u8_m(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u32_m))) svuint32_t svrshr_n_u32_m(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u64_m))) svuint64_t svrshr_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u16_m))) svuint16_t svrshr_n_u16_m(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s8_x))) svint8_t svrshr_n_s8_x(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s32_x))) svint32_t svrshr_n_s32_x(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s64_x))) svint64_t svrshr_n_s64_x(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s16_x))) svint16_t svrshr_n_s16_x(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u8_x))) svuint8_t svrshr_n_u8_x(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u32_x))) svuint32_t svrshr_n_u32_x(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u64_x))) svuint64_t svrshr_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u16_x))) svuint16_t svrshr_n_u16_x(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s8_z))) svint8_t svrshr_n_s8_z(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s32_z))) svint32_t svrshr_n_s32_z(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s64_z))) svint64_t svrshr_n_s64_z(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s16_z))) svint16_t svrshr_n_s16_z(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u8_z))) svuint8_t svrshr_n_u8_z(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u32_z))) svuint32_t svrshr_n_u32_z(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u64_z))) svuint64_t svrshr_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u16_z))) svuint16_t svrshr_n_u16_z(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_u32))) svuint16_t svrshrnb_n_u32(svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_u64))) svuint32_t svrshrnb_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_u16))) svuint8_t svrshrnb_n_u16(svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_s32))) svint16_t svrshrnb_n_s32(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_s64))) svint32_t svrshrnb_n_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_s16))) svint8_t svrshrnb_n_s16(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_u32))) svuint16_t svrshrnt_n_u32(svuint16_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_u64))) svuint32_t svrshrnt_n_u64(svuint32_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_u16))) svuint8_t svrshrnt_n_u16(svuint8_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_s32))) svint16_t svrshrnt_n_s32(svint16_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_s64))) svint32_t svrshrnt_n_s64(svint32_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_s16))) svint8_t svrshrnt_n_s16(svint8_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_u32_m))) svuint32_t svrsqrte_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_u32_x))) svuint32_t svrsqrte_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_u32_z))) svuint32_t svrsqrte_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_s8))) svint8_t svrsra_n_s8(svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_s32))) svint32_t svrsra_n_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_s64))) svint64_t svrsra_n_s64(svint64_t, 
svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_s16))) svint16_t svrsra_n_s16(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_u8))) svuint8_t svrsra_n_u8(svuint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_u32))) svuint32_t svrsra_n_u32(svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_u64))) svuint64_t svrsra_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_u16))) svuint16_t svrsra_n_u16(svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_u32))) svuint16_t svrsubhnb_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_u64))) svuint32_t svrsubhnb_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_u16))) svuint8_t svrsubhnb_n_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_s32))) svint16_t svrsubhnb_n_s32(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_s64))) svint32_t svrsubhnb_n_s64(svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_s16))) svint8_t svrsubhnb_n_s16(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_u32))) svuint16_t svrsubhnb_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_u64))) svuint32_t svrsubhnb_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_u16))) svuint8_t svrsubhnb_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_s32))) svint16_t svrsubhnb_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_s64))) svint32_t svrsubhnb_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_s16))) svint8_t svrsubhnb_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_u32))) svuint16_t svrsubhnt_n_u32(svuint16_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_u64))) svuint32_t svrsubhnt_n_u64(svuint32_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_u16))) svuint8_t svrsubhnt_n_u16(svuint8_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_s32))) svint16_t svrsubhnt_n_s32(svint16_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_s64))) svint32_t svrsubhnt_n_s64(svint32_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_s16))) svint8_t svrsubhnt_n_s16(svint8_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_u32))) svuint16_t svrsubhnt_u32(svuint16_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_u64))) svuint32_t svrsubhnt_u64(svuint32_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_u16))) svuint8_t svrsubhnt_u16(svuint8_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_s32))) svint16_t 
svrsubhnt_s32(svint16_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_s64))) svint32_t svrsubhnt_s64(svint32_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_s16))) svint8_t svrsubhnt_s16(svint8_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclb_n_u32))) svuint32_t svsbclb_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclb_n_u64))) svuint64_t svsbclb_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclb_u32))) svuint32_t svsbclb_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclb_u64))) svuint64_t svsbclb_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclt_n_u32))) svuint32_t svsbclt_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclt_n_u64))) svuint64_t svsbclt_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclt_u32))) svuint32_t svsbclt_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclt_u64))) svuint64_t svsbclt_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_s32))) svint32_t svshllb_n_s32(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_s64))) svint64_t svshllb_n_s64(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_s16))) svint16_t svshllb_n_s16(svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_u32))) svuint32_t svshllb_n_u32(svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_u64))) svuint64_t svshllb_n_u64(svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_u16))) svuint16_t svshllb_n_u16(svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_s32))) svint32_t svshllt_n_s32(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_s64))) svint64_t svshllt_n_s64(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_s16))) svint16_t svshllt_n_s16(svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_u32))) svuint32_t svshllt_n_u32(svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_u64))) svuint64_t svshllt_n_u64(svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_u16))) svuint16_t svshllt_n_u16(svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_u32))) svuint16_t svshrnb_n_u32(svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_u64))) svuint32_t svshrnb_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_u16))) svuint8_t svshrnb_n_u16(svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_s32))) svint16_t svshrnb_n_s32(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_s64))) svint32_t svshrnb_n_s64(svint64_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_s16))) svint8_t svshrnb_n_s16(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_u32))) svuint16_t svshrnt_n_u32(svuint16_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_u64))) svuint32_t svshrnt_n_u64(svuint32_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_u16))) svuint8_t svshrnt_n_u16(svuint8_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_s32))) svint16_t svshrnt_n_s32(svint16_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_s64))) svint32_t svshrnt_n_s64(svint32_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_s16))) svint8_t svshrnt_n_s16(svint8_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_u8))) svuint8_t svsli_n_u8(svuint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_u32))) svuint32_t svsli_n_u32(svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_u64))) svuint64_t svsli_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_u16))) svuint16_t svsli_n_u16(svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_s8))) svint8_t svsli_n_s8(svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_s32))) svint32_t svsli_n_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_s64))) svint64_t svsli_n_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_s16))) svint16_t svsli_n_s16(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u8_m))) svuint8_t svsqadd_n_u8_m(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u32_m))) svuint32_t svsqadd_n_u32_m(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u64_m))) svuint64_t svsqadd_n_u64_m(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u16_m))) svuint16_t svsqadd_n_u16_m(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u8_x))) svuint8_t svsqadd_n_u8_x(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u32_x))) svuint32_t svsqadd_n_u32_x(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u64_x))) svuint64_t svsqadd_n_u64_x(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u16_x))) svuint16_t svsqadd_n_u16_x(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u8_z))) svuint8_t svsqadd_n_u8_z(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u32_z))) svuint32_t svsqadd_n_u32_z(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u64_z))) svuint64_t svsqadd_n_u64_z(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u16_z))) 
svuint16_t svsqadd_n_u16_z(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u8_m))) svuint8_t svsqadd_u8_m(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u32_m))) svuint32_t svsqadd_u32_m(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u64_m))) svuint64_t svsqadd_u64_m(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u16_m))) svuint16_t svsqadd_u16_m(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u8_x))) svuint8_t svsqadd_u8_x(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u32_x))) svuint32_t svsqadd_u32_x(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u64_x))) svuint64_t svsqadd_u64_x(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u16_x))) svuint16_t svsqadd_u16_x(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u8_z))) svuint8_t svsqadd_u8_z(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u32_z))) svuint32_t svsqadd_u32_z(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u64_z))) svuint64_t svsqadd_u64_z(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u16_z))) svuint16_t svsqadd_u16_z(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_s8))) svint8_t svsra_n_s8(svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_s32))) svint32_t svsra_n_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_s64))) svint64_t svsra_n_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_s16))) svint16_t svsra_n_s16(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_u8))) svuint8_t svsra_n_u8(svuint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_u32))) svuint32_t svsra_n_u32(svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_u64))) svuint64_t svsra_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_u16))) svuint16_t svsra_n_u16(svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_u8))) svuint8_t svsri_n_u8(svuint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_u32))) svuint32_t svsri_n_u32(svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_u64))) svuint64_t svsri_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_u16))) svuint16_t svsri_n_u16(svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_s8))) svint8_t svsri_n_s8(svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_s32))) svint32_t svsri_n_s32(svint32_t, svint32_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_s64))) svint64_t svsri_n_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_s16))) svint16_t svsri_n_s16(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_u32))) void svstnt1_scatter_u32base_index_u32(svbool_t, svuint32_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_u64))) void svstnt1_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_f64))) void svstnt1_scatter_u64base_index_f64(svbool_t, svuint64_t, int64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_f32))) void svstnt1_scatter_u32base_index_f32(svbool_t, svuint32_t, int64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_s32))) void svstnt1_scatter_u32base_index_s32(svbool_t, svuint32_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_s64))) void svstnt1_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_u32))) void svstnt1_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_u64))) void svstnt1_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_f64))) void svstnt1_scatter_u64base_offset_f64(svbool_t, svuint64_t, int64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_f32))) void svstnt1_scatter_u32base_offset_f32(svbool_t, svuint32_t, int64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_s32))) void svstnt1_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_s64))) void svstnt1_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_u32))) void svstnt1_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_u64))) void svstnt1_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_f64))) void svstnt1_scatter_u64base_f64(svbool_t, svuint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_f32))) void svstnt1_scatter_u32base_f32(svbool_t, svuint32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_s32))) void svstnt1_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_s64))) void svstnt1_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_u64))) void svstnt1_scatter_s64index_u64(svbool_t, uint64_t *, svint64_t, svuint64_t); 
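/* Editor's note: minimal usage sketch, not part of the embedded header text above.
 * It assumes a toolchain with SVE2 enabled (e.g. -march=armv8-a+sve2) and relies only
 * on the svstnt1_scatter_* declaration shown immediately above plus standard <arm_sve.h>
 * helpers (svptrue_b64, svindex_s64); the function and variable names are hypothetical. */
#include <arm_sve.h>
/* Non-temporal scatter store: for each active 64-bit lane i,
 * write data[i] to base[idx[i]] (index is scaled by the element size). */
static inline void scatter_store_example(uint64_t *base, svuint64_t data) {
    svbool_t pg = svptrue_b64();        /* all 64-bit lanes active          */
    svint64_t idx = svindex_s64(0, 2);  /* indices 0, 2, 4, ... (stride 2)  */
    svstnt1_scatter_s64index_u64(pg, base, idx, data);
}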
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_f64))) void svstnt1_scatter_s64index_f64(svbool_t, float64_t *, svint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_s64))) void svstnt1_scatter_s64index_s64(svbool_t, int64_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_u64))) void svstnt1_scatter_u64index_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_f64))) void svstnt1_scatter_u64index_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_s64))) void svstnt1_scatter_u64index_s64(svbool_t, int64_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_u32))) void svstnt1_scatter_u32offset_u32(svbool_t, uint32_t *, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_f32))) void svstnt1_scatter_u32offset_f32(svbool_t, float32_t *, svuint32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_s32))) void svstnt1_scatter_u32offset_s32(svbool_t, int32_t *, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_u64))) void svstnt1_scatter_s64offset_u64(svbool_t, uint64_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_f64))) void svstnt1_scatter_s64offset_f64(svbool_t, float64_t *, svint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_s64))) void svstnt1_scatter_s64offset_s64(svbool_t, int64_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_u64))) void svstnt1_scatter_u64offset_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_f64))) void svstnt1_scatter_u64offset_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_s64))) void svstnt1_scatter_u64offset_s64(svbool_t, int64_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_u32))) void svstnt1b_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_u64))) void svstnt1b_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_s32))) void svstnt1b_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_s64))) void svstnt1b_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_u32))) void svstnt1b_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_u64))) void svstnt1b_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_s32))) void svstnt1b_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_s64))) void svstnt1b_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_s32))) void svstnt1b_scatter_u32offset_s32(svbool_t, int8_t *, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_u32))) void svstnt1b_scatter_u32offset_u32(svbool_t, uint8_t *, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_s64))) void svstnt1b_scatter_s64offset_s64(svbool_t, int8_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_u64))) void svstnt1b_scatter_s64offset_u64(svbool_t, uint8_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_s64))) void svstnt1b_scatter_u64offset_s64(svbool_t, int8_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_u64))) void svstnt1b_scatter_u64offset_u64(svbool_t, uint8_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_u32))) void svstnt1h_scatter_u32base_index_u32(svbool_t, svuint32_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_u64))) void svstnt1h_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_s32))) void svstnt1h_scatter_u32base_index_s32(svbool_t, svuint32_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_s64))) void svstnt1h_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_u32))) void svstnt1h_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_u64))) void svstnt1h_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_s32))) void svstnt1h_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_s64))) void svstnt1h_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_u32))) void svstnt1h_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_u64))) void svstnt1h_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_s32))) void svstnt1h_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_s64))) void svstnt1h_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_s64))) void svstnt1h_scatter_s64index_s64(svbool_t, int16_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_u64))) void svstnt1h_scatter_s64index_u64(svbool_t, uint16_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_s64))) void svstnt1h_scatter_u64index_s64(svbool_t, int16_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_u64))) void svstnt1h_scatter_u64index_u64(svbool_t, uint16_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_s32))) void svstnt1h_scatter_u32offset_s32(svbool_t, int16_t *, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_u32))) void svstnt1h_scatter_u32offset_u32(svbool_t, uint16_t *, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_s64))) void svstnt1h_scatter_s64offset_s64(svbool_t, int16_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_u64))) void svstnt1h_scatter_s64offset_u64(svbool_t, uint16_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_s64))) void svstnt1h_scatter_u64offset_s64(svbool_t, int16_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_u64))) void svstnt1h_scatter_u64offset_u64(svbool_t, uint16_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_u64))) void svstnt1w_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_s64))) void svstnt1w_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_u64))) void svstnt1w_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_s64))) void svstnt1w_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_u64))) void svstnt1w_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_s64))) void svstnt1w_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_s64))) void svstnt1w_scatter_s64index_s64(svbool_t, int32_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_u64))) void svstnt1w_scatter_s64index_u64(svbool_t, uint32_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_s64))) void svstnt1w_scatter_u64index_s64(svbool_t, int32_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_u64))) void svstnt1w_scatter_u64index_u64(svbool_t, uint32_t *, svuint64_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_s64))) void svstnt1w_scatter_s64offset_s64(svbool_t, int32_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_u64))) void svstnt1w_scatter_s64offset_u64(svbool_t, uint32_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_s64))) void svstnt1w_scatter_u64offset_s64(svbool_t, int32_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_u64))) void svstnt1w_scatter_u64offset_u64(svbool_t, uint32_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_u32))) svuint16_t svsubhnb_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_u64))) svuint32_t svsubhnb_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_u16))) svuint8_t svsubhnb_n_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_s32))) svint16_t svsubhnb_n_s32(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_s64))) svint32_t svsubhnb_n_s64(svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_s16))) svint8_t svsubhnb_n_s16(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_u32))) svuint16_t svsubhnb_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_u64))) svuint32_t svsubhnb_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_u16))) svuint8_t svsubhnb_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_s32))) svint16_t svsubhnb_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_s64))) svint32_t svsubhnb_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_s16))) svint8_t svsubhnb_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_u32))) svuint16_t svsubhnt_n_u32(svuint16_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_u64))) svuint32_t svsubhnt_n_u64(svuint32_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_u16))) svuint8_t svsubhnt_n_u16(svuint8_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_s32))) svint16_t svsubhnt_n_s32(svint16_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_s64))) svint32_t svsubhnt_n_s64(svint32_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_s16))) svint8_t svsubhnt_n_s16(svint8_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_u32))) svuint16_t svsubhnt_u32(svuint16_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_u64))) svuint32_t svsubhnt_u64(svuint32_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_u16))) svuint8_t svsubhnt_u16(svuint8_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_s32))) svint16_t svsubhnt_s32(svint16_t, svint32_t, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_s64))) svint32_t svsubhnt_s64(svint32_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_s16))) svint8_t svsubhnt_s16(svint8_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_s32))) svint32_t svsublb_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_s64))) svint64_t svsublb_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_s16))) svint16_t svsublb_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_u32))) svuint32_t svsublb_n_u32(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_u64))) svuint64_t svsublb_n_u64(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_u16))) svuint16_t svsublb_n_u16(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_s32))) svint32_t svsublb_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_s64))) svint64_t svsublb_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_s16))) svint16_t svsublb_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_u32))) svuint32_t svsublb_u32(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_u64))) svuint64_t svsublb_u64(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_u16))) svuint16_t svsublb_u16(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_n_s32))) svint32_t svsublbt_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_n_s64))) svint64_t svsublbt_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_n_s16))) svint16_t svsublbt_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_s32))) svint32_t svsublbt_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_s64))) svint64_t svsublbt_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_s16))) svint16_t svsublbt_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_s32))) svint32_t svsublt_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_s64))) svint64_t svsublt_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_s16))) svint16_t svsublt_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_u32))) svuint32_t svsublt_n_u32(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_u64))) svuint64_t svsublt_n_u64(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_u16))) svuint16_t svsublt_n_u16(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_s32))) svint32_t svsublt_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_s64))) svint64_t svsublt_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_s16))) svint16_t svsublt_s16(svint8_t, svint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_u32))) svuint32_t svsublt_u32(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_u64))) svuint64_t svsublt_u64(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_u16))) svuint16_t svsublt_u16(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_n_s32))) svint32_t svsubltb_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_n_s64))) svint64_t svsubltb_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_n_s16))) svint16_t svsubltb_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_s32))) svint32_t svsubltb_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_s64))) svint64_t svsubltb_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_s16))) svint16_t svsubltb_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_s32))) svint32_t svsubwb_n_s32(svint32_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_s64))) svint64_t svsubwb_n_s64(svint64_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_s16))) svint16_t svsubwb_n_s16(svint16_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_u32))) svuint32_t svsubwb_n_u32(svuint32_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_u64))) svuint64_t svsubwb_n_u64(svuint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_u16))) svuint16_t svsubwb_n_u16(svuint16_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_s32))) svint32_t svsubwb_s32(svint32_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_s64))) svint64_t svsubwb_s64(svint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_s16))) svint16_t svsubwb_s16(svint16_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_u32))) svuint32_t svsubwb_u32(svuint32_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_u64))) svuint64_t svsubwb_u64(svuint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_u16))) svuint16_t svsubwb_u16(svuint16_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_s32))) svint32_t svsubwt_n_s32(svint32_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_s64))) svint64_t svsubwt_n_s64(svint64_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_s16))) svint16_t svsubwt_n_s16(svint16_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_u32))) svuint32_t svsubwt_n_u32(svuint32_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_u64))) svuint64_t svsubwt_n_u64(svuint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_u16))) svuint16_t svsubwt_n_u16(svuint16_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_s32))) svint32_t svsubwt_s32(svint32_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_s64))) svint64_t svsubwt_s64(svint64_t, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_s16))) svint16_t svsubwt_s16(svint16_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_u32))) svuint32_t svsubwt_u32(svuint32_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_u64))) svuint64_t svsubwt_u64(svuint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_u16))) svuint16_t svsubwt_u16(svuint16_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_u8))) svuint8_t svtbl2_u8(svuint8x2_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_u32))) svuint32_t svtbl2_u32(svuint32x2_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_u64))) svuint64_t svtbl2_u64(svuint64x2_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_u16))) svuint16_t svtbl2_u16(svuint16x2_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_s8))) svint8_t svtbl2_s8(svint8x2_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_f64))) svfloat64_t svtbl2_f64(svfloat64x2_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_f32))) svfloat32_t svtbl2_f32(svfloat32x2_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_f16))) svfloat16_t svtbl2_f16(svfloat16x2_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_s32))) svint32_t svtbl2_s32(svint32x2_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_s64))) svint64_t svtbl2_s64(svint64x2_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_s16))) svint16_t svtbl2_s16(svint16x2_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_u8))) svuint8_t svtbx_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_u32))) svuint32_t svtbx_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_u64))) svuint64_t svtbx_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_u16))) svuint16_t svtbx_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_s8))) svint8_t svtbx_s8(svint8_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_f64))) svfloat64_t svtbx_f64(svfloat64_t, svfloat64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_f32))) svfloat32_t svtbx_f32(svfloat32_t, svfloat32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_f16))) svfloat16_t svtbx_f16(svfloat16_t, svfloat16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_s32))) svint32_t svtbx_s32(svint32_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_s64))) svint64_t svtbx_s64(svint64_t, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_s16))) svint16_t svtbx_s16(svint16_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s8_m))) svint8_t svuqadd_n_s8_m(svbool_t, svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s32_m))) svint32_t svuqadd_n_s32_m(svbool_t, svint32_t, uint32_t); __ai 
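/*
 * Illustrative sketch only, kept inside "#if 0" so the generated token stream
 * above and below is left untouched.  It shows one hypothetical use of the
 * two-vector table-lookup intrinsics declared above; the helper name is
 * invented and SVE2 (-march=armv8-a+sve2) is assumed.
 */
#if 0
static inline svuint8_t example_tbl2_permute(svuint8_t lo, svuint8_t hi,
                                             svuint8_t idx) {
  /* Concatenate lo:hi into a two-vector table; indices past the end of the
     table select zero.  svtbx would instead keep the corresponding byte of
     its first (fallback) operand for out-of-range indices. */
  svuint8x2_t table = svcreate2_u8(lo, hi);
  return svtbl2_u8(table, idx);
}
#endif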
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s64_m))) svint64_t svuqadd_n_s64_m(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s16_m))) svint16_t svuqadd_n_s16_m(svbool_t, svint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s8_x))) svint8_t svuqadd_n_s8_x(svbool_t, svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s32_x))) svint32_t svuqadd_n_s32_x(svbool_t, svint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s64_x))) svint64_t svuqadd_n_s64_x(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s16_x))) svint16_t svuqadd_n_s16_x(svbool_t, svint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s8_z))) svint8_t svuqadd_n_s8_z(svbool_t, svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s32_z))) svint32_t svuqadd_n_s32_z(svbool_t, svint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s64_z))) svint64_t svuqadd_n_s64_z(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s16_z))) svint16_t svuqadd_n_s16_z(svbool_t, svint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s8_m))) svint8_t svuqadd_s8_m(svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s32_m))) svint32_t svuqadd_s32_m(svbool_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s64_m))) svint64_t svuqadd_s64_m(svbool_t, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s16_m))) svint16_t svuqadd_s16_m(svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s8_x))) svint8_t svuqadd_s8_x(svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s32_x))) svint32_t svuqadd_s32_x(svbool_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s64_x))) svint64_t svuqadd_s64_x(svbool_t, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s16_x))) svint16_t svuqadd_s16_x(svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s8_z))) svint8_t svuqadd_s8_z(svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s32_z))) svint32_t svuqadd_s32_z(svbool_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s64_z))) svint64_t svuqadd_s64_z(svbool_t, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s16_z))) svint16_t svuqadd_s16_z(svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_s32))) svbool_t svwhilege_b8_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_s32))) svbool_t svwhilege_b32_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_s32))) svbool_t svwhilege_b64_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_s32))) svbool_t svwhilege_b16_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_s64))) svbool_t 
svwhilege_b8_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_s64))) svbool_t svwhilege_b32_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_s64))) svbool_t svwhilege_b64_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_s64))) svbool_t svwhilege_b16_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_u32))) svbool_t svwhilege_b8_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_u32))) svbool_t svwhilege_b32_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_u32))) svbool_t svwhilege_b64_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_u32))) svbool_t svwhilege_b16_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_u64))) svbool_t svwhilege_b8_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_u64))) svbool_t svwhilege_b32_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_u64))) svbool_t svwhilege_b64_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_u64))) svbool_t svwhilege_b16_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_s32))) svbool_t svwhilegt_b8_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_s32))) svbool_t svwhilegt_b32_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_s32))) svbool_t svwhilegt_b64_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_s32))) svbool_t svwhilegt_b16_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_s64))) svbool_t svwhilegt_b8_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_s64))) svbool_t svwhilegt_b32_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_s64))) svbool_t svwhilegt_b64_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_s64))) svbool_t svwhilegt_b16_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_u32))) svbool_t svwhilegt_b8_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_u32))) svbool_t svwhilegt_b32_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_u32))) svbool_t svwhilegt_b64_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_u32))) svbool_t svwhilegt_b16_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_u64))) svbool_t svwhilegt_b8_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_u64))) svbool_t svwhilegt_b32_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_u64))) svbool_t svwhilegt_b64_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_u64))) svbool_t svwhilegt_b16_u64(uint64_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_u8))) svbool_t svwhilerw_u8(uint8_t const *, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_s8))) svbool_t svwhilerw_s8(int8_t const *, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_u64))) svbool_t svwhilerw_u64(uint64_t const *, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_f64))) svbool_t svwhilerw_f64(float64_t const *, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_s64))) svbool_t svwhilerw_s64(int64_t const *, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_u16))) svbool_t svwhilerw_u16(uint16_t const *, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_f16))) svbool_t svwhilerw_f16(float16_t const *, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_s16))) svbool_t svwhilerw_s16(int16_t const *, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_u32))) svbool_t svwhilerw_u32(uint32_t const *, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_f32))) svbool_t svwhilerw_f32(float32_t const *, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_s32))) svbool_t svwhilerw_s32(int32_t const *, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_u8))) svbool_t svwhilewr_u8(uint8_t const *, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_s8))) svbool_t svwhilewr_s8(int8_t const *, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_u64))) svbool_t svwhilewr_u64(uint64_t const *, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_f64))) svbool_t svwhilewr_f64(float64_t const *, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_s64))) svbool_t svwhilewr_s64(int64_t const *, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_u16))) svbool_t svwhilewr_u16(uint16_t const *, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_f16))) svbool_t svwhilewr_f16(float16_t const *, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_s16))) svbool_t svwhilewr_s16(int16_t const *, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_u32))) svbool_t svwhilewr_u32(uint32_t const *, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_f32))) svbool_t svwhilewr_f32(float32_t const *, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_s32))) svbool_t svwhilewr_s32(int32_t const *, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_u8))) svuint8_t svxar_n_u8(svuint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_u32))) svuint32_t svxar_n_u32(svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_u64))) svuint64_t svxar_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_u16))) svuint16_t svxar_n_u16(svuint16_t, svuint16_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_s8))) svint8_t svxar_n_s8(svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_s32))) svint32_t svxar_n_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_s64))) svint64_t svxar_n_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_s16))) svint16_t svxar_n_s16(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s8))) svint8_t svaba(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s32))) svint32_t svaba(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s64))) svint64_t svaba(svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s16))) svint16_t svaba(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_u8))) svuint8_t svaba(svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_u32))) svuint32_t svaba(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_u64))) svuint64_t svaba(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_u16))) svuint16_t svaba(svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_s8))) svint8_t svaba(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_s32))) svint32_t svaba(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_s64))) svint64_t svaba(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_s16))) svint16_t svaba(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_u8))) svuint8_t svaba(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_u32))) svuint32_t svaba(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_u64))) svuint64_t svaba(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_u16))) svuint16_t svaba(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_s32))) svint32_t svabalb(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_s64))) svint64_t svabalb(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_s16))) svint16_t svabalb(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_u32))) svuint32_t svabalb(svuint32_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_u64))) svuint64_t svabalb(svuint64_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_u16))) svuint16_t svabalb(svuint16_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_s32))) svint32_t svabalb(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_s64))) svint64_t svabalb(svint64_t, svint32_t, svint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_s16))) svint16_t svabalb(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_u32))) svuint32_t svabalb(svuint32_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_u64))) svuint64_t svabalb(svuint64_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_u16))) svuint16_t svabalb(svuint16_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_s32))) svint32_t svabalt(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_s64))) svint64_t svabalt(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_s16))) svint16_t svabalt(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_u32))) svuint32_t svabalt(svuint32_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_u64))) svuint64_t svabalt(svuint64_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_u16))) svuint16_t svabalt(svuint16_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_s32))) svint32_t svabalt(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_s64))) svint64_t svabalt(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_s16))) svint16_t svabalt(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_u32))) svuint32_t svabalt(svuint32_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_u64))) svuint64_t svabalt(svuint64_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_u16))) svuint16_t svabalt(svuint16_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_s32))) svint32_t svabdlb(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_s64))) svint64_t svabdlb(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_s16))) svint16_t svabdlb(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_u32))) svuint32_t svabdlb(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_u64))) svuint64_t svabdlb(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_u16))) svuint16_t svabdlb(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_s32))) svint32_t svabdlb(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_s64))) svint64_t svabdlb(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_s16))) svint16_t svabdlb(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_u32))) svuint32_t svabdlb(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_u64))) svuint64_t svabdlb(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_u16))) svuint16_t svabdlb(svuint8_t, svuint8_t); __aio 
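/*
 * Illustrative sketch only (guarded by "#if 0" so the generated declarations
 * are unaffected): a hypothetical sum-of-absolute-differences step built on
 * the widening svabalb/svabalt overloads declared above.  Assumes SVE2.
 */
#if 0
static inline svuint16_t example_sad_step(svuint16_t acc,
                                          svuint8_t a, svuint8_t b) {
  acc = svabalb(acc, a, b);  /* acc[i] += |a[2i]   - b[2i]|   (bottom lanes) */
  acc = svabalt(acc, a, b);  /* acc[i] += |a[2i+1] - b[2i+1]| (top lanes)    */
  return acc;
}
#endif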
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_s32))) svint32_t svabdlt(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_s64))) svint64_t svabdlt(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_s16))) svint16_t svabdlt(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_u32))) svuint32_t svabdlt(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_u64))) svuint64_t svabdlt(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_u16))) svuint16_t svabdlt(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_s32))) svint32_t svabdlt(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_s64))) svint64_t svabdlt(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_s16))) svint16_t svabdlt(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_u32))) svuint32_t svabdlt(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_u64))) svuint64_t svabdlt(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_u16))) svuint16_t svabdlt(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s32_m))) svint32_t svadalp_m(svbool_t, svint32_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s64_m))) svint64_t svadalp_m(svbool_t, svint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s16_m))) svint16_t svadalp_m(svbool_t, svint16_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s32_x))) svint32_t svadalp_x(svbool_t, svint32_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s64_x))) svint64_t svadalp_x(svbool_t, svint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s16_x))) svint16_t svadalp_x(svbool_t, svint16_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s32_z))) svint32_t svadalp_z(svbool_t, svint32_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s64_z))) svint64_t svadalp_z(svbool_t, svint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s16_z))) svint16_t svadalp_z(svbool_t, svint16_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u32_m))) svuint32_t svadalp_m(svbool_t, svuint32_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u64_m))) svuint64_t svadalp_m(svbool_t, svuint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u16_m))) svuint16_t svadalp_m(svbool_t, svuint16_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u32_x))) svuint32_t svadalp_x(svbool_t, svuint32_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u64_x))) svuint64_t svadalp_x(svbool_t, svuint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u16_x))) svuint16_t svadalp_x(svbool_t, svuint16_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u32_z))) svuint32_t svadalp_z(svbool_t, svuint32_t, svuint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u64_z))) svuint64_t svadalp_z(svbool_t, svuint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u16_z))) svuint16_t svadalp_z(svbool_t, svuint16_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclb_n_u32))) svuint32_t svadclb(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclb_n_u64))) svuint64_t svadclb(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclb_u32))) svuint32_t svadclb(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclb_u64))) svuint64_t svadclb(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclt_n_u32))) svuint32_t svadclt(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclt_n_u64))) svuint64_t svadclt(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclt_u32))) svuint32_t svadclt(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclt_u64))) svuint64_t svadclt(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_u32))) svuint16_t svaddhnb(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_u64))) svuint32_t svaddhnb(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_u16))) svuint8_t svaddhnb(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_s32))) svint16_t svaddhnb(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_s64))) svint32_t svaddhnb(svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_s16))) svint8_t svaddhnb(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_u32))) svuint16_t svaddhnb(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_u64))) svuint32_t svaddhnb(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_u16))) svuint8_t svaddhnb(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_s32))) svint16_t svaddhnb(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_s64))) svint32_t svaddhnb(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_s16))) svint8_t svaddhnb(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_u32))) svuint16_t svaddhnt(svuint16_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_u64))) svuint32_t svaddhnt(svuint32_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_u16))) svuint8_t svaddhnt(svuint8_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_s32))) svint16_t svaddhnt(svint16_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_s64))) svint32_t svaddhnt(svint32_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_s16))) svint8_t svaddhnt(svint8_t, svint16_t, int16_t); __aio 
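/*
 * Illustrative sketch only (guarded by "#if 0"): the svadalp_* forms declared
 * a few lines above perform a pairwise widening accumulation, i.e. each
 * 32-bit accumulator lane gains the sum of two adjacent 16-bit input lanes.
 * The helper name is hypothetical; SVE2 is assumed.
 */
#if 0
static inline svuint32_t example_pairwise_accumulate(svbool_t pg,
                                                     svuint32_t acc,
                                                     svuint16_t op) {
  /* acc[i] += (uint32_t)op[2*i] + (uint32_t)op[2*i + 1] for active lanes */
  return svadalp_x(pg, acc, op);
}
#endif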
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_u32))) svuint16_t svaddhnt(svuint16_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_u64))) svuint32_t svaddhnt(svuint32_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_u16))) svuint8_t svaddhnt(svuint8_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_s32))) svint16_t svaddhnt(svint16_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_s64))) svint32_t svaddhnt(svint32_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_s16))) svint8_t svaddhnt(svint8_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_s32))) svint32_t svaddlb(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_s64))) svint64_t svaddlb(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_s16))) svint16_t svaddlb(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_u32))) svuint32_t svaddlb(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_u64))) svuint64_t svaddlb(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_u16))) svuint16_t svaddlb(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_s32))) svint32_t svaddlb(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_s64))) svint64_t svaddlb(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_s16))) svint16_t svaddlb(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_u32))) svuint32_t svaddlb(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_u64))) svuint64_t svaddlb(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_u16))) svuint16_t svaddlb(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_n_s32))) svint32_t svaddlbt(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_n_s64))) svint64_t svaddlbt(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_n_s16))) svint16_t svaddlbt(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_s32))) svint32_t svaddlbt(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_s64))) svint64_t svaddlbt(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_s16))) svint16_t svaddlbt(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_s32))) svint32_t svaddlt(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_s64))) svint64_t svaddlt(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_s16))) svint16_t svaddlt(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_u32))) svuint32_t svaddlt(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_u64))) svuint64_t svaddlt(svuint32_t, uint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_u16))) svuint16_t svaddlt(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_s32))) svint32_t svaddlt(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_s64))) svint64_t svaddlt(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_s16))) svint16_t svaddlt(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_u32))) svuint32_t svaddlt(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_u64))) svuint64_t svaddlt(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_u16))) svuint16_t svaddlt(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f64_m))) svfloat64_t svaddp_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f32_m))) svfloat32_t svaddp_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f16_m))) svfloat16_t svaddp_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f64_x))) svfloat64_t svaddp_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f32_x))) svfloat32_t svaddp_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f16_x))) svfloat16_t svaddp_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u8_m))) svuint8_t svaddp_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u32_m))) svuint32_t svaddp_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u64_m))) svuint64_t svaddp_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u16_m))) svuint16_t svaddp_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s8_m))) svint8_t svaddp_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s32_m))) svint32_t svaddp_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s64_m))) svint64_t svaddp_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s16_m))) svint16_t svaddp_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u8_x))) svuint8_t svaddp_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u32_x))) svuint32_t svaddp_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u64_x))) svuint64_t svaddp_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u16_x))) svuint16_t svaddp_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s8_x))) svint8_t svaddp_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s32_x))) svint32_t svaddp_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s64_x))) svint64_t svaddp_x(svbool_t, 
svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s16_x))) svint16_t svaddp_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_s32))) svint32_t svaddwb(svint32_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_s64))) svint64_t svaddwb(svint64_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_s16))) svint16_t svaddwb(svint16_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_u32))) svuint32_t svaddwb(svuint32_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_u64))) svuint64_t svaddwb(svuint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_u16))) svuint16_t svaddwb(svuint16_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_s32))) svint32_t svaddwb(svint32_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_s64))) svint64_t svaddwb(svint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_s16))) svint16_t svaddwb(svint16_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_u32))) svuint32_t svaddwb(svuint32_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_u64))) svuint64_t svaddwb(svuint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_u16))) svuint16_t svaddwb(svuint16_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_s32))) svint32_t svaddwt(svint32_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_s64))) svint64_t svaddwt(svint64_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_s16))) svint16_t svaddwt(svint16_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_u32))) svuint32_t svaddwt(svuint32_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_u64))) svuint64_t svaddwt(svuint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_u16))) svuint16_t svaddwt(svuint16_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_s32))) svint32_t svaddwt(svint32_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_s64))) svint64_t svaddwt(svint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_s16))) svint16_t svaddwt(svint16_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_u32))) svuint32_t svaddwt(svuint32_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_u64))) svuint64_t svaddwt(svuint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_u16))) svuint16_t svaddwt(svuint16_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_u8))) svuint8_t svbcax(svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_u32))) svuint32_t svbcax(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_u64))) svuint64_t svbcax(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_u16))) svuint16_t svbcax(svuint16_t, svuint16_t, uint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_s8))) svint8_t svbcax(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_s32))) svint32_t svbcax(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_s64))) svint64_t svbcax(svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_s16))) svint16_t svbcax(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_u8))) svuint8_t svbcax(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_u32))) svuint32_t svbcax(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_u64))) svuint64_t svbcax(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_u16))) svuint16_t svbcax(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_s8))) svint8_t svbcax(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_s32))) svint32_t svbcax(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_s64))) svint64_t svbcax(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_s16))) svint16_t svbcax(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_u8))) svuint8_t svbsl1n(svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_u32))) svuint32_t svbsl1n(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_u64))) svuint64_t svbsl1n(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_u16))) svuint16_t svbsl1n(svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_s8))) svint8_t svbsl1n(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_s32))) svint32_t svbsl1n(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_s64))) svint64_t svbsl1n(svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_s16))) svint16_t svbsl1n(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_u8))) svuint8_t svbsl1n(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_u32))) svuint32_t svbsl1n(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_u64))) svuint64_t svbsl1n(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_u16))) svuint16_t svbsl1n(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_s8))) svint8_t svbsl1n(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_s32))) svint32_t svbsl1n(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_s64))) svint64_t svbsl1n(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_s16))) svint16_t svbsl1n(svint16_t, svint16_t, 
svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_u8))) svuint8_t svbsl2n(svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_u32))) svuint32_t svbsl2n(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_u64))) svuint64_t svbsl2n(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_u16))) svuint16_t svbsl2n(svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_s8))) svint8_t svbsl2n(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_s32))) svint32_t svbsl2n(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_s64))) svint64_t svbsl2n(svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_s16))) svint16_t svbsl2n(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_u8))) svuint8_t svbsl2n(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_u32))) svuint32_t svbsl2n(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_u64))) svuint64_t svbsl2n(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_u16))) svuint16_t svbsl2n(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_s8))) svint8_t svbsl2n(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_s32))) svint32_t svbsl2n(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_s64))) svint64_t svbsl2n(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_s16))) svint16_t svbsl2n(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_u8))) svuint8_t svbsl(svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_u32))) svuint32_t svbsl(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_u64))) svuint64_t svbsl(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_u16))) svuint16_t svbsl(svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_s8))) svint8_t svbsl(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_s32))) svint32_t svbsl(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_s64))) svint64_t svbsl(svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_s16))) svint16_t svbsl(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_u8))) svuint8_t svbsl(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_u32))) svuint32_t svbsl(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_u64))) svuint64_t svbsl(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_u16))) svuint16_t svbsl(svuint16_t, 
svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_s8))) svint8_t svbsl(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_s32))) svint32_t svbsl(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_s64))) svint64_t svbsl(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_s16))) svint16_t svbsl(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_u8))) svuint8_t svcadd(svuint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_u32))) svuint32_t svcadd(svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_u64))) svuint64_t svcadd(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_u16))) svuint16_t svcadd(svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_s8))) svint8_t svcadd(svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_s32))) svint32_t svcadd(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_s64))) svint64_t svcadd(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_s16))) svint16_t svcadd(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcdot_s32))) svint32_t svcdot(svint32_t, svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcdot_s64))) svint64_t svcdot(svint64_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcdot_lane_s32))) svint32_t svcdot_lane(svint32_t, svint8_t, svint8_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcdot_lane_s64))) svint64_t svcdot_lane(svint64_t, svint16_t, svint16_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_u8))) svuint8_t svcmla(svuint8_t, svuint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_u32))) svuint32_t svcmla(svuint32_t, svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_u64))) svuint64_t svcmla(svuint64_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_u16))) svuint16_t svcmla(svuint16_t, svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_s8))) svint8_t svcmla(svint8_t, svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_s32))) svint32_t svcmla(svint32_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_s64))) svint64_t svcmla(svint64_t, svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_s16))) svint16_t svcmla(svint16_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_u32))) svuint32_t svcmla_lane(svuint32_t, svuint32_t, svuint32_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_u16))) svuint16_t svcmla_lane(svuint16_t, svuint16_t, svuint16_t, uint64_t, uint64_t); __aio 
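The svcadd, svcmla, svcdot and *_lane overloads declared above all take a trailing uint64_t rotation argument. A minimal usage sketch, assuming an SVE2-enabled toolchain (for example -march=armv8-a+sve2) and that the rotation is one of the usual compile-time constants (90 or 270 for svcadd; 0, 90, 180 or 270 for svcmla); the function and variable names are hypothetical:

#include <arm_sve.h>

/* Illustrative only: a complex add with a 90-degree rotation followed by a
   complex multiply-accumulate with a 0-degree rotation. The rotation
   arguments must be integer constant expressions. */
svint16_t complex_step(svint16_t acc, svint16_t a, svint16_t b) {
    svint16_t t = svcadd(acc, a, 90);
    return svcmla(t, a, b, 0);
}

The _lane forms take one extra uint64_t immediate (the lane index) before the rotation, as the parameter lists above show.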
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_s32))) svint32_t svcmla_lane(svint32_t, svint32_t, svint32_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_s16))) svint16_t svcmla_lane(svint16_t, svint16_t, svint16_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f32_f16_m))) svfloat32_t svcvtlt_f32_m(svfloat32_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f32_f16_x))) svfloat32_t svcvtlt_f32_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f64_f32_m))) svfloat64_t svcvtlt_f64_m(svfloat64_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f64_f32_x))) svfloat64_t svcvtlt_f64_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_f16_f32_m))) svfloat16_t svcvtnt_f16_m(svfloat16_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_f32_f64_m))) svfloat32_t svcvtnt_f32_m(svfloat32_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtx_f32_f64_m))) svfloat32_t svcvtx_f32_m(svfloat32_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtx_f32_f64_x))) svfloat32_t svcvtx_f32_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtx_f32_f64_z))) svfloat32_t svcvtx_f32_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtxnt_f32_f64_m))) svfloat32_t svcvtxnt_f32_m(svfloat32_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_u8))) svuint8_t sveor3(svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_u32))) svuint32_t sveor3(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_u64))) svuint64_t sveor3(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_u16))) svuint16_t sveor3(svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_s8))) svint8_t sveor3(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_s32))) svint32_t sveor3(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_s64))) svint64_t sveor3(svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_s16))) svint16_t sveor3(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_u8))) svuint8_t sveor3(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_u32))) svuint32_t sveor3(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_u64))) svuint64_t sveor3(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_u16))) svuint16_t sveor3(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_s8))) svint8_t sveor3(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_s32))) svint32_t sveor3(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_s64))) 
svint64_t sveor3(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_s16))) svint16_t sveor3(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_u8))) svuint8_t sveorbt(svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_u32))) svuint32_t sveorbt(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_u64))) svuint64_t sveorbt(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_u16))) svuint16_t sveorbt(svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_s8))) svint8_t sveorbt(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_s32))) svint32_t sveorbt(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_s64))) svint64_t sveorbt(svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_s16))) svint16_t sveorbt(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_u8))) svuint8_t sveorbt(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_u32))) svuint32_t sveorbt(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_u64))) svuint64_t sveorbt(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_u16))) svuint16_t sveorbt(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_s8))) svint8_t sveorbt(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_s32))) svint32_t sveorbt(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_s64))) svint64_t sveorbt(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_s16))) svint16_t sveorbt(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_u8))) svuint8_t sveortb(svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_u32))) svuint32_t sveortb(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_u64))) svuint64_t sveortb(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_u16))) svuint16_t sveortb(svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_s8))) svint8_t sveortb(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_s32))) svint32_t sveortb(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_s64))) svint64_t sveortb(svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_s16))) svint16_t sveortb(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_u8))) svuint8_t sveortb(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_u32))) svuint32_t sveortb(svuint32_t, svuint32_t, svuint32_t); __aio 
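sveor3, svbcax, sveorbt/sveortb and the earlier svbsl family are three-operand bitwise operations, each also available with a scalar (_n) third operand. A minimal sketch of the vector forms, assuming SVE2 and hypothetical names; the per-bit formulas in the comments reflect my reading of the underlying EOR3/BCAX/BSL instructions:

#include <arm_sve.h>

/* Illustrative only. */
svuint32_t ternary_bitwise(svuint32_t a, svuint32_t b, svuint32_t c) {
    svuint32_t x = sveor3(a, b, c);   /* three-way XOR: a ^ b ^ c           */
    svuint32_t y = svbcax(a, b, c);   /* bit clear and XOR: a ^ (b & ~c)    */
    svuint32_t z = svbsl(a, b, c);    /* bitwise select, with c as the mask */
    /* The _n overloads accept a scalar, e.g. sveor3(a, b, (uint32_t)0xffu). */
    return sveor3(x, y, z);
}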
__attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_u64))) svuint64_t sveortb(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_u16))) svuint16_t sveortb(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_s8))) svint8_t sveortb(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_s32))) svint32_t sveortb(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_s64))) svint64_t sveortb(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_s16))) svint16_t sveortb(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s8_m))) svint8_t svhadd_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s32_m))) svint32_t svhadd_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s64_m))) svint64_t svhadd_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s16_m))) svint16_t svhadd_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s8_x))) svint8_t svhadd_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s32_x))) svint32_t svhadd_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s64_x))) svint64_t svhadd_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s16_x))) svint16_t svhadd_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s8_z))) svint8_t svhadd_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s32_z))) svint32_t svhadd_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s64_z))) svint64_t svhadd_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s16_z))) svint16_t svhadd_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u8_m))) svuint8_t svhadd_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u32_m))) svuint32_t svhadd_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u64_m))) svuint64_t svhadd_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u16_m))) svuint16_t svhadd_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u8_x))) svuint8_t svhadd_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u32_x))) svuint32_t svhadd_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u64_x))) svuint64_t svhadd_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u16_x))) svuint16_t svhadd_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u8_z))) svuint8_t svhadd_z(svbool_t, svuint8_t, uint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u32_z))) svuint32_t svhadd_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u64_z))) svuint64_t svhadd_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u16_z))) svuint16_t svhadd_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s8_m))) svint8_t svhadd_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s32_m))) svint32_t svhadd_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s64_m))) svint64_t svhadd_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s16_m))) svint16_t svhadd_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s8_x))) svint8_t svhadd_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s32_x))) svint32_t svhadd_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s64_x))) svint64_t svhadd_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s16_x))) svint16_t svhadd_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s8_z))) svint8_t svhadd_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s32_z))) svint32_t svhadd_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s64_z))) svint64_t svhadd_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s16_z))) svint16_t svhadd_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u8_m))) svuint8_t svhadd_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u32_m))) svuint32_t svhadd_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u64_m))) svuint64_t svhadd_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u16_m))) svuint16_t svhadd_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u8_x))) svuint8_t svhadd_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u32_x))) svuint32_t svhadd_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u64_x))) svuint64_t svhadd_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u16_x))) svuint16_t svhadd_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u8_z))) svuint8_t svhadd_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u32_z))) svuint32_t svhadd_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u64_z))) svuint64_t svhadd_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u16_z))) svuint16_t svhadd_z(svbool_t, svuint16_t, svuint16_t); __aio 
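The svhadd overloads above follow the usual ACLE predication suffixes: _m merges into the first data operand, _z zeroes inactive lanes, _x leaves them unspecified, and each comes in vector/vector and vector/scalar (_n) forms. A minimal sketch, assuming SVE2 and hypothetical names (the halving add roughly averages the operands without intermediate overflow):

#include <arm_sve.h>

/* Illustrative only: the three predication forms of the halving add. */
svuint8_t halving_add_forms(svbool_t pg, svuint8_t a, svuint8_t b) {
    svuint8_t merged = svhadd_m(pg, a, b);   /* inactive lanes keep a's value   */
    svuint8_t zeroed = svhadd_z(pg, a, b);   /* inactive lanes become zero      */
    svuint8_t anyval = svhadd_x(pg, a, b);   /* inactive lanes are unspecified  */
    (void)zeroed; (void)anyval;              /* keep the sketch warning-free    */
    return merged;                           /* e.g. call with pg = svptrue_b8() */
}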
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u32_z))) svuint32_t svhistcnt_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u64_z))) svuint64_t svhistcnt_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s32_z))) svuint32_t svhistcnt_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s64_z))) svuint64_t svhistcnt_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_u8))) svuint8_t svhistseg(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_s8))) svuint8_t svhistseg(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s8_m))) svint8_t svhsub_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s32_m))) svint32_t svhsub_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s64_m))) svint64_t svhsub_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s16_m))) svint16_t svhsub_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s8_x))) svint8_t svhsub_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s32_x))) svint32_t svhsub_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s64_x))) svint64_t svhsub_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s16_x))) svint16_t svhsub_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s8_z))) svint8_t svhsub_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s32_z))) svint32_t svhsub_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s64_z))) svint64_t svhsub_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s16_z))) svint16_t svhsub_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u8_m))) svuint8_t svhsub_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u32_m))) svuint32_t svhsub_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u64_m))) svuint64_t svhsub_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u16_m))) svuint16_t svhsub_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u8_x))) svuint8_t svhsub_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u32_x))) svuint32_t svhsub_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u64_x))) svuint64_t svhsub_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u16_x))) svuint16_t svhsub_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u8_z))) svuint8_t svhsub_z(svbool_t, svuint8_t, uint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u32_z))) svuint32_t svhsub_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u64_z))) svuint64_t svhsub_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u16_z))) svuint16_t svhsub_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s8_m))) svint8_t svhsub_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s32_m))) svint32_t svhsub_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s64_m))) svint64_t svhsub_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s16_m))) svint16_t svhsub_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s8_x))) svint8_t svhsub_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s32_x))) svint32_t svhsub_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s64_x))) svint64_t svhsub_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s16_x))) svint16_t svhsub_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s8_z))) svint8_t svhsub_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s32_z))) svint32_t svhsub_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s64_z))) svint64_t svhsub_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s16_z))) svint16_t svhsub_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u8_m))) svuint8_t svhsub_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u32_m))) svuint32_t svhsub_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u64_m))) svuint64_t svhsub_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u16_m))) svuint16_t svhsub_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u8_x))) svuint8_t svhsub_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u32_x))) svuint32_t svhsub_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u64_x))) svuint64_t svhsub_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u16_x))) svuint16_t svhsub_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u8_z))) svuint8_t svhsub_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u32_z))) svuint32_t svhsub_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u64_z))) svuint64_t svhsub_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u16_z))) svuint16_t svhsub_z(svbool_t, svuint16_t, svuint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s8_m))) svint8_t svhsubr_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s32_m))) svint32_t svhsubr_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s64_m))) svint64_t svhsubr_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s16_m))) svint16_t svhsubr_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s8_x))) svint8_t svhsubr_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s32_x))) svint32_t svhsubr_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s64_x))) svint64_t svhsubr_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s16_x))) svint16_t svhsubr_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s8_z))) svint8_t svhsubr_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s32_z))) svint32_t svhsubr_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s64_z))) svint64_t svhsubr_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s16_z))) svint16_t svhsubr_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u8_m))) svuint8_t svhsubr_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u32_m))) svuint32_t svhsubr_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u64_m))) svuint64_t svhsubr_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u16_m))) svuint16_t svhsubr_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u8_x))) svuint8_t svhsubr_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u32_x))) svuint32_t svhsubr_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u64_x))) svuint64_t svhsubr_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u16_x))) svuint16_t svhsubr_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u8_z))) svuint8_t svhsubr_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u32_z))) svuint32_t svhsubr_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u64_z))) svuint64_t svhsubr_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u16_z))) svuint16_t svhsubr_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s8_m))) svint8_t svhsubr_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s32_m))) svint32_t svhsubr_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s64_m))) svint64_t svhsubr_m(svbool_t, svint64_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s16_m))) svint16_t svhsubr_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s8_x))) svint8_t svhsubr_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s32_x))) svint32_t svhsubr_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s64_x))) svint64_t svhsubr_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s16_x))) svint16_t svhsubr_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s8_z))) svint8_t svhsubr_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s32_z))) svint32_t svhsubr_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s64_z))) svint64_t svhsubr_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s16_z))) svint16_t svhsubr_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u8_m))) svuint8_t svhsubr_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u32_m))) svuint32_t svhsubr_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u64_m))) svuint64_t svhsubr_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u16_m))) svuint16_t svhsubr_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u8_x))) svuint8_t svhsubr_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u32_x))) svuint32_t svhsubr_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u64_x))) svuint64_t svhsubr_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u16_x))) svuint16_t svhsubr_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u8_z))) svuint8_t svhsubr_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u32_z))) svuint32_t svhsubr_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u64_z))) svuint64_t svhsubr_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u16_z))) svuint16_t svhsubr_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_u32))) svuint32_t svldnt1_gather_index_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_u64))) svuint64_t svldnt1_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_f64))) svfloat64_t svldnt1_gather_index_f64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_f32))) svfloat32_t svldnt1_gather_index_f32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_s32))) svint32_t 
svldnt1_gather_index_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_s64))) svint64_t svldnt1_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_u32))) svuint32_t svldnt1_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_u64))) svuint64_t svldnt1_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_f64))) svfloat64_t svldnt1_gather_offset_f64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_f32))) svfloat32_t svldnt1_gather_offset_f32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_s32))) svint32_t svldnt1_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_s64))) svint64_t svldnt1_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_u32))) svuint32_t svldnt1_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_u64))) svuint64_t svldnt1_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_f64))) svfloat64_t svldnt1_gather_f64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_f32))) svfloat32_t svldnt1_gather_f32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_s32))) svint32_t svldnt1_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_s64))) svint64_t svldnt1_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_u64))) svuint64_t svldnt1_gather_index(svbool_t, uint64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_f64))) svfloat64_t svldnt1_gather_index(svbool_t, float64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_s64))) svint64_t svldnt1_gather_index(svbool_t, int64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_u64))) svuint64_t svldnt1_gather_index(svbool_t, uint64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_f64))) svfloat64_t svldnt1_gather_index(svbool_t, float64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_s64))) svint64_t svldnt1_gather_index(svbool_t, int64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_u32))) svuint32_t svldnt1_gather_offset(svbool_t, uint32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_f32))) svfloat32_t svldnt1_gather_offset(svbool_t, float32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_s32))) svint32_t svldnt1_gather_offset(svbool_t, int32_t 
const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_u64))) svuint64_t svldnt1_gather_offset(svbool_t, uint64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_f64))) svfloat64_t svldnt1_gather_offset(svbool_t, float64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_s64))) svint64_t svldnt1_gather_offset(svbool_t, int64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_u64))) svuint64_t svldnt1_gather_offset(svbool_t, uint64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_f64))) svfloat64_t svldnt1_gather_offset(svbool_t, float64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_s64))) svint64_t svldnt1_gather_offset(svbool_t, int64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_u32))) svuint32_t svldnt1sb_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_u64))) svuint64_t svldnt1sb_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_s32))) svint32_t svldnt1sb_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_s64))) svint64_t svldnt1sb_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_u32))) svuint32_t svldnt1sb_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_u64))) svuint64_t svldnt1sb_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_s32))) svint32_t svldnt1sb_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_s64))) svint64_t svldnt1sb_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_u32))) svuint32_t svldnt1sb_gather_offset_u32(svbool_t, int8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_s32))) svint32_t svldnt1sb_gather_offset_s32(svbool_t, int8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_u64))) svuint64_t svldnt1sb_gather_offset_u64(svbool_t, int8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_s64))) svint64_t svldnt1sb_gather_offset_s64(svbool_t, int8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_u64))) svuint64_t svldnt1sb_gather_offset_u64(svbool_t, int8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_s64))) svint64_t svldnt1sb_gather_offset_s64(svbool_t, int8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_u32))) svuint32_t svldnt1sh_gather_index_u32(svbool_t, svuint32_t, int64_t); __aio 
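The svldnt1_gather overloads above are non-temporal gather loads: each active lane reads one element through either a vector of base addresses plus a scalar offset/index, or a scalar base pointer plus a vector of offsets/indices. A minimal sketch of the pointer-plus-vector-index form, assuming SVE2 and hypothetical names:

#include <arm_sve.h>

/* Illustrative only: loads base[idx[i]] for each active lane of pg, using the
   svldnt1_gather_index(svbool_t, float64_t const *, svint64_t) overload
   declared above (the index is scaled by the element size). */
svfloat64_t gather_f64(svbool_t pg, const float64_t *base, svint64_t idx) {
    return svldnt1_gather_index(pg, base, idx);
}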
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_u64))) svuint64_t svldnt1sh_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_s32))) svint32_t svldnt1sh_gather_index_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_s64))) svint64_t svldnt1sh_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_u32))) svuint32_t svldnt1sh_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_u64))) svuint64_t svldnt1sh_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_s32))) svint32_t svldnt1sh_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_s64))) svint64_t svldnt1sh_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_u32))) svuint32_t svldnt1sh_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_u64))) svuint64_t svldnt1sh_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_s32))) svint32_t svldnt1sh_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_s64))) svint64_t svldnt1sh_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_u64))) svuint64_t svldnt1sh_gather_index_u64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_s64))) svint64_t svldnt1sh_gather_index_s64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_u64))) svuint64_t svldnt1sh_gather_index_u64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_s64))) svint64_t svldnt1sh_gather_index_s64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_u32))) svuint32_t svldnt1sh_gather_offset_u32(svbool_t, int16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_s32))) svint32_t svldnt1sh_gather_offset_s32(svbool_t, int16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_u64))) svuint64_t svldnt1sh_gather_offset_u64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_s64))) svint64_t svldnt1sh_gather_offset_s64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_u64))) svuint64_t svldnt1sh_gather_offset_u64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_s64))) svint64_t svldnt1sh_gather_offset_s64(svbool_t, int16_t const *, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_u64))) svuint64_t svldnt1sw_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_s64))) svint64_t svldnt1sw_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_u64))) svuint64_t svldnt1sw_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_s64))) svint64_t svldnt1sw_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_u64))) svuint64_t svldnt1sw_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_s64))) svint64_t svldnt1sw_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_u64))) svuint64_t svldnt1sw_gather_index_u64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_s64))) svint64_t svldnt1sw_gather_index_s64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_u64))) svuint64_t svldnt1sw_gather_index_u64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_s64))) svint64_t svldnt1sw_gather_index_s64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_u64))) svuint64_t svldnt1sw_gather_offset_u64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_s64))) svint64_t svldnt1sw_gather_offset_s64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_u64))) svuint64_t svldnt1sw_gather_offset_u64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_s64))) svint64_t svldnt1sw_gather_offset_s64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_u32))) svuint32_t svldnt1ub_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_u64))) svuint64_t svldnt1ub_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_s32))) svint32_t svldnt1ub_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_s64))) svint64_t svldnt1ub_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_u32))) svuint32_t svldnt1ub_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_u64))) svuint64_t svldnt1ub_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_s32))) svint32_t svldnt1ub_gather_s32(svbool_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_s64))) svint64_t svldnt1ub_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_u32))) svuint32_t svldnt1ub_gather_offset_u32(svbool_t, uint8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_s32))) svint32_t svldnt1ub_gather_offset_s32(svbool_t, uint8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_u64))) svuint64_t svldnt1ub_gather_offset_u64(svbool_t, uint8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_s64))) svint64_t svldnt1ub_gather_offset_s64(svbool_t, uint8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_u64))) svuint64_t svldnt1ub_gather_offset_u64(svbool_t, uint8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_s64))) svint64_t svldnt1ub_gather_offset_s64(svbool_t, uint8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_u32))) svuint32_t svldnt1uh_gather_index_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_u64))) svuint64_t svldnt1uh_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_s32))) svint32_t svldnt1uh_gather_index_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_s64))) svint64_t svldnt1uh_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_u32))) svuint32_t svldnt1uh_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_u64))) svuint64_t svldnt1uh_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_s32))) svint32_t svldnt1uh_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_s64))) svint64_t svldnt1uh_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_u32))) svuint32_t svldnt1uh_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_u64))) svuint64_t svldnt1uh_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_s32))) svint32_t svldnt1uh_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_s64))) svint64_t svldnt1uh_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_u64))) svuint64_t svldnt1uh_gather_index_u64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_s64))) svint64_t svldnt1uh_gather_index_s64(svbool_t, uint16_t const *, svint64_t); __aio 
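The svldnt1sb/svldnt1ub/svldnt1sh/svldnt1uh/svldnt1sw/svldnt1uw variants combine the same gather addressing modes with a sign- or zero-extending narrow load. A minimal sketch of the zero-extending byte form, assuming SVE2 and hypothetical names (the _offset forms take byte offsets, as I read the naming):

#include <arm_sve.h>
#include <stdint.h>

/* Illustrative only: each active lane loads one byte from base[off[i]] and
   zero-extends it to 32 bits, per the
   svldnt1ub_gather_offset_u32(svbool_t, uint8_t const *, svuint32_t)
   overload declared above. */
svuint32_t gather_bytes_u32(svbool_t pg, const uint8_t *base, svuint32_t off) {
    return svldnt1ub_gather_offset_u32(pg, base, off);
}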
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_u64))) svuint64_t svldnt1uh_gather_index_u64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_s64))) svint64_t svldnt1uh_gather_index_s64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_u32))) svuint32_t svldnt1uh_gather_offset_u32(svbool_t, uint16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_s32))) svint32_t svldnt1uh_gather_offset_s32(svbool_t, uint16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_u64))) svuint64_t svldnt1uh_gather_offset_u64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_s64))) svint64_t svldnt1uh_gather_offset_s64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_u64))) svuint64_t svldnt1uh_gather_offset_u64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_s64))) svint64_t svldnt1uh_gather_offset_s64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_u64))) svuint64_t svldnt1uw_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_s64))) svint64_t svldnt1uw_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_u64))) svuint64_t svldnt1uw_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_s64))) svint64_t svldnt1uw_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_u64))) svuint64_t svldnt1uw_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_s64))) svint64_t svldnt1uw_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_u64))) svuint64_t svldnt1uw_gather_index_u64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_s64))) svint64_t svldnt1uw_gather_index_s64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_u64))) svuint64_t svldnt1uw_gather_index_u64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_s64))) svint64_t svldnt1uw_gather_index_s64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_u64))) svuint64_t svldnt1uw_gather_offset_u64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_s64))) svint64_t svldnt1uw_gather_offset_s64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_u64))) svuint64_t svldnt1uw_gather_offset_u64(svbool_t, uint32_t const *, 
svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_s64))) svint64_t svldnt1uw_gather_offset_s64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f64_m))) svint64_t svlogb_m(svint64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f32_m))) svint32_t svlogb_m(svint32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f16_m))) svint16_t svlogb_m(svint16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f64_x))) svint64_t svlogb_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f32_x))) svint32_t svlogb_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f16_x))) svint16_t svlogb_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f64_z))) svint64_t svlogb_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f32_z))) svint32_t svlogb_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f16_z))) svint16_t svlogb_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u8))) svbool_t svmatch(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u16))) svbool_t svmatch(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s8))) svbool_t svmatch(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s16))) svbool_t svmatch(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f64_m))) svfloat64_t svmaxnmp_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f32_m))) svfloat32_t svmaxnmp_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f16_m))) svfloat16_t svmaxnmp_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f64_x))) svfloat64_t svmaxnmp_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f32_x))) svfloat32_t svmaxnmp_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f16_x))) svfloat16_t svmaxnmp_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f64_m))) svfloat64_t svmaxp_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f32_m))) svfloat32_t svmaxp_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f16_m))) svfloat16_t svmaxp_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f64_x))) svfloat64_t svmaxp_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f32_x))) svfloat32_t svmaxp_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f16_x))) svfloat16_t svmaxp_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s8_m))) svint8_t svmaxp_m(svbool_t, 
svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s32_m))) svint32_t svmaxp_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s64_m))) svint64_t svmaxp_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s16_m))) svint16_t svmaxp_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s8_x))) svint8_t svmaxp_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s32_x))) svint32_t svmaxp_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s64_x))) svint64_t svmaxp_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s16_x))) svint16_t svmaxp_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u8_m))) svuint8_t svmaxp_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u32_m))) svuint32_t svmaxp_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u64_m))) svuint64_t svmaxp_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u16_m))) svuint16_t svmaxp_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u8_x))) svuint8_t svmaxp_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u32_x))) svuint32_t svmaxp_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u64_x))) svuint64_t svmaxp_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u16_x))) svuint16_t svmaxp_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f64_m))) svfloat64_t svminnmp_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f32_m))) svfloat32_t svminnmp_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f16_m))) svfloat16_t svminnmp_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f64_x))) svfloat64_t svminnmp_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f32_x))) svfloat32_t svminnmp_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f16_x))) svfloat16_t svminnmp_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f64_m))) svfloat64_t svminp_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f32_m))) svfloat32_t svminp_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f16_m))) svfloat16_t svminp_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f64_x))) svfloat64_t svminp_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f32_x))) svfloat32_t svminp_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f16_x))) 
svfloat16_t svminp_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s8_m))) svint8_t svminp_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s32_m))) svint32_t svminp_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s64_m))) svint64_t svminp_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s16_m))) svint16_t svminp_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s8_x))) svint8_t svminp_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s32_x))) svint32_t svminp_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s64_x))) svint64_t svminp_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s16_x))) svint16_t svminp_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u8_m))) svuint8_t svminp_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u32_m))) svuint32_t svminp_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u64_m))) svuint64_t svminp_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u16_m))) svuint16_t svminp_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u8_x))) svuint8_t svminp_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u32_x))) svuint32_t svminp_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u64_x))) svuint64_t svminp_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u16_x))) svuint16_t svminp_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_u32))) svuint32_t svmla_lane(svuint32_t, svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_u64))) svuint64_t svmla_lane(svuint64_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_u16))) svuint16_t svmla_lane(svuint16_t, svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_s32))) svint32_t svmla_lane(svint32_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_s64))) svint64_t svmla_lane(svint64_t, svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_s16))) svint16_t svmla_lane(svint16_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_f32))) svfloat32_t svmlalb(svfloat32_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_s32))) svint32_t svmlalb(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_s64))) svint64_t svmlalb(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_s16))) svint16_t svmlalb(svint16_t, svint8_t, int8_t); __aio 
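/* Illustrative usage (not part of the original header). Assuming this file is
   included as <arm_sve.h> and the target enables SVE2 (for example
   -march=armv8-a+sve2), the overloaded aliases above resolve to the per-type
   builtins named in each attribute. A minimal sketch of the pairwise maximum
   declared above; the helper name is hypothetical:

     svfloat32_t pairwise_max(svbool_t pg, svfloat32_t a, svfloat32_t b) {
         // svmaxp_x: pairwise maximum of adjacent lanes drawn from a and b;
         // the _x suffix leaves inactive lanes with an unspecified value.
         return svmaxp_x(pg, a, b);
     }
*/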
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_u32))) svuint32_t svmlalb(svuint32_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_u64))) svuint64_t svmlalb(svuint64_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_u16))) svuint16_t svmlalb(svuint16_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_f32))) svfloat32_t svmlalb(svfloat32_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_s32))) svint32_t svmlalb(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_s64))) svint64_t svmlalb(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_s16))) svint16_t svmlalb(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_u32))) svuint32_t svmlalb(svuint32_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_u64))) svuint64_t svmlalb(svuint64_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_u16))) svuint16_t svmlalb(svuint16_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_f32))) svfloat32_t svmlalb_lane(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_s32))) svint32_t svmlalb_lane(svint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_s64))) svint64_t svmlalb_lane(svint64_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_u32))) svuint32_t svmlalb_lane(svuint32_t, svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_u64))) svuint64_t svmlalb_lane(svuint64_t, svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_f32))) svfloat32_t svmlalt(svfloat32_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_s32))) svint32_t svmlalt(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_s64))) svint64_t svmlalt(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_s16))) svint16_t svmlalt(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_u32))) svuint32_t svmlalt(svuint32_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_u64))) svuint64_t svmlalt(svuint64_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_u16))) svuint16_t svmlalt(svuint16_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_f32))) svfloat32_t svmlalt(svfloat32_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_s32))) svint32_t svmlalt(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_s64))) svint64_t svmlalt(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_s16))) svint16_t svmlalt(svint16_t, svint8_t, svint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_u32))) svuint32_t svmlalt(svuint32_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_u64))) svuint64_t svmlalt(svuint64_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_u16))) svuint16_t svmlalt(svuint16_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_f32))) svfloat32_t svmlalt_lane(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_s32))) svint32_t svmlalt_lane(svint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_s64))) svint64_t svmlalt_lane(svint64_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_u32))) svuint32_t svmlalt_lane(svuint32_t, svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_u64))) svuint64_t svmlalt_lane(svuint64_t, svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_u32))) svuint32_t svmls_lane(svuint32_t, svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_u64))) svuint64_t svmls_lane(svuint64_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_u16))) svuint16_t svmls_lane(svuint16_t, svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_s32))) svint32_t svmls_lane(svint32_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_s64))) svint64_t svmls_lane(svint64_t, svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_s16))) svint16_t svmls_lane(svint16_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_f32))) svfloat32_t svmlslb(svfloat32_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_s32))) svint32_t svmlslb(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_s64))) svint64_t svmlslb(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_s16))) svint16_t svmlslb(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_u32))) svuint32_t svmlslb(svuint32_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_u64))) svuint64_t svmlslb(svuint64_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_u16))) svuint16_t svmlslb(svuint16_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_f32))) svfloat32_t svmlslb(svfloat32_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_s32))) svint32_t svmlslb(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_s64))) svint64_t svmlslb(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_s16))) svint16_t svmlslb(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_u32))) svuint32_t 
svmlslb(svuint32_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_u64))) svuint64_t svmlslb(svuint64_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_u16))) svuint16_t svmlslb(svuint16_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_lane_f32))) svfloat32_t svmlslb_lane(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_lane_s32))) svint32_t svmlslb_lane(svint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_lane_s64))) svint64_t svmlslb_lane(svint64_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_lane_u32))) svuint32_t svmlslb_lane(svuint32_t, svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_lane_u64))) svuint64_t svmlslb_lane(svuint64_t, svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_f32))) svfloat32_t svmlslt(svfloat32_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_s32))) svint32_t svmlslt(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_s64))) svint64_t svmlslt(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_s16))) svint16_t svmlslt(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_u32))) svuint32_t svmlslt(svuint32_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_u64))) svuint64_t svmlslt(svuint64_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_u16))) svuint16_t svmlslt(svuint16_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_f32))) svfloat32_t svmlslt(svfloat32_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_s32))) svint32_t svmlslt(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_s64))) svint64_t svmlslt(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_s16))) svint16_t svmlslt(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_u32))) svuint32_t svmlslt(svuint32_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_u64))) svuint64_t svmlslt(svuint64_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_u16))) svuint16_t svmlslt(svuint16_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_lane_f32))) svfloat32_t svmlslt_lane(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_lane_s32))) svint32_t svmlslt_lane(svint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_lane_s64))) svint64_t svmlslt_lane(svint64_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_lane_u32))) svuint32_t svmlslt_lane(svuint32_t, svuint16_t, svuint16_t, uint64_t); __aio 
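/* Sketch of the widening multiply-accumulate pair declared above (hypothetical
   helper, same SVE2 assumptions as before). svmlalb widens and accumulates the
   even (bottom) lanes, svmlalt the odd (top) lanes, so the two together cover
   every element:

     svint32_t widening_mac(svint32_t acc, svint16_t a, svint16_t b) {
         acc = svmlalb(acc, a, b);   // acc += (int32)a[even] * (int32)b[even]
         acc = svmlalt(acc, a, b);   // acc += (int32)a[odd]  * (int32)b[odd]
         return acc;
     }

   svmlslb/svmlslt are the subtracting counterparts with the same shapes. */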
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_lane_u64))) svuint64_t svmlslt_lane(svuint64_t, svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_s32))) svint32_t svmovlb(svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_s64))) svint64_t svmovlb(svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_s16))) svint16_t svmovlb(svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_u32))) svuint32_t svmovlb(svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_u64))) svuint64_t svmovlb(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_u16))) svuint16_t svmovlb(svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_s32))) svint32_t svmovlt(svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_s64))) svint64_t svmovlt(svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_s16))) svint16_t svmovlt(svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_u32))) svuint32_t svmovlt(svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_u64))) svuint64_t svmovlt(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_u16))) svuint16_t svmovlt(svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_u32))) svuint32_t svmul_lane(svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_u64))) svuint64_t svmul_lane(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_u16))) svuint16_t svmul_lane(svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_s32))) svint32_t svmul_lane(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_s64))) svint64_t svmul_lane(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_s16))) svint16_t svmul_lane(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_s32))) svint32_t svmullb(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_s64))) svint64_t svmullb(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_s16))) svint16_t svmullb(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_u32))) svuint32_t svmullb(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_u64))) svuint64_t svmullb(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_u16))) svuint16_t svmullb(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_s32))) svint32_t svmullb(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_s64))) svint64_t svmullb(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_s16))) svint16_t svmullb(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_u32))) svuint32_t svmullb(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_u64))) svuint64_t svmullb(svuint32_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_u16))) svuint16_t svmullb(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_lane_s32))) svint32_t svmullb_lane(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_lane_s64))) svint64_t svmullb_lane(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_lane_u32))) svuint32_t svmullb_lane(svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_lane_u64))) svuint64_t svmullb_lane(svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_s32))) svint32_t svmullt(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_s64))) svint64_t svmullt(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_s16))) svint16_t svmullt(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_u32))) svuint32_t svmullt(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_u64))) svuint64_t svmullt(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_u16))) svuint16_t svmullt(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_s32))) svint32_t svmullt(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_s64))) svint64_t svmullt(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_s16))) svint16_t svmullt(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_u32))) svuint32_t svmullt(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_u64))) svuint64_t svmullt(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_u16))) svuint16_t svmullt(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_lane_s32))) svint32_t svmullt_lane(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_lane_s64))) svint64_t svmullt_lane(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_lane_u32))) svuint32_t svmullt_lane(svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_lane_u64))) svuint64_t svmullt_lane(svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_u8))) svuint8_t svnbsl(svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_u32))) svuint32_t svnbsl(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_u64))) svuint64_t svnbsl(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_u16))) svuint16_t svnbsl(svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_s8))) svint8_t svnbsl(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_s32))) svint32_t svnbsl(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_s64))) svint64_t svnbsl(svint64_t, svint64_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_s16))) svint16_t svnbsl(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_u8))) svuint8_t svnbsl(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_u32))) svuint32_t svnbsl(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_u64))) svuint64_t svnbsl(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_u16))) svuint16_t svnbsl(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_s8))) svint8_t svnbsl(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_s32))) svint32_t svnbsl(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_s64))) svint64_t svnbsl(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_s16))) svint16_t svnbsl(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u8))) svbool_t svnmatch(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u16))) svbool_t svnmatch(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s8))) svbool_t svnmatch(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s16))) svbool_t svnmatch(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmul_n_u8))) svuint8_t svpmul(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmul_u8))) svuint8_t svpmul(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_n_u64))) svuint64_t svpmullb(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_n_u16))) svuint16_t svpmullb(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_u64))) svuint64_t svpmullb(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_u16))) svuint16_t svpmullb(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u8))) svuint8_t svpmullb_pair(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u32))) svuint32_t svpmullb_pair(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u8))) svuint8_t svpmullb_pair(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u32))) svuint32_t svpmullb_pair(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_n_u64))) svuint64_t svpmullt(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_n_u16))) svuint16_t svpmullt(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_u64))) svuint64_t svpmullt(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_u16))) svuint16_t svpmullt(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u8))) svuint8_t svpmullt_pair(svuint8_t, uint8_t); __aio 
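/* Sketch of the polynomial (carry-less) multiply declared above (hypothetical
   helper, SVE2 assumed). svpmullb multiplies the even byte lanes over GF(2)
   and widens the products to 16 bits, a building block for CRC-style
   arithmetic:

     svuint16_t clmul_even_bytes(svuint8_t a, svuint8_t b) {
         return svpmullb(a, b);   // carry-less product of even-numbered lanes
     }
*/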
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u32))) svuint32_t svpmullt_pair(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u8))) svuint8_t svpmullt_pair(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u32))) svuint32_t svpmullt_pair(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s8_m))) svint8_t svqabs_m(svint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s32_m))) svint32_t svqabs_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s64_m))) svint64_t svqabs_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s16_m))) svint16_t svqabs_m(svint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s8_x))) svint8_t svqabs_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s32_x))) svint32_t svqabs_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s64_x))) svint64_t svqabs_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s16_x))) svint16_t svqabs_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s8_z))) svint8_t svqabs_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s32_z))) svint32_t svqabs_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s64_z))) svint64_t svqabs_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s16_z))) svint16_t svqabs_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s8_m))) svint8_t svqadd_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s32_m))) svint32_t svqadd_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s64_m))) svint64_t svqadd_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s16_m))) svint16_t svqadd_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s8_x))) svint8_t svqadd_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s32_x))) svint32_t svqadd_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s64_x))) svint64_t svqadd_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s16_x))) svint16_t svqadd_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s8_z))) svint8_t svqadd_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s32_z))) svint32_t svqadd_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s64_z))) svint64_t svqadd_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s16_z))) svint16_t svqadd_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u8_m))) svuint8_t svqadd_m(svbool_t, svuint8_t, uint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u32_m))) svuint32_t svqadd_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u64_m))) svuint64_t svqadd_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u16_m))) svuint16_t svqadd_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u8_x))) svuint8_t svqadd_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u32_x))) svuint32_t svqadd_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u64_x))) svuint64_t svqadd_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u16_x))) svuint16_t svqadd_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u8_z))) svuint8_t svqadd_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u32_z))) svuint32_t svqadd_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u64_z))) svuint64_t svqadd_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u16_z))) svuint16_t svqadd_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s8_m))) svint8_t svqadd_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s32_m))) svint32_t svqadd_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s64_m))) svint64_t svqadd_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s16_m))) svint16_t svqadd_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s8_x))) svint8_t svqadd_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s32_x))) svint32_t svqadd_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s64_x))) svint64_t svqadd_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s16_x))) svint16_t svqadd_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s8_z))) svint8_t svqadd_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s32_z))) svint32_t svqadd_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s64_z))) svint64_t svqadd_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s16_z))) svint16_t svqadd_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u8_m))) svuint8_t svqadd_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u32_m))) svuint32_t svqadd_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u64_m))) svuint64_t svqadd_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u16_m))) svuint16_t svqadd_m(svbool_t, svuint16_t, svuint16_t); __aio 
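/* Sketch of the saturating add declared above (hypothetical helper). The
   suffix selects how inactive lanes are handled: _m keeps them from the first
   vector operand, _z zeroes them, _x leaves them unspecified (cheapest when
   the predicate is all-true):

     svint32_t sat_add(svbool_t pg, svint32_t a, svint32_t b) {
         return svqadd_x(pg, a, b);   // element-wise add, saturating on overflow
     }

   The _n overloads take a scalar second operand, e.g. svqadd_x(pg, a, (int32_t)1). */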
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u8_x))) svuint8_t svqadd_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u32_x))) svuint32_t svqadd_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u64_x))) svuint64_t svqadd_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u16_x))) svuint16_t svqadd_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u8_z))) svuint8_t svqadd_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u32_z))) svuint32_t svqadd_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u64_z))) svuint64_t svqadd_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u16_z))) svuint16_t svqadd_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcadd_s8))) svint8_t svqcadd(svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcadd_s32))) svint32_t svqcadd(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcadd_s64))) svint64_t svqcadd(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcadd_s16))) svint16_t svqcadd(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_n_s32))) svint32_t svqdmlalb(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_n_s64))) svint64_t svqdmlalb(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_n_s16))) svint16_t svqdmlalb(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_s32))) svint32_t svqdmlalb(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_s64))) svint64_t svqdmlalb(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_s16))) svint16_t svqdmlalb(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_lane_s32))) svint32_t svqdmlalb_lane(svint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_lane_s64))) svint64_t svqdmlalb_lane(svint64_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_n_s32))) svint32_t svqdmlalbt(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_n_s64))) svint64_t svqdmlalbt(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_n_s16))) svint16_t svqdmlalbt(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_s32))) svint32_t svqdmlalbt(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_s64))) svint64_t svqdmlalbt(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_s16))) svint16_t svqdmlalbt(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_n_s32))) svint32_t svqdmlalt(svint32_t, 
svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_n_s64))) svint64_t svqdmlalt(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_n_s16))) svint16_t svqdmlalt(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_s32))) svint32_t svqdmlalt(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_s64))) svint64_t svqdmlalt(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_s16))) svint16_t svqdmlalt(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_lane_s32))) svint32_t svqdmlalt_lane(svint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_lane_s64))) svint64_t svqdmlalt_lane(svint64_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_n_s32))) svint32_t svqdmlslb(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_n_s64))) svint64_t svqdmlslb(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_n_s16))) svint16_t svqdmlslb(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_s32))) svint32_t svqdmlslb(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_s64))) svint64_t svqdmlslb(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_s16))) svint16_t svqdmlslb(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_lane_s32))) svint32_t svqdmlslb_lane(svint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_lane_s64))) svint64_t svqdmlslb_lane(svint64_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_n_s32))) svint32_t svqdmlslbt(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_n_s64))) svint64_t svqdmlslbt(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_n_s16))) svint16_t svqdmlslbt(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_s32))) svint32_t svqdmlslbt(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_s64))) svint64_t svqdmlslbt(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_s16))) svint16_t svqdmlslbt(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_n_s32))) svint32_t svqdmlslt(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_n_s64))) svint64_t svqdmlslt(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_n_s16))) svint16_t svqdmlslt(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_s32))) svint32_t svqdmlslt(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_s64))) svint64_t svqdmlslt(svint64_t, svint32_t, svint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_s16))) svint16_t svqdmlslt(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_lane_s32))) svint32_t svqdmlslt_lane(svint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_lane_s64))) svint64_t svqdmlslt_lane(svint64_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_n_s8))) svint8_t svqdmulh(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_n_s32))) svint32_t svqdmulh(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_n_s64))) svint64_t svqdmulh(svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_n_s16))) svint16_t svqdmulh(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s8))) svint8_t svqdmulh(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s32))) svint32_t svqdmulh(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s64))) svint64_t svqdmulh(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s16))) svint16_t svqdmulh(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_lane_s32))) svint32_t svqdmulh_lane(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_lane_s64))) svint64_t svqdmulh_lane(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_lane_s16))) svint16_t svqdmulh_lane(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_n_s32))) svint32_t svqdmullb(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_n_s64))) svint64_t svqdmullb(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_n_s16))) svint16_t svqdmullb(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_s32))) svint32_t svqdmullb(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_s64))) svint64_t svqdmullb(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_s16))) svint16_t svqdmullb(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_lane_s32))) svint32_t svqdmullb_lane(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_lane_s64))) svint64_t svqdmullb_lane(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_n_s32))) svint32_t svqdmullt(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_n_s64))) svint64_t svqdmullt(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_n_s16))) svint16_t svqdmullt(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_s32))) svint32_t svqdmullt(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_s64))) svint64_t svqdmullt(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_s16))) svint16_t svqdmullt(svint8_t, svint8_t); __aio 
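/* Sketch of the saturating doubling long multiply declared above (hypothetical
   helper). svqdmullb/svqdmullt compute saturate(2 * a * b) on the even/odd
   lanes and widen the result, the usual Q15 * Q15 -> Q31 step in fixed-point
   code:

     svint32_t q15_mul_even(svint16_t a, svint16_t b) {
         return svqdmullb(a, b);   // even lanes only; svqdmullt covers the odd lanes
     }
*/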
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_lane_s32))) svint32_t svqdmullt_lane(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_lane_s64))) svint64_t svqdmullt_lane(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s8_m))) svint8_t svqneg_m(svint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s32_m))) svint32_t svqneg_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s64_m))) svint64_t svqneg_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s16_m))) svint16_t svqneg_m(svint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s8_x))) svint8_t svqneg_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s32_x))) svint32_t svqneg_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s64_x))) svint64_t svqneg_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s16_x))) svint16_t svqneg_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s8_z))) svint8_t svqneg_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s32_z))) svint32_t svqneg_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s64_z))) svint64_t svqneg_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s16_z))) svint16_t svqneg_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_s8))) svint8_t svqrdcmlah(svint8_t, svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_s32))) svint32_t svqrdcmlah(svint32_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_s64))) svint64_t svqrdcmlah(svint64_t, svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_s16))) svint16_t svqrdcmlah(svint16_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_lane_s32))) svint32_t svqrdcmlah_lane(svint32_t, svint32_t, svint32_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_lane_s16))) svint16_t svqrdcmlah_lane(svint16_t, svint16_t, svint16_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_n_s8))) svint8_t svqrdmlah(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_n_s32))) svint32_t svqrdmlah(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_n_s64))) svint64_t svqrdmlah(svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_n_s16))) svint16_t svqrdmlah(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_s8))) svint8_t svqrdmlah(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_s32))) svint32_t svqrdmlah(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_s64))) svint64_t svqrdmlah(svint64_t, svint64_t, svint64_t); __aio 
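/* Sketch of the rounding doubling multiply-accumulate-high declared above
   (hypothetical helper). For 32-bit lanes it behaves roughly as
   saturate(acc + round((2 * a * b) >> 32)), i.e. a fused Q31 multiply-add:

     svint32_t q31_mla(svint32_t acc, svint32_t a, svint32_t b) {
         return svqrdmlah(acc, a, b);
     }
*/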
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_s16))) svint16_t svqrdmlah(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_lane_s32))) svint32_t svqrdmlah_lane(svint32_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_lane_s64))) svint64_t svqrdmlah_lane(svint64_t, svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_lane_s16))) svint16_t svqrdmlah_lane(svint16_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_n_s8))) svint8_t svqrdmlsh(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_n_s32))) svint32_t svqrdmlsh(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_n_s64))) svint64_t svqrdmlsh(svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_n_s16))) svint16_t svqrdmlsh(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_s8))) svint8_t svqrdmlsh(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_s32))) svint32_t svqrdmlsh(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_s64))) svint64_t svqrdmlsh(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_s16))) svint16_t svqrdmlsh(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_lane_s32))) svint32_t svqrdmlsh_lane(svint32_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_lane_s64))) svint64_t svqrdmlsh_lane(svint64_t, svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_lane_s16))) svint16_t svqrdmlsh_lane(svint16_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_n_s8))) svint8_t svqrdmulh(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_n_s32))) svint32_t svqrdmulh(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_n_s64))) svint64_t svqrdmulh(svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_n_s16))) svint16_t svqrdmulh(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_s8))) svint8_t svqrdmulh(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_s32))) svint32_t svqrdmulh(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_s64))) svint64_t svqrdmulh(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_s16))) svint16_t svqrdmulh(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_lane_s32))) svint32_t svqrdmulh_lane(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_lane_s64))) svint64_t svqrdmulh_lane(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_lane_s16))) svint16_t svqrdmulh_lane(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s8_m))) svint8_t 
svqrshl_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s32_m))) svint32_t svqrshl_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s64_m))) svint64_t svqrshl_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s16_m))) svint16_t svqrshl_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s8_x))) svint8_t svqrshl_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s32_x))) svint32_t svqrshl_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s64_x))) svint64_t svqrshl_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s16_x))) svint16_t svqrshl_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s8_z))) svint8_t svqrshl_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s32_z))) svint32_t svqrshl_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s64_z))) svint64_t svqrshl_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s16_z))) svint16_t svqrshl_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u8_m))) svuint8_t svqrshl_m(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u32_m))) svuint32_t svqrshl_m(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u64_m))) svuint64_t svqrshl_m(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u16_m))) svuint16_t svqrshl_m(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u8_x))) svuint8_t svqrshl_x(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u32_x))) svuint32_t svqrshl_x(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u64_x))) svuint64_t svqrshl_x(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u16_x))) svuint16_t svqrshl_x(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u8_z))) svuint8_t svqrshl_z(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u32_z))) svuint32_t svqrshl_z(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u64_z))) svuint64_t svqrshl_z(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u16_z))) svuint16_t svqrshl_z(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s8_m))) svint8_t svqrshl_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s32_m))) svint32_t svqrshl_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s64_m))) svint64_t svqrshl_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s16_m))) svint16_t svqrshl_m(svbool_t, 
svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s8_x))) svint8_t svqrshl_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s32_x))) svint32_t svqrshl_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s64_x))) svint64_t svqrshl_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s16_x))) svint16_t svqrshl_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s8_z))) svint8_t svqrshl_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s32_z))) svint32_t svqrshl_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s64_z))) svint64_t svqrshl_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s16_z))) svint16_t svqrshl_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u8_m))) svuint8_t svqrshl_m(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u32_m))) svuint32_t svqrshl_m(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u64_m))) svuint64_t svqrshl_m(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u16_m))) svuint16_t svqrshl_m(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u8_x))) svuint8_t svqrshl_x(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u32_x))) svuint32_t svqrshl_x(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u64_x))) svuint64_t svqrshl_x(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u16_x))) svuint16_t svqrshl_x(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u8_z))) svuint8_t svqrshl_z(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u32_z))) svuint32_t svqrshl_z(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u64_z))) svuint64_t svqrshl_z(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u16_z))) svuint16_t svqrshl_z(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_s32))) svint16_t svqrshrnb(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_s64))) svint32_t svqrshrnb(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_s16))) svint8_t svqrshrnb(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_u32))) svuint16_t svqrshrnb(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_u64))) svuint32_t svqrshrnb(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_u16))) svuint8_t svqrshrnb(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_s32))) svint16_t svqrshrnt(svint16_t, svint32_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_s64))) svint32_t svqrshrnt(svint32_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_s16))) svint8_t svqrshrnt(svint8_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_u32))) svuint16_t svqrshrnt(svuint16_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_u64))) svuint32_t svqrshrnt(svuint32_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_u16))) svuint8_t svqrshrnt(svuint8_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunb_n_s32))) svuint16_t svqrshrunb(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunb_n_s64))) svuint32_t svqrshrunb(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunb_n_s16))) svuint8_t svqrshrunb(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunt_n_s32))) svuint16_t svqrshrunt(svuint16_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunt_n_s64))) svuint32_t svqrshrunt(svuint32_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunt_n_s16))) svuint8_t svqrshrunt(svuint8_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s8_m))) svint8_t svqshl_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s32_m))) svint32_t svqshl_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s64_m))) svint64_t svqshl_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s16_m))) svint16_t svqshl_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s8_x))) svint8_t svqshl_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s32_x))) svint32_t svqshl_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s64_x))) svint64_t svqshl_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s16_x))) svint16_t svqshl_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s8_z))) svint8_t svqshl_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s32_z))) svint32_t svqshl_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s64_z))) svint64_t svqshl_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s16_z))) svint16_t svqshl_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u8_m))) svuint8_t svqshl_m(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u32_m))) svuint32_t svqshl_m(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u64_m))) svuint64_t svqshl_m(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u16_m))) svuint16_t svqshl_m(svbool_t, svuint16_t, int16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u8_x))) svuint8_t svqshl_x(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u32_x))) svuint32_t svqshl_x(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u64_x))) svuint64_t svqshl_x(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u16_x))) svuint16_t svqshl_x(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u8_z))) svuint8_t svqshl_z(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u32_z))) svuint32_t svqshl_z(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u64_z))) svuint64_t svqshl_z(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u16_z))) svuint16_t svqshl_z(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s8_m))) svint8_t svqshl_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s32_m))) svint32_t svqshl_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s64_m))) svint64_t svqshl_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s16_m))) svint16_t svqshl_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s8_x))) svint8_t svqshl_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s32_x))) svint32_t svqshl_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s64_x))) svint64_t svqshl_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s16_x))) svint16_t svqshl_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s8_z))) svint8_t svqshl_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s32_z))) svint32_t svqshl_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s64_z))) svint64_t svqshl_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s16_z))) svint16_t svqshl_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u8_m))) svuint8_t svqshl_m(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u32_m))) svuint32_t svqshl_m(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u64_m))) svuint64_t svqshl_m(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u16_m))) svuint16_t svqshl_m(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u8_x))) svuint8_t svqshl_x(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u32_x))) svuint32_t svqshl_x(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u64_x))) svuint64_t svqshl_x(svbool_t, svuint64_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u16_x))) svuint16_t svqshl_x(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u8_z))) svuint8_t svqshl_z(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u32_z))) svuint32_t svqshl_z(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u64_z))) svuint64_t svqshl_z(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u16_z))) svuint16_t svqshl_z(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s8_m))) svuint8_t svqshlu_m(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s32_m))) svuint32_t svqshlu_m(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s64_m))) svuint64_t svqshlu_m(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s16_m))) svuint16_t svqshlu_m(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s8_x))) svuint8_t svqshlu_x(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s32_x))) svuint32_t svqshlu_x(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s64_x))) svuint64_t svqshlu_x(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s16_x))) svuint16_t svqshlu_x(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s8_z))) svuint8_t svqshlu_z(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s32_z))) svuint32_t svqshlu_z(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s64_z))) svuint64_t svqshlu_z(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s16_z))) svuint16_t svqshlu_z(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_s32))) svint16_t svqshrnb(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_s64))) svint32_t svqshrnb(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_s16))) svint8_t svqshrnb(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_u32))) svuint16_t svqshrnb(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_u64))) svuint32_t svqshrnb(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_u16))) svuint8_t svqshrnb(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_s32))) svint16_t svqshrnt(svint16_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_s64))) svint32_t svqshrnt(svint32_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_s16))) svint8_t svqshrnt(svint8_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_u32))) svuint16_t svqshrnt(svuint16_t, svuint32_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_u64))) svuint32_t svqshrnt(svuint32_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_u16))) svuint8_t svqshrnt(svuint8_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunb_n_s32))) svuint16_t svqshrunb(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunb_n_s64))) svuint32_t svqshrunb(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunb_n_s16))) svuint8_t svqshrunb(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunt_n_s32))) svuint16_t svqshrunt(svuint16_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunt_n_s64))) svuint32_t svqshrunt(svuint32_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunt_n_s16))) svuint8_t svqshrunt(svuint8_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s8_m))) svint8_t svqsub_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s32_m))) svint32_t svqsub_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s64_m))) svint64_t svqsub_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s16_m))) svint16_t svqsub_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s8_x))) svint8_t svqsub_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s32_x))) svint32_t svqsub_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s64_x))) svint64_t svqsub_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s16_x))) svint16_t svqsub_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s8_z))) svint8_t svqsub_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s32_z))) svint32_t svqsub_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s64_z))) svint64_t svqsub_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s16_z))) svint16_t svqsub_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u8_m))) svuint8_t svqsub_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u32_m))) svuint32_t svqsub_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u64_m))) svuint64_t svqsub_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u16_m))) svuint16_t svqsub_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u8_x))) svuint8_t svqsub_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u32_x))) svuint32_t svqsub_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u64_x))) svuint64_t svqsub_x(svbool_t, svuint64_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u16_x))) svuint16_t svqsub_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u8_z))) svuint8_t svqsub_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u32_z))) svuint32_t svqsub_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u64_z))) svuint64_t svqsub_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u16_z))) svuint16_t svqsub_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s8_m))) svint8_t svqsub_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s32_m))) svint32_t svqsub_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s64_m))) svint64_t svqsub_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s16_m))) svint16_t svqsub_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s8_x))) svint8_t svqsub_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s32_x))) svint32_t svqsub_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s64_x))) svint64_t svqsub_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s16_x))) svint16_t svqsub_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s8_z))) svint8_t svqsub_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s32_z))) svint32_t svqsub_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s64_z))) svint64_t svqsub_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s16_z))) svint16_t svqsub_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u8_m))) svuint8_t svqsub_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u32_m))) svuint32_t svqsub_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u64_m))) svuint64_t svqsub_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u16_m))) svuint16_t svqsub_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u8_x))) svuint8_t svqsub_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u32_x))) svuint32_t svqsub_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u64_x))) svuint64_t svqsub_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u16_x))) svuint16_t svqsub_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u8_z))) svuint8_t svqsub_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u32_z))) svuint32_t svqsub_z(svbool_t, svuint32_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u64_z))) svuint64_t svqsub_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u16_z))) svuint16_t svqsub_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s8_m))) svint8_t svqsubr_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s32_m))) svint32_t svqsubr_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s64_m))) svint64_t svqsubr_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s16_m))) svint16_t svqsubr_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s8_x))) svint8_t svqsubr_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s32_x))) svint32_t svqsubr_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s64_x))) svint64_t svqsubr_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s16_x))) svint16_t svqsubr_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s8_z))) svint8_t svqsubr_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s32_z))) svint32_t svqsubr_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s64_z))) svint64_t svqsubr_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s16_z))) svint16_t svqsubr_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u8_m))) svuint8_t svqsubr_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u32_m))) svuint32_t svqsubr_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u64_m))) svuint64_t svqsubr_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u16_m))) svuint16_t svqsubr_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u8_x))) svuint8_t svqsubr_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u32_x))) svuint32_t svqsubr_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u64_x))) svuint64_t svqsubr_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u16_x))) svuint16_t svqsubr_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u8_z))) svuint8_t svqsubr_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u32_z))) svuint32_t svqsubr_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u64_z))) svuint64_t svqsubr_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u16_z))) svuint16_t svqsubr_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s8_m))) svint8_t svqsubr_m(svbool_t, svint8_t, svint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s32_m))) svint32_t svqsubr_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s64_m))) svint64_t svqsubr_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s16_m))) svint16_t svqsubr_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s8_x))) svint8_t svqsubr_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s32_x))) svint32_t svqsubr_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s64_x))) svint64_t svqsubr_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s16_x))) svint16_t svqsubr_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s8_z))) svint8_t svqsubr_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s32_z))) svint32_t svqsubr_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s64_z))) svint64_t svqsubr_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s16_z))) svint16_t svqsubr_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u8_m))) svuint8_t svqsubr_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u32_m))) svuint32_t svqsubr_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u64_m))) svuint64_t svqsubr_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u16_m))) svuint16_t svqsubr_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u8_x))) svuint8_t svqsubr_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u32_x))) svuint32_t svqsubr_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u64_x))) svuint64_t svqsubr_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u16_x))) svuint16_t svqsubr_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u8_z))) svuint8_t svqsubr_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u32_z))) svuint32_t svqsubr_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u64_z))) svuint64_t svqsubr_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u16_z))) svuint16_t svqsubr_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_s32))) svint16_t svqxtnb(svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_s64))) svint32_t svqxtnb(svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_s16))) svint8_t svqxtnb(svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_u32))) svuint16_t svqxtnb(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_u64))) svuint32_t 
svqxtnb(svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_u16))) svuint8_t svqxtnb(svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_s32))) svint16_t svqxtnt(svint16_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_s64))) svint32_t svqxtnt(svint32_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_s16))) svint8_t svqxtnt(svint8_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_u32))) svuint16_t svqxtnt(svuint16_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_u64))) svuint32_t svqxtnt(svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_u16))) svuint8_t svqxtnt(svuint8_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunb_s32))) svuint16_t svqxtunb(svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunb_s64))) svuint32_t svqxtunb(svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunb_s16))) svuint8_t svqxtunb(svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunt_s32))) svuint16_t svqxtunt(svuint16_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunt_s64))) svuint32_t svqxtunt(svuint32_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunt_s16))) svuint8_t svqxtunt(svuint8_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_u32))) svuint16_t svraddhnb(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_u64))) svuint32_t svraddhnb(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_u16))) svuint8_t svraddhnb(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_s32))) svint16_t svraddhnb(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_s64))) svint32_t svraddhnb(svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_s16))) svint8_t svraddhnb(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_u32))) svuint16_t svraddhnb(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_u64))) svuint32_t svraddhnb(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_u16))) svuint8_t svraddhnb(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_s32))) svint16_t svraddhnb(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_s64))) svint32_t svraddhnb(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_s16))) svint8_t svraddhnb(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_u32))) svuint16_t svraddhnt(svuint16_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_u64))) svuint32_t svraddhnt(svuint32_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_u16))) svuint8_t svraddhnt(svuint8_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_s32))) svint16_t svraddhnt(svint16_t, svint32_t, int32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_s64))) svint32_t svraddhnt(svint32_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_s16))) svint8_t svraddhnt(svint8_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_u32))) svuint16_t svraddhnt(svuint16_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_u64))) svuint32_t svraddhnt(svuint32_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_u16))) svuint8_t svraddhnt(svuint8_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_s32))) svint16_t svraddhnt(svint16_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_s64))) svint32_t svraddhnt(svint32_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_s16))) svint8_t svraddhnt(svint8_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_u32_m))) svuint32_t svrecpe_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_u32_x))) svuint32_t svrecpe_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_u32_z))) svuint32_t svrecpe_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s8_m))) svint8_t svrhadd_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s32_m))) svint32_t svrhadd_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s64_m))) svint64_t svrhadd_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s16_m))) svint16_t svrhadd_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s8_x))) svint8_t svrhadd_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s32_x))) svint32_t svrhadd_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s64_x))) svint64_t svrhadd_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s16_x))) svint16_t svrhadd_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s8_z))) svint8_t svrhadd_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s32_z))) svint32_t svrhadd_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s64_z))) svint64_t svrhadd_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s16_z))) svint16_t svrhadd_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u8_m))) svuint8_t svrhadd_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u32_m))) svuint32_t svrhadd_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u64_m))) svuint64_t svrhadd_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u16_m))) svuint16_t svrhadd_m(svbool_t, svuint16_t, uint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u8_x))) svuint8_t svrhadd_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u32_x))) svuint32_t svrhadd_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u64_x))) svuint64_t svrhadd_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u16_x))) svuint16_t svrhadd_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u8_z))) svuint8_t svrhadd_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u32_z))) svuint32_t svrhadd_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u64_z))) svuint64_t svrhadd_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u16_z))) svuint16_t svrhadd_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s8_m))) svint8_t svrhadd_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s32_m))) svint32_t svrhadd_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s64_m))) svint64_t svrhadd_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s16_m))) svint16_t svrhadd_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s8_x))) svint8_t svrhadd_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s32_x))) svint32_t svrhadd_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s64_x))) svint64_t svrhadd_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s16_x))) svint16_t svrhadd_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s8_z))) svint8_t svrhadd_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s32_z))) svint32_t svrhadd_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s64_z))) svint64_t svrhadd_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s16_z))) svint16_t svrhadd_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u8_m))) svuint8_t svrhadd_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u32_m))) svuint32_t svrhadd_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u64_m))) svuint64_t svrhadd_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u16_m))) svuint16_t svrhadd_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u8_x))) svuint8_t svrhadd_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u32_x))) svuint32_t svrhadd_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u64_x))) svuint64_t svrhadd_x(svbool_t, svuint64_t, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u16_x))) svuint16_t svrhadd_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u8_z))) svuint8_t svrhadd_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u32_z))) svuint32_t svrhadd_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u64_z))) svuint64_t svrhadd_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u16_z))) svuint16_t svrhadd_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s8_m))) svint8_t svrshl_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s32_m))) svint32_t svrshl_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s64_m))) svint64_t svrshl_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s16_m))) svint16_t svrshl_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s8_x))) svint8_t svrshl_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s32_x))) svint32_t svrshl_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s64_x))) svint64_t svrshl_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s16_x))) svint16_t svrshl_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s8_z))) svint8_t svrshl_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s32_z))) svint32_t svrshl_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s64_z))) svint64_t svrshl_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s16_z))) svint16_t svrshl_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u8_m))) svuint8_t svrshl_m(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u32_m))) svuint32_t svrshl_m(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u64_m))) svuint64_t svrshl_m(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u16_m))) svuint16_t svrshl_m(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u8_x))) svuint8_t svrshl_x(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u32_x))) svuint32_t svrshl_x(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u64_x))) svuint64_t svrshl_x(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u16_x))) svuint16_t svrshl_x(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u8_z))) svuint8_t svrshl_z(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u32_z))) svuint32_t svrshl_z(svbool_t, svuint32_t, int32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u64_z))) svuint64_t svrshl_z(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u16_z))) svuint16_t svrshl_z(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_m))) svint8_t svrshl_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_m))) svint32_t svrshl_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_m))) svint64_t svrshl_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_m))) svint16_t svrshl_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_x))) svint8_t svrshl_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_x))) svint32_t svrshl_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_x))) svint64_t svrshl_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_x))) svint16_t svrshl_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_z))) svint8_t svrshl_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_z))) svint32_t svrshl_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_z))) svint64_t svrshl_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_z))) svint16_t svrshl_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_m))) svuint8_t svrshl_m(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_m))) svuint32_t svrshl_m(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_m))) svuint64_t svrshl_m(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_m))) svuint16_t svrshl_m(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_x))) svuint8_t svrshl_x(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_x))) svuint32_t svrshl_x(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_x))) svuint64_t svrshl_x(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_x))) svuint16_t svrshl_x(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_z))) svuint8_t svrshl_z(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_z))) svuint32_t svrshl_z(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_z))) svuint64_t svrshl_z(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_z))) svuint16_t svrshl_z(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s8_m))) svint8_t svrshr_m(svbool_t, svint8_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s32_m))) svint32_t svrshr_m(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s64_m))) svint64_t svrshr_m(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s16_m))) svint16_t svrshr_m(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u8_m))) svuint8_t svrshr_m(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u32_m))) svuint32_t svrshr_m(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u64_m))) svuint64_t svrshr_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u16_m))) svuint16_t svrshr_m(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s8_x))) svint8_t svrshr_x(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s32_x))) svint32_t svrshr_x(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s64_x))) svint64_t svrshr_x(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s16_x))) svint16_t svrshr_x(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u8_x))) svuint8_t svrshr_x(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u32_x))) svuint32_t svrshr_x(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u64_x))) svuint64_t svrshr_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u16_x))) svuint16_t svrshr_x(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s8_z))) svint8_t svrshr_z(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s32_z))) svint32_t svrshr_z(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s64_z))) svint64_t svrshr_z(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s16_z))) svint16_t svrshr_z(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u8_z))) svuint8_t svrshr_z(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u32_z))) svuint32_t svrshr_z(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u64_z))) svuint64_t svrshr_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u16_z))) svuint16_t svrshr_z(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_u32))) svuint16_t svrshrnb(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_u64))) svuint32_t svrshrnb(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_u16))) svuint8_t svrshrnb(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_s32))) svint16_t svrshrnb(svint32_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_s64))) svint32_t svrshrnb(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_s16))) svint8_t svrshrnb(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_u32))) svuint16_t svrshrnt(svuint16_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_u64))) svuint32_t svrshrnt(svuint32_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_u16))) svuint8_t svrshrnt(svuint8_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_s32))) svint16_t svrshrnt(svint16_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_s64))) svint32_t svrshrnt(svint32_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_s16))) svint8_t svrshrnt(svint8_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_u32_m))) svuint32_t svrsqrte_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_u32_x))) svuint32_t svrsqrte_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_u32_z))) svuint32_t svrsqrte_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_s8))) svint8_t svrsra(svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_s32))) svint32_t svrsra(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_s64))) svint64_t svrsra(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_s16))) svint16_t svrsra(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_u8))) svuint8_t svrsra(svuint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_u32))) svuint32_t svrsra(svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_u64))) svuint64_t svrsra(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_u16))) svuint16_t svrsra(svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_u32))) svuint16_t svrsubhnb(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_u64))) svuint32_t svrsubhnb(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_u16))) svuint8_t svrsubhnb(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_s32))) svint16_t svrsubhnb(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_s64))) svint32_t svrsubhnb(svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_s16))) svint8_t svrsubhnb(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_u32))) svuint16_t svrsubhnb(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_u64))) svuint32_t svrsubhnb(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_u16))) svuint8_t svrsubhnb(svuint16_t, svuint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_s32))) svint16_t svrsubhnb(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_s64))) svint32_t svrsubhnb(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_s16))) svint8_t svrsubhnb(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_u32))) svuint16_t svrsubhnt(svuint16_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_u64))) svuint32_t svrsubhnt(svuint32_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_u16))) svuint8_t svrsubhnt(svuint8_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_s32))) svint16_t svrsubhnt(svint16_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_s64))) svint32_t svrsubhnt(svint32_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_s16))) svint8_t svrsubhnt(svint8_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_u32))) svuint16_t svrsubhnt(svuint16_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_u64))) svuint32_t svrsubhnt(svuint32_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_u16))) svuint8_t svrsubhnt(svuint8_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_s32))) svint16_t svrsubhnt(svint16_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_s64))) svint32_t svrsubhnt(svint32_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_s16))) svint8_t svrsubhnt(svint8_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclb_n_u32))) svuint32_t svsbclb(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclb_n_u64))) svuint64_t svsbclb(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclb_u32))) svuint32_t svsbclb(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclb_u64))) svuint64_t svsbclb(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclt_n_u32))) svuint32_t svsbclt(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclt_n_u64))) svuint64_t svsbclt(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclt_u32))) svuint32_t svsbclt(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclt_u64))) svuint64_t svsbclt(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_s32))) svint32_t svshllb(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_s64))) svint64_t svshllb(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_s16))) svint16_t svshllb(svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_u32))) svuint32_t svshllb(svuint16_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_u64))) svuint64_t svshllb(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_u16))) svuint16_t svshllb(svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_s32))) svint32_t svshllt(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_s64))) svint64_t svshllt(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_s16))) svint16_t svshllt(svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_u32))) svuint32_t svshllt(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_u64))) svuint64_t svshllt(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_u16))) svuint16_t svshllt(svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_u32))) svuint16_t svshrnb(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_u64))) svuint32_t svshrnb(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_u16))) svuint8_t svshrnb(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_s32))) svint16_t svshrnb(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_s64))) svint32_t svshrnb(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_s16))) svint8_t svshrnb(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_u32))) svuint16_t svshrnt(svuint16_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_u64))) svuint32_t svshrnt(svuint32_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_u16))) svuint8_t svshrnt(svuint8_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_s32))) svint16_t svshrnt(svint16_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_s64))) svint32_t svshrnt(svint32_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_s16))) svint8_t svshrnt(svint8_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_u8))) svuint8_t svsli(svuint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_u32))) svuint32_t svsli(svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_u64))) svuint64_t svsli(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_u16))) svuint16_t svsli(svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_s8))) svint8_t svsli(svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_s32))) svint32_t svsli(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_s64))) svint64_t svsli(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_s16))) svint16_t svsli(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u8_m))) svuint8_t svsqadd_m(svbool_t, 
svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u32_m))) svuint32_t svsqadd_m(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u64_m))) svuint64_t svsqadd_m(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u16_m))) svuint16_t svsqadd_m(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u8_x))) svuint8_t svsqadd_x(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u32_x))) svuint32_t svsqadd_x(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u64_x))) svuint64_t svsqadd_x(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u16_x))) svuint16_t svsqadd_x(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u8_z))) svuint8_t svsqadd_z(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u32_z))) svuint32_t svsqadd_z(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u64_z))) svuint64_t svsqadd_z(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u16_z))) svuint16_t svsqadd_z(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u8_m))) svuint8_t svsqadd_m(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u32_m))) svuint32_t svsqadd_m(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u64_m))) svuint64_t svsqadd_m(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u16_m))) svuint16_t svsqadd_m(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u8_x))) svuint8_t svsqadd_x(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u32_x))) svuint32_t svsqadd_x(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u64_x))) svuint64_t svsqadd_x(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u16_x))) svuint16_t svsqadd_x(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u8_z))) svuint8_t svsqadd_z(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u32_z))) svuint32_t svsqadd_z(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u64_z))) svuint64_t svsqadd_z(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u16_z))) svuint16_t svsqadd_z(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_s8))) svint8_t svsra(svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_s32))) svint32_t svsra(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_s64))) svint64_t svsra(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_s16))) svint16_t svsra(svint16_t, svint16_t, uint64_t); 
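/* Editor's note (addition, not part of the original header text): the overloaded
   forms declared above resolve to type-suffixed SVE2 builtins via
   __clang_arm_builtin_alias. The sketch below shows one possible use of the
   saturating subtract (svqsub_x) and shift-right-accumulate (svsra) forms; it
   assumes a compiler targeting SVE2 (for example clang with
   -march=armv8-a+sve2) and <arm_sve.h>, and the function and variable names
   are illustrative only. */
#include <arm_sve.h>
#include <stdint.h>
#include <stddef.h>

/* dst[i] = saturate(a[i] - b[i]) + (a[i] >> 2) for each active lane. */
void saturating_sub_then_sra(int16_t *dst, const int16_t *a,
                             const int16_t *b, size_t n) {
  for (size_t i = 0; i < n; i += svcnth()) {
    svbool_t pg = svwhilelt_b16((uint64_t)i, (uint64_t)n);
    svint16_t va = svld1(pg, a + i);
    svint16_t vb = svld1(pg, b + i);
    svint16_t diff = svqsub_x(pg, va, vb);   /* saturating subtract          */
    svint16_t acc  = svsra(diff, va, 2);     /* diff + (va >> 2), per lane   */
    svst1(pg, dst + i, acc);
  }
}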
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_u8))) svuint8_t svsra(svuint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_u32))) svuint32_t svsra(svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_u64))) svuint64_t svsra(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_u16))) svuint16_t svsra(svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_u8))) svuint8_t svsri(svuint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_u32))) svuint32_t svsri(svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_u64))) svuint64_t svsri(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_u16))) svuint16_t svsri(svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_s8))) svint8_t svsri(svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_s32))) svint32_t svsri(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_s64))) svint64_t svsri(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_s16))) svint16_t svsri(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_u32))) void svstnt1_scatter_index(svbool_t, svuint32_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_u64))) void svstnt1_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_f64))) void svstnt1_scatter_index(svbool_t, svuint64_t, int64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_f32))) void svstnt1_scatter_index(svbool_t, svuint32_t, int64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_s32))) void svstnt1_scatter_index(svbool_t, svuint32_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_s64))) void svstnt1_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_u32))) void svstnt1_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_u64))) void svstnt1_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_f64))) void svstnt1_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_f32))) void svstnt1_scatter_offset(svbool_t, svuint32_t, int64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_s32))) void svstnt1_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_s64))) void svstnt1_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); 
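/*
 * Illustrative usage sketch (not part of the generated header). It combines the
 * svsri overload declared above (shift-right-and-insert) with the non-temporal
 * scatter store svstnt1_scatter_offset that takes a vector of 64-bit base
 * addresses. Function and parameter names are hypothetical; assumes <arm_sve.h>
 * and an SVE2-enabled target.
 */
#include <arm_sve.h>

/* SRI: merge (src >> 8) into dst, keeping dst's top 8 bits in each lane, then
 * store each lane non-temporally to the byte address held in bases[i]. */
static inline void example_sri_then_scatter(svbool_t pg, svuint64_t dst,
                                            svuint64_t src, svuint64_t bases) {
    svuint64_t merged = svsri(dst, src, 8);
    svstnt1_scatter_offset(pg, bases, 0, merged);   /* STNT1D, offset 0 */
}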
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_u32))) void svstnt1_scatter(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_u64))) void svstnt1_scatter(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_f64))) void svstnt1_scatter(svbool_t, svuint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_f32))) void svstnt1_scatter(svbool_t, svuint32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_s32))) void svstnt1_scatter(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_s64))) void svstnt1_scatter(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_u64))) void svstnt1_scatter_index(svbool_t, uint64_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_f64))) void svstnt1_scatter_index(svbool_t, float64_t *, svint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_s64))) void svstnt1_scatter_index(svbool_t, int64_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_u64))) void svstnt1_scatter_index(svbool_t, uint64_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_f64))) void svstnt1_scatter_index(svbool_t, float64_t *, svuint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_s64))) void svstnt1_scatter_index(svbool_t, int64_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_u32))) void svstnt1_scatter_offset(svbool_t, uint32_t *, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_f32))) void svstnt1_scatter_offset(svbool_t, float32_t *, svuint32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_s32))) void svstnt1_scatter_offset(svbool_t, int32_t *, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_u64))) void svstnt1_scatter_offset(svbool_t, uint64_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_f64))) void svstnt1_scatter_offset(svbool_t, float64_t *, svint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_s64))) void svstnt1_scatter_offset(svbool_t, int64_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_u64))) void svstnt1_scatter_offset(svbool_t, uint64_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_f64))) void svstnt1_scatter_offset(svbool_t, float64_t *, svuint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_s64))) void svstnt1_scatter_offset(svbool_t, int64_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_u32))) void svstnt1b_scatter_offset(svbool_t, 
svuint32_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_u64))) void svstnt1b_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_s32))) void svstnt1b_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_s64))) void svstnt1b_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_u32))) void svstnt1b_scatter(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_u64))) void svstnt1b_scatter(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_s32))) void svstnt1b_scatter(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_s64))) void svstnt1b_scatter(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_s32))) void svstnt1b_scatter_offset(svbool_t, int8_t *, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_u32))) void svstnt1b_scatter_offset(svbool_t, uint8_t *, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_s64))) void svstnt1b_scatter_offset(svbool_t, int8_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_u64))) void svstnt1b_scatter_offset(svbool_t, uint8_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_s64))) void svstnt1b_scatter_offset(svbool_t, int8_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_u64))) void svstnt1b_scatter_offset(svbool_t, uint8_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_u32))) void svstnt1h_scatter_index(svbool_t, svuint32_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_u64))) void svstnt1h_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_s32))) void svstnt1h_scatter_index(svbool_t, svuint32_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_s64))) void svstnt1h_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_u32))) void svstnt1h_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_u64))) void svstnt1h_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_s32))) void svstnt1h_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_s64))) void svstnt1h_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_u32))) void svstnt1h_scatter(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_u64))) void svstnt1h_scatter(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_s32))) void svstnt1h_scatter(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_s64))) void svstnt1h_scatter(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_s64))) void svstnt1h_scatter_index(svbool_t, int16_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_u64))) void svstnt1h_scatter_index(svbool_t, uint16_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_s64))) void svstnt1h_scatter_index(svbool_t, int16_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_u64))) void svstnt1h_scatter_index(svbool_t, uint16_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_s32))) void svstnt1h_scatter_offset(svbool_t, int16_t *, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_u32))) void svstnt1h_scatter_offset(svbool_t, uint16_t *, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_s64))) void svstnt1h_scatter_offset(svbool_t, int16_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_u64))) void svstnt1h_scatter_offset(svbool_t, uint16_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_s64))) void svstnt1h_scatter_offset(svbool_t, int16_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_u64))) void svstnt1h_scatter_offset(svbool_t, uint16_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_u64))) void svstnt1w_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_s64))) void svstnt1w_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_u64))) void svstnt1w_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_s64))) void svstnt1w_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_u64))) void svstnt1w_scatter(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_s64))) void svstnt1w_scatter(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_s64))) void svstnt1w_scatter_index(svbool_t, int32_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_u64))) void 
svstnt1w_scatter_index(svbool_t, uint32_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_s64))) void svstnt1w_scatter_index(svbool_t, int32_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_u64))) void svstnt1w_scatter_index(svbool_t, uint32_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_s64))) void svstnt1w_scatter_offset(svbool_t, int32_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_u64))) void svstnt1w_scatter_offset(svbool_t, uint32_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_s64))) void svstnt1w_scatter_offset(svbool_t, int32_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_u64))) void svstnt1w_scatter_offset(svbool_t, uint32_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_u32))) svuint16_t svsubhnb(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_u64))) svuint32_t svsubhnb(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_u16))) svuint8_t svsubhnb(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_s32))) svint16_t svsubhnb(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_s64))) svint32_t svsubhnb(svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_s16))) svint8_t svsubhnb(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_u32))) svuint16_t svsubhnb(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_u64))) svuint32_t svsubhnb(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_u16))) svuint8_t svsubhnb(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_s32))) svint16_t svsubhnb(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_s64))) svint32_t svsubhnb(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_s16))) svint8_t svsubhnb(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_u32))) svuint16_t svsubhnt(svuint16_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_u64))) svuint32_t svsubhnt(svuint32_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_u16))) svuint8_t svsubhnt(svuint8_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_s32))) svint16_t svsubhnt(svint16_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_s64))) svint32_t svsubhnt(svint32_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_s16))) svint8_t svsubhnt(svint8_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_u32))) svuint16_t svsubhnt(svuint16_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_u64))) svuint32_t 
svsubhnt(svuint32_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_u16))) svuint8_t svsubhnt(svuint8_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_s32))) svint16_t svsubhnt(svint16_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_s64))) svint32_t svsubhnt(svint32_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_s16))) svint8_t svsubhnt(svint8_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_s32))) svint32_t svsublb(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_s64))) svint64_t svsublb(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_s16))) svint16_t svsublb(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_u32))) svuint32_t svsublb(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_u64))) svuint64_t svsublb(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_u16))) svuint16_t svsublb(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_s32))) svint32_t svsublb(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_s64))) svint64_t svsublb(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_s16))) svint16_t svsublb(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_u32))) svuint32_t svsublb(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_u64))) svuint64_t svsublb(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_u16))) svuint16_t svsublb(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_n_s32))) svint32_t svsublbt(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_n_s64))) svint64_t svsublbt(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_n_s16))) svint16_t svsublbt(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_s32))) svint32_t svsublbt(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_s64))) svint64_t svsublbt(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_s16))) svint16_t svsublbt(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_s32))) svint32_t svsublt(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_s64))) svint64_t svsublt(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_s16))) svint16_t svsublt(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_u32))) svuint32_t svsublt(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_u64))) svuint64_t svsublt(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_u16))) svuint16_t svsublt(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_s32))) svint32_t svsublt(svint16_t, svint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_s64))) svint64_t svsublt(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_s16))) svint16_t svsublt(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_u32))) svuint32_t svsublt(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_u64))) svuint64_t svsublt(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_u16))) svuint16_t svsublt(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_n_s32))) svint32_t svsubltb(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_n_s64))) svint64_t svsubltb(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_n_s16))) svint16_t svsubltb(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_s32))) svint32_t svsubltb(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_s64))) svint64_t svsubltb(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_s16))) svint16_t svsubltb(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_s32))) svint32_t svsubwb(svint32_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_s64))) svint64_t svsubwb(svint64_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_s16))) svint16_t svsubwb(svint16_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_u32))) svuint32_t svsubwb(svuint32_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_u64))) svuint64_t svsubwb(svuint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_u16))) svuint16_t svsubwb(svuint16_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_s32))) svint32_t svsubwb(svint32_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_s64))) svint64_t svsubwb(svint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_s16))) svint16_t svsubwb(svint16_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_u32))) svuint32_t svsubwb(svuint32_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_u64))) svuint64_t svsubwb(svuint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_u16))) svuint16_t svsubwb(svuint16_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_s32))) svint32_t svsubwt(svint32_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_s64))) svint64_t svsubwt(svint64_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_s16))) svint16_t svsubwt(svint16_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_u32))) svuint32_t svsubwt(svuint32_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_u64))) svuint64_t svsubwt(svuint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_u16))) svuint16_t svsubwt(svuint16_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_s32))) svint32_t svsubwt(svint32_t, svint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_s64))) svint64_t svsubwt(svint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_s16))) svint16_t svsubwt(svint16_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_u32))) svuint32_t svsubwt(svuint32_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_u64))) svuint64_t svsubwt(svuint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_u16))) svuint16_t svsubwt(svuint16_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_u8))) svuint8_t svtbl2(svuint8x2_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_u32))) svuint32_t svtbl2(svuint32x2_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_u64))) svuint64_t svtbl2(svuint64x2_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_u16))) svuint16_t svtbl2(svuint16x2_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_s8))) svint8_t svtbl2(svint8x2_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_f64))) svfloat64_t svtbl2(svfloat64x2_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_f32))) svfloat32_t svtbl2(svfloat32x2_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_f16))) svfloat16_t svtbl2(svfloat16x2_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_s32))) svint32_t svtbl2(svint32x2_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_s64))) svint64_t svtbl2(svint64x2_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_s16))) svint16_t svtbl2(svint16x2_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_u8))) svuint8_t svtbx(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_u32))) svuint32_t svtbx(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_u64))) svuint64_t svtbx(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_u16))) svuint16_t svtbx(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_s8))) svint8_t svtbx(svint8_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_f64))) svfloat64_t svtbx(svfloat64_t, svfloat64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_f32))) svfloat32_t svtbx(svfloat32_t, svfloat32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_f16))) svfloat16_t svtbx(svfloat16_t, svfloat16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_s32))) svint32_t svtbx(svint32_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_s64))) svint64_t svtbx(svint64_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_s16))) svint16_t svtbx(svint16_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s8_m))) svint8_t svuqadd_m(svbool_t, svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s32_m))) svint32_t svuqadd_m(svbool_t, svint32_t, uint32_t); 
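/*
 * Illustrative usage sketch (not part of the generated header). It exercises
 * the table lookups declared above: svtbl2 indexes a table formed by a pair of
 * vectors (out-of-range indices produce zero), while svtbx keeps the
 * corresponding lane of its first operand when an index is out of range.
 * Function names are hypothetical; assumes <arm_sve.h> and an SVE2 target.
 */
#include <arm_sve.h>

/* TBL2: select bytes from the two vectors in `table`, indexed by `idx`. */
static inline svuint8_t example_tbl2(svuint8x2_t table, svuint8_t idx) {
    return svtbl2(table, idx);
}

/* TBX: single-vector table lookup where lanes with out-of-range indices
 * retain the value already present in `fallback`. */
static inline svuint8_t example_tbx(svuint8_t fallback, svuint8_t table,
                                    svuint8_t idx) {
    return svtbx(fallback, table, idx);
}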
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s64_m))) svint64_t svuqadd_m(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s16_m))) svint16_t svuqadd_m(svbool_t, svint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s8_x))) svint8_t svuqadd_x(svbool_t, svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s32_x))) svint32_t svuqadd_x(svbool_t, svint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s64_x))) svint64_t svuqadd_x(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s16_x))) svint16_t svuqadd_x(svbool_t, svint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s8_z))) svint8_t svuqadd_z(svbool_t, svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s32_z))) svint32_t svuqadd_z(svbool_t, svint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s64_z))) svint64_t svuqadd_z(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s16_z))) svint16_t svuqadd_z(svbool_t, svint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s8_m))) svint8_t svuqadd_m(svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s32_m))) svint32_t svuqadd_m(svbool_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s64_m))) svint64_t svuqadd_m(svbool_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s16_m))) svint16_t svuqadd_m(svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s8_x))) svint8_t svuqadd_x(svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s32_x))) svint32_t svuqadd_x(svbool_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s64_x))) svint64_t svuqadd_x(svbool_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s16_x))) svint16_t svuqadd_x(svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s8_z))) svint8_t svuqadd_z(svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s32_z))) svint32_t svuqadd_z(svbool_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s64_z))) svint64_t svuqadd_z(svbool_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s16_z))) svint16_t svuqadd_z(svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_s32))) svbool_t svwhilege_b8(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_s32))) svbool_t svwhilege_b32(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_s32))) svbool_t svwhilege_b64(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_s32))) svbool_t svwhilege_b16(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_s64))) svbool_t svwhilege_b8(int64_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_s64))) svbool_t svwhilege_b32(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_s64))) svbool_t svwhilege_b64(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_s64))) svbool_t svwhilege_b16(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_u32))) svbool_t svwhilege_b8(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_u32))) svbool_t svwhilege_b32(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_u32))) svbool_t svwhilege_b64(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_u32))) svbool_t svwhilege_b16(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_u64))) svbool_t svwhilege_b8(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_u64))) svbool_t svwhilege_b32(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_u64))) svbool_t svwhilege_b64(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_u64))) svbool_t svwhilege_b16(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_s32))) svbool_t svwhilegt_b8(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_s32))) svbool_t svwhilegt_b32(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_s32))) svbool_t svwhilegt_b64(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_s32))) svbool_t svwhilegt_b16(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_s64))) svbool_t svwhilegt_b8(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_s64))) svbool_t svwhilegt_b32(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_s64))) svbool_t svwhilegt_b64(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_s64))) svbool_t svwhilegt_b16(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_u32))) svbool_t svwhilegt_b8(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_u32))) svbool_t svwhilegt_b32(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_u32))) svbool_t svwhilegt_b64(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_u32))) svbool_t svwhilegt_b16(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_u64))) svbool_t svwhilegt_b8(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_u64))) svbool_t svwhilegt_b32(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_u64))) svbool_t svwhilegt_b64(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_u64))) svbool_t svwhilegt_b16(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_u8))) svbool_t svwhilerw(uint8_t const *, uint8_t const *); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_s8))) svbool_t svwhilerw(int8_t const *, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_u64))) svbool_t svwhilerw(uint64_t const *, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_f64))) svbool_t svwhilerw(float64_t const *, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_s64))) svbool_t svwhilerw(int64_t const *, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_u16))) svbool_t svwhilerw(uint16_t const *, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_f16))) svbool_t svwhilerw(float16_t const *, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_s16))) svbool_t svwhilerw(int16_t const *, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_u32))) svbool_t svwhilerw(uint32_t const *, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_f32))) svbool_t svwhilerw(float32_t const *, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_s32))) svbool_t svwhilerw(int32_t const *, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_u8))) svbool_t svwhilewr(uint8_t const *, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_s8))) svbool_t svwhilewr(int8_t const *, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_u64))) svbool_t svwhilewr(uint64_t const *, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_f64))) svbool_t svwhilewr(float64_t const *, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_s64))) svbool_t svwhilewr(int64_t const *, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_u16))) svbool_t svwhilewr(uint16_t const *, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_f16))) svbool_t svwhilewr(float16_t const *, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_s16))) svbool_t svwhilewr(int16_t const *, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_u32))) svbool_t svwhilewr(uint32_t const *, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_f32))) svbool_t svwhilewr(float32_t const *, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_s32))) svbool_t svwhilewr(int32_t const *, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_u8))) svuint8_t svxar(svuint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_u32))) svuint32_t svxar(svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_u64))) svuint64_t svxar(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_u16))) svuint16_t svxar(svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_s8))) svint8_t svxar(svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_s32))) svint32_t svxar(svint32_t, svint32_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_s64))) svint64_t svxar(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_s16))) svint16_t svxar(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_bf16))) svbfloat16_t svtbl2_bf16(svbfloat16x2_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_bf16))) svbfloat16_t svtbx_bf16(svbfloat16_t, svbfloat16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_bf16))) svbool_t svwhilerw_bf16(bfloat16_t const *, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_bf16))) svbool_t svwhilewr_bf16(bfloat16_t const *, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_bf16))) svbfloat16_t svtbl2(svbfloat16x2_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_bf16))) svbfloat16_t svtbx(svbfloat16_t, svbfloat16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_bf16))) svbool_t svwhilerw(bfloat16_t const *, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_bf16))) svbool_t svwhilewr(bfloat16_t const *, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_u8))) svuint8_t svaesd_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_u8))) svuint8_t svaese_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesimc_u8))) svuint8_t svaesimc_u8(svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesmc_u8))) svuint8_t svaesmc_u8(svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u64))) svuint64_t svpmullb_pair_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u64))) svuint64_t svpmullb_pair_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u64))) svuint64_t svpmullt_pair_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u64))) svuint64_t svpmullt_pair_u64(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_u8))) svuint8_t svaesd(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_u8))) svuint8_t svaese(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesimc_u8))) svuint8_t svaesimc(svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesmc_u8))) svuint8_t svaesmc(svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u64))) svuint64_t svpmullb_pair(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u64))) svuint64_t svpmullb_pair(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u64))) svuint64_t svpmullt_pair(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u64))) svuint64_t svpmullt_pair(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u8))) svuint8_t svbdep_n_u8(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u32))) svuint32_t svbdep_n_u32(svuint32_t, uint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u64))) svuint64_t svbdep_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u16))) svuint16_t svbdep_n_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u8))) svuint8_t svbdep_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u32))) svuint32_t svbdep_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u64))) svuint64_t svbdep_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u16))) svuint16_t svbdep_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u8))) svuint8_t svbext_n_u8(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u32))) svuint32_t svbext_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u64))) svuint64_t svbext_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u16))) svuint16_t svbext_n_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u8))) svuint8_t svbext_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u32))) svuint32_t svbext_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u64))) svuint64_t svbext_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u16))) svuint16_t svbext_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u8))) svuint8_t svbgrp_n_u8(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u32))) svuint32_t svbgrp_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u64))) svuint64_t svbgrp_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u16))) svuint16_t svbgrp_n_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u8))) svuint8_t svbgrp_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u32))) svuint32_t svbgrp_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u64))) svuint64_t svbgrp_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u16))) svuint16_t svbgrp_u16(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u8))) svuint8_t svbdep(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u32))) svuint32_t svbdep(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u64))) svuint64_t svbdep(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u16))) svuint16_t svbdep(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u8))) svuint8_t svbdep(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u32))) svuint32_t svbdep(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u64))) svuint64_t svbdep(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u16))) 
svuint16_t svbdep(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u8))) svuint8_t svbext(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u32))) svuint32_t svbext(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u64))) svuint64_t svbext(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u16))) svuint16_t svbext(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u8))) svuint8_t svbext(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u32))) svuint32_t svbext(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u64))) svuint64_t svbext(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u16))) svuint16_t svbext(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u8))) svuint8_t svbgrp(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u32))) svuint32_t svbgrp(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u64))) svuint64_t svbgrp(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u16))) svuint16_t svbgrp(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u8))) svuint8_t svbgrp(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u32))) svuint32_t svbgrp(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u64))) svuint64_t svbgrp(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u16))) svuint16_t svbgrp(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_u64))) svuint64_t svrax1_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_s64))) svint64_t svrax1_s64(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_u64))) svuint64_t svrax1(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_s64))) svint64_t svrax1(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4e_u32))) svuint32_t svsm4e_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4ekey_u32))) svuint32_t svsm4ekey_u32(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4e_u32))) svuint32_t svsm4e(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4ekey_u32))) svuint32_t svsm4ekey(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u8))) uint8x16_t svaddqv_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u32))) uint32x4_t svaddqv_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u64))) uint64x2_t svaddqv_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u16))) uint16x8_t svaddqv_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s8))) int8x16_t svaddqv_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f64))) 
float64x2_t svaddqv_f64(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f32))) float32x4_t svaddqv_f32(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f16))) float16x8_t svaddqv_f16(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s32))) int32x4_t svaddqv_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s64))) int64x2_t svaddqv_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s16))) int16x8_t svaddqv_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u8))) uint8x16_t svandqv_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u32))) uint32x4_t svandqv_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u64))) uint64x2_t svandqv_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u16))) uint16x8_t svandqv_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s8))) int8x16_t svandqv_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s32))) int32x4_t svandqv_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s64))) int64x2_t svandqv_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s16))) int16x8_t svandqv_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u8))) svuint8_t svdup_laneq_u8(svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s8))) svint8_t svdup_laneq_s8(svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u64))) svuint64_t svdup_laneq_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_f64))) svfloat64_t svdup_laneq_f64(svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s64))) svint64_t svdup_laneq_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u16))) svuint16_t svdup_laneq_u16(svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_f16))) svfloat16_t svdup_laneq_f16(svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s16))) svint16_t svdup_laneq_s16(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u32))) svuint32_t svdup_laneq_u32(svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_f32))) svfloat32_t svdup_laneq_f32(svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s32))) svint32_t svdup_laneq_s32(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u8))) uint8x16_t sveorqv_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u32))) uint32x4_t sveorqv_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u64))) uint64x2_t sveorqv_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u16))) uint16x8_t sveorqv_u16(svbool_t, svuint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s8))) int8x16_t sveorqv_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s32))) int32x4_t sveorqv_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s64))) int64x2_t sveorqv_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s16))) int16x8_t sveorqv_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u8))) svuint8_t svextq_u8(svuint8_t, svuint8_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u32))) svuint32_t svextq_u32(svuint32_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u64))) svuint64_t svextq_u64(svuint64_t, svuint64_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u16))) svuint16_t svextq_u16(svuint16_t, svuint16_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_bf16))) svbfloat16_t svextq_bf16(svbfloat16_t, svbfloat16_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s8))) svint8_t svextq_s8(svint8_t, svint8_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f64))) svfloat64_t svextq_f64(svfloat64_t, svfloat64_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f32))) svfloat32_t svextq_f32(svfloat32_t, svfloat32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f16))) svfloat16_t svextq_f16(svfloat16_t, svfloat16_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s32))) svint32_t svextq_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s64))) svint64_t svextq_s64(svint64_t, svint64_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s16))) svint16_t svextq_s16(svint16_t, svint16_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u32))) svuint32_t svld1q_gather_u64base_index_u32(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u64))) svuint64_t svld1q_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u16))) svuint16_t svld1q_gather_u64base_index_u16(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_bf16))) svbfloat16_t svld1q_gather_u64base_index_bf16(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f64))) svfloat64_t svld1q_gather_u64base_index_f64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f32))) svfloat32_t svld1q_gather_u64base_index_f32(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f16))) svfloat16_t svld1q_gather_u64base_index_f16(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s32))) svint32_t svld1q_gather_u64base_index_s32(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s64))) svint64_t svld1q_gather_u64base_index_s64(svbool_t, svuint64_t, 
int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s16))) svint16_t svld1q_gather_u64base_index_s16(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u8))) svuint8_t svld1q_gather_u64base_offset_u8(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u32))) svuint32_t svld1q_gather_u64base_offset_u32(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u64))) svuint64_t svld1q_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u16))) svuint16_t svld1q_gather_u64base_offset_u16(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_bf16))) svbfloat16_t svld1q_gather_u64base_offset_bf16(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s8))) svint8_t svld1q_gather_u64base_offset_s8(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f64))) svfloat64_t svld1q_gather_u64base_offset_f64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f32))) svfloat32_t svld1q_gather_u64base_offset_f32(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f16))) svfloat16_t svld1q_gather_u64base_offset_f16(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s32))) svint32_t svld1q_gather_u64base_offset_s32(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s64))) svint64_t svld1q_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s16))) svint16_t svld1q_gather_u64base_offset_s16(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u8))) svuint8_t svld1q_gather_u64base_u8(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u32))) svuint32_t svld1q_gather_u64base_u32(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u64))) svuint64_t svld1q_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u16))) svuint16_t svld1q_gather_u64base_u16(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_bf16))) svbfloat16_t svld1q_gather_u64base_bf16(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s8))) svint8_t svld1q_gather_u64base_s8(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f64))) svfloat64_t svld1q_gather_u64base_f64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f32))) svfloat32_t svld1q_gather_u64base_f32(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f16))) svfloat16_t 
svld1q_gather_u64base_f16(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s32))) svint32_t svld1q_gather_u64base_s32(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s64))) svint64_t svld1q_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s16))) svint16_t svld1q_gather_u64base_s16(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u32))) svuint32_t svld1q_gather_u64index_u32(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u64))) svuint64_t svld1q_gather_u64index_u64(svbool_t, uint64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u16))) svuint16_t svld1q_gather_u64index_u16(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_bf16))) svbfloat16_t svld1q_gather_u64index_bf16(svbool_t, bfloat16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f64))) svfloat64_t svld1q_gather_u64index_f64(svbool_t, float64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f32))) svfloat32_t svld1q_gather_u64index_f32(svbool_t, float32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f16))) svfloat16_t svld1q_gather_u64index_f16(svbool_t, float16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s32))) svint32_t svld1q_gather_u64index_s32(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s64))) svint64_t svld1q_gather_u64index_s64(svbool_t, int64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s16))) svint16_t svld1q_gather_u64index_s16(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u8))) svuint8_t svld1q_gather_u64offset_u8(svbool_t, uint8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u32))) svuint32_t svld1q_gather_u64offset_u32(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u64))) svuint64_t svld1q_gather_u64offset_u64(svbool_t, uint64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u16))) svuint16_t svld1q_gather_u64offset_u16(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_bf16))) svbfloat16_t svld1q_gather_u64offset_bf16(svbool_t, bfloat16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s8))) svint8_t svld1q_gather_u64offset_s8(svbool_t, int8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f64))) svfloat64_t svld1q_gather_u64offset_f64(svbool_t, float64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f32))) svfloat32_t svld1q_gather_u64offset_f32(svbool_t, float32_t const *, 
svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f16))) svfloat16_t svld1q_gather_u64offset_f16(svbool_t, float16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s32))) svint32_t svld1q_gather_u64offset_s32(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s64))) svint64_t svld1q_gather_u64offset_s64(svbool_t, int64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s16))) svint16_t svld1q_gather_u64offset_s16(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_u64))) svuint64_t svld1udq_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_f64))) svfloat64_t svld1udq_f64(svbool_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_s64))) svint64_t svld1udq_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_u64))) svuint64_t svld1udq_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_f64))) svfloat64_t svld1udq_vnum_f64(svbool_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_s64))) svint64_t svld1udq_vnum_s64(svbool_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_u32))) svuint32_t svld1uwq_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_f32))) svfloat32_t svld1uwq_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_s32))) svint32_t svld1uwq_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_u32))) svuint32_t svld1uwq_vnum_u32(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_f32))) svfloat32_t svld1uwq_vnum_f32(svbool_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_s32))) svint32_t svld1uwq_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u8))) svuint8x2_t svld2q_u8(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u32))) svuint32x2_t svld2q_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u64))) svuint64x2_t svld2q_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u16))) svuint16x2_t svld2q_u16(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s8))) svint8x2_t svld2q_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f64))) svfloat64x2_t svld2q_f64(svbool_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f32))) svfloat32x2_t svld2q_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f16))) svfloat16x2_t svld2q_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s32))) svint32x2_t svld2q_s32(svbool_t, int32_t const *); __ai 
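/* The declarations above cover the FEAT_SVE2p1 quadword (128-bit granule)
   gather loads: the _u64base_ forms take a vector of 64-bit addresses, while
   the _u64offset_ / _u64index_ forms take a scalar base pointer plus a vector
   of byte offsets or element indices.  A minimal usage sketch follows; the
   helper name and the per-block behaviour described in the comments are
   illustrative assumptions, not something stated by this listing.  It assumes
   <arm_sve.h> on a target with FEAT_SVE2p1 (e.g. -march=armv9-a+sve2p1). */
svuint32_t gather_quadword_blocks(const uint32_t *table, svuint64_t byte_offsets) {
  /* All-true predicate; exactly which predicate granularity governs LD1Q is
     not spelled out here, so treat this choice as an assumption. */
  svbool_t pg = svptrue_b8();
  /* _u64offset_ form: scalar base plus a vector of byte offsets.  Each active
     address is assumed to supply one 16-byte block of the result. */
  return svld1q_gather_u64offset_u32(pg, table, byte_offsets);
}
__ai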
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s64))) svint64x2_t svld2q_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s16))) svint16x2_t svld2q_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_bf16))) svbfloat16x2_t svld2q_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u8))) svuint8x2_t svld2q_vnum_u8(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u32))) svuint32x2_t svld2q_vnum_u32(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u64))) svuint64x2_t svld2q_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u16))) svuint16x2_t svld2q_vnum_u16(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s8))) svint8x2_t svld2q_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f64))) svfloat64x2_t svld2q_vnum_f64(svbool_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f32))) svfloat32x2_t svld2q_vnum_f32(svbool_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f16))) svfloat16x2_t svld2q_vnum_f16(svbool_t, float16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s32))) svint32x2_t svld2q_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s64))) svint64x2_t svld2q_vnum_s64(svbool_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s16))) svint16x2_t svld2q_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_bf16))) svbfloat16x2_t svld2q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u8))) svuint8x3_t svld3q_u8(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u32))) svuint32x3_t svld3q_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u64))) svuint64x3_t svld3q_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u16))) svuint16x3_t svld3q_u16(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s8))) svint8x3_t svld3q_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f64))) svfloat64x3_t svld3q_f64(svbool_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f32))) svfloat32x3_t svld3q_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f16))) svfloat16x3_t svld3q_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s32))) svint32x3_t svld3q_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s64))) svint64x3_t svld3q_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s16))) svint16x3_t svld3q_s16(svbool_t, int16_t const *); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_bf16))) svbfloat16x3_t svld3q_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u8))) svuint8x3_t svld3q_vnum_u8(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u32))) svuint32x3_t svld3q_vnum_u32(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u64))) svuint64x3_t svld3q_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u16))) svuint16x3_t svld3q_vnum_u16(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s8))) svint8x3_t svld3q_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f64))) svfloat64x3_t svld3q_vnum_f64(svbool_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f32))) svfloat32x3_t svld3q_vnum_f32(svbool_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f16))) svfloat16x3_t svld3q_vnum_f16(svbool_t, float16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s32))) svint32x3_t svld3q_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s64))) svint64x3_t svld3q_vnum_s64(svbool_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s16))) svint16x3_t svld3q_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_bf16))) svbfloat16x3_t svld3q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u8))) svuint8x4_t svld4q_u8(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u32))) svuint32x4_t svld4q_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u64))) svuint64x4_t svld4q_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u16))) svuint16x4_t svld4q_u16(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s8))) svint8x4_t svld4q_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f64))) svfloat64x4_t svld4q_f64(svbool_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f32))) svfloat32x4_t svld4q_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f16))) svfloat16x4_t svld4q_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s32))) svint32x4_t svld4q_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s64))) svint64x4_t svld4q_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s16))) svint16x4_t svld4q_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_bf16))) svbfloat16x4_t svld4q_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u8))) svuint8x4_t svld4q_vnum_u8(svbool_t, uint8_t const *, int64_t); __ai 
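/* svld2q/svld3q/svld4q (and their _vnum variants) are structure loads with a
   128-bit granule: consecutive 16-byte blocks are dealt out across the two,
   three or four result vectors.  A small sketch using the base-SVE svget2
   accessor; the de-interleaving description is an assumption based on the
   LD2Q naming rather than something stated in this listing. */
svuint32_t sum_two_quadword_streams(const uint32_t *src) {
  svbool_t pg = svptrue_b32();
  /* Load two vectors' worth of data; 16-byte blocks are assumed to be
     distributed alternately between the two members of the tuple. */
  svuint32x2_t pair = svld2q_u32(pg, src);
  /* Combine the two de-interleaved vectors with a base-SVE add. */
  return svadd_u32_x(pg, svget2_u32(pair, 0), svget2_u32(pair, 1));
}
__ai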
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u32))) svuint32x4_t svld4q_vnum_u32(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u64))) svuint64x4_t svld4q_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u16))) svuint16x4_t svld4q_vnum_u16(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s8))) svint8x4_t svld4q_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f64))) svfloat64x4_t svld4q_vnum_f64(svbool_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f32))) svfloat32x4_t svld4q_vnum_f32(svbool_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f16))) svfloat16x4_t svld4q_vnum_f16(svbool_t, float16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s32))) svint32x4_t svld4q_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s64))) svint64x4_t svld4q_vnum_s64(svbool_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s16))) svint16x4_t svld4q_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_bf16))) svbfloat16x4_t svld4q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f64))) float64x2_t svmaxnmqv_f64(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f32))) float32x4_t svmaxnmqv_f32(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f16))) float16x8_t svmaxnmqv_f16(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f64))) float64x2_t svmaxqv_f64(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f32))) float32x4_t svmaxqv_f32(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f16))) float16x8_t svmaxqv_f16(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s8))) int8x16_t svmaxqv_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s32))) int32x4_t svmaxqv_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s64))) int64x2_t svmaxqv_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s16))) int16x8_t svmaxqv_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u8))) uint8x16_t svmaxqv_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u32))) uint32x4_t svmaxqv_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u64))) uint64x2_t svmaxqv_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u16))) uint16x8_t svmaxqv_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f64))) float64x2_t svminnmqv_f64(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f32))) float32x4_t svminnmqv_f32(svbool_t, 
svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f16))) float16x8_t svminnmqv_f16(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f64))) float64x2_t svminqv_f64(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f32))) float32x4_t svminqv_f32(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f16))) float16x8_t svminqv_f16(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s8))) int8x16_t svminqv_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s32))) int32x4_t svminqv_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s64))) int64x2_t svminqv_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s16))) int16x8_t svminqv_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u8))) uint8x16_t svminqv_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u32))) uint32x4_t svminqv_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u64))) uint64x2_t svminqv_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u16))) uint16x8_t svminqv_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u8))) uint8x16_t svorqv_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u32))) uint32x4_t svorqv_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u64))) uint64x2_t svorqv_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u16))) uint16x8_t svorqv_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s8))) int8x16_t svorqv_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s32))) int32x4_t svorqv_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s64))) int64x2_t svorqv_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s16))) int16x8_t svorqv_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u8))) svbool_t svpmov_u8(svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s8))) svbool_t svpmov_s8(svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u64))) svbool_t svpmov_u64(svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s64))) svbool_t svpmov_s64(svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u16))) svbool_t svpmov_u16(svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s16))) svbool_t svpmov_s16(svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u32))) svbool_t svpmov_u32(svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s32))) svbool_t svpmov_s32(svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u8))) svbool_t svpmov_lane_u8(svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s8))) svbool_t svpmov_lane_s8(svint8_t, uint64_t); __ai 
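/* svaddqv/svandqv/sveorqv/svorqv/svmaxqv/svminqv (plus the NaN-aware
   svmaxnmqv/svminnmqv float forms) reduce a scalable vector to a single
   128-bit Advanced SIMD value, which is why they return fixed types such as
   int32x4_t or float64x2_t.  The sketch assumes the reduction combines the
   corresponding lane of every 16-byte segment, and uses a NEON lane read, so
   <arm_neon.h> is assumed to be usable alongside <arm_sve.h>. */
int32_t max_of_first_lanes(svint32_t values) {
  svbool_t pg = svptrue_b32();
  /* Lane j of the 128-bit result is assumed to be the maximum over lane j of
     every 16-byte segment of the input. */
  int32x4_t per_lane_max = svmaxqv_s32(pg, values);
  /* Pull out lane 0 with a plain NEON lane extract. */
  return vgetq_lane_s32(per_lane_max, 0);
}
__ai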
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u64))) svbool_t svpmov_lane_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s64))) svbool_t svpmov_lane_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u16))) svbool_t svpmov_lane_u16(svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s16))) svbool_t svpmov_lane_s16(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u32))) svbool_t svpmov_lane_u32(svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32))) svbool_t svpmov_lane_s32(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u64_m))) svuint64_t svpmov_lane_u64_m(svuint64_t, svbool_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s64_m))) svint64_t svpmov_lane_s64_m(svint64_t, svbool_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u16_m))) svuint16_t svpmov_lane_u16_m(svuint16_t, svbool_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s16_m))) svint16_t svpmov_lane_s16_m(svint16_t, svbool_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u32_m))) svuint32_t svpmov_lane_u32_m(svuint32_t, svbool_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32_m))) svint32_t svpmov_lane_s32_m(svint32_t, svbool_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u8_z))) svuint8_t svpmov_u8_z(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s8_z))) svint8_t svpmov_s8_z(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u64_z))) svuint64_t svpmov_u64_z(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s64_z))) svint64_t svpmov_s64_z(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u16_z))) svuint16_t svpmov_u16_z(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s16_z))) svint16_t svpmov_s16_z(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u32_z))) svuint32_t svpmov_u32_z(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s32_z))) svint32_t svpmov_s32_z(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_u64))) void svst1dq_u64(svbool_t, uint64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_f64))) void svst1dq_f64(svbool_t, float64_t const *, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_s64))) void svst1dq_s64(svbool_t, int64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_u64))) void svst1dq_vnum_u64(svbool_t, uint64_t const *, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_f64))) void svst1dq_vnum_f64(svbool_t, float64_t const *, int64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_s64))) void svst1dq_vnum_s64(svbool_t, int64_t const *, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u8))) void svst1q_scatter_u64base_u8(svbool_t, svuint64_t, svuint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u32))) void svst1q_scatter_u64base_u32(svbool_t, svuint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u64))) void svst1q_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u16))) void svst1q_scatter_u64base_u16(svbool_t, svuint64_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_bf16))) void svst1q_scatter_u64base_bf16(svbool_t, svuint64_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s8))) void svst1q_scatter_u64base_s8(svbool_t, svuint64_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f64))) void svst1q_scatter_u64base_f64(svbool_t, svuint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f32))) void svst1q_scatter_u64base_f32(svbool_t, svuint64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f16))) void svst1q_scatter_u64base_f16(svbool_t, svuint64_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s32))) void svst1q_scatter_u64base_s32(svbool_t, svuint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s64))) void svst1q_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s16))) void svst1q_scatter_u64base_s16(svbool_t, svuint64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u32))) void svst1q_scatter_u64base_index_u32(svbool_t, svuint64_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u64))) void svst1q_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u16))) void svst1q_scatter_u64base_index_u16(svbool_t, svuint64_t, int64_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_bf16))) void svst1q_scatter_u64base_index_bf16(svbool_t, svuint64_t, int64_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f64))) void svst1q_scatter_u64base_index_f64(svbool_t, svuint64_t, int64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f32))) void svst1q_scatter_u64base_index_f32(svbool_t, svuint64_t, int64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f16))) void svst1q_scatter_u64base_index_f16(svbool_t, svuint64_t, int64_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s32))) void svst1q_scatter_u64base_index_s32(svbool_t, svuint64_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s64))) void svst1q_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s16))) void svst1q_scatter_u64base_index_s16(svbool_t, svuint64_t, int64_t, svint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u8))) void svst1q_scatter_u64base_offset_u8(svbool_t, svuint64_t, int64_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u32))) void svst1q_scatter_u64base_offset_u32(svbool_t, svuint64_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u64))) void svst1q_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u16))) void svst1q_scatter_u64base_offset_u16(svbool_t, svuint64_t, int64_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_bf16))) void svst1q_scatter_u64base_offset_bf16(svbool_t, svuint64_t, int64_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s8))) void svst1q_scatter_u64base_offset_s8(svbool_t, svuint64_t, int64_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f64))) void svst1q_scatter_u64base_offset_f64(svbool_t, svuint64_t, int64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f32))) void svst1q_scatter_u64base_offset_f32(svbool_t, svuint64_t, int64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f16))) void svst1q_scatter_u64base_offset_f16(svbool_t, svuint64_t, int64_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s32))) void svst1q_scatter_u64base_offset_s32(svbool_t, svuint64_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s64))) void svst1q_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s16))) void svst1q_scatter_u64base_offset_s16(svbool_t, svuint64_t, int64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u32))) void svst1q_scatter_u64index_u32(svbool_t, uint32_t *, svuint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u64))) void svst1q_scatter_u64index_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u16))) void svst1q_scatter_u64index_u16(svbool_t, uint16_t *, svuint64_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_bf16))) void svst1q_scatter_u64index_bf16(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f64))) void svst1q_scatter_u64index_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f32))) void svst1q_scatter_u64index_f32(svbool_t, float32_t *, svuint64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f16))) void svst1q_scatter_u64index_f16(svbool_t, float16_t *, svuint64_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s32))) void svst1q_scatter_u64index_s32(svbool_t, int32_t *, svuint64_t, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s64))) void svst1q_scatter_u64index_s64(svbool_t, int64_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s16))) void svst1q_scatter_u64index_s16(svbool_t, int16_t *, svuint64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u8))) void svst1q_scatter_u64offset_u8(svbool_t, uint8_t *, svuint64_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u32))) void svst1q_scatter_u64offset_u32(svbool_t, uint32_t *, svuint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u64))) void svst1q_scatter_u64offset_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u16))) void svst1q_scatter_u64offset_u16(svbool_t, uint16_t *, svuint64_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_bf16))) void svst1q_scatter_u64offset_bf16(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s8))) void svst1q_scatter_u64offset_s8(svbool_t, int8_t *, svuint64_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f64))) void svst1q_scatter_u64offset_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f32))) void svst1q_scatter_u64offset_f32(svbool_t, float32_t *, svuint64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f16))) void svst1q_scatter_u64offset_f16(svbool_t, float16_t *, svuint64_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s32))) void svst1q_scatter_u64offset_s32(svbool_t, int32_t *, svuint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s64))) void svst1q_scatter_u64offset_s64(svbool_t, int64_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s16))) void svst1q_scatter_u64offset_s16(svbool_t, int16_t *, svuint64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_u32))) void svst1wq_u32(svbool_t, uint32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_f32))) void svst1wq_f32(svbool_t, float32_t const *, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_s32))) void svst1wq_s32(svbool_t, int32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_u32))) void svst1wq_vnum_u32(svbool_t, uint32_t const *, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_f32))) void svst1wq_vnum_f32(svbool_t, float32_t const *, int64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_s32))) void svst1wq_vnum_s32(svbool_t, int32_t const *, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u8))) void svst2q_u8(svbool_t, uint8_t const *, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u32))) void svst2q_u32(svbool_t, uint32_t const *, svuint32x2_t); __ai 
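/* svst1wq/svst1dq and svst2q/svst3q/svst4q are the store-side counterparts of
   the quadword loads above; these prototypes declare the destination pointer
   const-qualified, so an ordinary writable buffer converts implicitly.  The
   sketch packs two vectors into a tuple with the base-SVE svcreate2 and writes
   them with svst2q; the interleaving comment is an assumption taken from the
   ST2Q naming. */
void store_two_quadword_streams(uint32_t *dst, svuint32_t a, svuint32_t b) {
  svbool_t pg = svptrue_b32();
  /* Pack the two vectors into an svuint32x2_t tuple (base SVE ACLE). */
  svuint32x2_t pair = svcreate2_u32(a, b);
  /* 16-byte blocks from the two tuple members are assumed to be written out
     alternately to dst. */
  svst2q_u32(pg, dst, pair);
}
__ai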
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u64))) void svst2q_u64(svbool_t, uint64_t const *, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u16))) void svst2q_u16(svbool_t, uint16_t const *, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s8))) void svst2q_s8(svbool_t, int8_t const *, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f64))) void svst2q_f64(svbool_t, float64_t const *, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f32))) void svst2q_f32(svbool_t, float32_t const *, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f16))) void svst2q_f16(svbool_t, float16_t const *, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s32))) void svst2q_s32(svbool_t, int32_t const *, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s64))) void svst2q_s64(svbool_t, int64_t const *, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s16))) void svst2q_s16(svbool_t, int16_t const *, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_bf16))) void svst2q_bf16(svbool_t, bfloat16_t const *, svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u8))) void svst2q_vnum_u8(svbool_t, uint8_t const *, int64_t, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u32))) void svst2q_vnum_u32(svbool_t, uint32_t const *, int64_t, svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u64))) void svst2q_vnum_u64(svbool_t, uint64_t const *, int64_t, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u16))) void svst2q_vnum_u16(svbool_t, uint16_t const *, int64_t, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s8))) void svst2q_vnum_s8(svbool_t, int8_t const *, int64_t, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f64))) void svst2q_vnum_f64(svbool_t, float64_t const *, int64_t, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f32))) void svst2q_vnum_f32(svbool_t, float32_t const *, int64_t, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f16))) void svst2q_vnum_f16(svbool_t, float16_t const *, int64_t, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s32))) void svst2q_vnum_s32(svbool_t, int32_t const *, int64_t, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s64))) void svst2q_vnum_s64(svbool_t, int64_t const *, int64_t, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s16))) void svst2q_vnum_s16(svbool_t, int16_t const *, int64_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_bf16))) void svst2q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t, svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u8))) void svst3q_u8(svbool_t, uint8_t const *, svuint8x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u32))) void svst3q_u32(svbool_t, uint32_t const *, svuint32x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u64))) void svst3q_u64(svbool_t, uint64_t const *, svuint64x3_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u16))) void svst3q_u16(svbool_t, uint16_t const *, svuint16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s8))) void svst3q_s8(svbool_t, int8_t const *, svint8x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f64))) void svst3q_f64(svbool_t, float64_t const *, svfloat64x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f32))) void svst3q_f32(svbool_t, float32_t const *, svfloat32x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f16))) void svst3q_f16(svbool_t, float16_t const *, svfloat16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s32))) void svst3q_s32(svbool_t, int32_t const *, svint32x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s64))) void svst3q_s64(svbool_t, int64_t const *, svint64x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s16))) void svst3q_s16(svbool_t, int16_t const *, svint16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_bf16))) void svst3q_bf16(svbool_t, bfloat16_t const *, svbfloat16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u8))) void svst3q_vnum_u8(svbool_t, uint8_t const *, int64_t, svuint8x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u32))) void svst3q_vnum_u32(svbool_t, uint32_t const *, int64_t, svuint32x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u64))) void svst3q_vnum_u64(svbool_t, uint64_t const *, int64_t, svuint64x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u16))) void svst3q_vnum_u16(svbool_t, uint16_t const *, int64_t, svuint16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s8))) void svst3q_vnum_s8(svbool_t, int8_t const *, int64_t, svint8x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f64))) void svst3q_vnum_f64(svbool_t, float64_t const *, int64_t, svfloat64x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f32))) void svst3q_vnum_f32(svbool_t, float32_t const *, int64_t, svfloat32x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f16))) void svst3q_vnum_f16(svbool_t, float16_t const *, int64_t, svfloat16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s32))) void svst3q_vnum_s32(svbool_t, int32_t const *, int64_t, svint32x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s64))) void svst3q_vnum_s64(svbool_t, int64_t const *, int64_t, svint64x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s16))) void svst3q_vnum_s16(svbool_t, int16_t const *, int64_t, svint16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_bf16))) void svst3q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t, svbfloat16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u8))) void svst4q_u8(svbool_t, uint8_t const *, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u32))) void svst4q_u32(svbool_t, uint32_t const *, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u64))) void svst4q_u64(svbool_t, uint64_t const *, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u16))) void svst4q_u16(svbool_t, uint16_t const *, svuint16x4_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s8))) void svst4q_s8(svbool_t, int8_t const *, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f64))) void svst4q_f64(svbool_t, float64_t const *, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f32))) void svst4q_f32(svbool_t, float32_t const *, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f16))) void svst4q_f16(svbool_t, float16_t const *, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s32))) void svst4q_s32(svbool_t, int32_t const *, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s64))) void svst4q_s64(svbool_t, int64_t const *, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s16))) void svst4q_s16(svbool_t, int16_t const *, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_bf16))) void svst4q_bf16(svbool_t, bfloat16_t const *, svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u8))) void svst4q_vnum_u8(svbool_t, uint8_t const *, int64_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u32))) void svst4q_vnum_u32(svbool_t, uint32_t const *, int64_t, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u64))) void svst4q_vnum_u64(svbool_t, uint64_t const *, int64_t, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u16))) void svst4q_vnum_u16(svbool_t, uint16_t const *, int64_t, svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s8))) void svst4q_vnum_s8(svbool_t, int8_t const *, int64_t, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f64))) void svst4q_vnum_f64(svbool_t, float64_t const *, int64_t, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f32))) void svst4q_vnum_f32(svbool_t, float32_t const *, int64_t, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f16))) void svst4q_vnum_f16(svbool_t, float16_t const *, int64_t, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s32))) void svst4q_vnum_s32(svbool_t, int32_t const *, int64_t, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s64))) void svst4q_vnum_s64(svbool_t, int64_t const *, int64_t, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s16))) void svst4q_vnum_s16(svbool_t, int16_t const *, int64_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_bf16))) void svst4q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t, svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u8))) svuint8_t svtblq_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u32))) svuint32_t svtblq_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u64))) svuint64_t svtblq_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u16))) svuint16_t svtblq_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_bf16))) svbfloat16_t svtblq_bf16(svbfloat16_t, svuint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s8))) svint8_t svtblq_s8(svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f64))) svfloat64_t svtblq_f64(svfloat64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f32))) svfloat32_t svtblq_f32(svfloat32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f16))) svfloat16_t svtblq_f16(svfloat16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s32))) svint32_t svtblq_s32(svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s64))) svint64_t svtblq_s64(svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s16))) svint16_t svtblq_s16(svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u8))) svuint8_t svtbxq_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u32))) svuint32_t svtbxq_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u64))) svuint64_t svtbxq_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u16))) svuint16_t svtbxq_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_bf16))) svbfloat16_t svtbxq_bf16(svbfloat16_t, svbfloat16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s8))) svint8_t svtbxq_s8(svint8_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f64))) svfloat64_t svtbxq_f64(svfloat64_t, svfloat64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f32))) svfloat32_t svtbxq_f32(svfloat32_t, svfloat32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f16))) svfloat16_t svtbxq_f16(svfloat16_t, svfloat16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s32))) svint32_t svtbxq_s32(svint32_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s64))) svint64_t svtbxq_s64(svint64_t, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s16))) svint16_t svtbxq_s16(svint16_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u8))) svuint8_t svuzpq1_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u32))) svuint32_t svuzpq1_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u64))) svuint64_t svuzpq1_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u16))) svuint16_t svuzpq1_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_bf16))) svbfloat16_t svuzpq1_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s8))) svint8_t svuzpq1_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f64))) svfloat64_t svuzpq1_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f32))) svfloat32_t svuzpq1_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f16))) svfloat16_t svuzpq1_f16(svfloat16_t, svfloat16_t); __ai 
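/* svtblq/svtbxq are per-segment table lookups: each index is assumed to select
   an element from within the same 16-byte segment of the table operand, with
   svtbxq keeping its fallback operand where an index is out of range.  The
   sketch reverses the bytes of every segment; the index arithmetic and the
   within-segment behaviour are assumptions based on the TBLQ mnemonic. */
svuint8_t reverse_bytes_per_segment(svuint8_t data) {
  svbool_t pg = svptrue_b8();
  /* Position of each byte within its 16-byte segment: 0..15 repeating. */
  svuint8_t lane = svand_n_u8_x(pg, svindex_u8(0, 1), 15);
  /* Reversed position within the segment: 15 - lane. */
  svuint8_t idx = svsub_u8_x(pg, svdup_n_u8(15), lane);
  /* Per-segment lookup: each result byte is assumed to come from the same
     segment of 'data', at the position given by 'idx'. */
  return svtblq_u8(data, idx);
}
__ai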
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s32))) svint32_t svuzpq1_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s64))) svint64_t svuzpq1_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s16))) svint16_t svuzpq1_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u8))) svuint8_t svuzpq2_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u32))) svuint32_t svuzpq2_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u64))) svuint64_t svuzpq2_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u16))) svuint16_t svuzpq2_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_bf16))) svbfloat16_t svuzpq2_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s8))) svint8_t svuzpq2_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f64))) svfloat64_t svuzpq2_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f32))) svfloat32_t svuzpq2_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f16))) svfloat16_t svuzpq2_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s32))) svint32_t svuzpq2_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s64))) svint64_t svuzpq2_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s16))) svint16_t svuzpq2_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u8))) svuint8_t svzipq1_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u32))) svuint32_t svzipq1_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u64))) svuint64_t svzipq1_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u16))) svuint16_t svzipq1_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_bf16))) svbfloat16_t svzipq1_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s8))) svint8_t svzipq1_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f64))) svfloat64_t svzipq1_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f32))) svfloat32_t svzipq1_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f16))) svfloat16_t svzipq1_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s32))) svint32_t svzipq1_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s64))) svint64_t svzipq1_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s16))) svint16_t svzipq1_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u8))) svuint8_t svzipq2_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u32))) svuint32_t svzipq2_u32(svuint32_t, svuint32_t); __ai 
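/* svzipq1/svzipq2 and svuzpq1/svuzpq2 are the segment-local versions of
   ZIP/UZP: elements are interleaved or de-interleaved within each 16-byte
   segment rather than across the whole vector.  A round-trip sketch; that the
   uzpq pair undoes the zipq pair is an assumption, mirroring how the
   full-vector ZIP/UZP intrinsics compose. */
svuint32_t zipq_uzpq_roundtrip(svuint32_t a, svuint32_t b) {
  /* Interleave within each 16-byte segment: per segment, z1 is assumed to
     hold a0 b0 a1 b1 and z2 to hold a2 b2 a3 b3. */
  svuint32_t z1 = svzipq1_u32(a, b);
  svuint32_t z2 = svzipq2_u32(a, b);
  /* De-interleave again; with the assumed semantics this recovers 'a'
     (svuzpq2_u32(z1, z2) would recover 'b'). */
  return svuzpq1_u32(z1, z2);
}
__ai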
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u64))) svuint64_t svzipq2_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u16))) svuint16_t svzipq2_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_bf16))) svbfloat16_t svzipq2_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s8))) svint8_t svzipq2_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f64))) svfloat64_t svzipq2_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f32))) svfloat32_t svzipq2_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f16))) svfloat16_t svzipq2_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s32))) svint32_t svzipq2_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s64))) svint64_t svzipq2_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s16))) svint16_t svzipq2_s16(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u8))) uint8x16_t svaddqv(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u32))) uint32x4_t svaddqv(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u64))) uint64x2_t svaddqv(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u16))) uint16x8_t svaddqv(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s8))) int8x16_t svaddqv(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f64))) float64x2_t svaddqv(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f32))) float32x4_t svaddqv(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f16))) float16x8_t svaddqv(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s32))) int32x4_t svaddqv(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s64))) int64x2_t svaddqv(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s16))) int16x8_t svaddqv(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u8))) uint8x16_t svandqv(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u32))) uint32x4_t svandqv(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u64))) uint64x2_t svandqv(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u16))) uint16x8_t svandqv(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s8))) int8x16_t svandqv(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s32))) int32x4_t svandqv(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s64))) int64x2_t svandqv(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s16))) int16x8_t svandqv(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u8))) svuint8_t 
svdup_laneq(svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s8))) svint8_t svdup_laneq(svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u64))) svuint64_t svdup_laneq(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_f64))) svfloat64_t svdup_laneq(svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s64))) svint64_t svdup_laneq(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u16))) svuint16_t svdup_laneq(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_f16))) svfloat16_t svdup_laneq(svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s16))) svint16_t svdup_laneq(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u32))) svuint32_t svdup_laneq(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_f32))) svfloat32_t svdup_laneq(svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s32))) svint32_t svdup_laneq(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u8))) uint8x16_t sveorqv(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u32))) uint32x4_t sveorqv(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u64))) uint64x2_t sveorqv(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u16))) uint16x8_t sveorqv(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s8))) int8x16_t sveorqv(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s32))) int32x4_t sveorqv(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s64))) int64x2_t sveorqv(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s16))) int16x8_t sveorqv(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u8))) svuint8_t svextq(svuint8_t, svuint8_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u32))) svuint32_t svextq(svuint32_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u64))) svuint64_t svextq(svuint64_t, svuint64_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u16))) svuint16_t svextq(svuint16_t, svuint16_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_bf16))) svbfloat16_t svextq(svbfloat16_t, svbfloat16_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s8))) svint8_t svextq(svint8_t, svint8_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f64))) svfloat64_t svextq(svfloat64_t, svfloat64_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f32))) svfloat32_t svextq(svfloat32_t, svfloat32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f16))) svfloat16_t svextq(svfloat16_t, svfloat16_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s32))) svint32_t svextq(svint32_t, svint32_t, int32_t); __aio 
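/* From here the same intrinsics reappear under overloaded names (the __aio
   declarations): the type suffix is dropped and the variant is selected from
   the argument types, the usual ACLE convention.  A short sketch mixing a few
   of the overloads declared above; the lane numbering in the comments is an
   assumption, and the uint32x4_t return type relies on <arm_neon.h> being
   usable alongside <arm_sve.h>. */
uint32x4_t overload_demo(svuint32_t v, svuint32_t w) {
  svbool_t pg = svptrue_b32();
  /* Overloaded names: the _u32 suffix is inferred from the operand types. */
  svuint32_t bcast = svdup_laneq(v, 1);   /* same as svdup_laneq_u32 */
  svuint32_t mixed = svextq(bcast, w, 2); /* same as svextq_u32; the last
                                             argument must be a constant */
  return svaddqv(pg, mixed);              /* same as svaddqv_u32 */
}
__aio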
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s64))) svint64_t svextq(svint64_t, svint64_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s16))) svint16_t svextq(svint16_t, svint16_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u32))) svuint32_t svld1q_gather_index_u32(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u64))) svuint64_t svld1q_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u16))) svuint16_t svld1q_gather_index_u16(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_bf16))) svbfloat16_t svld1q_gather_index_bf16(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f64))) svfloat64_t svld1q_gather_index_f64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f32))) svfloat32_t svld1q_gather_index_f32(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f16))) svfloat16_t svld1q_gather_index_f16(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s32))) svint32_t svld1q_gather_index_s32(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s64))) svint64_t svld1q_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s16))) svint16_t svld1q_gather_index_s16(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u8))) svuint8_t svld1q_gather_offset_u8(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u32))) svuint32_t svld1q_gather_offset_u32(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u64))) svuint64_t svld1q_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u16))) svuint16_t svld1q_gather_offset_u16(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_bf16))) svbfloat16_t svld1q_gather_offset_bf16(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s8))) svint8_t svld1q_gather_offset_s8(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f64))) svfloat64_t svld1q_gather_offset_f64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f32))) svfloat32_t svld1q_gather_offset_f32(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f16))) svfloat16_t svld1q_gather_offset_f16(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s32))) svint32_t svld1q_gather_offset_s32(svbool_t, svuint64_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s64))) svint64_t svld1q_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s16))) svint16_t svld1q_gather_offset_s16(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u8))) svuint8_t svld1q_gather_u8(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u32))) svuint32_t svld1q_gather_u32(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u64))) svuint64_t svld1q_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u16))) svuint16_t svld1q_gather_u16(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_bf16))) svbfloat16_t svld1q_gather_bf16(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s8))) svint8_t svld1q_gather_s8(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f64))) svfloat64_t svld1q_gather_f64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f32))) svfloat32_t svld1q_gather_f32(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f16))) svfloat16_t svld1q_gather_f16(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s32))) svint32_t svld1q_gather_s32(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s64))) svint64_t svld1q_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s16))) svint16_t svld1q_gather_s16(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u32))) svuint32_t svld1q_gather_index(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u64))) svuint64_t svld1q_gather_index(svbool_t, uint64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u16))) svuint16_t svld1q_gather_index(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_bf16))) svbfloat16_t svld1q_gather_index(svbool_t, bfloat16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f64))) svfloat64_t svld1q_gather_index(svbool_t, float64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f32))) svfloat32_t svld1q_gather_index(svbool_t, float32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f16))) svfloat16_t svld1q_gather_index(svbool_t, float16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s32))) svint32_t svld1q_gather_index(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s64))) svint64_t svld1q_gather_index(svbool_t, int64_t const *, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s16))) svint16_t svld1q_gather_index(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u8))) svuint8_t svld1q_gather_offset(svbool_t, uint8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u32))) svuint32_t svld1q_gather_offset(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u64))) svuint64_t svld1q_gather_offset(svbool_t, uint64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u16))) svuint16_t svld1q_gather_offset(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_bf16))) svbfloat16_t svld1q_gather_offset(svbool_t, bfloat16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s8))) svint8_t svld1q_gather_offset(svbool_t, int8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f64))) svfloat64_t svld1q_gather_offset(svbool_t, float64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f32))) svfloat32_t svld1q_gather_offset(svbool_t, float32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f16))) svfloat16_t svld1q_gather_offset(svbool_t, float16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s32))) svint32_t svld1q_gather_offset(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s64))) svint64_t svld1q_gather_offset(svbool_t, int64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s16))) svint16_t svld1q_gather_offset(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_u64))) svuint64_t svld1udq(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_f64))) svfloat64_t svld1udq(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_s64))) svint64_t svld1udq(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_u64))) svuint64_t svld1udq_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_f64))) svfloat64_t svld1udq_vnum(svbool_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_s64))) svint64_t svld1udq_vnum(svbool_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_u32))) svuint32_t svld1uwq(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_f32))) svfloat32_t svld1uwq(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_s32))) svint32_t svld1uwq(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_u32))) svuint32_t svld1uwq_vnum(svbool_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_f32))) 
svfloat32_t svld1uwq_vnum(svbool_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_s32))) svint32_t svld1uwq_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u8))) svuint8x2_t svld2q(svbool_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u32))) svuint32x2_t svld2q(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u64))) svuint64x2_t svld2q(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u16))) svuint16x2_t svld2q(svbool_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s8))) svint8x2_t svld2q(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f64))) svfloat64x2_t svld2q(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f32))) svfloat32x2_t svld2q(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f16))) svfloat16x2_t svld2q(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s32))) svint32x2_t svld2q(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s64))) svint64x2_t svld2q(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s16))) svint16x2_t svld2q(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_bf16))) svbfloat16x2_t svld2q(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u8))) svuint8x2_t svld2q_vnum(svbool_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u32))) svuint32x2_t svld2q_vnum(svbool_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u64))) svuint64x2_t svld2q_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u16))) svuint16x2_t svld2q_vnum(svbool_t, uint16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s8))) svint8x2_t svld2q_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f64))) svfloat64x2_t svld2q_vnum(svbool_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f32))) svfloat32x2_t svld2q_vnum(svbool_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f16))) svfloat16x2_t svld2q_vnum(svbool_t, float16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s32))) svint32x2_t svld2q_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s64))) svint64x2_t svld2q_vnum(svbool_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s16))) svint16x2_t svld2q_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_bf16))) svbfloat16x2_t svld2q_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u8))) svuint8x3_t svld3q(svbool_t, uint8_t const *); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u32))) svuint32x3_t svld3q(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u64))) svuint64x3_t svld3q(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u16))) svuint16x3_t svld3q(svbool_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s8))) svint8x3_t svld3q(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f64))) svfloat64x3_t svld3q(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f32))) svfloat32x3_t svld3q(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f16))) svfloat16x3_t svld3q(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s32))) svint32x3_t svld3q(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s64))) svint64x3_t svld3q(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s16))) svint16x3_t svld3q(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_bf16))) svbfloat16x3_t svld3q(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u8))) svuint8x3_t svld3q_vnum(svbool_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u32))) svuint32x3_t svld3q_vnum(svbool_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u64))) svuint64x3_t svld3q_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u16))) svuint16x3_t svld3q_vnum(svbool_t, uint16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s8))) svint8x3_t svld3q_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f64))) svfloat64x3_t svld3q_vnum(svbool_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f32))) svfloat32x3_t svld3q_vnum(svbool_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f16))) svfloat16x3_t svld3q_vnum(svbool_t, float16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s32))) svint32x3_t svld3q_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s64))) svint64x3_t svld3q_vnum(svbool_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s16))) svint16x3_t svld3q_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_bf16))) svbfloat16x3_t svld3q_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u8))) svuint8x4_t svld4q(svbool_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u32))) svuint32x4_t svld4q(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u64))) svuint64x4_t svld4q(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u16))) svuint16x4_t svld4q(svbool_t, uint16_t 
const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s8))) svint8x4_t svld4q(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f64))) svfloat64x4_t svld4q(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f32))) svfloat32x4_t svld4q(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f16))) svfloat16x4_t svld4q(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s32))) svint32x4_t svld4q(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s64))) svint64x4_t svld4q(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s16))) svint16x4_t svld4q(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_bf16))) svbfloat16x4_t svld4q(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u8))) svuint8x4_t svld4q_vnum(svbool_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u32))) svuint32x4_t svld4q_vnum(svbool_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u64))) svuint64x4_t svld4q_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u16))) svuint16x4_t svld4q_vnum(svbool_t, uint16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s8))) svint8x4_t svld4q_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f64))) svfloat64x4_t svld4q_vnum(svbool_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f32))) svfloat32x4_t svld4q_vnum(svbool_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f16))) svfloat16x4_t svld4q_vnum(svbool_t, float16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s32))) svint32x4_t svld4q_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s64))) svint64x4_t svld4q_vnum(svbool_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s16))) svint16x4_t svld4q_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_bf16))) svbfloat16x4_t svld4q_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f64))) float64x2_t svmaxnmqv(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f32))) float32x4_t svmaxnmqv(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f16))) float16x8_t svmaxnmqv(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f64))) float64x2_t svmaxqv(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f32))) float32x4_t svmaxqv(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f16))) float16x8_t svmaxqv(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s8))) int8x16_t svmaxqv(svbool_t, 
svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s32))) int32x4_t svmaxqv(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s64))) int64x2_t svmaxqv(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s16))) int16x8_t svmaxqv(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u8))) uint8x16_t svmaxqv(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u32))) uint32x4_t svmaxqv(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u64))) uint64x2_t svmaxqv(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u16))) uint16x8_t svmaxqv(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f64))) float64x2_t svminnmqv(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f32))) float32x4_t svminnmqv(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f16))) float16x8_t svminnmqv(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f64))) float64x2_t svminqv(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f32))) float32x4_t svminqv(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f16))) float16x8_t svminqv(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s8))) int8x16_t svminqv(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s32))) int32x4_t svminqv(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s64))) int64x2_t svminqv(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s16))) int16x8_t svminqv(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u8))) uint8x16_t svminqv(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u32))) uint32x4_t svminqv(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u64))) uint64x2_t svminqv(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u16))) uint16x8_t svminqv(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u8))) uint8x16_t svorqv(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u32))) uint32x4_t svorqv(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u64))) uint64x2_t svorqv(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u16))) uint16x8_t svorqv(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s8))) int8x16_t svorqv(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s32))) int32x4_t svorqv(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s64))) int64x2_t svorqv(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s16))) int16x8_t svorqv(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u8))) svbool_t svpmov(svuint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s8))) svbool_t svpmov(svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u64))) svbool_t svpmov(svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s64))) svbool_t svpmov(svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u16))) svbool_t svpmov(svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s16))) svbool_t svpmov(svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u32))) svbool_t svpmov(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s32))) svbool_t svpmov(svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u8))) svbool_t svpmov_lane(svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s8))) svbool_t svpmov_lane(svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u64))) svbool_t svpmov_lane(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s64))) svbool_t svpmov_lane(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u16))) svbool_t svpmov_lane(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s16))) svbool_t svpmov_lane(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u32))) svbool_t svpmov_lane(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32))) svbool_t svpmov_lane(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u64_m))) svuint64_t svpmov_lane_m(svuint64_t, svbool_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s64_m))) svint64_t svpmov_lane_m(svint64_t, svbool_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u16_m))) svuint16_t svpmov_lane_m(svuint16_t, svbool_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s16_m))) svint16_t svpmov_lane_m(svint16_t, svbool_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u32_m))) svuint32_t svpmov_lane_m(svuint32_t, svbool_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32_m))) svint32_t svpmov_lane_m(svint32_t, svbool_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_u64))) void svst1dq(svbool_t, uint64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_f64))) void svst1dq(svbool_t, float64_t const *, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_s64))) void svst1dq(svbool_t, int64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_u64))) void svst1dq_vnum(svbool_t, uint64_t const *, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_f64))) void svst1dq_vnum(svbool_t, float64_t const *, int64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_s64))) void svst1dq_vnum(svbool_t, int64_t const *, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u8))) void svst1q_scatter(svbool_t, svuint64_t, svuint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u32))) void svst1q_scatter(svbool_t, svuint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u64))) void svst1q_scatter(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u16))) void svst1q_scatter(svbool_t, svuint64_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_bf16))) void svst1q_scatter(svbool_t, svuint64_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s8))) void svst1q_scatter(svbool_t, svuint64_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f64))) void svst1q_scatter(svbool_t, svuint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f32))) void svst1q_scatter(svbool_t, svuint64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f16))) void svst1q_scatter(svbool_t, svuint64_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s32))) void svst1q_scatter(svbool_t, svuint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s64))) void svst1q_scatter(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s16))) void svst1q_scatter(svbool_t, svuint64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u32))) void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u64))) void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u16))) void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_bf16))) void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f64))) void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f32))) void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f16))) void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s32))) void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s64))) void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s16))) void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u8))) void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u32))) void 
svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u64))) void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u16))) void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_bf16))) void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s8))) void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f64))) void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f32))) void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f16))) void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s32))) void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s64))) void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s16))) void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u32))) void svst1q_scatter_index(svbool_t, uint32_t *, svuint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u64))) void svst1q_scatter_index(svbool_t, uint64_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u16))) void svst1q_scatter_index(svbool_t, uint16_t *, svuint64_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_bf16))) void svst1q_scatter_index(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f64))) void svst1q_scatter_index(svbool_t, float64_t *, svuint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f32))) void svst1q_scatter_index(svbool_t, float32_t *, svuint64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f16))) void svst1q_scatter_index(svbool_t, float16_t *, svuint64_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s32))) void svst1q_scatter_index(svbool_t, int32_t *, svuint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s64))) void svst1q_scatter_index(svbool_t, int64_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s16))) void svst1q_scatter_index(svbool_t, int16_t *, svuint64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u8))) void svst1q_scatter_offset(svbool_t, uint8_t *, 
svuint64_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u32))) void svst1q_scatter_offset(svbool_t, uint32_t *, svuint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u64))) void svst1q_scatter_offset(svbool_t, uint64_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u16))) void svst1q_scatter_offset(svbool_t, uint16_t *, svuint64_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_bf16))) void svst1q_scatter_offset(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s8))) void svst1q_scatter_offset(svbool_t, int8_t *, svuint64_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f64))) void svst1q_scatter_offset(svbool_t, float64_t *, svuint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f32))) void svst1q_scatter_offset(svbool_t, float32_t *, svuint64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f16))) void svst1q_scatter_offset(svbool_t, float16_t *, svuint64_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s32))) void svst1q_scatter_offset(svbool_t, int32_t *, svuint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s64))) void svst1q_scatter_offset(svbool_t, int64_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s16))) void svst1q_scatter_offset(svbool_t, int16_t *, svuint64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_u32))) void svst1wq(svbool_t, uint32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_f32))) void svst1wq(svbool_t, float32_t const *, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_s32))) void svst1wq(svbool_t, int32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_u32))) void svst1wq_vnum(svbool_t, uint32_t const *, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_f32))) void svst1wq_vnum(svbool_t, float32_t const *, int64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_s32))) void svst1wq_vnum(svbool_t, int32_t const *, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u8))) void svst2q(svbool_t, uint8_t const *, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u32))) void svst2q(svbool_t, uint32_t const *, svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u64))) void svst2q(svbool_t, uint64_t const *, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u16))) void svst2q(svbool_t, uint16_t const *, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s8))) void svst2q(svbool_t, int8_t const *, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f64))) void svst2q(svbool_t, float64_t const *, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f32))) void 
svst2q(svbool_t, float32_t const *, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f16))) void svst2q(svbool_t, float16_t const *, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s32))) void svst2q(svbool_t, int32_t const *, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s64))) void svst2q(svbool_t, int64_t const *, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s16))) void svst2q(svbool_t, int16_t const *, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_bf16))) void svst2q(svbool_t, bfloat16_t const *, svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u8))) void svst2q_vnum(svbool_t, uint8_t const *, int64_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u32))) void svst2q_vnum(svbool_t, uint32_t const *, int64_t, svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u64))) void svst2q_vnum(svbool_t, uint64_t const *, int64_t, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u16))) void svst2q_vnum(svbool_t, uint16_t const *, int64_t, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s8))) void svst2q_vnum(svbool_t, int8_t const *, int64_t, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f64))) void svst2q_vnum(svbool_t, float64_t const *, int64_t, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f32))) void svst2q_vnum(svbool_t, float32_t const *, int64_t, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f16))) void svst2q_vnum(svbool_t, float16_t const *, int64_t, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s32))) void svst2q_vnum(svbool_t, int32_t const *, int64_t, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s64))) void svst2q_vnum(svbool_t, int64_t const *, int64_t, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s16))) void svst2q_vnum(svbool_t, int16_t const *, int64_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_bf16))) void svst2q_vnum(svbool_t, bfloat16_t const *, int64_t, svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u8))) void svst3q(svbool_t, uint8_t const *, svuint8x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u32))) void svst3q(svbool_t, uint32_t const *, svuint32x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u64))) void svst3q(svbool_t, uint64_t const *, svuint64x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u16))) void svst3q(svbool_t, uint16_t const *, svuint16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s8))) void svst3q(svbool_t, int8_t const *, svint8x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f64))) void svst3q(svbool_t, float64_t const *, svfloat64x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f32))) void svst3q(svbool_t, float32_t const *, svfloat32x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f16))) void svst3q(svbool_t, float16_t const *, svfloat16x3_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s32))) void svst3q(svbool_t, int32_t const *, svint32x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s64))) void svst3q(svbool_t, int64_t const *, svint64x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s16))) void svst3q(svbool_t, int16_t const *, svint16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_bf16))) void svst3q(svbool_t, bfloat16_t const *, svbfloat16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u8))) void svst3q_vnum(svbool_t, uint8_t const *, int64_t, svuint8x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u32))) void svst3q_vnum(svbool_t, uint32_t const *, int64_t, svuint32x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u64))) void svst3q_vnum(svbool_t, uint64_t const *, int64_t, svuint64x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u16))) void svst3q_vnum(svbool_t, uint16_t const *, int64_t, svuint16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s8))) void svst3q_vnum(svbool_t, int8_t const *, int64_t, svint8x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f64))) void svst3q_vnum(svbool_t, float64_t const *, int64_t, svfloat64x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f32))) void svst3q_vnum(svbool_t, float32_t const *, int64_t, svfloat32x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f16))) void svst3q_vnum(svbool_t, float16_t const *, int64_t, svfloat16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s32))) void svst3q_vnum(svbool_t, int32_t const *, int64_t, svint32x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s64))) void svst3q_vnum(svbool_t, int64_t const *, int64_t, svint64x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s16))) void svst3q_vnum(svbool_t, int16_t const *, int64_t, svint16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_bf16))) void svst3q_vnum(svbool_t, bfloat16_t const *, int64_t, svbfloat16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u8))) void svst4q(svbool_t, uint8_t const *, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u32))) void svst4q(svbool_t, uint32_t const *, svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u64))) void svst4q(svbool_t, uint64_t const *, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u16))) void svst4q(svbool_t, uint16_t const *, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s8))) void svst4q(svbool_t, int8_t const *, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f64))) void svst4q(svbool_t, float64_t const *, svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f32))) void svst4q(svbool_t, float32_t const *, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f16))) void svst4q(svbool_t, float16_t const *, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s32))) void svst4q(svbool_t, int32_t const *, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s64))) void 
svst4q(svbool_t, int64_t const *, svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s16))) void svst4q(svbool_t, int16_t const *, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_bf16))) void svst4q(svbool_t, bfloat16_t const *, svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u8))) void svst4q_vnum(svbool_t, uint8_t const *, int64_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u32))) void svst4q_vnum(svbool_t, uint32_t const *, int64_t, svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u64))) void svst4q_vnum(svbool_t, uint64_t const *, int64_t, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u16))) void svst4q_vnum(svbool_t, uint16_t const *, int64_t, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s8))) void svst4q_vnum(svbool_t, int8_t const *, int64_t, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f64))) void svst4q_vnum(svbool_t, float64_t const *, int64_t, svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f32))) void svst4q_vnum(svbool_t, float32_t const *, int64_t, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f16))) void svst4q_vnum(svbool_t, float16_t const *, int64_t, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s32))) void svst4q_vnum(svbool_t, int32_t const *, int64_t, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s64))) void svst4q_vnum(svbool_t, int64_t const *, int64_t, svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s16))) void svst4q_vnum(svbool_t, int16_t const *, int64_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_bf16))) void svst4q_vnum(svbool_t, bfloat16_t const *, int64_t, svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u8))) svuint8_t svtblq(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u32))) svuint32_t svtblq(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u64))) svuint64_t svtblq(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u16))) svuint16_t svtblq(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_bf16))) svbfloat16_t svtblq(svbfloat16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s8))) svint8_t svtblq(svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f64))) svfloat64_t svtblq(svfloat64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f32))) svfloat32_t svtblq(svfloat32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f16))) svfloat16_t svtblq(svfloat16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s32))) svint32_t svtblq(svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s64))) svint64_t svtblq(svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s16))) svint16_t svtblq(svint16_t, svuint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u8))) svuint8_t svtbxq(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u32))) svuint32_t svtbxq(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u64))) svuint64_t svtbxq(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u16))) svuint16_t svtbxq(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_bf16))) svbfloat16_t svtbxq(svbfloat16_t, svbfloat16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s8))) svint8_t svtbxq(svint8_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f64))) svfloat64_t svtbxq(svfloat64_t, svfloat64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f32))) svfloat32_t svtbxq(svfloat32_t, svfloat32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f16))) svfloat16_t svtbxq(svfloat16_t, svfloat16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s32))) svint32_t svtbxq(svint32_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s64))) svint64_t svtbxq(svint64_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s16))) svint16_t svtbxq(svint16_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u8))) svuint8_t svuzpq1(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u32))) svuint32_t svuzpq1(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u64))) svuint64_t svuzpq1(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u16))) svuint16_t svuzpq1(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_bf16))) svbfloat16_t svuzpq1(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s8))) svint8_t svuzpq1(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f64))) svfloat64_t svuzpq1(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f32))) svfloat32_t svuzpq1(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f16))) svfloat16_t svuzpq1(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s32))) svint32_t svuzpq1(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s64))) svint64_t svuzpq1(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s16))) svint16_t svuzpq1(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u8))) svuint8_t svuzpq2(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u32))) svuint32_t svuzpq2(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u64))) svuint64_t svuzpq2(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u16))) svuint16_t svuzpq2(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_bf16))) svbfloat16_t 
svuzpq2(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s8))) svint8_t svuzpq2(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f64))) svfloat64_t svuzpq2(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f32))) svfloat32_t svuzpq2(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f16))) svfloat16_t svuzpq2(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s32))) svint32_t svuzpq2(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s64))) svint64_t svuzpq2(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s16))) svint16_t svuzpq2(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u8))) svuint8_t svzipq1(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u32))) svuint32_t svzipq1(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u64))) svuint64_t svzipq1(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u16))) svuint16_t svzipq1(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_bf16))) svbfloat16_t svzipq1(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s8))) svint8_t svzipq1(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f64))) svfloat64_t svzipq1(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f32))) svfloat32_t svzipq1(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f16))) svfloat16_t svzipq1(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s32))) svint32_t svzipq1(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s64))) svint64_t svzipq1(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s16))) svint16_t svzipq1(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u8))) svuint8_t svzipq2(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u32))) svuint32_t svzipq2(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u64))) svuint64_t svzipq2(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u16))) svuint16_t svzipq2(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_bf16))) svbfloat16_t svzipq2(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s8))) svint8_t svzipq2(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f64))) svfloat64_t svzipq2(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f32))) svfloat32_t svzipq2(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f16))) svfloat16_t svzipq2(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s32))) svint32_t svzipq2(svint32_t, svint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s64))) svint64_t svzipq2(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s16))) svint16_t svzipq2(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_b16))) svbool_t svpsel_lane_b16(svbool_t, svbool_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_b32))) svbool_t svpsel_lane_b32(svbool_t, svbool_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_b64))) svbool_t svpsel_lane_b64(svbool_t, svbool_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_b8))) svbool_t svpsel_lane_b8(svbool_t, svbool_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslb_f32))) svfloat32_t svbfmlslb_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslb_lane_f32))) svfloat32_t svbfmlslb_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslt_f32))) svfloat32_t svbfmlslt_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslt_lane_f32))) svfloat32_t svbfmlslt_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f64))) svfloat64_t svclamp_f64(svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f32))) svfloat32_t svclamp_f32(svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f16))) svfloat16_t svclamp_f16(svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s8))) svint8_t svclamp_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s32))) svint32_t svclamp_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s64))) svint64_t svclamp_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s16))) svint16_t svclamp_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u8))) svuint8_t svclamp_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u32))) svuint32_t svclamp_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u64))) svuint64_t svclamp_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u16))) svuint16_t svclamp_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c8))) uint64_t svcntp_c8(svcount_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c32))) uint64_t svcntp_c32(svcount_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c64))) uint64_t svcntp_c64(svcount_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c16))) uint64_t svcntp_c16(svcount_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_b))) svboolx2_t svcreate2_b(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_b))) svboolx4_t svcreate4_b(svbool_t, svbool_t, 
svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f32_f16))) svfloat32_t svdot_f32_f16(svfloat32_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_s32_s16))) svint32_t svdot_s32_s16(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_u32_u16))) svuint32_t svdot_u32_u16(svuint32_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f32_f16))) svfloat32_t svdot_lane_f32_f16(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_s32_s16))) svint32_t svdot_lane_s32_s16(svint32_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_u32_u16))) svuint32_t svdot_lane_u32_u16(svuint32_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_b))) svbool_t svget2_b(svboolx2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_b))) svbool_t svget4_b(svboolx4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x2))) svuint8x2_t svld1_u8_x2(svcount_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x2))) svint8x2_t svld1_s8_x2(svcount_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x2))) svuint64x2_t svld1_u64_x2(svcount_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x2))) svfloat64x2_t svld1_f64_x2(svcount_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x2))) svint64x2_t svld1_s64_x2(svcount_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x2))) svuint16x2_t svld1_u16_x2(svcount_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x2))) svbfloat16x2_t svld1_bf16_x2(svcount_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x2))) svfloat16x2_t svld1_f16_x2(svcount_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x2))) svint16x2_t svld1_s16_x2(svcount_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x2))) svuint32x2_t svld1_u32_x2(svcount_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x2))) svfloat32x2_t svld1_f32_x2(svcount_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x2))) svint32x2_t svld1_s32_x2(svcount_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x4))) svuint8x4_t svld1_u8_x4(svcount_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x4))) svint8x4_t svld1_s8_x4(svcount_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x4))) svuint64x4_t svld1_u64_x4(svcount_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x4))) svfloat64x4_t svld1_f64_x4(svcount_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x4))) svint64x4_t svld1_s64_x4(svcount_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x4))) svuint16x4_t svld1_u16_x4(svcount_t, uint16_t const *); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x4))) svbfloat16x4_t svld1_bf16_x4(svcount_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x4))) svfloat16x4_t svld1_f16_x4(svcount_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x4))) svint16x4_t svld1_s16_x4(svcount_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x4))) svuint32x4_t svld1_u32_x4(svcount_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x4))) svfloat32x4_t svld1_f32_x4(svcount_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x4))) svint32x4_t svld1_s32_x4(svcount_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x2))) svuint8x2_t svld1_vnum_u8_x2(svcount_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x2))) svint8x2_t svld1_vnum_s8_x2(svcount_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x2))) svuint64x2_t svld1_vnum_u64_x2(svcount_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x2))) svfloat64x2_t svld1_vnum_f64_x2(svcount_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x2))) svint64x2_t svld1_vnum_s64_x2(svcount_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x2))) svuint16x2_t svld1_vnum_u16_x2(svcount_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x2))) svbfloat16x2_t svld1_vnum_bf16_x2(svcount_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x2))) svfloat16x2_t svld1_vnum_f16_x2(svcount_t, float16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x2))) svint16x2_t svld1_vnum_s16_x2(svcount_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x2))) svuint32x2_t svld1_vnum_u32_x2(svcount_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x2))) svfloat32x2_t svld1_vnum_f32_x2(svcount_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x2))) svint32x2_t svld1_vnum_s32_x2(svcount_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x4))) svuint8x4_t svld1_vnum_u8_x4(svcount_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x4))) svint8x4_t svld1_vnum_s8_x4(svcount_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x4))) svuint64x4_t svld1_vnum_u64_x4(svcount_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x4))) svfloat64x4_t svld1_vnum_f64_x4(svcount_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x4))) svint64x4_t svld1_vnum_s64_x4(svcount_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x4))) svuint16x4_t svld1_vnum_u16_x4(svcount_t, uint16_t const *, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x4))) svbfloat16x4_t svld1_vnum_bf16_x4(svcount_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x4))) svfloat16x4_t svld1_vnum_f16_x4(svcount_t, float16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x4))) svint16x4_t svld1_vnum_s16_x4(svcount_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x4))) svuint32x4_t svld1_vnum_u32_x4(svcount_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x4))) svfloat32x4_t svld1_vnum_f32_x4(svcount_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x4))) svint32x4_t svld1_vnum_s32_x4(svcount_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x2))) svuint8x2_t svldnt1_u8_x2(svcount_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x2))) svint8x2_t svldnt1_s8_x2(svcount_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x2))) svuint64x2_t svldnt1_u64_x2(svcount_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x2))) svfloat64x2_t svldnt1_f64_x2(svcount_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x2))) svint64x2_t svldnt1_s64_x2(svcount_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x2))) svuint16x2_t svldnt1_u16_x2(svcount_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x2))) svbfloat16x2_t svldnt1_bf16_x2(svcount_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x2))) svfloat16x2_t svldnt1_f16_x2(svcount_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x2))) svint16x2_t svldnt1_s16_x2(svcount_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x2))) svuint32x2_t svldnt1_u32_x2(svcount_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x2))) svfloat32x2_t svldnt1_f32_x2(svcount_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x2))) svint32x2_t svldnt1_s32_x2(svcount_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x4))) svuint8x4_t svldnt1_u8_x4(svcount_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x4))) svint8x4_t svldnt1_s8_x4(svcount_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x4))) svuint64x4_t svldnt1_u64_x4(svcount_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x4))) svfloat64x4_t svldnt1_f64_x4(svcount_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x4))) svint64x4_t svldnt1_s64_x4(svcount_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x4))) svuint16x4_t svldnt1_u16_x4(svcount_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x4))) svbfloat16x4_t svldnt1_bf16_x4(svcount_t, bfloat16_t const *); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x4))) svfloat16x4_t svldnt1_f16_x4(svcount_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x4))) svint16x4_t svldnt1_s16_x4(svcount_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x4))) svuint32x4_t svldnt1_u32_x4(svcount_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x4))) svfloat32x4_t svldnt1_f32_x4(svcount_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x4))) svint32x4_t svldnt1_s32_x4(svcount_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x2))) svuint8x2_t svldnt1_vnum_u8_x2(svcount_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x2))) svint8x2_t svldnt1_vnum_s8_x2(svcount_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x2))) svuint64x2_t svldnt1_vnum_u64_x2(svcount_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x2))) svfloat64x2_t svldnt1_vnum_f64_x2(svcount_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x2))) svint64x2_t svldnt1_vnum_s64_x2(svcount_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x2))) svuint16x2_t svldnt1_vnum_u16_x2(svcount_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x2))) svbfloat16x2_t svldnt1_vnum_bf16_x2(svcount_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x2))) svfloat16x2_t svldnt1_vnum_f16_x2(svcount_t, float16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x2))) svint16x2_t svldnt1_vnum_s16_x2(svcount_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x2))) svuint32x2_t svldnt1_vnum_u32_x2(svcount_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x2))) svfloat32x2_t svldnt1_vnum_f32_x2(svcount_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x2))) svint32x2_t svldnt1_vnum_s32_x2(svcount_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x4))) svuint8x4_t svldnt1_vnum_u8_x4(svcount_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x4))) svint8x4_t svldnt1_vnum_s8_x4(svcount_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x4))) svuint64x4_t svldnt1_vnum_u64_x4(svcount_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x4))) svfloat64x4_t svldnt1_vnum_f64_x4(svcount_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x4))) svint64x4_t svldnt1_vnum_s64_x4(svcount_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x4))) svuint16x4_t svldnt1_vnum_u16_x4(svcount_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x4))) svbfloat16x4_t 
svldnt1_vnum_bf16_x4(svcount_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x4))) svfloat16x4_t svldnt1_vnum_f16_x4(svcount_t, float16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x4))) svint16x4_t svldnt1_vnum_s16_x4(svcount_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x4))) svuint32x4_t svldnt1_vnum_u32_x4(svcount_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x4))) svfloat32x4_t svldnt1_vnum_f32_x4(svcount_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x4))) svint32x4_t svldnt1_vnum_s32_x4(svcount_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c8))) svbool_t svpext_lane_c8(svcount_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c32))) svbool_t svpext_lane_c32(svcount_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c64))) svbool_t svpext_lane_c64(svcount_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c16))) svbool_t svpext_lane_c16(svcount_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c8_x2))) svboolx2_t svpext_lane_c8_x2(svcount_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c32_x2))) svboolx2_t svpext_lane_c32_x2(svcount_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c64_x2))) svboolx2_t svpext_lane_c64_x2(svcount_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c16_x2))) svboolx2_t svpext_lane_c16_x2(svcount_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpfalse_c))) svcount_t svpfalse_c(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c16))) svcount_t svpsel_lane_c16(svcount_t, svbool_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c32))) svcount_t svpsel_lane_c32(svcount_t, svbool_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c64))) svcount_t svpsel_lane_c64(svcount_t, svbool_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c8))) svcount_t svpsel_lane_c8(svcount_t, svbool_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c8))) svcount_t svptrue_c8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c32))) svcount_t svptrue_c32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c64))) svcount_t svptrue_c64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c16))) svcount_t svptrue_c16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s16_s32_x2))) svint16_t svqrshrn_n_s16_s32_x2(svint32x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u16_u32_x2))) svuint16_t svqrshrn_n_u16_u32_x2(svuint32x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u16_s32_x2))) svuint16_t svqrshrun_n_u16_s32_x2(svint32x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_m))) svuint8_t svrevd_u8_m(svuint8_t, svbool_t, svuint8_t); __ai 
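/* Illustrative note (not part of the original header text): the svcount_t
   values produced by svptrue_c8()/svpfalse_c() above are SVE2.1
   "predicate-as-counter" operands rather than ordinary svbool_t masks. A
   conventional svbool_t slice can be recovered with the svpext_lane_*
   intrinsics declared above, e.g. (sketch, assuming an SVE2.1 target):

     svcount_t pn = svptrue_c8();            // all-true counter predicate
     svbool_t  p0 = svpext_lane_c8(pn, 0);   // first svbool_t-sized portion
*/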
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_m))) svuint32_t svrevd_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_m))) svuint64_t svrevd_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_m))) svuint16_t svrevd_u16_m(svuint16_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_m))) svbfloat16_t svrevd_bf16_m(svbfloat16_t, svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_m))) svint8_t svrevd_s8_m(svint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_m))) svfloat64_t svrevd_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_m))) svfloat32_t svrevd_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_m))) svfloat16_t svrevd_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_m))) svint32_t svrevd_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_m))) svint64_t svrevd_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_m))) svint16_t svrevd_s16_m(svint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_x))) svuint8_t svrevd_u8_x(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_x))) svuint32_t svrevd_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_x))) svuint64_t svrevd_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_x))) svuint16_t svrevd_u16_x(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_x))) svbfloat16_t svrevd_bf16_x(svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_x))) svint8_t svrevd_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_x))) svfloat64_t svrevd_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_x))) svfloat32_t svrevd_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_x))) svfloat16_t svrevd_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_x))) svint32_t svrevd_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_x))) svint64_t svrevd_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_x))) svint16_t svrevd_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_z))) svuint8_t svrevd_u8_z(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_z))) svuint32_t svrevd_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_z))) svuint64_t svrevd_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_z))) svuint16_t svrevd_u16_z(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_z))) svbfloat16_t svrevd_bf16_z(svbool_t, 
svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_z))) svint8_t svrevd_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_z))) svfloat64_t svrevd_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_z))) svfloat32_t svrevd_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_z))) svfloat16_t svrevd_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_z))) svint32_t svrevd_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_z))) svint64_t svrevd_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_z))) svint16_t svrevd_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_b))) svboolx2_t svset2_b(svboolx2_t, uint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_b))) svboolx4_t svset4_b(svboolx4_t, uint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x2))) void svst1_u8_x2(svcount_t, uint8_t *, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x2))) void svst1_s8_x2(svcount_t, int8_t *, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x2))) void svst1_u64_x2(svcount_t, uint64_t *, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x2))) void svst1_f64_x2(svcount_t, float64_t *, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x2))) void svst1_s64_x2(svcount_t, int64_t *, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x2))) void svst1_u16_x2(svcount_t, uint16_t *, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x2))) void svst1_bf16_x2(svcount_t, bfloat16_t *, svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x2))) void svst1_f16_x2(svcount_t, float16_t *, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x2))) void svst1_s16_x2(svcount_t, int16_t *, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x2))) void svst1_u32_x2(svcount_t, uint32_t *, svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x2))) void svst1_f32_x2(svcount_t, float32_t *, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x2))) void svst1_s32_x2(svcount_t, int32_t *, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x4))) void svst1_u8_x4(svcount_t, uint8_t *, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x4))) void svst1_s8_x4(svcount_t, int8_t *, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x4))) void svst1_u64_x4(svcount_t, uint64_t *, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x4))) void svst1_f64_x4(svcount_t, float64_t *, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x4))) void svst1_s64_x4(svcount_t, int64_t *, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x4))) void svst1_u16_x4(svcount_t, uint16_t *, svuint16x4_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x4))) void svst1_bf16_x4(svcount_t, bfloat16_t *, svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x4))) void svst1_f16_x4(svcount_t, float16_t *, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x4))) void svst1_s16_x4(svcount_t, int16_t *, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x4))) void svst1_u32_x4(svcount_t, uint32_t *, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x4))) void svst1_f32_x4(svcount_t, float32_t *, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x4))) void svst1_s32_x4(svcount_t, int32_t *, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x2))) void svst1_vnum_u8_x2(svcount_t, uint8_t *, int64_t, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x2))) void svst1_vnum_s8_x2(svcount_t, int8_t *, int64_t, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x2))) void svst1_vnum_u64_x2(svcount_t, uint64_t *, int64_t, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x2))) void svst1_vnum_f64_x2(svcount_t, float64_t *, int64_t, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x2))) void svst1_vnum_s64_x2(svcount_t, int64_t *, int64_t, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x2))) void svst1_vnum_u16_x2(svcount_t, uint16_t *, int64_t, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x2))) void svst1_vnum_bf16_x2(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x2))) void svst1_vnum_f16_x2(svcount_t, float16_t *, int64_t, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x2))) void svst1_vnum_s16_x2(svcount_t, int16_t *, int64_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x2))) void svst1_vnum_u32_x2(svcount_t, uint32_t *, int64_t, svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x2))) void svst1_vnum_f32_x2(svcount_t, float32_t *, int64_t, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x2))) void svst1_vnum_s32_x2(svcount_t, int32_t *, int64_t, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x4))) void svst1_vnum_u8_x4(svcount_t, uint8_t *, int64_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x4))) void svst1_vnum_s8_x4(svcount_t, int8_t *, int64_t, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x4))) void svst1_vnum_u64_x4(svcount_t, uint64_t *, int64_t, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x4))) void svst1_vnum_f64_x4(svcount_t, float64_t *, int64_t, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x4))) void svst1_vnum_s64_x4(svcount_t, int64_t *, int64_t, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x4))) void svst1_vnum_u16_x4(svcount_t, uint16_t *, int64_t, svuint16x4_t); __ai 
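/* Note on the _vnum_ load/store variants above: the extra int64_t argument
   is a displacement counted in whole vector registers, so for the 8-bit
   forms the effective address is base + vnum * svcntb() bytes, mirroring
   the single-vector svld1_vnum/svst1_vnum intrinsics. This follows the
   usual ACLE vnum convention; the exact scaling rule for the _x2/_x4 forms
   is defined by the ACLE specification. */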
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x4))) void svst1_vnum_bf16_x4(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x4))) void svst1_vnum_f16_x4(svcount_t, float16_t *, int64_t, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x4))) void svst1_vnum_s16_x4(svcount_t, int16_t *, int64_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x4))) void svst1_vnum_u32_x4(svcount_t, uint32_t *, int64_t, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x4))) void svst1_vnum_f32_x4(svcount_t, float32_t *, int64_t, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x4))) void svst1_vnum_s32_x4(svcount_t, int32_t *, int64_t, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x2))) void svstnt1_u8_x2(svcount_t, uint8_t *, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x2))) void svstnt1_s8_x2(svcount_t, int8_t *, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x2))) void svstnt1_u64_x2(svcount_t, uint64_t *, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x2))) void svstnt1_f64_x2(svcount_t, float64_t *, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x2))) void svstnt1_s64_x2(svcount_t, int64_t *, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x2))) void svstnt1_u16_x2(svcount_t, uint16_t *, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x2))) void svstnt1_bf16_x2(svcount_t, bfloat16_t *, svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x2))) void svstnt1_f16_x2(svcount_t, float16_t *, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x2))) void svstnt1_s16_x2(svcount_t, int16_t *, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x2))) void svstnt1_u32_x2(svcount_t, uint32_t *, svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x2))) void svstnt1_f32_x2(svcount_t, float32_t *, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x2))) void svstnt1_s32_x2(svcount_t, int32_t *, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x4))) void svstnt1_u8_x4(svcount_t, uint8_t *, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x4))) void svstnt1_s8_x4(svcount_t, int8_t *, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x4))) void svstnt1_u64_x4(svcount_t, uint64_t *, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x4))) void svstnt1_f64_x4(svcount_t, float64_t *, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x4))) void svstnt1_s64_x4(svcount_t, int64_t *, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x4))) void svstnt1_u16_x4(svcount_t, uint16_t *, svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x4))) void svstnt1_bf16_x4(svcount_t, bfloat16_t *, svbfloat16x4_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x4))) void svstnt1_f16_x4(svcount_t, float16_t *, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x4))) void svstnt1_s16_x4(svcount_t, int16_t *, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x4))) void svstnt1_u32_x4(svcount_t, uint32_t *, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x4))) void svstnt1_f32_x4(svcount_t, float32_t *, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x4))) void svstnt1_s32_x4(svcount_t, int32_t *, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x2))) void svstnt1_vnum_u8_x2(svcount_t, uint8_t *, int64_t, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x2))) void svstnt1_vnum_s8_x2(svcount_t, int8_t *, int64_t, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x2))) void svstnt1_vnum_u64_x2(svcount_t, uint64_t *, int64_t, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x2))) void svstnt1_vnum_f64_x2(svcount_t, float64_t *, int64_t, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x2))) void svstnt1_vnum_s64_x2(svcount_t, int64_t *, int64_t, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x2))) void svstnt1_vnum_u16_x2(svcount_t, uint16_t *, int64_t, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x2))) void svstnt1_vnum_bf16_x2(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x2))) void svstnt1_vnum_f16_x2(svcount_t, float16_t *, int64_t, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x2))) void svstnt1_vnum_s16_x2(svcount_t, int16_t *, int64_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x2))) void svstnt1_vnum_u32_x2(svcount_t, uint32_t *, int64_t, svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x2))) void svstnt1_vnum_f32_x2(svcount_t, float32_t *, int64_t, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x2))) void svstnt1_vnum_s32_x2(svcount_t, int32_t *, int64_t, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x4))) void svstnt1_vnum_u8_x4(svcount_t, uint8_t *, int64_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x4))) void svstnt1_vnum_s8_x4(svcount_t, int8_t *, int64_t, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x4))) void svstnt1_vnum_u64_x4(svcount_t, uint64_t *, int64_t, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x4))) void svstnt1_vnum_f64_x4(svcount_t, float64_t *, int64_t, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x4))) void svstnt1_vnum_s64_x4(svcount_t, int64_t *, int64_t, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x4))) void svstnt1_vnum_u16_x4(svcount_t, uint16_t *, int64_t, svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x4))) void 
svstnt1_vnum_bf16_x4(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x4))) void svstnt1_vnum_f16_x4(svcount_t, float16_t *, int64_t, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x4))) void svstnt1_vnum_s16_x4(svcount_t, int16_t *, int64_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x4))) void svstnt1_vnum_u32_x4(svcount_t, uint32_t *, int64_t, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x4))) void svstnt1_vnum_f32_x4(svcount_t, float32_t *, int64_t, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x4))) void svstnt1_vnum_s32_x4(svcount_t, int32_t *, int64_t, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_b))) svboolx2_t svundef2_b(); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_b))) svboolx4_t svundef4_b(); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_s64))) svcount_t svwhilege_c8_s64(int64_t, int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_s64))) svcount_t svwhilege_c32_s64(int64_t, int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_s64))) svcount_t svwhilege_c64_s64(int64_t, int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_s64))) svcount_t svwhilege_c16_s64(int64_t, int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_u64))) svcount_t svwhilege_c8_u64(uint64_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_u64))) svcount_t svwhilege_c32_u64(uint64_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_u64))) svcount_t svwhilege_c64_u64(uint64_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_u64))) svcount_t svwhilege_c16_u64(uint64_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_s64_x2))) svboolx2_t svwhilege_b8_s64_x2(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_s64_x2))) svboolx2_t svwhilege_b32_s64_x2(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_s64_x2))) svboolx2_t svwhilege_b64_s64_x2(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_s64_x2))) svboolx2_t svwhilege_b16_s64_x2(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_u64_x2))) svboolx2_t svwhilege_b8_u64_x2(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_u64_x2))) svboolx2_t svwhilege_b32_u64_x2(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_u64_x2))) svboolx2_t svwhilege_b64_u64_x2(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_u64_x2))) svboolx2_t svwhilege_b16_u64_x2(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_s64))) svcount_t svwhilegt_c8_s64(int64_t, int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_s64))) svcount_t svwhilegt_c32_s64(int64_t, int64_t, uint64_t); __ai 
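/* The svwhilege_c*/svwhilegt_c* intrinsics above (and the analogous
   svwhilele_c*/svwhilelt_c* forms that follow) return a
   predicate-as-counter (svcount_t). Their trailing uint64_t parameter
   selects the vector-group width and, per the SVE2.1 ACLE, must be the
   constant 2 or 4. */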
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_s64))) svcount_t svwhilegt_c64_s64(int64_t, int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_s64))) svcount_t svwhilegt_c16_s64(int64_t, int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_u64))) svcount_t svwhilegt_c8_u64(uint64_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_u64))) svcount_t svwhilegt_c32_u64(uint64_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_u64))) svcount_t svwhilegt_c64_u64(uint64_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_u64))) svcount_t svwhilegt_c16_u64(uint64_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_s64_x2))) svboolx2_t svwhilegt_b8_s64_x2(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_s64_x2))) svboolx2_t svwhilegt_b32_s64_x2(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_s64_x2))) svboolx2_t svwhilegt_b64_s64_x2(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_s64_x2))) svboolx2_t svwhilegt_b16_s64_x2(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_u64_x2))) svboolx2_t svwhilegt_b8_u64_x2(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_u64_x2))) svboolx2_t svwhilegt_b32_u64_x2(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_u64_x2))) svboolx2_t svwhilegt_b64_u64_x2(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_u64_x2))) svboolx2_t svwhilegt_b16_u64_x2(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_s64))) svcount_t svwhilele_c8_s64(int64_t, int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_s64))) svcount_t svwhilele_c32_s64(int64_t, int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_s64))) svcount_t svwhilele_c64_s64(int64_t, int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_s64))) svcount_t svwhilele_c16_s64(int64_t, int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_u64))) svcount_t svwhilele_c8_u64(uint64_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_u64))) svcount_t svwhilele_c32_u64(uint64_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_u64))) svcount_t svwhilele_c64_u64(uint64_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_u64))) svcount_t svwhilele_c16_u64(uint64_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_s64_x2))) svboolx2_t svwhilele_b8_s64_x2(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_s64_x2))) svboolx2_t svwhilele_b32_s64_x2(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_s64_x2))) svboolx2_t svwhilele_b64_s64_x2(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_s64_x2))) svboolx2_t svwhilele_b16_s64_x2(int64_t, 
int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_u64_x2))) svboolx2_t svwhilele_b8_u64_x2(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_u64_x2))) svboolx2_t svwhilele_b32_u64_x2(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_u64_x2))) svboolx2_t svwhilele_b64_u64_x2(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_u64_x2))) svboolx2_t svwhilele_b16_u64_x2(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_u64))) svcount_t svwhilelt_c8_u64(uint64_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_u64))) svcount_t svwhilelt_c32_u64(uint64_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_u64))) svcount_t svwhilelt_c64_u64(uint64_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_u64))) svcount_t svwhilelt_c16_u64(uint64_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_s64))) svcount_t svwhilelt_c8_s64(int64_t, int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_s64))) svcount_t svwhilelt_c32_s64(int64_t, int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_s64))) svcount_t svwhilelt_c64_s64(int64_t, int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_s64))) svcount_t svwhilelt_c16_s64(int64_t, int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_u64_x2))) svboolx2_t svwhilelt_b8_u64_x2(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_u64_x2))) svboolx2_t svwhilelt_b32_u64_x2(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_u64_x2))) svboolx2_t svwhilelt_b64_u64_x2(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_u64_x2))) svboolx2_t svwhilelt_b16_u64_x2(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_s64_x2))) svboolx2_t svwhilelt_b8_s64_x2(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_s64_x2))) svboolx2_t svwhilelt_b32_s64_x2(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_s64_x2))) svboolx2_t svwhilelt_b64_s64_x2(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_s64_x2))) svboolx2_t svwhilelt_b16_s64_x2(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslb_f32))) svfloat32_t svbfmlslb(svfloat32_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslb_lane_f32))) svfloat32_t svbfmlslb_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslt_f32))) svfloat32_t svbfmlslt(svfloat32_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslt_lane_f32))) svfloat32_t svbfmlslt_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f64))) svfloat64_t svclamp(svfloat64_t, svfloat64_t, svfloat64_t); __aio 
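/* From the svbfmlslb declaration above onward, the same builtins are
   re-declared under their type-generic names (svclamp, svdot, svld1_x2,
   svrevd_m, and so on) using the __aio form, so a single name such as
   svclamp accepts any of the element types that were previously spelled
   out in the type-suffixed __ai declarations. */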
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f32))) svfloat32_t svclamp(svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f16))) svfloat16_t svclamp(svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s8))) svint8_t svclamp(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s32))) svint32_t svclamp(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s64))) svint64_t svclamp(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s16))) svint16_t svclamp(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u8))) svuint8_t svclamp(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u32))) svuint32_t svclamp(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u64))) svuint64_t svclamp(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u16))) svuint16_t svclamp(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_b))) svboolx2_t svcreate2(svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_b))) svboolx4_t svcreate4(svbool_t, svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f32_f16))) svfloat32_t svdot(svfloat32_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_s32_s16))) svint32_t svdot(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_u32_u16))) svuint32_t svdot(svuint32_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f32_f16))) svfloat32_t svdot_lane(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_s32_s16))) svint32_t svdot_lane(svint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_u32_u16))) svuint32_t svdot_lane(svuint32_t, svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_b))) svbool_t svget2(svboolx2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_b))) svbool_t svget4(svboolx4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x2))) svuint8x2_t svld1_x2(svcount_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x2))) svint8x2_t svld1_x2(svcount_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x2))) svuint64x2_t svld1_x2(svcount_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x2))) svfloat64x2_t svld1_x2(svcount_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x2))) svint64x2_t svld1_x2(svcount_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x2))) svuint16x2_t svld1_x2(svcount_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x2))) svbfloat16x2_t svld1_x2(svcount_t, bfloat16_t const *); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x2))) svfloat16x2_t svld1_x2(svcount_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x2))) svint16x2_t svld1_x2(svcount_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x2))) svuint32x2_t svld1_x2(svcount_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x2))) svfloat32x2_t svld1_x2(svcount_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x2))) svint32x2_t svld1_x2(svcount_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x4))) svuint8x4_t svld1_x4(svcount_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x4))) svint8x4_t svld1_x4(svcount_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x4))) svuint64x4_t svld1_x4(svcount_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x4))) svfloat64x4_t svld1_x4(svcount_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x4))) svint64x4_t svld1_x4(svcount_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x4))) svuint16x4_t svld1_x4(svcount_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x4))) svbfloat16x4_t svld1_x4(svcount_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x4))) svfloat16x4_t svld1_x4(svcount_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x4))) svint16x4_t svld1_x4(svcount_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x4))) svuint32x4_t svld1_x4(svcount_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x4))) svfloat32x4_t svld1_x4(svcount_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x4))) svint32x4_t svld1_x4(svcount_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x2))) svuint8x2_t svld1_vnum_x2(svcount_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x2))) svint8x2_t svld1_vnum_x2(svcount_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x2))) svuint64x2_t svld1_vnum_x2(svcount_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x2))) svfloat64x2_t svld1_vnum_x2(svcount_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x2))) svint64x2_t svld1_vnum_x2(svcount_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x2))) svuint16x2_t svld1_vnum_x2(svcount_t, uint16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x2))) svbfloat16x2_t svld1_vnum_x2(svcount_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x2))) svfloat16x2_t svld1_vnum_x2(svcount_t, float16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x2))) svint16x2_t svld1_vnum_x2(svcount_t, int16_t const *, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x2))) svuint32x2_t svld1_vnum_x2(svcount_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x2))) svfloat32x2_t svld1_vnum_x2(svcount_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x2))) svint32x2_t svld1_vnum_x2(svcount_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x4))) svuint8x4_t svld1_vnum_x4(svcount_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x4))) svint8x4_t svld1_vnum_x4(svcount_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x4))) svuint64x4_t svld1_vnum_x4(svcount_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x4))) svfloat64x4_t svld1_vnum_x4(svcount_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x4))) svint64x4_t svld1_vnum_x4(svcount_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x4))) svuint16x4_t svld1_vnum_x4(svcount_t, uint16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x4))) svbfloat16x4_t svld1_vnum_x4(svcount_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x4))) svfloat16x4_t svld1_vnum_x4(svcount_t, float16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x4))) svint16x4_t svld1_vnum_x4(svcount_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x4))) svuint32x4_t svld1_vnum_x4(svcount_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x4))) svfloat32x4_t svld1_vnum_x4(svcount_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x4))) svint32x4_t svld1_vnum_x4(svcount_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x2))) svuint8x2_t svldnt1_x2(svcount_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x2))) svint8x2_t svldnt1_x2(svcount_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x2))) svuint64x2_t svldnt1_x2(svcount_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x2))) svfloat64x2_t svldnt1_x2(svcount_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x2))) svint64x2_t svldnt1_x2(svcount_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x2))) svuint16x2_t svldnt1_x2(svcount_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x2))) svbfloat16x2_t svldnt1_x2(svcount_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x2))) svfloat16x2_t svldnt1_x2(svcount_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x2))) svint16x2_t svldnt1_x2(svcount_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x2))) svuint32x2_t svldnt1_x2(svcount_t, uint32_t const *); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x2))) svfloat32x2_t svldnt1_x2(svcount_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x2))) svint32x2_t svldnt1_x2(svcount_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x4))) svuint8x4_t svldnt1_x4(svcount_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x4))) svint8x4_t svldnt1_x4(svcount_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x4))) svuint64x4_t svldnt1_x4(svcount_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x4))) svfloat64x4_t svldnt1_x4(svcount_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x4))) svint64x4_t svldnt1_x4(svcount_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x4))) svuint16x4_t svldnt1_x4(svcount_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x4))) svbfloat16x4_t svldnt1_x4(svcount_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x4))) svfloat16x4_t svldnt1_x4(svcount_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x4))) svint16x4_t svldnt1_x4(svcount_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x4))) svuint32x4_t svldnt1_x4(svcount_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x4))) svfloat32x4_t svldnt1_x4(svcount_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x4))) svint32x4_t svldnt1_x4(svcount_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x2))) svuint8x2_t svldnt1_vnum_x2(svcount_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x2))) svint8x2_t svldnt1_vnum_x2(svcount_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x2))) svuint64x2_t svldnt1_vnum_x2(svcount_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x2))) svfloat64x2_t svldnt1_vnum_x2(svcount_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x2))) svint64x2_t svldnt1_vnum_x2(svcount_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x2))) svuint16x2_t svldnt1_vnum_x2(svcount_t, uint16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x2))) svbfloat16x2_t svldnt1_vnum_x2(svcount_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x2))) svfloat16x2_t svldnt1_vnum_x2(svcount_t, float16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x2))) svint16x2_t svldnt1_vnum_x2(svcount_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x2))) svuint32x2_t svldnt1_vnum_x2(svcount_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x2))) svfloat32x2_t svldnt1_vnum_x2(svcount_t, float32_t const *, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x2))) svint32x2_t svldnt1_vnum_x2(svcount_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x4))) svuint8x4_t svldnt1_vnum_x4(svcount_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x4))) svint8x4_t svldnt1_vnum_x4(svcount_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x4))) svuint64x4_t svldnt1_vnum_x4(svcount_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x4))) svfloat64x4_t svldnt1_vnum_x4(svcount_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x4))) svint64x4_t svldnt1_vnum_x4(svcount_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x4))) svuint16x4_t svldnt1_vnum_x4(svcount_t, uint16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x4))) svbfloat16x4_t svldnt1_vnum_x4(svcount_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x4))) svfloat16x4_t svldnt1_vnum_x4(svcount_t, float16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x4))) svint16x4_t svldnt1_vnum_x4(svcount_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x4))) svuint32x4_t svldnt1_vnum_x4(svcount_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x4))) svfloat32x4_t svldnt1_vnum_x4(svcount_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x4))) svint32x4_t svldnt1_vnum_x4(svcount_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s16_s32_x2))) svint16_t svqrshrn_s16(svint32x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u16_u32_x2))) svuint16_t svqrshrn_u16(svuint32x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u16_s32_x2))) svuint16_t svqrshrun_u16(svint32x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_m))) svuint8_t svrevd_m(svuint8_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_m))) svuint32_t svrevd_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_m))) svuint64_t svrevd_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_m))) svuint16_t svrevd_m(svuint16_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_m))) svbfloat16_t svrevd_m(svbfloat16_t, svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_m))) svint8_t svrevd_m(svint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_m))) svfloat64_t svrevd_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_m))) svfloat32_t svrevd_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_m))) svfloat16_t svrevd_m(svfloat16_t, svbool_t, 
svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_m))) svint32_t svrevd_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_m))) svint64_t svrevd_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_m))) svint16_t svrevd_m(svint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_x))) svuint8_t svrevd_x(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_x))) svuint32_t svrevd_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_x))) svuint64_t svrevd_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_x))) svuint16_t svrevd_x(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_x))) svbfloat16_t svrevd_x(svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_x))) svint8_t svrevd_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_x))) svfloat64_t svrevd_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_x))) svfloat32_t svrevd_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_x))) svfloat16_t svrevd_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_x))) svint32_t svrevd_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_x))) svint64_t svrevd_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_x))) svint16_t svrevd_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_z))) svuint8_t svrevd_z(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_z))) svuint32_t svrevd_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_z))) svuint64_t svrevd_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_z))) svuint16_t svrevd_z(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_z))) svbfloat16_t svrevd_z(svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_z))) svint8_t svrevd_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_z))) svfloat64_t svrevd_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_z))) svfloat32_t svrevd_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_z))) svfloat16_t svrevd_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_z))) svint32_t svrevd_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_z))) svint64_t svrevd_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_z))) svint16_t svrevd_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_b))) svboolx2_t svset2(svboolx2_t, uint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_b))) svboolx4_t svset4(svboolx4_t, uint64_t, svbool_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x2))) void svst1(svcount_t, uint8_t *, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x2))) void svst1(svcount_t, int8_t *, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x2))) void svst1(svcount_t, uint64_t *, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x2))) void svst1(svcount_t, float64_t *, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x2))) void svst1(svcount_t, int64_t *, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x2))) void svst1(svcount_t, uint16_t *, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x2))) void svst1(svcount_t, bfloat16_t *, svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x2))) void svst1(svcount_t, float16_t *, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x2))) void svst1(svcount_t, int16_t *, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x2))) void svst1(svcount_t, uint32_t *, svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x2))) void svst1(svcount_t, float32_t *, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x2))) void svst1(svcount_t, int32_t *, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x4))) void svst1(svcount_t, uint8_t *, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x4))) void svst1(svcount_t, int8_t *, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x4))) void svst1(svcount_t, uint64_t *, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x4))) void svst1(svcount_t, float64_t *, svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x4))) void svst1(svcount_t, int64_t *, svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x4))) void svst1(svcount_t, uint16_t *, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x4))) void svst1(svcount_t, bfloat16_t *, svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x4))) void svst1(svcount_t, float16_t *, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x4))) void svst1(svcount_t, int16_t *, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x4))) void svst1(svcount_t, uint32_t *, svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x4))) void svst1(svcount_t, float32_t *, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x4))) void svst1(svcount_t, int32_t *, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x2))) void svst1_vnum(svcount_t, uint8_t *, int64_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x2))) void svst1_vnum(svcount_t, int8_t *, int64_t, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x2))) void svst1_vnum(svcount_t, uint64_t *, int64_t, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x2))) void svst1_vnum(svcount_t, 
float64_t *, int64_t, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x2))) void svst1_vnum(svcount_t, int64_t *, int64_t, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x2))) void svst1_vnum(svcount_t, uint16_t *, int64_t, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x2))) void svst1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x2))) void svst1_vnum(svcount_t, float16_t *, int64_t, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x2))) void svst1_vnum(svcount_t, int16_t *, int64_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x2))) void svst1_vnum(svcount_t, uint32_t *, int64_t, svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x2))) void svst1_vnum(svcount_t, float32_t *, int64_t, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x2))) void svst1_vnum(svcount_t, int32_t *, int64_t, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x4))) void svst1_vnum(svcount_t, uint8_t *, int64_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x4))) void svst1_vnum(svcount_t, int8_t *, int64_t, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x4))) void svst1_vnum(svcount_t, uint64_t *, int64_t, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x4))) void svst1_vnum(svcount_t, float64_t *, int64_t, svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x4))) void svst1_vnum(svcount_t, int64_t *, int64_t, svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x4))) void svst1_vnum(svcount_t, uint16_t *, int64_t, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x4))) void svst1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x4))) void svst1_vnum(svcount_t, float16_t *, int64_t, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x4))) void svst1_vnum(svcount_t, int16_t *, int64_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x4))) void svst1_vnum(svcount_t, uint32_t *, int64_t, svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x4))) void svst1_vnum(svcount_t, float32_t *, int64_t, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x4))) void svst1_vnum(svcount_t, int32_t *, int64_t, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x2))) void svstnt1(svcount_t, uint8_t *, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x2))) void svstnt1(svcount_t, int8_t *, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x2))) void svstnt1(svcount_t, uint64_t *, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x2))) void svstnt1(svcount_t, float64_t *, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x2))) void svstnt1(svcount_t, 
int64_t *, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x2))) void svstnt1(svcount_t, uint16_t *, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x2))) void svstnt1(svcount_t, bfloat16_t *, svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x2))) void svstnt1(svcount_t, float16_t *, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x2))) void svstnt1(svcount_t, int16_t *, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x2))) void svstnt1(svcount_t, uint32_t *, svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x2))) void svstnt1(svcount_t, float32_t *, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x2))) void svstnt1(svcount_t, int32_t *, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x4))) void svstnt1(svcount_t, uint8_t *, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x4))) void svstnt1(svcount_t, int8_t *, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x4))) void svstnt1(svcount_t, uint64_t *, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x4))) void svstnt1(svcount_t, float64_t *, svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x4))) void svstnt1(svcount_t, int64_t *, svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x4))) void svstnt1(svcount_t, uint16_t *, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x4))) void svstnt1(svcount_t, bfloat16_t *, svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x4))) void svstnt1(svcount_t, float16_t *, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x4))) void svstnt1(svcount_t, int16_t *, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x4))) void svstnt1(svcount_t, uint32_t *, svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x4))) void svstnt1(svcount_t, float32_t *, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x4))) void svstnt1(svcount_t, int32_t *, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x2))) void svstnt1_vnum(svcount_t, uint8_t *, int64_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x2))) void svstnt1_vnum(svcount_t, int8_t *, int64_t, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x2))) void svstnt1_vnum(svcount_t, uint64_t *, int64_t, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x2))) void svstnt1_vnum(svcount_t, float64_t *, int64_t, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x2))) void svstnt1_vnum(svcount_t, int64_t *, int64_t, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x2))) void svstnt1_vnum(svcount_t, uint16_t *, int64_t, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x2))) void svstnt1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x2))) void svstnt1_vnum(svcount_t, float16_t *, int64_t, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x2))) void svstnt1_vnum(svcount_t, int16_t *, int64_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x2))) void svstnt1_vnum(svcount_t, uint32_t *, int64_t, svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x2))) void svstnt1_vnum(svcount_t, float32_t *, int64_t, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x2))) void svstnt1_vnum(svcount_t, int32_t *, int64_t, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x4))) void svstnt1_vnum(svcount_t, uint8_t *, int64_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x4))) void svstnt1_vnum(svcount_t, int8_t *, int64_t, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x4))) void svstnt1_vnum(svcount_t, uint64_t *, int64_t, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x4))) void svstnt1_vnum(svcount_t, float64_t *, int64_t, svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x4))) void svstnt1_vnum(svcount_t, int64_t *, int64_t, svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x4))) void svstnt1_vnum(svcount_t, uint16_t *, int64_t, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x4))) void svstnt1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x4))) void svstnt1_vnum(svcount_t, float16_t *, int64_t, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x4))) void svstnt1_vnum(svcount_t, int16_t *, int64_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x4))) void svstnt1_vnum(svcount_t, uint32_t *, int64_t, svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x4))) void svstnt1_vnum(svcount_t, float32_t *, int64_t, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x4))) void svstnt1_vnum(svcount_t, int32_t *, int64_t, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_s64))) svcount_t svwhilege_c8(int64_t, int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_s64))) svcount_t svwhilege_c32(int64_t, int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_s64))) svcount_t svwhilege_c64(int64_t, int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_s64))) svcount_t svwhilege_c16(int64_t, int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_u64))) svcount_t svwhilege_c8(uint64_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_u64))) svcount_t svwhilege_c32(uint64_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_u64))) svcount_t svwhilege_c64(uint64_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_u64))) 
svcount_t svwhilege_c16(uint64_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_s64_x2))) svboolx2_t svwhilege_b8_x2(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_s64_x2))) svboolx2_t svwhilege_b32_x2(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_s64_x2))) svboolx2_t svwhilege_b64_x2(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_s64_x2))) svboolx2_t svwhilege_b16_x2(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_u64_x2))) svboolx2_t svwhilege_b8_x2(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_u64_x2))) svboolx2_t svwhilege_b32_x2(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_u64_x2))) svboolx2_t svwhilege_b64_x2(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_u64_x2))) svboolx2_t svwhilege_b16_x2(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_s64))) svcount_t svwhilegt_c8(int64_t, int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_s64))) svcount_t svwhilegt_c32(int64_t, int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_s64))) svcount_t svwhilegt_c64(int64_t, int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_s64))) svcount_t svwhilegt_c16(int64_t, int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_u64))) svcount_t svwhilegt_c8(uint64_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_u64))) svcount_t svwhilegt_c32(uint64_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_u64))) svcount_t svwhilegt_c64(uint64_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_u64))) svcount_t svwhilegt_c16(uint64_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_s64_x2))) svboolx2_t svwhilegt_b8_x2(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_s64_x2))) svboolx2_t svwhilegt_b32_x2(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_s64_x2))) svboolx2_t svwhilegt_b64_x2(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_s64_x2))) svboolx2_t svwhilegt_b16_x2(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_u64_x2))) svboolx2_t svwhilegt_b8_x2(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_u64_x2))) svboolx2_t svwhilegt_b32_x2(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_u64_x2))) svboolx2_t svwhilegt_b64_x2(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_u64_x2))) svboolx2_t svwhilegt_b16_x2(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_s64))) svcount_t svwhilele_c8(int64_t, int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_s64))) svcount_t svwhilele_c32(int64_t, int64_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_s64))) svcount_t svwhilele_c64(int64_t, int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_s64))) svcount_t svwhilele_c16(int64_t, int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_u64))) svcount_t svwhilele_c8(uint64_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_u64))) svcount_t svwhilele_c32(uint64_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_u64))) svcount_t svwhilele_c64(uint64_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_u64))) svcount_t svwhilele_c16(uint64_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_s64_x2))) svboolx2_t svwhilele_b8_x2(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_s64_x2))) svboolx2_t svwhilele_b32_x2(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_s64_x2))) svboolx2_t svwhilele_b64_x2(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_s64_x2))) svboolx2_t svwhilele_b16_x2(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_u64_x2))) svboolx2_t svwhilele_b8_x2(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_u64_x2))) svboolx2_t svwhilele_b32_x2(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_u64_x2))) svboolx2_t svwhilele_b64_x2(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_u64_x2))) svboolx2_t svwhilele_b16_x2(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_u64))) svcount_t svwhilelt_c8(uint64_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_u64))) svcount_t svwhilelt_c32(uint64_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_u64))) svcount_t svwhilelt_c64(uint64_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_u64))) svcount_t svwhilelt_c16(uint64_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_s64))) svcount_t svwhilelt_c8(int64_t, int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_s64))) svcount_t svwhilelt_c32(int64_t, int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_s64))) svcount_t svwhilelt_c64(int64_t, int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_s64))) svcount_t svwhilelt_c16(int64_t, int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_u64_x2))) svboolx2_t svwhilelt_b8_x2(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_u64_x2))) svboolx2_t svwhilelt_b32_x2(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_u64_x2))) svboolx2_t svwhilelt_b64_x2(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_u64_x2))) svboolx2_t svwhilelt_b16_x2(uint64_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_s64_x2))) svboolx2_t svwhilelt_b8_x2(int64_t, int64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_s64_x2))) svboolx2_t svwhilelt_b32_x2(int64_t, int64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_s64_x2))) svboolx2_t svwhilelt_b64_x2(int64_t, int64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_s64_x2))) svboolx2_t svwhilelt_b16_x2(int64_t, int64_t);
#define svcvtnt_bf16_x svcvtnt_bf16_m
#define svcvtnt_bf16_f32_x svcvtnt_bf16_f32_m
#define svcvtnt_f16_x svcvtnt_f16_m
#define svcvtnt_f16_f32_x svcvtnt_f16_f32_m
#define svcvtnt_f32_x svcvtnt_f32_m
#define svcvtnt_f32_f64_x svcvtnt_f32_f64_m
#define svcvtxnt_f32_x svcvtxnt_f32_m
#define svcvtxnt_f32_f64_x svcvtxnt_f32_f64_m
#ifdef __cplusplus
} // extern "C"
#endif
#undef __ai
#undef __aio
#endif /* __ARM_SVE_H */
/*===---- wasm_simd128.h - WebAssembly portable SIMD intrinsics ------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __WASM_SIMD128_H
#define __WASM_SIMD128_H

#include <stdbool.h>
#include <stdint.h>

// User-facing type
typedef int32_t v128_t __attribute__((__vector_size__(16), __aligned__(16)));

// Internal types determined by clang builtin definitions
typedef int32_t __v128_u __attribute__((__vector_size__(16), __aligned__(1)));
typedef signed char __i8x16 __attribute__((__vector_size__(16), __aligned__(16)));
typedef unsigned char __u8x16 __attribute__((__vector_size__(16), __aligned__(16)));
typedef short __i16x8 __attribute__((__vector_size__(16), __aligned__(16)));
typedef unsigned short __u16x8 __attribute__((__vector_size__(16), __aligned__(16)));
typedef int __i32x4 __attribute__((__vector_size__(16), __aligned__(16)));
typedef unsigned int __u32x4 __attribute__((__vector_size__(16), __aligned__(16)));
typedef long long __i64x2 __attribute__((__vector_size__(16), __aligned__(16)));
typedef unsigned long long __u64x2 __attribute__((__vector_size__(16), __aligned__(16)));
typedef float __f32x4 __attribute__((__vector_size__(16), __aligned__(16)));
typedef double __f64x2 __attribute__((__vector_size__(16), __aligned__(16)));
typedef signed char __i8x8 __attribute__((__vector_size__(8), __aligned__(8)));
typedef unsigned char __u8x8 __attribute__((__vector_size__(8), __aligned__(8)));
typedef short __i16x4 __attribute__((__vector_size__(8), __aligned__(8)));
typedef unsigned short __u16x4 __attribute__((__vector_size__(8), __aligned__(8)));
typedef int __i32x2 __attribute__((__vector_size__(8), __aligned__(8)));
typedef unsigned int __u32x2 __attribute__((__vector_size__(8), __aligned__(8)));
typedef float __f32x2 __attribute__((__vector_size__(8), __aligned__(8)));

#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("simd128"), \
                 __min_vector_width__(128)))

#define __REQUIRE_CONSTANT(c) \
  __attribute__((__diagnose_if__(!__builtin_constant_p(c), \
                                 #c " must be constant", "error")))

static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load(const void *__mem) {
  // UB-free unaligned access copied from xmmintrin.h
  struct __wasm_v128_load_struct {
    __v128_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((const struct __wasm_v128_load_struct
*)__mem)->__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load8_splat(const void *__mem) { struct __wasm_v128_load8_splat_struct { uint8_t __v; } __attribute__((__packed__, __may_alias__)); uint8_t __v = ((const struct __wasm_v128_load8_splat_struct *)__mem)->__v; return (v128_t)(__u8x16){__v, __v, __v, __v, __v, __v, __v, __v, __v, __v, __v, __v, __v, __v, __v, __v}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load16_splat(const void *__mem) { struct __wasm_v128_load16_splat_struct { uint16_t __v; } __attribute__((__packed__, __may_alias__)); uint16_t __v = ((const struct __wasm_v128_load16_splat_struct *)__mem)->__v; return (v128_t)(__u16x8){__v, __v, __v, __v, __v, __v, __v, __v}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load32_splat(const void *__mem) { struct __wasm_v128_load32_splat_struct { uint32_t __v; } __attribute__((__packed__, __may_alias__)); uint32_t __v = ((const struct __wasm_v128_load32_splat_struct *)__mem)->__v; return (v128_t)(__u32x4){__v, __v, __v, __v}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load64_splat(const void *__mem) { struct __wasm_v128_load64_splat_struct { uint64_t __v; } __attribute__((__packed__, __may_alias__)); uint64_t __v = ((const struct __wasm_v128_load64_splat_struct *)__mem)->__v; return (v128_t)(__u64x2){__v, __v}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_load8x8(const void *__mem) { struct __wasm_i16x8_load8x8_struct { __i8x8 __v; } __attribute__((__packed__, __may_alias__)); __i8x8 __v = ((const struct __wasm_i16x8_load8x8_struct *)__mem)->__v; return (v128_t) __builtin_convertvector(__v, __i16x8); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_load8x8(const void *__mem) { struct __wasm_u16x8_load8x8_struct { __u8x8 __v; } __attribute__((__packed__, __may_alias__)); __u8x8 __v = ((const struct __wasm_u16x8_load8x8_struct *)__mem)->__v; return (v128_t) __builtin_convertvector(__v, __u16x8); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_load16x4(const void *__mem) { struct __wasm_i32x4_load16x4_struct { __i16x4 __v; } __attribute__((__packed__, __may_alias__)); __i16x4 __v = ((const struct __wasm_i32x4_load16x4_struct *)__mem)->__v; return (v128_t) __builtin_convertvector(__v, __i32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_load16x4(const void *__mem) { struct __wasm_u32x4_load16x4_struct { __u16x4 __v; } __attribute__((__packed__, __may_alias__)); __u16x4 __v = ((const struct __wasm_u32x4_load16x4_struct *)__mem)->__v; return (v128_t) __builtin_convertvector(__v, __u32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_load32x2(const void *__mem) { struct __wasm_i64x2_load32x2_struct { __i32x2 __v; } __attribute__((__packed__, __may_alias__)); __i32x2 __v = ((const struct __wasm_i64x2_load32x2_struct *)__mem)->__v; return (v128_t) __builtin_convertvector(__v, __i64x2); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_load32x2(const void *__mem) { struct __wasm_u64x2_load32x2_struct { __u32x2 __v; } __attribute__((__packed__, __may_alias__)); __u32x2 __v = ((const struct __wasm_u64x2_load32x2_struct *)__mem)->__v; return (v128_t) __builtin_convertvector(__v, __u64x2); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load32_zero(const void *__mem) { struct __wasm_v128_load32_zero_struct { int32_t __v; } __attribute__((__packed__, __may_alias__)); int32_t __v = ((const struct __wasm_v128_load32_zero_struct *)__mem)->__v; return (v128_t)(__i32x4){__v, 0, 0, 0}; } static __inline__ v128_t __DEFAULT_FN_ATTRS 
wasm_v128_load64_zero(const void *__mem) {
  struct __wasm_v128_load64_zero_struct {
    int64_t __v;
  } __attribute__((__packed__, __may_alias__));
  int64_t __v = ((const struct __wasm_v128_load64_zero_struct *)__mem)->__v;
  return (v128_t)(__i64x2){__v, 0};
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load8_lane(
    const void *__mem, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) {
  struct __wasm_v128_load8_lane_struct {
    int8_t __v;
  } __attribute__((__packed__, __may_alias__));
  int8_t __v = ((const struct __wasm_v128_load8_lane_struct *)__mem)->__v;
  __i8x16 __ret = (__i8x16)__vec;
  __ret[__i] = __v;
  return (v128_t)__ret;
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load16_lane(
    const void *__mem, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) {
  struct __wasm_v128_load16_lane_struct {
    int16_t __v;
  } __attribute__((__packed__, __may_alias__));
  int16_t __v = ((const struct __wasm_v128_load16_lane_struct *)__mem)->__v;
  __i16x8 __ret = (__i16x8)__vec;
  __ret[__i] = __v;
  return (v128_t)__ret;
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load32_lane(
    const void *__mem, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) {
  struct __wasm_v128_load32_lane_struct {
    int32_t __v;
  } __attribute__((__packed__, __may_alias__));
  int32_t __v = ((const struct __wasm_v128_load32_lane_struct *)__mem)->__v;
  __i32x4 __ret = (__i32x4)__vec;
  __ret[__i] = __v;
  return (v128_t)__ret;
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load64_lane(
    const void *__mem, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) {
  struct __wasm_v128_load64_lane_struct {
    int64_t __v;
  } __attribute__((__packed__, __may_alias__));
  int64_t __v = ((const struct __wasm_v128_load64_lane_struct *)__mem)->__v;
  __i64x2 __ret = (__i64x2)__vec;
  __ret[__i] = __v;
  return (v128_t)__ret;
}
static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store(void *__mem, v128_t __a) {
  // UB-free unaligned access copied from xmmintrin.h
  struct __wasm_v128_store_struct {
    __v128_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __wasm_v128_store_struct *)__mem)->__v = __a;
}
static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store8_lane(void *__mem, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) {
  struct __wasm_v128_store8_lane_struct {
    int8_t __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __wasm_v128_store8_lane_struct *)__mem)->__v = ((__i8x16)__vec)[__i];
}
static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store16_lane(void *__mem, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) {
  struct __wasm_v128_store16_lane_struct {
    int16_t __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __wasm_v128_store16_lane_struct *)__mem)->__v = ((__i16x8)__vec)[__i];
}
static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store32_lane(void *__mem, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) {
  struct __wasm_v128_store32_lane_struct {
    int32_t __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __wasm_v128_store32_lane_struct *)__mem)->__v = ((__i32x4)__vec)[__i];
}
static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store64_lane(void *__mem, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) {
  struct __wasm_v128_store64_lane_struct {
    int64_t __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __wasm_v128_store64_lane_struct *)__mem)->__v = ((__i64x2)__vec)[__i];
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_make(int8_t __c0, int8_t __c1, int8_t __c2, int8_t __c3, int8_t __c4, int8_t __c5, int8_t __c6, int8_t __c7, int8_t __c8, int8_t __c9, int8_t __c10, int8_t __c11, int8_t __c12, int8_t __c13, int8_t
__c14, int8_t __c15) { return (v128_t)(__i8x16){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7, __c8, __c9, __c10, __c11, __c12, __c13, __c14, __c15}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_make(uint8_t __c0, uint8_t __c1, uint8_t __c2, uint8_t __c3, uint8_t __c4, uint8_t __c5, uint8_t __c6, uint8_t __c7, uint8_t __c8, uint8_t __c9, uint8_t __c10, uint8_t __c11, uint8_t __c12, uint8_t __c13, uint8_t __c14, uint8_t __c15) { return (v128_t)(__u8x16){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7, __c8, __c9, __c10, __c11, __c12, __c13, __c14, __c15}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_make(int16_t __c0, int16_t __c1, int16_t __c2, int16_t __c3, int16_t __c4, int16_t __c5, int16_t __c6, int16_t __c7) { return (v128_t)(__i16x8){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_make(uint16_t __c0, uint16_t __c1, uint16_t __c2, uint16_t __c3, uint16_t __c4, uint16_t __c5, uint16_t __c6, uint16_t __c7) { return (v128_t)(__u16x8){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_make(int32_t __c0, int32_t __c1, int32_t __c2, int32_t __c3) { return (v128_t)(__i32x4){__c0, __c1, __c2, __c3}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_make(uint32_t __c0, uint32_t __c1, uint32_t __c2, uint32_t __c3) { return (v128_t)(__u32x4){__c0, __c1, __c2, __c3}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_make(int64_t __c0, int64_t __c1) { return (v128_t)(__i64x2){__c0, __c1}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_make(uint64_t __c0, uint64_t __c1) { return (v128_t)(__u64x2){__c0, __c1}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_make(float __c0, float __c1, float __c2, float __c3) { return (v128_t)(__f32x4){__c0, __c1, __c2, __c3}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_make(double __c0, double __c1) { return (v128_t)(__f64x2){__c0, __c1}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_const(int8_t __c0, int8_t __c1, int8_t __c2, int8_t __c3, int8_t __c4, int8_t __c5, int8_t __c6, int8_t __c7, int8_t __c8, int8_t __c9, int8_t __c10, int8_t __c11, int8_t __c12, int8_t __c13, int8_t __c14, int8_t __c15) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2) __REQUIRE_CONSTANT(__c3) __REQUIRE_CONSTANT(__c4) __REQUIRE_CONSTANT(__c5) __REQUIRE_CONSTANT(__c6) __REQUIRE_CONSTANT(__c7) __REQUIRE_CONSTANT(__c8) __REQUIRE_CONSTANT(__c9) __REQUIRE_CONSTANT(__c10) __REQUIRE_CONSTANT(__c11) __REQUIRE_CONSTANT(__c12) __REQUIRE_CONSTANT(__c13) __REQUIRE_CONSTANT(__c14) __REQUIRE_CONSTANT(__c15) { return (v128_t)(__i8x16){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7, __c8, __c9, __c10, __c11, __c12, __c13, __c14, __c15}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_const(uint8_t __c0, uint8_t __c1, uint8_t __c2, uint8_t __c3, uint8_t __c4, uint8_t __c5, uint8_t __c6, uint8_t __c7, uint8_t __c8, uint8_t __c9, uint8_t __c10, uint8_t __c11, uint8_t __c12, uint8_t __c13, uint8_t __c14, uint8_t __c15) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2) __REQUIRE_CONSTANT(__c3) __REQUIRE_CONSTANT(__c4) __REQUIRE_CONSTANT(__c5) __REQUIRE_CONSTANT(__c6) __REQUIRE_CONSTANT(__c7) __REQUIRE_CONSTANT(__c8) __REQUIRE_CONSTANT(__c9) __REQUIRE_CONSTANT(__c10) __REQUIRE_CONSTANT(__c11) __REQUIRE_CONSTANT(__c12) __REQUIRE_CONSTANT(__c13) __REQUIRE_CONSTANT(__c14) __REQUIRE_CONSTANT(__c15) { return (v128_t)(__u8x16){__c0, __c1, __c2, __c3, __c4, __c5, 
__c6, __c7, __c8, __c9, __c10, __c11, __c12, __c13, __c14, __c15}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_const(int16_t __c0, int16_t __c1, int16_t __c2, int16_t __c3, int16_t __c4, int16_t __c5, int16_t __c6, int16_t __c7) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2) __REQUIRE_CONSTANT(__c3) __REQUIRE_CONSTANT(__c4) __REQUIRE_CONSTANT(__c5) __REQUIRE_CONSTANT(__c6) __REQUIRE_CONSTANT(__c7) { return (v128_t)(__i16x8){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_const(uint16_t __c0, uint16_t __c1, uint16_t __c2, uint16_t __c3, uint16_t __c4, uint16_t __c5, uint16_t __c6, uint16_t __c7) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2) __REQUIRE_CONSTANT(__c3) __REQUIRE_CONSTANT(__c4) __REQUIRE_CONSTANT(__c5) __REQUIRE_CONSTANT(__c6) __REQUIRE_CONSTANT(__c7) { return (v128_t)(__u16x8){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_const(int32_t __c0, int32_t __c1, int32_t __c2, int32_t __c3) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2) __REQUIRE_CONSTANT(__c3) { return (v128_t)(__i32x4){__c0, __c1, __c2, __c3}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_const(uint32_t __c0, uint32_t __c1, uint32_t __c2, uint32_t __c3) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2) __REQUIRE_CONSTANT(__c3) { return (v128_t)(__u32x4){__c0, __c1, __c2, __c3}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_const(int64_t __c0, int64_t __c1) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) { return (v128_t)(__i64x2){__c0, __c1}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_const(uint64_t __c0, uint64_t __c1) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) { return (v128_t)(__u64x2){__c0, __c1}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_const(float __c0, float __c1, float __c2, float __c3) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2) __REQUIRE_CONSTANT(__c3) { return (v128_t)(__f32x4){__c0, __c1, __c2, __c3}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_const(double __c0, double __c1) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) { return (v128_t)(__f64x2){__c0, __c1}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_const_splat(int8_t __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__i8x16){__c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_const_splat(uint8_t __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__u8x16){__c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_const_splat(int16_t __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__i16x8){__c, __c, __c, __c, __c, __c, __c, __c}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_const_splat(uint16_t __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__u16x8){__c, __c, __c, __c, __c, __c, __c, __c}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_const_splat(int32_t __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__i32x4){__c, __c, __c, __c}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_const_splat(uint32_t __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__u32x4){__c, __c, __c, __c}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_const_splat(int64_t __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__i64x2){__c, 
__c}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_const_splat(uint64_t __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__u64x2){__c, __c}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_const_splat(float __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__f32x4){__c, __c, __c, __c}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_const_splat(double __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__f64x2){__c, __c}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_splat(int8_t __a) { return (v128_t)(__i8x16){__a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_splat(uint8_t __a) { return (v128_t)(__u8x16){__a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a}; } static __inline__ int8_t __DEFAULT_FN_ATTRS wasm_i8x16_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__i8x16)__a)[__i]; } static __inline__ uint8_t __DEFAULT_FN_ATTRS wasm_u8x16_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__u8x16)__a)[__i]; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_replace_lane(v128_t __a, int __i, int8_t __b) __REQUIRE_CONSTANT(__i) { __i8x16 __v = (__i8x16)__a; __v[__i] = __b; return (v128_t)__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_replace_lane(v128_t __a, int __i, uint8_t __b) __REQUIRE_CONSTANT(__i) { __u8x16 __v = (__u8x16)__a; __v[__i] = __b; return (v128_t)__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_splat(int16_t __a) { return (v128_t)(__i16x8){__a, __a, __a, __a, __a, __a, __a, __a}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_splat(uint16_t __a) { return (v128_t)(__u16x8){__a, __a, __a, __a, __a, __a, __a, __a}; } static __inline__ int16_t __DEFAULT_FN_ATTRS wasm_i16x8_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__i16x8)__a)[__i]; } static __inline__ uint16_t __DEFAULT_FN_ATTRS wasm_u16x8_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__u16x8)__a)[__i]; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_replace_lane(v128_t __a, int __i, int16_t __b) __REQUIRE_CONSTANT(__i) { __i16x8 __v = (__i16x8)__a; __v[__i] = __b; return (v128_t)__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_replace_lane( v128_t __a, int __i, uint16_t __b) __REQUIRE_CONSTANT(__i) { __u16x8 __v = (__u16x8)__a; __v[__i] = __b; return (v128_t)__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_splat(int32_t __a) { return (v128_t)(__i32x4){__a, __a, __a, __a}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_splat(uint32_t __a) { return (v128_t)(__u32x4){__a, __a, __a, __a}; } static __inline__ int32_t __DEFAULT_FN_ATTRS wasm_i32x4_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__i32x4)__a)[__i]; } static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_u32x4_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__u32x4)__a)[__i]; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_replace_lane(v128_t __a, int __i, int32_t __b) __REQUIRE_CONSTANT(__i) { __i32x4 __v = (__i32x4)__a; __v[__i] = __b; return (v128_t)__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_replace_lane( v128_t __a, int __i, uint32_t __b) __REQUIRE_CONSTANT(__i) { __u32x4 __v = (__u32x4)__a; __v[__i] = __b; return (v128_t)__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_splat(int64_t __a) { return (v128_t)(__i64x2){__a, __a}; } static __inline__ v128_t __DEFAULT_FN_ATTRS 
wasm_u64x2_splat(uint64_t __a) { return (v128_t)(__u64x2){__a, __a}; } static __inline__ int64_t __DEFAULT_FN_ATTRS wasm_i64x2_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__i64x2)__a)[__i]; } static __inline__ uint64_t __DEFAULT_FN_ATTRS wasm_u64x2_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__u64x2)__a)[__i]; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_replace_lane(v128_t __a, int __i, int64_t __b) __REQUIRE_CONSTANT(__i) { __i64x2 __v = (__i64x2)__a; __v[__i] = __b; return (v128_t)__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_replace_lane( v128_t __a, int __i, uint64_t __b) __REQUIRE_CONSTANT(__i) { __u64x2 __v = (__u64x2)__a; __v[__i] = __b; return (v128_t)__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_splat(float __a) { return (v128_t)(__f32x4){__a, __a, __a, __a}; } static __inline__ float __DEFAULT_FN_ATTRS wasm_f32x4_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__f32x4)__a)[__i]; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_replace_lane(v128_t __a, int __i, float __b) __REQUIRE_CONSTANT(__i) { __f32x4 __v = (__f32x4)__a; __v[__i] = __b; return (v128_t)__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_splat(double __a) { return (v128_t)(__f64x2){__a, __a}; } static __inline__ double __DEFAULT_FN_ATTRS wasm_f64x2_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__f64x2)__a)[__i]; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_replace_lane(v128_t __a, int __i, double __b) __REQUIRE_CONSTANT(__i) { __f64x2 __v = (__f64x2)__a; __v[__i] = __b; return (v128_t)__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_eq(v128_t __a, v128_t __b) { return (v128_t)((__i8x16)__a == (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_ne(v128_t __a, v128_t __b) { return (v128_t)((__i8x16)__a != (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_lt(v128_t __a, v128_t __b) { return (v128_t)((__i8x16)__a < (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_lt(v128_t __a, v128_t __b) { return (v128_t)((__u8x16)__a < (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_gt(v128_t __a, v128_t __b) { return (v128_t)((__i8x16)__a > (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_gt(v128_t __a, v128_t __b) { return (v128_t)((__u8x16)__a > (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_le(v128_t __a, v128_t __b) { return (v128_t)((__i8x16)__a <= (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_le(v128_t __a, v128_t __b) { return (v128_t)((__u8x16)__a <= (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_ge(v128_t __a, v128_t __b) { return (v128_t)((__i8x16)__a >= (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_ge(v128_t __a, v128_t __b) { return (v128_t)((__u8x16)__a >= (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_eq(v128_t __a, v128_t __b) { return (v128_t)((__i16x8)__a == (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_ne(v128_t __a, v128_t __b) { return (v128_t)((__u16x8)__a != (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_lt(v128_t __a, v128_t __b) { return (v128_t)((__i16x8)__a < (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_lt(v128_t __a, v128_t __b) { return (v128_t)((__u16x8)__a < (__u16x8)__b); } static __inline__ v128_t 
__DEFAULT_FN_ATTRS wasm_i16x8_gt(v128_t __a, v128_t __b) { return (v128_t)((__i16x8)__a > (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_gt(v128_t __a, v128_t __b) { return (v128_t)((__u16x8)__a > (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_le(v128_t __a, v128_t __b) { return (v128_t)((__i16x8)__a <= (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_le(v128_t __a, v128_t __b) { return (v128_t)((__u16x8)__a <= (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_ge(v128_t __a, v128_t __b) { return (v128_t)((__i16x8)__a >= (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_ge(v128_t __a, v128_t __b) { return (v128_t)((__u16x8)__a >= (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_eq(v128_t __a, v128_t __b) { return (v128_t)((__i32x4)__a == (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_ne(v128_t __a, v128_t __b) { return (v128_t)((__i32x4)__a != (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_lt(v128_t __a, v128_t __b) { return (v128_t)((__i32x4)__a < (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_lt(v128_t __a, v128_t __b) { return (v128_t)((__u32x4)__a < (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_gt(v128_t __a, v128_t __b) { return (v128_t)((__i32x4)__a > (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_gt(v128_t __a, v128_t __b) { return (v128_t)((__u32x4)__a > (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_le(v128_t __a, v128_t __b) { return (v128_t)((__i32x4)__a <= (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_le(v128_t __a, v128_t __b) { return (v128_t)((__u32x4)__a <= (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_ge(v128_t __a, v128_t __b) { return (v128_t)((__i32x4)__a >= (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_ge(v128_t __a, v128_t __b) { return (v128_t)((__u32x4)__a >= (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_eq(v128_t __a, v128_t __b) { return (v128_t)((__i64x2)__a == (__i64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_ne(v128_t __a, v128_t __b) { return (v128_t)((__i64x2)__a != (__i64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_lt(v128_t __a, v128_t __b) { return (v128_t)((__i64x2)__a < (__i64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_gt(v128_t __a, v128_t __b) { return (v128_t)((__i64x2)__a > (__i64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_le(v128_t __a, v128_t __b) { return (v128_t)((__i64x2)__a <= (__i64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_ge(v128_t __a, v128_t __b) { return (v128_t)((__i64x2)__a >= (__i64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_eq(v128_t __a, v128_t __b) { return (v128_t)((__f32x4)__a == (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_ne(v128_t __a, v128_t __b) { return (v128_t)((__f32x4)__a != (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_lt(v128_t __a, v128_t __b) { return (v128_t)((__f32x4)__a < (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_gt(v128_t __a, v128_t __b) { return (v128_t)((__f32x4)__a > (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_le(v128_t __a, v128_t __b) { return (v128_t)((__f32x4)__a <= (__f32x4)__b); 
} static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_ge(v128_t __a, v128_t __b) { return (v128_t)((__f32x4)__a >= (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_eq(v128_t __a, v128_t __b) { return (v128_t)((__f64x2)__a == (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_ne(v128_t __a, v128_t __b) { return (v128_t)((__f64x2)__a != (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_lt(v128_t __a, v128_t __b) { return (v128_t)((__f64x2)__a < (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_gt(v128_t __a, v128_t __b) { return (v128_t)((__f64x2)__a > (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_le(v128_t __a, v128_t __b) { return (v128_t)((__f64x2)__a <= (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_ge(v128_t __a, v128_t __b) { return (v128_t)((__f64x2)__a >= (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_not(v128_t __a) { return ~__a; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_and(v128_t __a, v128_t __b) { return __a & __b; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_or(v128_t __a, v128_t __b) { return __a | __b; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_xor(v128_t __a, v128_t __b) { return __a ^ __b; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_andnot(v128_t __a, v128_t __b) { return __a & ~__b; } static __inline__ bool __DEFAULT_FN_ATTRS wasm_v128_any_true(v128_t __a) { return __builtin_wasm_any_true_v128((__i8x16)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_bitselect(v128_t __a, v128_t __b, v128_t __mask) { return (v128_t)__builtin_wasm_bitselect((__i32x4)__a, (__i32x4)__b, (__i32x4)__mask); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_abs(v128_t __a) { return (v128_t)__builtin_wasm_abs_i8x16((__i8x16)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_neg(v128_t __a) { return (v128_t)(-(__u8x16)__a); } static __inline__ bool __DEFAULT_FN_ATTRS wasm_i8x16_all_true(v128_t __a) { return __builtin_wasm_all_true_i8x16((__i8x16)__a); } static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i8x16_bitmask(v128_t __a) { return __builtin_wasm_bitmask_i8x16((__i8x16)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_popcnt(v128_t __a) { return (v128_t)__builtin_wasm_popcnt_i8x16((__i8x16)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shl(v128_t __a, uint32_t __b) { return (v128_t)((__i8x16)__a << (__b & 0x7)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shr(v128_t __a, uint32_t __b) { return (v128_t)((__i8x16)__a >> (__b & 0x7)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_shr(v128_t __a, uint32_t __b) { return (v128_t)((__u8x16)__a >> (__b & 0x7)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_add(v128_t __a, v128_t __b) { return (v128_t)((__u8x16)__a + (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_add_sat(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_add_sat_s_i8x16((__i8x16)__a, (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_add_sat(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_add_sat_u_i8x16((__u8x16)__a, (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_sub(v128_t __a, v128_t __b) { return (v128_t)((__u8x16)__a - (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_sub_sat(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_sub_sat_s_i8x16((__i8x16)__a, 
(__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_sub_sat(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_sub_sat_u_i8x16((__u8x16)__a, (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_min(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_min_s_i8x16((__i8x16)__a, (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_min(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_min_u_i8x16((__u8x16)__a, (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_max(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_max_s_i8x16((__i8x16)__a, (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_max(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_max_u_i8x16((__u8x16)__a, (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_avgr(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_avgr_u_i8x16((__u8x16)__a, (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_abs(v128_t __a) { return (v128_t)__builtin_wasm_abs_i16x8((__i16x8)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_neg(v128_t __a) { return (v128_t)(-(__u16x8)__a); } static __inline__ bool __DEFAULT_FN_ATTRS wasm_i16x8_all_true(v128_t __a) { return __builtin_wasm_all_true_i16x8((__i16x8)__a); } static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i16x8_bitmask(v128_t __a) { return __builtin_wasm_bitmask_i16x8((__i16x8)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_shl(v128_t __a, uint32_t __b) { return (v128_t)((__i16x8)__a << (__b & 0xF)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_shr(v128_t __a, uint32_t __b) { return (v128_t)((__i16x8)__a >> (__b & 0xF)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_shr(v128_t __a, uint32_t __b) { return (v128_t)((__u16x8)__a >> (__b & 0xF)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_add(v128_t __a, v128_t __b) { return (v128_t)((__u16x8)__a + (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_add_sat(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_add_sat_s_i16x8((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_add_sat(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_add_sat_u_i16x8((__u16x8)__a, (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_sub(v128_t __a, v128_t __b) { return (v128_t)((__i16x8)__a - (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_sub_sat(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_sub_sat_s_i16x8((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_sub_sat(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_sub_sat_u_i16x8((__u16x8)__a, (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_mul(v128_t __a, v128_t __b) { return (v128_t)((__u16x8)__a * (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_min(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_min_s_i16x8((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_min(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_min_u_i16x8((__u16x8)__a, (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_max(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_max_s_i16x8((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_max(v128_t __a, v128_t __b) { return 
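/*
 * Illustrative note (not from the original header): as the (__b & 0x7) and
 * (__b & 0xF) expressions above show, the shift intrinsics mask the shift
 * count to the lane width, so the count is taken modulo the bits per lane:
 *
 *   wasm_i16x8_shl(v, 17);  // same as wasm_i16x8_shl(v, 1), since 17 & 0xF == 1
 *   wasm_u8x16_shr(v, 9);   // same as wasm_u8x16_shr(v, 1), since 9 & 0x7 == 1
 */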
(v128_t)__builtin_wasm_max_u_i16x8((__u16x8)__a, (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_avgr(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_avgr_u_i16x8((__u16x8)__a, (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_abs(v128_t __a) { return (v128_t)__builtin_wasm_abs_i32x4((__i32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_neg(v128_t __a) { return (v128_t)(-(__u32x4)__a); } static __inline__ bool __DEFAULT_FN_ATTRS wasm_i32x4_all_true(v128_t __a) { return __builtin_wasm_all_true_i32x4((__i32x4)__a); } static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i32x4_bitmask(v128_t __a) { return __builtin_wasm_bitmask_i32x4((__i32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_shl(v128_t __a, uint32_t __b) { return (v128_t)((__i32x4)__a << (__b & 0x1F)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_shr(v128_t __a, uint32_t __b) { return (v128_t)((__i32x4)__a >> (__b & 0x1F)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_shr(v128_t __a, uint32_t __b) { return (v128_t)((__u32x4)__a >> (__b & 0x1F)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_add(v128_t __a, v128_t __b) { return (v128_t)((__u32x4)__a + (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_sub(v128_t __a, v128_t __b) { return (v128_t)((__u32x4)__a - (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_mul(v128_t __a, v128_t __b) { return (v128_t)((__u32x4)__a * (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_min(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_min_s_i32x4((__i32x4)__a, (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_min(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_min_u_i32x4((__u32x4)__a, (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_max(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_max_s_i32x4((__i32x4)__a, (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_max(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_max_u_i32x4((__u32x4)__a, (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_dot_i16x8(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_dot_s_i32x4_i16x8((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_abs(v128_t __a) { return (v128_t)__builtin_wasm_abs_i64x2((__i64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_neg(v128_t __a) { return (v128_t)(-(__u64x2)__a); } static __inline__ bool __DEFAULT_FN_ATTRS wasm_i64x2_all_true(v128_t __a) { return __builtin_wasm_all_true_i64x2((__i64x2)__a); } static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i64x2_bitmask(v128_t __a) { return __builtin_wasm_bitmask_i64x2((__i64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_shl(v128_t __a, uint32_t __b) { return (v128_t)((__i64x2)__a << ((int64_t)__b & 0x3F)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_shr(v128_t __a, uint32_t __b) { return (v128_t)((__i64x2)__a >> ((int64_t)__b & 0x3F)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_shr(v128_t __a, uint32_t __b) { return (v128_t)((__u64x2)__a >> ((int64_t)__b & 0x3F)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_add(v128_t __a, v128_t __b) { return (v128_t)((__u64x2)__a + (__u64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_sub(v128_t __a, v128_t __b) { return (v128_t)((__u64x2)__a - (__u64x2)__b); } static __inline__ 
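/*
 * Worked example (illustrative, not from the original header):
 * wasm_i32x4_dot_i16x8 multiplies corresponding signed 16-bit lanes and adds
 * adjacent products into 32-bit lanes. If the two low 16-bit lanes of __a hold
 * {1, 2} and those of __b hold {3, 4}, the low 32-bit result lane is
 * 1*3 + 2*4 = 11; the remaining three 32-bit lanes are formed from the other
 * lane pairs in the same way.
 */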
v128_t __DEFAULT_FN_ATTRS wasm_i64x2_mul(v128_t __a, v128_t __b) { return (v128_t)((__u64x2)__a * (__u64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_abs(v128_t __a) { return (v128_t)__builtin_wasm_abs_f32x4((__f32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_neg(v128_t __a) { return (v128_t)(-(__f32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_sqrt(v128_t __a) { return (v128_t)__builtin_wasm_sqrt_f32x4((__f32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_ceil(v128_t __a) { return (v128_t)__builtin_wasm_ceil_f32x4((__f32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_floor(v128_t __a) { return (v128_t)__builtin_wasm_floor_f32x4((__f32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_trunc(v128_t __a) { return (v128_t)__builtin_wasm_trunc_f32x4((__f32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_nearest(v128_t __a) { return (v128_t)__builtin_wasm_nearest_f32x4((__f32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_add(v128_t __a, v128_t __b) { return (v128_t)((__f32x4)__a + (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_sub(v128_t __a, v128_t __b) { return (v128_t)((__f32x4)__a - (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_mul(v128_t __a, v128_t __b) { return (v128_t)((__f32x4)__a * (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_div(v128_t __a, v128_t __b) { return (v128_t)((__f32x4)__a / (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_min(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_min_f32x4((__f32x4)__a, (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_max(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_max_f32x4((__f32x4)__a, (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_pmin(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_pmin_f32x4((__f32x4)__a, (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_pmax(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_pmax_f32x4((__f32x4)__a, (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_abs(v128_t __a) { return (v128_t)__builtin_wasm_abs_f64x2((__f64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_neg(v128_t __a) { return (v128_t)(-(__f64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_sqrt(v128_t __a) { return (v128_t)__builtin_wasm_sqrt_f64x2((__f64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_ceil(v128_t __a) { return (v128_t)__builtin_wasm_ceil_f64x2((__f64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_floor(v128_t __a) { return (v128_t)__builtin_wasm_floor_f64x2((__f64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_trunc(v128_t __a) { return (v128_t)__builtin_wasm_trunc_f64x2((__f64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_nearest(v128_t __a) { return (v128_t)__builtin_wasm_nearest_f64x2((__f64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_add(v128_t __a, v128_t __b) { return (v128_t)((__f64x2)__a + (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_sub(v128_t __a, v128_t __b) { return (v128_t)((__f64x2)__a - (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_mul(v128_t __a, v128_t __b) { return (v128_t)((__f64x2)__a * (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_div(v128_t 
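/*
 * Illustrative note (not from the original header; semantics per the
 * WebAssembly SIMD proposal): wasm_f32x4_min/_max propagate NaN, while the
 * pseudo-minimum and pseudo-maximum forms are defined purely by a comparison,
 * so a NaN in __b simply loses:
 *
 *   wasm_f32x4_pmin(a, b);  // per lane: b < a ? b : a
 *   wasm_f32x4_pmax(a, b);  // per lane: a < b ? b : a
 */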
__a, v128_t __b) { return (v128_t)((__f64x2)__a / (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_min(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_min_f64x2((__f64x2)__a, (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_max(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_max_f64x2((__f64x2)__a, (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_pmin(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_pmin_f64x2((__f64x2)__a, (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_pmax(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_pmax_f64x2((__f64x2)__a, (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_trunc_sat_f32x4(v128_t __a) { return (v128_t)__builtin_wasm_trunc_saturate_s_i32x4_f32x4((__f32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_trunc_sat_f32x4(v128_t __a) { return (v128_t)__builtin_wasm_trunc_saturate_u_i32x4_f32x4((__f32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_convert_i32x4(v128_t __a) { return (v128_t) __builtin_convertvector((__i32x4)__a, __f32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_convert_u32x4(v128_t __a) { return (v128_t) __builtin_convertvector((__u32x4)__a, __f32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_convert_low_i32x4(v128_t __a) { return (v128_t) __builtin_convertvector((__i32x2){__a[0], __a[1]}, __f64x2); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_convert_low_u32x4(v128_t __a) { return (v128_t) __builtin_convertvector((__u32x2){__a[0], __a[1]}, __f64x2); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_trunc_sat_f64x2_zero(v128_t __a) { return (v128_t)__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4((__f64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_trunc_sat_f64x2_zero(v128_t __a) { return (v128_t)__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4((__f64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_demote_f64x2_zero(v128_t __a) { return (v128_t) __builtin_convertvector( __builtin_shufflevector((__f64x2)__a, (__f64x2){0, 0}, 0, 1, 2, 3), __f32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_promote_low_f32x4(v128_t __a) { return (v128_t) __builtin_convertvector( (__f32x2){((__f32x4)__a)[0], ((__f32x4)__a)[1]}, __f64x2); } #define wasm_i8x16_shuffle(__a, __b, __c0, __c1, __c2, __c3, __c4, __c5, __c6, \ __c7, __c8, __c9, __c10, __c11, __c12, __c13, \ __c14, __c15) \ ((v128_t)__builtin_wasm_shuffle_i8x16( \ (__i8x16)(__a), (__i8x16)(__b), __c0, __c1, __c2, __c3, __c4, __c5, \ __c6, __c7, __c8, __c9, __c10, __c11, __c12, __c13, __c14, __c15)) #define wasm_i16x8_shuffle(__a, __b, __c0, __c1, __c2, __c3, __c4, __c5, __c6, \ __c7) \ ((v128_t)__builtin_wasm_shuffle_i8x16( \ (__i8x16)(__a), (__i8x16)(__b), (__c0)*2, (__c0)*2 + 1, (__c1)*2, \ (__c1)*2 + 1, (__c2)*2, (__c2)*2 + 1, (__c3)*2, (__c3)*2 + 1, (__c4)*2, \ (__c4)*2 + 1, (__c5)*2, (__c5)*2 + 1, (__c6)*2, (__c6)*2 + 1, (__c7)*2, \ (__c7)*2 + 1)) #define wasm_i32x4_shuffle(__a, __b, __c0, __c1, __c2, __c3) \ ((v128_t)__builtin_wasm_shuffle_i8x16( \ (__i8x16)(__a), (__i8x16)(__b), (__c0)*4, (__c0)*4 + 1, (__c0)*4 + 2, \ (__c0)*4 + 3, (__c1)*4, (__c1)*4 + 1, (__c1)*4 + 2, (__c1)*4 + 3, \ (__c2)*4, (__c2)*4 + 1, (__c2)*4 + 2, (__c2)*4 + 3, (__c3)*4, \ (__c3)*4 + 1, (__c3)*4 + 2, (__c3)*4 + 3)) #define wasm_i64x2_shuffle(__a, __b, __c0, __c1) \ ((v128_t)__builtin_wasm_shuffle_i8x16( \ (__i8x16)(__a), (__i8x16)(__b), (__c0)*8, (__c0)*8 
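/*
 * Illustrative note (not from the original header; semantics per the
 * WebAssembly SIMD proposal): the trunc_sat conversions above saturate rather
 * than trap. For wasm_i32x4_trunc_sat_f32x4, a lane holding 3.7f becomes 3,
 * 1e20f becomes INT32_MAX, -1e20f becomes INT32_MIN, and NaN becomes 0:
 *
 *   v128_t v = wasm_f32x4_make(3.7f, 1e20f, -1e20f, -3.7f);
 *   v128_t i = wasm_i32x4_trunc_sat_f32x4(v);  // lanes: 3, INT32_MAX, INT32_MIN, -3
 */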
+ 1, (__c0)*8 + 2, \ (__c0)*8 + 3, (__c0)*8 + 4, (__c0)*8 + 5, (__c0)*8 + 6, (__c0)*8 + 7, \ (__c1)*8, (__c1)*8 + 1, (__c1)*8 + 2, (__c1)*8 + 3, (__c1)*8 + 4, \ (__c1)*8 + 5, (__c1)*8 + 6, (__c1)*8 + 7)) static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_swizzle(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_swizzle_i8x16((__i8x16)__a, (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_narrow_i16x8(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_narrow_s_i8x16_i16x8((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_narrow_i16x8(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_narrow_u_i8x16_i16x8((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_narrow_i32x4(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_narrow_s_i16x8_i32x4((__i32x4)__a, (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_narrow_i32x4(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_narrow_u_i16x8_i32x4((__i32x4)__a, (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_extend_low_i8x16(v128_t __a) { return (v128_t) __builtin_convertvector( (__i8x8){((__i8x16)__a)[0], ((__i8x16)__a)[1], ((__i8x16)__a)[2], ((__i8x16)__a)[3], ((__i8x16)__a)[4], ((__i8x16)__a)[5], ((__i8x16)__a)[6], ((__i8x16)__a)[7]}, __i16x8); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_extend_high_i8x16(v128_t __a) { return (v128_t) __builtin_convertvector( (__i8x8){((__i8x16)__a)[8], ((__i8x16)__a)[9], ((__i8x16)__a)[10], ((__i8x16)__a)[11], ((__i8x16)__a)[12], ((__i8x16)__a)[13], ((__i8x16)__a)[14], ((__i8x16)__a)[15]}, __i16x8); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_extend_low_u8x16(v128_t __a) { return (v128_t) __builtin_convertvector( (__u8x8){((__u8x16)__a)[0], ((__u8x16)__a)[1], ((__u8x16)__a)[2], ((__u8x16)__a)[3], ((__u8x16)__a)[4], ((__u8x16)__a)[5], ((__u8x16)__a)[6], ((__u8x16)__a)[7]}, __u16x8); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_extend_high_u8x16(v128_t __a) { return (v128_t) __builtin_convertvector( (__u8x8){((__u8x16)__a)[8], ((__u8x16)__a)[9], ((__u8x16)__a)[10], ((__u8x16)__a)[11], ((__u8x16)__a)[12], ((__u8x16)__a)[13], ((__u8x16)__a)[14], ((__u8x16)__a)[15]}, __u16x8); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_extend_low_i16x8(v128_t __a) { return (v128_t) __builtin_convertvector( (__i16x4){((__i16x8)__a)[0], ((__i16x8)__a)[1], ((__i16x8)__a)[2], ((__i16x8)__a)[3]}, __i32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_extend_high_i16x8(v128_t __a) { return (v128_t) __builtin_convertvector( (__i16x4){((__i16x8)__a)[4], ((__i16x8)__a)[5], ((__i16x8)__a)[6], ((__i16x8)__a)[7]}, __i32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_extend_low_u16x8(v128_t __a) { return (v128_t) __builtin_convertvector( (__u16x4){((__u16x8)__a)[0], ((__u16x8)__a)[1], ((__u16x8)__a)[2], ((__u16x8)__a)[3]}, __u32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_extend_high_u16x8(v128_t __a) { return (v128_t) __builtin_convertvector( (__u16x4){((__u16x8)__a)[4], ((__u16x8)__a)[5], ((__u16x8)__a)[6], ((__u16x8)__a)[7]}, __u32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_extend_low_i32x4(v128_t __a) { return (v128_t) __builtin_convertvector( (__i32x2){((__i32x4)__a)[0], ((__i32x4)__a)[1]}, __i64x2); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_extend_high_i32x4(v128_t __a) { return (v128_t) __builtin_convertvector( (__i32x2){((__i32x4)__a)[2], 
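/*
 * Illustrative sketch (not part of the original header): the shuffle macros
 * above take lane indices, where 0..N-1 select lanes of __a and N..2N-1 select
 * lanes of __b. For example, interleaving the two low 32-bit lanes of a and b:
 *
 *   v128_t lo = wasm_i32x4_shuffle(a, b, 0, 4, 1, 5);
 *   // result lanes: a[0], b[0], a[1], b[1]
 *
 * wasm_i8x16_narrow_i16x8 packs two i16x8 vectors into one i8x16 vector,
 * saturating each 16-bit lane to the signed 8-bit range [-128, 127].
 */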
((__i32x4)__a)[3]}, __i64x2); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_extend_low_u32x4(v128_t __a) { return (v128_t) __builtin_convertvector( (__u32x2){((__u32x4)__a)[0], ((__u32x4)__a)[1]}, __u64x2); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_extend_high_u32x4(v128_t __a) { return (v128_t) __builtin_convertvector( (__u32x2){((__u32x4)__a)[2], ((__u32x4)__a)[3]}, __u64x2); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_extadd_pairwise_i8x16(v128_t __a) { return (v128_t)__builtin_wasm_extadd_pairwise_i8x16_s_i16x8((__i8x16)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_extadd_pairwise_u8x16(v128_t __a) { return (v128_t)__builtin_wasm_extadd_pairwise_i8x16_u_i16x8((__u8x16)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_extadd_pairwise_i16x8(v128_t __a) { return (v128_t)__builtin_wasm_extadd_pairwise_i16x8_s_i32x4((__i16x8)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_extadd_pairwise_u16x8(v128_t __a) { return (v128_t)__builtin_wasm_extadd_pairwise_i16x8_u_i32x4((__u16x8)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_extmul_low_i8x16(v128_t __a, v128_t __b) { return (v128_t)((__i16x8)wasm_i16x8_extend_low_i8x16(__a) * (__i16x8)wasm_i16x8_extend_low_i8x16(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_extmul_high_i8x16(v128_t __a, v128_t __b) { return (v128_t)((__i16x8)wasm_i16x8_extend_high_i8x16(__a) * (__i16x8)wasm_i16x8_extend_high_i8x16(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_extmul_low_u8x16(v128_t __a, v128_t __b) { return (v128_t)((__u16x8)wasm_u16x8_extend_low_u8x16(__a) * (__u16x8)wasm_u16x8_extend_low_u8x16(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_extmul_high_u8x16(v128_t __a, v128_t __b) { return (v128_t)((__u16x8)wasm_u16x8_extend_high_u8x16(__a) * (__u16x8)wasm_u16x8_extend_high_u8x16(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_extmul_low_i16x8(v128_t __a, v128_t __b) { return (v128_t)((__i32x4)wasm_i32x4_extend_low_i16x8(__a) * (__i32x4)wasm_i32x4_extend_low_i16x8(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_extmul_high_i16x8(v128_t __a, v128_t __b) { return (v128_t)((__i32x4)wasm_i32x4_extend_high_i16x8(__a) * (__i32x4)wasm_i32x4_extend_high_i16x8(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_extmul_low_u16x8(v128_t __a, v128_t __b) { return (v128_t)((__u32x4)wasm_u32x4_extend_low_u16x8(__a) * (__u32x4)wasm_u32x4_extend_low_u16x8(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_extmul_high_u16x8(v128_t __a, v128_t __b) { return (v128_t)((__u32x4)wasm_u32x4_extend_high_u16x8(__a) * (__u32x4)wasm_u32x4_extend_high_u16x8(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_extmul_low_i32x4(v128_t __a, v128_t __b) { return (v128_t)((__i64x2)wasm_i64x2_extend_low_i32x4(__a) * (__i64x2)wasm_i64x2_extend_low_i32x4(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_extmul_high_i32x4(v128_t __a, v128_t __b) { return (v128_t)((__i64x2)wasm_i64x2_extend_high_i32x4(__a) * (__i64x2)wasm_i64x2_extend_high_i32x4(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_extmul_low_u32x4(v128_t __a, v128_t __b) { return (v128_t)((__u64x2)wasm_u64x2_extend_low_u32x4(__a) * (__u64x2)wasm_u64x2_extend_low_u32x4(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_extmul_high_u32x4(v128_t __a, v128_t __b) { return (v128_t)((__u64x2)wasm_u64x2_extend_high_u32x4(__a) * (__u64x2)wasm_u64x2_extend_high_u32x4(__b)); } static 
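/*
 * Illustrative note (not from the original header): as the definitions above
 * show, the extmul forms widen one half of each input and then multiply, so
 * every product is kept at full width. With 16-bit lanes holding 300:
 *
 *   wasm_i16x8_mul(a, b);               // 300 * 300 = 90000 wraps in 16 bits -> 24464
 *   wasm_i32x4_extmul_low_i16x8(a, b);  // exact 32-bit products: 90000
 */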
__inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_q15mulr_sat(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_q15mulr_sat_s_i16x8((__i16x8)__a, (__i16x8)__b); } // Old intrinsic names supported to ease transitioning to the standard names. Do // not use these; they will be removed in the near future. #define __DEPRECATED_FN_ATTRS(__replacement) \ __DEFAULT_FN_ATTRS __attribute__( \ (deprecated("use " __replacement " instead", __replacement))) #define __WASM_STR(X) #X #ifdef __DEPRECATED #define __DEPRECATED_WASM_MACRO(__name, __replacement) \ _Pragma(__WASM_STR(GCC warning( \ "'" __name "' is deprecated: use '" __replacement "' instead"))) #else #define __DEPRECATED_WASM_MACRO(__name, __replacement) #endif static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_v128_load8_splat") wasm_v8x16_load_splat(const void *__mem) { return wasm_v128_load8_splat(__mem); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_v128_load16_splat") wasm_v16x8_load_splat(const void *__mem) { return wasm_v128_load16_splat(__mem); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_v128_load32_splat") wasm_v32x4_load_splat(const void *__mem) { return wasm_v128_load32_splat(__mem); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_v128_load64_splat") wasm_v64x2_load_splat(const void *__mem) { return wasm_v128_load64_splat(__mem); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i16x8_load8x8") wasm_i16x8_load_8x8(const void *__mem) { return wasm_i16x8_load8x8(__mem); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u16x8_load8x8") wasm_u16x8_load_8x8(const void *__mem) { return wasm_u16x8_load8x8(__mem); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i32x4_load16x4") wasm_i32x4_load_16x4(const void *__mem) { return wasm_i32x4_load16x4(__mem); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u32x4_load16x4") wasm_u32x4_load_16x4(const void *__mem) { return wasm_u32x4_load16x4(__mem); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i64x2_load32x2") wasm_i64x2_load_32x2(const void *__mem) { return wasm_i64x2_load32x2(__mem); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u64x2_load32x2") wasm_u64x2_load_32x2(const void *__mem) { return wasm_u64x2_load32x2(__mem); } #define wasm_v8x16_shuffle(__a, __b, __c0, __c1, __c2, __c3, __c4, __c5, __c6, \ __c7, __c8, __c9, __c10, __c11, __c12, __c13, \ __c14, __c15) \ __DEPRECATED_WASM_MACRO("wasm_v8x16_shuffle", "wasm_i8x16_shuffle") \ wasm_i8x16_shuffle(__a, __b, __c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7, \ __c8, __c9, __c10, __c11, __c12, __c13, __c14, __c15) #define wasm_v16x8_shuffle(__a, __b, __c0, __c1, __c2, __c3, __c4, __c5, __c6, \ __c7) \ __DEPRECATED_WASM_MACRO("wasm_v16x8_shuffle", "wasm_i16x8_shuffle") \ wasm_i16x8_shuffle(__a, __b, __c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7) #define wasm_v32x4_shuffle(__a, __b, __c0, __c1, __c2, __c3) \ __DEPRECATED_WASM_MACRO("wasm_v32x4_shuffle", "wasm_i32x4_shuffle") \ wasm_i32x4_shuffle(__a, __b, __c0, __c1, __c2, __c3) #define wasm_v64x2_shuffle(__a, __b, __c0, __c1) \ __DEPRECATED_WASM_MACRO("wasm_v64x2_shuffle", "wasm_i64x2_shuffle") \ wasm_i64x2_shuffle(__a, __b, __c0, __c1) // Relaxed SIMD intrinsics #define __RELAXED_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("relaxed-simd"), \ __min_vector_width__(128))) static __inline__ v128_t __RELAXED_FN_ATTRS wasm_f32x4_relaxed_madd(v128_t __a, v128_t __b, v128_t __c) { return (v128_t)__builtin_wasm_relaxed_madd_f32x4((__f32x4)__a, (__f32x4)__b, (__f32x4)__c); } static __inline__ v128_t 
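/*
 * Worked example (illustrative, not from the original header):
 * wasm_i16x8_q15mulr_sat treats each 16-bit lane as a Q15 fixed-point value
 * and computes saturate((a * b + 0x4000) >> 15) per lane. Multiplying 0.5 by
 * 0.5 in Q15:
 *
 *   // 0x4000 represents 0.5 in Q15
 *   // (0x4000 * 0x4000 + 0x4000) >> 15 == 0x2000, i.e. 0.25
 *   v128_t q = wasm_i16x8_q15mulr_sat(wasm_i16x8_splat(0x4000),
 *                                     wasm_i16x8_splat(0x4000));
 */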
__RELAXED_FN_ATTRS wasm_f32x4_relaxed_nmadd(v128_t __a, v128_t __b, v128_t __c) { return (v128_t)__builtin_wasm_relaxed_nmadd_f32x4((__f32x4)__a, (__f32x4)__b, (__f32x4)__c); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_f64x2_relaxed_madd(v128_t __a, v128_t __b, v128_t __c) { return (v128_t)__builtin_wasm_relaxed_madd_f64x2((__f64x2)__a, (__f64x2)__b, (__f64x2)__c); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_f64x2_relaxed_nmadd(v128_t __a, v128_t __b, v128_t __c) { return (v128_t)__builtin_wasm_relaxed_nmadd_f64x2((__f64x2)__a, (__f64x2)__b, (__f64x2)__c); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_i8x16_relaxed_laneselect(v128_t __a, v128_t __b, v128_t __m) { return (v128_t)__builtin_wasm_relaxed_laneselect_i8x16( (__i8x16)__a, (__i8x16)__b, (__i8x16)__m); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_i16x8_relaxed_laneselect(v128_t __a, v128_t __b, v128_t __m) { return (v128_t)__builtin_wasm_relaxed_laneselect_i16x8( (__i16x8)__a, (__i16x8)__b, (__i16x8)__m); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_i32x4_relaxed_laneselect(v128_t __a, v128_t __b, v128_t __m) { return (v128_t)__builtin_wasm_relaxed_laneselect_i32x4( (__i32x4)__a, (__i32x4)__b, (__i32x4)__m); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_i64x2_relaxed_laneselect(v128_t __a, v128_t __b, v128_t __m) { return (v128_t)__builtin_wasm_relaxed_laneselect_i64x2( (__i64x2)__a, (__i64x2)__b, (__i64x2)__m); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_i8x16_relaxed_swizzle(v128_t __a, v128_t __s) { return (v128_t)__builtin_wasm_relaxed_swizzle_i8x16((__i8x16)__a, (__i8x16)__s); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_f32x4_relaxed_min(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_relaxed_min_f32x4((__f32x4)__a, (__f32x4)__b); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_f32x4_relaxed_max(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_relaxed_max_f32x4((__f32x4)__a, (__f32x4)__b); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_f64x2_relaxed_min(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_relaxed_min_f64x2((__f64x2)__a, (__f64x2)__b); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_f64x2_relaxed_max(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_relaxed_max_f64x2((__f64x2)__a, (__f64x2)__b); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_i32x4_relaxed_trunc_f32x4(v128_t __a) { return (v128_t)__builtin_wasm_relaxed_trunc_s_i32x4_f32x4((__f32x4)__a); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_u32x4_relaxed_trunc_f32x4(v128_t __a) { return (v128_t)__builtin_wasm_relaxed_trunc_u_i32x4_f32x4((__f32x4)__a); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_i32x4_relaxed_trunc_f64x2_zero(v128_t __a) { return (v128_t)__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2((__f64x2)__a); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_u32x4_relaxed_trunc_f64x2_zero(v128_t __a) { return (v128_t)__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2((__f64x2)__a); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_i16x8_relaxed_q15mulr(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_relaxed_q15mulr_s_i16x8((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_i16x8_relaxed_dot_i8x16_i7x16(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8((__i8x16)__a, (__i8x16)__b); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_i32x4_relaxed_dot_i8x16_i7x16_add(v128_t __a, v128_t __b, v128_t __c) { return (v128_t)__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4( 
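/*
 * Illustrative note (not from the original header; per the relaxed-simd
 * proposal): the "relaxed" forms above trade fully deterministic results for
 * speed. For example, wasm_f32x4_relaxed_madd may or may not fuse the multiply
 * and add, and wasm_i8x16_relaxed_laneselect is only guaranteed to behave like
 * wasm_v128_bitselect when every mask lane is all-ones or all-zeros, e.g.:
 *
 *   v128_t m = wasm_i8x16_gt(a, b);                     // lanes are ~0 or 0
 *   v128_t r = wasm_i8x16_relaxed_laneselect(a, b, m);  // portable result
 */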
(__i8x16)__a, (__i8x16)__b, (__i32x4)__c); } // Deprecated intrinsics static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i8x16_swizzle") wasm_v8x16_swizzle(v128_t __a, v128_t __b) { return wasm_i8x16_swizzle(__a, __b); } static __inline__ bool __DEPRECATED_FN_ATTRS("wasm_v128_any_true") wasm_i8x16_any_true(v128_t __a) { return wasm_v128_any_true(__a); } static __inline__ bool __DEPRECATED_FN_ATTRS("wasm_v128_any_true") wasm_i16x8_any_true(v128_t __a) { return wasm_v128_any_true(__a); } static __inline__ bool __DEPRECATED_FN_ATTRS("wasm_v128_any_true") wasm_i32x4_any_true(v128_t __a) { return wasm_v128_any_true(__a); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i8x16_add_sat") wasm_i8x16_add_saturate(v128_t __a, v128_t __b) { return wasm_i8x16_add_sat(__a, __b); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u8x16_add_sat") wasm_u8x16_add_saturate(v128_t __a, v128_t __b) { return wasm_u8x16_add_sat(__a, __b); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i8x16_sub_sat") wasm_i8x16_sub_saturate(v128_t __a, v128_t __b) { return wasm_i8x16_sub_sat(__a, __b); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u8x16_sub_sat") wasm_u8x16_sub_saturate(v128_t __a, v128_t __b) { return wasm_u8x16_sub_sat(__a, __b); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i16x8_add_sat") wasm_i16x8_add_saturate(v128_t __a, v128_t __b) { return wasm_i16x8_add_sat(__a, __b); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u16x8_add_sat") wasm_u16x8_add_saturate(v128_t __a, v128_t __b) { return wasm_u16x8_add_sat(__a, __b); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i16x8_sub_sat") wasm_i16x8_sub_saturate(v128_t __a, v128_t __b) { return wasm_i16x8_sub_sat(__a, __b); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u16x8_sub_sat") wasm_u16x8_sub_saturate(v128_t __a, v128_t __b) { return wasm_u16x8_sub_sat(__a, __b); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i16x8_extend_low_i8x16") wasm_i16x8_widen_low_i8x16(v128_t __a) { return wasm_i16x8_extend_low_i8x16(__a); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i16x8_extend_high_i8x16") wasm_i16x8_widen_high_i8x16(v128_t __a) { return wasm_i16x8_extend_high_i8x16(__a); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u16x8_extend_low_u8x16") wasm_i16x8_widen_low_u8x16(v128_t __a) { return wasm_u16x8_extend_low_u8x16(__a); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u16x8_extend_high_u8x16") wasm_i16x8_widen_high_u8x16(v128_t __a) { return wasm_u16x8_extend_high_u8x16(__a); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i32x4_extend_low_i16x8") wasm_i32x4_widen_low_i16x8(v128_t __a) { return wasm_i32x4_extend_low_i16x8(__a); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i32x4_extend_high_i16x8") wasm_i32x4_widen_high_i16x8(v128_t __a) { return wasm_i32x4_extend_high_i16x8(__a); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u32x4_extend_low_u16x8") wasm_i32x4_widen_low_u16x8(v128_t __a) { return wasm_u32x4_extend_low_u16x8(__a); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u32x4_extend_high_u16x8") wasm_i32x4_widen_high_u16x8(v128_t __a) { return wasm_u32x4_extend_high_u16x8(__a); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i32x4_trunc_sat_f32x4") wasm_i32x4_trunc_saturate_f32x4(v128_t __a) { return wasm_i32x4_trunc_sat_f32x4(__a); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u32x4_trunc_sat_f32x4") wasm_u32x4_trunc_saturate_f32x4(v128_t __a) { return wasm_u32x4_trunc_sat_f32x4(__a); } // Undefine helper 
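/*
 * Migration sketch (illustrative, not part of the original header): the
 * deprecated wrappers above only forward to the standardized names, so a
 * caller can switch mechanically, for example:
 *
 *   v128_t s = wasm_i8x16_add_saturate(a, b);    // deprecated, warns
 *   v128_t t = wasm_i8x16_add_sat(a, b);         // standard name, same result
 *   v128_t w = wasm_i16x8_widen_low_i8x16(a);    // deprecated
 *   v128_t x = wasm_i16x8_extend_low_i8x16(a);   // standard name, same result
 */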
macros
#undef __DEFAULT_FN_ATTRS
#undef __DEPRECATED_FN_ATTRS
#endif // __WASM_SIMD128_H
/*===---- nmmintrin.h - Implementation of SSE4 intrinsics on PowerPC -------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64le.
   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.
   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets. */
#endif
#ifndef NMMINTRIN_H_
#define NMMINTRIN_H_
/* We just include SSE4.1 header file. */
#include <smmintrin.h>
#endif /* NMMINTRIN_H_ */
//===-- sanitizer/common_interface_defs.h -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Common part of the public sanitizer interface.
//===----------------------------------------------------------------------===//
#ifndef SANITIZER_COMMON_INTERFACE_DEFS_H
#define SANITIZER_COMMON_INTERFACE_DEFS_H
#include <stddef.h>
#include <stdint.h>
// Windows allows a user to set their default calling convention, but we always
// use __cdecl
#ifdef _WIN32
#define SANITIZER_CDECL __cdecl
#else
#define SANITIZER_CDECL
#endif
#ifdef __cplusplus
extern "C" {
#endif
// Arguments for __sanitizer_sandbox_on_notify() below.
typedef struct {
  // Enable sandbox support in sanitizer coverage.
  int coverage_sandboxed;
  // File descriptor to write coverage data to. If -1 is passed, a file will
  // be pre-opened by __sanitizer_sandbox_on_notify(). This field has no
  // effect if coverage_sandboxed == 0.
  intptr_t coverage_fd;
  // If non-zero, split the coverage data into well-formed blocks. This is
  // useful when coverage_fd is a socket descriptor. Each block will contain
  // a header, allowing data from multiple processes to be sent over the same
  // socket.
  unsigned int coverage_max_block_size;
} __sanitizer_sandbox_arguments;
// Tell the tools to write their reports to "path.<pid>" instead of stderr.
void SANITIZER_CDECL __sanitizer_set_report_path(const char *path);
// Tell the tools to write their reports to the provided file descriptor
// (casted to void *).
void SANITIZER_CDECL __sanitizer_set_report_fd(void *fd);
// Get the current full report file path, if a path was specified by
// an earlier call to __sanitizer_set_report_path. Returns null otherwise.
const char *SANITIZER_CDECL __sanitizer_get_report_path();
// Notify the tools that the sandbox is going to be turned on. The reserved
// parameter will be used in the future to hold a structure with functions
// that the tools may call to bypass the sandbox.
void SANITIZER_CDECL
__sanitizer_sandbox_on_notify(__sanitizer_sandbox_arguments *args);
// This function is called by the tool when it has just finished reporting
// an error. 'error_summary' is a one-line string that summarizes
// the error message. This function can be overridden by the client.
void SANITIZER_CDECL __sanitizer_report_error_summary(const char *error_summary); // Some of the sanitizers (for example ASan/TSan) could miss bugs that happen // in unaligned loads/stores. To find such bugs reliably, you need to replace // plain unaligned loads/stores with these calls. /// Loads a 16-bit unaligned value. // /// \param p Pointer to unaligned memory. /// /// \returns Loaded value. uint16_t SANITIZER_CDECL __sanitizer_unaligned_load16(const void *p); /// Loads a 32-bit unaligned value. /// /// \param p Pointer to unaligned memory. /// /// \returns Loaded value. uint32_t SANITIZER_CDECL __sanitizer_unaligned_load32(const void *p); /// Loads a 64-bit unaligned value. /// /// \param p Pointer to unaligned memory. /// /// \returns Loaded value. uint64_t SANITIZER_CDECL __sanitizer_unaligned_load64(const void *p); /// Stores a 16-bit unaligned value. /// /// \param p Pointer to unaligned memory. /// \param x 16-bit value to store. void SANITIZER_CDECL __sanitizer_unaligned_store16(void *p, uint16_t x); /// Stores a 32-bit unaligned value. /// /// \param p Pointer to unaligned memory. /// \param x 32-bit value to store. void SANITIZER_CDECL __sanitizer_unaligned_store32(void *p, uint32_t x); /// Stores a 64-bit unaligned value. /// /// \param p Pointer to unaligned memory. /// \param x 64-bit value to store. void SANITIZER_CDECL __sanitizer_unaligned_store64(void *p, uint64_t x); // Returns 1 on the first call, then returns 0 thereafter. Called by the tool // to ensure only one report is printed when multiple errors occur // simultaneously. int SANITIZER_CDECL __sanitizer_acquire_crash_state(); /// Annotates the current state of a contiguous container, such as /// std::vector, std::string, or similar. /// /// A contiguous container is a container that keeps all of its elements /// in a contiguous region of memory. The container owns the region of memory /// [beg, end); the memory [beg, mid) is used to store the /// current elements, and the memory [mid, end) is reserved for future /// elements (beg <= mid <= end). For example, in /// std::vector<> v: /// /// \code /// beg = &v[0]; /// end = beg + v.capacity() * sizeof(v[0]); /// mid = beg + v.size() * sizeof(v[0]); /// \endcode /// /// This annotation tells the Sanitizer tool about the current state of the /// container so that the tool can report errors when memory from /// [mid, end) is accessed. Insert this annotation into methods like /// push_back() or pop_back(). Supply the old and new values of /// mid(old_mid and new_mid). In the initial /// state mid == end, so that should be the final state when the /// container is destroyed or when the container reallocates the storage. /// /// For ASan, beg no longer needs to be 8-aligned, /// first and last granule may be shared with other objects /// and therefore the function can be used for any allocator. /// /// The following example shows how to use the function: /// /// \code /// int32_t x[3]; // 12 bytes /// char *beg = (char*)&x[0]; /// char *end = beg + 12; /// __sanitizer_annotate_contiguous_container(beg, end, beg, end); /// \endcode /// /// \note Use this function with caution and do not use for anything other /// than vector-like classes. /// \note Unaligned beg or end may miss bugs in /// these granules. /// /// \param beg Beginning of memory region. /// \param end End of memory region. /// \param old_mid Old middle of memory region. /// \param new_mid New middle of memory region. 
void SANITIZER_CDECL __sanitizer_annotate_contiguous_container( const void *beg, const void *end, const void *old_mid, const void *new_mid); /// Similar to __sanitizer_annotate_contiguous_container. /// /// Annotates the current state of a contiguous container memory, /// such as std::deque's single chunk, when the boundries are moved. /// /// A contiguous chunk is a chunk that keeps all of its elements /// in a contiguous region of memory. The container owns the region of memory /// [storage_beg, storage_end); the memory [container_beg, /// container_end) is used to store the current elements, and the memory /// [storage_beg, container_beg), [container_end, storage_end) is /// reserved for future elements (storage_beg <= container_beg <= /// container_end <= storage_end). For example, in std::deque : /// - chunk with a frist deques element will have container_beg equal to address /// of the first element. /// - in every next chunk with elements, true is container_beg == /// storage_beg . /// /// Argument requirements: /// During unpoisoning memory of empty container (before first element is /// added): /// - old_container_beg_p == old_container_end_p /// During poisoning after last element was removed: /// - new_container_beg_p == new_container_end_p /// \param storage_beg Beginning of memory region. /// \param storage_end End of memory region. /// \param old_container_beg Old beginning of used region. /// \param old_container_end End of used region. /// \param new_container_beg New beginning of used region. /// \param new_container_end New end of used region. void SANITIZER_CDECL __sanitizer_annotate_double_ended_contiguous_container( const void *storage_beg, const void *storage_end, const void *old_container_beg, const void *old_container_end, const void *new_container_beg, const void *new_container_end); /// Returns true if the contiguous container [beg, end) is properly /// poisoned. /// /// Proper poisoning could occur, for example, with /// __sanitizer_annotate_contiguous_container), that is, if /// [beg, mid) is addressable and [mid, end) is unaddressable. /// Full verification requires O (end - beg) time; this function tries /// to avoid such complexity by touching only parts of the container around /// beg, mid, and end. /// /// \param beg Beginning of memory region. /// \param mid Middle of memory region. /// \param end Old end of memory region. /// /// \returns True if the contiguous container [beg, end) is properly /// poisoned. int SANITIZER_CDECL __sanitizer_verify_contiguous_container(const void *beg, const void *mid, const void *end); /// Returns true if the double ended contiguous /// container [storage_beg, storage_end) is properly poisoned. /// /// Proper poisoning could occur, for example, with /// __sanitizer_annotate_double_ended_contiguous_container), that is, if /// [storage_beg, container_beg) is not addressable, [container_beg, /// container_end) is addressable and [container_end, end) is /// unaddressable. Full verification requires O (storage_end - /// storage_beg) time; this function tries to avoid such complexity by /// touching only parts of the container around storage_beg, /// container_beg, container_end, and /// storage_end. /// /// \param storage_beg Beginning of memory region. /// \param container_beg Beginning of used region. /// \param container_end End of used region. /// \param storage_end End of memory region. 
/// /// \returns True if the double-ended contiguous container [storage_beg, /// container_beg, container_end, end) is properly poisoned - only /// [container_beg; container_end) is addressable. int SANITIZER_CDECL __sanitizer_verify_double_ended_contiguous_container( const void *storage_beg, const void *container_beg, const void *container_end, const void *storage_end); /// Similar to __sanitizer_verify_contiguous_container() but also /// returns the address of the first improperly poisoned byte. /// /// Returns NULL if the area is poisoned properly. /// /// \param beg Beginning of memory region. /// \param mid Middle of memory region. /// \param end Old end of memory region. /// /// \returns The bad address or NULL. const void *SANITIZER_CDECL __sanitizer_contiguous_container_find_bad_address( const void *beg, const void *mid, const void *end); /// returns the address of the first improperly poisoned byte. /// /// Returns NULL if the area is poisoned properly. /// /// \param storage_beg Beginning of memory region. /// \param container_beg Beginning of used region. /// \param container_end End of used region. /// \param storage_end End of memory region. /// /// \returns The bad address or NULL. const void *SANITIZER_CDECL __sanitizer_double_ended_contiguous_container_find_bad_address( const void *storage_beg, const void *container_beg, const void *container_end, const void *storage_end); /// Prints the stack trace leading to this call (useful for calling from the /// debugger). void SANITIZER_CDECL __sanitizer_print_stack_trace(void); // Symbolizes the supplied 'pc' using the format string 'fmt'. // Outputs at most 'out_buf_size' bytes into 'out_buf'. // If 'out_buf' is not empty then output is zero or more non empty C strings // followed by single empty C string. Multiple strings can be returned if PC // corresponds to inlined function. Inlined frames are printed in the order // from "most-inlined" to the "least-inlined", so the last frame should be the // not inlined function. // Inlined frames can be removed with 'symbolize_inline_frames=0'. // The format syntax is described in // lib/sanitizer_common/sanitizer_stacktrace_printer.h. void SANITIZER_CDECL __sanitizer_symbolize_pc(void *pc, const char *fmt, char *out_buf, size_t out_buf_size); // Same as __sanitizer_symbolize_pc, but for data section (i.e. globals). void SANITIZER_CDECL __sanitizer_symbolize_global(void *data_ptr, const char *fmt, char *out_buf, size_t out_buf_size); // Determine the return address. #if !defined(_MSC_VER) || defined(__clang__) #define __sanitizer_return_address() \ __builtin_extract_return_addr(__builtin_return_address(0)) #else void *_ReturnAddress(void); #pragma intrinsic(_ReturnAddress) #define __sanitizer_return_address() _ReturnAddress() #endif /// Sets the callback to be called immediately before death on error. /// /// Passing 0 will unset the callback. /// /// \param callback User-provided callback. void SANITIZER_CDECL __sanitizer_set_death_callback(void (*callback)(void)); // Interceptor hooks. // Whenever a libc function interceptor is called, it checks if the // corresponding weak hook is defined, and calls it if it is indeed defined. // The primary use-case is data-flow-guided fuzzing, where the fuzzer needs // to know what is being passed to libc functions (for example memcmp). // FIXME: implement more hooks. /// Interceptor hook for memcmp(). /// /// \param called_pc PC (program counter) address of the original call. /// \param s1 Pointer to block of memory. 
/// \param s2 Pointer to block of memory. /// \param n Number of bytes to compare. /// \param result Value returned by the intercepted function. void SANITIZER_CDECL __sanitizer_weak_hook_memcmp(void *called_pc, const void *s1, const void *s2, size_t n, int result); /// Interceptor hook for strncmp(). /// /// \param called_pc PC (program counter) address of the original call. /// \param s1 Pointer to block of memory. /// \param s2 Pointer to block of memory. /// \param n Number of bytes to compare. /// \param result Value returned by the intercepted function. void SANITIZER_CDECL __sanitizer_weak_hook_strncmp(void *called_pc, const char *s1, const char *s2, size_t n, int result); /// Interceptor hook for strncasecmp(). /// /// \param called_pc PC (program counter) address of the original call. /// \param s1 Pointer to block of memory. /// \param s2 Pointer to block of memory. /// \param n Number of bytes to compare. /// \param result Value returned by the intercepted function. void SANITIZER_CDECL __sanitizer_weak_hook_strncasecmp(void *called_pc, const char *s1, const char *s2, size_t n, int result); /// Interceptor hook for strcmp(). /// /// \param called_pc PC (program counter) address of the original call. /// \param s1 Pointer to block of memory. /// \param s2 Pointer to block of memory. /// \param result Value returned by the intercepted function. void SANITIZER_CDECL __sanitizer_weak_hook_strcmp(void *called_pc, const char *s1, const char *s2, int result); /// Interceptor hook for strcasecmp(). /// /// \param called_pc PC (program counter) address of the original call. /// \param s1 Pointer to block of memory. /// \param s2 Pointer to block of memory. /// \param result Value returned by the intercepted function. void SANITIZER_CDECL __sanitizer_weak_hook_strcasecmp(void *called_pc, const char *s1, const char *s2, int result); /// Interceptor hook for strstr(). /// /// \param called_pc PC (program counter) address of the original call. /// \param s1 Pointer to block of memory. /// \param s2 Pointer to block of memory. /// \param result Value returned by the intercepted function. void SANITIZER_CDECL __sanitizer_weak_hook_strstr(void *called_pc, const char *s1, const char *s2, char *result); void SANITIZER_CDECL __sanitizer_weak_hook_strcasestr(void *called_pc, const char *s1, const char *s2, char *result); void SANITIZER_CDECL __sanitizer_weak_hook_memmem(void *called_pc, const void *s1, size_t len1, const void *s2, size_t len2, void *result); // Prints stack traces for all live heap allocations ordered by total // allocation size until top_percent of total live heap is shown. top_percent // should be between 1 and 100. At most max_number_of_contexts contexts // (stack traces) are printed. // Experimental feature currently available only with ASan on Linux/x86_64. void SANITIZER_CDECL __sanitizer_print_memory_profile( size_t top_percent, size_t max_number_of_contexts); /// Notify ASan that a fiber switch has started (required only if implementing /// your own fiber library). /// /// Before switching to a different stack, you must call /// __sanitizer_start_switch_fiber() with a pointer to the bottom of the /// destination stack and with its size. When code starts running on the new /// stack, it must call __sanitizer_finish_switch_fiber() to finalize /// the switch. The __sanitizer_start_switch_fiber() function takes a /// void** pointer argument to store the current fake stack if there is /// one (it is necessary when the runtime option /// detect_stack_use_after_return is enabled). 
///
/// When restoring a stack, this void** pointer must be given to the
/// __sanitizer_finish_switch_fiber() function. In most cases, this
/// pointer can be stored on the stack immediately before switching. When
/// leaving a fiber definitely, NULL must be passed as the first argument to
/// the __sanitizer_start_switch_fiber() function so that the fake stack
/// is destroyed. If your program does not need stack use-after-return
/// detection, you can always pass NULL to these two functions.
///
/// \note The fake stack mechanism is disabled during fiber switch, so if a
/// signal callback runs during the switch, it will not benefit from stack
/// use-after-return detection.
///
/// \param[out] fake_stack_save Fake stack save location.
/// \param bottom Bottom address of stack.
/// \param size Size of stack in bytes.
void SANITIZER_CDECL __sanitizer_start_switch_fiber(void **fake_stack_save,
                                                    const void *bottom,
                                                    size_t size);
/// Notify ASan that a fiber switch has completed (required only if
/// implementing your own fiber library).
///
/// When code starts running on the new stack, it must call
/// __sanitizer_finish_switch_fiber() to finalize
/// the switch. For usage details, see the description of
/// __sanitizer_start_switch_fiber().
///
/// \param fake_stack_save Fake stack save location.
/// \param[out] bottom_old Bottom address of old stack.
/// \param[out] size_old Size of old stack in bytes.
void SANITIZER_CDECL __sanitizer_finish_switch_fiber(void *fake_stack_save,
                                                     const void **bottom_old,
                                                     size_t *size_old);
// Get full module name and calculate pc offset within it.
// Returns 1 if pc belongs to some module, 0 if module was not found.
int SANITIZER_CDECL __sanitizer_get_module_and_offset_for_pc(
    void *pc, char *module_path, size_t module_path_len, void **pc_offset);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // SANITIZER_COMMON_INTERFACE_DEFS_H
/*===---- __wmmintrin_pclmul.h - PCMUL intrinsics ---------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __WMMINTRIN_H
#error "Never use <__wmmintrin_pclmul.h> directly; include <wmmintrin.h> instead."
#endif
#ifndef __WMMINTRIN_PCLMUL_H
#define __WMMINTRIN_PCLMUL_H
/// Multiplies two 64-bit integer values, which are selected from source
/// operands using the immediate-value operand. The multiplication is a
/// carry-less multiplication, and the 128-bit integer product is stored in
/// the destination.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_clmulepi64_si128(__m128i X, __m128i Y, const int I);
/// \endcode
///
/// This intrinsic corresponds to the VPCLMULQDQ instruction.
///
/// \param X
///    A 128-bit vector of [2 x i64] containing one of the source operands.
/// \param Y
///    A 128-bit vector of [2 x i64] containing one of the source operands.
/// \param I
///    An immediate value specifying which 64-bit values to select from the
///    operands. Bit 0 is used to select a value from operand \a X, and bit
///    4 is used to select a value from operand \a Y: \n
///    Bit[0]=0 indicates that bits[63:0] of operand \a X are used. \n
///    Bit[0]=1 indicates that bits[127:64] of operand \a X are used. \n
///    Bit[4]=0 indicates that bits[63:0] of operand \a Y are used. \n
///    Bit[4]=1 indicates that bits[127:64] of operand \a Y are used.
/// \returns The 128-bit integer vector containing the result of the carry-less
///    multiplication of the selected 64-bit values.
#define _mm_clmulepi64_si128(X, Y, I)                                         \
  ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(X),                 \
                                        (__v2di)(__m128i)(Y), (char)(I)))
#endif /* __WMMINTRIN_PCLMUL_H */
/*===---- arm_bf16.h - ARM BF16 intrinsics -----------------------------------===
 *
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __ARM_BF16_H
#define __ARM_BF16_H
typedef __bf16 bfloat16_t;
#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
#undef __ai
#endif
/*===---------- avx512vlfp16intrin.h - AVX512-FP16 intrinsics --------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error                                                                        \
    "Never use <avx512vlfp16intrin.h> directly; include <immintrin.h> instead."
#endif
#ifdef __SSE2__
#ifndef __AVX512VLFP16INTRIN_H
#define __AVX512VLFP16INTRIN_H
/* Define the default attributes for the functions in this file.
*/ #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512fp16,avx512vl,no-evex512"), \ __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512fp16,avx512vl,no-evex512"), \ __min_vector_width__(128))) static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_cvtsh_h(__m128h __a) { return __a[0]; } static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_cvtsh_h(__m256h __a) { return __a[0]; } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_set_sh(_Float16 __h) { return __extension__(__m128h){__h, 0, 0, 0, 0, 0, 0, 0}; } static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_set1_ph(_Float16 __h) { return (__m128h)(__v8hf){__h, __h, __h, __h, __h, __h, __h, __h}; } static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_set1_ph(_Float16 __h) { return (__m256h)(__v16hf){__h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h}; } static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) { return (__m128h)(__v8hf){__h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1}; } static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_set1_pch(_Float16 _Complex h) { return (__m256h)_mm256_set1_ps(__builtin_bit_cast(float, h)); } static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_set1_pch(_Float16 _Complex h) { return (__m128h)_mm_set1_ps(__builtin_bit_cast(float, h)); } static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8, _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12, _Float16 __h13, _Float16 __h14, _Float16 __h15, _Float16 __h16) { return (__m256h)(__v16hf){__h16, __h15, __h14, __h13, __h12, __h11, __h10, __h9, __h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1}; } #define _mm_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8) \ _mm_set_ph((h8), (h7), (h6), (h5), (h4), (h3), (h2), (h1)) #define _mm256_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, \ h14, h15, h16) \ _mm256_set_ph((h16), (h15), (h14), (h13), (h12), (h11), (h10), (h9), (h8), \ (h7), (h6), (h5), (h4), (h3), (h2), (h1)) static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_add_ph(__m256h __A, __m256h __B) { return (__m256h)((__v16hf)__A + (__v16hf)__B); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_add_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)__W); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_add_ph(__mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_ph(__m128h __A, __m128h __B) { return (__m128h)((__v8hf)__A + (__v8hf)__B); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_ph(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_sub_ph(__m256h __A, __m256h __B) { return 
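/*
 * Illustrative note (not part of the original header): the _mask_ and _maskz_
 * variants above use __builtin_ia32_selectph_* to blend per lane on the mask
 * bits. A hedged usage sketch:
 *
 *   __m128h r0 = _mm_mask_add_ph(w, 0x0F, a, b);
 *   // lanes 0..3: a + b (mask bits set); lanes 4..7: taken from w
 *   __m128h r1 = _mm_maskz_add_ph(0x0F, a, b);
 *   // lanes 0..3: a + b; lanes 4..7: zero
 */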
(__m256h)((__v16hf)__A - (__v16hf)__B); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_sub_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)__W); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_ph(__mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_ph(__m128h __A, __m128h __B) { return (__m128h)((__v8hf)__A - (__v8hf)__B); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ph(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mul_ph(__m256h __A, __m256h __B) { return (__m256h)((__v16hf)__A * (__v16hf)__B); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_mul_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)__W); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_ph(__mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_ph(__m128h __A, __m128h __B) { return (__m128h)((__v8hf)__A * (__v8hf)__B); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ph(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_div_ph(__m256h __A, __m256h __B) { return (__m256h)((__v16hf)__A / (__v16hf)__B); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_div_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)__W); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_div_ph(__mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_ph(__m128h __A, __m128h __B) { return (__m128h)((__v8hf)__A / (__v8hf)__B); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_ph(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_min_ph(__m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B); } static __inline__ __m256h 
__DEFAULT_FN_ATTRS256 _mm256_mask_min_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B), (__v16hf)__W); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_min_ph(__mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_ph(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_ph(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_max_ph(__m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_max_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B), (__v16hf)__W); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_max_ph(__mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_ph(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_ph(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_abs_ph(__m256h __A) { return (__m256h)_mm256_and_epi32(_mm256_set1_epi32(0x7FFF7FFF), (__m256i)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_abs_ph(__m128h __A) { return (__m128h)_mm_and_epi32(_mm_set1_epi32(0x7FFF7FFF), (__m128i)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_conj_pch(__m256h __A) { return (__m256h)_mm256_xor_ps((__m256)__A, _mm256_set1_ps(-0.0f)); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_conj_pch(__m256h __W, __mmask8 __U, __m256h __A) { return (__m256h)__builtin_ia32_selectps_256( (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)__W); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_conj_pch(__mmask8 __U, __m256h __A) { return (__m256h)__builtin_ia32_selectps_256( (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_conj_pch(__m128h __A) { return (__m128h)_mm_xor_ps((__m128)__A, _mm_set1_ps(-0.0f)); 
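/*
 * Usage sketch (not part of the original header): per-lane min/max and
 * absolute value.  _mm_abs_ph simply clears the sign bit of every 16-bit
 * lane (the 0x7FFF7FFF mask used above).  Assumes -mavx512fp16 -mavx512vl;
 * names are illustrative.
 *
 *   #include <immintrin.h>
 *
 *   // Clamp every lane of x into [lo, hi].
 *   static __m128h clamp_ph(__m128h x, __m128h lo, __m128h hi) {
 *     return _mm_min_ph(_mm_max_ph(x, lo), hi);
 *   }
 *
 *   // |x| per lane.
 *   static __m128h magnitude_ph(__m128h x) { return _mm_abs_ph(x); }
 */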
} static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_conj_pch(__m128h __W, __mmask8 __U, __m128h __A) { return (__m128h)__builtin_ia32_selectps_128( (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_conj_pch(__mmask8 __U, __m128h __A) { return (__m128h)__builtin_ia32_selectps_128( (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)_mm_setzero_ps()); } #define _mm256_cmp_ph_mask(a, b, p) \ ((__mmask16)__builtin_ia32_cmpph256_mask( \ (__v16hf)(__m256h)(a), (__v16hf)(__m256h)(b), (int)(p), (__mmask16)-1)) #define _mm256_mask_cmp_ph_mask(m, a, b, p) \ ((__mmask16)__builtin_ia32_cmpph256_mask( \ (__v16hf)(__m256h)(a), (__v16hf)(__m256h)(b), (int)(p), (__mmask16)(m))) #define _mm_cmp_ph_mask(a, b, p) \ ((__mmask8)__builtin_ia32_cmpph128_mask( \ (__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)-1)) #define _mm_mask_cmp_ph_mask(m, a, b, p) \ ((__mmask8)__builtin_ia32_cmpph128_mask( \ (__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)(m))) static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rcp_ph(__m256h __A) { return (__m256h)__builtin_ia32_rcpph256_mask( (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_rcp_ph(__m256h __W, __mmask16 __U, __m256h __A) { return (__m256h)__builtin_ia32_rcpph256_mask((__v16hf)__A, (__v16hf)__W, (__mmask16)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp_ph(__mmask16 __U, __m256h __A) { return (__m256h)__builtin_ia32_rcpph256_mask( (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rcp_ph(__m128h __A) { return (__m128h)__builtin_ia32_rcpph128_mask( (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rcp_ph(__m128h __W, __mmask8 __U, __m128h __A) { return (__m128h)__builtin_ia32_rcpph128_mask((__v8hf)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rcp_ph(__mmask8 __U, __m128h __A) { return (__m128h)__builtin_ia32_rcpph128_mask( (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rsqrt_ph(__m256h __A) { return (__m256h)__builtin_ia32_rsqrtph256_mask( (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) { return (__m256h)__builtin_ia32_rsqrtph256_mask((__v16hf)__A, (__v16hf)__W, (__mmask16)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt_ph(__mmask16 __U, __m256h __A) { return (__m256h)__builtin_ia32_rsqrtph256_mask( (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rsqrt_ph(__m128h __A) { return (__m128h)__builtin_ia32_rsqrtph128_mask( (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt_ph(__m128h __W, __mmask8 __U, __m128h __A) { return (__m128h)__builtin_ia32_rsqrtph128_mask((__v8hf)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt_ph(__mmask8 __U, __m128h __A) { return (__m128h)__builtin_ia32_rsqrtph128_mask( (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_ph(__m128h __A) { return (__m128h)__builtin_ia32_getexpph128_mask( (__v8hf)__A, 
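/*
 * Usage sketch (not part of the original header): building a lane mask with
 * _mm_cmp_ph_mask and feeding it to a zero-masked operation, plus the
 * approximate reciprocal/reciprocal-square-root helpers.  _CMP_GE_OS comes
 * from the generic AVX compare-predicate constants.  Assumes -mavx512fp16
 * -mavx512vl; names are illustrative.
 *
 *   #include <immintrin.h>
 *
 *   // Replace every lane of x that is >= 1.0 with an approximate reciprocal
 *   // and zero the remaining lanes.
 *   static __m128h recip_of_large_lanes(__m128h x) {
 *     __mmask8 ge_one = _mm_cmp_ph_mask(x, _mm_set1_ph((_Float16)1.0f),
 *                                       _CMP_GE_OS);   // one bit per lane
 *     return _mm_maskz_rcp_ph(ge_one, x);               // zero-masked form
 *   }
 *
 *   // Fast approximate 1/sqrt(x) for all lanes.
 *   static __m128h inv_sqrt_ph(__m128h x) { return _mm_rsqrt_ph(x); }
 */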
(__v8hf)_mm_setzero_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ph(__m128h __W, __mmask8 __U, __m128h __A) { return (__m128h)__builtin_ia32_getexpph128_mask((__v8hf)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ph(__mmask8 __U, __m128h __A) { return (__m128h)__builtin_ia32_getexpph128_mask( (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_getexp_ph(__m256h __A) { return (__m256h)__builtin_ia32_getexpph256_mask( (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_ph(__m256h __W, __mmask16 __U, __m256h __A) { return (__m256h)__builtin_ia32_getexpph256_mask((__v16hf)__A, (__v16hf)__W, (__mmask16)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_ph(__mmask16 __U, __m256h __A) { return (__m256h)__builtin_ia32_getexpph256_mask( (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U); } #define _mm_getmant_ph(A, B, C) \ ((__m128h)__builtin_ia32_getmantph128_mask( \ (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)_mm_setzero_ph(), \ (__mmask8)-1)) #define _mm_mask_getmant_ph(W, U, A, B, C) \ ((__m128h)__builtin_ia32_getmantph128_mask( \ (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)(__m128h)(W), \ (__mmask8)(U))) #define _mm_maskz_getmant_ph(U, A, B, C) \ ((__m128h)__builtin_ia32_getmantph128_mask( \ (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U))) #define _mm256_getmant_ph(A, B, C) \ ((__m256h)__builtin_ia32_getmantph256_mask( \ (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \ (__v16hf)_mm256_setzero_ph(), (__mmask16)-1)) #define _mm256_mask_getmant_ph(W, U, A, B, C) \ ((__m256h)__builtin_ia32_getmantph256_mask( \ (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), (__v16hf)(__m256h)(W), \ (__mmask16)(U))) #define _mm256_maskz_getmant_ph(U, A, B, C) \ ((__m256h)__builtin_ia32_getmantph256_mask( \ (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \ (__v16hf)_mm256_setzero_ph(), (__mmask16)(U))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_ph(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_scalefph128_mask( (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_scalefph128_mask((__v8hf)__A, (__v8hf)__B, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ph(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_scalefph128_mask( (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_scalef_ph(__m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_scalefph256_mask( (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_scalefph256_mask((__v16hf)__A, (__v16hf)__B, (__v16hf)__W, (__mmask16)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_ph(__mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_scalefph256_mask( (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U); } #define _mm_roundscale_ph(A, imm) \ 
((__m128h)__builtin_ia32_rndscaleph_128_mask( \ (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(), \ (__mmask8)-1)) #define _mm_mask_roundscale_ph(W, U, A, imm) \ ((__m128h)__builtin_ia32_rndscaleph_128_mask( \ (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)(__m128h)(W), (__mmask8)(U))) #define _mm_maskz_roundscale_ph(U, A, imm) \ ((__m128h)__builtin_ia32_rndscaleph_128_mask( \ (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U))) #define _mm256_roundscale_ph(A, imm) \ ((__m256h)__builtin_ia32_rndscaleph_256_mask( \ (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \ (__mmask16)-1)) #define _mm256_mask_roundscale_ph(W, U, A, imm) \ ((__m256h)__builtin_ia32_rndscaleph_256_mask( \ (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)(__m256h)(W), \ (__mmask16)(U))) #define _mm256_maskz_roundscale_ph(U, A, imm) \ ((__m256h)__builtin_ia32_rndscaleph_256_mask( \ (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \ (__mmask16)(U))) #define _mm_reduce_ph(A, imm) \ ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), (int)(imm), \ (__v8hf)_mm_setzero_ph(), \ (__mmask8)-1)) #define _mm_mask_reduce_ph(W, U, A, imm) \ ((__m128h)__builtin_ia32_reduceph128_mask( \ (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)(__m128h)(W), (__mmask8)(U))) #define _mm_maskz_reduce_ph(U, A, imm) \ ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), (int)(imm), \ (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U))) #define _mm256_reduce_ph(A, imm) \ ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \ (__v16hf)_mm256_setzero_ph(), \ (__mmask16)-1)) #define _mm256_mask_reduce_ph(W, U, A, imm) \ ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \ (__v16hf)(__m256h)(W), \ (__mmask16)(U))) #define _mm256_maskz_reduce_ph(U, A, imm) \ ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \ (__v16hf)_mm256_setzero_ph(), \ (__mmask16)(U))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_ph(__m128h __a) { return __builtin_ia32_sqrtph((__v8hf)__a); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ph(__m128h __W, __mmask8 __U, __m128h __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ph(__mmask8 __U, __m128h __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)_mm_setzero_ph()); } static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_sqrt_ph(__m256h __a) { return (__m256h)__builtin_ia32_sqrtph256((__v16hf)__a); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, (__v16hf)_mm256_sqrt_ph(__A), (__v16hf)__W); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_ph(__mmask16 __U, __m256h __A) { return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)_mm256_sqrt_ph(__A), (__v16hf)_mm256_setzero_ph()); } #define _mm_mask_fpclass_ph_mask(U, A, imm) \ ((__mmask8)__builtin_ia32_fpclassph128_mask((__v8hf)(__m128h)(A), \ (int)(imm), (__mmask8)(U))) #define _mm_fpclass_ph_mask(A, imm) \ ((__mmask8)__builtin_ia32_fpclassph128_mask((__v8hf)(__m128h)(A), \ (int)(imm), (__mmask8)-1)) #define _mm256_mask_fpclass_ph_mask(U, A, imm) \ ((__mmask16)__builtin_ia32_fpclassph256_mask((__v16hf)(__m256h)(A), \ (int)(imm), (__mmask16)(U))) #define _mm256_fpclass_ph_mask(A, imm) \ 
((__mmask16)__builtin_ia32_fpclassph256_mask((__v16hf)(__m256h)(A), \ (int)(imm), (__mmask16)-1)) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtpd_ph(__m128d __A) { return (__m128h)__builtin_ia32_vcvtpd2ph128_mask( (__v2df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m128d __A) { return (__m128h)__builtin_ia32_vcvtpd2ph128_mask((__v2df)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_ph(__mmask8 __U, __m128d __A) { return (__m128h)__builtin_ia32_vcvtpd2ph128_mask( (__v2df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtpd_ph(__m256d __A) { return (__m128h)__builtin_ia32_vcvtpd2ph256_mask( (__v4df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m256d __A) { return (__m128h)__builtin_ia32_vcvtpd2ph256_mask((__v4df)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_ph(__mmask8 __U, __m256d __A) { return (__m128h)__builtin_ia32_vcvtpd2ph256_mask( (__v4df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtph_pd(__m128h __A) { return (__m128d)__builtin_ia32_vcvtph2pd128_mask( (__v8hf)__A, (__v2df)_mm_undefined_pd(), (__mmask8)-1); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_pd(__m128d __W, __mmask8 __U, __m128h __A) { return (__m128d)__builtin_ia32_vcvtph2pd128_mask((__v8hf)__A, (__v2df)__W, (__mmask8)__U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_pd(__mmask8 __U, __m128h __A) { return (__m128d)__builtin_ia32_vcvtph2pd128_mask( (__v8hf)__A, (__v2df)_mm_setzero_pd(), (__mmask8)__U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtph_pd(__m128h __A) { return (__m256d)__builtin_ia32_vcvtph2pd256_mask( (__v8hf)__A, (__v4df)_mm256_undefined_pd(), (__mmask8)-1); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_pd(__m256d __W, __mmask8 __U, __m128h __A) { return (__m256d)__builtin_ia32_vcvtph2pd256_mask((__v8hf)__A, (__v4df)__W, (__mmask8)__U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_pd(__mmask8 __U, __m128h __A) { return (__m256d)__builtin_ia32_vcvtph2pd256_mask( (__v8hf)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi16(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2w128_mask( (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_epi16(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2w128_mask((__v8hf)__A, (__v8hi)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_epi16(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2w128_mask( (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtph_epi16(__m256h __A) { return (__m256i)__builtin_ia32_vcvtph2w256_mask( (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_epi16(__m256i __W, __mmask16 __U, __m256h __A) { return (__m256i)__builtin_ia32_vcvtph2w256_mask((__v16hf)__A, (__v16hi)__W, (__mmask16)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 
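/*
 * Usage sketch (not part of the original header): exponent/mantissa
 * manipulation and rounding helpers.  getexp/getmant decompose a value,
 * scalef recombines it, roundscale rounds to an integral value, and fpclass
 * builds a lane mask from a VFPCLASS category immediate (assumed encoding:
 * 0x01 = QNaN, 0x80 = SNaN, so 0x81 tests for any NaN).  The _MM_MANT_* and
 * _MM_FROUND_* constants come from the wider AVX-512/SSE headers.  Assumes
 * -mavx512fp16 -mavx512vl; names are illustrative.
 *
 *   #include <immintrin.h>
 *
 *   static __m128h split_and_rejoin(__m128h x) {
 *     __m128h e = _mm_getexp_ph(x);                    // floor(log2|x|) per lane
 *     __m128h m = _mm_getmant_ph(x, _MM_MANT_NORM_1_2, // mantissa in [1, 2)
 *                                _MM_MANT_SIGN_src);   // keep original sign
 *     return _mm_scalef_ph(m, e);                      // m * 2^e
 *   }
 *
 *   static __m128h round_and_sqrt(__m128h x) {
 *     __m128h r = _mm_roundscale_ph(
 *         x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 *     __mmask8 nan = _mm_fpclass_ph_mask(r, 0x81);     // QNaN | SNaN lanes
 *     return _mm_maskz_sqrt_ph((__mmask8)~nan, r);     // sqrt, zeroing NaNs
 *   }
 */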
_mm256_maskz_cvtph_epi16(__mmask16 __U, __m256h __A) { return (__m256i)__builtin_ia32_vcvtph2w256_mask( (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi16(__m128h __A) { return (__m128i)__builtin_ia32_vcvttph2w128_mask( (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttph_epi16(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2w128_mask((__v8hf)__A, (__v8hi)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttph_epi16(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2w128_mask( (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttph_epi16(__m256h __A) { return (__m256i)__builtin_ia32_vcvttph2w256_mask( (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttph_epi16(__m256i __W, __mmask16 __U, __m256h __A) { return (__m256i)__builtin_ia32_vcvttph2w256_mask((__v16hf)__A, (__v16hi)__W, (__mmask16)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttph_epi16(__mmask16 __U, __m256h __A) { return (__m256i)__builtin_ia32_vcvttph2w256_mask( (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi16_ph(__m128i __A) { return (__m128h) __builtin_convertvector((__v8hi)__A, __v8hf); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_ph(__m128h __W, __mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_ph(__mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_ph(__m256i __A) { return (__m256h) __builtin_convertvector((__v16hi)__A, __v16hf); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_ph(__m256h __W, __mmask16 __U, __m256i __A) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)__W); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_ph(__mmask16 __U, __m256i __A) { return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu16(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2uw128_mask( (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_epu16(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2uw128_mask((__v8hf)__A, (__v8hu)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_epu16(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2uw128_mask( (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtph_epu16(__m256h __A) { return (__m256i)__builtin_ia32_vcvtph2uw256_mask( (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_epu16(__m256i __W, __mmask16 __U, __m256h __A) { return 
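/*
 * Usage sketch (not part of the original header): narrowing double to half
 * precision and widening back.  Only as many lanes as fit are converted:
 * four doubles from a __m256d become the four low _Float16 lanes of a
 * __m128h, and the low four halves widen back to four doubles.  Assumes
 * -mavx512fp16 -mavx512vl; names are illustrative.
 *
 *   #include <immintrin.h>
 *
 *   static __m128h pack_doubles_to_half(__m256d d) {
 *     return _mm256_cvtpd_ph(d);      // 4 x double -> low 4 x _Float16
 *   }
 *
 *   static __m256d unpack_half_to_doubles(__m128h h) {
 *     return _mm256_cvtph_pd(h);      // low 4 x _Float16 -> 4 x double
 *   }
 */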
(__m256i)__builtin_ia32_vcvtph2uw256_mask((__v16hf)__A, (__v16hu)__W, (__mmask16)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_epu16(__mmask16 __U, __m256h __A) { return (__m256i)__builtin_ia32_vcvtph2uw256_mask( (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu16(__m128h __A) { return (__m128i)__builtin_ia32_vcvttph2uw128_mask( (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttph_epu16(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2uw128_mask((__v8hf)__A, (__v8hu)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttph_epu16(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2uw128_mask( (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttph_epu16(__m256h __A) { return (__m256i)__builtin_ia32_vcvttph2uw256_mask( (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttph_epu16(__m256i __W, __mmask16 __U, __m256h __A) { return (__m256i)__builtin_ia32_vcvttph2uw256_mask((__v16hf)__A, (__v16hu)__W, (__mmask16)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttph_epu16(__mmask16 __U, __m256h __A) { return (__m256i)__builtin_ia32_vcvttph2uw256_mask( (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu16_ph(__m128i __A) { return (__m128h) __builtin_convertvector((__v8hu)__A, __v8hf); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_ph(__m128h __W, __mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtepu16_ph(__m256i __A) { return (__m256h) __builtin_convertvector((__v16hu)__A, __v16hf); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_ph(__m256h __W, __mmask16 __U, __m256i __A) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)__W); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_ph(__mmask16 __U, __m256i __A) { return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi32(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2dq128_mask( (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_epi32(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2dq128_mask((__v8hf)__A, (__v4si)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2dq128_mask( (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtph_epi32(__m128h __A) { return (__m256i)__builtin_ia32_vcvtph2dq256_mask( (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1); } static __inline__ 
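/*
 * Usage sketch (not part of the original header): converting between
 * _Float16 lanes and 16-bit integer lanes.  _mm_cvtph_epi16 rounds using the
 * current rounding mode, the "tt" form (_mm_cvttph_epi16) truncates toward
 * zero, and _mm_cvtepi16_ph goes the other way.  Assumes -mavx512fp16
 * -mavx512vl; names are illustrative.
 *
 *   #include <immintrin.h>
 *
 *   static __m128i quantize_round(__m128h x) { return _mm_cvtph_epi16(x);  }
 *   static __m128i quantize_trunc(__m128h x) { return _mm_cvttph_epi16(x); }
 *   static __m128h dequantize(__m128i q)     { return _mm_cvtepi16_ph(q);  }
 *
 *   // Unsigned variants exist as well, e.g.:
 *   static __m128h from_u16(__m128i q)       { return _mm_cvtepu16_ph(q);  }
 */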
__m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_epi32(__m256i __W, __mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvtph2dq256_mask((__v8hf)__A, (__v8si)__W, (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvtph2dq256_mask( (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu32(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2udq128_mask( (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_epu32(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2udq128_mask((__v8hf)__A, (__v4su)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2udq128_mask( (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtph_epu32(__m128h __A) { return (__m256i)__builtin_ia32_vcvtph2udq256_mask( (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_epu32(__m256i __W, __mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvtph2udq256_mask((__v8hf)__A, (__v8su)__W, (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvtph2udq256_mask( (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi32_ph(__m128i __A) { return (__m128h)__builtin_ia32_vcvtdq2ph128_mask( (__v4si)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvtdq2ph128_mask((__v4si)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_ph(__mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvtdq2ph128_mask( (__v4si)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_ph(__m256i __A) { return (__m128h) __builtin_convertvector((__v8si)__A, __v8hf); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m256i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_ph(__mmask8 __U, __m256i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)_mm_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ph(__m128i __A) { return (__m128h)__builtin_ia32_vcvtudq2ph128_mask( (__v4su)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvtudq2ph128_mask((__v4su)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_ph(__mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvtudq2ph128_mask( (__v4su)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_ph(__m256i __A) { return (__m128h) 
__builtin_convertvector((__v8su)__A, __v8hf); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m256i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_ph(__mmask8 __U, __m256i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)_mm_setzero_ph()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi32(__m128h __A) { return (__m128i)__builtin_ia32_vcvttph2dq128_mask( (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttph_epi32(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2dq128_mask((__v8hf)__A, (__v4si)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2dq128_mask( (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttph_epi32(__m128h __A) { return (__m256i)__builtin_ia32_vcvttph2dq256_mask( (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttph_epi32(__m256i __W, __mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvttph2dq256_mask((__v8hf)__A, (__v8si)__W, (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvttph2dq256_mask( (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu32(__m128h __A) { return (__m128i)__builtin_ia32_vcvttph2udq128_mask( (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttph_epu32(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2udq128_mask((__v8hf)__A, (__v4su)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2udq128_mask( (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttph_epu32(__m128h __A) { return (__m256i)__builtin_ia32_vcvttph2udq256_mask( (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttph_epu32(__m256i __W, __mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvttph2udq256_mask((__v8hf)__A, (__v8su)__W, (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvttph2udq256_mask( (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ph(__m128i __A) { return (__m128h)__builtin_ia32_vcvtqq2ph128_mask( (__v2di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvtqq2ph128_mask((__v2di)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_ph(__mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvtqq2ph128_mask( (__v2di)__A, (__v8hf)_mm_setzero_ph(), 
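/*
 * Usage sketch (not part of the original header): 32-bit integer <->
 * _Float16 conversions.  Note the asymmetric widths: eight int32 lanes of a
 * __m256i convert to the eight _Float16 lanes of a __m128h, and the eight
 * halves of a __m128h widen to a full __m256i.  Assumes -mavx512fp16
 * -mavx512vl; names are illustrative.
 *
 *   #include <immintrin.h>
 *
 *   static __m128h counts_to_half(__m256i counts) {
 *     return _mm256_cvtepi32_ph(counts);     // 8 x i32 -> 8 x _Float16
 *   }
 *
 *   static __m256i half_to_i32_trunc(__m128h x) {
 *     return _mm256_cvttph_epi32(x);         // truncate toward zero
 *   }
 */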
(__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_ph(__m256i __A) { return (__m128h)__builtin_ia32_vcvtqq2ph256_mask( (__v4di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m256i __A) { return (__m128h)__builtin_ia32_vcvtqq2ph256_mask((__v4di)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_ph(__mmask8 __U, __m256i __A) { return (__m128h)__builtin_ia32_vcvtqq2ph256_mask( (__v4di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi64(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2qq128_mask( (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_epi64(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2qq128_mask((__v8hf)__A, (__v2di)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2qq128_mask( (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtph_epi64(__m128h __A) { return (__m256i)__builtin_ia32_vcvtph2qq256_mask( (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_epi64(__m256i __W, __mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvtph2qq256_mask((__v8hf)__A, (__v4di)__W, (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvtph2qq256_mask( (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ph(__m128i __A) { return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask( (__v2du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask((__v2du)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu64_ph(__mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask( (__v2du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtepu64_ph(__m256i __A) { return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask( (__v4du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m256i __A) { return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask((__v4du)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu64_ph(__mmask8 __U, __m256i __A) { return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask( (__v4du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu64(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2uqq128_mask( (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_epu64(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2uqq128_mask((__v8hf)__A, (__v2du)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) { 
return (__m128i)__builtin_ia32_vcvtph2uqq128_mask( (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtph_epu64(__m128h __A) { return (__m256i)__builtin_ia32_vcvtph2uqq256_mask( (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_epu64(__m256i __W, __mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvtph2uqq256_mask((__v8hf)__A, (__v4du)__W, (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvtph2uqq256_mask( (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi64(__m128h __A) { return (__m128i)__builtin_ia32_vcvttph2qq128_mask( (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttph_epi64(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2qq128_mask((__v8hf)__A, (__v2di)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2qq128_mask( (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttph_epi64(__m128h __A) { return (__m256i)__builtin_ia32_vcvttph2qq256_mask( (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttph_epi64(__m256i __W, __mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvttph2qq256_mask((__v8hf)__A, (__v4di)__W, (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvttph2qq256_mask( (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu64(__m128h __A) { return (__m128i)__builtin_ia32_vcvttph2uqq128_mask( (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttph_epu64(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2uqq128_mask((__v8hf)__A, (__v2du)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2uqq128_mask( (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttph_epu64(__m128h __A) { return (__m256i)__builtin_ia32_vcvttph2uqq256_mask( (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttph_epu64(__m256i __W, __mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvttph2uqq256_mask((__v8hf)__A, (__v4du)__W, (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvttph2uqq256_mask( (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtxph_ps(__m128h __A) { return (__m128)__builtin_ia32_vcvtph2psx128_mask( (__v8hf)__A, (__v4sf)_mm_undefined_ps(), (__mmask8)-1); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtxph_ps(__m128 __W, __mmask8 __U, __m128h __A) { return 
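/*
 * Usage sketch (not part of the original header): 64-bit integer <->
 * _Float16 conversions.  Only two (128-bit) or four (256-bit) lanes are
 * involved, so the converted halves occupy the low lanes of a __m128h.
 * Assumes -mavx512fp16 -mavx512vl; names are illustrative.
 *
 *   #include <immintrin.h>
 *
 *   static __m128h i64_to_half(__m256i q) {
 *     return _mm256_cvtepi64_ph(q);          // 4 x i64 -> low 4 x _Float16
 *   }
 *
 *   static __m256i half_to_i64(__m128h x) {
 *     return _mm256_cvtph_epi64(x);          // low 4 x _Float16 -> 4 x i64
 *   }
 */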
(__m128)__builtin_ia32_vcvtph2psx128_mask((__v8hf)__A, (__v4sf)__W, (__mmask8)__U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) { return (__m128)__builtin_ia32_vcvtph2psx128_mask( (__v8hf)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtxph_ps(__m128h __A) { return (__m256)__builtin_ia32_vcvtph2psx256_mask( (__v8hf)__A, (__v8sf)_mm256_undefined_ps(), (__mmask8)-1); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtxph_ps(__m256 __W, __mmask8 __U, __m128h __A) { return (__m256)__builtin_ia32_vcvtph2psx256_mask((__v8hf)__A, (__v8sf)__W, (__mmask8)__U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) { return (__m256)__builtin_ia32_vcvtph2psx256_mask( (__v8hf)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtxps_ph(__m128 __A) { return (__m128h)__builtin_ia32_vcvtps2phx128_mask( (__v4sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtxps_ph(__m128h __W, __mmask8 __U, __m128 __A) { return (__m128h)__builtin_ia32_vcvtps2phx128_mask((__v4sf)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtxps_ph(__mmask8 __U, __m128 __A) { return (__m128h)__builtin_ia32_vcvtps2phx128_mask( (__v4sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtxps_ph(__m256 __A) { return (__m128h)__builtin_ia32_vcvtps2phx256_mask( (__v8sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtxps_ph(__m128h __W, __mmask8 __U, __m256 __A) { return (__m128h)__builtin_ia32_vcvtps2phx256_mask((__v8sf)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A) { return (__m128h)__builtin_ia32_vcvtps2phx256_mask( (__v8sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_ph(__m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)_mm_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_ph(__m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 
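/*
 * Usage sketch (not part of the original header): widening _Float16 to float
 * and narrowing back with the "x" conversion forms, e.g. to do an
 * intermediate computation in single precision while storing data as half.
 * Assumes -mavx512fp16 -mavx512vl; names are illustrative.
 *
 *   #include <immintrin.h>
 *
 *   static __m128h scale_in_fp32(__m128h x, float s) {
 *     __m256 wide = _mm256_cvtxph_ps(x);              // 8 x half -> 8 x float
 *     wide = _mm256_mul_ps(wide, _mm256_set1_ps(s));  // do the math in fp32
 *     return _mm256_cvtxps_ph(wide);                  // back to 8 x half
 *   }
 */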
_mm_maskz_fmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)_mm_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)_mm_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_ph(__m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmsub_ph(__m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return 
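/*
 * Usage sketch (not part of the original header): fused multiply-add on
 * _Float16 lanes and the three masking conventions used throughout this
 * file: _mask_ keeps lanes of the first source where the mask bit is 0,
 * _mask3_ keeps lanes of the third source (the addend), and _maskz_ zeroes
 * them.  Assumes -mavx512fp16 -mavx512vl; names are illustrative.
 *
 *   #include <immintrin.h>
 *
 *   static __m256h axpy_ph(__m256h a, __m256h x, __m256h y) {
 *     return _mm256_fmadd_ph(a, x, y);                 // a*x + y per lane
 *   }
 *
 *   static __m256h axpy_even_lanes(__m256h a, __m256h x, __m256h y) {
 *     // Only even lanes are updated; odd lanes keep the addend y.
 *     return _mm256_mask3_fmadd_ph(a, x, y, (__mmask16)0x5555);
 *   }
 */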
(__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmaddsub_ph(__m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)_mm_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsubadd_ph(__m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), (__v8hf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_ph(__mmask16 __U, __m256h __A, __m256h __B, 
__m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), (__v8hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), (__v8hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C), (__v8hf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C), (__v16hf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C), (__v8hf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C), (__v8hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C), (__v16hf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( 
(__mmask16)__U, __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C), (__v16hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmul_pch(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_vfcmulcph128_mask( (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fcmul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_vfcmulcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fcmul_pch(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_vfcmulcph128_mask( (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS128 _mm256_fcmul_pch(__m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_vfcmulcph256_mask( (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fcmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_vfcmulcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__W, (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fcmul_pch(__mmask8 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_vfcmulcph256_mask( (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmadd_pch(__m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fcmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectps_128( __U, __builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)(__m128h)__B, (__v4sf)__C, (__mmask8)__U), (__v4sf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fcmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fcmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfcmaddcph128_maskz( (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fcmadd_pch(__m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)-1); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fcmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectps_256( __U, __builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U), (__v8sf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fcmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) { return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fcmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_vfcmaddcph256_maskz( (__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmul_pch(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_vfmulcph128_mask( (__v4sf)__A, (__v4sf)__B, 
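/*
 * Usage sketch (not part of the original header): the _pch intrinsics treat
 * each __m128h as four complex numbers stored as (real, imag) _Float16
 * pairs.  _mm_fmul_pch multiplies complex lanes, _mm_fcmul_pch multiplies by
 * the conjugate of the second operand, and _mm_fcmadd_pch additionally
 * accumulates into a third operand.  Assumes -mavx512fp16 -mavx512vl; names
 * are illustrative.
 *
 *   #include <immintrin.h>
 *
 *   // acc += a * conj(b), four complex lanes at a time (a correlation step).
 *   static __m128h correlate_step(__m128h acc, __m128h a, __m128h b) {
 *     return _mm_fcmadd_pch(a, b, acc);
 *   }
 *
 *   // Plain complex product of the four lanes.
 *   static __m128h cmul(__m128h a, __m128h b) { return _mm_fmul_pch(a, b); }
 */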
(__v4sf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_vfmulcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmul_pch(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_vfmulcph128_mask( (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmul_pch(__m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_vfmulcph256_mask( (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_vfmulcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__W, (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmul_pch(__mmask8 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_vfmulcph256_mask( (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_pch(__m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectps_128( __U, __builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U), (__v4sf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfmaddcph128_maskz((__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_pch(__m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)-1); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectps_256( __U, __builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U), (__v8sf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) { return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_vfmaddcph256_maskz((__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) { return (__m128h)__builtin_ia32_selectph_128((__mmask8)__U, (__v8hf)__W, (__v8hf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) { return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)__W, (__v16hf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_permutex2var_ph(__m128h __A, 
__m128i __I, __m128h __B) { return (__m128h)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I, (__v8hi)__B); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_permutex2var_ph(__m256h __A, __m256i __I, __m256h __B) { return (__m256h)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I, (__v16hi)__B); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_permutexvar_ph(__m128i __A, __m128h __B) { return (__m128h)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_permutexvar_ph(__m256i __A, __m256h __B) { return (__m256h)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A); } static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_reduce_add_ph(__m256h __W) { return __builtin_ia32_reduce_fadd_ph256(-0.0f16, __W); } static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_reduce_mul_ph(__m256h __W) { return __builtin_ia32_reduce_fmul_ph256(1.0f16, __W); } static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_reduce_max_ph(__m256h __V) { return __builtin_ia32_reduce_fmax_ph256(__V); } static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_reduce_min_ph(__m256h __V) { return __builtin_ia32_reduce_fmin_ph256(__V); } static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_reduce_add_ph(__m128h __W) { return __builtin_ia32_reduce_fadd_ph128(-0.0f16, __W); } static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_reduce_mul_ph(__m128h __W) { return __builtin_ia32_reduce_fmul_ph128(1.0f16, __W); } static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_reduce_max_ph(__m128h __V) { return __builtin_ia32_reduce_fmax_ph128(__V); } static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_reduce_min_ph(__m128h __V) { return __builtin_ia32_reduce_fmin_ph128(__V); } // intrinsics below are alias for f*mul_*ch #define _mm_mul_pch(A, B) _mm_fmul_pch(A, B) #define _mm_mask_mul_pch(W, U, A, B) _mm_mask_fmul_pch(W, U, A, B) #define _mm_maskz_mul_pch(U, A, B) _mm_maskz_fmul_pch(U, A, B) #define _mm256_mul_pch(A, B) _mm256_fmul_pch(A, B) #define _mm256_mask_mul_pch(W, U, A, B) _mm256_mask_fmul_pch(W, U, A, B) #define _mm256_maskz_mul_pch(U, A, B) _mm256_maskz_fmul_pch(U, A, B) #define _mm_cmul_pch(A, B) _mm_fcmul_pch(A, B) #define _mm_mask_cmul_pch(W, U, A, B) _mm_mask_fcmul_pch(W, U, A, B) #define _mm_maskz_cmul_pch(U, A, B) _mm_maskz_fcmul_pch(U, A, B) #define _mm256_cmul_pch(A, B) _mm256_fcmul_pch(A, B) #define _mm256_mask_cmul_pch(W, U, A, B) _mm256_mask_fcmul_pch(W, U, A, B) #define _mm256_maskz_cmul_pch(U, A, B) _mm256_maskz_fcmul_pch(U, A, B) #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif #endif /*===---- htmxlintrin.h - XL compiler HTM execution intrinsics-------------===*\ * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * \*===----------------------------------------------------------------------===*/ #ifndef __HTMXLINTRIN_H #define __HTMXLINTRIN_H #ifndef __HTM__ #error "HTM instruction set not enabled" #endif #include #ifdef __powerpc__ #ifdef __cplusplus extern "C" { #endif #define _TEXASR_PTR(TM_BUF) ((texasr_t *)((char *)(TM_BUF) + 0)) #define _TEXASRU_PTR(TM_BUF) ((texasru_t *)((char *)(TM_BUF) + 0)) #define _TEXASRL_PTR(TM_BUF) ((texasrl_t *)((char *)(TM_BUF) + 4)) #define _TFIAR_PTR(TM_BUF) ((tfiar_t *)((char *)(TM_BUF) + 8)) typedef char TM_buff_type[16]; /* This macro can be used to determine whether a transaction was successfully started from the __TM_begin() and __TM_simple_begin() intrinsic functions below. */ #define _HTM_TBEGIN_STARTED 1 extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_simple_begin (void) { if (__builtin_expect (__builtin_tbegin (0), 1)) return _HTM_TBEGIN_STARTED; return 0; } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_begin (void* const __TM_buff) { *_TEXASRL_PTR (__TM_buff) = 0; if (__builtin_expect (__builtin_tbegin (0), 1)) return _HTM_TBEGIN_STARTED; #ifdef __powerpc64__ *_TEXASR_PTR (__TM_buff) = __builtin_get_texasr (); #else *_TEXASRU_PTR (__TM_buff) = __builtin_get_texasru (); *_TEXASRL_PTR (__TM_buff) = __builtin_get_texasr (); #endif *_TFIAR_PTR (__TM_buff) = __builtin_get_tfiar (); return 0; } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_end (void) { if (__builtin_expect (__builtin_tend (0), 1)) return 1; return 0; } extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_abort (void) { __builtin_tabort (0); } extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_named_abort (unsigned char const __code) { __builtin_tabort (__code); } extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_resume (void) { __builtin_tresume (); } extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_suspend (void) { __builtin_tsuspend (); } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_is_user_abort (void* const __TM_buff) { texasru_t texasru = *_TEXASRU_PTR (__TM_buff); return _TEXASRU_ABORT (texasru); } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_is_named_user_abort (void* const __TM_buff, unsigned char *__code) { texasru_t texasru = *_TEXASRU_PTR (__TM_buff); *__code = _TEXASRU_FAILURE_CODE (texasru); return _TEXASRU_ABORT (texasru); } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_is_illegal (void* const __TM_buff) { texasru_t texasru = *_TEXASRU_PTR (__TM_buff); return _TEXASRU_DISALLOWED (texasru); } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_is_footprint_exceeded (void* const __TM_buff) { texasru_t texasru = *_TEXASRU_PTR (__TM_buff); return _TEXASRU_FOOTPRINT_OVERFLOW (texasru); } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_nesting_depth (void* const __TM_buff) { texasrl_t texasrl; if (_HTM_STATE (__builtin_ttest ()) == _HTM_NONTRANSACTIONAL) { texasrl = *_TEXASRL_PTR (__TM_buff); if (!_TEXASR_FAILURE_SUMMARY (texasrl)) texasrl = 0; } else texasrl = (texasrl_t) 
__builtin_get_texasr (); return _TEXASR_TRANSACTION_LEVEL (texasrl); } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_is_nested_too_deep(void* const __TM_buff) { texasru_t texasru = *_TEXASRU_PTR (__TM_buff); return _TEXASRU_NESTING_OVERFLOW (texasru); } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_is_conflict(void* const __TM_buff) { texasru_t texasru = *_TEXASRU_PTR (__TM_buff); /* Return TEXASR bits 11 (Self-Induced Conflict) through 14 (Translation Invalidation Conflict). */ return (_TEXASRU_EXTRACT_BITS (texasru, 14, 4)) ? 1 : 0; } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_is_failure_persistent(void* const __TM_buff) { texasru_t texasru = *_TEXASRU_PTR (__TM_buff); return _TEXASRU_FAILURE_PERSISTENT (texasru); } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_failure_address(void* const __TM_buff) { return *_TFIAR_PTR (__TM_buff); } extern __inline long long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_failure_code(void* const __TM_buff) { return *_TEXASR_PTR (__TM_buff); } #ifdef __cplusplus } #endif #endif /* __powerpc__ */ #ifdef __s390__ #include /* These intrinsics are being made available for compatibility with the IBM XL compiler. For documentation please see the "z/OS XL C/C++ Programming Guide" publicly available on the web. */ static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_simple_begin () { return __builtin_tbegin_nofloat (0); } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_begin (void* const __tdb) { return __builtin_tbegin_nofloat (__tdb); } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_end () { return __builtin_tend (); } static __inline void __attribute__((__always_inline__)) __TM_abort () { return __builtin_tabort (_HTM_FIRST_USER_ABORT_CODE); } static __inline void __attribute__((__always_inline__, __nodebug__)) __TM_named_abort (unsigned char const __code) { return __builtin_tabort ((int)_HTM_FIRST_USER_ABORT_CODE + __code); } static __inline void __attribute__((__always_inline__, __nodebug__)) __TM_non_transactional_store (void* const __addr, long long const __value) { __builtin_non_tx_store ((uint64_t*)__addr, (uint64_t)__value); } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_nesting_depth (void* const __tdb_ptr) { int depth = __builtin_tx_nesting_depth (); struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr; if (depth != 0) return depth; if (tdb->format != 1) return 0; return tdb->nesting_depth; } /* Transaction failure diagnostics */ static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_is_user_abort (void* const __tdb_ptr) { struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr; if (tdb->format != 1) return 0; return !!(tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE); } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_is_named_user_abort (void* const __tdb_ptr, unsigned char* __code) { struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr; if (tdb->format != 1) return 0; if (tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE) { *__code = tdb->abort_code - _HTM_FIRST_USER_ABORT_CODE; return 1; } return 0; } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_is_illegal (void* const __tdb_ptr) { struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr; return (tdb->format == 1 && 
(tdb->abort_code == 4 /* unfiltered program interruption */ || tdb->abort_code == 11 /* restricted instruction */)); } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_is_footprint_exceeded (void* const __tdb_ptr) { struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr; return (tdb->format == 1 && (tdb->abort_code == 7 /* fetch overflow */ || tdb->abort_code == 8 /* store overflow */)); } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_is_nested_too_deep (void* const __tdb_ptr) { struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr; return tdb->format == 1 && tdb->abort_code == 13; /* depth exceeded */ } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_is_conflict (void* const __tdb_ptr) { struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr; return (tdb->format == 1 && (tdb->abort_code == 9 /* fetch conflict */ || tdb->abort_code == 10 /* store conflict */)); } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_is_failure_persistent (long const __result) { return __result == _HTM_TBEGIN_PERSISTENT; } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_failure_address (void* const __tdb_ptr) { struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr; return tdb->atia; } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_failure_code (void* const __tdb_ptr) { struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr; return tdb->abort_code; } #endif /* __s390__ */ #endif /* __HTMXLINTRIN_H */ /*===------------- lsxintrin.h - LoongArch LSX intrinsics ------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef _LOONGSON_SXINTRIN_H #define _LOONGSON_SXINTRIN_H 1 #if defined(__loongarch_sx) typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); typedef short v8i16 __attribute__((vector_size(16), aligned(16))); typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); typedef int v4i32 __attribute__((vector_size(16), aligned(16))); typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); typedef float v4f32 __attribute__((vector_size(16), aligned(16))); typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); typedef double v2f64 __attribute__((vector_size(16), aligned(16))); typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); typedef float __m128 
__attribute__((__vector_size__(16), __may_alias__)); typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsll_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsll_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsll_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsll_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsll_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsll_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsll_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsll_d((v2i64)_1, (v2i64)_2); } #define __lsx_vslli_b(/*__m128i*/ _1, /*ui3*/ _2) \ ((__m128i)__builtin_lsx_vslli_b((v16i8)(_1), (_2))) #define __lsx_vslli_h(/*__m128i*/ _1, /*ui4*/ _2) \ ((__m128i)__builtin_lsx_vslli_h((v8i16)(_1), (_2))) #define __lsx_vslli_w(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vslli_w((v4i32)(_1), (_2))) #define __lsx_vslli_d(/*__m128i*/ _1, /*ui6*/ _2) \ ((__m128i)__builtin_lsx_vslli_d((v2i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsra_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsra_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsra_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsra_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsra_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsra_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsra_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsra_d((v2i64)_1, (v2i64)_2); } #define __lsx_vsrai_b(/*__m128i*/ _1, /*ui3*/ _2) \ ((__m128i)__builtin_lsx_vsrai_b((v16i8)(_1), (_2))) #define __lsx_vsrai_h(/*__m128i*/ _1, /*ui4*/ _2) \ ((__m128i)__builtin_lsx_vsrai_h((v8i16)(_1), (_2))) #define __lsx_vsrai_w(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vsrai_w((v4i32)(_1), (_2))) #define __lsx_vsrai_d(/*__m128i*/ _1, /*ui6*/ _2) \ ((__m128i)__builtin_lsx_vsrai_d((v2i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrar_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrar_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrar_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrar_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrar_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrar_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrar_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrar_d((v2i64)_1, (v2i64)_2); } #define __lsx_vsrari_b(/*__m128i*/ _1, /*ui3*/ _2) \ ((__m128i)__builtin_lsx_vsrari_b((v16i8)(_1), (_2))) #define __lsx_vsrari_h(/*__m128i*/ _1, /*ui4*/ _2) \ ((__m128i)__builtin_lsx_vsrari_h((v8i16)(_1), (_2))) #define __lsx_vsrari_w(/*__m128i*/ _1, /*ui5*/ _2) \ 
((__m128i)__builtin_lsx_vsrari_w((v4i32)(_1), (_2))) #define __lsx_vsrari_d(/*__m128i*/ _1, /*ui6*/ _2) \ ((__m128i)__builtin_lsx_vsrari_d((v2i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrl_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrl_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrl_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrl_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrl_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrl_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrl_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrl_d((v2i64)_1, (v2i64)_2); } #define __lsx_vsrli_b(/*__m128i*/ _1, /*ui3*/ _2) \ ((__m128i)__builtin_lsx_vsrli_b((v16i8)(_1), (_2))) #define __lsx_vsrli_h(/*__m128i*/ _1, /*ui4*/ _2) \ ((__m128i)__builtin_lsx_vsrli_h((v8i16)(_1), (_2))) #define __lsx_vsrli_w(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vsrli_w((v4i32)(_1), (_2))) #define __lsx_vsrli_d(/*__m128i*/ _1, /*ui6*/ _2) \ ((__m128i)__builtin_lsx_vsrli_d((v2i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrlr_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrlr_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrlr_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrlr_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrlr_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrlr_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrlr_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrlr_d((v2i64)_1, (v2i64)_2); } #define __lsx_vsrlri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ((__m128i)__builtin_lsx_vsrlri_b((v16i8)(_1), (_2))) #define __lsx_vsrlri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ((__m128i)__builtin_lsx_vsrlri_h((v8i16)(_1), (_2))) #define __lsx_vsrlri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vsrlri_w((v4i32)(_1), (_2))) #define __lsx_vsrlri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ((__m128i)__builtin_lsx_vsrlri_d((v2i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vbitclr_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vbitclr_b((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vbitclr_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vbitclr_h((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vbitclr_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vbitclr_w((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vbitclr_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vbitclr_d((v2u64)_1, (v2u64)_2); } #define __lsx_vbitclri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ((__m128i)__builtin_lsx_vbitclri_b((v16u8)(_1), (_2))) #define __lsx_vbitclri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ((__m128i)__builtin_lsx_vbitclri_h((v8u16)(_1), (_2))) 
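The rounding arithmetic-shift forms above (__lsx_vsrari_*) are the usual way to divide signed lanes by a power of two with round-to-nearest rather than the floor a plain shift gives. A minimal usage sketch follows, assuming a LoongArch64 target compiled with -mlsx; the wrapper name is hypothetical and not part of lsxintrin.h.

/* Hypothetical example: scale signed 16-bit samples down by 4 with rounding. */
static inline __m128i scale_down_by_4_i16(__m128i samples) {
  /* __lsx_vsrari_h performs an arithmetic right shift with rounding, so each
     lane approximates samples / 4 while preserving its sign. */
  return __lsx_vsrari_h(samples, 2);
}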
#define __lsx_vbitclri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vbitclri_w((v4u32)(_1), (_2))) #define __lsx_vbitclri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ((__m128i)__builtin_lsx_vbitclri_d((v2u64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vbitset_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vbitset_b((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vbitset_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vbitset_h((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vbitset_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vbitset_w((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vbitset_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vbitset_d((v2u64)_1, (v2u64)_2); } #define __lsx_vbitseti_b(/*__m128i*/ _1, /*ui3*/ _2) \ ((__m128i)__builtin_lsx_vbitseti_b((v16u8)(_1), (_2))) #define __lsx_vbitseti_h(/*__m128i*/ _1, /*ui4*/ _2) \ ((__m128i)__builtin_lsx_vbitseti_h((v8u16)(_1), (_2))) #define __lsx_vbitseti_w(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vbitseti_w((v4u32)(_1), (_2))) #define __lsx_vbitseti_d(/*__m128i*/ _1, /*ui6*/ _2) \ ((__m128i)__builtin_lsx_vbitseti_d((v2u64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vbitrev_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vbitrev_b((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vbitrev_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vbitrev_h((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vbitrev_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vbitrev_w((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vbitrev_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vbitrev_d((v2u64)_1, (v2u64)_2); } #define __lsx_vbitrevi_b(/*__m128i*/ _1, /*ui3*/ _2) \ ((__m128i)__builtin_lsx_vbitrevi_b((v16u8)(_1), (_2))) #define __lsx_vbitrevi_h(/*__m128i*/ _1, /*ui4*/ _2) \ ((__m128i)__builtin_lsx_vbitrevi_h((v8u16)(_1), (_2))) #define __lsx_vbitrevi_w(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vbitrevi_w((v4u32)(_1), (_2))) #define __lsx_vbitrevi_d(/*__m128i*/ _1, /*ui6*/ _2) \ ((__m128i)__builtin_lsx_vbitrevi_d((v2u64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vadd_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vadd_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vadd_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vadd_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vadd_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vadd_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vadd_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vadd_d((v2i64)_1, (v2i64)_2); } #define __lsx_vaddi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vaddi_bu((v16i8)(_1), 
(_2))) #define __lsx_vaddi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vaddi_hu((v8i16)(_1), (_2))) #define __lsx_vaddi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vaddi_wu((v4i32)(_1), (_2))) #define __lsx_vaddi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vaddi_du((v2i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsub_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsub_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsub_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsub_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsub_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsub_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsub_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsub_d((v2i64)_1, (v2i64)_2); } #define __lsx_vsubi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vsubi_bu((v16i8)(_1), (_2))) #define __lsx_vsubi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vsubi_hu((v8i16)(_1), (_2))) #define __lsx_vsubi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vsubi_wu((v4i32)(_1), (_2))) #define __lsx_vsubi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vsubi_du((v2i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmax_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmax_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmax_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmax_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmax_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmax_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmax_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmax_d((v2i64)_1, (v2i64)_2); } #define __lsx_vmaxi_b(/*__m128i*/ _1, /*si5*/ _2) \ ((__m128i)__builtin_lsx_vmaxi_b((v16i8)(_1), (_2))) #define __lsx_vmaxi_h(/*__m128i*/ _1, /*si5*/ _2) \ ((__m128i)__builtin_lsx_vmaxi_h((v8i16)(_1), (_2))) #define __lsx_vmaxi_w(/*__m128i*/ _1, /*si5*/ _2) \ ((__m128i)__builtin_lsx_vmaxi_w((v4i32)(_1), (_2))) #define __lsx_vmaxi_d(/*__m128i*/ _1, /*si5*/ _2) \ ((__m128i)__builtin_lsx_vmaxi_d((v2i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmax_bu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmax_bu((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmax_hu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmax_hu((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmax_wu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmax_wu((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmax_du(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmax_du((v2u64)_1, (v2u64)_2); } #define __lsx_vmaxi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ 
((__m128i)__builtin_lsx_vmaxi_bu((v16u8)(_1), (_2))) #define __lsx_vmaxi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vmaxi_hu((v8u16)(_1), (_2))) #define __lsx_vmaxi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vmaxi_wu((v4u32)(_1), (_2))) #define __lsx_vmaxi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vmaxi_du((v2u64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmin_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmin_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmin_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmin_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmin_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmin_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmin_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmin_d((v2i64)_1, (v2i64)_2); } #define __lsx_vmini_b(/*__m128i*/ _1, /*si5*/ _2) \ ((__m128i)__builtin_lsx_vmini_b((v16i8)(_1), (_2))) #define __lsx_vmini_h(/*__m128i*/ _1, /*si5*/ _2) \ ((__m128i)__builtin_lsx_vmini_h((v8i16)(_1), (_2))) #define __lsx_vmini_w(/*__m128i*/ _1, /*si5*/ _2) \ ((__m128i)__builtin_lsx_vmini_w((v4i32)(_1), (_2))) #define __lsx_vmini_d(/*__m128i*/ _1, /*si5*/ _2) \ ((__m128i)__builtin_lsx_vmini_d((v2i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmin_bu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmin_bu((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmin_hu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmin_hu((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmin_wu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmin_wu((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmin_du(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmin_du((v2u64)_1, (v2u64)_2); } #define __lsx_vmini_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vmini_bu((v16u8)(_1), (_2))) #define __lsx_vmini_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vmini_hu((v8u16)(_1), (_2))) #define __lsx_vmini_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vmini_wu((v4u32)(_1), (_2))) #define __lsx_vmini_du(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vmini_du((v2u64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vseq_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vseq_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vseq_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vseq_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vseq_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vseq_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vseq_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vseq_d((v2i64)_1, (v2i64)_2); } #define 
__lsx_vseqi_b(/*__m128i*/ _1, /*si5*/ _2) \ ((__m128i)__builtin_lsx_vseqi_b((v16i8)(_1), (_2))) #define __lsx_vseqi_h(/*__m128i*/ _1, /*si5*/ _2) \ ((__m128i)__builtin_lsx_vseqi_h((v8i16)(_1), (_2))) #define __lsx_vseqi_w(/*__m128i*/ _1, /*si5*/ _2) \ ((__m128i)__builtin_lsx_vseqi_w((v4i32)(_1), (_2))) #define __lsx_vseqi_d(/*__m128i*/ _1, /*si5*/ _2) \ ((__m128i)__builtin_lsx_vseqi_d((v2i64)(_1), (_2))) #define __lsx_vslti_b(/*__m128i*/ _1, /*si5*/ _2) \ ((__m128i)__builtin_lsx_vslti_b((v16i8)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vslt_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vslt_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vslt_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vslt_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vslt_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vslt_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vslt_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vslt_d((v2i64)_1, (v2i64)_2); } #define __lsx_vslti_h(/*__m128i*/ _1, /*si5*/ _2) \ ((__m128i)__builtin_lsx_vslti_h((v8i16)(_1), (_2))) #define __lsx_vslti_w(/*__m128i*/ _1, /*si5*/ _2) \ ((__m128i)__builtin_lsx_vslti_w((v4i32)(_1), (_2))) #define __lsx_vslti_d(/*__m128i*/ _1, /*si5*/ _2) \ ((__m128i)__builtin_lsx_vslti_d((v2i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vslt_bu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vslt_bu((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vslt_hu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vslt_hu((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vslt_wu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vslt_wu((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vslt_du(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vslt_du((v2u64)_1, (v2u64)_2); } #define __lsx_vslti_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vslti_bu((v16u8)(_1), (_2))) #define __lsx_vslti_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vslti_hu((v8u16)(_1), (_2))) #define __lsx_vslti_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vslti_wu((v4u32)(_1), (_2))) #define __lsx_vslti_du(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vslti_du((v2u64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsle_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsle_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsle_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsle_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsle_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsle_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsle_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsle_d((v2i64)_1, 
(v2i64)_2); } #define __lsx_vslei_b(/*__m128i*/ _1, /*si5*/ _2) \ ((__m128i)__builtin_lsx_vslei_b((v16i8)(_1), (_2))) #define __lsx_vslei_h(/*__m128i*/ _1, /*si5*/ _2) \ ((__m128i)__builtin_lsx_vslei_h((v8i16)(_1), (_2))) #define __lsx_vslei_w(/*__m128i*/ _1, /*si5*/ _2) \ ((__m128i)__builtin_lsx_vslei_w((v4i32)(_1), (_2))) #define __lsx_vslei_d(/*__m128i*/ _1, /*si5*/ _2) \ ((__m128i)__builtin_lsx_vslei_d((v2i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsle_bu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsle_bu((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsle_hu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsle_hu((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsle_wu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsle_wu((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsle_du(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsle_du((v2u64)_1, (v2u64)_2); } #define __lsx_vslei_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vslei_bu((v16u8)(_1), (_2))) #define __lsx_vslei_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vslei_hu((v8u16)(_1), (_2))) #define __lsx_vslei_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vslei_wu((v4u32)(_1), (_2))) #define __lsx_vslei_du(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vslei_du((v2u64)(_1), (_2))) #define __lsx_vsat_b(/*__m128i*/ _1, /*ui3*/ _2) \ ((__m128i)__builtin_lsx_vsat_b((v16i8)(_1), (_2))) #define __lsx_vsat_h(/*__m128i*/ _1, /*ui4*/ _2) \ ((__m128i)__builtin_lsx_vsat_h((v8i16)(_1), (_2))) #define __lsx_vsat_w(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vsat_w((v4i32)(_1), (_2))) #define __lsx_vsat_d(/*__m128i*/ _1, /*ui6*/ _2) \ ((__m128i)__builtin_lsx_vsat_d((v2i64)(_1), (_2))) #define __lsx_vsat_bu(/*__m128i*/ _1, /*ui3*/ _2) \ ((__m128i)__builtin_lsx_vsat_bu((v16u8)(_1), (_2))) #define __lsx_vsat_hu(/*__m128i*/ _1, /*ui4*/ _2) \ ((__m128i)__builtin_lsx_vsat_hu((v8u16)(_1), (_2))) #define __lsx_vsat_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vsat_wu((v4u32)(_1), (_2))) #define __lsx_vsat_du(/*__m128i*/ _1, /*ui6*/ _2) \ ((__m128i)__builtin_lsx_vsat_du((v2u64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vadda_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vadda_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vadda_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vadda_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vadda_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vadda_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vadda_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vadda_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsadd_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsadd_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsadd_h(__m128i _1, __m128i _2) { 
return (__m128i)__builtin_lsx_vsadd_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsadd_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsadd_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsadd_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsadd_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsadd_bu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsadd_bu((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsadd_hu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsadd_hu((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsadd_wu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsadd_wu((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsadd_du(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsadd_du((v2u64)_1, (v2u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vavg_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vavg_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vavg_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vavg_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vavg_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vavg_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vavg_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vavg_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vavg_bu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vavg_bu((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vavg_hu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vavg_hu((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vavg_wu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vavg_wu((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vavg_du(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vavg_du((v2u64)_1, (v2u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vavgr_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vavgr_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vavgr_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vavgr_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vavgr_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vavgr_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vavgr_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vavgr_d((v2i64)_1, (v2i64)_2); } 
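The saturating adds above (__lsx_vsadd_*) differ from __lsx_vadd_* only in clamping on overflow, which is usually what pixel and audio kernels want. A short sketch, with a hypothetical wrapper name and the same -mlsx assumption as before:

/* Hypothetical example: add two vectors of unsigned 8-bit pixel values,
   clamping each lane at 255 instead of wrapping modulo 256. */
static inline __m128i add_pixels_saturating(__m128i a, __m128i b) {
  /* __lsx_vsadd_bu saturates per byte; __lsx_vadd_b would wrap. */
  return __lsx_vsadd_bu(a, b);
}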
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vavgr_bu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vavgr_bu((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vavgr_hu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vavgr_hu((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vavgr_wu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vavgr_wu((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vavgr_du(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vavgr_du((v2u64)_1, (v2u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssub_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssub_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssub_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssub_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssub_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssub_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssub_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssub_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssub_bu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssub_bu((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssub_hu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssub_hu((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssub_wu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssub_wu((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssub_du(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssub_du((v2u64)_1, (v2u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vabsd_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vabsd_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vabsd_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vabsd_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vabsd_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vabsd_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vabsd_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vabsd_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vabsd_bu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vabsd_bu((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vabsd_hu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vabsd_hu((v8u16)_1, (v8u16)_2); } extern __inline 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vabsd_wu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vabsd_wu((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vabsd_du(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vabsd_du((v2u64)_1, (v2u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmul_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmul_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmul_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmul_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmul_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmul_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmul_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmul_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmadd_b(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmadd_b((v16i8)_1, (v16i8)_2, (v16i8)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmadd_h(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmadd_h((v8i16)_1, (v8i16)_2, (v8i16)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmadd_w(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmadd_w((v4i32)_1, (v4i32)_2, (v4i32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmadd_d(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmadd_d((v2i64)_1, (v2i64)_2, (v2i64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmsub_b(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmsub_b((v16i8)_1, (v16i8)_2, (v16i8)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmsub_h(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmsub_h((v8i16)_1, (v8i16)_2, (v8i16)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmsub_w(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmsub_w((v4i32)_1, (v4i32)_2, (v4i32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmsub_d(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmsub_d((v2i64)_1, (v2i64)_2, (v2i64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vdiv_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vdiv_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vdiv_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vdiv_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vdiv_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vdiv_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i 
__lsx_vdiv_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vdiv_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vdiv_bu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vdiv_bu((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vdiv_hu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vdiv_hu((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vdiv_wu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vdiv_wu((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vdiv_du(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vdiv_du((v2u64)_1, (v2u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vhaddw_h_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vhaddw_h_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vhaddw_w_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vhaddw_w_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vhaddw_d_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vhaddw_d_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vhaddw_hu_bu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vhaddw_hu_bu((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vhaddw_wu_hu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vhaddw_wu_hu((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vhaddw_du_wu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vhaddw_du_wu((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vhsubw_h_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vhsubw_h_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vhsubw_w_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vhsubw_w_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vhsubw_d_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vhsubw_d_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vhsubw_hu_bu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vhsubw_hu_bu((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vhsubw_wu_hu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vhsubw_wu_hu((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vhsubw_du_wu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vhsubw_du_wu((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmod_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmod_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i __lsx_vmod_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmod_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmod_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmod_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmod_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmod_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmod_bu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmod_bu((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmod_hu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmod_hu((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmod_wu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmod_wu((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmod_du(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmod_du((v2u64)_1, (v2u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vreplve_b(__m128i _1, int _2) { return (__m128i)__builtin_lsx_vreplve_b((v16i8)_1, (int)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vreplve_h(__m128i _1, int _2) { return (__m128i)__builtin_lsx_vreplve_h((v8i16)_1, (int)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vreplve_w(__m128i _1, int _2) { return (__m128i)__builtin_lsx_vreplve_w((v4i32)_1, (int)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vreplve_d(__m128i _1, int _2) { return (__m128i)__builtin_lsx_vreplve_d((v2i64)_1, (int)_2); } #define __lsx_vreplvei_b(/*__m128i*/ _1, /*ui4*/ _2) \ ((__m128i)__builtin_lsx_vreplvei_b((v16i8)(_1), (_2))) #define __lsx_vreplvei_h(/*__m128i*/ _1, /*ui3*/ _2) \ ((__m128i)__builtin_lsx_vreplvei_h((v8i16)(_1), (_2))) #define __lsx_vreplvei_w(/*__m128i*/ _1, /*ui2*/ _2) \ ((__m128i)__builtin_lsx_vreplvei_w((v4i32)(_1), (_2))) #define __lsx_vreplvei_d(/*__m128i*/ _1, /*ui1*/ _2) \ ((__m128i)__builtin_lsx_vreplvei_d((v2i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vpickev_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vpickev_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vpickev_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vpickev_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vpickev_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vpickev_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vpickev_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vpickev_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vpickod_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vpickod_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i __lsx_vpickod_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vpickod_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vpickod_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vpickod_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vpickod_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vpickod_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vilvh_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vilvh_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vilvh_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vilvh_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vilvh_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vilvh_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vilvh_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vilvh_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vilvl_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vilvl_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vilvl_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vilvl_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vilvl_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vilvl_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vilvl_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vilvl_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vpackev_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vpackev_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vpackev_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vpackev_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vpackev_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vpackev_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vpackev_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vpackev_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vpackod_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vpackod_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vpackod_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vpackod_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vpackod_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vpackod_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, 
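/* Usage sketch (illustrative only): vpickev / vpickod gather the even- or
   odd-indexed lanes of the two sources, vilvl / vilvh interleave the low or
   high halves, and vpackev / vpackod merge alternating lanes; together they
   are the LSX permute primitives.  A common deinterleave of packed 16-bit
   sample pairs, assuming `ab0` and `ab1` hold interleaved a/b samples:

       __m128i a = __lsx_vpickev_h(ab1, ab0);   all even-indexed lanes (the a samples)
       __m128i b = __lsx_vpickod_h(ab1, ab0);   all odd-indexed lanes  (the b samples)
*/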
__artificial__)) __m128i __lsx_vpackod_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vpackod_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vshuf_h(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vshuf_h((v8i16)_1, (v8i16)_2, (v8i16)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vshuf_w(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vshuf_w((v4i32)_1, (v4i32)_2, (v4i32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vshuf_d(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vshuf_d((v2i64)_1, (v2i64)_2, (v2i64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vand_v(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vand_v((v16u8)_1, (v16u8)_2); } #define __lsx_vandi_b(/*__m128i*/ _1, /*ui8*/ _2) \ ((__m128i)__builtin_lsx_vandi_b((v16u8)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vor_v(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vor_v((v16u8)_1, (v16u8)_2); } #define __lsx_vori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ((__m128i)__builtin_lsx_vori_b((v16u8)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vnor_v(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vnor_v((v16u8)_1, (v16u8)_2); } #define __lsx_vnori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ((__m128i)__builtin_lsx_vnori_b((v16u8)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vxor_v(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vxor_v((v16u8)_1, (v16u8)_2); } #define __lsx_vxori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ((__m128i)__builtin_lsx_vxori_b((v16u8)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vbitsel_v(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vbitsel_v((v16u8)_1, (v16u8)_2, (v16u8)_3); } #define __lsx_vbitseli_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ((__m128i)__builtin_lsx_vbitseli_b((v16u8)(_1), (v16u8)(_2), (_3))) #define __lsx_vshuf4i_b(/*__m128i*/ _1, /*ui8*/ _2) \ ((__m128i)__builtin_lsx_vshuf4i_b((v16i8)(_1), (_2))) #define __lsx_vshuf4i_h(/*__m128i*/ _1, /*ui8*/ _2) \ ((__m128i)__builtin_lsx_vshuf4i_h((v8i16)(_1), (_2))) #define __lsx_vshuf4i_w(/*__m128i*/ _1, /*ui8*/ _2) \ ((__m128i)__builtin_lsx_vshuf4i_w((v4i32)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vreplgr2vr_b(int _1) { return (__m128i)__builtin_lsx_vreplgr2vr_b((int)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vreplgr2vr_h(int _1) { return (__m128i)__builtin_lsx_vreplgr2vr_h((int)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vreplgr2vr_w(int _1) { return (__m128i)__builtin_lsx_vreplgr2vr_w((int)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vreplgr2vr_d(long int _1) { return (__m128i)__builtin_lsx_vreplgr2vr_d((long int)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vpcnt_b(__m128i _1) { return (__m128i)__builtin_lsx_vpcnt_b((v16i8)_1); } extern __inline 
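/* Usage sketch (illustrative only): the whole-register bitwise wrappers
   (__lsx_vand_v, __lsx_vor_v, __lsx_vxor_v, __lsx_vnor_v) and the broadcast
   wrappers (__lsx_vreplgr2vr_b/h/w/d) are the usual masking building blocks,
   with __lsx_vbitsel_v doing a per-bit select.  Assuming `x`, `y` and `keep`
   are __m128i values:

       __m128i zero   = __lsx_vreplgr2vr_w(0);        broadcast 0 into every word lane
       __m128i merged = __lsx_vbitsel_v(x, y, keep);  bit from y where keep is 1, else from x
*/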
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vpcnt_h(__m128i _1) { return (__m128i)__builtin_lsx_vpcnt_h((v8i16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vpcnt_w(__m128i _1) { return (__m128i)__builtin_lsx_vpcnt_w((v4i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vpcnt_d(__m128i _1) { return (__m128i)__builtin_lsx_vpcnt_d((v2i64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vclo_b(__m128i _1) { return (__m128i)__builtin_lsx_vclo_b((v16i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vclo_h(__m128i _1) { return (__m128i)__builtin_lsx_vclo_h((v8i16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vclo_w(__m128i _1) { return (__m128i)__builtin_lsx_vclo_w((v4i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vclo_d(__m128i _1) { return (__m128i)__builtin_lsx_vclo_d((v2i64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vclz_b(__m128i _1) { return (__m128i)__builtin_lsx_vclz_b((v16i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vclz_h(__m128i _1) { return (__m128i)__builtin_lsx_vclz_h((v8i16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vclz_w(__m128i _1) { return (__m128i)__builtin_lsx_vclz_w((v4i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vclz_d(__m128i _1) { return (__m128i)__builtin_lsx_vclz_d((v2i64)_1); } #define __lsx_vpickve2gr_b(/*__m128i*/ _1, /*ui4*/ _2) \ ((int)__builtin_lsx_vpickve2gr_b((v16i8)(_1), (_2))) #define __lsx_vpickve2gr_h(/*__m128i*/ _1, /*ui3*/ _2) \ ((int)__builtin_lsx_vpickve2gr_h((v8i16)(_1), (_2))) #define __lsx_vpickve2gr_w(/*__m128i*/ _1, /*ui2*/ _2) \ ((int)__builtin_lsx_vpickve2gr_w((v4i32)(_1), (_2))) #define __lsx_vpickve2gr_d(/*__m128i*/ _1, /*ui1*/ _2) \ ((long int)__builtin_lsx_vpickve2gr_d((v2i64)(_1), (_2))) #define __lsx_vpickve2gr_bu(/*__m128i*/ _1, /*ui4*/ _2) \ ((unsigned int)__builtin_lsx_vpickve2gr_bu((v16i8)(_1), (_2))) #define __lsx_vpickve2gr_hu(/*__m128i*/ _1, /*ui3*/ _2) \ ((unsigned int)__builtin_lsx_vpickve2gr_hu((v8i16)(_1), (_2))) #define __lsx_vpickve2gr_wu(/*__m128i*/ _1, /*ui2*/ _2) \ ((unsigned int)__builtin_lsx_vpickve2gr_wu((v4i32)(_1), (_2))) #define __lsx_vpickve2gr_du(/*__m128i*/ _1, /*ui1*/ _2) \ ((unsigned long int)__builtin_lsx_vpickve2gr_du((v2i64)(_1), (_2))) #define __lsx_vinsgr2vr_b(/*__m128i*/ _1, /*int*/ _2, /*ui4*/ _3) \ ((__m128i)__builtin_lsx_vinsgr2vr_b((v16i8)(_1), (int)(_2), (_3))) #define __lsx_vinsgr2vr_h(/*__m128i*/ _1, /*int*/ _2, /*ui3*/ _3) \ ((__m128i)__builtin_lsx_vinsgr2vr_h((v8i16)(_1), (int)(_2), (_3))) #define __lsx_vinsgr2vr_w(/*__m128i*/ _1, /*int*/ _2, /*ui2*/ _3) \ ((__m128i)__builtin_lsx_vinsgr2vr_w((v4i32)(_1), (int)(_2), (_3))) #define __lsx_vinsgr2vr_d(/*__m128i*/ _1, /*long int*/ _2, /*ui1*/ _3) \ ((__m128i)__builtin_lsx_vinsgr2vr_d((v2i64)(_1), (long int)(_2), (_3))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfadd_s(__m128 _1, __m128 _2) { return (__m128)__builtin_lsx_vfadd_s((v4f32)_1, (v4f32)_2); } extern __inline 
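/* Usage sketch (illustrative only): __lsx_vpickve2gr_* and __lsx_vinsgr2vr_*
   move single lanes between vector and general-purpose registers; they are
   macros because the lane index has to be an immediate.  Assuming `v` is a
   __m128i of 32-bit lanes:

       int     lane2 = __lsx_vpickve2gr_w(v, 2);       read lane 2 as a signed int
       __m128i v2    = __lsx_vinsgr2vr_w(v, 123, 0);   write 123 into lane 0
*/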
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfadd_d(__m128d _1, __m128d _2) { return (__m128d)__builtin_lsx_vfadd_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfsub_s(__m128 _1, __m128 _2) { return (__m128)__builtin_lsx_vfsub_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfsub_d(__m128d _1, __m128d _2) { return (__m128d)__builtin_lsx_vfsub_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfmul_s(__m128 _1, __m128 _2) { return (__m128)__builtin_lsx_vfmul_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfmul_d(__m128d _1, __m128d _2) { return (__m128d)__builtin_lsx_vfmul_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfdiv_s(__m128 _1, __m128 _2) { return (__m128)__builtin_lsx_vfdiv_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfdiv_d(__m128d _1, __m128d _2) { return (__m128d)__builtin_lsx_vfdiv_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcvt_h_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcvt_h_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfcvt_s_d(__m128d _1, __m128d _2) { return (__m128)__builtin_lsx_vfcvt_s_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfmin_s(__m128 _1, __m128 _2) { return (__m128)__builtin_lsx_vfmin_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfmin_d(__m128d _1, __m128d _2) { return (__m128d)__builtin_lsx_vfmin_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfmina_s(__m128 _1, __m128 _2) { return (__m128)__builtin_lsx_vfmina_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfmina_d(__m128d _1, __m128d _2) { return (__m128d)__builtin_lsx_vfmina_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfmax_s(__m128 _1, __m128 _2) { return (__m128)__builtin_lsx_vfmax_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfmax_d(__m128d _1, __m128d _2) { return (__m128d)__builtin_lsx_vfmax_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfmaxa_s(__m128 _1, __m128 _2) { return (__m128)__builtin_lsx_vfmaxa_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfmaxa_d(__m128d _1, __m128d _2) { return (__m128d)__builtin_lsx_vfmaxa_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfclass_s(__m128 _1) { return (__m128i)__builtin_lsx_vfclass_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfclass_d(__m128d 
_1) { return (__m128i)__builtin_lsx_vfclass_d((v2f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfsqrt_s(__m128 _1) { return (__m128)__builtin_lsx_vfsqrt_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfsqrt_d(__m128d _1) { return (__m128d)__builtin_lsx_vfsqrt_d((v2f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfrecip_s(__m128 _1) { return (__m128)__builtin_lsx_vfrecip_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfrecip_d(__m128d _1) { return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfrecipe_s(__m128 _1) { return (__m128)__builtin_lsx_vfrecipe_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfrecipe_d(__m128d _1) { return (__m128d)__builtin_lsx_vfrecipe_d((v2f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfrint_s(__m128 _1) { return (__m128)__builtin_lsx_vfrint_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfrint_d(__m128d _1) { return (__m128d)__builtin_lsx_vfrint_d((v2f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfrsqrt_s(__m128 _1) { return (__m128)__builtin_lsx_vfrsqrt_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfrsqrt_d(__m128d _1) { return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfrsqrte_s(__m128 _1) { return (__m128)__builtin_lsx_vfrsqrte_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfrsqrte_d(__m128d _1) { return (__m128d)__builtin_lsx_vfrsqrte_d((v2f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vflogb_s(__m128 _1) { return (__m128)__builtin_lsx_vflogb_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vflogb_d(__m128d _1) { return (__m128d)__builtin_lsx_vflogb_d((v2f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfcvth_s_h(__m128i _1) { return (__m128)__builtin_lsx_vfcvth_s_h((v8i16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfcvth_d_s(__m128 _1) { return (__m128d)__builtin_lsx_vfcvth_d_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfcvtl_s_h(__m128i _1) { return (__m128)__builtin_lsx_vfcvtl_s_h((v8i16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfcvtl_d_s(__m128 _1) { return (__m128d)__builtin_lsx_vfcvtl_d_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftint_w_s(__m128 _1) { return (__m128i)__builtin_lsx_vftint_w_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftint_l_d(__m128d _1) { return (__m128i)__builtin_lsx_vftint_l_d((v2f64)_1); } extern 
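/* Usage sketch (illustrative only): the single-precision wrappers take and
   return __m128 (four float lanes), the double-precision ones __m128d (two
   double lanes); the _e-suffixed vfrecipe / vfrsqrte forms are, per their
   names, the hardware's estimate versions of vfrecip / vfrsqrt.  Assuming
   `a` and `b` are __m128 and `d` is __m128d:

       __m128  lo = __lsx_vfmin_s(a, b);    per-lane minimum
       __m128d rt = __lsx_vfsqrt_d(d);      per-lane square root
*/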
__inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftint_wu_s(__m128 _1) { return (__m128i)__builtin_lsx_vftint_wu_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftint_lu_d(__m128d _1) { return (__m128i)__builtin_lsx_vftint_lu_d((v2f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrz_w_s(__m128 _1) { return (__m128i)__builtin_lsx_vftintrz_w_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrz_l_d(__m128d _1) { return (__m128i)__builtin_lsx_vftintrz_l_d((v2f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrz_wu_s(__m128 _1) { return (__m128i)__builtin_lsx_vftintrz_wu_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrz_lu_d(__m128d _1) { return (__m128i)__builtin_lsx_vftintrz_lu_d((v2f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vffint_s_w(__m128i _1) { return (__m128)__builtin_lsx_vffint_s_w((v4i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vffint_d_l(__m128i _1) { return (__m128d)__builtin_lsx_vffint_d_l((v2i64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vffint_s_wu(__m128i _1) { return (__m128)__builtin_lsx_vffint_s_wu((v4u32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vffint_d_lu(__m128i _1) { return (__m128d)__builtin_lsx_vffint_d_lu((v2u64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vandn_v(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vandn_v((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vneg_b(__m128i _1) { return (__m128i)__builtin_lsx_vneg_b((v16i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vneg_h(__m128i _1) { return (__m128i)__builtin_lsx_vneg_h((v8i16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vneg_w(__m128i _1) { return (__m128i)__builtin_lsx_vneg_w((v4i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vneg_d(__m128i _1) { return (__m128i)__builtin_lsx_vneg_d((v2i64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmuh_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmuh_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmuh_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmuh_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmuh_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmuh_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmuh_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmuh_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmuh_bu(__m128i _1, __m128i _2) { 
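/* Usage sketch (illustrative only): the vftint wrappers above convert float
   lanes to integer lanes and the vffint wrappers convert back.  The infix
   after vftint selects the rounding: the plain form uses the current rounding
   mode, rz truncates toward zero, and the rp / rm / rne forms further down
   round toward +inf, toward -inf and to nearest-even.  Assuming `f` is a
   __m128 of floats:

       __m128i i = __lsx_vftintrz_w_s(f);   float -> int32, truncated
       __m128  g = __lsx_vffint_s_w(i);     int32 -> float
*/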
return (__m128i)__builtin_lsx_vmuh_bu((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmuh_hu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmuh_hu((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmuh_wu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmuh_wu((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmuh_du(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmuh_du((v2u64)_1, (v2u64)_2); } #define __lsx_vsllwil_h_b(/*__m128i*/ _1, /*ui3*/ _2) \ ((__m128i)__builtin_lsx_vsllwil_h_b((v16i8)(_1), (_2))) #define __lsx_vsllwil_w_h(/*__m128i*/ _1, /*ui4*/ _2) \ ((__m128i)__builtin_lsx_vsllwil_w_h((v8i16)(_1), (_2))) #define __lsx_vsllwil_d_w(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vsllwil_d_w((v4i32)(_1), (_2))) #define __lsx_vsllwil_hu_bu(/*__m128i*/ _1, /*ui3*/ _2) \ ((__m128i)__builtin_lsx_vsllwil_hu_bu((v16u8)(_1), (_2))) #define __lsx_vsllwil_wu_hu(/*__m128i*/ _1, /*ui4*/ _2) \ ((__m128i)__builtin_lsx_vsllwil_wu_hu((v8u16)(_1), (_2))) #define __lsx_vsllwil_du_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vsllwil_du_wu((v4u32)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsran_b_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsran_b_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsran_h_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsran_h_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsran_w_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsran_w_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssran_b_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssran_b_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssran_h_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssran_h_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssran_w_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssran_w_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssran_bu_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssran_bu_h((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssran_hu_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssran_hu_w((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssran_wu_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssran_wu_d((v2u64)_1, (v2u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrarn_b_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrarn_b_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrarn_h_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrarn_h_w((v4i32)_1, (v4i32)_2); } extern __inline 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrarn_w_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrarn_w_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssrarn_b_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssrarn_b_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssrarn_h_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssrarn_h_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssrarn_w_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssrarn_w_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssrarn_bu_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssrarn_bu_h((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssrarn_hu_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssrarn_hu_w((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssrarn_wu_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssrarn_wu_d((v2u64)_1, (v2u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrln_b_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrln_b_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrln_h_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrln_h_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrln_w_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrln_w_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssrln_bu_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssrln_bu_h((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssrln_hu_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssrln_hu_w((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssrln_wu_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssrln_wu_d((v2u64)_1, (v2u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrlrn_b_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrlrn_b_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrlrn_h_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrlrn_h_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsrlrn_w_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsrlrn_w_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssrlrn_bu_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssrlrn_bu_h((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssrlrn_hu_w(__m128i _1, __m128i _2) { return 
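/* Usage sketch (illustrative only): this run covers the shift-right-and-narrow
   family.  In a name such as __lsx_vssrarn_bu_h, the leading ss marks the
   saturating form, sra is an arithmetic right shift, the extra r adds
   rounding, n narrows, and bu_h means unsigned byte results from halfword
   inputs; the second operand supplies the per-lane shift counts.  Assuming
   `acc` holds signed 16-bit lanes and `shift` the per-lane counts:

       __m128i out = __lsx_vssrarn_bu_h(acc, shift);   shift, round, saturate to u8
*/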
(__m128i)__builtin_lsx_vssrlrn_hu_w((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssrlrn_wu_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssrlrn_wu_d((v2u64)_1, (v2u64)_2); } #define __lsx_vfrstpi_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ((__m128i)__builtin_lsx_vfrstpi_b((v16i8)(_1), (v16i8)(_2), (_3))) #define __lsx_vfrstpi_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ((__m128i)__builtin_lsx_vfrstpi_h((v8i16)(_1), (v8i16)(_2), (_3))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfrstp_b(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vfrstp_b((v16i8)_1, (v16i8)_2, (v16i8)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfrstp_h(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vfrstp_h((v8i16)_1, (v8i16)_2, (v8i16)_3); } #define __lsx_vshuf4i_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ((__m128i)__builtin_lsx_vshuf4i_d((v2i64)(_1), (v2i64)(_2), (_3))) #define __lsx_vbsrl_v(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vbsrl_v((v16i8)(_1), (_2))) #define __lsx_vbsll_v(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vbsll_v((v16i8)(_1), (_2))) #define __lsx_vextrins_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ((__m128i)__builtin_lsx_vextrins_b((v16i8)(_1), (v16i8)(_2), (_3))) #define __lsx_vextrins_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ((__m128i)__builtin_lsx_vextrins_h((v8i16)(_1), (v8i16)(_2), (_3))) #define __lsx_vextrins_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ((__m128i)__builtin_lsx_vextrins_w((v4i32)(_1), (v4i32)(_2), (_3))) #define __lsx_vextrins_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ((__m128i)__builtin_lsx_vextrins_d((v2i64)(_1), (v2i64)(_2), (_3))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmskltz_b(__m128i _1) { return (__m128i)__builtin_lsx_vmskltz_b((v16i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmskltz_h(__m128i _1) { return (__m128i)__builtin_lsx_vmskltz_h((v8i16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmskltz_w(__m128i _1) { return (__m128i)__builtin_lsx_vmskltz_w((v4i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmskltz_d(__m128i _1) { return (__m128i)__builtin_lsx_vmskltz_d((v2i64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsigncov_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsigncov_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsigncov_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsigncov_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsigncov_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsigncov_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsigncov_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsigncov_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfmadd_s(__m128 _1, __m128 _2, __m128 _3) { return 
(__m128)__builtin_lsx_vfmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfmadd_d(__m128d _1, __m128d _2, __m128d _3) { return (__m128d)__builtin_lsx_vfmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfmsub_s(__m128 _1, __m128 _2, __m128 _3) { return (__m128)__builtin_lsx_vfmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfmsub_d(__m128d _1, __m128d _2, __m128d _3) { return (__m128d)__builtin_lsx_vfmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfnmadd_s(__m128 _1, __m128 _2, __m128 _3) { return (__m128)__builtin_lsx_vfnmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfnmadd_d(__m128d _1, __m128d _2, __m128d _3) { return (__m128d)__builtin_lsx_vfnmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfnmsub_s(__m128 _1, __m128 _2, __m128 _3) { return (__m128)__builtin_lsx_vfnmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfnmsub_d(__m128d _1, __m128d _2, __m128d _3) { return (__m128d)__builtin_lsx_vfnmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrne_w_s(__m128 _1) { return (__m128i)__builtin_lsx_vftintrne_w_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrne_l_d(__m128d _1) { return (__m128i)__builtin_lsx_vftintrne_l_d((v2f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrp_w_s(__m128 _1) { return (__m128i)__builtin_lsx_vftintrp_w_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrp_l_d(__m128d _1) { return (__m128i)__builtin_lsx_vftintrp_l_d((v2f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrm_w_s(__m128 _1) { return (__m128i)__builtin_lsx_vftintrm_w_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrm_l_d(__m128d _1) { return (__m128i)__builtin_lsx_vftintrm_l_d((v2f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftint_w_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vftint_w_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vffint_s_l(__m128i _1, __m128i _2) { return (__m128)__builtin_lsx_vffint_s_l((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrz_w_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vftintrz_w_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrp_w_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vftintrp_w_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, 
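/* Usage sketch (illustrative only): the fused multiply-add wrappers follow
   the usual FMA shape, _1 * _2 + _3 for vfmadd, with vfmsub / vfnmadd /
   vfnmsub giving the subtracted and negated variants.  A dot-product style
   accumulation over float lanes, assuming `a0`, `b0`, `a1`, `b1` are __m128:

       __m128 acc = { 0.0f, 0.0f, 0.0f, 0.0f };
       acc = __lsx_vfmadd_s(a0, b0, acc);
       acc = __lsx_vfmadd_s(a1, b1, acc);
*/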
__artificial__)) __m128i __lsx_vftintrm_w_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vftintrm_w_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrne_w_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vftintrne_w_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintl_l_s(__m128 _1) { return (__m128i)__builtin_lsx_vftintl_l_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftinth_l_s(__m128 _1) { return (__m128i)__builtin_lsx_vftinth_l_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vffinth_d_w(__m128i _1) { return (__m128d)__builtin_lsx_vffinth_d_w((v4i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vffintl_d_w(__m128i _1) { return (__m128d)__builtin_lsx_vffintl_d_w((v4i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrzl_l_s(__m128 _1) { return (__m128i)__builtin_lsx_vftintrzl_l_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrzh_l_s(__m128 _1) { return (__m128i)__builtin_lsx_vftintrzh_l_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrpl_l_s(__m128 _1) { return (__m128i)__builtin_lsx_vftintrpl_l_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrph_l_s(__m128 _1) { return (__m128i)__builtin_lsx_vftintrph_l_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrml_l_s(__m128 _1) { return (__m128i)__builtin_lsx_vftintrml_l_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrmh_l_s(__m128 _1) { return (__m128i)__builtin_lsx_vftintrmh_l_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrnel_l_s(__m128 _1) { return (__m128i)__builtin_lsx_vftintrnel_l_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vftintrneh_l_s(__m128 _1) { return (__m128i)__builtin_lsx_vftintrneh_l_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfrintrne_s(__m128 _1) { return (__m128)__builtin_lsx_vfrintrne_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfrintrne_d(__m128d _1) { return (__m128d)__builtin_lsx_vfrintrne_d((v2f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfrintrz_s(__m128 _1) { return (__m128)__builtin_lsx_vfrintrz_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfrintrz_d(__m128d _1) { return (__m128d)__builtin_lsx_vfrintrz_d((v2f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfrintrp_s(__m128 _1) { return (__m128)__builtin_lsx_vfrintrp_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfrintrp_d(__m128d _1) { return 
(__m128d)__builtin_lsx_vfrintrp_d((v2f64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfrintrm_s(__m128 _1) { return (__m128)__builtin_lsx_vfrintrm_s((v4f32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d __lsx_vfrintrm_d(__m128d _1) { return (__m128d)__builtin_lsx_vfrintrm_d((v2f64)_1); } #define __lsx_vstelm_b(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ((void)__builtin_lsx_vstelm_b((v16i8)(_1), (void *)(_2), (_3), (_4))) #define __lsx_vstelm_h(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ((void)__builtin_lsx_vstelm_h((v8i16)(_1), (void *)(_2), (_3), (_4))) #define __lsx_vstelm_w(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ((void)__builtin_lsx_vstelm_w((v4i32)(_1), (void *)(_2), (_3), (_4))) #define __lsx_vstelm_d(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ((void)__builtin_lsx_vstelm_d((v2i64)(_1), (void *)(_2), (_3), (_4))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwev_d_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwev_d_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwev_w_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwev_w_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwev_h_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwev_h_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwod_d_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwod_d_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwod_w_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwod_w_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwod_h_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwod_h_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwev_d_wu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwev_d_wu((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwev_w_hu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwev_w_hu((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwev_h_bu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwev_h_bu((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwod_d_wu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwod_d_wu((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwod_w_hu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwod_w_hu((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwod_h_bu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwod_h_bu((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwev_d_wu_w(__m128i 
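/* Usage sketch (illustrative only): the __lsx_vstelm_* macros store a single
   lane to memory; the third operand is a small immediate offset (scaled as
   documented in the ISA manual) and the fourth selects the lane.  Assuming
   `dst` points at writable int32 storage and `v` holds 32-bit lanes:

       __lsx_vstelm_w(v, dst, 0, 3);   store lane 3 of v at dst
*/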
_1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwev_d_wu_w((v4u32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwev_w_hu_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwev_w_hu_h((v8u16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwev_h_bu_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwev_h_bu_b((v16u8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwod_d_wu_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwod_d_wu_w((v4u32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwod_w_hu_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwod_w_hu_h((v8u16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwod_h_bu_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwod_h_bu_b((v16u8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsubwev_d_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsubwev_d_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsubwev_w_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsubwev_w_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsubwev_h_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsubwev_h_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsubwod_d_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsubwod_d_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsubwod_w_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsubwod_w_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsubwod_h_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsubwod_h_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsubwev_d_wu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsubwev_d_wu((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsubwev_w_hu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsubwev_w_hu((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsubwev_h_bu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsubwev_h_bu((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsubwod_d_wu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsubwod_d_wu((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsubwod_w_hu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsubwod_w_hu((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsubwod_h_bu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsubwod_h_bu((v16u8)_1, 
(v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwev_q_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwev_q_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwod_q_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwod_q_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwev_q_du(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwev_q_du((v2u64)_1, (v2u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwod_q_du(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwod_q_du((v2u64)_1, (v2u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsubwev_q_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsubwev_q_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsubwod_q_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsubwod_q_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsubwev_q_du(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsubwev_q_du((v2u64)_1, (v2u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsubwod_q_du(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsubwod_q_du((v2u64)_1, (v2u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwev_q_du_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwev_q_du_d((v2u64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vaddwod_q_du_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vaddwod_q_du_d((v2u64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwev_d_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwev_d_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwev_w_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwev_w_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwev_h_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwev_h_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwod_d_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwod_d_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwod_w_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwod_w_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwod_h_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwod_h_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwev_d_wu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwev_d_wu((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
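/* Usage sketch (illustrative only): the vaddwev / vaddwod wrappers (and the
   vsubw* / vmulw* families) combine the even- or odd-indexed lanes of their
   two sources into lanes of twice the width, so a full widening add of 32-bit
   lanes into 64-bit lanes takes one even step and one odd step.  Assuming `a`
   and `b` hold signed 32-bit lanes:

       __m128i even_sums = __lsx_vaddwev_d_w(a, b);   a[0]+b[0], a[2]+b[2] as 64-bit
       __m128i odd_sums  = __lsx_vaddwod_d_w(a, b);   a[1]+b[1], a[3]+b[3] as 64-bit
*/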
__m128i __lsx_vmulwev_w_hu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwev_w_hu((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwev_h_bu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwev_h_bu((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwod_d_wu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwod_d_wu((v4u32)_1, (v4u32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwod_w_hu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwod_w_hu((v8u16)_1, (v8u16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwod_h_bu(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwod_h_bu((v16u8)_1, (v16u8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwev_d_wu_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwev_d_wu_w((v4u32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwev_w_hu_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwev_w_hu_h((v8u16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwev_h_bu_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwev_h_bu_b((v16u8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwod_d_wu_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwod_d_wu_w((v4u32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwod_w_hu_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwod_w_hu_h((v8u16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwod_h_bu_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwod_h_bu_b((v16u8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwev_q_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwev_q_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwod_q_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwod_q_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwev_q_du(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwev_q_du((v2u64)_1, (v2u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwod_q_du(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwod_q_du((v2u64)_1, (v2u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwev_q_du_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwev_q_du_d((v2u64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmulwod_q_du_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vmulwod_q_du_d((v2u64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vhaddw_q_d(__m128i _1, __m128i _2) { return 
(__m128i)__builtin_lsx_vhaddw_q_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vhaddw_qu_du(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vhaddw_qu_du((v2u64)_1, (v2u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vhsubw_q_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vhsubw_q_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vhsubw_qu_du(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vhsubw_qu_du((v2u64)_1, (v2u64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwev_d_w(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwev_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwev_w_h(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwev_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwev_h_b(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwev_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwev_d_wu(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwev_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwev_w_hu(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwev_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwev_h_bu(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwev_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwod_d_w(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwod_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwod_w_h(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwod_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwod_h_b(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwod_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwod_d_wu(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwod_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwod_w_hu(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwod_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwod_h_bu(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwod_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwev_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { 
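/* Usage sketch (illustrative only): the vmaddwev / vmaddwod wrappers fold the
   widening multiply and the accumulation into one step; as the casts above
   show, the first argument is the already-wide accumulator and the other two
   are the narrow multiplicands.  Assuming `acc` holds 64-bit sums and `a`,
   `b` hold signed 32-bit lanes:

       acc = __lsx_vmaddwev_d_w(acc, a, b);   acc += products of the even lanes
       acc = __lsx_vmaddwod_d_w(acc, a, b);   acc += products of the odd lanes
*/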
return (__m128i)__builtin_lsx_vmaddwev_d_wu_w((v2i64)_1, (v4u32)_2, (v4i32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwev_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwev_w_hu_h((v4i32)_1, (v8u16)_2, (v8i16)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwev_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwev_h_bu_b((v8i16)_1, (v16u8)_2, (v16i8)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwod_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwod_d_wu_w((v2i64)_1, (v4u32)_2, (v4i32)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwod_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwod_w_hu_h((v4i32)_1, (v8u16)_2, (v8i16)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwod_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwod_h_bu_b((v8i16)_1, (v16u8)_2, (v16i8)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwev_q_d(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwev_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwod_q_d(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwod_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwev_q_du(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwev_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwod_q_du(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwod_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwev_q_du_d(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwev_q_du_d((v2i64)_1, (v2u64)_2, (v2i64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmaddwod_q_du_d(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vmaddwod_q_du_d((v2i64)_1, (v2u64)_2, (v2i64)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vrotr_b(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vrotr_b((v16i8)_1, (v16i8)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vrotr_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vrotr_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vrotr_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vrotr_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vrotr_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vrotr_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vadd_q(__m128i _1, __m128i _2) { return 
(__m128i)__builtin_lsx_vadd_q((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vsub_q(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vsub_q((v2i64)_1, (v2i64)_2); } #define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) \ ((__m128i)__builtin_lsx_vldrepl_b((void const *)(_1), (_2))) #define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) \ ((__m128i)__builtin_lsx_vldrepl_h((void const *)(_1), (_2))) #define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) \ ((__m128i)__builtin_lsx_vldrepl_w((void const *)(_1), (_2))) #define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) \ ((__m128i)__builtin_lsx_vldrepl_d((void const *)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmskgez_b(__m128i _1) { return (__m128i)__builtin_lsx_vmskgez_b((v16i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vmsknz_b(__m128i _1) { return (__m128i)__builtin_lsx_vmsknz_b((v16i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vexth_h_b(__m128i _1) { return (__m128i)__builtin_lsx_vexth_h_b((v16i8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vexth_w_h(__m128i _1) { return (__m128i)__builtin_lsx_vexth_w_h((v8i16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vexth_d_w(__m128i _1) { return (__m128i)__builtin_lsx_vexth_d_w((v4i32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vexth_q_d(__m128i _1) { return (__m128i)__builtin_lsx_vexth_q_d((v2i64)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vexth_hu_bu(__m128i _1) { return (__m128i)__builtin_lsx_vexth_hu_bu((v16u8)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vexth_wu_hu(__m128i _1) { return (__m128i)__builtin_lsx_vexth_wu_hu((v8u16)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vexth_du_wu(__m128i _1) { return (__m128i)__builtin_lsx_vexth_du_wu((v4u32)_1); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vexth_qu_du(__m128i _1) { return (__m128i)__builtin_lsx_vexth_qu_du((v2u64)_1); } #define __lsx_vrotri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ((__m128i)__builtin_lsx_vrotri_b((v16i8)(_1), (_2))) #define __lsx_vrotri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ((__m128i)__builtin_lsx_vrotri_h((v8i16)(_1), (_2))) #define __lsx_vrotri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ((__m128i)__builtin_lsx_vrotri_w((v4i32)(_1), (_2))) #define __lsx_vrotri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ((__m128i)__builtin_lsx_vrotri_d((v2i64)(_1), (_2))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vextl_q_d(__m128i _1) { return (__m128i)__builtin_lsx_vextl_q_d((v2i64)_1); } #define __lsx_vsrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ((__m128i)__builtin_lsx_vsrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) #define __lsx_vsrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ((__m128i)__builtin_lsx_vsrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) #define __lsx_vsrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ((__m128i)__builtin_lsx_vsrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) #define __lsx_vsrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) 
\ ((__m128i)__builtin_lsx_vsrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) #define __lsx_vsrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ((__m128i)__builtin_lsx_vsrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) #define __lsx_vsrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ((__m128i)__builtin_lsx_vsrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) #define __lsx_vsrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ((__m128i)__builtin_lsx_vsrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) #define __lsx_vsrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ((__m128i)__builtin_lsx_vsrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) #define __lsx_vssrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ((__m128i)__builtin_lsx_vssrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) #define __lsx_vssrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ((__m128i)__builtin_lsx_vssrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) #define __lsx_vssrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ((__m128i)__builtin_lsx_vssrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) #define __lsx_vssrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ((__m128i)__builtin_lsx_vssrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) #define __lsx_vssrlni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ((__m128i)__builtin_lsx_vssrlni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) #define __lsx_vssrlni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ((__m128i)__builtin_lsx_vssrlni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) #define __lsx_vssrlni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ((__m128i)__builtin_lsx_vssrlni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) #define __lsx_vssrlni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ((__m128i)__builtin_lsx_vssrlni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) #define __lsx_vssrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ((__m128i)__builtin_lsx_vssrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) #define __lsx_vssrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ((__m128i)__builtin_lsx_vssrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) #define __lsx_vssrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ((__m128i)__builtin_lsx_vssrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) #define __lsx_vssrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ((__m128i)__builtin_lsx_vssrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) #define __lsx_vssrlrni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ((__m128i)__builtin_lsx_vssrlrni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) #define __lsx_vssrlrni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ((__m128i)__builtin_lsx_vssrlrni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) #define __lsx_vssrlrni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ((__m128i)__builtin_lsx_vssrlrni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) #define __lsx_vssrlrni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ((__m128i)__builtin_lsx_vssrlrni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) #define __lsx_vsrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ((__m128i)__builtin_lsx_vsrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) #define __lsx_vsrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ((__m128i)__builtin_lsx_vsrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) #define __lsx_vsrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ((__m128i)__builtin_lsx_vsrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) #define __lsx_vsrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ((__m128i)__builtin_lsx_vsrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) #define __lsx_vsrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ 
((__m128i)__builtin_lsx_vsrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) #define __lsx_vsrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ((__m128i)__builtin_lsx_vsrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) #define __lsx_vsrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ((__m128i)__builtin_lsx_vsrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) #define __lsx_vsrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ((__m128i)__builtin_lsx_vsrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) #define __lsx_vssrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ((__m128i)__builtin_lsx_vssrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) #define __lsx_vssrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ((__m128i)__builtin_lsx_vssrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) #define __lsx_vssrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ((__m128i)__builtin_lsx_vssrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) #define __lsx_vssrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ((__m128i)__builtin_lsx_vssrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) #define __lsx_vssrani_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ((__m128i)__builtin_lsx_vssrani_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) #define __lsx_vssrani_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ((__m128i)__builtin_lsx_vssrani_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) #define __lsx_vssrani_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ((__m128i)__builtin_lsx_vssrani_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) #define __lsx_vssrani_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ((__m128i)__builtin_lsx_vssrani_du_q((v2u64)(_1), (v2i64)(_2), (_3))) #define __lsx_vssrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ((__m128i)__builtin_lsx_vssrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) #define __lsx_vssrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ((__m128i)__builtin_lsx_vssrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) #define __lsx_vssrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ((__m128i)__builtin_lsx_vssrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) #define __lsx_vssrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ((__m128i)__builtin_lsx_vssrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) #define __lsx_vssrarni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ((__m128i)__builtin_lsx_vssrarni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) #define __lsx_vssrarni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ((__m128i)__builtin_lsx_vssrarni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) #define __lsx_vssrarni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ((__m128i)__builtin_lsx_vssrarni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) #define __lsx_vssrarni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ((__m128i)__builtin_lsx_vssrarni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) #define __lsx_vpermi_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ((__m128i)__builtin_lsx_vpermi_w((v4i32)(_1), (v4i32)(_2), (_3))) #define __lsx_vld(/*void **/ _1, /*si12*/ _2) \ ((__m128i)__builtin_lsx_vld((void const *)(_1), (_2))) #define __lsx_vst(/*__m128i*/ _1, /*void **/ _2, /*si12*/ _3) \ ((void)__builtin_lsx_vst((v16i8)(_1), (void *)(_2), (_3))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssrlrn_b_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssrlrn_b_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssrlrn_h_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssrlrn_h_w((v4i32)_1, (v4i32)_2); } extern 
__inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssrlrn_w_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssrlrn_w_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssrln_b_h(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssrln_b_h((v8i16)_1, (v8i16)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssrln_h_w(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssrln_h_w((v4i32)_1, (v4i32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vssrln_w_d(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vssrln_w_d((v2i64)_1, (v2i64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vorn_v(__m128i _1, __m128i _2) { return (__m128i)__builtin_lsx_vorn_v((v16i8)_1, (v16i8)_2); } #define __lsx_vldi(/*i13*/ _1) ((__m128i)__builtin_lsx_vldi((_1))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vshuf_b(__m128i _1, __m128i _2, __m128i _3) { return (__m128i)__builtin_lsx_vshuf_b((v16i8)_1, (v16i8)_2, (v16i8)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vldx(void const *_1, long int _2) { return (__m128i)__builtin_lsx_vldx((void const *)_1, (long int)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void __lsx_vstx(__m128i _1, void *_2, long int _3) { return (void)__builtin_lsx_vstx((v16i8)_1, (void *)_2, (long int)_3); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vextl_qu_du(__m128i _1) { return (__m128i)__builtin_lsx_vextl_qu_du((v2u64)_1); } #define __lsx_bnz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_b((v16u8)(_1))) #define __lsx_bnz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_d((v2u64)(_1))) #define __lsx_bnz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_h((v8u16)(_1))) #define __lsx_bnz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_v((v16u8)(_1))) #define __lsx_bnz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_w((v4u32)(_1))) #define __lsx_bz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bz_b((v16u8)(_1))) #define __lsx_bz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bz_d((v2u64)(_1))) #define __lsx_bz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bz_h((v8u16)(_1))) #define __lsx_bz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bz_v((v16u8)(_1))) #define __lsx_bz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bz_w((v4u32)(_1))) extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_caf_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_caf_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_caf_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_caf_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_ceq_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_ceq_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_ceq_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_ceq_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_cle_d(__m128d _1, __m128d _2) { 
return (__m128i)__builtin_lsx_vfcmp_cle_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_cle_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_cle_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_clt_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_clt_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_clt_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_clt_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_cne_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_cne_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_cne_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_cne_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_cor_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_cor_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_cor_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_cor_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_cueq_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_cueq_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_cueq_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_cueq_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_cule_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_cule_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_cule_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_cule_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_cult_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_cult_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_cult_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_cult_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_cun_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_cun_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_cune_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_cune_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_cune_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_cune_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_cun_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_cun_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i __lsx_vfcmp_saf_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_saf_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_saf_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_saf_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_seq_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_seq_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_seq_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_seq_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_sle_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_sle_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_sle_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_sle_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_slt_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_slt_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_slt_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_slt_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_sne_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_sne_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_sne_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_sne_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_sor_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_sor_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_sor_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_sor_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_sueq_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_sueq_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_sueq_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_sueq_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_sule_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_sule_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_sule_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_sule_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_sult_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_sult_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_sult_s(__m128 _1, __m128 _2) { return 
(__m128i)__builtin_lsx_vfcmp_sult_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_sun_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_sun_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_sune_d(__m128d _1, __m128d _2) { return (__m128i)__builtin_lsx_vfcmp_sune_d((v2f64)_1, (v2f64)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_sune_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_sune_s((v4f32)_1, (v4f32)_2); } extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vfcmp_sun_s(__m128 _1, __m128 _2) { return (__m128i)__builtin_lsx_vfcmp_sun_s((v4f32)_1, (v4f32)_2); } #define __lsx_vrepli_b(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_b((_1))) #define __lsx_vrepli_d(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_d((_1))) #define __lsx_vrepli_h(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_h((_1))) #define __lsx_vrepli_w(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_w((_1))) #endif /* defined(__loongarch_sx) */ #endif /* _LOONGSON_SXINTRIN_H */ /*===- __clang_openmp_device_functions.h - OpenMP device function declares -=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_OPENMP_DEVICE_FUNCTIONS_H__ #define __CLANG_OPENMP_DEVICE_FUNCTIONS_H__ #ifndef _OPENMP #error "This file is for OpenMP compilation only." #endif #ifdef __cplusplus extern "C" { #endif #pragma omp begin declare variant match( \ device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) #define __CUDA__ #define __OPENMP_NVPTX__ /// Include declarations for libdevice functions. #include <__clang_cuda_libdevice_declares.h> /// Provide definitions for these functions. #include <__clang_cuda_device_functions.h> #undef __OPENMP_NVPTX__ #undef __CUDA__ #pragma omp end declare variant #ifdef __AMDGCN__ #pragma omp begin declare variant match(device = {arch(amdgcn)}) // Import types which will be used by __clang_hip_libdevice_declares.h #ifndef __cplusplus #include <stdint.h> #endif #define __OPENMP_AMDGCN__ #pragma push_macro("__device__") #define __device__ /// Include declarations for libdevice functions. #include <__clang_hip_libdevice_declares.h> #pragma pop_macro("__device__") #undef __OPENMP_AMDGCN__ #pragma omp end declare variant #endif #ifdef __cplusplus } // extern "C" #endif // Ensure we make `_ZdlPv`, aka. `operator delete(void*)` available without the // need to `include <cstdlib>` in C++ mode. #ifdef __cplusplus // We require malloc/free. #include <cstdlib> #pragma push_macro("OPENMP_NOEXCEPT") #if __cplusplus >= 201103L #define OPENMP_NOEXCEPT noexcept #else #define OPENMP_NOEXCEPT #endif // Device overrides for non-placement new and delete. inline void *operator new(__SIZE_TYPE__ size) { if (size == 0) size = 1; return ::malloc(size); } inline void *operator new[](__SIZE_TYPE__ size) { return ::operator new(size); } inline void operator delete(void *ptr)OPENMP_NOEXCEPT { ::free(ptr); } inline void operator delete[](void *ptr) OPENMP_NOEXCEPT { ::operator delete(ptr); } // Sized delete, C++14 only. 
#if __cplusplus >= 201402L inline void operator delete(void *ptr, __SIZE_TYPE__ size)OPENMP_NOEXCEPT { ::operator delete(ptr); } inline void operator delete[](void *ptr, __SIZE_TYPE__ size) OPENMP_NOEXCEPT { ::operator delete(ptr); } #endif #pragma pop_macro("OPENMP_NOEXCEPT") #endif #endif //===-- xray_log_interface.h ----------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is a part of XRay, a function call tracing system. // // APIs for installing a new logging implementation. // //===----------------------------------------------------------------------===// /// /// XRay allows users to implement their own logging handlers and install them /// to replace the default runtime-controllable implementation that comes with /// compiler-rt/xray. The "flight data recorder" (FDR) mode implementation uses /// this API to install itself in an XRay-enabled binary. See /// compiler-rt/lib/xray_fdr_logging.{h,cc} for details of that implementation. /// /// The high-level usage pattern for these APIs look like the following: /// /// // We choose the mode which we'd like to install, and check whether this /// // has succeeded. Each mode will have their own set of flags they will /// // support, outside of the global XRay configuration options that are /// // defined in the XRAY_OPTIONS environment variable. /// auto select_status = __xray_log_select_mode("xray-fdr"); /// if (select_status != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) { /// // This failed, we should not proceed with attempting to initialise /// // the currently selected mode. /// return; /// } /// /// // Once that's done, we can now attempt to configure the implementation. /// // To do this, we provide the string flags configuration for the mode. /// auto config_status = __xray_log_init_mode( /// "xray-fdr", "verbosity=1 some_flag=1 another_flag=2"); /// if (config_status != XRayLogInitStatus::XRAY_LOG_INITIALIZED) { /// // deal with the error here, if there is one. /// } /// /// // When the log implementation has had the chance to initialize, we can /// // now patch the instrumentation points. Note that we could have patched /// // the instrumentation points first, but there's no strict ordering to /// // these operations. /// auto patch_status = __xray_patch(); /// if (patch_status != XRayPatchingStatus::SUCCESS) { /// // deal with the error here, if it is an error. /// } /// /// // If we want to stop the implementation, we can then finalize it (before /// // optionally flushing the log). /// auto fin_status = __xray_log_finalize(); /// if (fin_status != XRayLogInitStatus::XRAY_LOG_FINALIZED) { /// // deal with the error here, if it is an error. /// } /// /// // We can optionally wait before flushing the log to give other threads a /// // chance to see that the implementation is already finalized. Also, at /// // this point we can optionally unpatch the instrumentation points to /// // reduce overheads at runtime. /// auto unpatch_status = __xray_unpatch(); /// if (unpatch_status != XRayPatchingStatus::SUCCESS) { /// // deal with the error here, if it is an error. /// } /// /// // If there are logs or data to be flushed somewhere, we can do so only /// // after we've finalized the log. 
Some implementations may not actually /// // have anything to log (it might keep the data in memory, or periodically /// // be logging the data anyway). /// auto flush_status = __xray_log_flushLog(); /// if (flush_status != XRayLogFlushStatus::XRAY_LOG_FLUSHED) { /// // deal with the error here, if it is an error. /// } /// /// // Alternatively, we can go through the buffers ourselves without /// // relying on the implementations' flushing semantics (if the /// // implementation supports exporting this data directly). /// auto MyBufferProcessor = +[](const char* mode, XRayBuffer buffer) { /// // Check the "mode" to see if it's something we know how to handle... /// // and/or do something with an XRayBuffer instance. /// }; /// auto process_status = __xray_log_process_buffers(MyBufferProcessor); /// if (process_status != XRayLogFlushStatus::XRAY_LOG_FLUSHED) { /// // deal with the error here, if it is an error. /// } /// /// NOTE: Before calling __xray_patch() again, consider re-initializing the /// implementation first. Some implementations might stay in an "off" state when /// they are finalized, while some might be in an invalid/unknown state. /// #ifndef XRAY_XRAY_LOG_INTERFACE_H #define XRAY_XRAY_LOG_INTERFACE_H #include "xray/xray_interface.h" #include extern "C" { /// This enum defines the valid states in which the logging implementation can /// be at. enum XRayLogInitStatus { /// The default state is uninitialized, and in case there were errors in the /// initialization, the implementation MUST return XRAY_LOG_UNINITIALIZED. XRAY_LOG_UNINITIALIZED = 0, /// Some implementations support multi-stage init (or asynchronous init), and /// may return XRAY_LOG_INITIALIZING to signal callers of the API that /// there's an ongoing initialization routine running. This allows /// implementations to support concurrent threads attempting to initialize, /// while only signalling success in one. XRAY_LOG_INITIALIZING = 1, /// When an implementation is done initializing, it MUST return /// XRAY_LOG_INITIALIZED. When users call `__xray_patch()`, they are /// guaranteed that the implementation installed with /// `__xray_set_log_impl(...)` has been initialized. XRAY_LOG_INITIALIZED = 2, /// Some implementations might support multi-stage finalization (or /// asynchronous finalization), and may return XRAY_LOG_FINALIZING to signal /// callers of the API that there's an ongoing finalization routine running. /// This allows implementations to support concurrent threads attempting to /// finalize, while only signalling success/completion in one. XRAY_LOG_FINALIZING = 3, /// When an implementation is done finalizing, it MUST return /// XRAY_LOG_FINALIZED. It is up to the implementation to determine what the /// semantics of a finalized implementation is. Some implementations might /// allow re-initialization once the log is finalized, while some might always /// be on (and that finalization is a no-op). XRAY_LOG_FINALIZED = 4, }; /// This enum allows an implementation to signal log flushing operations via /// `__xray_log_flushLog()`, and the state of flushing the log. enum XRayLogFlushStatus { XRAY_LOG_NOT_FLUSHING = 0, XRAY_LOG_FLUSHING = 1, XRAY_LOG_FLUSHED = 2, }; /// This enum indicates the installation state of a logging implementation, when /// associating a mode to a particular logging implementation through /// `__xray_log_register_impl(...)` or through `__xray_log_select_mode(...`. 
enum XRayLogRegisterStatus { XRAY_REGISTRATION_OK = 0, XRAY_DUPLICATE_MODE = 1, XRAY_MODE_NOT_FOUND = 2, XRAY_INCOMPLETE_IMPL = 3, }; /// A valid XRay logging implementation MUST provide all of the function /// pointers in XRayLogImpl when being installed through `__xray_set_log_impl`. /// To be precise, ALL the functions pointers MUST NOT be nullptr. struct XRayLogImpl { /// The log initialization routine provided by the implementation, always /// provided with the following parameters: /// /// - buffer size (unused) /// - maximum number of buffers (unused) /// - a pointer to an argument struct that the implementation MUST handle /// - the size of the argument struct /// /// See XRayLogInitStatus for details on what the implementation MUST return /// when called. /// /// If the implementation needs to install handlers aside from the 0-argument /// function call handler, it MUST do so in this initialization handler. /// /// See xray_interface.h for available handler installation routines. XRayLogInitStatus (*log_init)(size_t, size_t, void *, size_t); /// The log finalization routine provided by the implementation. /// /// See XRayLogInitStatus for details on what the implementation MUST return /// when called. XRayLogInitStatus (*log_finalize)(); /// The 0-argument function call handler. XRay logging implementations MUST /// always have a handler for function entry and exit events. In case the /// implementation wants to support arg1 (or other future extensions to XRay /// logging) those MUST be installed by the installed 'log_init' handler. /// /// Because we didn't want to change the ABI of this struct, the arg1 handler /// may be silently overwritten during initialization as well. void (*handle_arg0)(int32_t, XRayEntryType); /// The log implementation provided routine for when __xray_log_flushLog() is /// called. /// /// See XRayLogFlushStatus for details on what the implementation MUST return /// when called. XRayLogFlushStatus (*flush_log)(); }; /// DEPRECATED: Use the mode registration workflow instead with /// __xray_log_register_mode(...) and __xray_log_select_mode(...). See the /// documentation for those function. /// /// This function installs a new logging implementation that XRay will use. In /// case there are any nullptr members in Impl, XRay will *uninstall any /// existing implementations*. It does NOT patch the instrumentation points. /// /// NOTE: This function does NOT attempt to finalize the currently installed /// implementation. Use with caution. /// /// It is guaranteed safe to call this function in the following states: /// /// - When the implementation is UNINITIALIZED. /// - When the implementation is FINALIZED. /// - When there is no current implementation installed. /// /// It is logging implementation defined what happens when this function is /// called while in any other states. void __xray_set_log_impl(XRayLogImpl Impl); /// This function registers a logging implementation against a "mode" /// identifier. This allows multiple modes to be registered, and chosen at /// runtime using the same mode identifier through /// `__xray_log_select_mode(...)`. /// /// We treat the Mode identifier as a null-terminated byte string, as the /// identifier used when retrieving the log impl. /// /// Returns: /// - XRAY_REGISTRATION_OK on success. /// - XRAY_DUPLICATE_MODE when an implementation is already associated with /// the provided Mode; does not update the already-registered /// implementation. 
XRayLogRegisterStatus __xray_log_register_mode(const char *Mode, XRayLogImpl Impl); /// This function selects the implementation associated with Mode that has been /// registered through __xray_log_register_mode(...) and installs that /// implementation (as if through calling __xray_set_log_impl(...)). The same /// caveats apply to __xray_log_select_mode(...) as with /// __xray_log_set_log_impl(...). /// /// Returns: /// - XRAY_REGISTRATION_OK on success. /// - XRAY_MODE_NOT_FOUND if there is no implementation associated with Mode; /// does not update the currently installed implementation. XRayLogRegisterStatus __xray_log_select_mode(const char *Mode); /// Returns an identifier for the currently selected XRay mode chosen through /// the __xray_log_select_mode(...) function call. Returns nullptr if there is /// no currently installed mode. const char *__xray_log_get_current_mode(); /// This function removes the currently installed implementation. It will also /// uninstall any handlers that have been previously installed. It does NOT /// unpatch the instrumentation points. /// /// NOTE: This function does NOT attempt to finalize the currently installed /// implementation. Use with caution. /// /// It is guaranteed safe to call this function in the following states: /// /// - When the implementation is UNINITIALIZED. /// - When the implementation is FINALIZED. /// - When there is no current implementation installed. /// /// It is logging implementation defined what happens when this function is /// called while in any other states. void __xray_remove_log_impl(); /// DEPRECATED: Use __xray_log_init_mode() instead, and provide all the options /// in string form. /// Invokes the installed implementation initialization routine. See /// XRayLogInitStatus for what the return values mean. XRayLogInitStatus __xray_log_init(size_t BufferSize, size_t MaxBuffers, void *Args, size_t ArgsSize); /// Invokes the installed initialization routine, which *must* support the /// string based form. /// /// NOTE: When this API is used, we still invoke the installed initialization /// routine, but we will call it with the following convention to signal that we /// are using the string form: /// /// - BufferSize = 0 /// - MaxBuffers = 0 /// - ArgsSize = 0 /// - Args will be the pointer to the character buffer representing the /// configuration. /// /// FIXME: Updating the XRayLogImpl struct is an ABI breaking change. When we /// are ready to make a breaking change, we should clean this up appropriately. XRayLogInitStatus __xray_log_init_mode(const char *Mode, const char *Config); /// Like __xray_log_init_mode(...) this version allows for providing /// configurations that might have non-null-terminated strings. This will /// operate similarly to __xray_log_init_mode, with the exception that /// |ArgsSize| will be what |ConfigSize| is. XRayLogInitStatus __xray_log_init_mode_bin(const char *Mode, const char *Config, size_t ConfigSize); /// Invokes the installed implementation finalization routine. See /// XRayLogInitStatus for what the return values mean. XRayLogInitStatus __xray_log_finalize(); /// Invokes the install implementation log flushing routine. See /// XRayLogFlushStatus for what the return values mean. XRayLogFlushStatus __xray_log_flushLog(); /// An XRayBuffer represents a section of memory which can be treated by log /// processing functions as bytes stored in the logging implementation's /// buffers. 
struct XRayBuffer { const void *Data; size_t Size; }; /// Registers an iterator function which takes an XRayBuffer argument, then /// returns another XRayBuffer function representing the next buffer. When the /// Iterator function returns an empty XRayBuffer (Data = nullptr, Size = 0), /// this signifies the end of the buffers. /// /// The first invocation of this Iterator function will always take an empty /// XRayBuffer (Data = nullptr, Size = 0). void __xray_log_set_buffer_iterator(XRayBuffer (*Iterator)(XRayBuffer)); /// Removes the currently registered buffer iterator function. void __xray_log_remove_buffer_iterator(); /// Invokes the provided handler to process data maintained by the logging /// handler. This API will be provided raw access to the data available in /// memory from the logging implementation. The callback function must: /// /// 1) Not modify the data, to avoid running into undefined behaviour. /// /// 2) Either know the data layout, or treat the data as raw bytes for later /// interpretation. /// /// This API is best used in place of the `__xray_log_flushLog()` implementation /// above to enable the caller to provide an alternative means of extracting the /// data from the XRay implementation. /// /// Implementations MUST then provide: /// /// 1) A function that will return an XRayBuffer. Functions that return an /// "empty" XRayBuffer signifies that there are no more buffers to be /// processed. This function should be registered through the /// `__xray_log_set_buffer_iterator(...)` function. /// /// 2) Its own means of converting data it holds in memory into an XRayBuffer /// structure. /// /// See XRayLogFlushStatus for what the return values mean. /// XRayLogFlushStatus __xray_log_process_buffers(void (*Processor)(const char *, XRayBuffer)); } // extern "C" #endif // XRAY_XRAY_LOG_INTERFACE_H /*===---- __clang_hip_stdlib.h - Device-side HIP math support --------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_HIP_STDLIB_H__ #if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__) #error "This file is for HIP and OpenMP AMDGCN device compilation only." 
#endif #if !defined(__cplusplus) #include <limits.h> #ifdef __OPENMP_AMDGCN__ #define __DEVICE__ static inline __attribute__((always_inline, nothrow)) #else #define __DEVICE__ static __device__ inline __attribute__((always_inline)) #endif __DEVICE__ int abs(int __x) { int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1); return (__x ^ __sgn) - __sgn; } __DEVICE__ long labs(long __x) { long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1); return (__x ^ __sgn) - __sgn; } __DEVICE__ long long llabs(long long __x) { long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1); return (__x ^ __sgn) - __sgn; } #endif // !defined(__cplusplus) #endif // #define __CLANG_HIP_STDLIB_H__ /*===---- arm_acle.h - ARM Non-Neon intrinsics -----------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * * The Arm C Language Extensions specifications can be found in the following * link: https://github.com/ARM-software/acle/releases * * The ACLE section numbers are subject to change. When consulting the * specifications, it is recommended to search using section titles if * the section numbers look outdated. * *===-----------------------------------------------------------------------=== */ #ifndef __ARM_ACLE_H #define __ARM_ACLE_H #ifndef __ARM_ACLE #error "ACLE intrinsics support not enabled." #endif #include <stdint.h> #if defined(__cplusplus) extern "C" { #endif /* 7 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */ /* 7.3 Memory barriers */ #if !__has_builtin(__dmb) #define __dmb(i) __builtin_arm_dmb(i) #endif #if !__has_builtin(__dsb) #define __dsb(i) __builtin_arm_dsb(i) #endif #if !__has_builtin(__isb) #define __isb(i) __builtin_arm_isb(i) #endif /* 7.4 Hints */ #if !__has_builtin(__wfi) static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) { __builtin_arm_wfi(); } #endif #if !__has_builtin(__wfe) static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfe(void) { __builtin_arm_wfe(); } #endif #if !__has_builtin(__sev) static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sev(void) { __builtin_arm_sev(); } #endif #if !__has_builtin(__sevl) static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sevl(void) { __builtin_arm_sevl(); } #endif #if !__has_builtin(__yield) static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(void) { __builtin_arm_yield(); } #endif #if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE #define __dbg(t) __builtin_arm_dbg(t) #endif /* 7.5 Swap */ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __swp(uint32_t __x, volatile uint32_t *__p) { uint32_t v; do v = __builtin_arm_ldrex(__p); while (__builtin_arm_strex(__x, __p)); return v; } /* 7.6 Memory prefetch intrinsics */ /* 7.6.1 Data prefetch */ #define __pld(addr) __pldx(0, 0, 0, addr) #if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE #define __pldx(access_kind, cache_level, retention_policy, addr) \ __builtin_arm_prefetch(addr, access_kind, 1) #else #define __pldx(access_kind, cache_level, retention_policy, addr) \ __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1) #endif /* 7.6.2 Instruction prefetch */ #define __pli(addr) __plix(0, 0, addr) #if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE #define __plix(cache_level, retention_policy, addr) \ __builtin_arm_prefetch(addr, 0, 0) #else #define __plix(cache_level, 
retention_policy, addr) \ __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0) #endif /* 7.7 NOP */ #if !defined(_MSC_VER) || !defined(__aarch64__) static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) { __builtin_arm_nop(); } #endif /* 8 DATA-PROCESSING INTRINSICS */ /* 8.2 Miscellaneous data-processing intrinsics */ /* ROR */ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __ror(uint32_t __x, uint32_t __y) { __y %= 32; if (__y == 0) return __x; return (__x >> __y) | (__x << (32 - __y)); } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __rorll(uint64_t __x, uint32_t __y) { __y %= 64; if (__y == 0) return __x; return (__x >> __y) | (__x << (64 - __y)); } static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) __rorl(unsigned long __x, uint32_t __y) { #if __SIZEOF_LONG__ == 4 return __ror(__x, __y); #else return __rorll(__x, __y); #endif } /* CLZ */ static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) __clz(uint32_t __t) { return __builtin_arm_clz(__t); } static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) __clzl(unsigned long __t) { #if __SIZEOF_LONG__ == 4 return __builtin_arm_clz(__t); #else return __builtin_arm_clz64(__t); #endif } static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) __clzll(uint64_t __t) { return __builtin_arm_clz64(__t); } /* CLS */ static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) __cls(uint32_t __t) { return __builtin_arm_cls(__t); } static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) __clsl(unsigned long __t) { #if __SIZEOF_LONG__ == 4 return __builtin_arm_cls(__t); #else return __builtin_arm_cls64(__t); #endif } static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) __clsll(uint64_t __t) { return __builtin_arm_cls64(__t); } /* REV */ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __rev(uint32_t __t) { return __builtin_bswap32(__t); } static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) __revl(unsigned long __t) { #if __SIZEOF_LONG__ == 4 return __builtin_bswap32(__t); #else return __builtin_bswap64(__t); #endif } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __revll(uint64_t __t) { return __builtin_bswap64(__t); } /* REV16 */ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __rev16(uint32_t __t) { return __ror(__rev(__t), 16); } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __rev16ll(uint64_t __t) { return (((uint64_t)__rev16(__t >> 32)) << 32) | (uint64_t)__rev16((uint32_t)__t); } static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) __rev16l(unsigned long __t) { #if __SIZEOF_LONG__ == 4 return __rev16(__t); #else return __rev16ll(__t); #endif } /* REVSH */ static __inline__ int16_t __attribute__((__always_inline__, __nodebug__)) __revsh(int16_t __t) { return (int16_t)__builtin_bswap16((uint16_t)__t); } /* RBIT */ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __rbit(uint32_t __t) { return __builtin_arm_rbit(__t); } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __rbitll(uint64_t __t) { #if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE return (((uint64_t)__builtin_arm_rbit(__t)) << 32) | __builtin_arm_rbit(__t >> 32); #else return __builtin_arm_rbit64(__t); #endif } 
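/* Editorial sketch (not part of the upstream arm_acle.h) illustrating how the
 * rotate and byte-reverse helpers defined above compose, assuming a 32-bit
 * unsigned word on an ACLE-enabled target. __rev16() above is built as
 * __ror(__rev(x), 16), a full byte swap followed by a 16-bit rotate, which
 * swaps the bytes within each halfword. The `__y %= 32; if (__y == 0) return
 * __x;` guard in __ror() keeps the expression from shifting a 32-bit value by
 * 32 bits (undefined behaviour) when the rotate amount is a multiple of the
 * word size. The helper name below is illustrative only. */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__example_swap_halfword_bytes(uint32_t __x) {
  /* For __x == 0x11223344u: __rev() yields 0x44332211u, and rotating right by
   * 16 yields 0x22114433u, i.e. each halfword byte-swapped; equivalent to
   * calling __rev16(__x) directly. */
  return __ror(__rev(__x), 16);
}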
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) __rbitl(unsigned long __t) { #if __SIZEOF_LONG__ == 4 return __rbit(__t); #else return __rbitll(__t); #endif } /* 8.3 16-bit multiplications */ #if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) __smulbb(int32_t __a, int32_t __b) { return __builtin_arm_smulbb(__a, __b); } static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) __smulbt(int32_t __a, int32_t __b) { return __builtin_arm_smulbt(__a, __b); } static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) __smultb(int32_t __a, int32_t __b) { return __builtin_arm_smultb(__a, __b); } static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) __smultt(int32_t __a, int32_t __b) { return __builtin_arm_smultt(__a, __b); } static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) __smulwb(int32_t __a, int32_t __b) { return __builtin_arm_smulwb(__a, __b); } static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) __smulwt(int32_t __a, int32_t __b) { return __builtin_arm_smulwt(__a, __b); } #endif /* * 8.4 Saturating intrinsics * * FIXME: Change guard to their corresponding __ARM_FEATURE flag when Q flag * intrinsics are implemented and the flag is enabled. */ /* 8.4.1 Width-specified saturation intrinsics */ #if defined(__ARM_FEATURE_SAT) && __ARM_FEATURE_SAT #define __ssat(x, y) __builtin_arm_ssat(x, y) #define __usat(x, y) __builtin_arm_usat(x, y) #endif /* 8.4.2 Saturating addition and subtraction intrinsics */ #if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __qadd(int32_t __t, int32_t __v) { return __builtin_arm_qadd(__t, __v); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __qsub(int32_t __t, int32_t __v) { return __builtin_arm_qsub(__t, __v); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __qdbl(int32_t __t) { return __builtin_arm_qadd(__t, __t); } #endif /* 8.4.3 Accumulating multiplications */ #if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smlabb(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlabb(__a, __b, __c); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smlabt(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlabt(__a, __b, __c); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smlatb(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlatb(__a, __b, __c); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smlatt(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlatt(__a, __b, __c); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smlawb(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlawb(__a, __b, __c); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smlawt(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlawt(__a, __b, __c); } #endif /* 8.5.4 Parallel 16-bit saturation */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 #define __ssat16(x, y) __builtin_arm_ssat16(x, y) #define __usat16(x, y) __builtin_arm_usat16(x, y) #endif /* 8.5.5 Packing and unpacking */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 typedef int32_t int8x4_t; typedef 
int32_t int16x2_t; typedef uint32_t uint8x4_t; typedef uint32_t uint16x2_t; static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __sxtab16(int16x2_t __a, int8x4_t __b) { return __builtin_arm_sxtab16(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __sxtb16(int8x4_t __a) { return __builtin_arm_sxtb16(__a); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __uxtab16(int16x2_t __a, int8x4_t __b) { return __builtin_arm_uxtab16(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __uxtb16(int8x4_t __a) { return __builtin_arm_uxtb16(__a); } #endif /* 8.5.6 Parallel selection */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) __sel(uint8x4_t __a, uint8x4_t __b) { return __builtin_arm_sel(__a, __b); } #endif /* 8.5.7 Parallel 8-bit addition and subtraction */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) __qadd8(int8x4_t __a, int8x4_t __b) { return __builtin_arm_qadd8(__a, __b); } static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) __qsub8(int8x4_t __a, int8x4_t __b) { return __builtin_arm_qsub8(__a, __b); } static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) __sadd8(int8x4_t __a, int8x4_t __b) { return __builtin_arm_sadd8(__a, __b); } static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) __shadd8(int8x4_t __a, int8x4_t __b) { return __builtin_arm_shadd8(__a, __b); } static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) __shsub8(int8x4_t __a, int8x4_t __b) { return __builtin_arm_shsub8(__a, __b); } static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) __ssub8(int8x4_t __a, int8x4_t __b) { return __builtin_arm_ssub8(__a, __b); } static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) __uadd8(uint8x4_t __a, uint8x4_t __b) { return __builtin_arm_uadd8(__a, __b); } static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) __uhadd8(uint8x4_t __a, uint8x4_t __b) { return __builtin_arm_uhadd8(__a, __b); } static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) __uhsub8(uint8x4_t __a, uint8x4_t __b) { return __builtin_arm_uhsub8(__a, __b); } static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) __uqadd8(uint8x4_t __a, uint8x4_t __b) { return __builtin_arm_uqadd8(__a, __b); } static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) __uqsub8(uint8x4_t __a, uint8x4_t __b) { return __builtin_arm_uqsub8(__a, __b); } static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) __usub8(uint8x4_t __a, uint8x4_t __b) { return __builtin_arm_usub8(__a, __b); } #endif /* 8.5.8 Sum of 8-bit absolute differences */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __usad8(uint8x4_t __a, uint8x4_t __b) { return __builtin_arm_usad8(__a, __b); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) { return __builtin_arm_usada8(__a, __b, __c); } #endif /* 8.5.9 Parallel 16-bit addition and subtraction */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __qadd16(int16x2_t 
__a, int16x2_t __b) { return __builtin_arm_qadd16(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __qasx(int16x2_t __a, int16x2_t __b) { return __builtin_arm_qasx(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __qsax(int16x2_t __a, int16x2_t __b) { return __builtin_arm_qsax(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __qsub16(int16x2_t __a, int16x2_t __b) { return __builtin_arm_qsub16(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __sadd16(int16x2_t __a, int16x2_t __b) { return __builtin_arm_sadd16(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __sasx(int16x2_t __a, int16x2_t __b) { return __builtin_arm_sasx(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __shadd16(int16x2_t __a, int16x2_t __b) { return __builtin_arm_shadd16(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __shasx(int16x2_t __a, int16x2_t __b) { return __builtin_arm_shasx(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __shsax(int16x2_t __a, int16x2_t __b) { return __builtin_arm_shsax(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __shsub16(int16x2_t __a, int16x2_t __b) { return __builtin_arm_shsub16(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __ssax(int16x2_t __a, int16x2_t __b) { return __builtin_arm_ssax(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __ssub16(int16x2_t __a, int16x2_t __b) { return __builtin_arm_ssub16(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __uadd16(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_uadd16(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __uasx(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_uasx(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __uhadd16(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_uhadd16(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __uhasx(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_uhasx(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __uhsax(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_uhsax(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __uhsub16(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_uhsub16(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __uqadd16(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_uqadd16(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __uqasx(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_uqasx(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __uqsax(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_uqsax(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __uqsub16(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_uqsub16(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __usax(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_usax(__a, __b); } static 
__inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __usub16(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_usub16(__a, __b); } #endif /* 8.5.10 Parallel 16-bit multiplication */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smlad(int16x2_t __a, int16x2_t __b, int32_t __c) { return __builtin_arm_smlad(__a, __b, __c); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smladx(int16x2_t __a, int16x2_t __b, int32_t __c) { return __builtin_arm_smladx(__a, __b, __c); } static __inline__ int64_t __attribute__((__always_inline__, __nodebug__)) __smlald(int16x2_t __a, int16x2_t __b, int64_t __c) { return __builtin_arm_smlald(__a, __b, __c); } static __inline__ int64_t __attribute__((__always_inline__, __nodebug__)) __smlaldx(int16x2_t __a, int16x2_t __b, int64_t __c) { return __builtin_arm_smlaldx(__a, __b, __c); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smlsd(int16x2_t __a, int16x2_t __b, int32_t __c) { return __builtin_arm_smlsd(__a, __b, __c); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smlsdx(int16x2_t __a, int16x2_t __b, int32_t __c) { return __builtin_arm_smlsdx(__a, __b, __c); } static __inline__ int64_t __attribute__((__always_inline__, __nodebug__)) __smlsld(int16x2_t __a, int16x2_t __b, int64_t __c) { return __builtin_arm_smlsld(__a, __b, __c); } static __inline__ int64_t __attribute__((__always_inline__, __nodebug__)) __smlsldx(int16x2_t __a, int16x2_t __b, int64_t __c) { return __builtin_arm_smlsldx(__a, __b, __c); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smuad(int16x2_t __a, int16x2_t __b) { return __builtin_arm_smuad(__a, __b); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smuadx(int16x2_t __a, int16x2_t __b) { return __builtin_arm_smuadx(__a, __b); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smusd(int16x2_t __a, int16x2_t __b) { return __builtin_arm_smusd(__a, __b); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smusdx(int16x2_t __a, int16x2_t __b) { return __builtin_arm_smusdx(__a, __b); } #endif /* 8.6 Floating-point data-processing intrinsics */ #if (defined(__ARM_FEATURE_DIRECTED_ROUNDING) && \ (__ARM_FEATURE_DIRECTED_ROUNDING)) && \ (defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE) static __inline__ double __attribute__((__always_inline__, __nodebug__)) __rintn(double __a) { return __builtin_roundeven(__a); } static __inline__ float __attribute__((__always_inline__, __nodebug__)) __rintnf(float __a) { return __builtin_roundevenf(__a); } #endif /* 8.8 CRC32 intrinsics */ #if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) || \ (defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE) static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc"))) __crc32b(uint32_t __a, uint8_t __b) { return __builtin_arm_crc32b(__a, __b); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc"))) __crc32h(uint32_t __a, uint16_t __b) { return __builtin_arm_crc32h(__a, __b); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc"))) __crc32w(uint32_t __a, uint32_t __b) { return __builtin_arm_crc32w(__a, __b); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc"))) __crc32d(uint32_t __a, uint64_t __b) { return __builtin_arm_crc32d(__a, 
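/* Usage sketch (illustrative only; requires __ARM_FEATURE_SIMD32). __smlad
   multiplies the two 16-bit lanes of its operands pairwise and adds both
   products to the 32-bit accumulator, so a dot product over arrays of packed
   lane pairs can be written as below; dot_s16 and its parameters are
   hypothetical names.

     #include <arm_acle.h>
     #include <stdint.h>
     #include <stddef.h>

     static int32_t dot_s16(const int16x2_t *a, const int16x2_t *b, size_t n) {
       int32_t acc = 0;
       for (size_t i = 0; i < n; ++i)
         acc = __smlad(a[i], b[i], acc);   // acc += a.lo*b.lo + a.hi*b.hi
       return acc;
     }
*/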
__b); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc"))) __crc32cb(uint32_t __a, uint8_t __b) { return __builtin_arm_crc32cb(__a, __b); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc"))) __crc32ch(uint32_t __a, uint16_t __b) { return __builtin_arm_crc32ch(__a, __b); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc"))) __crc32cw(uint32_t __a, uint32_t __b) { return __builtin_arm_crc32cw(__a, __b); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc"))) __crc32cd(uint32_t __a, uint64_t __b) { return __builtin_arm_crc32cd(__a, __b); } #endif /* 8.6 Floating-point data-processing intrinsics */ /* Armv8.3-A Javascript conversion intrinsic */ #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("v8.3a"))) __jcvt(double __a) { return __builtin_arm_jcvt(__a); } #endif /* Armv8.5-A FP rounding intrinsics */ #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a"))) __rint32zf(float __a) { return __builtin_arm_rint32zf(__a); } static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a"))) __rint32z(double __a) { return __builtin_arm_rint32z(__a); } static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a"))) __rint64zf(float __a) { return __builtin_arm_rint64zf(__a); } static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a"))) __rint64z(double __a) { return __builtin_arm_rint64z(__a); } static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a"))) __rint32xf(float __a) { return __builtin_arm_rint32xf(__a); } static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a"))) __rint32x(double __a) { return __builtin_arm_rint32x(__a); } static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a"))) __rint64xf(float __a) { return __builtin_arm_rint64xf(__a); } static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a"))) __rint64x(double __a) { return __builtin_arm_rint64x(__a); } #endif /* 8.9 Armv8.7-A load/store 64-byte intrinsics */ #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE typedef struct { uint64_t val[8]; } data512_t; static __inline__ data512_t __attribute__((__always_inline__, __nodebug__, target("ls64"))) __arm_ld64b(const void *__addr) { data512_t __value; __builtin_arm_ld64b(__addr, __value.val); return __value; } static __inline__ void __attribute__((__always_inline__, __nodebug__, target("ls64"))) __arm_st64b(void *__addr, data512_t __value) { __builtin_arm_st64b(__addr, __value.val); } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("ls64"))) __arm_st64bv(void *__addr, data512_t __value) { return __builtin_arm_st64bv(__addr, __value.val); } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("ls64"))) __arm_st64bv0(void *__addr, data512_t __value) { return __builtin_arm_st64bv0(__addr, __value.val); } #endif /* 11.1 Special register intrinsics */ #define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg) #define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg) #define __arm_rsr128(sysreg) __builtin_arm_rsr128(sysreg) #define __arm_rsrp(sysreg) __builtin_arm_rsrp(sysreg) #define __arm_rsrf(sysreg) __builtin_bit_cast(float, 
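/* Usage sketch (illustrative only; assumes the CRC32 extension is available
   and enabled, e.g. __ARM_FEATURE_CRC32 or an AArch64 build with the "crc"
   target feature such as -march=armv8-a+crc). With the conventional all-ones
   initial value and a final inversion, feeding each byte to __crc32b yields a
   CRC-32 checksum in the common zlib-style convention; crc32_bytes, buf and
   len are hypothetical names.

     #include <arm_acle.h>
     #include <stdint.h>
     #include <stddef.h>

     static uint32_t crc32_bytes(const uint8_t *buf, size_t len) {
       uint32_t crc = 0xFFFFFFFFu;
       for (size_t i = 0; i < len; ++i)
         crc = __crc32b(crc, buf[i]);   // fold in one byte at a time
       return ~crc;
     }
*/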
__arm_rsr(sysreg)) #define __arm_rsrf64(sysreg) __builtin_bit_cast(double, __arm_rsr64(sysreg)) #define __arm_wsr(sysreg, v) __builtin_arm_wsr(sysreg, v) #define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v) #define __arm_wsr128(sysreg, v) __builtin_arm_wsr128(sysreg, v) #define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v) #define __arm_wsrf(sysreg, v) __arm_wsr(sysreg, __builtin_bit_cast(uint32_t, v)) #define __arm_wsrf64(sysreg, v) __arm_wsr64(sysreg, __builtin_bit_cast(uint64_t, v)) /* 10.3 MTE intrinsics */ #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE #define __arm_mte_create_random_tag(__ptr, __mask) __builtin_arm_irg(__ptr, __mask) #define __arm_mte_increment_tag(__ptr, __tag_offset) __builtin_arm_addg(__ptr, __tag_offset) #define __arm_mte_exclude_tag(__ptr, __excluded) __builtin_arm_gmi(__ptr, __excluded) #define __arm_mte_get_tag(__ptr) __builtin_arm_ldg(__ptr) #define __arm_mte_set_tag(__ptr) __builtin_arm_stg(__ptr) #define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb) /* 18 memcpy family of operations intrinsics - MOPS */ #define __arm_mops_memset_tag(__tagged_address, __value, __size) \ __builtin_arm_mops_memset_tag(__tagged_address, __value, __size) #endif /* 11.3 Coprocessor Intrinsics */ #if defined(__ARM_FEATURE_COPROC) #if (__ARM_FEATURE_COPROC & 0x1) #if (__ARM_ARCH < 8) #define __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) \ __builtin_arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) #endif /* __ARM_ARCH < 8 */ #define __arm_ldc(coproc, CRd, p) __builtin_arm_ldc(coproc, CRd, p) #define __arm_stc(coproc, CRd, p) __builtin_arm_stc(coproc, CRd, p) #define __arm_mcr(coproc, opc1, value, CRn, CRm, opc2) \ __builtin_arm_mcr(coproc, opc1, value, CRn, CRm, opc2) #define __arm_mrc(coproc, opc1, CRn, CRm, opc2) \ __builtin_arm_mrc(coproc, opc1, CRn, CRm, opc2) #if (__ARM_ARCH != 4) && (__ARM_ARCH < 8) #define __arm_ldcl(coproc, CRd, p) __builtin_arm_ldcl(coproc, CRd, p) #define __arm_stcl(coproc, CRd, p) __builtin_arm_stcl(coproc, CRd, p) #endif /* (__ARM_ARCH != 4) && (__ARM_ARCH != 8) */ #if (__ARM_ARCH_8M_MAIN__) || (__ARM_ARCH_8_1M_MAIN__) #define __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) \ __builtin_arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) #define __arm_ldcl(coproc, CRd, p) __builtin_arm_ldcl(coproc, CRd, p) #define __arm_stcl(coproc, CRd, p) __builtin_arm_stcl(coproc, CRd, p) #endif /* ___ARM_ARCH_8M_MAIN__ */ #endif /* __ARM_FEATURE_COPROC & 0x1 */ #if (__ARM_FEATURE_COPROC & 0x2) #define __arm_cdp2(coproc, opc1, CRd, CRn, CRm, opc2) \ __builtin_arm_cdp2(coproc, opc1, CRd, CRn, CRm, opc2) #define __arm_ldc2(coproc, CRd, p) __builtin_arm_ldc2(coproc, CRd, p) #define __arm_stc2(coproc, CRd, p) __builtin_arm_stc2(coproc, CRd, p) #define __arm_ldc2l(coproc, CRd, p) __builtin_arm_ldc2l(coproc, CRd, p) #define __arm_stc2l(coproc, CRd, p) __builtin_arm_stc2l(coproc, CRd, p) #define __arm_mcr2(coproc, opc1, value, CRn, CRm, opc2) \ __builtin_arm_mcr2(coproc, opc1, value, CRn, CRm, opc2) #define __arm_mrc2(coproc, opc1, CRn, CRm, opc2) \ __builtin_arm_mrc2(coproc, opc1, CRn, CRm, opc2) #endif #if (__ARM_FEATURE_COPROC & 0x4) #define __arm_mcrr(coproc, opc1, value, CRm) \ __builtin_arm_mcrr(coproc, opc1, value, CRm) #define __arm_mrrc(coproc, opc1, CRm) __builtin_arm_mrrc(coproc, opc1, CRm) #endif #if (__ARM_FEATURE_COPROC & 0x8) #define __arm_mcrr2(coproc, opc1, value, CRm) \ __builtin_arm_mcrr2(coproc, opc1, value, CRm) #define __arm_mrrc2(coproc, opc1, CRm) __builtin_arm_mrrc2(coproc, opc1, CRm) #endif #endif // __ARM_FEATURE_COPROC /* 17 
Transactional Memory Extension (TME) Intrinsics */ #if defined(__ARM_FEATURE_TME) && __ARM_FEATURE_TME #define _TMFAILURE_REASON 0x00007fffu #define _TMFAILURE_RTRY 0x00008000u #define _TMFAILURE_CNCL 0x00010000u #define _TMFAILURE_MEM 0x00020000u #define _TMFAILURE_IMP 0x00040000u #define _TMFAILURE_ERR 0x00080000u #define _TMFAILURE_SIZE 0x00100000u #define _TMFAILURE_NEST 0x00200000u #define _TMFAILURE_DBG 0x00400000u #define _TMFAILURE_INT 0x00800000u #define _TMFAILURE_TRIVIAL 0x01000000u #define __tstart() __builtin_arm_tstart() #define __tcommit() __builtin_arm_tcommit() #define __tcancel(__arg) __builtin_arm_tcancel(__arg) #define __ttest() __builtin_arm_ttest() #endif /* __ARM_FEATURE_TME */ /* 8.7 Armv8.5-A Random number generation intrinsics */ #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand"))) __rndr(uint64_t *__p) { return __builtin_arm_rndr(__p); } static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand"))) __rndrrs(uint64_t *__p) { return __builtin_arm_rndrrs(__p); } #endif #if defined(__cplusplus) } #endif #endif /* __ARM_ACLE_H */ //===---- arm_cmse.h - Arm CMSE support -----------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef __ARM_CMSE_H #define __ARM_CMSE_H #if (__ARM_FEATURE_CMSE & 0x1) #include #include #define __ARM_CMSE_SECURE_MODE (__ARM_FEATURE_CMSE & 0x2) #define CMSE_MPU_READWRITE 1 /* checks if readwrite_ok field is set */ #define CMSE_AU_NONSECURE 2 /* checks if permissions have secure field unset */ #define CMSE_MPU_UNPRIV 4 /* sets T flag on TT insrtuction */ #define CMSE_MPU_READ 8 /* checks if read_ok field is set */ #define CMSE_MPU_NONSECURE 16 /* sets A flag, checks if secure field unset */ #define CMSE_NONSECURE (CMSE_AU_NONSECURE | CMSE_MPU_NONSECURE) #define cmse_check_pointed_object(p, f) \ cmse_check_address_range((p), sizeof(*(p)), (f)) #if defined(__cplusplus) extern "C" { #endif typedef union { struct cmse_address_info { #ifdef __ARM_BIG_ENDIAN /* __ARM_BIG_ENDIAN */ #if (__ARM_CMSE_SECURE_MODE) unsigned idau_region : 8; unsigned idau_region_valid : 1; unsigned secure : 1; unsigned nonsecure_readwrite_ok : 1; unsigned nonsecure_read_ok : 1; #else unsigned : 12; #endif unsigned readwrite_ok : 1; unsigned read_ok : 1; #if (__ARM_CMSE_SECURE_MODE) unsigned sau_region_valid : 1; #else unsigned : 1; #endif unsigned mpu_region_valid : 1; #if (__ARM_CMSE_SECURE_MODE) unsigned sau_region : 8; #else unsigned : 8; #endif unsigned mpu_region : 8; #else /* __ARM_LITTLE_ENDIAN */ unsigned mpu_region : 8; #if (__ARM_CMSE_SECURE_MODE) unsigned sau_region : 8; #else unsigned : 8; #endif unsigned mpu_region_valid : 1; #if (__ARM_CMSE_SECURE_MODE) unsigned sau_region_valid : 1; #else unsigned : 1; #endif unsigned read_ok : 1; unsigned readwrite_ok : 1; #if (__ARM_CMSE_SECURE_MODE) unsigned nonsecure_read_ok : 1; unsigned nonsecure_readwrite_ok : 1; unsigned secure : 1; unsigned idau_region_valid : 1; unsigned idau_region : 8; #else unsigned : 12; #endif #endif /*__ARM_LITTLE_ENDIAN */ } flags; unsigned value; } cmse_address_info_t; static cmse_address_info_t __attribute__((__always_inline__, __nodebug__)) cmse_TT(void *__p) { cmse_address_info_t __u; __u.value = 
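/* Usage sketch (illustrative; requires __ARM_FEATURE_TME). __tstart() returns
   0 when a transaction starts and otherwise a failure status whose bits can be
   tested against the _TMFAILURE_* masks; _TMFAILURE_RTRY suggests the
   transaction may succeed if retried. try_transaction, do_work and
   fallback_path are hypothetical names.

     #include <arm_acle.h>
     #include <stdint.h>

     extern void do_work(void);
     extern void fallback_path(void);

     static void try_transaction(void) {
       for (int attempt = 0; attempt < 8; ++attempt) {
         uint64_t status = __tstart();
         if (status == 0) {            // transactional path
           do_work();
           __tcommit();
           return;
         }
         if (!(status & _TMFAILURE_RTRY))
           break;                      // permanent failure: stop retrying
       }
       fallback_path();                // e.g. a lock-based fallback
     }
*/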
__builtin_arm_cmse_TT(__p); return __u; } static cmse_address_info_t __attribute__((__always_inline__, __nodebug__)) cmse_TTT(void *__p) { cmse_address_info_t __u; __u.value = __builtin_arm_cmse_TTT(__p); return __u; } #if __ARM_CMSE_SECURE_MODE static cmse_address_info_t __attribute__((__always_inline__, __nodebug__)) cmse_TTA(void *__p) { cmse_address_info_t __u; __u.value = __builtin_arm_cmse_TTA(__p); return __u; } static cmse_address_info_t __attribute__((__always_inline__, __nodebug__)) cmse_TTAT(void *__p) { cmse_address_info_t __u; __u.value = __builtin_arm_cmse_TTAT(__p); return __u; } #endif #define cmse_TT_fptr(p) cmse_TT(__builtin_bit_cast(void *, (p))) #define cmse_TTT_fptr(p) cmse_TTT(__builtin_bit_cast(void *, (p))) #if __ARM_CMSE_SECURE_MODE #define cmse_TTA_fptr(p) cmse_TTA(__builtin_bit_cast(void *, (p))) #define cmse_TTAT_fptr(p) cmse_TTAT(__builtin_bit_cast(void *, (p))) #endif static void *__attribute__((__always_inline__)) cmse_check_address_range(void *__pb, size_t __s, int __flags) { uintptr_t __begin = (uintptr_t)__pb; uintptr_t __end = __begin + __s - 1; if (__end < __begin) return NULL; /* wrap around check */ /* Check whether the range crosses a 32-bytes aligned address */ const int __single_check = (__begin ^ __end) < 0x20u; /* execute the right variant of the TT instructions */ void *__pe = (void *)__end; cmse_address_info_t __permb, __perme; switch (__flags & (CMSE_MPU_UNPRIV | CMSE_MPU_NONSECURE)) { case 0: __permb = cmse_TT(__pb); __perme = __single_check ? __permb : cmse_TT(__pe); break; case CMSE_MPU_UNPRIV: __permb = cmse_TTT(__pb); __perme = __single_check ? __permb : cmse_TTT(__pe); break; #if __ARM_CMSE_SECURE_MODE case CMSE_MPU_NONSECURE: __permb = cmse_TTA(__pb); __perme = __single_check ? __permb : cmse_TTA(__pe); break; case CMSE_MPU_UNPRIV | CMSE_MPU_NONSECURE: __permb = cmse_TTAT(__pb); __perme = __single_check ? __permb : cmse_TTAT(__pe); break; #endif /* if CMSE_NONSECURE is specified w/o __ARM_CMSE_SECURE_MODE */ default: return NULL; } /* check that the range does not cross MPU, SAU, or IDAU region boundaries */ if (__permb.value != __perme.value) return NULL; #if !(__ARM_CMSE_SECURE_MODE) /* CMSE_AU_NONSECURE is only supported when __ARM_FEATURE_CMSE & 0x2 */ if (__flags & CMSE_AU_NONSECURE) return NULL; #endif /* check the permission on the range */ switch (__flags & ~(CMSE_MPU_UNPRIV | CMSE_MPU_NONSECURE)) { #if (__ARM_CMSE_SECURE_MODE) case CMSE_MPU_READ | CMSE_MPU_READWRITE | CMSE_AU_NONSECURE: case CMSE_MPU_READWRITE | CMSE_AU_NONSECURE: return __permb.flags.nonsecure_readwrite_ok ? __pb : NULL; case CMSE_MPU_READ | CMSE_AU_NONSECURE: return __permb.flags.nonsecure_read_ok ? __pb : NULL; case CMSE_AU_NONSECURE: return __permb.flags.secure ? NULL : __pb; #endif case CMSE_MPU_READ | CMSE_MPU_READWRITE: case CMSE_MPU_READWRITE: return __permb.flags.readwrite_ok ? __pb : NULL; case CMSE_MPU_READ: return __permb.flags.read_ok ? 
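/* Usage sketch (illustrative; for an Armv8-M target where arm_cmse.h is
   usable). cmse_TT fills a cmse_address_info_t whose bitfields describe the
   MPU/SAU/IDAU attributes of an address; the helper below simply reports
   whether the current privilege level may read and write at that address.
   can_readwrite is a hypothetical name.

     #include <arm_cmse.h>

     static int can_readwrite(void *p) {
       cmse_address_info_t info = cmse_TT(p);
       return info.flags.readwrite_ok;   // 1 if read/write access is allowed
     }
*/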
__pb : NULL; default: return NULL; } } #if __ARM_CMSE_SECURE_MODE static int __attribute__((__always_inline__, __nodebug__)) cmse_nonsecure_caller(void) { return !((uintptr_t)__builtin_return_address(0) & 1); } #define cmse_nsfptr_create(p) \ __builtin_bit_cast(__typeof__(p), \ (__builtin_bit_cast(uintptr_t, p) & ~(uintptr_t)1)) #define cmse_is_nsfptr(p) ((__builtin_bit_cast(uintptr_t, p) & 1) == 0) #endif /* __ARM_CMSE_SECURE_MODE */ void __attribute__((__noreturn__)) cmse_abort(void); #if defined(__cplusplus) } #endif #endif /* (__ARM_FEATURE_CMSE & 0x1) */ #endif /* __ARM_CMSE_H */ /*===---- bmiintrin.h - BMI intrinsics -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __BMIINTRIN_H #define __BMIINTRIN_H /* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT instruction behaves as BSF on non-BMI targets, there is code that expects to use it as a potentially faster version of BSF. */ #define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) /// Counts the number of trailing zero bits in the operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c TZCNT instruction. /// /// \param __X /// An unsigned 16-bit integer whose trailing zeros are to be counted. /// \returns An unsigned 16-bit integer containing the number of trailing zero /// bits in the operand. /// \see _tzcnt_u16 static __inline__ unsigned short __RELAXED_FN_ATTRS __tzcnt_u16(unsigned short __X) { return __builtin_ia32_tzcnt_u16(__X); } /// Counts the number of trailing zero bits in the operand. /// /// \headerfile /// /// \code /// unsigned short _tzcnt_u16(unsigned short __X); /// \endcode /// /// This intrinsic corresponds to the \c TZCNT instruction. /// /// \param __X /// An unsigned 16-bit integer whose trailing zeros are to be counted. /// \returns An unsigned 16-bit integer containing the number of trailing zero /// bits in the operand. /// \see __tzcnt_u16 #define _tzcnt_u16 __tzcnt_u16 /// Counts the number of trailing zero bits in the operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c TZCNT instruction. /// /// \param __X /// An unsigned 32-bit integer whose trailing zeros are to be counted. /// \returns An unsigned 32-bit integer containing the number of trailing zero /// bits in the operand. /// \see { _mm_tzcnt_32 _tzcnt_u32 } static __inline__ unsigned int __RELAXED_FN_ATTRS __tzcnt_u32(unsigned int __X) { return __builtin_ia32_tzcnt_u32(__X); } /// Counts the number of trailing zero bits in the operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c TZCNT instruction. /// /// \param __X /// An unsigned 32-bit integer whose trailing zeros are to be counted. /// \returns A 32-bit integer containing the number of trailing zero bits in /// the operand. /// \see { __tzcnt_u32 _tzcnt_u32 } static __inline__ int __RELAXED_FN_ATTRS _mm_tzcnt_32(unsigned int __X) { return (int)__builtin_ia32_tzcnt_u32(__X); } /// Counts the number of trailing zero bits in the operand. /// /// \headerfile /// /// \code /// unsigned int _tzcnt_u32(unsigned int __X); /// \endcode /// /// This intrinsic corresponds to the \c TZCNT instruction. 
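/* Usage sketch (illustrative; assumes a secure-state build where
   __ARM_CMSE_SECURE_MODE is set, since CMSE_NONSECURE is only meaningful
   there). A secure entry function can validate a buffer handed over from the
   non-secure world before touching it; copy_from_ns, dst, src and n are
   hypothetical names.

     #include <arm_cmse.h>
     #include <string.h>

     static int copy_from_ns(void *dst, const void *src, size_t n) {
       // cmse_check_address_range returns src when every byte of
       // [src, src + n) is readable from the non-secure side, else NULL.
       void *ok = cmse_check_address_range((void *)src, n,
                                           CMSE_NONSECURE | CMSE_MPU_READ);
       if (ok == NULL)
         return -1;                  // reject: range not non-secure readable
       memcpy(dst, src, n);
       return 0;
     }
*/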
/// /// \param __X /// An unsigned 32-bit integer whose trailing zeros are to be counted. /// \returns An unsigned 32-bit integer containing the number of trailing zero /// bits in the operand. /// \see { _mm_tzcnt_32 __tzcnt_u32 } #define _tzcnt_u32 __tzcnt_u32 #ifdef __x86_64__ /// Counts the number of trailing zero bits in the operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c TZCNT instruction. /// /// \param __X /// An unsigned 64-bit integer whose trailing zeros are to be counted. /// \returns An unsigned 64-bit integer containing the number of trailing zero /// bits in the operand. /// \see { _mm_tzcnt_64 _tzcnt_u64 } static __inline__ unsigned long long __RELAXED_FN_ATTRS __tzcnt_u64(unsigned long long __X) { return __builtin_ia32_tzcnt_u64(__X); } /// Counts the number of trailing zero bits in the operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c TZCNT instruction. /// /// \param __X /// An unsigned 64-bit integer whose trailing zeros are to be counted. /// \returns An 64-bit integer containing the number of trailing zero bits in /// the operand. /// \see { __tzcnt_u64 _tzcnt_u64 } static __inline__ long long __RELAXED_FN_ATTRS _mm_tzcnt_64(unsigned long long __X) { return (long long)__builtin_ia32_tzcnt_u64(__X); } /// Counts the number of trailing zero bits in the operand. /// /// \headerfile /// /// \code /// unsigned long long _tzcnt_u64(unsigned long long __X); /// \endcode /// /// This intrinsic corresponds to the \c TZCNT instruction. /// /// \param __X /// An unsigned 64-bit integer whose trailing zeros are to be counted. /// \returns An unsigned 64-bit integer containing the number of trailing zero /// bits in the operand. /// \see { _mm_tzcnt_64 __tzcnt_u64 #define _tzcnt_u64 __tzcnt_u64 #endif /* __x86_64__ */ #undef __RELAXED_FN_ATTRS #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__BMI__) /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi"))) /// Performs a bitwise AND of the second operand with the one's /// complement of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c ANDN instruction. /// /// \param __X /// An unsigned integer containing one of the operands. /// \param __Y /// An unsigned integer containing one of the operands. /// \returns An unsigned integer containing the bitwise AND of the second /// operand with the one's complement of the first operand. /// \see _andn_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS __andn_u32(unsigned int __X, unsigned int __Y) { return ~__X & __Y; } /// Performs a bitwise AND of the second operand with the one's /// complement of the first operand. /// /// \headerfile /// /// \code /// unsigned int _andn_u32(unsigned int __X, unsigned int __Y); /// \endcode /// /// This intrinsic corresponds to the \c ANDN instruction. /// /// \param __X /// An unsigned integer containing one of the operands. /// \param __Y /// An unsigned integer containing one of the operands. /// \returns An unsigned integer containing the bitwise AND of the second /// operand with the one's complement of the first operand. /// \see __andn_u32 #define _andn_u32 __andn_u32 /* AMD-specified, double-leading-underscore version of BEXTR */ /// Extracts the specified bits from the first operand and returns them /// in the least significant bits of the result. 
/// /// \headerfile /// /// This intrinsic corresponds to the \c BEXTR instruction. /// /// \param __X /// An unsigned integer whose bits are to be extracted. /// \param __Y /// An unsigned integer used to specify which bits are extracted. Bits [7:0] /// specify the index of the least significant bit. Bits [15:8] specify the /// number of bits to be extracted. /// \returns An unsigned integer whose least significant bits contain the /// extracted bits. /// \see _bextr_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS __bextr_u32(unsigned int __X, unsigned int __Y) { return __builtin_ia32_bextr_u32(__X, __Y); } /* Intel-specified, single-leading-underscore version of BEXTR */ /// Extracts the specified bits from the first operand and returns them /// in the least significant bits of the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c BEXTR instruction. /// /// \param __X /// An unsigned integer whose bits are to be extracted. /// \param __Y /// An unsigned integer used to specify the index of the least significant /// bit for the bits to be extracted. Bits [7:0] specify the index. /// \param __Z /// An unsigned integer used to specify the number of bits to be extracted. /// Bits [7:0] specify the number of bits. /// \returns An unsigned integer whose least significant bits contain the /// extracted bits. /// \see __bextr_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z) { return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); } /* Intel-specified, single-leading-underscore version of BEXTR2 */ /// Extracts the specified bits from the first operand and returns them /// in the least significant bits of the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c BEXTR instruction. /// /// \param __X /// An unsigned integer whose bits are to be extracted. /// \param __Y /// An unsigned integer used to specify which bits are extracted. Bits [7:0] /// specify the index of the least significant bit. Bits [15:8] specify the /// number of bits to be extracted. /// \returns An unsigned integer whose least significant bits contain the /// extracted bits. /// \see __bextr_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS _bextr2_u32(unsigned int __X, unsigned int __Y) { return __builtin_ia32_bextr_u32(__X, __Y); } /// Clears all bits in the source except for the least significant bit /// containing a value of 1 and returns the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c BLSI instruction. /// /// \param __X /// An unsigned integer whose bits are to be cleared. /// \returns An unsigned integer containing the result of clearing the bits from /// the source operand. /// \see _blsi_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS __blsi_u32(unsigned int __X) { return __X & -__X; } /// Clears all bits in the source except for the least significant bit /// containing a value of 1 and returns the result. /// /// \headerfile /// /// \code /// unsigned int _blsi_u32(unsigned int __X); /// \endcode /// /// This intrinsic corresponds to the \c BLSI instruction. /// /// \param __X /// An unsigned integer whose bits are to be cleared. /// \returns An unsigned integer containing the result of clearing the bits from /// the source operand. 
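/* Usage sketch (illustrative; requires BMI, e.g. compiling with -mbmi).
   _bextr_u32 takes a start bit index and a field width and returns that bit
   field right-aligned; extract_field, value, start and width are hypothetical
   names.

     #include <immintrin.h>

     static unsigned int extract_field(unsigned int value,
                                       unsigned int start,
                                       unsigned int width) {
       // e.g. extract_field(0xABCD1234u, 8, 8) == 0x12
       return _bextr_u32(value, start, width);
     }
*/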
/// \see __blsi_u32 #define _blsi_u32 __blsi_u32 /// Creates a mask whose bits are set to 1, using bit 0 up to and /// including the least significant bit that is set to 1 in the source /// operand and returns the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c BLSMSK instruction. /// /// \param __X /// An unsigned integer used to create the mask. /// \returns An unsigned integer containing the newly created mask. /// \see _blsmsk_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS __blsmsk_u32(unsigned int __X) { return __X ^ (__X - 1); } /// Creates a mask whose bits are set to 1, using bit 0 up to and /// including the least significant bit that is set to 1 in the source /// operand and returns the result. /// /// \headerfile /// /// \code /// unsigned int _blsmsk_u32(unsigned int __X); /// \endcode /// /// This intrinsic corresponds to the \c BLSMSK instruction. /// /// \param __X /// An unsigned integer used to create the mask. /// \returns An unsigned integer containing the newly created mask. /// \see __blsmsk_u32 #define _blsmsk_u32 __blsmsk_u32 /// Clears the least significant bit that is set to 1 in the source /// operand and returns the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c BLSR instruction. /// /// \param __X /// An unsigned integer containing the operand to be cleared. /// \returns An unsigned integer containing the result of clearing the source /// operand. /// \see _blsr_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS __blsr_u32(unsigned int __X) { return __X & (__X - 1); } /// Clears the least significant bit that is set to 1 in the source /// operand and returns the result. /// /// \headerfile /// /// \code /// unsigned int _bls4_u32(unsigned int __X); /// \endcode /// /// This intrinsic corresponds to the \c BLSR instruction. /// /// \param __X /// An unsigned integer containing the operand to be cleared. /// \returns An unsigned integer containing the result of clearing the source /// operand. /// \see __blsr_u32 #define _blsr_u32 __blsr_u32 #ifdef __x86_64__ /// Performs a bitwise AND of the second operand with the one's /// complement of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c ANDN instruction. /// /// \param __X /// An unsigned 64-bit integer containing one of the operands. /// \param __Y /// An unsigned 64-bit integer containing one of the operands. /// \returns An unsigned 64-bit integer containing the bitwise AND of the second /// operand with the one's complement of the first operand. /// \see _andn_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS __andn_u64 (unsigned long long __X, unsigned long long __Y) { return ~__X & __Y; } /// Performs a bitwise AND of the second operand with the one's /// complement of the first operand. /// /// \headerfile /// /// \code /// unsigned long long _andn_u64(unsigned long long __X, /// unsigned long long __Y); /// \endcode /// /// This intrinsic corresponds to the \c ANDN instruction. /// /// \param __X /// An unsigned 64-bit integer containing one of the operands. /// \param __Y /// An unsigned 64-bit integer containing one of the operands. /// \returns An unsigned 64-bit integer containing the bitwise AND of the second /// operand with the one's complement of the first operand. 
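/* Usage sketch (illustrative; __blsr_u32 requires BMI, e.g. -mbmi, while
   __tzcnt_u32 is usable even on non-BMI targets per the note earlier in this
   header). The classic set-bit iteration clears the lowest set bit on each
   pass; for_each_set_bit and visit are hypothetical names.

     #include <immintrin.h>

     static void for_each_set_bit(unsigned int mask,
                                  void (*visit)(unsigned int index)) {
       while (mask != 0) {
         visit(__tzcnt_u32(mask));   // index of the lowest set bit
         mask = __blsr_u32(mask);    // clear that bit and continue
       }
     }
*/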
/// \see __andn_u64 #define _andn_u64 __andn_u64 /* AMD-specified, double-leading-underscore version of BEXTR */ /// Extracts the specified bits from the first operand and returns them /// in the least significant bits of the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c BEXTR instruction. /// /// \param __X /// An unsigned 64-bit integer whose bits are to be extracted. /// \param __Y /// An unsigned 64-bit integer used to specify which bits are extracted. Bits /// [7:0] specify the index of the least significant bit. Bits [15:8] specify /// the number of bits to be extracted. /// \returns An unsigned 64-bit integer whose least significant bits contain the /// extracted bits. /// \see _bextr_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS __bextr_u64(unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_bextr_u64(__X, __Y); } /* Intel-specified, single-leading-underscore version of BEXTR */ /// Extracts the specified bits from the first operand and returns them /// in the least significant bits of the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c BEXTR instruction. /// /// \param __X /// An unsigned 64-bit integer whose bits are to be extracted. /// \param __Y /// An unsigned integer used to specify the index of the least significant /// bit for the bits to be extracted. Bits [7:0] specify the index. /// \param __Z /// An unsigned integer used to specify the number of bits to be extracted. /// Bits [7:0] specify the number of bits. /// \returns An unsigned 64-bit integer whose least significant bits contain the /// extracted bits. /// \see __bextr_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z) { return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); } /* Intel-specified, single-leading-underscore version of BEXTR2 */ /// Extracts the specified bits from the first operand and returns them /// in the least significant bits of the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c BEXTR instruction. /// /// \param __X /// An unsigned 64-bit integer whose bits are to be extracted. /// \param __Y /// An unsigned 64-bit integer used to specify which bits are extracted. Bits /// [7:0] specify the index of the least significant bit. Bits [15:8] specify /// the number of bits to be extracted. /// \returns An unsigned 64-bit integer whose least significant bits contain the /// extracted bits. /// \see __bextr_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS _bextr2_u64(unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_bextr_u64(__X, __Y); } /// Clears all bits in the source except for the least significant bit /// containing a value of 1 and returns the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c BLSI instruction. /// /// \param __X /// An unsigned 64-bit integer whose bits are to be cleared. /// \returns An unsigned 64-bit integer containing the result of clearing the /// bits from the source operand. /// \see _blsi_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blsi_u64(unsigned long long __X) { return __X & -__X; } /// Clears all bits in the source except for the least significant bit /// containing a value of 1 and returns the result. /// /// \headerfile /// /// \code /// unsigned long long _blsi_u64(unsigned long long __X); /// \endcode /// /// This intrinsic corresponds to the \c BLSI instruction. 
/// /// \param __X /// An unsigned 64-bit integer whose bits are to be cleared. /// \returns An unsigned 64-bit integer containing the result of clearing the /// bits from the source operand. /// \see __blsi_u64 #define _blsi_u64 __blsi_u64 /// Creates a mask whose bits are set to 1, using bit 0 up to and /// including the least significant bit that is set to 1 in the source /// operand and returns the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c BLSMSK instruction. /// /// \param __X /// An unsigned 64-bit integer used to create the mask. /// \returns An unsigned 64-bit integer containing the newly created mask. /// \see _blsmsk_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blsmsk_u64(unsigned long long __X) { return __X ^ (__X - 1); } /// Creates a mask whose bits are set to 1, using bit 0 up to and /// including the least significant bit that is set to 1 in the source /// operand and returns the result. /// /// \headerfile /// /// \code /// unsigned long long _blsmsk_u64(unsigned long long __X); /// \endcode /// /// This intrinsic corresponds to the \c BLSMSK instruction. /// /// \param __X /// An unsigned 64-bit integer used to create the mask. /// \returns An unsigned 64-bit integer containing the newly created mask. /// \see __blsmsk_u64 #define _blsmsk_u64 __blsmsk_u64 /// Clears the least significant bit that is set to 1 in the source /// operand and returns the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c BLSR instruction. /// /// \param __X /// An unsigned 64-bit integer containing the operand to be cleared. /// \returns An unsigned 64-bit integer containing the result of clearing the /// source operand. /// \see _blsr_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blsr_u64(unsigned long long __X) { return __X & (__X - 1); } /// Clears the least significant bit that is set to 1 in the source /// operand and returns the result. /// /// \headerfile /// /// \code /// unsigned long long _blsr_u64(unsigned long long __X); /// \endcode /// /// This intrinsic corresponds to the \c BLSR instruction. /// /// \param __X /// An unsigned 64-bit integer containing the operand to be cleared. /// \returns An unsigned 64-bit integer containing the result of clearing the /// source operand. /// \see __blsr_u64 #define _blsr_u64 __blsr_u64 #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS #endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) \ || defined(__BMI__) */ #endif /* __BMIINTRIN_H */ /*===---- builtins.h - Standard header for extra builtins -----------------===*\ * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * \*===----------------------------------------------------------------------===*/ /// Some legacy compilers have builtin definitions in a file named builtins.h. /// This header file has been added to allow compatibility with code that was /// written for those compilers. Code may have an include line for this file /// and to avoid an error an empty file with this name is provided. #ifndef __BUILTINS_H #define __BUILTINS_H #endif /* __BUILTINS_H */ hexagon_types.hhvx_hexagon_protos.hmmintrin.hs390intrin.hstdnoreturn.h/*===---- stdnoreturn.h - Standard header for noreturn macro ---------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
* See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __STDNORETURN_H #define __STDNORETURN_H #define noreturn _Noreturn #define __noreturn_is_defined 1 #if (defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L) && \ !defined(_CLANG_DISABLE_CRT_DEPRECATION_WARNINGS) /* The noreturn macro is deprecated in C23. We do not mark it as such because including the header file in C23 is also deprecated and we do not want to issue a confusing diagnostic for code which includes followed by code that writes [[noreturn]]. The issue with such code is not with the attribute, or the use of 'noreturn', but the inclusion of the header. */ /* FIXME: We should be issuing a deprecation warning here, but cannot yet due * to system headers which include this header file unconditionally. */ #endif #endif /* __STDNORETURN_H */ tmmintrin.hvarargs.h/*===---- vecintrin.h - Vector intrinsics ----------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if defined(__s390x__) && defined(__VEC__) #define __ATTRS_ai __attribute__((__always_inline__)) #define __ATTRS_o __attribute__((__overloadable__)) #define __ATTRS_o_ai __attribute__((__overloadable__, __always_inline__)) #define __constant(PARM) \ __attribute__((__enable_if__ ((PARM) == (PARM), \ "argument must be a constant integer"))) #define __constant_range(PARM, LOW, HIGH) \ __attribute__((__enable_if__ ((PARM) >= (LOW) && (PARM) <= (HIGH), \ "argument must be a constant integer from " #LOW " to " #HIGH))) #define __constant_pow2_range(PARM, LOW, HIGH) \ __attribute__((__enable_if__ ((PARM) >= (LOW) && (PARM) <= (HIGH) && \ ((PARM) & ((PARM) - 1)) == 0, \ "argument must be a constant power of 2 from " #LOW " to " #HIGH))) /*-- __lcbb -----------------------------------------------------------------*/ extern __ATTRS_o unsigned int __lcbb(const void *__ptr, unsigned short __len) __constant_pow2_range(__len, 64, 4096); #define __lcbb(X, Y) ((__typeof__((__lcbb)((X), (Y)))) \ __builtin_s390_lcbb((X), __builtin_constant_p((Y))? \ ((Y) == 64 ? 0 : \ (Y) == 128 ? 1 : \ (Y) == 256 ? 2 : \ (Y) == 512 ? 3 : \ (Y) == 1024 ? 4 : \ (Y) == 2048 ? 5 : \ (Y) == 4096 ? 
6 : 0) : 0)) /*-- vec_extract ------------------------------------------------------------*/ static inline __ATTRS_o_ai signed char vec_extract(__vector signed char __vec, int __index) { return __vec[__index & 15]; } static inline __ATTRS_o_ai unsigned char vec_extract(__vector __bool char __vec, int __index) { return __vec[__index & 15]; } static inline __ATTRS_o_ai unsigned char vec_extract(__vector unsigned char __vec, int __index) { return __vec[__index & 15]; } static inline __ATTRS_o_ai signed short vec_extract(__vector signed short __vec, int __index) { return __vec[__index & 7]; } static inline __ATTRS_o_ai unsigned short vec_extract(__vector __bool short __vec, int __index) { return __vec[__index & 7]; } static inline __ATTRS_o_ai unsigned short vec_extract(__vector unsigned short __vec, int __index) { return __vec[__index & 7]; } static inline __ATTRS_o_ai signed int vec_extract(__vector signed int __vec, int __index) { return __vec[__index & 3]; } static inline __ATTRS_o_ai unsigned int vec_extract(__vector __bool int __vec, int __index) { return __vec[__index & 3]; } static inline __ATTRS_o_ai unsigned int vec_extract(__vector unsigned int __vec, int __index) { return __vec[__index & 3]; } static inline __ATTRS_o_ai signed long long vec_extract(__vector signed long long __vec, int __index) { return __vec[__index & 1]; } static inline __ATTRS_o_ai unsigned long long vec_extract(__vector __bool long long __vec, int __index) { return __vec[__index & 1]; } static inline __ATTRS_o_ai unsigned long long vec_extract(__vector unsigned long long __vec, int __index) { return __vec[__index & 1]; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai float vec_extract(__vector float __vec, int __index) { return __vec[__index & 3]; } #endif static inline __ATTRS_o_ai double vec_extract(__vector double __vec, int __index) { return __vec[__index & 1]; } /*-- vec_insert -------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_insert(signed char __scalar, __vector signed char __vec, int __index) { __vec[__index & 15] = __scalar; return __vec; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned char vec_insert(unsigned char __scalar, __vector __bool char __vec, int __index) { __vector unsigned char __newvec = (__vector unsigned char)__vec; __newvec[__index & 15] = (unsigned char)__scalar; return __newvec; } static inline __ATTRS_o_ai __vector unsigned char vec_insert(unsigned char __scalar, __vector unsigned char __vec, int __index) { __vec[__index & 15] = __scalar; return __vec; } static inline __ATTRS_o_ai __vector signed short vec_insert(signed short __scalar, __vector signed short __vec, int __index) { __vec[__index & 7] = __scalar; return __vec; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_insert(unsigned short __scalar, __vector __bool short __vec, int __index) { __vector unsigned short __newvec = (__vector unsigned short)__vec; __newvec[__index & 7] = (unsigned short)__scalar; return __newvec; } static inline __ATTRS_o_ai __vector unsigned short vec_insert(unsigned short __scalar, __vector unsigned short __vec, int __index) { __vec[__index & 7] = __scalar; return __vec; } static inline __ATTRS_o_ai __vector signed int vec_insert(signed int __scalar, __vector signed int __vec, int __index) { __vec[__index & 3] = __scalar; return __vec; } // This prototype is deprecated. 
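/* Usage sketch (illustrative; for s390x with the vector facility enabled,
   e.g. -march=z13 -mzvector, so that __s390x__ and __VEC__ are defined).
   vec_extract reads one lane (the index is masked to the lane count) and
   vec_insert returns a copy of the vector with one lane replaced;
   third_lane_plus_one is a hypothetical name.

     #include <vecintrin.h>

     static __vector signed int third_lane_plus_one(__vector signed int v) {
       signed int x = vec_extract(v, 2);   // lane 2 of 4
       return vec_insert(x + 1, v, 2);     // write it back incremented
     }
*/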
static inline __ATTRS_o_ai __vector unsigned int vec_insert(unsigned int __scalar, __vector __bool int __vec, int __index) { __vector unsigned int __newvec = (__vector unsigned int)__vec; __newvec[__index & 3] = __scalar; return __newvec; } static inline __ATTRS_o_ai __vector unsigned int vec_insert(unsigned int __scalar, __vector unsigned int __vec, int __index) { __vec[__index & 3] = __scalar; return __vec; } static inline __ATTRS_o_ai __vector signed long long vec_insert(signed long long __scalar, __vector signed long long __vec, int __index) { __vec[__index & 1] = __scalar; return __vec; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned long long vec_insert(unsigned long long __scalar, __vector __bool long long __vec, int __index) { __vector unsigned long long __newvec = (__vector unsigned long long)__vec; __newvec[__index & 1] = __scalar; return __newvec; } static inline __ATTRS_o_ai __vector unsigned long long vec_insert(unsigned long long __scalar, __vector unsigned long long __vec, int __index) { __vec[__index & 1] = __scalar; return __vec; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_insert(float __scalar, __vector float __vec, int __index) { __vec[__index & 1] = __scalar; return __vec; } #endif static inline __ATTRS_o_ai __vector double vec_insert(double __scalar, __vector double __vec, int __index) { __vec[__index & 1] = __scalar; return __vec; } /*-- vec_promote ------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_promote(signed char __scalar, int __index) { const __vector signed char __zero = (__vector signed char)0; __vector signed char __vec = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); __vec[__index & 15] = __scalar; return __vec; } static inline __ATTRS_o_ai __vector unsigned char vec_promote(unsigned char __scalar, int __index) { const __vector unsigned char __zero = (__vector unsigned char)0; __vector unsigned char __vec = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); __vec[__index & 15] = __scalar; return __vec; } static inline __ATTRS_o_ai __vector signed short vec_promote(signed short __scalar, int __index) { const __vector signed short __zero = (__vector signed short)0; __vector signed short __vec = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1); __vec[__index & 7] = __scalar; return __vec; } static inline __ATTRS_o_ai __vector unsigned short vec_promote(unsigned short __scalar, int __index) { const __vector unsigned short __zero = (__vector unsigned short)0; __vector unsigned short __vec = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1); __vec[__index & 7] = __scalar; return __vec; } static inline __ATTRS_o_ai __vector signed int vec_promote(signed int __scalar, int __index) { const __vector signed int __zero = (__vector signed int)0; __vector signed int __vec = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1); __vec[__index & 3] = __scalar; return __vec; } static inline __ATTRS_o_ai __vector unsigned int vec_promote(unsigned int __scalar, int __index) { const __vector unsigned int __zero = (__vector unsigned int)0; __vector unsigned int __vec = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1); __vec[__index & 3] = __scalar; return __vec; } static inline __ATTRS_o_ai __vector signed long long vec_promote(signed long long __scalar, int __index) { const __vector signed long 
long __zero = (__vector signed long long)0; __vector signed long long __vec = __builtin_shufflevector(__zero, __zero, -1, -1); __vec[__index & 1] = __scalar; return __vec; } static inline __ATTRS_o_ai __vector unsigned long long vec_promote(unsigned long long __scalar, int __index) { const __vector unsigned long long __zero = (__vector unsigned long long)0; __vector unsigned long long __vec = __builtin_shufflevector(__zero, __zero, -1, -1); __vec[__index & 1] = __scalar; return __vec; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_promote(float __scalar, int __index) { const __vector float __zero = (__vector float)0.0f; __vector float __vec = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1); __vec[__index & 3] = __scalar; return __vec; } #endif static inline __ATTRS_o_ai __vector double vec_promote(double __scalar, int __index) { const __vector double __zero = (__vector double)0.0; __vector double __vec = __builtin_shufflevector(__zero, __zero, -1, -1); __vec[__index & 1] = __scalar; return __vec; } /*-- vec_insert_and_zero ----------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_insert_and_zero(const signed char *__ptr) { __vector signed char __vec = (__vector signed char)0; __vec[7] = *__ptr; return __vec; } static inline __ATTRS_o_ai __vector unsigned char vec_insert_and_zero(const unsigned char *__ptr) { __vector unsigned char __vec = (__vector unsigned char)0; __vec[7] = *__ptr; return __vec; } static inline __ATTRS_o_ai __vector signed short vec_insert_and_zero(const signed short *__ptr) { __vector signed short __vec = (__vector signed short)0; __vec[3] = *__ptr; return __vec; } static inline __ATTRS_o_ai __vector unsigned short vec_insert_and_zero(const unsigned short *__ptr) { __vector unsigned short __vec = (__vector unsigned short)0; __vec[3] = *__ptr; return __vec; } static inline __ATTRS_o_ai __vector signed int vec_insert_and_zero(const signed int *__ptr) { __vector signed int __vec = (__vector signed int)0; __vec[1] = *__ptr; return __vec; } static inline __ATTRS_o_ai __vector unsigned int vec_insert_and_zero(const unsigned int *__ptr) { __vector unsigned int __vec = (__vector unsigned int)0; __vec[1] = *__ptr; return __vec; } static inline __ATTRS_o_ai __vector signed long long vec_insert_and_zero(const signed long long *__ptr) { __vector signed long long __vec = (__vector signed long long)0; __vec[0] = *__ptr; return __vec; } static inline __ATTRS_o_ai __vector unsigned long long vec_insert_and_zero(const unsigned long long *__ptr) { __vector unsigned long long __vec = (__vector unsigned long long)0; __vec[0] = *__ptr; return __vec; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_insert_and_zero(const float *__ptr) { __vector float __vec = (__vector float)0.0f; __vec[1] = *__ptr; return __vec; } #endif static inline __ATTRS_o_ai __vector double vec_insert_and_zero(const double *__ptr) { __vector double __vec = (__vector double)0.0; __vec[0] = *__ptr; return __vec; } /*-- vec_perm ---------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_perm(__vector signed char __a, __vector signed char __b, __vector unsigned char __c) { return (__vector signed char)__builtin_s390_vperm( (__vector unsigned char)__a, (__vector unsigned char)__b, __c); } static inline __ATTRS_o_ai __vector unsigned char vec_perm(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c) { return (__vector unsigned 
char)__builtin_s390_vperm( (__vector unsigned char)__a, (__vector unsigned char)__b, __c); } static inline __ATTRS_o_ai __vector __bool char vec_perm(__vector __bool char __a, __vector __bool char __b, __vector unsigned char __c) { return (__vector __bool char)__builtin_s390_vperm( (__vector unsigned char)__a, (__vector unsigned char)__b, __c); } static inline __ATTRS_o_ai __vector signed short vec_perm(__vector signed short __a, __vector signed short __b, __vector unsigned char __c) { return (__vector signed short)__builtin_s390_vperm( (__vector unsigned char)__a, (__vector unsigned char)__b, __c); } static inline __ATTRS_o_ai __vector unsigned short vec_perm(__vector unsigned short __a, __vector unsigned short __b, __vector unsigned char __c) { return (__vector unsigned short)__builtin_s390_vperm( (__vector unsigned char)__a, (__vector unsigned char)__b, __c); } static inline __ATTRS_o_ai __vector __bool short vec_perm(__vector __bool short __a, __vector __bool short __b, __vector unsigned char __c) { return (__vector __bool short)__builtin_s390_vperm( (__vector unsigned char)__a, (__vector unsigned char)__b, __c); } static inline __ATTRS_o_ai __vector signed int vec_perm(__vector signed int __a, __vector signed int __b, __vector unsigned char __c) { return (__vector signed int)__builtin_s390_vperm( (__vector unsigned char)__a, (__vector unsigned char)__b, __c); } static inline __ATTRS_o_ai __vector unsigned int vec_perm(__vector unsigned int __a, __vector unsigned int __b, __vector unsigned char __c) { return (__vector unsigned int)__builtin_s390_vperm( (__vector unsigned char)__a, (__vector unsigned char)__b, __c); } static inline __ATTRS_o_ai __vector __bool int vec_perm(__vector __bool int __a, __vector __bool int __b, __vector unsigned char __c) { return (__vector __bool int)__builtin_s390_vperm( (__vector unsigned char)__a, (__vector unsigned char)__b, __c); } static inline __ATTRS_o_ai __vector signed long long vec_perm(__vector signed long long __a, __vector signed long long __b, __vector unsigned char __c) { return (__vector signed long long)__builtin_s390_vperm( (__vector unsigned char)__a, (__vector unsigned char)__b, __c); } static inline __ATTRS_o_ai __vector unsigned long long vec_perm(__vector unsigned long long __a, __vector unsigned long long __b, __vector unsigned char __c) { return (__vector unsigned long long)__builtin_s390_vperm( (__vector unsigned char)__a, (__vector unsigned char)__b, __c); } static inline __ATTRS_o_ai __vector __bool long long vec_perm(__vector __bool long long __a, __vector __bool long long __b, __vector unsigned char __c) { return (__vector __bool long long)__builtin_s390_vperm( (__vector unsigned char)__a, (__vector unsigned char)__b, __c); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_perm(__vector float __a, __vector float __b, __vector unsigned char __c) { return (__vector float)__builtin_s390_vperm( (__vector unsigned char)__a, (__vector unsigned char)__b, __c); } #endif static inline __ATTRS_o_ai __vector double vec_perm(__vector double __a, __vector double __b, __vector unsigned char __c) { return (__vector double)__builtin_s390_vperm( (__vector unsigned char)__a, (__vector unsigned char)__b, __c); } /*-- vec_permi --------------------------------------------------------------*/ // This prototype is deprecated. extern __ATTRS_o __vector signed long long vec_permi(__vector signed long long __a, __vector signed long long __b, int __c) __constant_range(__c, 0, 3); // This prototype is deprecated. 
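/* Usage sketch (illustrative; s390x with the vector facility, as above).
   vec_perm selects bytes from the 32-byte concatenation of its first two
   operands: control bytes 0..15 pick from the first vector, 16..31 from the
   second. The example interleaves the low halves of two byte vectors;
   interleave_lo is a hypothetical name.

     #include <vecintrin.h>

     static __vector unsigned char
     interleave_lo(__vector unsigned char a, __vector unsigned char b) {
       const __vector unsigned char idx = {
         0, 16, 1, 17, 2, 18, 3, 19,
         4, 20, 5, 21, 6, 22, 7, 23 };
       return vec_perm(a, b, idx);
     }
*/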
extern __ATTRS_o __vector unsigned long long vec_permi(__vector unsigned long long __a, __vector unsigned long long __b, int __c) __constant_range(__c, 0, 3); // This prototype is deprecated. extern __ATTRS_o __vector __bool long long vec_permi(__vector __bool long long __a, __vector __bool long long __b, int __c) __constant_range(__c, 0, 3); // This prototype is deprecated. extern __ATTRS_o __vector double vec_permi(__vector double __a, __vector double __b, int __c) __constant_range(__c, 0, 3); #define vec_permi(X, Y, Z) ((__typeof__((vec_permi)((X), (Y), (Z)))) \ __builtin_s390_vpdi((__vector unsigned long long)(X), \ (__vector unsigned long long)(Y), \ (((Z) & 2) << 1) | ((Z) & 1))) /*-- vec_bperm_u128 ---------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_ai __vector unsigned long long vec_bperm_u128(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vbperm(__a, __b); } #endif /*-- vec_revb ---------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed short vec_revb(__vector signed short __vec) { return (__vector signed short) __builtin_s390_vlbrh((__vector unsigned short)__vec); } static inline __ATTRS_o_ai __vector unsigned short vec_revb(__vector unsigned short __vec) { return __builtin_s390_vlbrh(__vec); } static inline __ATTRS_o_ai __vector signed int vec_revb(__vector signed int __vec) { return (__vector signed int) __builtin_s390_vlbrf((__vector unsigned int)__vec); } static inline __ATTRS_o_ai __vector unsigned int vec_revb(__vector unsigned int __vec) { return __builtin_s390_vlbrf(__vec); } static inline __ATTRS_o_ai __vector signed long long vec_revb(__vector signed long long __vec) { return (__vector signed long long) __builtin_s390_vlbrg((__vector unsigned long long)__vec); } static inline __ATTRS_o_ai __vector unsigned long long vec_revb(__vector unsigned long long __vec) { return __builtin_s390_vlbrg(__vec); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_revb(__vector float __vec) { return (__vector float) __builtin_s390_vlbrf((__vector unsigned int)__vec); } #endif static inline __ATTRS_o_ai __vector double vec_revb(__vector double __vec) { return (__vector double) __builtin_s390_vlbrg((__vector unsigned long long)__vec); } /*-- vec_reve ---------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_reve(__vector signed char __vec) { return (__vector signed char) { __vec[15], __vec[14], __vec[13], __vec[12], __vec[11], __vec[10], __vec[9], __vec[8], __vec[7], __vec[6], __vec[5], __vec[4], __vec[3], __vec[2], __vec[1], __vec[0] }; } static inline __ATTRS_o_ai __vector unsigned char vec_reve(__vector unsigned char __vec) { return (__vector unsigned char) { __vec[15], __vec[14], __vec[13], __vec[12], __vec[11], __vec[10], __vec[9], __vec[8], __vec[7], __vec[6], __vec[5], __vec[4], __vec[3], __vec[2], __vec[1], __vec[0] }; } static inline __ATTRS_o_ai __vector __bool char vec_reve(__vector __bool char __vec) { return (__vector __bool char) { __vec[15], __vec[14], __vec[13], __vec[12], __vec[11], __vec[10], __vec[9], __vec[8], __vec[7], __vec[6], __vec[5], __vec[4], __vec[3], __vec[2], __vec[1], __vec[0] }; } static inline __ATTRS_o_ai __vector signed short vec_reve(__vector signed short __vec) { return (__vector signed short) { __vec[7], __vec[6], __vec[5], __vec[4], __vec[3], __vec[2], __vec[1], __vec[0] }; } static inline __ATTRS_o_ai __vector unsigned short vec_reve(__vector 
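/* Usage sketch (illustrative; s390x with the vector facility, as above).
   vec_revb reverses the bytes within each element, which is the usual way to
   handle little-endian 32-bit data on big-endian z/Architecture;
   swap_lanes_u32 is a hypothetical name.

     #include <vecintrin.h>

     static __vector unsigned int swap_lanes_u32(__vector unsigned int v) {
       // Each of the four 32-bit lanes has its bytes reversed independently.
       return vec_revb(v);
     }
*/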
unsigned short __vec) { return (__vector unsigned short) { __vec[7], __vec[6], __vec[5], __vec[4], __vec[3], __vec[2], __vec[1], __vec[0] }; } static inline __ATTRS_o_ai __vector __bool short vec_reve(__vector __bool short __vec) { return (__vector __bool short) { __vec[7], __vec[6], __vec[5], __vec[4], __vec[3], __vec[2], __vec[1], __vec[0] }; } static inline __ATTRS_o_ai __vector signed int vec_reve(__vector signed int __vec) { return (__vector signed int) { __vec[3], __vec[2], __vec[1], __vec[0] }; } static inline __ATTRS_o_ai __vector unsigned int vec_reve(__vector unsigned int __vec) { return (__vector unsigned int) { __vec[3], __vec[2], __vec[1], __vec[0] }; } static inline __ATTRS_o_ai __vector __bool int vec_reve(__vector __bool int __vec) { return (__vector __bool int) { __vec[3], __vec[2], __vec[1], __vec[0] }; } static inline __ATTRS_o_ai __vector signed long long vec_reve(__vector signed long long __vec) { return (__vector signed long long) { __vec[1], __vec[0] }; } static inline __ATTRS_o_ai __vector unsigned long long vec_reve(__vector unsigned long long __vec) { return (__vector unsigned long long) { __vec[1], __vec[0] }; } static inline __ATTRS_o_ai __vector __bool long long vec_reve(__vector __bool long long __vec) { return (__vector __bool long long) { __vec[1], __vec[0] }; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_reve(__vector float __vec) { return (__vector float) { __vec[3], __vec[2], __vec[1], __vec[0] }; } #endif static inline __ATTRS_o_ai __vector double vec_reve(__vector double __vec) { return (__vector double) { __vec[1], __vec[0] }; } /*-- vec_sel ----------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_sel(__vector signed char __a, __vector signed char __b, __vector unsigned char __c) { return (((__vector signed char)__c & __b) | (~(__vector signed char)__c & __a)); } static inline __ATTRS_o_ai __vector signed char vec_sel(__vector signed char __a, __vector signed char __b, __vector __bool char __c) { return (((__vector signed char)__c & __b) | (~(__vector signed char)__c & __a)); } static inline __ATTRS_o_ai __vector __bool char vec_sel(__vector __bool char __a, __vector __bool char __b, __vector unsigned char __c) { return (((__vector __bool char)__c & __b) | (~(__vector __bool char)__c & __a)); } static inline __ATTRS_o_ai __vector __bool char vec_sel(__vector __bool char __a, __vector __bool char __b, __vector __bool char __c) { return (__c & __b) | (~__c & __a); } static inline __ATTRS_o_ai __vector unsigned char vec_sel(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c) { return (__c & __b) | (~__c & __a); } static inline __ATTRS_o_ai __vector unsigned char vec_sel(__vector unsigned char __a, __vector unsigned char __b, __vector __bool char __c) { return (((__vector unsigned char)__c & __b) | (~(__vector unsigned char)__c & __a)); } static inline __ATTRS_o_ai __vector signed short vec_sel(__vector signed short __a, __vector signed short __b, __vector unsigned short __c) { return (((__vector signed short)__c & __b) | (~(__vector signed short)__c & __a)); } static inline __ATTRS_o_ai __vector signed short vec_sel(__vector signed short __a, __vector signed short __b, __vector __bool short __c) { return (((__vector signed short)__c & __b) | (~(__vector signed short)__c & __a)); } static inline __ATTRS_o_ai __vector __bool short vec_sel(__vector __bool short __a, __vector __bool short __b, __vector unsigned short __c) { return (((__vector 
__bool short)__c & __b) | (~(__vector __bool short)__c & __a)); } static inline __ATTRS_o_ai __vector __bool short vec_sel(__vector __bool short __a, __vector __bool short __b, __vector __bool short __c) { return (__c & __b) | (~__c & __a); } static inline __ATTRS_o_ai __vector unsigned short vec_sel(__vector unsigned short __a, __vector unsigned short __b, __vector unsigned short __c) { return (__c & __b) | (~__c & __a); } static inline __ATTRS_o_ai __vector unsigned short vec_sel(__vector unsigned short __a, __vector unsigned short __b, __vector __bool short __c) { return (((__vector unsigned short)__c & __b) | (~(__vector unsigned short)__c & __a)); } static inline __ATTRS_o_ai __vector signed int vec_sel(__vector signed int __a, __vector signed int __b, __vector unsigned int __c) { return (((__vector signed int)__c & __b) | (~(__vector signed int)__c & __a)); } static inline __ATTRS_o_ai __vector signed int vec_sel(__vector signed int __a, __vector signed int __b, __vector __bool int __c) { return (((__vector signed int)__c & __b) | (~(__vector signed int)__c & __a)); } static inline __ATTRS_o_ai __vector __bool int vec_sel(__vector __bool int __a, __vector __bool int __b, __vector unsigned int __c) { return (((__vector __bool int)__c & __b) | (~(__vector __bool int)__c & __a)); } static inline __ATTRS_o_ai __vector __bool int vec_sel(__vector __bool int __a, __vector __bool int __b, __vector __bool int __c) { return (__c & __b) | (~__c & __a); } static inline __ATTRS_o_ai __vector unsigned int vec_sel(__vector unsigned int __a, __vector unsigned int __b, __vector unsigned int __c) { return (__c & __b) | (~__c & __a); } static inline __ATTRS_o_ai __vector unsigned int vec_sel(__vector unsigned int __a, __vector unsigned int __b, __vector __bool int __c) { return (((__vector unsigned int)__c & __b) | (~(__vector unsigned int)__c & __a)); } static inline __ATTRS_o_ai __vector signed long long vec_sel(__vector signed long long __a, __vector signed long long __b, __vector unsigned long long __c) { return (((__vector signed long long)__c & __b) | (~(__vector signed long long)__c & __a)); } static inline __ATTRS_o_ai __vector signed long long vec_sel(__vector signed long long __a, __vector signed long long __b, __vector __bool long long __c) { return (((__vector signed long long)__c & __b) | (~(__vector signed long long)__c & __a)); } static inline __ATTRS_o_ai __vector __bool long long vec_sel(__vector __bool long long __a, __vector __bool long long __b, __vector unsigned long long __c) { return (((__vector __bool long long)__c & __b) | (~(__vector __bool long long)__c & __a)); } static inline __ATTRS_o_ai __vector __bool long long vec_sel(__vector __bool long long __a, __vector __bool long long __b, __vector __bool long long __c) { return (__c & __b) | (~__c & __a); } static inline __ATTRS_o_ai __vector unsigned long long vec_sel(__vector unsigned long long __a, __vector unsigned long long __b, __vector unsigned long long __c) { return (__c & __b) | (~__c & __a); } static inline __ATTRS_o_ai __vector unsigned long long vec_sel(__vector unsigned long long __a, __vector unsigned long long __b, __vector __bool long long __c) { return (((__vector unsigned long long)__c & __b) | (~(__vector unsigned long long)__c & __a)); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_sel(__vector float __a, __vector float __b, __vector unsigned int __c) { return (__vector float)((__c & (__vector unsigned int)__b) | (~__c & (__vector unsigned int)__a)); } static inline __ATTRS_o_ai __vector 
float vec_sel(__vector float __a, __vector float __b, __vector __bool int __c) { __vector unsigned int __ac = (__vector unsigned int)__a; __vector unsigned int __bc = (__vector unsigned int)__b; __vector unsigned int __cc = (__vector unsigned int)__c; return (__vector float)((__cc & __bc) | (~__cc & __ac)); } #endif static inline __ATTRS_o_ai __vector double vec_sel(__vector double __a, __vector double __b, __vector unsigned long long __c) { return (__vector double)((__c & (__vector unsigned long long)__b) | (~__c & (__vector unsigned long long)__a)); } static inline __ATTRS_o_ai __vector double vec_sel(__vector double __a, __vector double __b, __vector __bool long long __c) { __vector unsigned long long __ac = (__vector unsigned long long)__a; __vector unsigned long long __bc = (__vector unsigned long long)__b; __vector unsigned long long __cc = (__vector unsigned long long)__c; return (__vector double)((__cc & __bc) | (~__cc & __ac)); } /*-- vec_gather_element -----------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed int vec_gather_element(__vector signed int __vec, __vector unsigned int __offset, const signed int *__ptr, int __index) __constant_range(__index, 0, 3) { __vec[__index] = *(const signed int *)( (const char *)__ptr + __offset[__index]); return __vec; } static inline __ATTRS_o_ai __vector __bool int vec_gather_element(__vector __bool int __vec, __vector unsigned int __offset, const unsigned int *__ptr, int __index) __constant_range(__index, 0, 3) { __vec[__index] = *(const unsigned int *)( (const char *)__ptr + __offset[__index]); return __vec; } static inline __ATTRS_o_ai __vector unsigned int vec_gather_element(__vector unsigned int __vec, __vector unsigned int __offset, const unsigned int *__ptr, int __index) __constant_range(__index, 0, 3) { __vec[__index] = *(const unsigned int *)( (const char *)__ptr + __offset[__index]); return __vec; } static inline __ATTRS_o_ai __vector signed long long vec_gather_element(__vector signed long long __vec, __vector unsigned long long __offset, const signed long long *__ptr, int __index) __constant_range(__index, 0, 1) { __vec[__index] = *(const signed long long *)( (const char *)__ptr + __offset[__index]); return __vec; } static inline __ATTRS_o_ai __vector __bool long long vec_gather_element(__vector __bool long long __vec, __vector unsigned long long __offset, const unsigned long long *__ptr, int __index) __constant_range(__index, 0, 1) { __vec[__index] = *(const unsigned long long *)( (const char *)__ptr + __offset[__index]); return __vec; } static inline __ATTRS_o_ai __vector unsigned long long vec_gather_element(__vector unsigned long long __vec, __vector unsigned long long __offset, const unsigned long long *__ptr, int __index) __constant_range(__index, 0, 1) { __vec[__index] = *(const unsigned long long *)( (const char *)__ptr + __offset[__index]); return __vec; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_gather_element(__vector float __vec, __vector unsigned int __offset, const float *__ptr, int __index) __constant_range(__index, 0, 3) { __vec[__index] = *(const float *)( (const char *)__ptr + __offset[__index]); return __vec; } #endif static inline __ATTRS_o_ai __vector double vec_gather_element(__vector double __vec, __vector unsigned long long __offset, const double *__ptr, int __index) __constant_range(__index, 0, 1) { __vec[__index] = *(const double *)( (const char *)__ptr + __offset[__index]); return __vec; } /*-- vec_scatter_element 
----------------------------------------------------*/ static inline __ATTRS_o_ai void vec_scatter_element(__vector signed int __vec, __vector unsigned int __offset, signed int *__ptr, int __index) __constant_range(__index, 0, 3) { *(signed int *)((char *)__ptr + __offset[__index]) = __vec[__index]; } static inline __ATTRS_o_ai void vec_scatter_element(__vector __bool int __vec, __vector unsigned int __offset, unsigned int *__ptr, int __index) __constant_range(__index, 0, 3) { *(unsigned int *)((char *)__ptr + __offset[__index]) = __vec[__index]; } static inline __ATTRS_o_ai void vec_scatter_element(__vector unsigned int __vec, __vector unsigned int __offset, unsigned int *__ptr, int __index) __constant_range(__index, 0, 3) { *(unsigned int *)((char *)__ptr + __offset[__index]) = __vec[__index]; } static inline __ATTRS_o_ai void vec_scatter_element(__vector signed long long __vec, __vector unsigned long long __offset, signed long long *__ptr, int __index) __constant_range(__index, 0, 1) { *(signed long long *)((char *)__ptr + __offset[__index]) = __vec[__index]; } static inline __ATTRS_o_ai void vec_scatter_element(__vector __bool long long __vec, __vector unsigned long long __offset, unsigned long long *__ptr, int __index) __constant_range(__index, 0, 1) { *(unsigned long long *)((char *)__ptr + __offset[__index]) = __vec[__index]; } static inline __ATTRS_o_ai void vec_scatter_element(__vector unsigned long long __vec, __vector unsigned long long __offset, unsigned long long *__ptr, int __index) __constant_range(__index, 0, 1) { *(unsigned long long *)((char *)__ptr + __offset[__index]) = __vec[__index]; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai void vec_scatter_element(__vector float __vec, __vector unsigned int __offset, float *__ptr, int __index) __constant_range(__index, 0, 3) { *(float *)((char *)__ptr + __offset[__index]) = __vec[__index]; } #endif static inline __ATTRS_o_ai void vec_scatter_element(__vector double __vec, __vector unsigned long long __offset, double *__ptr, int __index) __constant_range(__index, 0, 1) { *(double *)((char *)__ptr + __offset[__index]) = __vec[__index]; } /*-- vec_xl -----------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_xl(long __offset, const signed char *__ptr) { __vector signed char V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector signed char)); return V; } static inline __ATTRS_o_ai __vector unsigned char vec_xl(long __offset, const unsigned char *__ptr) { __vector unsigned char V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector unsigned char)); return V; } static inline __ATTRS_o_ai __vector signed short vec_xl(long __offset, const signed short *__ptr) { __vector signed short V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector signed short)); return V; } static inline __ATTRS_o_ai __vector unsigned short vec_xl(long __offset, const unsigned short *__ptr) { __vector unsigned short V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector unsigned short)); return V; } static inline __ATTRS_o_ai __vector signed int vec_xl(long __offset, const signed int *__ptr) { __vector signed int V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector signed int)); return V; } static inline __ATTRS_o_ai __vector unsigned int vec_xl(long __offset, const unsigned int *__ptr) { __vector unsigned int V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector unsigned int)); return 
V; } static inline __ATTRS_o_ai __vector signed long long vec_xl(long __offset, const signed long long *__ptr) { __vector signed long long V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector signed long long)); return V; } static inline __ATTRS_o_ai __vector unsigned long long vec_xl(long __offset, const unsigned long long *__ptr) { __vector unsigned long long V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector unsigned long long)); return V; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_xl(long __offset, const float *__ptr) { __vector float V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector float)); return V; } #endif static inline __ATTRS_o_ai __vector double vec_xl(long __offset, const double *__ptr) { __vector double V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector double)); return V; } /*-- vec_xld2 ---------------------------------------------------------------*/ // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed char vec_xld2(long __offset, const signed char *__ptr) { __vector signed char V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector signed char)); return V; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned char vec_xld2(long __offset, const unsigned char *__ptr) { __vector unsigned char V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector unsigned char)); return V; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_xld2(long __offset, const signed short *__ptr) { __vector signed short V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector signed short)); return V; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_xld2(long __offset, const unsigned short *__ptr) { __vector unsigned short V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector unsigned short)); return V; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed int vec_xld2(long __offset, const signed int *__ptr) { __vector signed int V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector signed int)); return V; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_xld2(long __offset, const unsigned int *__ptr) { __vector unsigned int V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector unsigned int)); return V; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed long long vec_xld2(long __offset, const signed long long *__ptr) { __vector signed long long V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector signed long long)); return V; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned long long vec_xld2(long __offset, const unsigned long long *__ptr) { __vector unsigned long long V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector unsigned long long)); return V; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector double vec_xld2(long __offset, const double *__ptr) { __vector double V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector double)); return V; } /*-- vec_xlw4 ---------------------------------------------------------------*/ // This prototype is deprecated. 
static inline __ATTRS_o_ai __vector signed char vec_xlw4(long __offset, const signed char *__ptr) { __vector signed char V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector signed char)); return V; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned char vec_xlw4(long __offset, const unsigned char *__ptr) { __vector unsigned char V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector unsigned char)); return V; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_xlw4(long __offset, const signed short *__ptr) { __vector signed short V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector signed short)); return V; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_xlw4(long __offset, const unsigned short *__ptr) { __vector unsigned short V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector unsigned short)); return V; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed int vec_xlw4(long __offset, const signed int *__ptr) { __vector signed int V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector signed int)); return V; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_xlw4(long __offset, const unsigned int *__ptr) { __vector unsigned int V; __builtin_memcpy(&V, ((const char *)__ptr + __offset), sizeof(__vector unsigned int)); return V; } /*-- vec_xst ----------------------------------------------------------------*/ static inline __ATTRS_o_ai void vec_xst(__vector signed char __vec, long __offset, signed char *__ptr) { __vector signed char V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector signed char)); } static inline __ATTRS_o_ai void vec_xst(__vector unsigned char __vec, long __offset, unsigned char *__ptr) { __vector unsigned char V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector unsigned char)); } static inline __ATTRS_o_ai void vec_xst(__vector signed short __vec, long __offset, signed short *__ptr) { __vector signed short V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector signed short)); } static inline __ATTRS_o_ai void vec_xst(__vector unsigned short __vec, long __offset, unsigned short *__ptr) { __vector unsigned short V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector unsigned short)); } static inline __ATTRS_o_ai void vec_xst(__vector signed int __vec, long __offset, signed int *__ptr) { __vector signed int V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector signed int)); } static inline __ATTRS_o_ai void vec_xst(__vector unsigned int __vec, long __offset, unsigned int *__ptr) { __vector unsigned int V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector unsigned int)); } static inline __ATTRS_o_ai void vec_xst(__vector signed long long __vec, long __offset, signed long long *__ptr) { __vector signed long long V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector signed long long)); } static inline __ATTRS_o_ai void vec_xst(__vector unsigned long long __vec, long __offset, unsigned long long *__ptr) { __vector unsigned long long V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector unsigned long long)); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai void vec_xst(__vector float __vec, long __offset, float *__ptr) { __vector float V 
= __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector float)); } #endif static inline __ATTRS_o_ai void vec_xst(__vector double __vec, long __offset, double *__ptr) { __vector double V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector double)); } /*-- vec_xstd2 --------------------------------------------------------------*/ // This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstd2(__vector signed char __vec, long __offset, signed char *__ptr) { __vector signed char V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector signed char)); } // This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstd2(__vector unsigned char __vec, long __offset, unsigned char *__ptr) { __vector unsigned char V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector unsigned char)); } // This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstd2(__vector signed short __vec, long __offset, signed short *__ptr) { __vector signed short V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector signed short)); } // This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstd2(__vector unsigned short __vec, long __offset, unsigned short *__ptr) { __vector unsigned short V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector unsigned short)); } // This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstd2(__vector signed int __vec, long __offset, signed int *__ptr) { __vector signed int V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector signed int)); } // This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstd2(__vector unsigned int __vec, long __offset, unsigned int *__ptr) { __vector unsigned int V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector unsigned int)); } // This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstd2(__vector signed long long __vec, long __offset, signed long long *__ptr) { __vector signed long long V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector signed long long)); } // This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstd2(__vector unsigned long long __vec, long __offset, unsigned long long *__ptr) { __vector unsigned long long V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector unsigned long long)); } // This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstd2(__vector double __vec, long __offset, double *__ptr) { __vector double V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector double)); } /*-- vec_xstw4 --------------------------------------------------------------*/ // This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstw4(__vector signed char __vec, long __offset, signed char *__ptr) { __vector signed char V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector signed char)); } // This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstw4(__vector unsigned char __vec, long __offset, unsigned char *__ptr) { __vector unsigned char V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector unsigned char)); } // This prototype is deprecated. 
static inline __ATTRS_o_ai void vec_xstw4(__vector signed short __vec, long __offset, signed short *__ptr) { __vector signed short V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector signed short)); } // This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstw4(__vector unsigned short __vec, long __offset, unsigned short *__ptr) { __vector unsigned short V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector unsigned short)); } // This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstw4(__vector signed int __vec, long __offset, signed int *__ptr) { __vector signed int V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector signed int)); } // This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstw4(__vector unsigned int __vec, long __offset, unsigned int *__ptr) { __vector unsigned int V = __vec; __builtin_memcpy(((char *)__ptr + __offset), &V, sizeof(__vector unsigned int)); } /*-- vec_load_bndry ---------------------------------------------------------*/ extern __ATTRS_o __vector signed char vec_load_bndry(const signed char *__ptr, unsigned short __len) __constant_pow2_range(__len, 64, 4096); extern __ATTRS_o __vector unsigned char vec_load_bndry(const unsigned char *__ptr, unsigned short __len) __constant_pow2_range(__len, 64, 4096); extern __ATTRS_o __vector signed short vec_load_bndry(const signed short *__ptr, unsigned short __len) __constant_pow2_range(__len, 64, 4096); extern __ATTRS_o __vector unsigned short vec_load_bndry(const unsigned short *__ptr, unsigned short __len) __constant_pow2_range(__len, 64, 4096); extern __ATTRS_o __vector signed int vec_load_bndry(const signed int *__ptr, unsigned short __len) __constant_pow2_range(__len, 64, 4096); extern __ATTRS_o __vector unsigned int vec_load_bndry(const unsigned int *__ptr, unsigned short __len) __constant_pow2_range(__len, 64, 4096); extern __ATTRS_o __vector signed long long vec_load_bndry(const signed long long *__ptr, unsigned short __len) __constant_pow2_range(__len, 64, 4096); extern __ATTRS_o __vector unsigned long long vec_load_bndry(const unsigned long long *__ptr, unsigned short __len) __constant_pow2_range(__len, 64, 4096); #if __ARCH__ >= 12 extern __ATTRS_o __vector float vec_load_bndry(const float *__ptr, unsigned short __len) __constant_pow2_range(__len, 64, 4096); #endif extern __ATTRS_o __vector double vec_load_bndry(const double *__ptr, unsigned short __len) __constant_pow2_range(__len, 64, 4096); #define vec_load_bndry(X, Y) ((__typeof__((vec_load_bndry)((X), (Y)))) \ __builtin_s390_vlbb((X), ((Y) == 64 ? 0 : \ (Y) == 128 ? 1 : \ (Y) == 256 ? 2 : \ (Y) == 512 ? 3 : \ (Y) == 1024 ? 4 : \ (Y) == 2048 ? 5 : \ (Y) == 4096 ? 
6 : -1))) /*-- vec_load_len -----------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_load_len(const signed char *__ptr, unsigned int __len) { return (__vector signed char)__builtin_s390_vll(__len, __ptr); } static inline __ATTRS_o_ai __vector unsigned char vec_load_len(const unsigned char *__ptr, unsigned int __len) { return (__vector unsigned char)__builtin_s390_vll(__len, __ptr); } static inline __ATTRS_o_ai __vector signed short vec_load_len(const signed short *__ptr, unsigned int __len) { return (__vector signed short)__builtin_s390_vll(__len, __ptr); } static inline __ATTRS_o_ai __vector unsigned short vec_load_len(const unsigned short *__ptr, unsigned int __len) { return (__vector unsigned short)__builtin_s390_vll(__len, __ptr); } static inline __ATTRS_o_ai __vector signed int vec_load_len(const signed int *__ptr, unsigned int __len) { return (__vector signed int)__builtin_s390_vll(__len, __ptr); } static inline __ATTRS_o_ai __vector unsigned int vec_load_len(const unsigned int *__ptr, unsigned int __len) { return (__vector unsigned int)__builtin_s390_vll(__len, __ptr); } static inline __ATTRS_o_ai __vector signed long long vec_load_len(const signed long long *__ptr, unsigned int __len) { return (__vector signed long long)__builtin_s390_vll(__len, __ptr); } static inline __ATTRS_o_ai __vector unsigned long long vec_load_len(const unsigned long long *__ptr, unsigned int __len) { return (__vector unsigned long long)__builtin_s390_vll(__len, __ptr); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_load_len(const float *__ptr, unsigned int __len) { return (__vector float)__builtin_s390_vll(__len, __ptr); } #endif static inline __ATTRS_o_ai __vector double vec_load_len(const double *__ptr, unsigned int __len) { return (__vector double)__builtin_s390_vll(__len, __ptr); } /*-- vec_load_len_r ---------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_ai __vector unsigned char vec_load_len_r(const unsigned char *__ptr, unsigned int __len) { return (__vector unsigned char)__builtin_s390_vlrlr(__len, __ptr); } #endif /*-- vec_store_len ----------------------------------------------------------*/ static inline __ATTRS_o_ai void vec_store_len(__vector signed char __vec, signed char *__ptr, unsigned int __len) { __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr); } static inline __ATTRS_o_ai void vec_store_len(__vector unsigned char __vec, unsigned char *__ptr, unsigned int __len) { __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr); } static inline __ATTRS_o_ai void vec_store_len(__vector signed short __vec, signed short *__ptr, unsigned int __len) { __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr); } static inline __ATTRS_o_ai void vec_store_len(__vector unsigned short __vec, unsigned short *__ptr, unsigned int __len) { __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr); } static inline __ATTRS_o_ai void vec_store_len(__vector signed int __vec, signed int *__ptr, unsigned int __len) { __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr); } static inline __ATTRS_o_ai void vec_store_len(__vector unsigned int __vec, unsigned int *__ptr, unsigned int __len) { __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr); } static inline __ATTRS_o_ai void vec_store_len(__vector signed long long __vec, signed long long *__ptr, unsigned int __len) { __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr); } static inline 
__ATTRS_o_ai void vec_store_len(__vector unsigned long long __vec, unsigned long long *__ptr, unsigned int __len) { __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai void vec_store_len(__vector float __vec, float *__ptr, unsigned int __len) { __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr); } #endif static inline __ATTRS_o_ai void vec_store_len(__vector double __vec, double *__ptr, unsigned int __len) { __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr); } /*-- vec_store_len_r --------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_ai void vec_store_len_r(__vector unsigned char __vec, unsigned char *__ptr, unsigned int __len) { __builtin_s390_vstrlr((__vector signed char)__vec, __len, __ptr); } #endif /*-- vec_load_pair ----------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed long long vec_load_pair(signed long long __a, signed long long __b) { return (__vector signed long long)(__a, __b); } static inline __ATTRS_o_ai __vector unsigned long long vec_load_pair(unsigned long long __a, unsigned long long __b) { return (__vector unsigned long long)(__a, __b); } /*-- vec_genmask ------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_genmask(unsigned short __mask) __constant(__mask) { return (__vector unsigned char)( __mask & 0x8000 ? 0xff : 0, __mask & 0x4000 ? 0xff : 0, __mask & 0x2000 ? 0xff : 0, __mask & 0x1000 ? 0xff : 0, __mask & 0x0800 ? 0xff : 0, __mask & 0x0400 ? 0xff : 0, __mask & 0x0200 ? 0xff : 0, __mask & 0x0100 ? 0xff : 0, __mask & 0x0080 ? 0xff : 0, __mask & 0x0040 ? 0xff : 0, __mask & 0x0020 ? 0xff : 0, __mask & 0x0010 ? 0xff : 0, __mask & 0x0008 ? 0xff : 0, __mask & 0x0004 ? 0xff : 0, __mask & 0x0002 ? 0xff : 0, __mask & 0x0001 ? 0xff : 0); } /*-- vec_genmasks_* ---------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_genmasks_8(unsigned char __first, unsigned char __last) __constant(__first) __constant(__last) { unsigned char __bit1 = __first & 7; unsigned char __bit2 = __last & 7; unsigned char __mask1 = (unsigned char)(1U << (7 - __bit1) << 1) - 1; unsigned char __mask2 = (unsigned char)(1U << (7 - __bit2)) - 1; unsigned char __value = (__bit1 <= __bit2 ? __mask1 & ~__mask2 : __mask1 | ~__mask2); return (__vector unsigned char)__value; } static inline __ATTRS_o_ai __vector unsigned short vec_genmasks_16(unsigned char __first, unsigned char __last) __constant(__first) __constant(__last) { unsigned char __bit1 = __first & 15; unsigned char __bit2 = __last & 15; unsigned short __mask1 = (unsigned short)(1U << (15 - __bit1) << 1) - 1; unsigned short __mask2 = (unsigned short)(1U << (15 - __bit2)) - 1; unsigned short __value = (__bit1 <= __bit2 ? __mask1 & ~__mask2 : __mask1 | ~__mask2); return (__vector unsigned short)__value; } static inline __ATTRS_o_ai __vector unsigned int vec_genmasks_32(unsigned char __first, unsigned char __last) __constant(__first) __constant(__last) { unsigned char __bit1 = __first & 31; unsigned char __bit2 = __last & 31; unsigned int __mask1 = (1U << (31 - __bit1) << 1) - 1; unsigned int __mask2 = (1U << (31 - __bit2)) - 1; unsigned int __value = (__bit1 <= __bit2 ? 
__mask1 & ~__mask2 : __mask1 | ~__mask2); return (__vector unsigned int)__value; } static inline __ATTRS_o_ai __vector unsigned long long vec_genmasks_64(unsigned char __first, unsigned char __last) __constant(__first) __constant(__last) { unsigned char __bit1 = __first & 63; unsigned char __bit2 = __last & 63; unsigned long long __mask1 = (1ULL << (63 - __bit1) << 1) - 1; unsigned long long __mask2 = (1ULL << (63 - __bit2)) - 1; unsigned long long __value = (__bit1 <= __bit2 ? __mask1 & ~__mask2 : __mask1 | ~__mask2); return (__vector unsigned long long)__value; } /*-- vec_splat --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_splat(__vector signed char __vec, int __index) __constant_range(__index, 0, 15) { return (__vector signed char)__vec[__index]; } static inline __ATTRS_o_ai __vector __bool char vec_splat(__vector __bool char __vec, int __index) __constant_range(__index, 0, 15) { return (__vector __bool char)(__vector unsigned char)__vec[__index]; } static inline __ATTRS_o_ai __vector unsigned char vec_splat(__vector unsigned char __vec, int __index) __constant_range(__index, 0, 15) { return (__vector unsigned char)__vec[__index]; } static inline __ATTRS_o_ai __vector signed short vec_splat(__vector signed short __vec, int __index) __constant_range(__index, 0, 7) { return (__vector signed short)__vec[__index]; } static inline __ATTRS_o_ai __vector __bool short vec_splat(__vector __bool short __vec, int __index) __constant_range(__index, 0, 7) { return (__vector __bool short)(__vector unsigned short)__vec[__index]; } static inline __ATTRS_o_ai __vector unsigned short vec_splat(__vector unsigned short __vec, int __index) __constant_range(__index, 0, 7) { return (__vector unsigned short)__vec[__index]; } static inline __ATTRS_o_ai __vector signed int vec_splat(__vector signed int __vec, int __index) __constant_range(__index, 0, 3) { return (__vector signed int)__vec[__index]; } static inline __ATTRS_o_ai __vector __bool int vec_splat(__vector __bool int __vec, int __index) __constant_range(__index, 0, 3) { return (__vector __bool int)(__vector unsigned int)__vec[__index]; } static inline __ATTRS_o_ai __vector unsigned int vec_splat(__vector unsigned int __vec, int __index) __constant_range(__index, 0, 3) { return (__vector unsigned int)__vec[__index]; } static inline __ATTRS_o_ai __vector signed long long vec_splat(__vector signed long long __vec, int __index) __constant_range(__index, 0, 1) { return (__vector signed long long)__vec[__index]; } static inline __ATTRS_o_ai __vector __bool long long vec_splat(__vector __bool long long __vec, int __index) __constant_range(__index, 0, 1) { return ((__vector __bool long long) (__vector unsigned long long)__vec[__index]); } static inline __ATTRS_o_ai __vector unsigned long long vec_splat(__vector unsigned long long __vec, int __index) __constant_range(__index, 0, 1) { return (__vector unsigned long long)__vec[__index]; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_splat(__vector float __vec, int __index) __constant_range(__index, 0, 3) { return (__vector float)__vec[__index]; } #endif static inline __ATTRS_o_ai __vector double vec_splat(__vector double __vec, int __index) __constant_range(__index, 0, 1) { return (__vector double)__vec[__index]; } /*-- vec_splat_s* -----------------------------------------------------------*/ static inline __ATTRS_ai __vector signed char vec_splat_s8(signed char __scalar) __constant(__scalar) { return (__vector signed 
char)__scalar; } static inline __ATTRS_ai __vector signed short vec_splat_s16(signed short __scalar) __constant(__scalar) { return (__vector signed short)__scalar; } static inline __ATTRS_ai __vector signed int vec_splat_s32(signed short __scalar) __constant(__scalar) { return (__vector signed int)(signed int)__scalar; } static inline __ATTRS_ai __vector signed long long vec_splat_s64(signed short __scalar) __constant(__scalar) { return (__vector signed long long)(signed long)__scalar; } /*-- vec_splat_u* -----------------------------------------------------------*/ static inline __ATTRS_ai __vector unsigned char vec_splat_u8(unsigned char __scalar) __constant(__scalar) { return (__vector unsigned char)__scalar; } static inline __ATTRS_ai __vector unsigned short vec_splat_u16(unsigned short __scalar) __constant(__scalar) { return (__vector unsigned short)__scalar; } static inline __ATTRS_ai __vector unsigned int vec_splat_u32(signed short __scalar) __constant(__scalar) { return (__vector unsigned int)(signed int)__scalar; } static inline __ATTRS_ai __vector unsigned long long vec_splat_u64(signed short __scalar) __constant(__scalar) { return (__vector unsigned long long)(signed long long)__scalar; } /*-- vec_splats -------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_splats(signed char __scalar) { return (__vector signed char)__scalar; } static inline __ATTRS_o_ai __vector unsigned char vec_splats(unsigned char __scalar) { return (__vector unsigned char)__scalar; } static inline __ATTRS_o_ai __vector signed short vec_splats(signed short __scalar) { return (__vector signed short)__scalar; } static inline __ATTRS_o_ai __vector unsigned short vec_splats(unsigned short __scalar) { return (__vector unsigned short)__scalar; } static inline __ATTRS_o_ai __vector signed int vec_splats(signed int __scalar) { return (__vector signed int)__scalar; } static inline __ATTRS_o_ai __vector unsigned int vec_splats(unsigned int __scalar) { return (__vector unsigned int)__scalar; } static inline __ATTRS_o_ai __vector signed long long vec_splats(signed long long __scalar) { return (__vector signed long long)__scalar; } static inline __ATTRS_o_ai __vector unsigned long long vec_splats(unsigned long long __scalar) { return (__vector unsigned long long)__scalar; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_splats(float __scalar) { return (__vector float)__scalar; } #endif static inline __ATTRS_o_ai __vector double vec_splats(double __scalar) { return (__vector double)__scalar; } /*-- vec_extend_s64 ---------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed long long vec_extend_s64(__vector signed char __a) { return (__vector signed long long)(__a[7], __a[15]); } static inline __ATTRS_o_ai __vector signed long long vec_extend_s64(__vector signed short __a) { return (__vector signed long long)(__a[3], __a[7]); } static inline __ATTRS_o_ai __vector signed long long vec_extend_s64(__vector signed int __a) { return (__vector signed long long)(__a[1], __a[3]); } /*-- vec_mergeh -------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_mergeh(__vector signed char __a, __vector signed char __b) { return (__vector signed char)( __a[0], __b[0], __a[1], __b[1], __a[2], __b[2], __a[3], __b[3], __a[4], __b[4], __a[5], __b[5], __a[6], __b[6], __a[7], __b[7]); } static inline __ATTRS_o_ai __vector __bool char vec_mergeh(__vector __bool char 
__a, __vector __bool char __b) { return (__vector __bool char)( __a[0], __b[0], __a[1], __b[1], __a[2], __b[2], __a[3], __b[3], __a[4], __b[4], __a[5], __b[5], __a[6], __b[6], __a[7], __b[7]); } static inline __ATTRS_o_ai __vector unsigned char vec_mergeh(__vector unsigned char __a, __vector unsigned char __b) { return (__vector unsigned char)( __a[0], __b[0], __a[1], __b[1], __a[2], __b[2], __a[3], __b[3], __a[4], __b[4], __a[5], __b[5], __a[6], __b[6], __a[7], __b[7]); } static inline __ATTRS_o_ai __vector signed short vec_mergeh(__vector signed short __a, __vector signed short __b) { return (__vector signed short)( __a[0], __b[0], __a[1], __b[1], __a[2], __b[2], __a[3], __b[3]); } static inline __ATTRS_o_ai __vector __bool short vec_mergeh(__vector __bool short __a, __vector __bool short __b) { return (__vector __bool short)( __a[0], __b[0], __a[1], __b[1], __a[2], __b[2], __a[3], __b[3]); } static inline __ATTRS_o_ai __vector unsigned short vec_mergeh(__vector unsigned short __a, __vector unsigned short __b) { return (__vector unsigned short)( __a[0], __b[0], __a[1], __b[1], __a[2], __b[2], __a[3], __b[3]); } static inline __ATTRS_o_ai __vector signed int vec_mergeh(__vector signed int __a, __vector signed int __b) { return (__vector signed int)(__a[0], __b[0], __a[1], __b[1]); } static inline __ATTRS_o_ai __vector __bool int vec_mergeh(__vector __bool int __a, __vector __bool int __b) { return (__vector __bool int)(__a[0], __b[0], __a[1], __b[1]); } static inline __ATTRS_o_ai __vector unsigned int vec_mergeh(__vector unsigned int __a, __vector unsigned int __b) { return (__vector unsigned int)(__a[0], __b[0], __a[1], __b[1]); } static inline __ATTRS_o_ai __vector signed long long vec_mergeh(__vector signed long long __a, __vector signed long long __b) { return (__vector signed long long)(__a[0], __b[0]); } static inline __ATTRS_o_ai __vector __bool long long vec_mergeh(__vector __bool long long __a, __vector __bool long long __b) { return (__vector __bool long long)(__a[0], __b[0]); } static inline __ATTRS_o_ai __vector unsigned long long vec_mergeh(__vector unsigned long long __a, __vector unsigned long long __b) { return (__vector unsigned long long)(__a[0], __b[0]); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_mergeh(__vector float __a, __vector float __b) { return (__vector float)(__a[0], __b[0], __a[1], __b[1]); } #endif static inline __ATTRS_o_ai __vector double vec_mergeh(__vector double __a, __vector double __b) { return (__vector double)(__a[0], __b[0]); } /*-- vec_mergel -------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_mergel(__vector signed char __a, __vector signed char __b) { return (__vector signed char)( __a[8], __b[8], __a[9], __b[9], __a[10], __b[10], __a[11], __b[11], __a[12], __b[12], __a[13], __b[13], __a[14], __b[14], __a[15], __b[15]); } static inline __ATTRS_o_ai __vector __bool char vec_mergel(__vector __bool char __a, __vector __bool char __b) { return (__vector __bool char)( __a[8], __b[8], __a[9], __b[9], __a[10], __b[10], __a[11], __b[11], __a[12], __b[12], __a[13], __b[13], __a[14], __b[14], __a[15], __b[15]); } static inline __ATTRS_o_ai __vector unsigned char vec_mergel(__vector unsigned char __a, __vector unsigned char __b) { return (__vector unsigned char)( __a[8], __b[8], __a[9], __b[9], __a[10], __b[10], __a[11], __b[11], __a[12], __b[12], __a[13], __b[13], __a[14], __b[14], __a[15], __b[15]); } static inline __ATTRS_o_ai __vector signed short vec_mergel(__vector 
signed short __a, __vector signed short __b) { return (__vector signed short)( __a[4], __b[4], __a[5], __b[5], __a[6], __b[6], __a[7], __b[7]); } static inline __ATTRS_o_ai __vector __bool short vec_mergel(__vector __bool short __a, __vector __bool short __b) { return (__vector __bool short)( __a[4], __b[4], __a[5], __b[5], __a[6], __b[6], __a[7], __b[7]); } static inline __ATTRS_o_ai __vector unsigned short vec_mergel(__vector unsigned short __a, __vector unsigned short __b) { return (__vector unsigned short)( __a[4], __b[4], __a[5], __b[5], __a[6], __b[6], __a[7], __b[7]); } static inline __ATTRS_o_ai __vector signed int vec_mergel(__vector signed int __a, __vector signed int __b) { return (__vector signed int)(__a[2], __b[2], __a[3], __b[3]); } static inline __ATTRS_o_ai __vector __bool int vec_mergel(__vector __bool int __a, __vector __bool int __b) { return (__vector __bool int)(__a[2], __b[2], __a[3], __b[3]); } static inline __ATTRS_o_ai __vector unsigned int vec_mergel(__vector unsigned int __a, __vector unsigned int __b) { return (__vector unsigned int)(__a[2], __b[2], __a[3], __b[3]); } static inline __ATTRS_o_ai __vector signed long long vec_mergel(__vector signed long long __a, __vector signed long long __b) { return (__vector signed long long)(__a[1], __b[1]); } static inline __ATTRS_o_ai __vector __bool long long vec_mergel(__vector __bool long long __a, __vector __bool long long __b) { return (__vector __bool long long)(__a[1], __b[1]); } static inline __ATTRS_o_ai __vector unsigned long long vec_mergel(__vector unsigned long long __a, __vector unsigned long long __b) { return (__vector unsigned long long)(__a[1], __b[1]); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_mergel(__vector float __a, __vector float __b) { return (__vector float)(__a[2], __b[2], __a[3], __b[3]); } #endif static inline __ATTRS_o_ai __vector double vec_mergel(__vector double __a, __vector double __b) { return (__vector double)(__a[1], __b[1]); } /*-- vec_pack ---------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_pack(__vector signed short __a, __vector signed short __b) { __vector signed char __ac = (__vector signed char)__a; __vector signed char __bc = (__vector signed char)__b; return (__vector signed char)( __ac[1], __ac[3], __ac[5], __ac[7], __ac[9], __ac[11], __ac[13], __ac[15], __bc[1], __bc[3], __bc[5], __bc[7], __bc[9], __bc[11], __bc[13], __bc[15]); } static inline __ATTRS_o_ai __vector __bool char vec_pack(__vector __bool short __a, __vector __bool short __b) { __vector __bool char __ac = (__vector __bool char)__a; __vector __bool char __bc = (__vector __bool char)__b; return (__vector __bool char)( __ac[1], __ac[3], __ac[5], __ac[7], __ac[9], __ac[11], __ac[13], __ac[15], __bc[1], __bc[3], __bc[5], __bc[7], __bc[9], __bc[11], __bc[13], __bc[15]); } static inline __ATTRS_o_ai __vector unsigned char vec_pack(__vector unsigned short __a, __vector unsigned short __b) { __vector unsigned char __ac = (__vector unsigned char)__a; __vector unsigned char __bc = (__vector unsigned char)__b; return (__vector unsigned char)( __ac[1], __ac[3], __ac[5], __ac[7], __ac[9], __ac[11], __ac[13], __ac[15], __bc[1], __bc[3], __bc[5], __bc[7], __bc[9], __bc[11], __bc[13], __bc[15]); } static inline __ATTRS_o_ai __vector signed short vec_pack(__vector signed int __a, __vector signed int __b) { __vector signed short __ac = (__vector signed short)__a; __vector signed short __bc = (__vector signed short)__b; return (__vector signed 
short)( __ac[1], __ac[3], __ac[5], __ac[7], __bc[1], __bc[3], __bc[5], __bc[7]); } static inline __ATTRS_o_ai __vector __bool short vec_pack(__vector __bool int __a, __vector __bool int __b) { __vector __bool short __ac = (__vector __bool short)__a; __vector __bool short __bc = (__vector __bool short)__b; return (__vector __bool short)( __ac[1], __ac[3], __ac[5], __ac[7], __bc[1], __bc[3], __bc[5], __bc[7]); } static inline __ATTRS_o_ai __vector unsigned short vec_pack(__vector unsigned int __a, __vector unsigned int __b) { __vector unsigned short __ac = (__vector unsigned short)__a; __vector unsigned short __bc = (__vector unsigned short)__b; return (__vector unsigned short)( __ac[1], __ac[3], __ac[5], __ac[7], __bc[1], __bc[3], __bc[5], __bc[7]); } static inline __ATTRS_o_ai __vector signed int vec_pack(__vector signed long long __a, __vector signed long long __b) { __vector signed int __ac = (__vector signed int)__a; __vector signed int __bc = (__vector signed int)__b; return (__vector signed int)(__ac[1], __ac[3], __bc[1], __bc[3]); } static inline __ATTRS_o_ai __vector __bool int vec_pack(__vector __bool long long __a, __vector __bool long long __b) { __vector __bool int __ac = (__vector __bool int)__a; __vector __bool int __bc = (__vector __bool int)__b; return (__vector __bool int)(__ac[1], __ac[3], __bc[1], __bc[3]); } static inline __ATTRS_o_ai __vector unsigned int vec_pack(__vector unsigned long long __a, __vector unsigned long long __b) { __vector unsigned int __ac = (__vector unsigned int)__a; __vector unsigned int __bc = (__vector unsigned int)__b; return (__vector unsigned int)(__ac[1], __ac[3], __bc[1], __bc[3]); } /*-- vec_packs --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_packs(__vector signed short __a, __vector signed short __b) { return __builtin_s390_vpksh(__a, __b); } static inline __ATTRS_o_ai __vector unsigned char vec_packs(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vpklsh(__a, __b); } static inline __ATTRS_o_ai __vector signed short vec_packs(__vector signed int __a, __vector signed int __b) { return __builtin_s390_vpksf(__a, __b); } static inline __ATTRS_o_ai __vector unsigned short vec_packs(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vpklsf(__a, __b); } static inline __ATTRS_o_ai __vector signed int vec_packs(__vector signed long long __a, __vector signed long long __b) { return __builtin_s390_vpksg(__a, __b); } static inline __ATTRS_o_ai __vector unsigned int vec_packs(__vector unsigned long long __a, __vector unsigned long long __b) { return __builtin_s390_vpklsg(__a, __b); } /*-- vec_packs_cc -----------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_packs_cc(__vector signed short __a, __vector signed short __b, int *__cc) { return __builtin_s390_vpkshs(__a, __b, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_packs_cc(__vector unsigned short __a, __vector unsigned short __b, int *__cc) { return __builtin_s390_vpklshs(__a, __b, __cc); } static inline __ATTRS_o_ai __vector signed short vec_packs_cc(__vector signed int __a, __vector signed int __b, int *__cc) { return __builtin_s390_vpksfs(__a, __b, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_packs_cc(__vector unsigned int __a, __vector unsigned int __b, int *__cc) { return __builtin_s390_vpklsfs(__a, __b, __cc); } static inline __ATTRS_o_ai __vector signed int 
vec_packs_cc(__vector signed long long __a, __vector signed long long __b, int *__cc) { return __builtin_s390_vpksgs(__a, __b, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_packs_cc(__vector unsigned long long __a, __vector unsigned long long __b, int *__cc) { return __builtin_s390_vpklsgs(__a, __b, __cc); } /*-- vec_packsu -------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_packsu(__vector signed short __a, __vector signed short __b) { const __vector signed short __zero = (__vector signed short)0; return __builtin_s390_vpklsh( (__vector unsigned short)(__a >= __zero) & (__vector unsigned short)__a, (__vector unsigned short)(__b >= __zero) & (__vector unsigned short)__b); } static inline __ATTRS_o_ai __vector unsigned char vec_packsu(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vpklsh(__a, __b); } static inline __ATTRS_o_ai __vector unsigned short vec_packsu(__vector signed int __a, __vector signed int __b) { const __vector signed int __zero = (__vector signed int)0; return __builtin_s390_vpklsf( (__vector unsigned int)(__a >= __zero) & (__vector unsigned int)__a, (__vector unsigned int)(__b >= __zero) & (__vector unsigned int)__b); } static inline __ATTRS_o_ai __vector unsigned short vec_packsu(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vpklsf(__a, __b); } static inline __ATTRS_o_ai __vector unsigned int vec_packsu(__vector signed long long __a, __vector signed long long __b) { const __vector signed long long __zero = (__vector signed long long)0; return __builtin_s390_vpklsg( (__vector unsigned long long)(__a >= __zero) & (__vector unsigned long long)__a, (__vector unsigned long long)(__b >= __zero) & (__vector unsigned long long)__b); } static inline __ATTRS_o_ai __vector unsigned int vec_packsu(__vector unsigned long long __a, __vector unsigned long long __b) { return __builtin_s390_vpklsg(__a, __b); } /*-- vec_packsu_cc ----------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_packsu_cc(__vector unsigned short __a, __vector unsigned short __b, int *__cc) { return __builtin_s390_vpklshs(__a, __b, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_packsu_cc(__vector unsigned int __a, __vector unsigned int __b, int *__cc) { return __builtin_s390_vpklsfs(__a, __b, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_packsu_cc(__vector unsigned long long __a, __vector unsigned long long __b, int *__cc) { return __builtin_s390_vpklsgs(__a, __b, __cc); } /*-- vec_unpackh ------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed short vec_unpackh(__vector signed char __a) { return __builtin_s390_vuphb(__a); } static inline __ATTRS_o_ai __vector __bool short vec_unpackh(__vector __bool char __a) { return ((__vector __bool short) __builtin_s390_vuphb((__vector signed char)__a)); } static inline __ATTRS_o_ai __vector unsigned short vec_unpackh(__vector unsigned char __a) { return __builtin_s390_vuplhb(__a); } static inline __ATTRS_o_ai __vector signed int vec_unpackh(__vector signed short __a) { return __builtin_s390_vuphh(__a); } static inline __ATTRS_o_ai __vector __bool int vec_unpackh(__vector __bool short __a) { return (__vector __bool int)__builtin_s390_vuphh((__vector signed short)__a); } static inline __ATTRS_o_ai __vector unsigned int vec_unpackh(__vector unsigned short __a) { return __builtin_s390_vuplhh(__a); 
} static inline __ATTRS_o_ai __vector signed long long vec_unpackh(__vector signed int __a) { return __builtin_s390_vuphf(__a); } static inline __ATTRS_o_ai __vector __bool long long vec_unpackh(__vector __bool int __a) { return ((__vector __bool long long) __builtin_s390_vuphf((__vector signed int)__a)); } static inline __ATTRS_o_ai __vector unsigned long long vec_unpackh(__vector unsigned int __a) { return __builtin_s390_vuplhf(__a); } /*-- vec_unpackl ------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed short vec_unpackl(__vector signed char __a) { return __builtin_s390_vuplb(__a); } static inline __ATTRS_o_ai __vector __bool short vec_unpackl(__vector __bool char __a) { return ((__vector __bool short) __builtin_s390_vuplb((__vector signed char)__a)); } static inline __ATTRS_o_ai __vector unsigned short vec_unpackl(__vector unsigned char __a) { return __builtin_s390_vupllb(__a); } static inline __ATTRS_o_ai __vector signed int vec_unpackl(__vector signed short __a) { return __builtin_s390_vuplhw(__a); } static inline __ATTRS_o_ai __vector __bool int vec_unpackl(__vector __bool short __a) { return ((__vector __bool int) __builtin_s390_vuplhw((__vector signed short)__a)); } static inline __ATTRS_o_ai __vector unsigned int vec_unpackl(__vector unsigned short __a) { return __builtin_s390_vupllh(__a); } static inline __ATTRS_o_ai __vector signed long long vec_unpackl(__vector signed int __a) { return __builtin_s390_vuplf(__a); } static inline __ATTRS_o_ai __vector __bool long long vec_unpackl(__vector __bool int __a) { return ((__vector __bool long long) __builtin_s390_vuplf((__vector signed int)__a)); } static inline __ATTRS_o_ai __vector unsigned long long vec_unpackl(__vector unsigned int __a) { return __builtin_s390_vupllf(__a); } /*-- vec_cmpeq --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector __bool char vec_cmpeq(__vector __bool char __a, __vector __bool char __b) { return (__vector __bool char)(__a == __b); } static inline __ATTRS_o_ai __vector __bool char vec_cmpeq(__vector signed char __a, __vector signed char __b) { return (__vector __bool char)(__a == __b); } static inline __ATTRS_o_ai __vector __bool char vec_cmpeq(__vector unsigned char __a, __vector unsigned char __b) { return (__vector __bool char)(__a == __b); } static inline __ATTRS_o_ai __vector __bool short vec_cmpeq(__vector __bool short __a, __vector __bool short __b) { return (__vector __bool short)(__a == __b); } static inline __ATTRS_o_ai __vector __bool short vec_cmpeq(__vector signed short __a, __vector signed short __b) { return (__vector __bool short)(__a == __b); } static inline __ATTRS_o_ai __vector __bool short vec_cmpeq(__vector unsigned short __a, __vector unsigned short __b) { return (__vector __bool short)(__a == __b); } static inline __ATTRS_o_ai __vector __bool int vec_cmpeq(__vector __bool int __a, __vector __bool int __b) { return (__vector __bool int)(__a == __b); } static inline __ATTRS_o_ai __vector __bool int vec_cmpeq(__vector signed int __a, __vector signed int __b) { return (__vector __bool int)(__a == __b); } static inline __ATTRS_o_ai __vector __bool int vec_cmpeq(__vector unsigned int __a, __vector unsigned int __b) { return (__vector __bool int)(__a == __b); } static inline __ATTRS_o_ai __vector __bool long long vec_cmpeq(__vector __bool long long __a, __vector __bool long long __b) { return (__vector __bool long long)(__a == __b); } static inline __ATTRS_o_ai __vector __bool long long 
vec_cmpeq(__vector signed long long __a, __vector signed long long __b) { return (__vector __bool long long)(__a == __b); } static inline __ATTRS_o_ai __vector __bool long long vec_cmpeq(__vector unsigned long long __a, __vector unsigned long long __b) { return (__vector __bool long long)(__a == __b); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector __bool int vec_cmpeq(__vector float __a, __vector float __b) { return (__vector __bool int)(__a == __b); } #endif static inline __ATTRS_o_ai __vector __bool long long vec_cmpeq(__vector double __a, __vector double __b) { return (__vector __bool long long)(__a == __b); } /*-- vec_cmpge --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector __bool char vec_cmpge(__vector signed char __a, __vector signed char __b) { return (__vector __bool char)(__a >= __b); } static inline __ATTRS_o_ai __vector __bool char vec_cmpge(__vector unsigned char __a, __vector unsigned char __b) { return (__vector __bool char)(__a >= __b); } static inline __ATTRS_o_ai __vector __bool short vec_cmpge(__vector signed short __a, __vector signed short __b) { return (__vector __bool short)(__a >= __b); } static inline __ATTRS_o_ai __vector __bool short vec_cmpge(__vector unsigned short __a, __vector unsigned short __b) { return (__vector __bool short)(__a >= __b); } static inline __ATTRS_o_ai __vector __bool int vec_cmpge(__vector signed int __a, __vector signed int __b) { return (__vector __bool int)(__a >= __b); } static inline __ATTRS_o_ai __vector __bool int vec_cmpge(__vector unsigned int __a, __vector unsigned int __b) { return (__vector __bool int)(__a >= __b); } static inline __ATTRS_o_ai __vector __bool long long vec_cmpge(__vector signed long long __a, __vector signed long long __b) { return (__vector __bool long long)(__a >= __b); } static inline __ATTRS_o_ai __vector __bool long long vec_cmpge(__vector unsigned long long __a, __vector unsigned long long __b) { return (__vector __bool long long)(__a >= __b); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector __bool int vec_cmpge(__vector float __a, __vector float __b) { return (__vector __bool int)(__a >= __b); } #endif static inline __ATTRS_o_ai __vector __bool long long vec_cmpge(__vector double __a, __vector double __b) { return (__vector __bool long long)(__a >= __b); } /*-- vec_cmpgt --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector __bool char vec_cmpgt(__vector signed char __a, __vector signed char __b) { return (__vector __bool char)(__a > __b); } static inline __ATTRS_o_ai __vector __bool char vec_cmpgt(__vector unsigned char __a, __vector unsigned char __b) { return (__vector __bool char)(__a > __b); } static inline __ATTRS_o_ai __vector __bool short vec_cmpgt(__vector signed short __a, __vector signed short __b) { return (__vector __bool short)(__a > __b); } static inline __ATTRS_o_ai __vector __bool short vec_cmpgt(__vector unsigned short __a, __vector unsigned short __b) { return (__vector __bool short)(__a > __b); } static inline __ATTRS_o_ai __vector __bool int vec_cmpgt(__vector signed int __a, __vector signed int __b) { return (__vector __bool int)(__a > __b); } static inline __ATTRS_o_ai __vector __bool int vec_cmpgt(__vector unsigned int __a, __vector unsigned int __b) { return (__vector __bool int)(__a > __b); } static inline __ATTRS_o_ai __vector __bool long long vec_cmpgt(__vector signed long long __a, __vector signed long long __b) { return (__vector __bool long long)(__a > __b); } 
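/*
 * Illustrative usage sketch, assuming the translation unit is compiled with
 * the z/Architecture vector facility enabled (e.g. -march=z13 -mzvector) and
 * includes <vecintrin.h>; the helper names below are hypothetical.  The
 * vec_cmp* intrinsics in this group return per-element masks of the matching
 * __bool type (all ones where the relation holds, all zeros otherwise), the
 * float overloads additionally requiring __ARCH__ >= 12 as the guards show,
 * and the saturating packs narrow two wide vectors into one.
 *
 *   // Narrow sixteen signed shorts to unsigned chars: negative elements
 *   // clamp to 0, elements above 255 clamp to 255.
 *   static __vector unsigned char
 *   to_u8_saturated(__vector signed short __hi, __vector signed short __lo) {
 *     return vec_packsu(__hi, __lo);
 *   }
 *
 *   // Per-element mask that is all ones where __v is strictly positive.
 *   static __vector __bool int
 *   positive_mask(__vector signed int __v) {
 *     return vec_cmpgt(__v, (__vector signed int)0);
 *   }
 */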
static inline __ATTRS_o_ai __vector __bool long long vec_cmpgt(__vector unsigned long long __a, __vector unsigned long long __b) { return (__vector __bool long long)(__a > __b); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector __bool int vec_cmpgt(__vector float __a, __vector float __b) { return (__vector __bool int)(__a > __b); } #endif static inline __ATTRS_o_ai __vector __bool long long vec_cmpgt(__vector double __a, __vector double __b) { return (__vector __bool long long)(__a > __b); } /*-- vec_cmple --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector __bool char vec_cmple(__vector signed char __a, __vector signed char __b) { return (__vector __bool char)(__a <= __b); } static inline __ATTRS_o_ai __vector __bool char vec_cmple(__vector unsigned char __a, __vector unsigned char __b) { return (__vector __bool char)(__a <= __b); } static inline __ATTRS_o_ai __vector __bool short vec_cmple(__vector signed short __a, __vector signed short __b) { return (__vector __bool short)(__a <= __b); } static inline __ATTRS_o_ai __vector __bool short vec_cmple(__vector unsigned short __a, __vector unsigned short __b) { return (__vector __bool short)(__a <= __b); } static inline __ATTRS_o_ai __vector __bool int vec_cmple(__vector signed int __a, __vector signed int __b) { return (__vector __bool int)(__a <= __b); } static inline __ATTRS_o_ai __vector __bool int vec_cmple(__vector unsigned int __a, __vector unsigned int __b) { return (__vector __bool int)(__a <= __b); } static inline __ATTRS_o_ai __vector __bool long long vec_cmple(__vector signed long long __a, __vector signed long long __b) { return (__vector __bool long long)(__a <= __b); } static inline __ATTRS_o_ai __vector __bool long long vec_cmple(__vector unsigned long long __a, __vector unsigned long long __b) { return (__vector __bool long long)(__a <= __b); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector __bool int vec_cmple(__vector float __a, __vector float __b) { return (__vector __bool int)(__a <= __b); } #endif static inline __ATTRS_o_ai __vector __bool long long vec_cmple(__vector double __a, __vector double __b) { return (__vector __bool long long)(__a <= __b); } /*-- vec_cmplt --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector __bool char vec_cmplt(__vector signed char __a, __vector signed char __b) { return (__vector __bool char)(__a < __b); } static inline __ATTRS_o_ai __vector __bool char vec_cmplt(__vector unsigned char __a, __vector unsigned char __b) { return (__vector __bool char)(__a < __b); } static inline __ATTRS_o_ai __vector __bool short vec_cmplt(__vector signed short __a, __vector signed short __b) { return (__vector __bool short)(__a < __b); } static inline __ATTRS_o_ai __vector __bool short vec_cmplt(__vector unsigned short __a, __vector unsigned short __b) { return (__vector __bool short)(__a < __b); } static inline __ATTRS_o_ai __vector __bool int vec_cmplt(__vector signed int __a, __vector signed int __b) { return (__vector __bool int)(__a < __b); } static inline __ATTRS_o_ai __vector __bool int vec_cmplt(__vector unsigned int __a, __vector unsigned int __b) { return (__vector __bool int)(__a < __b); } static inline __ATTRS_o_ai __vector __bool long long vec_cmplt(__vector signed long long __a, __vector signed long long __b) { return (__vector __bool long long)(__a < __b); } static inline __ATTRS_o_ai __vector __bool long long vec_cmplt(__vector unsigned long long __a, __vector unsigned long long __b) 
{ return (__vector __bool long long)(__a < __b); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector __bool int vec_cmplt(__vector float __a, __vector float __b) { return (__vector __bool int)(__a < __b); } #endif static inline __ATTRS_o_ai __vector __bool long long vec_cmplt(__vector double __a, __vector double __b) { return (__vector __bool long long)(__a < __b); } /*-- vec_all_eq -------------------------------------------------------------*/ static inline __ATTRS_o_ai int vec_all_eq(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(__vector signed char __a, __vector __bool char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(__vector __bool char __a, __vector signed char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector unsigned char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vceqbs(__a, __b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(__vector unsigned char __a, __vector __bool char __b) { int __cc; __builtin_s390_vceqbs(__a, (__vector unsigned char)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(__vector __bool char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, __b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector __bool char __a, __vector __bool char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vceqhs((__vector unsigned short)__a, (__vector unsigned short)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(__vector signed short __a, __vector __bool short __b) { int __cc; __builtin_s390_vceqhs((__vector unsigned short)__a, (__vector unsigned short)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(__vector __bool short __a, __vector signed short __b) { int __cc; __builtin_s390_vceqhs((__vector unsigned short)__a, (__vector unsigned short)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector unsigned short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vceqhs(__a, __b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(__vector unsigned short __a, __vector __bool short __b) { int __cc; __builtin_s390_vceqhs(__a, (__vector unsigned short)__b, &__cc); return __cc == 0; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_eq(__vector __bool short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vceqhs((__vector unsigned short)__a, __b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector __bool short __a, __vector __bool short __b) { int __cc; __builtin_s390_vceqhs((__vector unsigned short)__a, (__vector unsigned short)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vceqfs((__vector unsigned int)__a, (__vector unsigned int)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(__vector signed int __a, __vector __bool int __b) { int __cc; __builtin_s390_vceqfs((__vector unsigned int)__a, (__vector unsigned int)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(__vector __bool int __a, __vector signed int __b) { int __cc; __builtin_s390_vceqfs((__vector unsigned int)__a, (__vector unsigned int)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector unsigned int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vceqfs(__a, __b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(__vector unsigned int __a, __vector __bool int __b) { int __cc; __builtin_s390_vceqfs(__a, (__vector unsigned int)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(__vector __bool int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vceqfs((__vector unsigned int)__a, __b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector __bool int __a, __vector __bool int __b) { int __cc; __builtin_s390_vceqfs((__vector unsigned int)__a, (__vector unsigned int)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vceqgs((__vector unsigned long long)__a, (__vector unsigned long long)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(__vector signed long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vceqgs((__vector unsigned long long)__a, (__vector unsigned long long)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(__vector __bool long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vceqgs((__vector unsigned long long)__a, (__vector unsigned long long)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vceqgs(__a, __b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vceqgs(__a, (__vector unsigned long long)__b, &__cc); return __cc == 0; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_eq(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vceqgs((__vector unsigned long long)__a, __b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector __bool long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vceqgs((__vector unsigned long long)__a, (__vector unsigned long long)__b, &__cc); return __cc == 0; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_all_eq(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfcesbs(__a, __b, &__cc); return __cc == 0; } #endif static inline __ATTRS_o_ai int vec_all_eq(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfcedbs(__a, __b, &__cc); return __cc == 0; } /*-- vec_all_ne -------------------------------------------------------------*/ static inline __ATTRS_o_ai int vec_all_ne(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(__vector signed char __a, __vector __bool char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(__vector __bool char __a, __vector signed char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector unsigned char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(__vector unsigned char __a, __vector __bool char __b) { int __cc; __builtin_s390_vceqbs(__a, (__vector unsigned char)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(__vector __bool char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, __b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector __bool char __a, __vector __bool char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vceqhs((__vector unsigned short)__a, (__vector unsigned short)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(__vector signed short __a, __vector __bool short __b) { int __cc; __builtin_s390_vceqhs((__vector unsigned short)__a, (__vector unsigned short)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(__vector __bool short __a, __vector signed short __b) { int __cc; __builtin_s390_vceqhs((__vector unsigned short)__a, (__vector unsigned short)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector unsigned short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vceqhs(__a, __b, &__cc); return __cc == 3; } // This prototype is deprecated. 
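/*
 * Note on the condition codes used here: the vceq*s builtins set cc to 0 when
 * all element pairs compare equal, 1 when only some do, and 3 when none do.
 * vec_all_ne() is therefore true only when every element pair differs; the
 * logical negation of vec_all_eq() is vec_any_ne(), defined further below.
 * Minimal sketch with a hypothetical helper name:
 *
 *   // 1 when the two byte vectors are identical in every element.
 *   static int
 *   bytes_identical(__vector unsigned char __a, __vector unsigned char __b) {
 *     return vec_all_eq(__a, __b);
 *   }
 */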
static inline __ATTRS_o_ai int vec_all_ne(__vector unsigned short __a, __vector __bool short __b) { int __cc; __builtin_s390_vceqhs(__a, (__vector unsigned short)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(__vector __bool short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vceqhs((__vector unsigned short)__a, __b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector __bool short __a, __vector __bool short __b) { int __cc; __builtin_s390_vceqhs((__vector unsigned short)__a, (__vector unsigned short)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vceqfs((__vector unsigned int)__a, (__vector unsigned int)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(__vector signed int __a, __vector __bool int __b) { int __cc; __builtin_s390_vceqfs((__vector unsigned int)__a, (__vector unsigned int)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(__vector __bool int __a, __vector signed int __b) { int __cc; __builtin_s390_vceqfs((__vector unsigned int)__a, (__vector unsigned int)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector unsigned int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vceqfs(__a, __b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(__vector unsigned int __a, __vector __bool int __b) { int __cc; __builtin_s390_vceqfs(__a, (__vector unsigned int)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(__vector __bool int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vceqfs((__vector unsigned int)__a, __b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector __bool int __a, __vector __bool int __b) { int __cc; __builtin_s390_vceqfs((__vector unsigned int)__a, (__vector unsigned int)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vceqgs((__vector unsigned long long)__a, (__vector unsigned long long)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(__vector signed long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vceqgs((__vector unsigned long long)__a, (__vector unsigned long long)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(__vector __bool long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vceqgs((__vector unsigned long long)__a, (__vector unsigned long long)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vceqgs(__a, __b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vceqgs(__a, (__vector unsigned long long)__b, &__cc); return __cc == 3; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_ne(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vceqgs((__vector unsigned long long)__a, __b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector __bool long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vceqgs((__vector unsigned long long)__a, (__vector unsigned long long)__b, &__cc); return __cc == 3; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_all_ne(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfcesbs(__a, __b, &__cc); return __cc == 3; } #endif static inline __ATTRS_o_ai int vec_all_ne(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfcedbs(__a, __b, &__cc); return __cc == 3; } /*-- vec_all_ge -------------------------------------------------------------*/ static inline __ATTRS_o_ai int vec_all_ge(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs(__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(__vector signed char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchbs((__vector signed char)__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(__vector __bool char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs(__b, (__vector signed char)__a, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ge(__vector unsigned char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs(__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(__vector unsigned char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchlbs((__vector unsigned char)__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(__vector __bool char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs(__b, (__vector unsigned char)__a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(__vector __bool char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchlbs((__vector unsigned char)__b, (__vector unsigned char)__a, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ge(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs(__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(__vector signed short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchhs((__vector signed short)__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(__vector __bool short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs(__b, (__vector signed short)__a, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ge(__vector unsigned short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs(__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(__vector unsigned short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchlhs((__vector unsigned short)__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_ge(__vector __bool short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs(__b, (__vector unsigned short)__a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(__vector __bool short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchlhs((__vector unsigned short)__b, (__vector unsigned short)__a, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ge(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs(__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(__vector signed int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchfs((__vector signed int)__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(__vector __bool int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs(__b, (__vector signed int)__a, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ge(__vector unsigned int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs(__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(__vector unsigned int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchlfs((__vector unsigned int)__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(__vector __bool int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs(__b, (__vector unsigned int)__a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(__vector __bool int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchlfs((__vector unsigned int)__b, (__vector unsigned int)__a, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ge(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs(__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(__vector signed long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchgs((__vector signed long long)__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(__vector __bool long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs(__b, (__vector signed long long)__a, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ge(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs(__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchlgs((__vector unsigned long long)__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs(__b, (__vector unsigned long long)__a, &__cc); return __cc == 3; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_ge(__vector __bool long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchlgs((__vector unsigned long long)__b, (__vector unsigned long long)__a, &__cc); return __cc == 3; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_all_ge(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchesbs(__a, __b, &__cc); return __cc == 0; } #endif static inline __ATTRS_o_ai int vec_all_ge(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchedbs(__a, __b, &__cc); return __cc == 0; } /*-- vec_all_gt -------------------------------------------------------------*/ static inline __ATTRS_o_ai int vec_all_gt(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs(__a, __b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(__vector signed char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchbs(__a, (__vector signed char)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(__vector __bool char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs((__vector signed char)__a, __b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_gt(__vector unsigned char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs(__a, __b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(__vector unsigned char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchlbs(__a, (__vector unsigned char)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(__vector __bool char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs((__vector unsigned char)__a, __b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(__vector __bool char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchlbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_gt(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs(__a, __b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(__vector signed short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchhs(__a, (__vector signed short)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(__vector __bool short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs((__vector signed short)__a, __b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_gt(__vector unsigned short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs(__a, __b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(__vector unsigned short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchlhs(__a, (__vector unsigned short)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(__vector __bool short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs((__vector unsigned short)__a, __b, &__cc); return __cc == 0; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_gt(__vector __bool short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchlhs((__vector unsigned short)__a, (__vector unsigned short)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_gt(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs(__a, __b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(__vector signed int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchfs(__a, (__vector signed int)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(__vector __bool int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs((__vector signed int)__a, __b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_gt(__vector unsigned int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs(__a, __b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(__vector unsigned int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchlfs(__a, (__vector unsigned int)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(__vector __bool int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs((__vector unsigned int)__a, __b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(__vector __bool int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchlfs((__vector unsigned int)__a, (__vector unsigned int)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_gt(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs(__a, __b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(__vector signed long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchgs(__a, (__vector signed long long)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(__vector __bool long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs((__vector signed long long)__a, __b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_gt(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs(__a, __b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchlgs(__a, (__vector unsigned long long)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs((__vector unsigned long long)__a, __b, &__cc); return __cc == 0; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_gt(__vector __bool long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchlgs((__vector unsigned long long)__a, (__vector unsigned long long)__b, &__cc); return __cc == 0; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_all_gt(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchsbs(__a, __b, &__cc); return __cc == 0; } #endif static inline __ATTRS_o_ai int vec_all_gt(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchdbs(__a, __b, &__cc); return __cc == 0; } /*-- vec_all_le -------------------------------------------------------------*/ static inline __ATTRS_o_ai int vec_all_le(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs(__a, __b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(__vector signed char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchbs(__a, (__vector signed char)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(__vector __bool char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs((__vector signed char)__a, __b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_le(__vector unsigned char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs(__a, __b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(__vector unsigned char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchlbs(__a, (__vector unsigned char)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(__vector __bool char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs((__vector unsigned char)__a, __b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(__vector __bool char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchlbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_le(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs(__a, __b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(__vector signed short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchhs(__a, (__vector signed short)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(__vector __bool short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs((__vector signed short)__a, __b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_le(__vector unsigned short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs(__a, __b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(__vector unsigned short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchlhs(__a, (__vector unsigned short)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(__vector __bool short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs((__vector unsigned short)__a, __b, &__cc); return __cc == 3; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_le(__vector __bool short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchlhs((__vector unsigned short)__a, (__vector unsigned short)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_le(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs(__a, __b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(__vector signed int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchfs(__a, (__vector signed int)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(__vector __bool int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs((__vector signed int)__a, __b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_le(__vector unsigned int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs(__a, __b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(__vector unsigned int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchlfs(__a, (__vector unsigned int)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(__vector __bool int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs((__vector unsigned int)__a, __b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(__vector __bool int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchlfs((__vector unsigned int)__a, (__vector unsigned int)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_le(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs(__a, __b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(__vector signed long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchgs(__a, (__vector signed long long)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(__vector __bool long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs((__vector signed long long)__a, __b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_le(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs(__a, __b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchlgs(__a, (__vector unsigned long long)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs((__vector unsigned long long)__a, __b, &__cc); return __cc == 3; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_le(__vector __bool long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchlgs((__vector unsigned long long)__a, (__vector unsigned long long)__b, &__cc); return __cc == 3; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_all_le(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchesbs(__b, __a, &__cc); return __cc == 0; } #endif static inline __ATTRS_o_ai int vec_all_le(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchedbs(__b, __a, &__cc); return __cc == 0; } /*-- vec_all_lt -------------------------------------------------------------*/ static inline __ATTRS_o_ai int vec_all_lt(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs(__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(__vector signed char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchbs((__vector signed char)__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(__vector __bool char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs(__b, (__vector signed char)__a, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_lt(__vector unsigned char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs(__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(__vector unsigned char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchlbs((__vector unsigned char)__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(__vector __bool char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs(__b, (__vector unsigned char)__a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(__vector __bool char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchlbs((__vector unsigned char)__b, (__vector unsigned char)__a, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_lt(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs(__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(__vector signed short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchhs((__vector signed short)__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(__vector __bool short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs(__b, (__vector signed short)__a, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_lt(__vector unsigned short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs(__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(__vector unsigned short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchlhs((__vector unsigned short)__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(__vector __bool short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs(__b, (__vector unsigned short)__a, &__cc); return __cc == 0; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_lt(__vector __bool short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchlhs((__vector unsigned short)__b, (__vector unsigned short)__a, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_lt(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs(__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(__vector signed int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchfs((__vector signed int)__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(__vector __bool int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs(__b, (__vector signed int)__a, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_lt(__vector unsigned int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs(__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(__vector unsigned int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchlfs((__vector unsigned int)__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(__vector __bool int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs(__b, (__vector unsigned int)__a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(__vector __bool int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchlfs((__vector unsigned int)__b, (__vector unsigned int)__a, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_lt(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs(__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(__vector signed long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchgs((__vector signed long long)__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(__vector __bool long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs(__b, (__vector signed long long)__a, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_lt(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs(__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchlgs((__vector unsigned long long)__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs(__b, (__vector unsigned long long)__a, &__cc); return __cc == 0; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_lt(__vector __bool long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchlgs((__vector unsigned long long)__b, (__vector unsigned long long)__a, &__cc); return __cc == 0; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_all_lt(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchsbs(__b, __a, &__cc); return __cc == 0; } #endif static inline __ATTRS_o_ai int vec_all_lt(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchdbs(__b, __a, &__cc); return __cc == 0; } /*-- vec_all_nge ------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_all_nge(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchesbs(__a, __b, &__cc); return __cc == 3; } #endif static inline __ATTRS_o_ai int vec_all_nge(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchedbs(__a, __b, &__cc); return __cc == 3; } /*-- vec_all_ngt ------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_all_ngt(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchsbs(__a, __b, &__cc); return __cc == 3; } #endif static inline __ATTRS_o_ai int vec_all_ngt(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchdbs(__a, __b, &__cc); return __cc == 3; } /*-- vec_all_nle ------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_all_nle(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchesbs(__b, __a, &__cc); return __cc == 3; } #endif static inline __ATTRS_o_ai int vec_all_nle(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchedbs(__b, __a, &__cc); return __cc == 3; } /*-- vec_all_nlt ------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_all_nlt(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchsbs(__b, __a, &__cc); return __cc == 3; } #endif static inline __ATTRS_o_ai int vec_all_nlt(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchdbs(__b, __a, &__cc); return __cc == 3; } /*-- vec_all_nan ------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_all_nan(__vector float __a) { int __cc; __builtin_s390_vftcisb(__a, 15, &__cc); return __cc == 0; } #endif static inline __ATTRS_o_ai int vec_all_nan(__vector double __a) { int __cc; __builtin_s390_vftcidb(__a, 15, &__cc); return __cc == 0; } /*-- vec_all_numeric --------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_all_numeric(__vector float __a) { int __cc; __builtin_s390_vftcisb(__a, 15, &__cc); return __cc == 3; } #endif static inline __ATTRS_o_ai int vec_all_numeric(__vector double __a) { int __cc; __builtin_s390_vftcidb(__a, 15, &__cc); return __cc == 3; } /*-- vec_any_eq -------------------------------------------------------------*/ static inline __ATTRS_o_ai int vec_any_eq(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. 
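/*
 * Illustrative usage sketch: the vec_all_* predicates above fold an
 * element-wise comparison into a single int that is nonzero only when the
 * relation holds for every element, while the vec_any_* counterparts are
 * nonzero when it holds for at least one element.  Hypothetical helper,
 * assuming the vector facility is enabled (e.g. -march=z13 -mzvector):
 *
 *   // 1 when every element of __v lies in the closed range [__lo, __hi].
 *   static int
 *   all_in_range(__vector signed int __v,
 *                __vector signed int __lo, __vector signed int __hi) {
 *     return vec_all_ge(__v, __lo) && vec_all_le(__v, __hi);
 *   }
 */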
static inline __ATTRS_o_ai int vec_any_eq(__vector signed char __a, __vector __bool char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(__vector __bool char __a, __vector signed char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector unsigned char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vceqbs(__a, __b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(__vector unsigned char __a, __vector __bool char __b) { int __cc; __builtin_s390_vceqbs(__a, (__vector unsigned char)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(__vector __bool char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, __b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector __bool char __a, __vector __bool char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vceqhs((__vector unsigned short)__a, (__vector unsigned short)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(__vector signed short __a, __vector __bool short __b) { int __cc; __builtin_s390_vceqhs((__vector unsigned short)__a, (__vector unsigned short)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(__vector __bool short __a, __vector signed short __b) { int __cc; __builtin_s390_vceqhs((__vector unsigned short)__a, (__vector unsigned short)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector unsigned short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vceqhs(__a, __b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(__vector unsigned short __a, __vector __bool short __b) { int __cc; __builtin_s390_vceqhs(__a, (__vector unsigned short)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(__vector __bool short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vceqhs((__vector unsigned short)__a, __b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector __bool short __a, __vector __bool short __b) { int __cc; __builtin_s390_vceqhs((__vector unsigned short)__a, (__vector unsigned short)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vceqfs((__vector unsigned int)__a, (__vector unsigned int)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(__vector signed int __a, __vector __bool int __b) { int __cc; __builtin_s390_vceqfs((__vector unsigned int)__a, (__vector unsigned int)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_eq(__vector __bool int __a, __vector signed int __b) { int __cc; __builtin_s390_vceqfs((__vector unsigned int)__a, (__vector unsigned int)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector unsigned int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vceqfs(__a, __b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(__vector unsigned int __a, __vector __bool int __b) { int __cc; __builtin_s390_vceqfs(__a, (__vector unsigned int)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(__vector __bool int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vceqfs((__vector unsigned int)__a, __b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector __bool int __a, __vector __bool int __b) { int __cc; __builtin_s390_vceqfs((__vector unsigned int)__a, (__vector unsigned int)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vceqgs((__vector unsigned long long)__a, (__vector unsigned long long)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(__vector signed long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vceqgs((__vector unsigned long long)__a, (__vector unsigned long long)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(__vector __bool long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vceqgs((__vector unsigned long long)__a, (__vector unsigned long long)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vceqgs(__a, __b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vceqgs(__a, (__vector unsigned long long)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vceqgs((__vector unsigned long long)__a, __b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector __bool long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vceqgs((__vector unsigned long long)__a, (__vector unsigned long long)__b, &__cc); return __cc <= 1; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_any_eq(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfcesbs(__a, __b, &__cc); return __cc <= 1; } #endif static inline __ATTRS_o_ai int vec_any_eq(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfcedbs(__a, __b, &__cc); return __cc <= 1; } /*-- vec_any_ne -------------------------------------------------------------*/ static inline __ATTRS_o_ai int vec_any_ne(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc != 0; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_ne(__vector signed char __a, __vector __bool char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(__vector __bool char __a, __vector signed char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_ne(__vector unsigned char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vceqbs(__a, __b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(__vector unsigned char __a, __vector __bool char __b) { int __cc; __builtin_s390_vceqbs(__a, (__vector unsigned char)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(__vector __bool char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, __b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_ne(__vector __bool char __a, __vector __bool char __b) { int __cc; __builtin_s390_vceqbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_ne(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vceqhs((__vector unsigned short)__a, (__vector unsigned short)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(__vector signed short __a, __vector __bool short __b) { int __cc; __builtin_s390_vceqhs((__vector unsigned short)__a, (__vector unsigned short)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(__vector __bool short __a, __vector signed short __b) { int __cc; __builtin_s390_vceqhs((__vector unsigned short)__a, (__vector unsigned short)__b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_ne(__vector unsigned short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vceqhs(__a, __b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(__vector unsigned short __a, __vector __bool short __b) { int __cc; __builtin_s390_vceqhs(__a, (__vector unsigned short)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(__vector __bool short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vceqhs((__vector unsigned short)__a, __b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_ne(__vector __bool short __a, __vector __bool short __b) { int __cc; __builtin_s390_vceqhs((__vector unsigned short)__a, (__vector unsigned short)__b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_ne(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vceqfs((__vector unsigned int)__a, (__vector unsigned int)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(__vector signed int __a, __vector __bool int __b) { int __cc; __builtin_s390_vceqfs((__vector unsigned int)__a, (__vector unsigned int)__b, &__cc); return __cc != 0; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_ne(__vector __bool int __a, __vector signed int __b) { int __cc; __builtin_s390_vceqfs((__vector unsigned int)__a, (__vector unsigned int)__b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_ne(__vector unsigned int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vceqfs(__a, __b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(__vector unsigned int __a, __vector __bool int __b) { int __cc; __builtin_s390_vceqfs(__a, (__vector unsigned int)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(__vector __bool int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vceqfs((__vector unsigned int)__a, __b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_ne(__vector __bool int __a, __vector __bool int __b) { int __cc; __builtin_s390_vceqfs((__vector unsigned int)__a, (__vector unsigned int)__b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_ne(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vceqgs((__vector unsigned long long)__a, (__vector unsigned long long)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(__vector signed long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vceqgs((__vector unsigned long long)__a, (__vector unsigned long long)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(__vector __bool long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vceqgs((__vector unsigned long long)__a, (__vector unsigned long long)__b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_ne(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vceqgs(__a, __b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vceqgs(__a, (__vector unsigned long long)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vceqgs((__vector unsigned long long)__a, __b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_ne(__vector __bool long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vceqgs((__vector unsigned long long)__a, (__vector unsigned long long)__b, &__cc); return __cc != 0; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_any_ne(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfcesbs(__a, __b, &__cc); return __cc != 0; } #endif static inline __ATTRS_o_ai int vec_any_ne(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfcedbs(__a, __b, &__cc); return __cc != 0; } /*-- vec_any_ge -------------------------------------------------------------*/ static inline __ATTRS_o_ai int vec_any_ge(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs(__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(__vector signed char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchbs((__vector signed char)__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. 
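/* Illustrative sketch, not part of the original header: how the vec_any_eq /
 * vec_any_ne predicates above are typically used.  Assumes an s390x target
 * compiled with -mzvector; the example_* helper names are made up here.  As
 * the definitions show, the vceq*s builtins set the condition code to 0 when
 * all element pairs compare equal and to 3 when none do, so "__cc <= 1" reads
 * as "at least one pair equal" and "__cc != 0" as "at least one pair not
 * equal". */
static inline int example_any_lane_equal(__vector signed int __x,
                                         __vector signed int __y) {
  /* Nonzero if the two vectors agree in at least one lane. */
  return vec_any_eq(__x, __y);
}
static inline int example_vectors_differ(__vector signed int __x,
                                         __vector signed int __y) {
  /* Nonzero if the two vectors differ in at least one lane. */
  return vec_any_ne(__x, __y);
}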
static inline __ATTRS_o_ai int vec_any_ge(__vector __bool char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs(__b, (__vector signed char)__a, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_ge(__vector unsigned char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs(__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(__vector unsigned char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchlbs((__vector unsigned char)__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(__vector __bool char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs(__b, (__vector unsigned char)__a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(__vector __bool char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchlbs((__vector unsigned char)__b, (__vector unsigned char)__a, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_ge(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs(__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(__vector signed short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchhs((__vector signed short)__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(__vector __bool short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs(__b, (__vector signed short)__a, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_ge(__vector unsigned short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs(__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(__vector unsigned short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchlhs((__vector unsigned short)__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(__vector __bool short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs(__b, (__vector unsigned short)__a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(__vector __bool short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchlhs((__vector unsigned short)__b, (__vector unsigned short)__a, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_ge(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs(__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(__vector signed int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchfs((__vector signed int)__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(__vector __bool int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs(__b, (__vector signed int)__a, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_ge(__vector unsigned int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs(__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_ge(__vector unsigned int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchlfs((__vector unsigned int)__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(__vector __bool int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs(__b, (__vector unsigned int)__a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(__vector __bool int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchlfs((__vector unsigned int)__b, (__vector unsigned int)__a, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_ge(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs(__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(__vector signed long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchgs((__vector signed long long)__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(__vector __bool long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs(__b, (__vector signed long long)__a, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_ge(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs(__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchlgs((__vector unsigned long long)__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs(__b, (__vector unsigned long long)__a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(__vector __bool long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchlgs((__vector unsigned long long)__b, (__vector unsigned long long)__a, &__cc); return __cc != 0; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_any_ge(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchesbs(__a, __b, &__cc); return __cc <= 1; } #endif static inline __ATTRS_o_ai int vec_any_ge(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchedbs(__a, __b, &__cc); return __cc <= 1; } /*-- vec_any_gt -------------------------------------------------------------*/ static inline __ATTRS_o_ai int vec_any_gt(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs(__a, __b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(__vector signed char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchbs(__a, (__vector signed char)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(__vector __bool char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs((__vector signed char)__a, __b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_gt(__vector unsigned char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs(__a, __b, &__cc); return __cc <= 1; } // This prototype is deprecated. 
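/* Illustrative sketch, not part of the original header.  The integer
 * vec_any_ge overloads above swap their operands into the compare-high
 * builtins and test "__cc != 0", i.e. "not every lane of __b is greater than
 * __a"; the float/double overloads use the compare-high-or-equal builtins
 * directly.  Hypothetical helper, assuming -mzvector on s390x. */
static inline int example_any_at_least(__vector signed int __val,
                                       __vector signed int __bound) {
  /* Nonzero if at least one lane of __val is >= the matching lane of __bound. */
  return vec_any_ge(__val, __bound);
}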
static inline __ATTRS_o_ai int vec_any_gt(__vector unsigned char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchlbs(__a, (__vector unsigned char)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(__vector __bool char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs((__vector unsigned char)__a, __b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(__vector __bool char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchlbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_gt(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs(__a, __b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(__vector signed short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchhs(__a, (__vector signed short)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(__vector __bool short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs((__vector signed short)__a, __b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_gt(__vector unsigned short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs(__a, __b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(__vector unsigned short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchlhs(__a, (__vector unsigned short)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(__vector __bool short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs((__vector unsigned short)__a, __b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(__vector __bool short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchlhs((__vector unsigned short)__a, (__vector unsigned short)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_gt(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs(__a, __b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(__vector signed int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchfs(__a, (__vector signed int)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(__vector __bool int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs((__vector signed int)__a, __b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_gt(__vector unsigned int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs(__a, __b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(__vector unsigned int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchlfs(__a, (__vector unsigned int)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(__vector __bool int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs((__vector unsigned int)__a, __b, &__cc); return __cc <= 1; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_gt(__vector __bool int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchlfs((__vector unsigned int)__a, (__vector unsigned int)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_gt(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs(__a, __b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(__vector signed long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchgs(__a, (__vector signed long long)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(__vector __bool long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs((__vector signed long long)__a, __b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_gt(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs(__a, __b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchlgs(__a, (__vector unsigned long long)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs((__vector unsigned long long)__a, __b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(__vector __bool long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchlgs((__vector unsigned long long)__a, (__vector unsigned long long)__b, &__cc); return __cc <= 1; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_any_gt(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchsbs(__a, __b, &__cc); return __cc <= 1; } #endif static inline __ATTRS_o_ai int vec_any_gt(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchdbs(__a, __b, &__cc); return __cc <= 1; } /*-- vec_any_le -------------------------------------------------------------*/ static inline __ATTRS_o_ai int vec_any_le(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs(__a, __b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(__vector signed char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchbs(__a, (__vector signed char)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(__vector __bool char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs((__vector signed char)__a, __b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_le(__vector unsigned char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs(__a, __b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(__vector unsigned char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchlbs(__a, (__vector unsigned char)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(__vector __bool char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs((__vector unsigned char)__a, __b, &__cc); return __cc != 0; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_le(__vector __bool char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchlbs((__vector unsigned char)__a, (__vector unsigned char)__b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_le(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs(__a, __b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(__vector signed short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchhs(__a, (__vector signed short)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(__vector __bool short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs((__vector signed short)__a, __b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_le(__vector unsigned short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs(__a, __b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(__vector unsigned short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchlhs(__a, (__vector unsigned short)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(__vector __bool short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs((__vector unsigned short)__a, __b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(__vector __bool short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchlhs((__vector unsigned short)__a, (__vector unsigned short)__b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_le(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs(__a, __b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(__vector signed int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchfs(__a, (__vector signed int)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(__vector __bool int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs((__vector signed int)__a, __b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_le(__vector unsigned int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs(__a, __b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(__vector unsigned int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchlfs(__a, (__vector unsigned int)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(__vector __bool int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs((__vector unsigned int)__a, __b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(__vector __bool int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchlfs((__vector unsigned int)__a, (__vector unsigned int)__b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_le(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs(__a, __b, &__cc); return __cc != 0; } // This prototype is deprecated. 
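/* Illustrative sketch, not part of the original header.  Note the duality
 * visible above: for the same element type, vec_any_gt and vec_any_le feed
 * the compare-high builtin with the same operand order and differ only in the
 * condition-code test ("__cc <= 1" for "some lane greater" versus
 * "__cc != 0" for "not all lanes greater").  Hypothetical helpers, assuming
 * -mzvector on s390x. */
static inline int example_any_above(__vector signed int __v,
                                    __vector signed int __limit) {
  /* Nonzero if at least one lane of __v exceeds the matching lane of __limit. */
  return vec_any_gt(__v, __limit);
}
static inline int example_any_within(__vector signed int __v,
                                     __vector signed int __limit) {
  /* Nonzero if at least one lane of __v is <= the matching lane of __limit. */
  return vec_any_le(__v, __limit);
}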
static inline __ATTRS_o_ai int vec_any_le(__vector signed long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchgs(__a, (__vector signed long long)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(__vector __bool long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs((__vector signed long long)__a, __b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int vec_any_le(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs(__a, __b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchlgs(__a, (__vector unsigned long long)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs((__vector unsigned long long)__a, __b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(__vector __bool long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchlgs((__vector unsigned long long)__a, (__vector unsigned long long)__b, &__cc); return __cc != 0; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_any_le(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchesbs(__b, __a, &__cc); return __cc <= 1; } #endif static inline __ATTRS_o_ai int vec_any_le(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchedbs(__b, __a, &__cc); return __cc <= 1; } /*-- vec_any_lt -------------------------------------------------------------*/ static inline __ATTRS_o_ai int vec_any_lt(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs(__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(__vector signed char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchbs((__vector signed char)__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(__vector __bool char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs(__b, (__vector signed char)__a, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_lt(__vector unsigned char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs(__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(__vector unsigned char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchlbs((__vector unsigned char)__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(__vector __bool char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs(__b, (__vector unsigned char)__a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(__vector __bool char __a, __vector __bool char __b) { int __cc; __builtin_s390_vchlbs((__vector unsigned char)__b, (__vector unsigned char)__a, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_lt(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs(__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_lt(__vector signed short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchhs((__vector signed short)__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(__vector __bool short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs(__b, (__vector signed short)__a, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_lt(__vector unsigned short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs(__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(__vector unsigned short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchlhs((__vector unsigned short)__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(__vector __bool short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs(__b, (__vector unsigned short)__a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(__vector __bool short __a, __vector __bool short __b) { int __cc; __builtin_s390_vchlhs((__vector unsigned short)__b, (__vector unsigned short)__a, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_lt(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs(__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(__vector signed int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchfs((__vector signed int)__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(__vector __bool int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs(__b, (__vector signed int)__a, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_lt(__vector unsigned int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs(__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(__vector unsigned int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchlfs((__vector unsigned int)__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(__vector __bool int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs(__b, (__vector unsigned int)__a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(__vector __bool int __a, __vector __bool int __b) { int __cc; __builtin_s390_vchlfs((__vector unsigned int)__b, (__vector unsigned int)__a, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_lt(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs(__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(__vector signed long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchgs((__vector signed long long)__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_lt(__vector __bool long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs(__b, (__vector signed long long)__a, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_lt(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs(__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchlgs((__vector unsigned long long)__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs(__b, (__vector unsigned long long)__a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(__vector __bool long long __a, __vector __bool long long __b) { int __cc; __builtin_s390_vchlgs((__vector unsigned long long)__b, (__vector unsigned long long)__a, &__cc); return __cc <= 1; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_any_lt(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchsbs(__b, __a, &__cc); return __cc <= 1; } #endif static inline __ATTRS_o_ai int vec_any_lt(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchdbs(__b, __a, &__cc); return __cc <= 1; } /*-- vec_any_nge ------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_any_nge(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchesbs(__a, __b, &__cc); return __cc != 0; } #endif static inline __ATTRS_o_ai int vec_any_nge(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchedbs(__a, __b, &__cc); return __cc != 0; } /*-- vec_any_ngt ------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_any_ngt(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchsbs(__a, __b, &__cc); return __cc != 0; } #endif static inline __ATTRS_o_ai int vec_any_ngt(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchdbs(__a, __b, &__cc); return __cc != 0; } /*-- vec_any_nle ------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_any_nle(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchesbs(__b, __a, &__cc); return __cc != 0; } #endif static inline __ATTRS_o_ai int vec_any_nle(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchedbs(__b, __a, &__cc); return __cc != 0; } /*-- vec_any_nlt ------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_any_nlt(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchsbs(__b, __a, &__cc); return __cc != 0; } #endif static inline __ATTRS_o_ai int vec_any_nlt(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchdbs(__b, __a, &__cc); return __cc != 0; } /*-- vec_any_nan ------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_any_nan(__vector float __a) { int __cc; __builtin_s390_vftcisb(__a, 15, &__cc); return __cc != 3; } #endif static inline __ATTRS_o_ai int vec_any_nan(__vector double __a) { int __cc; __builtin_s390_vftcidb(__a, 15, &__cc); return __cc != 3; } /*-- vec_any_numeric 
--------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_any_numeric(__vector float __a) { int __cc; __builtin_s390_vftcisb(__a, 15, &__cc); return __cc != 0; } #endif static inline __ATTRS_o_ai int vec_any_numeric(__vector double __a) { int __cc; __builtin_s390_vftcidb(__a, 15, &__cc); return __cc != 0; } /*-- vec_andc ---------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector __bool char vec_andc(__vector __bool char __a, __vector __bool char __b) { return __a & ~__b; } static inline __ATTRS_o_ai __vector signed char vec_andc(__vector signed char __a, __vector signed char __b) { return __a & ~__b; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed char vec_andc(__vector __bool char __a, __vector signed char __b) { return __a & ~__b; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed char vec_andc(__vector signed char __a, __vector __bool char __b) { return __a & ~__b; } static inline __ATTRS_o_ai __vector unsigned char vec_andc(__vector unsigned char __a, __vector unsigned char __b) { return __a & ~__b; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned char vec_andc(__vector __bool char __a, __vector unsigned char __b) { return __a & ~__b; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned char vec_andc(__vector unsigned char __a, __vector __bool char __b) { return __a & ~__b; } static inline __ATTRS_o_ai __vector __bool short vec_andc(__vector __bool short __a, __vector __bool short __b) { return __a & ~__b; } static inline __ATTRS_o_ai __vector signed short vec_andc(__vector signed short __a, __vector signed short __b) { return __a & ~__b; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_andc(__vector __bool short __a, __vector signed short __b) { return __a & ~__b; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_andc(__vector signed short __a, __vector __bool short __b) { return __a & ~__b; } static inline __ATTRS_o_ai __vector unsigned short vec_andc(__vector unsigned short __a, __vector unsigned short __b) { return __a & ~__b; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_andc(__vector __bool short __a, __vector unsigned short __b) { return __a & ~__b; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_andc(__vector unsigned short __a, __vector __bool short __b) { return __a & ~__b; } static inline __ATTRS_o_ai __vector __bool int vec_andc(__vector __bool int __a, __vector __bool int __b) { return __a & ~__b; } static inline __ATTRS_o_ai __vector signed int vec_andc(__vector signed int __a, __vector signed int __b) { return __a & ~__b; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed int vec_andc(__vector __bool int __a, __vector signed int __b) { return __a & ~__b; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed int vec_andc(__vector signed int __a, __vector __bool int __b) { return __a & ~__b; } static inline __ATTRS_o_ai __vector unsigned int vec_andc(__vector unsigned int __a, __vector unsigned int __b) { return __a & ~__b; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_andc(__vector __bool int __a, __vector unsigned int __b) { return __a & ~__b; } // This prototype is deprecated. 
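/* Illustrative sketch, not part of the original header, for the vec_any_nan /
 * vec_any_numeric predicates defined above.  They run the test-data-class
 * builtins with class mask 15 (the NaN classes here); the condition code is 0
 * when every element is in the selected classes and 3 when none is, so
 * "__cc != 3" means "at least one NaN" and "__cc != 0" means "at least one
 * non-NaN lane".  Relatedly, the vec_any_nge/ngt/nle/nlt predicates earlier
 * are not mere aliases of the lt/le/gt/ge forms: a NaN lane makes "not
 * greater-or-equal" true while "less than" stays false.  Hypothetical
 * helpers, assuming -mzvector on s390x. */
static inline int example_has_nan(__vector double __v) {
  /* Nonzero if at least one lane of __v is a NaN. */
  return vec_any_nan(__v);
}
static inline int example_has_non_nan(__vector double __v) {
  /* Nonzero if at least one lane of __v is not a NaN. */
  return vec_any_numeric(__v);
}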
static inline __ATTRS_o_ai __vector unsigned int vec_andc(__vector unsigned int __a, __vector __bool int __b) { return __a & ~__b; } static inline __ATTRS_o_ai __vector __bool long long vec_andc(__vector __bool long long __a, __vector __bool long long __b) { return __a & ~__b; } static inline __ATTRS_o_ai __vector signed long long vec_andc(__vector signed long long __a, __vector signed long long __b) { return __a & ~__b; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed long long vec_andc(__vector __bool long long __a, __vector signed long long __b) { return __a & ~__b; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed long long vec_andc(__vector signed long long __a, __vector __bool long long __b) { return __a & ~__b; } static inline __ATTRS_o_ai __vector unsigned long long vec_andc(__vector unsigned long long __a, __vector unsigned long long __b) { return __a & ~__b; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned long long vec_andc(__vector __bool long long __a, __vector unsigned long long __b) { return __a & ~__b; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned long long vec_andc(__vector unsigned long long __a, __vector __bool long long __b) { return __a & ~__b; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_andc(__vector float __a, __vector float __b) { return (__vector float)((__vector unsigned int)__a & ~(__vector unsigned int)__b); } #endif static inline __ATTRS_o_ai __vector double vec_andc(__vector double __a, __vector double __b) { return (__vector double)((__vector unsigned long long)__a & ~(__vector unsigned long long)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector double vec_andc(__vector __bool long long __a, __vector double __b) { return (__vector double)((__vector unsigned long long)__a & ~(__vector unsigned long long)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector double vec_andc(__vector double __a, __vector __bool long long __b) { return (__vector double)((__vector unsigned long long)__a & ~(__vector unsigned long long)__b); } /*-- vec_nor ----------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector __bool char vec_nor(__vector __bool char __a, __vector __bool char __b) { return ~(__a | __b); } static inline __ATTRS_o_ai __vector signed char vec_nor(__vector signed char __a, __vector signed char __b) { return ~(__a | __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed char vec_nor(__vector __bool char __a, __vector signed char __b) { return ~(__a | __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed char vec_nor(__vector signed char __a, __vector __bool char __b) { return ~(__a | __b); } static inline __ATTRS_o_ai __vector unsigned char vec_nor(__vector unsigned char __a, __vector unsigned char __b) { return ~(__a | __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned char vec_nor(__vector __bool char __a, __vector unsigned char __b) { return ~(__a | __b); } // This prototype is deprecated. 
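/* Illustrative sketch, not part of the original header.  As the float/double
 * vec_andc overloads above show, the operation works on the raw bit patterns
 * by casting through the unsigned integer vector types, so it can be used to
 * clear selected bits (or whole lanes) of a floating-point vector.
 * Hypothetical helper, assuming -mzvector on s390x. */
static inline __vector double
example_clear_masked_lanes(__vector double __v, __vector unsigned long long __mask) {
  /* Bits set in __mask are cleared in the raw representation of __v; an
   * all-ones lane in __mask therefore forces that lane of the result to +0.0. */
  return vec_andc(__v, (__vector double)__mask);
}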
static inline __ATTRS_o_ai __vector unsigned char vec_nor(__vector unsigned char __a, __vector __bool char __b) { return ~(__a | __b); } static inline __ATTRS_o_ai __vector __bool short vec_nor(__vector __bool short __a, __vector __bool short __b) { return ~(__a | __b); } static inline __ATTRS_o_ai __vector signed short vec_nor(__vector signed short __a, __vector signed short __b) { return ~(__a | __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_nor(__vector __bool short __a, __vector signed short __b) { return ~(__a | __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_nor(__vector signed short __a, __vector __bool short __b) { return ~(__a | __b); } static inline __ATTRS_o_ai __vector unsigned short vec_nor(__vector unsigned short __a, __vector unsigned short __b) { return ~(__a | __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_nor(__vector __bool short __a, __vector unsigned short __b) { return ~(__a | __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_nor(__vector unsigned short __a, __vector __bool short __b) { return ~(__a | __b); } static inline __ATTRS_o_ai __vector __bool int vec_nor(__vector __bool int __a, __vector __bool int __b) { return ~(__a | __b); } static inline __ATTRS_o_ai __vector signed int vec_nor(__vector signed int __a, __vector signed int __b) { return ~(__a | __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed int vec_nor(__vector __bool int __a, __vector signed int __b) { return ~(__a | __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed int vec_nor(__vector signed int __a, __vector __bool int __b) { return ~(__a | __b); } static inline __ATTRS_o_ai __vector unsigned int vec_nor(__vector unsigned int __a, __vector unsigned int __b) { return ~(__a | __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_nor(__vector __bool int __a, __vector unsigned int __b) { return ~(__a | __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_nor(__vector unsigned int __a, __vector __bool int __b) { return ~(__a | __b); } static inline __ATTRS_o_ai __vector __bool long long vec_nor(__vector __bool long long __a, __vector __bool long long __b) { return ~(__a | __b); } static inline __ATTRS_o_ai __vector signed long long vec_nor(__vector signed long long __a, __vector signed long long __b) { return ~(__a | __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed long long vec_nor(__vector __bool long long __a, __vector signed long long __b) { return ~(__a | __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed long long vec_nor(__vector signed long long __a, __vector __bool long long __b) { return ~(__a | __b); } static inline __ATTRS_o_ai __vector unsigned long long vec_nor(__vector unsigned long long __a, __vector unsigned long long __b) { return ~(__a | __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned long long vec_nor(__vector __bool long long __a, __vector unsigned long long __b) { return ~(__a | __b); } // This prototype is deprecated. 
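/* Illustrative sketch, not part of the original header.  Since vec_nor
 * computes ~(__a | __b), passing the same operand twice is a common way to
 * spell a bitwise NOT of a whole vector.  Hypothetical helper, assuming
 * -mzvector on s390x. */
static inline __vector unsigned int
example_bitwise_not(__vector unsigned int __x) {
  /* ~(__x | __x) is simply ~__x. */
  return vec_nor(__x, __x);
}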
static inline __ATTRS_o_ai __vector unsigned long long vec_nor(__vector unsigned long long __a, __vector __bool long long __b) { return ~(__a | __b); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_nor(__vector float __a, __vector float __b) { return (__vector float)~((__vector unsigned int)__a | (__vector unsigned int)__b); } #endif static inline __ATTRS_o_ai __vector double vec_nor(__vector double __a, __vector double __b) { return (__vector double)~((__vector unsigned long long)__a | (__vector unsigned long long)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector double vec_nor(__vector __bool long long __a, __vector double __b) { return (__vector double)~((__vector unsigned long long)__a | (__vector unsigned long long)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector double vec_nor(__vector double __a, __vector __bool long long __b) { return (__vector double)~((__vector unsigned long long)__a | (__vector unsigned long long)__b); } /*-- vec_orc ----------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector __bool char vec_orc(__vector __bool char __a, __vector __bool char __b) { return __a | ~__b; } static inline __ATTRS_o_ai __vector signed char vec_orc(__vector signed char __a, __vector signed char __b) { return __a | ~__b; } static inline __ATTRS_o_ai __vector unsigned char vec_orc(__vector unsigned char __a, __vector unsigned char __b) { return __a | ~__b; } static inline __ATTRS_o_ai __vector __bool short vec_orc(__vector __bool short __a, __vector __bool short __b) { return __a | ~__b; } static inline __ATTRS_o_ai __vector signed short vec_orc(__vector signed short __a, __vector signed short __b) { return __a | ~__b; } static inline __ATTRS_o_ai __vector unsigned short vec_orc(__vector unsigned short __a, __vector unsigned short __b) { return __a | ~__b; } static inline __ATTRS_o_ai __vector __bool int vec_orc(__vector __bool int __a, __vector __bool int __b) { return __a | ~__b; } static inline __ATTRS_o_ai __vector signed int vec_orc(__vector signed int __a, __vector signed int __b) { return __a | ~__b; } static inline __ATTRS_o_ai __vector unsigned int vec_orc(__vector unsigned int __a, __vector unsigned int __b) { return __a | ~__b; } static inline __ATTRS_o_ai __vector __bool long long vec_orc(__vector __bool long long __a, __vector __bool long long __b) { return __a | ~__b; } static inline __ATTRS_o_ai __vector signed long long vec_orc(__vector signed long long __a, __vector signed long long __b) { return __a | ~__b; } static inline __ATTRS_o_ai __vector unsigned long long vec_orc(__vector unsigned long long __a, __vector unsigned long long __b) { return __a | ~__b; } static inline __ATTRS_o_ai __vector float vec_orc(__vector float __a, __vector float __b) { return (__vector float)((__vector unsigned int)__a | ~(__vector unsigned int)__b); } static inline __ATTRS_o_ai __vector double vec_orc(__vector double __a, __vector double __b) { return (__vector double)((__vector unsigned long long)__a | ~(__vector unsigned long long)__b); } #endif /*-- vec_nand ---------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector __bool char vec_nand(__vector __bool char __a, __vector __bool char __b) { return ~(__a & __b); } static inline __ATTRS_o_ai __vector signed char vec_nand(__vector signed char __a, __vector signed char __b) { return ~(__a & __b); } static inline __ATTRS_o_ai __vector unsigned char 
vec_nand(__vector unsigned char __a, __vector unsigned char __b) { return ~(__a & __b); } static inline __ATTRS_o_ai __vector __bool short vec_nand(__vector __bool short __a, __vector __bool short __b) { return ~(__a & __b); } static inline __ATTRS_o_ai __vector signed short vec_nand(__vector signed short __a, __vector signed short __b) { return ~(__a & __b); } static inline __ATTRS_o_ai __vector unsigned short vec_nand(__vector unsigned short __a, __vector unsigned short __b) { return ~(__a & __b); } static inline __ATTRS_o_ai __vector __bool int vec_nand(__vector __bool int __a, __vector __bool int __b) { return ~(__a & __b); } static inline __ATTRS_o_ai __vector signed int vec_nand(__vector signed int __a, __vector signed int __b) { return ~(__a & __b); } static inline __ATTRS_o_ai __vector unsigned int vec_nand(__vector unsigned int __a, __vector unsigned int __b) { return ~(__a & __b); } static inline __ATTRS_o_ai __vector __bool long long vec_nand(__vector __bool long long __a, __vector __bool long long __b) { return ~(__a & __b); } static inline __ATTRS_o_ai __vector signed long long vec_nand(__vector signed long long __a, __vector signed long long __b) { return ~(__a & __b); } static inline __ATTRS_o_ai __vector unsigned long long vec_nand(__vector unsigned long long __a, __vector unsigned long long __b) { return ~(__a & __b); } static inline __ATTRS_o_ai __vector float vec_nand(__vector float __a, __vector float __b) { return (__vector float)~((__vector unsigned int)__a & (__vector unsigned int)__b); } static inline __ATTRS_o_ai __vector double vec_nand(__vector double __a, __vector double __b) { return (__vector double)~((__vector unsigned long long)__a & (__vector unsigned long long)__b); } #endif /*-- vec_eqv ----------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector __bool char vec_eqv(__vector __bool char __a, __vector __bool char __b) { return ~(__a ^ __b); } static inline __ATTRS_o_ai __vector signed char vec_eqv(__vector signed char __a, __vector signed char __b) { return ~(__a ^ __b); } static inline __ATTRS_o_ai __vector unsigned char vec_eqv(__vector unsigned char __a, __vector unsigned char __b) { return ~(__a ^ __b); } static inline __ATTRS_o_ai __vector __bool short vec_eqv(__vector __bool short __a, __vector __bool short __b) { return ~(__a ^ __b); } static inline __ATTRS_o_ai __vector signed short vec_eqv(__vector signed short __a, __vector signed short __b) { return ~(__a ^ __b); } static inline __ATTRS_o_ai __vector unsigned short vec_eqv(__vector unsigned short __a, __vector unsigned short __b) { return ~(__a ^ __b); } static inline __ATTRS_o_ai __vector __bool int vec_eqv(__vector __bool int __a, __vector __bool int __b) { return ~(__a ^ __b); } static inline __ATTRS_o_ai __vector signed int vec_eqv(__vector signed int __a, __vector signed int __b) { return ~(__a ^ __b); } static inline __ATTRS_o_ai __vector unsigned int vec_eqv(__vector unsigned int __a, __vector unsigned int __b) { return ~(__a ^ __b); } static inline __ATTRS_o_ai __vector __bool long long vec_eqv(__vector __bool long long __a, __vector __bool long long __b) { return ~(__a ^ __b); } static inline __ATTRS_o_ai __vector signed long long vec_eqv(__vector signed long long __a, __vector signed long long __b) { return ~(__a ^ __b); } static inline __ATTRS_o_ai __vector unsigned long long vec_eqv(__vector unsigned long long __a, __vector unsigned long long __b) { return ~(__a ^ __b); } static inline __ATTRS_o_ai __vector float 
vec_eqv(__vector float __a, __vector float __b) { return (__vector float)~((__vector unsigned int)__a ^ (__vector unsigned int)__b); } static inline __ATTRS_o_ai __vector double vec_eqv(__vector double __a, __vector double __b) { return (__vector double)~((__vector unsigned long long)__a ^ (__vector unsigned long long)__b); } #endif /*-- vec_cntlz --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_cntlz(__vector signed char __a) { return __builtin_s390_vclzb((__vector unsigned char)__a); } static inline __ATTRS_o_ai __vector unsigned char vec_cntlz(__vector unsigned char __a) { return __builtin_s390_vclzb(__a); } static inline __ATTRS_o_ai __vector unsigned short vec_cntlz(__vector signed short __a) { return __builtin_s390_vclzh((__vector unsigned short)__a); } static inline __ATTRS_o_ai __vector unsigned short vec_cntlz(__vector unsigned short __a) { return __builtin_s390_vclzh(__a); } static inline __ATTRS_o_ai __vector unsigned int vec_cntlz(__vector signed int __a) { return __builtin_s390_vclzf((__vector unsigned int)__a); } static inline __ATTRS_o_ai __vector unsigned int vec_cntlz(__vector unsigned int __a) { return __builtin_s390_vclzf(__a); } static inline __ATTRS_o_ai __vector unsigned long long vec_cntlz(__vector signed long long __a) { return __builtin_s390_vclzg((__vector unsigned long long)__a); } static inline __ATTRS_o_ai __vector unsigned long long vec_cntlz(__vector unsigned long long __a) { return __builtin_s390_vclzg(__a); } /*-- vec_cnttz --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_cnttz(__vector signed char __a) { return __builtin_s390_vctzb((__vector unsigned char)__a); } static inline __ATTRS_o_ai __vector unsigned char vec_cnttz(__vector unsigned char __a) { return __builtin_s390_vctzb(__a); } static inline __ATTRS_o_ai __vector unsigned short vec_cnttz(__vector signed short __a) { return __builtin_s390_vctzh((__vector unsigned short)__a); } static inline __ATTRS_o_ai __vector unsigned short vec_cnttz(__vector unsigned short __a) { return __builtin_s390_vctzh(__a); } static inline __ATTRS_o_ai __vector unsigned int vec_cnttz(__vector signed int __a) { return __builtin_s390_vctzf((__vector unsigned int)__a); } static inline __ATTRS_o_ai __vector unsigned int vec_cnttz(__vector unsigned int __a) { return __builtin_s390_vctzf(__a); } static inline __ATTRS_o_ai __vector unsigned long long vec_cnttz(__vector signed long long __a) { return __builtin_s390_vctzg((__vector unsigned long long)__a); } static inline __ATTRS_o_ai __vector unsigned long long vec_cnttz(__vector unsigned long long __a) { return __builtin_s390_vctzg(__a); } /*-- vec_popcnt -------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_popcnt(__vector signed char __a) { return __builtin_s390_vpopctb((__vector unsigned char)__a); } static inline __ATTRS_o_ai __vector unsigned char vec_popcnt(__vector unsigned char __a) { return __builtin_s390_vpopctb(__a); } static inline __ATTRS_o_ai __vector unsigned short vec_popcnt(__vector signed short __a) { return __builtin_s390_vpopcth((__vector unsigned short)__a); } static inline __ATTRS_o_ai __vector unsigned short vec_popcnt(__vector unsigned short __a) { return __builtin_s390_vpopcth(__a); } static inline __ATTRS_o_ai __vector unsigned int vec_popcnt(__vector signed int __a) { return __builtin_s390_vpopctf((__vector unsigned int)__a); } static inline 
__ATTRS_o_ai __vector unsigned int vec_popcnt(__vector unsigned int __a) { return __builtin_s390_vpopctf(__a); } static inline __ATTRS_o_ai __vector unsigned long long vec_popcnt(__vector signed long long __a) { return __builtin_s390_vpopctg((__vector unsigned long long)__a); } static inline __ATTRS_o_ai __vector unsigned long long vec_popcnt(__vector unsigned long long __a) { return __builtin_s390_vpopctg(__a); } /*-- vec_rl -----------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_rl(__vector signed char __a, __vector unsigned char __b) { return (__vector signed char)__builtin_s390_verllvb( (__vector unsigned char)__a, __b); } static inline __ATTRS_o_ai __vector unsigned char vec_rl(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_verllvb(__a, __b); } static inline __ATTRS_o_ai __vector signed short vec_rl(__vector signed short __a, __vector unsigned short __b) { return (__vector signed short)__builtin_s390_verllvh( (__vector unsigned short)__a, __b); } static inline __ATTRS_o_ai __vector unsigned short vec_rl(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_verllvh(__a, __b); } static inline __ATTRS_o_ai __vector signed int vec_rl(__vector signed int __a, __vector unsigned int __b) { return (__vector signed int)__builtin_s390_verllvf( (__vector unsigned int)__a, __b); } static inline __ATTRS_o_ai __vector unsigned int vec_rl(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_verllvf(__a, __b); } static inline __ATTRS_o_ai __vector signed long long vec_rl(__vector signed long long __a, __vector unsigned long long __b) { return (__vector signed long long)__builtin_s390_verllvg( (__vector unsigned long long)__a, __b); } static inline __ATTRS_o_ai __vector unsigned long long vec_rl(__vector unsigned long long __a, __vector unsigned long long __b) { return __builtin_s390_verllvg(__a, __b); } /*-- vec_rli ----------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_rli(__vector signed char __a, unsigned long __b) { return (__vector signed char)__builtin_s390_verllb( (__vector unsigned char)__a, (unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned char vec_rli(__vector unsigned char __a, unsigned long __b) { return __builtin_s390_verllb(__a, (unsigned char)__b); } static inline __ATTRS_o_ai __vector signed short vec_rli(__vector signed short __a, unsigned long __b) { return (__vector signed short)__builtin_s390_verllh( (__vector unsigned short)__a, (unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned short vec_rli(__vector unsigned short __a, unsigned long __b) { return __builtin_s390_verllh(__a, (unsigned char)__b); } static inline __ATTRS_o_ai __vector signed int vec_rli(__vector signed int __a, unsigned long __b) { return (__vector signed int)__builtin_s390_verllf( (__vector unsigned int)__a, (unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned int vec_rli(__vector unsigned int __a, unsigned long __b) { return __builtin_s390_verllf(__a, (unsigned char)__b); } static inline __ATTRS_o_ai __vector signed long long vec_rli(__vector signed long long __a, unsigned long __b) { return (__vector signed long long)__builtin_s390_verllg( (__vector unsigned long long)__a, (unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned long long vec_rli(__vector unsigned long long __a, unsigned long __b) { return __builtin_s390_verllg(__a, (unsigned 
char)__b); } /*-- vec_rl_mask ------------------------------------------------------------*/ extern __ATTRS_o __vector signed char vec_rl_mask(__vector signed char __a, __vector unsigned char __b, unsigned char __c) __constant(__c); extern __ATTRS_o __vector unsigned char vec_rl_mask(__vector unsigned char __a, __vector unsigned char __b, unsigned char __c) __constant(__c); extern __ATTRS_o __vector signed short vec_rl_mask(__vector signed short __a, __vector unsigned short __b, unsigned char __c) __constant(__c); extern __ATTRS_o __vector unsigned short vec_rl_mask(__vector unsigned short __a, __vector unsigned short __b, unsigned char __c) __constant(__c); extern __ATTRS_o __vector signed int vec_rl_mask(__vector signed int __a, __vector unsigned int __b, unsigned char __c) __constant(__c); extern __ATTRS_o __vector unsigned int vec_rl_mask(__vector unsigned int __a, __vector unsigned int __b, unsigned char __c) __constant(__c); extern __ATTRS_o __vector signed long long vec_rl_mask(__vector signed long long __a, __vector unsigned long long __b, unsigned char __c) __constant(__c); extern __ATTRS_o __vector unsigned long long vec_rl_mask(__vector unsigned long long __a, __vector unsigned long long __b, unsigned char __c) __constant(__c); #define vec_rl_mask(X, Y, Z) ((__typeof__((vec_rl_mask)((X), (Y), (Z)))) \ __extension__ ({ \ __vector unsigned char __res; \ __vector unsigned char __x = (__vector unsigned char)(X); \ __vector unsigned char __y = (__vector unsigned char)(Y); \ switch (sizeof ((X)[0])) { \ case 1: __res = (__vector unsigned char) __builtin_s390_verimb( \ (__vector unsigned char)__x, (__vector unsigned char)__x, \ (__vector unsigned char)__y, (Z)); break; \ case 2: __res = (__vector unsigned char) __builtin_s390_verimh( \ (__vector unsigned short)__x, (__vector unsigned short)__x, \ (__vector unsigned short)__y, (Z)); break; \ case 4: __res = (__vector unsigned char) __builtin_s390_verimf( \ (__vector unsigned int)__x, (__vector unsigned int)__x, \ (__vector unsigned int)__y, (Z)); break; \ default: __res = (__vector unsigned char) __builtin_s390_verimg( \ (__vector unsigned long long)__x, (__vector unsigned long long)__x, \ (__vector unsigned long long)__y, (Z)); break; \ } __res; })) /*-- vec_sll ----------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_sll(__vector signed char __a, __vector unsigned char __b) { return (__vector signed char)__builtin_s390_vsl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed char vec_sll(__vector signed char __a, __vector unsigned short __b) { return (__vector signed char)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed char vec_sll(__vector signed char __a, __vector unsigned int __b) { return (__vector signed char)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool char vec_sll(__vector __bool char __a, __vector unsigned char __b) { return (__vector __bool char)__builtin_s390_vsl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool char vec_sll(__vector __bool char __a, __vector unsigned short __b) { return (__vector __bool char)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. 
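/* Illustrative sketch, not part of the original header, for the rotate
 * operations above: vec_rli rotates every element left by the same count
 * (truncated to unsigned char before reaching the verll* builtins), vec_rl
 * takes a per-element count vector, and the vec_rl_mask macro additionally
 * merges the rotated bits under a constant mask via the verim* builtins.
 * Hypothetical helper, assuming -mzvector on s390x. */
static inline __vector unsigned int
example_rotate_lanes_left_8(__vector unsigned int __v) {
  /* Rotate every 32-bit lane of __v left by 8 bits. */
  return vec_rli(__v, 8);
}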
static inline __ATTRS_o_ai __vector __bool char vec_sll(__vector __bool char __a, __vector unsigned int __b) { return (__vector __bool char)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned char vec_sll(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vsl(__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned char vec_sll(__vector unsigned char __a, __vector unsigned short __b) { return __builtin_s390_vsl(__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned char vec_sll(__vector unsigned char __a, __vector unsigned int __b) { return __builtin_s390_vsl(__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed short vec_sll(__vector signed short __a, __vector unsigned char __b) { return (__vector signed short)__builtin_s390_vsl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_sll(__vector signed short __a, __vector unsigned short __b) { return (__vector signed short)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_sll(__vector signed short __a, __vector unsigned int __b) { return (__vector signed short)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool short vec_sll(__vector __bool short __a, __vector unsigned char __b) { return (__vector __bool short)__builtin_s390_vsl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool short vec_sll(__vector __bool short __a, __vector unsigned short __b) { return (__vector __bool short)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool short vec_sll(__vector __bool short __a, __vector unsigned int __b) { return (__vector __bool short)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned short vec_sll(__vector unsigned short __a, __vector unsigned char __b) { return (__vector unsigned short)__builtin_s390_vsl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_sll(__vector unsigned short __a, __vector unsigned short __b) { return (__vector unsigned short)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_sll(__vector unsigned short __a, __vector unsigned int __b) { return (__vector unsigned short)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed int vec_sll(__vector signed int __a, __vector unsigned char __b) { return (__vector signed int)__builtin_s390_vsl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed int vec_sll(__vector signed int __a, __vector unsigned short __b) { return (__vector signed int)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. 
static inline __ATTRS_o_ai __vector signed int vec_sll(__vector signed int __a, __vector unsigned int __b) { return (__vector signed int)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool int vec_sll(__vector __bool int __a, __vector unsigned char __b) { return (__vector __bool int)__builtin_s390_vsl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool int vec_sll(__vector __bool int __a, __vector unsigned short __b) { return (__vector __bool int)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool int vec_sll(__vector __bool int __a, __vector unsigned int __b) { return (__vector __bool int)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned int vec_sll(__vector unsigned int __a, __vector unsigned char __b) { return (__vector unsigned int)__builtin_s390_vsl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_sll(__vector unsigned int __a, __vector unsigned short __b) { return (__vector unsigned int)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_sll(__vector unsigned int __a, __vector unsigned int __b) { return (__vector unsigned int)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed long long vec_sll(__vector signed long long __a, __vector unsigned char __b) { return (__vector signed long long)__builtin_s390_vsl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed long long vec_sll(__vector signed long long __a, __vector unsigned short __b) { return (__vector signed long long)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed long long vec_sll(__vector signed long long __a, __vector unsigned int __b) { return (__vector signed long long)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool long long vec_sll(__vector __bool long long __a, __vector unsigned char __b) { return (__vector __bool long long)__builtin_s390_vsl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool long long vec_sll(__vector __bool long long __a, __vector unsigned short __b) { return (__vector __bool long long)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool long long vec_sll(__vector __bool long long __a, __vector unsigned int __b) { return (__vector __bool long long)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned long long vec_sll(__vector unsigned long long __a, __vector unsigned char __b) { return (__vector unsigned long long)__builtin_s390_vsl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. 
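/*-- usage sketch: vec_sll (illustrative, not from the original header) ------*/

/* A minimal sketch of the usual way to call vec_sll, assuming an s390x
   target with -mzvector.  The name __example_vec_sll_bits is illustrative,
   and vec_splats is assumed to be provided earlier in this header. */
static inline __vector unsigned char
__example_vec_sll_bits(__vector unsigned char __x) {
  /* Replicate the same small (0..7) bit count into every byte of the shift
     operand; with a uniform count the whole 128-bit value is shifted left. */
  __vector unsigned char __cnt = vec_splats((unsigned char)3);
  return vec_sll(__x, __cnt);
}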
static inline __ATTRS_o_ai __vector unsigned long long vec_sll(__vector unsigned long long __a, __vector unsigned short __b) { return (__vector unsigned long long)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned long long vec_sll(__vector unsigned long long __a, __vector unsigned int __b) { return (__vector unsigned long long)__builtin_s390_vsl( (__vector unsigned char)__a, (__vector unsigned char)__b); } /*-- vec_slb ----------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_slb(__vector signed char __a, __vector signed char __b) { return (__vector signed char)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed char vec_slb(__vector signed char __a, __vector unsigned char __b) { return (__vector signed char)__builtin_s390_vslb( (__vector unsigned char)__a, __b); } static inline __ATTRS_o_ai __vector unsigned char vec_slb(__vector unsigned char __a, __vector signed char __b) { return __builtin_s390_vslb(__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned char vec_slb(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vslb(__a, __b); } static inline __ATTRS_o_ai __vector signed short vec_slb(__vector signed short __a, __vector signed short __b) { return (__vector signed short)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed short vec_slb(__vector signed short __a, __vector unsigned short __b) { return (__vector signed short)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned short vec_slb(__vector unsigned short __a, __vector signed short __b) { return (__vector unsigned short)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned short vec_slb(__vector unsigned short __a, __vector unsigned short __b) { return (__vector unsigned short)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed int vec_slb(__vector signed int __a, __vector signed int __b) { return (__vector signed int)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed int vec_slb(__vector signed int __a, __vector unsigned int __b) { return (__vector signed int)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned int vec_slb(__vector unsigned int __a, __vector signed int __b) { return (__vector unsigned int)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned int vec_slb(__vector unsigned int __a, __vector unsigned int __b) { return (__vector unsigned int)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed long long vec_slb(__vector signed long long __a, __vector signed long long __b) { return (__vector signed long long)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed long long vec_slb(__vector signed long long __a, __vector unsigned long long __b) { return (__vector signed long 
long)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned long long vec_slb(__vector unsigned long long __a, __vector signed long long __b) { return (__vector unsigned long long)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned long long vec_slb(__vector unsigned long long __a, __vector unsigned long long __b) { return (__vector unsigned long long)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_slb(__vector float __a, __vector signed int __b) { return (__vector float)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector float vec_slb(__vector float __a, __vector unsigned int __b) { return (__vector float)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } #endif static inline __ATTRS_o_ai __vector double vec_slb(__vector double __a, __vector signed long long __b) { return (__vector double)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector double vec_slb(__vector double __a, __vector unsigned long long __b) { return (__vector double)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } /*-- vec_sld ----------------------------------------------------------------*/ extern __ATTRS_o __vector signed char vec_sld(__vector signed char __a, __vector signed char __b, int __c) __constant_range(__c, 0, 15); extern __ATTRS_o __vector __bool char vec_sld(__vector __bool char __a, __vector __bool char __b, int __c) __constant_range(__c, 0, 15); extern __ATTRS_o __vector unsigned char vec_sld(__vector unsigned char __a, __vector unsigned char __b, int __c) __constant_range(__c, 0, 15); extern __ATTRS_o __vector signed short vec_sld(__vector signed short __a, __vector signed short __b, int __c) __constant_range(__c, 0, 15); extern __ATTRS_o __vector __bool short vec_sld(__vector __bool short __a, __vector __bool short __b, int __c) __constant_range(__c, 0, 15); extern __ATTRS_o __vector unsigned short vec_sld(__vector unsigned short __a, __vector unsigned short __b, int __c) __constant_range(__c, 0, 15); extern __ATTRS_o __vector signed int vec_sld(__vector signed int __a, __vector signed int __b, int __c) __constant_range(__c, 0, 15); extern __ATTRS_o __vector __bool int vec_sld(__vector __bool int __a, __vector __bool int __b, int __c) __constant_range(__c, 0, 15); extern __ATTRS_o __vector unsigned int vec_sld(__vector unsigned int __a, __vector unsigned int __b, int __c) __constant_range(__c, 0, 15); extern __ATTRS_o __vector signed long long vec_sld(__vector signed long long __a, __vector signed long long __b, int __c) __constant_range(__c, 0, 15); extern __ATTRS_o __vector __bool long long vec_sld(__vector __bool long long __a, __vector __bool long long __b, int __c) __constant_range(__c, 0, 15); extern __ATTRS_o __vector unsigned long long vec_sld(__vector unsigned long long __a, __vector unsigned long long __b, int __c) __constant_range(__c, 0, 15); #if __ARCH__ >= 12 extern __ATTRS_o __vector float vec_sld(__vector float __a, __vector float __b, int __c) __constant_range(__c, 0, 15); #endif extern __ATTRS_o __vector double vec_sld(__vector double __a, __vector double __b, int __c) __constant_range(__c, 0, 15); #define vec_sld(X, Y, Z) ((__typeof__((vec_sld)((X), 
(Y), (Z)))) \ __builtin_s390_vsldb((__vector unsigned char)(X), \ (__vector unsigned char)(Y), (Z))) /*-- vec_sldw ---------------------------------------------------------------*/ extern __ATTRS_o __vector signed char vec_sldw(__vector signed char __a, __vector signed char __b, int __c) __constant_range(__c, 0, 3); extern __ATTRS_o __vector unsigned char vec_sldw(__vector unsigned char __a, __vector unsigned char __b, int __c) __constant_range(__c, 0, 3); extern __ATTRS_o __vector signed short vec_sldw(__vector signed short __a, __vector signed short __b, int __c) __constant_range(__c, 0, 3); extern __ATTRS_o __vector unsigned short vec_sldw(__vector unsigned short __a, __vector unsigned short __b, int __c) __constant_range(__c, 0, 3); extern __ATTRS_o __vector signed int vec_sldw(__vector signed int __a, __vector signed int __b, int __c) __constant_range(__c, 0, 3); extern __ATTRS_o __vector unsigned int vec_sldw(__vector unsigned int __a, __vector unsigned int __b, int __c) __constant_range(__c, 0, 3); extern __ATTRS_o __vector signed long long vec_sldw(__vector signed long long __a, __vector signed long long __b, int __c) __constant_range(__c, 0, 3); extern __ATTRS_o __vector unsigned long long vec_sldw(__vector unsigned long long __a, __vector unsigned long long __b, int __c) __constant_range(__c, 0, 3); // This prototype is deprecated. extern __ATTRS_o __vector double vec_sldw(__vector double __a, __vector double __b, int __c) __constant_range(__c, 0, 3); #define vec_sldw(X, Y, Z) ((__typeof__((vec_sldw)((X), (Y), (Z)))) \ __builtin_s390_vsldb((__vector unsigned char)(X), \ (__vector unsigned char)(Y), (Z) * 4)) /*-- vec_sldb ---------------------------------------------------------------*/ #if __ARCH__ >= 13 extern __ATTRS_o __vector signed char vec_sldb(__vector signed char __a, __vector signed char __b, int __c) __constant_range(__c, 0, 7); extern __ATTRS_o __vector unsigned char vec_sldb(__vector unsigned char __a, __vector unsigned char __b, int __c) __constant_range(__c, 0, 7); extern __ATTRS_o __vector signed short vec_sldb(__vector signed short __a, __vector signed short __b, int __c) __constant_range(__c, 0, 7); extern __ATTRS_o __vector unsigned short vec_sldb(__vector unsigned short __a, __vector unsigned short __b, int __c) __constant_range(__c, 0, 7); extern __ATTRS_o __vector signed int vec_sldb(__vector signed int __a, __vector signed int __b, int __c) __constant_range(__c, 0, 7); extern __ATTRS_o __vector unsigned int vec_sldb(__vector unsigned int __a, __vector unsigned int __b, int __c) __constant_range(__c, 0, 7); extern __ATTRS_o __vector signed long long vec_sldb(__vector signed long long __a, __vector signed long long __b, int __c) __constant_range(__c, 0, 7); extern __ATTRS_o __vector unsigned long long vec_sldb(__vector unsigned long long __a, __vector unsigned long long __b, int __c) __constant_range(__c, 0, 7); extern __ATTRS_o __vector float vec_sldb(__vector float __a, __vector float __b, int __c) __constant_range(__c, 0, 7); extern __ATTRS_o __vector double vec_sldb(__vector double __a, __vector double __b, int __c) __constant_range(__c, 0, 7); #define vec_sldb(X, Y, Z) ((__typeof__((vec_sldb)((X), (Y), (Z)))) \ __builtin_s390_vsld((__vector unsigned char)(X), \ (__vector unsigned char)(Y), (Z))) #endif /*-- vec_sral ---------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_sral(__vector signed char __a, __vector unsigned char __b) { return (__vector signed char)__builtin_s390_vsra( (__vector 
unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed char vec_sral(__vector signed char __a, __vector unsigned short __b) { return (__vector signed char)__builtin_s390_vsra( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed char vec_sral(__vector signed char __a, __vector unsigned int __b) { return (__vector signed char)__builtin_s390_vsra( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool char vec_sral(__vector __bool char __a, __vector unsigned char __b) { return (__vector __bool char)__builtin_s390_vsra( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool char vec_sral(__vector __bool char __a, __vector unsigned short __b) { return (__vector __bool char)__builtin_s390_vsra( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool char vec_sral(__vector __bool char __a, __vector unsigned int __b) { return (__vector __bool char)__builtin_s390_vsra( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned char vec_sral(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vsra(__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned char vec_sral(__vector unsigned char __a, __vector unsigned short __b) { return __builtin_s390_vsra(__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned char vec_sral(__vector unsigned char __a, __vector unsigned int __b) { return __builtin_s390_vsra(__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed short vec_sral(__vector signed short __a, __vector unsigned char __b) { return (__vector signed short)__builtin_s390_vsra( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_sral(__vector signed short __a, __vector unsigned short __b) { return (__vector signed short)__builtin_s390_vsra( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_sral(__vector signed short __a, __vector unsigned int __b) { return (__vector signed short)__builtin_s390_vsra( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool short vec_sral(__vector __bool short __a, __vector unsigned char __b) { return (__vector __bool short)__builtin_s390_vsra( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool short vec_sral(__vector __bool short __a, __vector unsigned short __b) { return (__vector __bool short)__builtin_s390_vsra( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. 
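/*-- usage sketch: vec_sld (illustrative, not from the original header) ------*/

/* A minimal sketch of vec_sld, which selects 16 bytes from the 32-byte
   concatenation of its first two operands at a constant byte offset.
   Assumes an s390x target with -mzvector; __example_vec_sld is an
   illustrative name only. */
static inline __vector unsigned char
__example_vec_sld(__vector unsigned char __hi, __vector unsigned char __lo) {
  /* Bytes 4..19 of __hi:__lo; the offset must be a compile-time constant
     in the range 0..15. */
  return vec_sld(__hi, __lo, 4);
}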
static inline __ATTRS_o_ai __vector __bool short vec_sral(__vector __bool short __a, __vector unsigned int __b) { return (__vector __bool short)__builtin_s390_vsra( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned short vec_sral(__vector unsigned short __a, __vector unsigned char __b) { return (__vector unsigned short)__builtin_s390_vsra( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_sral(__vector unsigned short __a, __vector unsigned short __b) { return (__vector unsigned short)__builtin_s390_vsra( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_sral(__vector unsigned short __a, __vector unsigned int __b) { return (__vector unsigned short)__builtin_s390_vsra( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed int vec_sral(__vector signed int __a, __vector unsigned char __b) { return (__vector signed int)__builtin_s390_vsra( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed int vec_sral(__vector signed int __a, __vector unsigned short __b) { return (__vector signed int)__builtin_s390_vsra( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed int vec_sral(__vector signed int __a, __vector unsigned int __b) { return (__vector signed int)__builtin_s390_vsra( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool int vec_sral(__vector __bool int __a, __vector unsigned char __b) { return (__vector __bool int)__builtin_s390_vsra( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool int vec_sral(__vector __bool int __a, __vector unsigned short __b) { return (__vector __bool int)__builtin_s390_vsra( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool int vec_sral(__vector __bool int __a, __vector unsigned int __b) { return (__vector __bool int)__builtin_s390_vsra( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned int vec_sral(__vector unsigned int __a, __vector unsigned char __b) { return (__vector unsigned int)__builtin_s390_vsra( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_sral(__vector unsigned int __a, __vector unsigned short __b) { return (__vector unsigned int)__builtin_s390_vsra( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_sral(__vector unsigned int __a, __vector unsigned int __b) { return (__vector unsigned int)__builtin_s390_vsra( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed long long vec_sral(__vector signed long long __a, __vector unsigned char __b) { return (__vector signed long long)__builtin_s390_vsra( (__vector unsigned char)__a, __b); } // This prototype is deprecated. 
static inline __ATTRS_o_ai __vector signed long long
vec_sral(__vector signed long long __a, __vector unsigned short __b) {
  return (__vector signed long long)__builtin_s390_vsra(
           (__vector unsigned char)__a, (__vector unsigned char)__b);
}

// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed long long
vec_sral(__vector signed long long __a, __vector unsigned int __b) {
  return (__vector signed long long)__builtin_s390_vsra(
           (__vector unsigned char)__a, (__vector unsigned char)__b);
}

// This prototype is deprecated.
static inline __ATTRS_o_ai __vector __bool long long
vec_sral(__vector __bool long long __a, __vector unsigned char __b) {
  return (__vector __bool long long)__builtin_s390_vsra(
           (__vector unsigned char)__a, __b);
}

// This prototype is deprecated.
static inline __ATTRS_o_ai __vector __bool long long
vec_sral(__vector __bool long long __a, __vector unsigned short __b) {
  return (__vector __bool long long)__builtin_s390_vsra(
           (__vector unsigned char)__a, (__vector unsigned char)__b);
}

// This prototype is deprecated.
static inline __ATTRS_o_ai __vector __bool long long
vec_sral(__vector __bool long long __a, __vector unsigned int __b) {
  return (__vector __bool long long)__builtin_s390_vsra(
           (__vector unsigned char)__a, (__vector unsigned char)__b);
}

static inline __ATTRS_o_ai __vector unsigned long long
vec_sral(__vector unsigned long long __a, __vector unsigned char __b) {
  return (__vector unsigned long long)__builtin_s390_vsra(
           (__vector unsigned char)__a, __b);
}

// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned long long
vec_sral(__vector unsigned long long __a, __vector unsigned short __b) {
  return (__vector unsigned long long)__builtin_s390_vsra(
           (__vector unsigned char)__a, (__vector unsigned char)__b);
}

// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned long long vec_sral(__vector unsigned long long __a, __vector unsigned int __b) { return (__vector unsigned long long)__builtin_s390_vsra( (__vector unsigned char)__a, (__vector unsigned char)__b); } /*-- vec_srab ---------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_srab(__vector signed char __a, __vector signed char __b) { return (__vector signed char)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed char vec_srab(__vector signed char __a, __vector unsigned char __b) { return (__vector signed char)__builtin_s390_vsrab( (__vector unsigned char)__a, __b); } static inline __ATTRS_o_ai __vector unsigned char vec_srab(__vector unsigned char __a, __vector signed char __b) { return __builtin_s390_vsrab(__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned char vec_srab(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vsrab(__a, __b); } static inline __ATTRS_o_ai __vector signed short vec_srab(__vector signed short __a, __vector signed short __b) { return (__vector signed short)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed short vec_srab(__vector signed short __a, __vector unsigned short __b) { return (__vector signed short)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned short vec_srab(__vector unsigned short __a, __vector signed short __b) { return (__vector unsigned short)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned short vec_srab(__vector unsigned short __a, __vector unsigned short __b) { return (__vector unsigned short)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed int vec_srab(__vector signed int __a, __vector signed int __b) { return (__vector signed int)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed int vec_srab(__vector signed int __a, __vector unsigned int __b) { return (__vector signed int)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned int vec_srab(__vector unsigned int __a, __vector signed int __b) { return (__vector unsigned int)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned int vec_srab(__vector unsigned int __a, __vector unsigned int __b) { return (__vector unsigned int)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed long long vec_srab(__vector signed long long __a, __vector signed long long __b) { return (__vector signed long long)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed long long vec_srab(__vector signed long long __a, __vector unsigned long long __b) { return (__vector signed long long)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned long long vec_srab(__vector unsigned long long __a, __vector signed long long __b) { return (__vector unsigned long 
long)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned long long vec_srab(__vector unsigned long long __a, __vector unsigned long long __b) { return (__vector unsigned long long)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_srab(__vector float __a, __vector signed int __b) { return (__vector float)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector float vec_srab(__vector float __a, __vector unsigned int __b) { return (__vector float)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } #endif static inline __ATTRS_o_ai __vector double vec_srab(__vector double __a, __vector signed long long __b) { return (__vector double)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector double vec_srab(__vector double __a, __vector unsigned long long __b) { return (__vector double)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } /*-- vec_srl ----------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_srl(__vector signed char __a, __vector unsigned char __b) { return (__vector signed char)__builtin_s390_vsrl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed char vec_srl(__vector signed char __a, __vector unsigned short __b) { return (__vector signed char)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed char vec_srl(__vector signed char __a, __vector unsigned int __b) { return (__vector signed char)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool char vec_srl(__vector __bool char __a, __vector unsigned char __b) { return (__vector __bool char)__builtin_s390_vsrl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool char vec_srl(__vector __bool char __a, __vector unsigned short __b) { return (__vector __bool char)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool char vec_srl(__vector __bool char __a, __vector unsigned int __b) { return (__vector __bool char)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned char vec_srl(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vsrl(__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned char vec_srl(__vector unsigned char __a, __vector unsigned short __b) { return __builtin_s390_vsrl(__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned char vec_srl(__vector unsigned char __a, __vector unsigned int __b) { return __builtin_s390_vsrl(__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed short vec_srl(__vector signed short __a, __vector unsigned char __b) { return (__vector signed short)__builtin_s390_vsrl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. 
static inline __ATTRS_o_ai __vector signed short vec_srl(__vector signed short __a, __vector unsigned short __b) { return (__vector signed short)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_srl(__vector signed short __a, __vector unsigned int __b) { return (__vector signed short)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool short vec_srl(__vector __bool short __a, __vector unsigned char __b) { return (__vector __bool short)__builtin_s390_vsrl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool short vec_srl(__vector __bool short __a, __vector unsigned short __b) { return (__vector __bool short)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool short vec_srl(__vector __bool short __a, __vector unsigned int __b) { return (__vector __bool short)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned short vec_srl(__vector unsigned short __a, __vector unsigned char __b) { return (__vector unsigned short)__builtin_s390_vsrl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_srl(__vector unsigned short __a, __vector unsigned short __b) { return (__vector unsigned short)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_srl(__vector unsigned short __a, __vector unsigned int __b) { return (__vector unsigned short)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed int vec_srl(__vector signed int __a, __vector unsigned char __b) { return (__vector signed int)__builtin_s390_vsrl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed int vec_srl(__vector signed int __a, __vector unsigned short __b) { return (__vector signed int)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed int vec_srl(__vector signed int __a, __vector unsigned int __b) { return (__vector signed int)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool int vec_srl(__vector __bool int __a, __vector unsigned char __b) { return (__vector __bool int)__builtin_s390_vsrl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool int vec_srl(__vector __bool int __a, __vector unsigned short __b) { return (__vector __bool int)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. 
static inline __ATTRS_o_ai __vector __bool int vec_srl(__vector __bool int __a, __vector unsigned int __b) { return (__vector __bool int)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned int vec_srl(__vector unsigned int __a, __vector unsigned char __b) { return (__vector unsigned int)__builtin_s390_vsrl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_srl(__vector unsigned int __a, __vector unsigned short __b) { return (__vector unsigned int)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_srl(__vector unsigned int __a, __vector unsigned int __b) { return (__vector unsigned int)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed long long vec_srl(__vector signed long long __a, __vector unsigned char __b) { return (__vector signed long long)__builtin_s390_vsrl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed long long vec_srl(__vector signed long long __a, __vector unsigned short __b) { return (__vector signed long long)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed long long vec_srl(__vector signed long long __a, __vector unsigned int __b) { return (__vector signed long long)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool long long vec_srl(__vector __bool long long __a, __vector unsigned char __b) { return (__vector __bool long long)__builtin_s390_vsrl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool long long vec_srl(__vector __bool long long __a, __vector unsigned short __b) { return (__vector __bool long long)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector __bool long long vec_srl(__vector __bool long long __a, __vector unsigned int __b) { return (__vector __bool long long)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned long long vec_srl(__vector unsigned long long __a, __vector unsigned char __b) { return (__vector unsigned long long)__builtin_s390_vsrl( (__vector unsigned char)__a, __b); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned long long vec_srl(__vector unsigned long long __a, __vector unsigned short __b) { return (__vector unsigned long long)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. 
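/*-- usage sketch: vec_sral vs. vec_srl (illustrative, not from the original
     header) ----------------------------------------------------------------*/

/* A minimal sketch contrasting the arithmetic (sign-propagating) right shift
   vec_sral with the logical (zero-filling) right shift vec_srl.  Assumes an
   s390x target with -mzvector; the function name is illustrative, and
   vec_splats is assumed to be defined earlier in this header. */
static inline __vector signed int
__example_vec_shift_right(__vector signed int __s, __vector unsigned int __u,
                          __vector unsigned int *__logical) {
  /* Uniform 0..7 bit count replicated into every byte of the count vector. */
  __vector unsigned char __cnt = vec_splats((unsigned char)2);
  *__logical = vec_srl(__u, __cnt);   /* zeros shifted in from the left */
  return vec_sral(__s, __cnt);        /* sign bits shifted in from the left */
}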
static inline __ATTRS_o_ai __vector unsigned long long vec_srl(__vector unsigned long long __a, __vector unsigned int __b) { return (__vector unsigned long long)__builtin_s390_vsrl( (__vector unsigned char)__a, (__vector unsigned char)__b); } /*-- vec_srb ----------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_srb(__vector signed char __a, __vector signed char __b) { return (__vector signed char)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed char vec_srb(__vector signed char __a, __vector unsigned char __b) { return (__vector signed char)__builtin_s390_vsrlb( (__vector unsigned char)__a, __b); } static inline __ATTRS_o_ai __vector unsigned char vec_srb(__vector unsigned char __a, __vector signed char __b) { return __builtin_s390_vsrlb(__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned char vec_srb(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vsrlb(__a, __b); } static inline __ATTRS_o_ai __vector signed short vec_srb(__vector signed short __a, __vector signed short __b) { return (__vector signed short)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed short vec_srb(__vector signed short __a, __vector unsigned short __b) { return (__vector signed short)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned short vec_srb(__vector unsigned short __a, __vector signed short __b) { return (__vector unsigned short)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned short vec_srb(__vector unsigned short __a, __vector unsigned short __b) { return (__vector unsigned short)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed int vec_srb(__vector signed int __a, __vector signed int __b) { return (__vector signed int)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed int vec_srb(__vector signed int __a, __vector unsigned int __b) { return (__vector signed int)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned int vec_srb(__vector unsigned int __a, __vector signed int __b) { return (__vector unsigned int)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned int vec_srb(__vector unsigned int __a, __vector unsigned int __b) { return (__vector unsigned int)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed long long vec_srb(__vector signed long long __a, __vector signed long long __b) { return (__vector signed long long)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector signed long long vec_srb(__vector signed long long __a, __vector unsigned long long __b) { return (__vector signed long long)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned long long vec_srb(__vector unsigned long long __a, __vector signed long long __b) { return (__vector unsigned long 
long)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned long long vec_srb(__vector unsigned long long __a, __vector unsigned long long __b) { return (__vector unsigned long long)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_srb(__vector float __a, __vector signed int __b) { return (__vector float)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector float vec_srb(__vector float __a, __vector unsigned int __b) { return (__vector float)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } #endif static inline __ATTRS_o_ai __vector double vec_srb(__vector double __a, __vector signed long long __b) { return (__vector double)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector double vec_srb(__vector double __a, __vector unsigned long long __b) { return (__vector double)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } /*-- vec_srdb ---------------------------------------------------------------*/ #if __ARCH__ >= 13 extern __ATTRS_o __vector signed char vec_srdb(__vector signed char __a, __vector signed char __b, int __c) __constant_range(__c, 0, 7); extern __ATTRS_o __vector unsigned char vec_srdb(__vector unsigned char __a, __vector unsigned char __b, int __c) __constant_range(__c, 0, 7); extern __ATTRS_o __vector signed short vec_srdb(__vector signed short __a, __vector signed short __b, int __c) __constant_range(__c, 0, 7); extern __ATTRS_o __vector unsigned short vec_srdb(__vector unsigned short __a, __vector unsigned short __b, int __c) __constant_range(__c, 0, 7); extern __ATTRS_o __vector signed int vec_srdb(__vector signed int __a, __vector signed int __b, int __c) __constant_range(__c, 0, 7); extern __ATTRS_o __vector unsigned int vec_srdb(__vector unsigned int __a, __vector unsigned int __b, int __c) __constant_range(__c, 0, 7); extern __ATTRS_o __vector signed long long vec_srdb(__vector signed long long __a, __vector signed long long __b, int __c) __constant_range(__c, 0, 7); extern __ATTRS_o __vector unsigned long long vec_srdb(__vector unsigned long long __a, __vector unsigned long long __b, int __c) __constant_range(__c, 0, 7); extern __ATTRS_o __vector float vec_srdb(__vector float __a, __vector float __b, int __c) __constant_range(__c, 0, 7); extern __ATTRS_o __vector double vec_srdb(__vector double __a, __vector double __b, int __c) __constant_range(__c, 0, 7); #define vec_srdb(X, Y, Z) ((__typeof__((vec_srdb)((X), (Y), (Z)))) \ __builtin_s390_vsrd((__vector unsigned char)(X), \ (__vector unsigned char)(Y), (Z))) #endif /*-- vec_abs ----------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_abs(__vector signed char __a) { return vec_sel(__a, -__a, vec_cmplt(__a, (__vector signed char)0)); } static inline __ATTRS_o_ai __vector signed short vec_abs(__vector signed short __a) { return vec_sel(__a, -__a, vec_cmplt(__a, (__vector signed short)0)); } static inline __ATTRS_o_ai __vector signed int vec_abs(__vector signed int __a) { return vec_sel(__a, -__a, vec_cmplt(__a, (__vector signed int)0)); } static inline __ATTRS_o_ai __vector signed long long vec_abs(__vector signed long long __a) { return vec_sel(__a, -__a, vec_cmplt(__a, (__vector signed long 
long)0)); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_abs(__vector float __a) { return __builtin_s390_vflpsb(__a); } #endif static inline __ATTRS_o_ai __vector double vec_abs(__vector double __a) { return __builtin_s390_vflpdb(__a); } /*-- vec_nabs ---------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_nabs(__vector float __a) { return __builtin_s390_vflnsb(__a); } #endif static inline __ATTRS_o_ai __vector double vec_nabs(__vector double __a) { return __builtin_s390_vflndb(__a); } /*-- vec_max ----------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_max(__vector signed char __a, __vector signed char __b) { return vec_sel(__b, __a, vec_cmpgt(__a, __b)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed char vec_max(__vector signed char __a, __vector __bool char __b) { __vector signed char __bc = (__vector signed char)__b; return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed char vec_max(__vector __bool char __a, __vector signed char __b) { __vector signed char __ac = (__vector signed char)__a; return vec_sel(__b, __ac, vec_cmpgt(__ac, __b)); } static inline __ATTRS_o_ai __vector unsigned char vec_max(__vector unsigned char __a, __vector unsigned char __b) { return vec_sel(__b, __a, vec_cmpgt(__a, __b)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned char vec_max(__vector unsigned char __a, __vector __bool char __b) { __vector unsigned char __bc = (__vector unsigned char)__b; return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned char vec_max(__vector __bool char __a, __vector unsigned char __b) { __vector unsigned char __ac = (__vector unsigned char)__a; return vec_sel(__b, __ac, vec_cmpgt(__ac, __b)); } static inline __ATTRS_o_ai __vector signed short vec_max(__vector signed short __a, __vector signed short __b) { return vec_sel(__b, __a, vec_cmpgt(__a, __b)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_max(__vector signed short __a, __vector __bool short __b) { __vector signed short __bc = (__vector signed short)__b; return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_max(__vector __bool short __a, __vector signed short __b) { __vector signed short __ac = (__vector signed short)__a; return vec_sel(__b, __ac, vec_cmpgt(__ac, __b)); } static inline __ATTRS_o_ai __vector unsigned short vec_max(__vector unsigned short __a, __vector unsigned short __b) { return vec_sel(__b, __a, vec_cmpgt(__a, __b)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_max(__vector unsigned short __a, __vector __bool short __b) { __vector unsigned short __bc = (__vector unsigned short)__b; return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_max(__vector __bool short __a, __vector unsigned short __b) { __vector unsigned short __ac = (__vector unsigned short)__a; return vec_sel(__b, __ac, vec_cmpgt(__ac, __b)); } static inline __ATTRS_o_ai __vector signed int vec_max(__vector signed int __a, __vector signed int __b) { return vec_sel(__b, __a, vec_cmpgt(__a, __b)); } // This prototype is deprecated. 
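/*-- usage sketch: vec_abs (illustrative, not from the original header) ------*/

/* A minimal sketch of vec_abs on integer and double elements, assuming an
   s390x target with -mzvector; __example_vec_abs is an illustrative name. */
static inline __vector signed int
__example_vec_abs(__vector signed int __i, __vector double *__d) {
  *__d = vec_abs(*__d);   /* per-element |x| for doubles */
  return vec_abs(__i);    /* per-element |x| for 32-bit signed integers */
}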
static inline __ATTRS_o_ai __vector signed int vec_max(__vector signed int __a, __vector __bool int __b) { __vector signed int __bc = (__vector signed int)__b; return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed int vec_max(__vector __bool int __a, __vector signed int __b) { __vector signed int __ac = (__vector signed int)__a; return vec_sel(__b, __ac, vec_cmpgt(__ac, __b)); } static inline __ATTRS_o_ai __vector unsigned int vec_max(__vector unsigned int __a, __vector unsigned int __b) { return vec_sel(__b, __a, vec_cmpgt(__a, __b)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_max(__vector unsigned int __a, __vector __bool int __b) { __vector unsigned int __bc = (__vector unsigned int)__b; return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_max(__vector __bool int __a, __vector unsigned int __b) { __vector unsigned int __ac = (__vector unsigned int)__a; return vec_sel(__b, __ac, vec_cmpgt(__ac, __b)); } static inline __ATTRS_o_ai __vector signed long long vec_max(__vector signed long long __a, __vector signed long long __b) { return vec_sel(__b, __a, vec_cmpgt(__a, __b)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed long long vec_max(__vector signed long long __a, __vector __bool long long __b) { __vector signed long long __bc = (__vector signed long long)__b; return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed long long vec_max(__vector __bool long long __a, __vector signed long long __b) { __vector signed long long __ac = (__vector signed long long)__a; return vec_sel(__b, __ac, vec_cmpgt(__ac, __b)); } static inline __ATTRS_o_ai __vector unsigned long long vec_max(__vector unsigned long long __a, __vector unsigned long long __b) { return vec_sel(__b, __a, vec_cmpgt(__a, __b)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned long long vec_max(__vector unsigned long long __a, __vector __bool long long __b) { __vector unsigned long long __bc = (__vector unsigned long long)__b; return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned long long vec_max(__vector __bool long long __a, __vector unsigned long long __b) { __vector unsigned long long __ac = (__vector unsigned long long)__a; return vec_sel(__b, __ac, vec_cmpgt(__ac, __b)); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_max(__vector float __a, __vector float __b) { return __builtin_s390_vfmaxsb(__a, __b, 0); } #endif static inline __ATTRS_o_ai __vector double vec_max(__vector double __a, __vector double __b) { #if __ARCH__ >= 12 return __builtin_s390_vfmaxdb(__a, __b, 0); #else return vec_sel(__b, __a, vec_cmpgt(__a, __b)); #endif } /*-- vec_min ----------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_min(__vector signed char __a, __vector signed char __b) { return vec_sel(__a, __b, vec_cmpgt(__a, __b)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed char vec_min(__vector signed char __a, __vector __bool char __b) { __vector signed char __bc = (__vector signed char)__b; return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); } // This prototype is deprecated. 
static inline __ATTRS_o_ai __vector signed char vec_min(__vector __bool char __a, __vector signed char __b) { __vector signed char __ac = (__vector signed char)__a; return vec_sel(__ac, __b, vec_cmpgt(__ac, __b)); } static inline __ATTRS_o_ai __vector unsigned char vec_min(__vector unsigned char __a, __vector unsigned char __b) { return vec_sel(__a, __b, vec_cmpgt(__a, __b)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned char vec_min(__vector unsigned char __a, __vector __bool char __b) { __vector unsigned char __bc = (__vector unsigned char)__b; return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned char vec_min(__vector __bool char __a, __vector unsigned char __b) { __vector unsigned char __ac = (__vector unsigned char)__a; return vec_sel(__ac, __b, vec_cmpgt(__ac, __b)); } static inline __ATTRS_o_ai __vector signed short vec_min(__vector signed short __a, __vector signed short __b) { return vec_sel(__a, __b, vec_cmpgt(__a, __b)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_min(__vector signed short __a, __vector __bool short __b) { __vector signed short __bc = (__vector signed short)__b; return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_min(__vector __bool short __a, __vector signed short __b) { __vector signed short __ac = (__vector signed short)__a; return vec_sel(__ac, __b, vec_cmpgt(__ac, __b)); } static inline __ATTRS_o_ai __vector unsigned short vec_min(__vector unsigned short __a, __vector unsigned short __b) { return vec_sel(__a, __b, vec_cmpgt(__a, __b)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_min(__vector unsigned short __a, __vector __bool short __b) { __vector unsigned short __bc = (__vector unsigned short)__b; return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_min(__vector __bool short __a, __vector unsigned short __b) { __vector unsigned short __ac = (__vector unsigned short)__a; return vec_sel(__ac, __b, vec_cmpgt(__ac, __b)); } static inline __ATTRS_o_ai __vector signed int vec_min(__vector signed int __a, __vector signed int __b) { return vec_sel(__a, __b, vec_cmpgt(__a, __b)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed int vec_min(__vector signed int __a, __vector __bool int __b) { __vector signed int __bc = (__vector signed int)__b; return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed int vec_min(__vector __bool int __a, __vector signed int __b) { __vector signed int __ac = (__vector signed int)__a; return vec_sel(__ac, __b, vec_cmpgt(__ac, __b)); } static inline __ATTRS_o_ai __vector unsigned int vec_min(__vector unsigned int __a, __vector unsigned int __b) { return vec_sel(__a, __b, vec_cmpgt(__a, __b)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_min(__vector unsigned int __a, __vector __bool int __b) { __vector unsigned int __bc = (__vector unsigned int)__b; return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); } // This prototype is deprecated. 
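/*-- usage sketch: clamping with vec_min / vec_max (illustrative, not from
     the original header) ---------------------------------------------------*/

/* A minimal sketch that clamps every element into a range by combining the
   vec_max and vec_min overloads above.  Assumes an s390x target with
   -mzvector; __example_vec_clamp and its bounds are illustrative, and
   vec_splats is assumed to be defined earlier in this header. */
static inline __vector signed int
__example_vec_clamp(__vector signed int __x) {
  __vector signed int __lo = vec_splats(-100);
  __vector signed int __hi = vec_splats(100);
  /* max(lo, min(x, hi)) pins each element into [-100, 100]. */
  return vec_max(__lo, vec_min(__x, __hi));
}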
static inline __ATTRS_o_ai __vector unsigned int vec_min(__vector __bool int __a, __vector unsigned int __b) { __vector unsigned int __ac = (__vector unsigned int)__a; return vec_sel(__ac, __b, vec_cmpgt(__ac, __b)); } static inline __ATTRS_o_ai __vector signed long long vec_min(__vector signed long long __a, __vector signed long long __b) { return vec_sel(__a, __b, vec_cmpgt(__a, __b)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed long long vec_min(__vector signed long long __a, __vector __bool long long __b) { __vector signed long long __bc = (__vector signed long long)__b; return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed long long vec_min(__vector __bool long long __a, __vector signed long long __b) { __vector signed long long __ac = (__vector signed long long)__a; return vec_sel(__ac, __b, vec_cmpgt(__ac, __b)); } static inline __ATTRS_o_ai __vector unsigned long long vec_min(__vector unsigned long long __a, __vector unsigned long long __b) { return vec_sel(__a, __b, vec_cmpgt(__a, __b)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned long long vec_min(__vector unsigned long long __a, __vector __bool long long __b) { __vector unsigned long long __bc = (__vector unsigned long long)__b; return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); } // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned long long vec_min(__vector __bool long long __a, __vector unsigned long long __b) { __vector unsigned long long __ac = (__vector unsigned long long)__a; return vec_sel(__ac, __b, vec_cmpgt(__ac, __b)); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_min(__vector float __a, __vector float __b) { return __builtin_s390_vfminsb(__a, __b, 0); } #endif static inline __ATTRS_o_ai __vector double vec_min(__vector double __a, __vector double __b) { #if __ARCH__ >= 12 return __builtin_s390_vfmindb(__a, __b, 0); #else return vec_sel(__a, __b, vec_cmpgt(__a, __b)); #endif } /*-- vec_add_u128 -----------------------------------------------------------*/ static inline __ATTRS_ai __vector unsigned char vec_add_u128(__vector unsigned char __a, __vector unsigned char __b) { return (__vector unsigned char)((__int128)__a + (__int128)__b); } /*-- vec_addc ---------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_addc(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vaccb(__a, __b); } static inline __ATTRS_o_ai __vector unsigned short vec_addc(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vacch(__a, __b); } static inline __ATTRS_o_ai __vector unsigned int vec_addc(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vaccf(__a, __b); } static inline __ATTRS_o_ai __vector unsigned long long vec_addc(__vector unsigned long long __a, __vector unsigned long long __b) { return __builtin_s390_vaccg(__a, __b); } /*-- vec_addc_u128 ----------------------------------------------------------*/ static inline __ATTRS_ai __vector unsigned char vec_addc_u128(__vector unsigned char __a, __vector unsigned char __b) { return (__vector unsigned char) __builtin_s390_vaccq((unsigned __int128)__a, (unsigned __int128)__b); } /*-- vec_adde_u128 ----------------------------------------------------------*/ static inline __ATTRS_ai __vector unsigned char vec_adde_u128(__vector unsigned char __a, __vector unsigned 
char __b, __vector unsigned char __c) { return (__vector unsigned char) __builtin_s390_vacq((unsigned __int128)__a, (unsigned __int128)__b, (unsigned __int128)__c); } /*-- vec_addec_u128 ---------------------------------------------------------*/ static inline __ATTRS_ai __vector unsigned char vec_addec_u128(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c) { return (__vector unsigned char) __builtin_s390_vacccq((unsigned __int128)__a, (unsigned __int128)__b, (unsigned __int128)__c); } /*-- vec_avg ----------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_avg(__vector signed char __a, __vector signed char __b) { return __builtin_s390_vavgb(__a, __b); } static inline __ATTRS_o_ai __vector signed short vec_avg(__vector signed short __a, __vector signed short __b) { return __builtin_s390_vavgh(__a, __b); } static inline __ATTRS_o_ai __vector signed int vec_avg(__vector signed int __a, __vector signed int __b) { return __builtin_s390_vavgf(__a, __b); } static inline __ATTRS_o_ai __vector signed long long vec_avg(__vector signed long long __a, __vector signed long long __b) { return __builtin_s390_vavgg(__a, __b); } static inline __ATTRS_o_ai __vector unsigned char vec_avg(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vavglb(__a, __b); } static inline __ATTRS_o_ai __vector unsigned short vec_avg(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vavglh(__a, __b); } static inline __ATTRS_o_ai __vector unsigned int vec_avg(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vavglf(__a, __b); } static inline __ATTRS_o_ai __vector unsigned long long vec_avg(__vector unsigned long long __a, __vector unsigned long long __b) { return __builtin_s390_vavglg(__a, __b); } /*-- vec_checksum -----------------------------------------------------------*/ static inline __ATTRS_ai __vector unsigned int vec_checksum(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vcksm(__a, __b); } /*-- vec_gfmsum -------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned short vec_gfmsum(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vgfmb(__a, __b); } static inline __ATTRS_o_ai __vector unsigned int vec_gfmsum(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vgfmh(__a, __b); } static inline __ATTRS_o_ai __vector unsigned long long vec_gfmsum(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vgfmf(__a, __b); } /*-- vec_gfmsum_128 ---------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_gfmsum_128(__vector unsigned long long __a, __vector unsigned long long __b) { return (__vector unsigned char)__builtin_s390_vgfmg(__a, __b); } /*-- vec_gfmsum_accum -------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned short vec_gfmsum_accum(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned short __c) { return __builtin_s390_vgfmab(__a, __b, __c); } static inline __ATTRS_o_ai __vector unsigned int vec_gfmsum_accum(__vector unsigned short __a, __vector unsigned short __b, __vector unsigned int __c) { return __builtin_s390_vgfmah(__a, __b, __c); } static inline __ATTRS_o_ai __vector unsigned long long vec_gfmsum_accum(__vector unsigned int __a, __vector unsigned int 
__b, __vector unsigned long long __c) { return __builtin_s390_vgfmaf(__a, __b, __c); } /*-- vec_gfmsum_accum_128 ---------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_gfmsum_accum_128(__vector unsigned long long __a, __vector unsigned long long __b, __vector unsigned char __c) { return (__vector unsigned char) __builtin_s390_vgfmag(__a, __b, (unsigned __int128)__c); } /*-- vec_mladd --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_mladd(__vector signed char __a, __vector signed char __b, __vector signed char __c) { return __a * __b + __c; } static inline __ATTRS_o_ai __vector signed char vec_mladd(__vector unsigned char __a, __vector signed char __b, __vector signed char __c) { return (__vector signed char)__a * __b + __c; } static inline __ATTRS_o_ai __vector signed char vec_mladd(__vector signed char __a, __vector unsigned char __b, __vector unsigned char __c) { return __a * (__vector signed char)__b + (__vector signed char)__c; } static inline __ATTRS_o_ai __vector unsigned char vec_mladd(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c) { return __a * __b + __c; } static inline __ATTRS_o_ai __vector signed short vec_mladd(__vector signed short __a, __vector signed short __b, __vector signed short __c) { return __a * __b + __c; } static inline __ATTRS_o_ai __vector signed short vec_mladd(__vector unsigned short __a, __vector signed short __b, __vector signed short __c) { return (__vector signed short)__a * __b + __c; } static inline __ATTRS_o_ai __vector signed short vec_mladd(__vector signed short __a, __vector unsigned short __b, __vector unsigned short __c) { return __a * (__vector signed short)__b + (__vector signed short)__c; } static inline __ATTRS_o_ai __vector unsigned short vec_mladd(__vector unsigned short __a, __vector unsigned short __b, __vector unsigned short __c) { return __a * __b + __c; } static inline __ATTRS_o_ai __vector signed int vec_mladd(__vector signed int __a, __vector signed int __b, __vector signed int __c) { return __a * __b + __c; } static inline __ATTRS_o_ai __vector signed int vec_mladd(__vector unsigned int __a, __vector signed int __b, __vector signed int __c) { return (__vector signed int)__a * __b + __c; } static inline __ATTRS_o_ai __vector signed int vec_mladd(__vector signed int __a, __vector unsigned int __b, __vector unsigned int __c) { return __a * (__vector signed int)__b + (__vector signed int)__c; } static inline __ATTRS_o_ai __vector unsigned int vec_mladd(__vector unsigned int __a, __vector unsigned int __b, __vector unsigned int __c) { return __a * __b + __c; } /*-- vec_mhadd --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_mhadd(__vector signed char __a, __vector signed char __b, __vector signed char __c) { return __builtin_s390_vmahb(__a, __b, __c); } static inline __ATTRS_o_ai __vector unsigned char vec_mhadd(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c) { return __builtin_s390_vmalhb(__a, __b, __c); } static inline __ATTRS_o_ai __vector signed short vec_mhadd(__vector signed short __a, __vector signed short __b, __vector signed short __c) { return __builtin_s390_vmahh(__a, __b, __c); } static inline __ATTRS_o_ai __vector unsigned short vec_mhadd(__vector unsigned short __a, __vector unsigned short __b, __vector unsigned short __c) { return __builtin_s390_vmalhh(__a, __b, __c); } 
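/*
 * Note (illustrative addition, not part of the original header): the vec_mladd
 * overloads above are plain element-wise multiply-adds; the definitions are
 * literally "__a * __b + __c", so each result element is truncated to the
 * element width.  With unsigned char elements, for example, 200 * 2 + 5 = 405,
 * which wraps to 149 (405 - 256).  The surrounding vec_mhadd overloads map to
 * the "multiply and add high" builtins (vmahb, vmalhb, vmahh, and so on) and
 * return the high-order part of the widened multiply-accumulate instead; see
 * the z/Architecture Principles of Operation for the exact definition.
 */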
static inline __ATTRS_o_ai __vector signed int vec_mhadd(__vector signed int __a, __vector signed int __b, __vector signed int __c) { return __builtin_s390_vmahf(__a, __b, __c); } static inline __ATTRS_o_ai __vector unsigned int vec_mhadd(__vector unsigned int __a, __vector unsigned int __b, __vector unsigned int __c) { return __builtin_s390_vmalhf(__a, __b, __c); } /*-- vec_meadd --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed short vec_meadd(__vector signed char __a, __vector signed char __b, __vector signed short __c) { return __builtin_s390_vmaeb(__a, __b, __c); } static inline __ATTRS_o_ai __vector unsigned short vec_meadd(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned short __c) { return __builtin_s390_vmaleb(__a, __b, __c); } static inline __ATTRS_o_ai __vector signed int vec_meadd(__vector signed short __a, __vector signed short __b, __vector signed int __c) { return __builtin_s390_vmaeh(__a, __b, __c); } static inline __ATTRS_o_ai __vector unsigned int vec_meadd(__vector unsigned short __a, __vector unsigned short __b, __vector unsigned int __c) { return __builtin_s390_vmaleh(__a, __b, __c); } static inline __ATTRS_o_ai __vector signed long long vec_meadd(__vector signed int __a, __vector signed int __b, __vector signed long long __c) { return __builtin_s390_vmaef(__a, __b, __c); } static inline __ATTRS_o_ai __vector unsigned long long vec_meadd(__vector unsigned int __a, __vector unsigned int __b, __vector unsigned long long __c) { return __builtin_s390_vmalef(__a, __b, __c); } /*-- vec_moadd --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed short vec_moadd(__vector signed char __a, __vector signed char __b, __vector signed short __c) { return __builtin_s390_vmaob(__a, __b, __c); } static inline __ATTRS_o_ai __vector unsigned short vec_moadd(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned short __c) { return __builtin_s390_vmalob(__a, __b, __c); } static inline __ATTRS_o_ai __vector signed int vec_moadd(__vector signed short __a, __vector signed short __b, __vector signed int __c) { return __builtin_s390_vmaoh(__a, __b, __c); } static inline __ATTRS_o_ai __vector unsigned int vec_moadd(__vector unsigned short __a, __vector unsigned short __b, __vector unsigned int __c) { return __builtin_s390_vmaloh(__a, __b, __c); } static inline __ATTRS_o_ai __vector signed long long vec_moadd(__vector signed int __a, __vector signed int __b, __vector signed long long __c) { return __builtin_s390_vmaof(__a, __b, __c); } static inline __ATTRS_o_ai __vector unsigned long long vec_moadd(__vector unsigned int __a, __vector unsigned int __b, __vector unsigned long long __c) { return __builtin_s390_vmalof(__a, __b, __c); } /*-- vec_mulh ---------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_mulh(__vector signed char __a, __vector signed char __b) { return __builtin_s390_vmhb(__a, __b); } static inline __ATTRS_o_ai __vector unsigned char vec_mulh(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vmlhb(__a, __b); } static inline __ATTRS_o_ai __vector signed short vec_mulh(__vector signed short __a, __vector signed short __b) { return __builtin_s390_vmhh(__a, __b); } static inline __ATTRS_o_ai __vector unsigned short vec_mulh(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vmlhh(__a, __b); } static inline 
__ATTRS_o_ai __vector signed int vec_mulh(__vector signed int __a, __vector signed int __b) { return __builtin_s390_vmhf(__a, __b); } static inline __ATTRS_o_ai __vector unsigned int vec_mulh(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vmlhf(__a, __b); } /*-- vec_mule ---------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed short vec_mule(__vector signed char __a, __vector signed char __b) { return __builtin_s390_vmeb(__a, __b); } static inline __ATTRS_o_ai __vector unsigned short vec_mule(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vmleb(__a, __b); } static inline __ATTRS_o_ai __vector signed int vec_mule(__vector signed short __a, __vector signed short __b) { return __builtin_s390_vmeh(__a, __b); } static inline __ATTRS_o_ai __vector unsigned int vec_mule(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vmleh(__a, __b); } static inline __ATTRS_o_ai __vector signed long long vec_mule(__vector signed int __a, __vector signed int __b) { return __builtin_s390_vmef(__a, __b); } static inline __ATTRS_o_ai __vector unsigned long long vec_mule(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vmlef(__a, __b); } /*-- vec_mulo ---------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed short vec_mulo(__vector signed char __a, __vector signed char __b) { return __builtin_s390_vmob(__a, __b); } static inline __ATTRS_o_ai __vector unsigned short vec_mulo(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vmlob(__a, __b); } static inline __ATTRS_o_ai __vector signed int vec_mulo(__vector signed short __a, __vector signed short __b) { return __builtin_s390_vmoh(__a, __b); } static inline __ATTRS_o_ai __vector unsigned int vec_mulo(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vmloh(__a, __b); } static inline __ATTRS_o_ai __vector signed long long vec_mulo(__vector signed int __a, __vector signed int __b) { return __builtin_s390_vmof(__a, __b); } static inline __ATTRS_o_ai __vector unsigned long long vec_mulo(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vmlof(__a, __b); } /*-- vec_msum_u128 ----------------------------------------------------------*/ #if __ARCH__ >= 12 extern __ATTRS_o __vector unsigned char vec_msum_u128(__vector unsigned long long __a, __vector unsigned long long __b, __vector unsigned char __c, int __d) __constant_range(__d, 0, 15); #define vec_msum_u128(X, Y, Z, W) \ ((__typeof__((vec_msum_u128)((X), (Y), (Z), (W)))) \ __builtin_s390_vmslg((X), (Y), (unsigned __int128)(Z), (W))) #endif /*-- vec_sub_u128 -----------------------------------------------------------*/ static inline __ATTRS_ai __vector unsigned char vec_sub_u128(__vector unsigned char __a, __vector unsigned char __b) { return (__vector unsigned char)((__int128)__a - (__int128)__b); } /*-- vec_subc ---------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_subc(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vscbib(__a, __b); } static inline __ATTRS_o_ai __vector unsigned short vec_subc(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vscbih(__a, __b); } static inline __ATTRS_o_ai __vector unsigned int vec_subc(__vector unsigned int __a, __vector unsigned int __b) { return 
__builtin_s390_vscbif(__a, __b); } static inline __ATTRS_o_ai __vector unsigned long long vec_subc(__vector unsigned long long __a, __vector unsigned long long __b) { return __builtin_s390_vscbig(__a, __b); } /*-- vec_subc_u128 ----------------------------------------------------------*/ static inline __ATTRS_ai __vector unsigned char vec_subc_u128(__vector unsigned char __a, __vector unsigned char __b) { return (__vector unsigned char) __builtin_s390_vscbiq((unsigned __int128)__a, (unsigned __int128)__b); } /*-- vec_sube_u128 ----------------------------------------------------------*/ static inline __ATTRS_ai __vector unsigned char vec_sube_u128(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c) { return (__vector unsigned char) __builtin_s390_vsbiq((unsigned __int128)__a, (unsigned __int128)__b, (unsigned __int128)__c); } /*-- vec_subec_u128 ---------------------------------------------------------*/ static inline __ATTRS_ai __vector unsigned char vec_subec_u128(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c) { return (__vector unsigned char) __builtin_s390_vsbcbiq((unsigned __int128)__a, (unsigned __int128)__b, (unsigned __int128)__c); } /*-- vec_sum2 ---------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned long long vec_sum2(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vsumgh(__a, __b); } static inline __ATTRS_o_ai __vector unsigned long long vec_sum2(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vsumgf(__a, __b); } /*-- vec_sum_u128 -----------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_sum_u128(__vector unsigned int __a, __vector unsigned int __b) { return (__vector unsigned char)__builtin_s390_vsumqf(__a, __b); } static inline __ATTRS_o_ai __vector unsigned char vec_sum_u128(__vector unsigned long long __a, __vector unsigned long long __b) { return (__vector unsigned char)__builtin_s390_vsumqg(__a, __b); } /*-- vec_sum4 ---------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned int vec_sum4(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vsumb(__a, __b); } static inline __ATTRS_o_ai __vector unsigned int vec_sum4(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vsumh(__a, __b); } /*-- vec_test_mask ----------------------------------------------------------*/ static inline __ATTRS_o_ai int vec_test_mask(__vector signed char __a, __vector unsigned char __b) { return __builtin_s390_vtm((__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai int vec_test_mask(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vtm(__a, __b); } static inline __ATTRS_o_ai int vec_test_mask(__vector signed short __a, __vector unsigned short __b) { return __builtin_s390_vtm((__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai int vec_test_mask(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vtm((__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai int vec_test_mask(__vector signed int __a, __vector unsigned int __b) { return __builtin_s390_vtm((__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai int vec_test_mask(__vector unsigned int __a, __vector 
unsigned int __b) { return __builtin_s390_vtm((__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai int vec_test_mask(__vector signed long long __a, __vector unsigned long long __b) { return __builtin_s390_vtm((__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai int vec_test_mask(__vector unsigned long long __a, __vector unsigned long long __b) { return __builtin_s390_vtm((__vector unsigned char)__a, (__vector unsigned char)__b); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_test_mask(__vector float __a, __vector unsigned int __b) { return __builtin_s390_vtm((__vector unsigned char)__a, (__vector unsigned char)__b); } #endif static inline __ATTRS_o_ai int vec_test_mask(__vector double __a, __vector unsigned long long __b) { return __builtin_s390_vtm((__vector unsigned char)__a, (__vector unsigned char)__b); } /*-- vec_madd ---------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_madd(__vector float __a, __vector float __b, __vector float __c) { return __builtin_s390_vfmasb(__a, __b, __c); } #endif static inline __ATTRS_o_ai __vector double vec_madd(__vector double __a, __vector double __b, __vector double __c) { return __builtin_s390_vfmadb(__a, __b, __c); } /*-- vec_msub ---------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_msub(__vector float __a, __vector float __b, __vector float __c) { return __builtin_s390_vfmssb(__a, __b, __c); } #endif static inline __ATTRS_o_ai __vector double vec_msub(__vector double __a, __vector double __b, __vector double __c) { return __builtin_s390_vfmsdb(__a, __b, __c); } /*-- vec_nmadd ---------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_nmadd(__vector float __a, __vector float __b, __vector float __c) { return __builtin_s390_vfnmasb(__a, __b, __c); } static inline __ATTRS_o_ai __vector double vec_nmadd(__vector double __a, __vector double __b, __vector double __c) { return __builtin_s390_vfnmadb(__a, __b, __c); } #endif /*-- vec_nmsub ---------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_nmsub(__vector float __a, __vector float __b, __vector float __c) { return __builtin_s390_vfnmssb(__a, __b, __c); } static inline __ATTRS_o_ai __vector double vec_nmsub(__vector double __a, __vector double __b, __vector double __c) { return __builtin_s390_vfnmsdb(__a, __b, __c); } #endif /*-- vec_sqrt ---------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_sqrt(__vector float __a) { return __builtin_s390_vfsqsb(__a); } #endif static inline __ATTRS_o_ai __vector double vec_sqrt(__vector double __a) { return __builtin_s390_vfsqdb(__a); } /*-- vec_ld2f ---------------------------------------------------------------*/ // This prototype is deprecated. static inline __ATTRS_ai __vector double vec_ld2f(const float *__ptr) { typedef float __v2f32 __attribute__((__vector_size__(8))); return __builtin_convertvector(*(const __v2f32 *)__ptr, __vector double); } /*-- vec_st2f ---------------------------------------------------------------*/ // This prototype is deprecated. 
static inline __ATTRS_ai void vec_st2f(__vector double __a, float *__ptr) { typedef float __v2f32 __attribute__((__vector_size__(8))); *(__v2f32 *)__ptr = __builtin_convertvector(__a, __v2f32); } /*-- vec_ctd ----------------------------------------------------------------*/ // This prototype is deprecated. static inline __ATTRS_o_ai __vector double vec_ctd(__vector signed long long __a, int __b) __constant_range(__b, 0, 31) { __vector double __conv = __builtin_convertvector(__a, __vector double); __conv *= ((__vector double)(__vector unsigned long long) ((0x3ffULL - __b) << 52)); return __conv; } // This prototype is deprecated. static inline __ATTRS_o_ai __vector double vec_ctd(__vector unsigned long long __a, int __b) __constant_range(__b, 0, 31) { __vector double __conv = __builtin_convertvector(__a, __vector double); __conv *= ((__vector double)(__vector unsigned long long) ((0x3ffULL - __b) << 52)); return __conv; } /*-- vec_ctsl ---------------------------------------------------------------*/ // This prototype is deprecated. static inline __ATTRS_o_ai __vector signed long long vec_ctsl(__vector double __a, int __b) __constant_range(__b, 0, 31) { __a *= ((__vector double)(__vector unsigned long long) ((0x3ffULL + __b) << 52)); return __builtin_convertvector(__a, __vector signed long long); } /*-- vec_ctul ---------------------------------------------------------------*/ // This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned long long vec_ctul(__vector double __a, int __b) __constant_range(__b, 0, 31) { __a *= ((__vector double)(__vector unsigned long long) ((0x3ffULL + __b) << 52)); return __builtin_convertvector(__a, __vector unsigned long long); } /*-- vec_doublee ------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_ai __vector double vec_doublee(__vector float __a) { typedef float __v2f32 __attribute__((__vector_size__(8))); __v2f32 __pack = __builtin_shufflevector(__a, __a, 0, 2); return __builtin_convertvector(__pack, __vector double); } #endif /*-- vec_floate -------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_ai __vector float vec_floate(__vector double __a) { typedef float __v2f32 __attribute__((__vector_size__(8))); __v2f32 __pack = __builtin_convertvector(__a, __v2f32); return __builtin_shufflevector(__pack, __pack, 0, -1, 1, -1); } #endif /*-- vec_double -------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector double vec_double(__vector signed long long __a) { return __builtin_convertvector(__a, __vector double); } static inline __ATTRS_o_ai __vector double vec_double(__vector unsigned long long __a) { return __builtin_convertvector(__a, __vector double); } /*-- vec_float --------------------------------------------------------------*/ #if __ARCH__ >= 13 static inline __ATTRS_o_ai __vector float vec_float(__vector signed int __a) { return __builtin_convertvector(__a, __vector float); } static inline __ATTRS_o_ai __vector float vec_float(__vector unsigned int __a) { return __builtin_convertvector(__a, __vector float); } #endif /*-- vec_signed -------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed long long vec_signed(__vector double __a) { return __builtin_convertvector(__a, __vector signed long long); } #if __ARCH__ >= 13 static inline __ATTRS_o_ai __vector signed int vec_signed(__vector float __a) { return __builtin_convertvector(__a, __vector signed 
int); } #endif /*-- vec_unsigned -----------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned long long vec_unsigned(__vector double __a) { return __builtin_convertvector(__a, __vector unsigned long long); } #if __ARCH__ >= 13 static inline __ATTRS_o_ai __vector unsigned int vec_unsigned(__vector float __a) { return __builtin_convertvector(__a, __vector unsigned int); } #endif /*-- vec_roundp -------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_roundp(__vector float __a) { return __builtin_s390_vfisb(__a, 4, 6); } #endif static inline __ATTRS_o_ai __vector double vec_roundp(__vector double __a) { return __builtin_s390_vfidb(__a, 4, 6); } /*-- vec_ceil ---------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_ceil(__vector float __a) { // On this platform, vec_ceil never triggers the IEEE-inexact exception. return __builtin_s390_vfisb(__a, 4, 6); } #endif static inline __ATTRS_o_ai __vector double vec_ceil(__vector double __a) { // On this platform, vec_ceil never triggers the IEEE-inexact exception. return __builtin_s390_vfidb(__a, 4, 6); } /*-- vec_roundm -------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_roundm(__vector float __a) { return __builtin_s390_vfisb(__a, 4, 7); } #endif static inline __ATTRS_o_ai __vector double vec_roundm(__vector double __a) { return __builtin_s390_vfidb(__a, 4, 7); } /*-- vec_floor --------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_floor(__vector float __a) { // On this platform, vec_floor never triggers the IEEE-inexact exception. return __builtin_s390_vfisb(__a, 4, 7); } #endif static inline __ATTRS_o_ai __vector double vec_floor(__vector double __a) { // On this platform, vec_floor never triggers the IEEE-inexact exception. return __builtin_s390_vfidb(__a, 4, 7); } /*-- vec_roundz -------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_roundz(__vector float __a) { return __builtin_s390_vfisb(__a, 4, 5); } #endif static inline __ATTRS_o_ai __vector double vec_roundz(__vector double __a) { return __builtin_s390_vfidb(__a, 4, 5); } /*-- vec_trunc --------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_trunc(__vector float __a) { // On this platform, vec_trunc never triggers the IEEE-inexact exception. return __builtin_s390_vfisb(__a, 4, 5); } #endif static inline __ATTRS_o_ai __vector double vec_trunc(__vector double __a) { // On this platform, vec_trunc never triggers the IEEE-inexact exception. return __builtin_s390_vfidb(__a, 4, 5); } /*-- vec_roundc -------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_roundc(__vector float __a) { return __builtin_s390_vfisb(__a, 4, 0); } #endif static inline __ATTRS_o_ai __vector double vec_roundc(__vector double __a) { return __builtin_s390_vfidb(__a, 4, 0); } /*-- vec_rint ---------------------------------------------------------------*/ #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_rint(__vector float __a) { // vec_rint may trigger the IEEE-inexact exception. 
  return __builtin_s390_vfisb(__a, 0, 0);
}
#endif

static inline __ATTRS_o_ai __vector double
vec_rint(__vector double __a) {
  // vec_rint may trigger the IEEE-inexact exception.
  return __builtin_s390_vfidb(__a, 0, 0);
}

/*-- vec_round --------------------------------------------------------------*/

#if __ARCH__ >= 12
static inline __ATTRS_o_ai __vector float
vec_round(__vector float __a) {
  return __builtin_s390_vfisb(__a, 4, 4);
}
#endif

static inline __ATTRS_o_ai __vector double
vec_round(__vector double __a) {
  return __builtin_s390_vfidb(__a, 4, 4);
}

/*-- vec_fp_test_data_class -------------------------------------------------*/

#if __ARCH__ >= 12
extern __ATTRS_o __vector __bool int
vec_fp_test_data_class(__vector float __a, int __b, int *__c)
  __constant_range(__b, 0, 4095);

extern __ATTRS_o __vector __bool long long
vec_fp_test_data_class(__vector double __a, int __b, int *__c)
  __constant_range(__b, 0, 4095);

#define vec_fp_test_data_class(X, Y, Z) \
  ((__typeof__((vec_fp_test_data_class)((X), (Y), (Z)))) \
   __extension__ ({ \
     __vector unsigned char __res; \
     __vector unsigned char __x = (__vector unsigned char)(X); \
     int *__z = (Z); \
     switch (sizeof ((X)[0])) { \
     case 4: __res = (__vector unsigned char) \
               __builtin_s390_vftcisb((__vector float)__x, (Y), __z); \
             break; \
     default: __res = (__vector unsigned char) \
               __builtin_s390_vftcidb((__vector double)__x, (Y), __z); \
             break; \
     } __res; }))
#else
#define vec_fp_test_data_class(X, Y, Z) \
  ((__vector __bool long long)__builtin_s390_vftcidb((X), (Y), (Z)))
#endif

// Class masks for the second argument of vec_fp_test_data_class; the _P/_N
// suffixes select the positive and negative members of each IEEE data class.
#define __VEC_CLASS_FP_ZERO_P (1 << 11)
#define __VEC_CLASS_FP_ZERO_N (1 << 10)
#define __VEC_CLASS_FP_ZERO (__VEC_CLASS_FP_ZERO_P | __VEC_CLASS_FP_ZERO_N)
#define __VEC_CLASS_FP_NORMAL_P (1 << 9)
#define __VEC_CLASS_FP_NORMAL_N (1 << 8)
#define __VEC_CLASS_FP_NORMAL (__VEC_CLASS_FP_NORMAL_P | \
                               __VEC_CLASS_FP_NORMAL_N)
#define __VEC_CLASS_FP_SUBNORMAL_P (1 << 7)
#define __VEC_CLASS_FP_SUBNORMAL_N (1 << 6)
#define __VEC_CLASS_FP_SUBNORMAL (__VEC_CLASS_FP_SUBNORMAL_P | \
                                  __VEC_CLASS_FP_SUBNORMAL_N)
#define __VEC_CLASS_FP_INFINITY_P (1 << 5)
#define __VEC_CLASS_FP_INFINITY_N (1 << 4)
#define __VEC_CLASS_FP_INFINITY (__VEC_CLASS_FP_INFINITY_P | \
                                 __VEC_CLASS_FP_INFINITY_N)
#define __VEC_CLASS_FP_QNAN_P (1 << 3)
#define __VEC_CLASS_FP_QNAN_N (1 << 2)
#define __VEC_CLASS_FP_QNAN (__VEC_CLASS_FP_QNAN_P | __VEC_CLASS_FP_QNAN_N)
#define __VEC_CLASS_FP_SNAN_P (1 << 1)
#define __VEC_CLASS_FP_SNAN_N (1 << 0)
#define __VEC_CLASS_FP_SNAN (__VEC_CLASS_FP_SNAN_P | __VEC_CLASS_FP_SNAN_N)
#define __VEC_CLASS_FP_NAN (__VEC_CLASS_FP_QNAN | __VEC_CLASS_FP_SNAN)
#define __VEC_CLASS_FP_NOT_NORMAL (__VEC_CLASS_FP_NAN | \
                                   __VEC_CLASS_FP_SUBNORMAL | \
                                   __VEC_CLASS_FP_ZERO | \
                                   __VEC_CLASS_FP_INFINITY)

/*-- vec_extend_to_fp32_hi --------------------------------------------------*/

#if __ARCH__ >= 14
#define vec_extend_to_fp32_hi(X, W) \
  ((__vector float)__builtin_s390_vclfnhs((X), (W)));
#endif

/*-- vec_extend_to_fp32_lo --------------------------------------------------*/

#if __ARCH__ >= 14
#define vec_extend_to_fp32_lo(X, W) \
  ((__vector float)__builtin_s390_vclfnls((X), (W)));
#endif

/*-- vec_round_from_fp32 ----------------------------------------------------*/

#if __ARCH__ >= 14
#define vec_round_from_fp32(X, Y, W) \
  ((__vector unsigned short)__builtin_s390_vcrnfs((X), (Y), (W)));
#endif

/*-- vec_convert_to_fp16 ----------------------------------------------------*/

#if __ARCH__ >= 14
#define vec_convert_to_fp16(X, W) \
  ((__vector unsigned short)__builtin_s390_vcfn((X), (W)));
#endif

/*--
vec_convert_from_fp16 --------------------------------------------------*/ #if __ARCH__ >= 14 #define vec_convert_from_fp16(X, W) \ ((__vector unsigned short)__builtin_s390_vcnf((X), (W))); #endif /*-- vec_cp_until_zero ------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_cp_until_zero(__vector signed char __a) { return ((__vector signed char) __builtin_s390_vistrb((__vector unsigned char)__a)); } static inline __ATTRS_o_ai __vector __bool char vec_cp_until_zero(__vector __bool char __a) { return ((__vector __bool char) __builtin_s390_vistrb((__vector unsigned char)__a)); } static inline __ATTRS_o_ai __vector unsigned char vec_cp_until_zero(__vector unsigned char __a) { return __builtin_s390_vistrb(__a); } static inline __ATTRS_o_ai __vector signed short vec_cp_until_zero(__vector signed short __a) { return ((__vector signed short) __builtin_s390_vistrh((__vector unsigned short)__a)); } static inline __ATTRS_o_ai __vector __bool short vec_cp_until_zero(__vector __bool short __a) { return ((__vector __bool short) __builtin_s390_vistrh((__vector unsigned short)__a)); } static inline __ATTRS_o_ai __vector unsigned short vec_cp_until_zero(__vector unsigned short __a) { return __builtin_s390_vistrh(__a); } static inline __ATTRS_o_ai __vector signed int vec_cp_until_zero(__vector signed int __a) { return ((__vector signed int) __builtin_s390_vistrf((__vector unsigned int)__a)); } static inline __ATTRS_o_ai __vector __bool int vec_cp_until_zero(__vector __bool int __a) { return ((__vector __bool int) __builtin_s390_vistrf((__vector unsigned int)__a)); } static inline __ATTRS_o_ai __vector unsigned int vec_cp_until_zero(__vector unsigned int __a) { return __builtin_s390_vistrf(__a); } /*-- vec_cp_until_zero_cc ---------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_cp_until_zero_cc(__vector signed char __a, int *__cc) { return (__vector signed char) __builtin_s390_vistrbs((__vector unsigned char)__a, __cc); } static inline __ATTRS_o_ai __vector __bool char vec_cp_until_zero_cc(__vector __bool char __a, int *__cc) { return (__vector __bool char) __builtin_s390_vistrbs((__vector unsigned char)__a, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_cp_until_zero_cc(__vector unsigned char __a, int *__cc) { return __builtin_s390_vistrbs(__a, __cc); } static inline __ATTRS_o_ai __vector signed short vec_cp_until_zero_cc(__vector signed short __a, int *__cc) { return (__vector signed short) __builtin_s390_vistrhs((__vector unsigned short)__a, __cc); } static inline __ATTRS_o_ai __vector __bool short vec_cp_until_zero_cc(__vector __bool short __a, int *__cc) { return (__vector __bool short) __builtin_s390_vistrhs((__vector unsigned short)__a, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_cp_until_zero_cc(__vector unsigned short __a, int *__cc) { return __builtin_s390_vistrhs(__a, __cc); } static inline __ATTRS_o_ai __vector signed int vec_cp_until_zero_cc(__vector signed int __a, int *__cc) { return (__vector signed int) __builtin_s390_vistrfs((__vector unsigned int)__a, __cc); } static inline __ATTRS_o_ai __vector __bool int vec_cp_until_zero_cc(__vector __bool int __a, int *__cc) { return (__vector __bool int) __builtin_s390_vistrfs((__vector unsigned int)__a, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_cp_until_zero_cc(__vector unsigned int __a, int *__cc) { return __builtin_s390_vistrfs(__a, __cc); } /*-- vec_cmpeq_idx 
----------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_cmpeq_idx(__vector signed char __a, __vector signed char __b) { return (__vector signed char) __builtin_s390_vfeeb((__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned char vec_cmpeq_idx(__vector __bool char __a, __vector __bool char __b) { return __builtin_s390_vfeeb((__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned char vec_cmpeq_idx(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vfeeb(__a, __b); } static inline __ATTRS_o_ai __vector signed short vec_cmpeq_idx(__vector signed short __a, __vector signed short __b) { return (__vector signed short) __builtin_s390_vfeeh((__vector unsigned short)__a, (__vector unsigned short)__b); } static inline __ATTRS_o_ai __vector unsigned short vec_cmpeq_idx(__vector __bool short __a, __vector __bool short __b) { return __builtin_s390_vfeeh((__vector unsigned short)__a, (__vector unsigned short)__b); } static inline __ATTRS_o_ai __vector unsigned short vec_cmpeq_idx(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vfeeh(__a, __b); } static inline __ATTRS_o_ai __vector signed int vec_cmpeq_idx(__vector signed int __a, __vector signed int __b) { return (__vector signed int) __builtin_s390_vfeef((__vector unsigned int)__a, (__vector unsigned int)__b); } static inline __ATTRS_o_ai __vector unsigned int vec_cmpeq_idx(__vector __bool int __a, __vector __bool int __b) { return __builtin_s390_vfeef((__vector unsigned int)__a, (__vector unsigned int)__b); } static inline __ATTRS_o_ai __vector unsigned int vec_cmpeq_idx(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vfeef(__a, __b); } /*-- vec_cmpeq_idx_cc -------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_cmpeq_idx_cc(__vector signed char __a, __vector signed char __b, int *__cc) { return (__vector signed char) __builtin_s390_vfeebs((__vector unsigned char)__a, (__vector unsigned char)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_cmpeq_idx_cc(__vector __bool char __a, __vector __bool char __b, int *__cc) { return __builtin_s390_vfeebs((__vector unsigned char)__a, (__vector unsigned char)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_cmpeq_idx_cc(__vector unsigned char __a, __vector unsigned char __b, int *__cc) { return __builtin_s390_vfeebs(__a, __b, __cc); } static inline __ATTRS_o_ai __vector signed short vec_cmpeq_idx_cc(__vector signed short __a, __vector signed short __b, int *__cc) { return (__vector signed short) __builtin_s390_vfeehs((__vector unsigned short)__a, (__vector unsigned short)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_cmpeq_idx_cc(__vector __bool short __a, __vector __bool short __b, int *__cc) { return __builtin_s390_vfeehs((__vector unsigned short)__a, (__vector unsigned short)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_cmpeq_idx_cc(__vector unsigned short __a, __vector unsigned short __b, int *__cc) { return __builtin_s390_vfeehs(__a, __b, __cc); } static inline __ATTRS_o_ai __vector signed int vec_cmpeq_idx_cc(__vector signed int __a, __vector signed int __b, int *__cc) { return (__vector signed int) __builtin_s390_vfeefs((__vector unsigned int)__a, (__vector unsigned int)__b, __cc); } static inline __ATTRS_o_ai __vector 
unsigned int vec_cmpeq_idx_cc(__vector __bool int __a, __vector __bool int __b, int *__cc) { return __builtin_s390_vfeefs((__vector unsigned int)__a, (__vector unsigned int)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_cmpeq_idx_cc(__vector unsigned int __a, __vector unsigned int __b, int *__cc) { return __builtin_s390_vfeefs(__a, __b, __cc); } /*-- vec_cmpeq_or_0_idx -----------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_cmpeq_or_0_idx(__vector signed char __a, __vector signed char __b) { return (__vector signed char) __builtin_s390_vfeezb((__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned char vec_cmpeq_or_0_idx(__vector __bool char __a, __vector __bool char __b) { return __builtin_s390_vfeezb((__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned char vec_cmpeq_or_0_idx(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vfeezb(__a, __b); } static inline __ATTRS_o_ai __vector signed short vec_cmpeq_or_0_idx(__vector signed short __a, __vector signed short __b) { return (__vector signed short) __builtin_s390_vfeezh((__vector unsigned short)__a, (__vector unsigned short)__b); } static inline __ATTRS_o_ai __vector unsigned short vec_cmpeq_or_0_idx(__vector __bool short __a, __vector __bool short __b) { return __builtin_s390_vfeezh((__vector unsigned short)__a, (__vector unsigned short)__b); } static inline __ATTRS_o_ai __vector unsigned short vec_cmpeq_or_0_idx(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vfeezh(__a, __b); } static inline __ATTRS_o_ai __vector signed int vec_cmpeq_or_0_idx(__vector signed int __a, __vector signed int __b) { return (__vector signed int) __builtin_s390_vfeezf((__vector unsigned int)__a, (__vector unsigned int)__b); } static inline __ATTRS_o_ai __vector unsigned int vec_cmpeq_or_0_idx(__vector __bool int __a, __vector __bool int __b) { return __builtin_s390_vfeezf((__vector unsigned int)__a, (__vector unsigned int)__b); } static inline __ATTRS_o_ai __vector unsigned int vec_cmpeq_or_0_idx(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vfeezf(__a, __b); } /*-- vec_cmpeq_or_0_idx_cc --------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_cmpeq_or_0_idx_cc(__vector signed char __a, __vector signed char __b, int *__cc) { return (__vector signed char) __builtin_s390_vfeezbs((__vector unsigned char)__a, (__vector unsigned char)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_cmpeq_or_0_idx_cc(__vector __bool char __a, __vector __bool char __b, int *__cc) { return __builtin_s390_vfeezbs((__vector unsigned char)__a, (__vector unsigned char)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_cmpeq_or_0_idx_cc(__vector unsigned char __a, __vector unsigned char __b, int *__cc) { return __builtin_s390_vfeezbs(__a, __b, __cc); } static inline __ATTRS_o_ai __vector signed short vec_cmpeq_or_0_idx_cc(__vector signed short __a, __vector signed short __b, int *__cc) { return (__vector signed short) __builtin_s390_vfeezhs((__vector unsigned short)__a, (__vector unsigned short)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_cmpeq_or_0_idx_cc(__vector __bool short __a, __vector __bool short __b, int *__cc) { return __builtin_s390_vfeezhs((__vector unsigned short)__a, (__vector unsigned short)__b, __cc); } 
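/*
 * Usage sketch (illustrative addition, not part of the original header): the
 * *_cc variants in this family report the instruction's condition code through
 * the int *__cc out-parameter, and the *_idx variants return the underlying
 * index result as a vector rather than a comparison mask.  A typical
 * (hypothetical) call looks like:
 *
 *   int __cc;
 *   __vector unsigned char __idx = vec_cmpeq_or_0_idx_cc(__a, __b, &__cc);
 *
 * where __a and __b are __vector unsigned char values; the condition code
 * distinguishes the match, no-match, and zero-element cases (see the
 * z/Architecture Principles of Operation for the exact encoding).  The
 * "_or_0_" variants additionally report the first zero element, which makes
 * them suitable for NUL-terminated data.
 */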
static inline __ATTRS_o_ai __vector unsigned short vec_cmpeq_or_0_idx_cc(__vector unsigned short __a, __vector unsigned short __b, int *__cc) { return __builtin_s390_vfeezhs(__a, __b, __cc); } static inline __ATTRS_o_ai __vector signed int vec_cmpeq_or_0_idx_cc(__vector signed int __a, __vector signed int __b, int *__cc) { return (__vector signed int) __builtin_s390_vfeezfs((__vector unsigned int)__a, (__vector unsigned int)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_cmpeq_or_0_idx_cc(__vector __bool int __a, __vector __bool int __b, int *__cc) { return __builtin_s390_vfeezfs((__vector unsigned int)__a, (__vector unsigned int)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_cmpeq_or_0_idx_cc(__vector unsigned int __a, __vector unsigned int __b, int *__cc) { return __builtin_s390_vfeezfs(__a, __b, __cc); } /*-- vec_cmpne_idx ----------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_cmpne_idx(__vector signed char __a, __vector signed char __b) { return (__vector signed char) __builtin_s390_vfeneb((__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned char vec_cmpne_idx(__vector __bool char __a, __vector __bool char __b) { return __builtin_s390_vfeneb((__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned char vec_cmpne_idx(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vfeneb(__a, __b); } static inline __ATTRS_o_ai __vector signed short vec_cmpne_idx(__vector signed short __a, __vector signed short __b) { return (__vector signed short) __builtin_s390_vfeneh((__vector unsigned short)__a, (__vector unsigned short)__b); } static inline __ATTRS_o_ai __vector unsigned short vec_cmpne_idx(__vector __bool short __a, __vector __bool short __b) { return __builtin_s390_vfeneh((__vector unsigned short)__a, (__vector unsigned short)__b); } static inline __ATTRS_o_ai __vector unsigned short vec_cmpne_idx(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vfeneh(__a, __b); } static inline __ATTRS_o_ai __vector signed int vec_cmpne_idx(__vector signed int __a, __vector signed int __b) { return (__vector signed int) __builtin_s390_vfenef((__vector unsigned int)__a, (__vector unsigned int)__b); } static inline __ATTRS_o_ai __vector unsigned int vec_cmpne_idx(__vector __bool int __a, __vector __bool int __b) { return __builtin_s390_vfenef((__vector unsigned int)__a, (__vector unsigned int)__b); } static inline __ATTRS_o_ai __vector unsigned int vec_cmpne_idx(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vfenef(__a, __b); } /*-- vec_cmpne_idx_cc -------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_cmpne_idx_cc(__vector signed char __a, __vector signed char __b, int *__cc) { return (__vector signed char) __builtin_s390_vfenebs((__vector unsigned char)__a, (__vector unsigned char)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_cmpne_idx_cc(__vector __bool char __a, __vector __bool char __b, int *__cc) { return __builtin_s390_vfenebs((__vector unsigned char)__a, (__vector unsigned char)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_cmpne_idx_cc(__vector unsigned char __a, __vector unsigned char __b, int *__cc) { return __builtin_s390_vfenebs(__a, __b, __cc); } static inline __ATTRS_o_ai __vector signed short 
vec_cmpne_idx_cc(__vector signed short __a, __vector signed short __b, int *__cc) { return (__vector signed short) __builtin_s390_vfenehs((__vector unsigned short)__a, (__vector unsigned short)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_cmpne_idx_cc(__vector __bool short __a, __vector __bool short __b, int *__cc) { return __builtin_s390_vfenehs((__vector unsigned short)__a, (__vector unsigned short)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_cmpne_idx_cc(__vector unsigned short __a, __vector unsigned short __b, int *__cc) { return __builtin_s390_vfenehs(__a, __b, __cc); } static inline __ATTRS_o_ai __vector signed int vec_cmpne_idx_cc(__vector signed int __a, __vector signed int __b, int *__cc) { return (__vector signed int) __builtin_s390_vfenefs((__vector unsigned int)__a, (__vector unsigned int)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_cmpne_idx_cc(__vector __bool int __a, __vector __bool int __b, int *__cc) { return __builtin_s390_vfenefs((__vector unsigned int)__a, (__vector unsigned int)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_cmpne_idx_cc(__vector unsigned int __a, __vector unsigned int __b, int *__cc) { return __builtin_s390_vfenefs(__a, __b, __cc); } /*-- vec_cmpne_or_0_idx -----------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_cmpne_or_0_idx(__vector signed char __a, __vector signed char __b) { return (__vector signed char) __builtin_s390_vfenezb((__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned char vec_cmpne_or_0_idx(__vector __bool char __a, __vector __bool char __b) { return __builtin_s390_vfenezb((__vector unsigned char)__a, (__vector unsigned char)__b); } static inline __ATTRS_o_ai __vector unsigned char vec_cmpne_or_0_idx(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vfenezb(__a, __b); } static inline __ATTRS_o_ai __vector signed short vec_cmpne_or_0_idx(__vector signed short __a, __vector signed short __b) { return (__vector signed short) __builtin_s390_vfenezh((__vector unsigned short)__a, (__vector unsigned short)__b); } static inline __ATTRS_o_ai __vector unsigned short vec_cmpne_or_0_idx(__vector __bool short __a, __vector __bool short __b) { return __builtin_s390_vfenezh((__vector unsigned short)__a, (__vector unsigned short)__b); } static inline __ATTRS_o_ai __vector unsigned short vec_cmpne_or_0_idx(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vfenezh(__a, __b); } static inline __ATTRS_o_ai __vector signed int vec_cmpne_or_0_idx(__vector signed int __a, __vector signed int __b) { return (__vector signed int) __builtin_s390_vfenezf((__vector unsigned int)__a, (__vector unsigned int)__b); } static inline __ATTRS_o_ai __vector unsigned int vec_cmpne_or_0_idx(__vector __bool int __a, __vector __bool int __b) { return __builtin_s390_vfenezf((__vector unsigned int)__a, (__vector unsigned int)__b); } static inline __ATTRS_o_ai __vector unsigned int vec_cmpne_or_0_idx(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vfenezf(__a, __b); } /*-- vec_cmpne_or_0_idx_cc --------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_cmpne_or_0_idx_cc(__vector signed char __a, __vector signed char __b, int *__cc) { return (__vector signed char) __builtin_s390_vfenezbs((__vector unsigned char)__a, (__vector unsigned char)__b, __cc); } 
static inline __ATTRS_o_ai __vector unsigned char vec_cmpne_or_0_idx_cc(__vector __bool char __a, __vector __bool char __b, int *__cc) { return __builtin_s390_vfenezbs((__vector unsigned char)__a, (__vector unsigned char)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_cmpne_or_0_idx_cc(__vector unsigned char __a, __vector unsigned char __b, int *__cc) { return __builtin_s390_vfenezbs(__a, __b, __cc); } static inline __ATTRS_o_ai __vector signed short vec_cmpne_or_0_idx_cc(__vector signed short __a, __vector signed short __b, int *__cc) { return (__vector signed short) __builtin_s390_vfenezhs((__vector unsigned short)__a, (__vector unsigned short)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_cmpne_or_0_idx_cc(__vector __bool short __a, __vector __bool short __b, int *__cc) { return __builtin_s390_vfenezhs((__vector unsigned short)__a, (__vector unsigned short)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_cmpne_or_0_idx_cc(__vector unsigned short __a, __vector unsigned short __b, int *__cc) { return __builtin_s390_vfenezhs(__a, __b, __cc); } static inline __ATTRS_o_ai __vector signed int vec_cmpne_or_0_idx_cc(__vector signed int __a, __vector signed int __b, int *__cc) { return (__vector signed int) __builtin_s390_vfenezfs((__vector unsigned int)__a, (__vector unsigned int)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_cmpne_or_0_idx_cc(__vector __bool int __a, __vector __bool int __b, int *__cc) { return __builtin_s390_vfenezfs((__vector unsigned int)__a, (__vector unsigned int)__b, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_cmpne_or_0_idx_cc(__vector unsigned int __a, __vector unsigned int __b, int *__cc) { return __builtin_s390_vfenezfs(__a, __b, __cc); } /*-- vec_cmprg --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector __bool char vec_cmprg(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c) { return (__vector __bool char)__builtin_s390_vstrcb(__a, __b, __c, 4); } static inline __ATTRS_o_ai __vector __bool short vec_cmprg(__vector unsigned short __a, __vector unsigned short __b, __vector unsigned short __c) { return (__vector __bool short)__builtin_s390_vstrch(__a, __b, __c, 4); } static inline __ATTRS_o_ai __vector __bool int vec_cmprg(__vector unsigned int __a, __vector unsigned int __b, __vector unsigned int __c) { return (__vector __bool int)__builtin_s390_vstrcf(__a, __b, __c, 4); } /*-- vec_cmprg_cc -----------------------------------------------------------*/ static inline __ATTRS_o_ai __vector __bool char vec_cmprg_cc(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c, int *__cc) { return (__vector __bool char)__builtin_s390_vstrcbs(__a, __b, __c, 4, __cc); } static inline __ATTRS_o_ai __vector __bool short vec_cmprg_cc(__vector unsigned short __a, __vector unsigned short __b, __vector unsigned short __c, int *__cc) { return (__vector __bool short)__builtin_s390_vstrchs(__a, __b, __c, 4, __cc); } static inline __ATTRS_o_ai __vector __bool int vec_cmprg_cc(__vector unsigned int __a, __vector unsigned int __b, __vector unsigned int __c, int *__cc) { return (__vector __bool int)__builtin_s390_vstrcfs(__a, __b, __c, 4, __cc); } /*-- vec_cmprg_idx ----------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_cmprg_idx(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c) { return 
__builtin_s390_vstrcb(__a, __b, __c, 0); } static inline __ATTRS_o_ai __vector unsigned short vec_cmprg_idx(__vector unsigned short __a, __vector unsigned short __b, __vector unsigned short __c) { return __builtin_s390_vstrch(__a, __b, __c, 0); } static inline __ATTRS_o_ai __vector unsigned int vec_cmprg_idx(__vector unsigned int __a, __vector unsigned int __b, __vector unsigned int __c) { return __builtin_s390_vstrcf(__a, __b, __c, 0); } /*-- vec_cmprg_idx_cc -------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_cmprg_idx_cc(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrcbs(__a, __b, __c, 0, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_cmprg_idx_cc(__vector unsigned short __a, __vector unsigned short __b, __vector unsigned short __c, int *__cc) { return __builtin_s390_vstrchs(__a, __b, __c, 0, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_cmprg_idx_cc(__vector unsigned int __a, __vector unsigned int __b, __vector unsigned int __c, int *__cc) { return __builtin_s390_vstrcfs(__a, __b, __c, 0, __cc); } /*-- vec_cmprg_or_0_idx -----------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_cmprg_or_0_idx(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c) { return __builtin_s390_vstrczb(__a, __b, __c, 0); } static inline __ATTRS_o_ai __vector unsigned short vec_cmprg_or_0_idx(__vector unsigned short __a, __vector unsigned short __b, __vector unsigned short __c) { return __builtin_s390_vstrczh(__a, __b, __c, 0); } static inline __ATTRS_o_ai __vector unsigned int vec_cmprg_or_0_idx(__vector unsigned int __a, __vector unsigned int __b, __vector unsigned int __c) { return __builtin_s390_vstrczf(__a, __b, __c, 0); } /*-- vec_cmprg_or_0_idx_cc --------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_cmprg_or_0_idx_cc(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrczbs(__a, __b, __c, 0, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_cmprg_or_0_idx_cc(__vector unsigned short __a, __vector unsigned short __b, __vector unsigned short __c, int *__cc) { return __builtin_s390_vstrczhs(__a, __b, __c, 0, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_cmprg_or_0_idx_cc(__vector unsigned int __a, __vector unsigned int __b, __vector unsigned int __c, int *__cc) { return __builtin_s390_vstrczfs(__a, __b, __c, 0, __cc); } /*-- vec_cmpnrg -------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector __bool char vec_cmpnrg(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c) { return (__vector __bool char)__builtin_s390_vstrcb(__a, __b, __c, 12); } static inline __ATTRS_o_ai __vector __bool short vec_cmpnrg(__vector unsigned short __a, __vector unsigned short __b, __vector unsigned short __c) { return (__vector __bool short)__builtin_s390_vstrch(__a, __b, __c, 12); } static inline __ATTRS_o_ai __vector __bool int vec_cmpnrg(__vector unsigned int __a, __vector unsigned int __b, __vector unsigned int __c) { return (__vector __bool int)__builtin_s390_vstrcf(__a, __b, __c, 12); } /*-- vec_cmpnrg_cc ----------------------------------------------------------*/ static inline __ATTRS_o_ai __vector __bool char vec_cmpnrg_cc(__vector unsigned char __a, 
__vector unsigned char __b, __vector unsigned char __c, int *__cc) { return (__vector __bool char) __builtin_s390_vstrcbs(__a, __b, __c, 12, __cc); } static inline __ATTRS_o_ai __vector __bool short vec_cmpnrg_cc(__vector unsigned short __a, __vector unsigned short __b, __vector unsigned short __c, int *__cc) { return (__vector __bool short) __builtin_s390_vstrchs(__a, __b, __c, 12, __cc); } static inline __ATTRS_o_ai __vector __bool int vec_cmpnrg_cc(__vector unsigned int __a, __vector unsigned int __b, __vector unsigned int __c, int *__cc) { return (__vector __bool int) __builtin_s390_vstrcfs(__a, __b, __c, 12, __cc); } /*-- vec_cmpnrg_idx ---------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_cmpnrg_idx(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c) { return __builtin_s390_vstrcb(__a, __b, __c, 8); } static inline __ATTRS_o_ai __vector unsigned short vec_cmpnrg_idx(__vector unsigned short __a, __vector unsigned short __b, __vector unsigned short __c) { return __builtin_s390_vstrch(__a, __b, __c, 8); } static inline __ATTRS_o_ai __vector unsigned int vec_cmpnrg_idx(__vector unsigned int __a, __vector unsigned int __b, __vector unsigned int __c) { return __builtin_s390_vstrcf(__a, __b, __c, 8); } /*-- vec_cmpnrg_idx_cc ------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_cmpnrg_idx_cc(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrcbs(__a, __b, __c, 8, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_cmpnrg_idx_cc(__vector unsigned short __a, __vector unsigned short __b, __vector unsigned short __c, int *__cc) { return __builtin_s390_vstrchs(__a, __b, __c, 8, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_cmpnrg_idx_cc(__vector unsigned int __a, __vector unsigned int __b, __vector unsigned int __c, int *__cc) { return __builtin_s390_vstrcfs(__a, __b, __c, 8, __cc); } /*-- vec_cmpnrg_or_0_idx ----------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_cmpnrg_or_0_idx(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c) { return __builtin_s390_vstrczb(__a, __b, __c, 8); } static inline __ATTRS_o_ai __vector unsigned short vec_cmpnrg_or_0_idx(__vector unsigned short __a, __vector unsigned short __b, __vector unsigned short __c) { return __builtin_s390_vstrczh(__a, __b, __c, 8); } static inline __ATTRS_o_ai __vector unsigned int vec_cmpnrg_or_0_idx(__vector unsigned int __a, __vector unsigned int __b, __vector unsigned int __c) { return __builtin_s390_vstrczf(__a, __b, __c, 8); } /*-- vec_cmpnrg_or_0_idx_cc -------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_cmpnrg_or_0_idx_cc(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrczbs(__a, __b, __c, 8, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_cmpnrg_or_0_idx_cc(__vector unsigned short __a, __vector unsigned short __b, __vector unsigned short __c, int *__cc) { return __builtin_s390_vstrczhs(__a, __b, __c, 8, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_cmpnrg_or_0_idx_cc(__vector unsigned int __a, __vector unsigned int __b, __vector unsigned int __c, int *__cc) { return __builtin_s390_vstrczfs(__a, __b, __c, 8, __cc); } /*-- vec_find_any_eq 
--------------------------------------------------------*/ static inline __ATTRS_o_ai __vector __bool char vec_find_any_eq(__vector signed char __a, __vector signed char __b) { return (__vector __bool char) __builtin_s390_vfaeb((__vector unsigned char)__a, (__vector unsigned char)__b, 4); } static inline __ATTRS_o_ai __vector __bool char vec_find_any_eq(__vector __bool char __a, __vector __bool char __b) { return (__vector __bool char) __builtin_s390_vfaeb((__vector unsigned char)__a, (__vector unsigned char)__b, 4); } static inline __ATTRS_o_ai __vector __bool char vec_find_any_eq(__vector unsigned char __a, __vector unsigned char __b) { return (__vector __bool char)__builtin_s390_vfaeb(__a, __b, 4); } static inline __ATTRS_o_ai __vector __bool short vec_find_any_eq(__vector signed short __a, __vector signed short __b) { return (__vector __bool short) __builtin_s390_vfaeh((__vector unsigned short)__a, (__vector unsigned short)__b, 4); } static inline __ATTRS_o_ai __vector __bool short vec_find_any_eq(__vector __bool short __a, __vector __bool short __b) { return (__vector __bool short) __builtin_s390_vfaeh((__vector unsigned short)__a, (__vector unsigned short)__b, 4); } static inline __ATTRS_o_ai __vector __bool short vec_find_any_eq(__vector unsigned short __a, __vector unsigned short __b) { return (__vector __bool short)__builtin_s390_vfaeh(__a, __b, 4); } static inline __ATTRS_o_ai __vector __bool int vec_find_any_eq(__vector signed int __a, __vector signed int __b) { return (__vector __bool int) __builtin_s390_vfaef((__vector unsigned int)__a, (__vector unsigned int)__b, 4); } static inline __ATTRS_o_ai __vector __bool int vec_find_any_eq(__vector __bool int __a, __vector __bool int __b) { return (__vector __bool int) __builtin_s390_vfaef((__vector unsigned int)__a, (__vector unsigned int)__b, 4); } static inline __ATTRS_o_ai __vector __bool int vec_find_any_eq(__vector unsigned int __a, __vector unsigned int __b) { return (__vector __bool int)__builtin_s390_vfaef(__a, __b, 4); } /*-- vec_find_any_eq_cc -----------------------------------------------------*/ static inline __ATTRS_o_ai __vector __bool char vec_find_any_eq_cc(__vector signed char __a, __vector signed char __b, int *__cc) { return (__vector __bool char) __builtin_s390_vfaebs((__vector unsigned char)__a, (__vector unsigned char)__b, 4, __cc); } static inline __ATTRS_o_ai __vector __bool char vec_find_any_eq_cc(__vector __bool char __a, __vector __bool char __b, int *__cc) { return (__vector __bool char) __builtin_s390_vfaebs((__vector unsigned char)__a, (__vector unsigned char)__b, 4, __cc); } static inline __ATTRS_o_ai __vector __bool char vec_find_any_eq_cc(__vector unsigned char __a, __vector unsigned char __b, int *__cc) { return (__vector __bool char)__builtin_s390_vfaebs(__a, __b, 4, __cc); } static inline __ATTRS_o_ai __vector __bool short vec_find_any_eq_cc(__vector signed short __a, __vector signed short __b, int *__cc) { return (__vector __bool short) __builtin_s390_vfaehs((__vector unsigned short)__a, (__vector unsigned short)__b, 4, __cc); } static inline __ATTRS_o_ai __vector __bool short vec_find_any_eq_cc(__vector __bool short __a, __vector __bool short __b, int *__cc) { return (__vector __bool short) __builtin_s390_vfaehs((__vector unsigned short)__a, (__vector unsigned short)__b, 4, __cc); } static inline __ATTRS_o_ai __vector __bool short vec_find_any_eq_cc(__vector unsigned short __a, __vector unsigned short __b, int *__cc) { return (__vector __bool short)__builtin_s390_vfaehs(__a, __b, 4, __cc); } 
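/*
 * Illustrative sketch (editorial addition, not part of the original header):
 * one way the vec_find_any_eq / vec_find_any_eq_cc overloads defined above
 * might be used, assuming an s390x target built with -mzvector.  Each result
 * element is all ones when the corresponding element of the first operand
 * equals any element of the second operand; the _cc variants additionally
 * return the underlying instruction's condition code through *__cc.
 *
 *   __vector unsigned char text   = ...;  // 16 bytes to scan
 *   __vector unsigned char accept = ...;  // bytes to look for
 *   __vector __bool char mask = vec_find_any_eq(text, accept);
 *
 *   int cc;
 *   __vector __bool char mask2 = vec_find_any_eq_cc(text, accept, &cc);
 */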
static inline __ATTRS_o_ai __vector __bool int vec_find_any_eq_cc(__vector signed int __a, __vector signed int __b, int *__cc) { return (__vector __bool int) __builtin_s390_vfaefs((__vector unsigned int)__a, (__vector unsigned int)__b, 4, __cc); } static inline __ATTRS_o_ai __vector __bool int vec_find_any_eq_cc(__vector __bool int __a, __vector __bool int __b, int *__cc) { return (__vector __bool int) __builtin_s390_vfaefs((__vector unsigned int)__a, (__vector unsigned int)__b, 4, __cc); } static inline __ATTRS_o_ai __vector __bool int vec_find_any_eq_cc(__vector unsigned int __a, __vector unsigned int __b, int *__cc) { return (__vector __bool int)__builtin_s390_vfaefs(__a, __b, 4, __cc); } /*-- vec_find_any_eq_idx ----------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_find_any_eq_idx(__vector signed char __a, __vector signed char __b) { return (__vector signed char) __builtin_s390_vfaeb((__vector unsigned char)__a, (__vector unsigned char)__b, 0); } static inline __ATTRS_o_ai __vector unsigned char vec_find_any_eq_idx(__vector __bool char __a, __vector __bool char __b) { return __builtin_s390_vfaeb((__vector unsigned char)__a, (__vector unsigned char)__b, 0); } static inline __ATTRS_o_ai __vector unsigned char vec_find_any_eq_idx(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vfaeb(__a, __b, 0); } static inline __ATTRS_o_ai __vector signed short vec_find_any_eq_idx(__vector signed short __a, __vector signed short __b) { return (__vector signed short) __builtin_s390_vfaeh((__vector unsigned short)__a, (__vector unsigned short)__b, 0); } static inline __ATTRS_o_ai __vector unsigned short vec_find_any_eq_idx(__vector __bool short __a, __vector __bool short __b) { return __builtin_s390_vfaeh((__vector unsigned short)__a, (__vector unsigned short)__b, 0); } static inline __ATTRS_o_ai __vector unsigned short vec_find_any_eq_idx(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vfaeh(__a, __b, 0); } static inline __ATTRS_o_ai __vector signed int vec_find_any_eq_idx(__vector signed int __a, __vector signed int __b) { return (__vector signed int) __builtin_s390_vfaef((__vector unsigned int)__a, (__vector unsigned int)__b, 0); } static inline __ATTRS_o_ai __vector unsigned int vec_find_any_eq_idx(__vector __bool int __a, __vector __bool int __b) { return __builtin_s390_vfaef((__vector unsigned int)__a, (__vector unsigned int)__b, 0); } static inline __ATTRS_o_ai __vector unsigned int vec_find_any_eq_idx(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vfaef(__a, __b, 0); } /*-- vec_find_any_eq_idx_cc -------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_find_any_eq_idx_cc(__vector signed char __a, __vector signed char __b, int *__cc) { return (__vector signed char) __builtin_s390_vfaebs((__vector unsigned char)__a, (__vector unsigned char)__b, 0, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_find_any_eq_idx_cc(__vector __bool char __a, __vector __bool char __b, int *__cc) { return __builtin_s390_vfaebs((__vector unsigned char)__a, (__vector unsigned char)__b, 0, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_find_any_eq_idx_cc(__vector unsigned char __a, __vector unsigned char __b, int *__cc) { return __builtin_s390_vfaebs(__a, __b, 0, __cc); } static inline __ATTRS_o_ai __vector signed short vec_find_any_eq_idx_cc(__vector signed short __a, __vector signed short __b, int 
*__cc) { return (__vector signed short) __builtin_s390_vfaehs((__vector unsigned short)__a, (__vector unsigned short)__b, 0, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_find_any_eq_idx_cc(__vector __bool short __a, __vector __bool short __b, int *__cc) { return __builtin_s390_vfaehs((__vector unsigned short)__a, (__vector unsigned short)__b, 0, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_find_any_eq_idx_cc(__vector unsigned short __a, __vector unsigned short __b, int *__cc) { return __builtin_s390_vfaehs(__a, __b, 0, __cc); } static inline __ATTRS_o_ai __vector signed int vec_find_any_eq_idx_cc(__vector signed int __a, __vector signed int __b, int *__cc) { return (__vector signed int) __builtin_s390_vfaefs((__vector unsigned int)__a, (__vector unsigned int)__b, 0, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_find_any_eq_idx_cc(__vector __bool int __a, __vector __bool int __b, int *__cc) { return __builtin_s390_vfaefs((__vector unsigned int)__a, (__vector unsigned int)__b, 0, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_find_any_eq_idx_cc(__vector unsigned int __a, __vector unsigned int __b, int *__cc) { return __builtin_s390_vfaefs(__a, __b, 0, __cc); } /*-- vec_find_any_eq_or_0_idx -----------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_find_any_eq_or_0_idx(__vector signed char __a, __vector signed char __b) { return (__vector signed char) __builtin_s390_vfaezb((__vector unsigned char)__a, (__vector unsigned char)__b, 0); } static inline __ATTRS_o_ai __vector unsigned char vec_find_any_eq_or_0_idx(__vector __bool char __a, __vector __bool char __b) { return __builtin_s390_vfaezb((__vector unsigned char)__a, (__vector unsigned char)__b, 0); } static inline __ATTRS_o_ai __vector unsigned char vec_find_any_eq_or_0_idx(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vfaezb(__a, __b, 0); } static inline __ATTRS_o_ai __vector signed short vec_find_any_eq_or_0_idx(__vector signed short __a, __vector signed short __b) { return (__vector signed short) __builtin_s390_vfaezh((__vector unsigned short)__a, (__vector unsigned short)__b, 0); } static inline __ATTRS_o_ai __vector unsigned short vec_find_any_eq_or_0_idx(__vector __bool short __a, __vector __bool short __b) { return __builtin_s390_vfaezh((__vector unsigned short)__a, (__vector unsigned short)__b, 0); } static inline __ATTRS_o_ai __vector unsigned short vec_find_any_eq_or_0_idx(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vfaezh(__a, __b, 0); } static inline __ATTRS_o_ai __vector signed int vec_find_any_eq_or_0_idx(__vector signed int __a, __vector signed int __b) { return (__vector signed int) __builtin_s390_vfaezf((__vector unsigned int)__a, (__vector unsigned int)__b, 0); } static inline __ATTRS_o_ai __vector unsigned int vec_find_any_eq_or_0_idx(__vector __bool int __a, __vector __bool int __b) { return __builtin_s390_vfaezf((__vector unsigned int)__a, (__vector unsigned int)__b, 0); } static inline __ATTRS_o_ai __vector unsigned int vec_find_any_eq_or_0_idx(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vfaezf(__a, __b, 0); } /*-- vec_find_any_eq_or_0_idx_cc --------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_find_any_eq_or_0_idx_cc(__vector signed char __a, __vector signed char __b, int *__cc) { return (__vector signed char) __builtin_s390_vfaezbs((__vector unsigned 
char)__a, (__vector unsigned char)__b, 0, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_find_any_eq_or_0_idx_cc(__vector __bool char __a, __vector __bool char __b, int *__cc) { return __builtin_s390_vfaezbs((__vector unsigned char)__a, (__vector unsigned char)__b, 0, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_find_any_eq_or_0_idx_cc(__vector unsigned char __a, __vector unsigned char __b, int *__cc) { return __builtin_s390_vfaezbs(__a, __b, 0, __cc); } static inline __ATTRS_o_ai __vector signed short vec_find_any_eq_or_0_idx_cc(__vector signed short __a, __vector signed short __b, int *__cc) { return (__vector signed short) __builtin_s390_vfaezhs((__vector unsigned short)__a, (__vector unsigned short)__b, 0, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_find_any_eq_or_0_idx_cc(__vector __bool short __a, __vector __bool short __b, int *__cc) { return __builtin_s390_vfaezhs((__vector unsigned short)__a, (__vector unsigned short)__b, 0, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_find_any_eq_or_0_idx_cc(__vector unsigned short __a, __vector unsigned short __b, int *__cc) { return __builtin_s390_vfaezhs(__a, __b, 0, __cc); } static inline __ATTRS_o_ai __vector signed int vec_find_any_eq_or_0_idx_cc(__vector signed int __a, __vector signed int __b, int *__cc) { return (__vector signed int) __builtin_s390_vfaezfs((__vector unsigned int)__a, (__vector unsigned int)__b, 0, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_find_any_eq_or_0_idx_cc(__vector __bool int __a, __vector __bool int __b, int *__cc) { return __builtin_s390_vfaezfs((__vector unsigned int)__a, (__vector unsigned int)__b, 0, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_find_any_eq_or_0_idx_cc(__vector unsigned int __a, __vector unsigned int __b, int *__cc) { return __builtin_s390_vfaezfs(__a, __b, 0, __cc); } /*-- vec_find_any_ne --------------------------------------------------------*/ static inline __ATTRS_o_ai __vector __bool char vec_find_any_ne(__vector signed char __a, __vector signed char __b) { return (__vector __bool char) __builtin_s390_vfaeb((__vector unsigned char)__a, (__vector unsigned char)__b, 12); } static inline __ATTRS_o_ai __vector __bool char vec_find_any_ne(__vector __bool char __a, __vector __bool char __b) { return (__vector __bool char) __builtin_s390_vfaeb((__vector unsigned char)__a, (__vector unsigned char)__b, 12); } static inline __ATTRS_o_ai __vector __bool char vec_find_any_ne(__vector unsigned char __a, __vector unsigned char __b) { return (__vector __bool char)__builtin_s390_vfaeb(__a, __b, 12); } static inline __ATTRS_o_ai __vector __bool short vec_find_any_ne(__vector signed short __a, __vector signed short __b) { return (__vector __bool short) __builtin_s390_vfaeh((__vector unsigned short)__a, (__vector unsigned short)__b, 12); } static inline __ATTRS_o_ai __vector __bool short vec_find_any_ne(__vector __bool short __a, __vector __bool short __b) { return (__vector __bool short) __builtin_s390_vfaeh((__vector unsigned short)__a, (__vector unsigned short)__b, 12); } static inline __ATTRS_o_ai __vector __bool short vec_find_any_ne(__vector unsigned short __a, __vector unsigned short __b) { return (__vector __bool short)__builtin_s390_vfaeh(__a, __b, 12); } static inline __ATTRS_o_ai __vector __bool int vec_find_any_ne(__vector signed int __a, __vector signed int __b) { return (__vector __bool int) __builtin_s390_vfaef((__vector unsigned int)__a, (__vector unsigned int)__b, 12); } static 
inline __ATTRS_o_ai __vector __bool int vec_find_any_ne(__vector __bool int __a, __vector __bool int __b) { return (__vector __bool int) __builtin_s390_vfaef((__vector unsigned int)__a, (__vector unsigned int)__b, 12); } static inline __ATTRS_o_ai __vector __bool int vec_find_any_ne(__vector unsigned int __a, __vector unsigned int __b) { return (__vector __bool int)__builtin_s390_vfaef(__a, __b, 12); } /*-- vec_find_any_ne_cc -----------------------------------------------------*/ static inline __ATTRS_o_ai __vector __bool char vec_find_any_ne_cc(__vector signed char __a, __vector signed char __b, int *__cc) { return (__vector __bool char) __builtin_s390_vfaebs((__vector unsigned char)__a, (__vector unsigned char)__b, 12, __cc); } static inline __ATTRS_o_ai __vector __bool char vec_find_any_ne_cc(__vector __bool char __a, __vector __bool char __b, int *__cc) { return (__vector __bool char) __builtin_s390_vfaebs((__vector unsigned char)__a, (__vector unsigned char)__b, 12, __cc); } static inline __ATTRS_o_ai __vector __bool char vec_find_any_ne_cc(__vector unsigned char __a, __vector unsigned char __b, int *__cc) { return (__vector __bool char)__builtin_s390_vfaebs(__a, __b, 12, __cc); } static inline __ATTRS_o_ai __vector __bool short vec_find_any_ne_cc(__vector signed short __a, __vector signed short __b, int *__cc) { return (__vector __bool short) __builtin_s390_vfaehs((__vector unsigned short)__a, (__vector unsigned short)__b, 12, __cc); } static inline __ATTRS_o_ai __vector __bool short vec_find_any_ne_cc(__vector __bool short __a, __vector __bool short __b, int *__cc) { return (__vector __bool short) __builtin_s390_vfaehs((__vector unsigned short)__a, (__vector unsigned short)__b, 12, __cc); } static inline __ATTRS_o_ai __vector __bool short vec_find_any_ne_cc(__vector unsigned short __a, __vector unsigned short __b, int *__cc) { return (__vector __bool short)__builtin_s390_vfaehs(__a, __b, 12, __cc); } static inline __ATTRS_o_ai __vector __bool int vec_find_any_ne_cc(__vector signed int __a, __vector signed int __b, int *__cc) { return (__vector __bool int) __builtin_s390_vfaefs((__vector unsigned int)__a, (__vector unsigned int)__b, 12, __cc); } static inline __ATTRS_o_ai __vector __bool int vec_find_any_ne_cc(__vector __bool int __a, __vector __bool int __b, int *__cc) { return (__vector __bool int) __builtin_s390_vfaefs((__vector unsigned int)__a, (__vector unsigned int)__b, 12, __cc); } static inline __ATTRS_o_ai __vector __bool int vec_find_any_ne_cc(__vector unsigned int __a, __vector unsigned int __b, int *__cc) { return (__vector __bool int)__builtin_s390_vfaefs(__a, __b, 12, __cc); } /*-- vec_find_any_ne_idx ----------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_find_any_ne_idx(__vector signed char __a, __vector signed char __b) { return (__vector signed char) __builtin_s390_vfaeb((__vector unsigned char)__a, (__vector unsigned char)__b, 8); } static inline __ATTRS_o_ai __vector unsigned char vec_find_any_ne_idx(__vector __bool char __a, __vector __bool char __b) { return __builtin_s390_vfaeb((__vector unsigned char)__a, (__vector unsigned char)__b, 8); } static inline __ATTRS_o_ai __vector unsigned char vec_find_any_ne_idx(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vfaeb(__a, __b, 8); } static inline __ATTRS_o_ai __vector signed short vec_find_any_ne_idx(__vector signed short __a, __vector signed short __b) { return (__vector signed short) __builtin_s390_vfaeh((__vector unsigned 
short)__a, (__vector unsigned short)__b, 8); } static inline __ATTRS_o_ai __vector unsigned short vec_find_any_ne_idx(__vector __bool short __a, __vector __bool short __b) { return __builtin_s390_vfaeh((__vector unsigned short)__a, (__vector unsigned short)__b, 8); } static inline __ATTRS_o_ai __vector unsigned short vec_find_any_ne_idx(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vfaeh(__a, __b, 8); } static inline __ATTRS_o_ai __vector signed int vec_find_any_ne_idx(__vector signed int __a, __vector signed int __b) { return (__vector signed int) __builtin_s390_vfaef((__vector unsigned int)__a, (__vector unsigned int)__b, 8); } static inline __ATTRS_o_ai __vector unsigned int vec_find_any_ne_idx(__vector __bool int __a, __vector __bool int __b) { return __builtin_s390_vfaef((__vector unsigned int)__a, (__vector unsigned int)__b, 8); } static inline __ATTRS_o_ai __vector unsigned int vec_find_any_ne_idx(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vfaef(__a, __b, 8); } /*-- vec_find_any_ne_idx_cc -------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_find_any_ne_idx_cc(__vector signed char __a, __vector signed char __b, int *__cc) { return (__vector signed char) __builtin_s390_vfaebs((__vector unsigned char)__a, (__vector unsigned char)__b, 8, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_find_any_ne_idx_cc(__vector __bool char __a, __vector __bool char __b, int *__cc) { return __builtin_s390_vfaebs((__vector unsigned char)__a, (__vector unsigned char)__b, 8, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_find_any_ne_idx_cc(__vector unsigned char __a, __vector unsigned char __b, int *__cc) { return __builtin_s390_vfaebs(__a, __b, 8, __cc); } static inline __ATTRS_o_ai __vector signed short vec_find_any_ne_idx_cc(__vector signed short __a, __vector signed short __b, int *__cc) { return (__vector signed short) __builtin_s390_vfaehs((__vector unsigned short)__a, (__vector unsigned short)__b, 8, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_find_any_ne_idx_cc(__vector __bool short __a, __vector __bool short __b, int *__cc) { return __builtin_s390_vfaehs((__vector unsigned short)__a, (__vector unsigned short)__b, 8, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_find_any_ne_idx_cc(__vector unsigned short __a, __vector unsigned short __b, int *__cc) { return __builtin_s390_vfaehs(__a, __b, 8, __cc); } static inline __ATTRS_o_ai __vector signed int vec_find_any_ne_idx_cc(__vector signed int __a, __vector signed int __b, int *__cc) { return (__vector signed int) __builtin_s390_vfaefs((__vector unsigned int)__a, (__vector unsigned int)__b, 8, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_find_any_ne_idx_cc(__vector __bool int __a, __vector __bool int __b, int *__cc) { return __builtin_s390_vfaefs((__vector unsigned int)__a, (__vector unsigned int)__b, 8, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_find_any_ne_idx_cc(__vector unsigned int __a, __vector unsigned int __b, int *__cc) { return __builtin_s390_vfaefs(__a, __b, 8, __cc); } /*-- vec_find_any_ne_or_0_idx -----------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_find_any_ne_or_0_idx(__vector signed char __a, __vector signed char __b) { return (__vector signed char) __builtin_s390_vfaezb((__vector unsigned char)__a, (__vector unsigned char)__b, 8); } static inline __ATTRS_o_ai 
__vector unsigned char vec_find_any_ne_or_0_idx(__vector __bool char __a, __vector __bool char __b) { return __builtin_s390_vfaezb((__vector unsigned char)__a, (__vector unsigned char)__b, 8); } static inline __ATTRS_o_ai __vector unsigned char vec_find_any_ne_or_0_idx(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vfaezb(__a, __b, 8); } static inline __ATTRS_o_ai __vector signed short vec_find_any_ne_or_0_idx(__vector signed short __a, __vector signed short __b) { return (__vector signed short) __builtin_s390_vfaezh((__vector unsigned short)__a, (__vector unsigned short)__b, 8); } static inline __ATTRS_o_ai __vector unsigned short vec_find_any_ne_or_0_idx(__vector __bool short __a, __vector __bool short __b) { return __builtin_s390_vfaezh((__vector unsigned short)__a, (__vector unsigned short)__b, 8); } static inline __ATTRS_o_ai __vector unsigned short vec_find_any_ne_or_0_idx(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vfaezh(__a, __b, 8); } static inline __ATTRS_o_ai __vector signed int vec_find_any_ne_or_0_idx(__vector signed int __a, __vector signed int __b) { return (__vector signed int) __builtin_s390_vfaezf((__vector unsigned int)__a, (__vector unsigned int)__b, 8); } static inline __ATTRS_o_ai __vector unsigned int vec_find_any_ne_or_0_idx(__vector __bool int __a, __vector __bool int __b) { return __builtin_s390_vfaezf((__vector unsigned int)__a, (__vector unsigned int)__b, 8); } static inline __ATTRS_o_ai __vector unsigned int vec_find_any_ne_or_0_idx(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vfaezf(__a, __b, 8); } /*-- vec_find_any_ne_or_0_idx_cc --------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char vec_find_any_ne_or_0_idx_cc(__vector signed char __a, __vector signed char __b, int *__cc) { return (__vector signed char) __builtin_s390_vfaezbs((__vector unsigned char)__a, (__vector unsigned char)__b, 8, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_find_any_ne_or_0_idx_cc(__vector __bool char __a, __vector __bool char __b, int *__cc) { return __builtin_s390_vfaezbs((__vector unsigned char)__a, (__vector unsigned char)__b, 8, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_find_any_ne_or_0_idx_cc(__vector unsigned char __a, __vector unsigned char __b, int *__cc) { return __builtin_s390_vfaezbs(__a, __b, 8, __cc); } static inline __ATTRS_o_ai __vector signed short vec_find_any_ne_or_0_idx_cc(__vector signed short __a, __vector signed short __b, int *__cc) { return (__vector signed short) __builtin_s390_vfaezhs((__vector unsigned short)__a, (__vector unsigned short)__b, 8, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_find_any_ne_or_0_idx_cc(__vector __bool short __a, __vector __bool short __b, int *__cc) { return __builtin_s390_vfaezhs((__vector unsigned short)__a, (__vector unsigned short)__b, 8, __cc); } static inline __ATTRS_o_ai __vector unsigned short vec_find_any_ne_or_0_idx_cc(__vector unsigned short __a, __vector unsigned short __b, int *__cc) { return __builtin_s390_vfaezhs(__a, __b, 8, __cc); } static inline __ATTRS_o_ai __vector signed int vec_find_any_ne_or_0_idx_cc(__vector signed int __a, __vector signed int __b, int *__cc) { return (__vector signed int) __builtin_s390_vfaezfs((__vector unsigned int)__a, (__vector unsigned int)__b, 8, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_find_any_ne_or_0_idx_cc(__vector __bool int __a, __vector __bool int __b, int 
*__cc) { return __builtin_s390_vfaezfs((__vector unsigned int)__a, (__vector unsigned int)__b, 8, __cc); } static inline __ATTRS_o_ai __vector unsigned int vec_find_any_ne_or_0_idx_cc(__vector unsigned int __a, __vector unsigned int __b, int *__cc) { return __builtin_s390_vfaezfs(__a, __b, 8, __cc); } /*-- vec_search_string_cc ---------------------------------------------------*/ #if __ARCH__ >= 13 static inline __ATTRS_o_ai __vector unsigned char vec_search_string_cc(__vector signed char __a, __vector signed char __b, __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrsb((__vector unsigned char)__a, (__vector unsigned char)__b, __c, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_search_string_cc(__vector __bool char __a, __vector __bool char __b, __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrsb((__vector unsigned char)__a, (__vector unsigned char)__b, __c, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_search_string_cc(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrsb(__a, __b, __c, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_search_string_cc(__vector signed short __a, __vector signed short __b, __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrsh((__vector unsigned short)__a, (__vector unsigned short)__b, __c, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_search_string_cc(__vector __bool short __a, __vector __bool short __b, __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrsh((__vector unsigned short)__a, (__vector unsigned short)__b, __c, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_search_string_cc(__vector unsigned short __a, __vector unsigned short __b, __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrsh(__a, __b, __c, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_search_string_cc(__vector signed int __a, __vector signed int __b, __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrsf((__vector unsigned int)__a, (__vector unsigned int)__b, __c, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_search_string_cc(__vector __bool int __a, __vector __bool int __b, __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrsf((__vector unsigned int)__a, (__vector unsigned int)__b, __c, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_search_string_cc(__vector unsigned int __a, __vector unsigned int __b, __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrsf(__a, __b, __c, __cc); } #endif /*-- vec_search_string_until_zero_cc ----------------------------------------*/ #if __ARCH__ >= 13 static inline __ATTRS_o_ai __vector unsigned char vec_search_string_until_zero_cc(__vector signed char __a, __vector signed char __b, __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrszb((__vector unsigned char)__a, (__vector unsigned char)__b, __c, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_search_string_until_zero_cc(__vector __bool char __a, __vector __bool char __b, __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrszb((__vector unsigned char)__a, (__vector unsigned char)__b, __c, __cc); } static inline __ATTRS_o_ai __vector unsigned char vec_search_string_until_zero_cc(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrszb(__a, __b, __c, __cc); } static 
inline __ATTRS_o_ai __vector unsigned char
vec_search_string_until_zero_cc(__vector signed short __a, __vector signed short __b,
                                __vector unsigned char __c, int *__cc) {
  return __builtin_s390_vstrszh((__vector unsigned short)__a,
                                (__vector unsigned short)__b, __c, __cc);
}

static inline __ATTRS_o_ai __vector unsigned char
vec_search_string_until_zero_cc(__vector __bool short __a, __vector __bool short __b,
                                __vector unsigned char __c, int *__cc) {
  return __builtin_s390_vstrszh((__vector unsigned short)__a,
                                (__vector unsigned short)__b, __c, __cc);
}

static inline __ATTRS_o_ai __vector unsigned char
vec_search_string_until_zero_cc(__vector unsigned short __a, __vector unsigned short __b,
                                __vector unsigned char __c, int *__cc) {
  return __builtin_s390_vstrszh(__a, __b, __c, __cc);
}

static inline __ATTRS_o_ai __vector unsigned char
vec_search_string_until_zero_cc(__vector signed int __a, __vector signed int __b,
                                __vector unsigned char __c, int *__cc) {
  return __builtin_s390_vstrszf((__vector unsigned int)__a,
                                (__vector unsigned int)__b, __c, __cc);
}

static inline __ATTRS_o_ai __vector unsigned char
vec_search_string_until_zero_cc(__vector __bool int __a, __vector __bool int __b,
                                __vector unsigned char __c, int *__cc) {
  return __builtin_s390_vstrszf((__vector unsigned int)__a,
                                (__vector unsigned int)__b, __c, __cc);
}

static inline __ATTRS_o_ai __vector unsigned char
vec_search_string_until_zero_cc(__vector unsigned int __a, __vector unsigned int __b,
                                __vector unsigned char __c, int *__cc) {
  return __builtin_s390_vstrszf(__a, __b, __c, __cc);
}

#endif

#undef __constant_pow2_range
#undef __constant_range
#undef __constant
#undef __ATTRS_o
#undef __ATTRS_o_ai
#undef __ATTRS_ai

#else
#error "Use -fzvector to enable vector extensions"
#endif

//===-- netbsd_syscall_hooks.h --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of public sanitizer interface.
//
// System call handlers.
//
// Interface methods declared in this header implement pre- and post- syscall
// actions for the active sanitizer.
// Usage:
// __sanitizer_syscall_pre_getfoo(...args...);
// long long res = syscall(SYS_getfoo, ...args...);
// __sanitizer_syscall_post_getfoo(res, ...args...);
//
// DO NOT EDIT! THIS FILE HAS BEEN GENERATED!
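//
// As a concrete illustration of the usage pattern above (an editorial sketch,
// not part of the generated interface), the read(2) hooks defined later in
// this header would be wired around the raw syscall like so:
//   __sanitizer_syscall_pre_read(fd, buf, nbyte);
//   long long res = syscall(SYS_read, fd, buf, nbyte);
//   __sanitizer_syscall_post_read(res, fd, buf, nbyte);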
// // Generated with: generate_netbsd_syscalls.awk // Generated date: 2020-09-10 // Generated from: syscalls.master,v 1.306 2020/08/14 00:53:16 riastradh Exp // //===----------------------------------------------------------------------===// #ifndef SANITIZER_NETBSD_SYSCALL_HOOKS_H #define SANITIZER_NETBSD_SYSCALL_HOOKS_H #define __sanitizer_syscall_pre_syscall(code, arg0, arg1, arg2, arg3, arg4, \ arg5, arg6, arg7) \ __sanitizer_syscall_pre_impl_syscall( \ (long long)(code), (long long)(arg0), (long long)(arg1), \ (long long)(arg2), (long long)(arg3), (long long)(arg4), \ (long long)(arg5), (long long)(arg6), (long long)(arg7)) #define __sanitizer_syscall_post_syscall(res, code, arg0, arg1, arg2, arg3, \ arg4, arg5, arg6, arg7) \ __sanitizer_syscall_post_impl_syscall( \ res, (long long)(code), (long long)(arg0), (long long)(arg1), \ (long long)(arg2), (long long)(arg3), (long long)(arg4), \ (long long)(arg5), (long long)(arg6), (long long)(arg7)) #define __sanitizer_syscall_pre_exit(rval) \ __sanitizer_syscall_pre_impl_exit((long long)(rval)) #define __sanitizer_syscall_post_exit(res, rval) \ __sanitizer_syscall_post_impl_exit(res, (long long)(rval)) #define __sanitizer_syscall_pre_fork() __sanitizer_syscall_pre_impl_fork() #define __sanitizer_syscall_post_fork(res) \ __sanitizer_syscall_post_impl_fork(res) #define __sanitizer_syscall_pre_read(fd, buf, nbyte) \ __sanitizer_syscall_pre_impl_read((long long)(fd), (long long)(buf), \ (long long)(nbyte)) #define __sanitizer_syscall_post_read(res, fd, buf, nbyte) \ __sanitizer_syscall_post_impl_read(res, (long long)(fd), (long long)(buf), \ (long long)(nbyte)) #define __sanitizer_syscall_pre_write(fd, buf, nbyte) \ __sanitizer_syscall_pre_impl_write((long long)(fd), (long long)(buf), \ (long long)(nbyte)) #define __sanitizer_syscall_post_write(res, fd, buf, nbyte) \ __sanitizer_syscall_post_impl_write(res, (long long)(fd), (long long)(buf), \ (long long)(nbyte)) #define __sanitizer_syscall_pre_open(path, flags, mode) \ __sanitizer_syscall_pre_impl_open((long long)(path), (long long)(flags), \ (long long)(mode)) #define __sanitizer_syscall_post_open(res, path, flags, mode) \ __sanitizer_syscall_post_impl_open(res, (long long)(path), \ (long long)(flags), (long long)(mode)) #define __sanitizer_syscall_pre_close(fd) \ __sanitizer_syscall_pre_impl_close((long long)(fd)) #define __sanitizer_syscall_post_close(res, fd) \ __sanitizer_syscall_post_impl_close(res, (long long)(fd)) #define __sanitizer_syscall_pre_compat_50_wait4(pid, status, options, rusage) \ __sanitizer_syscall_pre_impl_compat_50_wait4( \ (long long)(pid), (long long)(status), (long long)(options), \ (long long)(rusage)) #define __sanitizer_syscall_post_compat_50_wait4(res, pid, status, options, \ rusage) \ __sanitizer_syscall_post_impl_compat_50_wait4( \ res, (long long)(pid), (long long)(status), (long long)(options), \ (long long)(rusage)) #define __sanitizer_syscall_pre_compat_43_ocreat(path, mode) \ __sanitizer_syscall_pre_impl_compat_43_ocreat((long long)(path), \ (long long)(mode)) #define __sanitizer_syscall_post_compat_43_ocreat(res, path, mode) \ __sanitizer_syscall_post_impl_compat_43_ocreat(res, (long long)(path), \ (long long)(mode)) #define __sanitizer_syscall_pre_link(path, link) \ __sanitizer_syscall_pre_impl_link((long long)(path), (long long)(link)) #define __sanitizer_syscall_post_link(res, path, link) \ __sanitizer_syscall_post_impl_link(res, (long long)(path), (long long)(link)) #define __sanitizer_syscall_pre_unlink(path) \ 
__sanitizer_syscall_pre_impl_unlink((long long)(path)) #define __sanitizer_syscall_post_unlink(res, path) \ __sanitizer_syscall_post_impl_unlink(res, (long long)(path)) /* syscall 11 has been skipped */ #define __sanitizer_syscall_pre_chdir(path) \ __sanitizer_syscall_pre_impl_chdir((long long)(path)) #define __sanitizer_syscall_post_chdir(res, path) \ __sanitizer_syscall_post_impl_chdir(res, (long long)(path)) #define __sanitizer_syscall_pre_fchdir(fd) \ __sanitizer_syscall_pre_impl_fchdir((long long)(fd)) #define __sanitizer_syscall_post_fchdir(res, fd) \ __sanitizer_syscall_post_impl_fchdir(res, (long long)(fd)) #define __sanitizer_syscall_pre_compat_50_mknod(path, mode, dev) \ __sanitizer_syscall_pre_impl_compat_50_mknod( \ (long long)(path), (long long)(mode), (long long)(dev)) #define __sanitizer_syscall_post_compat_50_mknod(res, path, mode, dev) \ __sanitizer_syscall_post_impl_compat_50_mknod( \ res, (long long)(path), (long long)(mode), (long long)(dev)) #define __sanitizer_syscall_pre_chmod(path, mode) \ __sanitizer_syscall_pre_impl_chmod((long long)(path), (long long)(mode)) #define __sanitizer_syscall_post_chmod(res, path, mode) \ __sanitizer_syscall_post_impl_chmod(res, (long long)(path), (long long)(mode)) #define __sanitizer_syscall_pre_chown(path, uid, gid) \ __sanitizer_syscall_pre_impl_chown((long long)(path), (long long)(uid), \ (long long)(gid)) #define __sanitizer_syscall_post_chown(res, path, uid, gid) \ __sanitizer_syscall_post_impl_chown(res, (long long)(path), \ (long long)(uid), (long long)(gid)) #define __sanitizer_syscall_pre_break(nsize) \ __sanitizer_syscall_pre_impl_break((long long)(nsize)) #define __sanitizer_syscall_post_break(res, nsize) \ __sanitizer_syscall_post_impl_break(res, (long long)(nsize)) #define __sanitizer_syscall_pre_compat_20_getfsstat(buf, bufsize, flags) \ __sanitizer_syscall_pre_impl_compat_20_getfsstat( \ (long long)(buf), (long long)(bufsize), (long long)(flags)) #define __sanitizer_syscall_post_compat_20_getfsstat(res, buf, bufsize, flags) \ __sanitizer_syscall_post_impl_compat_20_getfsstat( \ res, (long long)(buf), (long long)(bufsize), (long long)(flags)) #define __sanitizer_syscall_pre_compat_43_olseek(fd, offset, whence) \ __sanitizer_syscall_pre_impl_compat_43_olseek( \ (long long)(fd), (long long)(offset), (long long)(whence)) #define __sanitizer_syscall_post_compat_43_olseek(res, fd, offset, whence) \ __sanitizer_syscall_post_impl_compat_43_olseek( \ res, (long long)(fd), (long long)(offset), (long long)(whence)) #define __sanitizer_syscall_pre_getpid() __sanitizer_syscall_pre_impl_getpid() #define __sanitizer_syscall_post_getpid(res) \ __sanitizer_syscall_post_impl_getpid(res) #define __sanitizer_syscall_pre_compat_40_mount(type, path, flags, data) \ __sanitizer_syscall_pre_impl_compat_40_mount( \ (long long)(type), (long long)(path), (long long)(flags), \ (long long)(data)) #define __sanitizer_syscall_post_compat_40_mount(res, type, path, flags, data) \ __sanitizer_syscall_post_impl_compat_40_mount( \ res, (long long)(type), (long long)(path), (long long)(flags), \ (long long)(data)) #define __sanitizer_syscall_pre_unmount(path, flags) \ __sanitizer_syscall_pre_impl_unmount((long long)(path), (long long)(flags)) #define __sanitizer_syscall_post_unmount(res, path, flags) \ __sanitizer_syscall_post_impl_unmount(res, (long long)(path), \ (long long)(flags)) #define __sanitizer_syscall_pre_setuid(uid) \ __sanitizer_syscall_pre_impl_setuid((long long)(uid)) #define __sanitizer_syscall_post_setuid(res, uid) \ 
__sanitizer_syscall_post_impl_setuid(res, (long long)(uid)) #define __sanitizer_syscall_pre_getuid() __sanitizer_syscall_pre_impl_getuid() #define __sanitizer_syscall_post_getuid(res) \ __sanitizer_syscall_post_impl_getuid(res) #define __sanitizer_syscall_pre_geteuid() __sanitizer_syscall_pre_impl_geteuid() #define __sanitizer_syscall_post_geteuid(res) \ __sanitizer_syscall_post_impl_geteuid(res) #define __sanitizer_syscall_pre_ptrace(req, pid, addr, data) \ __sanitizer_syscall_pre_impl_ptrace((long long)(req), (long long)(pid), \ (long long)(addr), (long long)(data)) #define __sanitizer_syscall_post_ptrace(res, req, pid, addr, data) \ __sanitizer_syscall_post_impl_ptrace(res, (long long)(req), \ (long long)(pid), (long long)(addr), \ (long long)(data)) #define __sanitizer_syscall_pre_recvmsg(s, msg, flags) \ __sanitizer_syscall_pre_impl_recvmsg((long long)(s), (long long)(msg), \ (long long)(flags)) #define __sanitizer_syscall_post_recvmsg(res, s, msg, flags) \ __sanitizer_syscall_post_impl_recvmsg(res, (long long)(s), (long long)(msg), \ (long long)(flags)) #define __sanitizer_syscall_pre_sendmsg(s, msg, flags) \ __sanitizer_syscall_pre_impl_sendmsg((long long)(s), (long long)(msg), \ (long long)(flags)) #define __sanitizer_syscall_post_sendmsg(res, s, msg, flags) \ __sanitizer_syscall_post_impl_sendmsg(res, (long long)(s), (long long)(msg), \ (long long)(flags)) #define __sanitizer_syscall_pre_recvfrom(s, buf, len, flags, from, \ fromlenaddr) \ __sanitizer_syscall_pre_impl_recvfrom( \ (long long)(s), (long long)(buf), (long long)(len), (long long)(flags), \ (long long)(from), (long long)(fromlenaddr)) #define __sanitizer_syscall_post_recvfrom(res, s, buf, len, flags, from, \ fromlenaddr) \ __sanitizer_syscall_post_impl_recvfrom( \ res, (long long)(s), (long long)(buf), (long long)(len), \ (long long)(flags), (long long)(from), (long long)(fromlenaddr)) #define __sanitizer_syscall_pre_accept(s, name, anamelen) \ __sanitizer_syscall_pre_impl_accept((long long)(s), (long long)(name), \ (long long)(anamelen)) #define __sanitizer_syscall_post_accept(res, s, name, anamelen) \ __sanitizer_syscall_post_impl_accept(res, (long long)(s), (long long)(name), \ (long long)(anamelen)) #define __sanitizer_syscall_pre_getpeername(fdes, asa, alen) \ __sanitizer_syscall_pre_impl_getpeername( \ (long long)(fdes), (long long)(asa), (long long)(alen)) #define __sanitizer_syscall_post_getpeername(res, fdes, asa, alen) \ __sanitizer_syscall_post_impl_getpeername( \ res, (long long)(fdes), (long long)(asa), (long long)(alen)) #define __sanitizer_syscall_pre_getsockname(fdes, asa, alen) \ __sanitizer_syscall_pre_impl_getsockname( \ (long long)(fdes), (long long)(asa), (long long)(alen)) #define __sanitizer_syscall_post_getsockname(res, fdes, asa, alen) \ __sanitizer_syscall_post_impl_getsockname( \ res, (long long)(fdes), (long long)(asa), (long long)(alen)) #define __sanitizer_syscall_pre_access(path, flags) \ __sanitizer_syscall_pre_impl_access((long long)(path), (long long)(flags)) #define __sanitizer_syscall_post_access(res, path, flags) \ __sanitizer_syscall_post_impl_access(res, (long long)(path), \ (long long)(flags)) #define __sanitizer_syscall_pre_chflags(path, flags) \ __sanitizer_syscall_pre_impl_chflags((long long)(path), (long long)(flags)) #define __sanitizer_syscall_post_chflags(res, path, flags) \ __sanitizer_syscall_post_impl_chflags(res, (long long)(path), \ (long long)(flags)) #define __sanitizer_syscall_pre_fchflags(fd, flags) \ __sanitizer_syscall_pre_impl_fchflags((long long)(fd), (long 
long)(flags)) #define __sanitizer_syscall_post_fchflags(res, fd, flags) \ __sanitizer_syscall_post_impl_fchflags(res, (long long)(fd), \ (long long)(flags)) #define __sanitizer_syscall_pre_sync() __sanitizer_syscall_pre_impl_sync() #define __sanitizer_syscall_post_sync(res) \ __sanitizer_syscall_post_impl_sync(res) #define __sanitizer_syscall_pre_kill(pid, signum) \ __sanitizer_syscall_pre_impl_kill((long long)(pid), (long long)(signum)) #define __sanitizer_syscall_post_kill(res, pid, signum) \ __sanitizer_syscall_post_impl_kill(res, (long long)(pid), (long long)(signum)) #define __sanitizer_syscall_pre_compat_43_stat43(path, ub) \ __sanitizer_syscall_pre_impl_compat_43_stat43((long long)(path), \ (long long)(ub)) #define __sanitizer_syscall_post_compat_43_stat43(res, path, ub) \ __sanitizer_syscall_post_impl_compat_43_stat43(res, (long long)(path), \ (long long)(ub)) #define __sanitizer_syscall_pre_getppid() __sanitizer_syscall_pre_impl_getppid() #define __sanitizer_syscall_post_getppid(res) \ __sanitizer_syscall_post_impl_getppid(res) #define __sanitizer_syscall_pre_compat_43_lstat43(path, ub) \ __sanitizer_syscall_pre_impl_compat_43_lstat43((long long)(path), \ (long long)(ub)) #define __sanitizer_syscall_post_compat_43_lstat43(res, path, ub) \ __sanitizer_syscall_post_impl_compat_43_lstat43(res, (long long)(path), \ (long long)(ub)) #define __sanitizer_syscall_pre_dup(fd) \ __sanitizer_syscall_pre_impl_dup((long long)(fd)) #define __sanitizer_syscall_post_dup(res, fd) \ __sanitizer_syscall_post_impl_dup(res, (long long)(fd)) #define __sanitizer_syscall_pre_pipe() __sanitizer_syscall_pre_impl_pipe() #define __sanitizer_syscall_post_pipe(res) \ __sanitizer_syscall_post_impl_pipe(res) #define __sanitizer_syscall_pre_getegid() __sanitizer_syscall_pre_impl_getegid() #define __sanitizer_syscall_post_getegid(res) \ __sanitizer_syscall_post_impl_getegid(res) #define __sanitizer_syscall_pre_profil(samples, size, offset, scale) \ __sanitizer_syscall_pre_impl_profil((long long)(samples), (long long)(size), \ (long long)(offset), (long long)(scale)) #define __sanitizer_syscall_post_profil(res, samples, size, offset, scale) \ __sanitizer_syscall_post_impl_profil(res, (long long)(samples), \ (long long)(size), (long long)(offset), \ (long long)(scale)) #define __sanitizer_syscall_pre_ktrace(fname, ops, facs, pid) \ __sanitizer_syscall_pre_impl_ktrace((long long)(fname), (long long)(ops), \ (long long)(facs), (long long)(pid)) #define __sanitizer_syscall_post_ktrace(res, fname, ops, facs, pid) \ __sanitizer_syscall_post_impl_ktrace(res, (long long)(fname), \ (long long)(ops), (long long)(facs), \ (long long)(pid)) #define __sanitizer_syscall_pre_compat_13_sigaction13(signum, nsa, osa) \ __sanitizer_syscall_pre_impl_compat_13_sigaction13( \ (long long)(signum), (long long)(nsa), (long long)(osa)) #define __sanitizer_syscall_post_compat_13_sigaction13(res, signum, nsa, osa) \ __sanitizer_syscall_post_impl_compat_13_sigaction13( \ res, (long long)(signum), (long long)(nsa), (long long)(osa)) #define __sanitizer_syscall_pre_getgid() __sanitizer_syscall_pre_impl_getgid() #define __sanitizer_syscall_post_getgid(res) \ __sanitizer_syscall_post_impl_getgid(res) #define __sanitizer_syscall_pre_compat_13_sigprocmask13(how, mask) \ __sanitizer_syscall_pre_impl_compat_13_sigprocmask13((long long)(how), \ (long long)(mask)) #define __sanitizer_syscall_post_compat_13_sigprocmask13(res, how, mask) \ __sanitizer_syscall_post_impl_compat_13_sigprocmask13(res, (long long)(how), \ (long long)(mask)) #define 
__sanitizer_syscall_pre___getlogin(namebuf, namelen) \ __sanitizer_syscall_pre_impl___getlogin((long long)(namebuf), \ (long long)(namelen)) #define __sanitizer_syscall_post___getlogin(res, namebuf, namelen) \ __sanitizer_syscall_post_impl___getlogin(res, (long long)(namebuf), \ (long long)(namelen)) #define __sanitizer_syscall_pre___setlogin(namebuf) \ __sanitizer_syscall_pre_impl___setlogin((long long)(namebuf)) #define __sanitizer_syscall_post___setlogin(res, namebuf) \ __sanitizer_syscall_post_impl___setlogin(res, (long long)(namebuf)) #define __sanitizer_syscall_pre_acct(path) \ __sanitizer_syscall_pre_impl_acct((long long)(path)) #define __sanitizer_syscall_post_acct(res, path) \ __sanitizer_syscall_post_impl_acct(res, (long long)(path)) #define __sanitizer_syscall_pre_compat_13_sigpending13() \ __sanitizer_syscall_pre_impl_compat_13_sigpending13() #define __sanitizer_syscall_post_compat_13_sigpending13(res) \ __sanitizer_syscall_post_impl_compat_13_sigpending13(res) #define __sanitizer_syscall_pre_compat_13_sigaltstack13(nss, oss) \ __sanitizer_syscall_pre_impl_compat_13_sigaltstack13((long long)(nss), \ (long long)(oss)) #define __sanitizer_syscall_post_compat_13_sigaltstack13(res, nss, oss) \ __sanitizer_syscall_post_impl_compat_13_sigaltstack13(res, (long long)(nss), \ (long long)(oss)) #define __sanitizer_syscall_pre_ioctl(fd, com, data) \ __sanitizer_syscall_pre_impl_ioctl((long long)(fd), (long long)(com), \ (long long)(data)) #define __sanitizer_syscall_post_ioctl(res, fd, com, data) \ __sanitizer_syscall_post_impl_ioctl(res, (long long)(fd), (long long)(com), \ (long long)(data)) #define __sanitizer_syscall_pre_compat_12_oreboot(opt) \ __sanitizer_syscall_pre_impl_compat_12_oreboot((long long)(opt)) #define __sanitizer_syscall_post_compat_12_oreboot(res, opt) \ __sanitizer_syscall_post_impl_compat_12_oreboot(res, (long long)(opt)) #define __sanitizer_syscall_pre_revoke(path) \ __sanitizer_syscall_pre_impl_revoke((long long)(path)) #define __sanitizer_syscall_post_revoke(res, path) \ __sanitizer_syscall_post_impl_revoke(res, (long long)(path)) #define __sanitizer_syscall_pre_symlink(path, link) \ __sanitizer_syscall_pre_impl_symlink((long long)(path), (long long)(link)) #define __sanitizer_syscall_post_symlink(res, path, link) \ __sanitizer_syscall_post_impl_symlink(res, (long long)(path), \ (long long)(link)) #define __sanitizer_syscall_pre_readlink(path, buf, count) \ __sanitizer_syscall_pre_impl_readlink((long long)(path), (long long)(buf), \ (long long)(count)) #define __sanitizer_syscall_post_readlink(res, path, buf, count) \ __sanitizer_syscall_post_impl_readlink(res, (long long)(path), \ (long long)(buf), (long long)(count)) #define __sanitizer_syscall_pre_execve(path, argp, envp) \ __sanitizer_syscall_pre_impl_execve((long long)(path), (long long)(argp), \ (long long)(envp)) #define __sanitizer_syscall_post_execve(res, path, argp, envp) \ __sanitizer_syscall_post_impl_execve(res, (long long)(path), \ (long long)(argp), (long long)(envp)) #define __sanitizer_syscall_pre_umask(newmask) \ __sanitizer_syscall_pre_impl_umask((long long)(newmask)) #define __sanitizer_syscall_post_umask(res, newmask) \ __sanitizer_syscall_post_impl_umask(res, (long long)(newmask)) #define __sanitizer_syscall_pre_chroot(path) \ __sanitizer_syscall_pre_impl_chroot((long long)(path)) #define __sanitizer_syscall_post_chroot(res, path) \ __sanitizer_syscall_post_impl_chroot(res, (long long)(path)) #define __sanitizer_syscall_pre_compat_43_fstat43(fd, sb) \ 
__sanitizer_syscall_pre_impl_compat_43_fstat43((long long)(fd), \ (long long)(sb)) #define __sanitizer_syscall_post_compat_43_fstat43(res, fd, sb) \ __sanitizer_syscall_post_impl_compat_43_fstat43(res, (long long)(fd), \ (long long)(sb)) #define __sanitizer_syscall_pre_compat_43_ogetkerninfo(op, where, size, arg) \ __sanitizer_syscall_pre_impl_compat_43_ogetkerninfo( \ (long long)(op), (long long)(where), (long long)(size), \ (long long)(arg)) #define __sanitizer_syscall_post_compat_43_ogetkerninfo(res, op, where, size, \ arg) \ __sanitizer_syscall_post_impl_compat_43_ogetkerninfo( \ res, (long long)(op), (long long)(where), (long long)(size), \ (long long)(arg)) #define __sanitizer_syscall_pre_compat_43_ogetpagesize() \ __sanitizer_syscall_pre_impl_compat_43_ogetpagesize() #define __sanitizer_syscall_post_compat_43_ogetpagesize(res) \ __sanitizer_syscall_post_impl_compat_43_ogetpagesize(res) #define __sanitizer_syscall_pre_compat_12_msync(addr, len) \ __sanitizer_syscall_pre_impl_compat_12_msync((long long)(addr), \ (long long)(len)) #define __sanitizer_syscall_post_compat_12_msync(res, addr, len) \ __sanitizer_syscall_post_impl_compat_12_msync(res, (long long)(addr), \ (long long)(len)) #define __sanitizer_syscall_pre_vfork() __sanitizer_syscall_pre_impl_vfork() #define __sanitizer_syscall_post_vfork(res) \ __sanitizer_syscall_post_impl_vfork(res) /* syscall 67 has been skipped */ /* syscall 68 has been skipped */ /* syscall 69 has been skipped */ /* syscall 70 has been skipped */ #define __sanitizer_syscall_pre_compat_43_ommap(addr, len, prot, flags, fd, \ pos) \ __sanitizer_syscall_pre_impl_compat_43_ommap( \ (long long)(addr), (long long)(len), (long long)(prot), \ (long long)(flags), (long long)(fd), (long long)(pos)) #define __sanitizer_syscall_post_compat_43_ommap(res, addr, len, prot, flags, \ fd, pos) \ __sanitizer_syscall_post_impl_compat_43_ommap( \ res, (long long)(addr), (long long)(len), (long long)(prot), \ (long long)(flags), (long long)(fd), (long long)(pos)) #define __sanitizer_syscall_pre_vadvise(anom) \ __sanitizer_syscall_pre_impl_vadvise((long long)(anom)) #define __sanitizer_syscall_post_vadvise(res, anom) \ __sanitizer_syscall_post_impl_vadvise(res, (long long)(anom)) #define __sanitizer_syscall_pre_munmap(addr, len) \ __sanitizer_syscall_pre_impl_munmap((long long)(addr), (long long)(len)) #define __sanitizer_syscall_post_munmap(res, addr, len) \ __sanitizer_syscall_post_impl_munmap(res, (long long)(addr), (long long)(len)) #define __sanitizer_syscall_pre_mprotect(addr, len, prot) \ __sanitizer_syscall_pre_impl_mprotect((long long)(addr), (long long)(len), \ (long long)(prot)) #define __sanitizer_syscall_post_mprotect(res, addr, len, prot) \ __sanitizer_syscall_post_impl_mprotect(res, (long long)(addr), \ (long long)(len), (long long)(prot)) #define __sanitizer_syscall_pre_madvise(addr, len, behav) \ __sanitizer_syscall_pre_impl_madvise((long long)(addr), (long long)(len), \ (long long)(behav)) #define __sanitizer_syscall_post_madvise(res, addr, len, behav) \ __sanitizer_syscall_post_impl_madvise(res, (long long)(addr), \ (long long)(len), (long long)(behav)) /* syscall 76 has been skipped */ /* syscall 77 has been skipped */ #define __sanitizer_syscall_pre_mincore(addr, len, vec) \ __sanitizer_syscall_pre_impl_mincore((long long)(addr), (long long)(len), \ (long long)(vec)) #define __sanitizer_syscall_post_mincore(res, addr, len, vec) \ __sanitizer_syscall_post_impl_mincore(res, (long long)(addr), \ (long long)(len), (long long)(vec)) #define 
__sanitizer_syscall_pre_getgroups(gidsetsize, gidset) \ __sanitizer_syscall_pre_impl_getgroups((long long)(gidsetsize), \ (long long)(gidset)) #define __sanitizer_syscall_post_getgroups(res, gidsetsize, gidset) \ __sanitizer_syscall_post_impl_getgroups(res, (long long)(gidsetsize), \ (long long)(gidset)) #define __sanitizer_syscall_pre_setgroups(gidsetsize, gidset) \ __sanitizer_syscall_pre_impl_setgroups((long long)(gidsetsize), \ (long long)(gidset)) #define __sanitizer_syscall_post_setgroups(res, gidsetsize, gidset) \ __sanitizer_syscall_post_impl_setgroups(res, (long long)(gidsetsize), \ (long long)(gidset)) #define __sanitizer_syscall_pre_getpgrp() __sanitizer_syscall_pre_impl_getpgrp() #define __sanitizer_syscall_post_getpgrp(res) \ __sanitizer_syscall_post_impl_getpgrp(res) #define __sanitizer_syscall_pre_setpgid(pid, pgid) \ __sanitizer_syscall_pre_impl_setpgid((long long)(pid), (long long)(pgid)) #define __sanitizer_syscall_post_setpgid(res, pid, pgid) \ __sanitizer_syscall_post_impl_setpgid(res, (long long)(pid), \ (long long)(pgid)) #define __sanitizer_syscall_pre_compat_50_setitimer(which, itv, oitv) \ __sanitizer_syscall_pre_impl_compat_50_setitimer( \ (long long)(which), (long long)(itv), (long long)(oitv)) #define __sanitizer_syscall_post_compat_50_setitimer(res, which, itv, oitv) \ __sanitizer_syscall_post_impl_compat_50_setitimer( \ res, (long long)(which), (long long)(itv), (long long)(oitv)) #define __sanitizer_syscall_pre_compat_43_owait() \ __sanitizer_syscall_pre_impl_compat_43_owait() #define __sanitizer_syscall_post_compat_43_owait(res) \ __sanitizer_syscall_post_impl_compat_43_owait(res) #define __sanitizer_syscall_pre_compat_12_oswapon(name) \ __sanitizer_syscall_pre_impl_compat_12_oswapon((long long)(name)) #define __sanitizer_syscall_post_compat_12_oswapon(res, name) \ __sanitizer_syscall_post_impl_compat_12_oswapon(res, (long long)(name)) #define __sanitizer_syscall_pre_compat_50_getitimer(which, itv) \ __sanitizer_syscall_pre_impl_compat_50_getitimer((long long)(which), \ (long long)(itv)) #define __sanitizer_syscall_post_compat_50_getitimer(res, which, itv) \ __sanitizer_syscall_post_impl_compat_50_getitimer(res, (long long)(which), \ (long long)(itv)) #define __sanitizer_syscall_pre_compat_43_ogethostname(hostname, len) \ __sanitizer_syscall_pre_impl_compat_43_ogethostname((long long)(hostname), \ (long long)(len)) #define __sanitizer_syscall_post_compat_43_ogethostname(res, hostname, len) \ __sanitizer_syscall_post_impl_compat_43_ogethostname( \ res, (long long)(hostname), (long long)(len)) #define __sanitizer_syscall_pre_compat_43_osethostname(hostname, len) \ __sanitizer_syscall_pre_impl_compat_43_osethostname((long long)(hostname), \ (long long)(len)) #define __sanitizer_syscall_post_compat_43_osethostname(res, hostname, len) \ __sanitizer_syscall_post_impl_compat_43_osethostname( \ res, (long long)(hostname), (long long)(len)) #define __sanitizer_syscall_pre_compat_43_ogetdtablesize() \ __sanitizer_syscall_pre_impl_compat_43_ogetdtablesize() #define __sanitizer_syscall_post_compat_43_ogetdtablesize(res) \ __sanitizer_syscall_post_impl_compat_43_ogetdtablesize(res) #define __sanitizer_syscall_pre_dup2(from, to) \ __sanitizer_syscall_pre_impl_dup2((long long)(from), (long long)(to)) #define __sanitizer_syscall_post_dup2(res, from, to) \ __sanitizer_syscall_post_impl_dup2(res, (long long)(from), (long long)(to)) #define __sanitizer_syscall_pre_getrandom(buf, buflen, flags) \ __sanitizer_syscall_pre_impl_getrandom( \ (long long)(buf), (long long)(buflen), 
(long long)(flags)) #define __sanitizer_syscall_post_getrandom(res, buf, buflen, flags) \ __sanitizer_syscall_post_impl_getrandom( \ res, (long long)(buf), (long long)(buflen), (long long)(flags)) #define __sanitizer_syscall_pre_fcntl(fd, cmd, arg) \ __sanitizer_syscall_pre_impl_fcntl((long long)(fd), (long long)(cmd), \ (long long)(arg)) #define __sanitizer_syscall_post_fcntl(res, fd, cmd, arg) \ __sanitizer_syscall_post_impl_fcntl(res, (long long)(fd), (long long)(cmd), \ (long long)(arg)) #define __sanitizer_syscall_pre_compat_50_select(nd, in, ou, ex, tv) \ __sanitizer_syscall_pre_impl_compat_50_select( \ (long long)(nd), (long long)(in), (long long)(ou), (long long)(ex), \ (long long)(tv)) #define __sanitizer_syscall_post_compat_50_select(res, nd, in, ou, ex, tv) \ __sanitizer_syscall_post_impl_compat_50_select( \ res, (long long)(nd), (long long)(in), (long long)(ou), (long long)(ex), \ (long long)(tv)) /* syscall 94 has been skipped */ #define __sanitizer_syscall_pre_fsync(fd) \ __sanitizer_syscall_pre_impl_fsync((long long)(fd)) #define __sanitizer_syscall_post_fsync(res, fd) \ __sanitizer_syscall_post_impl_fsync(res, (long long)(fd)) #define __sanitizer_syscall_pre_setpriority(which, who, prio) \ __sanitizer_syscall_pre_impl_setpriority( \ (long long)(which), (long long)(who), (long long)(prio)) #define __sanitizer_syscall_post_setpriority(res, which, who, prio) \ __sanitizer_syscall_post_impl_setpriority( \ res, (long long)(which), (long long)(who), (long long)(prio)) #define __sanitizer_syscall_pre_compat_30_socket(domain, type, protocol) \ __sanitizer_syscall_pre_impl_compat_30_socket( \ (long long)(domain), (long long)(type), (long long)(protocol)) #define __sanitizer_syscall_post_compat_30_socket(res, domain, type, protocol) \ __sanitizer_syscall_post_impl_compat_30_socket( \ res, (long long)(domain), (long long)(type), (long long)(protocol)) #define __sanitizer_syscall_pre_connect(s, name, namelen) \ __sanitizer_syscall_pre_impl_connect((long long)(s), (long long)(name), \ (long long)(namelen)) #define __sanitizer_syscall_post_connect(res, s, name, namelen) \ __sanitizer_syscall_post_impl_connect( \ res, (long long)(s), (long long)(name), (long long)(namelen)) #define __sanitizer_syscall_pre_compat_43_oaccept(s, name, anamelen) \ __sanitizer_syscall_pre_impl_compat_43_oaccept( \ (long long)(s), (long long)(name), (long long)(anamelen)) #define __sanitizer_syscall_post_compat_43_oaccept(res, s, name, anamelen) \ __sanitizer_syscall_post_impl_compat_43_oaccept( \ res, (long long)(s), (long long)(name), (long long)(anamelen)) #define __sanitizer_syscall_pre_getpriority(which, who) \ __sanitizer_syscall_pre_impl_getpriority((long long)(which), (long long)(who)) #define __sanitizer_syscall_post_getpriority(res, which, who) \ __sanitizer_syscall_post_impl_getpriority(res, (long long)(which), \ (long long)(who)) #define __sanitizer_syscall_pre_compat_43_osend(s, buf, len, flags) \ __sanitizer_syscall_pre_impl_compat_43_osend( \ (long long)(s), (long long)(buf), (long long)(len), (long long)(flags)) #define __sanitizer_syscall_post_compat_43_osend(res, s, buf, len, flags) \ __sanitizer_syscall_post_impl_compat_43_osend( \ res, (long long)(s), (long long)(buf), (long long)(len), \ (long long)(flags)) #define __sanitizer_syscall_pre_compat_43_orecv(s, buf, len, flags) \ __sanitizer_syscall_pre_impl_compat_43_orecv( \ (long long)(s), (long long)(buf), (long long)(len), (long long)(flags)) #define __sanitizer_syscall_post_compat_43_orecv(res, s, buf, len, flags) \ 
__sanitizer_syscall_post_impl_compat_43_orecv( \ res, (long long)(s), (long long)(buf), (long long)(len), \ (long long)(flags)) #define __sanitizer_syscall_pre_compat_13_sigreturn13(sigcntxp) \ __sanitizer_syscall_pre_impl_compat_13_sigreturn13((long long)(sigcntxp)) #define __sanitizer_syscall_post_compat_13_sigreturn13(res, sigcntxp) \ __sanitizer_syscall_post_impl_compat_13_sigreturn13(res, \ (long long)(sigcntxp)) #define __sanitizer_syscall_pre_bind(s, name, namelen) \ __sanitizer_syscall_pre_impl_bind((long long)(s), (long long)(name), \ (long long)(namelen)) #define __sanitizer_syscall_post_bind(res, s, name, namelen) \ __sanitizer_syscall_post_impl_bind(res, (long long)(s), (long long)(name), \ (long long)(namelen)) #define __sanitizer_syscall_pre_setsockopt(s, level, name, val, valsize) \ __sanitizer_syscall_pre_impl_setsockopt((long long)(s), (long long)(level), \ (long long)(name), (long long)(val), \ (long long)(valsize)) #define __sanitizer_syscall_post_setsockopt(res, s, level, name, val, valsize) \ __sanitizer_syscall_post_impl_setsockopt( \ res, (long long)(s), (long long)(level), (long long)(name), \ (long long)(val), (long long)(valsize)) #define __sanitizer_syscall_pre_listen(s, backlog) \ __sanitizer_syscall_pre_impl_listen((long long)(s), (long long)(backlog)) #define __sanitizer_syscall_post_listen(res, s, backlog) \ __sanitizer_syscall_post_impl_listen(res, (long long)(s), \ (long long)(backlog)) /* syscall 107 has been skipped */ #define __sanitizer_syscall_pre_compat_43_osigvec(signum, nsv, osv) \ __sanitizer_syscall_pre_impl_compat_43_osigvec( \ (long long)(signum), (long long)(nsv), (long long)(osv)) #define __sanitizer_syscall_post_compat_43_osigvec(res, signum, nsv, osv) \ __sanitizer_syscall_post_impl_compat_43_osigvec( \ res, (long long)(signum), (long long)(nsv), (long long)(osv)) #define __sanitizer_syscall_pre_compat_43_osigblock(mask) \ __sanitizer_syscall_pre_impl_compat_43_osigblock((long long)(mask)) #define __sanitizer_syscall_post_compat_43_osigblock(res, mask) \ __sanitizer_syscall_post_impl_compat_43_osigblock(res, (long long)(mask)) #define __sanitizer_syscall_pre_compat_43_osigsetmask(mask) \ __sanitizer_syscall_pre_impl_compat_43_osigsetmask((long long)(mask)) #define __sanitizer_syscall_post_compat_43_osigsetmask(res, mask) \ __sanitizer_syscall_post_impl_compat_43_osigsetmask(res, (long long)(mask)) #define __sanitizer_syscall_pre_compat_13_sigsuspend13(mask) \ __sanitizer_syscall_pre_impl_compat_13_sigsuspend13((long long)(mask)) #define __sanitizer_syscall_post_compat_13_sigsuspend13(res, mask) \ __sanitizer_syscall_post_impl_compat_13_sigsuspend13(res, (long long)(mask)) #define __sanitizer_syscall_pre_compat_43_osigstack(nss, oss) \ __sanitizer_syscall_pre_impl_compat_43_osigstack((long long)(nss), \ (long long)(oss)) #define __sanitizer_syscall_post_compat_43_osigstack(res, nss, oss) \ __sanitizer_syscall_post_impl_compat_43_osigstack(res, (long long)(nss), \ (long long)(oss)) #define __sanitizer_syscall_pre_compat_43_orecvmsg(s, msg, flags) \ __sanitizer_syscall_pre_impl_compat_43_orecvmsg( \ (long long)(s), (long long)(msg), (long long)(flags)) #define __sanitizer_syscall_post_compat_43_orecvmsg(res, s, msg, flags) \ __sanitizer_syscall_post_impl_compat_43_orecvmsg( \ res, (long long)(s), (long long)(msg), (long long)(flags)) #define __sanitizer_syscall_pre_compat_43_osendmsg(s, msg, flags) \ __sanitizer_syscall_pre_impl_compat_43_osendmsg( \ (long long)(s), (long long)(msg), (long long)(flags)) #define 
__sanitizer_syscall_post_compat_43_osendmsg(res, s, msg, flags) \ __sanitizer_syscall_post_impl_compat_43_osendmsg( \ res, (long long)(s), (long long)(msg), (long long)(flags)) /* syscall 115 has been skipped */ #define __sanitizer_syscall_pre_compat_50_gettimeofday(tp, tzp) \ __sanitizer_syscall_pre_impl_compat_50_gettimeofday((long long)(tp), \ (long long)(tzp)) #define __sanitizer_syscall_post_compat_50_gettimeofday(res, tp, tzp) \ __sanitizer_syscall_post_impl_compat_50_gettimeofday(res, (long long)(tp), \ (long long)(tzp)) #define __sanitizer_syscall_pre_compat_50_getrusage(who, rusage) \ __sanitizer_syscall_pre_impl_compat_50_getrusage((long long)(who), \ (long long)(rusage)) #define __sanitizer_syscall_post_compat_50_getrusage(res, who, rusage) \ __sanitizer_syscall_post_impl_compat_50_getrusage(res, (long long)(who), \ (long long)(rusage)) #define __sanitizer_syscall_pre_getsockopt(s, level, name, val, avalsize) \ __sanitizer_syscall_pre_impl_getsockopt((long long)(s), (long long)(level), \ (long long)(name), (long long)(val), \ (long long)(avalsize)) #define __sanitizer_syscall_post_getsockopt(res, s, level, name, val, \ avalsize) \ __sanitizer_syscall_post_impl_getsockopt( \ res, (long long)(s), (long long)(level), (long long)(name), \ (long long)(val), (long long)(avalsize)) /* syscall 119 has been skipped */ #define __sanitizer_syscall_pre_readv(fd, iovp, iovcnt) \ __sanitizer_syscall_pre_impl_readv((long long)(fd), (long long)(iovp), \ (long long)(iovcnt)) #define __sanitizer_syscall_post_readv(res, fd, iovp, iovcnt) \ __sanitizer_syscall_post_impl_readv(res, (long long)(fd), (long long)(iovp), \ (long long)(iovcnt)) #define __sanitizer_syscall_pre_writev(fd, iovp, iovcnt) \ __sanitizer_syscall_pre_impl_writev((long long)(fd), (long long)(iovp), \ (long long)(iovcnt)) #define __sanitizer_syscall_post_writev(res, fd, iovp, iovcnt) \ __sanitizer_syscall_post_impl_writev(res, (long long)(fd), \ (long long)(iovp), (long long)(iovcnt)) #define __sanitizer_syscall_pre_compat_50_settimeofday(tv, tzp) \ __sanitizer_syscall_pre_impl_compat_50_settimeofday((long long)(tv), \ (long long)(tzp)) #define __sanitizer_syscall_post_compat_50_settimeofday(res, tv, tzp) \ __sanitizer_syscall_post_impl_compat_50_settimeofday(res, (long long)(tv), \ (long long)(tzp)) #define __sanitizer_syscall_pre_fchown(fd, uid, gid) \ __sanitizer_syscall_pre_impl_fchown((long long)(fd), (long long)(uid), \ (long long)(gid)) #define __sanitizer_syscall_post_fchown(res, fd, uid, gid) \ __sanitizer_syscall_post_impl_fchown(res, (long long)(fd), (long long)(uid), \ (long long)(gid)) #define __sanitizer_syscall_pre_fchmod(fd, mode) \ __sanitizer_syscall_pre_impl_fchmod((long long)(fd), (long long)(mode)) #define __sanitizer_syscall_post_fchmod(res, fd, mode) \ __sanitizer_syscall_post_impl_fchmod(res, (long long)(fd), (long long)(mode)) #define __sanitizer_syscall_pre_compat_43_orecvfrom(s, buf, len, flags, from, \ fromlenaddr) \ __sanitizer_syscall_pre_impl_compat_43_orecvfrom( \ (long long)(s), (long long)(buf), (long long)(len), (long long)(flags), \ (long long)(from), (long long)(fromlenaddr)) #define __sanitizer_syscall_post_compat_43_orecvfrom(res, s, buf, len, flags, \ from, fromlenaddr) \ __sanitizer_syscall_post_impl_compat_43_orecvfrom( \ res, (long long)(s), (long long)(buf), (long long)(len), \ (long long)(flags), (long long)(from), (long long)(fromlenaddr)) #define __sanitizer_syscall_pre_setreuid(ruid, euid) \ __sanitizer_syscall_pre_impl_setreuid((long long)(ruid), (long long)(euid)) #define 
__sanitizer_syscall_post_setreuid(res, ruid, euid) \ __sanitizer_syscall_post_impl_setreuid(res, (long long)(ruid), \ (long long)(euid)) #define __sanitizer_syscall_pre_setregid(rgid, egid) \ __sanitizer_syscall_pre_impl_setregid((long long)(rgid), (long long)(egid)) #define __sanitizer_syscall_post_setregid(res, rgid, egid) \ __sanitizer_syscall_post_impl_setregid(res, (long long)(rgid), \ (long long)(egid)) #define __sanitizer_syscall_pre_rename(from, to) \ __sanitizer_syscall_pre_impl_rename((long long)(from), (long long)(to)) #define __sanitizer_syscall_post_rename(res, from, to) \ __sanitizer_syscall_post_impl_rename(res, (long long)(from), (long long)(to)) #define __sanitizer_syscall_pre_compat_43_otruncate(path, length) \ __sanitizer_syscall_pre_impl_compat_43_otruncate((long long)(path), \ (long long)(length)) #define __sanitizer_syscall_post_compat_43_otruncate(res, path, length) \ __sanitizer_syscall_post_impl_compat_43_otruncate(res, (long long)(path), \ (long long)(length)) #define __sanitizer_syscall_pre_compat_43_oftruncate(fd, length) \ __sanitizer_syscall_pre_impl_compat_43_oftruncate((long long)(fd), \ (long long)(length)) #define __sanitizer_syscall_post_compat_43_oftruncate(res, fd, length) \ __sanitizer_syscall_post_impl_compat_43_oftruncate(res, (long long)(fd), \ (long long)(length)) #define __sanitizer_syscall_pre_flock(fd, how) \ __sanitizer_syscall_pre_impl_flock((long long)(fd), (long long)(how)) #define __sanitizer_syscall_post_flock(res, fd, how) \ __sanitizer_syscall_post_impl_flock(res, (long long)(fd), (long long)(how)) #define __sanitizer_syscall_pre_mkfifo(path, mode) \ __sanitizer_syscall_pre_impl_mkfifo((long long)(path), (long long)(mode)) #define __sanitizer_syscall_post_mkfifo(res, path, mode) \ __sanitizer_syscall_post_impl_mkfifo(res, (long long)(path), \ (long long)(mode)) #define __sanitizer_syscall_pre_sendto(s, buf, len, flags, to, tolen) \ __sanitizer_syscall_pre_impl_sendto((long long)(s), (long long)(buf), \ (long long)(len), (long long)(flags), \ (long long)(to), (long long)(tolen)) #define __sanitizer_syscall_post_sendto(res, s, buf, len, flags, to, tolen) \ __sanitizer_syscall_post_impl_sendto(res, (long long)(s), (long long)(buf), \ (long long)(len), (long long)(flags), \ (long long)(to), (long long)(tolen)) #define __sanitizer_syscall_pre_shutdown(s, how) \ __sanitizer_syscall_pre_impl_shutdown((long long)(s), (long long)(how)) #define __sanitizer_syscall_post_shutdown(res, s, how) \ __sanitizer_syscall_post_impl_shutdown(res, (long long)(s), (long long)(how)) #define __sanitizer_syscall_pre_socketpair(domain, type, protocol, rsv) \ __sanitizer_syscall_pre_impl_socketpair( \ (long long)(domain), (long long)(type), (long long)(protocol), \ (long long)(rsv)) #define __sanitizer_syscall_post_socketpair(res, domain, type, protocol, rsv) \ __sanitizer_syscall_post_impl_socketpair( \ res, (long long)(domain), (long long)(type), (long long)(protocol), \ (long long)(rsv)) #define __sanitizer_syscall_pre_mkdir(path, mode) \ __sanitizer_syscall_pre_impl_mkdir((long long)(path), (long long)(mode)) #define __sanitizer_syscall_post_mkdir(res, path, mode) \ __sanitizer_syscall_post_impl_mkdir(res, (long long)(path), (long long)(mode)) #define __sanitizer_syscall_pre_rmdir(path) \ __sanitizer_syscall_pre_impl_rmdir((long long)(path)) #define __sanitizer_syscall_post_rmdir(res, path) \ __sanitizer_syscall_post_impl_rmdir(res, (long long)(path)) #define __sanitizer_syscall_pre_compat_50_utimes(path, tptr) \ 
__sanitizer_syscall_pre_impl_compat_50_utimes((long long)(path), \ (long long)(tptr)) #define __sanitizer_syscall_post_compat_50_utimes(res, path, tptr) \ __sanitizer_syscall_post_impl_compat_50_utimes(res, (long long)(path), \ (long long)(tptr)) /* syscall 139 has been skipped */ #define __sanitizer_syscall_pre_compat_50_adjtime(delta, olddelta) \ __sanitizer_syscall_pre_impl_compat_50_adjtime((long long)(delta), \ (long long)(olddelta)) #define __sanitizer_syscall_post_compat_50_adjtime(res, delta, olddelta) \ __sanitizer_syscall_post_impl_compat_50_adjtime(res, (long long)(delta), \ (long long)(olddelta)) #define __sanitizer_syscall_pre_compat_43_ogetpeername(fdes, asa, alen) \ __sanitizer_syscall_pre_impl_compat_43_ogetpeername( \ (long long)(fdes), (long long)(asa), (long long)(alen)) #define __sanitizer_syscall_post_compat_43_ogetpeername(res, fdes, asa, alen) \ __sanitizer_syscall_post_impl_compat_43_ogetpeername( \ res, (long long)(fdes), (long long)(asa), (long long)(alen)) #define __sanitizer_syscall_pre_compat_43_ogethostid() \ __sanitizer_syscall_pre_impl_compat_43_ogethostid() #define __sanitizer_syscall_post_compat_43_ogethostid(res) \ __sanitizer_syscall_post_impl_compat_43_ogethostid(res) #define __sanitizer_syscall_pre_compat_43_osethostid(hostid) \ __sanitizer_syscall_pre_impl_compat_43_osethostid((long long)(hostid)) #define __sanitizer_syscall_post_compat_43_osethostid(res, hostid) \ __sanitizer_syscall_post_impl_compat_43_osethostid(res, (long long)(hostid)) #define __sanitizer_syscall_pre_compat_43_ogetrlimit(which, rlp) \ __sanitizer_syscall_pre_impl_compat_43_ogetrlimit((long long)(which), \ (long long)(rlp)) #define __sanitizer_syscall_post_compat_43_ogetrlimit(res, which, rlp) \ __sanitizer_syscall_post_impl_compat_43_ogetrlimit(res, (long long)(which), \ (long long)(rlp)) #define __sanitizer_syscall_pre_compat_43_osetrlimit(which, rlp) \ __sanitizer_syscall_pre_impl_compat_43_osetrlimit((long long)(which), \ (long long)(rlp)) #define __sanitizer_syscall_post_compat_43_osetrlimit(res, which, rlp) \ __sanitizer_syscall_post_impl_compat_43_osetrlimit(res, (long long)(which), \ (long long)(rlp)) #define __sanitizer_syscall_pre_compat_43_okillpg(pgid, signum) \ __sanitizer_syscall_pre_impl_compat_43_okillpg((long long)(pgid), \ (long long)(signum)) #define __sanitizer_syscall_post_compat_43_okillpg(res, pgid, signum) \ __sanitizer_syscall_post_impl_compat_43_okillpg(res, (long long)(pgid), \ (long long)(signum)) #define __sanitizer_syscall_pre_setsid() __sanitizer_syscall_pre_impl_setsid() #define __sanitizer_syscall_post_setsid(res) \ __sanitizer_syscall_post_impl_setsid(res) #define __sanitizer_syscall_pre_compat_50_quotactl(path, cmd, uid, arg) \ __sanitizer_syscall_pre_impl_compat_50_quotactl( \ (long long)(path), (long long)(cmd), (long long)(uid), (long long)(arg)) #define __sanitizer_syscall_post_compat_50_quotactl(res, path, cmd, uid, arg) \ __sanitizer_syscall_post_impl_compat_50_quotactl( \ res, (long long)(path), (long long)(cmd), (long long)(uid), \ (long long)(arg)) #define __sanitizer_syscall_pre_compat_43_oquota() \ __sanitizer_syscall_pre_impl_compat_43_oquota() #define __sanitizer_syscall_post_compat_43_oquota(res) \ __sanitizer_syscall_post_impl_compat_43_oquota(res) #define __sanitizer_syscall_pre_compat_43_ogetsockname(fdec, asa, alen) \ __sanitizer_syscall_pre_impl_compat_43_ogetsockname( \ (long long)(fdec), (long long)(asa), (long long)(alen)) #define __sanitizer_syscall_post_compat_43_ogetsockname(res, fdec, asa, alen) \ 
__sanitizer_syscall_post_impl_compat_43_ogetsockname( \ res, (long long)(fdec), (long long)(asa), (long long)(alen)) /* syscall 151 has been skipped */ /* syscall 152 has been skipped */ /* syscall 153 has been skipped */ /* syscall 154 has been skipped */ #define __sanitizer_syscall_pre_nfssvc(flag, argp) \ __sanitizer_syscall_pre_impl_nfssvc((long long)(flag), (long long)(argp)) #define __sanitizer_syscall_post_nfssvc(res, flag, argp) \ __sanitizer_syscall_post_impl_nfssvc(res, (long long)(flag), \ (long long)(argp)) #define __sanitizer_syscall_pre_compat_43_ogetdirentries(fd, buf, count, \ basep) \ __sanitizer_syscall_pre_impl_compat_43_ogetdirentries( \ (long long)(fd), (long long)(buf), (long long)(count), \ (long long)(basep)) #define __sanitizer_syscall_post_compat_43_ogetdirentries(res, fd, buf, count, \ basep) \ __sanitizer_syscall_post_impl_compat_43_ogetdirentries( \ res, (long long)(fd), (long long)(buf), (long long)(count), \ (long long)(basep)) #define __sanitizer_syscall_pre_compat_20_statfs(path, buf) \ __sanitizer_syscall_pre_impl_compat_20_statfs((long long)(path), \ (long long)(buf)) #define __sanitizer_syscall_post_compat_20_statfs(res, path, buf) \ __sanitizer_syscall_post_impl_compat_20_statfs(res, (long long)(path), \ (long long)(buf)) #define __sanitizer_syscall_pre_compat_20_fstatfs(fd, buf) \ __sanitizer_syscall_pre_impl_compat_20_fstatfs((long long)(fd), \ (long long)(buf)) #define __sanitizer_syscall_post_compat_20_fstatfs(res, fd, buf) \ __sanitizer_syscall_post_impl_compat_20_fstatfs(res, (long long)(fd), \ (long long)(buf)) /* syscall 159 has been skipped */ /* syscall 160 has been skipped */ #define __sanitizer_syscall_pre_compat_30_getfh(fname, fhp) \ __sanitizer_syscall_pre_impl_compat_30_getfh((long long)(fname), \ (long long)(fhp)) #define __sanitizer_syscall_post_compat_30_getfh(res, fname, fhp) \ __sanitizer_syscall_post_impl_compat_30_getfh(res, (long long)(fname), \ (long long)(fhp)) #define __sanitizer_syscall_pre_compat_09_ogetdomainname(domainname, len) \ __sanitizer_syscall_pre_impl_compat_09_ogetdomainname( \ (long long)(domainname), (long long)(len)) #define __sanitizer_syscall_post_compat_09_ogetdomainname(res, domainname, \ len) \ __sanitizer_syscall_post_impl_compat_09_ogetdomainname( \ res, (long long)(domainname), (long long)(len)) #define __sanitizer_syscall_pre_compat_09_osetdomainname(domainname, len) \ __sanitizer_syscall_pre_impl_compat_09_osetdomainname( \ (long long)(domainname), (long long)(len)) #define __sanitizer_syscall_post_compat_09_osetdomainname(res, domainname, \ len) \ __sanitizer_syscall_post_impl_compat_09_osetdomainname( \ res, (long long)(domainname), (long long)(len)) #define __sanitizer_syscall_pre_compat_09_ouname(name) \ __sanitizer_syscall_pre_impl_compat_09_ouname((long long)(name)) #define __sanitizer_syscall_post_compat_09_ouname(res, name) \ __sanitizer_syscall_post_impl_compat_09_ouname(res, (long long)(name)) #define __sanitizer_syscall_pre_sysarch(op, parms) \ __sanitizer_syscall_pre_impl_sysarch((long long)(op), (long long)(parms)) #define __sanitizer_syscall_post_sysarch(res, op, parms) \ __sanitizer_syscall_post_impl_sysarch(res, (long long)(op), \ (long long)(parms)) #define __sanitizer_syscall_pre___futex(uaddr, op, val, timeout, uaddr2, val2, \ val3) \ __sanitizer_syscall_pre_impl___futex((long long)(uaddr), (long long)(op), \ (long long)(val), (long long)(timeout), \ (long long)(uaddr2), (long long)(val2), \ (long long)(val3)) #define __sanitizer_syscall_post___futex(res, uaddr, op, val, timeout, 
uaddr2, \ val2, val3) \ __sanitizer_syscall_post_impl___futex( \ res, (long long)(uaddr), (long long)(op), (long long)(val), \ (long long)(timeout), (long long)(uaddr2), (long long)(val2), \ (long long)(val3)) #define __sanitizer_syscall_pre___futex_set_robust_list(head, len) \ __sanitizer_syscall_pre_impl___futex_set_robust_list((long long)(head), \ (long long)(len)) #define __sanitizer_syscall_post___futex_set_robust_list(res, head, len) \ __sanitizer_syscall_post_impl___futex_set_robust_list( \ res, (long long)(head), (long long)(len)) #define __sanitizer_syscall_pre___futex_get_robust_list(lwpid, headp, lenp) \ __sanitizer_syscall_pre_impl___futex_get_robust_list( \ (long long)(lwpid), (long long)(headp), (long long)(lenp)) #define __sanitizer_syscall_post___futex_get_robust_list(res, lwpid, headp, \ lenp) \ __sanitizer_syscall_post_impl___futex_get_robust_list( \ res, (long long)(lwpid), (long long)(headp), (long long)(lenp)) #if !defined(_LP64) #define __sanitizer_syscall_pre_compat_10_osemsys(which, a2, a3, a4, a5) \ __sanitizer_syscall_pre_impl_compat_10_osemsys( \ (long long)(which), (long long)(a2), (long long)(a3), (long long)(a4), \ (long long)(a5)) #define __sanitizer_syscall_post_compat_10_osemsys(res, which, a2, a3, a4, a5) \ __sanitizer_syscall_post_impl_compat_10_osemsys( \ res, (long long)(which), (long long)(a2), (long long)(a3), \ (long long)(a4), (long long)(a5)) #else /* syscall 169 has been skipped */ #endif #if !defined(_LP64) #define __sanitizer_syscall_pre_compat_10_omsgsys(which, a2, a3, a4, a5, a6) \ __sanitizer_syscall_pre_impl_compat_10_omsgsys( \ (long long)(which), (long long)(a2), (long long)(a3), (long long)(a4), \ (long long)(a5), (long long)(a6)) #define __sanitizer_syscall_post_compat_10_omsgsys(res, which, a2, a3, a4, a5, \ a6) \ __sanitizer_syscall_post_impl_compat_10_omsgsys( \ res, (long long)(which), (long long)(a2), (long long)(a3), \ (long long)(a4), (long long)(a5), (long long)(a6)) #else /* syscall 170 has been skipped */ #endif #if !defined(_LP64) #define __sanitizer_syscall_pre_compat_10_oshmsys(which, a2, a3, a4) \ __sanitizer_syscall_pre_impl_compat_10_oshmsys( \ (long long)(which), (long long)(a2), (long long)(a3), (long long)(a4)) #define __sanitizer_syscall_post_compat_10_oshmsys(res, which, a2, a3, a4) \ __sanitizer_syscall_post_impl_compat_10_oshmsys( \ res, (long long)(which), (long long)(a2), (long long)(a3), \ (long long)(a4)) #else /* syscall 171 has been skipped */ #endif /* syscall 172 has been skipped */ #define __sanitizer_syscall_pre_pread(fd, buf, nbyte, PAD, offset) \ __sanitizer_syscall_pre_impl_pread((long long)(fd), (long long)(buf), \ (long long)(nbyte), (long long)(PAD), \ (long long)(offset)) #define __sanitizer_syscall_post_pread(res, fd, buf, nbyte, PAD, offset) \ __sanitizer_syscall_post_impl_pread(res, (long long)(fd), (long long)(buf), \ (long long)(nbyte), (long long)(PAD), \ (long long)(offset)) #define __sanitizer_syscall_pre_pwrite(fd, buf, nbyte, PAD, offset) \ __sanitizer_syscall_pre_impl_pwrite((long long)(fd), (long long)(buf), \ (long long)(nbyte), (long long)(PAD), \ (long long)(offset)) #define __sanitizer_syscall_post_pwrite(res, fd, buf, nbyte, PAD, offset) \ __sanitizer_syscall_post_impl_pwrite(res, (long long)(fd), (long long)(buf), \ (long long)(nbyte), (long long)(PAD), \ (long long)(offset)) #define __sanitizer_syscall_pre_compat_30_ntp_gettime(ntvp) \ __sanitizer_syscall_pre_impl_compat_30_ntp_gettime((long long)(ntvp)) #define __sanitizer_syscall_post_compat_30_ntp_gettime(res, ntvp) \ 
__sanitizer_syscall_post_impl_compat_30_ntp_gettime(res, (long long)(ntvp)) #if defined(NTP) || !defined(_KERNEL_OPT) #define __sanitizer_syscall_pre_ntp_adjtime(tp) \ __sanitizer_syscall_pre_impl_ntp_adjtime((long long)(tp)) #define __sanitizer_syscall_post_ntp_adjtime(res, tp) \ __sanitizer_syscall_post_impl_ntp_adjtime(res, (long long)(tp)) #else /* syscall 176 has been skipped */ #endif /* syscall 177 has been skipped */ /* syscall 178 has been skipped */ /* syscall 179 has been skipped */ /* syscall 180 has been skipped */ #define __sanitizer_syscall_pre_setgid(gid) \ __sanitizer_syscall_pre_impl_setgid((long long)(gid)) #define __sanitizer_syscall_post_setgid(res, gid) \ __sanitizer_syscall_post_impl_setgid(res, (long long)(gid)) #define __sanitizer_syscall_pre_setegid(egid) \ __sanitizer_syscall_pre_impl_setegid((long long)(egid)) #define __sanitizer_syscall_post_setegid(res, egid) \ __sanitizer_syscall_post_impl_setegid(res, (long long)(egid)) #define __sanitizer_syscall_pre_seteuid(euid) \ __sanitizer_syscall_pre_impl_seteuid((long long)(euid)) #define __sanitizer_syscall_post_seteuid(res, euid) \ __sanitizer_syscall_post_impl_seteuid(res, (long long)(euid)) #define __sanitizer_syscall_pre_lfs_bmapv(fsidp, blkiov, blkcnt) \ __sanitizer_syscall_pre_impl_lfs_bmapv( \ (long long)(fsidp), (long long)(blkiov), (long long)(blkcnt)) #define __sanitizer_syscall_post_lfs_bmapv(res, fsidp, blkiov, blkcnt) \ __sanitizer_syscall_post_impl_lfs_bmapv( \ res, (long long)(fsidp), (long long)(blkiov), (long long)(blkcnt)) #define __sanitizer_syscall_pre_lfs_markv(fsidp, blkiov, blkcnt) \ __sanitizer_syscall_pre_impl_lfs_markv( \ (long long)(fsidp), (long long)(blkiov), (long long)(blkcnt)) #define __sanitizer_syscall_post_lfs_markv(res, fsidp, blkiov, blkcnt) \ __sanitizer_syscall_post_impl_lfs_markv( \ res, (long long)(fsidp), (long long)(blkiov), (long long)(blkcnt)) #define __sanitizer_syscall_pre_lfs_segclean(fsidp, segment) \ __sanitizer_syscall_pre_impl_lfs_segclean((long long)(fsidp), \ (long long)(segment)) #define __sanitizer_syscall_post_lfs_segclean(res, fsidp, segment) \ __sanitizer_syscall_post_impl_lfs_segclean(res, (long long)(fsidp), \ (long long)(segment)) #define __sanitizer_syscall_pre_compat_50_lfs_segwait(fsidp, tv) \ __sanitizer_syscall_pre_impl_compat_50_lfs_segwait((long long)(fsidp), \ (long long)(tv)) #define __sanitizer_syscall_post_compat_50_lfs_segwait(res, fsidp, tv) \ __sanitizer_syscall_post_impl_compat_50_lfs_segwait(res, (long long)(fsidp), \ (long long)(tv)) #define __sanitizer_syscall_pre_compat_12_stat12(path, ub) \ __sanitizer_syscall_pre_impl_compat_12_stat12((long long)(path), \ (long long)(ub)) #define __sanitizer_syscall_post_compat_12_stat12(res, path, ub) \ __sanitizer_syscall_post_impl_compat_12_stat12(res, (long long)(path), \ (long long)(ub)) #define __sanitizer_syscall_pre_compat_12_fstat12(fd, sb) \ __sanitizer_syscall_pre_impl_compat_12_fstat12((long long)(fd), \ (long long)(sb)) #define __sanitizer_syscall_post_compat_12_fstat12(res, fd, sb) \ __sanitizer_syscall_post_impl_compat_12_fstat12(res, (long long)(fd), \ (long long)(sb)) #define __sanitizer_syscall_pre_compat_12_lstat12(path, ub) \ __sanitizer_syscall_pre_impl_compat_12_lstat12((long long)(path), \ (long long)(ub)) #define __sanitizer_syscall_post_compat_12_lstat12(res, path, ub) \ __sanitizer_syscall_post_impl_compat_12_lstat12(res, (long long)(path), \ (long long)(ub)) #define __sanitizer_syscall_pre_pathconf(path, name) \ __sanitizer_syscall_pre_impl_pathconf((long long)(path), (long 
long)(name)) #define __sanitizer_syscall_post_pathconf(res, path, name) \ __sanitizer_syscall_post_impl_pathconf(res, (long long)(path), \ (long long)(name)) #define __sanitizer_syscall_pre_fpathconf(fd, name) \ __sanitizer_syscall_pre_impl_fpathconf((long long)(fd), (long long)(name)) #define __sanitizer_syscall_post_fpathconf(res, fd, name) \ __sanitizer_syscall_post_impl_fpathconf(res, (long long)(fd), \ (long long)(name)) #define __sanitizer_syscall_pre_getsockopt2(s, level, name, val, avalsize) \ __sanitizer_syscall_pre_impl_getsockopt2( \ (long long)(s), (long long)(level), (long long)(name), (long long)(val), \ (long long)(avalsize)) #define __sanitizer_syscall_post_getsockopt2(res, s, level, name, val, \ avalsize) \ __sanitizer_syscall_post_impl_getsockopt2( \ res, (long long)(s), (long long)(level), (long long)(name), \ (long long)(val), (long long)(avalsize)) #define __sanitizer_syscall_pre_getrlimit(which, rlp) \ __sanitizer_syscall_pre_impl_getrlimit((long long)(which), (long long)(rlp)) #define __sanitizer_syscall_post_getrlimit(res, which, rlp) \ __sanitizer_syscall_post_impl_getrlimit(res, (long long)(which), \ (long long)(rlp)) #define __sanitizer_syscall_pre_setrlimit(which, rlp) \ __sanitizer_syscall_pre_impl_setrlimit((long long)(which), (long long)(rlp)) #define __sanitizer_syscall_post_setrlimit(res, which, rlp) \ __sanitizer_syscall_post_impl_setrlimit(res, (long long)(which), \ (long long)(rlp)) #define __sanitizer_syscall_pre_compat_12_getdirentries(fd, buf, count, basep) \ __sanitizer_syscall_pre_impl_compat_12_getdirentries( \ (long long)(fd), (long long)(buf), (long long)(count), \ (long long)(basep)) #define __sanitizer_syscall_post_compat_12_getdirentries(res, fd, buf, count, \ basep) \ __sanitizer_syscall_post_impl_compat_12_getdirentries( \ res, (long long)(fd), (long long)(buf), (long long)(count), \ (long long)(basep)) #define __sanitizer_syscall_pre_mmap(addr, len, prot, flags, fd, PAD, pos) \ __sanitizer_syscall_pre_impl_mmap( \ (long long)(addr), (long long)(len), (long long)(prot), \ (long long)(flags), (long long)(fd), (long long)(PAD), (long long)(pos)) #define __sanitizer_syscall_post_mmap(res, addr, len, prot, flags, fd, PAD, \ pos) \ __sanitizer_syscall_post_impl_mmap( \ res, (long long)(addr), (long long)(len), (long long)(prot), \ (long long)(flags), (long long)(fd), (long long)(PAD), (long long)(pos)) #define __sanitizer_syscall_pre___syscall(code, arg0, arg1, arg2, arg3, arg4, \ arg5, arg6, arg7) \ __sanitizer_syscall_pre_impl___syscall( \ (long long)(code), (long long)(arg0), (long long)(arg1), \ (long long)(arg2), (long long)(arg3), (long long)(arg4), \ (long long)(arg5), (long long)(arg6), (long long)(arg7)) #define __sanitizer_syscall_post___syscall(res, code, arg0, arg1, arg2, arg3, \ arg4, arg5, arg6, arg7) \ __sanitizer_syscall_post_impl___syscall( \ res, (long long)(code), (long long)(arg0), (long long)(arg1), \ (long long)(arg2), (long long)(arg3), (long long)(arg4), \ (long long)(arg5), (long long)(arg6), (long long)(arg7)) #define __sanitizer_syscall_pre_lseek(fd, PAD, offset, whence) \ __sanitizer_syscall_pre_impl_lseek((long long)(fd), (long long)(PAD), \ (long long)(offset), (long long)(whence)) #define __sanitizer_syscall_post_lseek(res, fd, PAD, offset, whence) \ __sanitizer_syscall_post_impl_lseek(res, (long long)(fd), (long long)(PAD), \ (long long)(offset), \ (long long)(whence)) #define __sanitizer_syscall_pre_truncate(path, PAD, length) \ __sanitizer_syscall_pre_impl_truncate((long long)(path), (long long)(PAD), \ (long 
long)(length)) #define __sanitizer_syscall_post_truncate(res, path, PAD, length) \ __sanitizer_syscall_post_impl_truncate( \ res, (long long)(path), (long long)(PAD), (long long)(length)) #define __sanitizer_syscall_pre_ftruncate(fd, PAD, length) \ __sanitizer_syscall_pre_impl_ftruncate((long long)(fd), (long long)(PAD), \ (long long)(length)) #define __sanitizer_syscall_post_ftruncate(res, fd, PAD, length) \ __sanitizer_syscall_post_impl_ftruncate( \ res, (long long)(fd), (long long)(PAD), (long long)(length)) #define __sanitizer_syscall_pre___sysctl(name, namelen, oldv, oldlenp, newv, \ newlen) \ __sanitizer_syscall_pre_impl___sysctl( \ (long long)(name), (long long)(namelen), (long long)(oldv), \ (long long)(oldlenp), (long long)(newv), (long long)(newlen)) #define __sanitizer_syscall_post___sysctl(res, name, namelen, oldv, oldlenp, \ newv, newlen) \ __sanitizer_syscall_post_impl___sysctl( \ res, (long long)(name), (long long)(namelen), (long long)(oldv), \ (long long)(oldlenp), (long long)(newv), (long long)(newlen)) #define __sanitizer_syscall_pre_mlock(addr, len) \ __sanitizer_syscall_pre_impl_mlock((long long)(addr), (long long)(len)) #define __sanitizer_syscall_post_mlock(res, addr, len) \ __sanitizer_syscall_post_impl_mlock(res, (long long)(addr), (long long)(len)) #define __sanitizer_syscall_pre_munlock(addr, len) \ __sanitizer_syscall_pre_impl_munlock((long long)(addr), (long long)(len)) #define __sanitizer_syscall_post_munlock(res, addr, len) \ __sanitizer_syscall_post_impl_munlock(res, (long long)(addr), \ (long long)(len)) #define __sanitizer_syscall_pre_undelete(path) \ __sanitizer_syscall_pre_impl_undelete((long long)(path)) #define __sanitizer_syscall_post_undelete(res, path) \ __sanitizer_syscall_post_impl_undelete(res, (long long)(path)) #define __sanitizer_syscall_pre_compat_50_futimes(fd, tptr) \ __sanitizer_syscall_pre_impl_compat_50_futimes((long long)(fd), \ (long long)(tptr)) #define __sanitizer_syscall_post_compat_50_futimes(res, fd, tptr) \ __sanitizer_syscall_post_impl_compat_50_futimes(res, (long long)(fd), \ (long long)(tptr)) #define __sanitizer_syscall_pre_getpgid(pid) \ __sanitizer_syscall_pre_impl_getpgid((long long)(pid)) #define __sanitizer_syscall_post_getpgid(res, pid) \ __sanitizer_syscall_post_impl_getpgid(res, (long long)(pid)) #define __sanitizer_syscall_pre_reboot(opt, bootstr) \ __sanitizer_syscall_pre_impl_reboot((long long)(opt), (long long)(bootstr)) #define __sanitizer_syscall_post_reboot(res, opt, bootstr) \ __sanitizer_syscall_post_impl_reboot(res, (long long)(opt), \ (long long)(bootstr)) #define __sanitizer_syscall_pre_poll(fds, nfds, timeout) \ __sanitizer_syscall_pre_impl_poll((long long)(fds), (long long)(nfds), \ (long long)(timeout)) #define __sanitizer_syscall_post_poll(res, fds, nfds, timeout) \ __sanitizer_syscall_post_impl_poll(res, (long long)(fds), (long long)(nfds), \ (long long)(timeout)) #define __sanitizer_syscall_pre_afssys(id, a1, a2, a3, a4, a5, a6) \ __sanitizer_syscall_pre_impl_afssys( \ (long long)(id), (long long)(a1), (long long)(a2), (long long)(a3), \ (long long)(a4), (long long)(a5), (long long)(a6)) #define __sanitizer_syscall_post_afssys(res, id, a1, a2, a3, a4, a5, a6) \ __sanitizer_syscall_post_impl_afssys( \ res, (long long)(id), (long long)(a1), (long long)(a2), (long long)(a3), \ (long long)(a4), (long long)(a5), (long long)(a6)) /* syscall 211 has been skipped */ /* syscall 212 has been skipped */ /* syscall 213 has been skipped */ /* syscall 214 has been skipped */ /* syscall 215 has been skipped */ /* 
syscall 216 has been skipped */ /* syscall 217 has been skipped */ /* syscall 218 has been skipped */ /* syscall 219 has been skipped */ #define __sanitizer_syscall_pre_compat_14___semctl(semid, semnum, cmd, arg) \ __sanitizer_syscall_pre_impl_compat_14___semctl( \ (long long)(semid), (long long)(semnum), (long long)(cmd), \ (long long)(arg)) #define __sanitizer_syscall_post_compat_14___semctl(res, semid, semnum, cmd, \ arg) \ __sanitizer_syscall_post_impl_compat_14___semctl( \ res, (long long)(semid), (long long)(semnum), (long long)(cmd), \ (long long)(arg)) #define __sanitizer_syscall_pre_semget(key, nsems, semflg) \ __sanitizer_syscall_pre_impl_semget((long long)(key), (long long)(nsems), \ (long long)(semflg)) #define __sanitizer_syscall_post_semget(res, key, nsems, semflg) \ __sanitizer_syscall_post_impl_semget( \ res, (long long)(key), (long long)(nsems), (long long)(semflg)) #define __sanitizer_syscall_pre_semop(semid, sops, nsops) \ __sanitizer_syscall_pre_impl_semop((long long)(semid), (long long)(sops), \ (long long)(nsops)) #define __sanitizer_syscall_post_semop(res, semid, sops, nsops) \ __sanitizer_syscall_post_impl_semop(res, (long long)(semid), \ (long long)(sops), (long long)(nsops)) #define __sanitizer_syscall_pre_semconfig(flag) \ __sanitizer_syscall_pre_impl_semconfig((long long)(flag)) #define __sanitizer_syscall_post_semconfig(res, flag) \ __sanitizer_syscall_post_impl_semconfig(res, (long long)(flag)) #define __sanitizer_syscall_pre_compat_14_msgctl(msqid, cmd, buf) \ __sanitizer_syscall_pre_impl_compat_14_msgctl( \ (long long)(msqid), (long long)(cmd), (long long)(buf)) #define __sanitizer_syscall_post_compat_14_msgctl(res, msqid, cmd, buf) \ __sanitizer_syscall_post_impl_compat_14_msgctl( \ res, (long long)(msqid), (long long)(cmd), (long long)(buf)) #define __sanitizer_syscall_pre_msgget(key, msgflg) \ __sanitizer_syscall_pre_impl_msgget((long long)(key), (long long)(msgflg)) #define __sanitizer_syscall_post_msgget(res, key, msgflg) \ __sanitizer_syscall_post_impl_msgget(res, (long long)(key), \ (long long)(msgflg)) #define __sanitizer_syscall_pre_msgsnd(msqid, msgp, msgsz, msgflg) \ __sanitizer_syscall_pre_impl_msgsnd((long long)(msqid), (long long)(msgp), \ (long long)(msgsz), (long long)(msgflg)) #define __sanitizer_syscall_post_msgsnd(res, msqid, msgp, msgsz, msgflg) \ __sanitizer_syscall_post_impl_msgsnd(res, (long long)(msqid), \ (long long)(msgp), (long long)(msgsz), \ (long long)(msgflg)) #define __sanitizer_syscall_pre_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg) \ __sanitizer_syscall_pre_impl_msgrcv((long long)(msqid), (long long)(msgp), \ (long long)(msgsz), (long long)(msgtyp), \ (long long)(msgflg)) #define __sanitizer_syscall_post_msgrcv(res, msqid, msgp, msgsz, msgtyp, \ msgflg) \ __sanitizer_syscall_post_impl_msgrcv( \ res, (long long)(msqid), (long long)(msgp), (long long)(msgsz), \ (long long)(msgtyp), (long long)(msgflg)) #define __sanitizer_syscall_pre_shmat(shmid, shmaddr, shmflg) \ __sanitizer_syscall_pre_impl_shmat((long long)(shmid), (long long)(shmaddr), \ (long long)(shmflg)) #define __sanitizer_syscall_post_shmat(res, shmid, shmaddr, shmflg) \ __sanitizer_syscall_post_impl_shmat( \ res, (long long)(shmid), (long long)(shmaddr), (long long)(shmflg)) #define __sanitizer_syscall_pre_compat_14_shmctl(shmid, cmd, buf) \ __sanitizer_syscall_pre_impl_compat_14_shmctl( \ (long long)(shmid), (long long)(cmd), (long long)(buf)) #define __sanitizer_syscall_post_compat_14_shmctl(res, shmid, cmd, buf) \ __sanitizer_syscall_post_impl_compat_14_shmctl( \ 
res, (long long)(shmid), (long long)(cmd), (long long)(buf)) #define __sanitizer_syscall_pre_shmdt(shmaddr) \ __sanitizer_syscall_pre_impl_shmdt((long long)(shmaddr)) #define __sanitizer_syscall_post_shmdt(res, shmaddr) \ __sanitizer_syscall_post_impl_shmdt(res, (long long)(shmaddr)) #define __sanitizer_syscall_pre_shmget(key, size, shmflg) \ __sanitizer_syscall_pre_impl_shmget((long long)(key), (long long)(size), \ (long long)(shmflg)) #define __sanitizer_syscall_post_shmget(res, key, size, shmflg) \ __sanitizer_syscall_post_impl_shmget(res, (long long)(key), \ (long long)(size), (long long)(shmflg)) #define __sanitizer_syscall_pre_compat_50_clock_gettime(clock_id, tp) \ __sanitizer_syscall_pre_impl_compat_50_clock_gettime((long long)(clock_id), \ (long long)(tp)) #define __sanitizer_syscall_post_compat_50_clock_gettime(res, clock_id, tp) \ __sanitizer_syscall_post_impl_compat_50_clock_gettime( \ res, (long long)(clock_id), (long long)(tp)) #define __sanitizer_syscall_pre_compat_50_clock_settime(clock_id, tp) \ __sanitizer_syscall_pre_impl_compat_50_clock_settime((long long)(clock_id), \ (long long)(tp)) #define __sanitizer_syscall_post_compat_50_clock_settime(res, clock_id, tp) \ __sanitizer_syscall_post_impl_compat_50_clock_settime( \ res, (long long)(clock_id), (long long)(tp)) #define __sanitizer_syscall_pre_compat_50_clock_getres(clock_id, tp) \ __sanitizer_syscall_pre_impl_compat_50_clock_getres((long long)(clock_id), \ (long long)(tp)) #define __sanitizer_syscall_post_compat_50_clock_getres(res, clock_id, tp) \ __sanitizer_syscall_post_impl_compat_50_clock_getres( \ res, (long long)(clock_id), (long long)(tp)) #define __sanitizer_syscall_pre_timer_create(clock_id, evp, timerid) \ __sanitizer_syscall_pre_impl_timer_create( \ (long long)(clock_id), (long long)(evp), (long long)(timerid)) #define __sanitizer_syscall_post_timer_create(res, clock_id, evp, timerid) \ __sanitizer_syscall_post_impl_timer_create( \ res, (long long)(clock_id), (long long)(evp), (long long)(timerid)) #define __sanitizer_syscall_pre_timer_delete(timerid) \ __sanitizer_syscall_pre_impl_timer_delete((long long)(timerid)) #define __sanitizer_syscall_post_timer_delete(res, timerid) \ __sanitizer_syscall_post_impl_timer_delete(res, (long long)(timerid)) #define __sanitizer_syscall_pre_compat_50_timer_settime(timerid, flags, value, \ ovalue) \ __sanitizer_syscall_pre_impl_compat_50_timer_settime( \ (long long)(timerid), (long long)(flags), (long long)(value), \ (long long)(ovalue)) #define __sanitizer_syscall_post_compat_50_timer_settime(res, timerid, flags, \ value, ovalue) \ __sanitizer_syscall_post_impl_compat_50_timer_settime( \ res, (long long)(timerid), (long long)(flags), (long long)(value), \ (long long)(ovalue)) #define __sanitizer_syscall_pre_compat_50_timer_gettime(timerid, value) \ __sanitizer_syscall_pre_impl_compat_50_timer_gettime((long long)(timerid), \ (long long)(value)) #define __sanitizer_syscall_post_compat_50_timer_gettime(res, timerid, value) \ __sanitizer_syscall_post_impl_compat_50_timer_gettime( \ res, (long long)(timerid), (long long)(value)) #define __sanitizer_syscall_pre_timer_getoverrun(timerid) \ __sanitizer_syscall_pre_impl_timer_getoverrun((long long)(timerid)) #define __sanitizer_syscall_post_timer_getoverrun(res, timerid) \ __sanitizer_syscall_post_impl_timer_getoverrun(res, (long long)(timerid)) #define __sanitizer_syscall_pre_compat_50_nanosleep(rqtp, rmtp) \ __sanitizer_syscall_pre_impl_compat_50_nanosleep((long long)(rqtp), \ (long long)(rmtp)) #define 
__sanitizer_syscall_post_compat_50_nanosleep(res, rqtp, rmtp) \ __sanitizer_syscall_post_impl_compat_50_nanosleep(res, (long long)(rqtp), \ (long long)(rmtp)) #define __sanitizer_syscall_pre_fdatasync(fd) \ __sanitizer_syscall_pre_impl_fdatasync((long long)(fd)) #define __sanitizer_syscall_post_fdatasync(res, fd) \ __sanitizer_syscall_post_impl_fdatasync(res, (long long)(fd)) #define __sanitizer_syscall_pre_mlockall(flags) \ __sanitizer_syscall_pre_impl_mlockall((long long)(flags)) #define __sanitizer_syscall_post_mlockall(res, flags) \ __sanitizer_syscall_post_impl_mlockall(res, (long long)(flags)) #define __sanitizer_syscall_pre_munlockall() \ __sanitizer_syscall_pre_impl_munlockall() #define __sanitizer_syscall_post_munlockall(res) \ __sanitizer_syscall_post_impl_munlockall(res) #define __sanitizer_syscall_pre_compat_50___sigtimedwait(set, info, timeout) \ __sanitizer_syscall_pre_impl_compat_50___sigtimedwait( \ (long long)(set), (long long)(info), (long long)(timeout)) #define __sanitizer_syscall_post_compat_50___sigtimedwait(res, set, info, \ timeout) \ __sanitizer_syscall_post_impl_compat_50___sigtimedwait( \ res, (long long)(set), (long long)(info), (long long)(timeout)) #define __sanitizer_syscall_pre_sigqueueinfo(pid, info) \ __sanitizer_syscall_pre_impl_sigqueueinfo((long long)(pid), (long long)(info)) #define __sanitizer_syscall_post_sigqueueinfo(res, pid, info) \ __sanitizer_syscall_post_impl_sigqueueinfo(res, (long long)(pid), \ (long long)(info)) #define __sanitizer_syscall_pre_modctl(cmd, arg) \ __sanitizer_syscall_pre_impl_modctl((long long)(cmd), (long long)(arg)) #define __sanitizer_syscall_post_modctl(res, cmd, arg) \ __sanitizer_syscall_post_impl_modctl(res, (long long)(cmd), (long long)(arg)) #define __sanitizer_syscall_pre__ksem_init(value, idp) \ __sanitizer_syscall_pre_impl__ksem_init((long long)(value), (long long)(idp)) #define __sanitizer_syscall_post__ksem_init(res, value, idp) \ __sanitizer_syscall_post_impl__ksem_init(res, (long long)(value), \ (long long)(idp)) #define __sanitizer_syscall_pre__ksem_open(name, oflag, mode, value, idp) \ __sanitizer_syscall_pre_impl__ksem_open( \ (long long)(name), (long long)(oflag), (long long)(mode), \ (long long)(value), (long long)(idp)) #define __sanitizer_syscall_post__ksem_open(res, name, oflag, mode, value, \ idp) \ __sanitizer_syscall_post_impl__ksem_open( \ res, (long long)(name), (long long)(oflag), (long long)(mode), \ (long long)(value), (long long)(idp)) #define __sanitizer_syscall_pre__ksem_unlink(name) \ __sanitizer_syscall_pre_impl__ksem_unlink((long long)(name)) #define __sanitizer_syscall_post__ksem_unlink(res, name) \ __sanitizer_syscall_post_impl__ksem_unlink(res, (long long)(name)) #define __sanitizer_syscall_pre__ksem_close(id) \ __sanitizer_syscall_pre_impl__ksem_close((long long)(id)) #define __sanitizer_syscall_post__ksem_close(res, id) \ __sanitizer_syscall_post_impl__ksem_close(res, (long long)(id)) #define __sanitizer_syscall_pre__ksem_post(id) \ __sanitizer_syscall_pre_impl__ksem_post((long long)(id)) #define __sanitizer_syscall_post__ksem_post(res, id) \ __sanitizer_syscall_post_impl__ksem_post(res, (long long)(id)) #define __sanitizer_syscall_pre__ksem_wait(id) \ __sanitizer_syscall_pre_impl__ksem_wait((long long)(id)) #define __sanitizer_syscall_post__ksem_wait(res, id) \ __sanitizer_syscall_post_impl__ksem_wait(res, (long long)(id)) #define __sanitizer_syscall_pre__ksem_trywait(id) \ __sanitizer_syscall_pre_impl__ksem_trywait((long long)(id)) #define __sanitizer_syscall_post__ksem_trywait(res, 
id) \ __sanitizer_syscall_post_impl__ksem_trywait(res, (long long)(id)) #define __sanitizer_syscall_pre__ksem_getvalue(id, value) \ __sanitizer_syscall_pre_impl__ksem_getvalue((long long)(id), \ (long long)(value)) #define __sanitizer_syscall_post__ksem_getvalue(res, id, value) \ __sanitizer_syscall_post_impl__ksem_getvalue(res, (long long)(id), \ (long long)(value)) #define __sanitizer_syscall_pre__ksem_destroy(id) \ __sanitizer_syscall_pre_impl__ksem_destroy((long long)(id)) #define __sanitizer_syscall_post__ksem_destroy(res, id) \ __sanitizer_syscall_post_impl__ksem_destroy(res, (long long)(id)) #define __sanitizer_syscall_pre__ksem_timedwait(id, abstime) \ __sanitizer_syscall_pre_impl__ksem_timedwait((long long)(id), \ (long long)(abstime)) #define __sanitizer_syscall_post__ksem_timedwait(res, id, abstime) \ __sanitizer_syscall_post_impl__ksem_timedwait(res, (long long)(id), \ (long long)(abstime)) #define __sanitizer_syscall_pre_mq_open(name, oflag, mode, attr) \ __sanitizer_syscall_pre_impl_mq_open((long long)(name), (long long)(oflag), \ (long long)(mode), (long long)(attr)) #define __sanitizer_syscall_post_mq_open(res, name, oflag, mode, attr) \ __sanitizer_syscall_post_impl_mq_open(res, (long long)(name), \ (long long)(oflag), (long long)(mode), \ (long long)(attr)) #define __sanitizer_syscall_pre_mq_close(mqdes) \ __sanitizer_syscall_pre_impl_mq_close((long long)(mqdes)) #define __sanitizer_syscall_post_mq_close(res, mqdes) \ __sanitizer_syscall_post_impl_mq_close(res, (long long)(mqdes)) #define __sanitizer_syscall_pre_mq_unlink(name) \ __sanitizer_syscall_pre_impl_mq_unlink((long long)(name)) #define __sanitizer_syscall_post_mq_unlink(res, name) \ __sanitizer_syscall_post_impl_mq_unlink(res, (long long)(name)) #define __sanitizer_syscall_pre_mq_getattr(mqdes, mqstat) \ __sanitizer_syscall_pre_impl_mq_getattr((long long)(mqdes), \ (long long)(mqstat)) #define __sanitizer_syscall_post_mq_getattr(res, mqdes, mqstat) \ __sanitizer_syscall_post_impl_mq_getattr(res, (long long)(mqdes), \ (long long)(mqstat)) #define __sanitizer_syscall_pre_mq_setattr(mqdes, mqstat, omqstat) \ __sanitizer_syscall_pre_impl_mq_setattr( \ (long long)(mqdes), (long long)(mqstat), (long long)(omqstat)) #define __sanitizer_syscall_post_mq_setattr(res, mqdes, mqstat, omqstat) \ __sanitizer_syscall_post_impl_mq_setattr( \ res, (long long)(mqdes), (long long)(mqstat), (long long)(omqstat)) #define __sanitizer_syscall_pre_mq_notify(mqdes, notification) \ __sanitizer_syscall_pre_impl_mq_notify((long long)(mqdes), \ (long long)(notification)) #define __sanitizer_syscall_post_mq_notify(res, mqdes, notification) \ __sanitizer_syscall_post_impl_mq_notify(res, (long long)(mqdes), \ (long long)(notification)) #define __sanitizer_syscall_pre_mq_send(mqdes, msg_ptr, msg_len, msg_prio) \ __sanitizer_syscall_pre_impl_mq_send( \ (long long)(mqdes), (long long)(msg_ptr), (long long)(msg_len), \ (long long)(msg_prio)) #define __sanitizer_syscall_post_mq_send(res, mqdes, msg_ptr, msg_len, \ msg_prio) \ __sanitizer_syscall_post_impl_mq_send( \ res, (long long)(mqdes), (long long)(msg_ptr), (long long)(msg_len), \ (long long)(msg_prio)) #define __sanitizer_syscall_pre_mq_receive(mqdes, msg_ptr, msg_len, msg_prio) \ __sanitizer_syscall_pre_impl_mq_receive( \ (long long)(mqdes), (long long)(msg_ptr), (long long)(msg_len), \ (long long)(msg_prio)) #define __sanitizer_syscall_post_mq_receive(res, mqdes, msg_ptr, msg_len, \ msg_prio) \ __sanitizer_syscall_post_impl_mq_receive( \ res, (long long)(mqdes), (long long)(msg_ptr), (long 
long)(msg_len), \ (long long)(msg_prio)) #define __sanitizer_syscall_pre_compat_50_mq_timedsend( \ mqdes, msg_ptr, msg_len, msg_prio, abs_timeout) \ __sanitizer_syscall_pre_impl_compat_50_mq_timedsend( \ (long long)(mqdes), (long long)(msg_ptr), (long long)(msg_len), \ (long long)(msg_prio), (long long)(abs_timeout)) #define __sanitizer_syscall_post_compat_50_mq_timedsend( \ res, mqdes, msg_ptr, msg_len, msg_prio, abs_timeout) \ __sanitizer_syscall_post_impl_compat_50_mq_timedsend( \ res, (long long)(mqdes), (long long)(msg_ptr), (long long)(msg_len), \ (long long)(msg_prio), (long long)(abs_timeout)) #define __sanitizer_syscall_pre_compat_50_mq_timedreceive( \ mqdes, msg_ptr, msg_len, msg_prio, abs_timeout) \ __sanitizer_syscall_pre_impl_compat_50_mq_timedreceive( \ (long long)(mqdes), (long long)(msg_ptr), (long long)(msg_len), \ (long long)(msg_prio), (long long)(abs_timeout)) #define __sanitizer_syscall_post_compat_50_mq_timedreceive( \ res, mqdes, msg_ptr, msg_len, msg_prio, abs_timeout) \ __sanitizer_syscall_post_impl_compat_50_mq_timedreceive( \ res, (long long)(mqdes), (long long)(msg_ptr), (long long)(msg_len), \ (long long)(msg_prio), (long long)(abs_timeout)) /* syscall 267 has been skipped */ /* syscall 268 has been skipped */ /* syscall 269 has been skipped */ #define __sanitizer_syscall_pre___posix_rename(from, to) \ __sanitizer_syscall_pre_impl___posix_rename((long long)(from), \ (long long)(to)) #define __sanitizer_syscall_post___posix_rename(res, from, to) \ __sanitizer_syscall_post_impl___posix_rename(res, (long long)(from), \ (long long)(to)) #define __sanitizer_syscall_pre_swapctl(cmd, arg, misc) \ __sanitizer_syscall_pre_impl_swapctl((long long)(cmd), (long long)(arg), \ (long long)(misc)) #define __sanitizer_syscall_post_swapctl(res, cmd, arg, misc) \ __sanitizer_syscall_post_impl_swapctl(res, (long long)(cmd), \ (long long)(arg), (long long)(misc)) #define __sanitizer_syscall_pre_compat_30_getdents(fd, buf, count) \ __sanitizer_syscall_pre_impl_compat_30_getdents( \ (long long)(fd), (long long)(buf), (long long)(count)) #define __sanitizer_syscall_post_compat_30_getdents(res, fd, buf, count) \ __sanitizer_syscall_post_impl_compat_30_getdents( \ res, (long long)(fd), (long long)(buf), (long long)(count)) #define __sanitizer_syscall_pre_minherit(addr, len, inherit) \ __sanitizer_syscall_pre_impl_minherit((long long)(addr), (long long)(len), \ (long long)(inherit)) #define __sanitizer_syscall_post_minherit(res, addr, len, inherit) \ __sanitizer_syscall_post_impl_minherit( \ res, (long long)(addr), (long long)(len), (long long)(inherit)) #define __sanitizer_syscall_pre_lchmod(path, mode) \ __sanitizer_syscall_pre_impl_lchmod((long long)(path), (long long)(mode)) #define __sanitizer_syscall_post_lchmod(res, path, mode) \ __sanitizer_syscall_post_impl_lchmod(res, (long long)(path), \ (long long)(mode)) #define __sanitizer_syscall_pre_lchown(path, uid, gid) \ __sanitizer_syscall_pre_impl_lchown((long long)(path), (long long)(uid), \ (long long)(gid)) #define __sanitizer_syscall_post_lchown(res, path, uid, gid) \ __sanitizer_syscall_post_impl_lchown(res, (long long)(path), \ (long long)(uid), (long long)(gid)) #define __sanitizer_syscall_pre_compat_50_lutimes(path, tptr) \ __sanitizer_syscall_pre_impl_compat_50_lutimes((long long)(path), \ (long long)(tptr)) #define __sanitizer_syscall_post_compat_50_lutimes(res, path, tptr) \ __sanitizer_syscall_post_impl_compat_50_lutimes(res, (long long)(path), \ (long long)(tptr)) #define __sanitizer_syscall_pre___msync13(addr, len, 
flags) \ __sanitizer_syscall_pre_impl___msync13((long long)(addr), (long long)(len), \ (long long)(flags)) #define __sanitizer_syscall_post___msync13(res, addr, len, flags) \ __sanitizer_syscall_post_impl___msync13( \ res, (long long)(addr), (long long)(len), (long long)(flags)) #define __sanitizer_syscall_pre_compat_30___stat13(path, ub) \ __sanitizer_syscall_pre_impl_compat_30___stat13((long long)(path), \ (long long)(ub)) #define __sanitizer_syscall_post_compat_30___stat13(res, path, ub) \ __sanitizer_syscall_post_impl_compat_30___stat13(res, (long long)(path), \ (long long)(ub)) #define __sanitizer_syscall_pre_compat_30___fstat13(fd, sb) \ __sanitizer_syscall_pre_impl_compat_30___fstat13((long long)(fd), \ (long long)(sb)) #define __sanitizer_syscall_post_compat_30___fstat13(res, fd, sb) \ __sanitizer_syscall_post_impl_compat_30___fstat13(res, (long long)(fd), \ (long long)(sb)) #define __sanitizer_syscall_pre_compat_30___lstat13(path, ub) \ __sanitizer_syscall_pre_impl_compat_30___lstat13((long long)(path), \ (long long)(ub)) #define __sanitizer_syscall_post_compat_30___lstat13(res, path, ub) \ __sanitizer_syscall_post_impl_compat_30___lstat13(res, (long long)(path), \ (long long)(ub)) #define __sanitizer_syscall_pre___sigaltstack14(nss, oss) \ __sanitizer_syscall_pre_impl___sigaltstack14((long long)(nss), \ (long long)(oss)) #define __sanitizer_syscall_post___sigaltstack14(res, nss, oss) \ __sanitizer_syscall_post_impl___sigaltstack14(res, (long long)(nss), \ (long long)(oss)) #define __sanitizer_syscall_pre___vfork14() \ __sanitizer_syscall_pre_impl___vfork14() #define __sanitizer_syscall_post___vfork14(res) \ __sanitizer_syscall_post_impl___vfork14(res) #define __sanitizer_syscall_pre___posix_chown(path, uid, gid) \ __sanitizer_syscall_pre_impl___posix_chown( \ (long long)(path), (long long)(uid), (long long)(gid)) #define __sanitizer_syscall_post___posix_chown(res, path, uid, gid) \ __sanitizer_syscall_post_impl___posix_chown( \ res, (long long)(path), (long long)(uid), (long long)(gid)) #define __sanitizer_syscall_pre___posix_fchown(fd, uid, gid) \ __sanitizer_syscall_pre_impl___posix_fchown( \ (long long)(fd), (long long)(uid), (long long)(gid)) #define __sanitizer_syscall_post___posix_fchown(res, fd, uid, gid) \ __sanitizer_syscall_post_impl___posix_fchown( \ res, (long long)(fd), (long long)(uid), (long long)(gid)) #define __sanitizer_syscall_pre___posix_lchown(path, uid, gid) \ __sanitizer_syscall_pre_impl___posix_lchown( \ (long long)(path), (long long)(uid), (long long)(gid)) #define __sanitizer_syscall_post___posix_lchown(res, path, uid, gid) \ __sanitizer_syscall_post_impl___posix_lchown( \ res, (long long)(path), (long long)(uid), (long long)(gid)) #define __sanitizer_syscall_pre_getsid(pid) \ __sanitizer_syscall_pre_impl_getsid((long long)(pid)) #define __sanitizer_syscall_post_getsid(res, pid) \ __sanitizer_syscall_post_impl_getsid(res, (long long)(pid)) #define __sanitizer_syscall_pre___clone(flags, stack) \ __sanitizer_syscall_pre_impl___clone((long long)(flags), (long long)(stack)) #define __sanitizer_syscall_post___clone(res, flags, stack) \ __sanitizer_syscall_post_impl___clone(res, (long long)(flags), \ (long long)(stack)) #define __sanitizer_syscall_pre_fktrace(fd, ops, facs, pid) \ __sanitizer_syscall_pre_impl_fktrace((long long)(fd), (long long)(ops), \ (long long)(facs), (long long)(pid)) #define __sanitizer_syscall_post_fktrace(res, fd, ops, facs, pid) \ __sanitizer_syscall_post_impl_fktrace(res, (long long)(fd), \ (long long)(ops), (long long)(facs), \ 
(long long)(pid)) #define __sanitizer_syscall_pre_preadv(fd, iovp, iovcnt, PAD, offset) \ __sanitizer_syscall_pre_impl_preadv((long long)(fd), (long long)(iovp), \ (long long)(iovcnt), (long long)(PAD), \ (long long)(offset)) #define __sanitizer_syscall_post_preadv(res, fd, iovp, iovcnt, PAD, offset) \ __sanitizer_syscall_post_impl_preadv(res, (long long)(fd), \ (long long)(iovp), (long long)(iovcnt), \ (long long)(PAD), (long long)(offset)) #define __sanitizer_syscall_pre_pwritev(fd, iovp, iovcnt, PAD, offset) \ __sanitizer_syscall_pre_impl_pwritev((long long)(fd), (long long)(iovp), \ (long long)(iovcnt), (long long)(PAD), \ (long long)(offset)) #define __sanitizer_syscall_post_pwritev(res, fd, iovp, iovcnt, PAD, offset) \ __sanitizer_syscall_post_impl_pwritev( \ res, (long long)(fd), (long long)(iovp), (long long)(iovcnt), \ (long long)(PAD), (long long)(offset)) #define __sanitizer_syscall_pre_compat_16___sigaction14(signum, nsa, osa) \ __sanitizer_syscall_pre_impl_compat_16___sigaction14( \ (long long)(signum), (long long)(nsa), (long long)(osa)) #define __sanitizer_syscall_post_compat_16___sigaction14(res, signum, nsa, \ osa) \ __sanitizer_syscall_post_impl_compat_16___sigaction14( \ res, (long long)(signum), (long long)(nsa), (long long)(osa)) #define __sanitizer_syscall_pre___sigpending14(set) \ __sanitizer_syscall_pre_impl___sigpending14((long long)(set)) #define __sanitizer_syscall_post___sigpending14(res, set) \ __sanitizer_syscall_post_impl___sigpending14(res, (long long)(set)) #define __sanitizer_syscall_pre___sigprocmask14(how, set, oset) \ __sanitizer_syscall_pre_impl___sigprocmask14( \ (long long)(how), (long long)(set), (long long)(oset)) #define __sanitizer_syscall_post___sigprocmask14(res, how, set, oset) \ __sanitizer_syscall_post_impl___sigprocmask14( \ res, (long long)(how), (long long)(set), (long long)(oset)) #define __sanitizer_syscall_pre___sigsuspend14(set) \ __sanitizer_syscall_pre_impl___sigsuspend14((long long)(set)) #define __sanitizer_syscall_post___sigsuspend14(res, set) \ __sanitizer_syscall_post_impl___sigsuspend14(res, (long long)(set)) #define __sanitizer_syscall_pre_compat_16___sigreturn14(sigcntxp) \ __sanitizer_syscall_pre_impl_compat_16___sigreturn14((long long)(sigcntxp)) #define __sanitizer_syscall_post_compat_16___sigreturn14(res, sigcntxp) \ __sanitizer_syscall_post_impl_compat_16___sigreturn14(res, \ (long long)(sigcntxp)) #define __sanitizer_syscall_pre___getcwd(bufp, length) \ __sanitizer_syscall_pre_impl___getcwd((long long)(bufp), (long long)(length)) #define __sanitizer_syscall_post___getcwd(res, bufp, length) \ __sanitizer_syscall_post_impl___getcwd(res, (long long)(bufp), \ (long long)(length)) #define __sanitizer_syscall_pre_fchroot(fd) \ __sanitizer_syscall_pre_impl_fchroot((long long)(fd)) #define __sanitizer_syscall_post_fchroot(res, fd) \ __sanitizer_syscall_post_impl_fchroot(res, (long long)(fd)) #define __sanitizer_syscall_pre_compat_30_fhopen(fhp, flags) \ __sanitizer_syscall_pre_impl_compat_30_fhopen((long long)(fhp), \ (long long)(flags)) #define __sanitizer_syscall_post_compat_30_fhopen(res, fhp, flags) \ __sanitizer_syscall_post_impl_compat_30_fhopen(res, (long long)(fhp), \ (long long)(flags)) #define __sanitizer_syscall_pre_compat_30_fhstat(fhp, sb) \ __sanitizer_syscall_pre_impl_compat_30_fhstat((long long)(fhp), \ (long long)(sb)) #define __sanitizer_syscall_post_compat_30_fhstat(res, fhp, sb) \ __sanitizer_syscall_post_impl_compat_30_fhstat(res, (long long)(fhp), \ (long long)(sb)) #define 
__sanitizer_syscall_pre_compat_20_fhstatfs(fhp, buf) \ __sanitizer_syscall_pre_impl_compat_20_fhstatfs((long long)(fhp), \ (long long)(buf)) #define __sanitizer_syscall_post_compat_20_fhstatfs(res, fhp, buf) \ __sanitizer_syscall_post_impl_compat_20_fhstatfs(res, (long long)(fhp), \ (long long)(buf)) #define __sanitizer_syscall_pre_compat_50_____semctl13(semid, semnum, cmd, \ arg) \ __sanitizer_syscall_pre_impl_compat_50_____semctl13( \ (long long)(semid), (long long)(semnum), (long long)(cmd), \ (long long)(arg)) #define __sanitizer_syscall_post_compat_50_____semctl13(res, semid, semnum, \ cmd, arg) \ __sanitizer_syscall_post_impl_compat_50_____semctl13( \ res, (long long)(semid), (long long)(semnum), (long long)(cmd), \ (long long)(arg)) #define __sanitizer_syscall_pre_compat_50___msgctl13(msqid, cmd, buf) \ __sanitizer_syscall_pre_impl_compat_50___msgctl13( \ (long long)(msqid), (long long)(cmd), (long long)(buf)) #define __sanitizer_syscall_post_compat_50___msgctl13(res, msqid, cmd, buf) \ __sanitizer_syscall_post_impl_compat_50___msgctl13( \ res, (long long)(msqid), (long long)(cmd), (long long)(buf)) #define __sanitizer_syscall_pre_compat_50___shmctl13(shmid, cmd, buf) \ __sanitizer_syscall_pre_impl_compat_50___shmctl13( \ (long long)(shmid), (long long)(cmd), (long long)(buf)) #define __sanitizer_syscall_post_compat_50___shmctl13(res, shmid, cmd, buf) \ __sanitizer_syscall_post_impl_compat_50___shmctl13( \ res, (long long)(shmid), (long long)(cmd), (long long)(buf)) #define __sanitizer_syscall_pre_lchflags(path, flags) \ __sanitizer_syscall_pre_impl_lchflags((long long)(path), (long long)(flags)) #define __sanitizer_syscall_post_lchflags(res, path, flags) \ __sanitizer_syscall_post_impl_lchflags(res, (long long)(path), \ (long long)(flags)) #define __sanitizer_syscall_pre_issetugid() \ __sanitizer_syscall_pre_impl_issetugid() #define __sanitizer_syscall_post_issetugid(res) \ __sanitizer_syscall_post_impl_issetugid(res) #define __sanitizer_syscall_pre_utrace(label, addr, len) \ __sanitizer_syscall_pre_impl_utrace((long long)(label), (long long)(addr), \ (long long)(len)) #define __sanitizer_syscall_post_utrace(res, label, addr, len) \ __sanitizer_syscall_post_impl_utrace(res, (long long)(label), \ (long long)(addr), (long long)(len)) #define __sanitizer_syscall_pre_getcontext(ucp) \ __sanitizer_syscall_pre_impl_getcontext((long long)(ucp)) #define __sanitizer_syscall_post_getcontext(res, ucp) \ __sanitizer_syscall_post_impl_getcontext(res, (long long)(ucp)) #define __sanitizer_syscall_pre_setcontext(ucp) \ __sanitizer_syscall_pre_impl_setcontext((long long)(ucp)) #define __sanitizer_syscall_post_setcontext(res, ucp) \ __sanitizer_syscall_post_impl_setcontext(res, (long long)(ucp)) #define __sanitizer_syscall_pre__lwp_create(ucp, flags, new_lwp) \ __sanitizer_syscall_pre_impl__lwp_create( \ (long long)(ucp), (long long)(flags), (long long)(new_lwp)) #define __sanitizer_syscall_post__lwp_create(res, ucp, flags, new_lwp) \ __sanitizer_syscall_post_impl__lwp_create( \ res, (long long)(ucp), (long long)(flags), (long long)(new_lwp)) #define __sanitizer_syscall_pre__lwp_exit() \ __sanitizer_syscall_pre_impl__lwp_exit() #define __sanitizer_syscall_post__lwp_exit(res) \ __sanitizer_syscall_post_impl__lwp_exit(res) #define __sanitizer_syscall_pre__lwp_self() \ __sanitizer_syscall_pre_impl__lwp_self() #define __sanitizer_syscall_post__lwp_self(res) \ __sanitizer_syscall_post_impl__lwp_self(res) #define __sanitizer_syscall_pre__lwp_wait(wait_for, departed) \ 
__sanitizer_syscall_pre_impl__lwp_wait((long long)(wait_for), \ (long long)(departed)) #define __sanitizer_syscall_post__lwp_wait(res, wait_for, departed) \ __sanitizer_syscall_post_impl__lwp_wait(res, (long long)(wait_for), \ (long long)(departed)) #define __sanitizer_syscall_pre__lwp_suspend(target) \ __sanitizer_syscall_pre_impl__lwp_suspend((long long)(target)) #define __sanitizer_syscall_post__lwp_suspend(res, target) \ __sanitizer_syscall_post_impl__lwp_suspend(res, (long long)(target)) #define __sanitizer_syscall_pre__lwp_continue(target) \ __sanitizer_syscall_pre_impl__lwp_continue((long long)(target)) #define __sanitizer_syscall_post__lwp_continue(res, target) \ __sanitizer_syscall_post_impl__lwp_continue(res, (long long)(target)) #define __sanitizer_syscall_pre__lwp_wakeup(target) \ __sanitizer_syscall_pre_impl__lwp_wakeup((long long)(target)) #define __sanitizer_syscall_post__lwp_wakeup(res, target) \ __sanitizer_syscall_post_impl__lwp_wakeup(res, (long long)(target)) #define __sanitizer_syscall_pre__lwp_getprivate() \ __sanitizer_syscall_pre_impl__lwp_getprivate() #define __sanitizer_syscall_post__lwp_getprivate(res) \ __sanitizer_syscall_post_impl__lwp_getprivate(res) #define __sanitizer_syscall_pre__lwp_setprivate(ptr) \ __sanitizer_syscall_pre_impl__lwp_setprivate((long long)(ptr)) #define __sanitizer_syscall_post__lwp_setprivate(res, ptr) \ __sanitizer_syscall_post_impl__lwp_setprivate(res, (long long)(ptr)) #define __sanitizer_syscall_pre__lwp_kill(target, signo) \ __sanitizer_syscall_pre_impl__lwp_kill((long long)(target), \ (long long)(signo)) #define __sanitizer_syscall_post__lwp_kill(res, target, signo) \ __sanitizer_syscall_post_impl__lwp_kill(res, (long long)(target), \ (long long)(signo)) #define __sanitizer_syscall_pre__lwp_detach(target) \ __sanitizer_syscall_pre_impl__lwp_detach((long long)(target)) #define __sanitizer_syscall_post__lwp_detach(res, target) \ __sanitizer_syscall_post_impl__lwp_detach(res, (long long)(target)) #define __sanitizer_syscall_pre_compat_50__lwp_park(ts, unpark, hint, \ unparkhint) \ __sanitizer_syscall_pre_impl_compat_50__lwp_park( \ (long long)(ts), (long long)(unpark), (long long)(hint), \ (long long)(unparkhint)) #define __sanitizer_syscall_post_compat_50__lwp_park(res, ts, unpark, hint, \ unparkhint) \ __sanitizer_syscall_post_impl_compat_50__lwp_park( \ res, (long long)(ts), (long long)(unpark), (long long)(hint), \ (long long)(unparkhint)) #define __sanitizer_syscall_pre__lwp_unpark(target, hint) \ __sanitizer_syscall_pre_impl__lwp_unpark((long long)(target), \ (long long)(hint)) #define __sanitizer_syscall_post__lwp_unpark(res, target, hint) \ __sanitizer_syscall_post_impl__lwp_unpark(res, (long long)(target), \ (long long)(hint)) #define __sanitizer_syscall_pre__lwp_unpark_all(targets, ntargets, hint) \ __sanitizer_syscall_pre_impl__lwp_unpark_all( \ (long long)(targets), (long long)(ntargets), (long long)(hint)) #define __sanitizer_syscall_post__lwp_unpark_all(res, targets, ntargets, hint) \ __sanitizer_syscall_post_impl__lwp_unpark_all( \ res, (long long)(targets), (long long)(ntargets), (long long)(hint)) #define __sanitizer_syscall_pre__lwp_setname(target, name) \ __sanitizer_syscall_pre_impl__lwp_setname((long long)(target), \ (long long)(name)) #define __sanitizer_syscall_post__lwp_setname(res, target, name) \ __sanitizer_syscall_post_impl__lwp_setname(res, (long long)(target), \ (long long)(name)) #define __sanitizer_syscall_pre__lwp_getname(target, name, len) \ __sanitizer_syscall_pre_impl__lwp_getname( \ (long 
long)(target), (long long)(name), (long long)(len)) #define __sanitizer_syscall_post__lwp_getname(res, target, name, len) \ __sanitizer_syscall_post_impl__lwp_getname( \ res, (long long)(target), (long long)(name), (long long)(len)) #define __sanitizer_syscall_pre__lwp_ctl(features, address) \ __sanitizer_syscall_pre_impl__lwp_ctl((long long)(features), \ (long long)(address)) #define __sanitizer_syscall_post__lwp_ctl(res, features, address) \ __sanitizer_syscall_post_impl__lwp_ctl(res, (long long)(features), \ (long long)(address)) /* syscall 326 has been skipped */ /* syscall 327 has been skipped */ /* syscall 328 has been skipped */ /* syscall 329 has been skipped */ #define __sanitizer_syscall_pre_compat_60_sa_register(newv, oldv, flags, \ stackinfo_offset) \ __sanitizer_syscall_pre_impl_compat_60_sa_register( \ (long long)(newv), (long long)(oldv), (long long)(flags), \ (long long)(stackinfo_offset)) #define __sanitizer_syscall_post_compat_60_sa_register(res, newv, oldv, flags, \ stackinfo_offset) \ __sanitizer_syscall_post_impl_compat_60_sa_register( \ res, (long long)(newv), (long long)(oldv), (long long)(flags), \ (long long)(stackinfo_offset)) #define __sanitizer_syscall_pre_compat_60_sa_stacks(num, stacks) \ __sanitizer_syscall_pre_impl_compat_60_sa_stacks((long long)(num), \ (long long)(stacks)) #define __sanitizer_syscall_post_compat_60_sa_stacks(res, num, stacks) \ __sanitizer_syscall_post_impl_compat_60_sa_stacks(res, (long long)(num), \ (long long)(stacks)) #define __sanitizer_syscall_pre_compat_60_sa_enable() \ __sanitizer_syscall_pre_impl_compat_60_sa_enable() #define __sanitizer_syscall_post_compat_60_sa_enable(res) \ __sanitizer_syscall_post_impl_compat_60_sa_enable(res) #define __sanitizer_syscall_pre_compat_60_sa_setconcurrency(concurrency) \ __sanitizer_syscall_pre_impl_compat_60_sa_setconcurrency( \ (long long)(concurrency)) #define __sanitizer_syscall_post_compat_60_sa_setconcurrency(res, concurrency) \ __sanitizer_syscall_post_impl_compat_60_sa_setconcurrency( \ res, (long long)(concurrency)) #define __sanitizer_syscall_pre_compat_60_sa_yield() \ __sanitizer_syscall_pre_impl_compat_60_sa_yield() #define __sanitizer_syscall_post_compat_60_sa_yield(res) \ __sanitizer_syscall_post_impl_compat_60_sa_yield(res) #define __sanitizer_syscall_pre_compat_60_sa_preempt(sa_id) \ __sanitizer_syscall_pre_impl_compat_60_sa_preempt((long long)(sa_id)) #define __sanitizer_syscall_post_compat_60_sa_preempt(res, sa_id) \ __sanitizer_syscall_post_impl_compat_60_sa_preempt(res, (long long)(sa_id)) /* syscall 336 has been skipped */ /* syscall 337 has been skipped */ /* syscall 338 has been skipped */ /* syscall 339 has been skipped */ #define __sanitizer_syscall_pre___sigaction_sigtramp(signum, nsa, osa, tramp, \ vers) \ __sanitizer_syscall_pre_impl___sigaction_sigtramp( \ (long long)(signum), (long long)(nsa), (long long)(osa), \ (long long)(tramp), (long long)(vers)) #define __sanitizer_syscall_post___sigaction_sigtramp(res, signum, nsa, osa, \ tramp, vers) \ __sanitizer_syscall_post_impl___sigaction_sigtramp( \ res, (long long)(signum), (long long)(nsa), (long long)(osa), \ (long long)(tramp), (long long)(vers)) /* syscall 341 has been skipped */ /* syscall 342 has been skipped */ #define __sanitizer_syscall_pre_rasctl(addr, len, op) \ __sanitizer_syscall_pre_impl_rasctl((long long)(addr), (long long)(len), \ (long long)(op)) #define __sanitizer_syscall_post_rasctl(res, addr, len, op) \ __sanitizer_syscall_post_impl_rasctl(res, (long long)(addr), \ (long long)(len), (long long)(op)) 
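// Note: hook names prefixed with compat_NN_ (compat_50_, compat_60_, ...) wrap the
// NetBSD COMPAT_NN syscalls retained for binaries built against older releases, and
// the "syscall NNN has been skipped" comments mark syscall numbers for which no hook
// is generated.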
#define __sanitizer_syscall_pre_kqueue() __sanitizer_syscall_pre_impl_kqueue() #define __sanitizer_syscall_post_kqueue(res) \ __sanitizer_syscall_post_impl_kqueue(res) #define __sanitizer_syscall_pre_compat_50_kevent(fd, changelist, nchanges, \ eventlist, nevents, timeout) \ __sanitizer_syscall_pre_impl_compat_50_kevent( \ (long long)(fd), (long long)(changelist), (long long)(nchanges), \ (long long)(eventlist), (long long)(nevents), (long long)(timeout)) #define __sanitizer_syscall_post_compat_50_kevent( \ res, fd, changelist, nchanges, eventlist, nevents, timeout) \ __sanitizer_syscall_post_impl_compat_50_kevent( \ res, (long long)(fd), (long long)(changelist), (long long)(nchanges), \ (long long)(eventlist), (long long)(nevents), (long long)(timeout)) #define __sanitizer_syscall_pre__sched_setparam(pid, lid, policy, params) \ __sanitizer_syscall_pre_impl__sched_setparam( \ (long long)(pid), (long long)(lid), (long long)(policy), \ (long long)(params)) #define __sanitizer_syscall_post__sched_setparam(res, pid, lid, policy, \ params) \ __sanitizer_syscall_post_impl__sched_setparam( \ res, (long long)(pid), (long long)(lid), (long long)(policy), \ (long long)(params)) #define __sanitizer_syscall_pre__sched_getparam(pid, lid, policy, params) \ __sanitizer_syscall_pre_impl__sched_getparam( \ (long long)(pid), (long long)(lid), (long long)(policy), \ (long long)(params)) #define __sanitizer_syscall_post__sched_getparam(res, pid, lid, policy, \ params) \ __sanitizer_syscall_post_impl__sched_getparam( \ res, (long long)(pid), (long long)(lid), (long long)(policy), \ (long long)(params)) #define __sanitizer_syscall_pre__sched_setaffinity(pid, lid, size, cpuset) \ __sanitizer_syscall_pre_impl__sched_setaffinity( \ (long long)(pid), (long long)(lid), (long long)(size), \ (long long)(cpuset)) #define __sanitizer_syscall_post__sched_setaffinity(res, pid, lid, size, \ cpuset) \ __sanitizer_syscall_post_impl__sched_setaffinity( \ res, (long long)(pid), (long long)(lid), (long long)(size), \ (long long)(cpuset)) #define __sanitizer_syscall_pre__sched_getaffinity(pid, lid, size, cpuset) \ __sanitizer_syscall_pre_impl__sched_getaffinity( \ (long long)(pid), (long long)(lid), (long long)(size), \ (long long)(cpuset)) #define __sanitizer_syscall_post__sched_getaffinity(res, pid, lid, size, \ cpuset) \ __sanitizer_syscall_post_impl__sched_getaffinity( \ res, (long long)(pid), (long long)(lid), (long long)(size), \ (long long)(cpuset)) #define __sanitizer_syscall_pre_sched_yield() \ __sanitizer_syscall_pre_impl_sched_yield() #define __sanitizer_syscall_post_sched_yield(res) \ __sanitizer_syscall_post_impl_sched_yield(res) #define __sanitizer_syscall_pre__sched_protect(priority) \ __sanitizer_syscall_pre_impl__sched_protect((long long)(priority)) #define __sanitizer_syscall_post__sched_protect(res, priority) \ __sanitizer_syscall_post_impl__sched_protect(res, (long long)(priority)) /* syscall 352 has been skipped */ /* syscall 353 has been skipped */ #define __sanitizer_syscall_pre_fsync_range(fd, flags, start, length) \ __sanitizer_syscall_pre_impl_fsync_range( \ (long long)(fd), (long long)(flags), (long long)(start), \ (long long)(length)) #define __sanitizer_syscall_post_fsync_range(res, fd, flags, start, length) \ __sanitizer_syscall_post_impl_fsync_range( \ res, (long long)(fd), (long long)(flags), (long long)(start), \ (long long)(length)) #define __sanitizer_syscall_pre_uuidgen(store, count) \ __sanitizer_syscall_pre_impl_uuidgen((long long)(store), (long long)(count)) #define 
__sanitizer_syscall_post_uuidgen(res, store, count) \ __sanitizer_syscall_post_impl_uuidgen(res, (long long)(store), \ (long long)(count)) #define __sanitizer_syscall_pre_compat_90_getvfsstat(buf, bufsize, flags) \ __sanitizer_syscall_pre_impl_compat_90_getvfsstat( \ (long long)(buf), (long long)(bufsize), (long long)(flags)) #define __sanitizer_syscall_post_compat_90_getvfsstat(res, buf, bufsize, \ flags) \ __sanitizer_syscall_post_impl_compat_90_getvfsstat( \ res, (long long)(buf), (long long)(bufsize), (long long)(flags)) #define __sanitizer_syscall_pre_compat_90_statvfs1(path, buf, flags) \ __sanitizer_syscall_pre_impl_compat_90_statvfs1( \ (long long)(path), (long long)(buf), (long long)(flags)) #define __sanitizer_syscall_post_compat_90_statvfs1(res, path, buf, flags) \ __sanitizer_syscall_post_impl_compat_90_statvfs1( \ res, (long long)(path), (long long)(buf), (long long)(flags)) #define __sanitizer_syscall_pre_compat_90_fstatvfs1(fd, buf, flags) \ __sanitizer_syscall_pre_impl_compat_90_fstatvfs1( \ (long long)(fd), (long long)(buf), (long long)(flags)) #define __sanitizer_syscall_post_compat_90_fstatvfs1(res, fd, buf, flags) \ __sanitizer_syscall_post_impl_compat_90_fstatvfs1( \ res, (long long)(fd), (long long)(buf), (long long)(flags)) #define __sanitizer_syscall_pre_compat_30_fhstatvfs1(fhp, buf, flags) \ __sanitizer_syscall_pre_impl_compat_30_fhstatvfs1( \ (long long)(fhp), (long long)(buf), (long long)(flags)) #define __sanitizer_syscall_post_compat_30_fhstatvfs1(res, fhp, buf, flags) \ __sanitizer_syscall_post_impl_compat_30_fhstatvfs1( \ res, (long long)(fhp), (long long)(buf), (long long)(flags)) #define __sanitizer_syscall_pre_extattrctl(path, cmd, filename, attrnamespace, \ attrname) \ __sanitizer_syscall_pre_impl_extattrctl( \ (long long)(path), (long long)(cmd), (long long)(filename), \ (long long)(attrnamespace), (long long)(attrname)) #define __sanitizer_syscall_post_extattrctl(res, path, cmd, filename, \ attrnamespace, attrname) \ __sanitizer_syscall_post_impl_extattrctl( \ res, (long long)(path), (long long)(cmd), (long long)(filename), \ (long long)(attrnamespace), (long long)(attrname)) #define __sanitizer_syscall_pre_extattr_set_file(path, attrnamespace, \ attrname, data, nbytes) \ __sanitizer_syscall_pre_impl_extattr_set_file( \ (long long)(path), (long long)(attrnamespace), (long long)(attrname), \ (long long)(data), (long long)(nbytes)) #define __sanitizer_syscall_post_extattr_set_file(res, path, attrnamespace, \ attrname, data, nbytes) \ __sanitizer_syscall_post_impl_extattr_set_file( \ res, (long long)(path), (long long)(attrnamespace), \ (long long)(attrname), (long long)(data), (long long)(nbytes)) #define __sanitizer_syscall_pre_extattr_get_file(path, attrnamespace, \ attrname, data, nbytes) \ __sanitizer_syscall_pre_impl_extattr_get_file( \ (long long)(path), (long long)(attrnamespace), (long long)(attrname), \ (long long)(data), (long long)(nbytes)) #define __sanitizer_syscall_post_extattr_get_file(res, path, attrnamespace, \ attrname, data, nbytes) \ __sanitizer_syscall_post_impl_extattr_get_file( \ res, (long long)(path), (long long)(attrnamespace), \ (long long)(attrname), (long long)(data), (long long)(nbytes)) #define __sanitizer_syscall_pre_extattr_delete_file(path, attrnamespace, \ attrname) \ __sanitizer_syscall_pre_impl_extattr_delete_file( \ (long long)(path), (long long)(attrnamespace), (long long)(attrname)) #define __sanitizer_syscall_post_extattr_delete_file(res, path, attrnamespace, \ attrname) \ 
__sanitizer_syscall_post_impl_extattr_delete_file( \ res, (long long)(path), (long long)(attrnamespace), \ (long long)(attrname)) #define __sanitizer_syscall_pre_extattr_set_fd(fd, attrnamespace, attrname, \ data, nbytes) \ __sanitizer_syscall_pre_impl_extattr_set_fd( \ (long long)(fd), (long long)(attrnamespace), (long long)(attrname), \ (long long)(data), (long long)(nbytes)) #define __sanitizer_syscall_post_extattr_set_fd(res, fd, attrnamespace, \ attrname, data, nbytes) \ __sanitizer_syscall_post_impl_extattr_set_fd( \ res, (long long)(fd), (long long)(attrnamespace), (long long)(attrname), \ (long long)(data), (long long)(nbytes)) #define __sanitizer_syscall_pre_extattr_get_fd(fd, attrnamespace, attrname, \ data, nbytes) \ __sanitizer_syscall_pre_impl_extattr_get_fd( \ (long long)(fd), (long long)(attrnamespace), (long long)(attrname), \ (long long)(data), (long long)(nbytes)) #define __sanitizer_syscall_post_extattr_get_fd(res, fd, attrnamespace, \ attrname, data, nbytes) \ __sanitizer_syscall_post_impl_extattr_get_fd( \ res, (long long)(fd), (long long)(attrnamespace), (long long)(attrname), \ (long long)(data), (long long)(nbytes)) #define __sanitizer_syscall_pre_extattr_delete_fd(fd, attrnamespace, attrname) \ __sanitizer_syscall_pre_impl_extattr_delete_fd( \ (long long)(fd), (long long)(attrnamespace), (long long)(attrname)) #define __sanitizer_syscall_post_extattr_delete_fd(res, fd, attrnamespace, \ attrname) \ __sanitizer_syscall_post_impl_extattr_delete_fd( \ res, (long long)(fd), (long long)(attrnamespace), (long long)(attrname)) #define __sanitizer_syscall_pre_extattr_set_link(path, attrnamespace, \ attrname, data, nbytes) \ __sanitizer_syscall_pre_impl_extattr_set_link( \ (long long)(path), (long long)(attrnamespace), (long long)(attrname), \ (long long)(data), (long long)(nbytes)) #define __sanitizer_syscall_post_extattr_set_link(res, path, attrnamespace, \ attrname, data, nbytes) \ __sanitizer_syscall_post_impl_extattr_set_link( \ res, (long long)(path), (long long)(attrnamespace), \ (long long)(attrname), (long long)(data), (long long)(nbytes)) #define __sanitizer_syscall_pre_extattr_get_link(path, attrnamespace, \ attrname, data, nbytes) \ __sanitizer_syscall_pre_impl_extattr_get_link( \ (long long)(path), (long long)(attrnamespace), (long long)(attrname), \ (long long)(data), (long long)(nbytes)) #define __sanitizer_syscall_post_extattr_get_link(res, path, attrnamespace, \ attrname, data, nbytes) \ __sanitizer_syscall_post_impl_extattr_get_link( \ res, (long long)(path), (long long)(attrnamespace), \ (long long)(attrname), (long long)(data), (long long)(nbytes)) #define __sanitizer_syscall_pre_extattr_delete_link(path, attrnamespace, \ attrname) \ __sanitizer_syscall_pre_impl_extattr_delete_link( \ (long long)(path), (long long)(attrnamespace), (long long)(attrname)) #define __sanitizer_syscall_post_extattr_delete_link(res, path, attrnamespace, \ attrname) \ __sanitizer_syscall_post_impl_extattr_delete_link( \ res, (long long)(path), (long long)(attrnamespace), \ (long long)(attrname)) #define __sanitizer_syscall_pre_extattr_list_fd(fd, attrnamespace, data, \ nbytes) \ __sanitizer_syscall_pre_impl_extattr_list_fd( \ (long long)(fd), (long long)(attrnamespace), (long long)(data), \ (long long)(nbytes)) #define __sanitizer_syscall_post_extattr_list_fd(res, fd, attrnamespace, data, \ nbytes) \ __sanitizer_syscall_post_impl_extattr_list_fd( \ res, (long long)(fd), (long long)(attrnamespace), (long long)(data), \ (long long)(nbytes)) #define 
__sanitizer_syscall_pre_extattr_list_file(path, attrnamespace, data, \ nbytes) \ __sanitizer_syscall_pre_impl_extattr_list_file( \ (long long)(path), (long long)(attrnamespace), (long long)(data), \ (long long)(nbytes)) #define __sanitizer_syscall_post_extattr_list_file(res, path, attrnamespace, \ data, nbytes) \ __sanitizer_syscall_post_impl_extattr_list_file( \ res, (long long)(path), (long long)(attrnamespace), (long long)(data), \ (long long)(nbytes)) #define __sanitizer_syscall_pre_extattr_list_link(path, attrnamespace, data, \ nbytes) \ __sanitizer_syscall_pre_impl_extattr_list_link( \ (long long)(path), (long long)(attrnamespace), (long long)(data), \ (long long)(nbytes)) #define __sanitizer_syscall_post_extattr_list_link(res, path, attrnamespace, \ data, nbytes) \ __sanitizer_syscall_post_impl_extattr_list_link( \ res, (long long)(path), (long long)(attrnamespace), (long long)(data), \ (long long)(nbytes)) #define __sanitizer_syscall_pre_compat_50_pselect(nd, in, ou, ex, ts, mask) \ __sanitizer_syscall_pre_impl_compat_50_pselect( \ (long long)(nd), (long long)(in), (long long)(ou), (long long)(ex), \ (long long)(ts), (long long)(mask)) #define __sanitizer_syscall_post_compat_50_pselect(res, nd, in, ou, ex, ts, \ mask) \ __sanitizer_syscall_post_impl_compat_50_pselect( \ res, (long long)(nd), (long long)(in), (long long)(ou), (long long)(ex), \ (long long)(ts), (long long)(mask)) #define __sanitizer_syscall_pre_compat_50_pollts(fds, nfds, ts, mask) \ __sanitizer_syscall_pre_impl_compat_50_pollts( \ (long long)(fds), (long long)(nfds), (long long)(ts), (long long)(mask)) #define __sanitizer_syscall_post_compat_50_pollts(res, fds, nfds, ts, mask) \ __sanitizer_syscall_post_impl_compat_50_pollts( \ res, (long long)(fds), (long long)(nfds), (long long)(ts), \ (long long)(mask)) #define __sanitizer_syscall_pre_setxattr(path, name, value, size, flags) \ __sanitizer_syscall_pre_impl_setxattr((long long)(path), (long long)(name), \ (long long)(value), (long long)(size), \ (long long)(flags)) #define __sanitizer_syscall_post_setxattr(res, path, name, value, size, flags) \ __sanitizer_syscall_post_impl_setxattr( \ res, (long long)(path), (long long)(name), (long long)(value), \ (long long)(size), (long long)(flags)) #define __sanitizer_syscall_pre_lsetxattr(path, name, value, size, flags) \ __sanitizer_syscall_pre_impl_lsetxattr( \ (long long)(path), (long long)(name), (long long)(value), \ (long long)(size), (long long)(flags)) #define __sanitizer_syscall_post_lsetxattr(res, path, name, value, size, \ flags) \ __sanitizer_syscall_post_impl_lsetxattr( \ res, (long long)(path), (long long)(name), (long long)(value), \ (long long)(size), (long long)(flags)) #define __sanitizer_syscall_pre_fsetxattr(fd, name, value, size, flags) \ __sanitizer_syscall_pre_impl_fsetxattr( \ (long long)(fd), (long long)(name), (long long)(value), \ (long long)(size), (long long)(flags)) #define __sanitizer_syscall_post_fsetxattr(res, fd, name, value, size, flags) \ __sanitizer_syscall_post_impl_fsetxattr( \ res, (long long)(fd), (long long)(name), (long long)(value), \ (long long)(size), (long long)(flags)) #define __sanitizer_syscall_pre_getxattr(path, name, value, size) \ __sanitizer_syscall_pre_impl_getxattr((long long)(path), (long long)(name), \ (long long)(value), (long long)(size)) #define __sanitizer_syscall_post_getxattr(res, path, name, value, size) \ __sanitizer_syscall_post_impl_getxattr( \ res, (long long)(path), (long long)(name), (long long)(value), \ (long long)(size)) #define 
__sanitizer_syscall_pre_lgetxattr(path, name, value, size) \ __sanitizer_syscall_pre_impl_lgetxattr((long long)(path), (long long)(name), \ (long long)(value), \ (long long)(size)) #define __sanitizer_syscall_post_lgetxattr(res, path, name, value, size) \ __sanitizer_syscall_post_impl_lgetxattr( \ res, (long long)(path), (long long)(name), (long long)(value), \ (long long)(size)) #define __sanitizer_syscall_pre_fgetxattr(fd, name, value, size) \ __sanitizer_syscall_pre_impl_fgetxattr((long long)(fd), (long long)(name), \ (long long)(value), \ (long long)(size)) #define __sanitizer_syscall_post_fgetxattr(res, fd, name, value, size) \ __sanitizer_syscall_post_impl_fgetxattr( \ res, (long long)(fd), (long long)(name), (long long)(value), \ (long long)(size)) #define __sanitizer_syscall_pre_listxattr(path, list, size) \ __sanitizer_syscall_pre_impl_listxattr((long long)(path), (long long)(list), \ (long long)(size)) #define __sanitizer_syscall_post_listxattr(res, path, list, size) \ __sanitizer_syscall_post_impl_listxattr( \ res, (long long)(path), (long long)(list), (long long)(size)) #define __sanitizer_syscall_pre_llistxattr(path, list, size) \ __sanitizer_syscall_pre_impl_llistxattr( \ (long long)(path), (long long)(list), (long long)(size)) #define __sanitizer_syscall_post_llistxattr(res, path, list, size) \ __sanitizer_syscall_post_impl_llistxattr( \ res, (long long)(path), (long long)(list), (long long)(size)) #define __sanitizer_syscall_pre_flistxattr(fd, list, size) \ __sanitizer_syscall_pre_impl_flistxattr((long long)(fd), (long long)(list), \ (long long)(size)) #define __sanitizer_syscall_post_flistxattr(res, fd, list, size) \ __sanitizer_syscall_post_impl_flistxattr( \ res, (long long)(fd), (long long)(list), (long long)(size)) #define __sanitizer_syscall_pre_removexattr(path, name) \ __sanitizer_syscall_pre_impl_removexattr((long long)(path), (long long)(name)) #define __sanitizer_syscall_post_removexattr(res, path, name) \ __sanitizer_syscall_post_impl_removexattr(res, (long long)(path), \ (long long)(name)) #define __sanitizer_syscall_pre_lremovexattr(path, name) \ __sanitizer_syscall_pre_impl_lremovexattr((long long)(path), \ (long long)(name)) #define __sanitizer_syscall_post_lremovexattr(res, path, name) \ __sanitizer_syscall_post_impl_lremovexattr(res, (long long)(path), \ (long long)(name)) #define __sanitizer_syscall_pre_fremovexattr(fd, name) \ __sanitizer_syscall_pre_impl_fremovexattr((long long)(fd), (long long)(name)) #define __sanitizer_syscall_post_fremovexattr(res, fd, name) \ __sanitizer_syscall_post_impl_fremovexattr(res, (long long)(fd), \ (long long)(name)) #define __sanitizer_syscall_pre_compat_50___stat30(path, ub) \ __sanitizer_syscall_pre_impl_compat_50___stat30((long long)(path), \ (long long)(ub)) #define __sanitizer_syscall_post_compat_50___stat30(res, path, ub) \ __sanitizer_syscall_post_impl_compat_50___stat30(res, (long long)(path), \ (long long)(ub)) #define __sanitizer_syscall_pre_compat_50___fstat30(fd, sb) \ __sanitizer_syscall_pre_impl_compat_50___fstat30((long long)(fd), \ (long long)(sb)) #define __sanitizer_syscall_post_compat_50___fstat30(res, fd, sb) \ __sanitizer_syscall_post_impl_compat_50___fstat30(res, (long long)(fd), \ (long long)(sb)) #define __sanitizer_syscall_pre_compat_50___lstat30(path, ub) \ __sanitizer_syscall_pre_impl_compat_50___lstat30((long long)(path), \ (long long)(ub)) #define __sanitizer_syscall_post_compat_50___lstat30(res, path, ub) \ __sanitizer_syscall_post_impl_compat_50___lstat30(res, (long long)(path), \ (long 
long)(ub)) #define __sanitizer_syscall_pre___getdents30(fd, buf, count) \ __sanitizer_syscall_pre_impl___getdents30((long long)(fd), (long long)(buf), \ (long long)(count)) #define __sanitizer_syscall_post___getdents30(res, fd, buf, count) \ __sanitizer_syscall_post_impl___getdents30( \ res, (long long)(fd), (long long)(buf), (long long)(count)) #define __sanitizer_syscall_pre_posix_fadvise() \ __sanitizer_syscall_pre_impl_posix_fadvise((long long)()) #define __sanitizer_syscall_post_posix_fadvise(res) \ __sanitizer_syscall_post_impl_posix_fadvise(res, (long long)()) #define __sanitizer_syscall_pre_compat_30___fhstat30(fhp, sb) \ __sanitizer_syscall_pre_impl_compat_30___fhstat30((long long)(fhp), \ (long long)(sb)) #define __sanitizer_syscall_post_compat_30___fhstat30(res, fhp, sb) \ __sanitizer_syscall_post_impl_compat_30___fhstat30(res, (long long)(fhp), \ (long long)(sb)) #define __sanitizer_syscall_pre_compat_50___ntp_gettime30(ntvp) \ __sanitizer_syscall_pre_impl_compat_50___ntp_gettime30((long long)(ntvp)) #define __sanitizer_syscall_post_compat_50___ntp_gettime30(res, ntvp) \ __sanitizer_syscall_post_impl_compat_50___ntp_gettime30(res, \ (long long)(ntvp)) #define __sanitizer_syscall_pre___socket30(domain, type, protocol) \ __sanitizer_syscall_pre_impl___socket30( \ (long long)(domain), (long long)(type), (long long)(protocol)) #define __sanitizer_syscall_post___socket30(res, domain, type, protocol) \ __sanitizer_syscall_post_impl___socket30( \ res, (long long)(domain), (long long)(type), (long long)(protocol)) #define __sanitizer_syscall_pre___getfh30(fname, fhp, fh_size) \ __sanitizer_syscall_pre_impl___getfh30((long long)(fname), (long long)(fhp), \ (long long)(fh_size)) #define __sanitizer_syscall_post___getfh30(res, fname, fhp, fh_size) \ __sanitizer_syscall_post_impl___getfh30( \ res, (long long)(fname), (long long)(fhp), (long long)(fh_size)) #define __sanitizer_syscall_pre___fhopen40(fhp, fh_size, flags) \ __sanitizer_syscall_pre_impl___fhopen40( \ (long long)(fhp), (long long)(fh_size), (long long)(flags)) #define __sanitizer_syscall_post___fhopen40(res, fhp, fh_size, flags) \ __sanitizer_syscall_post_impl___fhopen40( \ res, (long long)(fhp), (long long)(fh_size), (long long)(flags)) #define __sanitizer_syscall_pre_compat_90_fhstatvfs1(fhp, fh_size, buf, flags) \ __sanitizer_syscall_pre_impl_compat_90_fhstatvfs1( \ (long long)(fhp), (long long)(fh_size), (long long)(buf), \ (long long)(flags)) #define __sanitizer_syscall_post_compat_90_fhstatvfs1(res, fhp, fh_size, buf, \ flags) \ __sanitizer_syscall_post_impl_compat_90_fhstatvfs1( \ res, (long long)(fhp), (long long)(fh_size), (long long)(buf), \ (long long)(flags)) #define __sanitizer_syscall_pre_compat_50___fhstat40(fhp, fh_size, sb) \ __sanitizer_syscall_pre_impl_compat_50___fhstat40( \ (long long)(fhp), (long long)(fh_size), (long long)(sb)) #define __sanitizer_syscall_post_compat_50___fhstat40(res, fhp, fh_size, sb) \ __sanitizer_syscall_post_impl_compat_50___fhstat40( \ res, (long long)(fhp), (long long)(fh_size), (long long)(sb)) #define __sanitizer_syscall_pre_aio_cancel(fildes, aiocbp) \ __sanitizer_syscall_pre_impl_aio_cancel((long long)(fildes), \ (long long)(aiocbp)) #define __sanitizer_syscall_post_aio_cancel(res, fildes, aiocbp) \ __sanitizer_syscall_post_impl_aio_cancel(res, (long long)(fildes), \ (long long)(aiocbp)) #define __sanitizer_syscall_pre_aio_error(aiocbp) \ __sanitizer_syscall_pre_impl_aio_error((long long)(aiocbp)) #define __sanitizer_syscall_post_aio_error(res, aiocbp) \ 
__sanitizer_syscall_post_impl_aio_error(res, (long long)(aiocbp)) #define __sanitizer_syscall_pre_aio_fsync(op, aiocbp) \ __sanitizer_syscall_pre_impl_aio_fsync((long long)(op), (long long)(aiocbp)) #define __sanitizer_syscall_post_aio_fsync(res, op, aiocbp) \ __sanitizer_syscall_post_impl_aio_fsync(res, (long long)(op), \ (long long)(aiocbp)) #define __sanitizer_syscall_pre_aio_read(aiocbp) \ __sanitizer_syscall_pre_impl_aio_read((long long)(aiocbp)) #define __sanitizer_syscall_post_aio_read(res, aiocbp) \ __sanitizer_syscall_post_impl_aio_read(res, (long long)(aiocbp)) #define __sanitizer_syscall_pre_aio_return(aiocbp) \ __sanitizer_syscall_pre_impl_aio_return((long long)(aiocbp)) #define __sanitizer_syscall_post_aio_return(res, aiocbp) \ __sanitizer_syscall_post_impl_aio_return(res, (long long)(aiocbp)) #define __sanitizer_syscall_pre_compat_50_aio_suspend(list, nent, timeout) \ __sanitizer_syscall_pre_impl_compat_50_aio_suspend( \ (long long)(list), (long long)(nent), (long long)(timeout)) #define __sanitizer_syscall_post_compat_50_aio_suspend(res, list, nent, \ timeout) \ __sanitizer_syscall_post_impl_compat_50_aio_suspend( \ res, (long long)(list), (long long)(nent), (long long)(timeout)) #define __sanitizer_syscall_pre_aio_write(aiocbp) \ __sanitizer_syscall_pre_impl_aio_write((long long)(aiocbp)) #define __sanitizer_syscall_post_aio_write(res, aiocbp) \ __sanitizer_syscall_post_impl_aio_write(res, (long long)(aiocbp)) #define __sanitizer_syscall_pre_lio_listio(mode, list, nent, sig) \ __sanitizer_syscall_pre_impl_lio_listio((long long)(mode), \ (long long)(list), \ (long long)(nent), (long long)(sig)) #define __sanitizer_syscall_post_lio_listio(res, mode, list, nent, sig) \ __sanitizer_syscall_post_impl_lio_listio( \ res, (long long)(mode), (long long)(list), (long long)(nent), \ (long long)(sig)) /* syscall 407 has been skipped */ /* syscall 408 has been skipped */ /* syscall 409 has been skipped */ #define __sanitizer_syscall_pre___mount50(type, path, flags, data, data_len) \ __sanitizer_syscall_pre_impl___mount50( \ (long long)(type), (long long)(path), (long long)(flags), \ (long long)(data), (long long)(data_len)) #define __sanitizer_syscall_post___mount50(res, type, path, flags, data, \ data_len) \ __sanitizer_syscall_post_impl___mount50( \ res, (long long)(type), (long long)(path), (long long)(flags), \ (long long)(data), (long long)(data_len)) #define __sanitizer_syscall_pre_mremap(old_address, old_size, new_address, \ new_size, flags) \ __sanitizer_syscall_pre_impl_mremap( \ (long long)(old_address), (long long)(old_size), \ (long long)(new_address), (long long)(new_size), (long long)(flags)) #define __sanitizer_syscall_post_mremap(res, old_address, old_size, \ new_address, new_size, flags) \ __sanitizer_syscall_post_impl_mremap( \ res, (long long)(old_address), (long long)(old_size), \ (long long)(new_address), (long long)(new_size), (long long)(flags)) #define __sanitizer_syscall_pre_pset_create(psid) \ __sanitizer_syscall_pre_impl_pset_create((long long)(psid)) #define __sanitizer_syscall_post_pset_create(res, psid) \ __sanitizer_syscall_post_impl_pset_create(res, (long long)(psid)) #define __sanitizer_syscall_pre_pset_destroy(psid) \ __sanitizer_syscall_pre_impl_pset_destroy((long long)(psid)) #define __sanitizer_syscall_post_pset_destroy(res, psid) \ __sanitizer_syscall_post_impl_pset_destroy(res, (long long)(psid)) #define __sanitizer_syscall_pre_pset_assign(psid, cpuid, opsid) \ __sanitizer_syscall_pre_impl_pset_assign( \ (long long)(psid), (long long)(cpuid), (long 
long)(opsid)) #define __sanitizer_syscall_post_pset_assign(res, psid, cpuid, opsid) \ __sanitizer_syscall_post_impl_pset_assign( \ res, (long long)(psid), (long long)(cpuid), (long long)(opsid)) #define __sanitizer_syscall_pre__pset_bind(idtype, first_id, second_id, psid, \ opsid) \ __sanitizer_syscall_pre_impl__pset_bind( \ (long long)(idtype), (long long)(first_id), (long long)(second_id), \ (long long)(psid), (long long)(opsid)) #define __sanitizer_syscall_post__pset_bind(res, idtype, first_id, second_id, \ psid, opsid) \ __sanitizer_syscall_post_impl__pset_bind( \ res, (long long)(idtype), (long long)(first_id), (long long)(second_id), \ (long long)(psid), (long long)(opsid)) #define __sanitizer_syscall_pre___posix_fadvise50(fd, PAD, offset, len, \ advice) \ __sanitizer_syscall_pre_impl___posix_fadvise50( \ (long long)(fd), (long long)(PAD), (long long)(offset), \ (long long)(len), (long long)(advice)) #define __sanitizer_syscall_post___posix_fadvise50(res, fd, PAD, offset, len, \ advice) \ __sanitizer_syscall_post_impl___posix_fadvise50( \ res, (long long)(fd), (long long)(PAD), (long long)(offset), \ (long long)(len), (long long)(advice)) #define __sanitizer_syscall_pre___select50(nd, in, ou, ex, tv) \ __sanitizer_syscall_pre_impl___select50((long long)(nd), (long long)(in), \ (long long)(ou), (long long)(ex), \ (long long)(tv)) #define __sanitizer_syscall_post___select50(res, nd, in, ou, ex, tv) \ __sanitizer_syscall_post_impl___select50(res, (long long)(nd), \ (long long)(in), (long long)(ou), \ (long long)(ex), (long long)(tv)) #define __sanitizer_syscall_pre___gettimeofday50(tp, tzp) \ __sanitizer_syscall_pre_impl___gettimeofday50((long long)(tp), \ (long long)(tzp)) #define __sanitizer_syscall_post___gettimeofday50(res, tp, tzp) \ __sanitizer_syscall_post_impl___gettimeofday50(res, (long long)(tp), \ (long long)(tzp)) #define __sanitizer_syscall_pre___settimeofday50(tv, tzp) \ __sanitizer_syscall_pre_impl___settimeofday50((long long)(tv), \ (long long)(tzp)) #define __sanitizer_syscall_post___settimeofday50(res, tv, tzp) \ __sanitizer_syscall_post_impl___settimeofday50(res, (long long)(tv), \ (long long)(tzp)) #define __sanitizer_syscall_pre___utimes50(path, tptr) \ __sanitizer_syscall_pre_impl___utimes50((long long)(path), (long long)(tptr)) #define __sanitizer_syscall_post___utimes50(res, path, tptr) \ __sanitizer_syscall_post_impl___utimes50(res, (long long)(path), \ (long long)(tptr)) #define __sanitizer_syscall_pre___adjtime50(delta, olddelta) \ __sanitizer_syscall_pre_impl___adjtime50((long long)(delta), \ (long long)(olddelta)) #define __sanitizer_syscall_post___adjtime50(res, delta, olddelta) \ __sanitizer_syscall_post_impl___adjtime50(res, (long long)(delta), \ (long long)(olddelta)) #define __sanitizer_syscall_pre___lfs_segwait50(fsidp, tv) \ __sanitizer_syscall_pre_impl___lfs_segwait50((long long)(fsidp), \ (long long)(tv)) #define __sanitizer_syscall_post___lfs_segwait50(res, fsidp, tv) \ __sanitizer_syscall_post_impl___lfs_segwait50(res, (long long)(fsidp), \ (long long)(tv)) #define __sanitizer_syscall_pre___futimes50(fd, tptr) \ __sanitizer_syscall_pre_impl___futimes50((long long)(fd), (long long)(tptr)) #define __sanitizer_syscall_post___futimes50(res, fd, tptr) \ __sanitizer_syscall_post_impl___futimes50(res, (long long)(fd), \ (long long)(tptr)) #define __sanitizer_syscall_pre___lutimes50(path, tptr) \ __sanitizer_syscall_pre_impl___lutimes50((long long)(path), (long long)(tptr)) #define __sanitizer_syscall_post___lutimes50(res, path, tptr) \ 
__sanitizer_syscall_post_impl___lutimes50(res, (long long)(path), \ (long long)(tptr)) #define __sanitizer_syscall_pre___setitimer50(which, itv, oitv) \ __sanitizer_syscall_pre_impl___setitimer50( \ (long long)(which), (long long)(itv), (long long)(oitv)) #define __sanitizer_syscall_post___setitimer50(res, which, itv, oitv) \ __sanitizer_syscall_post_impl___setitimer50( \ res, (long long)(which), (long long)(itv), (long long)(oitv)) #define __sanitizer_syscall_pre___getitimer50(which, itv) \ __sanitizer_syscall_pre_impl___getitimer50((long long)(which), \ (long long)(itv)) #define __sanitizer_syscall_post___getitimer50(res, which, itv) \ __sanitizer_syscall_post_impl___getitimer50(res, (long long)(which), \ (long long)(itv)) #define __sanitizer_syscall_pre___clock_gettime50(clock_id, tp) \ __sanitizer_syscall_pre_impl___clock_gettime50((long long)(clock_id), \ (long long)(tp)) #define __sanitizer_syscall_post___clock_gettime50(res, clock_id, tp) \ __sanitizer_syscall_post_impl___clock_gettime50(res, (long long)(clock_id), \ (long long)(tp)) #define __sanitizer_syscall_pre___clock_settime50(clock_id, tp) \ __sanitizer_syscall_pre_impl___clock_settime50((long long)(clock_id), \ (long long)(tp)) #define __sanitizer_syscall_post___clock_settime50(res, clock_id, tp) \ __sanitizer_syscall_post_impl___clock_settime50(res, (long long)(clock_id), \ (long long)(tp)) #define __sanitizer_syscall_pre___clock_getres50(clock_id, tp) \ __sanitizer_syscall_pre_impl___clock_getres50((long long)(clock_id), \ (long long)(tp)) #define __sanitizer_syscall_post___clock_getres50(res, clock_id, tp) \ __sanitizer_syscall_post_impl___clock_getres50(res, (long long)(clock_id), \ (long long)(tp)) #define __sanitizer_syscall_pre___nanosleep50(rqtp, rmtp) \ __sanitizer_syscall_pre_impl___nanosleep50((long long)(rqtp), \ (long long)(rmtp)) #define __sanitizer_syscall_post___nanosleep50(res, rqtp, rmtp) \ __sanitizer_syscall_post_impl___nanosleep50(res, (long long)(rqtp), \ (long long)(rmtp)) #define __sanitizer_syscall_pre_____sigtimedwait50(set, info, timeout) \ __sanitizer_syscall_pre_impl_____sigtimedwait50( \ (long long)(set), (long long)(info), (long long)(timeout)) #define __sanitizer_syscall_post_____sigtimedwait50(res, set, info, timeout) \ __sanitizer_syscall_post_impl_____sigtimedwait50( \ res, (long long)(set), (long long)(info), (long long)(timeout)) #define __sanitizer_syscall_pre___mq_timedsend50(mqdes, msg_ptr, msg_len, \ msg_prio, abs_timeout) \ __sanitizer_syscall_pre_impl___mq_timedsend50( \ (long long)(mqdes), (long long)(msg_ptr), (long long)(msg_len), \ (long long)(msg_prio), (long long)(abs_timeout)) #define __sanitizer_syscall_post___mq_timedsend50( \ res, mqdes, msg_ptr, msg_len, msg_prio, abs_timeout) \ __sanitizer_syscall_post_impl___mq_timedsend50( \ res, (long long)(mqdes), (long long)(msg_ptr), (long long)(msg_len), \ (long long)(msg_prio), (long long)(abs_timeout)) #define __sanitizer_syscall_pre___mq_timedreceive50(mqdes, msg_ptr, msg_len, \ msg_prio, abs_timeout) \ __sanitizer_syscall_pre_impl___mq_timedreceive50( \ (long long)(mqdes), (long long)(msg_ptr), (long long)(msg_len), \ (long long)(msg_prio), (long long)(abs_timeout)) #define __sanitizer_syscall_post___mq_timedreceive50( \ res, mqdes, msg_ptr, msg_len, msg_prio, abs_timeout) \ __sanitizer_syscall_post_impl___mq_timedreceive50( \ res, (long long)(mqdes), (long long)(msg_ptr), (long long)(msg_len), \ (long long)(msg_prio), (long long)(abs_timeout)) #define __sanitizer_syscall_pre_compat_60__lwp_park(ts, unpark, hint, \ unparkhint) 
\ __sanitizer_syscall_pre_impl_compat_60__lwp_park( \ (long long)(ts), (long long)(unpark), (long long)(hint), \ (long long)(unparkhint)) #define __sanitizer_syscall_post_compat_60__lwp_park(res, ts, unpark, hint, \ unparkhint) \ __sanitizer_syscall_post_impl_compat_60__lwp_park( \ res, (long long)(ts), (long long)(unpark), (long long)(hint), \ (long long)(unparkhint)) #define __sanitizer_syscall_pre___kevent50(fd, changelist, nchanges, \ eventlist, nevents, timeout) \ __sanitizer_syscall_pre_impl___kevent50( \ (long long)(fd), (long long)(changelist), (long long)(nchanges), \ (long long)(eventlist), (long long)(nevents), (long long)(timeout)) #define __sanitizer_syscall_post___kevent50(res, fd, changelist, nchanges, \ eventlist, nevents, timeout) \ __sanitizer_syscall_post_impl___kevent50( \ res, (long long)(fd), (long long)(changelist), (long long)(nchanges), \ (long long)(eventlist), (long long)(nevents), (long long)(timeout)) #define __sanitizer_syscall_pre___pselect50(nd, in, ou, ex, ts, mask) \ __sanitizer_syscall_pre_impl___pselect50((long long)(nd), (long long)(in), \ (long long)(ou), (long long)(ex), \ (long long)(ts), (long long)(mask)) #define __sanitizer_syscall_post___pselect50(res, nd, in, ou, ex, ts, mask) \ __sanitizer_syscall_post_impl___pselect50( \ res, (long long)(nd), (long long)(in), (long long)(ou), (long long)(ex), \ (long long)(ts), (long long)(mask)) #define __sanitizer_syscall_pre___pollts50(fds, nfds, ts, mask) \ __sanitizer_syscall_pre_impl___pollts50((long long)(fds), (long long)(nfds), \ (long long)(ts), (long long)(mask)) #define __sanitizer_syscall_post___pollts50(res, fds, nfds, ts, mask) \ __sanitizer_syscall_post_impl___pollts50(res, (long long)(fds), \ (long long)(nfds), (long long)(ts), \ (long long)(mask)) #define __sanitizer_syscall_pre___aio_suspend50(list, nent, timeout) \ __sanitizer_syscall_pre_impl___aio_suspend50( \ (long long)(list), (long long)(nent), (long long)(timeout)) #define __sanitizer_syscall_post___aio_suspend50(res, list, nent, timeout) \ __sanitizer_syscall_post_impl___aio_suspend50( \ res, (long long)(list), (long long)(nent), (long long)(timeout)) #define __sanitizer_syscall_pre___stat50(path, ub) \ __sanitizer_syscall_pre_impl___stat50((long long)(path), (long long)(ub)) #define __sanitizer_syscall_post___stat50(res, path, ub) \ __sanitizer_syscall_post_impl___stat50(res, (long long)(path), \ (long long)(ub)) #define __sanitizer_syscall_pre___fstat50(fd, sb) \ __sanitizer_syscall_pre_impl___fstat50((long long)(fd), (long long)(sb)) #define __sanitizer_syscall_post___fstat50(res, fd, sb) \ __sanitizer_syscall_post_impl___fstat50(res, (long long)(fd), (long long)(sb)) #define __sanitizer_syscall_pre___lstat50(path, ub) \ __sanitizer_syscall_pre_impl___lstat50((long long)(path), (long long)(ub)) #define __sanitizer_syscall_post___lstat50(res, path, ub) \ __sanitizer_syscall_post_impl___lstat50(res, (long long)(path), \ (long long)(ub)) #define __sanitizer_syscall_pre_____semctl50(semid, semnum, cmd, arg) \ __sanitizer_syscall_pre_impl_____semctl50( \ (long long)(semid), (long long)(semnum), (long long)(cmd), \ (long long)(arg)) #define __sanitizer_syscall_post_____semctl50(res, semid, semnum, cmd, arg) \ __sanitizer_syscall_post_impl_____semctl50( \ res, (long long)(semid), (long long)(semnum), (long long)(cmd), \ (long long)(arg)) #define __sanitizer_syscall_pre___shmctl50(shmid, cmd, buf) \ __sanitizer_syscall_pre_impl___shmctl50((long long)(shmid), \ (long long)(cmd), (long long)(buf)) #define 
__sanitizer_syscall_post___shmctl50(res, shmid, cmd, buf) \ __sanitizer_syscall_post_impl___shmctl50(res, (long long)(shmid), \ (long long)(cmd), (long long)(buf)) #define __sanitizer_syscall_pre___msgctl50(msqid, cmd, buf) \ __sanitizer_syscall_pre_impl___msgctl50((long long)(msqid), \ (long long)(cmd), (long long)(buf)) #define __sanitizer_syscall_post___msgctl50(res, msqid, cmd, buf) \ __sanitizer_syscall_post_impl___msgctl50(res, (long long)(msqid), \ (long long)(cmd), (long long)(buf)) #define __sanitizer_syscall_pre___getrusage50(who, rusage) \ __sanitizer_syscall_pre_impl___getrusage50((long long)(who), \ (long long)(rusage)) #define __sanitizer_syscall_post___getrusage50(res, who, rusage) \ __sanitizer_syscall_post_impl___getrusage50(res, (long long)(who), \ (long long)(rusage)) #define __sanitizer_syscall_pre___timer_settime50(timerid, flags, value, \ ovalue) \ __sanitizer_syscall_pre_impl___timer_settime50( \ (long long)(timerid), (long long)(flags), (long long)(value), \ (long long)(ovalue)) #define __sanitizer_syscall_post___timer_settime50(res, timerid, flags, value, \ ovalue) \ __sanitizer_syscall_post_impl___timer_settime50( \ res, (long long)(timerid), (long long)(flags), (long long)(value), \ (long long)(ovalue)) #define __sanitizer_syscall_pre___timer_gettime50(timerid, value) \ __sanitizer_syscall_pre_impl___timer_gettime50((long long)(timerid), \ (long long)(value)) #define __sanitizer_syscall_post___timer_gettime50(res, timerid, value) \ __sanitizer_syscall_post_impl___timer_gettime50(res, (long long)(timerid), \ (long long)(value)) #if defined(NTP) || !defined(_KERNEL_OPT) #define __sanitizer_syscall_pre___ntp_gettime50(ntvp) \ __sanitizer_syscall_pre_impl___ntp_gettime50((long long)(ntvp)) #define __sanitizer_syscall_post___ntp_gettime50(res, ntvp) \ __sanitizer_syscall_post_impl___ntp_gettime50(res, (long long)(ntvp)) #else /* syscall 448 has been skipped */ #endif #define __sanitizer_syscall_pre___wait450(pid, status, options, rusage) \ __sanitizer_syscall_pre_impl___wait450( \ (long long)(pid), (long long)(status), (long long)(options), \ (long long)(rusage)) #define __sanitizer_syscall_post___wait450(res, pid, status, options, rusage) \ __sanitizer_syscall_post_impl___wait450( \ res, (long long)(pid), (long long)(status), (long long)(options), \ (long long)(rusage)) #define __sanitizer_syscall_pre___mknod50(path, mode, dev) \ __sanitizer_syscall_pre_impl___mknod50((long long)(path), (long long)(mode), \ (long long)(dev)) #define __sanitizer_syscall_post___mknod50(res, path, mode, dev) \ __sanitizer_syscall_post_impl___mknod50(res, (long long)(path), \ (long long)(mode), (long long)(dev)) #define __sanitizer_syscall_pre___fhstat50(fhp, fh_size, sb) \ __sanitizer_syscall_pre_impl___fhstat50( \ (long long)(fhp), (long long)(fh_size), (long long)(sb)) #define __sanitizer_syscall_post___fhstat50(res, fhp, fh_size, sb) \ __sanitizer_syscall_post_impl___fhstat50( \ res, (long long)(fhp), (long long)(fh_size), (long long)(sb)) /* syscall 452 has been skipped */ #define __sanitizer_syscall_pre_pipe2(fildes, flags) \ __sanitizer_syscall_pre_impl_pipe2((long long)(fildes), (long long)(flags)) #define __sanitizer_syscall_post_pipe2(res, fildes, flags) \ __sanitizer_syscall_post_impl_pipe2(res, (long long)(fildes), \ (long long)(flags)) #define __sanitizer_syscall_pre_dup3(from, to, flags) \ __sanitizer_syscall_pre_impl_dup3((long long)(from), (long long)(to), \ (long long)(flags)) #define __sanitizer_syscall_post_dup3(res, from, to, flags) \ 
__sanitizer_syscall_post_impl_dup3(res, (long long)(from), (long long)(to), \ (long long)(flags)) #define __sanitizer_syscall_pre_kqueue1(flags) \ __sanitizer_syscall_pre_impl_kqueue1((long long)(flags)) #define __sanitizer_syscall_post_kqueue1(res, flags) \ __sanitizer_syscall_post_impl_kqueue1(res, (long long)(flags)) #define __sanitizer_syscall_pre_paccept(s, name, anamelen, mask, flags) \ __sanitizer_syscall_pre_impl_paccept((long long)(s), (long long)(name), \ (long long)(anamelen), \ (long long)(mask), (long long)(flags)) #define __sanitizer_syscall_post_paccept(res, s, name, anamelen, mask, flags) \ __sanitizer_syscall_post_impl_paccept( \ res, (long long)(s), (long long)(name), (long long)(anamelen), \ (long long)(mask), (long long)(flags)) #define __sanitizer_syscall_pre_linkat(fd1, name1, fd2, name2, flags) \ __sanitizer_syscall_pre_impl_linkat((long long)(fd1), (long long)(name1), \ (long long)(fd2), (long long)(name2), \ (long long)(flags)) #define __sanitizer_syscall_post_linkat(res, fd1, name1, fd2, name2, flags) \ __sanitizer_syscall_post_impl_linkat(res, (long long)(fd1), \ (long long)(name1), (long long)(fd2), \ (long long)(name2), (long long)(flags)) #define __sanitizer_syscall_pre_renameat(fromfd, from, tofd, to) \ __sanitizer_syscall_pre_impl_renameat((long long)(fromfd), \ (long long)(from), (long long)(tofd), \ (long long)(to)) #define __sanitizer_syscall_post_renameat(res, fromfd, from, tofd, to) \ __sanitizer_syscall_post_impl_renameat(res, (long long)(fromfd), \ (long long)(from), (long long)(tofd), \ (long long)(to)) #define __sanitizer_syscall_pre_mkfifoat(fd, path, mode) \ __sanitizer_syscall_pre_impl_mkfifoat((long long)(fd), (long long)(path), \ (long long)(mode)) #define __sanitizer_syscall_post_mkfifoat(res, fd, path, mode) \ __sanitizer_syscall_post_impl_mkfifoat(res, (long long)(fd), \ (long long)(path), (long long)(mode)) #define __sanitizer_syscall_pre_mknodat(fd, path, mode, PAD, dev) \ __sanitizer_syscall_pre_impl_mknodat((long long)(fd), (long long)(path), \ (long long)(mode), (long long)(PAD), \ (long long)(dev)) #define __sanitizer_syscall_post_mknodat(res, fd, path, mode, PAD, dev) \ __sanitizer_syscall_post_impl_mknodat(res, (long long)(fd), \ (long long)(path), (long long)(mode), \ (long long)(PAD), (long long)(dev)) #define __sanitizer_syscall_pre_mkdirat(fd, path, mode) \ __sanitizer_syscall_pre_impl_mkdirat((long long)(fd), (long long)(path), \ (long long)(mode)) #define __sanitizer_syscall_post_mkdirat(res, fd, path, mode) \ __sanitizer_syscall_post_impl_mkdirat(res, (long long)(fd), \ (long long)(path), (long long)(mode)) #define __sanitizer_syscall_pre_faccessat(fd, path, amode, flag) \ __sanitizer_syscall_pre_impl_faccessat((long long)(fd), (long long)(path), \ (long long)(amode), \ (long long)(flag)) #define __sanitizer_syscall_post_faccessat(res, fd, path, amode, flag) \ __sanitizer_syscall_post_impl_faccessat( \ res, (long long)(fd), (long long)(path), (long long)(amode), \ (long long)(flag)) #define __sanitizer_syscall_pre_fchmodat(fd, path, mode, flag) \ __sanitizer_syscall_pre_impl_fchmodat((long long)(fd), (long long)(path), \ (long long)(mode), (long long)(flag)) #define __sanitizer_syscall_post_fchmodat(res, fd, path, mode, flag) \ __sanitizer_syscall_post_impl_fchmodat(res, (long long)(fd), \ (long long)(path), (long long)(mode), \ (long long)(flag)) #define __sanitizer_syscall_pre_fchownat(fd, path, owner, group, flag) \ __sanitizer_syscall_pre_impl_fchownat((long long)(fd), (long long)(path), \ (long long)(owner), \ (long 
long)(group), (long long)(flag)) #define __sanitizer_syscall_post_fchownat(res, fd, path, owner, group, flag) \ __sanitizer_syscall_post_impl_fchownat( \ res, (long long)(fd), (long long)(path), (long long)(owner), \ (long long)(group), (long long)(flag)) #define __sanitizer_syscall_pre_fexecve(fd, argp, envp) \ __sanitizer_syscall_pre_impl_fexecve((long long)(fd), (long long)(argp), \ (long long)(envp)) #define __sanitizer_syscall_post_fexecve(res, fd, argp, envp) \ __sanitizer_syscall_post_impl_fexecve(res, (long long)(fd), \ (long long)(argp), (long long)(envp)) #define __sanitizer_syscall_pre_fstatat(fd, path, buf, flag) \ __sanitizer_syscall_pre_impl_fstatat((long long)(fd), (long long)(path), \ (long long)(buf), (long long)(flag)) #define __sanitizer_syscall_post_fstatat(res, fd, path, buf, flag) \ __sanitizer_syscall_post_impl_fstatat(res, (long long)(fd), \ (long long)(path), (long long)(buf), \ (long long)(flag)) #define __sanitizer_syscall_pre_utimensat(fd, path, tptr, flag) \ __sanitizer_syscall_pre_impl_utimensat((long long)(fd), (long long)(path), \ (long long)(tptr), (long long)(flag)) #define __sanitizer_syscall_post_utimensat(res, fd, path, tptr, flag) \ __sanitizer_syscall_post_impl_utimensat( \ res, (long long)(fd), (long long)(path), (long long)(tptr), \ (long long)(flag)) #define __sanitizer_syscall_pre_openat(fd, path, oflags, mode) \ __sanitizer_syscall_pre_impl_openat((long long)(fd), (long long)(path), \ (long long)(oflags), (long long)(mode)) #define __sanitizer_syscall_post_openat(res, fd, path, oflags, mode) \ __sanitizer_syscall_post_impl_openat(res, (long long)(fd), \ (long long)(path), (long long)(oflags), \ (long long)(mode)) #define __sanitizer_syscall_pre_readlinkat(fd, path, buf, bufsize) \ __sanitizer_syscall_pre_impl_readlinkat((long long)(fd), (long long)(path), \ (long long)(buf), \ (long long)(bufsize)) #define __sanitizer_syscall_post_readlinkat(res, fd, path, buf, bufsize) \ __sanitizer_syscall_post_impl_readlinkat( \ res, (long long)(fd), (long long)(path), (long long)(buf), \ (long long)(bufsize)) #define __sanitizer_syscall_pre_symlinkat(path1, fd, path2) \ __sanitizer_syscall_pre_impl_symlinkat((long long)(path1), (long long)(fd), \ (long long)(path2)) #define __sanitizer_syscall_post_symlinkat(res, path1, fd, path2) \ __sanitizer_syscall_post_impl_symlinkat(res, (long long)(path1), \ (long long)(fd), (long long)(path2)) #define __sanitizer_syscall_pre_unlinkat(fd, path, flag) \ __sanitizer_syscall_pre_impl_unlinkat((long long)(fd), (long long)(path), \ (long long)(flag)) #define __sanitizer_syscall_post_unlinkat(res, fd, path, flag) \ __sanitizer_syscall_post_impl_unlinkat(res, (long long)(fd), \ (long long)(path), (long long)(flag)) #define __sanitizer_syscall_pre_futimens(fd, tptr) \ __sanitizer_syscall_pre_impl_futimens((long long)(fd), (long long)(tptr)) #define __sanitizer_syscall_post_futimens(res, fd, tptr) \ __sanitizer_syscall_post_impl_futimens(res, (long long)(fd), \ (long long)(tptr)) #define __sanitizer_syscall_pre___quotactl(path, args) \ __sanitizer_syscall_pre_impl___quotactl((long long)(path), (long long)(args)) #define __sanitizer_syscall_post___quotactl(res, path, args) \ __sanitizer_syscall_post_impl___quotactl(res, (long long)(path), \ (long long)(args)) #define __sanitizer_syscall_pre_posix_spawn(pid, path, file_actions, attrp, \ argv, envp) \ __sanitizer_syscall_pre_impl_posix_spawn( \ (long long)(pid), (long long)(path), (long long)(file_actions), \ (long long)(attrp), (long long)(argv), (long long)(envp)) #define 
__sanitizer_syscall_post_posix_spawn(res, pid, path, file_actions, \ attrp, argv, envp) \ __sanitizer_syscall_post_impl_posix_spawn( \ res, (long long)(pid), (long long)(path), (long long)(file_actions), \ (long long)(attrp), (long long)(argv), (long long)(envp)) #define __sanitizer_syscall_pre_recvmmsg(s, mmsg, vlen, flags, timeout) \ __sanitizer_syscall_pre_impl_recvmmsg((long long)(s), (long long)(mmsg), \ (long long)(vlen), (long long)(flags), \ (long long)(timeout)) #define __sanitizer_syscall_post_recvmmsg(res, s, mmsg, vlen, flags, timeout) \ __sanitizer_syscall_post_impl_recvmmsg( \ res, (long long)(s), (long long)(mmsg), (long long)(vlen), \ (long long)(flags), (long long)(timeout)) #define __sanitizer_syscall_pre_sendmmsg(s, mmsg, vlen, flags) \ __sanitizer_syscall_pre_impl_sendmmsg((long long)(s), (long long)(mmsg), \ (long long)(vlen), (long long)(flags)) #define __sanitizer_syscall_post_sendmmsg(res, s, mmsg, vlen, flags) \ __sanitizer_syscall_post_impl_sendmmsg(res, (long long)(s), \ (long long)(mmsg), (long long)(vlen), \ (long long)(flags)) #define __sanitizer_syscall_pre_clock_nanosleep(clock_id, flags, rqtp, rmtp) \ __sanitizer_syscall_pre_impl_clock_nanosleep( \ (long long)(clock_id), (long long)(flags), (long long)(rqtp), \ (long long)(rmtp)) #define __sanitizer_syscall_post_clock_nanosleep(res, clock_id, flags, rqtp, \ rmtp) \ __sanitizer_syscall_post_impl_clock_nanosleep( \ res, (long long)(clock_id), (long long)(flags), (long long)(rqtp), \ (long long)(rmtp)) #define __sanitizer_syscall_pre____lwp_park60(clock_id, flags, ts, unpark, \ hint, unparkhint) \ __sanitizer_syscall_pre_impl____lwp_park60( \ (long long)(clock_id), (long long)(flags), (long long)(ts), \ (long long)(unpark), (long long)(hint), (long long)(unparkhint)) #define __sanitizer_syscall_post____lwp_park60(res, clock_id, flags, ts, \ unpark, hint, unparkhint) \ __sanitizer_syscall_post_impl____lwp_park60( \ res, (long long)(clock_id), (long long)(flags), (long long)(ts), \ (long long)(unpark), (long long)(hint), (long long)(unparkhint)) #define __sanitizer_syscall_pre_posix_fallocate(fd, PAD, pos, len) \ __sanitizer_syscall_pre_impl_posix_fallocate( \ (long long)(fd), (long long)(PAD), (long long)(pos), (long long)(len)) #define __sanitizer_syscall_post_posix_fallocate(res, fd, PAD, pos, len) \ __sanitizer_syscall_post_impl_posix_fallocate( \ res, (long long)(fd), (long long)(PAD), (long long)(pos), \ (long long)(len)) #define __sanitizer_syscall_pre_fdiscard(fd, PAD, pos, len) \ __sanitizer_syscall_pre_impl_fdiscard((long long)(fd), (long long)(PAD), \ (long long)(pos), (long long)(len)) #define __sanitizer_syscall_post_fdiscard(res, fd, PAD, pos, len) \ __sanitizer_syscall_post_impl_fdiscard(res, (long long)(fd), \ (long long)(PAD), (long long)(pos), \ (long long)(len)) #define __sanitizer_syscall_pre_wait6(idtype, id, status, options, wru, info) \ __sanitizer_syscall_pre_impl_wait6( \ (long long)(idtype), (long long)(id), (long long)(status), \ (long long)(options), (long long)(wru), (long long)(info)) #define __sanitizer_syscall_post_wait6(res, idtype, id, status, options, wru, \ info) \ __sanitizer_syscall_post_impl_wait6( \ res, (long long)(idtype), (long long)(id), (long long)(status), \ (long long)(options), (long long)(wru), (long long)(info)) #define __sanitizer_syscall_pre_clock_getcpuclockid2(idtype, id, clock_id) \ __sanitizer_syscall_pre_impl_clock_getcpuclockid2( \ (long long)(idtype), (long long)(id), (long long)(clock_id)) #define __sanitizer_syscall_post_clock_getcpuclockid2(res, 
idtype, id, \ clock_id) \ __sanitizer_syscall_post_impl_clock_getcpuclockid2( \ res, (long long)(idtype), (long long)(id), (long long)(clock_id)) #define __sanitizer_syscall_pre___getvfsstat90(buf, bufsize, flags) \ __sanitizer_syscall_pre_impl___getvfsstat90( \ (long long)(buf), (long long)(bufsize), (long long)(flags)) #define __sanitizer_syscall_post___getvfsstat90(res, buf, bufsize, flags) \ __sanitizer_syscall_post_impl___getvfsstat90( \ res, (long long)(buf), (long long)(bufsize), (long long)(flags)) #define __sanitizer_syscall_pre___statvfs190(path, buf, flags) \ __sanitizer_syscall_pre_impl___statvfs190( \ (long long)(path), (long long)(buf), (long long)(flags)) #define __sanitizer_syscall_post___statvfs190(res, path, buf, flags) \ __sanitizer_syscall_post_impl___statvfs190( \ res, (long long)(path), (long long)(buf), (long long)(flags)) #define __sanitizer_syscall_pre___fstatvfs190(fd, buf, flags) \ __sanitizer_syscall_pre_impl___fstatvfs190( \ (long long)(fd), (long long)(buf), (long long)(flags)) #define __sanitizer_syscall_post___fstatvfs190(res, fd, buf, flags) \ __sanitizer_syscall_post_impl___fstatvfs190( \ res, (long long)(fd), (long long)(buf), (long long)(flags)) #define __sanitizer_syscall_pre___fhstatvfs190(fhp, fh_size, buf, flags) \ __sanitizer_syscall_pre_impl___fhstatvfs190( \ (long long)(fhp), (long long)(fh_size), (long long)(buf), \ (long long)(flags)) #define __sanitizer_syscall_post___fhstatvfs190(res, fhp, fh_size, buf, flags) \ __sanitizer_syscall_post_impl___fhstatvfs190( \ res, (long long)(fhp), (long long)(fh_size), (long long)(buf), \ (long long)(flags)) #define __sanitizer_syscall_pre___acl_get_link(path, type, aclp) \ __sanitizer_syscall_pre_impl___acl_get_link( \ (long long)(path), (long long)(type), (long long)(aclp)) #define __sanitizer_syscall_post___acl_get_link(res, path, type, aclp) \ __sanitizer_syscall_post_impl___acl_get_link( \ res, (long long)(path), (long long)(type), (long long)(aclp)) #define __sanitizer_syscall_pre___acl_set_link(path, type, aclp) \ __sanitizer_syscall_pre_impl___acl_set_link( \ (long long)(path), (long long)(type), (long long)(aclp)) #define __sanitizer_syscall_post___acl_set_link(res, path, type, aclp) \ __sanitizer_syscall_post_impl___acl_set_link( \ res, (long long)(path), (long long)(type), (long long)(aclp)) #define __sanitizer_syscall_pre___acl_delete_link(path, type) \ __sanitizer_syscall_pre_impl___acl_delete_link((long long)(path), \ (long long)(type)) #define __sanitizer_syscall_post___acl_delete_link(res, path, type) \ __sanitizer_syscall_post_impl___acl_delete_link(res, (long long)(path), \ (long long)(type)) #define __sanitizer_syscall_pre___acl_aclcheck_link(path, type, aclp) \ __sanitizer_syscall_pre_impl___acl_aclcheck_link( \ (long long)(path), (long long)(type), (long long)(aclp)) #define __sanitizer_syscall_post___acl_aclcheck_link(res, path, type, aclp) \ __sanitizer_syscall_post_impl___acl_aclcheck_link( \ res, (long long)(path), (long long)(type), (long long)(aclp)) #define __sanitizer_syscall_pre___acl_get_file(path, type, aclp) \ __sanitizer_syscall_pre_impl___acl_get_file( \ (long long)(path), (long long)(type), (long long)(aclp)) #define __sanitizer_syscall_post___acl_get_file(res, path, type, aclp) \ __sanitizer_syscall_post_impl___acl_get_file( \ res, (long long)(path), (long long)(type), (long long)(aclp)) #define __sanitizer_syscall_pre___acl_set_file(path, type, aclp) \ __sanitizer_syscall_pre_impl___acl_set_file( \ (long long)(path), (long long)(type), (long long)(aclp)) #define 
__sanitizer_syscall_post___acl_set_file(res, path, type, aclp) \
  __sanitizer_syscall_post_impl___acl_set_file( \
      res, (long long)(path), (long long)(type), (long long)(aclp))
#define __sanitizer_syscall_pre___acl_get_fd(filedes, type, aclp) \
  __sanitizer_syscall_pre_impl___acl_get_fd( \
      (long long)(filedes), (long long)(type), (long long)(aclp))
#define __sanitizer_syscall_post___acl_get_fd(res, filedes, type, aclp) \
  __sanitizer_syscall_post_impl___acl_get_fd( \
      res, (long long)(filedes), (long long)(type), (long long)(aclp))
#define __sanitizer_syscall_pre___acl_set_fd(filedes, type, aclp) \
  __sanitizer_syscall_pre_impl___acl_set_fd( \
      (long long)(filedes), (long long)(type), (long long)(aclp))
#define __sanitizer_syscall_post___acl_set_fd(res, filedes, type, aclp) \
  __sanitizer_syscall_post_impl___acl_set_fd( \
      res, (long long)(filedes), (long long)(type), (long long)(aclp))
#define __sanitizer_syscall_pre___acl_delete_file(path, type) \
  __sanitizer_syscall_pre_impl___acl_delete_file((long long)(path), \
                                                 (long long)(type))
#define __sanitizer_syscall_post___acl_delete_file(res, path, type) \
  __sanitizer_syscall_post_impl___acl_delete_file(res, (long long)(path), \
                                                  (long long)(type))
#define __sanitizer_syscall_pre___acl_delete_fd(filedes, type) \
  __sanitizer_syscall_pre_impl___acl_delete_fd((long long)(filedes), \
                                               (long long)(type))
#define __sanitizer_syscall_post___acl_delete_fd(res, filedes, type) \
  __sanitizer_syscall_post_impl___acl_delete_fd(res, (long long)(filedes), \
                                                (long long)(type))
#define __sanitizer_syscall_pre___acl_aclcheck_file(path, type, aclp) \
  __sanitizer_syscall_pre_impl___acl_aclcheck_file( \
      (long long)(path), (long long)(type), (long long)(aclp))
#define __sanitizer_syscall_post___acl_aclcheck_file(res, path, type, aclp) \
  __sanitizer_syscall_post_impl___acl_aclcheck_file( \
      res, (long long)(path), (long long)(type), (long long)(aclp))
#define __sanitizer_syscall_pre___acl_aclcheck_fd(filedes, type, aclp) \
  __sanitizer_syscall_pre_impl___acl_aclcheck_fd( \
      (long long)(filedes), (long long)(type), (long long)(aclp))
#define __sanitizer_syscall_post___acl_aclcheck_fd(res, filedes, type, aclp) \
  __sanitizer_syscall_post_impl___acl_aclcheck_fd( \
      res, (long long)(filedes), (long long)(type), (long long)(aclp))
#define __sanitizer_syscall_pre_lpathconf(path, name) \
  __sanitizer_syscall_pre_impl_lpathconf((long long)(path), (long long)(name))
#define __sanitizer_syscall_post_lpathconf(res, path, name) \
  __sanitizer_syscall_post_impl_lpathconf(res, (long long)(path), \
                                          (long long)(name))

/* Compat with older releases */
#define __sanitizer_syscall_pre_getvfsstat \
  __sanitizer_syscall_pre_compat_90_getvfsstat
#define __sanitizer_syscall_post_getvfsstat \
  __sanitizer_syscall_post_compat_90_getvfsstat
#define __sanitizer_syscall_pre_statvfs1 \
  __sanitizer_syscall_pre_compat_90_statvfs1
#define __sanitizer_syscall_post_statvfs1 \
  __sanitizer_syscall_post_compat_90_statvfs1
#define __sanitizer_syscall_pre_fstatvfs1 \
  __sanitizer_syscall_pre_compat_90_fstatvfs1
#define __sanitizer_syscall_post_fstatvfs1 \
  __sanitizer_syscall_post_compat_90_fstatvfs1
#define __sanitizer_syscall_pre___fhstatvfs140 \
  __sanitizer_syscall_pre_compat_90_fhstatvfs1
#define __sanitizer_syscall_post___fhstatvfs140 \
  __sanitizer_syscall_post_compat_90_fhstatvfs1

#ifdef __cplusplus
extern "C" {
#endif

// Private declarations. Do not call directly from user code. Use macros above.

// DO NOT EDIT! THIS FILE HAS BEEN GENERATED!
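// Usage sketch (illustrative only, not part of the generated interface): a
// program that issues a raw syscall can bracket it with the matching pre/post
// macros defined above, so the sanitizer is told which memory the kernel
// reads and writes. The names used below (SYS_read from <sys/syscall.h>,
// syscall(2), and the read(2) hook macros) are assumptions for the example:
//
//   char buf[128];
//   __sanitizer_syscall_pre_read(fd, buf, sizeof(buf));
//   long long res = syscall(SYS_read, fd, buf, sizeof(buf));
//   __sanitizer_syscall_post_read(res, fd, buf, sizeof(buf));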
void __sanitizer_syscall_pre_impl_syscall(long long code, long long arg0, long long arg1, long long arg2, long long arg3, long long arg4, long long arg5, long long arg6, long long arg7); void __sanitizer_syscall_post_impl_syscall(long long res, long long code, long long arg0, long long arg1, long long arg2, long long arg3, long long arg4, long long arg5, long long arg6, long long arg7); void __sanitizer_syscall_pre_impl_exit(long long rval); void __sanitizer_syscall_post_impl_exit(long long res, long long rval); void __sanitizer_syscall_pre_impl_fork(void); void __sanitizer_syscall_post_impl_fork(long long res); void __sanitizer_syscall_pre_impl_read(long long fd, long long buf, long long nbyte); void __sanitizer_syscall_post_impl_read(long long res, long long fd, long long buf, long long nbyte); void __sanitizer_syscall_pre_impl_write(long long fd, long long buf, long long nbyte); void __sanitizer_syscall_post_impl_write(long long res, long long fd, long long buf, long long nbyte); void __sanitizer_syscall_pre_impl_open(long long path, long long flags, long long mode); void __sanitizer_syscall_post_impl_open(long long res, long long path, long long flags, long long mode); void __sanitizer_syscall_pre_impl_close(long long fd); void __sanitizer_syscall_post_impl_close(long long res, long long fd); void __sanitizer_syscall_pre_impl_compat_50_wait4(long long pid, long long status, long long options, long long rusage); void __sanitizer_syscall_post_impl_compat_50_wait4(long long res, long long pid, long long status, long long options, long long rusage); void __sanitizer_syscall_pre_impl_compat_43_ocreat(long long path, long long mode); void __sanitizer_syscall_post_impl_compat_43_ocreat(long long res, long long path, long long mode); void __sanitizer_syscall_pre_impl_link(long long path, long long link); void __sanitizer_syscall_post_impl_link(long long res, long long path, long long link); void __sanitizer_syscall_pre_impl_unlink(long long path); void __sanitizer_syscall_post_impl_unlink(long long res, long long path); /* syscall 11 has been skipped */ void __sanitizer_syscall_pre_impl_chdir(long long path); void __sanitizer_syscall_post_impl_chdir(long long res, long long path); void __sanitizer_syscall_pre_impl_fchdir(long long fd); void __sanitizer_syscall_post_impl_fchdir(long long res, long long fd); void __sanitizer_syscall_pre_impl_compat_50_mknod(long long path, long long mode, long long dev); void __sanitizer_syscall_post_impl_compat_50_mknod(long long res, long long path, long long mode, long long dev); void __sanitizer_syscall_pre_impl_chmod(long long path, long long mode); void __sanitizer_syscall_post_impl_chmod(long long res, long long path, long long mode); void __sanitizer_syscall_pre_impl_chown(long long path, long long uid, long long gid); void __sanitizer_syscall_post_impl_chown(long long res, long long path, long long uid, long long gid); void __sanitizer_syscall_pre_impl_break(long long nsize); void __sanitizer_syscall_post_impl_break(long long res, long long nsize); void __sanitizer_syscall_pre_impl_compat_20_getfsstat(long long buf, long long bufsize, long long flags); void __sanitizer_syscall_post_impl_compat_20_getfsstat(long long res, long long buf, long long bufsize, long long flags); void __sanitizer_syscall_pre_impl_compat_43_olseek(long long fd, long long offset, long long whence); void __sanitizer_syscall_post_impl_compat_43_olseek(long long res, long long fd, long long offset, long long whence); void __sanitizer_syscall_pre_impl_getpid(void); void 
__sanitizer_syscall_post_impl_getpid(long long res); void __sanitizer_syscall_pre_impl_compat_40_mount(long long type, long long path, long long flags, long long data); void __sanitizer_syscall_post_impl_compat_40_mount(long long res, long long type, long long path, long long flags, long long data); void __sanitizer_syscall_pre_impl_unmount(long long path, long long flags); void __sanitizer_syscall_post_impl_unmount(long long res, long long path, long long flags); void __sanitizer_syscall_pre_impl_setuid(long long uid); void __sanitizer_syscall_post_impl_setuid(long long res, long long uid); void __sanitizer_syscall_pre_impl_getuid(void); void __sanitizer_syscall_post_impl_getuid(long long res); void __sanitizer_syscall_pre_impl_geteuid(void); void __sanitizer_syscall_post_impl_geteuid(long long res); void __sanitizer_syscall_pre_impl_ptrace(long long req, long long pid, long long addr, long long data); void __sanitizer_syscall_post_impl_ptrace(long long res, long long req, long long pid, long long addr, long long data); void __sanitizer_syscall_pre_impl_recvmsg(long long s, long long msg, long long flags); void __sanitizer_syscall_post_impl_recvmsg(long long res, long long s, long long msg, long long flags); void __sanitizer_syscall_pre_impl_sendmsg(long long s, long long msg, long long flags); void __sanitizer_syscall_post_impl_sendmsg(long long res, long long s, long long msg, long long flags); void __sanitizer_syscall_pre_impl_recvfrom(long long s, long long buf, long long len, long long flags, long long from, long long fromlenaddr); void __sanitizer_syscall_post_impl_recvfrom(long long res, long long s, long long buf, long long len, long long flags, long long from, long long fromlenaddr); void __sanitizer_syscall_pre_impl_accept(long long s, long long name, long long anamelen); void __sanitizer_syscall_post_impl_accept(long long res, long long s, long long name, long long anamelen); void __sanitizer_syscall_pre_impl_getpeername(long long fdes, long long asa, long long alen); void __sanitizer_syscall_post_impl_getpeername(long long res, long long fdes, long long asa, long long alen); void __sanitizer_syscall_pre_impl_getsockname(long long fdes, long long asa, long long alen); void __sanitizer_syscall_post_impl_getsockname(long long res, long long fdes, long long asa, long long alen); void __sanitizer_syscall_pre_impl_access(long long path, long long flags); void __sanitizer_syscall_post_impl_access(long long res, long long path, long long flags); void __sanitizer_syscall_pre_impl_chflags(long long path, long long flags); void __sanitizer_syscall_post_impl_chflags(long long res, long long path, long long flags); void __sanitizer_syscall_pre_impl_fchflags(long long fd, long long flags); void __sanitizer_syscall_post_impl_fchflags(long long res, long long fd, long long flags); void __sanitizer_syscall_pre_impl_sync(void); void __sanitizer_syscall_post_impl_sync(long long res); void __sanitizer_syscall_pre_impl_kill(long long pid, long long signum); void __sanitizer_syscall_post_impl_kill(long long res, long long pid, long long signum); void __sanitizer_syscall_pre_impl_compat_43_stat43(long long path, long long ub); void __sanitizer_syscall_post_impl_compat_43_stat43(long long res, long long path, long long ub); void __sanitizer_syscall_pre_impl_getppid(void); void __sanitizer_syscall_post_impl_getppid(long long res); void __sanitizer_syscall_pre_impl_compat_43_lstat43(long long path, long long ub); void __sanitizer_syscall_post_impl_compat_43_lstat43(long long res, long long path, long 
long ub); void __sanitizer_syscall_pre_impl_dup(long long fd); void __sanitizer_syscall_post_impl_dup(long long res, long long fd); void __sanitizer_syscall_pre_impl_pipe(void); void __sanitizer_syscall_post_impl_pipe(long long res); void __sanitizer_syscall_pre_impl_getegid(void); void __sanitizer_syscall_post_impl_getegid(long long res); void __sanitizer_syscall_pre_impl_profil(long long samples, long long size, long long offset, long long scale); void __sanitizer_syscall_post_impl_profil(long long res, long long samples, long long size, long long offset, long long scale); void __sanitizer_syscall_pre_impl_ktrace(long long fname, long long ops, long long facs, long long pid); void __sanitizer_syscall_post_impl_ktrace(long long res, long long fname, long long ops, long long facs, long long pid); void __sanitizer_syscall_pre_impl_compat_13_sigaction13(long long signum, long long nsa, long long osa); void __sanitizer_syscall_post_impl_compat_13_sigaction13(long long res, long long signum, long long nsa, long long osa); void __sanitizer_syscall_pre_impl_getgid(void); void __sanitizer_syscall_post_impl_getgid(long long res); void __sanitizer_syscall_pre_impl_compat_13_sigprocmask13(long long how, long long mask); void __sanitizer_syscall_post_impl_compat_13_sigprocmask13(long long res, long long how, long long mask); void __sanitizer_syscall_pre_impl___getlogin(long long namebuf, long long namelen); void __sanitizer_syscall_post_impl___getlogin(long long res, long long namebuf, long long namelen); void __sanitizer_syscall_pre_impl___setlogin(long long namebuf); void __sanitizer_syscall_post_impl___setlogin(long long res, long long namebuf); void __sanitizer_syscall_pre_impl_acct(long long path); void __sanitizer_syscall_post_impl_acct(long long res, long long path); void __sanitizer_syscall_pre_impl_compat_13_sigpending13(void); void __sanitizer_syscall_post_impl_compat_13_sigpending13(long long res); void __sanitizer_syscall_pre_impl_compat_13_sigaltstack13(long long nss, long long oss); void __sanitizer_syscall_post_impl_compat_13_sigaltstack13(long long res, long long nss, long long oss); void __sanitizer_syscall_pre_impl_ioctl(long long fd, long long com, long long data); void __sanitizer_syscall_post_impl_ioctl(long long res, long long fd, long long com, long long data); void __sanitizer_syscall_pre_impl_compat_12_oreboot(long long opt); void __sanitizer_syscall_post_impl_compat_12_oreboot(long long res, long long opt); void __sanitizer_syscall_pre_impl_revoke(long long path); void __sanitizer_syscall_post_impl_revoke(long long res, long long path); void __sanitizer_syscall_pre_impl_symlink(long long path, long long link); void __sanitizer_syscall_post_impl_symlink(long long res, long long path, long long link); void __sanitizer_syscall_pre_impl_readlink(long long path, long long buf, long long count); void __sanitizer_syscall_post_impl_readlink(long long res, long long path, long long buf, long long count); void __sanitizer_syscall_pre_impl_execve(long long path, long long argp, long long envp); void __sanitizer_syscall_post_impl_execve(long long res, long long path, long long argp, long long envp); void __sanitizer_syscall_pre_impl_umask(long long newmask); void __sanitizer_syscall_post_impl_umask(long long res, long long newmask); void __sanitizer_syscall_pre_impl_chroot(long long path); void __sanitizer_syscall_post_impl_chroot(long long res, long long path); void __sanitizer_syscall_pre_impl_compat_43_fstat43(long long fd, long long sb); void 
__sanitizer_syscall_post_impl_compat_43_fstat43(long long res, long long fd, long long sb); void __sanitizer_syscall_pre_impl_compat_43_ogetkerninfo(long long op, long long where, long long size, long long arg); void __sanitizer_syscall_post_impl_compat_43_ogetkerninfo(long long res, long long op, long long where, long long size, long long arg); void __sanitizer_syscall_pre_impl_compat_43_ogetpagesize(void); void __sanitizer_syscall_post_impl_compat_43_ogetpagesize(long long res); void __sanitizer_syscall_pre_impl_compat_12_msync(long long addr, long long len); void __sanitizer_syscall_post_impl_compat_12_msync(long long res, long long addr, long long len); void __sanitizer_syscall_pre_impl_vfork(void); void __sanitizer_syscall_post_impl_vfork(long long res); /* syscall 67 has been skipped */ /* syscall 68 has been skipped */ /* syscall 69 has been skipped */ /* syscall 70 has been skipped */ void __sanitizer_syscall_pre_impl_compat_43_ommap(long long addr, long long len, long long prot, long long flags, long long fd, long long pos); void __sanitizer_syscall_post_impl_compat_43_ommap( long long res, long long addr, long long len, long long prot, long long flags, long long fd, long long pos); void __sanitizer_syscall_pre_impl_vadvise(long long anom); void __sanitizer_syscall_post_impl_vadvise(long long res, long long anom); void __sanitizer_syscall_pre_impl_munmap(long long addr, long long len); void __sanitizer_syscall_post_impl_munmap(long long res, long long addr, long long len); void __sanitizer_syscall_pre_impl_mprotect(long long addr, long long len, long long prot); void __sanitizer_syscall_post_impl_mprotect(long long res, long long addr, long long len, long long prot); void __sanitizer_syscall_pre_impl_madvise(long long addr, long long len, long long behav); void __sanitizer_syscall_post_impl_madvise(long long res, long long addr, long long len, long long behav); /* syscall 76 has been skipped */ /* syscall 77 has been skipped */ void __sanitizer_syscall_pre_impl_mincore(long long addr, long long len, long long vec); void __sanitizer_syscall_post_impl_mincore(long long res, long long addr, long long len, long long vec); void __sanitizer_syscall_pre_impl_getgroups(long long gidsetsize, long long gidset); void __sanitizer_syscall_post_impl_getgroups(long long res, long long gidsetsize, long long gidset); void __sanitizer_syscall_pre_impl_setgroups(long long gidsetsize, long long gidset); void __sanitizer_syscall_post_impl_setgroups(long long res, long long gidsetsize, long long gidset); void __sanitizer_syscall_pre_impl_getpgrp(void); void __sanitizer_syscall_post_impl_getpgrp(long long res); void __sanitizer_syscall_pre_impl_setpgid(long long pid, long long pgid); void __sanitizer_syscall_post_impl_setpgid(long long res, long long pid, long long pgid); void __sanitizer_syscall_pre_impl_compat_50_setitimer(long long which, long long itv, long long oitv); void __sanitizer_syscall_post_impl_compat_50_setitimer(long long res, long long which, long long itv, long long oitv); void __sanitizer_syscall_pre_impl_compat_43_owait(void); void __sanitizer_syscall_post_impl_compat_43_owait(long long res); void __sanitizer_syscall_pre_impl_compat_12_oswapon(long long name); void __sanitizer_syscall_post_impl_compat_12_oswapon(long long res, long long name); void __sanitizer_syscall_pre_impl_compat_50_getitimer(long long which, long long itv); void __sanitizer_syscall_post_impl_compat_50_getitimer(long long res, long long which, long long itv); void 
__sanitizer_syscall_pre_impl_compat_43_ogethostname(long long hostname, long long len); void __sanitizer_syscall_post_impl_compat_43_ogethostname(long long res, long long hostname, long long len); void __sanitizer_syscall_pre_impl_compat_43_osethostname(long long hostname, long long len); void __sanitizer_syscall_post_impl_compat_43_osethostname(long long res, long long hostname, long long len); void __sanitizer_syscall_pre_impl_compat_43_ogetdtablesize(void); void __sanitizer_syscall_post_impl_compat_43_ogetdtablesize(long long res); void __sanitizer_syscall_pre_impl_dup2(long long from, long long to); void __sanitizer_syscall_post_impl_dup2(long long res, long long from, long long to); void __sanitizer_syscall_pre_impl_getrandom(long long buf, long long buflen, long long flags); void __sanitizer_syscall_post_impl_getrandom(long long res, long long buf, long long buflen, long long flags); void __sanitizer_syscall_pre_impl_fcntl(long long fd, long long cmd, long long arg); void __sanitizer_syscall_post_impl_fcntl(long long res, long long fd, long long cmd, long long arg); void __sanitizer_syscall_pre_impl_compat_50_select(long long nd, long long in, long long ou, long long ex, long long tv); void __sanitizer_syscall_post_impl_compat_50_select(long long res, long long nd, long long in, long long ou, long long ex, long long tv); /* syscall 94 has been skipped */ void __sanitizer_syscall_pre_impl_fsync(long long fd); void __sanitizer_syscall_post_impl_fsync(long long res, long long fd); void __sanitizer_syscall_pre_impl_setpriority(long long which, long long who, long long prio); void __sanitizer_syscall_post_impl_setpriority(long long res, long long which, long long who, long long prio); void __sanitizer_syscall_pre_impl_compat_30_socket(long long domain, long long type, long long protocol); void __sanitizer_syscall_post_impl_compat_30_socket(long long res, long long domain, long long type, long long protocol); void __sanitizer_syscall_pre_impl_connect(long long s, long long name, long long namelen); void __sanitizer_syscall_post_impl_connect(long long res, long long s, long long name, long long namelen); void __sanitizer_syscall_pre_impl_compat_43_oaccept(long long s, long long name, long long anamelen); void __sanitizer_syscall_post_impl_compat_43_oaccept(long long res, long long s, long long name, long long anamelen); void __sanitizer_syscall_pre_impl_getpriority(long long which, long long who); void __sanitizer_syscall_post_impl_getpriority(long long res, long long which, long long who); void __sanitizer_syscall_pre_impl_compat_43_osend(long long s, long long buf, long long len, long long flags); void __sanitizer_syscall_post_impl_compat_43_osend(long long res, long long s, long long buf, long long len, long long flags); void __sanitizer_syscall_pre_impl_compat_43_orecv(long long s, long long buf, long long len, long long flags); void __sanitizer_syscall_post_impl_compat_43_orecv(long long res, long long s, long long buf, long long len, long long flags); void __sanitizer_syscall_pre_impl_compat_13_sigreturn13(long long sigcntxp); void __sanitizer_syscall_post_impl_compat_13_sigreturn13(long long res, long long sigcntxp); void __sanitizer_syscall_pre_impl_bind(long long s, long long name, long long namelen); void __sanitizer_syscall_post_impl_bind(long long res, long long s, long long name, long long namelen); void __sanitizer_syscall_pre_impl_setsockopt(long long s, long long level, long long name, long long val, long long valsize); void __sanitizer_syscall_post_impl_setsockopt(long long 
res, long long s, long long level, long long name, long long val, long long valsize); void __sanitizer_syscall_pre_impl_listen(long long s, long long backlog); void __sanitizer_syscall_post_impl_listen(long long res, long long s, long long backlog); /* syscall 107 has been skipped */ void __sanitizer_syscall_pre_impl_compat_43_osigvec(long long signum, long long nsv, long long osv); void __sanitizer_syscall_post_impl_compat_43_osigvec(long long res, long long signum, long long nsv, long long osv); void __sanitizer_syscall_pre_impl_compat_43_osigblock(long long mask); void __sanitizer_syscall_post_impl_compat_43_osigblock(long long res, long long mask); void __sanitizer_syscall_pre_impl_compat_43_osigsetmask(long long mask); void __sanitizer_syscall_post_impl_compat_43_osigsetmask(long long res, long long mask); void __sanitizer_syscall_pre_impl_compat_13_sigsuspend13(long long mask); void __sanitizer_syscall_post_impl_compat_13_sigsuspend13(long long res, long long mask); void __sanitizer_syscall_pre_impl_compat_43_osigstack(long long nss, long long oss); void __sanitizer_syscall_post_impl_compat_43_osigstack(long long res, long long nss, long long oss); void __sanitizer_syscall_pre_impl_compat_43_orecvmsg(long long s, long long msg, long long flags); void __sanitizer_syscall_post_impl_compat_43_orecvmsg(long long res, long long s, long long msg, long long flags); void __sanitizer_syscall_pre_impl_compat_43_osendmsg(long long s, long long msg, long long flags); void __sanitizer_syscall_post_impl_compat_43_osendmsg(long long res, long long s, long long msg, long long flags); /* syscall 115 has been skipped */ void __sanitizer_syscall_pre_impl_compat_50_gettimeofday(long long tp, long long tzp); void __sanitizer_syscall_post_impl_compat_50_gettimeofday(long long res, long long tp, long long tzp); void __sanitizer_syscall_pre_impl_compat_50_getrusage(long long who, long long rusage); void __sanitizer_syscall_post_impl_compat_50_getrusage(long long res, long long who, long long rusage); void __sanitizer_syscall_pre_impl_getsockopt(long long s, long long level, long long name, long long val, long long avalsize); void __sanitizer_syscall_post_impl_getsockopt(long long res, long long s, long long level, long long name, long long val, long long avalsize); /* syscall 119 has been skipped */ void __sanitizer_syscall_pre_impl_readv(long long fd, long long iovp, long long iovcnt); void __sanitizer_syscall_post_impl_readv(long long res, long long fd, long long iovp, long long iovcnt); void __sanitizer_syscall_pre_impl_writev(long long fd, long long iovp, long long iovcnt); void __sanitizer_syscall_post_impl_writev(long long res, long long fd, long long iovp, long long iovcnt); void __sanitizer_syscall_pre_impl_compat_50_settimeofday(long long tv, long long tzp); void __sanitizer_syscall_post_impl_compat_50_settimeofday(long long res, long long tv, long long tzp); void __sanitizer_syscall_pre_impl_fchown(long long fd, long long uid, long long gid); void __sanitizer_syscall_post_impl_fchown(long long res, long long fd, long long uid, long long gid); void __sanitizer_syscall_pre_impl_fchmod(long long fd, long long mode); void __sanitizer_syscall_post_impl_fchmod(long long res, long long fd, long long mode); void __sanitizer_syscall_pre_impl_compat_43_orecvfrom( long long s, long long buf, long long len, long long flags, long long from, long long fromlenaddr); void __sanitizer_syscall_post_impl_compat_43_orecvfrom( long long res, long long s, long long buf, long long len, long long flags, long long from, 
long long fromlenaddr); void __sanitizer_syscall_pre_impl_setreuid(long long ruid, long long euid); void __sanitizer_syscall_post_impl_setreuid(long long res, long long ruid, long long euid); void __sanitizer_syscall_pre_impl_setregid(long long rgid, long long egid); void __sanitizer_syscall_post_impl_setregid(long long res, long long rgid, long long egid); void __sanitizer_syscall_pre_impl_rename(long long from, long long to); void __sanitizer_syscall_post_impl_rename(long long res, long long from, long long to); void __sanitizer_syscall_pre_impl_compat_43_otruncate(long long path, long long length); void __sanitizer_syscall_post_impl_compat_43_otruncate(long long res, long long path, long long length); void __sanitizer_syscall_pre_impl_compat_43_oftruncate(long long fd, long long length); void __sanitizer_syscall_post_impl_compat_43_oftruncate(long long res, long long fd, long long length); void __sanitizer_syscall_pre_impl_flock(long long fd, long long how); void __sanitizer_syscall_post_impl_flock(long long res, long long fd, long long how); void __sanitizer_syscall_pre_impl_mkfifo(long long path, long long mode); void __sanitizer_syscall_post_impl_mkfifo(long long res, long long path, long long mode); void __sanitizer_syscall_pre_impl_sendto(long long s, long long buf, long long len, long long flags, long long to, long long tolen); void __sanitizer_syscall_post_impl_sendto(long long res, long long s, long long buf, long long len, long long flags, long long to, long long tolen); void __sanitizer_syscall_pre_impl_shutdown(long long s, long long how); void __sanitizer_syscall_post_impl_shutdown(long long res, long long s, long long how); void __sanitizer_syscall_pre_impl_socketpair(long long domain, long long type, long long protocol, long long rsv); void __sanitizer_syscall_post_impl_socketpair(long long res, long long domain, long long type, long long protocol, long long rsv); void __sanitizer_syscall_pre_impl_mkdir(long long path, long long mode); void __sanitizer_syscall_post_impl_mkdir(long long res, long long path, long long mode); void __sanitizer_syscall_pre_impl_rmdir(long long path); void __sanitizer_syscall_post_impl_rmdir(long long res, long long path); void __sanitizer_syscall_pre_impl_compat_50_utimes(long long path, long long tptr); void __sanitizer_syscall_post_impl_compat_50_utimes(long long res, long long path, long long tptr); /* syscall 139 has been skipped */ void __sanitizer_syscall_pre_impl_compat_50_adjtime(long long delta, long long olddelta); void __sanitizer_syscall_post_impl_compat_50_adjtime(long long res, long long delta, long long olddelta); void __sanitizer_syscall_pre_impl_compat_43_ogetpeername(long long fdes, long long asa, long long alen); void __sanitizer_syscall_post_impl_compat_43_ogetpeername(long long res, long long fdes, long long asa, long long alen); void __sanitizer_syscall_pre_impl_compat_43_ogethostid(void); void __sanitizer_syscall_post_impl_compat_43_ogethostid(long long res); void __sanitizer_syscall_pre_impl_compat_43_osethostid(long long hostid); void __sanitizer_syscall_post_impl_compat_43_osethostid(long long res, long long hostid); void __sanitizer_syscall_pre_impl_compat_43_ogetrlimit(long long which, long long rlp); void __sanitizer_syscall_post_impl_compat_43_ogetrlimit(long long res, long long which, long long rlp); void __sanitizer_syscall_pre_impl_compat_43_osetrlimit(long long which, long long rlp); void __sanitizer_syscall_post_impl_compat_43_osetrlimit(long long res, long long which, long long rlp); void 
__sanitizer_syscall_pre_impl_compat_43_okillpg(long long pgid, long long signum); void __sanitizer_syscall_post_impl_compat_43_okillpg(long long res, long long pgid, long long signum); void __sanitizer_syscall_pre_impl_setsid(void); void __sanitizer_syscall_post_impl_setsid(long long res); void __sanitizer_syscall_pre_impl_compat_50_quotactl(long long path, long long cmd, long long uid, long long arg); void __sanitizer_syscall_post_impl_compat_50_quotactl( long long res, long long path, long long cmd, long long uid, long long arg); void __sanitizer_syscall_pre_impl_compat_43_oquota(void); void __sanitizer_syscall_post_impl_compat_43_oquota(long long res); void __sanitizer_syscall_pre_impl_compat_43_ogetsockname(long long fdec, long long asa, long long alen); void __sanitizer_syscall_post_impl_compat_43_ogetsockname(long long res, long long fdec, long long asa, long long alen); /* syscall 151 has been skipped */ /* syscall 152 has been skipped */ /* syscall 153 has been skipped */ /* syscall 154 has been skipped */ void __sanitizer_syscall_pre_impl_nfssvc(long long flag, long long argp); void __sanitizer_syscall_post_impl_nfssvc(long long res, long long flag, long long argp); void __sanitizer_syscall_pre_impl_compat_43_ogetdirentries(long long fd, long long buf, long long count, long long basep); void __sanitizer_syscall_post_impl_compat_43_ogetdirentries(long long res, long long fd, long long buf, long long count, long long basep); void __sanitizer_syscall_pre_impl_compat_20_statfs(long long path, long long buf); void __sanitizer_syscall_post_impl_compat_20_statfs(long long res, long long path, long long buf); void __sanitizer_syscall_pre_impl_compat_20_fstatfs(long long fd, long long buf); void __sanitizer_syscall_post_impl_compat_20_fstatfs(long long res, long long fd, long long buf); /* syscall 159 has been skipped */ /* syscall 160 has been skipped */ void __sanitizer_syscall_pre_impl_compat_30_getfh(long long fname, long long fhp); void __sanitizer_syscall_post_impl_compat_30_getfh(long long res, long long fname, long long fhp); void __sanitizer_syscall_pre_impl_compat_09_ogetdomainname(long long domainname, long long len); void __sanitizer_syscall_post_impl_compat_09_ogetdomainname( long long res, long long domainname, long long len); void __sanitizer_syscall_pre_impl_compat_09_osetdomainname(long long domainname, long long len); void __sanitizer_syscall_post_impl_compat_09_osetdomainname( long long res, long long domainname, long long len); void __sanitizer_syscall_pre_impl_compat_09_ouname(long long name); void __sanitizer_syscall_post_impl_compat_09_ouname(long long res, long long name); void __sanitizer_syscall_pre_impl_sysarch(long long op, long long parms); void __sanitizer_syscall_post_impl_sysarch(long long res, long long op, long long parms); void __sanitizer_syscall_pre_impl___futex(long long uaddr, long long op, long long val, long long timeout, long long uaddr2, long long val2, long long val3); void __sanitizer_syscall_post_impl___futex(long long res, long long uaddr, long long op, long long val, long long timeout, long long uaddr2, long long val2, long long val3); void __sanitizer_syscall_pre_impl___futex_set_robust_list(long long head, long long len); void __sanitizer_syscall_post_impl___futex_set_robust_list(long long res, long long head, long long len); void __sanitizer_syscall_pre_impl___futex_get_robust_list(long long lwpid, long long headp, long long lenp); void __sanitizer_syscall_post_impl___futex_get_robust_list(long long res, long long lwpid, long long headp, 
long long lenp); #if !defined(_LP64) void __sanitizer_syscall_pre_impl_compat_10_osemsys(long long which, long long a2, long long a3, long long a4, long long a5); void __sanitizer_syscall_post_impl_compat_10_osemsys(long long res, long long which, long long a2, long long a3, long long a4, long long a5); #else /* syscall 169 has been skipped */ #endif #if !defined(_LP64) void __sanitizer_syscall_pre_impl_compat_10_omsgsys(long long which, long long a2, long long a3, long long a4, long long a5, long long a6); void __sanitizer_syscall_post_impl_compat_10_omsgsys(long long res, long long which, long long a2, long long a3, long long a4, long long a5, long long a6); #else /* syscall 170 has been skipped */ #endif #if !defined(_LP64) void __sanitizer_syscall_pre_impl_compat_10_oshmsys(long long which, long long a2, long long a3, long long a4); void __sanitizer_syscall_post_impl_compat_10_oshmsys(long long res, long long which, long long a2, long long a3, long long a4); #else /* syscall 171 has been skipped */ #endif /* syscall 172 has been skipped */ void __sanitizer_syscall_pre_impl_pread(long long fd, long long buf, long long nbyte, long long PAD, long long offset); void __sanitizer_syscall_post_impl_pread(long long res, long long fd, long long buf, long long nbyte, long long PAD, long long offset); void __sanitizer_syscall_pre_impl_pwrite(long long fd, long long buf, long long nbyte, long long PAD, long long offset); void __sanitizer_syscall_post_impl_pwrite(long long res, long long fd, long long buf, long long nbyte, long long PAD, long long offset); void __sanitizer_syscall_pre_impl_compat_30_ntp_gettime(long long ntvp); void __sanitizer_syscall_post_impl_compat_30_ntp_gettime(long long res, long long ntvp); #if defined(NTP) || !defined(_KERNEL_OPT) void __sanitizer_syscall_pre_impl_ntp_adjtime(long long tp); void __sanitizer_syscall_post_impl_ntp_adjtime(long long res, long long tp); #else /* syscall 176 has been skipped */ #endif /* syscall 177 has been skipped */ /* syscall 178 has been skipped */ /* syscall 179 has been skipped */ /* syscall 180 has been skipped */ void __sanitizer_syscall_pre_impl_setgid(long long gid); void __sanitizer_syscall_post_impl_setgid(long long res, long long gid); void __sanitizer_syscall_pre_impl_setegid(long long egid); void __sanitizer_syscall_post_impl_setegid(long long res, long long egid); void __sanitizer_syscall_pre_impl_seteuid(long long euid); void __sanitizer_syscall_post_impl_seteuid(long long res, long long euid); void __sanitizer_syscall_pre_impl_lfs_bmapv(long long fsidp, long long blkiov, long long blkcnt); void __sanitizer_syscall_post_impl_lfs_bmapv(long long res, long long fsidp, long long blkiov, long long blkcnt); void __sanitizer_syscall_pre_impl_lfs_markv(long long fsidp, long long blkiov, long long blkcnt); void __sanitizer_syscall_post_impl_lfs_markv(long long res, long long fsidp, long long blkiov, long long blkcnt); void __sanitizer_syscall_pre_impl_lfs_segclean(long long fsidp, long long segment); void __sanitizer_syscall_post_impl_lfs_segclean(long long res, long long fsidp, long long segment); void __sanitizer_syscall_pre_impl_compat_50_lfs_segwait(long long fsidp, long long tv); void __sanitizer_syscall_post_impl_compat_50_lfs_segwait(long long res, long long fsidp, long long tv); void __sanitizer_syscall_pre_impl_compat_12_stat12(long long path, long long ub); void __sanitizer_syscall_post_impl_compat_12_stat12(long long res, long long path, long long ub); void __sanitizer_syscall_pre_impl_compat_12_fstat12(long long fd, long 
long sb); void __sanitizer_syscall_post_impl_compat_12_fstat12(long long res, long long fd, long long sb); void __sanitizer_syscall_pre_impl_compat_12_lstat12(long long path, long long ub); void __sanitizer_syscall_post_impl_compat_12_lstat12(long long res, long long path, long long ub); void __sanitizer_syscall_pre_impl_pathconf(long long path, long long name); void __sanitizer_syscall_post_impl_pathconf(long long res, long long path, long long name); void __sanitizer_syscall_pre_impl_fpathconf(long long fd, long long name); void __sanitizer_syscall_post_impl_fpathconf(long long res, long long fd, long long name); void __sanitizer_syscall_pre_impl_getsockopt2(long long s, long long level, long long name, long long val, long long avalsize); void __sanitizer_syscall_post_impl_getsockopt2(long long res, long long s, long long level, long long name, long long val, long long avalsize); void __sanitizer_syscall_pre_impl_getrlimit(long long which, long long rlp); void __sanitizer_syscall_post_impl_getrlimit(long long res, long long which, long long rlp); void __sanitizer_syscall_pre_impl_setrlimit(long long which, long long rlp); void __sanitizer_syscall_post_impl_setrlimit(long long res, long long which, long long rlp); void __sanitizer_syscall_pre_impl_compat_12_getdirentries(long long fd, long long buf, long long count, long long basep); void __sanitizer_syscall_post_impl_compat_12_getdirentries(long long res, long long fd, long long buf, long long count, long long basep); void __sanitizer_syscall_pre_impl_mmap(long long addr, long long len, long long prot, long long flags, long long fd, long long PAD, long long pos); void __sanitizer_syscall_post_impl_mmap(long long res, long long addr, long long len, long long prot, long long flags, long long fd, long long PAD, long long pos); void __sanitizer_syscall_pre_impl___syscall(long long code, long long arg0, long long arg1, long long arg2, long long arg3, long long arg4, long long arg5, long long arg6, long long arg7); void __sanitizer_syscall_post_impl___syscall(long long res, long long code, long long arg0, long long arg1, long long arg2, long long arg3, long long arg4, long long arg5, long long arg6, long long arg7); void __sanitizer_syscall_pre_impl_lseek(long long fd, long long PAD, long long offset, long long whence); void __sanitizer_syscall_post_impl_lseek(long long res, long long fd, long long PAD, long long offset, long long whence); void __sanitizer_syscall_pre_impl_truncate(long long path, long long PAD, long long length); void __sanitizer_syscall_post_impl_truncate(long long res, long long path, long long PAD, long long length); void __sanitizer_syscall_pre_impl_ftruncate(long long fd, long long PAD, long long length); void __sanitizer_syscall_post_impl_ftruncate(long long res, long long fd, long long PAD, long long length); void __sanitizer_syscall_pre_impl___sysctl(long long name, long long namelen, long long oldv, long long oldlenp, long long newv, long long newlen); void __sanitizer_syscall_post_impl___sysctl(long long res, long long name, long long namelen, long long oldv, long long oldlenp, long long newv, long long newlen); void __sanitizer_syscall_pre_impl_mlock(long long addr, long long len); void __sanitizer_syscall_post_impl_mlock(long long res, long long addr, long long len); void __sanitizer_syscall_pre_impl_munlock(long long addr, long long len); void __sanitizer_syscall_post_impl_munlock(long long res, long long addr, long long len); void __sanitizer_syscall_pre_impl_undelete(long long path); void 
__sanitizer_syscall_post_impl_undelete(long long res, long long path); void __sanitizer_syscall_pre_impl_compat_50_futimes(long long fd, long long tptr); void __sanitizer_syscall_post_impl_compat_50_futimes(long long res, long long fd, long long tptr); void __sanitizer_syscall_pre_impl_getpgid(long long pid); void __sanitizer_syscall_post_impl_getpgid(long long res, long long pid); void __sanitizer_syscall_pre_impl_reboot(long long opt, long long bootstr); void __sanitizer_syscall_post_impl_reboot(long long res, long long opt, long long bootstr); void __sanitizer_syscall_pre_impl_poll(long long fds, long long nfds, long long timeout); void __sanitizer_syscall_post_impl_poll(long long res, long long fds, long long nfds, long long timeout); void __sanitizer_syscall_pre_impl_afssys(long long id, long long a1, long long a2, long long a3, long long a4, long long a5, long long a6); void __sanitizer_syscall_post_impl_afssys(long long res, long long id, long long a1, long long a2, long long a3, long long a4, long long a5, long long a6); /* syscall 211 has been skipped */ /* syscall 212 has been skipped */ /* syscall 213 has been skipped */ /* syscall 214 has been skipped */ /* syscall 215 has been skipped */ /* syscall 216 has been skipped */ /* syscall 217 has been skipped */ /* syscall 218 has been skipped */ /* syscall 219 has been skipped */ void __sanitizer_syscall_pre_impl_compat_14___semctl(long long semid, long long semnum, long long cmd, long long arg); void __sanitizer_syscall_post_impl_compat_14___semctl(long long res, long long semid, long long semnum, long long cmd, long long arg); void __sanitizer_syscall_pre_impl_semget(long long key, long long nsems, long long semflg); void __sanitizer_syscall_post_impl_semget(long long res, long long key, long long nsems, long long semflg); void __sanitizer_syscall_pre_impl_semop(long long semid, long long sops, long long nsops); void __sanitizer_syscall_post_impl_semop(long long res, long long semid, long long sops, long long nsops); void __sanitizer_syscall_pre_impl_semconfig(long long flag); void __sanitizer_syscall_post_impl_semconfig(long long res, long long flag); void __sanitizer_syscall_pre_impl_compat_14_msgctl(long long msqid, long long cmd, long long buf); void __sanitizer_syscall_post_impl_compat_14_msgctl(long long res, long long msqid, long long cmd, long long buf); void __sanitizer_syscall_pre_impl_msgget(long long key, long long msgflg); void __sanitizer_syscall_post_impl_msgget(long long res, long long key, long long msgflg); void __sanitizer_syscall_pre_impl_msgsnd(long long msqid, long long msgp, long long msgsz, long long msgflg); void __sanitizer_syscall_post_impl_msgsnd(long long res, long long msqid, long long msgp, long long msgsz, long long msgflg); void __sanitizer_syscall_pre_impl_msgrcv(long long msqid, long long msgp, long long msgsz, long long msgtyp, long long msgflg); void __sanitizer_syscall_post_impl_msgrcv(long long res, long long msqid, long long msgp, long long msgsz, long long msgtyp, long long msgflg); void __sanitizer_syscall_pre_impl_shmat(long long shmid, long long shmaddr, long long shmflg); void __sanitizer_syscall_post_impl_shmat(long long res, long long shmid, long long shmaddr, long long shmflg); void __sanitizer_syscall_pre_impl_compat_14_shmctl(long long shmid, long long cmd, long long buf); void __sanitizer_syscall_post_impl_compat_14_shmctl(long long res, long long shmid, long long cmd, long long buf); void __sanitizer_syscall_pre_impl_shmdt(long long shmaddr); void 
__sanitizer_syscall_post_impl_shmdt(long long res, long long shmaddr); void __sanitizer_syscall_pre_impl_shmget(long long key, long long size, long long shmflg); void __sanitizer_syscall_post_impl_shmget(long long res, long long key, long long size, long long shmflg); void __sanitizer_syscall_pre_impl_compat_50_clock_gettime(long long clock_id, long long tp); void __sanitizer_syscall_post_impl_compat_50_clock_gettime(long long res, long long clock_id, long long tp); void __sanitizer_syscall_pre_impl_compat_50_clock_settime(long long clock_id, long long tp); void __sanitizer_syscall_post_impl_compat_50_clock_settime(long long res, long long clock_id, long long tp); void __sanitizer_syscall_pre_impl_compat_50_clock_getres(long long clock_id, long long tp); void __sanitizer_syscall_post_impl_compat_50_clock_getres(long long res, long long clock_id, long long tp); void __sanitizer_syscall_pre_impl_timer_create(long long clock_id, long long evp, long long timerid); void __sanitizer_syscall_post_impl_timer_create(long long res, long long clock_id, long long evp, long long timerid); void __sanitizer_syscall_pre_impl_timer_delete(long long timerid); void __sanitizer_syscall_post_impl_timer_delete(long long res, long long timerid); void __sanitizer_syscall_pre_impl_compat_50_timer_settime(long long timerid, long long flags, long long value, long long ovalue); void __sanitizer_syscall_post_impl_compat_50_timer_settime(long long res, long long timerid, long long flags, long long value, long long ovalue); void __sanitizer_syscall_pre_impl_compat_50_timer_gettime(long long timerid, long long value); void __sanitizer_syscall_post_impl_compat_50_timer_gettime(long long res, long long timerid, long long value); void __sanitizer_syscall_pre_impl_timer_getoverrun(long long timerid); void __sanitizer_syscall_post_impl_timer_getoverrun(long long res, long long timerid); void __sanitizer_syscall_pre_impl_compat_50_nanosleep(long long rqtp, long long rmtp); void __sanitizer_syscall_post_impl_compat_50_nanosleep(long long res, long long rqtp, long long rmtp); void __sanitizer_syscall_pre_impl_fdatasync(long long fd); void __sanitizer_syscall_post_impl_fdatasync(long long res, long long fd); void __sanitizer_syscall_pre_impl_mlockall(long long flags); void __sanitizer_syscall_post_impl_mlockall(long long res, long long flags); void __sanitizer_syscall_pre_impl_munlockall(void); void __sanitizer_syscall_post_impl_munlockall(long long res); void __sanitizer_syscall_pre_impl_compat_50___sigtimedwait(long long set, long long info, long long timeout); void __sanitizer_syscall_post_impl_compat_50___sigtimedwait(long long res, long long set, long long info, long long timeout); void __sanitizer_syscall_pre_impl_sigqueueinfo(long long pid, long long info); void __sanitizer_syscall_post_impl_sigqueueinfo(long long res, long long pid, long long info); void __sanitizer_syscall_pre_impl_modctl(long long cmd, long long arg); void __sanitizer_syscall_post_impl_modctl(long long res, long long cmd, long long arg); void __sanitizer_syscall_pre_impl__ksem_init(long long value, long long idp); void __sanitizer_syscall_post_impl__ksem_init(long long res, long long value, long long idp); void __sanitizer_syscall_pre_impl__ksem_open(long long name, long long oflag, long long mode, long long value, long long idp); void __sanitizer_syscall_post_impl__ksem_open(long long res, long long name, long long oflag, long long mode, long long value, long long idp); void __sanitizer_syscall_pre_impl__ksem_unlink(long long name); void 
__sanitizer_syscall_post_impl__ksem_unlink(long long res, long long name); void __sanitizer_syscall_pre_impl__ksem_close(long long id); void __sanitizer_syscall_post_impl__ksem_close(long long res, long long id); void __sanitizer_syscall_pre_impl__ksem_post(long long id); void __sanitizer_syscall_post_impl__ksem_post(long long res, long long id); void __sanitizer_syscall_pre_impl__ksem_wait(long long id); void __sanitizer_syscall_post_impl__ksem_wait(long long res, long long id); void __sanitizer_syscall_pre_impl__ksem_trywait(long long id); void __sanitizer_syscall_post_impl__ksem_trywait(long long res, long long id); void __sanitizer_syscall_pre_impl__ksem_getvalue(long long id, long long value); void __sanitizer_syscall_post_impl__ksem_getvalue(long long res, long long id, long long value); void __sanitizer_syscall_pre_impl__ksem_destroy(long long id); void __sanitizer_syscall_post_impl__ksem_destroy(long long res, long long id); void __sanitizer_syscall_pre_impl__ksem_timedwait(long long id, long long abstime); void __sanitizer_syscall_post_impl__ksem_timedwait(long long res, long long id, long long abstime); void __sanitizer_syscall_pre_impl_mq_open(long long name, long long oflag, long long mode, long long attr); void __sanitizer_syscall_post_impl_mq_open(long long res, long long name, long long oflag, long long mode, long long attr); void __sanitizer_syscall_pre_impl_mq_close(long long mqdes); void __sanitizer_syscall_post_impl_mq_close(long long res, long long mqdes); void __sanitizer_syscall_pre_impl_mq_unlink(long long name); void __sanitizer_syscall_post_impl_mq_unlink(long long res, long long name); void __sanitizer_syscall_pre_impl_mq_getattr(long long mqdes, long long mqstat); void __sanitizer_syscall_post_impl_mq_getattr(long long res, long long mqdes, long long mqstat); void __sanitizer_syscall_pre_impl_mq_setattr(long long mqdes, long long mqstat, long long omqstat); void __sanitizer_syscall_post_impl_mq_setattr(long long res, long long mqdes, long long mqstat, long long omqstat); void __sanitizer_syscall_pre_impl_mq_notify(long long mqdes, long long notification); void __sanitizer_syscall_post_impl_mq_notify(long long res, long long mqdes, long long notification); void __sanitizer_syscall_pre_impl_mq_send(long long mqdes, long long msg_ptr, long long msg_len, long long msg_prio); void __sanitizer_syscall_post_impl_mq_send(long long res, long long mqdes, long long msg_ptr, long long msg_len, long long msg_prio); void __sanitizer_syscall_pre_impl_mq_receive(long long mqdes, long long msg_ptr, long long msg_len, long long msg_prio); void __sanitizer_syscall_post_impl_mq_receive(long long res, long long mqdes, long long msg_ptr, long long msg_len, long long msg_prio); void __sanitizer_syscall_pre_impl_compat_50_mq_timedsend(long long mqdes, long long msg_ptr, long long msg_len, long long msg_prio, long long abs_timeout); void __sanitizer_syscall_post_impl_compat_50_mq_timedsend( long long res, long long mqdes, long long msg_ptr, long long msg_len, long long msg_prio, long long abs_timeout); void __sanitizer_syscall_pre_impl_compat_50_mq_timedreceive( long long mqdes, long long msg_ptr, long long msg_len, long long msg_prio, long long abs_timeout); void __sanitizer_syscall_post_impl_compat_50_mq_timedreceive( long long res, long long mqdes, long long msg_ptr, long long msg_len, long long msg_prio, long long abs_timeout); /* syscall 267 has been skipped */ /* syscall 268 has been skipped */ /* syscall 269 has been skipped */ void __sanitizer_syscall_pre_impl___posix_rename(long 
long from, long long to); void __sanitizer_syscall_post_impl___posix_rename(long long res, long long from, long long to); void __sanitizer_syscall_pre_impl_swapctl(long long cmd, long long arg, long long misc); void __sanitizer_syscall_post_impl_swapctl(long long res, long long cmd, long long arg, long long misc); void __sanitizer_syscall_pre_impl_compat_30_getdents(long long fd, long long buf, long long count); void __sanitizer_syscall_post_impl_compat_30_getdents(long long res, long long fd, long long buf, long long count); void __sanitizer_syscall_pre_impl_minherit(long long addr, long long len, long long inherit); void __sanitizer_syscall_post_impl_minherit(long long res, long long addr, long long len, long long inherit); void __sanitizer_syscall_pre_impl_lchmod(long long path, long long mode); void __sanitizer_syscall_post_impl_lchmod(long long res, long long path, long long mode); void __sanitizer_syscall_pre_impl_lchown(long long path, long long uid, long long gid); void __sanitizer_syscall_post_impl_lchown(long long res, long long path, long long uid, long long gid); void __sanitizer_syscall_pre_impl_compat_50_lutimes(long long path, long long tptr); void __sanitizer_syscall_post_impl_compat_50_lutimes(long long res, long long path, long long tptr); void __sanitizer_syscall_pre_impl___msync13(long long addr, long long len, long long flags); void __sanitizer_syscall_post_impl___msync13(long long res, long long addr, long long len, long long flags); void __sanitizer_syscall_pre_impl_compat_30___stat13(long long path, long long ub); void __sanitizer_syscall_post_impl_compat_30___stat13(long long res, long long path, long long ub); void __sanitizer_syscall_pre_impl_compat_30___fstat13(long long fd, long long sb); void __sanitizer_syscall_post_impl_compat_30___fstat13(long long res, long long fd, long long sb); void __sanitizer_syscall_pre_impl_compat_30___lstat13(long long path, long long ub); void __sanitizer_syscall_post_impl_compat_30___lstat13(long long res, long long path, long long ub); void __sanitizer_syscall_pre_impl___sigaltstack14(long long nss, long long oss); void __sanitizer_syscall_post_impl___sigaltstack14(long long res, long long nss, long long oss); void __sanitizer_syscall_pre_impl___vfork14(void); void __sanitizer_syscall_post_impl___vfork14(long long res); void __sanitizer_syscall_pre_impl___posix_chown(long long path, long long uid, long long gid); void __sanitizer_syscall_post_impl___posix_chown(long long res, long long path, long long uid, long long gid); void __sanitizer_syscall_pre_impl___posix_fchown(long long fd, long long uid, long long gid); void __sanitizer_syscall_post_impl___posix_fchown(long long res, long long fd, long long uid, long long gid); void __sanitizer_syscall_pre_impl___posix_lchown(long long path, long long uid, long long gid); void __sanitizer_syscall_post_impl___posix_lchown(long long res, long long path, long long uid, long long gid); void __sanitizer_syscall_pre_impl_getsid(long long pid); void __sanitizer_syscall_post_impl_getsid(long long res, long long pid); void __sanitizer_syscall_pre_impl___clone(long long flags, long long stack); void __sanitizer_syscall_post_impl___clone(long long res, long long flags, long long stack); void __sanitizer_syscall_pre_impl_fktrace(long long fd, long long ops, long long facs, long long pid); void __sanitizer_syscall_post_impl_fktrace(long long res, long long fd, long long ops, long long facs, long long pid); void __sanitizer_syscall_pre_impl_preadv(long long fd, long long iovp, long long iovcnt, 
long long PAD, long long offset); void __sanitizer_syscall_post_impl_preadv(long long res, long long fd, long long iovp, long long iovcnt, long long PAD, long long offset); void __sanitizer_syscall_pre_impl_pwritev(long long fd, long long iovp, long long iovcnt, long long PAD, long long offset); void __sanitizer_syscall_post_impl_pwritev(long long res, long long fd, long long iovp, long long iovcnt, long long PAD, long long offset); void __sanitizer_syscall_pre_impl_compat_16___sigaction14(long long signum, long long nsa, long long osa); void __sanitizer_syscall_post_impl_compat_16___sigaction14(long long res, long long signum, long long nsa, long long osa); void __sanitizer_syscall_pre_impl___sigpending14(long long set); void __sanitizer_syscall_post_impl___sigpending14(long long res, long long set); void __sanitizer_syscall_pre_impl___sigprocmask14(long long how, long long set, long long oset); void __sanitizer_syscall_post_impl___sigprocmask14(long long res, long long how, long long set, long long oset); void __sanitizer_syscall_pre_impl___sigsuspend14(long long set); void __sanitizer_syscall_post_impl___sigsuspend14(long long res, long long set); void __sanitizer_syscall_pre_impl_compat_16___sigreturn14(long long sigcntxp); void __sanitizer_syscall_post_impl_compat_16___sigreturn14(long long res, long long sigcntxp); void __sanitizer_syscall_pre_impl___getcwd(long long bufp, long long length); void __sanitizer_syscall_post_impl___getcwd(long long res, long long bufp, long long length); void __sanitizer_syscall_pre_impl_fchroot(long long fd); void __sanitizer_syscall_post_impl_fchroot(long long res, long long fd); void __sanitizer_syscall_pre_impl_compat_30_fhopen(long long fhp, long long flags); void __sanitizer_syscall_post_impl_compat_30_fhopen(long long res, long long fhp, long long flags); void __sanitizer_syscall_pre_impl_compat_30_fhstat(long long fhp, long long sb); void __sanitizer_syscall_post_impl_compat_30_fhstat(long long res, long long fhp, long long sb); void __sanitizer_syscall_pre_impl_compat_20_fhstatfs(long long fhp, long long buf); void __sanitizer_syscall_post_impl_compat_20_fhstatfs(long long res, long long fhp, long long buf); void __sanitizer_syscall_pre_impl_compat_50_____semctl13(long long semid, long long semnum, long long cmd, long long arg); void __sanitizer_syscall_post_impl_compat_50_____semctl13(long long res, long long semid, long long semnum, long long cmd, long long arg); void __sanitizer_syscall_pre_impl_compat_50___msgctl13(long long msqid, long long cmd, long long buf); void __sanitizer_syscall_post_impl_compat_50___msgctl13(long long res, long long msqid, long long cmd, long long buf); void __sanitizer_syscall_pre_impl_compat_50___shmctl13(long long shmid, long long cmd, long long buf); void __sanitizer_syscall_post_impl_compat_50___shmctl13(long long res, long long shmid, long long cmd, long long buf); void __sanitizer_syscall_pre_impl_lchflags(long long path, long long flags); void __sanitizer_syscall_post_impl_lchflags(long long res, long long path, long long flags); void __sanitizer_syscall_pre_impl_issetugid(void); void __sanitizer_syscall_post_impl_issetugid(long long res); void __sanitizer_syscall_pre_impl_utrace(long long label, long long addr, long long len); void __sanitizer_syscall_post_impl_utrace(long long res, long long label, long long addr, long long len); void __sanitizer_syscall_pre_impl_getcontext(long long ucp); void __sanitizer_syscall_post_impl_getcontext(long long res, long long ucp); void 
__sanitizer_syscall_pre_impl_setcontext(long long ucp); void __sanitizer_syscall_post_impl_setcontext(long long res, long long ucp); void __sanitizer_syscall_pre_impl__lwp_create(long long ucp, long long flags, long long new_lwp); void __sanitizer_syscall_post_impl__lwp_create(long long res, long long ucp, long long flags, long long new_lwp); void __sanitizer_syscall_pre_impl__lwp_exit(void); void __sanitizer_syscall_post_impl__lwp_exit(long long res); void __sanitizer_syscall_pre_impl__lwp_self(void); void __sanitizer_syscall_post_impl__lwp_self(long long res); void __sanitizer_syscall_pre_impl__lwp_wait(long long wait_for, long long departed); void __sanitizer_syscall_post_impl__lwp_wait(long long res, long long wait_for, long long departed); void __sanitizer_syscall_pre_impl__lwp_suspend(long long target); void __sanitizer_syscall_post_impl__lwp_suspend(long long res, long long target); void __sanitizer_syscall_pre_impl__lwp_continue(long long target); void __sanitizer_syscall_post_impl__lwp_continue(long long res, long long target); void __sanitizer_syscall_pre_impl__lwp_wakeup(long long target); void __sanitizer_syscall_post_impl__lwp_wakeup(long long res, long long target); void __sanitizer_syscall_pre_impl__lwp_getprivate(void); void __sanitizer_syscall_post_impl__lwp_getprivate(long long res); void __sanitizer_syscall_pre_impl__lwp_setprivate(long long ptr); void __sanitizer_syscall_post_impl__lwp_setprivate(long long res, long long ptr); void __sanitizer_syscall_pre_impl__lwp_kill(long long target, long long signo); void __sanitizer_syscall_post_impl__lwp_kill(long long res, long long target, long long signo); void __sanitizer_syscall_pre_impl__lwp_detach(long long target); void __sanitizer_syscall_post_impl__lwp_detach(long long res, long long target); void __sanitizer_syscall_pre_impl_compat_50__lwp_park(long long ts, long long unpark, long long hint, long long unparkhint); void __sanitizer_syscall_post_impl_compat_50__lwp_park(long long res, long long ts, long long unpark, long long hint, long long unparkhint); void __sanitizer_syscall_pre_impl__lwp_unpark(long long target, long long hint); void __sanitizer_syscall_post_impl__lwp_unpark(long long res, long long target, long long hint); void __sanitizer_syscall_pre_impl__lwp_unpark_all(long long targets, long long ntargets, long long hint); void __sanitizer_syscall_post_impl__lwp_unpark_all(long long res, long long targets, long long ntargets, long long hint); void __sanitizer_syscall_pre_impl__lwp_setname(long long target, long long name); void __sanitizer_syscall_post_impl__lwp_setname(long long res, long long target, long long name); void __sanitizer_syscall_pre_impl__lwp_getname(long long target, long long name, long long len); void __sanitizer_syscall_post_impl__lwp_getname(long long res, long long target, long long name, long long len); void __sanitizer_syscall_pre_impl__lwp_ctl(long long features, long long address); void __sanitizer_syscall_post_impl__lwp_ctl(long long res, long long features, long long address); /* syscall 326 has been skipped */ /* syscall 327 has been skipped */ /* syscall 328 has been skipped */ /* syscall 329 has been skipped */ void __sanitizer_syscall_pre_impl_compat_60_sa_register( long long newv, long long oldv, long long flags, long long stackinfo_offset); void __sanitizer_syscall_post_impl_compat_60_sa_register( long long res, long long newv, long long oldv, long long flags, long long stackinfo_offset); void __sanitizer_syscall_pre_impl_compat_60_sa_stacks(long long num, long long stacks); 
void __sanitizer_syscall_post_impl_compat_60_sa_stacks(long long res, long long num, long long stacks); void __sanitizer_syscall_pre_impl_compat_60_sa_enable(void); void __sanitizer_syscall_post_impl_compat_60_sa_enable(long long res); void __sanitizer_syscall_pre_impl_compat_60_sa_setconcurrency( long long concurrency); void __sanitizer_syscall_post_impl_compat_60_sa_setconcurrency( long long res, long long concurrency); void __sanitizer_syscall_pre_impl_compat_60_sa_yield(void); void __sanitizer_syscall_post_impl_compat_60_sa_yield(long long res); void __sanitizer_syscall_pre_impl_compat_60_sa_preempt(long long sa_id); void __sanitizer_syscall_post_impl_compat_60_sa_preempt(long long res, long long sa_id); /* syscall 336 has been skipped */ /* syscall 337 has been skipped */ /* syscall 338 has been skipped */ /* syscall 339 has been skipped */ void __sanitizer_syscall_pre_impl___sigaction_sigtramp(long long signum, long long nsa, long long osa, long long tramp, long long vers); void __sanitizer_syscall_post_impl___sigaction_sigtramp( long long res, long long signum, long long nsa, long long osa, long long tramp, long long vers); /* syscall 341 has been skipped */ /* syscall 342 has been skipped */ void __sanitizer_syscall_pre_impl_rasctl(long long addr, long long len, long long op); void __sanitizer_syscall_post_impl_rasctl(long long res, long long addr, long long len, long long op); void __sanitizer_syscall_pre_impl_kqueue(void); void __sanitizer_syscall_post_impl_kqueue(long long res); void __sanitizer_syscall_pre_impl_compat_50_kevent( long long fd, long long changelist, long long nchanges, long long eventlist, long long nevents, long long timeout); void __sanitizer_syscall_post_impl_compat_50_kevent( long long res, long long fd, long long changelist, long long nchanges, long long eventlist, long long nevents, long long timeout); void __sanitizer_syscall_pre_impl__sched_setparam(long long pid, long long lid, long long policy, long long params); void __sanitizer_syscall_post_impl__sched_setparam(long long res, long long pid, long long lid, long long policy, long long params); void __sanitizer_syscall_pre_impl__sched_getparam(long long pid, long long lid, long long policy, long long params); void __sanitizer_syscall_post_impl__sched_getparam(long long res, long long pid, long long lid, long long policy, long long params); void __sanitizer_syscall_pre_impl__sched_setaffinity(long long pid, long long lid, long long size, long long cpuset); void __sanitizer_syscall_post_impl__sched_setaffinity(long long res, long long pid, long long lid, long long size, long long cpuset); void __sanitizer_syscall_pre_impl__sched_getaffinity(long long pid, long long lid, long long size, long long cpuset); void __sanitizer_syscall_post_impl__sched_getaffinity(long long res, long long pid, long long lid, long long size, long long cpuset); void __sanitizer_syscall_pre_impl_sched_yield(void); void __sanitizer_syscall_post_impl_sched_yield(long long res); void __sanitizer_syscall_pre_impl__sched_protect(long long priority); void __sanitizer_syscall_post_impl__sched_protect(long long res, long long priority); /* syscall 352 has been skipped */ /* syscall 353 has been skipped */ void __sanitizer_syscall_pre_impl_fsync_range(long long fd, long long flags, long long start, long long length); void __sanitizer_syscall_post_impl_fsync_range(long long res, long long fd, long long flags, long long start, long long length); void __sanitizer_syscall_pre_impl_uuidgen(long long store, long long count); void 
__sanitizer_syscall_post_impl_uuidgen(long long res, long long store, long long count); void __sanitizer_syscall_pre_impl_compat_90_getvfsstat(long long buf, long long bufsize, long long flags); void __sanitizer_syscall_post_impl_compat_90_getvfsstat(long long res, long long buf, long long bufsize, long long flags); void __sanitizer_syscall_pre_impl_compat_90_statvfs1(long long path, long long buf, long long flags); void __sanitizer_syscall_post_impl_compat_90_statvfs1(long long res, long long path, long long buf, long long flags); void __sanitizer_syscall_pre_impl_compat_90_fstatvfs1(long long fd, long long buf, long long flags); void __sanitizer_syscall_post_impl_compat_90_fstatvfs1(long long res, long long fd, long long buf, long long flags); void __sanitizer_syscall_pre_impl_compat_30_fhstatvfs1(long long fhp, long long buf, long long flags); void __sanitizer_syscall_post_impl_compat_30_fhstatvfs1(long long res, long long fhp, long long buf, long long flags); void __sanitizer_syscall_pre_impl_extattrctl(long long path, long long cmd, long long filename, long long attrnamespace, long long attrname); void __sanitizer_syscall_post_impl_extattrctl(long long res, long long path, long long cmd, long long filename, long long attrnamespace, long long attrname); void __sanitizer_syscall_pre_impl_extattr_set_file(long long path, long long attrnamespace, long long attrname, long long data, long long nbytes); void __sanitizer_syscall_post_impl_extattr_set_file( long long res, long long path, long long attrnamespace, long long attrname, long long data, long long nbytes); void __sanitizer_syscall_pre_impl_extattr_get_file(long long path, long long attrnamespace, long long attrname, long long data, long long nbytes); void __sanitizer_syscall_post_impl_extattr_get_file( long long res, long long path, long long attrnamespace, long long attrname, long long data, long long nbytes); void __sanitizer_syscall_pre_impl_extattr_delete_file(long long path, long long attrnamespace, long long attrname); void __sanitizer_syscall_post_impl_extattr_delete_file(long long res, long long path, long long attrnamespace, long long attrname); void __sanitizer_syscall_pre_impl_extattr_set_fd(long long fd, long long attrnamespace, long long attrname, long long data, long long nbytes); void __sanitizer_syscall_post_impl_extattr_set_fd(long long res, long long fd, long long attrnamespace, long long attrname, long long data, long long nbytes); void __sanitizer_syscall_pre_impl_extattr_get_fd(long long fd, long long attrnamespace, long long attrname, long long data, long long nbytes); void __sanitizer_syscall_post_impl_extattr_get_fd(long long res, long long fd, long long attrnamespace, long long attrname, long long data, long long nbytes); void __sanitizer_syscall_pre_impl_extattr_delete_fd(long long fd, long long attrnamespace, long long attrname); void __sanitizer_syscall_post_impl_extattr_delete_fd(long long res, long long fd, long long attrnamespace, long long attrname); void __sanitizer_syscall_pre_impl_extattr_set_link(long long path, long long attrnamespace, long long attrname, long long data, long long nbytes); void __sanitizer_syscall_post_impl_extattr_set_link( long long res, long long path, long long attrnamespace, long long attrname, long long data, long long nbytes); void __sanitizer_syscall_pre_impl_extattr_get_link(long long path, long long attrnamespace, long long attrname, long long data, long long nbytes); void __sanitizer_syscall_post_impl_extattr_get_link( long long res, long long path, long long 
attrnamespace, long long attrname, long long data, long long nbytes); void __sanitizer_syscall_pre_impl_extattr_delete_link(long long path, long long attrnamespace, long long attrname); void __sanitizer_syscall_post_impl_extattr_delete_link(long long res, long long path, long long attrnamespace, long long attrname); void __sanitizer_syscall_pre_impl_extattr_list_fd(long long fd, long long attrnamespace, long long data, long long nbytes); void __sanitizer_syscall_post_impl_extattr_list_fd(long long res, long long fd, long long attrnamespace, long long data, long long nbytes); void __sanitizer_syscall_pre_impl_extattr_list_file(long long path, long long attrnamespace, long long data, long long nbytes); void __sanitizer_syscall_post_impl_extattr_list_file(long long res, long long path, long long attrnamespace, long long data, long long nbytes); void __sanitizer_syscall_pre_impl_extattr_list_link(long long path, long long attrnamespace, long long data, long long nbytes); void __sanitizer_syscall_post_impl_extattr_list_link(long long res, long long path, long long attrnamespace, long long data, long long nbytes); void __sanitizer_syscall_pre_impl_compat_50_pselect(long long nd, long long in, long long ou, long long ex, long long ts, long long mask); void __sanitizer_syscall_post_impl_compat_50_pselect(long long res, long long nd, long long in, long long ou, long long ex, long long ts, long long mask); void __sanitizer_syscall_pre_impl_compat_50_pollts(long long fds, long long nfds, long long ts, long long mask); void __sanitizer_syscall_post_impl_compat_50_pollts( long long res, long long fds, long long nfds, long long ts, long long mask); void __sanitizer_syscall_pre_impl_setxattr(long long path, long long name, long long value, long long size, long long flags); void __sanitizer_syscall_post_impl_setxattr(long long res, long long path, long long name, long long value, long long size, long long flags); void __sanitizer_syscall_pre_impl_lsetxattr(long long path, long long name, long long value, long long size, long long flags); void __sanitizer_syscall_post_impl_lsetxattr(long long res, long long path, long long name, long long value, long long size, long long flags); void __sanitizer_syscall_pre_impl_fsetxattr(long long fd, long long name, long long value, long long size, long long flags); void __sanitizer_syscall_post_impl_fsetxattr(long long res, long long fd, long long name, long long value, long long size, long long flags); void __sanitizer_syscall_pre_impl_getxattr(long long path, long long name, long long value, long long size); void __sanitizer_syscall_post_impl_getxattr(long long res, long long path, long long name, long long value, long long size); void __sanitizer_syscall_pre_impl_lgetxattr(long long path, long long name, long long value, long long size); void __sanitizer_syscall_post_impl_lgetxattr(long long res, long long path, long long name, long long value, long long size); void __sanitizer_syscall_pre_impl_fgetxattr(long long fd, long long name, long long value, long long size); void __sanitizer_syscall_post_impl_fgetxattr(long long res, long long fd, long long name, long long value, long long size); void __sanitizer_syscall_pre_impl_listxattr(long long path, long long list, long long size); void __sanitizer_syscall_post_impl_listxattr(long long res, long long path, long long list, long long size); void __sanitizer_syscall_pre_impl_llistxattr(long long path, long long list, long long size); void __sanitizer_syscall_post_impl_llistxattr(long long res, long long path, long long 
list, long long size); void __sanitizer_syscall_pre_impl_flistxattr(long long fd, long long list, long long size); void __sanitizer_syscall_post_impl_flistxattr(long long res, long long fd, long long list, long long size); void __sanitizer_syscall_pre_impl_removexattr(long long path, long long name); void __sanitizer_syscall_post_impl_removexattr(long long res, long long path, long long name); void __sanitizer_syscall_pre_impl_lremovexattr(long long path, long long name); void __sanitizer_syscall_post_impl_lremovexattr(long long res, long long path, long long name); void __sanitizer_syscall_pre_impl_fremovexattr(long long fd, long long name); void __sanitizer_syscall_post_impl_fremovexattr(long long res, long long fd, long long name); void __sanitizer_syscall_pre_impl_compat_50___stat30(long long path, long long ub); void __sanitizer_syscall_post_impl_compat_50___stat30(long long res, long long path, long long ub); void __sanitizer_syscall_pre_impl_compat_50___fstat30(long long fd, long long sb); void __sanitizer_syscall_post_impl_compat_50___fstat30(long long res, long long fd, long long sb); void __sanitizer_syscall_pre_impl_compat_50___lstat30(long long path, long long ub); void __sanitizer_syscall_post_impl_compat_50___lstat30(long long res, long long path, long long ub); void __sanitizer_syscall_pre_impl___getdents30(long long fd, long long buf, long long count); void __sanitizer_syscall_post_impl___getdents30(long long res, long long fd, long long buf, long long count); void __sanitizer_syscall_pre_impl_posix_fadvise(long long); void __sanitizer_syscall_post_impl_posix_fadvise(long long res, long long); void __sanitizer_syscall_pre_impl_compat_30___fhstat30(long long fhp, long long sb); void __sanitizer_syscall_post_impl_compat_30___fhstat30(long long res, long long fhp, long long sb); void __sanitizer_syscall_pre_impl_compat_50___ntp_gettime30(long long ntvp); void __sanitizer_syscall_post_impl_compat_50___ntp_gettime30(long long res, long long ntvp); void __sanitizer_syscall_pre_impl___socket30(long long domain, long long type, long long protocol); void __sanitizer_syscall_post_impl___socket30(long long res, long long domain, long long type, long long protocol); void __sanitizer_syscall_pre_impl___getfh30(long long fname, long long fhp, long long fh_size); void __sanitizer_syscall_post_impl___getfh30(long long res, long long fname, long long fhp, long long fh_size); void __sanitizer_syscall_pre_impl___fhopen40(long long fhp, long long fh_size, long long flags); void __sanitizer_syscall_post_impl___fhopen40(long long res, long long fhp, long long fh_size, long long flags); void __sanitizer_syscall_pre_impl_compat_90_fhstatvfs1(long long fhp, long long fh_size, long long buf, long long flags); void __sanitizer_syscall_post_impl_compat_90_fhstatvfs1(long long res, long long fhp, long long fh_size, long long buf, long long flags); void __sanitizer_syscall_pre_impl_compat_50___fhstat40(long long fhp, long long fh_size, long long sb); void __sanitizer_syscall_post_impl_compat_50___fhstat40(long long res, long long fhp, long long fh_size, long long sb); void __sanitizer_syscall_pre_impl_aio_cancel(long long fildes, long long aiocbp); void __sanitizer_syscall_post_impl_aio_cancel(long long res, long long fildes, long long aiocbp); void __sanitizer_syscall_pre_impl_aio_error(long long aiocbp); void __sanitizer_syscall_post_impl_aio_error(long long res, long long aiocbp); void __sanitizer_syscall_pre_impl_aio_fsync(long long op, long long aiocbp); void 
__sanitizer_syscall_post_impl_aio_fsync(long long res, long long op, long long aiocbp); void __sanitizer_syscall_pre_impl_aio_read(long long aiocbp); void __sanitizer_syscall_post_impl_aio_read(long long res, long long aiocbp); void __sanitizer_syscall_pre_impl_aio_return(long long aiocbp); void __sanitizer_syscall_post_impl_aio_return(long long res, long long aiocbp); void __sanitizer_syscall_pre_impl_compat_50_aio_suspend(long long list, long long nent, long long timeout); void __sanitizer_syscall_post_impl_compat_50_aio_suspend(long long res, long long list, long long nent, long long timeout); void __sanitizer_syscall_pre_impl_aio_write(long long aiocbp); void __sanitizer_syscall_post_impl_aio_write(long long res, long long aiocbp); void __sanitizer_syscall_pre_impl_lio_listio(long long mode, long long list, long long nent, long long sig); void __sanitizer_syscall_post_impl_lio_listio(long long res, long long mode, long long list, long long nent, long long sig); /* syscall 407 has been skipped */ /* syscall 408 has been skipped */ /* syscall 409 has been skipped */ void __sanitizer_syscall_pre_impl___mount50(long long type, long long path, long long flags, long long data, long long data_len); void __sanitizer_syscall_post_impl___mount50(long long res, long long type, long long path, long long flags, long long data, long long data_len); void __sanitizer_syscall_pre_impl_mremap(long long old_address, long long old_size, long long new_address, long long new_size, long long flags); void __sanitizer_syscall_post_impl_mremap(long long res, long long old_address, long long old_size, long long new_address, long long new_size, long long flags); void __sanitizer_syscall_pre_impl_pset_create(long long psid); void __sanitizer_syscall_post_impl_pset_create(long long res, long long psid); void __sanitizer_syscall_pre_impl_pset_destroy(long long psid); void __sanitizer_syscall_post_impl_pset_destroy(long long res, long long psid); void __sanitizer_syscall_pre_impl_pset_assign(long long psid, long long cpuid, long long opsid); void __sanitizer_syscall_post_impl_pset_assign(long long res, long long psid, long long cpuid, long long opsid); void __sanitizer_syscall_pre_impl__pset_bind(long long idtype, long long first_id, long long second_id, long long psid, long long opsid); void __sanitizer_syscall_post_impl__pset_bind(long long res, long long idtype, long long first_id, long long second_id, long long psid, long long opsid); void __sanitizer_syscall_pre_impl___posix_fadvise50(long long fd, long long PAD, long long offset, long long len, long long advice); void __sanitizer_syscall_post_impl___posix_fadvise50( long long res, long long fd, long long PAD, long long offset, long long len, long long advice); void __sanitizer_syscall_pre_impl___select50(long long nd, long long in, long long ou, long long ex, long long tv); void __sanitizer_syscall_post_impl___select50(long long res, long long nd, long long in, long long ou, long long ex, long long tv); void __sanitizer_syscall_pre_impl___gettimeofday50(long long tp, long long tzp); void __sanitizer_syscall_post_impl___gettimeofday50(long long res, long long tp, long long tzp); void __sanitizer_syscall_pre_impl___settimeofday50(long long tv, long long tzp); void __sanitizer_syscall_post_impl___settimeofday50(long long res, long long tv, long long tzp); void __sanitizer_syscall_pre_impl___utimes50(long long path, long long tptr); void __sanitizer_syscall_post_impl___utimes50(long long res, long long path, long long tptr); void 
__sanitizer_syscall_pre_impl___adjtime50(long long delta, long long olddelta); void __sanitizer_syscall_post_impl___adjtime50(long long res, long long delta, long long olddelta); void __sanitizer_syscall_pre_impl___lfs_segwait50(long long fsidp, long long tv); void __sanitizer_syscall_post_impl___lfs_segwait50(long long res, long long fsidp, long long tv); void __sanitizer_syscall_pre_impl___futimes50(long long fd, long long tptr); void __sanitizer_syscall_post_impl___futimes50(long long res, long long fd, long long tptr); void __sanitizer_syscall_pre_impl___lutimes50(long long path, long long tptr); void __sanitizer_syscall_post_impl___lutimes50(long long res, long long path, long long tptr); void __sanitizer_syscall_pre_impl___setitimer50(long long which, long long itv, long long oitv); void __sanitizer_syscall_post_impl___setitimer50(long long res, long long which, long long itv, long long oitv); void __sanitizer_syscall_pre_impl___getitimer50(long long which, long long itv); void __sanitizer_syscall_post_impl___getitimer50(long long res, long long which, long long itv); void __sanitizer_syscall_pre_impl___clock_gettime50(long long clock_id, long long tp); void __sanitizer_syscall_post_impl___clock_gettime50(long long res, long long clock_id, long long tp); void __sanitizer_syscall_pre_impl___clock_settime50(long long clock_id, long long tp); void __sanitizer_syscall_post_impl___clock_settime50(long long res, long long clock_id, long long tp); void __sanitizer_syscall_pre_impl___clock_getres50(long long clock_id, long long tp); void __sanitizer_syscall_post_impl___clock_getres50(long long res, long long clock_id, long long tp); void __sanitizer_syscall_pre_impl___nanosleep50(long long rqtp, long long rmtp); void __sanitizer_syscall_post_impl___nanosleep50(long long res, long long rqtp, long long rmtp); void __sanitizer_syscall_pre_impl_____sigtimedwait50(long long set, long long info, long long timeout); void __sanitizer_syscall_post_impl_____sigtimedwait50(long long res, long long set, long long info, long long timeout); void __sanitizer_syscall_pre_impl___mq_timedsend50(long long mqdes, long long msg_ptr, long long msg_len, long long msg_prio, long long abs_timeout); void __sanitizer_syscall_post_impl___mq_timedsend50( long long res, long long mqdes, long long msg_ptr, long long msg_len, long long msg_prio, long long abs_timeout); void __sanitizer_syscall_pre_impl___mq_timedreceive50(long long mqdes, long long msg_ptr, long long msg_len, long long msg_prio, long long abs_timeout); void __sanitizer_syscall_post_impl___mq_timedreceive50( long long res, long long mqdes, long long msg_ptr, long long msg_len, long long msg_prio, long long abs_timeout); void __sanitizer_syscall_pre_impl_compat_60__lwp_park(long long ts, long long unpark, long long hint, long long unparkhint); void __sanitizer_syscall_post_impl_compat_60__lwp_park(long long res, long long ts, long long unpark, long long hint, long long unparkhint); void __sanitizer_syscall_pre_impl___kevent50(long long fd, long long changelist, long long nchanges, long long eventlist, long long nevents, long long timeout); void __sanitizer_syscall_post_impl___kevent50( long long res, long long fd, long long changelist, long long nchanges, long long eventlist, long long nevents, long long timeout); void __sanitizer_syscall_pre_impl___pselect50(long long nd, long long in, long long ou, long long ex, long long ts, long long mask); void __sanitizer_syscall_post_impl___pselect50(long long res, long long nd, long long in, long long ou, long long 
ex, long long ts, long long mask); void __sanitizer_syscall_pre_impl___pollts50(long long fds, long long nfds, long long ts, long long mask); void __sanitizer_syscall_post_impl___pollts50(long long res, long long fds, long long nfds, long long ts, long long mask); void __sanitizer_syscall_pre_impl___aio_suspend50(long long list, long long nent, long long timeout); void __sanitizer_syscall_post_impl___aio_suspend50(long long res, long long list, long long nent, long long timeout); void __sanitizer_syscall_pre_impl___stat50(long long path, long long ub); void __sanitizer_syscall_post_impl___stat50(long long res, long long path, long long ub); void __sanitizer_syscall_pre_impl___fstat50(long long fd, long long sb); void __sanitizer_syscall_post_impl___fstat50(long long res, long long fd, long long sb); void __sanitizer_syscall_pre_impl___lstat50(long long path, long long ub); void __sanitizer_syscall_post_impl___lstat50(long long res, long long path, long long ub); void __sanitizer_syscall_pre_impl_____semctl50(long long semid, long long semnum, long long cmd, long long arg); void __sanitizer_syscall_post_impl_____semctl50(long long res, long long semid, long long semnum, long long cmd, long long arg); void __sanitizer_syscall_pre_impl___shmctl50(long long shmid, long long cmd, long long buf); void __sanitizer_syscall_post_impl___shmctl50(long long res, long long shmid, long long cmd, long long buf); void __sanitizer_syscall_pre_impl___msgctl50(long long msqid, long long cmd, long long buf); void __sanitizer_syscall_post_impl___msgctl50(long long res, long long msqid, long long cmd, long long buf); void __sanitizer_syscall_pre_impl___getrusage50(long long who, long long rusage); void __sanitizer_syscall_post_impl___getrusage50(long long res, long long who, long long rusage); void __sanitizer_syscall_pre_impl___timer_settime50(long long timerid, long long flags, long long value, long long ovalue); void __sanitizer_syscall_post_impl___timer_settime50(long long res, long long timerid, long long flags, long long value, long long ovalue); void __sanitizer_syscall_pre_impl___timer_gettime50(long long timerid, long long value); void __sanitizer_syscall_post_impl___timer_gettime50(long long res, long long timerid, long long value); #if defined(NTP) || !defined(_KERNEL_OPT) void __sanitizer_syscall_pre_impl___ntp_gettime50(long long ntvp); void __sanitizer_syscall_post_impl___ntp_gettime50(long long res, long long ntvp); #else /* syscall 448 has been skipped */ #endif void __sanitizer_syscall_pre_impl___wait450(long long pid, long long status, long long options, long long rusage); void __sanitizer_syscall_post_impl___wait450(long long res, long long pid, long long status, long long options, long long rusage); void __sanitizer_syscall_pre_impl___mknod50(long long path, long long mode, long long dev); void __sanitizer_syscall_post_impl___mknod50(long long res, long long path, long long mode, long long dev); void __sanitizer_syscall_pre_impl___fhstat50(long long fhp, long long fh_size, long long sb); void __sanitizer_syscall_post_impl___fhstat50(long long res, long long fhp, long long fh_size, long long sb); /* syscall 452 has been skipped */ void __sanitizer_syscall_pre_impl_pipe2(long long fildes, long long flags); void __sanitizer_syscall_post_impl_pipe2(long long res, long long fildes, long long flags); void __sanitizer_syscall_pre_impl_dup3(long long from, long long to, long long flags); void __sanitizer_syscall_post_impl_dup3(long long res, long long from, long long to, long long flags); void 
__sanitizer_syscall_pre_impl_kqueue1(long long flags); void __sanitizer_syscall_post_impl_kqueue1(long long res, long long flags); void __sanitizer_syscall_pre_impl_paccept(long long s, long long name, long long anamelen, long long mask, long long flags); void __sanitizer_syscall_post_impl_paccept(long long res, long long s, long long name, long long anamelen, long long mask, long long flags); void __sanitizer_syscall_pre_impl_linkat(long long fd1, long long name1, long long fd2, long long name2, long long flags); void __sanitizer_syscall_post_impl_linkat(long long res, long long fd1, long long name1, long long fd2, long long name2, long long flags); void __sanitizer_syscall_pre_impl_renameat(long long fromfd, long long from, long long tofd, long long to); void __sanitizer_syscall_post_impl_renameat(long long res, long long fromfd, long long from, long long tofd, long long to); void __sanitizer_syscall_pre_impl_mkfifoat(long long fd, long long path, long long mode); void __sanitizer_syscall_post_impl_mkfifoat(long long res, long long fd, long long path, long long mode); void __sanitizer_syscall_pre_impl_mknodat(long long fd, long long path, long long mode, long long PAD, long long dev); void __sanitizer_syscall_post_impl_mknodat(long long res, long long fd, long long path, long long mode, long long PAD, long long dev); void __sanitizer_syscall_pre_impl_mkdirat(long long fd, long long path, long long mode); void __sanitizer_syscall_post_impl_mkdirat(long long res, long long fd, long long path, long long mode); void __sanitizer_syscall_pre_impl_faccessat(long long fd, long long path, long long amode, long long flag); void __sanitizer_syscall_post_impl_faccessat(long long res, long long fd, long long path, long long amode, long long flag); void __sanitizer_syscall_pre_impl_fchmodat(long long fd, long long path, long long mode, long long flag); void __sanitizer_syscall_post_impl_fchmodat(long long res, long long fd, long long path, long long mode, long long flag); void __sanitizer_syscall_pre_impl_fchownat(long long fd, long long path, long long owner, long long group, long long flag); void __sanitizer_syscall_post_impl_fchownat(long long res, long long fd, long long path, long long owner, long long group, long long flag); void __sanitizer_syscall_pre_impl_fexecve(long long fd, long long argp, long long envp); void __sanitizer_syscall_post_impl_fexecve(long long res, long long fd, long long argp, long long envp); void __sanitizer_syscall_pre_impl_fstatat(long long fd, long long path, long long buf, long long flag); void __sanitizer_syscall_post_impl_fstatat(long long res, long long fd, long long path, long long buf, long long flag); void __sanitizer_syscall_pre_impl_utimensat(long long fd, long long path, long long tptr, long long flag); void __sanitizer_syscall_post_impl_utimensat(long long res, long long fd, long long path, long long tptr, long long flag); void __sanitizer_syscall_pre_impl_openat(long long fd, long long path, long long oflags, long long mode); void __sanitizer_syscall_post_impl_openat(long long res, long long fd, long long path, long long oflags, long long mode); void __sanitizer_syscall_pre_impl_readlinkat(long long fd, long long path, long long buf, long long bufsize); void __sanitizer_syscall_post_impl_readlinkat(long long res, long long fd, long long path, long long buf, long long bufsize); void __sanitizer_syscall_pre_impl_symlinkat(long long path1, long long fd, long long path2); void __sanitizer_syscall_post_impl_symlinkat(long long res, long long path1, long long fd, 
long long path2); void __sanitizer_syscall_pre_impl_unlinkat(long long fd, long long path, long long flag); void __sanitizer_syscall_post_impl_unlinkat(long long res, long long fd, long long path, long long flag); void __sanitizer_syscall_pre_impl_futimens(long long fd, long long tptr); void __sanitizer_syscall_post_impl_futimens(long long res, long long fd, long long tptr); void __sanitizer_syscall_pre_impl___quotactl(long long path, long long args); void __sanitizer_syscall_post_impl___quotactl(long long res, long long path, long long args); void __sanitizer_syscall_pre_impl_posix_spawn(long long pid, long long path, long long file_actions, long long attrp, long long argv, long long envp); void __sanitizer_syscall_post_impl_posix_spawn(long long res, long long pid, long long path, long long file_actions, long long attrp, long long argv, long long envp); void __sanitizer_syscall_pre_impl_recvmmsg(long long s, long long mmsg, long long vlen, long long flags, long long timeout); void __sanitizer_syscall_post_impl_recvmmsg(long long res, long long s, long long mmsg, long long vlen, long long flags, long long timeout); void __sanitizer_syscall_pre_impl_sendmmsg(long long s, long long mmsg, long long vlen, long long flags); void __sanitizer_syscall_post_impl_sendmmsg(long long res, long long s, long long mmsg, long long vlen, long long flags); void __sanitizer_syscall_pre_impl_clock_nanosleep(long long clock_id, long long flags, long long rqtp, long long rmtp); void __sanitizer_syscall_post_impl_clock_nanosleep(long long res, long long clock_id, long long flags, long long rqtp, long long rmtp); void __sanitizer_syscall_pre_impl____lwp_park60(long long clock_id, long long flags, long long ts, long long unpark, long long hint, long long unparkhint); void __sanitizer_syscall_post_impl____lwp_park60( long long res, long long clock_id, long long flags, long long ts, long long unpark, long long hint, long long unparkhint); void __sanitizer_syscall_pre_impl_posix_fallocate(long long fd, long long PAD, long long pos, long long len); void __sanitizer_syscall_post_impl_posix_fallocate(long long res, long long fd, long long PAD, long long pos, long long len); void __sanitizer_syscall_pre_impl_fdiscard(long long fd, long long PAD, long long pos, long long len); void __sanitizer_syscall_post_impl_fdiscard(long long res, long long fd, long long PAD, long long pos, long long len); void __sanitizer_syscall_pre_impl_wait6(long long idtype, long long id, long long status, long long options, long long wru, long long info); void __sanitizer_syscall_post_impl_wait6(long long res, long long idtype, long long id, long long status, long long options, long long wru, long long info); void __sanitizer_syscall_pre_impl_clock_getcpuclockid2(long long idtype, long long id, long long clock_id); void __sanitizer_syscall_post_impl_clock_getcpuclockid2(long long res, long long idtype, long long id, long long clock_id); void __sanitizer_syscall_pre_impl___getvfsstat90(long long buf, long long bufsize, long long flags); void __sanitizer_syscall_post_impl___getvfsstat90(long long res, long long buf, long long bufsize, long long flags); void __sanitizer_syscall_pre_impl___statvfs190(long long path, long long buf, long long flags); void __sanitizer_syscall_post_impl___statvfs190(long long res, long long path, long long buf, long long flags); void __sanitizer_syscall_pre_impl___fstatvfs190(long long fd, long long buf, long long flags); void __sanitizer_syscall_post_impl___fstatvfs190(long long res, long long fd, long long buf, long 
long flags); void __sanitizer_syscall_pre_impl___fhstatvfs190(long long fhp, long long fh_size, long long buf, long long flags); void __sanitizer_syscall_post_impl___fhstatvfs190(long long res, long long fhp, long long fh_size, long long buf, long long flags); void __sanitizer_syscall_pre_impl___acl_get_link(long long path, long long type, long long aclp); void __sanitizer_syscall_post_impl___acl_get_link(long long res, long long path, long long type, long long aclp); void __sanitizer_syscall_pre_impl___acl_set_link(long long path, long long type, long long aclp); void __sanitizer_syscall_post_impl___acl_set_link(long long res, long long path, long long type, long long aclp); void __sanitizer_syscall_pre_impl___acl_delete_link(long long path, long long type); void __sanitizer_syscall_post_impl___acl_delete_link(long long res, long long path, long long type); void __sanitizer_syscall_pre_impl___acl_aclcheck_link(long long path, long long type, long long aclp); void __sanitizer_syscall_post_impl___acl_aclcheck_link(long long res, long long path, long long type, long long aclp); void __sanitizer_syscall_pre_impl___acl_get_file(long long path, long long type, long long aclp); void __sanitizer_syscall_post_impl___acl_get_file(long long res, long long path, long long type, long long aclp); void __sanitizer_syscall_pre_impl___acl_set_file(long long path, long long type, long long aclp); void __sanitizer_syscall_post_impl___acl_set_file(long long res, long long path, long long type, long long aclp); void __sanitizer_syscall_pre_impl___acl_get_fd(long long filedes, long long type, long long aclp); void __sanitizer_syscall_post_impl___acl_get_fd(long long res, long long filedes, long long type, long long aclp); void __sanitizer_syscall_pre_impl___acl_set_fd(long long filedes, long long type, long long aclp); void __sanitizer_syscall_post_impl___acl_set_fd(long long res, long long filedes, long long type, long long aclp); void __sanitizer_syscall_pre_impl___acl_delete_file(long long path, long long type); void __sanitizer_syscall_post_impl___acl_delete_file(long long res, long long path, long long type); void __sanitizer_syscall_pre_impl___acl_delete_fd(long long filedes, long long type); void __sanitizer_syscall_post_impl___acl_delete_fd(long long res, long long filedes, long long type); void __sanitizer_syscall_pre_impl___acl_aclcheck_file(long long path, long long type, long long aclp); void __sanitizer_syscall_post_impl___acl_aclcheck_file(long long res, long long path, long long type, long long aclp); void __sanitizer_syscall_pre_impl___acl_aclcheck_fd(long long filedes, long long type, long long aclp); void __sanitizer_syscall_post_impl___acl_aclcheck_fd(long long res, long long filedes, long long type, long long aclp); void __sanitizer_syscall_pre_impl_lpathconf(long long path, long long name); void __sanitizer_syscall_post_impl_lpathconf(long long res, long long path, long long name); #ifdef __cplusplus } // extern "C" #endif // DO NOT EDIT! THIS FILE HAS BEEN GENERATED! #endif // SANITIZER_NETBSD_SYSCALL_HOOKS_H //===-- xray_records.h ------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is a part of XRay, a dynamic runtime instrumentation system. 
// // This header exposes some record types useful for the XRay in-memory logging // implementation. // //===----------------------------------------------------------------------===// #ifndef XRAY_XRAY_RECORDS_H #define XRAY_XRAY_RECORDS_H #include namespace __xray { enum FileTypes { NAIVE_LOG = 0, FDR_LOG = 1, }; // FDR mode use of the union field in the XRayFileHeader. struct alignas(16) FdrAdditionalHeaderData { uint64_t ThreadBufferSize; }; static_assert(sizeof(FdrAdditionalHeaderData) == 16, "FdrAdditionalHeaderData != 16 bytes"); // This data structure is used to describe the contents of the file. We use this // for versioning the supported XRay file formats. struct alignas(32) XRayFileHeader { uint16_t Version = 0; // The type of file we're writing out. See the FileTypes enum for more // information. This allows different implementations of the XRay logging to // have different files for different information being stored. uint16_t Type = 0; // What follows are a set of flags that indicate useful things for when // reading the data in the file. bool ConstantTSC : 1; bool NonstopTSC : 1; // The frequency by which TSC increases per-second. alignas(8) uint64_t CycleFrequency = 0; union { char FreeForm[16]; // The current civiltime timestamp, as retrieved from 'clock_gettime'. This // allows readers of the file to determine when the file was created or // written down. struct timespec TS; struct FdrAdditionalHeaderData FdrData; }; } __attribute__((packed)); static_assert(sizeof(XRayFileHeader) == 32, "XRayFileHeader != 32 bytes"); enum RecordTypes { NORMAL = 0, ARG_PAYLOAD = 1, }; struct alignas(32) XRayRecord { // This is the type of the record being written. We use 16 bits to allow us to // treat this as a discriminant, and so that the first 4 bytes get packed // properly. See RecordTypes for more supported types. uint16_t RecordType = RecordTypes::NORMAL; // The CPU where the thread is running. We assume number of CPUs <= 256. uint8_t CPU = 0; // The type of the event. One of the following: // ENTER = 0 // EXIT = 1 // TAIL_EXIT = 2 // ENTER_ARG = 3 uint8_t Type = 0; // The function ID for the record. int32_t FuncId = 0; // Get the full 8 bytes of the TSC when we get the log record. uint64_t TSC = 0; // The thread ID for the currently running thread. uint32_t TId = 0; // The ID of process that is currently running uint32_t PId = 0; // Use some bytes in the end of the record for buffers. char Buffer[8] = {}; } __attribute__((packed)); static_assert(sizeof(XRayRecord) == 32, "XRayRecord != 32 bytes"); struct alignas(32) XRayArgPayload { // We use the same 16 bits as a discriminant for the records in the log here // too, and so that the first 4 bytes are packed properly. uint16_t RecordType = RecordTypes::ARG_PAYLOAD; // Add a few bytes to pad. uint8_t Padding[2] = {}; // The function ID for the record. int32_t FuncId = 0; // The thread ID for the currently running thread. uint32_t TId = 0; // The ID of process that is currently running uint32_t PId = 0; // The argument payload. uint64_t Arg = 0; // The rest of this record ought to be left as padding. 
uint8_t TailPadding[8] = {}; } __attribute__((packed)); static_assert(sizeof(XRayArgPayload) == 32, "XRayArgPayload != 32 bytes"); } // namespace __xray #endif // XRAY_XRAY_RECORDS_H 
/*===---- __stddef_ptrdiff_t.h - Definition of ptrdiff_t -------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* * When -fbuiltin-headers-in-system-modules is set this is a non-modular header * and needs to behave as if it was textual. */ #if !defined(_PTRDIFF_T) || \ (__has_feature(modules) && !__building_module(_Builtin_stddef)) #define _PTRDIFF_T typedef __PTRDIFF_TYPE__ ptrdiff_t; #endif 
/*===---- __wmmintrin_aes.h - AES intrinsics -------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __WMMINTRIN_H #error "Never use <__wmmintrin_aes.h> directly; include instead." #endif #ifndef __WMMINTRIN_AES_H #define __WMMINTRIN_AES_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes"), __min_vector_width__(128))) /// Performs a single round of AES encryption using the Equivalent /// Inverse Cipher, transforming the state value from the first source /// operand using a 128-bit round key value contained in the second source /// operand, and writes the result to the destination. /// /// \headerfile /// /// This intrinsic corresponds to the VAESENC instruction. /// /// \param __V /// A 128-bit integer vector containing the state value. /// \param __R /// A 128-bit integer vector containing the round key value. /// \returns A 128-bit integer vector containing the encrypted value. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_aesenc_si128(__m128i __V, __m128i __R) { return (__m128i)__builtin_ia32_aesenc128((__v2di)__V, (__v2di)__R); } /// Performs the final round of AES encryption using the Equivalent /// Inverse Cipher, transforming the state value from the first source /// operand using a 128-bit round key value contained in the second source /// operand, and writes the result to the destination. /// /// \headerfile /// /// This intrinsic corresponds to the VAESENCLAST instruction. /// /// \param __V /// A 128-bit integer vector containing the state value. /// \param __R /// A 128-bit integer vector containing the round key value. 
/// \returns A 128-bit integer vector containing the encrypted value. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_aesenclast_si128(__m128i __V, __m128i __R) { return (__m128i)__builtin_ia32_aesenclast128((__v2di)__V, (__v2di)__R); } /// Performs a single round of AES decryption using the Equivalent /// Inverse Cipher, transforming the state value from the first source /// operand using a 128-bit round key value contained in the second source /// operand, and writes the result to the destination. /// /// \headerfile /// /// This intrinsic corresponds to the VAESDEC instruction. /// /// \param __V /// A 128-bit integer vector containing the state value. /// \param __R /// A 128-bit integer vector containing the round key value. /// \returns A 128-bit integer vector containing the decrypted value. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_aesdec_si128(__m128i __V, __m128i __R) { return (__m128i)__builtin_ia32_aesdec128((__v2di)__V, (__v2di)__R); } /// Performs the final round of AES decryption using the Equivalent /// Inverse Cipher, transforming the state value from the first source /// operand using a 128-bit round key value contained in the second source /// operand, and writes the result to the destination. /// /// \headerfile /// /// This intrinsic corresponds to the VAESDECLAST instruction. /// /// \param __V /// A 128-bit integer vector containing the state value. /// \param __R /// A 128-bit integer vector containing the round key value. /// \returns A 128-bit integer vector containing the decrypted value. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_aesdeclast_si128(__m128i __V, __m128i __R) { return (__m128i)__builtin_ia32_aesdeclast128((__v2di)__V, (__v2di)__R); } /// Applies the AES InvMixColumns() transformation to an expanded key /// contained in the source operand, and writes the result to the /// destination. /// /// \headerfile /// /// This intrinsic corresponds to the VAESIMC instruction. /// /// \param __V /// A 128-bit integer vector containing the expanded key. /// \returns A 128-bit integer vector containing the transformed value. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_aesimc_si128(__m128i __V) { return (__m128i)__builtin_ia32_aesimc128((__v2di)__V); } /// Generates a round key for AES encryption, operating on 128-bit data /// specified in the first source operand and using an 8-bit round constant /// specified by the second source operand, and writes the result to the /// destination. /// /// \headerfile /// /// \code /// __m128i _mm_aeskeygenassist_si128(__m128i C, const int R); /// \endcode /// /// This intrinsic corresponds to the AESKEYGENASSIST instruction. /// /// \param C /// A 128-bit integer vector that is used to generate the AES encryption key. /// \param R /// An 8-bit round constant used to generate the AES encryption key. /// \returns A 128-bit round key for AES encryption. #define _mm_aeskeygenassist_si128(C, R) \ ((__m128i)__builtin_ia32_aeskeygenassist128((__v2di)(__m128i)(C), (int)(R))) #undef __DEFAULT_FN_ATTRS #endif /* __WMMINTRIN_AES_H */ arm_vector_types.h/*===---- armintr.h - ARM Windows intrinsics -------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* Only include this if we're compiling for the windows platform. 
*/ #ifndef _MSC_VER #include_next #else #ifndef __ARMINTR_H #define __ARMINTR_H typedef enum { _ARM_BARRIER_SY = 0xF, _ARM_BARRIER_ST = 0xE, _ARM_BARRIER_ISH = 0xB, _ARM_BARRIER_ISHST = 0xA, _ARM_BARRIER_NSH = 0x7, _ARM_BARRIER_NSHST = 0x6, _ARM_BARRIER_OSH = 0x3, _ARM_BARRIER_OSHST = 0x2 } _ARMINTR_BARRIER_TYPE; #endif /* __ARMINTR_H */ #endif /* _MSC_VER */ /*===--------- avx512vlbf16intrin.h - AVX512_BF16 intrinsics ---------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifdef __SSE2__ #ifndef __AVX512VLBF16INTRIN_H #define __AVX512VLBF16INTRIN_H #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512bf16,no-evex512"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512bf16,no-evex512"), \ __min_vector_width__(256))) /// Convert Two Packed Single Data to One Packed BF16 Data. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTNE2PS2BF16 instructions. /// /// \param __A /// A 128-bit vector of [4 x float]. /// \param __B /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from /// conversion of __B, and higher 64 bits come from conversion of __A. static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_cvtne2ps_pbh(__m128 __A, __m128 __B) { return (__m128bh)__builtin_ia32_cvtne2ps2bf16_128((__v4sf) __A, (__v4sf) __B); } /// Convert Two Packed Single Data to One Packed BF16 Data. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTNE2PS2BF16 instructions. /// /// \param __A /// A 128-bit vector of [4 x float]. /// \param __B /// A 128-bit vector of [4 x float]. /// \param __W /// A 128-bit vector of [8 x bfloat]. /// \param __U /// A 8-bit mask value specifying what is chosen for each element. /// A 1 means conversion of __A or __B. A 0 means element from __W. /// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from /// conversion of __B, and higher 64 bits come from conversion of __A. static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_mask_cvtne2ps_pbh(__m128bh __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, (__v8bf)_mm_cvtne2ps_pbh(__A, __B), (__v8bf)__W); } /// Convert Two Packed Single Data to One Packed BF16 Data. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTNE2PS2BF16 instructions. /// /// \param __A /// A 128-bit vector of [4 x float]. /// \param __B /// A 128-bit vector of [4 x float]. /// \param __U /// A 8-bit mask value specifying what is chosen for each element. /// A 1 means conversion of __A or __B. A 0 means element is zero. /// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from /// conversion of __B, and higher 64 bits come from conversion of __A. static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_maskz_cvtne2ps_pbh(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, (__v8bf)_mm_cvtne2ps_pbh(__A, __B), (__v8bf)_mm_setzero_si128()); } /// Convert Two Packed Single Data to One Packed BF16 Data. 
/// /// \headerfile /// /// This intrinsic corresponds to the VCVTNE2PS2BF16 instructions. /// /// \param __A /// A 256-bit vector of [8 x float]. /// \param __B /// A 256-bit vector of [8 x float]. /// \returns A 256-bit vector of [16 x bfloat] whose lower 128 bits come from /// conversion of __B, and higher 128 bits come from conversion of __A. static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_cvtne2ps_pbh(__m256 __A, __m256 __B) { return (__m256bh)__builtin_ia32_cvtne2ps2bf16_256((__v8sf) __A, (__v8sf) __B); } /// Convert Two Packed Single Data to One Packed BF16 Data. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTNE2PS2BF16 instructions. /// /// \param __A /// A 256-bit vector of [8 x float]. /// \param __B /// A 256-bit vector of [8 x float]. /// \param __W /// A 256-bit vector of [16 x bfloat]. /// \param __U /// A 16-bit mask value specifying what is chosen for each element. /// A 1 means conversion of __A or __B. A 0 means element from __W. /// \returns A 256-bit vector of [16 x bfloat] whose lower 128 bits come from /// conversion of __B, and higher 128 bits come from conversion of __A. static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_cvtne2ps_pbh(__m256bh __W, __mmask16 __U, __m256 __A, __m256 __B) { return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, (__v16bf)_mm256_cvtne2ps_pbh(__A, __B), (__v16bf)__W); } /// Convert Two Packed Single Data to One Packed BF16 Data. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTNE2PS2BF16 instructions. /// /// \param __A /// A 256-bit vector of [8 x float]. /// \param __B /// A 256-bit vector of [8 x float]. /// \param __U /// A 16-bit mask value specifying what is chosen for each element. /// A 1 means conversion of __A or __B. A 0 means element is zero. /// \returns A 256-bit vector of [16 x bfloat] whose lower 128 bits come from /// conversion of __B, and higher 128 bits come from conversion of __A. static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtne2ps_pbh(__mmask16 __U, __m256 __A, __m256 __B) { return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, (__v16bf)_mm256_cvtne2ps_pbh(__A, __B), (__v16bf)_mm256_setzero_si256()); } /// Convert Packed Single Data to Packed BF16 Data. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTNEPS2BF16 instructions. /// /// \param __A /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from /// conversion of __A, and higher 64 bits are 0. #define _mm_cvtneps_pbh(A) \ ((__m128bh)__builtin_ia32_vcvtneps2bf16128((__v4sf)(A))) /// Convert Packed Single Data to Packed BF16 Data. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTNEPS2BF16 instructions. /// /// \param __A /// A 128-bit vector of [4 x float]. /// \param __W /// A 128-bit vector of [8 x bfloat]. /// \param __U /// A 4-bit mask value specifying what is chosen for each element. /// A 1 means conversion of __A. A 0 means element from __W. /// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from /// conversion of __A, and higher 64 bits are 0. static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_mask_cvtneps_pbh(__m128bh __W, __mmask8 __U, __m128 __A) { return (__m128bh)__builtin_ia32_cvtneps2bf16_128_mask((__v4sf) __A, (__v8bf)__W, (__mmask8)__U); } /// Convert Packed Single Data to Packed BF16 Data. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTNEPS2BF16 instructions. /// /// \param __A /// A 128-bit vector of [4 x float]. 
/// \param __U /// A 4-bit mask value specifying what is chosen for each element. /// A 1 means conversion of __A. A 0 means element is zero. /// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from /// conversion of __A, and higher 64 bits are 0. static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_maskz_cvtneps_pbh(__mmask8 __U, __m128 __A) { return (__m128bh)__builtin_ia32_cvtneps2bf16_128_mask((__v4sf) __A, (__v8bf)_mm_setzero_si128(), (__mmask8)__U); } /// Convert Packed Single Data to Packed BF16 Data. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTNEPS2BF16 instructions. /// /// \param __A /// A 256-bit vector of [8 x float]. /// \returns A 128-bit vector of [8 x bfloat] comes from conversion of __A. #define _mm256_cvtneps_pbh(A) \ ((__m128bh)__builtin_ia32_vcvtneps2bf16256((__v8sf)(A))) /// Convert Packed Single Data to Packed BF16 Data. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTNEPS2BF16 instructions. /// /// \param __A /// A 256-bit vector of [8 x float]. /// \param __W /// A 256-bit vector of [8 x bfloat]. /// \param __U /// A 8-bit mask value specifying what is chosen for each element. /// A 1 means conversion of __A. A 0 means element from __W. /// \returns A 128-bit vector of [8 x bfloat] comes from conversion of __A. static __inline__ __m128bh __DEFAULT_FN_ATTRS256 _mm256_mask_cvtneps_pbh(__m128bh __W, __mmask8 __U, __m256 __A) { return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A, (__v8bf)__W, (__mmask8)__U); } /// Convert Packed Single Data to Packed BF16 Data. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTNEPS2BF16 instructions. /// /// \param __A /// A 256-bit vector of [8 x float]. /// \param __U /// A 8-bit mask value specifying what is chosen for each element. /// A 1 means conversion of __A. A 0 means element is zero. /// \returns A 128-bit vector of [8 x bfloat] comes from conversion of __A. static __inline__ __m128bh __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtneps_pbh(__mmask8 __U, __m256 __A) { return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A, (__v8bf)_mm_setzero_si128(), (__mmask8)__U); } /// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. /// /// \headerfile /// /// This intrinsic corresponds to the VDPBF16PS instructions. /// /// \param __A /// A 128-bit vector of [8 x bfloat]. /// \param __B /// A 128-bit vector of [8 x bfloat]. /// \param __D /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] comes from Dot Product of /// __A, __B and __D static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_dpbf16_ps(__m128 __D, __m128bh __A, __m128bh __B) { return (__m128)__builtin_ia32_dpbf16ps_128((__v4sf)__D, (__v8bf)__A, (__v8bf)__B); } /// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. /// /// \headerfile /// /// This intrinsic corresponds to the VDPBF16PS instructions. /// /// \param __A /// A 128-bit vector of [8 x bfloat]. /// \param __B /// A 128-bit vector of [8 x bfloat]. /// \param __D /// A 128-bit vector of [4 x float]. /// \param __U /// A 8-bit mask value specifying what is chosen for each element. /// A 1 means __A and __B's dot product accumulated with __D. A 0 means __D. 
/// \returns A 128-bit vector of [4 x float] comes from Dot Product of /// __A, __B and __D static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_dpbf16_ps(__m128 __D, __mmask8 __U, __m128bh __A, __m128bh __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_dpbf16_ps(__D, __A, __B), (__v4sf)__D); } /// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. /// /// \headerfile /// /// This intrinsic corresponds to the VDPBF16PS instructions. /// /// \param __A /// A 128-bit vector of [8 x bfloat]. /// \param __B /// A 128-bit vector of [8 x bfloat]. /// \param __D /// A 128-bit vector of [4 x float]. /// \param __U /// A 8-bit mask value specifying what is chosen for each element. /// A 1 means __A and __B's dot product accumulated with __D. A 0 means 0. /// \returns A 128-bit vector of [4 x float] comes from Dot Product of /// __A, __B and __D static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_dpbf16_ps(__mmask8 __U, __m128 __D, __m128bh __A, __m128bh __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_dpbf16_ps(__D, __A, __B), (__v4sf)_mm_setzero_si128()); } /// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. /// /// \headerfile /// /// This intrinsic corresponds to the VDPBF16PS instructions. /// /// \param __A /// A 256-bit vector of [16 x bfloat]. /// \param __B /// A 256-bit vector of [16 x bfloat]. /// \param __D /// A 256-bit vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] comes from Dot Product of /// __A, __B and __D static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_dpbf16_ps(__m256 __D, __m256bh __A, __m256bh __B) { return (__m256)__builtin_ia32_dpbf16ps_256((__v8sf)__D, (__v16bf)__A, (__v16bf)__B); } /// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. /// /// \headerfile /// /// This intrinsic corresponds to the VDPBF16PS instructions. /// /// \param __A /// A 256-bit vector of [16 x bfloat]. /// \param __B /// A 256-bit vector of [16 x bfloat]. /// \param __D /// A 256-bit vector of [8 x float]. /// \param __U /// A 16-bit mask value specifying what is chosen for each element. /// A 1 means __A and __B's dot product accumulated with __D. A 0 means __D. /// \returns A 256-bit vector of [8 x float] comes from Dot Product of /// __A, __B and __D static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_dpbf16_ps(__m256 __D, __mmask8 __U, __m256bh __A, __m256bh __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_dpbf16_ps(__D, __A, __B), (__v8sf)__D); } /// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. /// /// \headerfile /// /// This intrinsic corresponds to the VDPBF16PS instructions. /// /// \param __A /// A 256-bit vector of [16 x bfloat]. /// \param __B /// A 256-bit vector of [16 x bfloat]. /// \param __D /// A 256-bit vector of [8 x float]. /// \param __U /// A 8-bit mask value specifying what is chosen for each element. /// A 1 means __A and __B's dot product accumulated with __D. A 0 means 0. /// \returns A 256-bit vector of [8 x float] comes from Dot Product of /// __A, __B and __D static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbf16_ps(__mmask8 __U, __m256 __D, __m256bh __A, __m256bh __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_dpbf16_ps(__D, __A, __B), (__v8sf)_mm256_setzero_si256()); } /// Convert One Single float Data to One BF16 Data. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTNEPS2BF16 instructions. 
/// /// \param __A /// A float data. /// \returns A bf16 data whose sign field and exponent field keep unchanged, /// and fraction field is truncated to 7 bits. static __inline__ __bf16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) { __v4sf __V = {__A, 0, 0, 0}; __v8bf __R = __builtin_ia32_cvtneps2bf16_128_mask( (__v4sf)__V, (__v8bf)_mm_undefined_si128(), (__mmask8)-1); return (__bf16)__R[0]; } /// Convert Packed BF16 Data to Packed float Data. /// /// \headerfile /// /// \param __A /// A 128-bit vector of [4 x bfloat]. /// \returns A 128-bit vector of [4 x float] come from conversion of __A static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtpbh_ps(__m128bh __A) { return _mm_castsi128_ps( (__m128i)_mm_slli_epi32((__m128i)_mm_cvtepi16_epi32((__m128i)__A), 16)); } /// Convert Packed BF16 Data to Packed float Data. /// /// \headerfile /// /// \param __A /// A 128-bit vector of [8 x bfloat]. /// \returns A 256-bit vector of [8 x float] come from conversion of __A static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps(__m128bh __A) { return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32( (__m256i)_mm256_cvtepi16_epi32((__m128i)__A), 16)); } /// Convert Packed BF16 Data to Packed float Data using zeroing mask. /// /// \headerfile /// /// \param __U /// A 4-bit mask. Elements are zeroed out when the corresponding mask /// bit is not set. /// \param __A /// A 128-bit vector of [4 x bfloat]. /// \returns A 128-bit vector of [4 x float] come from conversion of __A static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) { return _mm_castsi128_ps((__m128i)_mm_slli_epi32( (__m128i)_mm_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16)); } /// Convert Packed BF16 Data to Packed float Data using zeroing mask. /// /// \headerfile /// /// \param __U /// A 8-bit mask. Elements are zeroed out when the corresponding mask /// bit is not set. /// \param __A /// A 128-bit vector of [8 x bfloat]. /// \returns A 256-bit vector of [8 x float] come from conversion of __A static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) { return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32( (__m256i)_mm256_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16)); } /// Convert Packed BF16 Data to Packed float Data using merging mask. /// /// \headerfile /// /// \param __S /// A 128-bit vector of [4 x float]. Elements are copied from __S when /// the corresponding mask bit is not set. /// \param __U /// A 4-bit mask. Elements are zeroed out when the corresponding mask /// bit is not set. /// \param __A /// A 128-bit vector of [4 x bfloat]. /// \returns A 128-bit vector of [4 x float] come from conversion of __A static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtpbh_ps(__m128 __S, __mmask8 __U, __m128bh __A) { return _mm_castsi128_ps((__m128i)_mm_mask_slli_epi32( (__m128i)__S, (__mmask8)__U, (__m128i)_mm_cvtepi16_epi32((__m128i)__A), 16)); } /// Convert Packed BF16 Data to Packed float Data using merging mask. /// /// \headerfile /// /// \param __S /// A 256-bit vector of [8 x float]. Elements are copied from __S when /// the corresponding mask bit is not set. /// \param __U /// A 8-bit mask. Elements are zeroed out when the corresponding mask /// bit is not set. /// \param __A /// A 128-bit vector of [8 x bfloat]. 
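// A minimal round-trip sketch (not part of the header), assuming <immintrin.h>
// is included and AVX512BF16/AVX512VL are enabled: narrow 8 floats to BF16 and
// widen them back. The widened values keep the original sign and exponent but
// only the top 7 mantissa bits, as described for _mm_cvtness_sbh above. The
// helper name is illustrative.
static inline __m256 bf16_round_trip(__m256 __x) {
  __m128bh __half = _mm256_cvtneps_pbh(__x); // float -> bfloat16 (lossy)
  return _mm256_cvtpbh_ps(__half);           // bfloat16 -> float (exact widen)
}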
/// \returns A 256-bit vector of [8 x float] resulting from the conversion of __A.
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_mask_cvtpbh_ps(__m256 __S, __mmask8 __U, __m128bh __A) {
  return _mm256_castsi256_ps((__m256i)_mm256_mask_slli_epi32(
      (__m256i)__S, (__mmask8)__U, (__m256i)_mm256_cvtepi16_epi32((__m128i)__A),
      16));
}

#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256

#endif
#endif
/*===----------------- avxifmaintrin.h - IFMA intrinsics -------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avxifmaintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVXIFMAINTRIN_H
#define __AVXIFMAINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avxifma"),       \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avxifma"),       \
                 __min_vector_width__(256)))

// Note: these intrinsics must be VEX-encoded.

/// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y
/// and \a __Z to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result to the corresponding
/// unsigned 64-bit integer in \a __X, and store the results in \a dst.
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m128i
/// _mm_madd52hi_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
/// \endcode
///
/// This intrinsic corresponds to the \c VPMADD52HUQ instruction.
///
/// \returns
///    A 128-bit vector of [2 x i64] containing the result \a dst.
/// \param __X
///    A 128-bit vector of [2 x i64]
/// \param __Y
///    A 128-bit vector of [2 x i64]
/// \param __Z
///    A 128-bit vector of [2 x i64]
///
/// \code{.operation}
/// FOR j := 0 to 1
///   i := j*64
///   tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i])
///   dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52])
/// ENDFOR
/// dst[MAX:128] := 0
/// \endcode
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_madd52hi_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
  return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di)__X, (__v2di)__Y,
                                                (__v2di)__Z);
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y
/// and \a __Z to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result to the corresponding
/// unsigned 64-bit integer in \a __X, and store the results in \a dst.
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m256i
/// _mm256_madd52hi_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
/// \endcode
///
/// This intrinsic corresponds to the \c VPMADD52HUQ instruction.
///
/// \returns
///    A 256-bit vector of [4 x i64] containing the result \a dst.
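// A hypothetical sketch of the typical multi-precision usage pattern for the
// 52-bit multiply-add intrinsics: each 64-bit lane holds one 52-bit limb, and
// the low and high halves of the 104-bit products are accumulated into
// separate carry-save accumulators. The low-half counterpart
// _mm256_madd52lo_avx_epu64 is documented just below. Assumes <immintrin.h>
// and -mavxifma; the helper name is illustrative, not from the header.
static inline void limb_mul_accumulate(__m256i *__lo_acc, __m256i *__hi_acc,
                                       __m256i __a_limbs, __m256i __b_limbs) {
  *__lo_acc = _mm256_madd52lo_avx_epu64(*__lo_acc, __a_limbs, __b_limbs); // bits 51:0
  *__hi_acc = _mm256_madd52hi_avx_epu64(*__hi_acc, __a_limbs, __b_limbs); // bits 103:52
}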
/// \param __X /// A 256-bit vector of [4 x i64] /// \param __Y /// A 256-bit vector of [4 x i64] /// \param __Z /// A 256-bit vector of [4 x i64] /// /// \code{.operation} /// FOR j := 0 to 3 /// i := j*64 /// tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) /// dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52]) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_madd52hi_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) { return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y, (__v4di)__Z); } /// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y /// and \a __Z to form a 104-bit intermediate result. Add the low 52-bit /// unsigned integer from the intermediate result with the corresponding /// unsigned 64-bit integer in \a __X, and store the results in \a dst. /// /// \headerfile /// /// \code /// __m128i /// _mm_madd52lo_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z) /// \endcode /// /// This intrinsic corresponds to the \c VPMADD52LUQ instruction. /// /// \return /// return __m128i dst. /// \param __X /// A 128-bit vector of [2 x i64] /// \param __Y /// A 128-bit vector of [2 x i64] /// \param __Z /// A 128-bit vector of [2 x i64] /// /// \code{.operation} /// FOR j := 0 to 1 /// i := j*64 /// tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) /// dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0]) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_madd52lo_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) { return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y, (__v2di)__Z); } /// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y /// and \a __Z to form a 104-bit intermediate result. Add the low 52-bit /// unsigned integer from the intermediate result with the corresponding /// unsigned 64-bit integer in \a __X, and store the results in \a dst. /// /// \headerfile /// /// \code /// __m256i /// _mm256_madd52lo_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z) /// \endcode /// /// This intrinsic corresponds to the \c VPMADD52LUQ instruction. /// /// \return /// return __m256i dst. /// \param __X /// A 256-bit vector of [4 x i64] /// \param __Y /// A 256-bit vector of [4 x i64] /// \param __Z /// A 256-bit vector of [4 x i64] /// /// \code{.operation} /// FOR j := 0 to 3 /// i := j*64 /// tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) /// dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0]) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_madd52lo_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) { return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y, (__v4di)__Z); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif // __AVXIFMAINTRIN_H /*===---- avxintrin.h - AVX intrinsics -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." 
#endif #ifndef __AVXINTRIN_H #define __AVXINTRIN_H typedef double __v4df __attribute__ ((__vector_size__ (32))); typedef float __v8sf __attribute__ ((__vector_size__ (32))); typedef long long __v4di __attribute__ ((__vector_size__ (32))); typedef int __v8si __attribute__ ((__vector_size__ (32))); typedef short __v16hi __attribute__ ((__vector_size__ (32))); typedef char __v32qi __attribute__ ((__vector_size__ (32))); /* Unsigned types */ typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32))); typedef unsigned int __v8su __attribute__ ((__vector_size__ (32))); typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32))); typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32))); /* We need an explicitly signed variant for char. Note that this shouldn't * appear in the interface though. */ typedef signed char __v32qs __attribute__((__vector_size__(32))); typedef float __m256 __attribute__ ((__vector_size__ (32), __aligned__(32))); typedef double __m256d __attribute__((__vector_size__(32), __aligned__(32))); typedef long long __m256i __attribute__((__vector_size__(32), __aligned__(32))); typedef float __m256_u __attribute__ ((__vector_size__ (32), __aligned__(1))); typedef double __m256d_u __attribute__((__vector_size__(32), __aligned__(1))); typedef long long __m256i_u __attribute__((__vector_size__(32), __aligned__(1))); #ifdef __SSE2__ /* Both _Float16 and __bf16 require SSE2 being enabled. */ typedef _Float16 __v16hf __attribute__((__vector_size__(32), __aligned__(32))); typedef _Float16 __m256h __attribute__((__vector_size__(32), __aligned__(32))); typedef _Float16 __m256h_u __attribute__((__vector_size__(32), __aligned__(1))); typedef __bf16 __v16bf __attribute__((__vector_size__(32), __aligned__(32))); typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32))); #endif /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \ __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \ __min_vector_width__(128))) /* Arithmetic */ /// Adds two 256-bit vectors of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VADDPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing one of the source operands. /// \param __b /// A 256-bit vector of [4 x double] containing one of the source operands. /// \returns A 256-bit vector of [4 x double] containing the sums of both /// operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_add_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4df)__a+(__v4df)__b); } /// Adds two 256-bit vectors of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VADDPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing one of the source operands. /// \param __b /// A 256-bit vector of [8 x float] containing one of the source operands. /// \returns A 256-bit vector of [8 x float] containing the sums of both /// operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_add_ps(__m256 __a, __m256 __b) { return (__m256)((__v8sf)__a+(__v8sf)__b); } /// Subtracts two 256-bit vectors of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VSUBPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing the minuend. 
/// \param __b /// A 256-bit vector of [4 x double] containing the subtrahend. /// \returns A 256-bit vector of [4 x double] containing the differences between /// both operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sub_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4df)__a-(__v4df)__b); } /// Subtracts two 256-bit vectors of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VSUBPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing the minuend. /// \param __b /// A 256-bit vector of [8 x float] containing the subtrahend. /// \returns A 256-bit vector of [8 x float] containing the differences between /// both operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sub_ps(__m256 __a, __m256 __b) { return (__m256)((__v8sf)__a-(__v8sf)__b); } /// Adds the even-indexed values and subtracts the odd-indexed values of /// two 256-bit vectors of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VADDSUBPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing the left source operand. /// \param __b /// A 256-bit vector of [4 x double] containing the right source operand. /// \returns A 256-bit vector of [4 x double] containing the alternating sums /// and differences between both operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_addsub_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b); } /// Adds the even-indexed values and subtracts the odd-indexed values of /// two 256-bit vectors of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VADDSUBPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing the left source operand. /// \param __b /// A 256-bit vector of [8 x float] containing the right source operand. /// \returns A 256-bit vector of [8 x float] containing the alternating sums and /// differences between both operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_addsub_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b); } /// Divides two 256-bit vectors of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VDIVPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing the dividend. /// \param __b /// A 256-bit vector of [4 x double] containing the divisor. /// \returns A 256-bit vector of [4 x double] containing the quotients of both /// operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_div_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4df)__a/(__v4df)__b); } /// Divides two 256-bit vectors of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VDIVPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing the dividend. /// \param __b /// A 256-bit vector of [8 x float] containing the divisor. /// \returns A 256-bit vector of [8 x float] containing the quotients of both /// operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_div_ps(__m256 __a, __m256 __b) { return (__m256)((__v8sf)__a/(__v8sf)__b); } /// Compares two 256-bit vectors of [4 x double] and returns the greater /// of each pair of values. /// /// \headerfile /// /// This intrinsic corresponds to the VMAXPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing one of the operands. /// \param __b /// A 256-bit vector of [4 x double] containing one of the operands. 
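// A small usage sketch (not part of the header), assuming <immintrin.h> is
// included and the code is compiled with -mavx: the lanewise average of two
// [8 x float] vectors, built from the addition and division intrinsics
// documented above (_mm256_set1_ps is defined elsewhere in this header).
static inline __m256 lanewise_average(__m256 __a, __m256 __b) {
  return _mm256_div_ps(_mm256_add_ps(__a, __b), _mm256_set1_ps(2.0f));
}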
/// \returns A 256-bit vector of [4 x double] containing the maximum values /// between both operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_max_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b); } /// Compares two 256-bit vectors of [8 x float] and returns the greater /// of each pair of values. /// /// \headerfile /// /// This intrinsic corresponds to the VMAXPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing one of the operands. /// \param __b /// A 256-bit vector of [8 x float] containing one of the operands. /// \returns A 256-bit vector of [8 x float] containing the maximum values /// between both operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_max_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b); } /// Compares two 256-bit vectors of [4 x double] and returns the lesser /// of each pair of values. /// /// \headerfile /// /// This intrinsic corresponds to the VMINPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing one of the operands. /// \param __b /// A 256-bit vector of [4 x double] containing one of the operands. /// \returns A 256-bit vector of [4 x double] containing the minimum values /// between both operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_min_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b); } /// Compares two 256-bit vectors of [8 x float] and returns the lesser /// of each pair of values. /// /// \headerfile /// /// This intrinsic corresponds to the VMINPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing one of the operands. /// \param __b /// A 256-bit vector of [8 x float] containing one of the operands. /// \returns A 256-bit vector of [8 x float] containing the minimum values /// between both operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_min_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b); } /// Multiplies two 256-bit vectors of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VMULPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing one of the operands. /// \param __b /// A 256-bit vector of [4 x double] containing one of the operands. /// \returns A 256-bit vector of [4 x double] containing the products of both /// operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_mul_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4df)__a * (__v4df)__b); } /// Multiplies two 256-bit vectors of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VMULPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing one of the operands. /// \param __b /// A 256-bit vector of [8 x float] containing one of the operands. /// \returns A 256-bit vector of [8 x float] containing the products of both /// operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_mul_ps(__m256 __a, __m256 __b) { return (__m256)((__v8sf)__a * (__v8sf)__b); } /// Calculates the square roots of the values in a 256-bit vector of /// [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VSQRTPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the square roots of the /// values in the operand. 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sqrt_pd(__m256d __a) { return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a); } /// Calculates the square roots of the values in a 256-bit vector of /// [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VSQRTPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the square roots of the /// values in the operand. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sqrt_ps(__m256 __a) { return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a); } /// Calculates the reciprocal square roots of the values in a 256-bit /// vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VRSQRTPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the reciprocal square /// roots of the values in the operand. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_rsqrt_ps(__m256 __a) { return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a); } /// Calculates the reciprocals of the values in a 256-bit vector of /// [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VRCPPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the reciprocals of the /// values in the operand. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_rcp_ps(__m256 __a) { return (__m256)__builtin_ia32_rcpps256((__v8sf)__a); } /// Rounds the values in a 256-bit vector of [4 x double] as specified /// by the byte operand. The source values are rounded to integer values and /// returned as 64-bit double-precision floating-point values. /// /// \headerfile /// /// \code /// __m256d _mm256_round_pd(__m256d V, const int M); /// \endcode /// /// This intrinsic corresponds to the VROUNDPD instruction. /// /// \param V /// A 256-bit vector of [4 x double]. /// \param M /// An integer value that specifies the rounding operation. \n /// Bits [7:4] are reserved. \n /// Bit [3] is a precision exception value: \n /// 0: A normal PE exception is used. \n /// 1: The PE field is not updated. \n /// Bit [2] is the rounding control source: \n /// 0: Use bits [1:0] of \a M. \n /// 1: Use the current MXCSR setting. \n /// Bits [1:0] contain the rounding control definition: \n /// 00: Nearest. \n /// 01: Downward (toward negative infinity). \n /// 10: Upward (toward positive infinity). \n /// 11: Truncated. /// \returns A 256-bit vector of [4 x double] containing the rounded values. #define _mm256_round_pd(V, M) \ ((__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M))) /// Rounds the values stored in a 256-bit vector of [8 x float] as /// specified by the byte operand. The source values are rounded to integer /// values and returned as floating-point values. /// /// \headerfile /// /// \code /// __m256 _mm256_round_ps(__m256 V, const int M); /// \endcode /// /// This intrinsic corresponds to the VROUNDPS instruction. /// /// \param V /// A 256-bit vector of [8 x float]. /// \param M /// An integer value that specifies the rounding operation. \n /// Bits [7:4] are reserved. \n /// Bit [3] is a precision exception value: \n /// 0: A normal PE exception is used. \n /// 1: The PE field is not updated. \n /// Bit [2] is the rounding control source: \n /// 0: Use bits [1:0] of \a M. \n /// 1: Use the current MXCSR setting. \n /// Bits [1:0] contain the rounding control definition: \n /// 00: Nearest. 
\n /// 01: Downward (toward negative infinity). \n /// 10: Upward (toward positive infinity). \n /// 11: Truncated. /// \returns A 256-bit vector of [8 x float] containing the rounded values. #define _mm256_round_ps(V, M) \ ((__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M))) /// Rounds up the values stored in a 256-bit vector of [4 x double]. The /// source values are rounded up to integer values and returned as 64-bit /// double-precision floating-point values. /// /// \headerfile /// /// \code /// __m256d _mm256_ceil_pd(__m256d V); /// \endcode /// /// This intrinsic corresponds to the VROUNDPD instruction. /// /// \param V /// A 256-bit vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the rounded up values. #define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL) /// Rounds down the values stored in a 256-bit vector of [4 x double]. /// The source values are rounded down to integer values and returned as /// 64-bit double-precision floating-point values. /// /// \headerfile /// /// \code /// __m256d _mm256_floor_pd(__m256d V); /// \endcode /// /// This intrinsic corresponds to the VROUNDPD instruction. /// /// \param V /// A 256-bit vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the rounded down /// values. #define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR) /// Rounds up the values stored in a 256-bit vector of [8 x float]. The /// source values are rounded up to integer values and returned as /// floating-point values. /// /// \headerfile /// /// \code /// __m256 _mm256_ceil_ps(__m256 V); /// \endcode /// /// This intrinsic corresponds to the VROUNDPS instruction. /// /// \param V /// A 256-bit vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the rounded up values. #define _mm256_ceil_ps(V) _mm256_round_ps((V), _MM_FROUND_CEIL) /// Rounds down the values stored in a 256-bit vector of [8 x float]. The /// source values are rounded down to integer values and returned as /// floating-point values. /// /// \headerfile /// /// \code /// __m256 _mm256_floor_ps(__m256 V); /// \endcode /// /// This intrinsic corresponds to the VROUNDPS instruction. /// /// \param V /// A 256-bit vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the rounded down values. #define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR) /* Logical */ /// Performs a bitwise AND of two 256-bit vectors of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VANDPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing one of the source operands. /// \param __b /// A 256-bit vector of [4 x double] containing one of the source operands. /// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the /// values between both operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_and_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4du)__a & (__v4du)__b); } /// Performs a bitwise AND of two 256-bit vectors of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VANDPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing one of the source operands. /// \param __b /// A 256-bit vector of [8 x float] containing one of the source operands. /// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the /// values between both operands. 
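// An illustrative sketch (not part of the header), assuming <immintrin.h> and
// -mavx: round each float to the nearest integer value without raising a
// precision exception. _MM_FROUND_TO_NEAREST_INT selects rounding mode 00 in
// bits [1:0] and _MM_FROUND_NO_EXC sets bit [3]; _mm256_ceil_ps and
// _mm256_floor_ps above are the shorthands for the upward and downward modes.
static inline __m256 round_to_nearest(__m256 __v) {
  return _mm256_round_ps(__v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}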
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_and_ps(__m256 __a, __m256 __b) { return (__m256)((__v8su)__a & (__v8su)__b); } /// Performs a bitwise AND of two 256-bit vectors of [4 x double], using /// the one's complement of the values contained in the first source operand. /// /// \headerfile /// /// This intrinsic corresponds to the VANDNPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing the left source operand. The /// one's complement of this value is used in the bitwise AND. /// \param __b /// A 256-bit vector of [4 x double] containing the right source operand. /// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the /// values of the second operand and the one's complement of the first /// operand. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_andnot_pd(__m256d __a, __m256d __b) { return (__m256d)(~(__v4du)__a & (__v4du)__b); } /// Performs a bitwise AND of two 256-bit vectors of [8 x float], using /// the one's complement of the values contained in the first source operand. /// /// \headerfile /// /// This intrinsic corresponds to the VANDNPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing the left source operand. The /// one's complement of this value is used in the bitwise AND. /// \param __b /// A 256-bit vector of [8 x float] containing the right source operand. /// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the /// values of the second operand and the one's complement of the first /// operand. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_andnot_ps(__m256 __a, __m256 __b) { return (__m256)(~(__v8su)__a & (__v8su)__b); } /// Performs a bitwise OR of two 256-bit vectors of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VORPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing one of the source operands. /// \param __b /// A 256-bit vector of [4 x double] containing one of the source operands. /// \returns A 256-bit vector of [4 x double] containing the bitwise OR of the /// values between both operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_or_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4du)__a | (__v4du)__b); } /// Performs a bitwise OR of two 256-bit vectors of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VORPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing one of the source operands. /// \param __b /// A 256-bit vector of [8 x float] containing one of the source operands. /// \returns A 256-bit vector of [8 x float] containing the bitwise OR of the /// values between both operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_or_ps(__m256 __a, __m256 __b) { return (__m256)((__v8su)__a | (__v8su)__b); } /// Performs a bitwise XOR of two 256-bit vectors of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VXORPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing one of the source operands. /// \param __b /// A 256-bit vector of [4 x double] containing one of the source operands. /// \returns A 256-bit vector of [4 x double] containing the bitwise XOR of the /// values between both operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_xor_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4du)__a ^ (__v4du)__b); } /// Performs a bitwise XOR of two 256-bit vectors of [8 x float]. 
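// Common sign-bit idioms built from the AND/ANDNOT/OR/XOR intrinsics in this
// section (an illustrative sketch, not part of the header), assuming
// <immintrin.h> and -mavx; _mm256_set1_pd/_mm256_set1_ps are defined elsewhere
// in this header. Clearing the sign bit of each lane yields |x|; toggling it
// yields -x.
static inline __m256d lanewise_fabs(__m256d __x) {
  return _mm256_andnot_pd(_mm256_set1_pd(-0.0), __x); // ~sign_mask & x
}
static inline __m256 lanewise_negate(__m256 __x) {
  return _mm256_xor_ps(__x, _mm256_set1_ps(-0.0f)); // flip each sign bit
}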
/// /// \headerfile /// /// This intrinsic corresponds to the VXORPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing one of the source operands. /// \param __b /// A 256-bit vector of [8 x float] containing one of the source operands. /// \returns A 256-bit vector of [8 x float] containing the bitwise XOR of the /// values between both operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_xor_ps(__m256 __a, __m256 __b) { return (__m256)((__v8su)__a ^ (__v8su)__b); } /* Horizontal arithmetic */ /// Horizontally adds the adjacent pairs of values contained in two /// 256-bit vectors of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VHADDPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing one of the source operands. /// The horizontal sums of the values are returned in the even-indexed /// elements of a vector of [4 x double]. /// \param __b /// A 256-bit vector of [4 x double] containing one of the source operands. /// The horizontal sums of the values are returned in the odd-indexed /// elements of a vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the horizontal sums of /// both operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_hadd_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b); } /// Horizontally adds the adjacent pairs of values contained in two /// 256-bit vectors of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VHADDPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing one of the source operands. /// The horizontal sums of the values are returned in the elements with /// index 0, 1, 4, 5 of a vector of [8 x float]. /// \param __b /// A 256-bit vector of [8 x float] containing one of the source operands. /// The horizontal sums of the values are returned in the elements with /// index 2, 3, 6, 7 of a vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the horizontal sums of /// both operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_hadd_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b); } /// Horizontally subtracts the adjacent pairs of values contained in two /// 256-bit vectors of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VHSUBPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing one of the source operands. /// The horizontal differences between the values are returned in the /// even-indexed elements of a vector of [4 x double]. /// \param __b /// A 256-bit vector of [4 x double] containing one of the source operands. /// The horizontal differences between the values are returned in the /// odd-indexed elements of a vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the horizontal /// differences of both operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_hsub_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b); } /// Horizontally subtracts the adjacent pairs of values contained in two /// 256-bit vectors of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VHSUBPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing one of the source operands. 
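// A usage sketch (not part of the header), assuming <immintrin.h> and -mavx:
// a full horizontal sum of a [8 x float] vector. Note that _mm256_hadd_ps
// only adds adjacent pairs within each 128-bit half, so the two halves must
// be combined at the end (_mm256_castps256_ps128, _mm256_extractf128_ps and
// the SSE intrinsics used here are defined in other parts of these headers).
static inline float hsum8(__m256 __v) {
  __m256 __h = _mm256_hadd_ps(__v, __v); // pairwise sums, per 128-bit half
  __h = _mm256_hadd_ps(__h, __h);        // quarter sums, still per half
  __m128 __lo = _mm256_castps256_ps128(__h);
  __m128 __hi = _mm256_extractf128_ps(__h, 1);
  return _mm_cvtss_f32(_mm_add_ss(__lo, __hi)); // combine the two halves
}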
/// The horizontal differences between the values are returned in the /// elements with index 0, 1, 4, 5 of a vector of [8 x float]. /// \param __b /// A 256-bit vector of [8 x float] containing one of the source operands. /// The horizontal differences between the values are returned in the /// elements with index 2, 3, 6, 7 of a vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the horizontal /// differences of both operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_hsub_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b); } /* Vector permutations */ /// Copies the values in a 128-bit vector of [2 x double] as specified /// by the 128-bit integer vector operand. /// /// \headerfile /// /// This intrinsic corresponds to the VPERMILPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __c /// A 128-bit integer vector operand specifying how the values are to be /// copied. \n /// Bit [1]: \n /// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned /// vector. \n /// 1: Bits [127:64] of the source are copied to bits [63:0] of the /// returned vector. \n /// Bit [65]: \n /// 0: Bits [63:0] of the source are copied to bits [127:64] of the /// returned vector. \n /// 1: Bits [127:64] of the source are copied to bits [127:64] of the /// returned vector. /// \returns A 128-bit vector of [2 x double] containing the copied values. static __inline __m128d __DEFAULT_FN_ATTRS128 _mm_permutevar_pd(__m128d __a, __m128i __c) { return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c); } /// Copies the values in a 256-bit vector of [4 x double] as specified /// by the 256-bit integer vector operand. /// /// \headerfile /// /// This intrinsic corresponds to the VPERMILPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. /// \param __c /// A 256-bit integer vector operand specifying how the values are to be /// copied. \n /// Bit [1]: \n /// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned /// vector. \n /// 1: Bits [127:64] of the source are copied to bits [63:0] of the /// returned vector. \n /// Bit [65]: \n /// 0: Bits [63:0] of the source are copied to bits [127:64] of the /// returned vector. \n /// 1: Bits [127:64] of the source are copied to bits [127:64] of the /// returned vector. \n /// Bit [129]: \n /// 0: Bits [191:128] of the source are copied to bits [191:128] of the /// returned vector. \n /// 1: Bits [255:192] of the source are copied to bits [191:128] of the /// returned vector. \n /// Bit [193]: \n /// 0: Bits [191:128] of the source are copied to bits [255:192] of the /// returned vector. \n /// 1: Bits [255:192] of the source are copied to bits [255:192] of the /// returned vector. /// \returns A 256-bit vector of [4 x double] containing the copied values. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_permutevar_pd(__m256d __a, __m256i __c) { return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c); } /// Copies the values stored in a 128-bit vector of [4 x float] as /// specified by the 128-bit integer vector operand. /// \headerfile /// /// This intrinsic corresponds to the VPERMILPS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __c /// A 128-bit integer vector operand specifying how the values are to be /// copied. \n /// Bits [1:0]: \n /// 00: Bits [31:0] of the source are copied to bits [31:0] of the /// returned vector. 
\n /// 01: Bits [63:32] of the source are copied to bits [31:0] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [31:0] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [31:0] of the /// returned vector. \n /// Bits [33:32]: \n /// 00: Bits [31:0] of the source are copied to bits [63:32] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [63:32] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [63:32] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [63:32] of the /// returned vector. \n /// Bits [65:64]: \n /// 00: Bits [31:0] of the source are copied to bits [95:64] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [95:64] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [95:64] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [95:64] of the /// returned vector. \n /// Bits [97:96]: \n /// 00: Bits [31:0] of the source are copied to bits [127:96] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [127:96] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [127:96] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [127:96] of the /// returned vector. /// \returns A 128-bit vector of [4 x float] containing the copied values. static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_permutevar_ps(__m128 __a, __m128i __c) { return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c); } /// Copies the values stored in a 256-bit vector of [8 x float] as /// specified by the 256-bit integer vector operand. /// /// \headerfile /// /// This intrinsic corresponds to the VPERMILPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \param __c /// A 256-bit integer vector operand specifying how the values are to be /// copied. \n /// Bits [1:0]: \n /// 00: Bits [31:0] of the source are copied to bits [31:0] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [31:0] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [31:0] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [31:0] of the /// returned vector. \n /// Bits [33:32]: \n /// 00: Bits [31:0] of the source are copied to bits [63:32] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [63:32] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [63:32] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [63:32] of the /// returned vector. \n /// Bits [65:64]: \n /// 00: Bits [31:0] of the source are copied to bits [95:64] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [95:64] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [95:64] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [95:64] of the /// returned vector. \n /// Bits [97:96]: \n /// 00: Bits [31:0] of the source are copied to bits [127:96] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [127:96] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [127:96] of the /// returned vector. 
\n /// 11: Bits [127:96] of the source are copied to bits [127:96] of the /// returned vector. \n /// Bits [129:128]: \n /// 00: Bits [159:128] of the source are copied to bits [159:128] of the /// returned vector. \n /// 01: Bits [191:160] of the source are copied to bits [159:128] of the /// returned vector. \n /// 10: Bits [223:192] of the source are copied to bits [159:128] of the /// returned vector. \n /// 11: Bits [255:224] of the source are copied to bits [159:128] of the /// returned vector. \n /// Bits [161:160]: \n /// 00: Bits [159:128] of the source are copied to bits [191:160] of the /// returned vector. \n /// 01: Bits [191:160] of the source are copied to bits [191:160] of the /// returned vector. \n /// 10: Bits [223:192] of the source are copied to bits [191:160] of the /// returned vector. \n /// 11: Bits [255:224] of the source are copied to bits [191:160] of the /// returned vector. \n /// Bits [193:192]: \n /// 00: Bits [159:128] of the source are copied to bits [223:192] of the /// returned vector. \n /// 01: Bits [191:160] of the source are copied to bits [223:192] of the /// returned vector. \n /// 10: Bits [223:192] of the source are copied to bits [223:192] of the /// returned vector. \n /// 11: Bits [255:224] of the source are copied to bits [223:192] of the /// returned vector. \n /// Bits [225:224]: \n /// 00: Bits [159:128] of the source are copied to bits [255:224] of the /// returned vector. \n /// 01: Bits [191:160] of the source are copied to bits [255:224] of the /// returned vector. \n /// 10: Bits [223:192] of the source are copied to bits [255:224] of the /// returned vector. \n /// 11: Bits [255:224] of the source are copied to bits [255:224] of the /// returned vector. /// \returns A 256-bit vector of [8 x float] containing the copied values. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_permutevar_ps(__m256 __a, __m256i __c) { return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c); } /// Copies the values in a 128-bit vector of [2 x double] as specified /// by the immediate integer operand. /// /// \headerfile /// /// \code /// __m128d _mm_permute_pd(__m128d A, const int C); /// \endcode /// /// This intrinsic corresponds to the VPERMILPD instruction. /// /// \param A /// A 128-bit vector of [2 x double]. /// \param C /// An immediate integer operand specifying how the values are to be /// copied. \n /// Bit [0]: \n /// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned /// vector. \n /// 1: Bits [127:64] of the source are copied to bits [63:0] of the /// returned vector. \n /// Bit [1]: \n /// 0: Bits [63:0] of the source are copied to bits [127:64] of the /// returned vector. \n /// 1: Bits [127:64] of the source are copied to bits [127:64] of the /// returned vector. /// \returns A 128-bit vector of [2 x double] containing the copied values. #define _mm_permute_pd(A, C) \ ((__m128d)__builtin_ia32_vpermilpd((__v2df)(__m128d)(A), (int)(C))) /// Copies the values in a 256-bit vector of [4 x double] as specified by /// the immediate integer operand. /// /// \headerfile /// /// \code /// __m256d _mm256_permute_pd(__m256d A, const int C); /// \endcode /// /// This intrinsic corresponds to the VPERMILPD instruction. /// /// \param A /// A 256-bit vector of [4 x double]. /// \param C /// An immediate integer operand specifying how the values are to be /// copied. \n /// Bit [0]: \n /// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned /// vector. 
\n /// 1: Bits [127:64] of the source are copied to bits [63:0] of the /// returned vector. \n /// Bit [1]: \n /// 0: Bits [63:0] of the source are copied to bits [127:64] of the /// returned vector. \n /// 1: Bits [127:64] of the source are copied to bits [127:64] of the /// returned vector. \n /// Bit [2]: \n /// 0: Bits [191:128] of the source are copied to bits [191:128] of the /// returned vector. \n /// 1: Bits [255:192] of the source are copied to bits [191:128] of the /// returned vector. \n /// Bit [3]: \n /// 0: Bits [191:128] of the source are copied to bits [255:192] of the /// returned vector. \n /// 1: Bits [255:192] of the source are copied to bits [255:192] of the /// returned vector. /// \returns A 256-bit vector of [4 x double] containing the copied values. #define _mm256_permute_pd(A, C) \ ((__m256d)__builtin_ia32_vpermilpd256((__v4df)(__m256d)(A), (int)(C))) /// Copies the values in a 128-bit vector of [4 x float] as specified by /// the immediate integer operand. /// /// \headerfile /// /// \code /// __m128 _mm_permute_ps(__m128 A, const int C); /// \endcode /// /// This intrinsic corresponds to the VPERMILPS instruction. /// /// \param A /// A 128-bit vector of [4 x float]. /// \param C /// An immediate integer operand specifying how the values are to be /// copied. \n /// Bits [1:0]: \n /// 00: Bits [31:0] of the source are copied to bits [31:0] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [31:0] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [31:0] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [31:0] of the /// returned vector. \n /// Bits [3:2]: \n /// 00: Bits [31:0] of the source are copied to bits [63:32] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [63:32] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [63:32] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [63:32] of the /// returned vector. \n /// Bits [5:4]: \n /// 00: Bits [31:0] of the source are copied to bits [95:64] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [95:64] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [95:64] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [95:64] of the /// returned vector. \n /// Bits [7:6]: \n /// 00: Bits [31:0] of the source are copied to bits [127:96] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [127:96] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [127:96] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [127:96] of the /// returned vector. /// \returns A 128-bit vector of [4 x float] containing the copied values. #define _mm_permute_ps(A, C) \ ((__m128)__builtin_ia32_vpermilps((__v4sf)(__m128)(A), (int)(C))) /// Copies the values in a 256-bit vector of [8 x float] as specified by /// the immediate integer operand. /// /// \headerfile /// /// \code /// __m256 _mm256_permute_ps(__m256 A, const int C); /// \endcode /// /// This intrinsic corresponds to the VPERMILPS instruction. /// /// \param A /// A 256-bit vector of [8 x float]. /// \param C /// An immediate integer operand specifying how the values are to be /// copied. 
\n /// Bits [1:0]: \n /// 00: Bits [31:0] of the source are copied to bits [31:0] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [31:0] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [31:0] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [31:0] of the /// returned vector. \n /// Bits [3:2]: \n /// 00: Bits [31:0] of the source are copied to bits [63:32] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [63:32] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [63:32] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [63:32] of the /// returned vector. \n /// Bits [5:4]: \n /// 00: Bits [31:0] of the source are copied to bits [95:64] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [95:64] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [95:64] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [95:64] of the /// returned vector. \n /// Bits [7:6]: \n /// 00: Bits [31:0] of the source are copied to bits [127:96] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [127:96] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [127:96] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [127:96] of the /// returned vector. \n /// Bits [1:0]: \n /// 00: Bits [159:128] of the source are copied to bits [159:128] of the /// returned vector. \n /// 01: Bits [191:160] of the source are copied to bits [159:128] of the /// returned vector. \n /// 10: Bits [223:192] of the source are copied to bits [159:128] of the /// returned vector. \n /// 11: Bits [255:224] of the source are copied to bits [159:128] of the /// returned vector. \n /// Bits [3:2]: \n /// 00: Bits [159:128] of the source are copied to bits [191:160] of the /// returned vector. \n /// 01: Bits [191:160] of the source are copied to bits [191:160] of the /// returned vector. \n /// 10: Bits [223:192] of the source are copied to bits [191:160] of the /// returned vector. \n /// 11: Bits [255:224] of the source are copied to bits [191:160] of the /// returned vector. \n /// Bits [5:4]: \n /// 00: Bits [159:128] of the source are copied to bits [223:192] of the /// returned vector. \n /// 01: Bits [191:160] of the source are copied to bits [223:192] of the /// returned vector. \n /// 10: Bits [223:192] of the source are copied to bits [223:192] of the /// returned vector. \n /// 11: Bits [255:224] of the source are copied to bits [223:192] of the /// returned vector. \n /// Bits [7:6]: \n /// 00: Bits [159:128] of the source are copied to bits [255:224] of the /// returned vector. \n /// 01: Bits [191:160] of the source are copied to bits [255:224] of the /// returned vector. \n /// 10: Bits [223:192] of the source are copied to bits [255:224] of the /// returned vector. \n /// 11: Bits [255:224] of the source are copied to bits [255:224] of the /// returned vector. /// \returns A 256-bit vector of [8 x float] containing the copied values. #define _mm256_permute_ps(A, C) \ ((__m256)__builtin_ia32_vpermilps256((__v8sf)(__m256)(A), (int)(C))) /// Permutes 128-bit data values stored in two 256-bit vectors of /// [4 x double], as specified by the immediate integer operand. 
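// An illustrative sketch (not part of the header), assuming <immintrin.h> and
// -mavx: reverse the four floats inside each 128-bit half of a vector. The
// 8-bit selector applies independently to the low and high 128-bit lanes;
// _MM_SHUFFLE(0, 1, 2, 3) encodes the immediate 0x1B.
static inline __m256 reverse_within_lanes(__m256 __v) {
  return _mm256_permute_ps(__v, _MM_SHUFFLE(0, 1, 2, 3));
}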
/// /// \headerfile /// /// \code /// __m256d _mm256_permute2f128_pd(__m256d V1, __m256d V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VPERM2F128 instruction. /// /// \param V1 /// A 256-bit vector of [4 x double]. /// \param V2 /// A 256-bit vector of [4 x double. /// \param M /// An immediate integer operand specifying how the values are to be /// permuted. \n /// Bits [1:0]: \n /// 00: Bits [127:0] of operand \a V1 are copied to bits [127:0] of the /// destination. \n /// 01: Bits [255:128] of operand \a V1 are copied to bits [127:0] of the /// destination. \n /// 10: Bits [127:0] of operand \a V2 are copied to bits [127:0] of the /// destination. \n /// 11: Bits [255:128] of operand \a V2 are copied to bits [127:0] of the /// destination. \n /// Bits [5:4]: \n /// 00: Bits [127:0] of operand \a V1 are copied to bits [255:128] of the /// destination. \n /// 01: Bits [255:128] of operand \a V1 are copied to bits [255:128] of the /// destination. \n /// 10: Bits [127:0] of operand \a V2 are copied to bits [255:128] of the /// destination. \n /// 11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the /// destination. /// \returns A 256-bit vector of [4 x double] containing the copied values. #define _mm256_permute2f128_pd(V1, V2, M) \ ((__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \ (__v4df)(__m256d)(V2), (int)(M))) /// Permutes 128-bit data values stored in two 256-bit vectors of /// [8 x float], as specified by the immediate integer operand. /// /// \headerfile /// /// \code /// __m256 _mm256_permute2f128_ps(__m256 V1, __m256 V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VPERM2F128 instruction. /// /// \param V1 /// A 256-bit vector of [8 x float]. /// \param V2 /// A 256-bit vector of [8 x float]. /// \param M /// An immediate integer operand specifying how the values are to be /// permuted. \n /// Bits [1:0]: \n /// 00: Bits [127:0] of operand \a V1 are copied to bits [127:0] of the /// destination. \n /// 01: Bits [255:128] of operand \a V1 are copied to bits [127:0] of the /// destination. \n /// 10: Bits [127:0] of operand \a V2 are copied to bits [127:0] of the /// destination. \n /// 11: Bits [255:128] of operand \a V2 are copied to bits [127:0] of the /// destination. \n /// Bits [5:4]: \n /// 00: Bits [127:0] of operand \a V1 are copied to bits [255:128] of the /// destination. \n /// 01: Bits [255:128] of operand \a V1 are copied to bits [255:128] of the /// destination. \n /// 10: Bits [127:0] of operand \a V2 are copied to bits [255:128] of the /// destination. \n /// 11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the /// destination. /// \returns A 256-bit vector of [8 x float] containing the copied values. #define _mm256_permute2f128_ps(V1, V2, M) \ ((__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \ (__v8sf)(__m256)(V2), (int)(M))) /// Permutes 128-bit data values stored in two 256-bit integer vectors, /// as specified by the immediate integer operand. /// /// \headerfile /// /// \code /// __m256i _mm256_permute2f128_si256(__m256i V1, __m256i V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VPERM2F128 instruction. /// /// \param V1 /// A 256-bit integer vector. /// \param V2 /// A 256-bit integer vector. /// \param M /// An immediate integer operand specifying how the values are to be copied. /// Bits [1:0]: \n /// 00: Bits [127:0] of operand \a V1 are copied to bits [127:0] of the /// destination. 
\n /// 01: Bits [255:128] of operand \a V1 are copied to bits [127:0] of the /// destination. \n /// 10: Bits [127:0] of operand \a V2 are copied to bits [127:0] of the /// destination. \n /// 11: Bits [255:128] of operand \a V2 are copied to bits [127:0] of the /// destination. \n /// Bits [5:4]: \n /// 00: Bits [127:0] of operand \a V1 are copied to bits [255:128] of the /// destination. \n /// 01: Bits [255:128] of operand \a V1 are copied to bits [255:128] of the /// destination. \n /// 10: Bits [127:0] of operand \a V2 are copied to bits [255:128] of the /// destination. \n /// 11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the /// destination. /// \returns A 256-bit integer vector containing the copied values. #define _mm256_permute2f128_si256(V1, V2, M) \ ((__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \ (__v8si)(__m256i)(V2), (int)(M))) /* Vector Blend */ /// Merges 64-bit double-precision data values stored in either of the /// two 256-bit vectors of [4 x double], as specified by the immediate /// integer operand. /// /// \headerfile /// /// \code /// __m256d _mm256_blend_pd(__m256d V1, __m256d V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VBLENDPD instruction. /// /// \param V1 /// A 256-bit vector of [4 x double]. /// \param V2 /// A 256-bit vector of [4 x double]. /// \param M /// An immediate integer operand, with mask bits [3:0] specifying how the /// values are to be copied. The position of the mask bit corresponds to the /// index of a copied value. When a mask bit is 0, the corresponding 64-bit /// element in operand \a V1 is copied to the same position in the /// destination. When a mask bit is 1, the corresponding 64-bit element in /// operand \a V2 is copied to the same position in the destination. /// \returns A 256-bit vector of [4 x double] containing the copied values. #define _mm256_blend_pd(V1, V2, M) \ ((__m256d)__builtin_ia32_blendpd256((__v4df)(__m256d)(V1), \ (__v4df)(__m256d)(V2), (int)(M))) /// Merges 32-bit single-precision data values stored in either of the /// two 256-bit vectors of [8 x float], as specified by the immediate /// integer operand. /// /// \headerfile /// /// \code /// __m256 _mm256_blend_ps(__m256 V1, __m256 V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VBLENDPS instruction. /// /// \param V1 /// A 256-bit vector of [8 x float]. /// \param V2 /// A 256-bit vector of [8 x float]. /// \param M /// An immediate integer operand, with mask bits [7:0] specifying how the /// values are to be copied. The position of the mask bit corresponds to the /// index of a copied value. When a mask bit is 0, the corresponding 32-bit /// element in operand \a V1 is copied to the same position in the /// destination. When a mask bit is 1, the corresponding 32-bit element in /// operand \a V2 is copied to the same position in the destination. /// \returns A 256-bit vector of [8 x float] containing the copied values. #define _mm256_blend_ps(V1, V2, M) \ ((__m256)__builtin_ia32_blendps256((__v8sf)(__m256)(V1), \ (__v8sf)(__m256)(V2), (int)(M))) /// Merges 64-bit double-precision data values stored in either of the /// two 256-bit vectors of [4 x double], as specified by the 256-bit vector /// operand. /// /// \headerfile /// /// This intrinsic corresponds to the VBLENDVPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. /// \param __b /// A 256-bit vector of [4 x double]. 
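// A small sketch (not part of the header), assuming <immintrin.h> and -mavx:
// build a vector whose even-indexed floats come from __a and whose
// odd-indexed floats come from __b. In the immediate mask, bit i == 1 selects
// element i of the second operand.
static inline __m256 blend_even_odd(__m256 __a, __m256 __b) {
  return _mm256_blend_ps(__a, __b, 0xAA); // 0b10101010
}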
/// \param __c /// A 256-bit vector operand, with mask bits 255, 191, 127, and 63 specifying /// how the values are to be copied. The position of the mask bit corresponds /// to the most significant bit of a copied value. When a mask bit is 0, the /// corresponding 64-bit element in operand \a __a is copied to the same /// position in the destination. When a mask bit is 1, the corresponding /// 64-bit element in operand \a __b is copied to the same position in the /// destination. /// \returns A 256-bit vector of [4 x double] containing the copied values. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c) { return (__m256d)__builtin_ia32_blendvpd256( (__v4df)__a, (__v4df)__b, (__v4df)__c); } /// Merges 32-bit single-precision data values stored in either of the /// two 256-bit vectors of [8 x float], as specified by the 256-bit vector /// operand. /// /// \headerfile /// /// This intrinsic corresponds to the VBLENDVPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \param __b /// A 256-bit vector of [8 x float]. /// \param __c /// A 256-bit vector operand, with mask bits 255, 223, 191, 159, 127, 95, 63, /// and 31 specifying how the values are to be copied. The position of the /// mask bit corresponds to the most significant bit of a copied value. When /// a mask bit is 0, the corresponding 32-bit element in operand \a __a is /// copied to the same position in the destination. When a mask bit is 1, the /// corresponding 32-bit element in operand \a __b is copied to the same /// position in the destination. /// \returns A 256-bit vector of [8 x float] containing the copied values. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) { return (__m256)__builtin_ia32_blendvps256( (__v8sf)__a, (__v8sf)__b, (__v8sf)__c); } /* Vector Dot Product */ /// Computes two dot products in parallel, using the lower and upper /// halves of two [8 x float] vectors as input to the two computations, and /// returning the two dot products in the lower and upper halves of the /// [8 x float] result. /// /// The immediate integer operand controls which input elements will /// contribute to the dot product, and where the final results are returned. /// In general, for each dot product, the four corresponding elements of the /// input vectors are multiplied; the first two and second two products are /// summed, then the two sums are added to form the final result. /// /// \headerfile /// /// \code /// __m256 _mm256_dp_ps(__m256 V1, __m256 V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VDPPS instruction. /// /// \param V1 /// A vector of [8 x float] values, treated as two [4 x float] vectors. /// \param V2 /// A vector of [8 x float] values, treated as two [4 x float] vectors. /// \param M /// An immediate integer argument. Bits [7:4] determine which elements of /// the input vectors are used, with bit [4] corresponding to the lowest /// element and bit [7] corresponding to the highest element of each [4 x /// float] subvector. If a bit is set, the corresponding elements from the /// two input vectors are used as an input for dot product; otherwise that /// input is treated as zero. Bits [3:0] determine which elements of the /// result will receive a copy of the final dot product, with bit [0] /// corresponding to the lowest element and bit [3] corresponding to the /// highest element of each [4 x float] subvector. 
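/* Editorial aside (not part of the original header): a hedged sketch of a
   variable blend with _mm256_blendv_pd. __mask is assumed to hold all-ones
   (sign bit set) in the lanes that should come from __b; the helper name is
   hypothetical. */
static __inline __m256d __DEFAULT_FN_ATTRS
__example_select_pd(__m256d __a, __m256d __b, __m256d __mask) {
  /* Lanes whose sign bit in __mask is 1 are taken from __b, the rest from
     __a, matching the VBLENDVPD description above. */
  return _mm256_blendv_pd(__a, __b, __mask);
}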
If a bit is set, the dot /// product is returned in the corresponding element; otherwise that element /// is set to zero. The bitmask is applied in the same way to each of the /// two parallel dot product computations. /// \returns A 256-bit vector of [8 x float] containing the two dot products. #define _mm256_dp_ps(V1, V2, M) \ ((__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \ (__v8sf)(__m256)(V2), (M))) /* Vector shuffle */ /// Selects 8 float values from the 256-bit operands of [8 x float], as /// specified by the immediate value operand. /// /// The four selected elements in each operand are copied to the destination /// according to the bits specified in the immediate operand. The selected /// elements from the first 256-bit operand are copied to bits [63:0] and /// bits [191:128] of the destination, and the selected elements from the /// second 256-bit operand are copied to bits [127:64] and bits [255:192] of /// the destination. For example, if bits [7:0] of the immediate operand /// contain a value of 0xFF, the 256-bit destination vector would contain the /// following values: b[7], b[7], a[7], a[7], b[3], b[3], a[3], a[3]. /// /// \headerfile /// /// \code /// __m256 _mm256_shuffle_ps(__m256 a, __m256 b, const int mask); /// \endcode /// /// This intrinsic corresponds to the VSHUFPS instruction. /// /// \param a /// A 256-bit vector of [8 x float]. The four selected elements in this /// operand are copied to bits [63:0] and bits [191:128] in the destination, /// according to the bits specified in the immediate operand. /// \param b /// A 256-bit vector of [8 x float]. The four selected elements in this /// operand are copied to bits [127:64] and bits [255:192] in the /// destination, according to the bits specified in the immediate operand. /// \param mask /// An immediate value containing an 8-bit value specifying which elements to /// copy from \a a and \a b \n. /// Bits [3:0] specify the values copied from operand \a a. \n /// Bits [7:4] specify the values copied from operand \a b. \n /// The destinations within the 256-bit destination are assigned values as /// follows, according to the bit value assignments described below: \n /// Bits [1:0] are used to assign values to bits [31:0] and [159:128] in the /// destination. \n /// Bits [3:2] are used to assign values to bits [63:32] and [191:160] in the /// destination. \n /// Bits [5:4] are used to assign values to bits [95:64] and [223:192] in the /// destination. \n /// Bits [7:6] are used to assign values to bits [127:96] and [255:224] in /// the destination. \n /// Bit value assignments: \n /// 00: Bits [31:0] and [159:128] are copied from the selected operand. \n /// 01: Bits [63:32] and [191:160] are copied from the selected operand. \n /// 10: Bits [95:64] and [223:192] are copied from the selected operand. \n /// 11: Bits [127:96] and [255:224] are copied from the selected operand. \n /// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. /// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form /// [b6, b4, b2, b0]. /// \returns A 256-bit vector of [8 x float] containing the shuffled values. #define _mm256_shuffle_ps(a, b, mask) \ ((__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \ (__v8sf)(__m256)(b), (int)(mask))) /// Selects four double-precision values from the 256-bit operands of /// [4 x double], as specified by the immediate value operand. 
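/* Editorial aside (not part of the original header): a sketch of the
   _mm256_dp_ps immediate. 0xF1 uses all four elements of each 128-bit half
   (bits [7:4] = 0xF) and writes each half's dot product to element 0 of that
   half (bits [3:0] = 0x1). The helper name is hypothetical. */
static __inline __m256 __DEFAULT_FN_ATTRS
__example_dp_per_lane(__m256 __v1, __m256 __v2) {
  return _mm256_dp_ps(__v1, __v2, 0xF1);
}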
/// /// The selected elements from the first 256-bit operand are copied to bits /// [63:0] and bits [191:128] in the destination, and the selected elements /// from the second 256-bit operand are copied to bits [127:64] and bits /// [255:192] in the destination. For example, if bits [3:0] of the immediate /// operand contain a value of 0xF, the 256-bit destination vector would /// contain the following values: b[3], a[3], b[1], a[1]. /// /// \headerfile /// /// \code /// __m256d _mm256_shuffle_pd(__m256d a, __m256d b, const int mask); /// \endcode /// /// This intrinsic corresponds to the VSHUFPD instruction. /// /// \param a /// A 256-bit vector of [4 x double]. /// \param b /// A 256-bit vector of [4 x double]. /// \param mask /// An immediate value containing 8-bit values specifying which elements to /// copy from \a a and \a b: \n /// Bit [0]=0: Bits [63:0] are copied from \a a to bits [63:0] of the /// destination. \n /// Bit [0]=1: Bits [127:64] are copied from \a a to bits [63:0] of the /// destination. \n /// Bit [1]=0: Bits [63:0] are copied from \a b to bits [127:64] of the /// destination. \n /// Bit [1]=1: Bits [127:64] are copied from \a b to bits [127:64] of the /// destination. \n /// Bit [2]=0: Bits [191:128] are copied from \a a to bits [191:128] of the /// destination. \n /// Bit [2]=1: Bits [255:192] are copied from \a a to bits [191:128] of the /// destination. \n /// Bit [3]=0: Bits [191:128] are copied from \a b to bits [255:192] of the /// destination. \n /// Bit [3]=1: Bits [255:192] are copied from \a b to bits [255:192] of the /// destination. /// \returns A 256-bit vector of [4 x double] containing the shuffled values. #define _mm256_shuffle_pd(a, b, mask) \ ((__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \ (__v4df)(__m256d)(b), (int)(mask))) /* Compare */ #define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */ #define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unordered, signaling) */ #define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */ #define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */ #define _CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */ #define _CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */ #define _CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */ #define _CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */ #define _CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */ #define _CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */ #define _CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */ #define _CMP_UNORD_S 0x13 /* Unordered (signaling) */ #define _CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */ #define _CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */ #define _CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unordered, non-signaling) */ #define _CMP_ORD_S 0x17 /* Ordered (signaling) */ #define _CMP_EQ_US 0x18 /* Equal (unordered, signaling) */ #define _CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unordered, non-signaling) */ #define _CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */ #define _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */ #define _CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */ #define _CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */ #define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */ #define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */ /* Below intrinsic defined in emmintrin.h can be used for AVX */ /// Compares each of the corresponding double-precision 
values of two /// 128-bit vectors of [2 x double], using the operation specified by the /// immediate integer operand. /// /// Returns a [2 x double] vector consisting of two doubles corresponding to /// the two comparison results: zero if the comparison is false, and all 1's /// if the comparison is true. /// /// \headerfile /// /// \code /// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c); /// \endcode /// /// This intrinsic corresponds to the VCMPPD instruction. /// /// \param a /// A 128-bit vector of [2 x double]. /// \param b /// A 128-bit vector of [2 x double]. /// \param c /// An immediate integer operand, with bits [4:0] specifying which comparison /// operation to use: \n /// 0x00: Equal (ordered, non-signaling) \n /// 0x01: Less-than (ordered, signaling) \n /// 0x02: Less-than-or-equal (ordered, signaling) \n /// 0x03: Unordered (non-signaling) \n /// 0x04: Not-equal (unordered, non-signaling) \n /// 0x05: Not-less-than (unordered, signaling) \n /// 0x06: Not-less-than-or-equal (unordered, signaling) \n /// 0x07: Ordered (non-signaling) \n /// 0x08: Equal (unordered, non-signaling) \n /// 0x09: Not-greater-than-or-equal (unordered, signaling) \n /// 0x0A: Not-greater-than (unordered, signaling) \n /// 0x0B: False (ordered, non-signaling) \n /// 0x0C: Not-equal (ordered, non-signaling) \n /// 0x0D: Greater-than-or-equal (ordered, signaling) \n /// 0x0E: Greater-than (ordered, signaling) \n /// 0x0F: True (unordered, non-signaling) \n /// 0x10: Equal (ordered, signaling) \n /// 0x11: Less-than (ordered, non-signaling) \n /// 0x12: Less-than-or-equal (ordered, non-signaling) \n /// 0x13: Unordered (signaling) \n /// 0x14: Not-equal (unordered, signaling) \n /// 0x15: Not-less-than (unordered, non-signaling) \n /// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n /// 0x17: Ordered (signaling) \n /// 0x18: Equal (unordered, signaling) \n /// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n /// 0x1A: Not-greater-than (unordered, non-signaling) \n /// 0x1B: False (ordered, signaling) \n /// 0x1C: Not-equal (ordered, signaling) \n /// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 128-bit vector of [2 x double] containing the comparison results. /// \fn __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c) /* Below intrinsic defined in xmmintrin.h can be used for AVX */ /// Compares each of the corresponding values of two 128-bit vectors of /// [4 x float], using the operation specified by the immediate integer /// operand. /// /// Returns a [4 x float] vector consisting of four floats corresponding to /// the four comparison results: zero if the comparison is false, and all 1's /// if the comparison is true. /// /// \headerfile /// /// \code /// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c); /// \endcode /// /// This intrinsic corresponds to the VCMPPS instruction. /// /// \param a /// A 128-bit vector of [4 x float]. /// \param b /// A 128-bit vector of [4 x float]. 
/// \param c /// An immediate integer operand, with bits [4:0] specifying which comparison /// operation to use: \n /// 0x00: Equal (ordered, non-signaling) \n /// 0x01: Less-than (ordered, signaling) \n /// 0x02: Less-than-or-equal (ordered, signaling) \n /// 0x03: Unordered (non-signaling) \n /// 0x04: Not-equal (unordered, non-signaling) \n /// 0x05: Not-less-than (unordered, signaling) \n /// 0x06: Not-less-than-or-equal (unordered, signaling) \n /// 0x07: Ordered (non-signaling) \n /// 0x08: Equal (unordered, non-signaling) \n /// 0x09: Not-greater-than-or-equal (unordered, signaling) \n /// 0x0A: Not-greater-than (unordered, signaling) \n /// 0x0B: False (ordered, non-signaling) \n /// 0x0C: Not-equal (ordered, non-signaling) \n /// 0x0D: Greater-than-or-equal (ordered, signaling) \n /// 0x0E: Greater-than (ordered, signaling) \n /// 0x0F: True (unordered, non-signaling) \n /// 0x10: Equal (ordered, signaling) \n /// 0x11: Less-than (ordered, non-signaling) \n /// 0x12: Less-than-or-equal (ordered, non-signaling) \n /// 0x13: Unordered (signaling) \n /// 0x14: Not-equal (unordered, signaling) \n /// 0x15: Not-less-than (unordered, non-signaling) \n /// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n /// 0x17: Ordered (signaling) \n /// 0x18: Equal (unordered, signaling) \n /// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n /// 0x1A: Not-greater-than (unordered, non-signaling) \n /// 0x1B: False (ordered, signaling) \n /// 0x1C: Not-equal (ordered, signaling) \n /// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 128-bit vector of [4 x float] containing the comparison results. /// \fn __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c) /// Compares each of the corresponding double-precision values of two /// 256-bit vectors of [4 x double], using the operation specified by the /// immediate integer operand. /// /// Returns a [4 x double] vector consisting of four doubles corresponding to /// the four comparison results: zero if the comparison is false, and all 1's /// if the comparison is true. /// /// \headerfile /// /// \code /// __m256d _mm256_cmp_pd(__m256d a, __m256d b, const int c); /// \endcode /// /// This intrinsic corresponds to the VCMPPD instruction. /// /// \param a /// A 256-bit vector of [4 x double]. /// \param b /// A 256-bit vector of [4 x double]. 
/// \param c /// An immediate integer operand, with bits [4:0] specifying which comparison /// operation to use: \n /// 0x00: Equal (ordered, non-signaling) \n /// 0x01: Less-than (ordered, signaling) \n /// 0x02: Less-than-or-equal (ordered, signaling) \n /// 0x03: Unordered (non-signaling) \n /// 0x04: Not-equal (unordered, non-signaling) \n /// 0x05: Not-less-than (unordered, signaling) \n /// 0x06: Not-less-than-or-equal (unordered, signaling) \n /// 0x07: Ordered (non-signaling) \n /// 0x08: Equal (unordered, non-signaling) \n /// 0x09: Not-greater-than-or-equal (unordered, signaling) \n /// 0x0A: Not-greater-than (unordered, signaling) \n /// 0x0B: False (ordered, non-signaling) \n /// 0x0C: Not-equal (ordered, non-signaling) \n /// 0x0D: Greater-than-or-equal (ordered, signaling) \n /// 0x0E: Greater-than (ordered, signaling) \n /// 0x0F: True (unordered, non-signaling) \n /// 0x10: Equal (ordered, signaling) \n /// 0x11: Less-than (ordered, non-signaling) \n /// 0x12: Less-than-or-equal (ordered, non-signaling) \n /// 0x13: Unordered (signaling) \n /// 0x14: Not-equal (unordered, signaling) \n /// 0x15: Not-less-than (unordered, non-signaling) \n /// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n /// 0x17: Ordered (signaling) \n /// 0x18: Equal (unordered, signaling) \n /// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n /// 0x1A: Not-greater-than (unordered, non-signaling) \n /// 0x1B: False (ordered, signaling) \n /// 0x1C: Not-equal (ordered, signaling) \n /// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 256-bit vector of [4 x double] containing the comparison results. #define _mm256_cmp_pd(a, b, c) \ ((__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \ (__v4df)(__m256d)(b), (c))) /// Compares each of the corresponding values of two 256-bit vectors of /// [8 x float], using the operation specified by the immediate integer /// operand. /// /// Returns a [8 x float] vector consisting of eight floats corresponding to /// the eight comparison results: zero if the comparison is false, and all /// 1's if the comparison is true. /// /// \headerfile /// /// \code /// __m256 _mm256_cmp_ps(__m256 a, __m256 b, const int c); /// \endcode /// /// This intrinsic corresponds to the VCMPPS instruction. /// /// \param a /// A 256-bit vector of [8 x float]. /// \param b /// A 256-bit vector of [8 x float]. 
/// \param c /// An immediate integer operand, with bits [4:0] specifying which comparison /// operation to use: \n /// 0x00: Equal (ordered, non-signaling) \n /// 0x01: Less-than (ordered, signaling) \n /// 0x02: Less-than-or-equal (ordered, signaling) \n /// 0x03: Unordered (non-signaling) \n /// 0x04: Not-equal (unordered, non-signaling) \n /// 0x05: Not-less-than (unordered, signaling) \n /// 0x06: Not-less-than-or-equal (unordered, signaling) \n /// 0x07: Ordered (non-signaling) \n /// 0x08: Equal (unordered, non-signaling) \n /// 0x09: Not-greater-than-or-equal (unordered, signaling) \n /// 0x0A: Not-greater-than (unordered, signaling) \n /// 0x0B: False (ordered, non-signaling) \n /// 0x0C: Not-equal (ordered, non-signaling) \n /// 0x0D: Greater-than-or-equal (ordered, signaling) \n /// 0x0E: Greater-than (ordered, signaling) \n /// 0x0F: True (unordered, non-signaling) \n /// 0x10: Equal (ordered, signaling) \n /// 0x11: Less-than (ordered, non-signaling) \n /// 0x12: Less-than-or-equal (ordered, non-signaling) \n /// 0x13: Unordered (signaling) \n /// 0x14: Not-equal (unordered, signaling) \n /// 0x15: Not-less-than (unordered, non-signaling) \n /// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n /// 0x17: Ordered (signaling) \n /// 0x18: Equal (unordered, signaling) \n /// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n /// 0x1A: Not-greater-than (unordered, non-signaling) \n /// 0x1B: False (ordered, signaling) \n /// 0x1C: Not-equal (ordered, signaling) \n /// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 256-bit vector of [8 x float] containing the comparison results. #define _mm256_cmp_ps(a, b, c) \ ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \ (__v8sf)(__m256)(b), (c))) /* Below intrinsic defined in emmintrin.h can be used for AVX */ /// Compares each of the corresponding scalar double-precision values of /// two 128-bit vectors of [2 x double], using the operation specified by the /// immediate integer operand. /// /// If the result is true, all 64 bits of the destination vector are set; /// otherwise they are cleared. /// /// \headerfile /// /// \code /// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c); /// \endcode /// /// This intrinsic corresponds to the VCMPSD instruction. /// /// \param a /// A 128-bit vector of [2 x double]. /// \param b /// A 128-bit vector of [2 x double]. 
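/* Editorial aside (not part of the original header): a sketch combining the
   _CMP_* predicates defined above with _mm256_cmp_pd. Each result lane is
   all-ones where the predicate holds and all-zeros elsewhere; the helper name
   is hypothetical. */
static __inline __m256d __DEFAULT_FN_ATTRS
__example_cmp_ge_pd(__m256d __a, __m256d __b) {
  /* _CMP_GE_OQ: greater-than-or-equal, ordered, non-signaling. */
  return _mm256_cmp_pd(__a, __b, _CMP_GE_OQ);
}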
/// \param c /// An immediate integer operand, with bits [4:0] specifying which comparison /// operation to use: \n /// 0x00: Equal (ordered, non-signaling) \n /// 0x01: Less-than (ordered, signaling) \n /// 0x02: Less-than-or-equal (ordered, signaling) \n /// 0x03: Unordered (non-signaling) \n /// 0x04: Not-equal (unordered, non-signaling) \n /// 0x05: Not-less-than (unordered, signaling) \n /// 0x06: Not-less-than-or-equal (unordered, signaling) \n /// 0x07: Ordered (non-signaling) \n /// 0x08: Equal (unordered, non-signaling) \n /// 0x09: Not-greater-than-or-equal (unordered, signaling) \n /// 0x0A: Not-greater-than (unordered, signaling) \n /// 0x0B: False (ordered, non-signaling) \n /// 0x0C: Not-equal (ordered, non-signaling) \n /// 0x0D: Greater-than-or-equal (ordered, signaling) \n /// 0x0E: Greater-than (ordered, signaling) \n /// 0x0F: True (unordered, non-signaling) \n /// 0x10: Equal (ordered, signaling) \n /// 0x11: Less-than (ordered, non-signaling) \n /// 0x12: Less-than-or-equal (ordered, non-signaling) \n /// 0x13: Unordered (signaling) \n /// 0x14: Not-equal (unordered, signaling) \n /// 0x15: Not-less-than (unordered, non-signaling) \n /// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n /// 0x17: Ordered (signaling) \n /// 0x18: Equal (unordered, signaling) \n /// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n /// 0x1A: Not-greater-than (unordered, non-signaling) \n /// 0x1B: False (ordered, signaling) \n /// 0x1C: Not-equal (ordered, signaling) \n /// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 128-bit vector of [2 x double] containing the comparison results. /// \fn __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c) /* Below intrinsic defined in xmmintrin.h can be used for AVX */ /// Compares each of the corresponding scalar values of two 128-bit /// vectors of [4 x float], using the operation specified by the immediate /// integer operand. /// /// If the result is true, all 32 bits of the destination vector are set; /// otherwise they are cleared. /// /// \headerfile /// /// \code /// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c); /// \endcode /// /// This intrinsic corresponds to the VCMPSS instruction. /// /// \param a /// A 128-bit vector of [4 x float]. /// \param b /// A 128-bit vector of [4 x float]. 
/// \param c /// An immediate integer operand, with bits [4:0] specifying which comparison /// operation to use: \n /// 0x00: Equal (ordered, non-signaling) \n /// 0x01: Less-than (ordered, signaling) \n /// 0x02: Less-than-or-equal (ordered, signaling) \n /// 0x03: Unordered (non-signaling) \n /// 0x04: Not-equal (unordered, non-signaling) \n /// 0x05: Not-less-than (unordered, signaling) \n /// 0x06: Not-less-than-or-equal (unordered, signaling) \n /// 0x07: Ordered (non-signaling) \n /// 0x08: Equal (unordered, non-signaling) \n /// 0x09: Not-greater-than-or-equal (unordered, signaling) \n /// 0x0A: Not-greater-than (unordered, signaling) \n /// 0x0B: False (ordered, non-signaling) \n /// 0x0C: Not-equal (ordered, non-signaling) \n /// 0x0D: Greater-than-or-equal (ordered, signaling) \n /// 0x0E: Greater-than (ordered, signaling) \n /// 0x0F: True (unordered, non-signaling) \n /// 0x10: Equal (ordered, signaling) \n /// 0x11: Less-than (ordered, non-signaling) \n /// 0x12: Less-than-or-equal (ordered, non-signaling) \n /// 0x13: Unordered (signaling) \n /// 0x14: Not-equal (unordered, signaling) \n /// 0x15: Not-less-than (unordered, non-signaling) \n /// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n /// 0x17: Ordered (signaling) \n /// 0x18: Equal (unordered, signaling) \n /// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n /// 0x1A: Not-greater-than (unordered, non-signaling) \n /// 0x1B: False (ordered, signaling) \n /// 0x1C: Not-equal (ordered, signaling) \n /// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 128-bit vector of [4 x float] containing the comparison results. /// \fn __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c) /// Takes a [8 x i32] vector and returns the vector element value /// indexed by the immediate constant operand. /// /// \headerfile /// /// \code /// int _mm256_extract_epi32(__m256i X, const int N); /// \endcode /// /// This intrinsic corresponds to the VEXTRACTF128+COMPOSITE /// instruction. /// /// \param X /// A 256-bit vector of [8 x i32]. /// \param N /// An immediate integer operand with bits [2:0] determining which vector /// element is extracted and returned. /// \returns A 32-bit integer containing the extracted 32 bits of extended /// packed data. #define _mm256_extract_epi32(X, N) \ ((int)__builtin_ia32_vec_ext_v8si((__v8si)(__m256i)(X), (int)(N))) /// Takes a [16 x i16] vector and returns the vector element value /// indexed by the immediate constant operand. /// /// \headerfile /// /// \code /// int _mm256_extract_epi16(__m256i X, const int N); /// \endcode /// /// This intrinsic corresponds to the VEXTRACTF128+COMPOSITE /// instruction. /// /// \param X /// A 256-bit integer vector of [16 x i16]. /// \param N /// An immediate integer operand with bits [3:0] determining which vector /// element is extracted and returned. /// \returns A 32-bit integer containing the extracted 16 bits of zero extended /// packed data. #define _mm256_extract_epi16(X, N) \ ((int)(unsigned short)__builtin_ia32_vec_ext_v16hi((__v16hi)(__m256i)(X), \ (int)(N))) /// Takes a [32 x i8] vector and returns the vector element value /// indexed by the immediate constant operand. /// /// \headerfile /// /// \code /// int _mm256_extract_epi8(__m256i X, const int N); /// \endcode /// /// This intrinsic corresponds to the VEXTRACTF128+COMPOSITE /// instruction. /// /// \param X /// A 256-bit integer vector of [32 x i8]. 
/// \param N /// An immediate integer operand with bits [4:0] determining which vector /// element is extracted and returned. /// \returns A 32-bit integer containing the extracted 8 bits of zero extended /// packed data. #define _mm256_extract_epi8(X, N) \ ((int)(unsigned char)__builtin_ia32_vec_ext_v32qi((__v32qi)(__m256i)(X), \ (int)(N))) #ifdef __x86_64__ /// Takes a [4 x i64] vector and returns the vector element value /// indexed by the immediate constant operand. /// /// \headerfile /// /// \code /// long long _mm256_extract_epi64(__m256i X, const int N); /// \endcode /// /// This intrinsic corresponds to the VEXTRACTF128+COMPOSITE /// instruction. /// /// \param X /// A 256-bit integer vector of [4 x i64]. /// \param N /// An immediate integer operand with bits [1:0] determining which vector /// element is extracted and returned. /// \returns A 64-bit integer containing the extracted 64 bits of extended /// packed data. #define _mm256_extract_epi64(X, N) \ ((long long)__builtin_ia32_vec_ext_v4di((__v4di)(__m256i)(X), (int)(N))) #endif /// Takes a [8 x i32] vector and replaces the vector element value /// indexed by the immediate constant operand by a new value. Returns the /// modified vector. /// /// \headerfile /// /// \code /// __m256i _mm256_insert_epi32(__m256i X, int I, const int N); /// \endcode /// /// This intrinsic corresponds to the VINSERTF128+COMPOSITE /// instruction. /// /// \param X /// A vector of [8 x i32] to be used by the insert operation. /// \param I /// An integer value. The replacement value for the insert operation. /// \param N /// An immediate integer specifying the index of the vector element to be /// replaced. /// \returns A copy of vector \a X, after replacing its element indexed by /// \a N with \a I. #define _mm256_insert_epi32(X, I, N) \ ((__m256i)__builtin_ia32_vec_set_v8si((__v8si)(__m256i)(X), \ (int)(I), (int)(N))) /// Takes a [16 x i16] vector and replaces the vector element value /// indexed by the immediate constant operand with a new value. Returns the /// modified vector. /// /// \headerfile /// /// \code /// __m256i _mm256_insert_epi16(__m256i X, int I, const int N); /// \endcode /// /// This intrinsic corresponds to the VINSERTF128+COMPOSITE /// instruction. /// /// \param X /// A vector of [16 x i16] to be used by the insert operation. /// \param I /// An i16 integer value. The replacement value for the insert operation. /// \param N /// An immediate integer specifying the index of the vector element to be /// replaced. /// \returns A copy of vector \a X, after replacing its element indexed by /// \a N with \a I. #define _mm256_insert_epi16(X, I, N) \ ((__m256i)__builtin_ia32_vec_set_v16hi((__v16hi)(__m256i)(X), \ (int)(I), (int)(N))) /// Takes a [32 x i8] vector and replaces the vector element value /// indexed by the immediate constant operand with a new value. Returns the /// modified vector. /// /// \headerfile /// /// \code /// __m256i _mm256_insert_epi8(__m256i X, int I, const int N); /// \endcode /// /// This intrinsic corresponds to the VINSERTF128+COMPOSITE /// instruction. /// /// \param X /// A vector of [32 x i8] to be used by the insert operation. /// \param I /// An i8 integer value. The replacement value for the insert operation. /// \param N /// An immediate integer specifying the index of the vector element to be /// replaced. /// \returns A copy of vector \a X, after replacing its element indexed by /// \a N with \a I. 
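/* Editorial aside (not part of the original header): extract and insert take
   immediate element indices. A sketch that bumps element 5 of an [8 x i32]
   vector by one; the helper name is hypothetical. */
static __inline __m256i __DEFAULT_FN_ATTRS
__example_increment_element5(__m256i __v) {
  int __e = _mm256_extract_epi32(__v, 5);      /* read element 5 */
  return _mm256_insert_epi32(__v, __e + 1, 5); /* write it back, plus one */
}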
#define _mm256_insert_epi8(X, I, N) \ ((__m256i)__builtin_ia32_vec_set_v32qi((__v32qi)(__m256i)(X), \ (int)(I), (int)(N))) #ifdef __x86_64__ /// Takes a [4 x i64] vector and replaces the vector element value /// indexed by the immediate constant operand with a new value. Returns the /// modified vector. /// /// \headerfile /// /// \code /// __m256i _mm256_insert_epi64(__m256i X, int I, const int N); /// \endcode /// /// This intrinsic corresponds to the VINSERTF128+COMPOSITE /// instruction. /// /// \param X /// A vector of [4 x i64] to be used by the insert operation. /// \param I /// A 64-bit integer value. The replacement value for the insert operation. /// \param N /// An immediate integer specifying the index of the vector element to be /// replaced. /// \returns A copy of vector \a X, after replacing its element indexed by /// \a N with \a I. #define _mm256_insert_epi64(X, I, N) \ ((__m256i)__builtin_ia32_vec_set_v4di((__v4di)(__m256i)(X), \ (long long)(I), (int)(N))) #endif /* Conversion */ /// Converts a vector of [4 x i32] into a vector of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTDQ2PD instruction. /// /// \param __a /// A 128-bit integer vector of [4 x i32]. /// \returns A 256-bit vector of [4 x double] containing the converted values. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtepi32_pd(__m128i __a) { return (__m256d)__builtin_convertvector((__v4si)__a, __v4df); } /// Converts a vector of [8 x i32] into a vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTDQ2PS instruction. /// /// \param __a /// A 256-bit integer vector. /// \returns A 256-bit vector of [8 x float] containing the converted values. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_cvtepi32_ps(__m256i __a) { return (__m256)__builtin_convertvector((__v8si)__a, __v8sf); } /// Converts a 256-bit vector of [4 x double] into a 128-bit vector of /// [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTPD2PS instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. /// \returns A 128-bit vector of [4 x float] containing the converted values. static __inline __m128 __DEFAULT_FN_ATTRS _mm256_cvtpd_ps(__m256d __a) { return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a); } /// Converts a vector of [8 x float] into a vector of [8 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTPS2DQ instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \returns A 256-bit integer vector containing the converted values. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvtps_epi32(__m256 __a) { return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a); } /// Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4 /// x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTPS2PD instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 256-bit vector of [4 x double] containing the converted values. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtps_pd(__m128 __a) { return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df); } /// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 /// x i32], truncating the result by rounding towards zero when it is /// inexact. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTTPD2DQ instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. /// \returns A 128-bit integer vector containing the converted values. 
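/* Editorial aside (not part of the original header): a sketch of a round trip
   through the conversions above. Converting [4 x double] to [4 x float] and
   back is lossy when the doubles are not exactly representable as floats; the
   helper name is hypothetical. */
static __inline __m256d __DEFAULT_FN_ATTRS
__example_pd_through_ps(__m256d __v) {
  __m128 __narrow = _mm256_cvtpd_ps(__v); /* 4 doubles -> 4 floats */
  return _mm256_cvtps_pd(__narrow);       /* 4 floats  -> 4 doubles */
}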
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi32(__m256d __a) { return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a); } /// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 /// x i32]. When a conversion is inexact, the value returned is rounded /// according to the rounding control bits in the MXCSR register. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTPD2DQ instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. /// \returns A 128-bit integer vector containing the converted values. static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvtpd_epi32(__m256d __a) { return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a); } /// Converts a vector of [8 x float] into a vector of [8 x i32], /// truncating the result by rounding towards zero when it is inexact. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTTPS2DQ instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \returns A 256-bit integer vector containing the converted values. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi32(__m256 __a) { return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a); } /// Returns the first element of the input vector of [4 x double]. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. /// \returns A 64 bit double containing the first element of the input vector. static __inline double __DEFAULT_FN_ATTRS _mm256_cvtsd_f64(__m256d __a) { return __a[0]; } /// Returns the first element of the input vector of [8 x i32]. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __a /// A 256-bit vector of [8 x i32]. /// \returns A 32 bit integer containing the first element of the input vector. static __inline int __DEFAULT_FN_ATTRS _mm256_cvtsi256_si32(__m256i __a) { __v8si __b = (__v8si)__a; return __b[0]; } /// Returns the first element of the input vector of [8 x float]. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \returns A 32 bit float containing the first element of the input vector. static __inline float __DEFAULT_FN_ATTRS _mm256_cvtss_f32(__m256 __a) { return __a[0]; } /* Vector replicate */ /// Moves and duplicates odd-indexed values from a 256-bit vector of /// [8 x float] to float values in a 256-bit vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVSHDUP instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. \n /// Bits [255:224] of \a __a are written to bits [255:224] and [223:192] of /// the return value. \n /// Bits [191:160] of \a __a are written to bits [191:160] and [159:128] of /// the return value. \n /// Bits [127:96] of \a __a are written to bits [127:96] and [95:64] of the /// return value. \n /// Bits [63:32] of \a __a are written to bits [63:32] and [31:0] of the /// return value. /// \returns A 256-bit vector of [8 x float] containing the moved and duplicated /// values. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_movehdup_ps(__m256 __a) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7); } /// Moves and duplicates even-indexed values from a 256-bit vector of /// [8 x float] to float values in a 256-bit vector of [8 x float]. 
/// /// \headerfile /// /// This intrinsic corresponds to the VMOVSLDUP instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. \n /// Bits [223:192] of \a __a are written to bits [255:224] and [223:192] of /// the return value. \n /// Bits [159:128] of \a __a are written to bits [191:160] and [159:128] of /// the return value. \n /// Bits [95:64] of \a __a are written to bits [127:96] and [95:64] of the /// return value. \n /// Bits [31:0] of \a __a are written to bits [63:32] and [31:0] of the /// return value. /// \returns A 256-bit vector of [8 x float] containing the moved and duplicated /// values. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_moveldup_ps(__m256 __a) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6); } /// Moves and duplicates double-precision floating point values from a /// 256-bit vector of [4 x double] to double-precision values in a 256-bit /// vector of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDDUP instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. \n /// Bits [63:0] of \a __a are written to bits [127:64] and [63:0] of the /// return value. \n /// Bits [191:128] of \a __a are written to bits [255:192] and [191:128] of /// the return value. /// \returns A 256-bit vector of [4 x double] containing the moved and /// duplicated values. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_movedup_pd(__m256d __a) { return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2); } /* Unpack and Interleave */ /// Unpacks the odd-indexed vector elements from two 256-bit vectors of /// [4 x double] and interleaves them into a 256-bit vector of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKHPD instruction. /// /// \param __a /// A 256-bit floating-point vector of [4 x double]. \n /// Bits [127:64] are written to bits [63:0] of the return value. \n /// Bits [255:192] are written to bits [191:128] of the return value. \n /// \param __b /// A 256-bit floating-point vector of [4 x double]. \n /// Bits [127:64] are written to bits [127:64] of the return value. \n /// Bits [255:192] are written to bits [255:192] of the return value. \n /// \returns A 256-bit vector of [4 x double] containing the interleaved values. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpackhi_pd(__m256d __a, __m256d __b) { return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2); } /// Unpacks the even-indexed vector elements from two 256-bit vectors of /// [4 x double] and interleaves them into a 256-bit vector of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKLPD instruction. /// /// \param __a /// A 256-bit floating-point vector of [4 x double]. \n /// Bits [63:0] are written to bits [63:0] of the return value. \n /// Bits [191:128] are written to bits [191:128] of the return value. /// \param __b /// A 256-bit floating-point vector of [4 x double]. \n /// Bits [63:0] are written to bits [127:64] of the return value. \n /// Bits [191:128] are written to bits [255:192] of the return value. \n /// \returns A 256-bit vector of [4 x double] containing the interleaved values. 
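/* Editorial aside (not part of the original header): a sketch contrasting the
   duplicate and unpack operations above. For __a = {a0, a1, a2, a3} (low
   element first), _mm256_movedup_pd gives {a0, a0, a2, a2}; unpackhi then
   interleaves the odd-indexed elements of two vectors within each 128-bit
   lane. The helper name is hypothetical. */
static __inline __m256d __DEFAULT_FN_ATTRS
__example_dup_then_unpackhi(__m256d __a, __m256d __b) {
  __m256d __dup = _mm256_movedup_pd(__a); /* {a0, a0, a2, a2} */
  return _mm256_unpackhi_pd(__dup, __b);  /* {a0, b1, a2, b3} */
}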
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpacklo_pd(__m256d __a, __m256d __b) { return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2); } /// Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the /// two 256-bit vectors of [8 x float] and interleaves them into a 256-bit /// vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKHPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. \n /// Bits [95:64] are written to bits [31:0] of the return value. \n /// Bits [127:96] are written to bits [95:64] of the return value. \n /// Bits [223:192] are written to bits [159:128] of the return value. \n /// Bits [255:224] are written to bits [223:192] of the return value. /// \param __b /// A 256-bit vector of [8 x float]. \n /// Bits [95:64] are written to bits [63:32] of the return value. \n /// Bits [127:96] are written to bits [127:96] of the return value. \n /// Bits [223:192] are written to bits [191:160] of the return value. \n /// Bits [255:224] are written to bits [255:224] of the return value. /// \returns A 256-bit vector of [8 x float] containing the interleaved values. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpackhi_ps(__m256 __a, __m256 __b) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1); } /// Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the /// two 256-bit vectors of [8 x float] and interleaves them into a 256-bit /// vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKLPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. \n /// Bits [31:0] are written to bits [31:0] of the return value. \n /// Bits [63:32] are written to bits [95:64] of the return value. \n /// Bits [159:128] are written to bits [159:128] of the return value. \n /// Bits [191:160] are written to bits [223:192] of the return value. /// \param __b /// A 256-bit vector of [8 x float]. \n /// Bits [31:0] are written to bits [63:32] of the return value. \n /// Bits [63:32] are written to bits [127:96] of the return value. \n /// Bits [159:128] are written to bits [191:160] of the return value. \n /// Bits [191:160] are written to bits [255:224] of the return value. /// \returns A 256-bit vector of [8 x float] containing the interleaved values. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpacklo_ps(__m256 __a, __m256 __b) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1); } /* Bit Test */ /// Given two 128-bit floating-point vectors of [2 x double], perform an /// element-by-element comparison of the double-precision element in the /// first source vector and the corresponding element in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of double-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of double-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns the value of the ZF flag. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns the ZF flag in the EFLAGS register. 
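/* Editorial aside (not part of the original header): a sketch of per-lane
   interleaving with the unpack intrinsics above. Note that the interleave
   happens independently within each 128-bit half, not across the full 256-bit
   vector. The helper name is hypothetical. */
static __inline __m256 __DEFAULT_FN_ATTRS
__example_interleave_low_ps(__m256 __a, __m256 __b) {
  /* Result: {a0, b0, a1, b1, a4, b4, a5, b5} (low element first). */
  return _mm256_unpacklo_ps(__a, __b);
}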
static __inline int __DEFAULT_FN_ATTRS128 _mm_testz_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b); } /// Given two 128-bit floating-point vectors of [2 x double], perform an /// element-by-element comparison of the double-precision element in the /// first source vector and the corresponding element in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of double-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of double-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns the value of the CF flag. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns the CF flag in the EFLAGS register. static __inline int __DEFAULT_FN_ATTRS128 _mm_testc_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b); } /// Given two 128-bit floating-point vectors of [2 x double], perform an /// element-by-element comparison of the double-precision element in the /// first source vector and the corresponding element in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of double-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of double-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns 1 if both the ZF and CF flags are set to 0, /// otherwise it returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. static __inline int __DEFAULT_FN_ATTRS128 _mm_testnzc_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b); } /// Given two 128-bit floating-point vectors of [4 x float], perform an /// element-by-element comparison of the single-precision element in the /// first source vector and the corresponding element in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of single-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of single-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns the value of the ZF flag. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns the ZF flag. 
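/* Editorial aside (not part of the original header): the VTESTPD flag logic
   described above can answer sign-bit questions directly.
   _mm_testz_pd(__v, __v) returns 1 only when no element of __v has its sign
   bit set. The helper name is hypothetical; __DEFAULT_FN_ATTRS128 is the
   128-bit attribute macro already used in this header. */
static __inline int __DEFAULT_FN_ATTRS128
__example_no_sign_bits_pd(__m128d __v) {
  return _mm_testz_pd(__v, __v);
}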
static __inline int __DEFAULT_FN_ATTRS128 _mm_testz_ps(__m128 __a, __m128 __b) { return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b); } /// Given two 128-bit floating-point vectors of [4 x float], perform an /// element-by-element comparison of the single-precision element in the /// first source vector and the corresponding element in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of single-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of single-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns the value of the CF flag. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns the CF flag. static __inline int __DEFAULT_FN_ATTRS128 _mm_testc_ps(__m128 __a, __m128 __b) { return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b); } /// Given two 128-bit floating-point vectors of [4 x float], perform an /// element-by-element comparison of the single-precision element in the /// first source vector and the corresponding element in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of single-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of single-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns 1 if both the ZF and CF flags are set to 0, /// otherwise it returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. static __inline int __DEFAULT_FN_ATTRS128 _mm_testnzc_ps(__m128 __a, __m128 __b) { return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b); } /// Given two 256-bit floating-point vectors of [4 x double], perform an /// element-by-element comparison of the double-precision elements in the /// first source vector and the corresponding elements in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of double-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of double-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns the value of the ZF flag. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. /// \param __b /// A 256-bit vector of [4 x double]. /// \returns the ZF flag. 
static __inline int __DEFAULT_FN_ATTRS _mm256_testz_pd(__m256d __a, __m256d __b) { return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b); } /// Given two 256-bit floating-point vectors of [4 x double], perform an /// element-by-element comparison of the double-precision elements in the /// first source vector and the corresponding elements in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of double-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of double-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns the value of the CF flag. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. /// \param __b /// A 256-bit vector of [4 x double]. /// \returns the CF flag. static __inline int __DEFAULT_FN_ATTRS _mm256_testc_pd(__m256d __a, __m256d __b) { return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b); } /// Given two 256-bit floating-point vectors of [4 x double], perform an /// element-by-element comparison of the double-precision elements in the /// first source vector and the corresponding elements in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of double-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of double-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns 1 if both the ZF and CF flags are set to 0, /// otherwise it returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. /// \param __b /// A 256-bit vector of [4 x double]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. static __inline int __DEFAULT_FN_ATTRS _mm256_testnzc_pd(__m256d __a, __m256d __b) { return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b); } /// Given two 256-bit floating-point vectors of [8 x float], perform an /// element-by-element comparison of the single-precision element in the /// first source vector and the corresponding element in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of single-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of single-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns the value of the ZF flag. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \param __b /// A 256-bit vector of [8 x float]. /// \returns the ZF flag. 
static __inline int __DEFAULT_FN_ATTRS _mm256_testz_ps(__m256 __a, __m256 __b) { return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b); } /// Given two 256-bit floating-point vectors of [8 x float], perform an /// element-by-element comparison of the single-precision element in the /// first source vector and the corresponding element in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of single-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of single-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns the value of the CF flag. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \param __b /// A 256-bit vector of [8 x float]. /// \returns the CF flag. static __inline int __DEFAULT_FN_ATTRS _mm256_testc_ps(__m256 __a, __m256 __b) { return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b); } /// Given two 256-bit floating-point vectors of [8 x float], perform an /// element-by-element comparison of the single-precision elements in the /// first source vector and the corresponding elements in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of single-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of single-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns 1 if both the ZF and CF flags are set to 0, /// otherwise it returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \param __b /// A 256-bit vector of [8 x float]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. static __inline int __DEFAULT_FN_ATTRS _mm256_testnzc_ps(__m256 __a, __m256 __b) { return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b); } /// Given two 256-bit integer vectors, perform a bit-by-bit comparison /// of the two source vectors. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of bits where both bits are 1, the ZF flag /// is set to 0. Otherwise the ZF flag is set to 1. \n /// If there is at least one pair of bits where the bit from the first source /// vector is 0 and the bit from the second source vector is 1, the CF flag /// is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns the value of the ZF flag. /// /// \headerfile /// /// This intrinsic corresponds to the VPTEST instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns the ZF flag. static __inline int __DEFAULT_FN_ATTRS _mm256_testz_si256(__m256i __a, __m256i __b) { return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b); } /// Given two 256-bit integer vectors, perform a bit-by-bit comparison /// of the two source vectors. 
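/* Editorial aside (not part of the original header): a common idiom with the
   bit test above: a 256-bit integer vector is all zeros exactly when
   _mm256_testz_si256 of the vector with itself returns 1. The helper name is
   hypothetical. */
static __inline int __DEFAULT_FN_ATTRS
__example_is_all_zero(__m256i __v) {
  return _mm256_testz_si256(__v, __v);
}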
/// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of bits where both bits are 1, the ZF flag /// is set to 0. Otherwise the ZF flag is set to 1. \n /// If there is at least one pair of bits where the bit from the first source /// vector is 0 and the bit from the second source vector is 1, the CF flag /// is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns the value of the CF flag. /// /// \headerfile /// /// This intrinsic corresponds to the VPTEST instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns the CF flag. static __inline int __DEFAULT_FN_ATTRS _mm256_testc_si256(__m256i __a, __m256i __b) { return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b); } /// Given two 256-bit integer vectors, perform a bit-by-bit comparison /// of the two source vectors. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of bits where both bits are 1, the ZF flag /// is set to 0. Otherwise the ZF flag is set to 1. \n /// If there is at least one pair of bits where the bit from the first source /// vector is 0 and the bit from the second source vector is 1, the CF flag /// is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns 1 if both the ZF and CF flags are set to 0, /// otherwise it returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VPTEST instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. static __inline int __DEFAULT_FN_ATTRS _mm256_testnzc_si256(__m256i __a, __m256i __b) { return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b); } /* Vector extract sign mask */ /// Extracts the sign bits of double-precision floating point elements /// in a 256-bit vector of [4 x double] and writes them to the lower order /// bits of the return value. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVMSKPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing the double-precision /// floating point values with sign bits to be extracted. /// \returns The sign bits from the operand, written to bits [3:0]. static __inline int __DEFAULT_FN_ATTRS _mm256_movemask_pd(__m256d __a) { return __builtin_ia32_movmskpd256((__v4df)__a); } /// Extracts the sign bits of single-precision floating point elements /// in a 256-bit vector of [8 x float] and writes them to the lower order /// bits of the return value. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVMSKPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing the single-precision floating /// point values with sign bits to be extracted. /// \returns The sign bits from the operand, written to bits [7:0]. static __inline int __DEFAULT_FN_ATTRS _mm256_movemask_ps(__m256 __a) { return __builtin_ia32_movmskps256((__v8sf)__a); } /* Vector __zero */ /// Zeroes the contents of all XMM or YMM registers. /// /// \headerfile /// /// This intrinsic corresponds to the VZEROALL instruction. static __inline void __attribute__((__always_inline__, __nodebug__, __target__("avx"))) _mm256_zeroall(void) { __builtin_ia32_vzeroall(); } /// Zeroes the upper 128 bits (bits 255:128) of all YMM registers. /// /// \headerfile /// /// This intrinsic corresponds to the VZEROUPPER instruction. 
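/* Usage sketch for VPTEST and VMOVMSKPD above; helper names are hypothetical,
 * and <immintrin.h> plus AVX code generation are assumed. */
#include <immintrin.h>

/* Returns 1 only when every bit of v is zero: v AND v then has no set bit, so
 * the ZF flag reported by _mm256_testz_si256 is 1. */
static int is_zero_vector_si256(__m256i v)
{
  return _mm256_testz_si256(v, v);
}

/* Packs the four double-precision sign bits of v into bits [3:0] of an int. */
static int sign_mask_pd(__m256d v)
{
  return _mm256_movemask_pd(v);
}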
static __inline void __attribute__((__always_inline__, __nodebug__, __target__("avx"))) _mm256_zeroupper(void) { __builtin_ia32_vzeroupper(); } /* Vector load with broadcast */ /// Loads a scalar single-precision floating point value from the /// specified address pointed to by \a __a and broadcasts it to the elements /// of a [4 x float] vector. /// /// \headerfile /// /// This intrinsic corresponds to the VBROADCASTSS instruction. /// /// \param __a /// The single-precision floating point value to be broadcast. /// \returns A 128-bit vector of [4 x float] whose 32-bit elements are set /// equal to the broadcast value. static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_broadcast_ss(float const *__a) { struct __mm_broadcast_ss_struct { float __f; } __attribute__((__packed__, __may_alias__)); float __f = ((const struct __mm_broadcast_ss_struct*)__a)->__f; return __extension__ (__m128){ __f, __f, __f, __f }; } /// Loads a scalar double-precision floating point value from the /// specified address pointed to by \a __a and broadcasts it to the elements /// of a [4 x double] vector. /// /// \headerfile /// /// This intrinsic corresponds to the VBROADCASTSD instruction. /// /// \param __a /// The double-precision floating point value to be broadcast. /// \returns A 256-bit vector of [4 x double] whose 64-bit elements are set /// equal to the broadcast value. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_broadcast_sd(double const *__a) { struct __mm256_broadcast_sd_struct { double __d; } __attribute__((__packed__, __may_alias__)); double __d = ((const struct __mm256_broadcast_sd_struct*)__a)->__d; return __extension__ (__m256d)(__v4df){ __d, __d, __d, __d }; } /// Loads a scalar single-precision floating point value from the /// specified address pointed to by \a __a and broadcasts it to the elements /// of a [8 x float] vector. /// /// \headerfile /// /// This intrinsic corresponds to the VBROADCASTSS instruction. /// /// \param __a /// The single-precision floating point value to be broadcast. /// \returns A 256-bit vector of [8 x float] whose 32-bit elements are set /// equal to the broadcast value. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_ss(float const *__a) { struct __mm256_broadcast_ss_struct { float __f; } __attribute__((__packed__, __may_alias__)); float __f = ((const struct __mm256_broadcast_ss_struct*)__a)->__f; return __extension__ (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f }; } /// Loads the data from a 128-bit vector of [2 x double] from the /// specified address pointed to by \a __a and broadcasts it to 128-bit /// elements in a 256-bit vector of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VBROADCASTF128 instruction. /// /// \param __a /// The 128-bit vector of [2 x double] to be broadcast. /// \returns A 256-bit vector of [4 x double] whose 128-bit elements are set /// equal to the broadcast value. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_broadcast_pd(__m128d const *__a) { __m128d __b = _mm_loadu_pd((const double *)__a); return (__m256d)__builtin_shufflevector((__v2df)__b, (__v2df)__b, 0, 1, 0, 1); } /// Loads the data from a 128-bit vector of [4 x float] from the /// specified address pointed to by \a __a and broadcasts it to 128-bit /// elements in a 256-bit vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VBROADCASTF128 instruction. /// /// \param __a /// The 128-bit vector of [4 x float] to be broadcast. 
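/* Usage sketch for the scalar broadcast loads above; hypothetical helpers,
 * assuming <immintrin.h> and AVX. */
#include <immintrin.h>

/* All four double lanes receive the value stored at *scale (VBROADCASTSD). */
static __m256d splat4_from_memory(const double *scale)
{
  return _mm256_broadcast_sd(scale);
}

/* All eight float lanes receive the value stored at *scale (VBROADCASTSS). */
static __m256 splat8_from_memory(const float *scale)
{
  return _mm256_broadcast_ss(scale);
}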
/// \returns A 256-bit vector of [8 x float] whose 128-bit elements are set /// equal to the broadcast value. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_ps(__m128 const *__a) { __m128 __b = _mm_loadu_ps((const float *)__a); return (__m256)__builtin_shufflevector((__v4sf)__b, (__v4sf)__b, 0, 1, 2, 3, 0, 1, 2, 3); } /* SIMD load ops */ /// Loads 4 double-precision floating point values from a 32-byte aligned /// memory location pointed to by \a __p into a vector of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPD instruction. /// /// \param __p /// A 32-byte aligned pointer to a memory location containing /// double-precision floating point values. /// \returns A 256-bit vector of [4 x double] containing the moved values. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_load_pd(double const *__p) { return *(const __m256d *)__p; } /// Loads 8 single-precision floating point values from a 32-byte aligned /// memory location pointed to by \a __p into a vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPS instruction. /// /// \param __p /// A 32-byte aligned pointer to a memory location containing float values. /// \returns A 256-bit vector of [8 x float] containing the moved values. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_load_ps(float const *__p) { return *(const __m256 *)__p; } /// Loads 4 double-precision floating point values from an unaligned /// memory location pointed to by \a __p into a vector of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVUPD instruction. /// /// \param __p /// A pointer to a memory location containing double-precision floating /// point values. /// \returns A 256-bit vector of [4 x double] containing the moved values. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_loadu_pd(double const *__p) { struct __loadu_pd { __m256d_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_pd*)__p)->__v; } /// Loads 8 single-precision floating point values from an unaligned /// memory location pointed to by \a __p into a vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVUPS instruction. /// /// \param __p /// A pointer to a memory location containing single-precision floating /// point values. /// \returns A 256-bit vector of [8 x float] containing the moved values. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_loadu_ps(float const *__p) { struct __loadu_ps { __m256_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_ps*)__p)->__v; } /// Loads 256 bits of integer data from a 32-byte aligned memory /// location pointed to by \a __p into elements of a 256-bit integer vector. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDQA instruction. /// /// \param __p /// A 32-byte aligned pointer to a 256-bit integer vector containing integer /// values. /// \returns A 256-bit integer vector containing the moved values. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_load_si256(__m256i const *__p) { return *__p; } /// Loads 256 bits of integer data from an unaligned memory location /// pointed to by \a __p into a 256-bit integer vector. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDQU instruction. /// /// \param __p /// A pointer to a 256-bit integer vector containing integer values. /// \returns A 256-bit integer vector containing the moved values. 
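/* Usage sketch contrasting the aligned and unaligned 256-bit loads above;
 * the helper is hypothetical, and <immintrin.h> plus AVX are assumed. */
#include <immintrin.h>

static __m256d load4_doubles(const double *p, int is_32byte_aligned)
{
  /* _mm256_load_pd may fault if p is not 32-byte aligned;
   * _mm256_loadu_pd accepts any address. */
  return is_32byte_aligned ? _mm256_load_pd(p) : _mm256_loadu_pd(p);
}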
static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_loadu_si256(__m256i_u const *__p)
{
  struct __loadu_si256 {
    __m256i_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((const struct __loadu_si256*)__p)->__v;
}

/// Loads 256 bits of integer data from an unaligned memory location
/// pointed to by \a __p into a 256-bit integer vector. This intrinsic may
/// perform better than \c _mm256_loadu_si256 when the data crosses a cache
/// line boundary.
///
/// \headerfile
///
/// This intrinsic corresponds to the VLDDQU instruction.
///
/// \param __p
///    A pointer to a 256-bit integer vector containing integer values.
/// \returns A 256-bit integer vector containing the moved values.
static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_lddqu_si256(__m256i_u const *__p)
{
  return (__m256i)__builtin_ia32_lddqu256((char const *)__p);
}

/* SIMD store ops */

/// Stores double-precision floating point values from a 256-bit vector
/// of [4 x double] to a 32-byte aligned memory location pointed to by
/// \a __p.
///
/// \headerfile
///
/// This intrinsic corresponds to the VMOVAPD instruction.
///
/// \param __p
///    A 32-byte aligned pointer to a memory location that will receive the
///    double-precision floating point values.
/// \param __a
///    A 256-bit vector of [4 x double] containing the values to be moved.
static __inline void __DEFAULT_FN_ATTRS
_mm256_store_pd(double *__p, __m256d __a)
{
  *(__m256d *)__p = __a;
}

/// Stores single-precision floating point values from a 256-bit vector
/// of [8 x float] to a 32-byte aligned memory location pointed to by \a __p.
///
/// \headerfile
///
/// This intrinsic corresponds to the VMOVAPS instruction.
///
/// \param __p
///    A 32-byte aligned pointer to a memory location that will receive the
///    float values.
/// \param __a
///    A 256-bit vector of [8 x float] containing the values to be moved.
static __inline void __DEFAULT_FN_ATTRS
_mm256_store_ps(float *__p, __m256 __a)
{
  *(__m256 *)__p = __a;
}

/// Stores double-precision floating point values from a 256-bit vector
/// of [4 x double] to an unaligned memory location pointed to by \a __p.
///
/// \headerfile
///
/// This intrinsic corresponds to the VMOVUPD instruction.
///
/// \param __p
///    A pointer to a memory location that will receive the double-precision
///    floating point values.
/// \param __a
///    A 256-bit vector of [4 x double] containing the values to be moved.
static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu_pd(double *__p, __m256d __a)
{
  struct __storeu_pd {
    __m256d_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_pd*)__p)->__v = __a;
}

/// Stores single-precision floating point values from a 256-bit vector
/// of [8 x float] to an unaligned memory location pointed to by \a __p.
///
/// \headerfile
///
/// This intrinsic corresponds to the VMOVUPS instruction.
///
/// \param __p
///    A pointer to a memory location that will receive the float values.
/// \param __a
///    A 256-bit vector of [8 x float] containing the values to be moved.
static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu_ps(float *__p, __m256 __a)
{
  struct __storeu_ps {
    __m256_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_ps*)__p)->__v = __a;
}

/// Stores integer values from a 256-bit integer vector to a 32-byte
/// aligned memory location pointed to by \a __p.
///
/// \headerfile
///
/// This intrinsic corresponds to the VMOVDQA instruction.
///
/// \param __p
///    A 32-byte aligned pointer to a memory location that will receive the
///    integer values.
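/* Usage sketch for the unaligned stores above; hypothetical helper, assuming
 * <immintrin.h> and AVX. It writes eight consecutive floats starting at dst. */
#include <immintrin.h>

static void fill8_floats(float *dst, float value)
{
  /* No alignment requirement for _mm256_storeu_ps; prefer _mm256_store_ps
   * when dst is known to be 32-byte aligned. */
  _mm256_storeu_ps(dst, _mm256_set1_ps(value));
}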
/// \param __a /// A 256-bit integer vector containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS _mm256_store_si256(__m256i *__p, __m256i __a) { *__p = __a; } /// Stores integer values from a 256-bit integer vector to an unaligned /// memory location pointed to by \a __p. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDQU instruction. /// /// \param __p /// A pointer to a memory location that will receive the integer values. /// \param __a /// A 256-bit integer vector containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS _mm256_storeu_si256(__m256i_u *__p, __m256i __a) { struct __storeu_si256 { __m256i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_si256*)__p)->__v = __a; } /* Conditional load ops */ /// Conditionally loads double-precision floating point elements from a /// memory location pointed to by \a __p into a 128-bit vector of /// [2 x double], depending on the mask bits associated with each data /// element. /// /// \headerfile /// /// This intrinsic corresponds to the VMASKMOVPD instruction. /// /// \param __p /// A pointer to a memory location that contains the double-precision /// floating point values. /// \param __m /// A 128-bit integer vector containing the mask. The most significant bit of /// each data element represents the mask bits. If a mask bit is zero, the /// corresponding value in the memory location is not loaded and the /// corresponding field in the return value is set to zero. /// \returns A 128-bit vector of [2 x double] containing the loaded values. static __inline __m128d __DEFAULT_FN_ATTRS128 _mm_maskload_pd(double const *__p, __m128i __m) { return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2di)__m); } /// Conditionally loads double-precision floating point elements from a /// memory location pointed to by \a __p into a 256-bit vector of /// [4 x double], depending on the mask bits associated with each data /// element. /// /// \headerfile /// /// This intrinsic corresponds to the VMASKMOVPD instruction. /// /// \param __p /// A pointer to a memory location that contains the double-precision /// floating point values. /// \param __m /// A 256-bit integer vector of [4 x quadword] containing the mask. The most /// significant bit of each quadword element represents the mask bits. If a /// mask bit is zero, the corresponding value in the memory location is not /// loaded and the corresponding field in the return value is set to zero. /// \returns A 256-bit vector of [4 x double] containing the loaded values. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_maskload_pd(double const *__p, __m256i __m) { return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p, (__v4di)__m); } /// Conditionally loads single-precision floating point elements from a /// memory location pointed to by \a __p into a 128-bit vector of /// [4 x float], depending on the mask bits associated with each data /// element. /// /// \headerfile /// /// This intrinsic corresponds to the VMASKMOVPS instruction. /// /// \param __p /// A pointer to a memory location that contains the single-precision /// floating point values. /// \param __m /// A 128-bit integer vector containing the mask. The most significant bit of /// each data element represents the mask bits. If a mask bit is zero, the /// corresponding value in the memory location is not loaded and the /// corresponding field in the return value is set to zero. 
/// \returns A 128-bit vector of [4 x float] containing the loaded values. static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_maskload_ps(float const *__p, __m128i __m) { return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4si)__m); } /// Conditionally loads single-precision floating point elements from a /// memory location pointed to by \a __p into a 256-bit vector of /// [8 x float], depending on the mask bits associated with each data /// element. /// /// \headerfile /// /// This intrinsic corresponds to the VMASKMOVPS instruction. /// /// \param __p /// A pointer to a memory location that contains the single-precision /// floating point values. /// \param __m /// A 256-bit integer vector of [8 x dword] containing the mask. The most /// significant bit of each dword element represents the mask bits. If a mask /// bit is zero, the corresponding value in the memory location is not loaded /// and the corresponding field in the return value is set to zero. /// \returns A 256-bit vector of [8 x float] containing the loaded values. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_maskload_ps(float const *__p, __m256i __m) { return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8si)__m); } /* Conditional store ops */ /// Moves single-precision floating point values from a 256-bit vector /// of [8 x float] to a memory location pointed to by \a __p, according to /// the specified mask. /// /// \headerfile /// /// This intrinsic corresponds to the VMASKMOVPS instruction. /// /// \param __p /// A pointer to a memory location that will receive the float values. /// \param __m /// A 256-bit integer vector of [8 x dword] containing the mask. The most /// significant bit of each dword element in the mask vector represents the /// mask bits. If a mask bit is zero, the corresponding value from vector /// \a __a is not stored and the corresponding field in the memory location /// pointed to by \a __p is not changed. /// \param __a /// A 256-bit vector of [8 x float] containing the values to be stored. static __inline void __DEFAULT_FN_ATTRS _mm256_maskstore_ps(float *__p, __m256i __m, __m256 __a) { __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8si)__m, (__v8sf)__a); } /// Moves double-precision values from a 128-bit vector of [2 x double] /// to a memory location pointed to by \a __p, according to the specified /// mask. /// /// \headerfile /// /// This intrinsic corresponds to the VMASKMOVPD instruction. /// /// \param __p /// A pointer to a memory location that will receive the float values. /// \param __m /// A 128-bit integer vector containing the mask. The most significant bit of /// each field in the mask vector represents the mask bits. If a mask bit is /// zero, the corresponding value from vector \a __a is not stored and the /// corresponding field in the memory location pointed to by \a __p is not /// changed. /// \param __a /// A 128-bit vector of [2 x double] containing the values to be stored. static __inline void __DEFAULT_FN_ATTRS128 _mm_maskstore_pd(double *__p, __m128i __m, __m128d __a) { __builtin_ia32_maskstorepd((__v2df *)__p, (__v2di)__m, (__v2df)__a); } /// Moves double-precision values from a 256-bit vector of [4 x double] /// to a memory location pointed to by \a __p, according to the specified /// mask. /// /// \headerfile /// /// This intrinsic corresponds to the VMASKMOVPD instruction. /// /// \param __p /// A pointer to a memory location that will receive the float values. 
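/* Usage sketch for the masked loads above. The mask layout (most significant
 * bit of each quadword) follows the documentation; building it with
 * _mm256_set_epi64x is just one possible approach. Hypothetical helper,
 * assuming <immintrin.h> and AVX. */
#include <immintrin.h>

/* Reads only p[0] and p[1]; the two upper lanes are returned as zero and the
 * masked-off memory is never accessed. */
static __m256d load_low_two_doubles(const double *p)
{
  __m256i mask = _mm256_set_epi64x(0, 0, -1LL, -1LL);
  return _mm256_maskload_pd(p, mask);
}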
/// \param __m /// A 256-bit integer vector of [4 x quadword] containing the mask. The most /// significant bit of each quadword element in the mask vector represents /// the mask bits. If a mask bit is zero, the corresponding value from vector /// __a is not stored and the corresponding field in the memory location /// pointed to by \a __p is not changed. /// \param __a /// A 256-bit vector of [4 x double] containing the values to be stored. static __inline void __DEFAULT_FN_ATTRS _mm256_maskstore_pd(double *__p, __m256i __m, __m256d __a) { __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4di)__m, (__v4df)__a); } /// Moves single-precision floating point values from a 128-bit vector /// of [4 x float] to a memory location pointed to by \a __p, according to /// the specified mask. /// /// \headerfile /// /// This intrinsic corresponds to the VMASKMOVPS instruction. /// /// \param __p /// A pointer to a memory location that will receive the float values. /// \param __m /// A 128-bit integer vector containing the mask. The most significant bit of /// each field in the mask vector represents the mask bits. If a mask bit is /// zero, the corresponding value from vector __a is not stored and the /// corresponding field in the memory location pointed to by \a __p is not /// changed. /// \param __a /// A 128-bit vector of [4 x float] containing the values to be stored. static __inline void __DEFAULT_FN_ATTRS128 _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a) { __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4si)__m, (__v4sf)__a); } /* Cacheability support ops */ /// Moves integer data from a 256-bit integer vector to a 32-byte /// aligned memory location. To minimize caching, the data is flagged as /// non-temporal (unlikely to be used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the VMOVNTDQ instruction. /// /// \param __a /// A pointer to a 32-byte aligned memory location that will receive the /// integer values. /// \param __b /// A 256-bit integer vector containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS _mm256_stream_si256(void *__a, __m256i __b) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a); } /// Moves double-precision values from a 256-bit vector of [4 x double] /// to a 32-byte aligned memory location. To minimize caching, the data is /// flagged as non-temporal (unlikely to be used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the VMOVNTPD instruction. /// /// \param __a /// A pointer to a 32-byte aligned memory location that will receive the /// double-precision floating-point values. /// \param __b /// A 256-bit vector of [4 x double] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS _mm256_stream_pd(void *__a, __m256d __b) { typedef __v4df __v4df_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a); } /// Moves single-precision floating point values from a 256-bit vector /// of [8 x float] to a 32-byte aligned memory location. To minimize /// caching, the data is flagged as non-temporal (unlikely to be used again /// soon). /// /// \headerfile /// /// This intrinsic corresponds to the VMOVNTPS instruction. /// /// \param __p /// A pointer to a 32-byte aligned memory location that will receive the /// single-precision floating point values. /// \param __a /// A 256-bit vector of [8 x float] containing the values to be moved. 
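/* Usage sketch for the masked stores above, mirroring the masked load: only
 * lanes whose mask quadword has its most significant bit set are written.
 * Hypothetical helper, assuming <immintrin.h> and AVX. */
#include <immintrin.h>

/* Writes only elements 0 and 1 of v to p[0] and p[1]; p[2] and p[3] are left
 * untouched. */
static void store_low_two_doubles(double *p, __m256d v)
{
  __m256i mask = _mm256_set_epi64x(0, 0, -1LL, -1LL);
  _mm256_maskstore_pd(p, mask, v);
}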
static __inline void __DEFAULT_FN_ATTRS _mm256_stream_ps(void *__p, __m256 __a) { typedef __v8sf __v8sf_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p); } /* Create vectors */ /// Create a 256-bit vector of [4 x double] with undefined values. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \returns A 256-bit vector of [4 x double] containing undefined values. static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_undefined_pd(void) { return (__m256d)__builtin_ia32_undef256(); } /// Create a 256-bit vector of [8 x float] with undefined values. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \returns A 256-bit vector of [8 x float] containing undefined values. static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void) { return (__m256)__builtin_ia32_undef256(); } /// Create a 256-bit integer vector with undefined values. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \returns A 256-bit integer vector containing undefined values. static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void) { return (__m256i)__builtin_ia32_undef256(); } /// Constructs a 256-bit floating-point vector of [4 x double] /// initialized with the specified double-precision floating-point values. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKLPD+VINSERTF128 /// instruction. /// /// \param __a /// A double-precision floating-point value used to initialize bits [255:192] /// of the result. /// \param __b /// A double-precision floating-point value used to initialize bits [191:128] /// of the result. /// \param __c /// A double-precision floating-point value used to initialize bits [127:64] /// of the result. /// \param __d /// A double-precision floating-point value used to initialize bits [63:0] /// of the result. /// \returns An initialized 256-bit floating-point vector of [4 x double]. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set_pd(double __a, double __b, double __c, double __d) { return __extension__ (__m256d){ __d, __c, __b, __a }; } /// Constructs a 256-bit floating-point vector of [8 x float] initialized /// with the specified single-precision floating-point values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __a /// A single-precision floating-point value used to initialize bits [255:224] /// of the result. /// \param __b /// A single-precision floating-point value used to initialize bits [223:192] /// of the result. /// \param __c /// A single-precision floating-point value used to initialize bits [191:160] /// of the result. /// \param __d /// A single-precision floating-point value used to initialize bits [159:128] /// of the result. /// \param __e /// A single-precision floating-point value used to initialize bits [127:96] /// of the result. /// \param __f /// A single-precision floating-point value used to initialize bits [95:64] /// of the result. /// \param __g /// A single-precision floating-point value used to initialize bits [63:32] /// of the result. /// \param __h /// A single-precision floating-point value used to initialize bits [31:0] /// of the result. /// \returns An initialized 256-bit floating-point vector of [8 x float]. 
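/* Usage sketch for the non-temporal stores above; hypothetical helper,
 * assuming <immintrin.h>, AVX, and a 32-byte aligned destination. The
 * trailing _mm_sfence (an SSE intrinsic) orders the streaming stores with
 * respect to stores that become visible to other observers afterwards. */
#include <immintrin.h>

static void stream_fill_pd(double *dst32, long nblocks, __m256d value)
{
  /* Each iteration writes one 4-double block, bypassing the caches. */
  for (long i = 0; i < nblocks; ++i)
    _mm256_stream_pd(dst32 + 4 * i, value);
  _mm_sfence();
}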
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set_ps(float __a, float __b, float __c, float __d, float __e, float __f, float __g, float __h) { return __extension__ (__m256){ __h, __g, __f, __e, __d, __c, __b, __a }; } /// Constructs a 256-bit integer vector initialized with the specified /// 32-bit integral values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __i0 /// A 32-bit integral value used to initialize bits [255:224] of the result. /// \param __i1 /// A 32-bit integral value used to initialize bits [223:192] of the result. /// \param __i2 /// A 32-bit integral value used to initialize bits [191:160] of the result. /// \param __i3 /// A 32-bit integral value used to initialize bits [159:128] of the result. /// \param __i4 /// A 32-bit integral value used to initialize bits [127:96] of the result. /// \param __i5 /// A 32-bit integral value used to initialize bits [95:64] of the result. /// \param __i6 /// A 32-bit integral value used to initialize bits [63:32] of the result. /// \param __i7 /// A 32-bit integral value used to initialize bits [31:0] of the result. /// \returns An initialized 256-bit integer vector. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7) { return __extension__ (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 }; } /// Constructs a 256-bit integer vector initialized with the specified /// 16-bit integral values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __w15 /// A 16-bit integral value used to initialize bits [255:240] of the result. /// \param __w14 /// A 16-bit integral value used to initialize bits [239:224] of the result. /// \param __w13 /// A 16-bit integral value used to initialize bits [223:208] of the result. /// \param __w12 /// A 16-bit integral value used to initialize bits [207:192] of the result. /// \param __w11 /// A 16-bit integral value used to initialize bits [191:176] of the result. /// \param __w10 /// A 16-bit integral value used to initialize bits [175:160] of the result. /// \param __w09 /// A 16-bit integral value used to initialize bits [159:144] of the result. /// \param __w08 /// A 16-bit integral value used to initialize bits [143:128] of the result. /// \param __w07 /// A 16-bit integral value used to initialize bits [127:112] of the result. /// \param __w06 /// A 16-bit integral value used to initialize bits [111:96] of the result. /// \param __w05 /// A 16-bit integral value used to initialize bits [95:80] of the result. /// \param __w04 /// A 16-bit integral value used to initialize bits [79:64] of the result. /// \param __w03 /// A 16-bit integral value used to initialize bits [63:48] of the result. /// \param __w02 /// A 16-bit integral value used to initialize bits [47:32] of the result. /// \param __w01 /// A 16-bit integral value used to initialize bits [31:16] of the result. /// \param __w00 /// A 16-bit integral value used to initialize bits [15:0] of the result. /// \returns An initialized 256-bit integer vector. 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12, short __w11, short __w10, short __w09, short __w08, short __w07, short __w06, short __w05, short __w04, short __w03, short __w02, short __w01, short __w00) { return __extension__ (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06, __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 }; } /// Constructs a 256-bit integer vector initialized with the specified /// 8-bit integral values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __b31 /// An 8-bit integral value used to initialize bits [255:248] of the result. /// \param __b30 /// An 8-bit integral value used to initialize bits [247:240] of the result. /// \param __b29 /// An 8-bit integral value used to initialize bits [239:232] of the result. /// \param __b28 /// An 8-bit integral value used to initialize bits [231:224] of the result. /// \param __b27 /// An 8-bit integral value used to initialize bits [223:216] of the result. /// \param __b26 /// An 8-bit integral value used to initialize bits [215:208] of the result. /// \param __b25 /// An 8-bit integral value used to initialize bits [207:200] of the result. /// \param __b24 /// An 8-bit integral value used to initialize bits [199:192] of the result. /// \param __b23 /// An 8-bit integral value used to initialize bits [191:184] of the result. /// \param __b22 /// An 8-bit integral value used to initialize bits [183:176] of the result. /// \param __b21 /// An 8-bit integral value used to initialize bits [175:168] of the result. /// \param __b20 /// An 8-bit integral value used to initialize bits [167:160] of the result. /// \param __b19 /// An 8-bit integral value used to initialize bits [159:152] of the result. /// \param __b18 /// An 8-bit integral value used to initialize bits [151:144] of the result. /// \param __b17 /// An 8-bit integral value used to initialize bits [143:136] of the result. /// \param __b16 /// An 8-bit integral value used to initialize bits [135:128] of the result. /// \param __b15 /// An 8-bit integral value used to initialize bits [127:120] of the result. /// \param __b14 /// An 8-bit integral value used to initialize bits [119:112] of the result. /// \param __b13 /// An 8-bit integral value used to initialize bits [111:104] of the result. /// \param __b12 /// An 8-bit integral value used to initialize bits [103:96] of the result. /// \param __b11 /// An 8-bit integral value used to initialize bits [95:88] of the result. /// \param __b10 /// An 8-bit integral value used to initialize bits [87:80] of the result. /// \param __b09 /// An 8-bit integral value used to initialize bits [79:72] of the result. /// \param __b08 /// An 8-bit integral value used to initialize bits [71:64] of the result. /// \param __b07 /// An 8-bit integral value used to initialize bits [63:56] of the result. /// \param __b06 /// An 8-bit integral value used to initialize bits [55:48] of the result. /// \param __b05 /// An 8-bit integral value used to initialize bits [47:40] of the result. /// \param __b04 /// An 8-bit integral value used to initialize bits [39:32] of the result. /// \param __b03 /// An 8-bit integral value used to initialize bits [31:24] of the result. /// \param __b02 /// An 8-bit integral value used to initialize bits [23:16] of the result. /// \param __b01 /// An 8-bit integral value used to initialize bits [15:8] of the result. 
/// \param __b00 /// An 8-bit integral value used to initialize bits [7:0] of the result. /// \returns An initialized 256-bit integer vector. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28, char __b27, char __b26, char __b25, char __b24, char __b23, char __b22, char __b21, char __b20, char __b19, char __b18, char __b17, char __b16, char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b09, char __b08, char __b07, char __b06, char __b05, char __b04, char __b03, char __b02, char __b01, char __b00) { return __extension__ (__m256i)(__v32qi){ __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07, __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15, __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23, __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31 }; } /// Constructs a 256-bit integer vector initialized with the specified /// 64-bit integral values. /// /// \headerfile /// /// This intrinsic corresponds to the VPUNPCKLQDQ+VINSERTF128 /// instruction. /// /// \param __a /// A 64-bit integral value used to initialize bits [255:192] of the result. /// \param __b /// A 64-bit integral value used to initialize bits [191:128] of the result. /// \param __c /// A 64-bit integral value used to initialize bits [127:64] of the result. /// \param __d /// A 64-bit integral value used to initialize bits [63:0] of the result. /// \returns An initialized 256-bit integer vector. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d) { return __extension__ (__m256i)(__v4di){ __d, __c, __b, __a }; } /* Create vectors with elements in reverse order */ /// Constructs a 256-bit floating-point vector of [4 x double], /// initialized in reverse order with the specified double-precision /// floating-point values. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKLPD+VINSERTF128 /// instruction. /// /// \param __a /// A double-precision floating-point value used to initialize bits [63:0] /// of the result. /// \param __b /// A double-precision floating-point value used to initialize bits [127:64] /// of the result. /// \param __c /// A double-precision floating-point value used to initialize bits [191:128] /// of the result. /// \param __d /// A double-precision floating-point value used to initialize bits [255:192] /// of the result. /// \returns An initialized 256-bit floating-point vector of [4 x double]. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setr_pd(double __a, double __b, double __c, double __d) { return _mm256_set_pd(__d, __c, __b, __a); } /// Constructs a 256-bit floating-point vector of [8 x float], /// initialized in reverse order with the specified single-precision /// float-point values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __a /// A single-precision floating-point value used to initialize bits [31:0] /// of the result. /// \param __b /// A single-precision floating-point value used to initialize bits [63:32] /// of the result. /// \param __c /// A single-precision floating-point value used to initialize bits [95:64] /// of the result. /// \param __d /// A single-precision floating-point value used to initialize bits [127:96] /// of the result. /// \param __e /// A single-precision floating-point value used to initialize bits [159:128] /// of the result. 
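/* Usage sketch of the argument ordering of _mm256_set_* versus _mm256_setr_*
 * above; hypothetical helper, assuming <immintrin.h> and AVX. */
#include <immintrin.h>

/* Both calls build the same vector: element 0 is 0.0 and element 3 is 3.0.
 * _mm256_set_pd lists values from the highest element down, while
 * _mm256_setr_pd lists them in element order (element 0 first). */
static __m256d ramp_0_to_3(void)
{
  __m256d a = _mm256_set_pd(3.0, 2.0, 1.0, 0.0);
  __m256d b = _mm256_setr_pd(0.0, 1.0, 2.0, 3.0);
  (void)a;
  return b;
}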
/// \param __f /// A single-precision floating-point value used to initialize bits [191:160] /// of the result. /// \param __g /// A single-precision floating-point value used to initialize bits [223:192] /// of the result. /// \param __h /// A single-precision floating-point value used to initialize bits [255:224] /// of the result. /// \returns An initialized 256-bit floating-point vector of [8 x float]. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setr_ps(float __a, float __b, float __c, float __d, float __e, float __f, float __g, float __h) { return _mm256_set_ps(__h, __g, __f, __e, __d, __c, __b, __a); } /// Constructs a 256-bit integer vector, initialized in reverse order /// with the specified 32-bit integral values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __i0 /// A 32-bit integral value used to initialize bits [31:0] of the result. /// \param __i1 /// A 32-bit integral value used to initialize bits [63:32] of the result. /// \param __i2 /// A 32-bit integral value used to initialize bits [95:64] of the result. /// \param __i3 /// A 32-bit integral value used to initialize bits [127:96] of the result. /// \param __i4 /// A 32-bit integral value used to initialize bits [159:128] of the result. /// \param __i5 /// A 32-bit integral value used to initialize bits [191:160] of the result. /// \param __i6 /// A 32-bit integral value used to initialize bits [223:192] of the result. /// \param __i7 /// A 32-bit integral value used to initialize bits [255:224] of the result. /// \returns An initialized 256-bit integer vector. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7) { return _mm256_set_epi32(__i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0); } /// Constructs a 256-bit integer vector, initialized in reverse order /// with the specified 16-bit integral values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __w15 /// A 16-bit integral value used to initialize bits [15:0] of the result. /// \param __w14 /// A 16-bit integral value used to initialize bits [31:16] of the result. /// \param __w13 /// A 16-bit integral value used to initialize bits [47:32] of the result. /// \param __w12 /// A 16-bit integral value used to initialize bits [63:48] of the result. /// \param __w11 /// A 16-bit integral value used to initialize bits [79:64] of the result. /// \param __w10 /// A 16-bit integral value used to initialize bits [95:80] of the result. /// \param __w09 /// A 16-bit integral value used to initialize bits [111:96] of the result. /// \param __w08 /// A 16-bit integral value used to initialize bits [127:112] of the result. /// \param __w07 /// A 16-bit integral value used to initialize bits [143:128] of the result. /// \param __w06 /// A 16-bit integral value used to initialize bits [159:144] of the result. /// \param __w05 /// A 16-bit integral value used to initialize bits [175:160] of the result. /// \param __w04 /// A 16-bit integral value used to initialize bits [191:176] of the result. /// \param __w03 /// A 16-bit integral value used to initialize bits [207:192] of the result. /// \param __w02 /// A 16-bit integral value used to initialize bits [223:208] of the result. /// \param __w01 /// A 16-bit integral value used to initialize bits [239:224] of the result. 
/// \param __w00 /// A 16-bit integral value used to initialize bits [255:240] of the result. /// \returns An initialized 256-bit integer vector. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12, short __w11, short __w10, short __w09, short __w08, short __w07, short __w06, short __w05, short __w04, short __w03, short __w02, short __w01, short __w00) { return _mm256_set_epi16(__w00, __w01, __w02, __w03, __w04, __w05, __w06, __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15); } /// Constructs a 256-bit integer vector, initialized in reverse order /// with the specified 8-bit integral values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __b31 /// An 8-bit integral value used to initialize bits [7:0] of the result. /// \param __b30 /// An 8-bit integral value used to initialize bits [15:8] of the result. /// \param __b29 /// An 8-bit integral value used to initialize bits [23:16] of the result. /// \param __b28 /// An 8-bit integral value used to initialize bits [31:24] of the result. /// \param __b27 /// An 8-bit integral value used to initialize bits [39:32] of the result. /// \param __b26 /// An 8-bit integral value used to initialize bits [47:40] of the result. /// \param __b25 /// An 8-bit integral value used to initialize bits [55:48] of the result. /// \param __b24 /// An 8-bit integral value used to initialize bits [63:56] of the result. /// \param __b23 /// An 8-bit integral value used to initialize bits [71:64] of the result. /// \param __b22 /// An 8-bit integral value used to initialize bits [79:72] of the result. /// \param __b21 /// An 8-bit integral value used to initialize bits [87:80] of the result. /// \param __b20 /// An 8-bit integral value used to initialize bits [95:88] of the result. /// \param __b19 /// An 8-bit integral value used to initialize bits [103:96] of the result. /// \param __b18 /// An 8-bit integral value used to initialize bits [111:104] of the result. /// \param __b17 /// An 8-bit integral value used to initialize bits [119:112] of the result. /// \param __b16 /// An 8-bit integral value used to initialize bits [127:120] of the result. /// \param __b15 /// An 8-bit integral value used to initialize bits [135:128] of the result. /// \param __b14 /// An 8-bit integral value used to initialize bits [143:136] of the result. /// \param __b13 /// An 8-bit integral value used to initialize bits [151:144] of the result. /// \param __b12 /// An 8-bit integral value used to initialize bits [159:152] of the result. /// \param __b11 /// An 8-bit integral value used to initialize bits [167:160] of the result. /// \param __b10 /// An 8-bit integral value used to initialize bits [175:168] of the result. /// \param __b09 /// An 8-bit integral value used to initialize bits [183:176] of the result. /// \param __b08 /// An 8-bit integral value used to initialize bits [191:184] of the result. /// \param __b07 /// An 8-bit integral value used to initialize bits [199:192] of the result. /// \param __b06 /// An 8-bit integral value used to initialize bits [207:200] of the result. /// \param __b05 /// An 8-bit integral value used to initialize bits [215:208] of the result. /// \param __b04 /// An 8-bit integral value used to initialize bits [223:216] of the result. /// \param __b03 /// An 8-bit integral value used to initialize bits [231:224] of the result. 
/// \param __b02 /// An 8-bit integral value used to initialize bits [239:232] of the result. /// \param __b01 /// An 8-bit integral value used to initialize bits [247:240] of the result. /// \param __b00 /// An 8-bit integral value used to initialize bits [255:248] of the result. /// \returns An initialized 256-bit integer vector. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28, char __b27, char __b26, char __b25, char __b24, char __b23, char __b22, char __b21, char __b20, char __b19, char __b18, char __b17, char __b16, char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b09, char __b08, char __b07, char __b06, char __b05, char __b04, char __b03, char __b02, char __b01, char __b00) { return _mm256_set_epi8(__b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07, __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15, __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23, __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31); } /// Constructs a 256-bit integer vector, initialized in reverse order /// with the specified 64-bit integral values. /// /// \headerfile /// /// This intrinsic corresponds to the VPUNPCKLQDQ+VINSERTF128 /// instruction. /// /// \param __a /// A 64-bit integral value used to initialize bits [63:0] of the result. /// \param __b /// A 64-bit integral value used to initialize bits [127:64] of the result. /// \param __c /// A 64-bit integral value used to initialize bits [191:128] of the result. /// \param __d /// A 64-bit integral value used to initialize bits [255:192] of the result. /// \returns An initialized 256-bit integer vector. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d) { return _mm256_set_epi64x(__d, __c, __b, __a); } /* Create vectors with repeated elements */ /// Constructs a 256-bit floating-point vector of [4 x double], with each /// of the four double-precision floating-point vector elements set to the /// specified double-precision floating-point value. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDDUP+VINSERTF128 instruction. /// /// \param __w /// A double-precision floating-point value used to initialize each vector /// element of the result. /// \returns An initialized 256-bit floating-point vector of [4 x double]. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set1_pd(double __w) { return _mm256_set_pd(__w, __w, __w, __w); } /// Constructs a 256-bit floating-point vector of [8 x float], with each /// of the eight single-precision floating-point vector elements set to the /// specified single-precision floating-point value. /// /// \headerfile /// /// This intrinsic corresponds to the VPERMILPS+VINSERTF128 /// instruction. /// /// \param __w /// A single-precision floating-point value used to initialize each vector /// element of the result. /// \returns An initialized 256-bit floating-point vector of [8 x float]. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set1_ps(float __w) { return _mm256_set_ps(__w, __w, __w, __w, __w, __w, __w, __w); } /// Constructs a 256-bit integer vector of [8 x i32], with each of the /// 32-bit integral vector elements set to the specified 32-bit integral /// value. /// /// \headerfile /// /// This intrinsic corresponds to the VPERMILPS+VINSERTF128 /// instruction. /// /// \param __i /// A 32-bit integral value used to initialize each vector element of the /// result. /// \returns An initialized 256-bit integer vector of [8 x i32]. 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi32(int __i) { return _mm256_set_epi32(__i, __i, __i, __i, __i, __i, __i, __i); } /// Constructs a 256-bit integer vector of [16 x i16], with each of the /// 16-bit integral vector elements set to the specified 16-bit integral /// value. /// /// \headerfile /// /// This intrinsic corresponds to the VPSHUFB+VINSERTF128 instruction. /// /// \param __w /// A 16-bit integral value used to initialize each vector element of the /// result. /// \returns An initialized 256-bit integer vector of [16 x i16]. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi16(short __w) { return _mm256_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w); } /// Constructs a 256-bit integer vector of [32 x i8], with each of the /// 8-bit integral vector elements set to the specified 8-bit integral value. /// /// \headerfile /// /// This intrinsic corresponds to the VPSHUFB+VINSERTF128 instruction. /// /// \param __b /// An 8-bit integral value used to initialize each vector element of the /// result. /// \returns An initialized 256-bit integer vector of [32 x i8]. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi8(char __b) { return _mm256_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); } /// Constructs a 256-bit integer vector of [4 x i64], with each of the /// 64-bit integral vector elements set to the specified 64-bit integral /// value. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDDUP+VINSERTF128 instruction. /// /// \param __q /// A 64-bit integral value used to initialize each vector element of the /// result. /// \returns An initialized 256-bit integer vector of [4 x i64]. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi64x(long long __q) { return _mm256_set_epi64x(__q, __q, __q, __q); } /* Create __zeroed vectors */ /// Constructs a 256-bit floating-point vector of [4 x double] with all /// vector elements initialized to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VXORPS instruction. /// /// \returns A 256-bit vector of [4 x double] with all elements set to zero. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd(void) { return __extension__ (__m256d){ 0.0, 0.0, 0.0, 0.0 }; } /// Constructs a 256-bit floating-point vector of [8 x float] with all /// vector elements initialized to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VXORPS instruction. /// /// \returns A 256-bit vector of [8 x float] with all elements set to zero. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void) { return __extension__ (__m256){ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }; } /// Constructs a 256-bit integer vector initialized to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VXORPS instruction. /// /// \returns A 256-bit integer vector initialized to zero. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void) { return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 }; } /* Cast between vector types */ /// Casts a 256-bit floating-point vector of [4 x double] into a 256-bit /// floating-point vector of [8 x float]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit floating-point vector of [4 x double]. 
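/* Usage sketch for the splat and zero constructors above; hypothetical helper,
 * assuming <immintrin.h> and AVX. */
#include <immintrin.h>

/* Returns a vector of eight 32-bit ones; the all-zero vector is typically the
 * cheapest starting value for an accumulator. */
static __m256i make_eight_ones(void)
{
  __m256i zero = _mm256_setzero_si256();
  __m256i ones = _mm256_set1_epi32(1);
  (void)zero;
  return ones;
}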
/// \returns A 256-bit floating-point vector of [8 x float] containing the same /// bitwise pattern as the parameter. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castpd_ps(__m256d __a) { return (__m256)__a; } /// Casts a 256-bit floating-point vector of [4 x double] into a 256-bit /// integer vector. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit floating-point vector of [4 x double]. /// \returns A 256-bit integer vector containing the same bitwise pattern as the /// parameter. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castpd_si256(__m256d __a) { return (__m256i)__a; } /// Casts a 256-bit floating-point vector of [8 x float] into a 256-bit /// floating-point vector of [4 x double]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit floating-point vector of [8 x float]. /// \returns A 256-bit floating-point vector of [4 x double] containing the same /// bitwise pattern as the parameter. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castps_pd(__m256 __a) { return (__m256d)__a; } /// Casts a 256-bit floating-point vector of [8 x float] into a 256-bit /// integer vector. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit floating-point vector of [8 x float]. /// \returns A 256-bit integer vector containing the same bitwise pattern as the /// parameter. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castps_si256(__m256 __a) { return (__m256i)__a; } /// Casts a 256-bit integer vector into a 256-bit floating-point vector /// of [8 x float]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit integer vector. /// \returns A 256-bit floating-point vector of [8 x float] containing the same /// bitwise pattern as the parameter. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castsi256_ps(__m256i __a) { return (__m256)__a; } /// Casts a 256-bit integer vector into a 256-bit floating-point vector /// of [4 x double]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit integer vector. /// \returns A 256-bit floating-point vector of [4 x double] containing the same /// bitwise pattern as the parameter. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castsi256_pd(__m256i __a) { return (__m256d)__a; } /// Returns the lower 128 bits of a 256-bit floating-point vector of /// [4 x double] as a 128-bit floating-point vector of [2 x double]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit floating-point vector of [4 x double]. /// \returns A 128-bit floating-point vector of [2 x double] containing the /// lower 128 bits of the parameter. static __inline __m128d __DEFAULT_FN_ATTRS _mm256_castpd256_pd128(__m256d __a) { return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1); } /// Returns the lower 128 bits of a 256-bit floating-point vector of /// [8 x float] as a 128-bit floating-point vector of [4 x float]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit floating-point vector of [8 x float]. /// \returns A 128-bit floating-point vector of [4 x float] containing the /// lower 128 bits of the parameter. 
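/* Usage sketch for the bitwise casts above: they only reinterpret the 256 bits
 * and emit no instructions, unlike value conversions. The helper clears the
 * float sign bits (an absolute-value mask); hypothetical name, assuming
 * <immintrin.h> and AVX. */
#include <immintrin.h>

static __m256 clear_sign_bits_ps(__m256 v)
{
  /* Build the 0x7FFFFFFF mask as integers, then view it as floats for the
   * bitwise AND. */
  __m256i mask = _mm256_set1_epi32(0x7FFFFFFF);
  return _mm256_and_ps(v, _mm256_castsi256_ps(mask));
}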
static __inline __m128 __DEFAULT_FN_ATTRS _mm256_castps256_ps128(__m256 __a) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3); } /// Truncates a 256-bit integer vector into a 128-bit integer vector. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit integer vector. /// \returns A 128-bit integer vector containing the lower 128 bits of the /// parameter. static __inline __m128i __DEFAULT_FN_ATTRS _mm256_castsi256_si128(__m256i __a) { return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1); } /// Constructs a 256-bit floating-point vector of [4 x double] from a /// 128-bit floating-point vector of [2 x double]. /// /// The lower 128 bits contain the value of the source vector. The contents /// of the upper 128 bits are undefined. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits /// contain the value of the parameter. The contents of the upper 128 bits /// are undefined. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castpd128_pd256(__m128d __a) { return __builtin_shufflevector( (__v2df)__a, (__v2df)__builtin_nondeterministic_value(__a), 0, 1, 2, 3); } /// Constructs a 256-bit floating-point vector of [8 x float] from a /// 128-bit floating-point vector of [4 x float]. /// /// The lower 128 bits contain the value of the source vector. The contents /// of the upper 128 bits are undefined. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits /// contain the value of the parameter. The contents of the upper 128 bits /// are undefined. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castps128_ps256(__m128 __a) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__builtin_nondeterministic_value(__a), 0, 1, 2, 3, 4, 5, 6, 7); } /// Constructs a 256-bit integer vector from a 128-bit integer vector. /// /// The lower 128 bits contain the value of the source vector. The contents /// of the upper 128 bits are undefined. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit integer vector. /// \returns A 256-bit integer vector. The lower 128 bits contain the value of /// the parameter. The contents of the upper 128 bits are undefined. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castsi128_si256(__m128i __a) { return __builtin_shufflevector( (__v2di)__a, (__v2di)__builtin_nondeterministic_value(__a), 0, 1, 2, 3); } /// Constructs a 256-bit floating-point vector of [4 x double] from a /// 128-bit floating-point vector of [2 x double]. The lower 128 bits /// contain the value of the source vector. The upper 128 bits are set /// to zero. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits /// contain the value of the parameter. The upper 128 bits are set to zero. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_zextpd128_pd256(__m128d __a) { return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3); } /// Constructs a 256-bit floating-point vector of [8 x float] from a /// 128-bit floating-point vector of [4 x float]. 
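/* Usage sketch for widening and narrowing between 128-bit and 256-bit vectors
 * above; hypothetical helpers, assuming <immintrin.h> and AVX. */
#include <immintrin.h>

/* Zero-extending form: use it when the upper 128 bits may later be read. */
static __m256d widen_zero_upper(__m128d lo)
{
  return _mm256_zextpd128_pd256(lo);
}

/* Narrowing needs no instruction; the low 128 bits are reused directly. */
static __m128d take_low_half(__m256d v)
{
  return _mm256_castpd256_pd128(v);
}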
The lower 128 bits contain /// the value of the source vector. The upper 128 bits are set to zero. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits /// contain the value of the parameter. The upper 128 bits are set to zero. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_zextps128_ps256(__m128 __a) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7); } /// Constructs a 256-bit integer vector from a 128-bit integer vector. /// The lower 128 bits contain the value of the source vector. The upper /// 128 bits are set to zero. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit integer vector. /// \returns A 256-bit integer vector. The lower 128 bits contain the value of /// the parameter. The upper 128 bits are set to zero. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_zextsi128_si256(__m128i __a) { return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3); } /* Vector insert. We use macros rather than inlines because we only want to accept invocations where the immediate M is a constant expression. */ /// Constructs a new 256-bit vector of [8 x float] by first duplicating /// a 256-bit vector of [8 x float] given in the first parameter, and then /// replacing either the upper or the lower 128 bits with the contents of a /// 128-bit vector of [4 x float] in the second parameter. /// /// The immediate integer parameter determines between the upper or the lower /// 128 bits. /// /// \headerfile /// /// \code /// __m256 _mm256_insertf128_ps(__m256 V1, __m128 V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VINSERTF128 instruction. /// /// \param V1 /// A 256-bit vector of [8 x float]. This vector is copied to the result /// first, and then either the upper or the lower 128 bits of the result will /// be replaced by the contents of \a V2. /// \param V2 /// A 128-bit vector of [4 x float]. The contents of this parameter are /// written to either the upper or the lower 128 bits of the result depending /// on the value of parameter \a M. /// \param M /// An immediate integer. The least significant bit determines how the values /// from the two parameters are interleaved: \n /// If bit [0] of \a M is 0, \a V2 are copied to bits [127:0] of the result, /// and bits [255:128] of \a V1 are copied to bits [255:128] of the /// result. \n /// If bit [0] of \a M is 1, \a V2 are copied to bits [255:128] of the /// result, and bits [127:0] of \a V1 are copied to bits [127:0] of the /// result. /// \returns A 256-bit vector of [8 x float] containing the interleaved values. #define _mm256_insertf128_ps(V1, V2, M) \ ((__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)(__m256)(V1), \ (__v4sf)(__m128)(V2), (int)(M))) /// Constructs a new 256-bit vector of [4 x double] by first duplicating /// a 256-bit vector of [4 x double] given in the first parameter, and then /// replacing either the upper or the lower 128 bits with the contents of a /// 128-bit vector of [2 x double] in the second parameter. /// /// The immediate integer parameter determines between the upper or the lower /// 128 bits. /// /// \headerfile /// /// \code /// __m256d _mm256_insertf128_pd(__m256d V1, __m128d V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VINSERTF128 instruction. 
/// /// \param V1 /// A 256-bit vector of [4 x double]. This vector is copied to the result /// first, and then either the upper or the lower 128 bits of the result will /// be replaced by the contents of \a V2. /// \param V2 /// A 128-bit vector of [2 x double]. The contents of this parameter are /// written to either the upper or the lower 128 bits of the result depending /// on the value of parameter \a M. /// \param M /// An immediate integer. The least significant bit determines how the values /// from the two parameters are interleaved: \n /// If bit [0] of \a M is 0, \a V2 are copied to bits [127:0] of the result, /// and bits [255:128] of \a V1 are copied to bits [255:128] of the /// result. \n /// If bit [0] of \a M is 1, \a V2 are copied to bits [255:128] of the /// result, and bits [127:0] of \a V1 are copied to bits [127:0] of the /// result. /// \returns A 256-bit vector of [4 x double] containing the interleaved values. #define _mm256_insertf128_pd(V1, V2, M) \ ((__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)(__m256d)(V1), \ (__v2df)(__m128d)(V2), (int)(M))) /// Constructs a new 256-bit integer vector by first duplicating a /// 256-bit integer vector given in the first parameter, and then replacing /// either the upper or the lower 128 bits with the contents of a 128-bit /// integer vector in the second parameter. /// /// The immediate integer parameter determines between the upper or the lower /// 128 bits. /// /// \headerfile /// /// \code /// __m256i _mm256_insertf128_si256(__m256i V1, __m128i V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VINSERTF128 instruction. /// /// \param V1 /// A 256-bit integer vector. This vector is copied to the result first, and /// then either the upper or the lower 128 bits of the result will be /// replaced by the contents of \a V2. /// \param V2 /// A 128-bit integer vector. The contents of this parameter are written to /// either the upper or the lower 128 bits of the result depending on the /// value of parameter \a M. /// \param M /// An immediate integer. The least significant bit determines how the values /// from the two parameters are interleaved: \n /// If bit [0] of \a M is 0, \a V2 are copied to bits [127:0] of the result, /// and bits [255:128] of \a V1 are copied to bits [255:128] of the /// result. \n /// If bit [0] of \a M is 1, \a V2 are copied to bits [255:128] of the /// result, and bits [127:0] of \a V1 are copied to bits [127:0] of the /// result. /// \returns A 256-bit integer vector containing the interleaved values. #define _mm256_insertf128_si256(V1, V2, M) \ ((__m256i)__builtin_ia32_vinsertf128_si256((__v8si)(__m256i)(V1), \ (__v4si)(__m128i)(V2), (int)(M))) /* Vector extract. We use macros rather than inlines because we only want to accept invocations where the immediate M is a constant expression. */ /// Extracts either the upper or the lower 128 bits from a 256-bit vector /// of [8 x float], as determined by the immediate integer parameter, and /// returns the extracted bits as a 128-bit vector of [4 x float]. /// /// \headerfile /// /// \code /// __m128 _mm256_extractf128_ps(__m256 V, const int M); /// \endcode /// /// This intrinsic corresponds to the VEXTRACTF128 instruction. /// /// \param V /// A 256-bit vector of [8 x float]. /// \param M /// An immediate integer. The least significant bit determines which bits are /// extracted from the first parameter: \n /// If bit [0] of \a M is 0, bits [127:0] of \a V are copied to the /// result. 
\n /// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result. /// \returns A 128-bit vector of [4 x float] containing the extracted bits. #define _mm256_extractf128_ps(V, M) \ ((__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(M))) /// Extracts either the upper or the lower 128 bits from a 256-bit vector /// of [4 x double], as determined by the immediate integer parameter, and /// returns the extracted bits as a 128-bit vector of [2 x double]. /// /// \headerfile /// /// \code /// __m128d _mm256_extractf128_pd(__m256d V, const int M); /// \endcode /// /// This intrinsic corresponds to the VEXTRACTF128 instruction. /// /// \param V /// A 256-bit vector of [4 x double]. /// \param M /// An immediate integer. The least significant bit determines which bits are /// extracted from the first parameter: \n /// If bit [0] of \a M is 0, bits [127:0] of \a V are copied to the /// result. \n /// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result. /// \returns A 128-bit vector of [2 x double] containing the extracted bits. #define _mm256_extractf128_pd(V, M) \ ((__m128d)__builtin_ia32_vextractf128_pd256((__v4df)(__m256d)(V), (int)(M))) /// Extracts either the upper or the lower 128 bits from a 256-bit /// integer vector, as determined by the immediate integer parameter, and /// returns the extracted bits as a 128-bit integer vector. /// /// \headerfile /// /// \code /// __m128i _mm256_extractf128_si256(__m256i V, const int M); /// \endcode /// /// This intrinsic corresponds to the VEXTRACTF128 instruction. /// /// \param V /// A 256-bit integer vector. /// \param M /// An immediate integer. The least significant bit determines which bits are /// extracted from the first parameter: \n /// If bit [0] of \a M is 0, bits [127:0] of \a V are copied to the /// result. \n /// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result. /// \returns A 128-bit integer vector containing the extracted bits. #define _mm256_extractf128_si256(V, M) \ ((__m128i)__builtin_ia32_vextractf128_si256((__v8si)(__m256i)(V), (int)(M))) /// Constructs a 256-bit floating-point vector of [8 x float] by /// concatenating two 128-bit floating-point vectors of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VINSERTF128 instruction. /// /// \param __hi /// A 128-bit floating-point vector of [4 x float] to be copied to the upper /// 128 bits of the result. /// \param __lo /// A 128-bit floating-point vector of [4 x float] to be copied to the lower /// 128 bits of the result. /// \returns A 256-bit floating-point vector of [8 x float] containing the /// concatenated result. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set_m128 (__m128 __hi, __m128 __lo) { return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7); } /// Constructs a 256-bit floating-point vector of [4 x double] by /// concatenating two 128-bit floating-point vectors of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VINSERTF128 instruction. /// /// \param __hi /// A 128-bit floating-point vector of [2 x double] to be copied to the upper /// 128 bits of the result. /// \param __lo /// A 128-bit floating-point vector of [2 x double] to be copied to the lower /// 128 bits of the result. /// \returns A 256-bit floating-point vector of [4 x double] containing the /// concatenated result. 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set_m128d (__m128d __hi, __m128d __lo) { return (__m256d) __builtin_shufflevector((__v2df)__lo, (__v2df)__hi, 0, 1, 2, 3); } /// Constructs a 256-bit integer vector by concatenating two 128-bit /// integer vectors. /// /// \headerfile /// /// This intrinsic corresponds to the VINSERTF128 instruction. /// /// \param __hi /// A 128-bit integer vector to be copied to the upper 128 bits of the /// result. /// \param __lo /// A 128-bit integer vector to be copied to the lower 128 bits of the /// result. /// \returns A 256-bit integer vector containing the concatenated result. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_m128i (__m128i __hi, __m128i __lo) { return (__m256i) __builtin_shufflevector((__v2di)__lo, (__v2di)__hi, 0, 1, 2, 3); } /// Constructs a 256-bit floating-point vector of [8 x float] by /// concatenating two 128-bit floating-point vectors of [4 x float]. This is /// similar to _mm256_set_m128, but the order of the input parameters is /// swapped. /// /// \headerfile /// /// This intrinsic corresponds to the VINSERTF128 instruction. /// /// \param __lo /// A 128-bit floating-point vector of [4 x float] to be copied to the lower /// 128 bits of the result. /// \param __hi /// A 128-bit floating-point vector of [4 x float] to be copied to the upper /// 128 bits of the result. /// \returns A 256-bit floating-point vector of [8 x float] containing the /// concatenated result. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setr_m128 (__m128 __lo, __m128 __hi) { return _mm256_set_m128(__hi, __lo); } /// Constructs a 256-bit floating-point vector of [4 x double] by /// concatenating two 128-bit floating-point vectors of [2 x double]. This is /// similar to _mm256_set_m128d, but the order of the input parameters is /// swapped. /// /// \headerfile /// /// This intrinsic corresponds to the VINSERTF128 instruction. /// /// \param __lo /// A 128-bit floating-point vector of [2 x double] to be copied to the lower /// 128 bits of the result. /// \param __hi /// A 128-bit floating-point vector of [2 x double] to be copied to the upper /// 128 bits of the result. /// \returns A 256-bit floating-point vector of [4 x double] containing the /// concatenated result. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setr_m128d (__m128d __lo, __m128d __hi) { return (__m256d)_mm256_set_m128d(__hi, __lo); } /// Constructs a 256-bit integer vector by concatenating two 128-bit /// integer vectors. This is similar to _mm256_set_m128i, but the order of /// the input parameters is swapped. /// /// \headerfile /// /// This intrinsic corresponds to the VINSERTF128 instruction. /// /// \param __lo /// A 128-bit integer vector to be copied to the lower 128 bits of the /// result. /// \param __hi /// A 128-bit integer vector to be copied to the upper 128 bits of the /// result. /// \returns A 256-bit integer vector containing the concatenated result. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_m128i (__m128i __lo, __m128i __hi) { return (__m256i)_mm256_set_m128i(__hi, __lo); } /* SIMD load ops (unaligned) */ /// Loads two 128-bit floating-point vectors of [4 x float] from /// unaligned memory locations and constructs a 256-bit floating-point vector /// of [8 x float] by concatenating the two 128-bit vectors. /// /// \headerfile /// /// This intrinsic corresponds to load instructions followed by the /// VINSERTF128 instruction. 
/// /// \param __addr_hi /// A pointer to a 128-bit memory location containing 4 consecutive /// single-precision floating-point values. These values are to be copied to /// bits[255:128] of the result. The address of the memory location does not /// have to be aligned. /// \param __addr_lo /// A pointer to a 128-bit memory location containing 4 consecutive /// single-precision floating-point values. These values are to be copied to /// bits[127:0] of the result. The address of the memory location does not /// have to be aligned. /// \returns A 256-bit floating-point vector of [8 x float] containing the /// concatenated result. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo) { return _mm256_set_m128(_mm_loadu_ps(__addr_hi), _mm_loadu_ps(__addr_lo)); } /// Loads two 128-bit floating-point vectors of [2 x double] from /// unaligned memory locations and constructs a 256-bit floating-point vector /// of [4 x double] by concatenating the two 128-bit vectors. /// /// \headerfile /// /// This intrinsic corresponds to load instructions followed by the /// VINSERTF128 instruction. /// /// \param __addr_hi /// A pointer to a 128-bit memory location containing two consecutive /// double-precision floating-point values. These values are to be copied to /// bits[255:128] of the result. The address of the memory location does not /// have to be aligned. /// \param __addr_lo /// A pointer to a 128-bit memory location containing two consecutive /// double-precision floating-point values. These values are to be copied to /// bits[127:0] of the result. The address of the memory location does not /// have to be aligned. /// \returns A 256-bit floating-point vector of [4 x double] containing the /// concatenated result. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo) { return _mm256_set_m128d(_mm_loadu_pd(__addr_hi), _mm_loadu_pd(__addr_lo)); } /// Loads two 128-bit integer vectors from unaligned memory locations and /// constructs a 256-bit integer vector by concatenating the two 128-bit /// vectors. /// /// \headerfile /// /// This intrinsic corresponds to load instructions followed by the /// VINSERTF128 instruction. /// /// \param __addr_hi /// A pointer to a 128-bit memory location containing a 128-bit integer /// vector. This vector is to be copied to bits[255:128] of the result. The /// address of the memory location does not have to be aligned. /// \param __addr_lo /// A pointer to a 128-bit memory location containing a 128-bit integer /// vector. This vector is to be copied to bits[127:0] of the result. The /// address of the memory location does not have to be aligned. /// \returns A 256-bit integer vector containing the concatenated result. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_loadu2_m128i(__m128i_u const *__addr_hi, __m128i_u const *__addr_lo) { return _mm256_set_m128i(_mm_loadu_si128(__addr_hi), _mm_loadu_si128(__addr_lo)); } /* SIMD store ops (unaligned) */ /// Stores the upper and lower 128 bits of a 256-bit floating-point /// vector of [8 x float] into two different unaligned memory locations. /// /// \headerfile /// /// This intrinsic corresponds to the VEXTRACTF128 instruction and the /// store instructions. /// /// \param __addr_hi /// A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be /// copied to this memory location. The address of this memory location does /// not have to be aligned. 
/// \param __addr_lo /// A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be /// copied to this memory location. The address of this memory location does /// not have to be aligned. /// \param __a /// A 256-bit floating-point vector of [8 x float]. static __inline void __DEFAULT_FN_ATTRS _mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a) { __m128 __v128; __v128 = _mm256_castps256_ps128(__a); _mm_storeu_ps(__addr_lo, __v128); __v128 = _mm256_extractf128_ps(__a, 1); _mm_storeu_ps(__addr_hi, __v128); } /// Stores the upper and lower 128 bits of a 256-bit floating-point /// vector of [4 x double] into two different unaligned memory locations. /// /// \headerfile /// /// This intrinsic corresponds to the VEXTRACTF128 instruction and the /// store instructions. /// /// \param __addr_hi /// A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be /// copied to this memory location. The address of this memory location does /// not have to be aligned. /// \param __addr_lo /// A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be /// copied to this memory location. The address of this memory location does /// not have to be aligned. /// \param __a /// A 256-bit floating-point vector of [4 x double]. static __inline void __DEFAULT_FN_ATTRS _mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a) { __m128d __v128; __v128 = _mm256_castpd256_pd128(__a); _mm_storeu_pd(__addr_lo, __v128); __v128 = _mm256_extractf128_pd(__a, 1); _mm_storeu_pd(__addr_hi, __v128); } /// Stores the upper and lower 128 bits of a 256-bit integer vector into /// two different unaligned memory locations. /// /// \headerfile /// /// This intrinsic corresponds to the VEXTRACTF128 instruction and the /// store instructions. /// /// \param __addr_hi /// A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be /// copied to this memory location. The address of this memory location does /// not have to be aligned. /// \param __addr_lo /// A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be /// copied to this memory location. The address of this memory location does /// not have to be aligned. /// \param __a /// A 256-bit integer vector. static __inline void __DEFAULT_FN_ATTRS _mm256_storeu2_m128i(__m128i_u *__addr_hi, __m128i_u *__addr_lo, __m256i __a) { __m128i __v128; __v128 = _mm256_castsi256_si128(__a); _mm_storeu_si128(__addr_lo, __v128); __v128 = _mm256_extractf128_si256(__a, 1); _mm_storeu_si128(__addr_hi, __v128); } #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS128 #endif /* __AVXINTRIN_H */ lwpintrin.hrdseedintrin.h/*===---- stdbool.h - Standard header for booleans -------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __STDBOOL_H #define __STDBOOL_H #define __bool_true_false_are_defined 1 #if defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L /* FIXME: We should be issuing a deprecation warning here, but cannot yet due * to system headers which include this header file unconditionally. */ #elif !defined(__cplusplus) #define bool _Bool #define true 1 #define false 0 #elif defined(__GNUC__) && !defined(__STRICT_ANSI__) /* Define _Bool as a GNU extension. 
*/ #define _Bool bool #if defined(__cplusplus) && __cplusplus < 201103L /* For C++98, define bool, false, true as a GNU extension. */ #define bool bool #define false false #define true true #endif #endif #endif /* __STDBOOL_H */ x86gprintrin.h//===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // A single header library providing an utility class to break up an array of // bytes. Whenever run on the same input, provides the same output, as long as // its methods are called in the same order, with the same arguments. //===----------------------------------------------------------------------===// #ifndef LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_ #define LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_ #include #include #include #include #include #include #include #include #include #include #include #include // In addition to the comments below, the API is also briefly documented at // https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#fuzzed-data-provider class FuzzedDataProvider { public: // |data| is an array of length |size| that the FuzzedDataProvider wraps to // provide more granular access. |data| must outlive the FuzzedDataProvider. FuzzedDataProvider(const uint8_t *data, size_t size) : data_ptr_(data), remaining_bytes_(size) {} ~FuzzedDataProvider() = default; // See the implementation below (after the class definition) for more verbose // comments for each of the methods. // Methods returning std::vector of bytes. These are the most popular choice // when splitting fuzzing input into pieces, as every piece is put into a // separate buffer (i.e. ASan would catch any under-/overflow) and the memory // will be released automatically. template std::vector ConsumeBytes(size_t num_bytes); template std::vector ConsumeBytesWithTerminator(size_t num_bytes, T terminator = 0); template std::vector ConsumeRemainingBytes(); // Methods returning strings. Use only when you need a std::string or a null // terminated C-string. Otherwise, prefer the methods returning std::vector. std::string ConsumeBytesAsString(size_t num_bytes); std::string ConsumeRandomLengthString(size_t max_length); std::string ConsumeRandomLengthString(); std::string ConsumeRemainingBytesAsString(); // Methods returning integer values. template T ConsumeIntegral(); template T ConsumeIntegralInRange(T min, T max); // Methods returning floating point values. template T ConsumeFloatingPoint(); template T ConsumeFloatingPointInRange(T min, T max); // 0 <= return value <= 1. template T ConsumeProbability(); bool ConsumeBool(); // Returns a value chosen from the given enum. template T ConsumeEnum(); // Returns a value from the given array. template T PickValueInArray(const T (&array)[size]); template T PickValueInArray(const std::array &array); template T PickValueInArray(std::initializer_list list); // Writes data to the given destination and returns number of bytes written. size_t ConsumeData(void *destination, size_t num_bytes); // Reports the remaining bytes available for fuzzed input. 
size_t remaining_bytes() { return remaining_bytes_; } private: FuzzedDataProvider(const FuzzedDataProvider &) = delete; FuzzedDataProvider &operator=(const FuzzedDataProvider &) = delete; void CopyAndAdvance(void *destination, size_t num_bytes); void Advance(size_t num_bytes); template std::vector ConsumeBytes(size_t size, size_t num_bytes); template TS ConvertUnsignedToSigned(TU value); const uint8_t *data_ptr_; size_t remaining_bytes_; }; // Returns a std::vector containing |num_bytes| of input data. If fewer than // |num_bytes| of data remain, returns a shorter std::vector containing all // of the data that's left. Can be used with any byte sized type, such as // char, unsigned char, uint8_t, etc. template std::vector FuzzedDataProvider::ConsumeBytes(size_t num_bytes) { num_bytes = std::min(num_bytes, remaining_bytes_); return ConsumeBytes(num_bytes, num_bytes); } // Similar to |ConsumeBytes|, but also appends the terminator value at the end // of the resulting vector. Useful, when a mutable null-terminated C-string is // needed, for example. But that is a rare case. Better avoid it, if possible, // and prefer using |ConsumeBytes| or |ConsumeBytesAsString| methods. template std::vector FuzzedDataProvider::ConsumeBytesWithTerminator(size_t num_bytes, T terminator) { num_bytes = std::min(num_bytes, remaining_bytes_); std::vector result = ConsumeBytes(num_bytes + 1, num_bytes); result.back() = terminator; return result; } // Returns a std::vector containing all remaining bytes of the input data. template std::vector FuzzedDataProvider::ConsumeRemainingBytes() { return ConsumeBytes(remaining_bytes_); } // Returns a std::string containing |num_bytes| of input data. Using this and // |.c_str()| on the resulting string is the best way to get an immutable // null-terminated C string. If fewer than |num_bytes| of data remain, returns // a shorter std::string containing all of the data that's left. inline std::string FuzzedDataProvider::ConsumeBytesAsString(size_t num_bytes) { static_assert(sizeof(std::string::value_type) == sizeof(uint8_t), "ConsumeBytesAsString cannot convert the data to a string."); num_bytes = std::min(num_bytes, remaining_bytes_); std::string result( reinterpret_cast(data_ptr_), num_bytes); Advance(num_bytes); return result; } // Returns a std::string of length from 0 to |max_length|. When it runs out of // input data, returns what remains of the input. Designed to be more stable // with respect to a fuzzer inserting characters than just picking a random // length and then consuming that many bytes with |ConsumeBytes|. inline std::string FuzzedDataProvider::ConsumeRandomLengthString(size_t max_length) { // Reads bytes from the start of |data_ptr_|. Maps "\\" to "\", and maps "\" // followed by anything else to the end of the string. As a result of this // logic, a fuzzer can insert characters into the string, and the string // will be lengthened to include those new characters, resulting in a more // stable fuzzer than picking the length of a string independently from // picking its contents. std::string result; // Reserve the anticipated capacity to prevent several reallocations. 
result.reserve(std::min(max_length, remaining_bytes_)); for (size_t i = 0; i < max_length && remaining_bytes_ != 0; ++i) { char next = ConvertUnsignedToSigned(data_ptr_[0]); Advance(1); if (next == '\\' && remaining_bytes_ != 0) { next = ConvertUnsignedToSigned(data_ptr_[0]); Advance(1); if (next != '\\') break; } result += next; } result.shrink_to_fit(); return result; } // Returns a std::string of length from 0 to |remaining_bytes_|. inline std::string FuzzedDataProvider::ConsumeRandomLengthString() { return ConsumeRandomLengthString(remaining_bytes_); } // Returns a std::string containing all remaining bytes of the input data. // Prefer using |ConsumeRemainingBytes| unless you actually need a std::string // object. inline std::string FuzzedDataProvider::ConsumeRemainingBytesAsString() { return ConsumeBytesAsString(remaining_bytes_); } // Returns a number in the range [Type's min, Type's max]. The value might // not be uniformly distributed in the given range. If there's no input data // left, always returns |min|. template T FuzzedDataProvider::ConsumeIntegral() { return ConsumeIntegralInRange(std::numeric_limits::min(), std::numeric_limits::max()); } // Returns a number in the range [min, max] by consuming bytes from the // input data. The value might not be uniformly distributed in the given // range. If there's no input data left, always returns |min|. |min| must // be less than or equal to |max|. template T FuzzedDataProvider::ConsumeIntegralInRange(T min, T max) { static_assert(std::is_integral::value, "An integral type is required."); static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type."); if (min > max) abort(); // Use the biggest type possible to hold the range and the result. uint64_t range = static_cast(max) - static_cast(min); uint64_t result = 0; size_t offset = 0; while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 && remaining_bytes_ != 0) { // Pull bytes off the end of the seed data. Experimentally, this seems to // allow the fuzzer to more easily explore the input space. This makes // sense, since it works by modifying inputs that caused new code to run, // and this data is often used to encode length of data read by // |ConsumeBytes|. Separating out read lengths makes it easier modify the // contents of the data that is actually read. --remaining_bytes_; result = (result << CHAR_BIT) | data_ptr_[remaining_bytes_]; offset += CHAR_BIT; } // Avoid division by 0, in case |range + 1| results in overflow. if (range != std::numeric_limits::max()) result = result % (range + 1); return static_cast(static_cast(min) + result); } // Returns a floating point value in the range [Type's lowest, Type's max] by // consuming bytes from the input data. If there's no input data left, always // returns approximately 0. template T FuzzedDataProvider::ConsumeFloatingPoint() { return ConsumeFloatingPointInRange(std::numeric_limits::lowest(), std::numeric_limits::max()); } // Returns a floating point value in the given range by consuming bytes from // the input data. If there's no input data left, returns |min|. Note that // |min| must be less than or equal to |max|. template T FuzzedDataProvider::ConsumeFloatingPointInRange(T min, T max) { if (min > max) abort(); T range = .0; T result = min; constexpr T zero(.0); if (max > zero && min < zero && max > min + std::numeric_limits::max()) { // The diff |max - min| would overflow the given floating point type. 
Use // the half of the diff as the range and consume a bool to decide whether // the result is in the first of the second part of the diff. range = (max / 2.0) - (min / 2.0); if (ConsumeBool()) { result += range; } } else { range = max - min; } return result + range * ConsumeProbability(); } // Returns a floating point number in the range [0.0, 1.0]. If there's no // input data left, always returns 0. template T FuzzedDataProvider::ConsumeProbability() { static_assert(std::is_floating_point::value, "A floating point type is required."); // Use different integral types for different floating point types in order // to provide better density of the resulting values. using IntegralType = typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), uint32_t, uint64_t>::type; T result = static_cast(ConsumeIntegral()); result /= static_cast(std::numeric_limits::max()); return result; } // Reads one byte and returns a bool, or false when no data remains. inline bool FuzzedDataProvider::ConsumeBool() { return 1 & ConsumeIntegral(); } // Returns an enum value. The enum must start at 0 and be contiguous. It must // also contain |kMaxValue| aliased to its largest (inclusive) value. Such as: // enum class Foo { SomeValue, OtherValue, kMaxValue = OtherValue }; template T FuzzedDataProvider::ConsumeEnum() { static_assert(std::is_enum::value, "|T| must be an enum type."); return static_cast( ConsumeIntegralInRange(0, static_cast(T::kMaxValue))); } // Returns a copy of the value selected from the given fixed-size |array|. template T FuzzedDataProvider::PickValueInArray(const T (&array)[size]) { static_assert(size > 0, "The array must be non empty."); return array[ConsumeIntegralInRange(0, size - 1)]; } template T FuzzedDataProvider::PickValueInArray(const std::array &array) { static_assert(size > 0, "The array must be non empty."); return array[ConsumeIntegralInRange(0, size - 1)]; } template T FuzzedDataProvider::PickValueInArray(std::initializer_list list) { // TODO(Dor1s): switch to static_assert once C++14 is allowed. if (!list.size()) abort(); return *(list.begin() + ConsumeIntegralInRange(0, list.size() - 1)); } // Writes |num_bytes| of input data to the given destination pointer. If there // is not enough data left, writes all remaining bytes. Return value is the // number of bytes written. // In general, it's better to avoid using this function, but it may be useful // in cases when it's necessary to fill a certain buffer or object with // fuzzing data. inline size_t FuzzedDataProvider::ConsumeData(void *destination, size_t num_bytes) { num_bytes = std::min(num_bytes, remaining_bytes_); CopyAndAdvance(destination, num_bytes); return num_bytes; } // Private methods. inline void FuzzedDataProvider::CopyAndAdvance(void *destination, size_t num_bytes) { std::memcpy(destination, data_ptr_, num_bytes); Advance(num_bytes); } inline void FuzzedDataProvider::Advance(size_t num_bytes) { if (num_bytes > remaining_bytes_) abort(); data_ptr_ += num_bytes; remaining_bytes_ -= num_bytes; } template std::vector FuzzedDataProvider::ConsumeBytes(size_t size, size_t num_bytes) { static_assert(sizeof(T) == sizeof(uint8_t), "Incompatible data type."); // The point of using the size-based constructor below is to increase the // odds of having a vector object with capacity being equal to the length. // That part is always implementation specific, but at least both libc++ and // libstdc++ allocate the requested number of bytes in that constructor, // which seems to be a natural choice for other implementations as well. 
// To increase the odds even more, we also call |shrink_to_fit| below. std::vector result(size); if (size == 0) { if (num_bytes != 0) abort(); return result; } CopyAndAdvance(result.data(), num_bytes); // Even though |shrink_to_fit| is also implementation specific, we expect it // to provide an additional assurance in case vector's constructor allocated // a buffer which is larger than the actual amount of data we put inside it. result.shrink_to_fit(); return result; } template TS FuzzedDataProvider::ConvertUnsignedToSigned(TU value) { static_assert(sizeof(TS) == sizeof(TU), "Incompatible data types."); static_assert(!std::numeric_limits::is_signed, "Source type must be unsigned."); // TODO(Dor1s): change to `if constexpr` once C++17 becomes mainstream. if (std::numeric_limits::is_modulo) return static_cast(value); // Avoid using implementation-defined unsigned to signed conversions. // To learn more, see https://stackoverflow.com/questions/13150449. if (value <= std::numeric_limits::max()) { return static_cast(value); } else { constexpr auto TS_min = std::numeric_limits::min(); return TS_min + static_cast(value - TS_min); } } #endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_ //===------------------------- __complex_cmath.h --------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // std::complex header copied from the libcxx source and simplified for use in // OpenMP target offload regions. // //===----------------------------------------------------------------------===// #ifndef _OPENMP #error "This file is for OpenMP compilation only." #endif #ifndef __cplusplus #error "This file is for C++ compilation only." 
#endif #ifndef _LIBCPP_COMPLEX #define _LIBCPP_COMPLEX #include #include #define __DEVICE__ static constexpr __attribute__((nothrow)) namespace std { // abs template __DEVICE__ _Tp abs(const std::complex<_Tp> &__c) { return hypot(__c.real(), __c.imag()); } // arg template __DEVICE__ _Tp arg(const std::complex<_Tp> &__c) { return atan2(__c.imag(), __c.real()); } template typename enable_if::value || is_same<_Tp, double>::value, double>::type arg(_Tp __re) { return atan2(0., __re); } template typename enable_if::value, float>::type arg(_Tp __re) { return atan2f(0.F, __re); } // norm template __DEVICE__ _Tp norm(const std::complex<_Tp> &__c) { if (std::isinf(__c.real())) return abs(__c.real()); if (std::isinf(__c.imag())) return abs(__c.imag()); return __c.real() * __c.real() + __c.imag() * __c.imag(); } // conj template std::complex<_Tp> conj(const std::complex<_Tp> &__c) { return std::complex<_Tp>(__c.real(), -__c.imag()); } // proj template std::complex<_Tp> proj(const std::complex<_Tp> &__c) { std::complex<_Tp> __r = __c; if (std::isinf(__c.real()) || std::isinf(__c.imag())) __r = std::complex<_Tp>(INFINITY, copysign(_Tp(0), __c.imag())); return __r; } // polar template complex<_Tp> polar(const _Tp &__rho, const _Tp &__theta = _Tp()) { if (std::isnan(__rho) || signbit(__rho)) return std::complex<_Tp>(_Tp(NAN), _Tp(NAN)); if (std::isnan(__theta)) { if (std::isinf(__rho)) return std::complex<_Tp>(__rho, __theta); return std::complex<_Tp>(__theta, __theta); } if (std::isinf(__theta)) { if (std::isinf(__rho)) return std::complex<_Tp>(__rho, _Tp(NAN)); return std::complex<_Tp>(_Tp(NAN), _Tp(NAN)); } _Tp __x = __rho * cos(__theta); if (std::isnan(__x)) __x = 0; _Tp __y = __rho * sin(__theta); if (std::isnan(__y)) __y = 0; return std::complex<_Tp>(__x, __y); } // log template std::complex<_Tp> log(const std::complex<_Tp> &__x) { return std::complex<_Tp>(log(abs(__x)), arg(__x)); } // log10 template std::complex<_Tp> log10(const std::complex<_Tp> &__x) { return log(__x) / log(_Tp(10)); } // sqrt template __DEVICE__ std::complex<_Tp> sqrt(const std::complex<_Tp> &__x) { if (std::isinf(__x.imag())) return std::complex<_Tp>(_Tp(INFINITY), __x.imag()); if (std::isinf(__x.real())) { if (__x.real() > _Tp(0)) return std::complex<_Tp>(__x.real(), std::isnan(__x.imag()) ? __x.imag() : copysign(_Tp(0), __x.imag())); return std::complex<_Tp>(std::isnan(__x.imag()) ? 
__x.imag() : _Tp(0), copysign(__x.real(), __x.imag())); } return polar(sqrt(abs(__x)), arg(__x) / _Tp(2)); } // exp template __DEVICE__ std::complex<_Tp> exp(const std::complex<_Tp> &__x) { _Tp __i = __x.imag(); if (std::isinf(__x.real())) { if (__x.real() < _Tp(0)) { if (!std::isfinite(__i)) __i = _Tp(1); } else if (__i == 0 || !std::isfinite(__i)) { if (std::isinf(__i)) __i = _Tp(NAN); return std::complex<_Tp>(__x.real(), __i); } } else if (std::isnan(__x.real()) && __x.imag() == 0) return __x; _Tp __e = exp(__x.real()); return std::complex<_Tp>(__e * cos(__i), __e * sin(__i)); } // pow template std::complex<_Tp> pow(const std::complex<_Tp> &__x, const std::complex<_Tp> &__y) { return exp(__y * log(__x)); } // __sqr, computes pow(x, 2) template std::complex<_Tp> __sqr(const std::complex<_Tp> &__x) { return std::complex<_Tp>((__x.real() - __x.imag()) * (__x.real() + __x.imag()), _Tp(2) * __x.real() * __x.imag()); } // asinh template __DEVICE__ std::complex<_Tp> asinh(const std::complex<_Tp> &__x) { const _Tp __pi(atan2(+0., -0.)); if (std::isinf(__x.real())) { if (std::isnan(__x.imag())) return __x; if (std::isinf(__x.imag())) return std::complex<_Tp>(__x.real(), copysign(__pi * _Tp(0.25), __x.imag())); return std::complex<_Tp>(__x.real(), copysign(_Tp(0), __x.imag())); } if (std::isnan(__x.real())) { if (std::isinf(__x.imag())) return std::complex<_Tp>(__x.imag(), __x.real()); if (__x.imag() == 0) return __x; return std::complex<_Tp>(__x.real(), __x.real()); } if (std::isinf(__x.imag())) return std::complex<_Tp>(copysign(__x.imag(), __x.real()), copysign(__pi / _Tp(2), __x.imag())); std::complex<_Tp> __z = log(__x + sqrt(__sqr(__x) + _Tp(1))); return std::complex<_Tp>(copysign(__z.real(), __x.real()), copysign(__z.imag(), __x.imag())); } // acosh template __DEVICE__ std::complex<_Tp> acosh(const std::complex<_Tp> &__x) { const _Tp __pi(atan2(+0., -0.)); if (std::isinf(__x.real())) { if (std::isnan(__x.imag())) return std::complex<_Tp>(abs(__x.real()), __x.imag()); if (std::isinf(__x.imag())) { if (__x.real() > 0) return std::complex<_Tp>(__x.real(), copysign(__pi * _Tp(0.25), __x.imag())); else return std::complex<_Tp>(-__x.real(), copysign(__pi * _Tp(0.75), __x.imag())); } if (__x.real() < 0) return std::complex<_Tp>(-__x.real(), copysign(__pi, __x.imag())); return std::complex<_Tp>(__x.real(), copysign(_Tp(0), __x.imag())); } if (std::isnan(__x.real())) { if (std::isinf(__x.imag())) return std::complex<_Tp>(abs(__x.imag()), __x.real()); return std::complex<_Tp>(__x.real(), __x.real()); } if (std::isinf(__x.imag())) return std::complex<_Tp>(abs(__x.imag()), copysign(__pi / _Tp(2), __x.imag())); std::complex<_Tp> __z = log(__x + sqrt(__sqr(__x) - _Tp(1))); return std::complex<_Tp>(copysign(__z.real(), _Tp(0)), copysign(__z.imag(), __x.imag())); } // atanh template __DEVICE__ std::complex<_Tp> atanh(const std::complex<_Tp> &__x) { const _Tp __pi(atan2(+0., -0.)); if (std::isinf(__x.imag())) { return std::complex<_Tp>(copysign(_Tp(0), __x.real()), copysign(__pi / _Tp(2), __x.imag())); } if (std::isnan(__x.imag())) { if (std::isinf(__x.real()) || __x.real() == 0) return std::complex<_Tp>(copysign(_Tp(0), __x.real()), __x.imag()); return std::complex<_Tp>(__x.imag(), __x.imag()); } if (std::isnan(__x.real())) { return std::complex<_Tp>(__x.real(), __x.real()); } if (std::isinf(__x.real())) { return std::complex<_Tp>(copysign(_Tp(0), __x.real()), copysign(__pi / _Tp(2), __x.imag())); } if (abs(__x.real()) == _Tp(1) && __x.imag() == _Tp(0)) { return std::complex<_Tp>(copysign(_Tp(INFINITY), 
__x.real()), copysign(_Tp(0), __x.imag())); } std::complex<_Tp> __z = log((_Tp(1) + __x) / (_Tp(1) - __x)) / _Tp(2); return std::complex<_Tp>(copysign(__z.real(), __x.real()), copysign(__z.imag(), __x.imag())); } // sinh template __DEVICE__ std::complex<_Tp> sinh(const std::complex<_Tp> &__x) { if (std::isinf(__x.real()) && !std::isfinite(__x.imag())) return std::complex<_Tp>(__x.real(), _Tp(NAN)); if (__x.real() == 0 && !std::isfinite(__x.imag())) return std::complex<_Tp>(__x.real(), _Tp(NAN)); if (__x.imag() == 0 && !std::isfinite(__x.real())) return __x; return std::complex<_Tp>(sinh(__x.real()) * cos(__x.imag()), cosh(__x.real()) * sin(__x.imag())); } // cosh template __DEVICE__ std::complex<_Tp> cosh(const std::complex<_Tp> &__x) { if (std::isinf(__x.real()) && !std::isfinite(__x.imag())) return std::complex<_Tp>(abs(__x.real()), _Tp(NAN)); if (__x.real() == 0 && !std::isfinite(__x.imag())) return std::complex<_Tp>(_Tp(NAN), __x.real()); if (__x.real() == 0 && __x.imag() == 0) return std::complex<_Tp>(_Tp(1), __x.imag()); if (__x.imag() == 0 && !std::isfinite(__x.real())) return std::complex<_Tp>(abs(__x.real()), __x.imag()); return std::complex<_Tp>(cosh(__x.real()) * cos(__x.imag()), sinh(__x.real()) * sin(__x.imag())); } // tanh template __DEVICE__ std::complex<_Tp> tanh(const std::complex<_Tp> &__x) { if (std::isinf(__x.real())) { if (!std::isfinite(__x.imag())) return std::complex<_Tp>(_Tp(1), _Tp(0)); return std::complex<_Tp>(_Tp(1), copysign(_Tp(0), sin(_Tp(2) * __x.imag()))); } if (std::isnan(__x.real()) && __x.imag() == 0) return __x; _Tp __2r(_Tp(2) * __x.real()); _Tp __2i(_Tp(2) * __x.imag()); _Tp __d(cosh(__2r) + cos(__2i)); _Tp __2rsh(sinh(__2r)); if (std::isinf(__2rsh) && std::isinf(__d)) return std::complex<_Tp>(__2rsh > _Tp(0) ? _Tp(1) : _Tp(-1), __2i > _Tp(0) ? _Tp(0) : _Tp(-0.)); return std::complex<_Tp>(__2rsh / __d, sin(__2i) / __d); } // asin template __DEVICE__ std::complex<_Tp> asin(const std::complex<_Tp> &__x) { std::complex<_Tp> __z = asinh(complex<_Tp>(-__x.imag(), __x.real())); return std::complex<_Tp>(__z.imag(), -__z.real()); } // acos template __DEVICE__ std::complex<_Tp> acos(const std::complex<_Tp> &__x) { const _Tp __pi(atan2(+0., -0.)); if (std::isinf(__x.real())) { if (std::isnan(__x.imag())) return std::complex<_Tp>(__x.imag(), __x.real()); if (std::isinf(__x.imag())) { if (__x.real() < _Tp(0)) return std::complex<_Tp>(_Tp(0.75) * __pi, -__x.imag()); return std::complex<_Tp>(_Tp(0.25) * __pi, -__x.imag()); } if (__x.real() < _Tp(0)) return std::complex<_Tp>(__pi, signbit(__x.imag()) ? -__x.real() : __x.real()); return std::complex<_Tp>(_Tp(0), signbit(__x.imag()) ? 
__x.real() : -__x.real()); } if (std::isnan(__x.real())) { if (std::isinf(__x.imag())) return std::complex<_Tp>(__x.real(), -__x.imag()); return std::complex<_Tp>(__x.real(), __x.real()); } if (std::isinf(__x.imag())) return std::complex<_Tp>(__pi / _Tp(2), -__x.imag()); if (__x.real() == 0 && (__x.imag() == 0 || isnan(__x.imag()))) return std::complex<_Tp>(__pi / _Tp(2), -__x.imag()); std::complex<_Tp> __z = log(__x + sqrt(__sqr(__x) - _Tp(1))); if (signbit(__x.imag())) return std::complex<_Tp>(abs(__z.imag()), abs(__z.real())); return std::complex<_Tp>(abs(__z.imag()), -abs(__z.real())); } // atan template __DEVICE__ std::complex<_Tp> atan(const std::complex<_Tp> &__x) { std::complex<_Tp> __z = atanh(complex<_Tp>(-__x.imag(), __x.real())); return std::complex<_Tp>(__z.imag(), -__z.real()); } // sin template __DEVICE__ std::complex<_Tp> sin(const std::complex<_Tp> &__x) { std::complex<_Tp> __z = sinh(complex<_Tp>(-__x.imag(), __x.real())); return std::complex<_Tp>(__z.imag(), -__z.real()); } // cos template std::complex<_Tp> cos(const std::complex<_Tp> &__x) { return cosh(complex<_Tp>(-__x.imag(), __x.real())); } // tan template __DEVICE__ std::complex<_Tp> tan(const std::complex<_Tp> &__x) { std::complex<_Tp> __z = tanh(complex<_Tp>(-__x.imag(), __x.real())); return std::complex<_Tp>(__z.imag(), -__z.real()); } } // namespace std #endif /*===---- bmiintrin.h - Implementation of BMI intrinsics on PowerPC --------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined X86GPRINTRIN_H_ #error "Never use directly; include instead." 
#endif #ifndef BMIINTRIN_H_ #define BMIINTRIN_H_ extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __tzcnt_u16(unsigned short __X) { return __builtin_ctz(__X); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __andn_u32(unsigned int __X, unsigned int __Y) { return (~__X & __Y); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _bextr_u32(unsigned int __X, unsigned int __P, unsigned int __L) { return ((__X << (32 - (__L + __P))) >> (32 - __L)); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __bextr_u32(unsigned int __X, unsigned int __Y) { unsigned int __P, __L; __P = __Y & 0xFF; __L = (__Y >> 8) & 0xFF; return (_bextr_u32(__X, __P, __L)); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __blsi_u32(unsigned int __X) { return (__X & -__X); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _blsi_u32(unsigned int __X) { return __blsi_u32(__X); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __blsmsk_u32(unsigned int __X) { return (__X ^ (__X - 1)); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _blsmsk_u32(unsigned int __X) { return __blsmsk_u32(__X); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __blsr_u32(unsigned int __X) { return (__X & (__X - 1)); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _blsr_u32(unsigned int __X) { return __blsr_u32(__X); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __tzcnt_u32(unsigned int __X) { return __builtin_ctz(__X); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _tzcnt_u32(unsigned int __X) { return __builtin_ctz(__X); } /* use the 64-bit shift, rotate, and count leading zeros instructions for long long. 
*/ #ifdef __PPC64__ extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __andn_u64(unsigned long long __X, unsigned long long __Y) { return (~__X & __Y); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _bextr_u64(unsigned long long __X, unsigned int __P, unsigned int __L) { return ((__X << (64 - (__L + __P))) >> (64 - __L)); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __bextr_u64(unsigned long long __X, unsigned long long __Y) { unsigned int __P, __L; __P = __Y & 0xFF; __L = (__Y & 0xFF00) >> 8; return (_bextr_u64(__X, __P, __L)); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __blsi_u64(unsigned long long __X) { return __X & -__X; } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _blsi_u64(unsigned long long __X) { return __blsi_u64(__X); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __blsmsk_u64(unsigned long long __X) { return (__X ^ (__X - 1)); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _blsmsk_u64(unsigned long long __X) { return __blsmsk_u64(__X); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __blsr_u64(unsigned long long __X) { return (__X & (__X - 1)); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _blsr_u64(unsigned long long __X) { return __blsr_u64(__X); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __tzcnt_u64(unsigned long long __X) { return __builtin_ctzll(__X); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _tzcnt_u64(unsigned long long __X) { return __builtin_ctzll(__X); } #endif /* __PPC64__ */ #endif /* BMIINTRIN_H_ */
/*===---- __clang_hip_math.h - Device-side HIP math support ----------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_HIP_MATH_H__ #define __CLANG_HIP_MATH_H__ #if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__) #error "This file is for HIP and OpenMP AMDGCN device compilation only."
#endif #if !defined(__HIPCC_RTC__) #include #include #ifdef __OPENMP_AMDGCN__ #include #endif #endif // !defined(__HIPCC_RTC__) #pragma push_macro("__DEVICE__") #ifdef __OPENMP_AMDGCN__ #define __DEVICE__ static inline __attribute__((always_inline, nothrow)) #else #define __DEVICE__ static __device__ inline __attribute__((always_inline)) #endif // Device library provides fast low precision and slow full-recision // implementations for some functions. Which one gets selected depends on // __CLANG_GPU_APPROX_TRANSCENDENTALS__ which gets defined by clang if // -ffast-math or -fgpu-approx-transcendentals are in effect. #pragma push_macro("__FAST_OR_SLOW") #if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__) #define __FAST_OR_SLOW(fast, slow) fast #else #define __FAST_OR_SLOW(fast, slow) slow #endif // A few functions return bool type starting only in C++11. #pragma push_macro("__RETURN_TYPE") #ifdef __OPENMP_AMDGCN__ #define __RETURN_TYPE int #else #if defined(__cplusplus) #define __RETURN_TYPE bool #else #define __RETURN_TYPE int #endif #endif // __OPENMP_AMDGCN__ #if defined (__cplusplus) && __cplusplus < 201103L // emulate static_assert on type sizes template struct __compare_result{}; template<> struct __compare_result { static const __device__ bool valid; }; __DEVICE__ void __suppress_unused_warning(bool b){}; template __DEVICE__ void __static_assert_equal_size() { __suppress_unused_warning(__compare_result::valid); } #define __static_assert_type_size_equal(A, B) \ __static_assert_equal_size() #else #define __static_assert_type_size_equal(A,B) \ static_assert((A) == (B), "") #endif __DEVICE__ uint64_t __make_mantissa_base8(const char *__tagp __attribute__((nonnull))) { uint64_t __r = 0; while (*__tagp != '\0') { char __tmp = *__tagp; if (__tmp >= '0' && __tmp <= '7') __r = (__r * 8u) + __tmp - '0'; else return 0; ++__tagp; } return __r; } __DEVICE__ uint64_t __make_mantissa_base10(const char *__tagp __attribute__((nonnull))) { uint64_t __r = 0; while (*__tagp != '\0') { char __tmp = *__tagp; if (__tmp >= '0' && __tmp <= '9') __r = (__r * 10u) + __tmp - '0'; else return 0; ++__tagp; } return __r; } __DEVICE__ uint64_t __make_mantissa_base16(const char *__tagp __attribute__((nonnull))) { uint64_t __r = 0; while (*__tagp != '\0') { char __tmp = *__tagp; if (__tmp >= '0' && __tmp <= '9') __r = (__r * 16u) + __tmp - '0'; else if (__tmp >= 'a' && __tmp <= 'f') __r = (__r * 16u) + __tmp - 'a' + 10; else if (__tmp >= 'A' && __tmp <= 'F') __r = (__r * 16u) + __tmp - 'A' + 10; else return 0; ++__tagp; } return __r; } __DEVICE__ uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull))) { if (*__tagp == '0') { ++__tagp; if (*__tagp == 'x' || *__tagp == 'X') return __make_mantissa_base16(__tagp); else return __make_mantissa_base8(__tagp); } return __make_mantissa_base10(__tagp); } // BEGIN FLOAT // BEGIN INTRINSICS __DEVICE__ float __cosf(float __x) { return __ocml_native_cos_f32(__x); } __DEVICE__ float __exp10f(float __x) { const float __log2_10 = 0x1.a934f0p+1f; return __builtin_amdgcn_exp2f(__log2_10 * __x); } __DEVICE__ float __expf(float __x) { const float __log2_e = 0x1.715476p+0; return __builtin_amdgcn_exp2f(__log2_e * __x); } #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); } __DEVICE__ float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); } __DEVICE__ float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); } __DEVICE__ float __fadd_rz(float __x, float __y) { 
return __ocml_add_rtz_f32(__x, __y); } #else __DEVICE__ float __fadd_rn(float __x, float __y) { return __x + __y; } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); } __DEVICE__ float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); } __DEVICE__ float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); } __DEVICE__ float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); } #else __DEVICE__ float __fdiv_rn(float __x, float __y) { return __x / __y; } #endif __DEVICE__ float __fdividef(float __x, float __y) { return __x / __y; } #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ float __fmaf_rd(float __x, float __y, float __z) { return __ocml_fma_rtn_f32(__x, __y, __z); } __DEVICE__ float __fmaf_rn(float __x, float __y, float __z) { return __ocml_fma_rte_f32(__x, __y, __z); } __DEVICE__ float __fmaf_ru(float __x, float __y, float __z) { return __ocml_fma_rtp_f32(__x, __y, __z); } __DEVICE__ float __fmaf_rz(float __x, float __y, float __z) { return __ocml_fma_rtz_f32(__x, __y, __z); } #else __DEVICE__ float __fmaf_rn(float __x, float __y, float __z) { return __builtin_fmaf(__x, __y, __z); } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); } __DEVICE__ float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); } __DEVICE__ float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); } __DEVICE__ float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); } #else __DEVICE__ float __fmul_rn(float __x, float __y) { return __x * __y; } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); } __DEVICE__ float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); } __DEVICE__ float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); } __DEVICE__ float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); } #else __DEVICE__ float __frcp_rn(float __x) { return 1.0f / __x; } #endif __DEVICE__ float __frsqrt_rn(float __x) { return __builtin_amdgcn_rsqf(__x); } #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); } __DEVICE__ float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); } __DEVICE__ float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); } __DEVICE__ float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); } #else __DEVICE__ float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); } __DEVICE__ float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); } __DEVICE__ float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); } __DEVICE__ float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); } #else __DEVICE__ float __fsub_rn(float __x, float __y) { return __x - __y; } #endif __DEVICE__ float __log10f(float __x) { return __builtin_log10f(__x); } __DEVICE__ float __log2f(float __x) { return __builtin_amdgcn_logf(__x); } __DEVICE__ float __logf(float __x) { return __builtin_logf(__x); } __DEVICE__ float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } __DEVICE__ float __saturatef(float __x) { return (__x < 0) ? 
0 : ((__x > 1) ? 1 : __x); } __DEVICE__ void __sincosf(float __x, float *__sinptr, float *__cosptr) { *__sinptr = __ocml_native_sin_f32(__x); *__cosptr = __ocml_native_cos_f32(__x); } __DEVICE__ float __sinf(float __x) { return __ocml_native_sin_f32(__x); } __DEVICE__ float __tanf(float __x) { return __sinf(__x) * __builtin_amdgcn_rcpf(__cosf(__x)); } // END INTRINSICS #if defined(__cplusplus) __DEVICE__ int abs(int __x) { return __builtin_abs(__x); } __DEVICE__ long labs(long __x) { return __builtin_labs(__x); } __DEVICE__ long long llabs(long long __x) { return __builtin_llabs(__x); } #endif __DEVICE__ float acosf(float __x) { return __ocml_acos_f32(__x); } __DEVICE__ float acoshf(float __x) { return __ocml_acosh_f32(__x); } __DEVICE__ float asinf(float __x) { return __ocml_asin_f32(__x); } __DEVICE__ float asinhf(float __x) { return __ocml_asinh_f32(__x); } __DEVICE__ float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); } __DEVICE__ float atanf(float __x) { return __ocml_atan_f32(__x); } __DEVICE__ float atanhf(float __x) { return __ocml_atanh_f32(__x); } __DEVICE__ float cbrtf(float __x) { return __ocml_cbrt_f32(__x); } __DEVICE__ float ceilf(float __x) { return __builtin_ceilf(__x); } __DEVICE__ float copysignf(float __x, float __y) { return __builtin_copysignf(__x, __y); } __DEVICE__ float cosf(float __x) { return __FAST_OR_SLOW(__cosf, __ocml_cos_f32)(__x); } __DEVICE__ float coshf(float __x) { return __ocml_cosh_f32(__x); } __DEVICE__ float cospif(float __x) { return __ocml_cospi_f32(__x); } __DEVICE__ float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); } __DEVICE__ float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); } __DEVICE__ float erfcf(float __x) { return __ocml_erfc_f32(__x); } __DEVICE__ float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); } __DEVICE__ float erfcxf(float __x) { return __ocml_erfcx_f32(__x); } __DEVICE__ float erff(float __x) { return __ocml_erf_f32(__x); } __DEVICE__ float erfinvf(float __x) { return __ocml_erfinv_f32(__x); } __DEVICE__ float exp10f(float __x) { return __ocml_exp10_f32(__x); } __DEVICE__ float exp2f(float __x) { return __builtin_exp2f(__x); } __DEVICE__ float expf(float __x) { return __builtin_expf(__x); } __DEVICE__ float expm1f(float __x) { return __ocml_expm1_f32(__x); } __DEVICE__ float fabsf(float __x) { return __builtin_fabsf(__x); } __DEVICE__ float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); } __DEVICE__ float fdividef(float __x, float __y) { return __x / __y; } __DEVICE__ float floorf(float __x) { return __builtin_floorf(__x); } __DEVICE__ float fmaf(float __x, float __y, float __z) { return __builtin_fmaf(__x, __y, __z); } __DEVICE__ float fmaxf(float __x, float __y) { return __builtin_fmaxf(__x, __y); } __DEVICE__ float fminf(float __x, float __y) { return __builtin_fminf(__x, __y); } __DEVICE__ float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); } __DEVICE__ float frexpf(float __x, int *__nptr) { return __builtin_frexpf(__x, __nptr); } __DEVICE__ float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); } __DEVICE__ int ilogbf(float __x) { return __ocml_ilogb_f32(__x); } __DEVICE__ __RETURN_TYPE __finitef(float __x) { return __builtin_isfinite(__x); } __DEVICE__ __RETURN_TYPE __isinff(float __x) { return __builtin_isinf(__x); } __DEVICE__ __RETURN_TYPE __isnanf(float __x) { return __builtin_isnan(__x); } __DEVICE__ float j0f(float __x) { return __ocml_j0_f32(__x); } __DEVICE__ float j1f(float __x) { return __ocml_j1_f32(__x); } 
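// ---------------------------------------------------------------------------
// Editorial sketch, not part of the upstream header: functions such as cosf
// above (and sinf, logf, log2f later in this file) dispatch through the
// __FAST_OR_SLOW macro defined near the top. When clang defines
// __CLANG_GPU_APPROX_TRANSCENDENTALS__ (e.g. under -ffast-math or
// -fgpu-approx-transcendentals), the fast, lower-precision __ocml_native_*
// entry points are selected; otherwise the slower, full-precision __ocml_*
// entry points are used. The kernel below is a hypothetical usage example
// that only illustrates this dispatch; it is guarded by '#if 0' so it is
// never compiled as part of the header.
#if 0
__global__ void __approx_dispatch_example(const float *__in, float *__out,
                                          int __n) {
  int __i = threadIdx.x + blockIdx.x * blockDim.x;
  if (__i < __n) {
    // With approx transcendentals: cosf -> __cosf -> __ocml_native_cos_f32.
    // Without them:                cosf -> __ocml_cos_f32 (full precision).
    __out[__i] = cosf(__in[__i]);
  }
}
#endif
// ---------------------------------------------------------------------------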
__DEVICE__ float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication // and the Miller & Brown algorithm // for linear recurrences to get O(log n) steps, but it's unclear if // it'd be beneficial in this case. if (__n == 0) return j0f(__x); if (__n == 1) return j1f(__x); float __x0 = j0f(__x); float __x1 = j1f(__x); for (int __i = 1; __i < __n; ++__i) { float __x2 = (2 * __i) / __x * __x1 - __x0; __x0 = __x1; __x1 = __x2; } return __x1; } __DEVICE__ float ldexpf(float __x, int __e) { return __builtin_amdgcn_ldexpf(__x, __e); } __DEVICE__ float lgammaf(float __x) { return __ocml_lgamma_f32(__x); } __DEVICE__ long long int llrintf(float __x) { return __builtin_rintf(__x); } __DEVICE__ long long int llroundf(float __x) { return __builtin_roundf(__x); } __DEVICE__ float log10f(float __x) { return __builtin_log10f(__x); } __DEVICE__ float log1pf(float __x) { return __ocml_log1p_f32(__x); } __DEVICE__ float log2f(float __x) { return __FAST_OR_SLOW(__log2f, __ocml_log2_f32)(__x); } __DEVICE__ float logbf(float __x) { return __ocml_logb_f32(__x); } __DEVICE__ float logf(float __x) { return __FAST_OR_SLOW(__logf, __ocml_log_f32)(__x); } __DEVICE__ long int lrintf(float __x) { return __builtin_rintf(__x); } __DEVICE__ long int lroundf(float __x) { return __builtin_roundf(__x); } __DEVICE__ float modff(float __x, float *__iptr) { float __tmp; #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif float __r = __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); *__iptr = __tmp; return __r; } __DEVICE__ float nanf(const char *__tagp __attribute__((nonnull))) { union { float val; struct ieee_float { unsigned int mantissa : 22; unsigned int quiet : 1; unsigned int exponent : 8; unsigned int sign : 1; } bits; } __tmp; __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits)); __tmp.bits.sign = 0u; __tmp.bits.exponent = ~0u; __tmp.bits.quiet = 1u; __tmp.bits.mantissa = __make_mantissa(__tagp); return __tmp.val; } __DEVICE__ float nearbyintf(float __x) { return __builtin_nearbyintf(__x); } __DEVICE__ float nextafterf(float __x, float __y) { return __ocml_nextafter_f32(__x, __y); } __DEVICE__ float norm3df(float __x, float __y, float __z) { return __ocml_len3_f32(__x, __y, __z); } __DEVICE__ float norm4df(float __x, float __y, float __z, float __w) { return __ocml_len4_f32(__x, __y, __z, __w); } __DEVICE__ float normcdff(float __x) { return __ocml_ncdf_f32(__x); } __DEVICE__ float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); } __DEVICE__ float normf(int __dim, const float *__a) { // TODO: placeholder until OCML adds support. 
float __r = 0; while (__dim--) { __r += __a[0] * __a[0]; ++__a; } return __builtin_sqrtf(__r); } __DEVICE__ float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } __DEVICE__ float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); } __DEVICE__ float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); } __DEVICE__ float remainderf(float __x, float __y) { return __ocml_remainder_f32(__x, __y); } __DEVICE__ float remquof(float __x, float __y, int *__quo) { int __tmp; #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif float __r = __ocml_remquo_f32( __x, __y, (__attribute__((address_space(5))) int *)&__tmp); *__quo = __tmp; return __r; } __DEVICE__ float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); } __DEVICE__ float rintf(float __x) { return __builtin_rintf(__x); } __DEVICE__ float rnorm3df(float __x, float __y, float __z) { return __ocml_rlen3_f32(__x, __y, __z); } __DEVICE__ float rnorm4df(float __x, float __y, float __z, float __w) { return __ocml_rlen4_f32(__x, __y, __z, __w); } __DEVICE__ float rnormf(int __dim, const float *__a) { // TODO: placeholder until OCML adds support. float __r = 0; while (__dim--) { __r += __a[0] * __a[0]; ++__a; } return __ocml_rsqrt_f32(__r); } __DEVICE__ float roundf(float __x) { return __builtin_roundf(__x); } __DEVICE__ float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); } __DEVICE__ float scalblnf(float __x, long int __n) { return (__n < INT_MAX) ? __builtin_amdgcn_ldexpf(__x, __n) : __ocml_scalb_f32(__x, __n); } __DEVICE__ float scalbnf(float __x, int __n) { return __builtin_amdgcn_ldexpf(__x, __n); } __DEVICE__ __RETURN_TYPE __signbitf(float __x) { return __builtin_signbitf(__x); } __DEVICE__ void sincosf(float __x, float *__sinptr, float *__cosptr) { float __tmp; #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif #ifdef __CLANG_CUDA_APPROX_TRANSCENDENTALS__ __sincosf(__x, __sinptr, __cosptr); #else *__sinptr = __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); *__cosptr = __tmp; #endif } __DEVICE__ void sincospif(float __x, float *__sinptr, float *__cosptr) { float __tmp; #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif *__sinptr = __ocml_sincospi_f32( __x, (__attribute__((address_space(5))) float *)&__tmp); *__cosptr = __tmp; } __DEVICE__ float sinf(float __x) { return __FAST_OR_SLOW(__sinf, __ocml_sin_f32)(__x); } __DEVICE__ float sinhf(float __x) { return __ocml_sinh_f32(__x); } __DEVICE__ float sinpif(float __x) { return __ocml_sinpi_f32(__x); } __DEVICE__ float sqrtf(float __x) { return __builtin_sqrtf(__x); } __DEVICE__ float tanf(float __x) { return __ocml_tan_f32(__x); } __DEVICE__ float tanhf(float __x) { return __ocml_tanh_f32(__x); } __DEVICE__ float tgammaf(float __x) { return __ocml_tgamma_f32(__x); } __DEVICE__ float truncf(float __x) { return __builtin_truncf(__x); } __DEVICE__ float y0f(float __x) { return __ocml_y0_f32(__x); } __DEVICE__ float y1f(float __x) { return __ocml_y1_f32(__x); } __DEVICE__ float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication // and the Miller & Brown algorithm // for linear recurrences to get O(log n) steps, but it's unclear if // it'd be beneficial in this case. Placeholder until OCML adds // support. 
if (__n == 0) return y0f(__x); if (__n == 1) return y1f(__x); float __x0 = y0f(__x); float __x1 = y1f(__x); for (int __i = 1; __i < __n; ++__i) { float __x2 = (2 * __i) / __x * __x1 - __x0; __x0 = __x1; __x1 = __x2; } return __x1; } // END FLOAT // BEGIN DOUBLE __DEVICE__ double acos(double __x) { return __ocml_acos_f64(__x); } __DEVICE__ double acosh(double __x) { return __ocml_acosh_f64(__x); } __DEVICE__ double asin(double __x) { return __ocml_asin_f64(__x); } __DEVICE__ double asinh(double __x) { return __ocml_asinh_f64(__x); } __DEVICE__ double atan(double __x) { return __ocml_atan_f64(__x); } __DEVICE__ double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); } __DEVICE__ double atanh(double __x) { return __ocml_atanh_f64(__x); } __DEVICE__ double cbrt(double __x) { return __ocml_cbrt_f64(__x); } __DEVICE__ double ceil(double __x) { return __builtin_ceil(__x); } __DEVICE__ double copysign(double __x, double __y) { return __builtin_copysign(__x, __y); } __DEVICE__ double cos(double __x) { return __ocml_cos_f64(__x); } __DEVICE__ double cosh(double __x) { return __ocml_cosh_f64(__x); } __DEVICE__ double cospi(double __x) { return __ocml_cospi_f64(__x); } __DEVICE__ double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); } __DEVICE__ double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); } __DEVICE__ double erf(double __x) { return __ocml_erf_f64(__x); } __DEVICE__ double erfc(double __x) { return __ocml_erfc_f64(__x); } __DEVICE__ double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); } __DEVICE__ double erfcx(double __x) { return __ocml_erfcx_f64(__x); } __DEVICE__ double erfinv(double __x) { return __ocml_erfinv_f64(__x); } __DEVICE__ double exp(double __x) { return __ocml_exp_f64(__x); } __DEVICE__ double exp10(double __x) { return __ocml_exp10_f64(__x); } __DEVICE__ double exp2(double __x) { return __ocml_exp2_f64(__x); } __DEVICE__ double expm1(double __x) { return __ocml_expm1_f64(__x); } __DEVICE__ double fabs(double __x) { return __builtin_fabs(__x); } __DEVICE__ double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); } __DEVICE__ double floor(double __x) { return __builtin_floor(__x); } __DEVICE__ double fma(double __x, double __y, double __z) { return __builtin_fma(__x, __y, __z); } __DEVICE__ double fmax(double __x, double __y) { return __builtin_fmax(__x, __y); } __DEVICE__ double fmin(double __x, double __y) { return __builtin_fmin(__x, __y); } __DEVICE__ double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); } __DEVICE__ double frexp(double __x, int *__nptr) { return __builtin_frexp(__x, __nptr); } __DEVICE__ double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); } __DEVICE__ int ilogb(double __x) { return __ocml_ilogb_f64(__x); } __DEVICE__ __RETURN_TYPE __finite(double __x) { return __builtin_isfinite(__x); } __DEVICE__ __RETURN_TYPE __isinf(double __x) { return __builtin_isinf(__x); } __DEVICE__ __RETURN_TYPE __isnan(double __x) { return __builtin_isnan(__x); } __DEVICE__ double j0(double __x) { return __ocml_j0_f64(__x); } __DEVICE__ double j1(double __x) { return __ocml_j1_f64(__x); } __DEVICE__ double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication // and the Miller & Brown algorithm // for linear recurrences to get O(log n) steps, but it's unclear if // it'd be beneficial in this case. Placeholder until OCML adds // support. 
if (__n == 0) return j0(__x); if (__n == 1) return j1(__x); double __x0 = j0(__x); double __x1 = j1(__x); for (int __i = 1; __i < __n; ++__i) { double __x2 = (2 * __i) / __x * __x1 - __x0; __x0 = __x1; __x1 = __x2; } return __x1; } __DEVICE__ double ldexp(double __x, int __e) { return __builtin_amdgcn_ldexp(__x, __e); } __DEVICE__ double lgamma(double __x) { return __ocml_lgamma_f64(__x); } __DEVICE__ long long int llrint(double __x) { return __builtin_rint(__x); } __DEVICE__ long long int llround(double __x) { return __builtin_round(__x); } __DEVICE__ double log(double __x) { return __ocml_log_f64(__x); } __DEVICE__ double log10(double __x) { return __ocml_log10_f64(__x); } __DEVICE__ double log1p(double __x) { return __ocml_log1p_f64(__x); } __DEVICE__ double log2(double __x) { return __ocml_log2_f64(__x); } __DEVICE__ double logb(double __x) { return __ocml_logb_f64(__x); } __DEVICE__ long int lrint(double __x) { return __builtin_rint(__x); } __DEVICE__ long int lround(double __x) { return __builtin_round(__x); } __DEVICE__ double modf(double __x, double *__iptr) { double __tmp; #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif double __r = __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp); *__iptr = __tmp; return __r; } __DEVICE__ double nan(const char *__tagp) { #if !_WIN32 union { double val; struct ieee_double { uint64_t mantissa : 51; uint32_t quiet : 1; uint32_t exponent : 11; uint32_t sign : 1; } bits; } __tmp; __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits)); __tmp.bits.sign = 0u; __tmp.bits.exponent = ~0u; __tmp.bits.quiet = 1u; __tmp.bits.mantissa = __make_mantissa(__tagp); return __tmp.val; #else __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double)); uint64_t __val = __make_mantissa(__tagp); __val |= 0xFFFull << 51; return *reinterpret_cast<double *>(&__val); #endif } __DEVICE__ double nearbyint(double __x) { return __builtin_nearbyint(__x); } __DEVICE__ double nextafter(double __x, double __y) { return __ocml_nextafter_f64(__x, __y); } __DEVICE__ double norm(int __dim, const double *__a) { // TODO: placeholder until OCML adds support. double __r = 0; while (__dim--) { __r += __a[0] * __a[0]; ++__a; } return __builtin_sqrt(__r); } __DEVICE__ double norm3d(double __x, double __y, double __z) { return __ocml_len3_f64(__x, __y, __z); } __DEVICE__ double norm4d(double __x, double __y, double __z, double __w) { return __ocml_len4_f64(__x, __y, __z, __w); } __DEVICE__ double normcdf(double __x) { return __ocml_ncdf_f64(__x); } __DEVICE__ double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); } __DEVICE__ double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); } __DEVICE__ double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); } __DEVICE__ double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); } __DEVICE__ double remainder(double __x, double __y) { return __ocml_remainder_f64(__x, __y); } __DEVICE__ double remquo(double __x, double __y, int *__quo) { int __tmp; #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif double __r = __ocml_remquo_f64( __x, __y, (__attribute__((address_space(5))) int *)&__tmp); *__quo = __tmp; return __r; } __DEVICE__ double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); } __DEVICE__ double rint(double __x) { return __builtin_rint(__x); } __DEVICE__ double rnorm(int __dim, const double *__a) { // TODO: placeholder until OCML adds support. 
double __r = 0; while (__dim--) { __r += __a[0] * __a[0]; ++__a; } return __ocml_rsqrt_f64(__r); } __DEVICE__ double rnorm3d(double __x, double __y, double __z) { return __ocml_rlen3_f64(__x, __y, __z); } __DEVICE__ double rnorm4d(double __x, double __y, double __z, double __w) { return __ocml_rlen4_f64(__x, __y, __z, __w); } __DEVICE__ double round(double __x) { return __builtin_round(__x); } __DEVICE__ double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); } __DEVICE__ double scalbln(double __x, long int __n) { return (__n < INT_MAX) ? __builtin_amdgcn_ldexp(__x, __n) : __ocml_scalb_f64(__x, __n); } __DEVICE__ double scalbn(double __x, int __n) { return __builtin_amdgcn_ldexp(__x, __n); } __DEVICE__ __RETURN_TYPE __signbit(double __x) { return __builtin_signbit(__x); } __DEVICE__ double sin(double __x) { return __ocml_sin_f64(__x); } __DEVICE__ void sincos(double __x, double *__sinptr, double *__cosptr) { double __tmp; #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif *__sinptr = __ocml_sincos_f64( __x, (__attribute__((address_space(5))) double *)&__tmp); *__cosptr = __tmp; } __DEVICE__ void sincospi(double __x, double *__sinptr, double *__cosptr) { double __tmp; #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif *__sinptr = __ocml_sincospi_f64( __x, (__attribute__((address_space(5))) double *)&__tmp); *__cosptr = __tmp; } __DEVICE__ double sinh(double __x) { return __ocml_sinh_f64(__x); } __DEVICE__ double sinpi(double __x) { return __ocml_sinpi_f64(__x); } __DEVICE__ double sqrt(double __x) { return __builtin_sqrt(__x); } __DEVICE__ double tan(double __x) { return __ocml_tan_f64(__x); } __DEVICE__ double tanh(double __x) { return __ocml_tanh_f64(__x); } __DEVICE__ double tgamma(double __x) { return __ocml_tgamma_f64(__x); } __DEVICE__ double trunc(double __x) { return __builtin_trunc(__x); } __DEVICE__ double y0(double __x) { return __ocml_y0_f64(__x); } __DEVICE__ double y1(double __x) { return __ocml_y1_f64(__x); } __DEVICE__ double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication // and the Miller & Brown algorithm // for linear recurrences to get O(log n) steps, but it's unclear if // it'd be beneficial in this case. Placeholder until OCML adds // support. 
if (__n == 0) return y0(__x); if (__n == 1) return y1(__x); double __x0 = y0(__x); double __x1 = y1(__x); for (int __i = 1; __i < __n; ++__i) { double __x2 = (2 * __i) / __x * __x1 - __x0; __x0 = __x1; __x1 = __x2; } return __x1; } // BEGIN INTRINSICS #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ double __dadd_rd(double __x, double __y) { return __ocml_add_rtn_f64(__x, __y); } __DEVICE__ double __dadd_rn(double __x, double __y) { return __ocml_add_rte_f64(__x, __y); } __DEVICE__ double __dadd_ru(double __x, double __y) { return __ocml_add_rtp_f64(__x, __y); } __DEVICE__ double __dadd_rz(double __x, double __y) { return __ocml_add_rtz_f64(__x, __y); } #else __DEVICE__ double __dadd_rn(double __x, double __y) { return __x + __y; } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ double __ddiv_rd(double __x, double __y) { return __ocml_div_rtn_f64(__x, __y); } __DEVICE__ double __ddiv_rn(double __x, double __y) { return __ocml_div_rte_f64(__x, __y); } __DEVICE__ double __ddiv_ru(double __x, double __y) { return __ocml_div_rtp_f64(__x, __y); } __DEVICE__ double __ddiv_rz(double __x, double __y) { return __ocml_div_rtz_f64(__x, __y); } #else __DEVICE__ double __ddiv_rn(double __x, double __y) { return __x / __y; } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ double __dmul_rd(double __x, double __y) { return __ocml_mul_rtn_f64(__x, __y); } __DEVICE__ double __dmul_rn(double __x, double __y) { return __ocml_mul_rte_f64(__x, __y); } __DEVICE__ double __dmul_ru(double __x, double __y) { return __ocml_mul_rtp_f64(__x, __y); } __DEVICE__ double __dmul_rz(double __x, double __y) { return __ocml_mul_rtz_f64(__x, __y); } #else __DEVICE__ double __dmul_rn(double __x, double __y) { return __x * __y; } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ double __drcp_rd(double __x) { return __ocml_div_rtn_f64(1.0, __x); } __DEVICE__ double __drcp_rn(double __x) { return __ocml_div_rte_f64(1.0, __x); } __DEVICE__ double __drcp_ru(double __x) { return __ocml_div_rtp_f64(1.0, __x); } __DEVICE__ double __drcp_rz(double __x) { return __ocml_div_rtz_f64(1.0, __x); } #else __DEVICE__ double __drcp_rn(double __x) { return 1.0 / __x; } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); } __DEVICE__ double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); } __DEVICE__ double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); } __DEVICE__ double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); } #else __DEVICE__ double __dsqrt_rn(double __x) { return __builtin_sqrt(__x); } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ double __dsub_rd(double __x, double __y) { return __ocml_sub_rtn_f64(__x, __y); } __DEVICE__ double __dsub_rn(double __x, double __y) { return __ocml_sub_rte_f64(__x, __y); } __DEVICE__ double __dsub_ru(double __x, double __y) { return __ocml_sub_rtp_f64(__x, __y); } __DEVICE__ double __dsub_rz(double __x, double __y) { return __ocml_sub_rtz_f64(__x, __y); } #else __DEVICE__ double __dsub_rn(double __x, double __y) { return __x - __y; } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ double __fma_rd(double __x, double __y, double __z) { return __ocml_fma_rtn_f64(__x, __y, __z); } __DEVICE__ double __fma_rn(double __x, double __y, double __z) { return __ocml_fma_rte_f64(__x, __y, __z); } __DEVICE__ double __fma_ru(double __x, double __y, double __z) { return __ocml_fma_rtp_f64(__x, __y, __z); } __DEVICE__ double __fma_rz(double __x, double 
__y, double __z) { return __ocml_fma_rtz_f64(__x, __y, __z); } #else __DEVICE__ double __fma_rn(double __x, double __y, double __z) { return __builtin_fma(__x, __y, __z); } #endif // END INTRINSICS // END DOUBLE // C only macros #if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L #define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x) #define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x) #define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x) #define signbit(__x) \ _Generic((__x), float : __signbitf, double : __signbit)(__x) #endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L #if defined(__cplusplus) template <class T> __DEVICE__ T min(T __arg1, T __arg2) { return (__arg1 < __arg2) ? __arg1 : __arg2; } template <class T> __DEVICE__ T max(T __arg1, T __arg2) { return (__arg1 > __arg2) ? __arg1 : __arg2; } __DEVICE__ int min(int __arg1, int __arg2) { return (__arg1 < __arg2) ? __arg1 : __arg2; } __DEVICE__ int max(int __arg1, int __arg2) { return (__arg1 > __arg2) ? __arg1 : __arg2; } __DEVICE__ float max(float __x, float __y) { return __builtin_fmaxf(__x, __y); } __DEVICE__ double max(double __x, double __y) { return __builtin_fmax(__x, __y); } __DEVICE__ float min(float __x, float __y) { return __builtin_fminf(__x, __y); } __DEVICE__ double min(double __x, double __y) { return __builtin_fmin(__x, __y); } #if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) __host__ inline static int min(int __arg1, int __arg2) { return __arg1 < __arg2 ? __arg1 : __arg2; } __host__ inline static int max(int __arg1, int __arg2) { return __arg1 > __arg2 ? __arg1 : __arg2; } #endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) #endif #pragma pop_macro("__DEVICE__") #pragma pop_macro("__RETURN_TYPE") #pragma pop_macro("__FAST_OR_SLOW") #endif // __CLANG_HIP_MATH_H__ __clang_hip_stdlib.harm_cde.harm_neon_sve_bridge.harmintr.havx512vlvbmi2intrin.h/*===---- cldemoteintrin.h - CLDEMOTE intrinsic ----------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __CLDEMOTEINTRIN_H #define __CLDEMOTEINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("cldemote"))) /// Hint to hardware that the cache line that contains \p __P should be demoted /// from the cache closest to the processor core to a level more distant from /// the processor core. /// /// \headerfile /// /// This intrinsic corresponds to the CLDEMOTE instruction. static __inline__ void __DEFAULT_FN_ATTRS _cldemote(const void * __P) { __builtin_ia32_cldemote(__P); } #define _mm_cldemote(p) _cldemote(p) #undef __DEFAULT_FN_ATTRS #endif enqcmdintrin.h/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86INTRIN_H #error "Never use directly; include instead." 
#endif #ifndef __FMA4INTRIN_H #define __FMA4INTRIN_H #include /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(256))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maddsub_pd(__m128d __A, 
__m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif /* __FMA4INTRIN_H */ inttypes.h/*===---- inttypes.h - Standard header for integer printf macros ----------===*\ * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * \*===----------------------------------------------------------------------===*/ #ifndef __CLANG_INTTYPES_H // AIX system headers need inttypes.h to be re-enterable while _STD_TYPES_T // is defined until an inclusion of it without _STD_TYPES_T occurs, in which // case the header guard macro is defined. 
#if !defined(_AIX) || !defined(_STD_TYPES_T) #define __CLANG_INTTYPES_H #endif #if defined(_MSC_VER) && _MSC_VER < 1800 #error MSVC does not have inttypes.h prior to Visual Studio 2013 #endif #include_next #if defined(_MSC_VER) && _MSC_VER < 1900 /* MSVC headers define int32_t as int, but PRIx32 as "lx" instead of "x". * This triggers format warnings, so fix it up here. */ #undef PRId32 #undef PRIdLEAST32 #undef PRIdFAST32 #undef PRIi32 #undef PRIiLEAST32 #undef PRIiFAST32 #undef PRIo32 #undef PRIoLEAST32 #undef PRIoFAST32 #undef PRIu32 #undef PRIuLEAST32 #undef PRIuFAST32 #undef PRIx32 #undef PRIxLEAST32 #undef PRIxFAST32 #undef PRIX32 #undef PRIXLEAST32 #undef PRIXFAST32 #undef SCNd32 #undef SCNdLEAST32 #undef SCNdFAST32 #undef SCNi32 #undef SCNiLEAST32 #undef SCNiFAST32 #undef SCNo32 #undef SCNoLEAST32 #undef SCNoFAST32 #undef SCNu32 #undef SCNuLEAST32 #undef SCNuFAST32 #undef SCNx32 #undef SCNxLEAST32 #undef SCNxFAST32 #define PRId32 "d" #define PRIdLEAST32 "d" #define PRIdFAST32 "d" #define PRIi32 "i" #define PRIiLEAST32 "i" #define PRIiFAST32 "i" #define PRIo32 "o" #define PRIoLEAST32 "o" #define PRIoFAST32 "o" #define PRIu32 "u" #define PRIuLEAST32 "u" #define PRIuFAST32 "u" #define PRIx32 "x" #define PRIxLEAST32 "x" #define PRIxFAST32 "x" #define PRIX32 "X" #define PRIXLEAST32 "X" #define PRIXFAST32 "X" #define SCNd32 "d" #define SCNdLEAST32 "d" #define SCNdFAST32 "d" #define SCNi32 "i" #define SCNiLEAST32 "i" #define SCNiFAST32 "i" #define SCNo32 "o" #define SCNoLEAST32 "o" #define SCNoFAST32 "o" #define SCNu32 "u" #define SCNuLEAST32 "u" #define SCNuFAST32 "u" #define SCNx32 "x" #define SCNxLEAST32 "x" #define SCNxFAST32 "x" #endif #endif /* __CLANG_INTTYPES_H */ larchintrin.hstdckdint.h/*===---- velintrin_approx.h - VEL intrinsics helper for VE ----------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __VEL_INTRIN_APPROX_H__ #define __VEL_INTRIN_APPROX_H__ static inline __vr _vel_approx_vfdivs_vvvl(__vr v0, __vr v1, int l) { float s0; __vr v2, v3, v4, v5; v5 = _vel_vrcps_vvl(v1, l); s0 = 1.0; v4 = _vel_vfnmsbs_vsvvl(s0, v1, v5, l); v3 = _vel_vfmads_vvvvl(v5, v5, v4, l); v2 = _vel_vfmuls_vvvl(v0, v3, l); v4 = _vel_vfnmsbs_vvvvl(v0, v2, v1, l); v2 = _vel_vfmads_vvvvl(v2, v5, v4, l); v0 = _vel_vfnmsbs_vvvvl(v0, v2, v1, l); v0 = _vel_vfmads_vvvvl(v2, v3, v0, l); return v0; } static inline __vr _vel_approx_pvfdiv_vvvl(__vr v0, __vr v1, int l) { float s0; __vr v2, v3, v4, v5; v5 = _vel_pvrcp_vvl(v1, l); s0 = 1.0; v4 = _vel_pvfnmsb_vsvvl(s0, v1, v5, l); v3 = _vel_pvfmad_vvvvl(v5, v5, v4, l); v2 = _vel_pvfmul_vvvl(v0, v3, l); v4 = _vel_pvfnmsb_vvvvl(v0, v2, v1, l); v2 = _vel_pvfmad_vvvvl(v2, v5, v4, l); v0 = _vel_pvfnmsb_vvvvl(v0, v2, v1, l); v0 = _vel_pvfmad_vvvvl(v2, v3, v0, l); return v0; } static inline __vr _vel_approx_vfdivs_vsvl(float s0, __vr v0, int l) { float s1; __vr v1, v2, v3, v4; v4 = _vel_vrcps_vvl(v0, l); s1 = 1.0; v2 = _vel_vfnmsbs_vsvvl(s1, v0, v4, l); v2 = _vel_vfmads_vvvvl(v4, v4, v2, l); v1 = _vel_vfmuls_vsvl(s0, v2, l); v3 = _vel_vfnmsbs_vsvvl(s0, v1, v0, l); v1 = _vel_vfmads_vvvvl(v1, v4, v3, l); v3 = _vel_vfnmsbs_vsvvl(s0, v1, v0, l); v0 = _vel_vfmads_vvvvl(v1, v2, v3, l); return v0; } static inline __vr _vel_approx_vfdivs_vvsl(__vr v0, float s0, int l) { float s1; __vr v1, v2; s1 = 1.0f / s0; v1 = _vel_vfmuls_vsvl(s1, v0, l); v2 = _vel_vfnmsbs_vvsvl(v0, s0, v1, l); v0 = _vel_vfmads_vvsvl(v1, s1, v2, l); return v0; } static inline __vr _vel_approx_vfdivd_vsvl(double s0, __vr v0, int l) { __vr v1, v2, v3; v2 = _vel_vrcpd_vvl(v0, l); double s1 = 1.0; v3 = _vel_vfnmsbd_vsvvl(s1, v0, v2, l); v2 = _vel_vfmadd_vvvvl(v2, v2, v3, l); v1 = _vel_vfnmsbd_vsvvl(s1, v0, v2, l); v1 = _vel_vfmadd_vvvvl(v2, v2, v1, l); v1 = _vel_vaddul_vsvl(1, v1, l); v3 = _vel_vfnmsbd_vsvvl(s1, v0, v1, l); v3 = _vel_vfmadd_vvvvl(v1, v1, v3, l); v1 = _vel_vfmuld_vsvl(s0, v3, l); v0 = _vel_vfnmsbd_vsvvl(s0, v1, v0, l); v0 = _vel_vfmadd_vvvvl(v1, v3, v0, l); return v0; } static inline __vr _vel_approx_vfsqrtd_vvl(__vr v0, int l) { double s0, s1; __vr v1, v2, v3; v2 = _vel_vrsqrtdnex_vvl(v0, l); v1 = _vel_vfmuld_vvvl(v0, v2, l); s0 = 1.0; s1 = 0.5; v3 = _vel_vfnmsbd_vsvvl(s0, v1, v2, l); v3 = _vel_vfmuld_vsvl(s1, v3, l); v2 = _vel_vfmadd_vvvvl(v2, v2, v3, l); v1 = _vel_vfmuld_vvvl(v0, v2, l); v3 = _vel_vfnmsbd_vsvvl(s0, v1, v2, l); v3 = _vel_vfmuld_vsvl(s1, v3, l); v0 = _vel_vfmadd_vvvvl(v1, v1, v3, l); return v0; } static inline __vr _vel_approx_vfsqrts_vvl(__vr v0, int l) { float s0, s1; __vr v1, v2, v3; v0 = _vel_vcvtds_vvl(v0, l); v2 = _vel_vrsqrtdnex_vvl(v0, l); v1 = _vel_vfmuld_vvvl(v0, v2, l); s0 = 1.0; s1 = 0.5; v3 = _vel_vfnmsbd_vsvvl(s0, v1, v2, l); v3 = _vel_vfmuld_vsvl(s1, v3, l); v2 = _vel_vfmadd_vvvvl(v2, v2, v3, l); v1 = _vel_vfmuld_vvvl(v0, v2, l); v3 = _vel_vfnmsbd_vsvvl(s0, v1, v2, l); v3 = _vel_vfmuld_vsvl(s1, v3, l); v0 = _vel_vfmadd_vvvvl(v1, v1, v3, l); v0 = _vel_vcvtsd_vvl(v0, l); return v0; } #endif velintrin_gen.hcuda_wrappers/bits/shared_ptr_base.hllvm_libc_wrappers/stdlib.hllvm_libc_wrappers/string.h/*===-- complex --- OpenMP complex wrapper for target regions --------- c++ -=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_OPENMP_COMPLEX_H__ #define __CLANG_OPENMP_COMPLEX_H__ #ifndef _OPENMP #error "This file is for OpenMP compilation only." #endif // We require math functions in the complex builtins below. #include #ifdef __NVPTX__ #define __OPENMP_NVPTX__ #include <__clang_cuda_complex_builtins.h> #undef __OPENMP_NVPTX__ #endif #ifdef __AMDGCN__ #define __OPENMP_AMDGCN__ #include <__clang_cuda_complex_builtins.h> #undef __OPENMP_AMDGCN__ #endif #endif // Grab the host header too. #include_next /*===---- bmiintrin.h - Implementation of BMI2 intrinsics on PowerPC -------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined X86GPRINTRIN_H_ #error "Never use directly; include instead." #endif #ifndef BMI2INTRIN_H_ #define BMI2INTRIN_H_ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _bzhi_u32(unsigned int __X, unsigned int __Y) { return ((__X << (32 - __Y)) >> (32 - __Y)); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P) { unsigned long long __res = (unsigned long long)__X * __Y; *__P = (unsigned int)(__res >> 32); return (unsigned int)__res; } #ifdef __PPC64__ extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _bzhi_u64(unsigned long long __X, unsigned long long __Y) { return ((__X << (64 - __Y)) >> (64 - __Y)); } /* __int128 requires base 64-bit. */ extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mulx_u64(unsigned long long __X, unsigned long long __Y, unsigned long long *__P) { unsigned __int128 __res = (unsigned __int128)__X * __Y; *__P = (unsigned long long)(__res >> 64); return (unsigned long long)__res; } #ifdef _ARCH_PWR7 /* popcount and bpermd require power7 minimum. */ extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _pdep_u64(unsigned long long __X, unsigned long long __M) { unsigned long __result = 0x0UL; const unsigned long __mask = 0x8000000000000000UL; unsigned long __m = __M; unsigned long __c, __t; unsigned long __p; /* The pop-count of the mask gives the number of the bits from source to process. This is also needed to shift bits from the source into the correct position for the result. */ __p = 64 - __builtin_popcountl(__M); /* The loop is for the number of '1' bits in the mask and clearing each mask bit as it is processed. */ while (__m != 0) { __c = __builtin_clzl(__m); __t = __X << (__p - __c); __m ^= (__mask >> __c); __result |= (__t & (__mask >> __c)); __p++; } return __result; } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _pext_u64(unsigned long long __X, unsigned long long __M) { unsigned long __p = 0x4040404040404040UL; // initial bit permute control const unsigned long __mask = 0x8000000000000000UL; unsigned long __m = __M; unsigned long __c; unsigned long __result; /* if the mask is constant and selects 8 bits or less we can use the Power8 Bit permute instruction. 
*/ if (__builtin_constant_p(__M) && (__builtin_popcountl(__M) <= 8)) { /* Also if the pext mask is constant, then the popcount is constant, we can evaluate the following loop at compile time and use a constant bit permute vector. */ long __i; for (__i = 0; __i < __builtin_popcountl(__M); __i++) { __c = __builtin_clzl(__m); __p = (__p << 8) | __c; __m ^= (__mask >> __c); } __result = __builtin_bpermd(__p, __X); } else { __p = 64 - __builtin_popcountl(__M); __result = 0; /* We could a use a for loop here, but that combined with -funroll-loops can expand to a lot of code. The while loop avoids unrolling and the compiler commons the xor from clearing the mask bit with the (m != 0) test. The result is a more compact loop setup and body. */ while (__m != 0) { unsigned long __t; __c = __builtin_clzl(__m); __t = (__X & (__mask >> __c)) >> (__p - __c); __m ^= (__mask >> __c); __result |= (__t); __p++; } } return __result; } /* these 32-bit implementations depend on 64-bit pdep/pext which depend on _ARCH_PWR7. */ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _pdep_u32(unsigned int __X, unsigned int __Y) { return _pdep_u64(__X, __Y); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _pext_u32(unsigned int __X, unsigned int __Y) { return _pext_u64(__X, __Y); } #endif /* _ARCH_PWR7 */ #endif /* __PPC64__ */ #endif /* BMI2INTRIN_H_ */ /*===---- mmintrin.h - Implementation of MMX intrinsics on PowerPC ---------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* Implemented from the specification included in the Intel C++ Compiler User Guide and Reference, version 9.0. */ #ifndef NO_WARN_X86_INTRINSICS /* This header file is to help porting code using Intel intrinsics explicitly from x86_64 to powerpc64/powerpc64le. Since PowerPC target doesn't support native 64-bit vector type, we typedef __m64 to 64-bit unsigned long long in MMX intrinsics, which works well for _si64 and some _pi32 operations. For _pi16 and _pi8 operations, it's better to transfer __m64 into 128-bit PowerPC vector first. Power8 introduced direct register move instructions which helps for more efficient implementation. It's user's responsibility to determine if the results of such port are acceptable or further changes are needed. Please note that much code using Intel intrinsics CAN BE REWRITTEN in more portable and efficient standard C or GNU C extensions with 64-bit scalar operations, or 128-bit SSE/Altivec operations, which are more recommended. */ #error \ "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." #endif #ifndef _MMINTRIN_H_INCLUDED #define _MMINTRIN_H_INCLUDED #if defined(__powerpc64__) && \ (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) #include /* The Intel API is flexible enough that we must allow aliasing with other vector types, and their scalar components. */ typedef __attribute__((__aligned__(8))) unsigned long long __m64; typedef __attribute__((__aligned__(8))) union { __m64 as_m64; char as_char[8]; signed char as_signed_char[8]; short as_short[4]; int as_int[2]; long long as_long_long; float as_float[2]; double as_double; } __m64_union; /* Empty the multimedia state. 
*/ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_empty(void) { /* nothing to do on PowerPC. */ } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_empty(void) { /* nothing to do on PowerPC. */ } /* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi32_si64(int __i) { return (__m64)(unsigned int)__i; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_from_int(int __i) { return _mm_cvtsi32_si64(__i); } /* Convert the lower 32 bits of the __m64 object into an integer. */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi64_si32(__m64 __i) { return ((int)__i); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_to_int(__m64 __i) { return _mm_cvtsi64_si32(__i); } /* Convert I to a __m64 object. */ /* Intel intrinsic. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_from_int64(long long __i) { return (__m64)__i; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi64_m64(long long __i) { return (__m64)__i; } /* Microsoft intrinsic. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi64x_si64(long long __i) { return (__m64)__i; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_pi64x(long long __i) { return (__m64)__i; } /* Convert the __m64 object to a 64bit integer. */ /* Intel intrinsic. */ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_to_int64(__m64 __i) { return (long long)__i; } extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtm64_si64(__m64 __i) { return (long long)__i; } /* Microsoft intrinsic. */ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi64_si64x(__m64 __i) { return (long long)__i; } #ifdef _ARCH_PWR8 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of the result, and the four 16-bit values from M2 into the upper four 8-bit values of the result, all with signed saturation. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_packs_pi16(__m64 __m1, __m64 __m2) { __vector signed short __vm1; __vector signed char __vresult; __vm1 = (__vector signed short)(__vector unsigned long long) #ifdef __LITTLE_ENDIAN__ {__m1, __m2}; #else {__m2, __m1}; #endif __vresult = vec_packs(__vm1, __vm1); return (__m64)((__vector long long)__vresult)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_packsswb(__m64 __m1, __m64 __m2) { return _mm_packs_pi16(__m1, __m2); } /* Pack the two 32-bit values from M1 in to the lower two 16-bit values of the result, and the two 32-bit values from M2 into the upper two 16-bit values of the result, all with signed saturation. 
*/ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_packs_pi32(__m64 __m1, __m64 __m2) { __vector signed int __vm1; __vector signed short __vresult; __vm1 = (__vector signed int)(__vector unsigned long long) #ifdef __LITTLE_ENDIAN__ {__m1, __m2}; #else {__m2, __m1}; #endif __vresult = vec_packs(__vm1, __vm1); return (__m64)((__vector long long)__vresult)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_packssdw(__m64 __m1, __m64 __m2) { return _mm_packs_pi32(__m1, __m2); } /* Pack the four 16-bit values from M1 into the lower four 8-bit values of the result, and the four 16-bit values from M2 into the upper four 8-bit values of the result, all with unsigned saturation. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_packs_pu16(__m64 __m1, __m64 __m2) { __vector unsigned char __r; __vector signed short __vm1 = (__vector signed short)(__vector long long) #ifdef __LITTLE_ENDIAN__ {__m1, __m2}; #else {__m2, __m1}; #endif const __vector signed short __zero = {0}; __vector __bool short __select = vec_cmplt(__vm1, __zero); __r = vec_packs((__vector unsigned short)__vm1, (__vector unsigned short)__vm1); __vector __bool char __packsel = vec_pack(__select, __select); __r = vec_sel(__r, (const __vector unsigned char)__zero, __packsel); return (__m64)((__vector long long)__r)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_packuswb(__m64 __m1, __m64 __m2) { return _mm_packs_pu16(__m1, __m2); } #endif /* end ARCH_PWR8 */ /* Interleave the four 8-bit values from the high half of M1 with the four 8-bit values from the high half of M2. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) { #if _ARCH_PWR8 __vector unsigned char __a, __b, __c; __a = (__vector unsigned char)vec_splats(__m1); __b = (__vector unsigned char)vec_splats(__m2); __c = vec_mergel(__a, __b); return (__m64)((__vector long long)__c)[1]; #else __m64_union __mu1, __mu2, __res; __mu1.as_m64 = __m1; __mu2.as_m64 = __m2; __res.as_char[0] = __mu1.as_char[4]; __res.as_char[1] = __mu2.as_char[4]; __res.as_char[2] = __mu1.as_char[5]; __res.as_char[3] = __mu2.as_char[5]; __res.as_char[4] = __mu1.as_char[6]; __res.as_char[5] = __mu2.as_char[6]; __res.as_char[6] = __mu1.as_char[7]; __res.as_char[7] = __mu2.as_char[7]; return (__m64)__res.as_m64; #endif } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_punpckhbw(__m64 __m1, __m64 __m2) { return _mm_unpackhi_pi8(__m1, __m2); } /* Interleave the two 16-bit values from the high half of M1 with the two 16-bit values from the high half of M2. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) { __m64_union __mu1, __mu2, __res; __mu1.as_m64 = __m1; __mu2.as_m64 = __m2; __res.as_short[0] = __mu1.as_short[2]; __res.as_short[1] = __mu2.as_short[2]; __res.as_short[2] = __mu1.as_short[3]; __res.as_short[3] = __mu2.as_short[3]; return (__m64)__res.as_m64; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_punpckhwd(__m64 __m1, __m64 __m2) { return _mm_unpackhi_pi16(__m1, __m2); } /* Interleave the 32-bit value from the high half of M1 with the 32-bit value from the high half of M2. 
*/ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) { __m64_union __mu1, __mu2, __res; __mu1.as_m64 = __m1; __mu2.as_m64 = __m2; __res.as_int[0] = __mu1.as_int[1]; __res.as_int[1] = __mu2.as_int[1]; return (__m64)__res.as_m64; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_punpckhdq(__m64 __m1, __m64 __m2) { return _mm_unpackhi_pi32(__m1, __m2); } /* Interleave the four 8-bit values from the low half of M1 with the four 8-bit values from the low half of M2. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) { #if _ARCH_PWR8 __vector unsigned char __a, __b, __c; __a = (__vector unsigned char)vec_splats(__m1); __b = (__vector unsigned char)vec_splats(__m2); __c = vec_mergel(__a, __b); return (__m64)((__vector long long)__c)[0]; #else __m64_union __mu1, __mu2, __res; __mu1.as_m64 = __m1; __mu2.as_m64 = __m2; __res.as_char[0] = __mu1.as_char[0]; __res.as_char[1] = __mu2.as_char[0]; __res.as_char[2] = __mu1.as_char[1]; __res.as_char[3] = __mu2.as_char[1]; __res.as_char[4] = __mu1.as_char[2]; __res.as_char[5] = __mu2.as_char[2]; __res.as_char[6] = __mu1.as_char[3]; __res.as_char[7] = __mu2.as_char[3]; return (__m64)__res.as_m64; #endif } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_punpcklbw(__m64 __m1, __m64 __m2) { return _mm_unpacklo_pi8(__m1, __m2); } /* Interleave the two 16-bit values from the low half of M1 with the two 16-bit values from the low half of M2. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) { __m64_union __mu1, __mu2, __res; __mu1.as_m64 = __m1; __mu2.as_m64 = __m2; __res.as_short[0] = __mu1.as_short[0]; __res.as_short[1] = __mu2.as_short[0]; __res.as_short[2] = __mu1.as_short[1]; __res.as_short[3] = __mu2.as_short[1]; return (__m64)__res.as_m64; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_punpcklwd(__m64 __m1, __m64 __m2) { return _mm_unpacklo_pi16(__m1, __m2); } /* Interleave the 32-bit value from the low half of M1 with the 32-bit value from the low half of M2. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) { __m64_union __mu1, __mu2, __res; __mu1.as_m64 = __m1; __mu2.as_m64 = __m2; __res.as_int[0] = __mu1.as_int[0]; __res.as_int[1] = __mu2.as_int[0]; return (__m64)__res.as_m64; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_punpckldq(__m64 __m1, __m64 __m2) { return _mm_unpacklo_pi32(__m1, __m2); } /* Add the 8-bit values in M1 to the 8-bit values in M2. 
*/ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_pi8(__m64 __m1, __m64 __m2) { #if _ARCH_PWR8 __vector signed char __a, __b, __c; __a = (__vector signed char)vec_splats(__m1); __b = (__vector signed char)vec_splats(__m2); __c = vec_add(__a, __b); return (__m64)((__vector long long)__c)[0]; #else __m64_union __mu1, __mu2, __res; __mu1.as_m64 = __m1; __mu2.as_m64 = __m2; __res.as_char[0] = __mu1.as_char[0] + __mu2.as_char[0]; __res.as_char[1] = __mu1.as_char[1] + __mu2.as_char[1]; __res.as_char[2] = __mu1.as_char[2] + __mu2.as_char[2]; __res.as_char[3] = __mu1.as_char[3] + __mu2.as_char[3]; __res.as_char[4] = __mu1.as_char[4] + __mu2.as_char[4]; __res.as_char[5] = __mu1.as_char[5] + __mu2.as_char[5]; __res.as_char[6] = __mu1.as_char[6] + __mu2.as_char[6]; __res.as_char[7] = __mu1.as_char[7] + __mu2.as_char[7]; return (__m64)__res.as_m64; #endif } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_paddb(__m64 __m1, __m64 __m2) { return _mm_add_pi8(__m1, __m2); } /* Add the 16-bit values in M1 to the 16-bit values in M2. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_pi16(__m64 __m1, __m64 __m2) { #if _ARCH_PWR8 __vector signed short __a, __b, __c; __a = (__vector signed short)vec_splats(__m1); __b = (__vector signed short)vec_splats(__m2); __c = vec_add(__a, __b); return (__m64)((__vector long long)__c)[0]; #else __m64_union __mu1, __mu2, __res; __mu1.as_m64 = __m1; __mu2.as_m64 = __m2; __res.as_short[0] = __mu1.as_short[0] + __mu2.as_short[0]; __res.as_short[1] = __mu1.as_short[1] + __mu2.as_short[1]; __res.as_short[2] = __mu1.as_short[2] + __mu2.as_short[2]; __res.as_short[3] = __mu1.as_short[3] + __mu2.as_short[3]; return (__m64)__res.as_m64; #endif } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_paddw(__m64 __m1, __m64 __m2) { return _mm_add_pi16(__m1, __m2); } /* Add the 32-bit values in M1 to the 32-bit values in M2. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_pi32(__m64 __m1, __m64 __m2) { #if _ARCH_PWR9 __vector signed int __a, __b, __c; __a = (__vector signed int)vec_splats(__m1); __b = (__vector signed int)vec_splats(__m2); __c = vec_add(__a, __b); return (__m64)((__vector long long)__c)[0]; #else __m64_union __mu1, __mu2, __res; __mu1.as_m64 = __m1; __mu2.as_m64 = __m2; __res.as_int[0] = __mu1.as_int[0] + __mu2.as_int[0]; __res.as_int[1] = __mu1.as_int[1] + __mu2.as_int[1]; return (__m64)__res.as_m64; #endif } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_paddd(__m64 __m1, __m64 __m2) { return _mm_add_pi32(__m1, __m2); } /* Subtract the 8-bit values in M2 from the 8-bit values in M1. 
*/ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_pi8(__m64 __m1, __m64 __m2) { #if _ARCH_PWR8 __vector signed char __a, __b, __c; __a = (__vector signed char)vec_splats(__m1); __b = (__vector signed char)vec_splats(__m2); __c = vec_sub(__a, __b); return (__m64)((__vector long long)__c)[0]; #else __m64_union __mu1, __mu2, __res; __mu1.as_m64 = __m1; __mu2.as_m64 = __m2; __res.as_char[0] = __mu1.as_char[0] - __mu2.as_char[0]; __res.as_char[1] = __mu1.as_char[1] - __mu2.as_char[1]; __res.as_char[2] = __mu1.as_char[2] - __mu2.as_char[2]; __res.as_char[3] = __mu1.as_char[3] - __mu2.as_char[3]; __res.as_char[4] = __mu1.as_char[4] - __mu2.as_char[4]; __res.as_char[5] = __mu1.as_char[5] - __mu2.as_char[5]; __res.as_char[6] = __mu1.as_char[6] - __mu2.as_char[6]; __res.as_char[7] = __mu1.as_char[7] - __mu2.as_char[7]; return (__m64)__res.as_m64; #endif } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psubb(__m64 __m1, __m64 __m2) { return _mm_sub_pi8(__m1, __m2); } /* Subtract the 16-bit values in M2 from the 16-bit values in M1. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_pi16(__m64 __m1, __m64 __m2) { #if _ARCH_PWR8 __vector signed short __a, __b, __c; __a = (__vector signed short)vec_splats(__m1); __b = (__vector signed short)vec_splats(__m2); __c = vec_sub(__a, __b); return (__m64)((__vector long long)__c)[0]; #else __m64_union __mu1, __mu2, __res; __mu1.as_m64 = __m1; __mu2.as_m64 = __m2; __res.as_short[0] = __mu1.as_short[0] - __mu2.as_short[0]; __res.as_short[1] = __mu1.as_short[1] - __mu2.as_short[1]; __res.as_short[2] = __mu1.as_short[2] - __mu2.as_short[2]; __res.as_short[3] = __mu1.as_short[3] - __mu2.as_short[3]; return (__m64)__res.as_m64; #endif } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psubw(__m64 __m1, __m64 __m2) { return _mm_sub_pi16(__m1, __m2); } /* Subtract the 32-bit values in M2 from the 32-bit values in M1. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_pi32(__m64 __m1, __m64 __m2) { #if _ARCH_PWR9 __vector signed int __a, __b, __c; __a = (__vector signed int)vec_splats(__m1); __b = (__vector signed int)vec_splats(__m2); __c = vec_sub(__a, __b); return (__m64)((__vector long long)__c)[0]; #else __m64_union __mu1, __mu2, __res; __mu1.as_m64 = __m1; __mu2.as_m64 = __m2; __res.as_int[0] = __mu1.as_int[0] - __mu2.as_int[0]; __res.as_int[1] = __mu1.as_int[1] - __mu2.as_int[1]; return (__m64)__res.as_m64; #endif } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psubd(__m64 __m1, __m64 __m2) { return _mm_sub_pi32(__m1, __m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_si64(__m64 __m1, __m64 __m2) { return (__m1 + __m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_si64(__m64 __m1, __m64 __m2) { return (__m1 - __m2); } /* Shift the 64-bit value in M left by COUNT. 
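   For example, _mm_slli_si64(_mm_set_pi32(0, 1), 32) moves the low 32-bit word into the high word.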
*/ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sll_si64(__m64 __m, __m64 __count) { return (__m << __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psllq(__m64 __m, __m64 __count) { return _mm_sll_si64(__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_slli_si64(__m64 __m, const int __count) { return (__m << __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psllqi(__m64 __m, const int __count) { return _mm_slli_si64(__m, __count); } /* Shift the 64-bit value in M left by COUNT; shift in zeros. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srl_si64(__m64 __m, __m64 __count) { return (__m >> __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psrlq(__m64 __m, __m64 __count) { return _mm_srl_si64(__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srli_si64(__m64 __m, const int __count) { return (__m >> __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psrlqi(__m64 __m, const int __count) { return _mm_srli_si64(__m, __count); } /* Bit-wise AND the 64-bit values in M1 and M2. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_and_si64(__m64 __m1, __m64 __m2) { return (__m1 & __m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pand(__m64 __m1, __m64 __m2) { return _mm_and_si64(__m1, __m2); } /* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the 64-bit value in M2. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_andnot_si64(__m64 __m1, __m64 __m2) { return (~__m1 & __m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pandn(__m64 __m1, __m64 __m2) { return _mm_andnot_si64(__m1, __m2); } /* Bit-wise inclusive OR the 64-bit values in M1 and M2. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_or_si64(__m64 __m1, __m64 __m2) { return (__m1 | __m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_por(__m64 __m1, __m64 __m2) { return _mm_or_si64(__m1, __m2); } /* Bit-wise exclusive OR the 64-bit values in M1 and M2. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_xor_si64(__m64 __m1, __m64 __m2) { return (__m1 ^ __m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pxor(__m64 __m1, __m64 __m2) { return _mm_xor_si64(__m1, __m2); } /* Creates a 64-bit zero. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setzero_si64(void) { return (__m64)0; } /* Compare eight 8-bit values. The result of the comparison is 0xFF if the test is true and zero if false. 
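   For example, comparing _mm_set1_pi8(7) with itself yields 0xFF in all eight lanes.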
*/ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) { #if defined(_ARCH_PWR6) && defined(__powerpc64__) __m64 __res; __asm__("cmpb %0,%1,%2;\n" : "=r"(__res) : "r"(__m1), "r"(__m2) :); return (__res); #else __m64_union __mu1, __mu2, __res; __mu1.as_m64 = __m1; __mu2.as_m64 = __m2; __res.as_char[0] = (__mu1.as_char[0] == __mu2.as_char[0]) ? -1 : 0; __res.as_char[1] = (__mu1.as_char[1] == __mu2.as_char[1]) ? -1 : 0; __res.as_char[2] = (__mu1.as_char[2] == __mu2.as_char[2]) ? -1 : 0; __res.as_char[3] = (__mu1.as_char[3] == __mu2.as_char[3]) ? -1 : 0; __res.as_char[4] = (__mu1.as_char[4] == __mu2.as_char[4]) ? -1 : 0; __res.as_char[5] = (__mu1.as_char[5] == __mu2.as_char[5]) ? -1 : 0; __res.as_char[6] = (__mu1.as_char[6] == __mu2.as_char[6]) ? -1 : 0; __res.as_char[7] = (__mu1.as_char[7] == __mu2.as_char[7]) ? -1 : 0; return (__m64)__res.as_m64; #endif } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pcmpeqb(__m64 __m1, __m64 __m2) { return _mm_cmpeq_pi8(__m1, __m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) { #if _ARCH_PWR8 __vector signed char __a, __b, __c; __a = (__vector signed char)vec_splats(__m1); __b = (__vector signed char)vec_splats(__m2); __c = (__vector signed char)vec_cmpgt(__a, __b); return (__m64)((__vector long long)__c)[0]; #else __m64_union __mu1, __mu2, __res; __mu1.as_m64 = __m1; __mu2.as_m64 = __m2; __res.as_char[0] = (__mu1.as_char[0] > __mu2.as_char[0]) ? -1 : 0; __res.as_char[1] = (__mu1.as_char[1] > __mu2.as_char[1]) ? -1 : 0; __res.as_char[2] = (__mu1.as_char[2] > __mu2.as_char[2]) ? -1 : 0; __res.as_char[3] = (__mu1.as_char[3] > __mu2.as_char[3]) ? -1 : 0; __res.as_char[4] = (__mu1.as_char[4] > __mu2.as_char[4]) ? -1 : 0; __res.as_char[5] = (__mu1.as_char[5] > __mu2.as_char[5]) ? -1 : 0; __res.as_char[6] = (__mu1.as_char[6] > __mu2.as_char[6]) ? -1 : 0; __res.as_char[7] = (__mu1.as_char[7] > __mu2.as_char[7]) ? -1 : 0; return (__m64)__res.as_m64; #endif } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pcmpgtb(__m64 __m1, __m64 __m2) { return _mm_cmpgt_pi8(__m1, __m2); } /* Compare four 16-bit values. The result of the comparison is 0xFFFF if the test is true and zero if false. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) { #if _ARCH_PWR8 __vector signed short __a, __b, __c; __a = (__vector signed short)vec_splats(__m1); __b = (__vector signed short)vec_splats(__m2); __c = (__vector signed short)vec_cmpeq(__a, __b); return (__m64)((__vector long long)__c)[0]; #else __m64_union __mu1, __mu2, __res; __mu1.as_m64 = __m1; __mu2.as_m64 = __m2; __res.as_short[0] = (__mu1.as_short[0] == __mu2.as_short[0]) ? -1 : 0; __res.as_short[1] = (__mu1.as_short[1] == __mu2.as_short[1]) ? -1 : 0; __res.as_short[2] = (__mu1.as_short[2] == __mu2.as_short[2]) ? -1 : 0; __res.as_short[3] = (__mu1.as_short[3] == __mu2.as_short[3]) ? 
-1 : 0; return (__m64)__res.as_m64; #endif } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pcmpeqw(__m64 __m1, __m64 __m2) { return _mm_cmpeq_pi16(__m1, __m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) { #if _ARCH_PWR8 __vector signed short __a, __b, __c; __a = (__vector signed short)vec_splats(__m1); __b = (__vector signed short)vec_splats(__m2); __c = (__vector signed short)vec_cmpgt(__a, __b); return (__m64)((__vector long long)__c)[0]; #else __m64_union __mu1, __mu2, __res; __mu1.as_m64 = __m1; __mu2.as_m64 = __m2; __res.as_short[0] = (__mu1.as_short[0] > __mu2.as_short[0]) ? -1 : 0; __res.as_short[1] = (__mu1.as_short[1] > __mu2.as_short[1]) ? -1 : 0; __res.as_short[2] = (__mu1.as_short[2] > __mu2.as_short[2]) ? -1 : 0; __res.as_short[3] = (__mu1.as_short[3] > __mu2.as_short[3]) ? -1 : 0; return (__m64)__res.as_m64; #endif } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pcmpgtw(__m64 __m1, __m64 __m2) { return _mm_cmpgt_pi16(__m1, __m2); } /* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if the test is true and zero if false. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) { #if _ARCH_PWR9 __vector signed int __a, __b, __c; __a = (__vector signed int)vec_splats(__m1); __b = (__vector signed int)vec_splats(__m2); __c = (__vector signed int)vec_cmpeq(__a, __b); return (__m64)((__vector long long)__c)[0]; #else __m64_union __mu1, __mu2, __res; __mu1.as_m64 = __m1; __mu2.as_m64 = __m2; __res.as_int[0] = (__mu1.as_int[0] == __mu2.as_int[0]) ? -1 : 0; __res.as_int[1] = (__mu1.as_int[1] == __mu2.as_int[1]) ? -1 : 0; return (__m64)__res.as_m64; #endif } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pcmpeqd(__m64 __m1, __m64 __m2) { return _mm_cmpeq_pi32(__m1, __m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) { #if _ARCH_PWR9 __vector signed int __a, __b, __c; __a = (__vector signed int)vec_splats(__m1); __b = (__vector signed int)vec_splats(__m2); __c = (__vector signed int)vec_cmpgt(__a, __b); return (__m64)((__vector long long)__c)[0]; #else __m64_union __mu1, __mu2, __res; __mu1.as_m64 = __m1; __mu2.as_m64 = __m2; __res.as_int[0] = (__mu1.as_int[0] > __mu2.as_int[0]) ? -1 : 0; __res.as_int[1] = (__mu1.as_int[1] > __mu2.as_int[1]) ? -1 : 0; return (__m64)__res.as_m64; #endif } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pcmpgtd(__m64 __m1, __m64 __m2) { return _mm_cmpgt_pi32(__m1, __m2); } #if _ARCH_PWR8 /* Add the 8-bit values in M1 to the 8-bit values in M2 using signed saturated arithmetic. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_adds_pi8(__m64 __m1, __m64 __m2) { __vector signed char __a, __b, __c; __a = (__vector signed char)vec_splats(__m1); __b = (__vector signed char)vec_splats(__m2); __c = vec_adds(__a, __b); return (__m64)((__vector long long)__c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_paddsb(__m64 __m1, __m64 __m2) { return _mm_adds_pi8(__m1, __m2); } /* Add the 16-bit values in M1 to the 16-bit values in M2 using signed saturated arithmetic. 
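   For example, adding 1 to a lane holding 0x7F saturates to 0x7F instead of wrapping to -0x80.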
*/ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_adds_pi16(__m64 __m1, __m64 __m2) { __vector signed short __a, __b, __c; __a = (__vector signed short)vec_splats(__m1); __b = (__vector signed short)vec_splats(__m2); __c = vec_adds(__a, __b); return (__m64)((__vector long long)__c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_paddsw(__m64 __m1, __m64 __m2) { return _mm_adds_pi16(__m1, __m2); } /* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned saturated arithmetic. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_adds_pu8(__m64 __m1, __m64 __m2) { __vector unsigned char __a, __b, __c; __a = (__vector unsigned char)vec_splats(__m1); __b = (__vector unsigned char)vec_splats(__m2); __c = vec_adds(__a, __b); return (__m64)((__vector long long)__c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_paddusb(__m64 __m1, __m64 __m2) { return _mm_adds_pu8(__m1, __m2); } /* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned saturated arithmetic. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_adds_pu16(__m64 __m1, __m64 __m2) { __vector unsigned short __a, __b, __c; __a = (__vector unsigned short)vec_splats(__m1); __b = (__vector unsigned short)vec_splats(__m2); __c = vec_adds(__a, __b); return (__m64)((__vector long long)__c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_paddusw(__m64 __m1, __m64 __m2) { return _mm_adds_pu16(__m1, __m2); } /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed saturating arithmetic. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_subs_pi8(__m64 __m1, __m64 __m2) { __vector signed char __a, __b, __c; __a = (__vector signed char)vec_splats(__m1); __b = (__vector signed char)vec_splats(__m2); __c = vec_subs(__a, __b); return (__m64)((__vector long long)__c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psubsb(__m64 __m1, __m64 __m2) { return _mm_subs_pi8(__m1, __m2); } /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using signed saturating arithmetic. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_subs_pi16(__m64 __m1, __m64 __m2) { __vector signed short __a, __b, __c; __a = (__vector signed short)vec_splats(__m1); __b = (__vector signed short)vec_splats(__m2); __c = vec_subs(__a, __b); return (__m64)((__vector long long)__c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psubsw(__m64 __m1, __m64 __m2) { return _mm_subs_pi16(__m1, __m2); } /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using unsigned saturating arithmetic. 
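   For example, subtracting 5 from an unsigned lane holding 3 saturates to 0 rather than wrapping.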
*/ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_subs_pu8(__m64 __m1, __m64 __m2) { __vector unsigned char __a, __b, __c; __a = (__vector unsigned char)vec_splats(__m1); __b = (__vector unsigned char)vec_splats(__m2); __c = vec_subs(__a, __b); return (__m64)((__vector long long)__c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psubusb(__m64 __m1, __m64 __m2) { return _mm_subs_pu8(__m1, __m2); } /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using unsigned saturating arithmetic. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_subs_pu16(__m64 __m1, __m64 __m2) { __vector unsigned short __a, __b, __c; __a = (__vector unsigned short)vec_splats(__m1); __b = (__vector unsigned short)vec_splats(__m2); __c = vec_subs(__a, __b); return (__m64)((__vector long long)__c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psubusw(__m64 __m1, __m64 __m2) { return _mm_subs_pu16(__m1, __m2); } /* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing four 32-bit intermediate results, which are then summed by pairs to produce two 32-bit results. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_madd_pi16(__m64 __m1, __m64 __m2) { __vector signed short __a, __b; __vector signed int __c; __vector signed int __zero = {0, 0, 0, 0}; __a = (__vector signed short)vec_splats(__m1); __b = (__vector signed short)vec_splats(__m2); __c = vec_vmsumshm(__a, __b, __zero); return (__m64)((__vector long long)__c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pmaddwd(__m64 __m1, __m64 __m2) { return _mm_madd_pi16(__m1, __m2); } /* Multiply four signed 16-bit values in M1 by four signed 16-bit values in M2 and produce the high 16 bits of the 32-bit results. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mulhi_pi16(__m64 __m1, __m64 __m2) { __vector signed short __a, __b; __vector signed short __c; __vector signed int __w0, __w1; __vector unsigned char __xform1 = { #ifdef __LITTLE_ENDIAN__ 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, 0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F #else 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15, 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15 #endif }; __a = (__vector signed short)vec_splats(__m1); __b = (__vector signed short)vec_splats(__m2); __w0 = vec_vmulesh(__a, __b); __w1 = vec_vmulosh(__a, __b); __c = (__vector signed short)vec_perm(__w0, __w1, __xform1); return (__m64)((__vector long long)__c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pmulhw(__m64 __m1, __m64 __m2) { return _mm_mulhi_pi16(__m1, __m2); } /* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce the low 16 bits of the results. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mullo_pi16(__m64 __m1, __m64 __m2) { __vector signed short __a, __b, __c; __a = (__vector signed short)vec_splats(__m1); __b = (__vector signed short)vec_splats(__m2); __c = __a * __b; return (__m64)((__vector long long)__c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pmullw(__m64 __m1, __m64 __m2) { return _mm_mullo_pi16(__m1, __m2); } /* Shift four 16-bit values in M left by COUNT. 
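   Counts greater than 15 produce a result of all zeros in this implementation.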
*/ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sll_pi16(__m64 __m, __m64 __count) { __vector signed short __r; __vector unsigned short __c; if (__count <= 15) { __r = (__vector signed short)vec_splats(__m); __c = (__vector unsigned short)vec_splats((unsigned short)__count); __r = vec_sl(__r, (__vector unsigned short)__c); return (__m64)((__vector long long)__r)[0]; } else return (0); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psllw(__m64 __m, __m64 __count) { return _mm_sll_pi16(__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_slli_pi16(__m64 __m, int __count) { /* Promote int to long then invoke mm_sll_pi16. */ return _mm_sll_pi16(__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psllwi(__m64 __m, int __count) { return _mm_slli_pi16(__m, __count); } /* Shift two 32-bit values in M left by COUNT. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sll_pi32(__m64 __m, __m64 __count) { __m64_union __res; __res.as_m64 = __m; __res.as_int[0] = __res.as_int[0] << __count; __res.as_int[1] = __res.as_int[1] << __count; return (__res.as_m64); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pslld(__m64 __m, __m64 __count) { return _mm_sll_pi32(__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_slli_pi32(__m64 __m, int __count) { /* Promote int to long then invoke mm_sll_pi32. */ return _mm_sll_pi32(__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pslldi(__m64 __m, int __count) { return _mm_slli_pi32(__m, __count); } /* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sra_pi16(__m64 __m, __m64 __count) { __vector signed short __r; __vector unsigned short __c; if (__count <= 15) { __r = (__vector signed short)vec_splats(__m); __c = (__vector unsigned short)vec_splats((unsigned short)__count); __r = vec_sra(__r, (__vector unsigned short)__c); return (__m64)((__vector long long)__r)[0]; } else return (0); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psraw(__m64 __m, __m64 __count) { return _mm_sra_pi16(__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srai_pi16(__m64 __m, int __count) { /* Promote int to long then invoke mm_sra_pi32. */ return _mm_sra_pi16(__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psrawi(__m64 __m, int __count) { return _mm_srai_pi16(__m, __count); } /* Shift two 32-bit values in M right by COUNT; shift in the sign bit. 
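   For example, a lane holding -8 shifted right arithmetically by 1 becomes -4.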
*/ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sra_pi32(__m64 __m, __m64 __count) { __m64_union __res; __res.as_m64 = __m; __res.as_int[0] = __res.as_int[0] >> __count; __res.as_int[1] = __res.as_int[1] >> __count; return (__res.as_m64); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psrad(__m64 __m, __m64 __count) { return _mm_sra_pi32(__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srai_pi32(__m64 __m, int __count) { /* Promote int to long then invoke mm_sra_pi32. */ return _mm_sra_pi32(__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psradi(__m64 __m, int __count) { return _mm_srai_pi32(__m, __count); } /* Shift four 16-bit values in M right by COUNT; shift in zeros. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srl_pi16(__m64 __m, __m64 __count) { __vector unsigned short __r; __vector unsigned short __c; if (__count <= 15) { __r = (__vector unsigned short)vec_splats(__m); __c = (__vector unsigned short)vec_splats((unsigned short)__count); __r = vec_sr(__r, (__vector unsigned short)__c); return (__m64)((__vector long long)__r)[0]; } else return (0); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psrlw(__m64 __m, __m64 __count) { return _mm_srl_pi16(__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srli_pi16(__m64 __m, int __count) { /* Promote int to long then invoke mm_sra_pi32. */ return _mm_srl_pi16(__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psrlwi(__m64 __m, int __count) { return _mm_srli_pi16(__m, __count); } /* Shift two 32-bit values in M right by COUNT; shift in zeros. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srl_pi32(__m64 __m, __m64 __count) { __m64_union __res; __res.as_m64 = __m; __res.as_int[0] = (unsigned int)__res.as_int[0] >> __count; __res.as_int[1] = (unsigned int)__res.as_int[1] >> __count; return (__res.as_m64); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psrld(__m64 __m, __m64 __count) { return _mm_srl_pi32(__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srli_pi32(__m64 __m, int __count) { /* Promote int to long then invoke mm_srl_pi32. */ return _mm_srl_pi32(__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psrldi(__m64 __m, int __count) { return _mm_srli_pi32(__m, __count); } #endif /* _ARCH_PWR8 */ /* Creates a vector of two 32-bit values; I0 is least significant. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_pi32(int __i1, int __i0) { __m64_union __res; __res.as_int[0] = __i0; __res.as_int[1] = __i1; return (__res.as_m64); } /* Creates a vector of four 16-bit values; W0 is least significant. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_pi16(short __w3, short __w2, short __w1, short __w0) { __m64_union __res; __res.as_short[0] = __w0; __res.as_short[1] = __w1; __res.as_short[2] = __w2; __res.as_short[3] = __w3; return (__res.as_m64); } /* Creates a vector of eight 8-bit values; B0 is least significant. 
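   Arguments are listed most-significant first, so _mm_set_pi8(7, 6, 5, 4, 3, 2, 1, 0) places 0 in byte 0 and 7 in byte 7.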
*/ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0) { __m64_union __res; __res.as_char[0] = __b0; __res.as_char[1] = __b1; __res.as_char[2] = __b2; __res.as_char[3] = __b3; __res.as_char[4] = __b4; __res.as_char[5] = __b5; __res.as_char[6] = __b6; __res.as_char[7] = __b7; return (__res.as_m64); } /* Similar, but with the arguments in reverse order. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setr_pi32(int __i0, int __i1) { __m64_union __res; __res.as_int[0] = __i0; __res.as_int[1] = __i1; return (__res.as_m64); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) { return _mm_set_pi16(__w3, __w2, __w1, __w0); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7) { return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); } /* Creates a vector of two 32-bit values, both elements containing I. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_pi32(int __i) { __m64_union __res; __res.as_int[0] = __i; __res.as_int[1] = __i; return (__res.as_m64); } /* Creates a vector of four 16-bit values, all elements containing W. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_pi16(short __w) { #if _ARCH_PWR9 __vector signed short w; w = (__vector signed short)vec_splats(__w); return (__m64)((__vector long long)w)[0]; #else __m64_union __res; __res.as_short[0] = __w; __res.as_short[1] = __w; __res.as_short[2] = __w; __res.as_short[3] = __w; return (__res.as_m64); #endif } /* Creates a vector of eight 8-bit values, all elements containing B. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_pi8(signed char __b) { #if _ARCH_PWR8 __vector signed char __res; __res = (__vector signed char)vec_splats(__b); return (__m64)((__vector long long)__res)[0]; #else __m64_union __res; __res.as_char[0] = __b; __res.as_char[1] = __b; __res.as_char[2] = __b; __res.as_char[3] = __b; __res.as_char[4] = __b; __res.as_char[5] = __b; __res.as_char[6] = __b; __res.as_char[7] = __b; return (__res.as_m64); #endif } #else #include_next #endif /* defined(__powerpc64__) && \ * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* _MMINTRIN_H_INCLUDED */ //===-- tsan_interface_atomic.h ---------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is a part of ThreadSanitizer (TSan), a race detector. // // Public interface header for TSan atomics. 
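// These entry points mirror the C11/C++11 atomic operations (loads, stores,
// exchanges, fetch-and-modify, compare-exchange and fences), each taking an
// explicit __tsan_memory_order so the race detector can model the
// synchronization an operation implies. For example, a
// __tsan_atomic32_store(&flag, 1, __tsan_memory_order_release) paired with a
// __tsan_atomic32_load(&flag, __tsan_memory_order_acquire) establishes a
// happens-before edge that TSan will honor.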
//===----------------------------------------------------------------------===// #ifndef TSAN_INTERFACE_ATOMIC_H #define TSAN_INTERFACE_ATOMIC_H #include #ifdef __cplusplus extern "C" { #endif typedef char __tsan_atomic8; typedef short __tsan_atomic16; typedef int __tsan_atomic32; typedef long __tsan_atomic64; #if defined(__SIZEOF_INT128__) || \ (__clang_major__ * 100 + __clang_minor__ >= 302) __extension__ typedef __int128 __tsan_atomic128; #define __TSAN_HAS_INT128 1 #else #define __TSAN_HAS_INT128 0 #endif // Part of ABI, do not change. // https://github.com/llvm/llvm-project/blob/main/libcxx/include/atomic typedef enum { __tsan_memory_order_relaxed, __tsan_memory_order_consume, __tsan_memory_order_acquire, __tsan_memory_order_release, __tsan_memory_order_acq_rel, __tsan_memory_order_seq_cst } __tsan_memory_order; __tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_load(const volatile __tsan_atomic8 *a, __tsan_memory_order mo); __tsan_atomic16 SANITIZER_CDECL __tsan_atomic16_load(const volatile __tsan_atomic16 *a, __tsan_memory_order mo); __tsan_atomic32 SANITIZER_CDECL __tsan_atomic32_load(const volatile __tsan_atomic32 *a, __tsan_memory_order mo); __tsan_atomic64 SANITIZER_CDECL __tsan_atomic64_load(const volatile __tsan_atomic64 *a, __tsan_memory_order mo); #if __TSAN_HAS_INT128 __tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_load( const volatile __tsan_atomic128 *a, __tsan_memory_order mo); #endif void SANITIZER_CDECL __tsan_atomic8_store(volatile __tsan_atomic8 *a, __tsan_atomic8 v, __tsan_memory_order mo); void SANITIZER_CDECL __tsan_atomic16_store(volatile __tsan_atomic16 *a, __tsan_atomic16 v, __tsan_memory_order mo); void SANITIZER_CDECL __tsan_atomic32_store(volatile __tsan_atomic32 *a, __tsan_atomic32 v, __tsan_memory_order mo); void SANITIZER_CDECL __tsan_atomic64_store(volatile __tsan_atomic64 *a, __tsan_atomic64 v, __tsan_memory_order mo); #if __TSAN_HAS_INT128 void SANITIZER_CDECL __tsan_atomic128_store(volatile __tsan_atomic128 *a, __tsan_atomic128 v, __tsan_memory_order mo); #endif __tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_exchange( volatile __tsan_atomic8 *a, __tsan_atomic8 v, __tsan_memory_order mo); __tsan_atomic16 SANITIZER_CDECL __tsan_atomic16_exchange( volatile __tsan_atomic16 *a, __tsan_atomic16 v, __tsan_memory_order mo); __tsan_atomic32 SANITIZER_CDECL __tsan_atomic32_exchange( volatile __tsan_atomic32 *a, __tsan_atomic32 v, __tsan_memory_order mo); __tsan_atomic64 SANITIZER_CDECL __tsan_atomic64_exchange( volatile __tsan_atomic64 *a, __tsan_atomic64 v, __tsan_memory_order mo); #if __TSAN_HAS_INT128 __tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_exchange( volatile __tsan_atomic128 *a, __tsan_atomic128 v, __tsan_memory_order mo); #endif __tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_fetch_add( volatile __tsan_atomic8 *a, __tsan_atomic8 v, __tsan_memory_order mo); __tsan_atomic16 SANITIZER_CDECL __tsan_atomic16_fetch_add( volatile __tsan_atomic16 *a, __tsan_atomic16 v, __tsan_memory_order mo); __tsan_atomic32 SANITIZER_CDECL __tsan_atomic32_fetch_add( volatile __tsan_atomic32 *a, __tsan_atomic32 v, __tsan_memory_order mo); __tsan_atomic64 SANITIZER_CDECL __tsan_atomic64_fetch_add( volatile __tsan_atomic64 *a, __tsan_atomic64 v, __tsan_memory_order mo); #if __TSAN_HAS_INT128 __tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_fetch_add( volatile __tsan_atomic128 *a, __tsan_atomic128 v, __tsan_memory_order mo); #endif __tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_fetch_sub( volatile __tsan_atomic8 *a, __tsan_atomic8 v, __tsan_memory_order mo); __tsan_atomic16 
SANITIZER_CDECL __tsan_atomic16_fetch_sub( volatile __tsan_atomic16 *a, __tsan_atomic16 v, __tsan_memory_order mo); __tsan_atomic32 SANITIZER_CDECL __tsan_atomic32_fetch_sub( volatile __tsan_atomic32 *a, __tsan_atomic32 v, __tsan_memory_order mo); __tsan_atomic64 SANITIZER_CDECL __tsan_atomic64_fetch_sub( volatile __tsan_atomic64 *a, __tsan_atomic64 v, __tsan_memory_order mo); #if __TSAN_HAS_INT128 __tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_fetch_sub( volatile __tsan_atomic128 *a, __tsan_atomic128 v, __tsan_memory_order mo); #endif __tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_fetch_and( volatile __tsan_atomic8 *a, __tsan_atomic8 v, __tsan_memory_order mo); __tsan_atomic16 SANITIZER_CDECL __tsan_atomic16_fetch_and( volatile __tsan_atomic16 *a, __tsan_atomic16 v, __tsan_memory_order mo); __tsan_atomic32 SANITIZER_CDECL __tsan_atomic32_fetch_and( volatile __tsan_atomic32 *a, __tsan_atomic32 v, __tsan_memory_order mo); __tsan_atomic64 SANITIZER_CDECL __tsan_atomic64_fetch_and( volatile __tsan_atomic64 *a, __tsan_atomic64 v, __tsan_memory_order mo); #if __TSAN_HAS_INT128 __tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_fetch_and( volatile __tsan_atomic128 *a, __tsan_atomic128 v, __tsan_memory_order mo); #endif __tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_fetch_or( volatile __tsan_atomic8 *a, __tsan_atomic8 v, __tsan_memory_order mo); __tsan_atomic16 SANITIZER_CDECL __tsan_atomic16_fetch_or( volatile __tsan_atomic16 *a, __tsan_atomic16 v, __tsan_memory_order mo); __tsan_atomic32 SANITIZER_CDECL __tsan_atomic32_fetch_or( volatile __tsan_atomic32 *a, __tsan_atomic32 v, __tsan_memory_order mo); __tsan_atomic64 SANITIZER_CDECL __tsan_atomic64_fetch_or( volatile __tsan_atomic64 *a, __tsan_atomic64 v, __tsan_memory_order mo); #if __TSAN_HAS_INT128 __tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_fetch_or( volatile __tsan_atomic128 *a, __tsan_atomic128 v, __tsan_memory_order mo); #endif __tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_fetch_xor( volatile __tsan_atomic8 *a, __tsan_atomic8 v, __tsan_memory_order mo); __tsan_atomic16 SANITIZER_CDECL __tsan_atomic16_fetch_xor( volatile __tsan_atomic16 *a, __tsan_atomic16 v, __tsan_memory_order mo); __tsan_atomic32 SANITIZER_CDECL __tsan_atomic32_fetch_xor( volatile __tsan_atomic32 *a, __tsan_atomic32 v, __tsan_memory_order mo); __tsan_atomic64 SANITIZER_CDECL __tsan_atomic64_fetch_xor( volatile __tsan_atomic64 *a, __tsan_atomic64 v, __tsan_memory_order mo); #if __TSAN_HAS_INT128 __tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_fetch_xor( volatile __tsan_atomic128 *a, __tsan_atomic128 v, __tsan_memory_order mo); #endif __tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_fetch_nand( volatile __tsan_atomic8 *a, __tsan_atomic8 v, __tsan_memory_order mo); __tsan_atomic16 SANITIZER_CDECL __tsan_atomic16_fetch_nand( volatile __tsan_atomic16 *a, __tsan_atomic16 v, __tsan_memory_order mo); __tsan_atomic32 SANITIZER_CDECL __tsan_atomic32_fetch_nand( volatile __tsan_atomic32 *a, __tsan_atomic32 v, __tsan_memory_order mo); __tsan_atomic64 SANITIZER_CDECL __tsan_atomic64_fetch_nand( volatile __tsan_atomic64 *a, __tsan_atomic64 v, __tsan_memory_order mo); #if __TSAN_HAS_INT128 __tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_fetch_nand( volatile __tsan_atomic128 *a, __tsan_atomic128 v, __tsan_memory_order mo); #endif int SANITIZER_CDECL __tsan_atomic8_compare_exchange_weak( volatile __tsan_atomic8 *a, __tsan_atomic8 *c, __tsan_atomic8 v, __tsan_memory_order mo, __tsan_memory_order fail_mo); int SANITIZER_CDECL __tsan_atomic16_compare_exchange_weak( volatile 
__tsan_atomic16 *a, __tsan_atomic16 *c, __tsan_atomic16 v, __tsan_memory_order mo, __tsan_memory_order fail_mo); int SANITIZER_CDECL __tsan_atomic32_compare_exchange_weak( volatile __tsan_atomic32 *a, __tsan_atomic32 *c, __tsan_atomic32 v, __tsan_memory_order mo, __tsan_memory_order fail_mo); int SANITIZER_CDECL __tsan_atomic64_compare_exchange_weak( volatile __tsan_atomic64 *a, __tsan_atomic64 *c, __tsan_atomic64 v, __tsan_memory_order mo, __tsan_memory_order fail_mo); #if __TSAN_HAS_INT128 int SANITIZER_CDECL __tsan_atomic128_compare_exchange_weak( volatile __tsan_atomic128 *a, __tsan_atomic128 *c, __tsan_atomic128 v, __tsan_memory_order mo, __tsan_memory_order fail_mo); #endif int SANITIZER_CDECL __tsan_atomic8_compare_exchange_strong( volatile __tsan_atomic8 *a, __tsan_atomic8 *c, __tsan_atomic8 v, __tsan_memory_order mo, __tsan_memory_order fail_mo); int SANITIZER_CDECL __tsan_atomic16_compare_exchange_strong( volatile __tsan_atomic16 *a, __tsan_atomic16 *c, __tsan_atomic16 v, __tsan_memory_order mo, __tsan_memory_order fail_mo); int SANITIZER_CDECL __tsan_atomic32_compare_exchange_strong( volatile __tsan_atomic32 *a, __tsan_atomic32 *c, __tsan_atomic32 v, __tsan_memory_order mo, __tsan_memory_order fail_mo); int SANITIZER_CDECL __tsan_atomic64_compare_exchange_strong( volatile __tsan_atomic64 *a, __tsan_atomic64 *c, __tsan_atomic64 v, __tsan_memory_order mo, __tsan_memory_order fail_mo); #if __TSAN_HAS_INT128 int SANITIZER_CDECL __tsan_atomic128_compare_exchange_strong( volatile __tsan_atomic128 *a, __tsan_atomic128 *c, __tsan_atomic128 v, __tsan_memory_order mo, __tsan_memory_order fail_mo); #endif __tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_compare_exchange_val( volatile __tsan_atomic8 *a, __tsan_atomic8 c, __tsan_atomic8 v, __tsan_memory_order mo, __tsan_memory_order fail_mo); __tsan_atomic16 SANITIZER_CDECL __tsan_atomic16_compare_exchange_val( volatile __tsan_atomic16 *a, __tsan_atomic16 c, __tsan_atomic16 v, __tsan_memory_order mo, __tsan_memory_order fail_mo); __tsan_atomic32 SANITIZER_CDECL __tsan_atomic32_compare_exchange_val( volatile __tsan_atomic32 *a, __tsan_atomic32 c, __tsan_atomic32 v, __tsan_memory_order mo, __tsan_memory_order fail_mo); __tsan_atomic64 SANITIZER_CDECL __tsan_atomic64_compare_exchange_val( volatile __tsan_atomic64 *a, __tsan_atomic64 c, __tsan_atomic64 v, __tsan_memory_order mo, __tsan_memory_order fail_mo); #if __TSAN_HAS_INT128 __tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_compare_exchange_val( volatile __tsan_atomic128 *a, __tsan_atomic128 c, __tsan_atomic128 v, __tsan_memory_order mo, __tsan_memory_order fail_mo); #endif void SANITIZER_CDECL __tsan_atomic_thread_fence(__tsan_memory_order mo); void SANITIZER_CDECL __tsan_atomic_signal_fence(__tsan_memory_order mo); #ifdef __cplusplus } // extern "C" #endif #endif // TSAN_INTERFACE_ATOMIC_H /*===- __clang_math_forward_declares.h - Prototypes of __device__ math fns --=== * * Part of the LLVM Project, under the Apache License v2.0
with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG__CUDA_MATH_FORWARD_DECLARES_H__ #define __CLANG__CUDA_MATH_FORWARD_DECLARES_H__ #if !defined(__CUDA__) && !__HIP__ #error "This file is for CUDA/HIP compilation only." #endif // This file forward-declares of some math functions we (or the CUDA headers) // will define later. We need to do this, and do it before cmath is included, // because the standard library may have constexpr math functions. In the // absence of a prior __device__ decl, those constexpr functions may become // implicitly host+device. host+device functions can't be overloaded, so that // would preclude the use of our own __device__ overloads for these functions. #pragma push_macro("__DEVICE__") #define __DEVICE__ \ static __inline__ __attribute__((always_inline)) __attribute__((device)) __DEVICE__ long abs(long); __DEVICE__ long long abs(long long); __DEVICE__ double abs(double); __DEVICE__ float abs(float); __DEVICE__ int abs(int); __DEVICE__ double acos(double); __DEVICE__ float acos(float); __DEVICE__ double acosh(double); __DEVICE__ float acosh(float); __DEVICE__ double asin(double); __DEVICE__ float asin(float); __DEVICE__ double asinh(double); __DEVICE__ float asinh(float); __DEVICE__ double atan2(double, double); __DEVICE__ float atan2(float, float); __DEVICE__ double atan(double); __DEVICE__ float atan(float); __DEVICE__ double atanh(double); __DEVICE__ float atanh(float); __DEVICE__ double cbrt(double); __DEVICE__ float cbrt(float); __DEVICE__ double ceil(double); __DEVICE__ float ceil(float); __DEVICE__ double copysign(double, double); __DEVICE__ float copysign(float, float); __DEVICE__ double cos(double); __DEVICE__ float cos(float); __DEVICE__ double cosh(double); __DEVICE__ float cosh(float); __DEVICE__ double erfc(double); __DEVICE__ float erfc(float); __DEVICE__ double erf(double); __DEVICE__ float erf(float); __DEVICE__ double exp2(double); __DEVICE__ float exp2(float); __DEVICE__ double exp(double); __DEVICE__ float exp(float); __DEVICE__ double expm1(double); __DEVICE__ float expm1(float); __DEVICE__ double fabs(double); __DEVICE__ float fabs(float); __DEVICE__ double fdim(double, double); __DEVICE__ float fdim(float, float); __DEVICE__ double floor(double); __DEVICE__ float floor(float); __DEVICE__ double fma(double, double, double); __DEVICE__ float fma(float, float, float); __DEVICE__ double fmax(double, double); __DEVICE__ float fmax(float, float); __DEVICE__ double fmin(double, double); __DEVICE__ float fmin(float, float); __DEVICE__ double fmod(double, double); __DEVICE__ float fmod(float, float); __DEVICE__ int fpclassify(double); __DEVICE__ int fpclassify(float); __DEVICE__ double frexp(double, int *); __DEVICE__ float frexp(float, int *); __DEVICE__ double hypot(double, double); __DEVICE__ float hypot(float, float); __DEVICE__ int ilogb(double); __DEVICE__ int ilogb(float); #ifdef _MSC_VER __DEVICE__ bool isfinite(long double); #endif __DEVICE__ bool isfinite(double); __DEVICE__ bool isfinite(float); __DEVICE__ bool isgreater(double, double); __DEVICE__ bool isgreaterequal(double, double); __DEVICE__ bool isgreaterequal(float, float); __DEVICE__ bool isgreater(float, float); #ifdef _MSC_VER __DEVICE__ bool isinf(long double); #endif __DEVICE__ bool isinf(double); __DEVICE__ bool isinf(float); __DEVICE__ bool isless(double, double); __DEVICE__ bool 
islessequal(double, double); __DEVICE__ bool islessequal(float, float); __DEVICE__ bool isless(float, float); __DEVICE__ bool islessgreater(double, double); __DEVICE__ bool islessgreater(float, float); #ifdef _MSC_VER __DEVICE__ bool isnan(long double); #endif __DEVICE__ bool isnan(double); __DEVICE__ bool isnan(float); __DEVICE__ bool isnormal(double); __DEVICE__ bool isnormal(float); __DEVICE__ bool isunordered(double, double); __DEVICE__ bool isunordered(float, float); __DEVICE__ long labs(long); __DEVICE__ double ldexp(double, int); __DEVICE__ float ldexp(float, int); __DEVICE__ double lgamma(double); __DEVICE__ float lgamma(float); __DEVICE__ long long llabs(long long); __DEVICE__ long long llrint(double); __DEVICE__ long long llrint(float); __DEVICE__ double log10(double); __DEVICE__ float log10(float); __DEVICE__ double log1p(double); __DEVICE__ float log1p(float); __DEVICE__ double log2(double); __DEVICE__ float log2(float); __DEVICE__ double logb(double); __DEVICE__ float logb(float); __DEVICE__ double log(double); __DEVICE__ float log(float); __DEVICE__ long lrint(double); __DEVICE__ long lrint(float); __DEVICE__ long lround(double); __DEVICE__ long lround(float); __DEVICE__ long long llround(float); // No llround(double). __DEVICE__ double modf(double, double *); __DEVICE__ float modf(float, float *); __DEVICE__ double nan(const char *); __DEVICE__ float nanf(const char *); __DEVICE__ double nearbyint(double); __DEVICE__ float nearbyint(float); __DEVICE__ double nextafter(double, double); __DEVICE__ float nextafter(float, float); __DEVICE__ double pow(double, double); __DEVICE__ double pow(double, int); __DEVICE__ float pow(float, float); __DEVICE__ float pow(float, int); __DEVICE__ double remainder(double, double); __DEVICE__ float remainder(float, float); __DEVICE__ double remquo(double, double, int *); __DEVICE__ float remquo(float, float, int *); __DEVICE__ double rint(double); __DEVICE__ float rint(float); __DEVICE__ double round(double); __DEVICE__ float round(float); __DEVICE__ double scalbln(double, long); __DEVICE__ float scalbln(float, long); __DEVICE__ double scalbn(double, int); __DEVICE__ float scalbn(float, int); #ifdef _MSC_VER __DEVICE__ bool signbit(long double); #endif __DEVICE__ bool signbit(double); __DEVICE__ bool signbit(float); __DEVICE__ double sin(double); __DEVICE__ float sin(float); __DEVICE__ double sinh(double); __DEVICE__ float sinh(float); __DEVICE__ double sqrt(double); __DEVICE__ float sqrt(float); __DEVICE__ double tan(double); __DEVICE__ float tan(float); __DEVICE__ double tanh(double); __DEVICE__ float tanh(float); __DEVICE__ double tgamma(double); __DEVICE__ float tgamma(float); __DEVICE__ double trunc(double); __DEVICE__ float trunc(float); // Notably missing above is nexttoward, which we don't define on // the device side because libdevice doesn't give us an implementation, and we // don't want to be in the business of writing one ourselves. // We need to define these overloads in exactly the namespace our standard // library uses (including the right inline namespace), otherwise they won't be // picked up by other functions in the standard library (e.g. functions in // ). Thus the ugliness below. 
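// The block below re-opens the standard library's namespace (using the
// library's own begin/end macros when they are defined) and re-exports each
// __device__ overload with a using-declaration, so both unqualified and
// std::-qualified calls resolve to the device versions.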
#ifdef _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_BEGIN_NAMESPACE_STD #else namespace std { #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif #endif using ::abs; using ::acos; using ::acosh; using ::asin; using ::asinh; using ::atan; using ::atan2; using ::atanh; using ::cbrt; using ::ceil; using ::copysign; using ::cos; using ::cosh; using ::erf; using ::erfc; using ::exp; using ::exp2; using ::expm1; using ::fabs; using ::fdim; using ::floor; using ::fma; using ::fmax; using ::fmin; using ::fmod; using ::fpclassify; using ::frexp; using ::hypot; using ::ilogb; using ::isfinite; using ::isgreater; using ::isgreaterequal; using ::isinf; using ::isless; using ::islessequal; using ::islessgreater; using ::isnan; using ::isnormal; using ::isunordered; using ::labs; using ::ldexp; using ::lgamma; using ::llabs; using ::llrint; using ::log; using ::log10; using ::log1p; using ::log2; using ::logb; using ::lrint; using ::lround; using ::llround; using ::modf; using ::nan; using ::nanf; using ::nearbyint; using ::nextafter; using ::pow; using ::remainder; using ::remquo; using ::rint; using ::round; using ::scalbln; using ::scalbn; using ::signbit; using ::sin; using ::sinh; using ::sqrt; using ::tan; using ::tanh; using ::tgamma; using ::trunc; #ifdef _LIBCPP_END_NAMESPACE_STD _LIBCPP_END_NAMESPACE_STD #else #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_END_NAMESPACE_VERSION #endif } // namespace std #endif #pragma pop_macro("__DEVICE__") #endif avx512vlbitalgintrin.h/*===----- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics-------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error \ "Never use directly; include instead." #endif #ifndef __AVX512VPOPCNTDQINTRIN_H #define __AVX512VPOPCNTDQINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vpopcntdq,evex512"), \ __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) { return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_popcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512( (__mmask8)__U, (__v8di)_mm512_popcnt_epi64(__A), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) { return _mm512_mask_popcnt_epi64((__m512i)_mm512_setzero_si512(), __U, __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi32(__m512i __A) { return (__m512i)__builtin_ia32_vpopcntd_512((__v16si)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_popcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectd_512( (__mmask16)__U, (__v16si)_mm512_popcnt_epi32(__A), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) { return _mm512_mask_popcnt_epi32((__m512i)_mm512_setzero_si512(), __U, __A); } #undef __DEFAULT_FN_ATTRS #endif cldemoteintrin.h/*===------------------------- movdirintrin.h ------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
* See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef _MOVDIRINTRIN_H #define _MOVDIRINTRIN_H /* Move doubleword as direct store */ static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movdiri"))) _directstoreu_u32 (void *__dst, unsigned int __value) { __builtin_ia32_directstore_u32((unsigned int *)__dst, (unsigned int)__value); } #ifdef __x86_64__ /* Move quadword as direct store */ static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movdiri"))) _directstoreu_u64 (void *__dst, unsigned long __value) { __builtin_ia32_directstore_u64((unsigned long *)__dst, __value); } #endif /* __x86_64__ */ /* * movdir64b - Move 64 bytes as direct store. * The destination must be 64 byte aligned, and the store is atomic. * The source address has no alignment requirement, and the load from * the source address is not atomic. */ static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movdir64b"))) _movdir64b (void *__dst __attribute__((align_value(64))), const void *__src) { __builtin_ia32_movdir64b(__dst, __src); } #endif /* _MOVDIRINTRIN_H */ /*===---- nmmintrin.h - SSE4 intrinsics ------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __NMMINTRIN_H #define __NMMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif /* To match expectations of gcc we put the sse4.2 definitions into smmintrin.h, just include it now then. */ #include #endif /* __NMMINTRIN_H */ /*===----------------------- raointintrin.h - RAOINT ------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86GPRINTRIN_H #error "Never use directly; include instead." #endif // __X86GPRINTRIN_H #ifndef __RAOINTINTRIN_H #define __RAOINTINTRIN_H #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("raoint"))) /// Atomically add a 32-bit value at memory operand \a __A and a 32-bit \a __B, /// and store the result to the same memory location. /// /// This intrinsic should be used for contention or weak ordering. It may /// result in bad performance for hot data used by single thread only. /// /// \headerfile /// /// This intrinsic corresponds to the \c AADD instruction. /// /// \param __A /// A pointer to a 32-bit memory location. /// \param __B /// A 32-bit integer value. /// /// \code{.operation} /// MEM[__A+31:__A] := MEM[__A+31:__A] + __B[31:0] /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _aadd_i32(int *__A, int __B) { __builtin_ia32_aadd32((int *)__A, __B); } /// Atomically and a 32-bit value at memory operand \a __A and a 32-bit \a __B, /// and store the result to the same memory location. 
/// /// This intrinsic should be used for contention or weak ordering. It may /// result in bad performance for hot data used by single thread only. /// /// \headerfile /// /// This intrinsic corresponds to the \c AAND instruction. /// /// \param __A /// A pointer to a 32-bit memory location. /// \param __B /// A 32-bit integer value. /// /// \code{.operation} /// MEM[__A+31:__A] := MEM[__A+31:__A] AND __B[31:0] /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _aand_i32(int *__A, int __B) { __builtin_ia32_aand32((int *)__A, __B); } /// Atomically or a 32-bit value at memory operand \a __A and a 32-bit \a __B, /// and store the result to the same memory location. /// /// This intrinsic should be used for contention or weak ordering. It may /// result in bad performance for hot data used by single thread only. /// /// \headerfile /// /// This intrinsic corresponds to the \c AOR instruction. /// /// \param __A /// A pointer to a 32-bit memory location. /// \param __B /// A 32-bit integer value. /// /// \code{.operation} /// MEM[__A+31:__A] := MEM[__A+31:__A] OR __B[31:0] /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _aor_i32(int *__A, int __B) { __builtin_ia32_aor32((int *)__A, __B); } /// Atomically xor a 32-bit value at memory operand \a __A and a 32-bit \a __B, /// and store the result to the same memory location. /// /// This intrinsic should be used for contention or weak ordering. It may /// result in bad performance for hot data used by single thread only. /// /// \headerfile /// /// This intrinsic corresponds to the \c AXOR instruction. /// /// \param __A /// A pointer to a 32-bit memory location. /// \param __B /// A 32-bit integer value. /// /// \code{.operation} /// MEM[__A+31:__A] := MEM[__A+31:__A] XOR __B[31:0] /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _axor_i32(int *__A, int __B) { __builtin_ia32_axor32((int *)__A, __B); } #ifdef __x86_64__ /// Atomically add a 64-bit value at memory operand \a __A and a 64-bit \a __B, /// and store the result to the same memory location. /// /// This intrinsic should be used for contention or weak ordering. It may /// result in bad performance for hot data used by single thread only. /// /// \headerfile /// /// This intrinsic corresponds to the \c AADD instruction. /// /// \param __A /// A pointer to a 64-bit memory location. /// \param __B /// A 64-bit integer value. /// /// \code{.operation} /// MEM[__A+63:__A] := MEM[__A+63:__A] + __B[63:0] /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _aadd_i64(long long *__A, long long __B) { __builtin_ia32_aadd64((long long *)__A, __B); } /// Atomically and a 64-bit value at memory operand \a __A and a 64-bit \a __B, /// and store the result to the same memory location. /// /// This intrinsic should be used for contention or weak ordering. It may /// result in bad performance for hot data used by single thread only. /// /// \headerfile /// /// This intrinsic corresponds to the \c AAND instruction. /// /// \param __A /// A pointer to a 64-bit memory location. /// \param __B /// A 64-bit integer value. /// /// \code{.operation} /// MEM[__A+63:__A] := MEM[__A+63:__A] AND __B[63:0] /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _aand_i64(long long *__A, long long __B) { __builtin_ia32_aand64((long long *)__A, __B); } /// Atomically or a 64-bit value at memory operand \a __A and a 64-bit \a __B, /// and store the result to the same memory location. /// /// This intrinsic should be used for contention or weak ordering. 
It may /// result in bad performance for hot data used by single thread only. /// /// \headerfile /// /// This intrinsic corresponds to the \c AOR instruction. /// /// \param __A /// A pointer to a 64-bit memory location. /// \param __B /// A 64-bit integer value. /// /// \code{.operation} /// MEM[__A+63:__A] := MEM[__A+63:__A] OR __B[63:0] /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _aor_i64(long long *__A, long long __B) { __builtin_ia32_aor64((long long *)__A, __B); } /// Atomically xor a 64-bit value at memory operand \a __A and a 64-bit \a __B, /// and store the result to the same memory location. /// /// This intrinsic should be used for contention or weak ordering. It may /// result in bad performance for hot data used by single thread only. /// /// \headerfile /// /// This intrinsic corresponds to the \c AXOR instruction. /// /// \param __A /// A pointer to a 64-bit memory location. /// \param __B /// A 64-bit integer value. /// /// \code{.operation} /// MEM[__A+63:__A] := MEM[__A+63:__A] XOR __B[63:0] /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _axor_i64(long long *__A, long long __B) { __builtin_ia32_axor64((long long *)__A, __B); } #endif // __x86_64__ #undef __DEFAULT_FN_ATTRS #endif // __RAOINTINTRIN_H /*===---- riscv_ntlh.h - RISC-V NTLH intrinsics ----------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __RISCV_NTLH_H #define __RISCV_NTLH_H #ifndef __riscv_zihintntl #error "NTLH intrinsics require the NTLH extension." #endif enum { __RISCV_NTLH_INNERMOST_PRIVATE = 2, __RISCV_NTLH_ALL_PRIVATE, __RISCV_NTLH_INNERMOST_SHARED, __RISCV_NTLH_ALL }; #define __riscv_ntl_load __builtin_riscv_ntl_load #define __riscv_ntl_store __builtin_riscv_ntl_store #endif sha512intrin.h/*===-------------------- sm3intrin.h - SM3 intrinsics ---------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif // __IMMINTRIN_H #ifndef __SM3INTRIN_H #define __SM3INTRIN_H #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, __target__("sm3"), \ __min_vector_width__(128))) /// This intrinisc is one of the two SM3 message scheduling intrinsics. The /// intrinsic performs an initial calculation for the next four SM3 message /// words. The calculated results are stored in \a dst. /// /// \headerfile /// /// \code /// __m128i _mm_sm3msg1_epi32(__m128i __A, __m128i __B, __m128i __C) /// \endcode /// /// This intrinsic corresponds to the \c VSM3MSG1 instruction. /// /// \param __A /// A 128-bit vector of [4 x int]. /// \param __B /// A 128-bit vector of [4 x int]. /// \param __C /// A 128-bit vector of [4 x int]. /// \returns /// A 128-bit vector of [4 x int]. 
/// /// \code{.operation} /// DEFINE ROL32(dword, n) { /// count := n % 32 /// dest := (dword << count) | (dword >> (32 - count)) /// RETURN dest /// } /// DEFINE P1(x) { /// RETURN x ^ ROL32(x, 15) ^ ROL32(x, 23) /// } /// W[0] := __C.dword[0] /// W[1] := __C.dword[1] /// W[2] := __C.dword[2] /// W[3] := __C.dword[3] /// W[7] := __A.dword[0] /// W[8] := __A.dword[1] /// W[9] := __A.dword[2] /// W[10] := __A.dword[3] /// W[13] := __B.dword[0] /// W[14] := __B.dword[1] /// W[15] := __B.dword[2] /// TMP0 := W[7] ^ W[0] ^ ROL32(W[13], 15) /// TMP1 := W[8] ^ W[1] ^ ROL32(W[14], 15) /// TMP2 := W[9] ^ W[2] ^ ROL32(W[15], 15) /// TMP3 := W[10] ^ W[3] /// dst.dword[0] := P1(TMP0) /// dst.dword[1] := P1(TMP1) /// dst.dword[2] := P1(TMP2) /// dst.dword[3] := P1(TMP3) /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sm3msg1_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vsm3msg1((__v4su)__A, (__v4su)__B, (__v4su)__C); } /// This intrinisc is one of the two SM3 message scheduling intrinsics. The /// intrinsic performs the final calculation for the next four SM3 message /// words. The calculated results are stored in \a dst. /// /// \headerfile /// /// \code /// __m128i _mm_sm3msg2_epi32(__m128i __A, __m128i __B, __m128i __C) /// \endcode /// /// This intrinsic corresponds to the \c VSM3MSG2 instruction. /// /// \param __A /// A 128-bit vector of [4 x int]. /// \param __B /// A 128-bit vector of [4 x int]. /// \param __C /// A 128-bit vector of [4 x int]. /// \returns /// A 128-bit vector of [4 x int]. /// /// \code{.operation} /// DEFINE ROL32(dword, n) { /// count := n % 32 /// dest := (dword << count) | (dword >> (32-count)) /// RETURN dest /// } /// WTMP[0] := __A.dword[0] /// WTMP[1] := __A.dword[1] /// WTMP[2] := __A.dword[2] /// WTMP[3] := __A.dword[3] /// W[3] := __B.dword[0] /// W[4] := __B.dword[1] /// W[5] := __B.dword[2] /// W[6] := __B.dword[3] /// W[10] := __C.dword[0] /// W[11] := __C.dword[1] /// W[12] := __C.dword[2] /// W[13] := __C.dword[3] /// W[16] := ROL32(W[3], 7) ^ W[10] ^ WTMP[0] /// W[17] := ROL32(W[4], 7) ^ W[11] ^ WTMP[1] /// W[18] := ROL32(W[5], 7) ^ W[12] ^ WTMP[2] /// W[19] := ROL32(W[6], 7) ^ W[13] ^ WTMP[3] /// W[19] := W[19] ^ ROL32(W[16], 6) ^ ROL32(W[16], 15) ^ ROL32(W[16], 30) /// dst.dword[0] := W[16] /// dst.dword[1] := W[17] /// dst.dword[2] := W[18] /// dst.dword[3] := W[19] /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sm3msg2_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vsm3msg2((__v4su)__A, (__v4su)__B, (__v4su)__C); } /// This intrinsic performs two rounds of SM3 operation using initial SM3 state /// (C, D, G, H) from \a __A, an initial SM3 states (A, B, E, F) /// from \a __B and a pre-computed words from the \a __C. \a __A with /// initial SM3 state of (C, D, G, H) assumes input of non-rotated left /// variables from previous state. The updated SM3 state (A, B, E, F) is /// written to \a __A. The \a imm8 should contain the even round number /// for the first of the two rounds computed by this instruction. The /// computation masks the \a imm8 value by AND’ing it with 0x3E so that only /// even round numbers from 0 through 62 are used for this operation. The /// calculated results are stored in \a dst. /// /// \headerfile /// /// \code /// __m128i _mm_sm3rnds2_epi32(__m128i __A, __m128i __B, __m128i __C, const int /// imm8) \endcode /// /// This intrinsic corresponds to the \c VSM3RNDS2 instruction. 
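/*
 * Minimal sketch of how the two scheduling intrinsics above chain together,
 * following the operand roles in their \code{.operation} blocks: VSM3MSG1
 * consumes W[0..3] (__C), W[7..10] (__A) and W[13..15] (__B); VSM3MSG2 folds
 * that temporary with W[3..6] (__B) and W[10..13] (__C) to produce W[16..19].
 * Packing the message words into __m128i values is the caller's job; the
 * function and parameter names here are illustrative only.
 */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
__example_sm3_next4(__m128i __w0_3, __m128i __w3_6, __m128i __w7_10,
                    __m128i __w10_13, __m128i __w13_15)
{
  __m128i __tmp = _mm_sm3msg1_epi32(__w7_10, __w13_15, __w0_3);
  return _mm_sm3msg2_epi32(__tmp, __w3_6, __w10_13); /* W[16..19] */
}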
///
/// \param __A
///    A 128-bit vector of [4 x int].
/// \param __B
///    A 128-bit vector of [4 x int].
/// \param __C
///    A 128-bit vector of [4 x int].
/// \param imm8
///    An 8-bit constant integer.
/// \returns
///    A 128-bit vector of [4 x int].
///
/// \code{.operation}
/// DEFINE ROL32(dword, n) {
///   count := n % 32
///   dest := (dword << count) | (dword >> (32-count))
///   RETURN dest
/// }
/// DEFINE P0(dword) {
///   RETURN dword ^ ROL32(dword, 9) ^ ROL32(dword, 17)
/// }
/// DEFINE FF(x, y, z, round) {
///   IF round < 16
///     RETURN (x ^ y ^ z)
///   ELSE
///     RETURN (x & y) | (x & z) | (y & z)
///   FI
/// }
/// DEFINE GG(x, y, z, round) {
///   IF round < 16
///     RETURN (x ^ y ^ z)
///   ELSE
///     RETURN (x & y) | (~x & z)
///   FI
/// }
/// A[0] := __B.dword[3]
/// B[0] := __B.dword[2]
/// C[0] := __A.dword[3]
/// D[0] := __A.dword[2]
/// E[0] := __B.dword[1]
/// F[0] := __B.dword[0]
/// G[0] := __A.dword[1]
/// H[0] := __A.dword[0]
/// W[0] := __C.dword[0]
/// W[1] := __C.dword[1]
/// W[4] := __C.dword[2]
/// W[5] := __C.dword[3]
/// C[0] := ROL32(C[0], 9)
/// D[0] := ROL32(D[0], 9)
/// G[0] := ROL32(G[0], 19)
/// H[0] := ROL32(H[0], 19)
/// ROUND := __D & 0x3E
/// IF ROUND < 16
///   CONST := 0x79CC4519
/// ELSE
///   CONST := 0x7A879D8A
/// FI
/// CONST := ROL32(CONST, ROUND)
/// FOR i := 0 to 1
///   S1 := ROL32((ROL32(A[i], 12) + E[i] + CONST), 7)
///   S2 := S1 ^ ROL32(A[i], 12)
///   T1 := FF(A[i], B[i], C[i], ROUND) + D[i] + S2 + (W[i] ^ W[i+4])
///   T2 := GG(E[i], F[i], G[i], ROUND) + H[i] + S1 + W[i]
///   D[i+1] := C[i]
///   C[i+1] := ROL32(B[i], 9)
///   B[i+1] := A[i]
///   A[i+1] := T1
///   H[i+1] := G[i]
///   G[i+1] := ROL32(F[i], 19)
///   F[i+1] := E[i]
///   E[i+1] := P0(T2)
///   CONST := ROL32(CONST, 1)
/// ENDFOR
/// dst.dword[3] := A[2]
/// dst.dword[2] := B[2]
/// dst.dword[1] := E[2]
/// dst.dword[0] := F[2]
/// dst[MAX:128] := 0
/// \endcode
#define _mm_sm3rnds2_epi32(A, B, C, D)                                         \
  (__m128i)__builtin_ia32_vsm3rnds2((__v4su)A, (__v4su)B, (__v4su)C, (int)D)

#undef __DEFAULT_FN_ATTRS128

#endif // __SM3INTRIN_H
/*===---- __stddef_max_align_t.h - Definition of max_align_t ---------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __CLANG_MAX_ALIGN_T_DEFINED
#define __CLANG_MAX_ALIGN_T_DEFINED

#if defined(_MSC_VER)
typedef double max_align_t;
#elif defined(__APPLE__)
typedef long double max_align_t;
#else
// Define 'max_align_t' to match the GCC definition.
typedef struct { long long __clang_max_align_nonce1 __attribute__((__aligned__(__alignof__(long long)))); long double __clang_max_align_nonce2 __attribute__((__aligned__(__alignof__(long double)))); } max_align_t; #endif #endif /*===---- arm_sme.h - ARM SME intrinsics ------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __ARM_SME_H #define __ARM_SME_H #if !defined(__LITTLE_ENDIAN__) #error "Big endian is currently not supported for arm_sme.h" #endif #include #include /* Function attributes */ #define __ai static __inline__ __attribute__((__always_inline__, __nodebug__)) #define __aio static __inline__ __attribute__((__always_inline__, __nodebug__, __overloadable__)) #ifdef __cplusplus extern "C" { #endif void __arm_za_disable(void) __arm_streaming_compatible; __ai bool __arm_has_sme(void) __arm_streaming_compatible { uint64_t x0, x1; __builtin_arm_get_sme_state(&x0, &x1); return x0 & (1ULL << 63); } __ai bool __arm_in_streaming_mode(void) __arm_streaming_compatible { uint64_t x0, x1; __builtin_arm_get_sme_state(&x0, &x1); return x0 & 1; } void *__arm_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_compatible; void *__arm_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_compatible; void *__arm_sc_memset(void *s, int c, size_t n) __arm_streaming_compatible; void *__arm_sc_memchr(void *s, int c, size_t n) __arm_streaming_compatible; __ai __attribute__((target("sme"))) void svundef_za(void) __arm_streaming_compatible __arm_out("za") { } __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m))) void svaddha_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m))) void svaddha_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_u32_m))) void svaddva_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_s32_m))) void svaddva_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsb))) uint64_t svcntsb(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsd))) uint64_t svcntsd(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsh))) uint64_t svcntsh(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsw))) uint64_t svcntsw(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za128))) void svld1_hor_vnum_za128(uint64_t, uint32_t, svbool_t, void const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za16))) void svld1_hor_vnum_za16(uint64_t, uint32_t, svbool_t, void const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za32))) void svld1_hor_vnum_za32(uint64_t, uint32_t, svbool_t, void const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za64))) void svld1_hor_vnum_za64(uint64_t, uint32_t, svbool_t, void const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za8))) void svld1_hor_vnum_za8(uint64_t, uint32_t, svbool_t, void const *, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za128))) void svld1_hor_za128(uint64_t, uint32_t, svbool_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za16))) void svld1_hor_za16(uint64_t, uint32_t, svbool_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za32))) void svld1_hor_za32(uint64_t, uint32_t, svbool_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za64))) void svld1_hor_za64(uint64_t, uint32_t, svbool_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za8))) void svld1_hor_za8(uint64_t, uint32_t, svbool_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za128))) void svld1_ver_vnum_za128(uint64_t, uint32_t, svbool_t, void const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za16))) void svld1_ver_vnum_za16(uint64_t, uint32_t, svbool_t, void const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za32))) void svld1_ver_vnum_za32(uint64_t, uint32_t, svbool_t, void const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za64))) void svld1_ver_vnum_za64(uint64_t, uint32_t, svbool_t, void const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za8))) void svld1_ver_vnum_za8(uint64_t, uint32_t, svbool_t, void const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za128))) void svld1_ver_za128(uint64_t, uint32_t, svbool_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za16))) void svld1_ver_za16(uint64_t, uint32_t, svbool_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za32))) void svld1_ver_za32(uint64_t, uint32_t, svbool_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za64))) void svld1_ver_za64(uint64_t, uint32_t, svbool_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za8))) void svld1_ver_za8(uint64_t, uint32_t, svbool_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svldr_vnum_za))) void svldr_vnum_za(uint32_t, void const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svldr_za))) void svldr_za(uint32_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f16_m))) void svmopa_za32_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_bf16_m))) void svmopa_za32_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f32_m))) void svmopa_za32_f32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s8_m))) void svmopa_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u8_m))) void svmopa_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f16_m))) void svmops_za32_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_bf16_m))) void svmops_za32_bf16_m(uint64_t, 
svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f32_m))) void svmops_za32_f32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s8_m))) void svmops_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u8_m))) void svmops_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u8_m))) svuint8_t svread_hor_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u32_m))) svuint32_t svread_hor_za128_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u64_m))) svuint64_t svread_hor_za128_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u16_m))) svuint16_t svread_hor_za128_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_bf16_m))) svbfloat16_t svread_hor_za128_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s8_m))) svint8_t svread_hor_za128_s8_m(svint8_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f64_m))) svfloat64_t svread_hor_za128_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f32_m))) svfloat32_t svread_hor_za128_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f16_m))) svfloat16_t svread_hor_za128_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32_m))) svint32_t svread_hor_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m))) svint64_t svread_hor_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m))) svint16_t svread_hor_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m))) svuint16_t svread_hor_za16_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_m))) svbfloat16_t svread_hor_za16_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_m))) svfloat16_t svread_hor_za16_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_m))) svint16_t svread_hor_za16_s16_m(svint16_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_m))) svuint32_t svread_hor_za32_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_m))) svfloat32_t svread_hor_za32_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_m))) svint32_t 
svread_hor_za32_s32_m(svint32_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_m))) svuint64_t svread_hor_za64_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_m))) svfloat64_t svread_hor_za64_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_m))) svint64_t svread_hor_za64_s64_m(svint64_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m))) svuint8_t svread_hor_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m))) svint8_t svread_hor_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m))) svuint8_t svread_ver_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m))) svuint32_t svread_ver_za128_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u64_m))) svuint64_t svread_ver_za128_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u16_m))) svuint16_t svread_ver_za128_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_bf16_m))) svbfloat16_t svread_ver_za128_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s8_m))) svint8_t svread_ver_za128_s8_m(svint8_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f64_m))) svfloat64_t svread_ver_za128_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f32_m))) svfloat32_t svread_ver_za128_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f16_m))) svfloat16_t svread_ver_za128_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32_m))) svint32_t svread_ver_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m))) svint64_t svread_ver_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m))) svint16_t svread_ver_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m))) svuint16_t svread_ver_za16_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_m))) svbfloat16_t svread_ver_za16_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_m))) svfloat16_t svread_ver_za16_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_m))) svint16_t svread_ver_za16_s16_m(svint16_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_m))) svuint32_t 
svread_ver_za32_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_m))) svfloat32_t svread_ver_za32_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_m))) svint32_t svread_ver_za32_s32_m(svint32_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_m))) svuint64_t svread_ver_za64_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_m))) svfloat64_t svread_ver_za64_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_m))) svint64_t svread_ver_za64_s64_m(svint64_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m))) svuint8_t svread_ver_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m))) svint8_t svread_ver_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za128))) void svst1_hor_vnum_za128(uint64_t, uint32_t, svbool_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za16))) void svst1_hor_vnum_za16(uint64_t, uint32_t, svbool_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za32))) void svst1_hor_vnum_za32(uint64_t, uint32_t, svbool_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za64))) void svst1_hor_vnum_za64(uint64_t, uint32_t, svbool_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za8))) void svst1_hor_vnum_za8(uint64_t, uint32_t, svbool_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za128))) void svst1_hor_za128(uint64_t, uint32_t, svbool_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za16))) void svst1_hor_za16(uint64_t, uint32_t, svbool_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za32))) void svst1_hor_za32(uint64_t, uint32_t, svbool_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za64))) void svst1_hor_za64(uint64_t, uint32_t, svbool_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za8))) void svst1_hor_za8(uint64_t, uint32_t, svbool_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za128))) void svst1_ver_vnum_za128(uint64_t, uint32_t, svbool_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za16))) void svst1_ver_vnum_za16(uint64_t, uint32_t, svbool_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za32))) void svst1_ver_vnum_za32(uint64_t, uint32_t, svbool_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za64))) void svst1_ver_vnum_za64(uint64_t, uint32_t, svbool_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za8))) void svst1_ver_vnum_za8(uint64_t, uint32_t, svbool_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za128))) void svst1_ver_za128(uint64_t, uint32_t, svbool_t, 
void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za16))) void svst1_ver_za16(uint64_t, uint32_t, svbool_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za32))) void svst1_ver_za32(uint64_t, uint32_t, svbool_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za64))) void svst1_ver_za64(uint64_t, uint32_t, svbool_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za8))) void svst1_ver_za8(uint64_t, uint32_t, svbool_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svstr_vnum_za))) void svstr_vnum_za(uint32_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svstr_za))) void svstr_za(uint32_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m))) void svsumopa_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m))) void svsumops_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za32_u8_m))) void svusmopa_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za32_u8_m))) void svusmops_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u8_m))) void svwrite_hor_za128_u8_m(uint64_t, uint32_t, svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u32_m))) void svwrite_hor_za128_u32_m(uint64_t, uint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u64_m))) void svwrite_hor_za128_u64_m(uint64_t, uint32_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u16_m))) void svwrite_hor_za128_u16_m(uint64_t, uint32_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_bf16_m))) void svwrite_hor_za128_bf16_m(uint64_t, uint32_t, svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s8_m))) void svwrite_hor_za128_s8_m(uint64_t, uint32_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f64_m))) void svwrite_hor_za128_f64_m(uint64_t, uint32_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f32_m))) void svwrite_hor_za128_f32_m(uint64_t, uint32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f16_m))) void svwrite_hor_za128_f16_m(uint64_t, uint32_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s32_m))) void svwrite_hor_za128_s32_m(uint64_t, uint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m))) void svwrite_hor_za128_s64_m(uint64_t, uint32_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m))) void svwrite_hor_za128_s16_m(uint64_t, uint32_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m))) void svwrite_hor_za16_u16_m(uint64_t, uint32_t, svbool_t, svuint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_m))) void svwrite_hor_za16_bf16_m(uint64_t, uint32_t, svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_m))) void svwrite_hor_za16_f16_m(uint64_t, uint32_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_m))) void svwrite_hor_za16_s16_m(uint64_t, uint32_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_m))) void svwrite_hor_za32_u32_m(uint64_t, uint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_m))) void svwrite_hor_za32_f32_m(uint64_t, uint32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_m))) void svwrite_hor_za32_s32_m(uint64_t, uint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_m))) void svwrite_hor_za64_u64_m(uint64_t, uint32_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_m))) void svwrite_hor_za64_f64_m(uint64_t, uint32_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_m))) void svwrite_hor_za64_s64_m(uint64_t, uint32_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m))) void svwrite_hor_za8_u8_m(uint64_t, uint32_t, svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m))) void svwrite_hor_za8_s8_m(uint64_t, uint32_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m))) void svwrite_ver_za128_u8_m(uint64_t, uint32_t, svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m))) void svwrite_ver_za128_u32_m(uint64_t, uint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u64_m))) void svwrite_ver_za128_u64_m(uint64_t, uint32_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u16_m))) void svwrite_ver_za128_u16_m(uint64_t, uint32_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_bf16_m))) void svwrite_ver_za128_bf16_m(uint64_t, uint32_t, svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s8_m))) void svwrite_ver_za128_s8_m(uint64_t, uint32_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f64_m))) void svwrite_ver_za128_f64_m(uint64_t, uint32_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f32_m))) void svwrite_ver_za128_f32_m(uint64_t, uint32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f16_m))) void svwrite_ver_za128_f16_m(uint64_t, uint32_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s32_m))) void svwrite_ver_za128_s32_m(uint64_t, uint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m))) void svwrite_ver_za128_s64_m(uint64_t, uint32_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m))) void 
svwrite_ver_za128_s16_m(uint64_t, uint32_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m))) void svwrite_ver_za16_u16_m(uint64_t, uint32_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_m))) void svwrite_ver_za16_bf16_m(uint64_t, uint32_t, svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_m))) void svwrite_ver_za16_f16_m(uint64_t, uint32_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_m))) void svwrite_ver_za16_s16_m(uint64_t, uint32_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_m))) void svwrite_ver_za32_u32_m(uint64_t, uint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_m))) void svwrite_ver_za32_f32_m(uint64_t, uint32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_m))) void svwrite_ver_za32_s32_m(uint64_t, uint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_m))) void svwrite_ver_za64_u64_m(uint64_t, uint32_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_m))) void svwrite_ver_za64_f64_m(uint64_t, uint32_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_m))) void svwrite_ver_za64_s64_m(uint64_t, uint32_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m))) void svwrite_ver_za8_u8_m(uint64_t, uint32_t, svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m))) void svwrite_ver_za8_s8_m(uint64_t, uint32_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_mask_za))) void svzero_mask_za(uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za))) void svzero_za(void); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m))) void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m))) void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_u32_m))) void svaddva_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_s32_m))) void svaddva_za32_m(uint64_t, svbool_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f16_m))) void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_bf16_m))) void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f32_m))) void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s8_m))) void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u8_m))) void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f16_m))) void svmops_za32_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_bf16_m))) void svmops_za32_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f32_m))) void svmops_za32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s8_m))) void svmops_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u8_m))) void svmops_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u8_m))) svuint8_t svread_hor_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u32_m))) svuint32_t svread_hor_za128_m(svuint32_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u64_m))) svuint64_t svread_hor_za128_m(svuint64_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u16_m))) svuint16_t svread_hor_za128_m(svuint16_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_bf16_m))) svbfloat16_t svread_hor_za128_m(svbfloat16_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s8_m))) svint8_t svread_hor_za128_m(svint8_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f64_m))) svfloat64_t svread_hor_za128_m(svfloat64_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f32_m))) svfloat32_t svread_hor_za128_m(svfloat32_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f16_m))) svfloat16_t svread_hor_za128_m(svfloat16_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32_m))) svint32_t svread_hor_za128_m(svint32_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m))) svint64_t svread_hor_za128_m(svint64_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m))) svint16_t svread_hor_za128_m(svint16_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m))) svuint16_t svread_hor_za16_m(svuint16_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_m))) svbfloat16_t svread_hor_za16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_m))) svfloat16_t svread_hor_za16_m(svfloat16_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_m))) svint16_t svread_hor_za16_m(svint16_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_m))) svuint32_t svread_hor_za32_m(svuint32_t, svbool_t, uint64_t, uint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_m))) svfloat32_t svread_hor_za32_m(svfloat32_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_m))) svint32_t svread_hor_za32_m(svint32_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_m))) svuint64_t svread_hor_za64_m(svuint64_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_m))) svfloat64_t svread_hor_za64_m(svfloat64_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_m))) svint64_t svread_hor_za64_m(svint64_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m))) svuint8_t svread_hor_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m))) svint8_t svread_hor_za8_m(svint8_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m))) svuint8_t svread_ver_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m))) svuint32_t svread_ver_za128_m(svuint32_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u64_m))) svuint64_t svread_ver_za128_m(svuint64_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u16_m))) svuint16_t svread_ver_za128_m(svuint16_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_bf16_m))) svbfloat16_t svread_ver_za128_m(svbfloat16_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s8_m))) svint8_t svread_ver_za128_m(svint8_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f64_m))) svfloat64_t svread_ver_za128_m(svfloat64_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f32_m))) svfloat32_t svread_ver_za128_m(svfloat32_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f16_m))) svfloat16_t svread_ver_za128_m(svfloat16_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32_m))) svint32_t svread_ver_za128_m(svint32_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m))) svint64_t svread_ver_za128_m(svint64_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m))) svint16_t svread_ver_za128_m(svint16_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m))) svuint16_t svread_ver_za16_m(svuint16_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_m))) svbfloat16_t svread_ver_za16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_m))) svfloat16_t svread_ver_za16_m(svfloat16_t, svbool_t, uint64_t, uint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_m))) svint16_t svread_ver_za16_m(svint16_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_m))) svuint32_t svread_ver_za32_m(svuint32_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_m))) svfloat32_t svread_ver_za32_m(svfloat32_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_m))) svint32_t svread_ver_za32_m(svint32_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_m))) svuint64_t svread_ver_za64_m(svuint64_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_m))) svfloat64_t svread_ver_za64_m(svfloat64_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_m))) svint64_t svread_ver_za64_m(svint64_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m))) svuint8_t svread_ver_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m))) svint8_t svread_ver_za8_m(svint8_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m))) void svsumopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m))) void svsumops_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za32_u8_m))) void svusmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za32_u8_m))) void svusmops_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u8_m))) void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u32_m))) void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u64_m))) void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u16_m))) void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_bf16_m))) void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s8_m))) void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f64_m))) void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f32_m))) void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f16_m))) void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s32_m))) void svwrite_hor_za128_m(uint64_t, 
uint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m))) void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m))) void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m))) void svwrite_hor_za16_m(uint64_t, uint32_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_m))) void svwrite_hor_za16_m(uint64_t, uint32_t, svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_m))) void svwrite_hor_za16_m(uint64_t, uint32_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_m))) void svwrite_hor_za16_m(uint64_t, uint32_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_m))) void svwrite_hor_za32_m(uint64_t, uint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_m))) void svwrite_hor_za32_m(uint64_t, uint32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_m))) void svwrite_hor_za32_m(uint64_t, uint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_m))) void svwrite_hor_za64_m(uint64_t, uint32_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_m))) void svwrite_hor_za64_m(uint64_t, uint32_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_m))) void svwrite_hor_za64_m(uint64_t, uint32_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m))) void svwrite_hor_za8_m(uint64_t, uint32_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m))) void svwrite_hor_za8_m(uint64_t, uint32_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m))) void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m))) void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u64_m))) void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u16_m))) void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_bf16_m))) void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s8_m))) void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f64_m))) void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f32_m))) void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f16_m))) void svwrite_ver_za128_m(uint64_t, 
uint32_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s32_m))) void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m))) void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m))) void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m))) void svwrite_ver_za16_m(uint64_t, uint32_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_m))) void svwrite_ver_za16_m(uint64_t, uint32_t, svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_m))) void svwrite_ver_za16_m(uint64_t, uint32_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_m))) void svwrite_ver_za16_m(uint64_t, uint32_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_m))) void svwrite_ver_za32_m(uint64_t, uint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_m))) void svwrite_ver_za32_m(uint64_t, uint32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_m))) void svwrite_ver_za32_m(uint64_t, uint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_m))) void svwrite_ver_za64_m(uint64_t, uint32_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_m))) void svwrite_ver_za64_m(uint64_t, uint32_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_m))) void svwrite_ver_za64_m(uint64_t, uint32_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m))) void svwrite_ver_za8_m(uint64_t, uint32_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m))) void svwrite_ver_za8_m(uint64_t, uint32_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m))) void svmopa_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m))) void svmops_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m))) void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m))) void svmops_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m))) void svaddha_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m))) void svaddha_za64_s64_m(uint64_t, svbool_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_u64_m))) void svaddva_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_s64_m))) void svaddva_za64_s64_m(uint64_t, svbool_t, 
svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_s16_m))) void svmopa_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_u16_m))) void svmopa_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_s16_m))) void svmops_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_u16_m))) void svmops_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za64_s16_m))) void svsumopa_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za64_s16_m))) void svsumops_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m))) void svusmopa_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m))) void svusmops_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m))) void svaddha_za64_m(uint64_t, svbool_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m))) void svaddha_za64_m(uint64_t, svbool_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_u64_m))) void svaddva_za64_m(uint64_t, svbool_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_s64_m))) void svaddva_za64_m(uint64_t, svbool_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_s16_m))) void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_u16_m))) void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_s16_m))) void svmops_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_u16_m))) void svmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za64_s16_m))) void svsumopa_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za64_s16_m))) void svsumops_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m))) void svusmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m))) void svusmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_u32_vg1x2))) void svadd_write_single_za32_u32_vg1x2(uint32_t, svuint32x2_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_s32_vg1x2))) void svadd_write_single_za32_s32_vg1x2(uint32_t, svint32x2_t, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_u32_vg1x4))) void svadd_write_single_za32_u32_vg1x4(uint32_t, svuint32x4_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_s32_vg1x4))) void svadd_write_single_za32_s32_vg1x4(uint32_t, svint32x4_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_u32_vg1x2))) void svadd_write_za32_u32_vg1x2(uint32_t, svuint32x2_t, svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_s32_vg1x2))) void svadd_write_za32_s32_vg1x2(uint32_t, svint32x2_t, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_u32_vg1x4))) void svadd_write_za32_u32_vg1x4(uint32_t, svuint32x4_t, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_s32_vg1x4))) void svadd_write_za32_s32_vg1x4(uint32_t, svint32x4_t, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_u32_vg1x2))) void svadd_za32_u32_vg1x2(uint32_t, svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_f32_vg1x2))) void svadd_za32_f32_vg1x2(uint32_t, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_s32_vg1x2))) void svadd_za32_s32_vg1x2(uint32_t, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_u32_vg1x4))) void svadd_za32_u32_vg1x4(uint32_t, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_f32_vg1x4))) void svadd_za32_f32_vg1x4(uint32_t, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_s32_vg1x4))) void svadd_za32_s32_vg1x4(uint32_t, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmopa_za32_u32_m))) void svbmopa_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmopa_za32_s32_m))) void svbmopa_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmops_za32_u32_m))) void svbmops_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmops_za32_s32_m))) void svbmops_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_bf16_vg1x2))) void svdot_single_za32_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_f16_vg1x2))) void svdot_single_za32_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s8_vg1x2))) void svdot_single_za32_s8_vg1x2(uint32_t, svint8x2_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s16_vg1x2))) void svdot_single_za32_s16_vg1x2(uint32_t, svint16x2_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u8_vg1x2))) void svdot_single_za32_u8_vg1x2(uint32_t, svuint8x2_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u16_vg1x2))) void svdot_single_za32_u16_vg1x2(uint32_t, svuint16x2_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_bf16_vg1x4))) void svdot_single_za32_bf16_vg1x4(uint32_t, svbfloat16x4_t, 
svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_f16_vg1x4))) void svdot_single_za32_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s8_vg1x4))) void svdot_single_za32_s8_vg1x4(uint32_t, svint8x4_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s16_vg1x4))) void svdot_single_za32_s16_vg1x4(uint32_t, svint16x4_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u8_vg1x4))) void svdot_single_za32_u8_vg1x4(uint32_t, svuint8x4_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u16_vg1x4))) void svdot_single_za32_u16_vg1x4(uint32_t, svuint16x4_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_bf16_vg1x2))) void svdot_lane_za32_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_f16_vg1x2))) void svdot_lane_za32_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s8_vg1x2))) void svdot_lane_za32_s8_vg1x2(uint32_t, svint8x2_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s16_vg1x2))) void svdot_lane_za32_s16_vg1x2(uint32_t, svint16x2_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u8_vg1x2))) void svdot_lane_za32_u8_vg1x2(uint32_t, svuint8x2_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u16_vg1x2))) void svdot_lane_za32_u16_vg1x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_bf16_vg1x4))) void svdot_lane_za32_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_f16_vg1x4))) void svdot_lane_za32_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s8_vg1x4))) void svdot_lane_za32_s8_vg1x4(uint32_t, svint8x4_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s16_vg1x4))) void svdot_lane_za32_s16_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u8_vg1x4))) void svdot_lane_za32_u8_vg1x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u16_vg1x4))) void svdot_lane_za32_u16_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_bf16_vg1x2))) void svdot_za32_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_f16_vg1x2))) void svdot_za32_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s8_vg1x2))) void svdot_za32_s8_vg1x2(uint32_t, svint8x2_t, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s16_vg1x2))) void svdot_za32_s16_vg1x2(uint32_t, svint16x2_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u8_vg1x2))) void svdot_za32_u8_vg1x2(uint32_t, svuint8x2_t, svuint8x2_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u16_vg1x2))) void svdot_za32_u16_vg1x2(uint32_t, svuint16x2_t, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_bf16_vg1x4))) void svdot_za32_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_f16_vg1x4))) void svdot_za32_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s8_vg1x4))) void svdot_za32_s8_vg1x4(uint32_t, svint8x4_t, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s16_vg1x4))) void svdot_za32_s16_vg1x4(uint32_t, svint16x4_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u8_vg1x4))) void svdot_za32_u8_vg1x4(uint32_t, svuint8x4_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u16_vg1x4))) void svdot_za32_u16_vg1x4(uint32_t, svuint16x4_t, svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svldr_zt))) void svldr_zt(uint64_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u8))) svuint8_t svluti2_lane_zt_u8(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u32))) svuint32_t svluti2_lane_zt_u32(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u16))) svuint16_t svluti2_lane_zt_u16(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_bf16))) svbfloat16_t svluti2_lane_zt_bf16(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s8))) svint8_t svluti2_lane_zt_s8(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f32))) svfloat32_t svluti2_lane_zt_f32(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f16))) svfloat16_t svluti2_lane_zt_f16(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s32))) svint32_t svluti2_lane_zt_s32(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s16))) svint16_t svluti2_lane_zt_s16(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u8_x2))) svuint8x2_t svluti2_lane_zt_u8_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u32_x2))) svuint32x2_t svluti2_lane_zt_u32_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u16_x2))) svuint16x2_t svluti2_lane_zt_u16_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_bf16_x2))) svbfloat16x2_t svluti2_lane_zt_bf16_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s8_x2))) svint8x2_t svluti2_lane_zt_s8_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f32_x2))) svfloat32x2_t svluti2_lane_zt_f32_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f16_x2))) svfloat16x2_t svluti2_lane_zt_f16_x2(uint64_t, svuint8_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s32_x2))) svint32x2_t svluti2_lane_zt_s32_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s16_x2))) svint16x2_t svluti2_lane_zt_s16_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u8_x4))) svuint8x4_t svluti2_lane_zt_u8_x4(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u32_x4))) svuint32x4_t svluti2_lane_zt_u32_x4(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u16_x4))) svuint16x4_t svluti2_lane_zt_u16_x4(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_bf16_x4))) svbfloat16x4_t svluti2_lane_zt_bf16_x4(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s8_x4))) svint8x4_t svluti2_lane_zt_s8_x4(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f32_x4))) svfloat32x4_t svluti2_lane_zt_f32_x4(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f16_x4))) svfloat16x4_t svluti2_lane_zt_f16_x4(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s32_x4))) svint32x4_t svluti2_lane_zt_s32_x4(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s16_x4))) svint16x4_t svluti2_lane_zt_s16_x4(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u8))) svuint8_t svluti4_lane_zt_u8(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u32))) svuint32_t svluti4_lane_zt_u32(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u16))) svuint16_t svluti4_lane_zt_u16(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_bf16))) svbfloat16_t svluti4_lane_zt_bf16(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s8))) svint8_t svluti4_lane_zt_s8(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_f32))) svfloat32_t svluti4_lane_zt_f32(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_f16))) svfloat16_t svluti4_lane_zt_f16(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s32))) svint32_t svluti4_lane_zt_s32(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s16))) svint16_t svluti4_lane_zt_s16(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u8_x2))) svuint8x2_t svluti4_lane_zt_u8_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u32_x2))) svuint32x2_t svluti4_lane_zt_u32_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u16_x2))) svuint16x2_t svluti4_lane_zt_u16_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_bf16_x2))) 
svbfloat16x2_t svluti4_lane_zt_bf16_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s8_x2))) svint8x2_t svluti4_lane_zt_s8_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_f32_x2))) svfloat32x2_t svluti4_lane_zt_f32_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_f16_x2))) svfloat16x2_t svluti4_lane_zt_f16_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s32_x2))) svint32x2_t svluti4_lane_zt_s32_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s16_x2))) svint16x2_t svluti4_lane_zt_s16_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u32_x4))) svuint32x4_t svluti4_lane_zt_u32_x4(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u16_x4))) svuint16x4_t svluti4_lane_zt_u16_x4(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_bf16_x4))) svbfloat16x4_t svluti4_lane_zt_bf16_x4(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_f32_x4))) svfloat32x4_t svluti4_lane_zt_f32_x4(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_f16_x4))) svfloat16x4_t svluti4_lane_zt_f16_x4(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s32_x4))) svint32x4_t svluti4_lane_zt_s32_x4(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s16_x4))) svint16x4_t svluti4_lane_zt_s16_x4(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f32_vg1x2))) void svmla_single_za32_f32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f32_vg1x4))) void svmla_single_za32_f32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_bf16_vg2x2))) void svmla_single_za32_bf16_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f16_vg2x2))) void svmla_single_za32_f16_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s16_vg2x2))) void svmla_single_za32_s16_vg2x2(uint32_t, svint16x2_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u16_vg2x2))) void svmla_single_za32_u16_vg2x2(uint32_t, svuint16x2_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_bf16_vg2x4))) void svmla_single_za32_bf16_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f16_vg2x4))) void svmla_single_za32_f16_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s16_vg2x4))) void svmla_single_za32_s16_vg2x4(uint32_t, svint16x4_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u16_vg2x4))) void svmla_single_za32_u16_vg2x4(uint32_t, svuint16x4_t, svuint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s8_vg4x2))) void svmla_single_za32_s8_vg4x2(uint32_t, svint8x2_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u8_vg4x2))) void svmla_single_za32_u8_vg4x2(uint32_t, svuint8x2_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s8_vg4x4))) void svmla_single_za32_s8_vg4x4(uint32_t, svint8x4_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u8_vg4x4))) void svmla_single_za32_u8_vg4x4(uint32_t, svuint8x4_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f32_vg1x2))) void svmla_lane_za32_f32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f32_vg1x4))) void svmla_lane_za32_f32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_bf16_vg2x1))) void svmla_lane_za32_bf16_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f16_vg2x1))) void svmla_lane_za32_f16_vg2x1(uint32_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s16_vg2x1))) void svmla_lane_za32_s16_vg2x1(uint32_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u16_vg2x1))) void svmla_lane_za32_u16_vg2x1(uint32_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_bf16_vg2x2))) void svmla_lane_za32_bf16_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f16_vg2x2))) void svmla_lane_za32_f16_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s16_vg2x2))) void svmla_lane_za32_s16_vg2x2(uint32_t, svint16x2_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u16_vg2x2))) void svmla_lane_za32_u16_vg2x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_bf16_vg2x4))) void svmla_lane_za32_bf16_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f16_vg2x4))) void svmla_lane_za32_f16_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s16_vg2x4))) void svmla_lane_za32_s16_vg2x4(uint32_t, svint16x4_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u16_vg2x4))) void svmla_lane_za32_u16_vg2x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s8_vg4x1))) void svmla_lane_za32_s8_vg4x1(uint32_t, svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u8_vg4x1))) void svmla_lane_za32_u8_vg4x1(uint32_t, svuint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s8_vg4x2))) void svmla_lane_za32_s8_vg4x2(uint32_t, svint8x2_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u8_vg4x2))) void 
svmla_lane_za32_u8_vg4x2(uint32_t, svuint8x2_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s8_vg4x4))) void svmla_lane_za32_s8_vg4x4(uint32_t, svint8x4_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u8_vg4x4))) void svmla_lane_za32_u8_vg4x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f32_vg1x2))) void svmla_za32_f32_vg1x2(uint32_t, svfloat32x2_t, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f32_vg1x4))) void svmla_za32_f32_vg1x4(uint32_t, svfloat32x4_t, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_bf16_vg2x1))) void svmla_za32_bf16_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f16_vg2x1))) void svmla_za32_f16_vg2x1(uint32_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s16_vg2x1))) void svmla_za32_s16_vg2x1(uint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u16_vg2x1))) void svmla_za32_u16_vg2x1(uint32_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_bf16_vg2x2))) void svmla_za32_bf16_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f16_vg2x2))) void svmla_za32_f16_vg2x2(uint32_t, svfloat16x2_t, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s16_vg2x2))) void svmla_za32_s16_vg2x2(uint32_t, svint16x2_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u16_vg2x2))) void svmla_za32_u16_vg2x2(uint32_t, svuint16x2_t, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_bf16_vg2x4))) void svmla_za32_bf16_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f16_vg2x4))) void svmla_za32_f16_vg2x4(uint32_t, svfloat16x4_t, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s16_vg2x4))) void svmla_za32_s16_vg2x4(uint32_t, svint16x4_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u16_vg2x4))) void svmla_za32_u16_vg2x4(uint32_t, svuint16x4_t, svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s8_vg4x1))) void svmla_za32_s8_vg4x1(uint32_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u8_vg4x1))) void svmla_za32_u8_vg4x1(uint32_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s8_vg4x2))) void svmla_za32_s8_vg4x2(uint32_t, svint8x2_t, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u8_vg4x2))) void svmla_za32_u8_vg4x2(uint32_t, svuint8x2_t, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s8_vg4x4))) void svmla_za32_s8_vg4x4(uint32_t, svint8x4_t, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u8_vg4x4))) void svmla_za32_u8_vg4x4(uint32_t, svuint8x4_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f32_vg1x2))) void svmls_single_za32_f32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f32_vg1x4))) void svmls_single_za32_f32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_bf16_vg2x2))) void svmls_single_za32_bf16_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f16_vg2x2))) void svmls_single_za32_f16_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s16_vg2x2))) void svmls_single_za32_s16_vg2x2(uint32_t, svint16x2_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u16_vg2x2))) void svmls_single_za32_u16_vg2x2(uint32_t, svuint16x2_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_bf16_vg2x4))) void svmls_single_za32_bf16_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f16_vg2x4))) void svmls_single_za32_f16_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s16_vg2x4))) void svmls_single_za32_s16_vg2x4(uint32_t, svint16x4_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u16_vg2x4))) void svmls_single_za32_u16_vg2x4(uint32_t, svuint16x4_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s8_vg4x2))) void svmls_single_za32_s8_vg4x2(uint32_t, svint8x2_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u8_vg4x2))) void svmls_single_za32_u8_vg4x2(uint32_t, svuint8x2_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s8_vg4x4))) void svmls_single_za32_s8_vg4x4(uint32_t, svint8x4_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u8_vg4x4))) void svmls_single_za32_u8_vg4x4(uint32_t, svuint8x4_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f32_vg1x2))) void svmls_lane_za32_f32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f32_vg1x4))) void svmls_lane_za32_f32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_bf16_vg2x1))) void svmls_lane_za32_bf16_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f16_vg2x1))) void svmls_lane_za32_f16_vg2x1(uint32_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s16_vg2x1))) void svmls_lane_za32_s16_vg2x1(uint32_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u16_vg2x1))) void svmls_lane_za32_u16_vg2x1(uint32_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_bf16_vg2x2))) void svmls_lane_za32_bf16_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f16_vg2x2))) void svmls_lane_za32_f16_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s16_vg2x2))) void 
svmls_lane_za32_s16_vg2x2(uint32_t, svint16x2_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u16_vg2x2))) void svmls_lane_za32_u16_vg2x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_bf16_vg2x4))) void svmls_lane_za32_bf16_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f16_vg2x4))) void svmls_lane_za32_f16_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s16_vg2x4))) void svmls_lane_za32_s16_vg2x4(uint32_t, svint16x4_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u16_vg2x4))) void svmls_lane_za32_u16_vg2x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s8_vg4x1))) void svmls_lane_za32_s8_vg4x1(uint32_t, svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u8_vg4x1))) void svmls_lane_za32_u8_vg4x1(uint32_t, svuint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s8_vg4x2))) void svmls_lane_za32_s8_vg4x2(uint32_t, svint8x2_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u8_vg4x2))) void svmls_lane_za32_u8_vg4x2(uint32_t, svuint8x2_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s8_vg4x4))) void svmls_lane_za32_s8_vg4x4(uint32_t, svint8x4_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u8_vg4x4))) void svmls_lane_za32_u8_vg4x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f32_vg1x2))) void svmls_za32_f32_vg1x2(uint32_t, svfloat32x2_t, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f32_vg1x4))) void svmls_za32_f32_vg1x4(uint32_t, svfloat32x4_t, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_bf16_vg2x1))) void svmls_za32_bf16_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f16_vg2x1))) void svmls_za32_f16_vg2x1(uint32_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s16_vg2x1))) void svmls_za32_s16_vg2x1(uint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u16_vg2x1))) void svmls_za32_u16_vg2x1(uint32_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_bf16_vg2x2))) void svmls_za32_bf16_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f16_vg2x2))) void svmls_za32_f16_vg2x2(uint32_t, svfloat16x2_t, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s16_vg2x2))) void svmls_za32_s16_vg2x2(uint32_t, svint16x2_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u16_vg2x2))) void svmls_za32_u16_vg2x2(uint32_t, svuint16x2_t, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_bf16_vg2x4))) void svmls_za32_bf16_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f16_vg2x4))) void svmls_za32_f16_vg2x4(uint32_t, svfloat16x4_t, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s16_vg2x4))) void svmls_za32_s16_vg2x4(uint32_t, svint16x4_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u16_vg2x4))) void svmls_za32_u16_vg2x4(uint32_t, svuint16x4_t, svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s8_vg4x1))) void svmls_za32_s8_vg4x1(uint32_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u8_vg4x1))) void svmls_za32_u8_vg4x1(uint32_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s8_vg4x2))) void svmls_za32_s8_vg4x2(uint32_t, svint8x2_t, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u8_vg4x2))) void svmls_za32_u8_vg4x2(uint32_t, svuint8x2_t, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s8_vg4x4))) void svmls_za32_s8_vg4x4(uint32_t, svint8x4_t, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u8_vg4x4))) void svmls_za32_u8_vg4x4(uint32_t, svuint8x4_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s16_m))) void svmopa_za32_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u16_m))) void svmopa_za32_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s16_m))) void svmops_za32_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u16_m))) void svmops_za32_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_vg2))) svuint16x2_t svread_hor_za16_u16_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_vg2))) svbfloat16x2_t svread_hor_za16_bf16_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_vg2))) svfloat16x2_t svread_hor_za16_f16_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_vg2))) svint16x2_t svread_hor_za16_s16_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_vg4))) svuint16x4_t svread_hor_za16_u16_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_vg4))) svbfloat16x4_t svread_hor_za16_bf16_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_vg4))) svfloat16x4_t svread_hor_za16_f16_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_vg4))) svint16x4_t svread_hor_za16_s16_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_vg2))) svuint32x2_t svread_hor_za32_u32_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_vg2))) svfloat32x2_t svread_hor_za32_f32_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_vg2))) svint32x2_t svread_hor_za32_s32_vg2(uint64_t, uint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_vg4))) svuint32x4_t svread_hor_za32_u32_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_vg4))) svfloat32x4_t svread_hor_za32_f32_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_vg4))) svint32x4_t svread_hor_za32_s32_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_vg2))) svuint64x2_t svread_hor_za64_u64_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_vg2))) svfloat64x2_t svread_hor_za64_f64_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_vg2))) svint64x2_t svread_hor_za64_s64_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_vg4))) svuint64x4_t svread_hor_za64_u64_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_vg4))) svfloat64x4_t svread_hor_za64_f64_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_vg4))) svint64x4_t svread_hor_za64_s64_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_vg2))) svuint8x2_t svread_hor_za8_u8_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_vg2))) svint8x2_t svread_hor_za8_s8_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_vg4))) svuint8x4_t svread_hor_za8_u8_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_vg4))) svint8x4_t svread_hor_za8_s8_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_vg2))) svuint16x2_t svread_ver_za16_u16_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_vg2))) svbfloat16x2_t svread_ver_za16_bf16_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_vg2))) svfloat16x2_t svread_ver_za16_f16_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_vg2))) svint16x2_t svread_ver_za16_s16_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_vg4))) svuint16x4_t svread_ver_za16_u16_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_vg4))) svbfloat16x4_t svread_ver_za16_bf16_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_vg4))) svfloat16x4_t svread_ver_za16_f16_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_vg4))) svint16x4_t svread_ver_za16_s16_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_vg2))) svuint32x2_t svread_ver_za32_u32_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_vg2))) svfloat32x2_t svread_ver_za32_f32_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_vg2))) svint32x2_t svread_ver_za32_s32_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_vg4))) 
svuint32x4_t svread_ver_za32_u32_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_vg4))) svfloat32x4_t svread_ver_za32_f32_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_vg4))) svint32x4_t svread_ver_za32_s32_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_vg2))) svuint64x2_t svread_ver_za64_u64_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_vg2))) svfloat64x2_t svread_ver_za64_f64_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_vg2))) svint64x2_t svread_ver_za64_s64_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_vg4))) svuint64x4_t svread_ver_za64_u64_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_vg4))) svfloat64x4_t svread_ver_za64_f64_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_vg4))) svint64x4_t svread_ver_za64_s64_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_vg2))) svuint8x2_t svread_ver_za8_u8_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_vg2))) svint8x2_t svread_ver_za8_s8_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_vg4))) svuint8x4_t svread_ver_za8_u8_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_vg4))) svint8x4_t svread_ver_za8_s8_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_u16_vg1x2))) svuint16x2_t svread_za16_u16_vg1x2(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_bf16_vg1x2))) svbfloat16x2_t svread_za16_bf16_vg1x2(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_f16_vg1x2))) svfloat16x2_t svread_za16_f16_vg1x2(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_s16_vg1x2))) svint16x2_t svread_za16_s16_vg1x2(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_u16_vg1x4))) svuint16x4_t svread_za16_u16_vg1x4(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_bf16_vg1x4))) svbfloat16x4_t svread_za16_bf16_vg1x4(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_f16_vg1x4))) svfloat16x4_t svread_za16_f16_vg1x4(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_s16_vg1x4))) svint16x4_t svread_za16_s16_vg1x4(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za32_u32_vg1x2))) svuint32x2_t svread_za32_u32_vg1x2(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za32_f32_vg1x2))) svfloat32x2_t svread_za32_f32_vg1x2(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za32_s32_vg1x2))) svint32x2_t svread_za32_s32_vg1x2(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za32_u32_vg1x4))) svuint32x4_t svread_za32_u32_vg1x4(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za32_f32_vg1x4))) svfloat32x4_t svread_za32_f32_vg1x4(uint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za32_s32_vg1x4))) svint32x4_t svread_za32_s32_vg1x4(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za64_u64_vg1x2))) svuint64x2_t svread_za64_u64_vg1x2(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za64_f64_vg1x2))) svfloat64x2_t svread_za64_f64_vg1x2(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za64_s64_vg1x2))) svint64x2_t svread_za64_s64_vg1x2(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za64_u64_vg1x4))) svuint64x4_t svread_za64_u64_vg1x4(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za64_f64_vg1x4))) svfloat64x4_t svread_za64_f64_vg1x4(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za64_s64_vg1x4))) svint64x4_t svread_za64_s64_vg1x4(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za8_u8_vg1x2))) svuint8x2_t svread_za8_u8_vg1x2(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za8_s8_vg1x2))) svint8x2_t svread_za8_s8_vg1x2(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za8_u8_vg1x4))) svuint8x4_t svread_za8_u8_vg1x4(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za8_s8_vg1x4))) svint8x4_t svread_za8_s8_vg1x4(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svstr_zt))) void svstr_zt(uint64_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_u32_vg1x2))) void svsub_write_single_za32_u32_vg1x2(uint32_t, svuint32x2_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_s32_vg1x2))) void svsub_write_single_za32_s32_vg1x2(uint32_t, svint32x2_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_u32_vg1x4))) void svsub_write_single_za32_u32_vg1x4(uint32_t, svuint32x4_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_s32_vg1x4))) void svsub_write_single_za32_s32_vg1x4(uint32_t, svint32x4_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_u32_vg1x2))) void svsub_write_za32_u32_vg1x2(uint32_t, svuint32x2_t, svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_s32_vg1x2))) void svsub_write_za32_s32_vg1x2(uint32_t, svint32x2_t, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_u32_vg1x4))) void svsub_write_za32_u32_vg1x4(uint32_t, svuint32x4_t, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_s32_vg1x4))) void svsub_write_za32_s32_vg1x4(uint32_t, svint32x4_t, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_u32_vg1x2))) void svsub_za32_u32_vg1x2(uint32_t, svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_f32_vg1x2))) void svsub_za32_f32_vg1x2(uint32_t, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_s32_vg1x2))) void svsub_za32_s32_vg1x2(uint32_t, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_u32_vg1x4))) void svsub_za32_u32_vg1x4(uint32_t, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_f32_vg1x4))) void svsub_za32_f32_vg1x4(uint32_t, svfloat32x4_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_s32_vg1x4))) void svsub_za32_s32_vg1x4(uint32_t, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_single_za32_s8_vg1x2))) void svsudot_single_za32_s8_vg1x2(uint32_t, svint8x2_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_single_za32_s8_vg1x4))) void svsudot_single_za32_s8_vg1x4(uint32_t, svint8x4_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_lane_za32_s8_vg1x2))) void svsudot_lane_za32_s8_vg1x2(uint32_t, svint8x2_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_lane_za32_s8_vg1x4))) void svsudot_lane_za32_s8_vg1x4(uint32_t, svint8x4_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_za32_s8_vg1x2))) void svsudot_za32_s8_vg1x2(uint32_t, svint8x2_t, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_za32_s8_vg1x4))) void svsudot_za32_s8_vg1x4(uint32_t, svint8x4_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_single_za32_s8_vg4x2))) void svsumla_single_za32_s8_vg4x2(uint32_t, svint8x2_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_single_za32_s8_vg4x4))) void svsumla_single_za32_s8_vg4x4(uint32_t, svint8x4_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_lane_za32_s8_vg4x1))) void svsumla_lane_za32_s8_vg4x1(uint32_t, svint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_lane_za32_s8_vg4x2))) void svsumla_lane_za32_s8_vg4x2(uint32_t, svint8x2_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_lane_za32_s8_vg4x4))) void svsumla_lane_za32_s8_vg4x4(uint32_t, svint8x4_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_za32_s8_vg4x1))) void svsumla_za32_s8_vg4x1(uint32_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_za32_s8_vg4x2))) void svsumla_za32_s8_vg4x2(uint32_t, svint8x2_t, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_za32_s8_vg4x4))) void svsumla_za32_s8_vg4x4(uint32_t, svint8x4_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsuvdot_lane_za32_s8_vg1x4))) void svsuvdot_lane_za32_s8_vg1x4(uint32_t, svint8x4_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_single_za32_u8_vg1x2))) void svusdot_single_za32_u8_vg1x2(uint32_t, svuint8x2_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_single_za32_u8_vg1x4))) void svusdot_single_za32_u8_vg1x4(uint32_t, svuint8x4_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_lane_za32_u8_vg1x2))) void svusdot_lane_za32_u8_vg1x2(uint32_t, svuint8x2_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_lane_za32_u8_vg1x4))) void svusdot_lane_za32_u8_vg1x4(uint32_t, svuint8x4_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_za32_u8_vg1x2))) void svusdot_za32_u8_vg1x2(uint32_t, svuint8x2_t, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_za32_u8_vg1x4))) void svusdot_za32_u8_vg1x4(uint32_t, svuint8x4_t, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_single_za32_u8_vg4x2))) void 
svusmla_single_za32_u8_vg4x2(uint32_t, svuint8x2_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_single_za32_u8_vg4x4))) void svusmla_single_za32_u8_vg4x4(uint32_t, svuint8x4_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_lane_za32_u8_vg4x1))) void svusmla_lane_za32_u8_vg4x1(uint32_t, svuint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_lane_za32_u8_vg4x2))) void svusmla_lane_za32_u8_vg4x2(uint32_t, svuint8x2_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_lane_za32_u8_vg4x4))) void svusmla_lane_za32_u8_vg4x4(uint32_t, svuint8x4_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_za32_u8_vg4x1))) void svusmla_za32_u8_vg4x1(uint32_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_za32_u8_vg4x2))) void svusmla_za32_u8_vg4x2(uint32_t, svuint8x2_t, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_za32_u8_vg4x4))) void svusmla_za32_u8_vg4x4(uint32_t, svuint8x4_t, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusvdot_lane_za32_u8_vg1x4))) void svusvdot_lane_za32_u8_vg1x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_bf16_vg1x2))) void svvdot_lane_za32_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_f16_vg1x2))) void svvdot_lane_za32_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_s16_vg1x2))) void svvdot_lane_za32_s16_vg1x2(uint32_t, svint16x2_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_u16_vg1x2))) void svvdot_lane_za32_u16_vg1x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_s8_vg1x4))) void svvdot_lane_za32_s8_vg1x4(uint32_t, svint8x4_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_u8_vg1x4))) void svvdot_lane_za32_u8_vg1x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_vg2))) void svwrite_hor_za16_u16_vg2(uint64_t, uint32_t, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_vg2))) void svwrite_hor_za16_bf16_vg2(uint64_t, uint32_t, svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_vg2))) void svwrite_hor_za16_f16_vg2(uint64_t, uint32_t, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_vg2))) void svwrite_hor_za16_s16_vg2(uint64_t, uint32_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_vg4))) void svwrite_hor_za16_u16_vg4(uint64_t, uint32_t, svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_vg4))) void svwrite_hor_za16_bf16_vg4(uint64_t, uint32_t, svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_vg4))) void svwrite_hor_za16_f16_vg4(uint64_t, uint32_t, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_vg4))) void svwrite_hor_za16_s16_vg4(uint64_t, 
uint32_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_vg2))) void svwrite_hor_za32_u32_vg2(uint64_t, uint32_t, svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_vg2))) void svwrite_hor_za32_f32_vg2(uint64_t, uint32_t, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_vg2))) void svwrite_hor_za32_s32_vg2(uint64_t, uint32_t, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_vg4))) void svwrite_hor_za32_u32_vg4(uint64_t, uint32_t, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_vg4))) void svwrite_hor_za32_f32_vg4(uint64_t, uint32_t, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_vg4))) void svwrite_hor_za32_s32_vg4(uint64_t, uint32_t, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_vg2))) void svwrite_hor_za64_u64_vg2(uint64_t, uint32_t, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_vg2))) void svwrite_hor_za64_f64_vg2(uint64_t, uint32_t, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_vg2))) void svwrite_hor_za64_s64_vg2(uint64_t, uint32_t, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_vg4))) void svwrite_hor_za64_u64_vg4(uint64_t, uint32_t, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_vg4))) void svwrite_hor_za64_f64_vg4(uint64_t, uint32_t, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_vg4))) void svwrite_hor_za64_s64_vg4(uint64_t, uint32_t, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_vg2))) void svwrite_hor_za8_u8_vg2(uint64_t, uint32_t, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_vg2))) void svwrite_hor_za8_s8_vg2(uint64_t, uint32_t, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_vg4))) void svwrite_hor_za8_u8_vg4(uint64_t, uint32_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_vg4))) void svwrite_hor_za8_s8_vg4(uint64_t, uint32_t, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_vg2))) void svwrite_ver_za16_u16_vg2(uint64_t, uint32_t, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_vg2))) void svwrite_ver_za16_bf16_vg2(uint64_t, uint32_t, svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_vg2))) void svwrite_ver_za16_f16_vg2(uint64_t, uint32_t, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_vg2))) void svwrite_ver_za16_s16_vg2(uint64_t, uint32_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_vg4))) void svwrite_ver_za16_u16_vg4(uint64_t, uint32_t, svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_vg4))) void svwrite_ver_za16_bf16_vg4(uint64_t, uint32_t, svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_vg4))) void svwrite_ver_za16_f16_vg4(uint64_t, uint32_t, svfloat16x4_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_vg4))) void svwrite_ver_za16_s16_vg4(uint64_t, uint32_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_vg2))) void svwrite_ver_za32_u32_vg2(uint64_t, uint32_t, svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_vg2))) void svwrite_ver_za32_f32_vg2(uint64_t, uint32_t, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_vg2))) void svwrite_ver_za32_s32_vg2(uint64_t, uint32_t, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_vg4))) void svwrite_ver_za32_u32_vg4(uint64_t, uint32_t, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_vg4))) void svwrite_ver_za32_f32_vg4(uint64_t, uint32_t, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_vg4))) void svwrite_ver_za32_s32_vg4(uint64_t, uint32_t, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_vg2))) void svwrite_ver_za64_u64_vg2(uint64_t, uint32_t, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_vg2))) void svwrite_ver_za64_f64_vg2(uint64_t, uint32_t, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_vg2))) void svwrite_ver_za64_s64_vg2(uint64_t, uint32_t, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_vg4))) void svwrite_ver_za64_u64_vg4(uint64_t, uint32_t, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_vg4))) void svwrite_ver_za64_f64_vg4(uint64_t, uint32_t, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_vg4))) void svwrite_ver_za64_s64_vg4(uint64_t, uint32_t, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_vg2))) void svwrite_ver_za8_u8_vg2(uint64_t, uint32_t, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_vg2))) void svwrite_ver_za8_s8_vg2(uint64_t, uint32_t, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_vg4))) void svwrite_ver_za8_u8_vg4(uint64_t, uint32_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_vg4))) void svwrite_ver_za8_s8_vg4(uint64_t, uint32_t, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_u16_vg1x2))) void svwrite_za16_u16_vg1x2(uint32_t, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_bf16_vg1x2))) void svwrite_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_f16_vg1x2))) void svwrite_za16_f16_vg1x2(uint32_t, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_s16_vg1x2))) void svwrite_za16_s16_vg1x2(uint32_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_u16_vg1x4))) void svwrite_za16_u16_vg1x4(uint32_t, svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_bf16_vg1x4))) void svwrite_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_f16_vg1x4))) void svwrite_za16_f16_vg1x4(uint32_t, svfloat16x4_t); 
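/*
 * A minimal usage sketch for the ZA vector-group intrinsics declared above
 * (the prototypes appear to be Clang's <arm_sme.h> SME/SME2 declarations
 * embedded as strings in this binary). Assumptions, not taken from the
 * original: an SME2-capable target, a caller that is already in streaming
 * mode with live ZA state, and availability of Clang's __arm_streaming /
 * __arm_inout("za") keyword attributes.
 */
#include <arm_sme.h>

/* Accumulate a pair of f32 vectors into the ZA vector group selected by
 * `slice`, then read the updated group back. Only intrinsics declared above
 * are used: svadd_za32_f32_vg1x2 and svread_za32_f32_vg1x2. */
static svfloat32x2_t accumulate_pair(uint32_t slice, svfloat32x2_t zn)
    __arm_streaming __arm_inout("za")
{
    svadd_za32_f32_vg1x2(slice, zn);      /* ZA32 vector group at `slice` += zn */
    return svread_za32_f32_vg1x2(slice);  /* read the updated vector group */
}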
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_s16_vg1x4))) void svwrite_za16_s16_vg1x4(uint32_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_u32_vg1x2))) void svwrite_za32_u32_vg1x2(uint32_t, svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_f32_vg1x2))) void svwrite_za32_f32_vg1x2(uint32_t, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_s32_vg1x2))) void svwrite_za32_s32_vg1x2(uint32_t, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_u32_vg1x4))) void svwrite_za32_u32_vg1x4(uint32_t, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_f32_vg1x4))) void svwrite_za32_f32_vg1x4(uint32_t, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_s32_vg1x4))) void svwrite_za32_s32_vg1x4(uint32_t, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_u64_vg1x2))) void svwrite_za64_u64_vg1x2(uint32_t, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_f64_vg1x2))) void svwrite_za64_f64_vg1x2(uint32_t, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_s64_vg1x2))) void svwrite_za64_s64_vg1x2(uint32_t, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_u64_vg1x4))) void svwrite_za64_u64_vg1x4(uint32_t, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_f64_vg1x4))) void svwrite_za64_f64_vg1x4(uint32_t, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_s64_vg1x4))) void svwrite_za64_s64_vg1x4(uint32_t, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_u8_vg1x2))) void svwrite_za8_u8_vg1x2(uint32_t, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_s8_vg1x2))) void svwrite_za8_s8_vg1x2(uint32_t, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_u8_vg1x4))) void svwrite_za8_u8_vg1x4(uint32_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_s8_vg1x4))) void svwrite_za8_s8_vg1x4(uint32_t, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_zt))) void svzero_zt(uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_u32_vg1x2))) void svadd_write_za32_vg1x2(uint32_t, svuint32x2_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_s32_vg1x2))) void svadd_write_za32_vg1x2(uint32_t, svint32x2_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_u32_vg1x4))) void svadd_write_za32_vg1x4(uint32_t, svuint32x4_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_s32_vg1x4))) void svadd_write_za32_vg1x4(uint32_t, svint32x4_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_u32_vg1x2))) void svadd_write_za32_vg1x2(uint32_t, svuint32x2_t, svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_s32_vg1x2))) void svadd_write_za32_vg1x2(uint32_t, svint32x2_t, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_u32_vg1x4))) void svadd_write_za32_vg1x4(uint32_t, svuint32x4_t, 
svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_s32_vg1x4))) void svadd_write_za32_vg1x4(uint32_t, svint32x4_t, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_u32_vg1x2))) void svadd_za32_vg1x2(uint32_t, svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_f32_vg1x2))) void svadd_za32_vg1x2(uint32_t, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_s32_vg1x2))) void svadd_za32_vg1x2(uint32_t, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_u32_vg1x4))) void svadd_za32_vg1x4(uint32_t, svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_f32_vg1x4))) void svadd_za32_vg1x4(uint32_t, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_s32_vg1x4))) void svadd_za32_vg1x4(uint32_t, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmopa_za32_u32_m))) void svbmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmopa_za32_s32_m))) void svbmopa_za32_m(uint64_t, svbool_t, svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmops_za32_u32_m))) void svbmops_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmops_za32_s32_m))) void svbmops_za32_m(uint64_t, svbool_t, svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_bf16_vg1x2))) void svdot_za32_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_f16_vg1x2))) void svdot_za32_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s8_vg1x2))) void svdot_za32_vg1x2(uint32_t, svint8x2_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s16_vg1x2))) void svdot_za32_vg1x2(uint32_t, svint16x2_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u8_vg1x2))) void svdot_za32_vg1x2(uint32_t, svuint8x2_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u16_vg1x2))) void svdot_za32_vg1x2(uint32_t, svuint16x2_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_bf16_vg1x4))) void svdot_za32_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_f16_vg1x4))) void svdot_za32_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s8_vg1x4))) void svdot_za32_vg1x4(uint32_t, svint8x4_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s16_vg1x4))) void svdot_za32_vg1x4(uint32_t, svint16x4_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u8_vg1x4))) void svdot_za32_vg1x4(uint32_t, svuint8x4_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u16_vg1x4))) void svdot_za32_vg1x4(uint32_t, svuint16x4_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_bf16_vg1x2))) void svdot_lane_za32_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, 
uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_f16_vg1x2))) void svdot_lane_za32_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s8_vg1x2))) void svdot_lane_za32_vg1x2(uint32_t, svint8x2_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s16_vg1x2))) void svdot_lane_za32_vg1x2(uint32_t, svint16x2_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u8_vg1x2))) void svdot_lane_za32_vg1x2(uint32_t, svuint8x2_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u16_vg1x2))) void svdot_lane_za32_vg1x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_bf16_vg1x4))) void svdot_lane_za32_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_f16_vg1x4))) void svdot_lane_za32_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s8_vg1x4))) void svdot_lane_za32_vg1x4(uint32_t, svint8x4_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s16_vg1x4))) void svdot_lane_za32_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u8_vg1x4))) void svdot_lane_za32_vg1x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u16_vg1x4))) void svdot_lane_za32_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_bf16_vg1x2))) void svdot_za32_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_f16_vg1x2))) void svdot_za32_vg1x2(uint32_t, svfloat16x2_t, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s8_vg1x2))) void svdot_za32_vg1x2(uint32_t, svint8x2_t, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s16_vg1x2))) void svdot_za32_vg1x2(uint32_t, svint16x2_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u8_vg1x2))) void svdot_za32_vg1x2(uint32_t, svuint8x2_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u16_vg1x2))) void svdot_za32_vg1x2(uint32_t, svuint16x2_t, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_bf16_vg1x4))) void svdot_za32_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_f16_vg1x4))) void svdot_za32_vg1x4(uint32_t, svfloat16x4_t, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s8_vg1x4))) void svdot_za32_vg1x4(uint32_t, svint8x4_t, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s16_vg1x4))) void svdot_za32_vg1x4(uint32_t, svint16x4_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u8_vg1x4))) void svdot_za32_vg1x4(uint32_t, svuint8x4_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u16_vg1x4))) void svdot_za32_vg1x4(uint32_t, svuint16x4_t, svuint16x4_t); 
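/*
 * Editorial usage sketch (not part of the extracted declarations above): the
 * overloaded svdot_za32_vg1x2 forms declared above accumulate widened dot
 * products into ZA; with two svint8x2_t operands the call resolves to the
 * __builtin_sme_svdot_za32_s8_vg1x2 alias. The attributes and header name
 * below follow the ACLE and are assumptions about the toolchain in use.
 */
#include <arm_sme.h>

void dot_accumulate(uint32_t slice, svint8x2_t zn, svint8x2_t zm)
    __arm_streaming __arm_inout("za") {
  /* Add the int8 dot products of zn and zm into the two ZA.S vector groups
   * selected by 'slice'. */
  svdot_za32_vg1x2(slice, zn, zm);
}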
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f32_vg1x2))) void svmla_za32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f32_vg1x4))) void svmla_za32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_bf16_vg2x2))) void svmla_za32_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f16_vg2x2))) void svmla_za32_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s16_vg2x2))) void svmla_za32_vg2x2(uint32_t, svint16x2_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u16_vg2x2))) void svmla_za32_vg2x2(uint32_t, svuint16x2_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_bf16_vg2x4))) void svmla_za32_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f16_vg2x4))) void svmla_za32_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s16_vg2x4))) void svmla_za32_vg2x4(uint32_t, svint16x4_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u16_vg2x4))) void svmla_za32_vg2x4(uint32_t, svuint16x4_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s8_vg4x2))) void svmla_za32_vg4x2(uint32_t, svint8x2_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u8_vg4x2))) void svmla_za32_vg4x2(uint32_t, svuint8x2_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s8_vg4x4))) void svmla_za32_vg4x4(uint32_t, svint8x4_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u8_vg4x4))) void svmla_za32_vg4x4(uint32_t, svuint8x4_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f32_vg1x2))) void svmla_lane_za32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f32_vg1x4))) void svmla_lane_za32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_bf16_vg2x1))) void svmla_lane_za32_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f16_vg2x1))) void svmla_lane_za32_vg2x1(uint32_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s16_vg2x1))) void svmla_lane_za32_vg2x1(uint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u16_vg2x1))) void svmla_lane_za32_vg2x1(uint32_t, svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_bf16_vg2x2))) void svmla_lane_za32_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f16_vg2x2))) void svmla_lane_za32_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s16_vg2x2))) void 
svmla_lane_za32_vg2x2(uint32_t, svint16x2_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u16_vg2x2))) void svmla_lane_za32_vg2x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_bf16_vg2x4))) void svmla_lane_za32_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f16_vg2x4))) void svmla_lane_za32_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s16_vg2x4))) void svmla_lane_za32_vg2x4(uint32_t, svint16x4_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u16_vg2x4))) void svmla_lane_za32_vg2x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s8_vg4x1))) void svmla_lane_za32_vg4x1(uint32_t, svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u8_vg4x1))) void svmla_lane_za32_vg4x1(uint32_t, svuint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s8_vg4x2))) void svmla_lane_za32_vg4x2(uint32_t, svint8x2_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u8_vg4x2))) void svmla_lane_za32_vg4x2(uint32_t, svuint8x2_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s8_vg4x4))) void svmla_lane_za32_vg4x4(uint32_t, svint8x4_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u8_vg4x4))) void svmla_lane_za32_vg4x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f32_vg1x2))) void svmla_za32_vg1x2(uint32_t, svfloat32x2_t, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f32_vg1x4))) void svmla_za32_vg1x4(uint32_t, svfloat32x4_t, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_bf16_vg2x1))) void svmla_za32_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f16_vg2x1))) void svmla_za32_vg2x1(uint32_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s16_vg2x1))) void svmla_za32_vg2x1(uint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u16_vg2x1))) void svmla_za32_vg2x1(uint32_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_bf16_vg2x2))) void svmla_za32_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f16_vg2x2))) void svmla_za32_vg2x2(uint32_t, svfloat16x2_t, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s16_vg2x2))) void svmla_za32_vg2x2(uint32_t, svint16x2_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u16_vg2x2))) void svmla_za32_vg2x2(uint32_t, svuint16x2_t, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_bf16_vg2x4))) void svmla_za32_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f16_vg2x4))) void 
svmla_za32_vg2x4(uint32_t, svfloat16x4_t, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s16_vg2x4))) void svmla_za32_vg2x4(uint32_t, svint16x4_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u16_vg2x4))) void svmla_za32_vg2x4(uint32_t, svuint16x4_t, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s8_vg4x1))) void svmla_za32_vg4x1(uint32_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u8_vg4x1))) void svmla_za32_vg4x1(uint32_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s8_vg4x2))) void svmla_za32_vg4x2(uint32_t, svint8x2_t, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u8_vg4x2))) void svmla_za32_vg4x2(uint32_t, svuint8x2_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s8_vg4x4))) void svmla_za32_vg4x4(uint32_t, svint8x4_t, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u8_vg4x4))) void svmla_za32_vg4x4(uint32_t, svuint8x4_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f32_vg1x2))) void svmls_za32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f32_vg1x4))) void svmls_za32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_bf16_vg2x2))) void svmls_za32_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f16_vg2x2))) void svmls_za32_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s16_vg2x2))) void svmls_za32_vg2x2(uint32_t, svint16x2_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u16_vg2x2))) void svmls_za32_vg2x2(uint32_t, svuint16x2_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_bf16_vg2x4))) void svmls_za32_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f16_vg2x4))) void svmls_za32_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s16_vg2x4))) void svmls_za32_vg2x4(uint32_t, svint16x4_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u16_vg2x4))) void svmls_za32_vg2x4(uint32_t, svuint16x4_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s8_vg4x2))) void svmls_za32_vg4x2(uint32_t, svint8x2_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u8_vg4x2))) void svmls_za32_vg4x2(uint32_t, svuint8x2_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s8_vg4x4))) void svmls_za32_vg4x4(uint32_t, svint8x4_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u8_vg4x4))) void svmls_za32_vg4x4(uint32_t, svuint8x4_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f32_vg1x2))) void svmls_lane_za32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f32_vg1x4))) void svmls_lane_za32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_bf16_vg2x1))) void svmls_lane_za32_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f16_vg2x1))) void svmls_lane_za32_vg2x1(uint32_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s16_vg2x1))) void svmls_lane_za32_vg2x1(uint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u16_vg2x1))) void svmls_lane_za32_vg2x1(uint32_t, svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_bf16_vg2x2))) void svmls_lane_za32_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f16_vg2x2))) void svmls_lane_za32_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s16_vg2x2))) void svmls_lane_za32_vg2x2(uint32_t, svint16x2_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u16_vg2x2))) void svmls_lane_za32_vg2x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_bf16_vg2x4))) void svmls_lane_za32_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f16_vg2x4))) void svmls_lane_za32_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s16_vg2x4))) void svmls_lane_za32_vg2x4(uint32_t, svint16x4_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u16_vg2x4))) void svmls_lane_za32_vg2x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s8_vg4x1))) void svmls_lane_za32_vg4x1(uint32_t, svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u8_vg4x1))) void svmls_lane_za32_vg4x1(uint32_t, svuint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s8_vg4x2))) void svmls_lane_za32_vg4x2(uint32_t, svint8x2_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u8_vg4x2))) void svmls_lane_za32_vg4x2(uint32_t, svuint8x2_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s8_vg4x4))) void svmls_lane_za32_vg4x4(uint32_t, svint8x4_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u8_vg4x4))) void svmls_lane_za32_vg4x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f32_vg1x2))) void svmls_za32_vg1x2(uint32_t, svfloat32x2_t, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f32_vg1x4))) void svmls_za32_vg1x4(uint32_t, svfloat32x4_t, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_bf16_vg2x1))) void svmls_za32_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f16_vg2x1))) void svmls_za32_vg2x1(uint32_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s16_vg2x1))) void svmls_za32_vg2x1(uint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u16_vg2x1))) void svmls_za32_vg2x1(uint32_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_bf16_vg2x2))) void svmls_za32_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f16_vg2x2))) void svmls_za32_vg2x2(uint32_t, svfloat16x2_t, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s16_vg2x2))) void svmls_za32_vg2x2(uint32_t, svint16x2_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u16_vg2x2))) void svmls_za32_vg2x2(uint32_t, svuint16x2_t, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_bf16_vg2x4))) void svmls_za32_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f16_vg2x4))) void svmls_za32_vg2x4(uint32_t, svfloat16x4_t, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s16_vg2x4))) void svmls_za32_vg2x4(uint32_t, svint16x4_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u16_vg2x4))) void svmls_za32_vg2x4(uint32_t, svuint16x4_t, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s8_vg4x1))) void svmls_za32_vg4x1(uint32_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u8_vg4x1))) void svmls_za32_vg4x1(uint32_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s8_vg4x2))) void svmls_za32_vg4x2(uint32_t, svint8x2_t, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u8_vg4x2))) void svmls_za32_vg4x2(uint32_t, svuint8x2_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s8_vg4x4))) void svmls_za32_vg4x4(uint32_t, svint8x4_t, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u8_vg4x4))) void svmls_za32_vg4x4(uint32_t, svuint8x4_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s16_m))) void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u16_m))) void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s16_m))) void svmops_za32_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u16_m))) void svmops_za32_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_u32_vg1x2))) void svsub_write_za32_vg1x2(uint32_t, svuint32x2_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_s32_vg1x2))) void svsub_write_za32_vg1x2(uint32_t, svint32x2_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_u32_vg1x4))) void svsub_write_za32_vg1x4(uint32_t, svuint32x4_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_s32_vg1x4))) void svsub_write_za32_vg1x4(uint32_t, svint32x4_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_u32_vg1x2))) void svsub_write_za32_vg1x2(uint32_t, svuint32x2_t, svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_s32_vg1x2))) void svsub_write_za32_vg1x2(uint32_t, svint32x2_t, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_u32_vg1x4))) void svsub_write_za32_vg1x4(uint32_t, svuint32x4_t, svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_s32_vg1x4))) void svsub_write_za32_vg1x4(uint32_t, svint32x4_t, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_u32_vg1x2))) void svsub_za32_vg1x2(uint32_t, svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_f32_vg1x2))) void svsub_za32_vg1x2(uint32_t, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_s32_vg1x2))) void svsub_za32_vg1x2(uint32_t, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_u32_vg1x4))) void svsub_za32_vg1x4(uint32_t, svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_f32_vg1x4))) void svsub_za32_vg1x4(uint32_t, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_s32_vg1x4))) void svsub_za32_vg1x4(uint32_t, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_single_za32_s8_vg1x2))) void svsudot_za32_vg1x2(uint32_t, svint8x2_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_single_za32_s8_vg1x4))) void svsudot_za32_vg1x4(uint32_t, svint8x4_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_lane_za32_s8_vg1x2))) void svsudot_lane_za32_vg1x2(uint32_t, svint8x2_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_lane_za32_s8_vg1x4))) void svsudot_lane_za32_vg1x4(uint32_t, svint8x4_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_za32_s8_vg1x2))) void svsudot_za32_vg1x2(uint32_t, svint8x2_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_za32_s8_vg1x4))) void svsudot_za32_vg1x4(uint32_t, svint8x4_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_single_za32_s8_vg4x2))) void svsumla_za32_vg4x2(uint32_t, svint8x2_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_single_za32_s8_vg4x4))) void svsumla_za32_vg4x4(uint32_t, svint8x4_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_lane_za32_s8_vg4x1))) void svsumla_lane_za32_vg4x1(uint32_t, svint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_lane_za32_s8_vg4x2))) void svsumla_lane_za32_vg4x2(uint32_t, svint8x2_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_lane_za32_s8_vg4x4))) void svsumla_lane_za32_vg4x4(uint32_t, svint8x4_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_za32_s8_vg4x1))) void svsumla_za32_vg4x1(uint32_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_za32_s8_vg4x2))) void svsumla_za32_vg4x2(uint32_t, 
svint8x2_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_za32_s8_vg4x4))) void svsumla_za32_vg4x4(uint32_t, svint8x4_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsuvdot_lane_za32_s8_vg1x4))) void svsuvdot_lane_za32_vg1x4(uint32_t, svint8x4_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_single_za32_u8_vg1x2))) void svusdot_za32_vg1x2(uint32_t, svuint8x2_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_single_za32_u8_vg1x4))) void svusdot_za32_vg1x4(uint32_t, svuint8x4_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_lane_za32_u8_vg1x2))) void svusdot_lane_za32_vg1x2(uint32_t, svuint8x2_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_lane_za32_u8_vg1x4))) void svusdot_lane_za32_vg1x4(uint32_t, svuint8x4_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_za32_u8_vg1x2))) void svusdot_za32_vg1x2(uint32_t, svuint8x2_t, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_za32_u8_vg1x4))) void svusdot_za32_vg1x4(uint32_t, svuint8x4_t, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_single_za32_u8_vg4x2))) void svusmla_za32_vg4x2(uint32_t, svuint8x2_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_single_za32_u8_vg4x4))) void svusmla_za32_vg4x4(uint32_t, svuint8x4_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_lane_za32_u8_vg4x1))) void svusmla_lane_za32_vg4x1(uint32_t, svuint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_lane_za32_u8_vg4x2))) void svusmla_lane_za32_vg4x2(uint32_t, svuint8x2_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_lane_za32_u8_vg4x4))) void svusmla_lane_za32_vg4x4(uint32_t, svuint8x4_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_za32_u8_vg4x1))) void svusmla_za32_vg4x1(uint32_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_za32_u8_vg4x2))) void svusmla_za32_vg4x2(uint32_t, svuint8x2_t, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_za32_u8_vg4x4))) void svusmla_za32_vg4x4(uint32_t, svuint8x4_t, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusvdot_lane_za32_u8_vg1x4))) void svusvdot_lane_za32_vg1x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_bf16_vg1x2))) void svvdot_lane_za32_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_f16_vg1x2))) void svvdot_lane_za32_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_s16_vg1x2))) void svvdot_lane_za32_vg1x2(uint32_t, svint16x2_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_u16_vg1x2))) void svvdot_lane_za32_vg1x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_s8_vg1x4))) void svvdot_lane_za32_vg1x4(uint32_t, svint8x4_t, svint8_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_u8_vg1x4))) void svvdot_lane_za32_vg1x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_vg2))) void svwrite_hor_za16_vg2(uint64_t, uint32_t, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_vg2))) void svwrite_hor_za16_vg2(uint64_t, uint32_t, svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_vg2))) void svwrite_hor_za16_vg2(uint64_t, uint32_t, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_vg2))) void svwrite_hor_za16_vg2(uint64_t, uint32_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_vg4))) void svwrite_hor_za16_vg4(uint64_t, uint32_t, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_vg4))) void svwrite_hor_za16_vg4(uint64_t, uint32_t, svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_vg4))) void svwrite_hor_za16_vg4(uint64_t, uint32_t, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_vg4))) void svwrite_hor_za16_vg4(uint64_t, uint32_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_vg2))) void svwrite_hor_za32_vg2(uint64_t, uint32_t, svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_vg2))) void svwrite_hor_za32_vg2(uint64_t, uint32_t, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_vg2))) void svwrite_hor_za32_vg2(uint64_t, uint32_t, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_vg4))) void svwrite_hor_za32_vg4(uint64_t, uint32_t, svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_vg4))) void svwrite_hor_za32_vg4(uint64_t, uint32_t, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_vg4))) void svwrite_hor_za32_vg4(uint64_t, uint32_t, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_vg2))) void svwrite_hor_za64_vg2(uint64_t, uint32_t, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_vg2))) void svwrite_hor_za64_vg2(uint64_t, uint32_t, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_vg2))) void svwrite_hor_za64_vg2(uint64_t, uint32_t, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_vg4))) void svwrite_hor_za64_vg4(uint64_t, uint32_t, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_vg4))) void svwrite_hor_za64_vg4(uint64_t, uint32_t, svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_vg4))) void svwrite_hor_za64_vg4(uint64_t, uint32_t, svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_vg2))) void svwrite_hor_za8_vg2(uint64_t, uint32_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_vg2))) void svwrite_hor_za8_vg2(uint64_t, uint32_t, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_vg4))) void 
svwrite_hor_za8_vg4(uint64_t, uint32_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_vg4))) void svwrite_hor_za8_vg4(uint64_t, uint32_t, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_vg2))) void svwrite_ver_za16_vg2(uint64_t, uint32_t, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_vg2))) void svwrite_ver_za16_vg2(uint64_t, uint32_t, svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_vg2))) void svwrite_ver_za16_vg2(uint64_t, uint32_t, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_vg2))) void svwrite_ver_za16_vg2(uint64_t, uint32_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_vg4))) void svwrite_ver_za16_vg4(uint64_t, uint32_t, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_vg4))) void svwrite_ver_za16_vg4(uint64_t, uint32_t, svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_vg4))) void svwrite_ver_za16_vg4(uint64_t, uint32_t, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_vg4))) void svwrite_ver_za16_vg4(uint64_t, uint32_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_vg2))) void svwrite_ver_za32_vg2(uint64_t, uint32_t, svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_vg2))) void svwrite_ver_za32_vg2(uint64_t, uint32_t, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_vg2))) void svwrite_ver_za32_vg2(uint64_t, uint32_t, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_vg4))) void svwrite_ver_za32_vg4(uint64_t, uint32_t, svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_vg4))) void svwrite_ver_za32_vg4(uint64_t, uint32_t, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_vg4))) void svwrite_ver_za32_vg4(uint64_t, uint32_t, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_vg2))) void svwrite_ver_za64_vg2(uint64_t, uint32_t, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_vg2))) void svwrite_ver_za64_vg2(uint64_t, uint32_t, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_vg2))) void svwrite_ver_za64_vg2(uint64_t, uint32_t, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_vg4))) void svwrite_ver_za64_vg4(uint64_t, uint32_t, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_vg4))) void svwrite_ver_za64_vg4(uint64_t, uint32_t, svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_vg4))) void svwrite_ver_za64_vg4(uint64_t, uint32_t, svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_vg2))) void svwrite_ver_za8_vg2(uint64_t, uint32_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_vg2))) void svwrite_ver_za8_vg2(uint64_t, uint32_t, svint8x2_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_vg4))) void svwrite_ver_za8_vg4(uint64_t, uint32_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_vg4))) void svwrite_ver_za8_vg4(uint64_t, uint32_t, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_u16_vg1x2))) void svwrite_za16_vg1x2(uint32_t, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_bf16_vg1x2))) void svwrite_za16_vg1x2(uint32_t, svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_f16_vg1x2))) void svwrite_za16_vg1x2(uint32_t, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_s16_vg1x2))) void svwrite_za16_vg1x2(uint32_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_u16_vg1x4))) void svwrite_za16_vg1x4(uint32_t, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_bf16_vg1x4))) void svwrite_za16_vg1x4(uint32_t, svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_f16_vg1x4))) void svwrite_za16_vg1x4(uint32_t, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_s16_vg1x4))) void svwrite_za16_vg1x4(uint32_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_u32_vg1x2))) void svwrite_za32_vg1x2(uint32_t, svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_f32_vg1x2))) void svwrite_za32_vg1x2(uint32_t, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_s32_vg1x2))) void svwrite_za32_vg1x2(uint32_t, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_u32_vg1x4))) void svwrite_za32_vg1x4(uint32_t, svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_f32_vg1x4))) void svwrite_za32_vg1x4(uint32_t, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_s32_vg1x4))) void svwrite_za32_vg1x4(uint32_t, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_u64_vg1x2))) void svwrite_za64_vg1x2(uint32_t, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_f64_vg1x2))) void svwrite_za64_vg1x2(uint32_t, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_s64_vg1x2))) void svwrite_za64_vg1x2(uint32_t, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_u64_vg1x4))) void svwrite_za64_vg1x4(uint32_t, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_f64_vg1x4))) void svwrite_za64_vg1x4(uint32_t, svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_s64_vg1x4))) void svwrite_za64_vg1x4(uint32_t, svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_u8_vg1x2))) void svwrite_za8_vg1x2(uint32_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_s8_vg1x2))) void svwrite_za8_vg1x2(uint32_t, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_u8_vg1x4))) void svwrite_za8_vg1x4(uint32_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_s8_vg1x4))) void svwrite_za8_vg1x4(uint32_t, svint8x4_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_f64_vg1x2))) void svadd_za64_f64_vg1x2(uint32_t, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_f64_vg1x4))) void svadd_za64_f64_vg1x4(uint32_t, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_f64_vg1x2))) void svmla_single_za64_f64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_f64_vg1x4))) void svmla_single_za64_f64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_f64_vg1x2))) void svmla_lane_za64_f64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_f64_vg1x4))) void svmla_lane_za64_f64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_f64_vg1x2))) void svmla_za64_f64_vg1x2(uint32_t, svfloat64x2_t, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_f64_vg1x4))) void svmla_za64_f64_vg1x4(uint32_t, svfloat64x4_t, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_f64_vg1x2))) void svmls_single_za64_f64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_f64_vg1x4))) void svmls_single_za64_f64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_f64_vg1x2))) void svmls_lane_za64_f64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_f64_vg1x4))) void svmls_lane_za64_f64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_f64_vg1x2))) void svmls_za64_f64_vg1x2(uint32_t, svfloat64x2_t, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_f64_vg1x4))) void svmls_za64_f64_vg1x4(uint32_t, svfloat64x4_t, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_f64_vg1x2))) void svsub_za64_f64_vg1x2(uint32_t, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_f64_vg1x4))) void svsub_za64_f64_vg1x4(uint32_t, svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_f64_vg1x2))) void svadd_za64_vg1x2(uint32_t, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_f64_vg1x4))) void svadd_za64_vg1x4(uint32_t, svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_f64_vg1x2))) void svmla_za64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_f64_vg1x4))) void svmla_za64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_f64_vg1x2))) void svmla_lane_za64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_f64_vg1x4))) void svmla_lane_za64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_f64_vg1x2))) void svmla_za64_vg1x2(uint32_t, svfloat64x2_t, svfloat64x2_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_f64_vg1x4))) void svmla_za64_vg1x4(uint32_t, svfloat64x4_t, svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_f64_vg1x2))) void svmls_za64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_f64_vg1x4))) void svmls_za64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_f64_vg1x2))) void svmls_lane_za64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_f64_vg1x4))) void svmls_lane_za64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_f64_vg1x2))) void svmls_za64_vg1x2(uint32_t, svfloat64x2_t, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_f64_vg1x4))) void svmls_za64_vg1x4(uint32_t, svfloat64x4_t, svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_f64_vg1x2))) void svsub_za64_vg1x2(uint32_t, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_f64_vg1x4))) void svsub_za64_vg1x4(uint32_t, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_u64_vg1x2))) void svadd_write_single_za64_u64_vg1x2(uint32_t, svuint64x2_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_s64_vg1x2))) void svadd_write_single_za64_s64_vg1x2(uint32_t, svint64x2_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_u64_vg1x4))) void svadd_write_single_za64_u64_vg1x4(uint32_t, svuint64x4_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_s64_vg1x4))) void svadd_write_single_za64_s64_vg1x4(uint32_t, svint64x4_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_u64_vg1x2))) void svadd_write_za64_u64_vg1x2(uint32_t, svuint64x2_t, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_s64_vg1x2))) void svadd_write_za64_s64_vg1x2(uint32_t, svint64x2_t, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_u64_vg1x4))) void svadd_write_za64_u64_vg1x4(uint32_t, svuint64x4_t, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_s64_vg1x4))) void svadd_write_za64_s64_vg1x4(uint32_t, svint64x4_t, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_u64_vg1x2))) void svadd_za64_u64_vg1x2(uint32_t, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_s64_vg1x2))) void svadd_za64_s64_vg1x2(uint32_t, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_u64_vg1x4))) void svadd_za64_u64_vg1x4(uint32_t, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_s64_vg1x4))) void svadd_za64_s64_vg1x4(uint32_t, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_s16_vg1x2))) void svdot_single_za64_s16_vg1x2(uint32_t, svint16x2_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_u16_vg1x2))) void svdot_single_za64_u16_vg1x2(uint32_t, svuint16x2_t, svuint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_s16_vg1x4))) void svdot_single_za64_s16_vg1x4(uint32_t, svint16x4_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_u16_vg1x4))) void svdot_single_za64_u16_vg1x4(uint32_t, svuint16x4_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_s16_vg1x2))) void svdot_lane_za64_s16_vg1x2(uint32_t, svint16x2_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_u16_vg1x2))) void svdot_lane_za64_u16_vg1x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_s16_vg1x4))) void svdot_lane_za64_s16_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_u16_vg1x4))) void svdot_lane_za64_u16_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_s16_vg1x2))) void svdot_za64_s16_vg1x2(uint32_t, svint16x2_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_u16_vg1x2))) void svdot_za64_u16_vg1x2(uint32_t, svuint16x2_t, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_s16_vg1x4))) void svdot_za64_s16_vg1x4(uint32_t, svint16x4_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_u16_vg1x4))) void svdot_za64_u16_vg1x4(uint32_t, svuint16x4_t, svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_s16_vg4x2))) void svmla_single_za64_s16_vg4x2(uint32_t, svint16x2_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_u16_vg4x2))) void svmla_single_za64_u16_vg4x2(uint32_t, svuint16x2_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_s16_vg4x4))) void svmla_single_za64_s16_vg4x4(uint32_t, svint16x4_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_u16_vg4x4))) void svmla_single_za64_u16_vg4x4(uint32_t, svuint16x4_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_s16_vg4x1))) void svmla_lane_za64_s16_vg4x1(uint32_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_u16_vg4x1))) void svmla_lane_za64_u16_vg4x1(uint32_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_s16_vg4x2))) void svmla_lane_za64_s16_vg4x2(uint32_t, svint16x2_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_u16_vg4x2))) void svmla_lane_za64_u16_vg4x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_s16_vg4x4))) void svmla_lane_za64_s16_vg4x4(uint32_t, svint16x4_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_u16_vg4x4))) void svmla_lane_za64_u16_vg4x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_s16_vg4x1))) void svmla_za64_s16_vg4x1(uint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_u16_vg4x1))) void svmla_za64_u16_vg4x1(uint32_t, svuint16_t, svuint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_s16_vg4x2))) void svmla_za64_s16_vg4x2(uint32_t, svint16x2_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_u16_vg4x2))) void svmla_za64_u16_vg4x2(uint32_t, svuint16x2_t, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_s16_vg4x4))) void svmla_za64_s16_vg4x4(uint32_t, svint16x4_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_u16_vg4x4))) void svmla_za64_u16_vg4x4(uint32_t, svuint16x4_t, svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_s16_vg4x2))) void svmls_single_za64_s16_vg4x2(uint32_t, svint16x2_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_u16_vg4x2))) void svmls_single_za64_u16_vg4x2(uint32_t, svuint16x2_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_s16_vg4x4))) void svmls_single_za64_s16_vg4x4(uint32_t, svint16x4_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_u16_vg4x4))) void svmls_single_za64_u16_vg4x4(uint32_t, svuint16x4_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_s16_vg4x1))) void svmls_lane_za64_s16_vg4x1(uint32_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_u16_vg4x1))) void svmls_lane_za64_u16_vg4x1(uint32_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_s16_vg4x2))) void svmls_lane_za64_s16_vg4x2(uint32_t, svint16x2_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_u16_vg4x2))) void svmls_lane_za64_u16_vg4x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_s16_vg4x4))) void svmls_lane_za64_s16_vg4x4(uint32_t, svint16x4_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_u16_vg4x4))) void svmls_lane_za64_u16_vg4x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_s16_vg4x1))) void svmls_za64_s16_vg4x1(uint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_u16_vg4x1))) void svmls_za64_u16_vg4x1(uint32_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_s16_vg4x2))) void svmls_za64_s16_vg4x2(uint32_t, svint16x2_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_u16_vg4x2))) void svmls_za64_u16_vg4x2(uint32_t, svuint16x2_t, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_s16_vg4x4))) void svmls_za64_s16_vg4x4(uint32_t, svint16x4_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_u16_vg4x4))) void svmls_za64_u16_vg4x4(uint32_t, svuint16x4_t, svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_u64_vg1x2))) void svsub_write_single_za64_u64_vg1x2(uint32_t, svuint64x2_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_s64_vg1x2))) void svsub_write_single_za64_s64_vg1x2(uint32_t, svint64x2_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_u64_vg1x4))) void 
svsub_write_single_za64_u64_vg1x4(uint32_t, svuint64x4_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_s64_vg1x4))) void svsub_write_single_za64_s64_vg1x4(uint32_t, svint64x4_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_u64_vg1x2))) void svsub_write_za64_u64_vg1x2(uint32_t, svuint64x2_t, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_s64_vg1x2))) void svsub_write_za64_s64_vg1x2(uint32_t, svint64x2_t, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_u64_vg1x4))) void svsub_write_za64_u64_vg1x4(uint32_t, svuint64x4_t, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_s64_vg1x4))) void svsub_write_za64_s64_vg1x4(uint32_t, svint64x4_t, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_u64_vg1x2))) void svsub_za64_u64_vg1x2(uint32_t, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_s64_vg1x2))) void svsub_za64_s64_vg1x2(uint32_t, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_u64_vg1x4))) void svsub_za64_u64_vg1x4(uint32_t, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_s64_vg1x4))) void svsub_za64_s64_vg1x4(uint32_t, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za64_s16_vg1x4))) void svvdot_lane_za64_s16_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za64_u16_vg1x4))) void svvdot_lane_za64_u16_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_u64_vg1x2))) void svadd_write_za64_vg1x2(uint32_t, svuint64x2_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_s64_vg1x2))) void svadd_write_za64_vg1x2(uint32_t, svint64x2_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_u64_vg1x4))) void svadd_write_za64_vg1x4(uint32_t, svuint64x4_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_s64_vg1x4))) void svadd_write_za64_vg1x4(uint32_t, svint64x4_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_u64_vg1x2))) void svadd_write_za64_vg1x2(uint32_t, svuint64x2_t, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_s64_vg1x2))) void svadd_write_za64_vg1x2(uint32_t, svint64x2_t, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_u64_vg1x4))) void svadd_write_za64_vg1x4(uint32_t, svuint64x4_t, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_s64_vg1x4))) void svadd_write_za64_vg1x4(uint32_t, svint64x4_t, svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_u64_vg1x2))) void svadd_za64_vg1x2(uint32_t, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_s64_vg1x2))) void svadd_za64_vg1x2(uint32_t, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_u64_vg1x4))) void svadd_za64_vg1x4(uint32_t, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_s64_vg1x4))) void svadd_za64_vg1x4(uint32_t, 
svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_s16_vg1x2))) void svdot_za64_vg1x2(uint32_t, svint16x2_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_u16_vg1x2))) void svdot_za64_vg1x2(uint32_t, svuint16x2_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_s16_vg1x4))) void svdot_za64_vg1x4(uint32_t, svint16x4_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_u16_vg1x4))) void svdot_za64_vg1x4(uint32_t, svuint16x4_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_s16_vg1x2))) void svdot_lane_za64_vg1x2(uint32_t, svint16x2_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_u16_vg1x2))) void svdot_lane_za64_vg1x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_s16_vg1x4))) void svdot_lane_za64_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_u16_vg1x4))) void svdot_lane_za64_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_s16_vg1x2))) void svdot_za64_vg1x2(uint32_t, svint16x2_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_u16_vg1x2))) void svdot_za64_vg1x2(uint32_t, svuint16x2_t, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_s16_vg1x4))) void svdot_za64_vg1x4(uint32_t, svint16x4_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_u16_vg1x4))) void svdot_za64_vg1x4(uint32_t, svuint16x4_t, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_s16_vg4x2))) void svmla_za64_vg4x2(uint32_t, svint16x2_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_u16_vg4x2))) void svmla_za64_vg4x2(uint32_t, svuint16x2_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_s16_vg4x4))) void svmla_za64_vg4x4(uint32_t, svint16x4_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_u16_vg4x4))) void svmla_za64_vg4x4(uint32_t, svuint16x4_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_s16_vg4x1))) void svmla_lane_za64_vg4x1(uint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_u16_vg4x1))) void svmla_lane_za64_vg4x1(uint32_t, svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_s16_vg4x2))) void svmla_lane_za64_vg4x2(uint32_t, svint16x2_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_u16_vg4x2))) void svmla_lane_za64_vg4x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_s16_vg4x4))) void svmla_lane_za64_vg4x4(uint32_t, svint16x4_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_u16_vg4x4))) void svmla_lane_za64_vg4x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_s16_vg4x1))) void svmla_za64_vg4x1(uint32_t, svint16_t, 
svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_u16_vg4x1))) void svmla_za64_vg4x1(uint32_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_s16_vg4x2))) void svmla_za64_vg4x2(uint32_t, svint16x2_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_u16_vg4x2))) void svmla_za64_vg4x2(uint32_t, svuint16x2_t, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_s16_vg4x4))) void svmla_za64_vg4x4(uint32_t, svint16x4_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_u16_vg4x4))) void svmla_za64_vg4x4(uint32_t, svuint16x4_t, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_s16_vg4x2))) void svmls_za64_vg4x2(uint32_t, svint16x2_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_u16_vg4x2))) void svmls_za64_vg4x2(uint32_t, svuint16x2_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_s16_vg4x4))) void svmls_za64_vg4x4(uint32_t, svint16x4_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_u16_vg4x4))) void svmls_za64_vg4x4(uint32_t, svuint16x4_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_s16_vg4x1))) void svmls_lane_za64_vg4x1(uint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_u16_vg4x1))) void svmls_lane_za64_vg4x1(uint32_t, svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_s16_vg4x2))) void svmls_lane_za64_vg4x2(uint32_t, svint16x2_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_u16_vg4x2))) void svmls_lane_za64_vg4x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_s16_vg4x4))) void svmls_lane_za64_vg4x4(uint32_t, svint16x4_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_u16_vg4x4))) void svmls_lane_za64_vg4x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_s16_vg4x1))) void svmls_za64_vg4x1(uint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_u16_vg4x1))) void svmls_za64_vg4x1(uint32_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_s16_vg4x2))) void svmls_za64_vg4x2(uint32_t, svint16x2_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_u16_vg4x2))) void svmls_za64_vg4x2(uint32_t, svuint16x2_t, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_s16_vg4x4))) void svmls_za64_vg4x4(uint32_t, svint16x4_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_u16_vg4x4))) void svmls_za64_vg4x4(uint32_t, svuint16x4_t, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_u64_vg1x2))) void svsub_write_za64_vg1x2(uint32_t, svuint64x2_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_s64_vg1x2))) void svsub_write_za64_vg1x2(uint32_t, svint64x2_t, svint64_t); __aio 
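/* [Illustrative usage sketch added by the editor; not part of the original
 * header.]  The ZA64 multi-vector declarations above come in two flavours:
 * explicitly typed names (the __ai aliases, e.g. svdot_za64_s16_vg1x2) and
 * type-generic overloads (the __aio aliases, e.g. svdot_za64_vg1x2) that
 * resolve on the argument types.  A minimal, hedged sketch of a streaming-mode
 * routine that accumulates a widening dot product into ZA -- assuming a target
 * with the sme-i16i64 feature and the ACLE keyword attributes
 * (__arm_streaming, __arm_inout("za")); the function name and slice index are
 * hypothetical:
 *
 *   #include <arm_sme.h>
 *
 *   void accumulate_row(svint16x2_t zn, svint16x2_t zm)
 *       __arm_streaming __arm_inout("za") {
 *     // ZA slice 0 += pairwise dot products of the two-vector groups zn and
 *     // zm (signed 16-bit elements widened into 64-bit accumulators).
 *     svdot_za64_s16_vg1x2(0, zn, zm);
 *     // The overloaded spelling picks the same builtin from the types:
 *     svdot_za64_vg1x2(0, zn, zm);
 *   }
 */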
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_u64_vg1x4))) void svsub_write_za64_vg1x4(uint32_t, svuint64x4_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_s64_vg1x4))) void svsub_write_za64_vg1x4(uint32_t, svint64x4_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_u64_vg1x2))) void svsub_write_za64_vg1x2(uint32_t, svuint64x2_t, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_s64_vg1x2))) void svsub_write_za64_vg1x2(uint32_t, svint64x2_t, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_u64_vg1x4))) void svsub_write_za64_vg1x4(uint32_t, svuint64x4_t, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_s64_vg1x4))) void svsub_write_za64_vg1x4(uint32_t, svint64x4_t, svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_u64_vg1x2))) void svsub_za64_vg1x2(uint32_t, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_s64_vg1x2))) void svsub_za64_vg1x2(uint32_t, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_u64_vg1x4))) void svsub_za64_vg1x4(uint32_t, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_s64_vg1x4))) void svsub_za64_vg1x4(uint32_t, svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za64_s16_vg1x4))) void svvdot_lane_za64_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za64_u16_vg1x4))) void svvdot_lane_za64_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); #ifdef __cplusplus } // extern "C" #endif #undef __ai #endif /* __ARM_SME_H */ avx512bf16intrin.havx512dqintrin.h/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __AVX512FINTRIN_H #define __AVX512FINTRIN_H typedef char __v64qi __attribute__((__vector_size__(64))); typedef short __v32hi __attribute__((__vector_size__(64))); typedef double __v8df __attribute__((__vector_size__(64))); typedef float __v16sf __attribute__((__vector_size__(64))); typedef long long __v8di __attribute__((__vector_size__(64))); typedef int __v16si __attribute__((__vector_size__(64))); /* Unsigned types */ typedef unsigned char __v64qu __attribute__((__vector_size__(64))); typedef unsigned short __v32hu __attribute__((__vector_size__(64))); typedef unsigned long long __v8du __attribute__((__vector_size__(64))); typedef unsigned int __v16su __attribute__((__vector_size__(64))); /* We need an explicitly signed variant for char. Note that this shouldn't * appear in the interface though. 
*/ typedef signed char __v64qs __attribute__((__vector_size__(64))); typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64))); typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64))); typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64))); typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1))); typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1))); typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1))); typedef unsigned char __mmask8; typedef unsigned short __mmask16; /* Rounding mode macros. */ #define _MM_FROUND_TO_NEAREST_INT 0x00 #define _MM_FROUND_TO_NEG_INF 0x01 #define _MM_FROUND_TO_POS_INF 0x02 #define _MM_FROUND_TO_ZERO 0x03 #define _MM_FROUND_CUR_DIRECTION 0x04 /* Constants for integer comparison predicates */ typedef enum { _MM_CMPINT_EQ, /* Equal */ _MM_CMPINT_LT, /* Less than */ _MM_CMPINT_LE, /* Less than or Equal */ _MM_CMPINT_UNUSED, _MM_CMPINT_NE, /* Not Equal */ _MM_CMPINT_NLT, /* Not Less than */ #define _MM_CMPINT_GE _MM_CMPINT_NLT /* Greater than or Equal */ _MM_CMPINT_NLE /* Not Less than or Equal */ #define _MM_CMPINT_GT _MM_CMPINT_NLE /* Greater than */ } _MM_CMPINT_ENUM; typedef enum { _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B, _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17, _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23, _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F, _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B, _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47, _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53, _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F, _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, 
_MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B, _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77, _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83, _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F, _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B, _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7, _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3, _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF, _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB, _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7, _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3, _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF, _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB, _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF } _MM_PERM_ENUM; typedef enum { _MM_MANT_NORM_1_2, /* interval [1, 2) */ _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */ _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */ _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */ } 
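/* [Illustrative sketch added by the editor; not part of the original header.]
 * The enums here are control encodings rather than data: _MM_CMPINT_* selects
 * the predicate of the integer compare intrinsics (which return a __mmask8 or
 * __mmask16 bit mask), and _MM_PERM_* packs a four-way lane selection into one
 * byte, two bits per destination lane (0xE4 == _MM_PERM_DCBA is the identity).
 * A hedged sketch, assuming _mm512_cmp_epi32_mask and _mm512_shuffle_epi32,
 * which are defined further down in this header:
 *
 *   #include <immintrin.h>
 *
 *   __mmask16 lanes_less_than(__m512i a, __m512i b) {
 *     // Bit i of the result is set when a[i] < b[i] (signed 32-bit lanes).
 *     return _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
 *   }
 *
 *   __m512i reverse_within_128(__m512i v) {
 *     // _MM_PERM_ABCD == 0x1B reverses the four 32-bit lanes of each
 *     // 128-bit block.
 *     return _mm512_shuffle_epi32(v, _MM_PERM_ABCD);
 *   }
 */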
_MM_MANTISSA_NORM_ENUM; typedef enum { _MM_MANT_SIGN_src, /* sign = sign(SRC) */ _MM_MANT_SIGN_zero, /* sign = 0 */ _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */ } _MM_MANTISSA_SIGN_ENUM; /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f,evex512"), __min_vector_width__(512))) #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512f,no-evex512"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512f,no-evex512"))) /* Create vectors with repeated elements */ static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_setzero_si512(void) { return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; } #define _mm512_setzero_epi32 _mm512_setzero_si512 static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void) { return (__m512d)__builtin_ia32_undef512(); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void) { return (__m512)__builtin_ia32_undef512(); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void) { return (__m512)__builtin_ia32_undef512(); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void) { return (__m512i)__builtin_ia32_undef512(); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastd_epi32 (__m128i __A) { return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512(__M, (__v16si) _mm512_broadcastd_epi32(__A), (__v16si) __O); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512(__M, (__v16si) _mm512_broadcastd_epi32(__A), (__v16si) _mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastq_epi64 (__m128i __A) { return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A, 0, 0, 0, 0, 0, 0, 0, 0); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512(__M, (__v8di) _mm512_broadcastq_epi64(__A), (__v8di) __O); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512(__M, (__v8di) _mm512_broadcastq_epi64(__A), (__v8di) _mm512_setzero_si512()); } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_setzero_ps(void) { return __extension__ (__m512){ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }; } #define _mm512_setzero _mm512_setzero_ps static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_setzero_pd(void) { return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set1_ps(float __w) { return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w }; } static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set1_pd(double __w) { return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w }; } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi8(char __w) { return __extension__ (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, 
__w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w }; } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi16(short __w) { return __extension__ (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w }; } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi32(int __s) { return __extension__ (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s, __s, __s, __s, __s, __s, __s, __s, __s }; } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi32(__mmask16 __M, int __A) { return (__m512i)__builtin_ia32_selectd_512(__M, (__v16si)_mm512_set1_epi32(__A), (__v16si)_mm512_setzero_si512()); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi64(long long __d) { return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A) { return (__m512i)__builtin_ia32_selectq_512(__M, (__v8di)_mm512_set1_epi64(__A), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcastss_ps(__m128 __A) { return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi32 (int __A, int __B, int __C, int __D) { return __extension__ (__m512i)(__v16si) { __D, __C, __B, __A, __D, __C, __B, __A, __D, __C, __B, __A, __D, __C, __B, __A }; } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi64 (long long __A, long long __B, long long __C, long long __D) { return __extension__ (__m512i) (__v8di) { __D, __C, __B, __A, __D, __C, __B, __A }; } static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set4_pd (double __A, double __B, double __C, double __D) { return __extension__ (__m512d) { __D, __C, __B, __A, __D, __C, __B, __A }; } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set4_ps (float __A, float __B, float __C, float __D) { return __extension__ (__m512) { __D, __C, __B, __A, __D, __C, __B, __A, __D, __C, __B, __A, __D, __C, __B, __A }; } #define _mm512_setr4_epi32(e0,e1,e2,e3) \ _mm512_set4_epi32((e3),(e2),(e1),(e0)) #define _mm512_setr4_epi64(e0,e1,e2,e3) \ _mm512_set4_epi64((e3),(e2),(e1),(e0)) #define _mm512_setr4_pd(e0,e1,e2,e3) \ _mm512_set4_pd((e3),(e2),(e1),(e0)) #define _mm512_setr4_ps(e0,e1,e2,e3) \ _mm512_set4_ps((e3),(e2),(e1),(e0)) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcastsd_pd(__m128d __A) { return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A, 0, 0, 0, 0, 0, 0, 0, 0); } /* Cast between vector types */ static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a) { return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0, 1, 2, 3, 4, 5, 6, 7); } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a) { return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } static __inline __m128d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd128(__m512d __a) { return __builtin_shufflevector(__a, __a, 0, 1); } static __inline __m256d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd256 
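/* [Illustrative sketch added by the editor; not part of the original header.]
 * The set/broadcast helpers around here splat a scalar or a 128-bit source
 * into every lane of a 512-bit vector; the _mask/_maskz variants then merge
 * the result with an existing vector or with zero under a write mask.  A small
 * sketch (the function names are hypothetical):
 *
 *   #include <immintrin.h>
 *
 *   __m512i splat_even_lanes(int v) {
 *     // 0x5555 selects lanes 0, 2, 4, ..., 14; unselected lanes become zero.
 *     return _mm512_maskz_set1_epi32((__mmask16)0x5555, v);
 *   }
 *
 *   __m512 splat_low_float(__m128 x) {
 *     // Broadcast element 0 of x to all sixteen float lanes.
 *     return _mm512_broadcastss_ps(x);
 *   }
 */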
(__m512d __A) { return __builtin_shufflevector(__A, __A, 0, 1, 2, 3); } static __inline __m128 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps128(__m512 __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); } static __inline __m256 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps256 (__m512 __A) { return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7); } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castpd_ps (__m512d __A) { return (__m512) (__A); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castpd_si512 (__m512d __A) { return (__m512i) (__A); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512 (__m128d __A) { __m256d __B = __builtin_nondeterministic_value(__B); return __builtin_shufflevector( __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3), __B, 0, 1, 2, 3, 4, 5, 6, 7); } static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castps_pd (__m512 __A) { return (__m512d) (__A); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castps_si512 (__m512 __A) { return (__m512i) (__A); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512 (__m128 __A) { __m256 __B = __builtin_nondeterministic_value(__B); return __builtin_shufflevector( __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7), __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512 (__m128i __A) { __m256i __B = __builtin_nondeterministic_value(__B); return __builtin_shufflevector( __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3), __B, 0, 1, 2, 3, 4, 5, 6, 7); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512 (__m256i __A) { return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7); } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castsi512_ps (__m512i __A) { return (__m512) (__A); } static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castsi512_pd (__m512i __A) { return (__m512d) (__A); } static __inline __m128i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si128 (__m512i __A) { return (__m128i)__builtin_shufflevector(__A, __A , 0, 1); } static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si256 (__m512i __A) { return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_int2mask(int __a) { return (__mmask16)__a; } static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask2int(__mmask16 __a) { return (int)__a; } /// Constructs a 512-bit floating-point vector of [8 x double] from a /// 128-bit floating-point vector of [2 x double]. The lower 128 bits /// contain the value of the source vector. The upper 384 bits are set /// to zero. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits /// contain the value of the parameter. The upper 384 bits are set to zero. static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd128_pd512(__m128d __a) { return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3); } /// Constructs a 512-bit floating-point vector of [8 x double] from a /// 256-bit floating-point vector of [4 x double]. The lower 256 bits /// contain the value of the source vector. The upper 256 bits are set /// to zero. 
/// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. /// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits /// contain the value of the parameter. The upper 256 bits are set to zero. static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd256_pd512(__m256d __a) { return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7); } /// Constructs a 512-bit floating-point vector of [16 x float] from a /// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain /// the value of the source vector. The upper 384 bits are set to zero. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits /// contain the value of the parameter. The upper 384 bits are set to zero. static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps128_ps512(__m128 __a) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7); } /// Constructs a 512-bit floating-point vector of [16 x float] from a /// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain /// the value of the source vector. The upper 256 bits are set to zero. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits /// contain the value of the parameter. The upper 256 bits are set to zero. static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps256_ps512(__m256 __a) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } /// Constructs a 512-bit integer vector from a 128-bit integer vector. /// The lower 128 bits contain the value of the source vector. The upper /// 384 bits are set to zero. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit integer vector. /// \returns A 512-bit integer vector. The lower 128 bits contain the value of /// the parameter. The upper 384 bits are set to zero. static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi128_si512(__m128i __a) { return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3); } /// Constructs a 512-bit integer vector from a 256-bit integer vector. /// The lower 256 bits contain the value of the source vector. The upper /// 256 bits are set to zero. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit integer vector. /// \returns A 512-bit integer vector. The lower 256 bits contain the value of /// the parameter. The upper 256 bits are set to zero. 
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi256_si512(__m256i __a) { return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7); } /* Bitwise operators */ static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi32(__m512i __a, __m512i __b) { return (__m512i)((__v16su)__a & (__v16su)__b); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, (__v16si) _mm512_and_epi32(__a, __b), (__v16si) __src); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) { return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (), __k, __a, __b); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi64(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a & (__v8du)__b); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) { return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k, (__v8di) _mm512_and_epi64(__a, __b), (__v8di) __src); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) { return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (), __k, __a, __b); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_si512 (__m512i __A, __m512i __B) { return (__m512i)(~(__v8du)__A & (__v8du)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi32 (__m512i __A, __m512i __B) { return (__m512i)(~(__v16su)__A & (__v16su)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_andnot_epi32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(), __U, __A, __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi64(__m512i __A, __m512i __B) { return (__m512i)(~(__v8du)__A & (__v8du)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_andnot_epi64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(), __U, __A, __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_epi32(__m512i __a, __m512i __b) { return (__m512i)((__v16su)__a | (__v16su)__b); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, (__v16si)_mm512_or_epi32(__a, __b), (__v16si)__src); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b) { return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_epi64(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a | (__v8du)__b); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) { return 
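/* [Illustrative sketch added by the editor; not part of the original header.]
 * Two families widen a narrower vector to 512 bits: the _mm512_cast*
 * intrinsics leave the upper lanes undefined (and typically compile to no
 * instruction), while the _mm512_zext* intrinsics guarantee the upper lanes
 * are zero.  A sketch:
 *
 *   #include <immintrin.h>
 *
 *   __m512 widen_undef(__m128 x)  { return _mm512_castps128_ps512(x); }
 *   __m512 widen_zeroed(__m128 x) { return _mm512_zextps128_ps512(x); }
 *
 *   // Narrowing keeps the low lanes and needs no zeroing:
 *   __m128 low_quarter(__m512 v)  { return _mm512_castps512_ps128(v); }
 */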
(__m512i)__builtin_ia32_selectq_512((__mmask8)__k, (__v8di)_mm512_or_epi64(__a, __b), (__v8di)__src); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b) { return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_epi32(__m512i __a, __m512i __b) { return (__m512i)((__v16su)__a ^ (__v16su)__b); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, (__v16si)_mm512_xor_epi32(__a, __b), (__v16si)__src); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b) { return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_epi64(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a ^ (__v8du)__b); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k, (__v8di)_mm512_xor_epi64(__a, __b), (__v8di)__src); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b) { return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_si512(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a & (__v8du)__b); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_si512(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a | (__v8du)__b); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_si512(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a ^ (__v8du)__b); } /* Arithmetic */ static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_add_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a + (__v8df)__b); } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_add_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a + (__v16sf)__b); } static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mul_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a * (__v8df)__b); } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mul_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a * (__v16sf)__b); } static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_sub_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a - (__v8df)__b); } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_sub_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a - (__v16sf)__b); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi64 (__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A + (__v8du) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_add_epi64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_add_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi64 (__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A - (__v8du) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, 
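/* [Illustrative sketch added by the editor; not part of the original header.]
 * The bitwise operators are plain lane-wise C operations on the vector types;
 * the _mask/_maskz wrappers route each 32- or 64-bit lane of the result
 * through a select against the source vector (or zero).  For instance,
 * clearing bits only in the lanes chosen by a mask (hypothetical helper):
 *
 *   #include <immintrin.h>
 *
 *   __m512i clear_bits_in_lanes(__m512i v, __m512i bits, __mmask16 which) {
 *     // Lanes selected by `which` become v & ~bits; other lanes keep v.
 *     return _mm512_mask_andnot_epi32(v, which, bits, v);
 *   }
 */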
__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_sub_epi64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_sub_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi32 (__m512i __A, __m512i __B) { return (__m512i) ((__v16su) __A + (__v16su) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_add_epi32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_add_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi32 (__m512i __A, __m512i __B) { return (__m512i) ((__v16su) __A - (__v16su) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sub_epi32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sub_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } #define _mm512_max_round_pd(A, B, R) \ ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(R))) #define _mm512_mask_max_round_pd(W, U, A, B, R) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_max_round_pd((A), (B), (R)), \ (__v8df)(W))) #define _mm512_maskz_max_round_pd(U, A, B, R) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_max_round_pd((A), (B), (R)), \ (__v8df)_mm512_setzero_pd())) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_max_pd(__m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_max_pd(__A, __B), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_max_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } #define _mm512_max_round_ps(A, B, R) \ ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(R))) #define _mm512_mask_max_round_ps(W, U, A, B, R) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_max_round_ps((A), (B), (R)), \ (__v16sf)(W))) #define _mm512_maskz_max_round_ps(U, A, B, R) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_max_round_ps((A), (B), (R)), \ (__v16sf)_mm512_setzero_ps())) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_max_ps(__m512 __A, __m512 __B) { return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return 
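/* [Illustrative sketch added by the editor; not part of the original header.]
 * Vector addition and subtraction are likewise expressed as plain C arithmetic
 * on the element types, and the masked forms blend the result per lane.  A
 * sketch of a masked accumulate (names are hypothetical):
 *
 *   #include <immintrin.h>
 *
 *   __m512i add_where(__m512i acc, __m512i delta, __mmask16 active) {
 *     // Active lanes become acc + delta; inactive lanes pass acc through.
 *     return _mm512_mask_add_epi32(acc, active, acc, delta);
 *   }
 */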
(__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_max_ps(__A, __B), (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_max_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_max_round_ss(A, B, R) \ ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_max_round_ss(W, U, A, B, R) \ ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ (int)(R))) #define _mm_maskz_max_round_ss(U, A, B, R) \ ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, (__v2df) __B, (__v2df) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_max_round_sd(A, B, R) \ ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_max_round_sd(W, U, A, B, R) \ ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_max_round_sd(U, A, B, R) \ ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_max_epi32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_max_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu32(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_max_epu32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 
_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_max_epu32(__A, __B), (__v16si)_mm512_setzero_si512()); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi64(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_max_epi64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_max_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu64(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_max_epu64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_max_epu64(__A, __B), (__v8di)_mm512_setzero_si512()); } #define _mm512_min_round_pd(A, B, R) \ ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(R))) #define _mm512_mask_min_round_pd(W, U, A, B, R) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_min_round_pd((A), (B), (R)), \ (__v8df)(W))) #define _mm512_maskz_min_round_pd(U, A, B, R) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_min_round_pd((A), (B), (R)), \ (__v8df)_mm512_setzero_pd())) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_min_pd(__m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_min_pd(__A, __B), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_min_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } #define _mm512_min_round_ps(A, B, R) \ ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(R))) #define _mm512_mask_min_round_ps(W, U, A, B, R) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_min_round_ps((A), (B), (R)), \ (__v16sf)(W))) #define _mm512_maskz_min_round_ps(U, A, B, R) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_min_round_ps((A), (B), (R)), \ (__v16sf)_mm512_setzero_ps())) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_min_ps(__m512 __A, __m512 __B) { return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_min_ps(__A, __B), (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_min_ps (__mmask16 __U, 
__m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_min_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_min_round_ss(A, B, R) \ ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_min_round_ss(W, U, A, B, R) \ ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ (int)(R))) #define _mm_maskz_min_round_ss(U, A, B, R) \ ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, (__v2df) __B, (__v2df) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_min_round_sd(A, B, R) \ ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_min_round_sd(W, U, A, B, R) \ ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_min_round_sd(U, A, B, R) \ ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_min_epi32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_min_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu32(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_min_epu32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_min_epu32(__A, __B), 
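/* [Illustrative sketch added by the editor; not part of the original header.]
 * The min/max family has three layers: element-wise vector forms
 * (_mm512_max_ps, _mm512_min_epu32, ...), masked blends of those, and _round
 * variants whose last argument is an _MM_FROUND_* constant; the non-_round FP
 * forms simply pass _MM_FROUND_CUR_DIRECTION.  A clamp sketch:
 *
 *   #include <immintrin.h>
 *
 *   __m512 clamp01(__m512 v) {
 *     // Element-wise clamp of sixteen floats to [0.0, 1.0].
 *     return _mm512_min_ps(_mm512_max_ps(v, _mm512_setzero_ps()),
 *                          _mm512_set1_ps(1.0f));
 *   }
 *
 *   __m512i unsigned_min(__m512i a, __m512i b) {
 *     // Lane-wise minimum treating each 32-bit lane as unsigned.
 *     return _mm512_min_epu32(a, b);
 *   }
 */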
(__v16si)_mm512_setzero_si512()); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi64(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_min_epi64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_min_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu64(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_min_epu64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_min_epu64(__A, __B), (__v8di)_mm512_setzero_si512()); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mul_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_mul_epi32(__X, __Y), (__v8di)__W); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_mul_epi32(__X, __Y), (__v8di)_mm512_setzero_si512 ()); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mul_epu32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_mul_epu32(__X, __Y), (__v8di)__W); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_mul_epu32(__X, __Y), (__v8di)_mm512_setzero_si512 ()); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mullo_epi32 (__m512i __A, __m512i __B) { return (__m512i) ((__v16su) __A * (__v16su) __B); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_mullo_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_mullo_epi32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mullox_epi64 (__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A * (__v8du) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return 
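/* [Illustrative sketch added by the editor; not part of the original header.]
 * Note the three multiply shapes above: _mm512_mul_epi32/_mm512_mul_epu32 are
 * widening multiplies of the even-indexed 32-bit lanes (eight full 64-bit
 * products, the pmuldq/pmuludq pattern), _mm512_mullo_epi32 keeps only the
 * low 32 bits of all sixteen products, and _mm512_mullox_epi64 is a plain
 * 64-bit lane-wise multiply.  A sketch:
 *
 *   #include <immintrin.h>
 *
 *   __m512i low_products(__m512i a, __m512i b) {
 *     // Sixteen 32-bit lanes; bits above 32 in each product are discarded.
 *     return _mm512_mullo_epi32(a, b);
 *   }
 *
 *   __m512i even_lane_products(__m512i a, __m512i b) {
 *     // Eight full 64-bit products of the even-indexed signed 32-bit lanes.
 *     return _mm512_mul_epi32(a, b);
 *   }
 */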
(__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_mullox_epi64(__A, __B), (__v8di)__W); } #define _mm512_sqrt_round_pd(A, R) \ ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R))) #define _mm512_mask_sqrt_round_pd(W, U, A, R) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_sqrt_round_pd((A), (R)), \ (__v8df)(__m512d)(W))) #define _mm512_maskz_sqrt_round_pd(U, A, R) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_sqrt_round_pd((A), (R)), \ (__v8df)_mm512_setzero_pd())) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A) { return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A), (__v8df)_mm512_setzero_pd()); } #define _mm512_sqrt_round_ps(A, R) \ ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R))) #define _mm512_mask_sqrt_round_ps(W, U, A, R) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ (__v16sf)(__m512)(W))) #define _mm512_maskz_sqrt_round_ps(U, A, R) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ (__v16sf)_mm512_setzero_ps())) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A) { return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A), (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A), (__v16sf)_mm512_setzero_ps()); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A) { return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, (__v8df) _mm512_setzero_pd (), (__mmask8) -1);} static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, (__v8df) __W, (__mmask8) __U); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, (__v8df) _mm512_setzero_pd (), (__mmask8) __U); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A) { return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, (__v16sf) _mm512_setzero_ps (), (__mmask16) -1); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, (__v16sf) __W, (__mmask16) __U); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, (__v16sf) _mm512_setzero_ps (), (__mmask16) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, (__v4sf) 
__B, (__v4sf) _mm_setzero_ps (), (__mmask8) -1); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) __W, (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps (), (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), (__mmask8) -1); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, (__v2df) __B, (__v2df) __W, (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), (__mmask8) __U); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A) { return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, (__v8df) _mm512_setzero_pd (), (__mmask8) -1); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, (__v8df) __W, (__mmask8) __U); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, (__v8df) _mm512_setzero_pd (), (__mmask8) __U); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A) { return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, (__v16sf) _mm512_setzero_ps (), (__mmask16) -1); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, (__v16sf) __W, (__mmask16) __U); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, (__v16sf) _mm512_setzero_ps (), (__mmask16) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps (), (__mmask8) -1); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) __W, (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps (), (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), (__mmask8) -1); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, (__v2df) __B, (__v2df) __W, (__mmask8) __U); } static __inline__ __m128d 
__DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), (__mmask8) __U); } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A) { return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, _MM_FROUND_FLOOR, (__v16sf) __A, (unsigned short)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, _MM_FROUND_FLOOR, (__v16sf) __W, __U, _MM_FROUND_CUR_DIRECTION); } static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A) { return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, _MM_FROUND_FLOOR, (__v8df) __A, (unsigned char)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, _MM_FROUND_FLOOR, (__v8df) __W, __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, _MM_FROUND_CEIL, (__v16sf) __W, __U, _MM_FROUND_CUR_DIRECTION); } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A) { return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, _MM_FROUND_CEIL, (__v16sf) __A, (unsigned short)-1, _MM_FROUND_CUR_DIRECTION); } static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A) { return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, _MM_FROUND_CEIL, (__v8df) __A, (unsigned char)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, _MM_FROUND_CEIL, (__v8df) __W, __U, _MM_FROUND_CUR_DIRECTION); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi64(__m512i __A) { return (__m512i)__builtin_elementwise_abs((__v8di)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_abs_epi64(__A), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_abs_epi64(__A), (__v8di)_mm512_setzero_si512()); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi32(__m512i __A) { return (__m512i)__builtin_elementwise_abs((__v16si) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_abs_epi32(__A), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_abs_epi32(__A), (__v16si)_mm512_setzero_si512()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { __A = _mm_add_ss(__A, __B); return __builtin_ia32_selectss_128(__U, __A, __W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) { __A = _mm_add_ss(__A, __B); return __builtin_ia32_selectss_128(__U, 
__A, _mm_setzero_ps()); } #define _mm_add_round_ss(A, B, R) \ ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_add_round_ss(W, U, A, B, R) \ ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ (int)(R))) #define _mm_maskz_add_round_ss(U, A, B, R) \ ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { __A = _mm_add_sd(__A, __B); return __builtin_ia32_selectsd_128(__U, __A, __W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) { __A = _mm_add_sd(__A, __B); return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); } #define _mm_add_round_sd(A, B, R) \ ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_add_round_sd(W, U, A, B, R) \ ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_add_round_sd(U, A, B, R) \ ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_add_pd(__A, __B), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_add_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_add_ps(__A, __B), (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_add_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } #define _mm512_add_round_pd(A, B, R) \ ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(R))) #define _mm512_mask_add_round_pd(W, U, A, B, R) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_add_round_pd((A), (B), (R)), \ (__v8df)(__m512d)(W))) #define _mm512_maskz_add_round_pd(U, A, B, R) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_add_round_pd((A), (B), (R)), \ (__v8df)_mm512_setzero_pd())) #define _mm512_add_round_ps(A, B, R) \ ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(R))) #define _mm512_mask_add_round_ps(W, U, A, B, R) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_add_round_ps((A), (B), (R)), \ (__v16sf)(__m512)(W))) #define _mm512_maskz_add_round_ps(U, A, B, R) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_add_round_ps((A), (B), (R)), \ (__v16sf)_mm512_setzero_ps())) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { __A = 
_mm_sub_ss(__A, __B); return __builtin_ia32_selectss_128(__U, __A, __W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) { __A = _mm_sub_ss(__A, __B); return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); } #define _mm_sub_round_ss(A, B, R) \ ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_sub_round_ss(W, U, A, B, R) \ ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ (int)(R))) #define _mm_maskz_sub_round_ss(U, A, B, R) \ ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { __A = _mm_sub_sd(__A, __B); return __builtin_ia32_selectsd_128(__U, __A, __W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) { __A = _mm_sub_sd(__A, __B); return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); } #define _mm_sub_round_sd(A, B, R) \ ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_sub_round_sd(W, U, A, B, R) \ ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_sub_round_sd(U, A, B, R) \ ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_sub_pd(__A, __B), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_sub_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_sub_ps(__A, __B), (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_sub_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } #define _mm512_sub_round_pd(A, B, R) \ ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(R))) #define _mm512_mask_sub_round_pd(W, U, A, B, R) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_sub_round_pd((A), (B), (R)), \ (__v8df)(__m512d)(W))) #define _mm512_maskz_sub_round_pd(U, A, B, R) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_sub_round_pd((A), (B), (R)), \ (__v8df)_mm512_setzero_pd())) #define _mm512_sub_round_ps(A, B, R) \ ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(R))) #define _mm512_mask_sub_round_ps(W, U, A, B, R) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \ (__v16sf)(__m512)(W))) #define _mm512_maskz_sub_round_ps(U, A, B, R) \ 
((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \ (__v16sf)_mm512_setzero_ps())) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { __A = _mm_mul_ss(__A, __B); return __builtin_ia32_selectss_128(__U, __A, __W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) { __A = _mm_mul_ss(__A, __B); return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); } #define _mm_mul_round_ss(A, B, R) \ ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_mul_round_ss(W, U, A, B, R) \ ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ (int)(R))) #define _mm_maskz_mul_round_ss(U, A, B, R) \ ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { __A = _mm_mul_sd(__A, __B); return __builtin_ia32_selectsd_128(__U, __A, __W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) { __A = _mm_mul_sd(__A, __B); return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); } #define _mm_mul_round_sd(A, B, R) \ ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_mul_round_sd(W, U, A, B, R) \ ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_mul_round_sd(U, A, B, R) \ ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_mul_pd(__A, __B), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_mul_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_mul_ps(__A, __B), (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_mul_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } #define _mm512_mul_round_pd(A, B, R) \ ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(R))) #define _mm512_mask_mul_round_pd(W, U, A, B, R) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_mul_round_pd((A), (B), (R)), \ (__v8df)(__m512d)(W))) #define _mm512_maskz_mul_round_pd(U, A, B, R) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_mul_round_pd((A), (B), (R)), \ (__v8df)_mm512_setzero_pd())) #define _mm512_mul_round_ps(A, B, R) \ ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), 
(int)(R))) #define _mm512_mask_mul_round_ps(W, U, A, B, R) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \ (__v16sf)(__m512)(W))) #define _mm512_maskz_mul_round_ps(U, A, B, R) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \ (__v16sf)_mm512_setzero_ps())) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { __A = _mm_div_ss(__A, __B); return __builtin_ia32_selectss_128(__U, __A, __W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) { __A = _mm_div_ss(__A, __B); return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); } #define _mm_div_round_ss(A, B, R) \ ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_div_round_ss(W, U, A, B, R) \ ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ (int)(R))) #define _mm_maskz_div_round_ss(U, A, B, R) \ ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { __A = _mm_div_sd(__A, __B); return __builtin_ia32_selectsd_128(__U, __A, __W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) { __A = _mm_div_sd(__A, __B); return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); } #define _mm_div_round_sd(A, B, R) \ ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_div_round_sd(W, U, A, B, R) \ ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_div_round_sd(U, A, B, R) \ ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_div_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a/(__v8df)__b); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_div_pd(__A, __B), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_div_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_div_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a/(__v16sf)__b); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_div_ps(__A, __B), (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_div_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } #define _mm512_div_round_pd(A, B, R) \ ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \ 
(__v8df)(__m512d)(B), (int)(R))) #define _mm512_mask_div_round_pd(W, U, A, B, R) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_div_round_pd((A), (B), (R)), \ (__v8df)(__m512d)(W))) #define _mm512_maskz_div_round_pd(U, A, B, R) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_div_round_pd((A), (B), (R)), \ (__v8df)_mm512_setzero_pd())) #define _mm512_div_round_ps(A, B, R) \ ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(R))) #define _mm512_mask_div_round_ps(W, U, A, B, R) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_div_round_ps((A), (B), (R)), \ (__v16sf)(__m512)(W))) #define _mm512_maskz_div_round_ps(U, A, B, R) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_div_round_ps((A), (B), (R)), \ (__v16sf)_mm512_setzero_ps())) #define _mm512_roundscale_ps(A, B) \ ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \ (__v16sf)_mm512_undefined_ps(), \ (__mmask16)-1, \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_roundscale_ps(A, B, C, imm) \ ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ (__v16sf)(__m512)(A), (__mmask16)(B), \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_roundscale_ps(A, B, imm) \ ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(A), \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \ ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ (__v16sf)(__m512)(A), (__mmask16)(B), \ (int)(R))) #define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \ ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(A), (int)(R))) #define _mm512_roundscale_round_ps(A, imm, R) \ ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \ (__v16sf)_mm512_undefined_ps(), \ (__mmask16)-1, (int)(R))) #define _mm512_roundscale_pd(A, B) \ ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \ (__v8df)_mm512_undefined_pd(), \ (__mmask8)-1, \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_roundscale_pd(A, B, C, imm) \ ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ (__v8df)(__m512d)(A), (__mmask8)(B), \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_roundscale_pd(A, B, imm) \ ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(A), \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \ ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ (__v8df)(__m512d)(A), (__mmask8)(B), \ (int)(R))) #define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \ ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(A), (int)(R))) #define _mm512_roundscale_round_pd(A, imm, R) \ ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \ (__v8df)_mm512_undefined_pd(), \ (__mmask8)-1, (int)(R))) #define _mm512_fmadd_round_pd(A, B, C, R) \ ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \ ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) #define 
_mm512_mask3_fmadd_round_pd(A, B, C, U, R) \ ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \ ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) #define _mm512_fmsub_round_pd(A, B, C, R) \ ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \ ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \ ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) #define _mm512_fnmadd_round_pd(A, B, C, R) \ ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (__mmask8)-1, (int)(R))) #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \ ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \ ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) #define _mm512_fnmsub_round_pd(A, B, C, R) \ ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ (__mmask8)-1, (int)(R))) #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \ ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, (__v8df) __B, (__v8df) __C, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, (__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, (__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, (__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, (__v8df) __B, -(__v8df) __C, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, (__v8df) __B, -(__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, 
__m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, (__v8df) __B, -(__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, -(__v8df) __B, (__v8df) __C, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, (__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, (__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, -(__v8df) __B, -(__v8df) __C, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, (__v8df) __B, -(__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_fmadd_round_ps(A, B, C, R) \ ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \ ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \ ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \ ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) #define _mm512_fmsub_round_ps(A, B, C, R) \ ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \ ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \ ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) #define _mm512_fnmadd_round_ps(A, B, C, R) \ ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ -(__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (__mmask16)-1, (int)(R))) #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \ ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \ ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) #define _mm512_fnmsub_round_ps(A, B, C, R) \ ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ -(__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ (__mmask16)-1, (int)(R))) #define 
_mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \ ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, (__v16sf) __B, (__v16sf) __C, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, (__v16sf) __B, (__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, (__v16sf) __B, (__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, (__v16sf) __B, (__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, (__v16sf) __B, -(__v16sf) __C, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, (__v16sf) __B, -(__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, (__v16sf) __B, -(__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, -(__v16sf) __B, (__v16sf) __C, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, (__v16sf) __B, (__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, (__v16sf) __B, (__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, -(__v16sf) __B, -(__v16sf) __C, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, (__v16sf) __B, -(__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_fmaddsub_round_pd(A, B, C, R) \ ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \ 
((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \ ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \ ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) #define _mm512_fmsubadd_round_pd(A, B, C, R) \ ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \ ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \ ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, (__v8df) __B, (__v8df) __C, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, (__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, (__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, (__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, (__v8df) __B, -(__v8df) __C, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, (__v8df) __B, -(__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, (__v8df) __B, -(__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_fmaddsub_round_ps(A, B, C, R) \ ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \ ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \ ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (__mmask16)(U), 
(int)(R))) #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \ ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) #define _mm512_fmsubadd_round_ps(A, B, C, R) \ ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \ ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \ ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, (__v16sf) __B, (__v16sf) __C, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, (__v16sf) __B, (__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, (__v16sf) __B, (__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, (__v16sf) __B, (__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, (__v16sf) __B, -(__v16sf) __C, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, (__v16sf) __B, -(__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, (__v16sf) __B, -(__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \ ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, (__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \ ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, (__v16sf) __B, (__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } 
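/* Illustrative usage sketch, not part of the original header: a minimal,
 * hypothetical example of the masked fused multiply-add forms defined above.
 * The helper name demo_mask3_fmadd_ps and its parameter names are invented
 * for illustration only; it assumes a toolchain with AVX-512F enabled
 * (e.g. compiling with -mavx512f). */
#include <immintrin.h>

/* For lanes where `mask` is set, compute a*b + acc; lanes where it is clear
 * keep the corresponding element of `acc` (the third operand), which is the
 * pass-through behaviour of the _mm512_mask3_* variants. */
static inline __m512 demo_mask3_fmadd_ps(__m512 a, __m512 b, __m512 acc,
                                         __mmask16 mask)
{
    return _mm512_mask3_fmadd_ps(a, b, acc, mask);
}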
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \ ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, (__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \ ((__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, (__v16sf) __B, (__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \ ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ -(__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, -(__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \ ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ -(__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, -(__v16sf) __B, (__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \ ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ -(__v8df)(__m512d)(B), \ -(__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \ ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, -(__v8df) __B, -(__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A, (__v8df) __B, (__v8df) __C, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \ ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ -(__v16sf)(__m512)(B), \ -(__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \ ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, -(__v16sf) __B, -(__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { return 
(__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A, (__v16sf) __B, (__v16sf) __C, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } /* Vector permutations */ static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I, (__v16si) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_permutex2var_epi32(__A, __I, __B), (__v16si)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_permutex2var_epi32(__A, __I, __B), (__v16si)__I); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_permutex2var_epi32(__A, __I, __B), (__v16si)_mm512_setzero_si512()); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I, (__v8di) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, (__v8di)_mm512_permutex2var_epi64(__A, __I, __B), (__v8di)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, (__v8di)_mm512_permutex2var_epi64(__A, __I, __B), (__v8di)__I); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, (__v8di)_mm512_permutex2var_epi64(__A, __I, __B), (__v8di)_mm512_setzero_si512()); } #define _mm512_alignr_epi64(A, B, I) \ ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \ (__v8di)(__m512i)(B), (int)(I))) #define _mm512_mask_alignr_epi64(W, U, A, B, imm) \ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \ (__v8di)(__m512i)(W))) #define _mm512_maskz_alignr_epi64(U, A, B, imm) \ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \ (__v8di)_mm512_setzero_si512())) #define _mm512_alignr_epi32(A, B, I) \ ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \ (__v16si)(__m512i)(B), (int)(I))) #define _mm512_mask_alignr_epi32(W, U, A, B, imm) \ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \ (__v16si)(__m512i)(W))) #define _mm512_maskz_alignr_epi32(U, A, B, imm) \ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \ (__v16si)_mm512_setzero_si512())) /* Vector Extract */ #define _mm512_extractf64x4_pd(A, I) \ ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \ (__v4df)_mm256_undefined_pd(), \ (__mmask8)-1)) #define _mm512_mask_extractf64x4_pd(W, U, A, imm) \ ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \ (__v4df)(__m256d)(W), \ (__mmask8)(U))) #define _mm512_maskz_extractf64x4_pd(U, A, imm) \ 
((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \ (__v4df)_mm256_setzero_pd(), \ (__mmask8)(U))) #define _mm512_extractf32x4_ps(A, I) \ ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \ (__v4sf)_mm_undefined_ps(), \ (__mmask8)-1)) #define _mm512_mask_extractf32x4_ps(W, U, A, imm) \ ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \ (__v4sf)(__m128)(W), \ (__mmask8)(U))) #define _mm512_maskz_extractf32x4_ps(U, A, imm) \ ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U))) /* Vector Blend */ static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) { return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, (__v8df) __W, (__v8df) __A); } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) { return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, (__v16sf) __W, (__v16sf) __A); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, (__v8di) __W, (__v8di) __A); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, (__v16si) __W, (__v16si) __A); } /* Compare */ #define _mm512_cmp_round_ps_mask(A, B, P, R) \ ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(P), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \ ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(P), \ (__mmask16)(U), (int)(R))) #define _mm512_cmp_ps_mask(A, B, P) \ _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) #define _mm512_mask_cmp_ps_mask(U, A, B, P) \ _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) #define _mm512_cmpeq_ps_mask(A, B) \ _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ) #define _mm512_mask_cmpeq_ps_mask(k, A, B) \ _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ) #define _mm512_cmplt_ps_mask(A, B) \ _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS) #define _mm512_mask_cmplt_ps_mask(k, A, B) \ _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS) #define _mm512_cmple_ps_mask(A, B) \ _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS) #define _mm512_mask_cmple_ps_mask(k, A, B) \ _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS) #define _mm512_cmpunord_ps_mask(A, B) \ _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q) #define _mm512_mask_cmpunord_ps_mask(k, A, B) \ _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q) #define _mm512_cmpneq_ps_mask(A, B) \ _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ) #define _mm512_mask_cmpneq_ps_mask(k, A, B) \ _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ) #define _mm512_cmpnlt_ps_mask(A, B) \ _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US) #define _mm512_mask_cmpnlt_ps_mask(k, A, B) \ _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US) #define _mm512_cmpnle_ps_mask(A, B) \ _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US) #define _mm512_mask_cmpnle_ps_mask(k, A, B) \ _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US) #define _mm512_cmpord_ps_mask(A, B) \ _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q) #define _mm512_mask_cmpord_ps_mask(k, A, B) \ _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q) #define _mm512_cmp_round_pd_mask(A, B, P, R) \ 
((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(P), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \ ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(P), \ (__mmask8)(U), (int)(R))) #define _mm512_cmp_pd_mask(A, B, P) \ _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) #define _mm512_mask_cmp_pd_mask(U, A, B, P) \ _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) #define _mm512_cmpeq_pd_mask(A, B) \ _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ) #define _mm512_mask_cmpeq_pd_mask(k, A, B) \ _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ) #define _mm512_cmplt_pd_mask(A, B) \ _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS) #define _mm512_mask_cmplt_pd_mask(k, A, B) \ _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS) #define _mm512_cmple_pd_mask(A, B) \ _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS) #define _mm512_mask_cmple_pd_mask(k, A, B) \ _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS) #define _mm512_cmpunord_pd_mask(A, B) \ _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q) #define _mm512_mask_cmpunord_pd_mask(k, A, B) \ _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q) #define _mm512_cmpneq_pd_mask(A, B) \ _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ) #define _mm512_mask_cmpneq_pd_mask(k, A, B) \ _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ) #define _mm512_cmpnlt_pd_mask(A, B) \ _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US) #define _mm512_mask_cmpnlt_pd_mask(k, A, B) \ _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US) #define _mm512_cmpnle_pd_mask(A, B) \ _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US) #define _mm512_mask_cmpnle_pd_mask(k, A, B) \ _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US) #define _mm512_cmpord_pd_mask(A, B) \ _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q) #define _mm512_mask_cmpord_pd_mask(k, A, B) \ _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q) /* Conversion */ #define _mm512_cvtt_roundps_epu32(A, R) \ ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_undefined_epi32(), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \ ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ (__v16si)(__m512i)(W), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \ ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ (__mmask16)(U), (int)(R))) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A) { return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, (__v16si) _mm512_setzero_si512 (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, (__v16si) __W, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, (__v16si) _mm512_setzero_si512 (), (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundepi32_ps(A, R) \ ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \ ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ (__v16sf)(__m512)(W), \ (__mmask16)(U), (int)(R))) #define 
_mm512_maskz_cvt_roundepi32_ps(U, A, R) \ ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), (int)(R))) #define _mm512_cvt_roundepu32_ps(A, R) \ ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \ ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ (__v16sf)(__m512)(W), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \ ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_ps (__m512i __A) { return (__m512)__builtin_convertvector((__v16su)__A, __v16sf); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_cvtepu32_ps(__A), (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_cvtepu32_ps(__A), (__v16sf)_mm512_setzero_ps()); } static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_pd(__m256i __A) { return (__m512d)__builtin_convertvector((__v8si)__A, __v8df); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, (__v8df)_mm512_cvtepi32_pd(__A), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, (__v8df)_mm512_cvtepi32_pd(__A), (__v8df)_mm512_setzero_pd()); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32lo_pd(__m512i __A) { return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A)); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) { return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A)); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_ps (__m512i __A) { return (__m512)__builtin_convertvector((__v16si)__A, __v16sf); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_cvtepi32_ps(__A), (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_cvtepi32_ps(__A), (__v16sf)_mm512_setzero_ps()); } static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_pd(__m256i __A) { return (__m512d)__builtin_convertvector((__v8su)__A, __v8df); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, (__v8df)_mm512_cvtepu32_pd(__A), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, (__v8df)_mm512_cvtepu32_pd(__A), (__v8df)_mm512_setzero_pd()); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32lo_pd(__m512i __A) { return (__m512d) 
_mm512_cvtepu32_pd(_mm512_castsi512_si256(__A)); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) { return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A)); } #define _mm512_cvt_roundpd_ps(A, R) \ ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \ ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ (__v8sf)(__m256)(W), (__mmask8)(U), \ (int)(R))) #define _mm512_maskz_cvt_roundpd_ps(U, A, R) \ ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U), (int)(R))) static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ps (__m512d __A) { return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) { return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, (__v8sf) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) { return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_pslo (__m512d __A) { return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A), (__v8sf) _mm256_setzero_ps (), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A) { return (__m512) __builtin_shufflevector ( (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W), __U, __A), (__v8sf) _mm256_setzero_ps (), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } #define _mm512_cvt_roundps_ph(A, I) \ ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)_mm256_undefined_si256(), \ (__mmask16)-1)) #define _mm512_mask_cvt_roundps_ph(U, W, A, I) \ ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)(__m256i)(U), \ (__mmask16)(W))) #define _mm512_maskz_cvt_roundps_ph(W, A, I) \ ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)_mm256_setzero_si256(), \ (__mmask16)(W))) #define _mm512_cvtps_ph _mm512_cvt_roundps_ph #define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph #define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph #define _mm512_cvt_roundph_ps(A, R) \ ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ (__v16sf)_mm512_undefined_ps(), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_cvt_roundph_ps(W, U, A, R) \ ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ (__v16sf)(__m512)(W), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvt_roundph_ps(U, A, R) \ ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), (int)(R))) static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A) { return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A) { return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 
(__v16sf) __W, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) { return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvtt_roundpd_epi32(A, R) \ ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \ ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ (__v8si)(__m256i)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \ ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ (__mmask8)(U), (int)(R))) static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a) { return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a, (__v8si)_mm256_setzero_si256(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, (__v8si) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvtt_roundps_epi32(A, R) \ ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \ ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ (__v16si)(__m512i)(W), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \ ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ (__mmask16)(U), (int)(R))) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a) { return (__m512i) __builtin_ia32_cvttps2dq512_mask((__v16sf) __a, (__v16si) _mm512_setzero_si512 (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, (__v16si) __W, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, (__v16si) _mm512_setzero_si512 (), (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundps_epi32(A, R) \ ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \ ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ (__v16si)(__m512i)(W), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvt_roundps_epi32(U, A, R) \ ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ (__mmask16)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32 (__m512 __A) { return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, (__v16si) _mm512_undefined_epi32 (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i 
__DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, (__v16si) __W, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, (__v16si) _mm512_setzero_si512 (), (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundpd_epi32(A, R) \ ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \ ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ (__v8si)(__m256i)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \ ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ (__mmask8)(U), (int)(R))) static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32 (__m512d __A) { return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, (__v8si) _mm256_undefined_si256 (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, (__v8si) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundps_epu32(A, R) \ ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \ ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ (__v16si)(__m512i)(W), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvt_roundps_epu32(U, A, R) \ ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ (__v16si)_mm512_setzero_si512(), \ (__mmask16)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32 ( __m512 __A) { return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\ (__v16si)\ _mm512_undefined_epi32 (), (__mmask16) -1,\ _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, (__v16si) __W, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, (__v16si) _mm512_setzero_si512 (), (__mmask16) __U , _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundpd_epu32(A, R) \ ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \ ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)(__m256i)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \ ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ (__mmask8)(U), (int)(R))) static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32 (__m512d __A) { return (__m256i) 
__builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, (__v8si) _mm256_undefined_si256 (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, (__v8si) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a) { return __a[0]; } static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a) { return __a[0]; } /* Unpack and Interleave */ static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpackhi_pd(__m512d __a, __m512d __b) { return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, (__v8df)_mm512_unpackhi_pd(__A, __B), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, (__v8df)_mm512_unpackhi_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpacklo_pd(__m512d __a, __m512d __b) { return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, (__v8df)_mm512_unpacklo_pd(__A, __B), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, (__v8df)_mm512_unpacklo_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpackhi_ps(__m512 __a, __m512 __b) { return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, 2, 18, 3, 19, 2+4, 18+4, 3+4, 19+4, 2+8, 18+8, 3+8, 19+8, 2+12, 18+12, 3+12, 19+12); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, (__v16sf)_mm512_unpackhi_ps(__A, __B), (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, (__v16sf)_mm512_unpackhi_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpacklo_ps(__m512 __a, __m512 __b) { return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, 0, 16, 1, 17, 0+4, 16+4, 1+4, 17+4, 0+8, 16+8, 1+8, 17+8, 0+12, 16+12, 1+12, 17+12); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, (__v16sf)_mm512_unpacklo_ps(__A, __B), (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B) { return 
(__m512)__builtin_ia32_selectps_512((__mmask16) __U, (__v16sf)_mm512_unpacklo_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B, 2, 18, 3, 19, 2+4, 18+4, 3+4, 19+4, 2+8, 18+8, 3+8, 19+8, 2+12, 18+12, 3+12, 19+12); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, (__v16si)_mm512_unpackhi_epi32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, (__v16si)_mm512_unpackhi_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B, 0, 16, 1, 17, 0+4, 16+4, 1+4, 17+4, 0+8, 16+8, 1+8, 17+8, 0+12, 16+12, 1+12, 17+12); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, (__v16si)_mm512_unpacklo_epi32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, (__v16si)_mm512_unpacklo_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi64(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, (__v8di)_mm512_unpackhi_epi64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, (__v8di)_mm512_unpackhi_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, (__v8di)_mm512_unpacklo_epi64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, (__v8di)_mm512_unpacklo_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } /* SIMD load ops */ static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512 (void const *__P) { struct __loadu_si512 { __m512i_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_si512*)__P)->__v; } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32 (void const *__P) { struct __loadu_epi32 { __m512i_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_epi32*)__P)->__v; } static __inline __m512i 
__DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, (__v16si) __W, (__mmask16) __U); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P, (__v16si) _mm512_setzero_si512 (), (__mmask16) __U); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64 (void const *__P) { struct __loadu_epi64 { __m512i_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_epi64*)__P)->__v; } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P, (__v8di) __W, (__mmask8) __U); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P, (__v8di) _mm512_setzero_si512 (), (__mmask8) __U); } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P) { return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P, (__v16sf) __W, (__mmask16) __U); } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P) { return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P, (__v16sf) _mm512_setzero_ps (), (__mmask16) __U); } static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P) { return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P, (__v8df) __W, (__mmask8) __U); } static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) { return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P, (__v8df) _mm512_setzero_pd (), (__mmask8) __U); } static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p) { struct __loadu_pd { __m512d_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_pd*)__p)->__v; } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p) { struct __loadu_ps { __m512_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_ps*)__p)->__v; } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p) { return *(const __m512*)__p; } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P) { return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, (__v16sf) __W, (__mmask16) __U); } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P) { return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P, (__v16sf) _mm512_setzero_ps (), (__mmask16) __U); } static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p) { return *(const __m512d*)__p; } static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P) { return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, (__v8df) __W, (__mmask8) __U); } static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P) { return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P, (__v8df) _mm512_setzero_pd (), (__mmask8) __U); } static __inline __m512i __DEFAULT_FN_ATTRS512 
_mm512_load_si512 (void const *__P) { return *(const __m512i *) __P; } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32 (void const *__P) { return *(const __m512i *) __P; } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64 (void const *__P) { return *(const __m512i *) __P; } /* SIMD store ops */ static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64 (void *__P, __m512i __A) { struct __storeu_epi64 { __m512i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi64*)__P)->__v = __A; } static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) { __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A, (__mmask8) __U); } static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512 (void *__P, __m512i __A) { struct __storeu_si512 { __m512i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_si512*)__P)->__v = __A; } static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32 (void *__P, __m512i __A) { struct __storeu_epi32 { __m512i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi32*)__P)->__v = __A; } static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) { __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A, (__mmask16) __U); } static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A) { __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U); } static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A) { struct __storeu_pd { __m512d_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_pd*)__P)->__v = __A; } static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A) { __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A, (__mmask16) __U); } static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A) { struct __storeu_ps { __m512_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_ps*)__P)->__v = __A; } static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A) { __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); } static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A) { *(__m512d*)__P = __A; } static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A) { __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A, (__mmask16) __U); } static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A) { *(__m512*)__P = __A; } static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512 (void *__P, __m512i __A) { *(__m512i *) __P = __A; } static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32 (void *__P, __m512i __A) { *(__m512i *) __P = __A; } static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64 (void *__P, __m512i __A) { *(__m512i *) __P = __A; } /* Mask ops */ static __inline __mmask16 __DEFAULT_FN_ATTRS _mm512_knot(__mmask16 __M) { return __builtin_ia32_knothi(__M); } /* Integer compare */ #define _mm512_cmpeq_epi32_mask(A, B) \ _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ) #define _mm512_mask_cmpeq_epi32_mask(k, A, B) \ _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm512_cmpge_epi32_mask(A, B) \ _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE) #define 
_mm512_mask_cmpge_epi32_mask(k, A, B) \ _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm512_cmpgt_epi32_mask(A, B) \ _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT) #define _mm512_mask_cmpgt_epi32_mask(k, A, B) \ _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm512_cmple_epi32_mask(A, B) \ _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE) #define _mm512_mask_cmple_epi32_mask(k, A, B) \ _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm512_cmplt_epi32_mask(A, B) \ _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT) #define _mm512_mask_cmplt_epi32_mask(k, A, B) \ _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm512_cmpneq_epi32_mask(A, B) \ _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE) #define _mm512_mask_cmpneq_epi32_mask(k, A, B) \ _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE) #define _mm512_cmpeq_epu32_mask(A, B) \ _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ) #define _mm512_mask_cmpeq_epu32_mask(k, A, B) \ _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm512_cmpge_epu32_mask(A, B) \ _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE) #define _mm512_mask_cmpge_epu32_mask(k, A, B) \ _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm512_cmpgt_epu32_mask(A, B) \ _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT) #define _mm512_mask_cmpgt_epu32_mask(k, A, B) \ _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm512_cmple_epu32_mask(A, B) \ _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE) #define _mm512_mask_cmple_epu32_mask(k, A, B) \ _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm512_cmplt_epu32_mask(A, B) \ _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT) #define _mm512_mask_cmplt_epu32_mask(k, A, B) \ _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm512_cmpneq_epu32_mask(A, B) \ _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE) #define _mm512_mask_cmpneq_epu32_mask(k, A, B) \ _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE) #define _mm512_cmpeq_epi64_mask(A, B) \ _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ) #define _mm512_mask_cmpeq_epi64_mask(k, A, B) \ _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm512_cmpge_epi64_mask(A, B) \ _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE) #define _mm512_mask_cmpge_epi64_mask(k, A, B) \ _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm512_cmpgt_epi64_mask(A, B) \ _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT) #define _mm512_mask_cmpgt_epi64_mask(k, A, B) \ _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm512_cmple_epi64_mask(A, B) \ _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE) #define _mm512_mask_cmple_epi64_mask(k, A, B) \ _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm512_cmplt_epi64_mask(A, B) \ _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT) #define _mm512_mask_cmplt_epi64_mask(k, A, B) \ _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm512_cmpneq_epi64_mask(A, B) \ _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE) #define _mm512_mask_cmpneq_epi64_mask(k, A, B) \ _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE) #define _mm512_cmpeq_epu64_mask(A, B) \ _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ) #define _mm512_mask_cmpeq_epu64_mask(k, A, B) \ _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm512_cmpge_epu64_mask(A, B) \ _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE) #define _mm512_mask_cmpge_epu64_mask(k, A, B) \ _mm512_mask_cmp_epu64_mask((k), (A), (B), 
_MM_CMPINT_GE) #define _mm512_cmpgt_epu64_mask(A, B) \ _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT) #define _mm512_mask_cmpgt_epu64_mask(k, A, B) \ _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm512_cmple_epu64_mask(A, B) \ _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE) #define _mm512_mask_cmple_epu64_mask(k, A, B) \ _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm512_cmplt_epu64_mask(A, B) \ _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT) #define _mm512_mask_cmplt_epu64_mask(k, A, B) \ _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm512_cmpneq_epu64_mask(A, B) \ _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE) #define _mm512_mask_cmpneq_epu64_mask(k, A, B) \ _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi32(__m128i __A) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_cvtepi8_epi32(__A), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_cvtepi8_epi32(__A), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi64(__m128i __A) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepi8_epi64(__A), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepi8_epi64(__A), (__v8di)_mm512_setzero_si512 ()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi64(__m256i __X) { return (__m512i)__builtin_convertvector((__v8si)__X, __v8di); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepi32_epi64(__X), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepi32_epi64(__X), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi32(__m256i __A) { return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_cvtepi16_epi32(__A), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_cvtepi16_epi32(__A), (__v16si)_mm512_setzero_si512 ()); } 
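/* Illustrative usage sketch -- not part of the upstream header.  The helper
   name below is hypothetical; it assumes an AVX-512F target and uses only
   intrinsics already defined earlier in this file.  It shows how the widening
   conversion above combines with the write-mask blend: 16 signed bytes are
   sign-extended to 32-bit lanes, then merged with a fallback vector under a
   caller-supplied mask. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
__example_widen_and_blend_epi32(__m128i __bytes, __m512i __fallback,
                                __mmask16 __keep)
{
  /* 16 x int8 -> 16 x int32; always a signed extension (see the comment in
     _mm512_cvtepi8_epi32 above). */
  __m512i __wide = _mm512_cvtepi8_epi32(__bytes);
  /* Lanes whose bit in __keep is set take the widened value; the remaining
     lanes take the corresponding lane of __fallback. */
  return _mm512_mask_blend_epi32(__keep, __fallback, __wide);
}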
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi64(__m128i __A) { return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepi16_epi64(__A), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepi16_epi64(__A), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi32(__m128i __A) { return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_cvtepu8_epi32(__A), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_cvtepu8_epi32(__A), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi64(__m128i __A) { return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepu8_epi64(__A), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepu8_epi64(__A), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_epi64(__m256i __X) { return (__m512i)__builtin_convertvector((__v8su)__X, __v8di); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepu32_epi64(__X), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepu32_epi64(__X), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi32(__m256i __A) { return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_cvtepu16_epi32(__A), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_cvtepu16_epi32(__A), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi64(__m128i __A) { return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepu16_epi64(__A), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 
_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepu16_epi64(__A), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi32 (__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_rorv_epi32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_rorv_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi64 (__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, (__v8di)_mm512_rorv_epi64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, (__v8di)_mm512_rorv_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } #define _mm512_cmp_epi32_mask(a, b, p) \ ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ (__v16si)(__m512i)(b), (int)(p), \ (__mmask16)-1)) #define _mm512_cmp_epu32_mask(a, b, p) \ ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ (__v16si)(__m512i)(b), (int)(p), \ (__mmask16)-1)) #define _mm512_cmp_epi64_mask(a, b, p) \ ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ (__v8di)(__m512i)(b), (int)(p), \ (__mmask8)-1)) #define _mm512_cmp_epu64_mask(a, b, p) \ ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ (__v8di)(__m512i)(b), (int)(p), \ (__mmask8)-1)) #define _mm512_mask_cmp_epi32_mask(m, a, b, p) \ ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ (__v16si)(__m512i)(b), (int)(p), \ (__mmask16)(m))) #define _mm512_mask_cmp_epu32_mask(m, a, b, p) \ ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ (__v16si)(__m512i)(b), (int)(p), \ (__mmask16)(m))) #define _mm512_mask_cmp_epi64_mask(m, a, b, p) \ ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ (__v8di)(__m512i)(b), (int)(p), \ (__mmask8)(m))) #define _mm512_mask_cmp_epu64_mask(m, a, b, p) \ ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ (__v8di)(__m512i)(b), (int)(p), \ (__mmask8)(m))) #define _mm512_rol_epi32(a, b) \ ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b))) #define _mm512_mask_rol_epi32(W, U, a, b) \ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_rol_epi32((a), (b)), \ (__v16si)(__m512i)(W))) #define _mm512_maskz_rol_epi32(U, a, b) \ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_rol_epi32((a), (b)), \ (__v16si)_mm512_setzero_si512())) #define _mm512_rol_epi64(a, b) \ ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b))) #define _mm512_mask_rol_epi64(W, U, a, b) \ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_rol_epi64((a), (b)), \ (__v8di)(__m512i)(W))) #define _mm512_maskz_rol_epi64(U, a, b) \ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_rol_epi64((a), (b)), 
\ (__v8di)_mm512_setzero_si512())) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi32 (__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_rolv_epi32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_rolv_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi64 (__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, (__v8di)_mm512_rolv_epi64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, (__v8di)_mm512_rolv_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } #define _mm512_ror_epi32(A, B) \ ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B))) #define _mm512_mask_ror_epi32(W, U, A, B) \ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_ror_epi32((A), (B)), \ (__v16si)(__m512i)(W))) #define _mm512_maskz_ror_epi32(U, A, B) \ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_ror_epi32((A), (B)), \ (__v16si)_mm512_setzero_si512())) #define _mm512_ror_epi64(A, B) \ ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B))) #define _mm512_mask_ror_epi64(W, U, A, B) \ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_ror_epi64((A), (B)), \ (__v8di)(__m512i)(W))) #define _mm512_maskz_ror_epi64(U, A, B) \ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_ror_epi64((A), (B)), \ (__v8di)_mm512_setzero_si512())) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi32(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (int)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_slli_epi32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_slli_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi64(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (int)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_slli_epi64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_slli_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi32(__m512i 
__A, unsigned int __B) { return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (int)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srli_epi32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srli_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi64(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (int)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_srli_epi64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_srli_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, (__v16si) __W, (__mmask16) __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, (__v16si) _mm512_setzero_si512 (), (__mmask16) __U); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A) { __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A, (__mmask16) __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, (__v16si) __A, (__v16si) __W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, (__v16si) __A, (__v16si) _mm512_setzero_si512 ()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, (__v8di) __A, (__v8di) __W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, (__v8di) __A, (__v8di) _mm512_setzero_si512 ()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, (__v8di) __W, (__mmask8) __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, (__v8di) _mm512_setzero_si512 (), (__mmask8) __U); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A) { __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A, (__mmask8) __U); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_movedup_pd (__m512d __A) { return 
(__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A, 0, 0, 2, 2, 4, 4, 6, 6); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_movedup_pd(__A), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_movedup_pd(__A), (__v8df)_mm512_setzero_pd()); } #define _mm512_fixupimm_round_pd(A, B, C, imm, R) \ ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8di)(__m512i)(C), (int)(imm), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \ ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8di)(__m512i)(C), (int)(imm), \ (__mmask8)(U), (int)(R))) #define _mm512_fixupimm_pd(A, B, C, imm) \ ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8di)(__m512i)(C), (int)(imm), \ (__mmask8)-1, \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \ ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8di)(__m512i)(C), (int)(imm), \ (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \ ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8di)(__m512i)(C), \ (int)(imm), (__mmask8)(U), \ (int)(R))) #define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \ ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8di)(__m512i)(C), \ (int)(imm), (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_fixupimm_round_ps(A, B, C, imm, R) \ ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16si)(__m512i)(C), (int)(imm), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \ ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16si)(__m512i)(C), (int)(imm), \ (__mmask16)(U), (int)(R))) #define _mm512_fixupimm_ps(A, B, C, imm) \ ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16si)(__m512i)(C), (int)(imm), \ (__mmask16)-1, \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \ ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16si)(__m512i)(C), (int)(imm), \ (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \ ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16si)(__m512i)(C), \ (int)(imm), (__mmask16)(U), \ (int)(R))) #define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \ ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16si)(__m512i)(C), \ (int)(imm), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_fixupimm_round_sd(A, B, C, imm, R) \ ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ (__mmask8)-1, (int)(R))) #define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \ ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ (__mmask8)(U), (int)(R))) #define 
_mm_fixupimm_sd(A, B, C, imm) \ ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ (__mmask8)-1, \ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_fixupimm_sd(A, U, B, C, imm) \ ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \ ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \ ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_fixupimm_round_ss(A, B, C, imm, R) \ ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ (__mmask8)-1, (int)(R))) #define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \ ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ (__mmask8)(U), (int)(R))) #define _mm_fixupimm_ss(A, B, C, imm) \ ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ (__mmask8)-1, \ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_fixupimm_ss(A, U, B, C, imm) \ ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \ ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \ ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_getexp_round_sd(A, B, R) \ ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd (__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, (__v2df) __B, (__v2df) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask_getexp_round_sd(W, U, A, B, R) \ ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_maskz_getexp_round_sd(U, A, B, R) \ ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) #define _mm_getexp_round_ss(A, B, R) \ 
((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask_getexp_round_ss(W, U, A, B, R) \ ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), \ (__mmask8)(U), (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_maskz_getexp_round_ss(U, A, B, R) \ ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) #define _mm_getmant_round_sd(A, B, C, D, R) \ ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm_getmant_sd(A, B, C, D) \ ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, \ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_getmant_sd(W, U, A, B, C, D) \ ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \ ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_getmant_sd(U, A, B, C, D) \ ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \ ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (int)(((D)<<2) | (C)), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) #define _mm_getmant_round_ss(A, B, C, D, R) \ ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm_getmant_ss(A, B, C, D) \ ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, \ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_getmant_ss(W, U, A, B, C, D) \ ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)(__m128)(W), \ (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \ ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)(__m128)(W), \ (__mmask8)(U), 
(int)(R))) #define _mm_maskz_getmant_ss(U, A, B, C, D) \ ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \ ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov (__mmask16 __A) { return __A; } #define _mm_comi_round_sd(A, B, P, R) \ ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \ (int)(P), (int)(R))) #define _mm_comi_round_ss(A, B, P, R) \ ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ (int)(P), (int)(R))) #ifdef __x86_64__ #define _mm_cvt_roundsd_si64(A, R) \ ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))) #endif static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi32(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sll_epi32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sll_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi64(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_sll_epi64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_sll_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sllv_epi32(__X, __Y), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sllv_epi32(__X, __Y), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi64(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_sllv_epi64(__X, __Y), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_sllv_epi64(__X, __Y), (__v8di)_mm512_setzero_si512()); } 
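/*
 * Illustrative sketch (not part of the original header): the shift intrinsics
 * above all follow the same pattern -- compute the unmasked shift, then blend
 * the result through __builtin_ia32_selectd_512/selectq_512 so that lanes
 * whose mask bit is 0 either keep the pass-through operand (mask_ form) or
 * become zero (maskz_ form).  The two helpers below are hypothetical examples
 * of how a caller might use the per-lane variable shifts; they assume AVX-512F
 * is enabled for the translation unit and that the definitions above are in
 * scope.  The names example_shift_selected_lanes and example_shift_or_keep are
 * invented for illustration only.
 */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
example_shift_selected_lanes(__m512i __V, __m512i __Counts, __mmask8 __M)
{
  /* maskz_ form: lanes not selected by __M are zeroed. */
  return _mm512_maskz_sllv_epi64(__M, __V, __Counts);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
example_shift_or_keep(__m512i __Old, __mmask8 __M, __m512i __V, __m512i __Counts)
{
  /* mask_ form: lanes not selected by __M keep their value from __Old. */
  return _mm512_mask_sllv_epi64(__Old, __M, __V, __Counts);
}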
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi32(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sra_epi32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sra_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi64(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_sra_epi64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_sra_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srav_epi32(__X, __Y), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srav_epi32(__X, __Y), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi64(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_srav_epi64(__X, __Y), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_srav_epi64(__X, __Y), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi32(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srl_epi32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srl_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi64(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { return 
    (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                        (__v8di)_mm512_srl_epi64(__A, __B),
                                        (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srl_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srlv_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srlv_epi32(__X, __Y),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srlv_epi32(__X, __Y),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srlv_epi64(__X, __Y),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srlv_epi64(__X, __Y),
                                             (__v8di)_mm512_setzero_si512());
}

/// \enum _MM_TERNLOG_ENUM
/// A helper to represent the ternary logic operations among vector \a A,
/// \a B and \a C. The representation is passed to \a imm.
typedef enum { _MM_TERNLOG_A = 0xF0, _MM_TERNLOG_B = 0xCC, _MM_TERNLOG_C = 0xAA } _MM_TERNLOG_ENUM; #define _mm512_ternarylogic_epi32(A, B, C, imm) \ ((__m512i)__builtin_ia32_pternlogd512_mask( \ (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \ (unsigned char)(imm), (__mmask16)-1)) #define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \ ((__m512i)__builtin_ia32_pternlogd512_mask( \ (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \ (unsigned char)(imm), (__mmask16)(U))) #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \ ((__m512i)__builtin_ia32_pternlogd512_maskz( \ (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \ (unsigned char)(imm), (__mmask16)(U))) #define _mm512_ternarylogic_epi64(A, B, C, imm) \ ((__m512i)__builtin_ia32_pternlogq512_mask( \ (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \ (unsigned char)(imm), (__mmask8)-1)) #define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \ ((__m512i)__builtin_ia32_pternlogq512_mask( \ (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \ (unsigned char)(imm), (__mmask8)(U))) #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \ ((__m512i)__builtin_ia32_pternlogq512_maskz( \ (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \ (unsigned char)(imm), (__mmask8)(U))) #ifdef __x86_64__ #define _mm_cvt_roundsd_i64(A, R) \ ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))) #endif #define _mm_cvt_roundsd_si32(A, R) \ ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))) #define _mm_cvt_roundsd_i32(A, R) \ ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))) #define _mm_cvt_roundsd_u32(A, R) \ ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R))) static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32 (__m128d __A) { return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, _MM_FROUND_CUR_DIRECTION); } #ifdef __x86_64__ #define _mm_cvt_roundsd_u64(A, R) \ ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \ (int)(R))) static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvtsd_u64 (__m128d __A) { return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, _MM_FROUND_CUR_DIRECTION); } #endif #define _mm_cvt_roundss_si32(A, R) \ ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))) #define _mm_cvt_roundss_i32(A, R) \ ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))) #ifdef __x86_64__ #define _mm_cvt_roundss_si64(A, R) \ ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))) #define _mm_cvt_roundss_i64(A, R) \ ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))) #endif #define _mm_cvt_roundss_u32(A, R) \ ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R))) static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32 (__m128 __A) { return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION); } #ifdef __x86_64__ #define _mm_cvt_roundss_u64(A, R) \ ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \ (int)(R))) static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvtss_u64 (__m128 __A) { return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION); } #endif #define _mm_cvtt_roundsd_i32(A, R) \ ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))) #define _mm_cvtt_roundsd_si32(A, R) \ 
((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))) static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32 (__m128d __A) { return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, _MM_FROUND_CUR_DIRECTION); } #ifdef __x86_64__ #define _mm_cvtt_roundsd_si64(A, R) \ ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))) #define _mm_cvtt_roundsd_i64(A, R) \ ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))) static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvttsd_i64 (__m128d __A) { return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, _MM_FROUND_CUR_DIRECTION); } #endif #define _mm_cvtt_roundsd_u32(A, R) \ ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R))) static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32 (__m128d __A) { return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, _MM_FROUND_CUR_DIRECTION); } #ifdef __x86_64__ #define _mm_cvtt_roundsd_u64(A, R) \ ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \ (int)(R))) static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvttsd_u64 (__m128d __A) { return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, _MM_FROUND_CUR_DIRECTION); } #endif #define _mm_cvtt_roundss_i32(A, R) \ ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))) #define _mm_cvtt_roundss_si32(A, R) \ ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))) static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32 (__m128 __A) { return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION); } #ifdef __x86_64__ #define _mm_cvtt_roundss_i64(A, R) \ ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))) #define _mm_cvtt_roundss_si64(A, R) \ ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))) static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvttss_i64 (__m128 __A) { return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION); } #endif #define _mm_cvtt_roundss_u32(A, R) \ ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R))) static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32 (__m128 __A) { return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION); } #ifdef __x86_64__ #define _mm_cvtt_roundss_u64(A, R) \ ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \ (int)(R))) static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvttss_u64 (__m128 __A) { return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION); } #endif #define _mm512_permute_pd(X, C) \ ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C))) #define _mm512_mask_permute_pd(W, U, X, C) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_permute_pd((X), (C)), \ (__v8df)(__m512d)(W))) #define _mm512_maskz_permute_pd(U, X, C) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_permute_pd((X), (C)), \ (__v8df)_mm512_setzero_pd())) #define _mm512_permute_ps(X, C) \ ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C))) #define _mm512_mask_permute_ps(W, U, X, C) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_permute_ps((X), (C)), \ (__v16sf)(__m512)(W))) #define _mm512_maskz_permute_ps(U, X, C) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_permute_ps((X), (C)), \ (__v16sf)_mm512_setzero_ps())) static 
__inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutevar_pd(__m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_permutevar_pd(__A, __C), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_permutevar_pd(__A, __C), (__v8df)_mm512_setzero_pd()); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutevar_ps(__m512 __A, __m512i __C) { return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_permutevar_ps(__A, __C), (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_permutevar_ps(__A, __C), (__v16sf)_mm512_setzero_ps()); } static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) { return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I, (__v8df)__B); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_permutex2var_pd(__A, __I, __B), (__v8df)__A); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_permutex2var_pd(__A, __I, __B), (__v8df)(__m512d)__I); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_permutex2var_pd(__A, __I, __B), (__v8df)_mm512_setzero_pd()); } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) { return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I, (__v16sf) __B); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_permutex2var_ps(__A, __I, __B), (__v16sf)__A); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_permutex2var_ps(__A, __I, __B), (__v16sf)(__m512)__I); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_permutex2var_ps(__A, __I, __B), (__v16sf)_mm512_setzero_ps()); } #define _mm512_cvtt_roundpd_epu32(A, R) \ ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_undefined_si256(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \ ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)(__m256i)(W), \ 
(__mmask8)(U), (int)(R))) #define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \ ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ (__mmask8)(U), (int)(R))) static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32 (__m512d __A) { return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, (__v8si) _mm256_undefined_si256 (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, (__v8si) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_roundscale_round_sd(A, B, imm, R) \ ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(imm), \ (int)(R))) #define _mm_roundscale_sd(A, B, imm) \ ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(imm), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_roundscale_sd(W, U, A, B, imm) \ ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(imm), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \ ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(I), \ (int)(R))) #define _mm_maskz_roundscale_sd(U, A, B, I) \ ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(I), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \ ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(I), \ (int)(R))) #define _mm_roundscale_round_ss(A, B, imm, R) \ ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(imm), \ (int)(R))) #define _mm_roundscale_ss(A, B, imm) \ ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(imm), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_roundscale_ss(W, U, A, B, I) \ ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), \ (__mmask8)(U), (int)(I), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \ ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), \ (__mmask8)(U), (int)(I), \ (int)(R))) #define _mm_maskz_roundscale_ss(U, A, B, I) \ ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(I), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \ ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(I), \ (int)(R))) #define _mm512_scalef_round_pd(A, B, R) \ 
((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)_mm512_undefined_pd(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_scalef_round_pd(W, U, A, B, R) \ ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)(__m512d)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_scalef_round_pd(U, A, B, R) \ ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd (__m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, (__v8df) __B, (__v8df) _mm512_undefined_pd (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, (__v8df) __B, (__v8df) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, (__v8df) __B, (__v8df) _mm512_setzero_pd (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_scalef_round_ps(A, B, R) \ ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)_mm512_undefined_ps(), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_scalef_round_ps(W, U, A, B, R) \ ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(W), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_scalef_round_ps(U, A, B, R) \ ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps (__m512 __A, __m512 __B) { return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, (__v16sf) __B, (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, (__v16sf) __B, (__v16sf) __W, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B) { return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, (__v16sf) __B, (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_scalef_round_sd(A, B, R) \ ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd (__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A, (__v2df)( __B), (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, (__v2df) __B, (__v2df) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask_scalef_round_sd(W, U, A, B, R) \ ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ 
(__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_maskz_scalef_round_sd(U, A, B, R) \ ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) #define _mm_scalef_round_ss(A, B, R) \ ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A, (__v4sf)( __B), (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, (__v4sf) __B, (__v4sf) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask_scalef_round_ss(W, U, A, B, R) \ ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), \ (__mmask8)(U), (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_maskz_scalef_round_ss(U, A, B, R) \ ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), \ (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi32(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (int)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srai_epi32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srai_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi64(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (int)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_srai_epi64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_srai_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } #define _mm512_shuffle_f32x4(A, B, imm) \ ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(imm))) #define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ (__v16sf)(__m512)(W))) #define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \ 
((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ (__v16sf)_mm512_setzero_ps())) #define _mm512_shuffle_f64x2(A, B, imm) \ ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(imm))) #define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ (__v8df)(__m512d)(W))) #define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ (__v8df)_mm512_setzero_pd())) #define _mm512_shuffle_i32x4(A, B, imm) \ ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \ (__v16si)(__m512i)(B), (int)(imm))) #define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ (__v16si)(__m512i)(W))) #define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ (__v16si)_mm512_setzero_si512())) #define _mm512_shuffle_i64x2(A, B, imm) \ ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \ (__v8di)(__m512i)(B), (int)(imm))) #define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ (__v8di)(__m512i)(W))) #define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ (__v8di)_mm512_setzero_si512())) #define _mm512_shuffle_pd(A, B, M) \ ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(M))) #define _mm512_mask_shuffle_pd(W, U, A, B, M) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ (__v8df)(__m512d)(W))) #define _mm512_maskz_shuffle_pd(U, A, B, M) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ (__v8df)_mm512_setzero_pd())) #define _mm512_shuffle_ps(A, B, M) \ ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), (int)(M))) #define _mm512_mask_shuffle_ps(W, U, A, B, M) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ (__v16sf)(__m512)(W))) #define _mm512_maskz_shuffle_ps(U, A, B, M) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ (__v16sf)_mm512_setzero_ps())) #define _mm_sqrt_round_sd(A, B, R) \ ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, (__v2df) __B, (__v2df) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask_sqrt_round_sd(W, U, A, B, R) \ ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_maskz_sqrt_round_sd(U, A, B, R) \ 
((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) #define _mm_sqrt_round_ss(A, B, R) \ ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, (__v4sf) __B, (__v4sf) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask_sqrt_round_ss(W, U, A, B, R) \ ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(W), (__mmask8)(U), \ (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_maskz_sqrt_round_ss(U, A, B, R) \ ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcast_f32x4(__m128 __A) { return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x4(__A), (__v16sf)__O); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x4(__A), (__v16sf)_mm512_setzero_ps()); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcast_f64x4(__m256d __A) { return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A, 0, 1, 2, 3, 0, 1, 2, 3); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, (__v8df)_mm512_broadcast_f64x4(__A), (__v8df)__O); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, (__v8df)_mm512_broadcast_f64x4(__A), (__v8df)_mm512_setzero_pd()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i32x4(__m128i __A) { return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x4(__A), (__v16si)__O); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x4(__A), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i64x4(__m256i __A) { return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A, 0, 1, 2, 3, 0, 1, 2, 3); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 
(__v8di)_mm512_broadcast_i64x4(__A), (__v8di)__O); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_broadcast_i64x4(__A), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512(__M, (__v8df) _mm512_broadcastsd_pd(__A), (__v8df) __O); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512(__M, (__v8df) _mm512_broadcastsd_pd(__A), (__v8df) _mm512_setzero_pd()); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512(__M, (__v16sf) _mm512_broadcastss_ps(__A), (__v16sf) __O); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512(__M, (__v16sf) _mm512_broadcastss_ps(__A), (__v16sf) _mm512_setzero_ps()); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8 (__m512i __A) { return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, (__v16qi) _mm_undefined_si128 (), (__mmask16) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, (__v16qi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) { __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16 (__m512i __A) { return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, (__v16hi) _mm256_undefined_si256 (), (__mmask16) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, (__v16hi) __O, __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, (__v16hi) _mm256_setzero_si256 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) { __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8 (__m512i __A) { return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, (__v16qi) _mm_undefined_si128 (), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, (__v16qi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 
{ __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32 (__m512i __A) { return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, (__v8si) _mm256_undefined_si256 (), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, (__v8si) __O, __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, (__v8si) _mm256_setzero_si256 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16 (__m512i __A) { return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, (__v8hi) _mm_undefined_si128 (), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, (__v8hi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, (__v8hi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8 (__m512i __A) { return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, (__v16qi) _mm_undefined_si128 (), (__mmask16) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, (__v16qi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) { __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16 (__m512i __A) { return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, (__v16hi) _mm256_undefined_si256 (), (__mmask16) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, (__v16hi) __O, __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, (__v16hi) _mm256_setzero_si256 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) { __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8 (__m512i __A) { return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, (__v16qi) _mm_undefined_si128 (), (__mmask8) 
-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, (__v16qi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32 (__m512i __A) { return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, (__v8si) _mm256_undefined_si256 (), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, (__v8si) __O, __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, (__v8si) _mm256_setzero_si256 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16 (__m512i __A) { return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, (__v8hi) _mm_undefined_si128 (), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, (__v8hi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, (__v8hi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8 (__m512i __A) { return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, (__v16qi) _mm_undefined_si128 (), (__mmask16) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, (__v16qi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) { __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16 (__m512i __A) { return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, (__v16hi) _mm256_undefined_si256 (), (__mmask16) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, (__v16hi) __O, __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16 
(__mmask16 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, (__v16hi) _mm256_setzero_si256 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A) { __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8 (__m512i __A) { return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, (__v16qi) _mm_undefined_si128 (), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, (__v16qi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32 (__m512i __A) { return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, (__v8si) _mm256_undefined_si256 (), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, (__v8si) __O, __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, (__v8si) _mm256_setzero_si256 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16 (__m512i __A) { return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, (__v8hi) _mm_undefined_si128 (), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, (__v8hi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, (__v8hi) _mm_setzero_si128 (), __M); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); } #define _mm512_extracti32x4_epi32(A, imm) \ ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ (__v4si)_mm_undefined_si128(), \ (__mmask8)-1)) #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \ ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ (__v4si)(__m128i)(W), \ (__mmask8)(U))) #define _mm512_maskz_extracti32x4_epi32(U, A, imm) \ ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ (__v4si)_mm_setzero_si128(), \ (__mmask8)(U))) #define _mm512_extracti64x4_epi64(A, imm) \ ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ (__v4di)_mm256_undefined_si256(), \ (__mmask8)-1)) #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \ 
((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ (__v4di)(__m256i)(W), \ (__mmask8)(U))) #define _mm512_maskz_extracti64x4_epi64(U, A, imm) \ ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ (__v4di)_mm256_setzero_si256(), \ (__mmask8)(U))) #define _mm512_insertf64x4(A, B, imm) \ ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \ (__v4df)(__m256d)(B), (int)(imm))) #define _mm512_mask_insertf64x4(W, U, A, B, imm) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ (__v8df)(__m512d)(W))) #define _mm512_maskz_insertf64x4(U, A, B, imm) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ (__v8df)_mm512_setzero_pd())) #define _mm512_inserti64x4(A, B, imm) \ ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \ (__v4di)(__m256i)(B), (int)(imm))) #define _mm512_mask_inserti64x4(W, U, A, B, imm) \ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ (__v8di)(__m512i)(W))) #define _mm512_maskz_inserti64x4(U, A, B, imm) \ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ (__v8di)_mm512_setzero_si512())) #define _mm512_insertf32x4(A, B, imm) \ ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \ (__v4sf)(__m128)(B), (int)(imm))) #define _mm512_mask_insertf32x4(W, U, A, B, imm) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ (__v16sf)(__m512)(W))) #define _mm512_maskz_insertf32x4(U, A, B, imm) \ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ (__v16sf)_mm512_setzero_ps())) #define _mm512_inserti32x4(A, B, imm) \ ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \ (__v4si)(__m128i)(B), (int)(imm))) #define _mm512_mask_inserti32x4(W, U, A, B, imm) \ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ (__v16si)(__m512i)(W))) #define _mm512_maskz_inserti32x4(U, A, B, imm) \ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ (__v16si)_mm512_setzero_si512())) #define _mm512_getmant_round_pd(A, B, C, R) \ ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ (int)(((C)<<2) | (B)), \ (__v8df)_mm512_undefined_pd(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \ ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ (int)(((C)<<2) | (B)), \ (__v8df)(__m512d)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \ ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ (int)(((C)<<2) | (B)), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), (int)(R))) #define _mm512_getmant_pd(A, B, C) \ ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ (int)(((C)<<2) | (B)), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)-1, \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_getmant_pd(W, U, A, B, C) \ ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ (int)(((C)<<2) | (B)), \ (__v8df)(__m512d)(W), \ (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_getmant_pd(U, A, B, C) \ ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ (int)(((C)<<2) | (B)), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_getmant_round_ps(A, B, C, R) \ 
((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ (int)(((C)<<2) | (B)), \ (__v16sf)_mm512_undefined_ps(), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \ ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ (int)(((C)<<2) | (B)), \ (__v16sf)(__m512)(W), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \ ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ (int)(((C)<<2) | (B)), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), (int)(R))) #define _mm512_getmant_ps(A, B, C) \ ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ (int)(((C)<<2)|(B)), \ (__v16sf)_mm512_undefined_ps(), \ (__mmask16)-1, \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_getmant_ps(W, U, A, B, C) \ ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ (int)(((C)<<2)|(B)), \ (__v16sf)(__m512)(W), \ (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_getmant_ps(U, A, B, C) \ ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ (int)(((C)<<2)|(B)), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_getexp_round_pd(A, R) \ ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_undefined_pd(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_getexp_round_pd(W, U, A, R) \ ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_getexp_round_pd(U, A, R) \ ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd (__m512d __A) { return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, (__v8df) _mm512_undefined_pd (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, (__v8df) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, (__v8df) _mm512_setzero_pd (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_getexp_round_ps(A, R) \ ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_undefined_ps(), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_getexp_round_ps(W, U, A, R) \ ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(W), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_getexp_round_ps(U, A, R) \ ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps (__m512 __A) { return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, (__v16sf) __W, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, 
_MM_FROUND_CUR_DIRECTION); } #define _mm512_i64gather_ps(index, addr, scale) \ ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \ (void const *)(addr), \ (__v8di)(__m512i)(index), (__mmask8)-1, \ (int)(scale))) #define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \ ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\ (void const *)(addr), \ (__v8di)(__m512i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm512_i64gather_epi32(index, addr, scale) \ ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \ (void const *)(addr), \ (__v8di)(__m512i)(index), \ (__mmask8)-1, (int)(scale))) #define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \ ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \ (void const *)(addr), \ (__v8di)(__m512i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm512_i64gather_pd(index, addr, scale) \ ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \ (void const *)(addr), \ (__v8di)(__m512i)(index), (__mmask8)-1, \ (int)(scale))) #define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \ ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \ (void const *)(addr), \ (__v8di)(__m512i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm512_i64gather_epi64(index, addr, scale) \ ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \ (void const *)(addr), \ (__v8di)(__m512i)(index), (__mmask8)-1, \ (int)(scale))) #define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \ ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \ (void const *)(addr), \ (__v8di)(__m512i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm512_i32gather_ps(index, addr, scale) \ ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \ (void const *)(addr), \ (__v16si)(__m512)(index), \ (__mmask16)-1, (int)(scale))) #define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \ ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \ (void const *)(addr), \ (__v16si)(__m512)(index), \ (__mmask16)(mask), (int)(scale))) #define _mm512_i32gather_epi32(index, addr, scale) \ ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \ (void const *)(addr), \ (__v16si)(__m512i)(index), \ (__mmask16)-1, (int)(scale))) #define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \ ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \ (void const *)(addr), \ (__v16si)(__m512i)(index), \ (__mmask16)(mask), (int)(scale))) #define _mm512_i32gather_pd(index, addr, scale) \ ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \ (void const *)(addr), \ (__v8si)(__m256i)(index), (__mmask8)-1, \ (int)(scale))) #define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \ ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \ (void const *)(addr), \ (__v8si)(__m256i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm512_i32gather_epi64(index, addr, scale) \ ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \ (void const *)(addr), \ (__v8si)(__m256i)(index), (__mmask8)-1, \ (int)(scale))) #define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \ ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \ (void const *)(addr), \ (__v8si)(__m256i)(index), \ (__mmask8)(mask), (int)(scale))) #define _mm512_i64scatter_ps(addr, index, v1, scale) \ __builtin_ia32_scatterdiv16sf((void 
*)(addr), (__mmask8)-1, \ (__v8di)(__m512i)(index), \ (__v8sf)(__m256)(v1), (int)(scale)) #define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \ (__v8di)(__m512i)(index), \ (__v8sf)(__m256)(v1), (int)(scale)) #define _mm512_i64scatter_epi32(addr, index, v1, scale) \ __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \ (__v8di)(__m512i)(index), \ (__v8si)(__m256i)(v1), (int)(scale)) #define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \ (__v8di)(__m512i)(index), \ (__v8si)(__m256i)(v1), (int)(scale)) #define _mm512_i64scatter_pd(addr, index, v1, scale) \ __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \ (__v8di)(__m512i)(index), \ (__v8df)(__m512d)(v1), (int)(scale)) #define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \ (__v8di)(__m512i)(index), \ (__v8df)(__m512d)(v1), (int)(scale)) #define _mm512_i64scatter_epi64(addr, index, v1, scale) \ __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \ (__v8di)(__m512i)(index), \ (__v8di)(__m512i)(v1), (int)(scale)) #define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \ (__v8di)(__m512i)(index), \ (__v8di)(__m512i)(v1), (int)(scale)) #define _mm512_i32scatter_ps(addr, index, v1, scale) \ __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \ (__v16si)(__m512i)(index), \ (__v16sf)(__m512)(v1), (int)(scale)) #define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \ __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \ (__v16si)(__m512i)(index), \ (__v16sf)(__m512)(v1), (int)(scale)) #define _mm512_i32scatter_epi32(addr, index, v1, scale) \ __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \ (__v16si)(__m512i)(index), \ (__v16si)(__m512i)(v1), (int)(scale)) #define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \ (__v16si)(__m512i)(index), \ (__v16si)(__m512i)(v1), (int)(scale)) #define _mm512_i32scatter_pd(addr, index, v1, scale) \ __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \ (__v8si)(__m256i)(index), \ (__v8df)(__m512d)(v1), (int)(scale)) #define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \ __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \ (__v8si)(__m256i)(index), \ (__v8df)(__m512d)(v1), (int)(scale)) #define _mm512_i32scatter_epi64(addr, index, v1, scale) \ __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \ (__v8si)(__m256i)(index), \ (__v8di)(__m512i)(v1), (int)(scale)) #define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \ (__v8si)(__m256i)(index), \ (__v8di)(__m512i)(v1), (int)(scale)) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_vfmaddss3_mask((__v4sf)__W, (__v4sf)__A, (__v4sf)__B, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_fmadd_round_ss(A, B, C, R) \ ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(C), (__mmask8)-1, \ (int)(R))) #define _mm_mask_fmadd_round_ss(W, U, A, B, R) \ ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ (__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (__mmask8)(U), \ 
(int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \ ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(C), (__mmask8)(U), \ (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W, (__v4sf)__X, (__v4sf)__Y, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \ ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ (__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (__mmask8)(U), \ (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_vfmaddss3_mask((__v4sf)__W, (__v4sf)__A, -(__v4sf)__B, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_fmsub_round_ss(A, B, C, R) \ ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ -(__v4sf)(__m128)(C), (__mmask8)-1, \ (int)(R))) #define _mm_mask_fmsub_round_ss(W, U, A, B, R) \ ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ (__v4sf)(__m128)(A), \ -(__v4sf)(__m128)(B), (__mmask8)(U), \ (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \ ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ -(__v4sf)(__m128)(C), (__mmask8)(U), \ (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W, (__v4sf)__X, (__v4sf)__Y, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \ ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ (__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (__mmask8)(U), \ (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_vfmaddss3_mask((__v4sf)__W, -(__v4sf)__A, (__v4sf)__B, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_fnmadd_round_ss(A, B, C, R) \ ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ -(__v4sf)(__m128)(B), \ (__v4sf)(__m128)(C), (__mmask8)-1, \ (int)(R))) #define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \ ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ -(__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (__mmask8)(U), \ (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \ ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ -(__v4sf)(__m128)(B), \ (__v4sf)(__m128)(C), (__mmask8)(U), \ (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W, -(__v4sf)__X, (__v4sf)__Y, (__mmask8)__U, 
_MM_FROUND_CUR_DIRECTION); } #define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \ ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ -(__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (__mmask8)(U), \ (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_vfmaddss3_mask((__v4sf)__W, -(__v4sf)__A, -(__v4sf)__B, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_fnmsub_round_ss(A, B, C, R) \ ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ -(__v4sf)(__m128)(B), \ -(__v4sf)(__m128)(C), (__mmask8)-1, \ (int)(R))) #define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \ ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ -(__v4sf)(__m128)(A), \ -(__v4sf)(__m128)(B), (__mmask8)(U), \ (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \ ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ -(__v4sf)(__m128)(B), \ -(__v4sf)(__m128)(C), (__mmask8)(U), \ (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W, -(__v4sf)__X, (__v4sf)__Y, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \ ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ -(__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (__mmask8)(U), \ (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_vfmaddsd3_mask((__v2df)__W, (__v2df)__A, (__v2df)__B, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_fmadd_round_sd(A, B, C, R) \ ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(C), (__mmask8)-1, \ (int)(R))) #define _mm_mask_fmadd_round_sd(W, U, A, B, R) \ ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ (__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (__mmask8)(U), \ (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A, (__v2df)__B, (__v2df)__C, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \ ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(C), (__mmask8)(U), \ (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W, (__v2df)__X, (__v2df)__Y, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \ ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ (__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (__mmask8)(U), \ (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_vfmaddsd3_mask((__v2df)__W, (__v2df)__A, -(__v2df)__B, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_fmsub_round_sd(A, B, C, R) \ ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ -(__v2df)(__m128d)(C), 
(__mmask8)-1, \ (int)(R))) #define _mm_mask_fmsub_round_sd(W, U, A, B, R) \ ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ (__v2df)(__m128d)(A), \ -(__v2df)(__m128d)(B), (__mmask8)(U), \ (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A, (__v2df)__B, -(__v2df)__C, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \ ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ -(__v2df)(__m128d)(C), \ (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W, (__v2df)__X, (__v2df)__Y, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \ ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ (__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), \ (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_vfmaddsd3_mask((__v2df)__W, -(__v2df)__A, (__v2df)__B, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_fnmadd_round_sd(A, B, C, R) \ ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ -(__v2df)(__m128d)(B), \ (__v2df)(__m128d)(C), (__mmask8)-1, \ (int)(R))) #define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \ ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ -(__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (__mmask8)(U), \ (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A, -(__v2df)__B, (__v2df)__C, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \ ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ -(__v2df)(__m128d)(B), \ (__v2df)(__m128d)(C), (__mmask8)(U), \ (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W, -(__v2df)__X, (__v2df)__Y, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \ ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ -(__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (__mmask8)(U), \ (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_vfmaddsd3_mask((__v2df)__W, -(__v2df)__A, -(__v2df)__B, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_fnmsub_round_sd(A, B, C, R) \ ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ -(__v2df)(__m128d)(B), \ -(__v2df)(__m128d)(C), (__mmask8)-1, \ (int)(R))) #define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \ ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ -(__v2df)(__m128d)(A), \ -(__v2df)(__m128d)(B), (__mmask8)(U), \ (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A, -(__v2df)__B, -(__v2df)__C, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \ ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 
-(__v2df)(__m128d)(B), \ -(__v2df)(__m128d)(C), \ (__mmask8)(U), \ (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W, -(__v2df)__X, (__v2df)__Y, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \ ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ -(__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), \ (__mmask8)(U), (int)(R))) #define _mm512_permutex_pd(X, C) \ ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C))) #define _mm512_mask_permutex_pd(W, U, X, C) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_permutex_pd((X), (C)), \ (__v8df)(__m512d)(W))) #define _mm512_maskz_permutex_pd(U, X, C) \ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_permutex_pd((X), (C)), \ (__v8df)_mm512_setzero_pd())) #define _mm512_permutex_epi64(X, C) \ ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C))) #define _mm512_mask_permutex_epi64(W, U, X, C) \ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_permutex_epi64((X), (C)), \ (__v8di)(__m512i)(W))) #define _mm512_maskz_permutex_epi64(U, X, C) \ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_permutex_epi64((X), (C)), \ (__v8di)_mm512_setzero_si512())) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutexvar_pd (__m512i __X, __m512d __Y) { return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_permutexvar_pd(__X, __Y), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_permutexvar_pd(__X, __Y), (__v8df)_mm512_setzero_pd()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_permutexvar_epi64(__X, __Y), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_permutexvar_epi64(__X, __Y), (__v8di)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutexvar_ps (__m512i __X, __m512 __Y) { return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_permutexvar_ps(__X, __Y), (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_permutexvar_ps(__X, __Y), (__v16sf)_mm512_setzero_ps()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y) { return 
(__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X); } #define _mm512_permutevar_epi32 _mm512_permutexvar_epi32 static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_permutexvar_epi32(__X, __Y), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_permutexvar_epi32(__X, __Y), (__v16si)__W); } #define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32 static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kand (__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kandn (__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kor (__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B); } static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestc (__mmask16 __A, __mmask16 __B) { return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B); } static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestz (__mmask16 __A, __mmask16 __B) { return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B) { return (unsigned char)__builtin_ia32_kortestchi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B) { return (unsigned char)__builtin_ia32_kortestzhi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) { *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B); return (unsigned char)__builtin_ia32_kortestzhi(__A, __B); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kunpackb (__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxnor (__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxor (__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B); } #define _kand_mask16 _mm512_kand #define _kandn_mask16 _mm512_kandn #define _knot_mask16 _mm512_knot #define _kor_mask16 _mm512_kor #define _kxnor_mask16 _mm512_kxnor #define _kxor_mask16 _mm512_kxor #define _kshiftli_mask16(A, I) \ ((__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I))) #define _kshiftri_mask16(A, I) \ ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I))) static __inline__ unsigned int __DEFAULT_FN_ATTRS _cvtmask16_u32(__mmask16 __A) { return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS _cvtu32_mask16(unsigned int __A) { return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS _load_mask16(__mmask16 *__A) { return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A); } static __inline__ void __DEFAULT_FN_ATTRS _store_mask16(__mmask16 *__A, __mmask16 
__B) { *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512 (void * __P, __m512i __A) { typedef __v8di __v8di_aligned __attribute__((aligned(64))); __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512 (void const *__P) { typedef __v8di __v8di_aligned __attribute__((aligned(64))); return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd (void *__P, __m512d __A) { typedef __v8df __v8df_aligned __attribute__((aligned(64))); __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps (void *__P, __m512 __A) { typedef __v16sf __v16sf_aligned __attribute__((aligned(64))); __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, (__v8df) __W, (__mmask8) __U); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, (__v8df) _mm512_setzero_pd (), (__mmask8) __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, (__v8di) __W, (__mmask8) __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, (__v8di) _mm512_setzero_si512 (), (__mmask8) __U); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, (__v16sf) __W, (__mmask16) __U); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, (__v16sf) _mm512_setzero_ps (), (__mmask16) __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, (__v16si) __W, (__mmask16) __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, (__v16si) _mm512_setzero_si512 (), (__mmask16) __U); } #define _mm_cmp_round_ss_mask(X, Y, P, R) \ ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (int)(P), \ (__mmask8)-1, (int)(R))) #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \ ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (int)(P), \ (__mmask8)(M), (int)(R))) #define _mm_cmp_ss_mask(X, Y, P) \ ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (int)(P), \ (__mmask8)-1, \ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_cmp_ss_mask(M, X, Y, P) \ ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (int)(P), \ (__mmask8)(M), \ _MM_FROUND_CUR_DIRECTION)) #define _mm_cmp_round_sd_mask(X, Y, P, R) \ ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (int)(P), \ 
(__mmask8)-1, (int)(R))) #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \ ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (int)(P), \ (__mmask8)(M), (int)(R))) #define _mm_cmp_sd_mask(X, Y, P) \ ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (int)(P), \ (__mmask8)-1, \ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_cmp_sd_mask(M, X, Y, P) \ ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (int)(P), \ (__mmask8)(M), \ _MM_FROUND_CUR_DIRECTION)) /* Bit Test */ static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_test_epi32_mask (__m512i __A, __m512i __B) { return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B), _mm512_setzero_si512()); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } static __inline __mmask8 __DEFAULT_FN_ATTRS512 _mm512_test_epi64_mask (__m512i __A, __m512i __B) { return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_testn_epi32_mask (__m512i __A, __m512i __B) { return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_testn_epi64_mask (__m512i __A, __m512i __B) { return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_movehdup_ps (__m512 __A) { return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_movehdup_ps(__A), (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_movehdup_ps(__A), (__v16sf)_mm512_setzero_ps()); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_moveldup_ps (__m512 __A) { return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_moveldup_ps(__A), (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_moveldup_ps(__A), (__v16sf)_mm512_setzero_ps()); } static __inline__ __m128 
__DEFAULT_FN_ATTRS128 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), _mm_setzero_ps()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), _mm_setzero_pd()); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A) { __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A) { __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A) { __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W, (__v4sf)_mm_setzero_ps(), 0, 4, 4, 4); return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss (__mmask8 __U, const float* __A) { return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A, (__v4sf) _mm_setzero_ps(), __U & 1); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A) { __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W, (__v2df)_mm_setzero_pd(), 0, 2); return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd (__mmask8 __U, const double* __A) { return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, (__v2df) _mm_setzero_pd(), __U & 1); } #define _mm512_shuffle_epi32(A, I) \ ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I))) #define _mm512_mask_shuffle_epi32(W, U, A, I) \ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_shuffle_epi32((A), (I)), \ (__v16si)(__m512i)(W))) #define _mm512_maskz_shuffle_epi32(U, A, I) \ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_shuffle_epi32((A), (I)), \ (__v16si)_mm512_setzero_si512())) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, (__v8df) __W, (__mmask8) __U); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, (__v8df) _mm512_setzero_pd (), (__mmask8) __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, (__v8di) __W, (__mmask8) __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, (__v8di) _mm512_setzero_si512 (), (__mmask8) __U); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P) { return (__m512d) 
__builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, (__v8df) __W, (__mmask8) __U); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P) { return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, (__v8df) _mm512_setzero_pd(), (__mmask8) __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, (__v8di) __W, (__mmask8) __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, (__v8di) _mm512_setzero_si512(), (__mmask8) __U); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P) { return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, (__v16sf) __W, (__mmask16) __U); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P) { return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, (__v16sf) _mm512_setzero_ps(), (__mmask16) __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, (__v16si) __W, (__mmask16) __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, (__v16si) _mm512_setzero_si512(), (__mmask16) __U); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, (__v16sf) __W, (__mmask16) __U); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, (__v16sf) _mm512_setzero_ps(), (__mmask16) __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, (__v16si) __W, (__mmask16) __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, (__v16si) _mm512_setzero_si512(), (__mmask16) __U); } #define _mm512_cvt_roundps_pd(A, R) \ ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ (__v8df)_mm512_undefined_pd(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundps_pd(W, U, A, R) \ ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ (__v8df)(__m512d)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundps_pd(U, A, R) \ ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtps_pd (__m256 __A) { return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_cvtps_pd(__A), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 
__A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_cvtps_pd(__A), (__v8df)_mm512_setzero_pd()); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtpslo_pd (__m512 __A) { return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A)); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A) { return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A)); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, (__v8df) __A, (__v8df) __W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, (__v8df) __A, (__v8df) _mm512_setzero_pd ()); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, (__v16sf) __A, (__v16sf) __W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, (__v16sf) __A, (__v16sf) _mm512_setzero_ps ()); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A) { __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A, (__mmask8) __U); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A) { __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A, (__mmask8) __U); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A) { __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A, (__mmask16) __U); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A) { __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A, (__mmask16) __U); } #define _mm_cvt_roundsd_ss(A, B, R) \ ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ (__v2df)(__m128d)(B), \ (__v4sf)_mm_undefined_ps(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \ ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ (__v2df)(__m128d)(B), \ (__v4sf)(__m128)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \ ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ (__v2df)(__m128d)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) { return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A, (__v2df)__B, (__v4sf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B) { return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A, (__v2df)__B, (__v4sf)_mm_setzero_ps(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_cvtss_i32 _mm_cvtss_si32 #define _mm_cvtsd_i32 _mm_cvtsd_si32 #define _mm_cvti32_sd _mm_cvtsi32_sd #define _mm_cvti32_ss _mm_cvtsi32_ss #ifdef __x86_64__ #define _mm_cvtss_i64 _mm_cvtss_si64 #define _mm_cvtsd_i64 _mm_cvtsd_si64 #define _mm_cvti64_sd _mm_cvtsi64_sd #define _mm_cvti64_ss _mm_cvtsi64_ss #endif #ifdef __x86_64__ #define _mm_cvt_roundi64_sd(A, B, 
R) \ ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ (int)(R))) #define _mm_cvt_roundsi64_sd(A, B, R) \ ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ (int)(R))) #endif #define _mm_cvt_roundsi32_ss(A, B, R) \ ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R))) #define _mm_cvt_roundi32_ss(A, B, R) \ ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R))) #ifdef __x86_64__ #define _mm_cvt_roundsi64_ss(A, B, R) \ ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ (int)(R))) #define _mm_cvt_roundi64_ss(A, B, R) \ ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ (int)(R))) #endif #define _mm_cvt_roundss_sd(A, B, R) \ ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ (__v4sf)(__m128)(B), \ (__v2df)_mm_undefined_pd(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \ ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ (__v4sf)(__m128)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_cvt_roundss_sd(U, A, B, R) \ ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ (__v4sf)(__m128)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B) { return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A, (__v4sf)__B, (__v2df)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B) { return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A, (__v4sf)__B, (__v2df)_mm_setzero_pd(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd (__m128d __A, unsigned __B) { __A[0] = __B; return __A; } #ifdef __x86_64__ #define _mm_cvt_roundu64_sd(A, B, R) \ ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \ (unsigned long long)(B), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu64_sd (__m128d __A, unsigned long long __B) { __A[0] = __B; return __A; } #endif #define _mm_cvt_roundu32_ss(A, B, R) \ ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \ (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss (__m128 __A, unsigned __B) { __A[0] = __B; return __A; } #ifdef __x86_64__ #define _mm_cvt_roundu64_ss(A, B, R) \ ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \ (unsigned long long)(B), (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu64_ss (__m128 __A, unsigned long long __B) { __A[0] = __B; return __A; } #endif static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) { return (__m512i) __builtin_ia32_selectd_512(__M, (__v16si) _mm512_set1_epi32(__A), (__v16si) __O); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) { return (__m512i) __builtin_ia32_selectq_512(__M, (__v8di) _mm512_set1_epi64(__A), (__v8di) __O); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, 
char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0) { return __extension__ (__m512i)(__v64qi) {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7, __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15, __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23, __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31, __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39, __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47, __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55, __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63}; } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0) { return __extension__ (__m512i)(__v32hi) {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7, __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15, __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23, __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 }; } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi32 (int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P) { return __extension__ (__m512i)(__v16si) { __P, __O, __N, __M, __L, __K, __J, __I, __H, __G, __F, __E, __D, __C, __B, __A }; } #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \ e8,e9,e10,e11,e12,e13,e14,e15) \ _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \ (e5),(e4),(e3),(e2),(e1),(e0)) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi64 (long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H) { return __extension__ (__m512i) (__v8di) { __H, __G, __F, __E, __D, __C, __B, __A }; } #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \ _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_set_pd (double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H) { return __extension__ (__m512d) { __H, __G, __F, __E, __D, __C, __B, __A }; } #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \ _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_set_ps (float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P) { return __extension__ (__m512) { __P, __O, __N, __M, __L, __K, __J, __I, __H, __G, __F, __E, __D, __C, __B, __A }; } #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \ _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \ (e4),(e3),(e2),(e1),(e0)) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_abs_ps(__m512 __A) { return 
(__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ; } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A) { return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ; } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_abs_pd(__m512d __A) { return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ; } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) { return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A); } /* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as * outputs. This class of vector operation forms the basis of many scientific * computations. In vector-reduction arithmetic, the evaluation order is * independent of the order of the input elements of V. * For floating-point intrinsics: * 1. When using fadd/fmul intrinsics, the order of operations within the * vector is unspecified (associative math). * 2. When using fmin/fmax intrinsics, NaN or -0.0 elements within the vector * produce unspecified results. * Used bisection method. At each step, we partition the vector with previous * step in half, and the operation is performed on its two halves. * This takes log2(n) steps where n is the number of elements in the vector. */ static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) { return __builtin_reduce_add((__v8di)__W); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) { return __builtin_reduce_mul((__v8di)__W); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) { return __builtin_reduce_and((__v8di)__W); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) { return __builtin_reduce_or((__v8di)__W); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) { __W = _mm512_maskz_mov_epi64(__M, __W); return __builtin_reduce_add((__v8di)__W); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) { __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W); return __builtin_reduce_mul((__v8di)__W); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) { __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __W); return __builtin_reduce_and((__v8di)__W); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) { __W = _mm512_maskz_mov_epi64(__M, __W); return __builtin_reduce_or((__v8di)__W); } // -0.0 is used to ignore the start value since it is the neutral value of // floating point addition. 
For more information, please refer to // https://llvm.org/docs/LangRef.html#llvm-vector-reduce-fadd-intrinsic static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) { return __builtin_ia32_reduce_fadd_pd512(-0.0, __W); } static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) { return __builtin_ia32_reduce_fmul_pd512(1.0, __W); } static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) { __W = _mm512_maskz_mov_pd(__M, __W); return __builtin_ia32_reduce_fadd_pd512(-0.0, __W); } static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) { __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W); return __builtin_ia32_reduce_fmul_pd512(1.0, __W); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi32(__m512i __W) { return __builtin_reduce_add((__v16si)__W); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi32(__m512i __W) { return __builtin_reduce_mul((__v16si)__W); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi32(__m512i __W) { return __builtin_reduce_and((__v16si)__W); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi32(__m512i __W) { return __builtin_reduce_or((__v16si)__W); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) { __W = _mm512_maskz_mov_epi32(__M, __W); return __builtin_reduce_add((__v16si)__W); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) { __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W); return __builtin_reduce_mul((__v16si)__W); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) { __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __W); return __builtin_reduce_and((__v16si)__W); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) { __W = _mm512_maskz_mov_epi32(__M, __W); return __builtin_reduce_or((__v16si)__W); } static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W) { return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W); } static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W) { return __builtin_ia32_reduce_fmul_ps512(1.0f, __W); } static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) { __W = _mm512_maskz_mov_ps(__M, __W); return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W); } static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) { __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W); return __builtin_ia32_reduce_fmul_ps512(1.0f, __W); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi64(__m512i __V) { return __builtin_reduce_max((__v8di)__V); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu64(__m512i __V) { return __builtin_reduce_max((__v8du)__V); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi64(__m512i __V) { return __builtin_reduce_min((__v8di)__V); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu64(__m512i __V) { return __builtin_reduce_min((__v8du)__V); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) { __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V); return __builtin_reduce_max((__v8di)__V); 
} static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) { __V = _mm512_maskz_mov_epi64(__M, __V); return __builtin_reduce_max((__v8du)__V); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) { __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V); return __builtin_reduce_min((__v8di)__V); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) { __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __V); return __builtin_reduce_min((__v8du)__V); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi32(__m512i __V) { return __builtin_reduce_max((__v16si)__V); } static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu32(__m512i __V) { return __builtin_reduce_max((__v16su)__V); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi32(__m512i __V) { return __builtin_reduce_min((__v16si)__V); } static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu32(__m512i __V) { return __builtin_reduce_min((__v16su)__V); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) { __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V); return __builtin_reduce_max((__v16si)__V); } static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) { __V = _mm512_maskz_mov_epi32(__M, __V); return __builtin_reduce_max((__v16su)__V); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) { __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V); return __builtin_reduce_min((__v16si)__V); } static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) { __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __V); return __builtin_reduce_min((__v16su)__V); } static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V) { return __builtin_ia32_reduce_fmax_pd512(__V); } static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V) { return __builtin_ia32_reduce_fmin_pd512(__V); } static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) { __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V); return __builtin_ia32_reduce_fmax_pd512(__V); } static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) { __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V); return __builtin_ia32_reduce_fmin_pd512(__V); } static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V) { return __builtin_ia32_reduce_fmax_ps512(__V); } static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V) { return __builtin_ia32_reduce_fmin_ps512(__V); } static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) { __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V); return __builtin_ia32_reduce_fmax_ps512(__V); } static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) { __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V); return __builtin_ia32_reduce_fmin_ps512(__V); } /// Moves the least significant 32 bits of a vector of [16 x i32] to a /// 32-bit signed integer value. 
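// Illustrative sketch (not part of the upstream header): a hypothetical helper
// showing how the masked reduction intrinsics defined above compose. The name
// __illustrative_masked_sum_ps is an assumption for demonstration only.
// Masked-off lanes are replaced with -0.0f (the additive identity), so only
// the lanes selected by __m contribute to the sum.
static __inline__ float __DEFAULT_FN_ATTRS512
__illustrative_masked_sum_ps(__m512 __v, __mmask16 __m) {
  return _mm512_mask_reduce_add_ps(__m, __v);
}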
/// /// \headerfile /// /// This intrinsic corresponds to the VMOVD / MOVD instruction. /// /// \param __A /// A vector of [16 x i32]. The least significant 32 bits are moved to the /// destination. /// \returns A 32-bit signed integer containing the moved value. static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_cvtsi512_si32(__m512i __A) { __v16si __b = (__v16si)__A; return __b[0]; } /// Loads 8 double-precision (64-bit) floating-point elements stored at memory /// locations starting at location \a base_addr at packed 32-bit integer indices /// stored in the lower half of \a vindex scaled by \a scale them in dst. /// /// This intrinsic corresponds to the VGATHERDPD instructions. /// /// \code{.operation} /// FOR j := 0 to 7 /// i := j*64 /// m := j*32 /// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 /// dst[i+63:i] := MEM[addr+63:addr] /// ENDFOR /// dst[MAX:512] := 0 /// \endcode #define _mm512_i32logather_pd(vindex, base_addr, scale) \ _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale)) /// Loads 8 double-precision (64-bit) floating-point elements from memory /// starting at location \a base_addr at packed 32-bit integer indices stored in /// the lower half of \a vindex scaled by \a scale into dst using writemask /// \a mask (elements are copied from \a src when the corresponding mask bit is /// not set). /// /// This intrinsic corresponds to the VGATHERDPD instructions. /// /// \code{.operation} /// FOR j := 0 to 7 /// i := j*64 /// m := j*32 /// IF mask[j] /// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 /// dst[i+63:i] := MEM[addr+63:addr] /// ELSE /// dst[i+63:i] := src[i+63:i] /// FI /// ENDFOR /// dst[MAX:512] := 0 /// \endcode #define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale) \ _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex), \ (base_addr), (scale)) /// Loads 8 64-bit integer elements from memory starting at location \a base_addr /// at packed 32-bit integer indices stored in the lower half of \a vindex /// scaled by \a scale and stores them in dst. /// /// This intrinsic corresponds to the VPGATHERDQ instructions. /// /// \code{.operation} /// FOR j := 0 to 7 /// i := j*64 /// m := j*32 /// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 /// dst[i+63:i] := MEM[addr+63:addr] /// ENDFOR /// dst[MAX:512] := 0 /// \endcode #define _mm512_i32logather_epi64(vindex, base_addr, scale) \ _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex), (base_addr), (scale)) /// Loads 8 64-bit integer elements from memory starting at location \a base_addr /// at packed 32-bit integer indices stored in the lower half of \a vindex /// scaled by \a scale and stores them in dst using writemask \a mask (elements /// are copied from \a src when the corresponding mask bit is not set). /// /// This intrinsic corresponds to the VPGATHERDQ instructions. 
/// /// \code{.operation} /// FOR j := 0 to 7 /// i := j*64 /// m := j*32 /// IF mask[j] /// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 /// dst[i+63:i] := MEM[addr+63:addr] /// ELSE /// dst[i+63:i] := src[i+63:i] /// FI /// ENDFOR /// dst[MAX:512] := 0 /// \endcode #define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale) \ _mm512_mask_i32gather_epi64((src), (mask), _mm512_castsi512_si256(vindex), \ (base_addr), (scale)) /// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1 /// and to memory locations starting at location \a base_addr at packed 32-bit /// integer indices stored in \a vindex scaled by \a scale. /// /// This intrinsic corresponds to the VSCATTERDPD instructions. /// /// \code{.operation} /// FOR j := 0 to 7 /// i := j*64 /// m := j*32 /// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 /// MEM[addr+63:addr] := v1[i+63:i] /// ENDFOR /// \endcode #define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale) \ _mm512_i32scatter_pd((base_addr), _mm512_castsi512_si256(vindex), (v1), (scale)) /// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1 /// to memory locations starting at location \a base_addr at packed 32-bit /// integer indices stored in \a vindex scaled by \a scale. Only those elements /// whose corresponding mask bit is set in writemask \a mask are written to /// memory. /// /// This intrinsic corresponds to the VSCATTERDPD instructions. /// /// \code{.operation} /// FOR j := 0 to 7 /// i := j*64 /// m := j*32 /// IF mask[j] /// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 /// MEM[addr+63:addr] := a[i+63:i] /// FI /// ENDFOR /// \endcode #define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale) \ _mm512_mask_i32scatter_pd((base_addr), (mask), \ _mm512_castsi512_si256(vindex), (v1), (scale)) /// Stores 8 packed 64-bit integer elements located in \a v1 and stores them in /// memory locations starting at location \a base_addr at packed 32-bit integer /// indices stored in \a vindex scaled by \a scale. /// /// This intrinsic corresponds to the VPSCATTERDQ instructions. /// /// \code{.operation} /// FOR j := 0 to 7 /// i := j*64 /// m := j*32 /// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 /// MEM[addr+63:addr] := a[i+63:i] /// ENDFOR /// \endcode #define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale) \ _mm512_i32scatter_epi64((base_addr), \ _mm512_castsi512_si256(vindex), (v1), (scale)) /// Stores 8 packed 64-bit integer elements located in a and stores them in /// memory locations starting at location \a base_addr at packed 32-bit integer /// indices stored in \a vindex scaled by scale using writemask \a mask (elements /// whose corresponding mask bit is not set are not written to memory). /// /// This intrinsic corresponds to the VPSCATTERDQ instructions. 
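/* Illustrative usage sketch (editor's addition, not part of the original
 * header): the _mm512_i32lo* macros use only the 32-bit indices held in the
 * low 256 bits of the index vector. `table` and `idx` are hypothetical names.
 *
 *   double  table[256] = {0};
 *   __m512i idx = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0,   // high half ignored
 *                                  7, 6, 5, 4, 3, 2, 1, 0);
 *   __m512d v   = _mm512_i32logather_pd(idx, table, 8);      // scale 8 == sizeof(double)
 *   _mm512_i32loscatter_pd(table, idx, v, 8);                // write the 8 values back
 */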
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     MEM[addr+63:addr] := a[i+63:i]
///   FI
/// ENDFOR
/// \endcode
#define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale) \
  _mm512_mask_i32scatter_epi64((base_addr), (mask), \
                               _mm512_castsi512_si256(vindex), (v1), (scale))

#undef __DEFAULT_FN_ATTRS512
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS

#endif /* __AVX512FINTRIN_H */
/*===---- rdseedintrin.h - RDSEED intrinsics -------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <rdseedintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __RDSEEDINTRIN_H
#define __RDSEEDINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("rdseed")))

/// Stores a hardware-generated 16-bit random value in the memory at \a __p.
///
/// The random number generator complies with NIST SP800-90B and SP800-90C.
///
/// \code{.operation}
/// IF HW_NRND_GEN.ready == 1
///   Store16(__p, HW_NRND_GEN.data)
///   result := 1
/// ELSE
///   Store16(__p, 0)
///   result := 0
/// END
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c RDSEED instruction.
///
/// \param __p
///   Pointer to memory for storing the 16-bit random number.
/// \returns 1 if a random number was generated, 0 if not.
static __inline__ int __DEFAULT_FN_ATTRS
_rdseed16_step(unsigned short *__p)
{
  return (int) __builtin_ia32_rdseed16_step(__p);
}

/// Stores a hardware-generated 32-bit random value in the memory at \a __p.
///
/// The random number generator complies with NIST SP800-90B and SP800-90C.
///
/// \code{.operation}
/// IF HW_NRND_GEN.ready == 1
///   Store32(__p, HW_NRND_GEN.data)
///   result := 1
/// ELSE
///   Store32(__p, 0)
///   result := 0
/// END
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c RDSEED instruction.
///
/// \param __p
///   Pointer to memory for storing the 32-bit random number.
/// \returns 1 if a random number was generated, 0 if not.
static __inline__ int __DEFAULT_FN_ATTRS
_rdseed32_step(unsigned int *__p)
{
  return (int) __builtin_ia32_rdseed32_step(__p);
}

#ifdef __x86_64__
/// Stores a hardware-generated 64-bit random value in the memory at \a __p.
///
/// The random number generator complies with NIST SP800-90B and SP800-90C.
///
/// \code{.operation}
/// IF HW_NRND_GEN.ready == 1
///   Store64(__p, HW_NRND_GEN.data)
///   result := 1
/// ELSE
///   Store64(__p, 0)
///   result := 0
/// END
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c RDSEED instruction.
///
/// \param __p
///   Pointer to memory for storing the 64-bit random number.
/// \returns 1 if a random number was generated, 0 if not.
static __inline__ int __DEFAULT_FN_ATTRS
_rdseed64_step(unsigned long long *__p)
{
  return (int) __builtin_ia32_rdseed64_step(__p);
}
#endif

#undef __DEFAULT_FN_ATTRS

#endif /* __RDSEEDINTRIN_H */
//===-- Wrapper for C standard assert.h declarations on the GPU -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
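/* Illustrative usage sketch (editor's addition, not part of the rdseedintrin.h
 * text above): RDSEED can transiently report failure, so callers typically
 * retry a bounded number of times and use the value only on success.
 *
 *   unsigned int seed;
 *   int ok = 0;
 *   for (int tries = 0; tries < 10 && !ok; ++tries)
 *     ok = _rdseed32_step(&seed);
 *   // `seed` is valid only if ok == 1.
 */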
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__
#define __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__

#if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__)
#error "This file is for GPU offloading compilation only"
#endif

#include_next <assert.h>

#if __has_include(<llvm-libc-decls/assert.h>)

#if defined(__HIP__) || defined(__CUDA__)
#define __LIBC_ATTRS __attribute__((device))
#endif

#pragma omp begin declare target

#include <llvm-libc-decls/assert.h>

#pragma omp end declare target

#undef __LIBC_ATTRS

#endif

#endif // __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__
/*===---- __clang_cuda_math.h - Device-side CUDA math support --------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_CUDA_MATH_H__ #define __CLANG_CUDA_MATH_H__ #ifndef __CUDA__ #error "This file is for CUDA compilation only." #endif #ifndef __OPENMP_NVPTX__ #if CUDA_VERSION < 9000 #error This file is intended to be used with CUDA-9+ only. #endif #endif // __DEVICE__ is a helper macro with common set of attributes for the wrappers // we implement in this file. We need static in order to avoid emitting unused // functions and __forceinline__ helps inlining these wrappers at -O1. #pragma push_macro("__DEVICE__") #ifdef __OPENMP_NVPTX__ #if defined(__cplusplus) #define __DEVICE__ static constexpr __attribute__((always_inline, nothrow)) #else #define __DEVICE__ static __attribute__((always_inline, nothrow)) #endif #else #define __DEVICE__ static __device__ __forceinline__ #endif // Specialized version of __DEVICE__ for functions with void return type. Needed // because the OpenMP overlay requires constexpr functions here but prior to // c++14 void return functions could not be constexpr. #pragma push_macro("__DEVICE_VOID__") #if defined(__OPENMP_NVPTX__) && defined(__cplusplus) && __cplusplus < 201402L #define __DEVICE_VOID__ static __attribute__((always_inline, nothrow)) #else #define __DEVICE_VOID__ __DEVICE__ #endif // libdevice provides fast low precision and slow full-recision implementations // for some functions. Which one gets selected depends on // __CLANG_CUDA_APPROX_TRANSCENDENTALS__ which gets defined by clang if // -ffast-math or -fgpu-approx-transcendentals are in effect. #pragma push_macro("__FAST_OR_SLOW") #if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__) #define __FAST_OR_SLOW(fast, slow) fast #else #define __FAST_OR_SLOW(fast, slow) slow #endif __DEVICE__ int abs(int __a) { return __nv_abs(__a); } __DEVICE__ double fabs(double __a) { return __nv_fabs(__a); } __DEVICE__ double acos(double __a) { return __nv_acos(__a); } __DEVICE__ float acosf(float __a) { return __nv_acosf(__a); } __DEVICE__ double acosh(double __a) { return __nv_acosh(__a); } __DEVICE__ float acoshf(float __a) { return __nv_acoshf(__a); } __DEVICE__ double asin(double __a) { return __nv_asin(__a); } __DEVICE__ float asinf(float __a) { return __nv_asinf(__a); } __DEVICE__ double asinh(double __a) { return __nv_asinh(__a); } __DEVICE__ float asinhf(float __a) { return __nv_asinhf(__a); } __DEVICE__ double atan(double __a) { return __nv_atan(__a); } __DEVICE__ double atan2(double __a, double __b) { return __nv_atan2(__a, __b); } __DEVICE__ float atan2f(float __a, float __b) { return __nv_atan2f(__a, __b); } __DEVICE__ float atanf(float __a) { return __nv_atanf(__a); } __DEVICE__ double atanh(double __a) { return __nv_atanh(__a); } __DEVICE__ float atanhf(float __a) { return __nv_atanhf(__a); } __DEVICE__ double cbrt(double __a) { return __nv_cbrt(__a); } __DEVICE__ float cbrtf(float __a) { return __nv_cbrtf(__a); } __DEVICE__ double ceil(double __a) { return __nv_ceil(__a); } __DEVICE__ float ceilf(float __a) { return __nv_ceilf(__a); } __DEVICE__ double copysign(double __a, double __b) { return __nv_copysign(__a, __b); } __DEVICE__ float copysignf(float __a, float __b) { return __nv_copysignf(__a, __b); } __DEVICE__ double cos(double __a) { return __nv_cos(__a); } __DEVICE__ float cosf(float __a) { return __FAST_OR_SLOW(__nv_fast_cosf, __nv_cosf)(__a); } __DEVICE__ double cosh(double __a) { return __nv_cosh(__a); } __DEVICE__ float coshf(float __a) { return 
__nv_coshf(__a); } __DEVICE__ double cospi(double __a) { return __nv_cospi(__a); } __DEVICE__ float cospif(float __a) { return __nv_cospif(__a); } __DEVICE__ double cyl_bessel_i0(double __a) { return __nv_cyl_bessel_i0(__a); } __DEVICE__ float cyl_bessel_i0f(float __a) { return __nv_cyl_bessel_i0f(__a); } __DEVICE__ double cyl_bessel_i1(double __a) { return __nv_cyl_bessel_i1(__a); } __DEVICE__ float cyl_bessel_i1f(float __a) { return __nv_cyl_bessel_i1f(__a); } __DEVICE__ double erf(double __a) { return __nv_erf(__a); } __DEVICE__ double erfc(double __a) { return __nv_erfc(__a); } __DEVICE__ float erfcf(float __a) { return __nv_erfcf(__a); } __DEVICE__ double erfcinv(double __a) { return __nv_erfcinv(__a); } __DEVICE__ float erfcinvf(float __a) { return __nv_erfcinvf(__a); } __DEVICE__ double erfcx(double __a) { return __nv_erfcx(__a); } __DEVICE__ float erfcxf(float __a) { return __nv_erfcxf(__a); } __DEVICE__ float erff(float __a) { return __nv_erff(__a); } __DEVICE__ double erfinv(double __a) { return __nv_erfinv(__a); } __DEVICE__ float erfinvf(float __a) { return __nv_erfinvf(__a); } __DEVICE__ double exp(double __a) { return __nv_exp(__a); } __DEVICE__ double exp10(double __a) { return __nv_exp10(__a); } __DEVICE__ float exp10f(float __a) { return __nv_exp10f(__a); } __DEVICE__ double exp2(double __a) { return __nv_exp2(__a); } __DEVICE__ float exp2f(float __a) { return __nv_exp2f(__a); } __DEVICE__ float expf(float __a) { return __nv_expf(__a); } __DEVICE__ double expm1(double __a) { return __nv_expm1(__a); } __DEVICE__ float expm1f(float __a) { return __nv_expm1f(__a); } __DEVICE__ float fabsf(float __a) { return __nv_fabsf(__a); } __DEVICE__ double fdim(double __a, double __b) { return __nv_fdim(__a, __b); } __DEVICE__ float fdimf(float __a, float __b) { return __nv_fdimf(__a, __b); } __DEVICE__ double fdivide(double __a, double __b) { return __a / __b; } __DEVICE__ float fdividef(float __a, float __b) { #if __FAST_MATH__ && !__CUDA_PREC_DIV return __nv_fast_fdividef(__a, __b); #else return __a / __b; #endif } __DEVICE__ double floor(double __f) { return __nv_floor(__f); } __DEVICE__ float floorf(float __f) { return __nv_floorf(__f); } __DEVICE__ double fma(double __a, double __b, double __c) { return __nv_fma(__a, __b, __c); } __DEVICE__ float fmaf(float __a, float __b, float __c) { return __nv_fmaf(__a, __b, __c); } __DEVICE__ double fmax(double __a, double __b) { return __nv_fmax(__a, __b); } __DEVICE__ float fmaxf(float __a, float __b) { return __nv_fmaxf(__a, __b); } __DEVICE__ double fmin(double __a, double __b) { return __nv_fmin(__a, __b); } __DEVICE__ float fminf(float __a, float __b) { return __nv_fminf(__a, __b); } __DEVICE__ double fmod(double __a, double __b) { return __nv_fmod(__a, __b); } __DEVICE__ float fmodf(float __a, float __b) { return __nv_fmodf(__a, __b); } __DEVICE__ double frexp(double __a, int *__b) { return __nv_frexp(__a, __b); } __DEVICE__ float frexpf(float __a, int *__b) { return __nv_frexpf(__a, __b); } __DEVICE__ double hypot(double __a, double __b) { return __nv_hypot(__a, __b); } __DEVICE__ float hypotf(float __a, float __b) { return __nv_hypotf(__a, __b); } __DEVICE__ int ilogb(double __a) { return __nv_ilogb(__a); } __DEVICE__ int ilogbf(float __a) { return __nv_ilogbf(__a); } __DEVICE__ double j0(double __a) { return __nv_j0(__a); } __DEVICE__ float j0f(float __a) { return __nv_j0f(__a); } __DEVICE__ double j1(double __a) { return __nv_j1(__a); } __DEVICE__ float j1f(float __a) { return __nv_j1f(__a); } __DEVICE__ double jn(int __n, double 
__a) { return __nv_jn(__n, __a); } __DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); } #if defined(__LP64__) || defined(_WIN64) __DEVICE__ long labs(long __a) { return __nv_llabs(__a); }; #else __DEVICE__ long labs(long __a) { return __nv_abs(__a); }; #endif __DEVICE__ double ldexp(double __a, int __b) { return __nv_ldexp(__a, __b); } __DEVICE__ float ldexpf(float __a, int __b) { return __nv_ldexpf(__a, __b); } __DEVICE__ double lgamma(double __a) { return __nv_lgamma(__a); } __DEVICE__ float lgammaf(float __a) { return __nv_lgammaf(__a); } __DEVICE__ long long llabs(long long __a) { return __nv_llabs(__a); } __DEVICE__ long long llmax(long long __a, long long __b) { return __nv_llmax(__a, __b); } __DEVICE__ long long llmin(long long __a, long long __b) { return __nv_llmin(__a, __b); } __DEVICE__ long long llrint(double __a) { return __nv_llrint(__a); } __DEVICE__ long long llrintf(float __a) { return __nv_llrintf(__a); } __DEVICE__ long long llround(double __a) { return __nv_llround(__a); } __DEVICE__ long long llroundf(float __a) { return __nv_llroundf(__a); } __DEVICE__ double round(double __a) { return __nv_round(__a); } __DEVICE__ float roundf(float __a) { return __nv_roundf(__a); } __DEVICE__ double log(double __a) { return __nv_log(__a); } __DEVICE__ double log10(double __a) { return __nv_log10(__a); } __DEVICE__ float log10f(float __a) { return __nv_log10f(__a); } __DEVICE__ double log1p(double __a) { return __nv_log1p(__a); } __DEVICE__ float log1pf(float __a) { return __nv_log1pf(__a); } __DEVICE__ double log2(double __a) { return __nv_log2(__a); } __DEVICE__ float log2f(float __a) { return __FAST_OR_SLOW(__nv_fast_log2f, __nv_log2f)(__a); } __DEVICE__ double logb(double __a) { return __nv_logb(__a); } __DEVICE__ float logbf(float __a) { return __nv_logbf(__a); } __DEVICE__ float logf(float __a) { return __FAST_OR_SLOW(__nv_fast_logf, __nv_logf)(__a); } #if defined(__LP64__) || defined(_WIN64) __DEVICE__ long lrint(double __a) { return llrint(__a); } __DEVICE__ long lrintf(float __a) { return __float2ll_rn(__a); } __DEVICE__ long lround(double __a) { return llround(__a); } __DEVICE__ long lroundf(float __a) { return llroundf(__a); } #else __DEVICE__ long lrint(double __a) { return (long)rint(__a); } __DEVICE__ long lrintf(float __a) { return __float2int_rn(__a); } __DEVICE__ long lround(double __a) { return round(__a); } __DEVICE__ long lroundf(float __a) { return roundf(__a); } #endif __DEVICE__ int max(int __a, int __b) { return __nv_max(__a, __b); } __DEVICE__ int min(int __a, int __b) { return __nv_min(__a, __b); } __DEVICE__ double modf(double __a, double *__b) { return __nv_modf(__a, __b); } __DEVICE__ float modff(float __a, float *__b) { return __nv_modff(__a, __b); } __DEVICE__ double nearbyint(double __a) { return __builtin_nearbyint(__a); } __DEVICE__ float nearbyintf(float __a) { return __builtin_nearbyintf(__a); } __DEVICE__ double nextafter(double __a, double __b) { return __nv_nextafter(__a, __b); } __DEVICE__ float nextafterf(float __a, float __b) { return __nv_nextafterf(__a, __b); } __DEVICE__ double norm(int __dim, const double *__t) { return __nv_norm(__dim, __t); } __DEVICE__ double norm3d(double __a, double __b, double __c) { return __nv_norm3d(__a, __b, __c); } __DEVICE__ float norm3df(float __a, float __b, float __c) { return __nv_norm3df(__a, __b, __c); } __DEVICE__ double norm4d(double __a, double __b, double __c, double __d) { return __nv_norm4d(__a, __b, __c, __d); } __DEVICE__ float norm4df(float __a, float __b, float __c, float 
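/* Illustrative usage sketch (editor's addition, not part of the original
 * header): in device code these wrappers let ordinary libm-style calls
 * resolve to libdevice. A hypothetical CUDA kernel:
 *
 *   __global__ void polar(const float *r, const float *t, float *x, float *y, int n) {
 *     int i = blockIdx.x * blockDim.x + threadIdx.x;
 *     if (i < n) {
 *       float s, c;
 *       sincosf(t[i], &s, &c);   // __nv_sincosf, or the fast variant under -ffast-math
 *       x[i] = r[i] * c;
 *       y[i] = r[i] * s;
 *     }
 *   }
 */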
__d) { return __nv_norm4df(__a, __b, __c, __d); } __DEVICE__ double normcdf(double __a) { return __nv_normcdf(__a); } __DEVICE__ float normcdff(float __a) { return __nv_normcdff(__a); } __DEVICE__ double normcdfinv(double __a) { return __nv_normcdfinv(__a); } __DEVICE__ float normcdfinvf(float __a) { return __nv_normcdfinvf(__a); } __DEVICE__ float normf(int __dim, const float *__t) { return __nv_normf(__dim, __t); } __DEVICE__ double pow(double __a, double __b) { return __nv_pow(__a, __b); } __DEVICE__ float powf(float __a, float __b) { return __nv_powf(__a, __b); } __DEVICE__ double powi(double __a, int __b) { return __nv_powi(__a, __b); } __DEVICE__ float powif(float __a, int __b) { return __nv_powif(__a, __b); } __DEVICE__ double rcbrt(double __a) { return __nv_rcbrt(__a); } __DEVICE__ float rcbrtf(float __a) { return __nv_rcbrtf(__a); } __DEVICE__ double remainder(double __a, double __b) { return __nv_remainder(__a, __b); } __DEVICE__ float remainderf(float __a, float __b) { return __nv_remainderf(__a, __b); } __DEVICE__ double remquo(double __a, double __b, int *__c) { return __nv_remquo(__a, __b, __c); } __DEVICE__ float remquof(float __a, float __b, int *__c) { return __nv_remquof(__a, __b, __c); } __DEVICE__ double rhypot(double __a, double __b) { return __nv_rhypot(__a, __b); } __DEVICE__ float rhypotf(float __a, float __b) { return __nv_rhypotf(__a, __b); } // __nv_rint* in libdevice is buggy and produces incorrect results. __DEVICE__ double rint(double __a) { return __builtin_rint(__a); } __DEVICE__ float rintf(float __a) { return __builtin_rintf(__a); } __DEVICE__ double rnorm(int __a, const double *__b) { return __nv_rnorm(__a, __b); } __DEVICE__ double rnorm3d(double __a, double __b, double __c) { return __nv_rnorm3d(__a, __b, __c); } __DEVICE__ float rnorm3df(float __a, float __b, float __c) { return __nv_rnorm3df(__a, __b, __c); } __DEVICE__ double rnorm4d(double __a, double __b, double __c, double __d) { return __nv_rnorm4d(__a, __b, __c, __d); } __DEVICE__ float rnorm4df(float __a, float __b, float __c, float __d) { return __nv_rnorm4df(__a, __b, __c, __d); } __DEVICE__ float rnormf(int __dim, const float *__t) { return __nv_rnormf(__dim, __t); } __DEVICE__ double rsqrt(double __a) { return __nv_rsqrt(__a); } __DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); } __DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); } __DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); } __DEVICE__ double scalbln(double __a, long __b) { if (__b > INT_MAX) return __a > 0 ? HUGE_VAL : -HUGE_VAL; if (__b < INT_MIN) return __a > 0 ? 0.0 : -0.0; return scalbn(__a, (int)__b); } __DEVICE__ float scalblnf(float __a, long __b) { if (__b > INT_MAX) return __a > 0 ? HUGE_VALF : -HUGE_VALF; if (__b < INT_MIN) return __a > 0 ? 
0.f : -0.f; return scalbnf(__a, (int)__b); } __DEVICE__ double sin(double __a) { return __nv_sin(__a); } __DEVICE_VOID__ void sincos(double __a, double *__s, double *__c) { return __nv_sincos(__a, __s, __c); } __DEVICE_VOID__ void sincosf(float __a, float *__s, float *__c) { return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __s, __c); } __DEVICE_VOID__ void sincospi(double __a, double *__s, double *__c) { return __nv_sincospi(__a, __s, __c); } __DEVICE_VOID__ void sincospif(float __a, float *__s, float *__c) { return __nv_sincospif(__a, __s, __c); } __DEVICE__ float sinf(float __a) { return __FAST_OR_SLOW(__nv_fast_sinf, __nv_sinf)(__a); } __DEVICE__ double sinh(double __a) { return __nv_sinh(__a); } __DEVICE__ float sinhf(float __a) { return __nv_sinhf(__a); } __DEVICE__ double sinpi(double __a) { return __nv_sinpi(__a); } __DEVICE__ float sinpif(float __a) { return __nv_sinpif(__a); } __DEVICE__ double sqrt(double __a) { return __nv_sqrt(__a); } __DEVICE__ float sqrtf(float __a) { return __nv_sqrtf(__a); } __DEVICE__ double tan(double __a) { return __nv_tan(__a); } __DEVICE__ float tanf(float __a) { return __nv_tanf(__a); } __DEVICE__ double tanh(double __a) { return __nv_tanh(__a); } __DEVICE__ float tanhf(float __a) { return __nv_tanhf(__a); } __DEVICE__ double tgamma(double __a) { return __nv_tgamma(__a); } __DEVICE__ float tgammaf(float __a) { return __nv_tgammaf(__a); } __DEVICE__ double trunc(double __a) { return __nv_trunc(__a); } __DEVICE__ float truncf(float __a) { return __nv_truncf(__a); } __DEVICE__ unsigned long long ullmax(unsigned long long __a, unsigned long long __b) { return __nv_ullmax(__a, __b); } __DEVICE__ unsigned long long ullmin(unsigned long long __a, unsigned long long __b) { return __nv_ullmin(__a, __b); } __DEVICE__ unsigned int umax(unsigned int __a, unsigned int __b) { return __nv_umax(__a, __b); } __DEVICE__ unsigned int umin(unsigned int __a, unsigned int __b) { return __nv_umin(__a, __b); } __DEVICE__ double y0(double __a) { return __nv_y0(__a); } __DEVICE__ float y0f(float __a) { return __nv_y0f(__a); } __DEVICE__ double y1(double __a) { return __nv_y1(__a); } __DEVICE__ float y1f(float __a) { return __nv_y1f(__a); } __DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); } __DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); } #pragma pop_macro("__DEVICE__") #pragma pop_macro("__DEVICE_VOID__") #pragma pop_macro("__FAST_OR_SLOW") #endif // __CLANG_CUDA_MATH_H__ /*===---- __stddef_wchar.h - Definition of wchar_t -------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined(__cplusplus) || (defined(_MSC_VER) && !_NATIVE_WCHAR_T_DEFINED) /* * When -fbuiltin-headers-in-system-modules is set this is a non-modular header * and needs to behave as if it was textual. */ #if !defined(_WCHAR_T) || \ (__has_feature(modules) && !__building_module(_Builtin_stddef)) #define _WCHAR_T #ifdef _MSC_EXTENSIONS #define _WCHAR_T_DEFINED #endif typedef __WCHAR_TYPE__ wchar_t; #endif #endif /*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __AVX512BWINTRIN_H #define __AVX512BWINTRIN_H typedef unsigned int __mmask32; typedef unsigned long long __mmask64; /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS512 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512bw,evex512"), __min_vector_width__(512))) #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512bw,no-evex512"))) static __inline __mmask32 __DEFAULT_FN_ATTRS _knot_mask32(__mmask32 __M) { return __builtin_ia32_knotsi(__M); } static __inline __mmask64 __DEFAULT_FN_ATTRS _knot_mask64(__mmask64 __M) { return __builtin_ia32_knotdi(__M); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS _kand_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kand_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS _kandn_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kandn_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS _kor_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kor_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS _kxnor_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxnor_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS _kxor_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxor_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestc_mask32_u8(__mmask32 __A, __mmask32 __B) { return (unsigned char)__builtin_ia32_kortestcsi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestz_mask32_u8(__mmask32 __A, __mmask32 __B) { return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B); return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) { return (unsigned char)__builtin_ia32_kortestcdi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) { return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortest_mask64_u8(__mmask64 __A, __mmask64 
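/* Illustrative usage sketch (editor's addition, not part of the original
 * header): 32-bit mask registers combined with the k* helpers; assumes
 * AVX-512BW (-mavx512bw).
 *
 *   __mmask32 a = _cvtu32_mask32(0x0000FFFFu);
 *   __mmask32 b = _cvtu32_mask32(0x00FF00FFu);
 *   __mmask32 k = _kand_mask32(a, b);              // 0x000000FF
 *   unsigned char z = _kortestz_mask32_u8(k, k);   // 1 only when k | k == 0, so 0 here
 */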
__B, unsigned char *__C) { *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B); return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _ktestc_mask32_u8(__mmask32 __A, __mmask32 __B) { return (unsigned char)__builtin_ia32_ktestcsi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _ktestz_mask32_u8(__mmask32 __A, __mmask32 __B) { return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { *__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B); return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) { return (unsigned char)__builtin_ia32_ktestcdi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) { return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B); return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS _kadd_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kadd_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B); } #define _kshiftli_mask32(A, I) \ ((__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I))) #define _kshiftri_mask32(A, I) \ ((__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I))) #define _kshiftli_mask64(A, I) \ ((__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I))) #define _kshiftri_mask64(A, I) \ ((__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I))) static __inline__ unsigned int __DEFAULT_FN_ATTRS _cvtmask32_u32(__mmask32 __A) { return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS _cvtmask64_u64(__mmask64 __A) { return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS _cvtu32_mask32(unsigned int __A) { return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS _cvtu64_mask64(unsigned long long __A) { return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS _load_mask32(__mmask32 *__A) { return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS _load_mask64(__mmask64 *__A) { return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A); } static __inline__ void __DEFAULT_FN_ATTRS _store_mask32(__mmask32 *__A, __mmask32 __B) { *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B); } static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A, __mmask64 __B) { *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B); } /* Integer compare */ #define _mm512_cmp_epi8_mask(a, b, p) \ ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ (__v64qi)(__m512i)(b), (int)(p), \ (__mmask64)-1)) #define _mm512_mask_cmp_epi8_mask(m, a, b, p) \ ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ (__v64qi)(__m512i)(b), (int)(p), \ (__mmask64)(m))) #define 
_mm512_cmp_epu8_mask(a, b, p) \ ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ (__v64qi)(__m512i)(b), (int)(p), \ (__mmask64)-1)) #define _mm512_mask_cmp_epu8_mask(m, a, b, p) \ ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ (__v64qi)(__m512i)(b), (int)(p), \ (__mmask64)(m))) #define _mm512_cmp_epi16_mask(a, b, p) \ ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ (__v32hi)(__m512i)(b), (int)(p), \ (__mmask32)-1)) #define _mm512_mask_cmp_epi16_mask(m, a, b, p) \ ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ (__v32hi)(__m512i)(b), (int)(p), \ (__mmask32)(m))) #define _mm512_cmp_epu16_mask(a, b, p) \ ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ (__v32hi)(__m512i)(b), (int)(p), \ (__mmask32)-1)) #define _mm512_mask_cmp_epu16_mask(m, a, b, p) \ ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ (__v32hi)(__m512i)(b), (int)(p), \ (__mmask32)(m))) #define _mm512_cmpeq_epi8_mask(A, B) \ _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) #define _mm512_mask_cmpeq_epi8_mask(k, A, B) \ _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm512_cmpge_epi8_mask(A, B) \ _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) #define _mm512_mask_cmpge_epi8_mask(k, A, B) \ _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm512_cmpgt_epi8_mask(A, B) \ _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) #define _mm512_mask_cmpgt_epi8_mask(k, A, B) \ _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm512_cmple_epi8_mask(A, B) \ _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) #define _mm512_mask_cmple_epi8_mask(k, A, B) \ _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm512_cmplt_epi8_mask(A, B) \ _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) #define _mm512_mask_cmplt_epi8_mask(k, A, B) \ _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm512_cmpneq_epi8_mask(A, B) \ _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) #define _mm512_mask_cmpneq_epi8_mask(k, A, B) \ _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) #define _mm512_cmpeq_epu8_mask(A, B) \ _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) #define _mm512_mask_cmpeq_epu8_mask(k, A, B) \ _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm512_cmpge_epu8_mask(A, B) \ _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) #define _mm512_mask_cmpge_epu8_mask(k, A, B) \ _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm512_cmpgt_epu8_mask(A, B) \ _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) #define _mm512_mask_cmpgt_epu8_mask(k, A, B) \ _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm512_cmple_epu8_mask(A, B) \ _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) #define _mm512_mask_cmple_epu8_mask(k, A, B) \ _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm512_cmplt_epu8_mask(A, B) \ _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) #define _mm512_mask_cmplt_epu8_mask(k, A, B) \ _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm512_cmpneq_epu8_mask(A, B) \ _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) #define _mm512_mask_cmpneq_epu8_mask(k, A, B) \ _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) #define _mm512_cmpeq_epi16_mask(A, B) \ _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) #define _mm512_mask_cmpeq_epi16_mask(k, A, B) \ _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm512_cmpge_epi16_mask(A, B) \ _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) #define _mm512_mask_cmpge_epi16_mask(k, A, B) \ 
_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm512_cmpgt_epi16_mask(A, B) \ _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) #define _mm512_mask_cmpgt_epi16_mask(k, A, B) \ _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm512_cmple_epi16_mask(A, B) \ _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) #define _mm512_mask_cmple_epi16_mask(k, A, B) \ _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm512_cmplt_epi16_mask(A, B) \ _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) #define _mm512_mask_cmplt_epi16_mask(k, A, B) \ _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm512_cmpneq_epi16_mask(A, B) \ _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) #define _mm512_mask_cmpneq_epi16_mask(k, A, B) \ _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) #define _mm512_cmpeq_epu16_mask(A, B) \ _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) #define _mm512_mask_cmpeq_epu16_mask(k, A, B) \ _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) #define _mm512_cmpge_epu16_mask(A, B) \ _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) #define _mm512_mask_cmpge_epu16_mask(k, A, B) \ _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) #define _mm512_cmpgt_epu16_mask(A, B) \ _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) #define _mm512_mask_cmpgt_epu16_mask(k, A, B) \ _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) #define _mm512_cmple_epu16_mask(A, B) \ _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) #define _mm512_mask_cmple_epu16_mask(k, A, B) \ _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) #define _mm512_cmplt_epu16_mask(A, B) \ _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) #define _mm512_mask_cmplt_epu16_mask(k, A, B) \ _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) #define _mm512_cmpneq_epu16_mask(A, B) \ _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) #define _mm512_mask_cmpneq_epu16_mask(k, A, B) \ _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi8 (__m512i __A, __m512i __B) { return (__m512i) ((__v64qu) __A + (__v64qu) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_add_epi8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_add_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi8 (__m512i __A, __m512i __B) { return (__m512i) ((__v64qu) __A - (__v64qu) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_sub_epi8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_sub_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi16 (__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A + (__v32hu) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return 
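/* Illustrative usage sketch (editor's addition, not part of the original
 * header): a byte compare produces a __mmask64 that then predicates a
 * zero-masked add.
 *
 *   __m512i a    = _mm512_set1_epi8(5), b = _mm512_set1_epi8(3);
 *   __mmask64 gt = _mm512_cmpgt_epi8_mask(a, b);    // every bit set here, since 5 > 3
 *   __m512i sum  = _mm512_maskz_add_epi8(gt, a, b); // 8 in every byte lane
 */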
(__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_add_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_add_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi16 (__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A - (__v32hu) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sub_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sub_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mullo_epi16 (__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A * (__v32hu) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mullo_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mullo_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, (__v64qi) __W, (__v64qi) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, (__v32hi) __W, (__v32hi) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi8 (__m512i __A) { return (__m512i)__builtin_elementwise_abs((__v64qs)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_abs_epi8(__A), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_abs_epi8(__A), (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi16 (__m512i __A) { return (__m512i)__builtin_elementwise_abs((__v32hi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_abs_epi16(__A), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_abs_epi16(__A), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_packs_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B) { return 
(__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_packs_epi32(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_packs_epi32(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_packs_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_packs_epi16(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_packs_epi16(__A, __B), (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_packus_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_packus_epi32(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_packus_epi32(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_packus_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_packus_epi16(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_packus_epi16(__A, __B), (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_adds_epi8 (__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_add_sat((__v64qs)__A, (__v64qs)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_adds_epi8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_adds_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_adds_epi16 (__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_add_sat((__v32hi)__A, (__v32hi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_adds_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i 
__B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_adds_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_adds_epu8 (__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_add_sat((__v64qu) __A, (__v64qu) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_adds_epu8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_adds_epu8(__A, __B), (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_adds_epu16 (__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_add_sat((__v32hu) __A, (__v32hu) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_adds_epu16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_adds_epu16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_avg_epu8 (__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_avg_epu8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_avg_epu8(__A, __B), (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_avg_epu16 (__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_avg_epu16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_avg_epu16(__A, __B), (__v32hi) _mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi8 (__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_max((__v64qs) __A, (__v64qs) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_max_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_max_epi8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi16 (__m512i __A, __m512i __B) { return 
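/* Illustrative usage sketch (editor's addition, not part of the original
 * header): the adds/subs forms saturate instead of wrapping.
 *
 *   __m512i a = _mm512_set1_epi8((char)200), b = _mm512_set1_epi8((char)100);
 *   __m512i s = _mm512_adds_epu8(a, b);   // every unsigned byte clamps to 255 (not 300 % 256 == 44)
 */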
(__m512i)__builtin_elementwise_max((__v32hi) __A, (__v32hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_max_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_max_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu8 (__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_max((__v64qu)__A, (__v64qu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_max_epu8(__A, __B), (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_max_epu8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu16 (__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_max((__v32hu)__A, (__v32hu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_max_epu16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_max_epu16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi8 (__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_min((__v64qs) __A, (__v64qs) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_min_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_min_epi8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi16 (__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_min((__v32hi) __A, (__v32hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_min_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_min_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu8 (__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_min((__v64qu)__A, (__v64qu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_min_epu8(__A, __B), 
(__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_min_epu8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu16 (__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_min((__v32hu)__A, (__v32hu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_min_epu16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_min_epu16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_shuffle_epi8(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_shuffle_epi8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_shuffle_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_subs_epi8 (__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_sub_sat((__v64qs)__A, (__v64qs)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_subs_epi8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_subs_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_subs_epi16 (__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_sub_sat((__v32hi)__A, (__v32hi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_subs_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_subs_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_subs_epu8 (__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_sub_sat((__v64qu) __A, (__v64qu) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_subs_epu8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_subs_epu8(__A, __B), 
(__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_subs_epu16 (__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_sub_sat((__v32hu) __A, (__v32hu) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_subs_epu16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_subs_epu16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I, (__v32hi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512(__U, (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), (__v32hi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512(__U, (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), (__v32hi)__I); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512(__U, (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mulhrs_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mulhrs_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mulhrs_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mulhi_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mulhi_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mulhi_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mulhi_epu16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mulhi_epu16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B) { return 
(__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mulhi_epu16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maddubs_epi16(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, (__v32hi)_mm512_maddubs_epi16(__X, __Y), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, (__v32hi)_mm512_maddubs_epi16(__X, __Y), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_madd_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_madd_epi16(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_madd_epi16(__A, __B), (__v16si)_mm512_setzero_si512()); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi16_epi8 (__m512i __A) { return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, (__v32qi)_mm256_setzero_si256(), (__mmask32) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, (__v32qi)__O, __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, (__v32qi) _mm256_setzero_si256(), __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi16_epi8 (__m512i __A) { return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, (__v32qi) _mm256_setzero_si256(), (__mmask32) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, (__v32qi) __O, __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, (__v32qi) _mm256_setzero_si256(), __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi8 (__m512i __A) { return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, (__v32qi) _mm256_undefined_si256(), (__mmask32) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, (__v32qi) __O, __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, (__v32qi) _mm256_setzero_si256(), __M); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) { __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); } static __inline__ void 
__DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) { __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) { __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi8(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 8, 64+8, 9, 64+9, 10, 64+10, 11, 64+11, 12, 64+12, 13, 64+13, 14, 64+14, 15, 64+15, 24, 64+24, 25, 64+25, 26, 64+26, 27, 64+27, 28, 64+28, 29, 64+29, 30, 64+30, 31, 64+31, 40, 64+40, 41, 64+41, 42, 64+42, 43, 64+43, 44, 64+44, 45, 64+45, 46, 64+46, 47, 64+47, 56, 64+56, 57, 64+57, 58, 64+58, 59, 64+59, 60, 64+60, 61, 64+61, 62, 64+62, 63, 64+63); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_unpackhi_epi8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_unpackhi_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_unpackhi_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_unpackhi_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi8(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 0, 64+0, 1, 64+1, 2, 64+2, 3, 64+3, 4, 64+4, 5, 64+5, 6, 64+6, 7, 64+7, 16, 64+16, 17, 64+17, 18, 64+18, 19, 64+19, 20, 64+20, 21, 64+21, 22, 64+22, 23, 64+23, 32, 64+32, 33, 64+33, 34, 64+34, 35, 64+35, 36, 64+36, 37, 64+37, 38, 64+38, 39, 64+39, 48, 64+48, 49, 64+49, 50, 64+50, 51, 64+51, 52, 64+52, 53, 64+53, 54, 64+54, 55, 64+55); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_unpacklo_epi8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_unpacklo_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 24, 32+24, 25, 32+25, 26, 
32+26, 27, 32+27); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_unpacklo_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_unpacklo_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi16(__m256i __A) { /* This function always performs a signed extension, but __v32qi is a char which may be signed or unsigned, so use __v32qs. */ return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_cvtepi8_epi16(__A), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_cvtepi8_epi16(__A), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi16(__m256i __A) { return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_cvtepu8_epi16(__A), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_cvtepu8_epi16(__A), (__v32hi)_mm512_setzero_si512()); } #define _mm512_shufflehi_epi16(A, imm) \ ((__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm))) #define _mm512_mask_shufflehi_epi16(W, U, A, imm) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_shufflehi_epi16((A), \ (imm)), \ (__v32hi)(__m512i)(W))) #define _mm512_maskz_shufflehi_epi16(U, A, imm) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_shufflehi_epi16((A), \ (imm)), \ (__v32hi)_mm512_setzero_si512())) #define _mm512_shufflelo_epi16(A, imm) \ ((__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm))) #define _mm512_mask_shufflelo_epi16(W, U, A, imm) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_shufflelo_epi16((A), \ (imm)), \ (__v32hi)(__m512i)(W))) #define _mm512_maskz_shufflelo_epi16(U, A, imm) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_shufflelo_epi16((A), \ (imm)), \ (__v32hi)_mm512_setzero_si512())) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sllv_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sllv_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 
_mm512_sll_epi16(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sll_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sll_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi16(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, (int)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_slli_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_slli_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } #define _mm512_bslli_epi128(a, imm) \ ((__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srlv_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srlv_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srav_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srav_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi16(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sra_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sra_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi16(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, (int)__B); } static __inline__ __m512i 
__DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srai_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srai_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi16(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srl_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srl_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi16(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, (int)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srli_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srli_epi16(__A, (unsigned int)__B), (__v32hi)_mm512_setzero_si512()); } #define _mm512_bsrli_epi128(a, imm) \ ((__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, (__v32hi) __A, (__v32hi) __W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, (__v32hi) __A, (__v32hi) _mm512_setzero_si512 ()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, (__v64qi) __A, (__v64qi) __W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, (__v64qi) __A, (__v64qi) _mm512_setzero_si512 ()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A) { return (__m512i) __builtin_ia32_selectb_512(__M, (__v64qi)_mm512_set1_epi8(__A), (__v64qi) __O); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi8 (__mmask64 __M, char __A) { return (__m512i) __builtin_ia32_selectb_512(__M, (__v64qi) _mm512_set1_epi8(__A), (__v64qi) _mm512_setzero_si512()); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A, __mmask64 __B) { return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, (__mmask64) __B); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_kunpackw (__mmask32 __A, __mmask32 __B) { return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, (__mmask32) __B); } static 
__inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi16 (void const *__P) { struct __loadu_epi16 { __m512i_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_epi16*)__P)->__v; } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddquhi512_mask ((const __v32hi *) __P, (__v32hi) __W, (__mmask32) __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddquhi512_mask ((const __v32hi *) __P, (__v32hi) _mm512_setzero_si512 (), (__mmask32) __U); } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi8 (void const *__P) { struct __loadu_epi8 { __m512i_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_epi8*)__P)->__v; } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddquqi512_mask ((const __v64qi *) __P, (__v64qi) __W, (__mmask64) __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddquqi512_mask ((const __v64qi *) __P, (__v64qi) _mm512_setzero_si512 (), (__mmask64) __U); } static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi16 (void *__P, __m512i __A) { struct __storeu_epi16 { __m512i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi16*)__P)->__v = __A; } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A) { __builtin_ia32_storedquhi512_mask ((__v32hi *) __P, (__v32hi) __A, (__mmask32) __U); } static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi8 (void *__P, __m512i __A) { struct __storeu_epi8 { __m512i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi8*)__P)->__v = __A; } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A) { __builtin_ia32_storedquqi512_mask ((__v64qi *) __P, (__v64qi) __A, (__mmask64) __U); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 _mm512_test_epi8_mask (__m512i __A, __m512i __B) { return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 _mm512_test_epi16_mask (__m512i __A, __m512i __B) { return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 _mm512_testn_epi8_mask (__m512i __A, __m512i __B) { return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 _mm512_testn_epi16_mask (__m512i __A, __m512i __B) { return _mm512_cmpeq_epi16_mask 
(_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 _mm512_movepi8_mask (__m512i __A) { return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 _mm512_movepi16_mask (__m512i __A) { return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_movm_epi8 (__mmask64 __A) { return (__m512i) __builtin_ia32_cvtmask2b512 (__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_movm_epi16 (__mmask32 __A) { return (__m512i) __builtin_ia32_cvtmask2w512 (__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastb_epi8 (__m128i __A) { return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectb_512(__M, (__v64qi) _mm512_broadcastb_epi8(__A), (__v64qi) __O); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectb_512(__M, (__v64qi) _mm512_broadcastb_epi8(__A), (__v64qi) _mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A) { return (__m512i) __builtin_ia32_selectw_512(__M, (__v32hi) _mm512_set1_epi16(__A), (__v32hi) __O); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi16 (__mmask32 __M, short __A) { return (__m512i) __builtin_ia32_selectw_512(__M, (__v32hi) _mm512_set1_epi16(__A), (__v32hi) _mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastw_epi16 (__m128i __A) { return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectw_512(__M, (__v32hi) _mm512_broadcastw_epi16(__A), (__v32hi) __O); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectw_512(__M, (__v32hi) _mm512_broadcastw_epi16(__A), (__v32hi) _mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi16 (__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_permutexvar_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_permutexvar_epi16(__A, __B), (__v32hi)__W); } #define _mm512_alignr_epi8(A, B, N) \ 
((__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \ (__v64qi)(__m512i)(B), (int)(N))) #define _mm512_mask_alignr_epi8(W, U, A, B, N) \ ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ (__v64qi)(__m512i)(W))) #define _mm512_maskz_alignr_epi8(U, A, B, N) \ ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ (__v64qi)(__m512i)_mm512_setzero_si512())) #define _mm512_dbsad_epu8(A, B, imm) \ ((__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \ (__v64qi)(__m512i)(B), (int)(imm))) #define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ (__v32hi)(__m512i)(W))) #define _mm512_maskz_dbsad_epu8(U, A, B, imm) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ (__v32hi)_mm512_setzero_si512())) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sad_epu8 (__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A, (__v64qi) __B); } #undef __DEFAULT_FN_ATTRS512 #undef __DEFAULT_FN_ATTRS #endif avx512ifmaintrin.h/*===------------- avx512vbmiintrin.h - VBMI intrinsics ------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __VBMIINTRIN_H #define __VBMIINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vbmi,evex512"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_vpermi2varqi512((__v64qi)__A, (__v64qi)__I, (__v64qi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutex2var_epi8(__m512i __A, __mmask64 __U, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512(__U, (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B), (__v64qi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I, __mmask64 __U, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512(__U, (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B), (__v64qi)__I); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512(__U, (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B), (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_permutexvar_epi8 (__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_permvarqi512((__v64qi) __B, (__v64qi) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutexvar_epi8 (__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_permutexvar_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_permutexvar_epi8(__A, __B), (__v64qi)__W); 
} static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_multishift_epi64_epi8(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_vpmultishiftqb512((__v64qi)__X, (__v64qi) __Y); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_multishift_epi64_epi8(__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_multishift_epi64_epi8(__X, __Y), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_multishift_epi64_epi8(__X, __Y), (__v64qi)_mm512_setzero_si512()); } #undef __DEFAULT_FN_ATTRS #endif /*===---- avx512vlcdintrin.h - AVX512VL and AVX512CD intrinsics ------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __AVX512VLCDINTRIN_H #define __AVX512VLCDINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512cd,no-evex512"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512cd,no-evex512"), \ __min_vector_width__(256))) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastmb_epi64 (__mmask8 __A) { return (__m128i) _mm_set1_epi64x((long long) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastmb_epi64 (__mmask8 __A) { return (__m256i) _mm256_set1_epi64x((long long)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastmw_epi32 (__mmask16 __A) { return (__m128i) _mm_set1_epi32((int)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastmw_epi32 (__mmask16 __A) { return (__m256i) _mm256_set1_epi32((int)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_conflict_epi64 (__m128i __A) { return (__m128i) __builtin_ia32_vpconflictdi_128 ((__v2di) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_conflict_epi64(__A), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_conflict_epi64(__A), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_conflict_epi64 (__m256i __A) { return (__m256i) __builtin_ia32_vpconflictdi_256 ((__v4di) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_conflict_epi64(__A), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_conflict_epi64(__A), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_conflict_epi32 (__m128i __A) { return (__m128i) __builtin_ia32_vpconflictsi_128 ((__v4si) __A); } static 
__inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_conflict_epi32(__A), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_conflict_epi32(__A), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_conflict_epi32 (__m256i __A) { return (__m256i) __builtin_ia32_vpconflictsi_256 ((__v8si) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_conflict_epi32(__A), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_conflict_epi32(__A), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi32 (__m128i __A) { return (__m128i) __builtin_ia32_vplzcntd_128 ((__v4si) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_lzcnt_epi32(__A), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_lzcnt_epi32(__A), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_lzcnt_epi32 (__m256i __A) { return (__m256i) __builtin_ia32_vplzcntd_256 ((__v8si) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_lzcnt_epi32(__A), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_lzcnt_epi32(__A), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi64 (__m128i __A) { return (__m128i) __builtin_ia32_vplzcntq_128 ((__v2di) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_lzcnt_epi64(__A), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_lzcnt_epi64(__A), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_lzcnt_epi64 (__m256i __A) { return (__m256i) __builtin_ia32_vplzcntq_256 ((__v4di) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_lzcnt_epi64(__A), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_lzcnt_epi64(__A), (__v4di)_mm256_setzero_si256()); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif /* __AVX512VLCDINTRIN_H */ /*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ 
intrinsics ------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __AVX512VLDQINTRIN_H #define __AVX512VLDQINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512dq,no-evex512"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512dq,no-evex512"), \ __min_vector_width__(256))) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi64 (__m256i __A, __m256i __B) { return (__m256i) ((__v4du) __A * (__v4du) __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_mullo_epi64(__A, __B), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_mullo_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mullo_epi64 (__m128i __A, __m128i __B) { return (__m128i) ((__v2du) __A * (__v2du) __B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_mullo_epi64(__A, __B), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_mullo_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_andnot_pd(__A, __B), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_andnot_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_andnot_pd(__A, __B), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_andnot_pd(__A, __B), (__v2df)_mm_setzero_pd()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_andnot_ps(__A, __B), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_andnot_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_andnot_ps(__m128 __W, 
__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_andnot_ps(__A, __B), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_andnot_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_and_pd(__A, __B), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_and_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_and_pd(__A, __B), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_and_pd(__A, __B), (__v2df)_mm_setzero_pd()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_and_ps(__A, __B), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_and_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_and_ps(__A, __B), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_and_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_xor_pd(__A, __B), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_xor_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_xor_pd(__A, __B), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_xor_pd(__A, __B), (__v2df)_mm_setzero_pd()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_xor_ps(__A, __B), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_xor_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } 
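/* Illustrative usage sketch (editorial addition, not part of the original
 * header): the masked bitwise intrinsics defined above all follow the same
 * select pattern — _mm256_mask_xor_ps(W, U, A, B) computes A ^ B lane-wise
 * and, per bit of the mask U, keeps either that result (bit set) or the
 * pass-through lane from W (bit clear); the *_maskz_* forms substitute zero
 * for W. The helper name below is hypothetical; it assumes a translation
 * unit that includes <immintrin.h> and targets avx512vl+avx512dq. */
#if 0 /* example only; excluded from compilation */
static inline __m256 toggle_sign_of_selected_lanes(__m256 v, __mmask8 lanes) {
  /* XOR with the sign-bit pattern flips the sign only in lanes whose mask
   * bit is set; unselected lanes pass through v unchanged. */
  const __m256 sign_bits = _mm256_set1_ps(-0.0f);
  return _mm256_mask_xor_ps(v, lanes, v, sign_bits);
}
#endif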
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_xor_ps(__A, __B), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_xor_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_or_pd(__A, __B), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_or_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_or_pd(__A, __B), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_or_pd(__A, __B), (__v2df)_mm_setzero_pd()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_or_ps(__A, __B), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_or_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_or_ps(__A, __B), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_or_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epi64 (__m128d __A) { return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, (__v2di) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epi64 (__m256d __A) { return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epu64 (__m128d __A) { 
return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, (__v2di) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epu64 (__m256d __A) { return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epi64 (__m128 __A) { return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, (__v2di) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epi64 (__m128 __A) { return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epu64 (__m128 __A) { return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, (__v2di) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epu64 (__m128 __A) { return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { return (__m256i) 
__builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepi64_pd (__m128i __A) { return (__m128d)__builtin_convertvector((__v2di)__A, __v2df); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepi64_pd(__A), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepi64_pd(__A), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_pd (__m256i __A) { return (__m256d)__builtin_convertvector((__v4di)__A, __v4df); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepi64_pd(__A), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepi64_pd(__A), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ps (__m128i __A) { return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) -1); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, (__v4sf) __W, (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_ps (__m256i __A) { return (__m128)__builtin_convertvector((__v4di)__A, __v4sf); } static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtepi64_ps(__A), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtepi64_ps(__A), (__v4sf)_mm_setzero_ps()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epi64 (__m128d __A) { return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, (__v2di) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epi64 (__m256d __A) { return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 
_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epu64 (__m128d __A) { return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, (__v2di) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epu64 (__m256d __A) { return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epi64 (__m128 __A) { return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, (__v2di) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epi64 (__m128 __A) { return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epu64 (__m128 __A) { return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, (__v2di) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epu64 (__m128 __A) { return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 
_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepu64_pd (__m128i __A) { return (__m128d)__builtin_convertvector((__v2du)__A, __v2df); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepu64_pd(__A), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepu64_pd(__A), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepu64_pd (__m256i __A) { return (__m256d)__builtin_convertvector((__v4du)__A, __v4df); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepu64_pd(__A), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepu64_pd(__A), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ps (__m128i __A) { return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) -1); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, (__v4sf) __W, (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_cvtepu64_ps (__m256i __A) { return (__m128)__builtin_convertvector((__v4du)__A, __v4sf); } static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtepu64_ps(__A), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtepu64_ps(__A), (__v4sf)_mm_setzero_ps()); } #define _mm_range_pd(A, B, C) \ ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (int)(C), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1)) #define _mm_mask_range_pd(W, U, A, B, C) \ ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (int)(C), \ (__v2df)(__m128d)(W), \ (__mmask8)(U))) #define _mm_maskz_range_pd(U, A, B, C) \ ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (int)(C), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U))) #define _mm256_range_pd(A, B, C) \ ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), (int)(C), \ (__v4df)_mm256_setzero_pd(), \ (__mmask8)-1)) #define _mm256_mask_range_pd(W, U, A, B, C) \ 
((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), (int)(C), \ (__v4df)(__m256d)(W), \ (__mmask8)(U))) #define _mm256_maskz_range_pd(U, A, B, C) \ ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), (int)(C), \ (__v4df)_mm256_setzero_pd(), \ (__mmask8)(U))) #define _mm_range_ps(A, B, C) \ ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (int)(C), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1)) #define _mm_mask_range_ps(W, U, A, B, C) \ ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (int)(C), \ (__v4sf)(__m128)(W), (__mmask8)(U))) #define _mm_maskz_range_ps(U, A, B, C) \ ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (int)(C), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U))) #define _mm256_range_ps(A, B, C) \ ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), (int)(C), \ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)-1)) #define _mm256_mask_range_ps(W, U, A, B, C) \ ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), (int)(C), \ (__v8sf)(__m256)(W), (__mmask8)(U))) #define _mm256_maskz_range_ps(U, A, B, C) \ ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), (int)(C), \ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U))) #define _mm_reduce_pd(A, B) \ ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1)) #define _mm_mask_reduce_pd(W, U, A, B) \ ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U))) #define _mm_maskz_reduce_pd(U, A, B) \ ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U))) #define _mm256_reduce_pd(A, B) \ ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ (__v4df)_mm256_setzero_pd(), \ (__mmask8)-1)) #define _mm256_mask_reduce_pd(W, U, A, B) \ ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ (__v4df)(__m256d)(W), \ (__mmask8)(U))) #define _mm256_maskz_reduce_pd(U, A, B) \ ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ (__v4df)_mm256_setzero_pd(), \ (__mmask8)(U))) #define _mm_reduce_ps(A, B) \ ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1)) #define _mm_mask_reduce_ps(W, U, A, B) \ ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ (__v4sf)(__m128)(W), \ (__mmask8)(U))) #define _mm_maskz_reduce_ps(U, A, B) \ ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U))) #define _mm256_reduce_ps(A, B) \ ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)-1)) #define _mm256_mask_reduce_ps(W, U, A, B) \ ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ (__v8sf)(__m256)(W), \ (__mmask8)(U))) #define _mm256_maskz_reduce_ps(U, A, B) \ ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U))) static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi32_mask (__m128i __A) { return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_movepi32_mask (__m256i __A) { return (__mmask8) __builtin_ia32_cvtd2mask256 
((__v8si) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi32 (__mmask8 __A) { return (__m128i) __builtin_ia32_cvtmask2d128 (__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi32 (__mmask8 __A) { return (__m256i) __builtin_ia32_cvtmask2d256 (__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi64 (__mmask8 __A) { return (__m128i) __builtin_ia32_cvtmask2q128 (__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi64 (__mmask8 __A) { return (__m256i) __builtin_ia32_cvtmask2q256 (__A); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi64_mask (__m128i __A) { return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_movepi64_mask (__m256i __A) { return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcast_f32x2 (__m128 __A) { return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 0, 1, 0, 1, 0, 1, 0, 1); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x2(__A), (__v8sf)__O); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x2(__A), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_broadcast_f64x2(__m128d __A) { return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, 0, 1, 0, 1); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, (__v4df)_mm256_broadcast_f64x2(__A), (__v4df)__O); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, (__v4df)_mm256_broadcast_f64x2(__A), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcast_i32x2 (__m128i __A) { return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 0, 1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_broadcast_i32x2(__A), (__v4si)__O); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_broadcast_i32x2(__A), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i32x2 (__m128i __A) { return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 0, 1, 0, 1, 0, 1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x2(__A), (__v8si)__O); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x2(__A), (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i64x2(__m128i __A) { return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, 0, 1, 0, 1); 
} static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_broadcast_i64x2(__A), (__v4di)__O); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_broadcast_i64x2(__A), (__v4di)_mm256_setzero_si256()); } #define _mm256_extractf64x2_pd(A, imm) \ ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ (int)(imm), \ (__v2df)_mm_undefined_pd(), \ (__mmask8)-1)) #define _mm256_mask_extractf64x2_pd(W, U, A, imm) \ ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ (int)(imm), \ (__v2df)(__m128d)(W), \ (__mmask8)(U))) #define _mm256_maskz_extractf64x2_pd(U, A, imm) \ ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ (int)(imm), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U))) #define _mm256_extracti64x2_epi64(A, imm) \ ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ (int)(imm), \ (__v2di)_mm_undefined_si128(), \ (__mmask8)-1)) #define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \ ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ (int)(imm), \ (__v2di)(__m128i)(W), \ (__mmask8)(U))) #define _mm256_maskz_extracti64x2_epi64(U, A, imm) \ ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ (int)(imm), \ (__v2di)_mm_setzero_si128(), \ (__mmask8)(U))) #define _mm256_insertf64x2(A, B, imm) \ ((__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \ (__v2df)(__m128d)(B), (int)(imm))) #define _mm256_mask_insertf64x2(W, U, A, B, imm) \ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_insertf64x2((A), (B), (imm)), \ (__v4df)(__m256d)(W))) #define _mm256_maskz_insertf64x2(U, A, B, imm) \ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_insertf64x2((A), (B), (imm)), \ (__v4df)_mm256_setzero_pd())) #define _mm256_inserti64x2(A, B, imm) \ ((__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \ (__v2di)(__m128i)(B), (int)(imm))) #define _mm256_mask_inserti64x2(W, U, A, B, imm) \ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_inserti64x2((A), (B), (imm)), \ (__v4di)(__m256i)(W))) #define _mm256_maskz_inserti64x2(U, A, B, imm) \ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_inserti64x2((A), (B), (imm)), \ (__v4di)_mm256_setzero_si256())) #define _mm_mask_fpclass_pd_mask(U, A, imm) \ ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ (__mmask8)(U))) #define _mm_fpclass_pd_mask(A, imm) \ ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ (__mmask8)-1)) #define _mm256_mask_fpclass_pd_mask(U, A, imm) \ ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ (__mmask8)(U))) #define _mm256_fpclass_pd_mask(A, imm) \ ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ (__mmask8)-1)) #define _mm_mask_fpclass_ps_mask(U, A, imm) \ ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ (__mmask8)(U))) #define _mm_fpclass_ps_mask(A, imm) \ ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ (__mmask8)-1)) #define _mm256_mask_fpclass_ps_mask(U, A, imm) \ ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ (__mmask8)(U))) #define _mm256_fpclass_ps_mask(A, imm) \ 
((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ (__mmask8)-1)) #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif /*===------ avx512vlvp2intersectintrin.h - VL VP2INTERSECT intrinsics ------=== * * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef _AVX512VLVP2INTERSECT_H #define _AVX512VLVP2INTERSECT_H #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512vp2intersect,no-evex512"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512vp2intersect,no-evex512"), \ __min_vector_width__(256))) /// Store, in an even/odd pair of mask registers, the indicators of the /// locations of value matches between dwords in operands __a and __b. /// /// \headerfile /// /// This intrinsic corresponds to the VP2INTERSECTD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32]. /// \param __b /// A 256-bit vector of [8 x i32] /// \param __m0 /// A pointer point to 8-bit mask /// \param __m1 /// A pointer point to 8-bit mask static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_2intersect_epi32(__m256i __a, __m256i __b, __mmask8 *__m0, __mmask8 *__m1) { __builtin_ia32_vp2intersect_d_256((__v8si)__a, (__v8si)__b, __m0, __m1); } /// Store, in an even/odd pair of mask registers, the indicators of the /// locations of value matches between quadwords in operands __a and __b. /// /// \headerfile /// /// This intrinsic corresponds to the VP2INTERSECTQ instruction. /// /// \param __a /// A 256-bit vector of [4 x i64]. /// \param __b /// A 256-bit vector of [4 x i64] /// \param __m0 /// A pointer point to 8-bit mask /// \param __m1 /// A pointer point to 8-bit mask static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_2intersect_epi64(__m256i __a, __m256i __b, __mmask8 *__m0, __mmask8 *__m1) { __builtin_ia32_vp2intersect_q_256((__v4di)__a, (__v4di)__b, __m0, __m1); } /// Store, in an even/odd pair of mask registers, the indicators of the /// locations of value matches between dwords in operands __a and __b. /// /// \headerfile /// /// This intrinsic corresponds to the VP2INTERSECTD instruction. /// /// \param __a /// A 128-bit vector of [4 x i32]. 
/// \param __b /// A 128-bit vector of [4 x i32] /// \param __m0 /// A pointer point to 8-bit mask /// \param __m1 /// A pointer point to 8-bit mask static __inline__ void __DEFAULT_FN_ATTRS128 _mm_2intersect_epi32(__m128i __a, __m128i __b, __mmask8 *__m0, __mmask8 *__m1) { __builtin_ia32_vp2intersect_d_128((__v4si)__a, (__v4si)__b, __m0, __m1); } /// Store, in an even/odd pair of mask registers, the indicators of the /// locations of value matches between quadwords in operands __a and __b. /// /// \headerfile /// /// This intrinsic corresponds to the VP2INTERSECTQ instruction. /// /// \param __a /// A 128-bit vector of [2 x i64]. /// \param __b /// A 128-bit vector of [2 x i64] /// \param __m0 /// A pointer point to 8-bit mask /// \param __m1 /// A pointer point to 8-bit mask static __inline__ void __DEFAULT_FN_ATTRS128 _mm_2intersect_epi64(__m128i __a, __m128i __b, __mmask8 *__m0, __mmask8 *__m1) { __builtin_ia32_vp2intersect_q_128((__v2di)__a, (__v2di)__b, __m0, __m1); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif bmi2intrin.h/*===---------------- hresetintrin.h - HRESET intrinsics -------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86GPRINTRIN_H #error "Never use directly; include instead." #endif #ifndef __HRESETINTRIN_H #define __HRESETINTRIN_H #if __has_extension(gnu_asm) /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("hreset"))) /// Provides a hint to the processor to selectively reset the prediction /// history of the current logical processor specified by a 32-bit integer /// value \a __eax. /// /// This intrinsic corresponds to the HRESET instruction. /// /// \code{.operation} /// IF __eax == 0 /// // nop /// ELSE /// FOR i := 0 to 31 /// IF __eax[i] /// ResetPredictionFeature(i) /// FI /// ENDFOR /// FI /// \endcode static __inline void __DEFAULT_FN_ATTRS _hreset(int __eax) { __asm__ ("hreset $0" :: "a"(__eax)); } #undef __DEFAULT_FN_ATTRS #endif /* __has_extension(gnu_asm) */ #endif /* __HRESETINTRIN_H */ keylockerintrin.h/*===----------------- keylockerintrin.h - KL Intrinsics -------------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
* *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef _KEYLOCKERINTRIN_H #define _KEYLOCKERINTRIN_H #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__KL__) /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("kl"),\ __min_vector_width__(128))) /// Load internal wrapping key from __intkey, __enkey_lo and __enkey_hi. __ctl /// will assigned to EAX, whch specifies the KeySource and whether backing up /// the key is permitted. The 256-bit encryption key is loaded from the two /// explicit operands (__enkey_lo and __enkey_hi). The 128-bit integrity key is /// loaded from the implicit operand XMM0 which assigned by __intkey. /// /// \headerfile /// /// This intrinsic corresponds to the LOADIWKEY instructions. /// /// \code{.operation} /// IF CPL > 0 // LOADKWKEY only allowed at ring 0 (supervisor mode) /// GP (0) /// FI /// IF “LOADIWKEY exiting” VM execution control set /// VMexit /// FI /// IF __ctl[4:1] > 1 // Reserved KeySource encoding used /// GP (0) /// FI /// IF __ctl[31:5] != 0 // Reserved bit in __ctl is set /// GP (0) /// FI /// IF __ctl[0] AND (CPUID.19H.ECX[0] == 0) // NoBackup is not supported on this part /// GP (0) /// FI /// IF (__ctl[4:1] == 1) AND (CPUID.19H.ECX[1] == 0) // KeySource of 1 is not supported on this part /// GP (0) /// FI /// IF (__ctl[4:1] == 0) // KeySource of 0. /// IWKey.Encryption Key[127:0] := __enkey_hi[127:0]: /// IWKey.Encryption Key[255:128] := __enkey_lo[127:0] /// IWKey.IntegrityKey[127:0] := __intkey[127:0] /// IWKey.NoBackup := __ctl[0] /// IWKey.KeySource := __ctl[4:1] /// ZF := 0 /// ELSE // KeySource of 1. See RDSEED definition for details of randomness /// IF HW_NRND_GEN.ready == 1 // Full-entropy random data from RDSEED was received /// IWKey.Encryption Key[127:0] := __enkey_hi[127:0] XOR HW_NRND_GEN.data[127:0] /// IWKey.Encryption Key[255:128] := __enkey_lo[127:0] XOR HW_NRND_GEN.data[255:128] /// IWKey.Encryption Key[255:0] := __enkey_hi[127:0]:__enkey_lo[127:0] XOR HW_NRND_GEN.data[255:0] /// IWKey.IntegrityKey[127:0] := __intkey[127:0] XOR HW_NRND_GEN.data[383:256] /// IWKey.NoBackup := __ctl[0] /// IWKey.KeySource := __ctl[4:1] /// ZF := 0 /// ELSE // Random data was not returned from RDSEED. IWKey was not loaded /// ZF := 1 /// FI /// FI /// dst := ZF /// OF := 0 /// SF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _mm_loadiwkey (unsigned int __ctl, __m128i __intkey, __m128i __enkey_lo, __m128i __enkey_hi) { __builtin_ia32_loadiwkey (__intkey, __enkey_lo, __enkey_hi, __ctl); } /// Wrap a 128-bit AES key from __key into a key handle and output in /// ((__m128i*)__h) to ((__m128i*)__h) + 2 and a 32-bit value as return. /// The explicit source operand __htype specifies handle restrictions. /// /// \headerfile /// /// This intrinsic corresponds to the ENCODEKEY128 instructions. 
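///
/// A minimal usage sketch (illustrative only; the key value and the handle
/// restriction type of 0 are placeholders, and an internal wrapping key is
/// assumed to have already been loaded with _mm_loadiwkey):
/// \code
///   __m128i key = _mm_set_epi64x(0x0011223344556677LL, 0x1122334455667788LL);
///   __m128i handle[3];   /* 384-bit handle; layout described below */
///   unsigned int iwk_info = _mm_encodekey128_u32(0, key, handle);
/// \endcode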
/// /// \code{.operation} /// InputKey[127:0] := __key[127:0] /// KeyMetadata[2:0] := __htype[2:0] /// KeyMetadata[23:3] := 0 // Reserved for future usage /// KeyMetadata[27:24] := 0 // KeyType is AES-128 (value of 0) /// KeyMetadata[127:28] := 0 // Reserved for future usage /// Handle[383:0] := WrapKey128(InputKey[127:0], KeyMetadata[127:0], /// IWKey.Integrity Key[127:0], IWKey.Encryption Key[255:0]) /// dst[0] := IWKey.NoBackup /// dst[4:1] := IWKey.KeySource[3:0] /// dst[31:5] := 0 /// MEM[__h+127:__h] := Handle[127:0] // AAD /// MEM[__h+255:__h+128] := Handle[255:128] // Integrity Tag /// MEM[__h+383:__h+256] := Handle[383:256] // CipherText /// OF := 0 /// SF := 0 /// ZF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm_encodekey128_u32(unsigned int __htype, __m128i __key, void *__h) { return __builtin_ia32_encodekey128_u32(__htype, (__v2di)__key, __h); } /// Wrap a 256-bit AES key from __key_hi:__key_lo into a key handle, then /// output handle in ((__m128i*)__h) to ((__m128i*)__h) + 3 and /// a 32-bit value as return. /// The explicit source operand __htype specifies handle restrictions. /// /// \headerfile /// /// This intrinsic corresponds to the ENCODEKEY256 instructions. /// /// \code{.operation} /// InputKey[127:0] := __key_lo[127:0] /// InputKey[255:128] := __key_hi[255:128] /// KeyMetadata[2:0] := __htype[2:0] /// KeyMetadata[23:3] := 0 // Reserved for future usage /// KeyMetadata[27:24] := 1 // KeyType is AES-256 (value of 1) /// KeyMetadata[127:28] := 0 // Reserved for future usage /// Handle[511:0] := WrapKey256(InputKey[255:0], KeyMetadata[127:0], /// IWKey.Integrity Key[127:0], IWKey.Encryption Key[255:0]) /// dst[0] := IWKey.NoBackup /// dst[4:1] := IWKey.KeySource[3:0] /// dst[31:5] := 0 /// MEM[__h+127:__h] := Handle[127:0] // AAD /// MEM[__h+255:__h+128] := Handle[255:128] // Tag /// MEM[__h+383:__h+256] := Handle[383:256] // CipherText[127:0] /// MEM[__h+511:__h+384] := Handle[511:384] // CipherText[255:128] /// OF := 0 /// SF := 0 /// ZF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm_encodekey256_u32(unsigned int __htype, __m128i __key_lo, __m128i __key_hi, void *__h) { return __builtin_ia32_encodekey256_u32(__htype, (__v2di)__key_lo, (__v2di)__key_hi, __h); } /// The AESENC128KL performs 10 rounds of AES to encrypt the __idata using /// the 128-bit key in the handle from the __h. It stores the result in the /// __odata. And return the affected ZF flag status. /// /// \headerfile /// /// This intrinsic corresponds to the AESENC128KL instructions. /// /// \code{.operation} /// Handle[383:0] := MEM[__h+383:__h] // Load is not guaranteed to be atomic. 
/// IllegalHandle := ( HandleReservedBitSet (Handle[383:0]) || /// (Handle[127:0] AND (CPL > 0)) || /// Handle[383:256] || /// HandleKeyType (Handle[383:0]) != HANDLE_KEY_TYPE_AES128 ) /// IF (IllegalHandle) /// ZF := 1 /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) /// IF (Authentic == 0) /// ZF := 1 /// ELSE /// MEM[__odata+127:__odata] := AES128Encrypt (__idata[127:0], UnwrappedKey) /// ZF := 0 /// FI /// FI /// dst := ZF /// OF := 0 /// SF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesenc128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { return __builtin_ia32_aesenc128kl_u8((__v2di *)__odata, (__v2di)__idata, __h); } /// The AESENC256KL performs 14 rounds of AES to encrypt the __idata using /// the 256-bit key in the handle from the __h. It stores the result in the /// __odata. And return the affected ZF flag status. /// /// \headerfile /// /// This intrinsic corresponds to the AESENC256KL instructions. /// /// \code{.operation} /// Handle[511:0] := MEM[__h+511:__h] // Load is not guaranteed to be atomic. /// IllegalHandle := ( HandleReservedBitSet (Handle[511:0]) || /// (Handle[127:0] AND (CPL > 0)) || /// Handle[255:128] || /// HandleKeyType (Handle[511:0]) != HANDLE_KEY_TYPE_AES256 ) /// IF (IllegalHandle) /// ZF := 1 /// MEM[__odata+127:__odata] := 0 /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) /// IF (Authentic == 0) /// ZF := 1 /// MEM[__odata+127:__odata] := 0 /// ELSE /// MEM[__odata+127:__odata] := AES256Encrypt (__idata[127:0], UnwrappedKey) /// ZF := 0 /// FI /// FI /// dst := ZF /// OF := 0 /// SF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesenc256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { return __builtin_ia32_aesenc256kl_u8((__v2di *)__odata, (__v2di)__idata, __h); } /// The AESDEC128KL performs 10 rounds of AES to decrypt the __idata using /// the 128-bit key in the handle from the __h. It stores the result in the /// __odata. And return the affected ZF flag status. /// /// \headerfile /// /// This intrinsic corresponds to the AESDEC128KL instructions. /// /// \code{.operation} /// Handle[383:0] := MEM[__h+383:__h] // Load is not guaranteed to be atomic. /// IllegalHandle := (HandleReservedBitSet (Handle[383:0]) || /// (Handle[127:0] AND (CPL > 0)) || /// Handle[383:256] || /// HandleKeyType (Handle[383:0]) != HANDLE_KEY_TYPE_AES128) /// IF (IllegalHandle) /// ZF := 1 /// MEM[__odata+127:__odata] := 0 /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) /// IF (Authentic == 0) /// ZF := 1 /// MEM[__odata+127:__odata] := 0 /// ELSE /// MEM[__odata+127:__odata] := AES128Decrypt (__idata[127:0], UnwrappedKey) /// ZF := 0 /// FI /// FI /// dst := ZF /// OF := 0 /// SF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesdec128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { return __builtin_ia32_aesdec128kl_u8((__v2di *)__odata, (__v2di)__idata, __h); } /// The AESDEC256KL performs 10 rounds of AES to decrypt the __idata using /// the 256-bit key in the handle from the __h. It stores the result in the /// __odata. And return the affected ZF flag status. /// /// \headerfile /// /// This intrinsic corresponds to the AESDEC256KL instructions. 
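///
/// A minimal usage sketch (illustrative; `handle` is assumed to point at a
/// 512-bit handle previously produced by _mm_encodekey256_u32, and `cipher`
/// holds one 128-bit block):
/// \code
///   __m128i plain;
///   if (_mm_aesdec256kl_u8(&plain, cipher, handle) == 0) {
///     /* success: plain now holds the decrypted block */
///   }
/// \endcode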
/// /// \code{.operation} /// Handle[511:0] := MEM[__h+511:__h] /// IllegalHandle := (HandleReservedBitSet (Handle[511:0]) || /// (Handle[127:0] AND (CPL > 0)) || /// Handle[383:256] || /// HandleKeyType (Handle[511:0]) != HANDLE_KEY_TYPE_AES256) /// IF (IllegalHandle) /// ZF := 1 /// MEM[__odata+127:__odata] := 0 /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) /// IF (Authentic == 0) /// ZF := 1 /// MEM[__odata+127:__odata] := 0 /// ELSE /// MEM[__odata+127:__odata] := AES256Decrypt (__idata[127:0], UnwrappedKey) /// ZF := 0 /// FI /// FI /// dst := ZF /// OF := 0 /// SF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesdec256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { return __builtin_ia32_aesdec256kl_u8((__v2di *)__odata, (__v2di)__idata, __h); } #undef __DEFAULT_FN_ATTRS #endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) \ || defined(__KL__) */ #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__WIDEKL__) /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("kl,widekl"),\ __min_vector_width__(128))) /// Encrypt __idata[0] to __idata[7] using 128-bit AES key indicated by handle /// at __h and store each resultant block back from __odata to __odata+7. And /// return the affected ZF flag status. /// /// \headerfile /// /// This intrinsic corresponds to the AESENCWIDE128KL instructions. /// /// \code{.operation} /// Handle := MEM[__h+383:__h] /// IllegalHandle := ( HandleReservedBitSet (Handle[383:0]) || /// (Handle[127:0] AND (CPL > 0)) || /// Handle[255:128] || /// HandleKeyType (Handle[383:0]) != HANDLE_KEY_TYPE_AES128 ) /// IF (IllegalHandle) /// ZF := 1 /// FOR i := 0 to 7 /// __odata[i] := 0 /// ENDFOR /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) /// IF Authentic == 0 /// ZF := 1 /// FOR i := 0 to 7 /// __odata[i] := 0 /// ENDFOR /// ELSE /// FOR i := 0 to 7 /// __odata[i] := AES128Encrypt (__idata[i], UnwrappedKey) /// ENDFOR /// ZF := 0 /// FI /// FI /// dst := ZF /// OF := 0 /// SF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesencwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { return __builtin_ia32_aesencwide128kl_u8((__v2di *)__odata, (const __v2di *)__idata, __h); } /// Encrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle /// at __h and store each resultant block back from __odata to __odata+7. And /// return the affected ZF flag status. /// /// \headerfile /// /// This intrinsic corresponds to the AESENCWIDE256KL instructions. 
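///
/// A minimal usage sketch (illustrative; `handle` is assumed to hold a
/// 512-bit handle created by _mm_encodekey256_u32):
/// \code
///   __m128i in[8], out[8];
///   /* ... fill in[0..7] with plaintext blocks ... */
///   unsigned char err = _mm_aesencwide256kl_u8(out, in, handle);
///   /* err == 0 on success; on failure every output block is zeroed */
/// \endcode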
/// /// \code{.operation} /// Handle[511:0] := MEM[__h+511:__h] /// IllegalHandle := ( HandleReservedBitSet (Handle[511:0]) || /// (Handle[127:0] AND (CPL > 0)) || /// Handle[255:128] || /// HandleKeyType (Handle[511:0]) != HANDLE_KEY_TYPE_AES512 ) /// IF (IllegalHandle) /// ZF := 1 /// FOR i := 0 to 7 /// __odata[i] := 0 /// ENDFOR /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) /// IF Authentic == 0 /// ZF := 1 /// FOR i := 0 to 7 /// __odata[i] := 0 /// ENDFOR /// ELSE /// FOR i := 0 to 7 /// __odata[i] := AES256Encrypt (__idata[i], UnwrappedKey) /// ENDFOR /// ZF := 0 /// FI /// FI /// dst := ZF /// OF := 0 /// SF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesencwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { return __builtin_ia32_aesencwide256kl_u8((__v2di *)__odata, (const __v2di *)__idata, __h); } /// Decrypt __idata[0] to __idata[7] using 128-bit AES key indicated by handle /// at __h and store each resultant block back from __odata to __odata+7. And /// return the affected ZF flag status. /// /// \headerfile /// /// This intrinsic corresponds to the AESDECWIDE128KL instructions. /// /// \code{.operation} /// Handle[383:0] := MEM[__h+383:__h] /// IllegalHandle := ( HandleReservedBitSet (Handle[383:0]) || /// (Handle[127:0] AND (CPL > 0)) || /// Handle[255:128] || /// HandleKeyType (Handle) != HANDLE_KEY_TYPE_AES128 ) /// IF (IllegalHandle) /// ZF := 1 /// FOR i := 0 to 7 /// __odata[i] := 0 /// ENDFOR /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) /// IF Authentic == 0 /// ZF := 1 /// FOR i := 0 to 7 /// __odata[i] := 0 /// ENDFOR /// ELSE /// FOR i := 0 to 7 /// __odata[i] := AES128Decrypt (__idata[i], UnwrappedKey) /// ENDFOR /// ZF := 0 /// FI /// FI /// dst := ZF /// OF := 0 /// SF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesdecwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { return __builtin_ia32_aesdecwide128kl_u8((__v2di *)__odata, (const __v2di *)__idata, __h); } /// Decrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle /// at __h and store each resultant block back from __odata to __odata+7. And /// return the affected ZF flag status. /// /// \headerfile /// /// This intrinsic corresponds to the AESDECWIDE256KL instructions. 
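///
/// A batching sketch (illustrative; `blocks` is assumed to be a multiple of 8,
/// `in`/`out` are __m128i arrays of that length, and `handle` is a 512-bit
/// handle from _mm_encodekey256_u32):
/// \code
///   for (size_t i = 0; i < blocks; i += 8)
///     if (_mm_aesdecwide256kl_u8(&out[i], &in[i], handle) != 0)
///       break;  /* handle rejected or not authentic; outputs were zeroed */
/// \endcode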
/// /// \code{.operation} /// Handle[511:0] := MEM[__h+511:__h] /// IllegalHandle = ( HandleReservedBitSet (Handle[511:0]) || /// (Handle[127:0] AND (CPL > 0)) || /// Handle[255:128] || /// HandleKeyType (Handle) != HANDLE_KEY_TYPE_AES512 ) /// If (IllegalHandle) /// ZF := 1 /// FOR i := 0 to 7 /// __odata[i] := 0 /// ENDFOR /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) /// IF Authentic == 0 /// ZF := 1 /// FOR i := 0 to 7 /// __odata[i] := 0 /// ENDFOR /// ELSE /// FOR i := 0 to 7 /// __odata[i] := AES256Decrypt (__idata[i], UnwrappedKey) /// ENDFOR /// ZF := 0 /// FI /// FI /// dst := ZF /// OF := 0 /// SF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesdecwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { return __builtin_ia32_aesdecwide256kl_u8((__v2di *)__odata, (const __v2di *)__idata, __h); } #undef __DEFAULT_FN_ATTRS #endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) \ || defined(__WIDEKL__) */ #endif /* _KEYLOCKERINTRIN_H */ nmmintrin.h/*===---- s390intrin.h - SystemZ intrinsics --------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __S390INTRIN_H #define __S390INTRIN_H #ifndef __s390__ #error " is for s390 only" #endif #ifdef __HTM__ #include #endif #ifdef __VEC__ #include #endif #endif /* __S390INTRIN_H*/ /*===---- stdalign.h - Standard header for alignment ------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __STDALIGN_H #define __STDALIGN_H #if defined(__cplusplus) || \ (defined(__STDC_VERSION__) && __STDC_VERSION__ < 202311L) #ifndef __cplusplus #define alignas _Alignas #define alignof _Alignof #endif #define __alignas_is_defined 1 #define __alignof_is_defined 1 #endif /* __STDC_VERSION__ */ #endif /* __STDALIGN_H */ /*===---- tbmintrin.h - TBM intrinsics -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86INTRIN_H #error "Never use directly; include instead." #endif #ifndef __TBMINTRIN_H #define __TBMINTRIN_H /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("tbm"))) #define __bextri_u32(a, b) \ ((unsigned int)__builtin_ia32_bextri_u32((unsigned int)(a), \ (unsigned int)(b))) static __inline__ unsigned int __DEFAULT_FN_ATTRS __blcfill_u32(unsigned int __a) { return __a & (__a + 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS __blci_u32(unsigned int __a) { return __a | ~(__a + 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS __blcic_u32(unsigned int __a) { return ~__a & (__a + 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS __blcmsk_u32(unsigned int __a) { return __a ^ (__a + 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS __blcs_u32(unsigned int __a) { return __a | (__a + 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS __blsfill_u32(unsigned int __a) { return __a | (__a - 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS __blsic_u32(unsigned int __a) { return ~__a | (__a - 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS __t1mskc_u32(unsigned int __a) { return ~__a | (__a + 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS __tzmsk_u32(unsigned int __a) { return ~__a & (__a - 1); } #ifdef __x86_64__ #define __bextri_u64(a, b) \ ((unsigned long long)__builtin_ia32_bextri_u64((unsigned long long)(a), \ (unsigned long long)(b))) static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blcfill_u64(unsigned long long __a) { return __a & (__a + 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blci_u64(unsigned long long __a) { return __a | ~(__a + 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blcic_u64(unsigned long long __a) { return ~__a & (__a + 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blcmsk_u64(unsigned long long __a) { return __a ^ (__a + 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blcs_u64(unsigned long long __a) { return __a | (__a + 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blsfill_u64(unsigned long long __a) { return __a | (__a - 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blsic_u64(unsigned long long __a) { return ~__a | (__a - 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS __t1mskc_u64(unsigned long long __a) { return ~__a | (__a + 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS __tzmsk_u64(unsigned long long __a) { return ~__a & (__a - 1); } #endif #undef __DEFAULT_FN_ATTRS #endif /* __TBMINTRIN_H */ llvm_libc_wrappers/assert.hppc_wrappers/xmmintrin.h//===-- sanitizer/lsan_interface.h ------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is a part of LeakSanitizer. // // Public interface header. //===----------------------------------------------------------------------===// #ifndef SANITIZER_LSAN_INTERFACE_H #define SANITIZER_LSAN_INTERFACE_H #include #ifdef __cplusplus extern "C" { #endif // Allocations made between calls to __lsan_disable() and __lsan_enable() will // be treated as non-leaks. Disable/enable pairs may be nested. void SANITIZER_CDECL __lsan_disable(void); void SANITIZER_CDECL __lsan_enable(void); // The heap object into which p points will be treated as a non-leak. 
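// For example (illustrative):
//   void *p = malloc(16);      // allocation that is known and acceptable to leak
//   __lsan_ignore_object(p);   // LSan will not report p as a leak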
void SANITIZER_CDECL __lsan_ignore_object(const void *p); // Memory regions registered through this interface will be treated as sources // of live pointers during leak checking. Useful if you store pointers in // mapped memory. // Points of note: // - __lsan_unregister_root_region() must be called with the same pointer and // size that have earlier been passed to __lsan_register_root_region() // - LSan will skip any inaccessible memory when scanning a root region. E.g., // if you map memory within a larger region that you have mprotect'ed, you can // register the entire large region. // - the implementation is not optimized for performance. This interface is // intended to be used for a small number of relatively static regions. void SANITIZER_CDECL __lsan_register_root_region(const void *p, size_t size); void SANITIZER_CDECL __lsan_unregister_root_region(const void *p, size_t size); // Check for leaks now. This function behaves identically to the default // end-of-process leak check. In particular, it will terminate the process if // leaks are found and the exitcode runtime flag is non-zero. // Subsequent calls to this function will have no effect and end-of-process // leak check will not run. Effectively, end-of-process leak check is moved to // the time of first invocation of this function. // By calling this function early during process shutdown, you can instruct // LSan to ignore shutdown-only leaks which happen later on. void SANITIZER_CDECL __lsan_do_leak_check(void); // Check for leaks now. Returns zero if no leaks have been found or if leak // detection is disabled, non-zero otherwise. // This function may be called repeatedly, e.g. to periodically check a // long-running process. It prints a leak report if appropriate, but does not // terminate the process. It does not affect the behavior of // __lsan_do_leak_check() or the end-of-process leak check, and is not // affected by them. int SANITIZER_CDECL __lsan_do_recoverable_leak_check(void); // The user may optionally provide this function to disallow leak checking // for the program it is linked into (if the return value is non-zero). This // function must be defined as returning a constant value; any behavior beyond // that is unsupported. // To avoid dead stripping, you may need to define this function with // __attribute__((used)) int SANITIZER_CDECL __lsan_is_turned_off(void); // This function may be optionally provided by user and should return // a string containing LSan runtime options. See lsan_flags.inc for details. const char *SANITIZER_CDECL __lsan_default_options(void); // This function may be optionally provided by the user and should return // a string containing LSan suppressions. 
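// For example, a program can bake in a suppression for a known leak in a
// third-party library (the library name below is illustrative); each line
// uses the usual "leak:<pattern>" suppression syntax:
//   const char *__lsan_default_suppressions(void) {
//     return "leak:libthirdparty.so\n";
//   }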
const char *SANITIZER_CDECL __lsan_default_suppressions(void); #ifdef __cplusplus } // extern "C" namespace __lsan { class ScopedDisabler { public: ScopedDisabler() { __lsan_disable(); } ~ScopedDisabler() { __lsan_enable(); } }; } // namespace __lsan #endif #endif // SANITIZER_LSAN_INTERFACE_H
/*===---- arm64intr.h - ARM64 Windows intrinsics -------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* Only include this if we're compiling for the windows platform. */ #ifndef _MSC_VER #include_next #else #ifndef __ARM64INTR_H #define __ARM64INTR_H typedef enum { _ARM64_BARRIER_SY = 0xF, _ARM64_BARRIER_ST = 0xE, _ARM64_BARRIER_LD = 0xD, _ARM64_BARRIER_ISH = 0xB, _ARM64_BARRIER_ISHST = 0xA, _ARM64_BARRIER_ISHLD = 0x9, _ARM64_BARRIER_NSH = 0x7, _ARM64_BARRIER_NSHST = 0x6, _ARM64_BARRIER_NSHLD = 0x5, _ARM64_BARRIER_OSH = 0x3, _ARM64_BARRIER_OSHST = 0x2, _ARM64_BARRIER_OSHLD = 0x1 } _ARM64INTR_BARRIER_TYPE; #endif /* __ARM64INTR_H */ #endif /* _MSC_VER */
/*===---- bmi2intrin.h - BMI2 intrinsics -----------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __BMI2INTRIN_H #define __BMI2INTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi2"))) /// Copies the unsigned 32-bit integer \a __X and zeroes the upper bits /// starting at bit number \a __Y. /// /// \code{.operation} /// i := __Y[7:0] /// result := __X /// IF i < 32 /// result[31:i] := 0 /// FI /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c BZHI instruction. /// /// \param __X /// The 32-bit source value to copy. /// \param __Y /// The lower 8 bits specify the bit number of the lowest bit to zero. /// \returns The partially zeroed 32-bit value. static __inline__ unsigned int __DEFAULT_FN_ATTRS _bzhi_u32(unsigned int __X, unsigned int __Y) { return __builtin_ia32_bzhi_si(__X, __Y); } /// Deposit (scatter) low-order bits from the unsigned 32-bit integer \a __X /// into the 32-bit result, according to the mask in the unsigned 32-bit /// integer \a __Y.
All other bits of the result are zero. /// /// \code{.operation} /// i := 0 /// result := 0 /// FOR m := 0 TO 31 /// IF __Y[m] == 1 /// result[m] := __X[i] /// i := i + 1 /// ENDIF /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c PDEP instruction. /// /// \param __X /// The 32-bit source value to copy. /// \param __Y /// The 32-bit mask specifying where to deposit source bits. /// \returns The 32-bit result. static __inline__ unsigned int __DEFAULT_FN_ATTRS _pdep_u32(unsigned int __X, unsigned int __Y) { return __builtin_ia32_pdep_si(__X, __Y); } /// Extract (gather) bits from the unsigned 32-bit integer \a __X into the /// low-order bits of the 32-bit result, according to the mask in the /// unsigned 32-bit integer \a __Y. All other bits of the result are zero. /// /// \code{.operation} /// i := 0 /// result := 0 /// FOR m := 0 TO 31 /// IF __Y[m] == 1 /// result[i] := __X[m] /// i := i + 1 /// ENDIF /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c PEXT instruction. /// /// \param __X /// The 32-bit source value to copy. /// \param __Y /// The 32-bit mask specifying which source bits to extract. /// \returns The 32-bit result. static __inline__ unsigned int __DEFAULT_FN_ATTRS _pext_u32(unsigned int __X, unsigned int __Y) { return __builtin_ia32_pext_si(__X, __Y); } /// Multiplies the unsigned 32-bit integers \a __X and \a __Y to form a /// 64-bit product. Stores the upper 32 bits of the product in the /// memory at \a __P and returns the lower 32 bits. /// /// \code{.operation} /// Store32(__P, (__X * __Y)[63:32]) /// result := (__X * __Y)[31:0] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c MULX instruction. /// /// \param __X /// An unsigned 32-bit multiplicand. /// \param __Y /// An unsigned 32-bit multiplicand. /// \param __P /// A pointer to memory for storing the upper half of the product. /// \returns The lower half of the product. static __inline__ unsigned int __DEFAULT_FN_ATTRS _mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P) { unsigned long long __res = (unsigned long long) __X * __Y; *__P = (unsigned int)(__res >> 32); return (unsigned int)__res; } #ifdef __x86_64__ /// Copies the unsigned 64-bit integer \a __X and zeroes the upper bits /// starting at bit number \a __Y. /// /// \code{.operation} /// i := __Y[7:0] /// result := __X /// IF i < 64 /// result[63:i] := 0 /// FI /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c BZHI instruction. /// /// \param __X /// The 64-bit source value to copy. /// \param __Y /// The lower 8 bits specify the bit number of the lowest bit to zero. /// \returns The partially zeroed 64-bit value. static __inline__ unsigned long long __DEFAULT_FN_ATTRS _bzhi_u64(unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_bzhi_di(__X, __Y); } /// Deposit (scatter) low-order bits from the unsigned 64-bit integer \a __X /// into the 64-bit result, according to the mask in the unsigned 64-bit /// integer \a __Y. All other bits of the result are zero. /// /// \code{.operation} /// i := 0 /// result := 0 /// FOR m := 0 TO 63 /// IF __Y[m] == 1 /// result[m] := __X[i] /// i := i + 1 /// ENDIF /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c PDEP instruction. /// /// \param __X /// The 64-bit source value to copy. /// \param __Y /// The 64-bit mask specifying where to deposit source bits. /// \returns The 64-bit result. 
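///
/// For example (illustrative values):
/// \code
///   unsigned long long r = _pdep_u64(0xFULL, 0x5555555555555555ULL);
///   // r == 0x55: the four low source bits are deposited into the four
///   // lowest set positions of the mask (bits 0, 2, 4 and 6).
/// \endcode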
static __inline__ unsigned long long __DEFAULT_FN_ATTRS _pdep_u64(unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_pdep_di(__X, __Y); } /// Extract (gather) bits from the unsigned 64-bit integer \a __X into the /// low-order bits of the 64-bit result, according to the mask in the /// unsigned 64-bit integer \a __Y. All other bits of the result are zero. /// /// \code{.operation} /// i := 0 /// result := 0 /// FOR m := 0 TO 63 /// IF __Y[m] == 1 /// result[i] := __X[m] /// i := i + 1 /// ENDIF /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c PEXT instruction. /// /// \param __X /// The 64-bit source value to copy. /// \param __Y /// The 64-bit mask specifying which source bits to extract. /// \returns The 64-bit result. static __inline__ unsigned long long __DEFAULT_FN_ATTRS _pext_u64(unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_pext_di(__X, __Y); } /// Multiplies the unsigned 64-bit integers \a __X and \a __Y to form a /// 128-bit product. Stores the upper 64 bits of the product to the /// memory addressed by \a __P and returns the lower 64 bits. /// /// \code{.operation} /// Store64(__P, (__X * __Y)[127:64]) /// result := (__X * __Y)[63:0] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c MULX instruction. /// /// \param __X /// An unsigned 64-bit multiplicand. /// \param __Y /// An unsigned 64-bit multiplicand. /// \param __P /// A pointer to memory for storing the upper half of the product. /// \returns The lower half of the product. static __inline__ unsigned long long __DEFAULT_FN_ATTRS _mulx_u64 (unsigned long long __X, unsigned long long __Y, unsigned long long *__P) { unsigned __int128 __res = (unsigned __int128) __X * __Y; *__P = (unsigned long long) (__res >> 64); return (unsigned long long) __res; } #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS #endif /* __BMI2INTRIN_H */ hexagon_circ_brev_intrinsics.htsxldtrkintrin.h/*===---- x86intrin.h - X86 intrinsics -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86INTRIN_H #define __X86INTRIN_H #include #include #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__3dNOW__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__PRFCHW__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SSE4A__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__FMA4__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__XOP__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__TBM__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__LWP__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__MWAITX__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__CLZERO__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RDPRU__) #include #endif #endif /* __X86INTRIN_H */ bits/stdatomic.h//===-- Wrapper for C standard string.h declarations on the GPU -----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef __CLANG_LLVM_LIBC_WRAPPERS_STRING_H__ #define __CLANG_LLVM_LIBC_WRAPPERS_STRING_H__ #if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__) #error "This file is for GPU offloading compilation only" #endif #include_next #if __has_include() #if defined(__HIP__) || defined(__CUDA__) #define __LIBC_ATTRS __attribute__((device)) #endif #pragma omp begin declare target // The GNU headers provide C++ standard compliant headers when in C++ mode and // the LLVM libc does not. We need to manually provide the definitions using the // same prototypes. 
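/* Illustrative sketch, not part of the wrapper above: when the LLVM libc GPU
 * headers are available, the string.h functions declared under the
 * `declare target` pragma can be called directly from offloaded regions.
 * This example is hypothetical and assumes an OpenMP offloading toolchain. */
#include <string.h>
#include <stdio.h>

int main(void) {
  char buf[16] = {0};
#pragma omp target map(from : buf)
  {
    /* Resolves to the GPU-side memcpy declaration when offloading. */
    memcpy(buf, "offloaded", 10);
  }
  printf("%s (%zu chars)\n", buf, strlen(buf));
  return 0;
}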
#if defined(__cplusplus) && defined(__GLIBC__) && \ defined(__CORRECT_ISO_CPP_STRING_H_PROTO) #ifndef __LIBC_ATTRS #define __LIBC_ATTRS #endif extern "C" { void *memccpy(void *__restrict, const void *__restrict, int, size_t) __LIBC_ATTRS; int memcmp(const void *, const void *, size_t) __LIBC_ATTRS; void *memcpy(void *__restrict, const void *__restrict, size_t) __LIBC_ATTRS; void *memmem(const void *, size_t, const void *, size_t) __LIBC_ATTRS; void *memmove(void *, const void *, size_t) __LIBC_ATTRS; void *mempcpy(void *__restrict, const void *__restrict, size_t) __LIBC_ATTRS; void *memset(void *, int, size_t) __LIBC_ATTRS; char *stpcpy(char *__restrict, const char *__restrict) __LIBC_ATTRS; char *stpncpy(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS; char *strcat(char *__restrict, const char *__restrict) __LIBC_ATTRS; int strcmp(const char *, const char *) __LIBC_ATTRS; int strcoll(const char *, const char *) __LIBC_ATTRS; char *strcpy(char *__restrict, const char *__restrict) __LIBC_ATTRS; size_t strcspn(const char *, const char *) __LIBC_ATTRS; char *strdup(const char *) __LIBC_ATTRS; size_t strlen(const char *) __LIBC_ATTRS; char *strncat(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS; int strncmp(const char *, const char *, size_t) __LIBC_ATTRS; char *strncpy(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS; char *strndup(const char *, size_t) __LIBC_ATTRS; size_t strnlen(const char *, size_t) __LIBC_ATTRS; size_t strspn(const char *, const char *) __LIBC_ATTRS; char *strtok(char *__restrict, const char *__restrict) __LIBC_ATTRS; char *strtok_r(char *__restrict, const char *__restrict, char **__restrict) __LIBC_ATTRS; size_t strxfrm(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS; } extern "C++" { char *strstr(char *, const char *) noexcept __LIBC_ATTRS; const char *strstr(const char *, const char *) noexcept __LIBC_ATTRS; char *strpbrk(char *, const char *) noexcept __LIBC_ATTRS; const char *strpbrk(const char *, const char *) noexcept __LIBC_ATTRS; char *strrchr(char *, int) noexcept __LIBC_ATTRS; const char *strrchr(const char *, int) noexcept __LIBC_ATTRS; char *strchr(char *, int) noexcept __LIBC_ATTRS; const char *strchr(const char *, int) noexcept __LIBC_ATTRS; char *strchrnul(char *, int) noexcept __LIBC_ATTRS; const char *strchrnul(const char *, int) noexcept __LIBC_ATTRS; char *strcasestr(char *, const char *) noexcept __LIBC_ATTRS; const char *strcasestr(const char *, const char *) noexcept __LIBC_ATTRS; void *memrchr(void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS; const void *memrchr(const void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS; void *memchr(void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS; const void *memchr(const void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS; } #else #include #endif #pragma omp end declare target #undef __LIBC_ATTRS #endif #endif // __CLANG_LLVM_LIBC_WRAPPERS_STRING_H__ //===-- sanitizer/asan_interface.h ------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is a part of AddressSanitizer (ASan). // // Public interface header. 
//===----------------------------------------------------------------------===// #ifndef SANITIZER_ASAN_INTERFACE_H #define SANITIZER_ASAN_INTERFACE_H #include #ifdef __cplusplus extern "C" { #endif /// Marks a memory region ([addr, addr+size)) as unaddressable. /// /// This memory must be previously allocated by your program. Instrumented /// code is forbidden from accessing addresses in this region until it is /// unpoisoned. This function is not guaranteed to poison the entire region - /// it could poison only a subregion of [addr, addr+size) due to ASan /// alignment restrictions. /// /// \note This function is not thread-safe because no two threads can poison or /// unpoison memory in the same memory region simultaneously. /// /// \param addr Start of memory region. /// \param size Size of memory region. void SANITIZER_CDECL __asan_poison_memory_region(void const volatile *addr, size_t size); /// Marks a memory region ([addr, addr+size)) as addressable. /// /// This memory must be previously allocated by your program. Accessing /// addresses in this region is allowed until this region is poisoned again. /// This function could unpoison a super-region of [addr, addr+size) due /// to ASan alignment restrictions. /// /// \note This function is not thread-safe because no two threads can /// poison or unpoison memory in the same memory region simultaneously. /// /// \param addr Start of memory region. /// \param size Size of memory region. void SANITIZER_CDECL __asan_unpoison_memory_region(void const volatile *addr, size_t size); // Macros provided for convenience. #ifdef __has_feature #if __has_feature(address_sanitizer) #define ASAN_DEFINE_REGION_MACROS #endif #elif defined(__SANITIZE_ADDRESS__) #define ASAN_DEFINE_REGION_MACROS #endif #ifdef ASAN_DEFINE_REGION_MACROS /// Marks a memory region as unaddressable. /// /// \note Macro provided for convenience; defined as a no-op if ASan is not /// enabled. /// /// \param addr Start of memory region. /// \param size Size of memory region. #define ASAN_POISON_MEMORY_REGION(addr, size) \ __asan_poison_memory_region((addr), (size)) /// Marks a memory region as addressable. /// /// \note Macro provided for convenience; defined as a no-op if ASan is not /// enabled. /// /// \param addr Start of memory region. /// \param size Size of memory region. #define ASAN_UNPOISON_MEMORY_REGION(addr, size) \ __asan_unpoison_memory_region((addr), (size)) #else #define ASAN_POISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size)) #define ASAN_UNPOISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size)) #endif #undef ASAN_DEFINE_REGION_MACROS /// Checks if an address is poisoned. /// /// Returns 1 if addr is poisoned (that is, 1-byte read/write /// access to this address would result in an error report from ASan). /// Otherwise returns 0. /// /// \param addr Address to check. /// /// \retval 1 Address is poisoned. /// \retval 0 Address is not poisoned. int SANITIZER_CDECL __asan_address_is_poisoned(void const volatile *addr); /// Checks if a region is poisoned. /// /// If at least one byte in [beg, beg+size) is poisoned, returns the /// address of the first such byte. Otherwise returns 0. /// /// \param beg Start of memory region. /// \param size Start of memory region. /// \returns Address of first poisoned byte. void *SANITIZER_CDECL __asan_region_is_poisoned(void *beg, size_t size); /// Describes an address (useful for calling from the debugger). /// /// Prints the description of addr. /// /// \param addr Address to describe. 
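/* Illustrative sketch, not part of asan_interface.h: a simple pool allocator
 * can keep not-yet-allocated pool bytes poisoned with the convenience macros
 * above, so instrumented code touching them triggers an ASan report. The
 * pool layout is a made-up example; poisoning may be rounded by the ASan
 * alignment restrictions documented above. */
#include <sanitizer/asan_interface.h>
#include <stddef.h>

static char pool[4096];
static size_t pool_used;

void pool_init(void) {
  pool_used = 0;
  /* Nothing handed out yet: poison the whole pool. */
  ASAN_POISON_MEMORY_REGION(pool, sizeof(pool));
}

void *pool_alloc(size_t n) {
  if (n > sizeof(pool) - pool_used)
    return NULL;
  void *p = pool + pool_used;
  pool_used += n;
  /* Unpoison only the bytes actually handed to the caller. */
  ASAN_UNPOISON_MEMORY_REGION(p, n);
  return p;
}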
void SANITIZER_CDECL __asan_describe_address(void *addr); /// Checks if an error has been or is being reported (useful for calling from /// the debugger to get information about an ASan error). /// /// Returns 1 if an error has been (or is being) reported. Otherwise returns 0. /// /// \returns 1 if an error has been (or is being) reported. Otherwise returns /// 0. int SANITIZER_CDECL __asan_report_present(void); /// Gets the PC (program counter) register value of an ASan error (useful for /// calling from the debugger). /// /// Returns PC if an error has been (or is being) reported. /// Otherwise returns 0. /// /// \returns PC value. void *SANITIZER_CDECL __asan_get_report_pc(void); /// Gets the BP (base pointer) register value of an ASan error (useful for /// calling from the debugger). /// /// Returns BP if an error has been (or is being) reported. /// Otherwise returns 0. /// /// \returns BP value. void *SANITIZER_CDECL __asan_get_report_bp(void); /// Gets the SP (stack pointer) register value of an ASan error (useful for /// calling from the debugger). /// /// If an error has been (or is being) reported, returns SP. /// Otherwise returns 0. /// /// \returns SP value. void *SANITIZER_CDECL __asan_get_report_sp(void); /// Gets the address of the report buffer of an ASan error (useful for calling /// from the debugger). /// /// Returns the address of the report buffer if an error has been (or is being) /// reported. Otherwise returns 0. /// /// \returns Address of report buffer. void *SANITIZER_CDECL __asan_get_report_address(void); /// Gets access type of an ASan error (useful for calling from the debugger). /// /// Returns access type (read or write) if an error has been (or is being) /// reported. Otherwise returns 0. /// /// \returns Access type (0 = read, 1 = write). int SANITIZER_CDECL __asan_get_report_access_type(void); /// Gets access size of an ASan error (useful for calling from the debugger). /// /// Returns access size if an error has been (or is being) reported. Otherwise /// returns 0. /// /// \returns Access size in bytes. size_t SANITIZER_CDECL __asan_get_report_access_size(void); /// Gets the bug description of an ASan error (useful for calling from a /// debugger). /// /// \returns Returns a bug description if an error has been (or is being) /// reported - for example, "heap-use-after-free". Otherwise returns an empty /// string. const char *SANITIZER_CDECL __asan_get_report_description(void); /// Gets information about a pointer (useful for calling from the debugger). /// /// Returns the category of the given pointer as a constant string. /// Possible return values are global, stack, stack-fake, /// heap, heap-invalid, shadow-low, shadow-gap, /// shadow-high, and unknown. /// /// If the return value is global or stack, tries to also return /// the variable name, address, and size. If the return value is heap, /// tries to return the chunk address and size. name should point /// to an allocated buffer of size name_size. /// /// \param addr Address to locate. /// \param name Buffer to store the variable's name. /// \param name_size Size in bytes of the variable's name buffer. /// \param[out] region_address Address of the region. /// \param[out] region_size Size of the region in bytes. /// /// \returns Returns the category of the given pointer as a constant string. 
const char *SANITIZER_CDECL __asan_locate_address(void *addr, char *name, size_t name_size, void **region_address, size_t *region_size); /// Gets the allocation stack trace and thread ID for a heap address (useful /// for calling from the debugger). /// /// Stores up to size frames in trace. Returns /// the number of stored frames or 0 on error. /// /// \param addr A heap address. /// \param trace A buffer to store the stack trace. /// \param size Size in bytes of the trace buffer. /// \param[out] thread_id The thread ID of the address. /// /// \returns Returns the number of stored frames or 0 on error. size_t SANITIZER_CDECL __asan_get_alloc_stack(void *addr, void **trace, size_t size, int *thread_id); /// Gets the free stack trace and thread ID for a heap address (useful for /// calling from the debugger). /// /// Stores up to size frames in trace. Returns /// the number of stored frames or 0 on error. /// /// \param addr A heap address. /// \param trace A buffer to store the stack trace. /// \param size Size in bytes of the trace buffer. /// \param[out] thread_id The thread ID of the address. /// /// \returns Returns the number of stored frames or 0 on error. size_t SANITIZER_CDECL __asan_get_free_stack(void *addr, void **trace, size_t size, int *thread_id); /// Gets the current shadow memory mapping (useful for calling from the /// debugger). /// /// \param[out] shadow_scale Shadow scale value. /// \param[out] shadow_offset Offset value. void SANITIZER_CDECL __asan_get_shadow_mapping(size_t *shadow_scale, size_t *shadow_offset); /// This is an internal function that is called to report an error. However, /// it is still a part of the interface because you might want to set a /// breakpoint on this function in the debugger. /// /// \param pc pc value of the ASan error. /// \param bp bp value of the ASan error. /// \param sp sp value of the ASan error. /// \param addr Address of the ASan error. /// \param is_write True if the error is a write error; false otherwise. /// \param access_size Size of the memory access of the ASan error. void SANITIZER_CDECL __asan_report_error(void *pc, void *bp, void *sp, void *addr, int is_write, size_t access_size); // Deprecated. Call __sanitizer_set_death_callback instead. void SANITIZER_CDECL __asan_set_death_callback(void (*callback)(void)); /// Sets the callback function to be called during ASan error reporting. /// /// The callback provides a string pointer to the report. /// /// \param callback User-provided function. void SANITIZER_CDECL __asan_set_error_report_callback(void (*callback)(const char *)); /// User-provided callback on ASan errors. /// /// You can provide a function that would be called immediately when ASan /// detects an error. This is useful in cases when ASan detects an error but /// your program crashes before the ASan report is printed. void SANITIZER_CDECL __asan_on_error(void); /// Prints accumulated statistics to stderr (useful for calling from the /// debugger). void SANITIZER_CDECL __asan_print_accumulated_stats(void); /// User-provided default option settings. /// /// You can provide your own implementation of this function to return a string /// containing ASan runtime options (for example, /// verbosity=1:halt_on_error=0). /// /// \returns Default options string. const char *SANITIZER_CDECL __asan_default_options(void); // The following two functions facilitate garbage collection in presence of // ASan's fake stack. /// Gets an opaque handler to the current thread's fake stack. 
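/* Illustrative sketch, not part of asan_interface.h: a program can define
 * __asan_default_options() to inject runtime options and can register a
 * callback to observe the textual report. install_asan_report_hook is a
 * hypothetical helper name. */
#include <sanitizer/asan_interface.h>
#include <stdio.h>

/* The returned string is parsed like the ASAN_OPTIONS environment variable. */
const char *__asan_default_options(void) {
  return "verbosity=1:halt_on_error=0";
}

static void log_report(const char *report) {
  fprintf(stderr, "ASan reported:\n%s", report);
}

void install_asan_report_hook(void) {
  __asan_set_error_report_callback(log_report);
}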
/// /// Returns an opaque handler to be used by /// __asan_addr_is_in_fake_stack(). Returns NULL if the current thread /// does not have a fake stack. /// /// \returns An opaque handler to the fake stack or NULL. void *SANITIZER_CDECL __asan_get_current_fake_stack(void); /// Checks if an address belongs to a given fake stack. /// /// If fake_stack is non-NULL and addr belongs to a /// fake frame in fake_stack, returns the address of the real /// stack that corresponds to the fake frame and sets beg and /// end to the boundaries of this fake frame. Otherwise returns /// NULL and does not touch beg and end. /// /// If beg or end are NULL, they are not touched. /// /// \note This function can be called from a thread other than the owner of /// fake_stack, but the owner thread needs to be alive. /// /// \param fake_stack An opaque handler to a fake stack. /// \param addr Address to test. /// \param[out] beg Beginning of fake frame. /// \param[out] end End of fake frame. /// \returns Stack address or NULL. void *SANITIZER_CDECL __asan_addr_is_in_fake_stack(void *fake_stack, void *addr, void **beg, void **end); /// Performs shadow memory cleanup of the current thread's stack before a /// function marked with the [[noreturn]] attribute is called. /// /// To avoid false positives on the stack, must be called before no-return /// functions like _exit() and execl(). void SANITIZER_CDECL __asan_handle_no_return(void); /// Update allocation stack trace for the given allocation to the current stack /// trace. Returns 1 if successful, 0 if not. int SANITIZER_CDECL __asan_update_allocation_context(void *addr); #ifdef __cplusplus } // extern "C" #endif #endif // SANITIZER_ASAN_INTERFACE_H /*===---- __stdarg_va_list.h - Definition of va_list -----------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef _VA_LIST #define _VA_LIST typedef __builtin_va_list va_list; #endif /*===---- arm_cde.h - ARM CDE intrinsics -----------------------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information.
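/* Illustrative sketch, not part of the headers above: a custom no-return exit
 * path should call __asan_handle_no_return() before abandoning the current
 * call chain, so the stack it skips over is not left poisoned (avoiding later
 * false positives). fatal_exit is a hypothetical helper. */
#include <sanitizer/asan_interface.h>
#include <stdlib.h>

_Noreturn void fatal_exit(int code) {
  /* Unpoison this thread's stack/fake-stack shadow before leaving without
   * returning through the callers. */
  __asan_handle_no_return();
  _Exit(code);
}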
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __ARM_CDE_H #define __ARM_CDE_H #if !__ARM_FEATURE_CDE #error "CDE support not enabled" #endif #include #ifdef __cplusplus extern "C" { #endif static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx1))) uint32_t __arm_cx1(int, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx1a))) uint32_t __arm_cx1a(int, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx1d))) uint64_t __arm_cx1d(int, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx1da))) uint64_t __arm_cx1da(int, uint64_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx2))) uint32_t __arm_cx2(int, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx2a))) uint32_t __arm_cx2a(int, uint32_t, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx2d))) uint64_t __arm_cx2d(int, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx2da))) uint64_t __arm_cx2da(int, uint64_t, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx3))) uint32_t __arm_cx3(int, uint32_t, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx3a))) uint32_t __arm_cx3a(int, uint32_t, uint32_t, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx3d))) uint64_t __arm_cx3d(int, uint32_t, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx3da))) uint64_t __arm_cx3da(int, uint64_t, uint32_t, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx1_u32))) uint32_t __arm_vcx1_u32(int, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx1a_u32))) uint32_t __arm_vcx1a_u32(int, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx1d_u64))) uint64_t __arm_vcx1d_u64(int, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx1da_u64))) uint64_t __arm_vcx1da_u64(int, uint64_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx2_u32))) uint32_t __arm_vcx2_u32(int, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx2a_u32))) uint32_t __arm_vcx2a_u32(int, uint32_t, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx2d_u64))) uint64_t __arm_vcx2d_u64(int, uint64_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx2da_u64))) uint64_t __arm_vcx2da_u64(int, uint64_t, uint64_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx3_u32))) uint32_t __arm_vcx3_u32(int, uint32_t, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx3a_u32))) uint32_t __arm_vcx3a_u32(int, uint32_t, uint32_t, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx3d_u64))) uint64_t __arm_vcx3d_u64(int, uint64_t, uint64_t, uint32_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx3da_u64))) uint64_t __arm_vcx3da_u64(int, uint64_t, uint64_t, uint64_t, uint32_t); #if __ARM_FEATURE_MVE typedef uint16_t mve_pred16_t; typedef __attribute__((__neon_vector_type__(8), __clang_arm_mve_strict_polymorphism)) int16_t int16x8_t; typedef __attribute__((__neon_vector_type__(4), __clang_arm_mve_strict_polymorphism)) int32_t int32x4_t; typedef __attribute__((__neon_vector_type__(2), __clang_arm_mve_strict_polymorphism)) int64_t int64x2_t; typedef __attribute__((__neon_vector_type__(16), __clang_arm_mve_strict_polymorphism)) int8_t int8x16_t; typedef __attribute__((__neon_vector_type__(8), __clang_arm_mve_strict_polymorphism)) uint16_t uint16x8_t; typedef __attribute__((__neon_vector_type__(4), __clang_arm_mve_strict_polymorphism)) uint32_t uint32x4_t; typedef __attribute__((__neon_vector_type__(2), __clang_arm_mve_strict_polymorphism)) uint64_t uint64x2_t; typedef __attribute__((__neon_vector_type__(16), __clang_arm_mve_strict_polymorphism)) uint8_t uint8x16_t; static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_s16))) int16x8_t __arm_vcx1q_m(int, int16x8_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_s32))) int32x4_t __arm_vcx1q_m(int, int32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_s64))) int64x2_t __arm_vcx1q_m(int, int64x2_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_s8))) int8x16_t __arm_vcx1q_m(int, int8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_u16))) uint16x8_t __arm_vcx1q_m(int, uint16x8_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_u32))) uint32x4_t __arm_vcx1q_m(int, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_u64))) uint64x2_t __arm_vcx1q_m(int, uint64x2_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_u8))) uint8x16_t __arm_vcx1q_m(int, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_u8))) uint8x16_t __arm_vcx1q_u8(int, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_s16))) int16x8_t __arm_vcx1qa_m(int, int16x8_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_s32))) int32x4_t __arm_vcx1qa_m(int, int32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_s64))) int64x2_t __arm_vcx1qa_m(int, int64x2_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_s8))) int8x16_t __arm_vcx1qa_m(int, int8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_u16))) uint16x8_t __arm_vcx1qa_m(int, uint16x8_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_u32))) uint32x4_t __arm_vcx1qa_m(int, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_u64))) uint64x2_t __arm_vcx1qa_m(int, uint64x2_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_u8))) uint8x16_t __arm_vcx1qa_m(int, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_s16))) int16x8_t __arm_vcx1qa(int, int16x8_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_s32))) int32x4_t __arm_vcx1qa(int, int32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_s64))) int64x2_t __arm_vcx1qa(int, int64x2_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_s8))) int8x16_t __arm_vcx1qa(int, int8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_u16))) uint16x8_t __arm_vcx1qa(int, uint16x8_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_u32))) uint32x4_t __arm_vcx1qa(int, uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_u64))) uint64x2_t __arm_vcx1qa(int, uint64x2_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_u8))) uint8x16_t __arm_vcx1qa(int, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_s16))) int16x8_t __arm_vcx2q_m_impl(int, int16x8_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_s32))) int32x4_t __arm_vcx2q_m_impl(int, int32x4_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_s64))) int64x2_t __arm_vcx2q_m_impl(int, int64x2_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_s8))) int8x16_t __arm_vcx2q_m_impl(int, int8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_u16))) uint16x8_t __arm_vcx2q_m_impl(int, uint16x8_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_u32))) uint32x4_t __arm_vcx2q_m_impl(int, uint32x4_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_u64))) uint64x2_t __arm_vcx2q_m_impl(int, uint64x2_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_u8))) uint8x16_t __arm_vcx2q_m_impl(int, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_s16))) int16x8_t __arm_vcx2q(int, int16x8_t, uint32_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_s32))) int32x4_t __arm_vcx2q(int, int32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_s64))) int64x2_t __arm_vcx2q(int, int64x2_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_s8))) int8x16_t __arm_vcx2q(int, int8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u16))) uint16x8_t __arm_vcx2q(int, uint16x8_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u32))) uint32x4_t __arm_vcx2q(int, uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u64))) uint64x2_t __arm_vcx2q(int, uint64x2_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8))) uint8x16_t __arm_vcx2q(int, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_s16))) uint8x16_t __arm_vcx2q_u8(int, int16x8_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_s32))) uint8x16_t __arm_vcx2q_u8(int, int32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_s64))) uint8x16_t __arm_vcx2q_u8(int, int64x2_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_s8))) uint8x16_t __arm_vcx2q_u8(int, int8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_u16))) uint8x16_t __arm_vcx2q_u8(int, uint16x8_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_u32))) uint8x16_t __arm_vcx2q_u8(int, uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_u64))) uint8x16_t __arm_vcx2q_u8(int, uint64x2_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_u8))) uint8x16_t __arm_vcx2q_u8(int, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_s16))) int16x8_t __arm_vcx2qa_impl(int, int16x8_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_s32))) int32x4_t __arm_vcx2qa_impl(int, int32x4_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_s64))) int64x2_t __arm_vcx2qa_impl(int, int64x2_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_s8))) int8x16_t __arm_vcx2qa_impl(int, int8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_u16))) uint16x8_t __arm_vcx2qa_impl(int, uint16x8_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_u32))) uint32x4_t __arm_vcx2qa_impl(int, uint32x4_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_u64))) uint64x2_t __arm_vcx2qa_impl(int, uint64x2_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_u8))) uint8x16_t __arm_vcx2qa_impl(int, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_s16))) int16x8_t __arm_vcx2qa_m_impl(int, int16x8_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_s32))) int32x4_t __arm_vcx2qa_m_impl(int, int32x4_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_s64))) int64x2_t __arm_vcx2qa_m_impl(int, int64x2_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_s8))) int8x16_t __arm_vcx2qa_m_impl(int, int8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_u16))) uint16x8_t __arm_vcx2qa_m_impl(int, uint16x8_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_u32))) uint32x4_t __arm_vcx2qa_m_impl(int, uint32x4_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_u64))) uint64x2_t __arm_vcx2qa_m_impl(int, uint64x2_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_u8))) uint8x16_t __arm_vcx2qa_m_impl(int, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_s16))) int16x8_t __arm_vcx3q_impl(int, int16x8_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_s32))) int32x4_t __arm_vcx3q_impl(int, int32x4_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_s64))) int64x2_t __arm_vcx3q_impl(int, int64x2_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_s8))) int8x16_t __arm_vcx3q_impl(int, int8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_u16))) uint16x8_t __arm_vcx3q_impl(int, uint16x8_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_u32))) uint32x4_t __arm_vcx3q_impl(int, uint32x4_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_u64))) uint64x2_t __arm_vcx3q_impl(int, uint64x2_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_u8))) uint8x16_t __arm_vcx3q_impl(int, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_s16))) int16x8_t __arm_vcx3q_m_impl(int, int16x8_t, uint8x16_t, uint8x16_t, uint32_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_s32))) int32x4_t __arm_vcx3q_m_impl(int, int32x4_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_s64))) int64x2_t __arm_vcx3q_m_impl(int, int64x2_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_s8))) int8x16_t __arm_vcx3q_m_impl(int, int8x16_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_u16))) uint16x8_t __arm_vcx3q_m_impl(int, uint16x8_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_u32))) uint32x4_t __arm_vcx3q_m_impl(int, uint32x4_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_u64))) uint64x2_t __arm_vcx3q_m_impl(int, uint64x2_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_u8))) uint8x16_t __arm_vcx3q_m_impl(int, uint8x16_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_s16))) uint8x16_t __arm_vcx3q_u8_impl(int, int16x8_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_s32))) uint8x16_t __arm_vcx3q_u8_impl(int, int32x4_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_s64))) uint8x16_t __arm_vcx3q_u8_impl(int, int64x2_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_s8))) uint8x16_t __arm_vcx3q_u8_impl(int, int8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_u16))) uint8x16_t __arm_vcx3q_u8_impl(int, uint16x8_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_u32))) uint8x16_t __arm_vcx3q_u8_impl(int, uint32x4_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_u64))) uint8x16_t __arm_vcx3q_u8_impl(int, uint64x2_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_u8))) uint8x16_t __arm_vcx3q_u8_impl(int, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_s16))) int16x8_t __arm_vcx3qa_impl(int, int16x8_t, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_s32))) int32x4_t __arm_vcx3qa_impl(int, int32x4_t, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_s64))) int64x2_t __arm_vcx3qa_impl(int, int64x2_t, uint8x16_t, uint8x16_t, uint32_t); 
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_s8))) int8x16_t __arm_vcx3qa_impl(int, int8x16_t, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_u16))) uint16x8_t __arm_vcx3qa_impl(int, uint16x8_t, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_u32))) uint32x4_t __arm_vcx3qa_impl(int, uint32x4_t, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_u64))) uint64x2_t __arm_vcx3qa_impl(int, uint64x2_t, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_u8))) uint8x16_t __arm_vcx3qa_impl(int, uint8x16_t, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_s16))) int16x8_t __arm_vcx3qa_m_impl(int, int16x8_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_s32))) int32x4_t __arm_vcx3qa_m_impl(int, int32x4_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_s64))) int64x2_t __arm_vcx3qa_m_impl(int, int64x2_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_s8))) int8x16_t __arm_vcx3qa_m_impl(int, int8x16_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_u16))) uint16x8_t __arm_vcx3qa_m_impl(int, uint16x8_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_u32))) uint32x4_t __arm_vcx3qa_m_impl(int, uint32x4_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_u64))) uint64x2_t __arm_vcx3qa_m_impl(int, uint64x2_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_u8))) uint8x16_t __arm_vcx3qa_m_impl(int, uint8x16_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u8))) int16x8_t __arm_vreinterpretq_s16_u8(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u8))) int32x4_t __arm_vreinterpretq_s32_u8(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u8))) int64x2_t __arm_vreinterpretq_s64_u8(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u8))) int8x16_t __arm_vreinterpretq_s8_u8(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u8))) uint16x8_t __arm_vreinterpretq_u16_u8(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u8))) uint32x4_t 
__arm_vreinterpretq_u32_u8(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u8))) uint64x2_t __arm_vreinterpretq_u64_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s16))) uint8x16_t __arm_vreinterpretq_u8(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s32))) uint8x16_t __arm_vreinterpretq_u8(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s64))) uint8x16_t __arm_vreinterpretq_u8(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s8))) uint8x16_t __arm_vreinterpretq_u8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u16))) uint8x16_t __arm_vreinterpretq_u8(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u32))) uint8x16_t __arm_vreinterpretq_u8(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u64))) uint8x16_t __arm_vreinterpretq_u8(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vreinterpretq_u8_u8))) uint8x16_t __arm_vreinterpretq_u8(uint8x16_t); #define __arm_vcx2q_m(cp, inactive, n, imm, pred) __arm_vcx2q_m_impl((cp), (inactive), __arm_vreinterpretq_u8(n), (imm), (pred)) #define __arm_vcx2qa(cp, acc, n, imm) __arm_vcx2qa_impl((cp), (acc), __arm_vreinterpretq_u8(n), (imm)) #define __arm_vcx2qa_m(cp, acc, n, imm, pred) __arm_vcx2qa_m_impl((cp), (acc), __arm_vreinterpretq_u8(n), (imm), (pred)) #define __arm_vcx3q(cp, n, m, imm) __arm_vcx3q_impl((cp), (n), __arm_vreinterpretq_u8(m), (imm)) #define __arm_vcx3q_m(cp, inactive, n, m, imm, pred) __arm_vcx3q_m_impl((cp), (inactive), __arm_vreinterpretq_u8(n), __arm_vreinterpretq_u8(m), (imm), (pred)) #define __arm_vcx3q_u8(cp, n, m, imm) __arm_vcx3q_u8_impl((cp), (n), __arm_vreinterpretq_u8(m), (imm)) #define __arm_vcx3qa(cp, acc, n, m, imm) __arm_vcx3qa_impl((cp), (acc), __arm_vreinterpretq_u8(n), __arm_vreinterpretq_u8(m), (imm)) #define __arm_vcx3qa_m(cp, acc, n, m, imm, pred) __arm_vcx3qa_m_impl((cp), (acc), __arm_vreinterpretq_u8(n), __arm_vreinterpretq_u8(m), (imm), (pred)) #endif /* __ARM_FEATURE_MVE */ #if __ARM_FEATURE_MVE & 2 typedef __fp16 float16_t; typedef float float32_t; typedef __attribute__((__neon_vector_type__(8), __clang_arm_mve_strict_polymorphism)) float16_t float16x8_t; typedef __attribute__((__neon_vector_type__(4), __clang_arm_mve_strict_polymorphism)) float32_t float32x4_t; static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_f16))) float16x8_t __arm_vcx1q_m(int, float16x8_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_f32))) float32x4_t __arm_vcx1q_m(int, float32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_f16))) float16x8_t __arm_vcx1qa(int, float16x8_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_f32))) float32x4_t __arm_vcx1qa(int, float32x4_t, uint32_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_f16))) float16x8_t __arm_vcx1qa_m(int, float16x8_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_f32))) float32x4_t __arm_vcx1qa_m(int, float32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_f16))) float16x8_t __arm_vcx2q(int, float16x8_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_f32))) float32x4_t __arm_vcx2q(int, float32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_f16))) float16x8_t __arm_vcx2q_m_impl(int, float16x8_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_f32))) float32x4_t __arm_vcx2q_m_impl(int, float32x4_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_f16))) uint8x16_t __arm_vcx2q_u8(int, float16x8_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_f32))) uint8x16_t __arm_vcx2q_u8(int, float32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_f16))) float16x8_t __arm_vcx2qa_impl(int, float16x8_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_f32))) float32x4_t __arm_vcx2qa_impl(int, float32x4_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_f16))) float16x8_t __arm_vcx2qa_m_impl(int, float16x8_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_f32))) float32x4_t __arm_vcx2qa_m_impl(int, float32x4_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_f16))) float16x8_t __arm_vcx3q_impl(int, float16x8_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_f32))) float32x4_t __arm_vcx3q_impl(int, float32x4_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_f16))) float16x8_t __arm_vcx3q_m_impl(int, float16x8_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_f32))) float32x4_t __arm_vcx3q_m_impl(int, float32x4_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_f16))) uint8x16_t __arm_vcx3q_u8_impl(int, float16x8_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_f32))) uint8x16_t __arm_vcx3q_u8_impl(int, float32x4_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_f16))) float16x8_t __arm_vcx3qa_impl(int, float16x8_t, uint8x16_t, uint8x16_t, uint32_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_f32))) float32x4_t __arm_vcx3qa_impl(int, float32x4_t, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_f16))) float16x8_t __arm_vcx3qa_m_impl(int, float16x8_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_f32))) float32x4_t __arm_vcx3qa_m_impl(int, float32x4_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u8))) float16x8_t __arm_vreinterpretq_f16_u8(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u8))) float32x4_t __arm_vreinterpretq_f32_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f16))) uint8x16_t __arm_vreinterpretq_u8(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f32))) uint8x16_t __arm_vreinterpretq_u8(float32x4_t); #endif /* __ARM_FEATURE_MVE & 2 */ #ifdef __cplusplus } /* extern "C" */ #endif #endif /* __ARM_CDE_H */ avx512fintrin.h/*===----------------- gfniintrin.h - GFNI intrinsics ----------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __GFNIINTRIN_H #define __GFNIINTRIN_H /* Default attributes for simple form (no masking). */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("gfni,no-evex512"), __min_vector_width__(128))) /* Default attributes for YMM unmasked form. */ #define __DEFAULT_FN_ATTRS_Y \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx,gfni,no-evex512"), \ __min_vector_width__(256))) /* Default attributes for ZMM unmasked forms. */ #define __DEFAULT_FN_ATTRS_Z \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512f,evex512,gfni"), \ __min_vector_width__(512))) /* Default attributes for ZMM masked forms. */ #define __DEFAULT_FN_ATTRS_Z_MASK \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512bw,evex512,gfni"), \ __min_vector_width__(512))) /* Default attributes for VLX masked forms. 
*/ #define __DEFAULT_FN_ATTRS_VL128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512bw,avx512vl,gfni,no-evex512"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS_VL256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512bw,avx512vl,gfni,no-evex512"), \ __min_vector_width__(256))) #define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \ ((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), \ (char)(I))) #define _mm_gf2p8affine_epi64_epi8(A, B, I) \ ((__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), \ (char)(I))) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_gf2p8mul_epi8(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A, (__v16qi) __B); } #ifdef __AVXINTRIN_H #define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \ ((__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \ (__v32qi)(__m256i)(B), \ (char)(I))) #define _mm256_gf2p8affine_epi64_epi8(A, B, I) \ ((__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \ (__v32qi)(__m256i)(B), \ (char)(I))) static __inline__ __m256i __DEFAULT_FN_ATTRS_Y _mm256_gf2p8mul_epi8(__m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A, (__v32qi) __B); } #endif /* __AVXINTRIN_H */ #ifdef __AVX512BWINTRIN_H #define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \ ((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \ (__v64qi)(__m512i)(B), \ (char)(I))) #define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \ ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \ (__v64qi)(__m512i)(S))) #define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \ _mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), \ U, A, B, I) #define _mm512_gf2p8affine_epi64_epi8(A, B, I) \ ((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \ (__v64qi)(__m512i)(B), \ (char)(I))) #define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \ ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ (__v64qi)_mm512_gf2p8affine_epi64_epi8((A), (B), (I)), \ (__v64qi)(__m512i)(S))) #define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \ _mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), \ U, A, B, I) static __inline__ __m512i __DEFAULT_FN_ATTRS_Z _mm512_gf2p8mul_epi8(__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A, (__v64qi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS_Z_MASK _mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_selectb_512(__U, (__v64qi) _mm512_gf2p8mul_epi8(__A, __B), (__v64qi) __S); } static __inline__ __m512i __DEFAULT_FN_ATTRS_Z_MASK _mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_si512(), __U, __A, __B); } #endif /* __AVX512BWINTRIN_H */ #ifdef __AVX512VLBWINTRIN_H #define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \ ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \ (__v16qi)(__m128i)(S))) #define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \ _mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \ U, A, B, I) #define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \ ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ 
(__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \ (__v32qi)(__m256i)(S))) #define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \ _mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \ U, A, B, I) #define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \ ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \ (__v16qi)(__m128i)(S))) #define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \ _mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), U, A, B, I) #define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \ ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \ (__v32qi)(__m256i)(S))) #define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \ _mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \ U, A, B, I) static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128 _mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_selectb_128(__U, (__v16qi) _mm_gf2p8mul_epi8(__A, __B), (__v16qi) __S); } static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128 _mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(), __U, __A, __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256 _mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_selectb_256(__U, (__v32qi) _mm256_gf2p8mul_epi8(__A, __B), (__v32qi) __S); } static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256 _mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(), __U, __A, __B); } #endif /* __AVX512VLBWINTRIN_H */ #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_Y #undef __DEFAULT_FN_ATTRS_Z #undef __DEFAULT_FN_ATTRS_VL128 #undef __DEFAULT_FN_ATTRS_VL256 #endif /* __GFNIINTRIN_H */ /*===---- immintrin.h - Intel intrinsics -----------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #define __IMMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif #include #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__MMX__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SSE__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SSE2__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SSE3__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SSSE3__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__SSE4_2__) || defined(__SSE4_1__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AES__) || defined(__PCLMUL__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__CLFLUSHOPT__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__CLWB__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX2__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__F16C__) #include #endif /* No feature check desired due to internal checks */ #include #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__BMI2__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__LZCNT__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__POPCNT__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__FMA__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512F__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512VL__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512BW__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512BITALG__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512CD__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512VPOPCNTDQ__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512VNNI__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512VNNI__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVXVNNI__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512DQ__) #include #endif #if !(defined(_MSC_VER) 
|| defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512BITALG__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512BW__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512CD__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512DQ__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512ER__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512IFMA__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512IFMA__) && defined(__AVX512VL__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVXIFMA__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512VBMI__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VBMI__) && defined(__AVX512VL__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512VBMI2__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VBMI2__) && defined(__AVX512VL__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512PF__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512FP16__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512FP16__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512BF16__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512BF16__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__PKU__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__VPCLMULQDQ__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__VAES__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__GFNI__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVXVNNIINT8__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVXNECONVERT__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SHA512__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SM3__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SM4__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVXVNNIINT16__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RDPID__) /// Reads the value of the IA32_TSC_AUX MSR (0xc0000103). /// /// \headerfile /// /// This intrinsic corresponds to the RDPID instruction. 
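/// 
/// Illustrative usage sketch (not part of the upstream header): on Linux the
/// kernel typically programs IA32_TSC_AUX with an encoding of the current CPU
/// and NUMA node, so _rdpid_u32() can serve as a cheap "which CPU am I on"
/// hint. Treat the exact bit layout as an assumption and verify it on your OS.
/// \code
///   unsigned int aux  = _rdpid_u32();
///   unsigned int cpu  = aux & 0xfff;   /* low 12 bits: CPU index (assumed)  */
///   unsigned int node = aux >> 12;     /* remaining bits: node (assumed)    */
/// \endcode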
/// /// \returns The 32-bit contents of the MSR. static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid"))) _rdpid_u32(void) { return __builtin_ia32_rdpid(); } #endif // __RDPID__ #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RDRND__) /// Returns a 16-bit hardware-generated random value. /// /// \headerfile /// /// This intrinsic corresponds to the RDRAND instruction. /// /// \param __p /// A pointer to a 16-bit memory location to place the random value. /// \returns 1 if the value was successfully generated, 0 otherwise. static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) _rdrand16_step(unsigned short *__p) { return (int)__builtin_ia32_rdrand16_step(__p); } /// Returns a 32-bit hardware-generated random value. /// /// \headerfile /// /// This intrinsic corresponds to the RDRAND instruction. /// /// \param __p /// A pointer to a 32-bit memory location to place the random value. /// \returns 1 if the value was successfully generated, 0 otherwise. static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) _rdrand32_step(unsigned int *__p) { return (int)__builtin_ia32_rdrand32_step(__p); } /// Returns a 64-bit hardware-generated random value. /// /// \headerfile /// /// This intrinsic corresponds to the RDRAND instruction. /// /// \param __p /// A pointer to a 64-bit memory location to place the random value. /// \returns 1 if the value was successfully generated, 0 otherwise. static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) _rdrand64_step(unsigned long long *__p) { #ifdef __x86_64__ return (int)__builtin_ia32_rdrand64_step(__p); #else // We need to emulate the functionality of 64-bit rdrand with 2 32-bit // rdrand instructions. unsigned int __lo, __hi; unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo); unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi); if (__res_lo && __res_hi) { *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo; return 1; } else { *__p = 0; return 0; } #endif } #endif /* __RDRND__ */ #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__FSGSBASE__) #ifdef __x86_64__ /// Reads the FS base register. /// /// \headerfile /// /// This intrinsic corresponds to the RDFSBASE instruction. /// /// \returns The lower 32 bits of the FS base register. static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _readfsbase_u32(void) { return __builtin_ia32_rdfsbase32(); } /// Reads the FS base register. /// /// \headerfile /// /// This intrinsic corresponds to the RDFSBASE instruction. /// /// \returns The contents of the FS base register. static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _readfsbase_u64(void) { return __builtin_ia32_rdfsbase64(); } /// Reads the GS base register. /// /// \headerfile /// /// This intrinsic corresponds to the RDGSBASE instruction. /// /// \returns The lower 32 bits of the GS base register. static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _readgsbase_u32(void) { return __builtin_ia32_rdgsbase32(); } /// Reads the GS base register. /// /// \headerfile /// /// This intrinsic corresponds to the RDGSBASE instruction. /// /// \returns The contents of the GS base register. 
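///
/// Illustrative usage sketch (not part of the upstream header): reading the
/// current GS base. This assumes the OS has enabled user-space FSGSBASE
/// (CR4.FSGSBASE); otherwise the instruction faults in user mode.
/// \code
///   unsigned long long gs_base = _readgsbase_u64();
///   /* e.g. record gs_base in a trace; installing a new base with
///      _writegsbase_u64() is only safe if the runtime expects it. */
/// \endcode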
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _readgsbase_u64(void) { return __builtin_ia32_rdgsbase64(); } /// Modifies the FS base register. /// /// \headerfile /// /// This intrinsic corresponds to the WRFSBASE instruction. /// /// \param __V /// Value to use for the lower 32 bits of the FS base register. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writefsbase_u32(unsigned int __V) { __builtin_ia32_wrfsbase32(__V); } /// Modifies the FS base register. /// /// \headerfile /// /// This intrinsic corresponds to the WRFSBASE instruction. /// /// \param __V /// Value to use for the FS base register. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writefsbase_u64(unsigned long long __V) { __builtin_ia32_wrfsbase64(__V); } /// Modifies the GS base register. /// /// \headerfile /// /// This intrinsic corresponds to the WRGSBASE instruction. /// /// \param __V /// Value to use for the lower 32 bits of the GS base register. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writegsbase_u32(unsigned int __V) { __builtin_ia32_wrgsbase32(__V); } /// Modifies the GS base register. /// /// \headerfile /// /// This intrinsic corresponds to the WRFSBASE instruction. /// /// \param __V /// Value to use for GS base register. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writegsbase_u64(unsigned long long __V) { __builtin_ia32_wrgsbase64(__V); } #endif #endif /* __FSGSBASE__ */ #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__MOVBE__) /* The structs used below are to force the load/store to be unaligned. This * is accomplished with the __packed__ attribute. The __may_alias__ prevents * tbaa metadata from being generated based on the struct and the type of the * field inside of it. */ /// Load a 16-bit value from memory and swap its bytes. /// /// \headerfile /// /// This intrinsic corresponds to the MOVBE instruction. /// /// \param __P /// A pointer to the 16-bit value to load. /// \returns The byte-swapped value. static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _loadbe_i16(void const * __P) { struct __loadu_i16 { unsigned short __v; } __attribute__((__packed__, __may_alias__)); return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v); } /// Swap the bytes of a 16-bit value and store it to memory. /// /// \headerfile /// /// This intrinsic corresponds to the MOVBE instruction. /// /// \param __P /// A pointer to the memory for storing the swapped value. /// \param __D /// The 16-bit value to be byte-swapped. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _storebe_i16(void * __P, short __D) { struct __storeu_i16 { unsigned short __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D); } /// Load a 32-bit value from memory and swap its bytes. /// /// \headerfile /// /// This intrinsic corresponds to the MOVBE instruction. /// /// \param __P /// A pointer to the 32-bit value to load. /// \returns The byte-swapped value. 
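///
/// Illustrative usage sketch (not part of the upstream header): reading a
/// big-endian 32-bit length field from a network buffer on a little-endian
/// x86 host. The pointer may be unaligned; the implementation below uses a
/// packed struct for exactly that reason. The buffer name is hypothetical.
/// \code
///   const unsigned char *packet = /* hypothetical received buffer */ 0;
///   int payload_len = _loadbe_i32(packet + 4);  /* bytes 4..7, big-endian */
/// \endcode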
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _loadbe_i32(void const * __P) { struct __loadu_i32 { unsigned int __v; } __attribute__((__packed__, __may_alias__)); return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v); } /// Swap the bytes of a 32-bit value and store it to memory. /// /// \headerfile /// /// This intrinsic corresponds to the MOVBE instruction. /// /// \param __P /// A pointer to the memory for storing the swapped value. /// \param __D /// The 32-bit value to be byte-swapped. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _storebe_i32(void * __P, int __D) { struct __storeu_i32 { unsigned int __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_i32*)__P)->__v = __builtin_bswap32((unsigned int)__D); } #ifdef __x86_64__ /// Load a 64-bit value from memory and swap its bytes. /// /// \headerfile /// /// This intrinsic corresponds to the MOVBE instruction. /// /// \param __P /// A pointer to the 64-bit value to load. /// \returns The byte-swapped value. static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _loadbe_i64(void const * __P) { struct __loadu_i64 { unsigned long long __v; } __attribute__((__packed__, __may_alias__)); return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v); } /// Swap the bytes of a 64-bit value and store it to memory. /// /// \headerfile /// /// This intrinsic corresponds to the MOVBE instruction. /// /// \param __P /// A pointer to the memory for storing the swapped value. /// \param __D /// The 64-bit value to be byte-swapped. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _storebe_i64(void * __P, long long __D) { struct __storeu_i64 { unsigned long long __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_i64*)__P)->__v = __builtin_bswap64((unsigned long long)__D); } #endif #endif /* __MOVBE */ #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RTM__) #include #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SHA__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__FXSR__) #include #endif /* No feature check desired due to internal MSC_VER checks */ #include #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__XSAVEOPT__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__XSAVEC__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__XSAVES__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SHSTK__) #include #endif /* Intrinsics inside adcintrin.h are available at all times. 
*/ #include #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__ADX__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RDSEED__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__WBNOINVD__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__CLDEMOTE__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__WAITPKG__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__MOVDIRI__) || defined(__MOVDIR64B__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__PCONFIG__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SGX__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__PTWRITE__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__INVPCID__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AMX_FP16__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__KL__) || defined(__WIDEKL__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AMX_TILE__) || defined(__AMX_INT8__) || defined(__AMX_BF16__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AMX_COMPLEX__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512VP2INTERSECT__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__ENQCMD__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SERIALIZE__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__TSXLDTRK__) #include #endif #if defined(_MSC_VER) && __has_extension(gnu_asm) /* Define the default attributes for these intrinsics */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) #ifdef __cplusplus extern "C" { #endif /*----------------------------------------------------------------------------*\ |* Interlocked Exchange HLE \*----------------------------------------------------------------------------*/ #if defined(__i386__) || defined(__x86_64__) static __inline__ long __DEFAULT_FN_ATTRS _InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) { __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}" : "+r" (_Value), "+m" (*_Target) :: "memory"); return _Value; } static __inline__ long __DEFAULT_FN_ATTRS _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) { __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}" : "+r" (_Value), "+m" (*_Target) :: "memory"); return _Value; } #endif #if defined(__x86_64__) static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) { __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}" : "+r" (_Value), "+m" (*_Target) :: "memory"); return _Value; } static __inline__ __int64 
__DEFAULT_FN_ATTRS _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) { __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}" : "+r" (_Value), "+m" (*_Target) :: "memory"); return _Value; } #endif /*----------------------------------------------------------------------------*\ |* Interlocked Compare Exchange HLE \*----------------------------------------------------------------------------*/ #if defined(__i386__) || defined(__x86_64__) static __inline__ long __DEFAULT_FN_ATTRS _InterlockedCompareExchange_HLEAcquire(long volatile *_Destination, long _Exchange, long _Comparand) { __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}" : "+a" (_Comparand), "+m" (*_Destination) : "r" (_Exchange) : "memory"); return _Comparand; } static __inline__ long __DEFAULT_FN_ATTRS _InterlockedCompareExchange_HLERelease(long volatile *_Destination, long _Exchange, long _Comparand) { __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}" : "+a" (_Comparand), "+m" (*_Destination) : "r" (_Exchange) : "memory"); return _Comparand; } #endif #if defined(__x86_64__) static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand) { __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}" : "+a" (_Comparand), "+m" (*_Destination) : "r" (_Exchange) : "memory"); return _Comparand; } static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand) { __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}" : "+a" (_Comparand), "+m" (*_Destination) : "r" (_Exchange) : "memory"); return _Comparand; } #endif #ifdef __cplusplus } #endif #undef __DEFAULT_FN_ATTRS #endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */ #endif /* __IMMINTRIN_H */ riscv_vector.h/*- * Copyright (c) 2011 Ed Schouten * David Chisnall * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _STDATOMIC_H_ #define _STDATOMIC_H_ #include #if defined(__cplusplus) && __cplusplus >= 201103L && __has_include() # if __has_feature(cxx_atomic) # define _STDATOMIC_HAVE_ATOMIC # endif #endif #ifdef _STDATOMIC_HAVE_ATOMIC /* We have a usable C++ ; use it instead. 
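 *
 * Illustrative note (not part of the original header): in a C++ translation
 * unit this shim forwards the C11 <stdatomic.h> surface to std::atomic, so
 * code such as the following compiles the same way in C and C++ (sketch,
 * assuming a C++11 <atomic> is available):
 *
 *   _Atomic(int) counter;                              // std::atomic<int> in C++
 *   atomic_store_explicit(&counter, 0, memory_order_relaxed);
 *   int v = atomic_fetch_add(&counter, 1);             // returns previous value
 *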
*/ #include #undef _Atomic /* Also defined by for gcc. But not used in macros. */ /* Also a clang intrinsic. */ /* Should not be used by client code before this file is */ /* included. The definitions in themselves see */ /* the old definition, as they should. */ /* Client code sees the following definition. */ #define _Atomic(t) std::atomic using std::atomic_is_lock_free; using std::atomic_init; using std::atomic_store; using std::atomic_store_explicit; using std::atomic_load; using std::atomic_load_explicit; using std::atomic_exchange; using std::atomic_exchange_explicit; using std::atomic_compare_exchange_strong; using std::atomic_compare_exchange_strong_explicit; using std::atomic_compare_exchange_weak; using std::atomic_compare_exchange_weak_explicit; using std::atomic_fetch_add; using std::atomic_fetch_add_explicit; using std::atomic_fetch_sub; using std::atomic_fetch_sub_explicit; using std::atomic_fetch_or; using std::atomic_fetch_or_explicit; using std::atomic_fetch_xor; using std::atomic_fetch_xor_explicit; using std::atomic_fetch_and; using std::atomic_fetch_and_explicit; using std::atomic_thread_fence; using std::atomic_signal_fence; using std::memory_order; using std::memory_order_relaxed; using std::memory_order_consume; using std::memory_order_acquire; using std::memory_order_release; using std::memory_order_acq_rel; using std::memory_order_seq_cst; using std::atomic_bool; using std::atomic_char; using std::atomic_schar; using std::atomic_uchar; using std::atomic_short; using std::atomic_ushort; using std::atomic_int; using std::atomic_uint; using std::atomic_long; using std::atomic_ulong; using std::atomic_llong; using std::atomic_ullong; using std::atomic_char16_t; using std::atomic_char32_t; using std::atomic_wchar_t; using std::atomic_int_least8_t; using std::atomic_uint_least8_t; using std::atomic_int_least16_t; using std::atomic_uint_least16_t; using std::atomic_int_least32_t; using std::atomic_uint_least32_t; using std::atomic_int_least64_t; using std::atomic_uint_least64_t; using std::atomic_int_fast8_t; using std::atomic_uint_fast8_t; using std::atomic_int_fast16_t; using std::atomic_uint_fast16_t; using std::atomic_int_fast32_t; using std::atomic_uint_fast32_t; using std::atomic_int_fast64_t; using std::atomic_uint_fast64_t; using std::atomic_intptr_t; using std::atomic_uintptr_t; using std::atomic_size_t; using std::atomic_ptrdiff_t; using std::atomic_intmax_t; using std::atomic_uintmax_t; #else /* unavailable, possibly because this is C, not C++ */ /* Actual implementation is in bits/stdatomic.h since our test code is C++. */ #include #endif /* unavailable */ #endif /* !_STDATOMIC_H_ */ /*===------------- tsxldtrkintrin.h - tsxldtrk intrinsics ------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __TSXLDTRKINTRIN_H #define __TSXLDTRKINTRIN_H /* Define the default attributes for the functions in this file */ #define _DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("tsxldtrk"))) /// Marks the start of an TSX (RTM) suspend load address tracking region. If /// this intrinsic is used inside a transactional region, subsequent loads /// are not added to the read set of the transaction. 
If it's used inside a /// suspend load address tracking region it will cause transaction abort. /// If it's used outside of a transactional region it behaves like a NOP. /// /// \headerfile /// /// This intrinsic corresponds to the \c XSUSLDTRK instruction. /// static __inline__ void _DEFAULT_FN_ATTRS _xsusldtrk (void) { __builtin_ia32_xsusldtrk(); } /// Marks the end of an TSX (RTM) suspend load address tracking region. If this /// intrinsic is used inside a suspend load address tracking region it will /// end the suspend region and all following load addresses will be added to /// the transaction read set. If it's used inside an active transaction but /// not in a suspend region it will cause transaction abort. If it's used /// outside of a transactional region it behaves like a NOP. /// /// \headerfile /// /// This intrinsic corresponds to the \c XRESLDTRK instruction. /// static __inline__ void _DEFAULT_FN_ATTRS _xresldtrk (void) { __builtin_ia32_xresldtrk(); } #undef _DEFAULT_FN_ATTRS #endif /* __TSXLDTRKINTRIN_H */ /*===---- ammintrin.h - SSE4a intrinsics -----------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __AMMINTRIN_H #define __AMMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif #include /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a"), __min_vector_width__(128))) /// Extracts the specified bits from the lower 64 bits of the 128-bit /// integer vector operand at the index \a idx and of the length \a len. /// /// \headerfile /// /// \code /// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx); /// \endcode /// /// This intrinsic corresponds to the EXTRQ instruction. /// /// \param x /// The value from which bits are extracted. /// \param len /// Bits [5:0] specify the length; the other bits are ignored. If bits [5:0] /// are zero, the length is interpreted as 64. /// \param idx /// Bits [5:0] specify the index of the least significant bit; the other /// bits are ignored. If the sum of the index and length is greater than 64, /// the result is undefined. If the length and index are both zero, bits /// [63:0] of parameter \a x are extracted. 
If the length is zero but the /// index is non-zero, the result is undefined. /// \returns A 128-bit integer vector whose lower 64 bits contain the bits /// extracted from the source operand. #define _mm_extracti_si64(x, len, idx) \ ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \ (char)(len), (char)(idx))) /// Extracts the specified bits from the lower 64 bits of the 128-bit /// integer vector operand at the index and of the length specified by /// \a __y. /// /// \headerfile /// /// This intrinsic corresponds to the EXTRQ instruction. /// /// \param __x /// The value from which bits are extracted. /// \param __y /// Specifies the index of the least significant bit at [13:8] and the /// length at [5:0]; all other bits are ignored. If bits [5:0] are zero, the /// length is interpreted as 64. If the sum of the index and length is /// greater than 64, the result is undefined. If the length and index are /// both zero, bits [63:0] of parameter \a __x are extracted. If the length /// is zero but the index is non-zero, the result is undefined. /// \returns A 128-bit vector whose lower 64 bits contain the bits extracted /// from the source operand. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_extract_si64(__m128i __x, __m128i __y) { return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y); } /// Inserts bits of a specified length from the source integer vector /// \a y into the lower 64 bits of the destination integer vector \a x at /// the index \a idx and of the length \a len. /// /// \headerfile /// /// \code /// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len, /// const int idx); /// \endcode /// /// This intrinsic corresponds to the INSERTQ instruction. /// /// \param x /// The destination operand where bits will be inserted. The inserted bits /// are defined by the length \a len and by the index \a idx specifying the /// least significant bit. /// \param y /// The source operand containing the bits to be extracted. The extracted /// bits are the least significant bits of operand \a y of length \a len. /// \param len /// Bits [5:0] specify the length; the other bits are ignored. If bits [5:0] /// are zero, the length is interpreted as 64. /// \param idx /// Bits [5:0] specify the index of the least significant bit; the other /// bits are ignored. If the sum of the index and length is greater than 64, /// the result is undefined. If the length and index are both zero, bits /// [63:0] of parameter \a y are inserted into parameter \a x. If the length /// is zero but the index is non-zero, the result is undefined. /// \returns A 128-bit integer vector containing the original lower 64-bits of /// destination operand \a x with the specified bitfields replaced by the /// lower bits of source operand \a y. The upper 64 bits of the return value /// are undefined. #define _mm_inserti_si64(x, y, len, idx) \ ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \ (__v2di)(__m128i)(y), \ (char)(len), (char)(idx))) /// Inserts bits of a specified length from the source integer vector /// \a __y into the lower 64 bits of the destination integer vector \a __x /// at the index and of the length specified by \a __y. /// /// \headerfile /// /// This intrinsic corresponds to the INSERTQ instruction. /// /// \param __x /// The destination operand where bits will be inserted. The inserted bits /// are defined by the length and by the index of the least significant bit /// specified by operand \a __y. /// \param __y /// The source operand containing the bits to be extracted. 
The extracted /// bits are the least significant bits of operand \a __y with length /// specified by bits [69:64]. These are inserted into the destination at the /// index specified by bits [77:72]; all other bits are ignored. If bits /// [69:64] are zero, the length is interpreted as 64. If the sum of the /// index and length is greater than 64, the result is undefined. If the /// length and index are both zero, bits [63:0] of parameter \a __y are /// inserted into parameter \a __x. If the length is zero but the index is /// non-zero, the result is undefined. /// \returns A 128-bit integer vector containing the original lower 64-bits of /// destination operand \a __x with the specified bitfields replaced by the /// lower bits of source operand \a __y. The upper 64 bits of the return /// value are undefined. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_insert_si64(__m128i __x, __m128i __y) { return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y); } /// Stores a 64-bit double-precision value in a 64-bit memory location. /// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the MOVNTSD instruction. /// /// \param __p /// The 64-bit memory location used to store the register value. /// \param __a /// The 64-bit double-precision floating-point register value to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_sd(void *__p, __m128d __a) { __builtin_ia32_movntsd((double *)__p, (__v2df)__a); } /// Stores a 32-bit single-precision floating-point value in a 32-bit /// memory location. To minimize caching, the data is flagged as /// non-temporal (unlikely to be used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the MOVNTSS instruction. /// /// \param __p /// The 32-bit memory location used to store the register value. /// \param __a /// The 32-bit single-precision floating-point register value to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_ss(void *__p, __m128 __a) { __builtin_ia32_movntss((float *)__p, (__v4sf)__a); } #undef __DEFAULT_FN_ATTRS #endif /* __AMMINTRIN_H */ avxintrin.h/*===-------- avxvnniint8intrin.h - AVXVNNIINT8 intrinsics -----------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error \ "Never use directly; include instead." #endif #ifndef __AVXVNNIINT8INTRIN_H #define __AVXVNNIINT8INTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, __target__("avxvnniint8"), \ __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, __target__("avxvnniint8"), \ __min_vector_width__(128))) /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm_dpbssd_epi32(__m128i __W, __m128i __A, __m128i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBSSD instruction. /// /// \param __A /// A 128-bit vector of [16 x char]. 
/// \param __B /// A 128-bit vector of [16 x char]. /// \returns /// A 128-bit vector of [4 x int]. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) /// tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) /// tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) /// tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbssd_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbssd128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm256_dpbssd_epi32(__m256i __W, __m256i __A, __m256i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBSSD instruction. /// /// \param __A /// A 256-bit vector of [32 x char]. /// \param __B /// A 256-bit vector of [32 x char]. /// \returns /// A 256-bit vector of [8 x int]. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) /// tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) /// tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) /// tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbssd_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbssd256((__v8si)__W, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm_dpbssds_epi32( __m128i __W, __m128i __A, __m128i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBSSD instruction. /// /// \param __A /// A 128-bit vector of [16 x char]. /// \param __B /// A 128-bit vector of [16 x char]. /// \returns /// A 128-bit vector of [4 x int]. 
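///
/// Illustrative usage sketch (not part of the upstream header): accumulating a
/// signed 8-bit dot product with saturation, e.g. one step of an int8 GEMM
/// micro-kernel. The input vector names are hypothetical.
/// \code
///   __m128i acc = _mm_setzero_si128();
///   acc = _mm_dpbssds_epi32(acc, a_bytes, b_bytes);  /* acc[j] += dot4(a, b) */
/// \endcode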
/// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) /// tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) /// tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) /// tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbssds_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbssds128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm256_dpbssds_epi32(__m256i __W, __m256i __A, __m256i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBSSD instruction. /// /// \param __A /// A 256-bit vector of [32 x char]. /// \param __B /// A 256-bit vector of [32 x char]. /// \returns /// A 256-bit vector of [8 x int]. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) /// tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) /// tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) /// tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbssds_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbssds256((__v8si)__W, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm_dpbsud_epi32(__m128i __W, __m128i __A, __m128i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBSSD instruction. /// /// \param __A /// A 128-bit vector of [16 x char]. /// \param __B /// A 128-bit vector of [16 x unsigned char]. /// \returns /// A 128-bit vector of [4 x int]. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) /// tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) /// tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) /// tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbsud_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbsud128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. 
Sum these 4 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm256_dpbsud_epi32(__m256i __W, __m256i __A, __m256i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBSSD instruction. /// /// \param __A /// A 256-bit vector of [32 x char]. /// \param __B /// A 256-bit vector of [32 x unsigned char]. /// \returns /// A 256-bit vector of [8 x int]. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) /// tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) /// tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) /// tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbsud_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbsud256((__v8si)__W, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm_dpbsuds_epi32( __m128i __W, __m128i __A, __m128i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBSSD instruction. /// /// \param __A /// A 128-bit vector of [16 x char]. /// \param __B /// A 128-bit vector of [16 x unsigned char]. /// \returns /// A 128-bit vector of [4 x int]. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) /// tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) /// tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) /// tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbsuds_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbsuds128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm256_dpbsuds_epi32(__m256i __W, __m256i __A, __m256i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBSSD instruction. /// /// \param __A /// A 256-bit vector of [32 x char]. /// \param __B /// A 256-bit vector of [32 x unsigned char]. /// \returns /// A 256-bit vector of [8 x int]. 
/// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) /// tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) /// tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) /// tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbsuds_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbsuds256((__v8si)__W, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm_dpbuud_epi32(__m128i __W, __m128i __A, __m128i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBSSD instruction. /// /// \param __A /// A 128-bit vector of [16 x unsigned char]. /// \param __B /// A 128-bit vector of [16 x unsigned char]. /// \returns /// A 128-bit vector of [4 x int]. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) /// tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) /// tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) /// tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbuud_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbuud128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm256_dpbuud_epi32(__m256i __W, __m256i __A, __m256i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBSSD instruction. /// /// \param __A /// A 256-bit vector of [32 x unsigned char]. /// \param __B /// A 256-bit vector of [32 x unsigned char]. /// \returns /// A 256-bit vector of [8 x int]. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) /// tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) /// tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) /// tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbuud_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbuud256((__v8si)__W, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. 
Sum these 4 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm_dpbuuds_epi32( __m128i __W, __m128i __A, __m128i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBUUDS instruction. /// /// \param __A /// A 128-bit vector of [16 x unsigned char]. /// \param __B /// A 128-bit vector of [16 x unsigned char]. /// \returns /// A 128-bit vector of [4 x int]. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) /// tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) /// tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) /// tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) /// dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbuuds_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbuuds128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm256_dpbuuds_epi32(__m256i __W, __m256i __A, __m256i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBUUDS instruction. /// /// \param __A /// A 256-bit vector of [32 x unsigned char]. /// \param __B /// A 256-bit vector of [32 x unsigned char]. /// \returns /// A 256-bit vector of [8 x int]. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) /// tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) /// tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) /// tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) /// dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbuuds_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbuuds256((__v8si)__W, (__v8si)__A, (__v8si)__B); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif // __AVXVNNIINT8INTRIN_H /*===---- crc32intrin.h - SSE4.2 Accumulate CRC32 intrinsics ---------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CRC32INTRIN_H #define __CRC32INTRIN_H #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("crc32"))) /// Adds the unsigned integer operand to the CRC-32C checksum of the /// unsigned char operand. /// /// \headerfile /// /// This intrinsic corresponds to the CRC32B instruction. /// /// \param __C /// An unsigned integer operand to add to the CRC-32C checksum of operand /// \a __D. /// \param __D /// An unsigned 8-bit integer operand used to compute the CRC-32C checksum. 
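///
/// Illustrative usage sketch (not part of the upstream header): folding a byte
/// buffer into a running CRC-32C value one byte at a time (the wider variants
/// below are preferred for throughput). Buffer names are hypothetical, and the
/// initial/final inversion follows the common CRC-32C convention, which the
/// instruction itself does not apply.
/// \code
///   unsigned int crc = 0xFFFFFFFFu;
///   for (size_t i = 0; i < n; ++i)
///     crc = _mm_crc32_u8(crc, buf[i]);
///   crc = ~crc;
/// \endcode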
/// \returns The result of adding operand \a __C to the CRC-32C checksum of /// operand \a __D. static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm_crc32_u8(unsigned int __C, unsigned char __D) { return __builtin_ia32_crc32qi(__C, __D); } /// Adds the unsigned integer operand to the CRC-32C checksum of the /// unsigned short operand. /// /// \headerfile /// /// This intrinsic corresponds to the CRC32W instruction. /// /// \param __C /// An unsigned integer operand to add to the CRC-32C checksum of operand /// \a __D. /// \param __D /// An unsigned 16-bit integer operand used to compute the CRC-32C checksum. /// \returns The result of adding operand \a __C to the CRC-32C checksum of /// operand \a __D. static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm_crc32_u16(unsigned int __C, unsigned short __D) { return __builtin_ia32_crc32hi(__C, __D); } /// Adds the first unsigned integer operand to the CRC-32C checksum of /// the second unsigned integer operand. /// /// \headerfile /// /// This intrinsic corresponds to the CRC32L instruction. /// /// \param __C /// An unsigned integer operand to add to the CRC-32C checksum of operand /// \a __D. /// \param __D /// An unsigned 32-bit integer operand used to compute the CRC-32C checksum. /// \returns The result of adding operand \a __C to the CRC-32C checksum of /// operand \a __D. static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm_crc32_u32(unsigned int __C, unsigned int __D) { return __builtin_ia32_crc32si(__C, __D); } #ifdef __x86_64__ /// Adds the unsigned integer operand to the CRC-32C checksum of the /// unsigned 64-bit integer operand. /// /// \headerfile /// /// This intrinsic corresponds to the CRC32Q instruction. /// /// \param __C /// An unsigned integer operand to add to the CRC-32C checksum of operand /// \a __D. /// \param __D /// An unsigned 64-bit integer operand used to compute the CRC-32C checksum. /// \returns The result of adding operand \a __C to the CRC-32C checksum of /// operand \a __D. static __inline__ unsigned long long __DEFAULT_FN_ATTRS _mm_crc32_u64(unsigned long long __C, unsigned long long __D) { return __builtin_ia32_crc32di(__C, __D); } #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS #endif /* __CRC32INTRIN_H */ fmaintrin.hinvpcidintrin.hpopcntintrin.hriscv_ntlh.hvaesintrin.hppc_wrappers/tmmintrin.h/*===---- tmmintrin.h - Implementation of SSSE3 intrinsics on PowerPC ------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* Implemented from the specification included in the Intel C++ Compiler User Guide and Reference, version 9.0. */ #ifndef NO_WARN_X86_INTRINSICS /* This header is distributed to simplify porting x86_64 code that makes explicit use of Intel intrinsics to powerpc64le. It is the user's responsibility to determine if the results are acceptable and make additional changes as necessary. Note that much code that uses Intel intrinsics can be rewritten in standard C or GNU C extensions, which are more portable and better optimized across multiple targets. */ #endif #ifndef TMMINTRIN_H_ #define TMMINTRIN_H_ #if defined(__powerpc64__) && \ (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) #include /* We need definitions from the SSE header files. 
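 *
 * Illustrative note (not part of the original header): with this wrapper a
 * small piece of SSSE3 code can be rebuilt unmodified on powerpc64le, for
 * example (sketch; assumes NO_WARN_X86_INTRINSICS is defined to acknowledge
 * the porting caveat above, and a VSX-capable -mcpu such as power8):
 *
 *   cc -DNO_WARN_X86_INTRINSICS -mcpu=power8 demo.c
 *
 *   __m128i a = /* hypothetical input vector */ _mm_set1_epi16(-7);
 *   __m128i m = _mm_abs_epi16(a);   /* lowered to vec_abs() below */
 *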
*/ #include extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_abs_epi16(__m128i __A) { return (__m128i)vec_abs((__v8hi)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_abs_epi32(__m128i __A) { return (__m128i)vec_abs((__v4si)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_abs_epi8(__m128i __A) { return (__m128i)vec_abs((__v16qi)__A); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_abs_pi16(__m64 __A) { __v8hi __B = (__v8hi)(__v2du){__A, __A}; return (__m64)((__v2du)vec_abs(__B))[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_abs_pi32(__m64 __A) { __v4si __B = (__v4si)(__v2du){__A, __A}; return (__m64)((__v2du)vec_abs(__B))[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_abs_pi8(__m64 __A) { __v16qi __B = (__v16qi)(__v2du){__A, __A}; return (__m64)((__v2du)vec_abs(__B))[0]; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_alignr_epi8(__m128i __A, __m128i __B, const unsigned int __count) { if (__builtin_constant_p(__count) && __count < 16) { #ifdef __LITTLE_ENDIAN__ __A = (__m128i)vec_reve((__v16qu)__A); __B = (__m128i)vec_reve((__v16qu)__B); #endif __A = (__m128i)vec_sld((__v16qu)__B, (__v16qu)__A, __count); #ifdef __LITTLE_ENDIAN__ __A = (__m128i)vec_reve((__v16qu)__A); #endif return __A; } if (__count == 0) return __B; if (__count >= 16) { if (__count >= 32) { const __v16qu __zero = {0}; return (__m128i)__zero; } else { const __v16qu __shift = vec_splats((unsigned char)((__count - 16) * 8)); #ifdef __LITTLE_ENDIAN__ return (__m128i)vec_sro((__v16qu)__A, __shift); #else return (__m128i)vec_slo((__v16qu)__A, __shift); #endif } } else { const __v16qu __shiftA = vec_splats((unsigned char)((16 - __count) * 8)); const __v16qu __shiftB = vec_splats((unsigned char)(__count * 8)); #ifdef __LITTLE_ENDIAN__ __A = (__m128i)vec_slo((__v16qu)__A, __shiftA); __B = (__m128i)vec_sro((__v16qu)__B, __shiftB); #else __A = (__m128i)vec_sro((__v16qu)__A, __shiftA); __B = (__m128i)vec_slo((__v16qu)__B, __shiftB); #endif return (__m128i)vec_or((__v16qu)__A, (__v16qu)__B); } } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_alignr_pi8(__m64 __A, __m64 __B, unsigned int __count) { if (__count < 16) { __v2du __C = {__B, __A}; #ifdef __LITTLE_ENDIAN__ const __v4su __shift = {__count << 3, 0, 0, 0}; __C = (__v2du)vec_sro((__v16qu)__C, (__v16qu)__shift); #else const __v4su __shift = {0, 0, 0, __count << 3}; __C = (__v2du)vec_slo((__v16qu)__C, (__v16qu)__shift); #endif return (__m64)__C[0]; } else { const __m64 __zero = {0}; return __zero; } } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hadd_epi16(__m128i __A, __m128i __B) { const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}; __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P); __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q); return (__m128i)vec_add(__C, __D); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hadd_epi32(__m128i __A, __m128i __B) { const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27}; const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 
15, 20, 21, 22, 23, 28, 29, 30, 31}; __v4si __C = vec_perm((__v4si)__A, (__v4si)__B, __P); __v4si __D = vec_perm((__v4si)__A, (__v4si)__B, __Q); return (__m128i)vec_add(__C, __D); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hadd_pi16(__m64 __A, __m64 __B) { __v8hi __C = (__v8hi)(__v2du){__A, __B}; const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13}; const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15}; __v8hi __D = vec_perm(__C, __C, __Q); __C = vec_perm(__C, __C, __P); __C = vec_add(__C, __D); return (__m64)((__v2du)__C)[1]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hadd_pi32(__m64 __A, __m64 __B) { __v4si __C = (__v4si)(__v2du){__A, __B}; const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11}; const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15}; __v4si __D = vec_perm(__C, __C, __Q); __C = vec_perm(__C, __C, __P); __C = vec_add(__C, __D); return (__m64)((__v2du)__C)[1]; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hadds_epi16(__m128i __A, __m128i __B) { __v4si __C = {0}, __D = {0}; __C = vec_sum4s((__v8hi)__A, __C); __D = vec_sum4s((__v8hi)__B, __D); __C = (__v4si)vec_packs(__C, __D); return (__m128i)__C; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hadds_pi16(__m64 __A, __m64 __B) { const __v4si __zero = {0}; __v8hi __C = (__v8hi)(__v2du){__A, __B}; __v4si __D = vec_sum4s(__C, __zero); __C = vec_packs(__D, __D); return (__m64)((__v2du)__C)[1]; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hsub_epi16(__m128i __A, __m128i __B) { const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}; __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P); __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q); return (__m128i)vec_sub(__C, __D); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hsub_epi32(__m128i __A, __m128i __B) { const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27}; const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31}; __v4si __C = vec_perm((__v4si)__A, (__v4si)__B, __P); __v4si __D = vec_perm((__v4si)__A, (__v4si)__B, __Q); return (__m128i)vec_sub(__C, __D); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hsub_pi16(__m64 __A, __m64 __B) { const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13}; const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15}; __v8hi __C = (__v8hi)(__v2du){__A, __B}; __v8hi __D = vec_perm(__C, __C, __Q); __C = vec_perm(__C, __C, __P); __C = vec_sub(__C, __D); return (__m64)((__v2du)__C)[1]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hsub_pi32(__m64 __A, __m64 __B) { const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11}; const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15}; __v4si __C = (__v4si)(__v2du){__A, __B}; __v4si __D = vec_perm(__C, __C, __Q); __C = vec_perm(__C, __C, __P); __C = vec_sub(__C, __D); return (__m64)((__v2du)__C)[1]; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
_mm_hsubs_epi16(__m128i __A, __m128i __B) { const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}; __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P); __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q); return (__m128i)vec_subs(__C, __D); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hsubs_pi16(__m64 __A, __m64 __B) { const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13}; const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15}; __v8hi __C = (__v8hi)(__v2du){__A, __B}; __v8hi __D = vec_perm(__C, __C, __P); __v8hi __E = vec_perm(__C, __C, __Q); __C = vec_subs(__D, __E); return (__m64)((__v2du)__C)[1]; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shuffle_epi8(__m128i __A, __m128i __B) { const __v16qi __zero = {0}; __vector __bool char __select = vec_cmplt((__v16qi)__B, __zero); __v16qi __C = vec_perm((__v16qi)__A, (__v16qi)__A, (__v16qu)__B); return (__m128i)vec_sel(__C, __zero, __select); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shuffle_pi8(__m64 __A, __m64 __B) { const __v16qi __zero = {0}; __v16qi __C = (__v16qi)(__v2du){__A, __A}; __v16qi __D = (__v16qi)(__v2du){__B, __B}; __vector __bool char __select = vec_cmplt((__v16qi)__D, __zero); __C = vec_perm((__v16qi)__C, (__v16qi)__C, (__v16qu)__D); __C = vec_sel(__C, __zero, __select); return (__m64)((__v2du)(__C))[0]; } #ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sign_epi8(__m128i __A, __m128i __B) { const __v16qi __zero = {0}; __v16qi __selectneg = (__v16qi)vec_cmplt((__v16qi)__B, __zero); __v16qi __selectpos = (__v16qi)vec_neg((__v16qi)vec_cmpgt((__v16qi)__B, __zero)); __v16qi __conv = vec_add(__selectneg, __selectpos); return (__m128i)vec_mul((__v16qi)__A, (__v16qi)__conv); } #endif #ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sign_epi16(__m128i __A, __m128i __B) { const __v8hi __zero = {0}; __v8hi __selectneg = (__v8hi)vec_cmplt((__v8hi)__B, __zero); __v8hi __selectpos = (__v8hi)vec_neg((__v8hi)vec_cmpgt((__v8hi)__B, __zero)); __v8hi __conv = vec_add(__selectneg, __selectpos); return (__m128i)vec_mul((__v8hi)__A, (__v8hi)__conv); } #endif #ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sign_epi32(__m128i __A, __m128i __B) { const __v4si __zero = {0}; __v4si __selectneg = (__v4si)vec_cmplt((__v4si)__B, __zero); __v4si __selectpos = (__v4si)vec_neg((__v4si)vec_cmpgt((__v4si)__B, __zero)); __v4si __conv = vec_add(__selectneg, __selectpos); return (__m128i)vec_mul((__v4si)__A, (__v4si)__conv); } #endif #ifdef _ARCH_PWR8 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sign_pi8(__m64 __A, __m64 __B) { const __v16qi __zero = {0}; __v16qi __C = (__v16qi)(__v2du){__A, __A}; __v16qi __D = (__v16qi)(__v2du){__B, __B}; __C = (__v16qi)_mm_sign_epi8((__m128i)__C, (__m128i)__D); return (__m64)((__v2du)(__C))[0]; } #endif #ifdef _ARCH_PWR8 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sign_pi16(__m64 __A, __m64 __B) { const __v8hi __zero = {0}; __v8hi __C = (__v8hi)(__v2du){__A, __A}; __v8hi __D = (__v8hi)(__v2du){__B, __B}; __C = 
(__v8hi)_mm_sign_epi16((__m128i)__C, (__m128i)__D); return (__m64)((__v2du)(__C))[0]; } #endif #ifdef _ARCH_PWR8 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sign_pi32(__m64 __A, __m64 __B) { const __v4si __zero = {0}; __v4si __C = (__v4si)(__v2du){__A, __A}; __v4si __D = (__v4si)(__v2du){__B, __B}; __C = (__v4si)_mm_sign_epi32((__m128i)__C, (__m128i)__D); return (__m64)((__v2du)(__C))[0]; } #endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_maddubs_epi16(__m128i __A, __m128i __B) { __v8hi __unsigned = vec_splats((signed short)0x00ff); __v8hi __C = vec_and(vec_unpackh((__v16qi)__A), __unsigned); __v8hi __D = vec_and(vec_unpackl((__v16qi)__A), __unsigned); __v8hi __E = vec_unpackh((__v16qi)__B); __v8hi __F = vec_unpackl((__v16qi)__B); __C = vec_mul(__C, __E); __D = vec_mul(__D, __F); const __v16qu __odds = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; const __v16qu __evens = {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}; __E = vec_perm(__C, __D, __odds); __F = vec_perm(__C, __D, __evens); return (__m128i)vec_adds(__E, __F); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_maddubs_pi16(__m64 __A, __m64 __B) { __v8hi __C = (__v8hi)(__v2du){__A, __A}; __C = vec_unpackl((__v16qi)__C); const __v8hi __unsigned = vec_splats((signed short)0x00ff); __C = vec_and(__C, __unsigned); __v8hi __D = (__v8hi)(__v2du){__B, __B}; __D = vec_unpackl((__v16qi)__D); __D = vec_mul(__C, __D); const __v16qu __odds = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; const __v16qu __evens = {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}; __C = vec_perm(__D, __D, __odds); __D = vec_perm(__D, __D, __evens); __C = vec_adds(__C, __D); return (__m64)((__v2du)(__C))[0]; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mulhrs_epi16(__m128i __A, __m128i __B) { __v4si __C = vec_unpackh((__v8hi)__A); __v4si __D = vec_unpackh((__v8hi)__B); __C = vec_mul(__C, __D); __D = vec_unpackl((__v8hi)__A); __v4si __E = vec_unpackl((__v8hi)__B); __D = vec_mul(__D, __E); const __v4su __shift = vec_splats((unsigned int)14); __C = vec_sr(__C, __shift); __D = vec_sr(__D, __shift); const __v4si __ones = vec_splats((signed int)1); __C = vec_add(__C, __ones); __C = vec_sr(__C, (__v4su)__ones); __D = vec_add(__D, __ones); __D = vec_sr(__D, (__v4su)__ones); return (__m128i)vec_pack(__C, __D); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mulhrs_pi16(__m64 __A, __m64 __B) { __v4si __C = (__v4si)(__v2du){__A, __A}; __C = vec_unpackh((__v8hi)__C); __v4si __D = (__v4si)(__v2du){__B, __B}; __D = vec_unpackh((__v8hi)__D); __C = vec_mul(__C, __D); const __v4su __shift = vec_splats((unsigned int)14); __C = vec_sr(__C, __shift); const __v4si __ones = vec_splats((signed int)1); __C = vec_add(__C, __ones); __C = vec_sr(__C, (__v4su)__ones); __v8hi __E = vec_pack(__C, __D); return (__m64)((__v2du)(__E))[0]; } #else #include_next #endif /* defined(__powerpc64__) && \ * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* TMMINTRIN_H_ */ /*===---- instr_prof_interface.h - Instrumentation PGO User Program API ----=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== * * This header provides a public interface for fine-grained control of counter * reset and profile dumping. These interface functions can be directly called * in user programs. * \*===---------------------------------------------------------------------===*/ #ifndef COMPILER_RT_INSTR_PROFILING #define COMPILER_RT_INSTR_PROFILING #ifdef __cplusplus extern "C" { #endif #ifdef __LLVM_INSTR_PROFILE_GENERATE // Profile file reset and dump interfaces. // When `-fprofile[-instr]-generate`/`-fcs-profile-generate` is in effect, // clang defines __LLVM_INSTR_PROFILE_GENERATE to pick up the API calls. /*! * \brief Set the filename for writing instrumentation data. * * Sets the filename to be used for subsequent calls to * \a __llvm_profile_write_file(). * * \c Name is not copied, so it must remain valid. Passing NULL resets the * filename logic to the default behaviour. * * Note: There may be multiple copies of the profile runtime (one for each * instrumented image/DSO). This API only modifies the filename within the * copy of the runtime available to the calling image. * * Warning: This is a no-op if continuous mode (\ref * __llvm_profile_is_continuous_mode_enabled) is on. The reason for this is * that in continuous mode, profile counters are mmap()'d to the profile at * program initialization time. Support for transferring the mmap'd profile * counts to a new file has not been implemented. */ void __llvm_profile_set_filename(const char *Name); /*! * \brief Interface to set all PGO counters to zero for the current process. * */ void __llvm_profile_reset_counters(void); /*! * \brief this is a wrapper interface to \c __llvm_profile_write_file. * After this interface is invoked, an already dumped flag will be set * so that profile won't be dumped again during program exit. * Invocation of interface __llvm_profile_reset_counters will clear * the flag. This interface is designed to be used to collect profile * data from user selected hot regions. The use model is * __llvm_profile_reset_counters(); * ... hot region 1 * __llvm_profile_dump(); * .. some other code * __llvm_profile_reset_counters(); * ... hot region 2 * __llvm_profile_dump(); * * It is expected that on-line profile merging is on with \c %m specifier * used in profile filename . If merging is not turned on, user is expected * to invoke __llvm_profile_set_filename to specify different profile names * for different regions before dumping to avoid profile write clobbering. */ int __llvm_profile_dump(void); // Interface to dump the current process' order file to disk. int __llvm_orderfile_dump(void); #else #define __llvm_profile_set_filename(Name) #define __llvm_profile_reset_counters() #define __llvm_profile_dump() (0) #define __llvm_orderfile_dump() (0) #endif #ifdef __cplusplus } // extern "C" #endif #endif //===-- linux_syscall_hooks.h ---------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is a part of public sanitizer interface. // // System call handlers. // // Interface methods declared in this header implement pre- and post- syscall // actions for the active sanitizer. 
// Usage: // __sanitizer_syscall_pre_getfoo(...args...); // long res = syscall(__NR_getfoo, ...args...); // __sanitizer_syscall_post_getfoo(res, ...args...); //===----------------------------------------------------------------------===// #ifndef SANITIZER_LINUX_SYSCALL_HOOKS_H #define SANITIZER_LINUX_SYSCALL_HOOKS_H #define __sanitizer_syscall_pre_time(tloc) \ __sanitizer_syscall_pre_impl_time((long)(tloc)) #define __sanitizer_syscall_post_time(res, tloc) \ __sanitizer_syscall_post_impl_time(res, (long)(tloc)) #define __sanitizer_syscall_pre_stime(tptr) \ __sanitizer_syscall_pre_impl_stime((long)(tptr)) #define __sanitizer_syscall_post_stime(res, tptr) \ __sanitizer_syscall_post_impl_stime(res, (long)(tptr)) #define __sanitizer_syscall_pre_gettimeofday(tv, tz) \ __sanitizer_syscall_pre_impl_gettimeofday((long)(tv), (long)(tz)) #define __sanitizer_syscall_post_gettimeofday(res, tv, tz) \ __sanitizer_syscall_post_impl_gettimeofday(res, (long)(tv), (long)(tz)) #define __sanitizer_syscall_pre_settimeofday(tv, tz) \ __sanitizer_syscall_pre_impl_settimeofday((long)(tv), (long)(tz)) #define __sanitizer_syscall_post_settimeofday(res, tv, tz) \ __sanitizer_syscall_post_impl_settimeofday(res, (long)(tv), (long)(tz)) #define __sanitizer_syscall_pre_adjtimex(txc_p) \ __sanitizer_syscall_pre_impl_adjtimex((long)(txc_p)) #define __sanitizer_syscall_post_adjtimex(res, txc_p) \ __sanitizer_syscall_post_impl_adjtimex(res, (long)(txc_p)) #define __sanitizer_syscall_pre_times(tbuf) \ __sanitizer_syscall_pre_impl_times((long)(tbuf)) #define __sanitizer_syscall_post_times(res, tbuf) \ __sanitizer_syscall_post_impl_times(res, (long)(tbuf)) #define __sanitizer_syscall_pre_gettid() __sanitizer_syscall_pre_impl_gettid() #define __sanitizer_syscall_post_gettid(res) \ __sanitizer_syscall_post_impl_gettid(res) #define __sanitizer_syscall_pre_nanosleep(rqtp, rmtp) \ __sanitizer_syscall_pre_impl_nanosleep((long)(rqtp), (long)(rmtp)) #define __sanitizer_syscall_post_nanosleep(res, rqtp, rmtp) \ __sanitizer_syscall_post_impl_nanosleep(res, (long)(rqtp), (long)(rmtp)) #define __sanitizer_syscall_pre_alarm(seconds) \ __sanitizer_syscall_pre_impl_alarm((long)(seconds)) #define __sanitizer_syscall_post_alarm(res, seconds) \ __sanitizer_syscall_post_impl_alarm(res, (long)(seconds)) #define __sanitizer_syscall_pre_getpid() __sanitizer_syscall_pre_impl_getpid() #define __sanitizer_syscall_post_getpid(res) \ __sanitizer_syscall_post_impl_getpid(res) #define __sanitizer_syscall_pre_getppid() __sanitizer_syscall_pre_impl_getppid() #define __sanitizer_syscall_post_getppid(res) \ __sanitizer_syscall_post_impl_getppid(res) #define __sanitizer_syscall_pre_getuid() __sanitizer_syscall_pre_impl_getuid() #define __sanitizer_syscall_post_getuid(res) \ __sanitizer_syscall_post_impl_getuid(res) #define __sanitizer_syscall_pre_geteuid() __sanitizer_syscall_pre_impl_geteuid() #define __sanitizer_syscall_post_geteuid(res) \ __sanitizer_syscall_post_impl_geteuid(res) #define __sanitizer_syscall_pre_getgid() __sanitizer_syscall_pre_impl_getgid() #define __sanitizer_syscall_post_getgid(res) \ __sanitizer_syscall_post_impl_getgid(res) #define __sanitizer_syscall_pre_getegid() __sanitizer_syscall_pre_impl_getegid() #define __sanitizer_syscall_post_getegid(res) \ __sanitizer_syscall_post_impl_getegid(res) #define __sanitizer_syscall_pre_getresuid(ruid, euid, suid) \ __sanitizer_syscall_pre_impl_getresuid((long)(ruid), (long)(euid), \ (long)(suid)) #define __sanitizer_syscall_post_getresuid(res, ruid, euid, suid) \ 
__sanitizer_syscall_post_impl_getresuid(res, (long)(ruid), (long)(euid), \ (long)(suid)) #define __sanitizer_syscall_pre_getresgid(rgid, egid, sgid) \ __sanitizer_syscall_pre_impl_getresgid((long)(rgid), (long)(egid), \ (long)(sgid)) #define __sanitizer_syscall_post_getresgid(res, rgid, egid, sgid) \ __sanitizer_syscall_post_impl_getresgid(res, (long)(rgid), (long)(egid), \ (long)(sgid)) #define __sanitizer_syscall_pre_getpgid(pid) \ __sanitizer_syscall_pre_impl_getpgid((long)(pid)) #define __sanitizer_syscall_post_getpgid(res, pid) \ __sanitizer_syscall_post_impl_getpgid(res, (long)(pid)) #define __sanitizer_syscall_pre_getpgrp() __sanitizer_syscall_pre_impl_getpgrp() #define __sanitizer_syscall_post_getpgrp(res) \ __sanitizer_syscall_post_impl_getpgrp(res) #define __sanitizer_syscall_pre_getsid(pid) \ __sanitizer_syscall_pre_impl_getsid((long)(pid)) #define __sanitizer_syscall_post_getsid(res, pid) \ __sanitizer_syscall_post_impl_getsid(res, (long)(pid)) #define __sanitizer_syscall_pre_getgroups(gidsetsize, grouplist) \ __sanitizer_syscall_pre_impl_getgroups((long)(gidsetsize), (long)(grouplist)) #define __sanitizer_syscall_post_getgroups(res, gidsetsize, grouplist) \ __sanitizer_syscall_post_impl_getgroups(res, (long)(gidsetsize), \ (long)(grouplist)) #define __sanitizer_syscall_pre_setregid(rgid, egid) \ __sanitizer_syscall_pre_impl_setregid((long)(rgid), (long)(egid)) #define __sanitizer_syscall_post_setregid(res, rgid, egid) \ __sanitizer_syscall_post_impl_setregid(res, (long)(rgid), (long)(egid)) #define __sanitizer_syscall_pre_setgid(gid) \ __sanitizer_syscall_pre_impl_setgid((long)(gid)) #define __sanitizer_syscall_post_setgid(res, gid) \ __sanitizer_syscall_post_impl_setgid(res, (long)(gid)) #define __sanitizer_syscall_pre_setreuid(ruid, euid) \ __sanitizer_syscall_pre_impl_setreuid((long)(ruid), (long)(euid)) #define __sanitizer_syscall_post_setreuid(res, ruid, euid) \ __sanitizer_syscall_post_impl_setreuid(res, (long)(ruid), (long)(euid)) #define __sanitizer_syscall_pre_setuid(uid) \ __sanitizer_syscall_pre_impl_setuid((long)(uid)) #define __sanitizer_syscall_post_setuid(res, uid) \ __sanitizer_syscall_post_impl_setuid(res, (long)(uid)) #define __sanitizer_syscall_pre_setresuid(ruid, euid, suid) \ __sanitizer_syscall_pre_impl_setresuid((long)(ruid), (long)(euid), \ (long)(suid)) #define __sanitizer_syscall_post_setresuid(res, ruid, euid, suid) \ __sanitizer_syscall_post_impl_setresuid(res, (long)(ruid), (long)(euid), \ (long)(suid)) #define __sanitizer_syscall_pre_setresgid(rgid, egid, sgid) \ __sanitizer_syscall_pre_impl_setresgid((long)(rgid), (long)(egid), \ (long)(sgid)) #define __sanitizer_syscall_post_setresgid(res, rgid, egid, sgid) \ __sanitizer_syscall_post_impl_setresgid(res, (long)(rgid), (long)(egid), \ (long)(sgid)) #define __sanitizer_syscall_pre_setfsuid(uid) \ __sanitizer_syscall_pre_impl_setfsuid((long)(uid)) #define __sanitizer_syscall_post_setfsuid(res, uid) \ __sanitizer_syscall_post_impl_setfsuid(res, (long)(uid)) #define __sanitizer_syscall_pre_setfsgid(gid) \ __sanitizer_syscall_pre_impl_setfsgid((long)(gid)) #define __sanitizer_syscall_post_setfsgid(res, gid) \ __sanitizer_syscall_post_impl_setfsgid(res, (long)(gid)) #define __sanitizer_syscall_pre_setpgid(pid, pgid) \ __sanitizer_syscall_pre_impl_setpgid((long)(pid), (long)(pgid)) #define __sanitizer_syscall_post_setpgid(res, pid, pgid) \ __sanitizer_syscall_post_impl_setpgid(res, (long)(pid), (long)(pgid)) #define __sanitizer_syscall_pre_setsid() __sanitizer_syscall_pre_impl_setsid() #define 
__sanitizer_syscall_post_setsid(res) \ __sanitizer_syscall_post_impl_setsid(res) #define __sanitizer_syscall_pre_setgroups(gidsetsize, grouplist) \ __sanitizer_syscall_pre_impl_setgroups((long)(gidsetsize), (long)(grouplist)) #define __sanitizer_syscall_post_setgroups(res, gidsetsize, grouplist) \ __sanitizer_syscall_post_impl_setgroups(res, (long)(gidsetsize), \ (long)(grouplist)) #define __sanitizer_syscall_pre_acct(name) \ __sanitizer_syscall_pre_impl_acct((long)(name)) #define __sanitizer_syscall_post_acct(res, name) \ __sanitizer_syscall_post_impl_acct(res, (long)(name)) #define __sanitizer_syscall_pre_capget(header, dataptr) \ __sanitizer_syscall_pre_impl_capget((long)(header), (long)(dataptr)) #define __sanitizer_syscall_post_capget(res, header, dataptr) \ __sanitizer_syscall_post_impl_capget(res, (long)(header), (long)(dataptr)) #define __sanitizer_syscall_pre_capset(header, data) \ __sanitizer_syscall_pre_impl_capset((long)(header), (long)(data)) #define __sanitizer_syscall_post_capset(res, header, data) \ __sanitizer_syscall_post_impl_capset(res, (long)(header), (long)(data)) #define __sanitizer_syscall_pre_personality(personality) \ __sanitizer_syscall_pre_impl_personality((long)(personality)) #define __sanitizer_syscall_post_personality(res, personality) \ __sanitizer_syscall_post_impl_personality(res, (long)(personality)) #define __sanitizer_syscall_pre_sigpending(set) \ __sanitizer_syscall_pre_impl_sigpending((long)(set)) #define __sanitizer_syscall_post_sigpending(res, set) \ __sanitizer_syscall_post_impl_sigpending(res, (long)(set)) #define __sanitizer_syscall_pre_sigprocmask(how, set, oset) \ __sanitizer_syscall_pre_impl_sigprocmask((long)(how), (long)(set), \ (long)(oset)) #define __sanitizer_syscall_post_sigprocmask(res, how, set, oset) \ __sanitizer_syscall_post_impl_sigprocmask(res, (long)(how), (long)(set), \ (long)(oset)) #define __sanitizer_syscall_pre_getitimer(which, value) \ __sanitizer_syscall_pre_impl_getitimer((long)(which), (long)(value)) #define __sanitizer_syscall_post_getitimer(res, which, value) \ __sanitizer_syscall_post_impl_getitimer(res, (long)(which), (long)(value)) #define __sanitizer_syscall_pre_setitimer(which, value, ovalue) \ __sanitizer_syscall_pre_impl_setitimer((long)(which), (long)(value), \ (long)(ovalue)) #define __sanitizer_syscall_post_setitimer(res, which, value, ovalue) \ __sanitizer_syscall_post_impl_setitimer(res, (long)(which), (long)(value), \ (long)(ovalue)) #define __sanitizer_syscall_pre_timer_create(which_clock, timer_event_spec, \ created_timer_id) \ __sanitizer_syscall_pre_impl_timer_create( \ (long)(which_clock), (long)(timer_event_spec), (long)(created_timer_id)) #define __sanitizer_syscall_post_timer_create( \ res, which_clock, timer_event_spec, created_timer_id) \ __sanitizer_syscall_post_impl_timer_create(res, (long)(which_clock), \ (long)(timer_event_spec), \ (long)(created_timer_id)) #define __sanitizer_syscall_pre_timer_gettime(timer_id, setting) \ __sanitizer_syscall_pre_impl_timer_gettime((long)(timer_id), (long)(setting)) #define __sanitizer_syscall_post_timer_gettime(res, timer_id, setting) \ __sanitizer_syscall_post_impl_timer_gettime(res, (long)(timer_id), \ (long)(setting)) #define __sanitizer_syscall_pre_timer_getoverrun(timer_id) \ __sanitizer_syscall_pre_impl_timer_getoverrun((long)(timer_id)) #define __sanitizer_syscall_post_timer_getoverrun(res, timer_id) \ __sanitizer_syscall_post_impl_timer_getoverrun(res, (long)(timer_id)) #define __sanitizer_syscall_pre_timer_settime(timer_id, flags, new_setting, \ 
old_setting) \ __sanitizer_syscall_pre_impl_timer_settime((long)(timer_id), (long)(flags), \ (long)(new_setting), \ (long)(old_setting)) #define __sanitizer_syscall_post_timer_settime(res, timer_id, flags, \ new_setting, old_setting) \ __sanitizer_syscall_post_impl_timer_settime( \ res, (long)(timer_id), (long)(flags), (long)(new_setting), \ (long)(old_setting)) #define __sanitizer_syscall_pre_timer_delete(timer_id) \ __sanitizer_syscall_pre_impl_timer_delete((long)(timer_id)) #define __sanitizer_syscall_post_timer_delete(res, timer_id) \ __sanitizer_syscall_post_impl_timer_delete(res, (long)(timer_id)) #define __sanitizer_syscall_pre_clock_settime(which_clock, tp) \ __sanitizer_syscall_pre_impl_clock_settime((long)(which_clock), (long)(tp)) #define __sanitizer_syscall_post_clock_settime(res, which_clock, tp) \ __sanitizer_syscall_post_impl_clock_settime(res, (long)(which_clock), \ (long)(tp)) #define __sanitizer_syscall_pre_clock_gettime(which_clock, tp) \ __sanitizer_syscall_pre_impl_clock_gettime((long)(which_clock), (long)(tp)) #define __sanitizer_syscall_post_clock_gettime(res, which_clock, tp) \ __sanitizer_syscall_post_impl_clock_gettime(res, (long)(which_clock), \ (long)(tp)) #define __sanitizer_syscall_pre_clock_adjtime(which_clock, tx) \ __sanitizer_syscall_pre_impl_clock_adjtime((long)(which_clock), (long)(tx)) #define __sanitizer_syscall_post_clock_adjtime(res, which_clock, tx) \ __sanitizer_syscall_post_impl_clock_adjtime(res, (long)(which_clock), \ (long)(tx)) #define __sanitizer_syscall_pre_clock_getres(which_clock, tp) \ __sanitizer_syscall_pre_impl_clock_getres((long)(which_clock), (long)(tp)) #define __sanitizer_syscall_post_clock_getres(res, which_clock, tp) \ __sanitizer_syscall_post_impl_clock_getres(res, (long)(which_clock), \ (long)(tp)) #define __sanitizer_syscall_pre_clock_nanosleep(which_clock, flags, rqtp, \ rmtp) \ __sanitizer_syscall_pre_impl_clock_nanosleep( \ (long)(which_clock), (long)(flags), (long)(rqtp), (long)(rmtp)) #define __sanitizer_syscall_post_clock_nanosleep(res, which_clock, flags, \ rqtp, rmtp) \ __sanitizer_syscall_post_impl_clock_nanosleep( \ res, (long)(which_clock), (long)(flags), (long)(rqtp), (long)(rmtp)) #define __sanitizer_syscall_pre_nice(increment) \ __sanitizer_syscall_pre_impl_nice((long)(increment)) #define __sanitizer_syscall_post_nice(res, increment) \ __sanitizer_syscall_post_impl_nice(res, (long)(increment)) #define __sanitizer_syscall_pre_sched_setscheduler(pid, policy, param) \ __sanitizer_syscall_pre_impl_sched_setscheduler((long)(pid), (long)(policy), \ (long)(param)) #define __sanitizer_syscall_post_sched_setscheduler(res, pid, policy, param) \ __sanitizer_syscall_post_impl_sched_setscheduler( \ res, (long)(pid), (long)(policy), (long)(param)) #define __sanitizer_syscall_pre_sched_setparam(pid, param) \ __sanitizer_syscall_pre_impl_sched_setparam((long)(pid), (long)(param)) #define __sanitizer_syscall_post_sched_setparam(res, pid, param) \ __sanitizer_syscall_post_impl_sched_setparam(res, (long)(pid), (long)(param)) #define __sanitizer_syscall_pre_sched_getscheduler(pid) \ __sanitizer_syscall_pre_impl_sched_getscheduler((long)(pid)) #define __sanitizer_syscall_post_sched_getscheduler(res, pid) \ __sanitizer_syscall_post_impl_sched_getscheduler(res, (long)(pid)) #define __sanitizer_syscall_pre_sched_getparam(pid, param) \ __sanitizer_syscall_pre_impl_sched_getparam((long)(pid), (long)(param)) #define __sanitizer_syscall_post_sched_getparam(res, pid, param) \ __sanitizer_syscall_post_impl_sched_getparam(res, (long)(pid), 
(long)(param)) #define __sanitizer_syscall_pre_sched_setaffinity(pid, len, user_mask_ptr) \ __sanitizer_syscall_pre_impl_sched_setaffinity((long)(pid), (long)(len), \ (long)(user_mask_ptr)) #define __sanitizer_syscall_post_sched_setaffinity(res, pid, len, \ user_mask_ptr) \ __sanitizer_syscall_post_impl_sched_setaffinity( \ res, (long)(pid), (long)(len), (long)(user_mask_ptr)) #define __sanitizer_syscall_pre_sched_getaffinity(pid, len, user_mask_ptr) \ __sanitizer_syscall_pre_impl_sched_getaffinity((long)(pid), (long)(len), \ (long)(user_mask_ptr)) #define __sanitizer_syscall_post_sched_getaffinity(res, pid, len, \ user_mask_ptr) \ __sanitizer_syscall_post_impl_sched_getaffinity( \ res, (long)(pid), (long)(len), (long)(user_mask_ptr)) #define __sanitizer_syscall_pre_sched_yield() \ __sanitizer_syscall_pre_impl_sched_yield() #define __sanitizer_syscall_post_sched_yield(res) \ __sanitizer_syscall_post_impl_sched_yield(res) #define __sanitizer_syscall_pre_sched_get_priority_max(policy) \ __sanitizer_syscall_pre_impl_sched_get_priority_max((long)(policy)) #define __sanitizer_syscall_post_sched_get_priority_max(res, policy) \ __sanitizer_syscall_post_impl_sched_get_priority_max(res, (long)(policy)) #define __sanitizer_syscall_pre_sched_get_priority_min(policy) \ __sanitizer_syscall_pre_impl_sched_get_priority_min((long)(policy)) #define __sanitizer_syscall_post_sched_get_priority_min(res, policy) \ __sanitizer_syscall_post_impl_sched_get_priority_min(res, (long)(policy)) #define __sanitizer_syscall_pre_sched_rr_get_interval(pid, interval) \ __sanitizer_syscall_pre_impl_sched_rr_get_interval((long)(pid), \ (long)(interval)) #define __sanitizer_syscall_post_sched_rr_get_interval(res, pid, interval) \ __sanitizer_syscall_post_impl_sched_rr_get_interval(res, (long)(pid), \ (long)(interval)) #define __sanitizer_syscall_pre_setpriority(which, who, niceval) \ __sanitizer_syscall_pre_impl_setpriority((long)(which), (long)(who), \ (long)(niceval)) #define __sanitizer_syscall_post_setpriority(res, which, who, niceval) \ __sanitizer_syscall_post_impl_setpriority(res, (long)(which), (long)(who), \ (long)(niceval)) #define __sanitizer_syscall_pre_getpriority(which, who) \ __sanitizer_syscall_pre_impl_getpriority((long)(which), (long)(who)) #define __sanitizer_syscall_post_getpriority(res, which, who) \ __sanitizer_syscall_post_impl_getpriority(res, (long)(which), (long)(who)) #define __sanitizer_syscall_pre_shutdown(arg0, arg1) \ __sanitizer_syscall_pre_impl_shutdown((long)(arg0), (long)(arg1)) #define __sanitizer_syscall_post_shutdown(res, arg0, arg1) \ __sanitizer_syscall_post_impl_shutdown(res, (long)(arg0), (long)(arg1)) #define __sanitizer_syscall_pre_reboot(magic1, magic2, cmd, arg) \ __sanitizer_syscall_pre_impl_reboot((long)(magic1), (long)(magic2), \ (long)(cmd), (long)(arg)) #define __sanitizer_syscall_post_reboot(res, magic1, magic2, cmd, arg) \ __sanitizer_syscall_post_impl_reboot(res, (long)(magic1), (long)(magic2), \ (long)(cmd), (long)(arg)) #define __sanitizer_syscall_pre_restart_syscall() \ __sanitizer_syscall_pre_impl_restart_syscall() #define __sanitizer_syscall_post_restart_syscall(res) \ __sanitizer_syscall_post_impl_restart_syscall(res) #define __sanitizer_syscall_pre_kexec_load(entry, nr_segments, segments, \ flags) \ __sanitizer_syscall_pre_impl_kexec_load((long)(entry), (long)(nr_segments), \ (long)(segments), (long)(flags)) #define __sanitizer_syscall_post_kexec_load(res, entry, nr_segments, segments, \ flags) \ __sanitizer_syscall_post_impl_kexec_load(res, (long)(entry), \ 
(long)(nr_segments), \ (long)(segments), (long)(flags)) #define __sanitizer_syscall_pre_exit(error_code) \ __sanitizer_syscall_pre_impl_exit((long)(error_code)) #define __sanitizer_syscall_post_exit(res, error_code) \ __sanitizer_syscall_post_impl_exit(res, (long)(error_code)) #define __sanitizer_syscall_pre_exit_group(error_code) \ __sanitizer_syscall_pre_impl_exit_group((long)(error_code)) #define __sanitizer_syscall_post_exit_group(res, error_code) \ __sanitizer_syscall_post_impl_exit_group(res, (long)(error_code)) #define __sanitizer_syscall_pre_wait4(pid, stat_addr, options, ru) \ __sanitizer_syscall_pre_impl_wait4((long)(pid), (long)(stat_addr), \ (long)(options), (long)(ru)) #define __sanitizer_syscall_post_wait4(res, pid, stat_addr, options, ru) \ __sanitizer_syscall_post_impl_wait4(res, (long)(pid), (long)(stat_addr), \ (long)(options), (long)(ru)) #define __sanitizer_syscall_pre_waitid(which, pid, infop, options, ru) \ __sanitizer_syscall_pre_impl_waitid( \ (long)(which), (long)(pid), (long)(infop), (long)(options), (long)(ru)) #define __sanitizer_syscall_post_waitid(res, which, pid, infop, options, ru) \ __sanitizer_syscall_post_impl_waitid(res, (long)(which), (long)(pid), \ (long)(infop), (long)(options), \ (long)(ru)) #define __sanitizer_syscall_pre_waitpid(pid, stat_addr, options) \ __sanitizer_syscall_pre_impl_waitpid((long)(pid), (long)(stat_addr), \ (long)(options)) #define __sanitizer_syscall_post_waitpid(res, pid, stat_addr, options) \ __sanitizer_syscall_post_impl_waitpid(res, (long)(pid), (long)(stat_addr), \ (long)(options)) #define __sanitizer_syscall_pre_set_tid_address(tidptr) \ __sanitizer_syscall_pre_impl_set_tid_address((long)(tidptr)) #define __sanitizer_syscall_post_set_tid_address(res, tidptr) \ __sanitizer_syscall_post_impl_set_tid_address(res, (long)(tidptr)) #define __sanitizer_syscall_pre_init_module(umod, len, uargs) \ __sanitizer_syscall_pre_impl_init_module((long)(umod), (long)(len), \ (long)(uargs)) #define __sanitizer_syscall_post_init_module(res, umod, len, uargs) \ __sanitizer_syscall_post_impl_init_module(res, (long)(umod), (long)(len), \ (long)(uargs)) #define __sanitizer_syscall_pre_delete_module(name_user, flags) \ __sanitizer_syscall_pre_impl_delete_module((long)(name_user), (long)(flags)) #define __sanitizer_syscall_post_delete_module(res, name_user, flags) \ __sanitizer_syscall_post_impl_delete_module(res, (long)(name_user), \ (long)(flags)) #define __sanitizer_syscall_pre_rt_sigprocmask(how, set, oset, sigsetsize) \ __sanitizer_syscall_pre_impl_rt_sigprocmask( \ (long)(how), (long)(set), (long)(oset), (long)(sigsetsize)) #define __sanitizer_syscall_post_rt_sigprocmask(res, how, set, oset, \ sigsetsize) \ __sanitizer_syscall_post_impl_rt_sigprocmask( \ res, (long)(how), (long)(set), (long)(oset), (long)(sigsetsize)) #define __sanitizer_syscall_pre_rt_sigpending(set, sigsetsize) \ __sanitizer_syscall_pre_impl_rt_sigpending((long)(set), (long)(sigsetsize)) #define __sanitizer_syscall_post_rt_sigpending(res, set, sigsetsize) \ __sanitizer_syscall_post_impl_rt_sigpending(res, (long)(set), \ (long)(sigsetsize)) #define __sanitizer_syscall_pre_rt_sigtimedwait(uthese, uinfo, uts, \ sigsetsize) \ __sanitizer_syscall_pre_impl_rt_sigtimedwait( \ (long)(uthese), (long)(uinfo), (long)(uts), (long)(sigsetsize)) #define __sanitizer_syscall_post_rt_sigtimedwait(res, uthese, uinfo, uts, \ sigsetsize) \ __sanitizer_syscall_post_impl_rt_sigtimedwait( \ res, (long)(uthese), (long)(uinfo), (long)(uts), (long)(sigsetsize)) #define 
__sanitizer_syscall_pre_rt_tgsigqueueinfo(tgid, pid, sig, uinfo) \ __sanitizer_syscall_pre_impl_rt_tgsigqueueinfo((long)(tgid), (long)(pid), \ (long)(sig), (long)(uinfo)) #define __sanitizer_syscall_post_rt_tgsigqueueinfo(res, tgid, pid, sig, uinfo) \ __sanitizer_syscall_post_impl_rt_tgsigqueueinfo( \ res, (long)(tgid), (long)(pid), (long)(sig), (long)(uinfo)) #define __sanitizer_syscall_pre_kill(pid, sig) \ __sanitizer_syscall_pre_impl_kill((long)(pid), (long)(sig)) #define __sanitizer_syscall_post_kill(res, pid, sig) \ __sanitizer_syscall_post_impl_kill(res, (long)(pid), (long)(sig)) #define __sanitizer_syscall_pre_tgkill(tgid, pid, sig) \ __sanitizer_syscall_pre_impl_tgkill((long)(tgid), (long)(pid), (long)(sig)) #define __sanitizer_syscall_post_tgkill(res, tgid, pid, sig) \ __sanitizer_syscall_post_impl_tgkill(res, (long)(tgid), (long)(pid), \ (long)(sig)) #define __sanitizer_syscall_pre_tkill(pid, sig) \ __sanitizer_syscall_pre_impl_tkill((long)(pid), (long)(sig)) #define __sanitizer_syscall_post_tkill(res, pid, sig) \ __sanitizer_syscall_post_impl_tkill(res, (long)(pid), (long)(sig)) #define __sanitizer_syscall_pre_rt_sigqueueinfo(pid, sig, uinfo) \ __sanitizer_syscall_pre_impl_rt_sigqueueinfo((long)(pid), (long)(sig), \ (long)(uinfo)) #define __sanitizer_syscall_post_rt_sigqueueinfo(res, pid, sig, uinfo) \ __sanitizer_syscall_post_impl_rt_sigqueueinfo(res, (long)(pid), (long)(sig), \ (long)(uinfo)) #define __sanitizer_syscall_pre_sgetmask() \ __sanitizer_syscall_pre_impl_sgetmask() #define __sanitizer_syscall_post_sgetmask(res) \ __sanitizer_syscall_post_impl_sgetmask(res) #define __sanitizer_syscall_pre_ssetmask(newmask) \ __sanitizer_syscall_pre_impl_ssetmask((long)(newmask)) #define __sanitizer_syscall_post_ssetmask(res, newmask) \ __sanitizer_syscall_post_impl_ssetmask(res, (long)(newmask)) #define __sanitizer_syscall_pre_signal(sig, handler) \ __sanitizer_syscall_pre_impl_signal((long)(sig), (long)(handler)) #define __sanitizer_syscall_post_signal(res, sig, handler) \ __sanitizer_syscall_post_impl_signal(res, (long)(sig), (long)(handler)) #define __sanitizer_syscall_pre_pause() __sanitizer_syscall_pre_impl_pause() #define __sanitizer_syscall_post_pause(res) \ __sanitizer_syscall_post_impl_pause(res) #define __sanitizer_syscall_pre_sync() __sanitizer_syscall_pre_impl_sync() #define __sanitizer_syscall_post_sync(res) \ __sanitizer_syscall_post_impl_sync(res) #define __sanitizer_syscall_pre_fsync(fd) \ __sanitizer_syscall_pre_impl_fsync((long)(fd)) #define __sanitizer_syscall_post_fsync(res, fd) \ __sanitizer_syscall_post_impl_fsync(res, (long)(fd)) #define __sanitizer_syscall_pre_fdatasync(fd) \ __sanitizer_syscall_pre_impl_fdatasync((long)(fd)) #define __sanitizer_syscall_post_fdatasync(res, fd) \ __sanitizer_syscall_post_impl_fdatasync(res, (long)(fd)) #define __sanitizer_syscall_pre_bdflush(func, data) \ __sanitizer_syscall_pre_impl_bdflush((long)(func), (long)(data)) #define __sanitizer_syscall_post_bdflush(res, func, data) \ __sanitizer_syscall_post_impl_bdflush(res, (long)(func), (long)(data)) #define __sanitizer_syscall_pre_mount(dev_name, dir_name, type, flags, data) \ __sanitizer_syscall_pre_impl_mount((long)(dev_name), (long)(dir_name), \ (long)(type), (long)(flags), \ (long)(data)) #define __sanitizer_syscall_post_mount(res, dev_name, dir_name, type, flags, \ data) \ __sanitizer_syscall_post_impl_mount(res, (long)(dev_name), (long)(dir_name), \ (long)(type), (long)(flags), \ (long)(data)) #define __sanitizer_syscall_pre_umount(name, flags) \ 
__sanitizer_syscall_pre_impl_umount((long)(name), (long)(flags)) #define __sanitizer_syscall_post_umount(res, name, flags) \ __sanitizer_syscall_post_impl_umount(res, (long)(name), (long)(flags)) #define __sanitizer_syscall_pre_oldumount(name) \ __sanitizer_syscall_pre_impl_oldumount((long)(name)) #define __sanitizer_syscall_post_oldumount(res, name) \ __sanitizer_syscall_post_impl_oldumount(res, (long)(name)) #define __sanitizer_syscall_pre_truncate(path, length) \ __sanitizer_syscall_pre_impl_truncate((long)(path), (long)(length)) #define __sanitizer_syscall_post_truncate(res, path, length) \ __sanitizer_syscall_post_impl_truncate(res, (long)(path), (long)(length)) #define __sanitizer_syscall_pre_ftruncate(fd, length) \ __sanitizer_syscall_pre_impl_ftruncate((long)(fd), (long)(length)) #define __sanitizer_syscall_post_ftruncate(res, fd, length) \ __sanitizer_syscall_post_impl_ftruncate(res, (long)(fd), (long)(length)) #define __sanitizer_syscall_pre_stat(filename, statbuf) \ __sanitizer_syscall_pre_impl_stat((long)(filename), (long)(statbuf)) #define __sanitizer_syscall_post_stat(res, filename, statbuf) \ __sanitizer_syscall_post_impl_stat(res, (long)(filename), (long)(statbuf)) #define __sanitizer_syscall_pre_statfs(path, buf) \ __sanitizer_syscall_pre_impl_statfs((long)(path), (long)(buf)) #define __sanitizer_syscall_post_statfs(res, path, buf) \ __sanitizer_syscall_post_impl_statfs(res, (long)(path), (long)(buf)) #define __sanitizer_syscall_pre_statfs64(path, sz, buf) \ __sanitizer_syscall_pre_impl_statfs64((long)(path), (long)(sz), (long)(buf)) #define __sanitizer_syscall_post_statfs64(res, path, sz, buf) \ __sanitizer_syscall_post_impl_statfs64(res, (long)(path), (long)(sz), \ (long)(buf)) #define __sanitizer_syscall_pre_fstatfs(fd, buf) \ __sanitizer_syscall_pre_impl_fstatfs((long)(fd), (long)(buf)) #define __sanitizer_syscall_post_fstatfs(res, fd, buf) \ __sanitizer_syscall_post_impl_fstatfs(res, (long)(fd), (long)(buf)) #define __sanitizer_syscall_pre_fstatfs64(fd, sz, buf) \ __sanitizer_syscall_pre_impl_fstatfs64((long)(fd), (long)(sz), (long)(buf)) #define __sanitizer_syscall_post_fstatfs64(res, fd, sz, buf) \ __sanitizer_syscall_post_impl_fstatfs64(res, (long)(fd), (long)(sz), \ (long)(buf)) #define __sanitizer_syscall_pre_lstat(filename, statbuf) \ __sanitizer_syscall_pre_impl_lstat((long)(filename), (long)(statbuf)) #define __sanitizer_syscall_post_lstat(res, filename, statbuf) \ __sanitizer_syscall_post_impl_lstat(res, (long)(filename), (long)(statbuf)) #define __sanitizer_syscall_pre_fstat(fd, statbuf) \ __sanitizer_syscall_pre_impl_fstat((long)(fd), (long)(statbuf)) #define __sanitizer_syscall_post_fstat(res, fd, statbuf) \ __sanitizer_syscall_post_impl_fstat(res, (long)(fd), (long)(statbuf)) #define __sanitizer_syscall_pre_newstat(filename, statbuf) \ __sanitizer_syscall_pre_impl_newstat((long)(filename), (long)(statbuf)) #define __sanitizer_syscall_post_newstat(res, filename, statbuf) \ __sanitizer_syscall_post_impl_newstat(res, (long)(filename), (long)(statbuf)) #define __sanitizer_syscall_pre_newlstat(filename, statbuf) \ __sanitizer_syscall_pre_impl_newlstat((long)(filename), (long)(statbuf)) #define __sanitizer_syscall_post_newlstat(res, filename, statbuf) \ __sanitizer_syscall_post_impl_newlstat(res, (long)(filename), (long)(statbuf)) #define __sanitizer_syscall_pre_newfstat(fd, statbuf) \ __sanitizer_syscall_pre_impl_newfstat((long)(fd), (long)(statbuf)) #define __sanitizer_syscall_post_newfstat(res, fd, statbuf) \ __sanitizer_syscall_post_impl_newfstat(res, 
(long)(fd), (long)(statbuf)) #define __sanitizer_syscall_pre_ustat(dev, ubuf) \ __sanitizer_syscall_pre_impl_ustat((long)(dev), (long)(ubuf)) #define __sanitizer_syscall_post_ustat(res, dev, ubuf) \ __sanitizer_syscall_post_impl_ustat(res, (long)(dev), (long)(ubuf)) #define __sanitizer_syscall_pre_stat64(filename, statbuf) \ __sanitizer_syscall_pre_impl_stat64((long)(filename), (long)(statbuf)) #define __sanitizer_syscall_post_stat64(res, filename, statbuf) \ __sanitizer_syscall_post_impl_stat64(res, (long)(filename), (long)(statbuf)) #define __sanitizer_syscall_pre_fstat64(fd, statbuf) \ __sanitizer_syscall_pre_impl_fstat64((long)(fd), (long)(statbuf)) #define __sanitizer_syscall_post_fstat64(res, fd, statbuf) \ __sanitizer_syscall_post_impl_fstat64(res, (long)(fd), (long)(statbuf)) #define __sanitizer_syscall_pre_lstat64(filename, statbuf) \ __sanitizer_syscall_pre_impl_lstat64((long)(filename), (long)(statbuf)) #define __sanitizer_syscall_post_lstat64(res, filename, statbuf) \ __sanitizer_syscall_post_impl_lstat64(res, (long)(filename), (long)(statbuf)) #define __sanitizer_syscall_pre_setxattr(path, name, value, size, flags) \ __sanitizer_syscall_pre_impl_setxattr( \ (long)(path), (long)(name), (long)(value), (long)(size), (long)(flags)) #define __sanitizer_syscall_post_setxattr(res, path, name, value, size, flags) \ __sanitizer_syscall_post_impl_setxattr(res, (long)(path), (long)(name), \ (long)(value), (long)(size), \ (long)(flags)) #define __sanitizer_syscall_pre_lsetxattr(path, name, value, size, flags) \ __sanitizer_syscall_pre_impl_lsetxattr( \ (long)(path), (long)(name), (long)(value), (long)(size), (long)(flags)) #define __sanitizer_syscall_post_lsetxattr(res, path, name, value, size, \ flags) \ __sanitizer_syscall_post_impl_lsetxattr(res, (long)(path), (long)(name), \ (long)(value), (long)(size), \ (long)(flags)) #define __sanitizer_syscall_pre_fsetxattr(fd, name, value, size, flags) \ __sanitizer_syscall_pre_impl_fsetxattr( \ (long)(fd), (long)(name), (long)(value), (long)(size), (long)(flags)) #define __sanitizer_syscall_post_fsetxattr(res, fd, name, value, size, flags) \ __sanitizer_syscall_post_impl_fsetxattr(res, (long)(fd), (long)(name), \ (long)(value), (long)(size), \ (long)(flags)) #define __sanitizer_syscall_pre_getxattr(path, name, value, size) \ __sanitizer_syscall_pre_impl_getxattr((long)(path), (long)(name), \ (long)(value), (long)(size)) #define __sanitizer_syscall_post_getxattr(res, path, name, value, size) \ __sanitizer_syscall_post_impl_getxattr(res, (long)(path), (long)(name), \ (long)(value), (long)(size)) #define __sanitizer_syscall_pre_lgetxattr(path, name, value, size) \ __sanitizer_syscall_pre_impl_lgetxattr((long)(path), (long)(name), \ (long)(value), (long)(size)) #define __sanitizer_syscall_post_lgetxattr(res, path, name, value, size) \ __sanitizer_syscall_post_impl_lgetxattr(res, (long)(path), (long)(name), \ (long)(value), (long)(size)) #define __sanitizer_syscall_pre_fgetxattr(fd, name, value, size) \ __sanitizer_syscall_pre_impl_fgetxattr((long)(fd), (long)(name), \ (long)(value), (long)(size)) #define __sanitizer_syscall_post_fgetxattr(res, fd, name, value, size) \ __sanitizer_syscall_post_impl_fgetxattr(res, (long)(fd), (long)(name), \ (long)(value), (long)(size)) #define __sanitizer_syscall_pre_listxattr(path, list, size) \ __sanitizer_syscall_pre_impl_listxattr((long)(path), (long)(list), \ (long)(size)) #define __sanitizer_syscall_post_listxattr(res, path, list, size) \ __sanitizer_syscall_post_impl_listxattr(res, (long)(path), (long)(list), 
\ (long)(size)) #define __sanitizer_syscall_pre_llistxattr(path, list, size) \ __sanitizer_syscall_pre_impl_llistxattr((long)(path), (long)(list), \ (long)(size)) #define __sanitizer_syscall_post_llistxattr(res, path, list, size) \ __sanitizer_syscall_post_impl_llistxattr(res, (long)(path), (long)(list), \ (long)(size)) #define __sanitizer_syscall_pre_flistxattr(fd, list, size) \ __sanitizer_syscall_pre_impl_flistxattr((long)(fd), (long)(list), \ (long)(size)) #define __sanitizer_syscall_post_flistxattr(res, fd, list, size) \ __sanitizer_syscall_post_impl_flistxattr(res, (long)(fd), (long)(list), \ (long)(size)) #define __sanitizer_syscall_pre_removexattr(path, name) \ __sanitizer_syscall_pre_impl_removexattr((long)(path), (long)(name)) #define __sanitizer_syscall_post_removexattr(res, path, name) \ __sanitizer_syscall_post_impl_removexattr(res, (long)(path), (long)(name)) #define __sanitizer_syscall_pre_lremovexattr(path, name) \ __sanitizer_syscall_pre_impl_lremovexattr((long)(path), (long)(name)) #define __sanitizer_syscall_post_lremovexattr(res, path, name) \ __sanitizer_syscall_post_impl_lremovexattr(res, (long)(path), (long)(name)) #define __sanitizer_syscall_pre_fremovexattr(fd, name) \ __sanitizer_syscall_pre_impl_fremovexattr((long)(fd), (long)(name)) #define __sanitizer_syscall_post_fremovexattr(res, fd, name) \ __sanitizer_syscall_post_impl_fremovexattr(res, (long)(fd), (long)(name)) #define __sanitizer_syscall_pre_brk(brk) \ __sanitizer_syscall_pre_impl_brk((long)(brk)) #define __sanitizer_syscall_post_brk(res, brk) \ __sanitizer_syscall_post_impl_brk(res, (long)(brk)) #define __sanitizer_syscall_pre_mprotect(start, len, prot) \ __sanitizer_syscall_pre_impl_mprotect((long)(start), (long)(len), \ (long)(prot)) #define __sanitizer_syscall_post_mprotect(res, start, len, prot) \ __sanitizer_syscall_post_impl_mprotect(res, (long)(start), (long)(len), \ (long)(prot)) #define __sanitizer_syscall_pre_mremap(addr, old_len, new_len, flags, \ new_addr) \ __sanitizer_syscall_pre_impl_mremap((long)(addr), (long)(old_len), \ (long)(new_len), (long)(flags), \ (long)(new_addr)) #define __sanitizer_syscall_post_mremap(res, addr, old_len, new_len, flags, \ new_addr) \ __sanitizer_syscall_post_impl_mremap(res, (long)(addr), (long)(old_len), \ (long)(new_len), (long)(flags), \ (long)(new_addr)) #define __sanitizer_syscall_pre_remap_file_pages(start, size, prot, pgoff, \ flags) \ __sanitizer_syscall_pre_impl_remap_file_pages( \ (long)(start), (long)(size), (long)(prot), (long)(pgoff), (long)(flags)) #define __sanitizer_syscall_post_remap_file_pages(res, start, size, prot, \ pgoff, flags) \ __sanitizer_syscall_post_impl_remap_file_pages(res, (long)(start), \ (long)(size), (long)(prot), \ (long)(pgoff), (long)(flags)) #define __sanitizer_syscall_pre_msync(start, len, flags) \ __sanitizer_syscall_pre_impl_msync((long)(start), (long)(len), (long)(flags)) #define __sanitizer_syscall_post_msync(res, start, len, flags) \ __sanitizer_syscall_post_impl_msync(res, (long)(start), (long)(len), \ (long)(flags)) #define __sanitizer_syscall_pre_munmap(addr, len) \ __sanitizer_syscall_pre_impl_munmap((long)(addr), (long)(len)) #define __sanitizer_syscall_post_munmap(res, addr, len) \ __sanitizer_syscall_post_impl_munmap(res, (long)(addr), (long)(len)) #define __sanitizer_syscall_pre_mlock(start, len) \ __sanitizer_syscall_pre_impl_mlock((long)(start), (long)(len)) #define __sanitizer_syscall_post_mlock(res, start, len) \ __sanitizer_syscall_post_impl_mlock(res, (long)(start), (long)(len)) #define 
__sanitizer_syscall_pre_munlock(start, len) \ __sanitizer_syscall_pre_impl_munlock((long)(start), (long)(len)) #define __sanitizer_syscall_post_munlock(res, start, len) \ __sanitizer_syscall_post_impl_munlock(res, (long)(start), (long)(len)) #define __sanitizer_syscall_pre_mlockall(flags) \ __sanitizer_syscall_pre_impl_mlockall((long)(flags)) #define __sanitizer_syscall_post_mlockall(res, flags) \ __sanitizer_syscall_post_impl_mlockall(res, (long)(flags)) #define __sanitizer_syscall_pre_munlockall() \ __sanitizer_syscall_pre_impl_munlockall() #define __sanitizer_syscall_post_munlockall(res) \ __sanitizer_syscall_post_impl_munlockall(res) #define __sanitizer_syscall_pre_madvise(start, len, behavior) \ __sanitizer_syscall_pre_impl_madvise((long)(start), (long)(len), \ (long)(behavior)) #define __sanitizer_syscall_post_madvise(res, start, len, behavior) \ __sanitizer_syscall_post_impl_madvise(res, (long)(start), (long)(len), \ (long)(behavior)) #define __sanitizer_syscall_pre_mincore(start, len, vec) \ __sanitizer_syscall_pre_impl_mincore((long)(start), (long)(len), (long)(vec)) #define __sanitizer_syscall_post_mincore(res, start, len, vec) \ __sanitizer_syscall_post_impl_mincore(res, (long)(start), (long)(len), \ (long)(vec)) #define __sanitizer_syscall_pre_pivot_root(new_root, put_old) \ __sanitizer_syscall_pre_impl_pivot_root((long)(new_root), (long)(put_old)) #define __sanitizer_syscall_post_pivot_root(res, new_root, put_old) \ __sanitizer_syscall_post_impl_pivot_root(res, (long)(new_root), \ (long)(put_old)) #define __sanitizer_syscall_pre_chroot(filename) \ __sanitizer_syscall_pre_impl_chroot((long)(filename)) #define __sanitizer_syscall_post_chroot(res, filename) \ __sanitizer_syscall_post_impl_chroot(res, (long)(filename)) #define __sanitizer_syscall_pre_mknod(filename, mode, dev) \ __sanitizer_syscall_pre_impl_mknod((long)(filename), (long)(mode), \ (long)(dev)) #define __sanitizer_syscall_post_mknod(res, filename, mode, dev) \ __sanitizer_syscall_post_impl_mknod(res, (long)(filename), (long)(mode), \ (long)(dev)) #define __sanitizer_syscall_pre_link(oldname, newname) \ __sanitizer_syscall_pre_impl_link((long)(oldname), (long)(newname)) #define __sanitizer_syscall_post_link(res, oldname, newname) \ __sanitizer_syscall_post_impl_link(res, (long)(oldname), (long)(newname)) #define __sanitizer_syscall_pre_symlink(old, new_) \ __sanitizer_syscall_pre_impl_symlink((long)(old), (long)(new_)) #define __sanitizer_syscall_post_symlink(res, old, new_) \ __sanitizer_syscall_post_impl_symlink(res, (long)(old), (long)(new_)) #define __sanitizer_syscall_pre_unlink(pathname) \ __sanitizer_syscall_pre_impl_unlink((long)(pathname)) #define __sanitizer_syscall_post_unlink(res, pathname) \ __sanitizer_syscall_post_impl_unlink(res, (long)(pathname)) #define __sanitizer_syscall_pre_rename(oldname, newname) \ __sanitizer_syscall_pre_impl_rename((long)(oldname), (long)(newname)) #define __sanitizer_syscall_post_rename(res, oldname, newname) \ __sanitizer_syscall_post_impl_rename(res, (long)(oldname), (long)(newname)) #define __sanitizer_syscall_pre_chmod(filename, mode) \ __sanitizer_syscall_pre_impl_chmod((long)(filename), (long)(mode)) #define __sanitizer_syscall_post_chmod(res, filename, mode) \ __sanitizer_syscall_post_impl_chmod(res, (long)(filename), (long)(mode)) #define __sanitizer_syscall_pre_fchmod(fd, mode) \ __sanitizer_syscall_pre_impl_fchmod((long)(fd), (long)(mode)) #define __sanitizer_syscall_post_fchmod(res, fd, mode) \ __sanitizer_syscall_post_impl_fchmod(res, (long)(fd), (long)(mode)) 
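/* Illustrative sketch (not part of the original header): the pre/post macros
 * above are meant to bracket a raw syscall(2) invocation, following the
 * "Usage" pattern documented in the banner comment of this file.  The wrapper
 * below shows this for fchmod, using the __sanitizer_syscall_pre_fchmod /
 * __sanitizer_syscall_post_fchmod hooks defined just above.  The helper name
 * example_fchmod_with_hooks and the #if 0 guard are illustration-only
 * additions, so nothing here is compiled into users of this header. */
#if 0
#include <sys/syscall.h>
#include <unistd.h>

static long example_fchmod_with_hooks(int fd, unsigned int mode) {
  /* Let the active sanitizer inspect the arguments before the kernel runs. */
  __sanitizer_syscall_pre_fchmod(fd, mode);
  long res = syscall(SYS_fchmod, fd, mode);
  /* Let the sanitizer record the result after the syscall returns. */
  __sanitizer_syscall_post_fchmod(res, fd, mode);
  return res;
}
#endif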
#define __sanitizer_syscall_pre_fcntl(fd, cmd, arg) \ __sanitizer_syscall_pre_impl_fcntl((long)(fd), (long)(cmd), (long)(arg)) #define __sanitizer_syscall_post_fcntl(res, fd, cmd, arg) \ __sanitizer_syscall_post_impl_fcntl(res, (long)(fd), (long)(cmd), (long)(arg)) #define __sanitizer_syscall_pre_fcntl64(fd, cmd, arg) \ __sanitizer_syscall_pre_impl_fcntl64((long)(fd), (long)(cmd), (long)(arg)) #define __sanitizer_syscall_post_fcntl64(res, fd, cmd, arg) \ __sanitizer_syscall_post_impl_fcntl64(res, (long)(fd), (long)(cmd), \ (long)(arg)) #define __sanitizer_syscall_pre_pipe(fildes) \ __sanitizer_syscall_pre_impl_pipe((long)(fildes)) #define __sanitizer_syscall_post_pipe(res, fildes) \ __sanitizer_syscall_post_impl_pipe(res, (long)(fildes)) #define __sanitizer_syscall_pre_pipe2(fildes, flags) \ __sanitizer_syscall_pre_impl_pipe2((long)(fildes), (long)(flags)) #define __sanitizer_syscall_post_pipe2(res, fildes, flags) \ __sanitizer_syscall_post_impl_pipe2(res, (long)(fildes), (long)(flags)) #define __sanitizer_syscall_pre_dup(fildes) \ __sanitizer_syscall_pre_impl_dup((long)(fildes)) #define __sanitizer_syscall_post_dup(res, fildes) \ __sanitizer_syscall_post_impl_dup(res, (long)(fildes)) #define __sanitizer_syscall_pre_dup2(oldfd, newfd) \ __sanitizer_syscall_pre_impl_dup2((long)(oldfd), (long)(newfd)) #define __sanitizer_syscall_post_dup2(res, oldfd, newfd) \ __sanitizer_syscall_post_impl_dup2(res, (long)(oldfd), (long)(newfd)) #define __sanitizer_syscall_pre_dup3(oldfd, newfd, flags) \ __sanitizer_syscall_pre_impl_dup3((long)(oldfd), (long)(newfd), (long)(flags)) #define __sanitizer_syscall_post_dup3(res, oldfd, newfd, flags) \ __sanitizer_syscall_post_impl_dup3(res, (long)(oldfd), (long)(newfd), \ (long)(flags)) #define __sanitizer_syscall_pre_ioperm(from, num, on) \ __sanitizer_syscall_pre_impl_ioperm((long)(from), (long)(num), (long)(on)) #define __sanitizer_syscall_post_ioperm(res, from, num, on) \ __sanitizer_syscall_post_impl_ioperm(res, (long)(from), (long)(num), \ (long)(on)) #define __sanitizer_syscall_pre_ioctl(fd, cmd, arg) \ __sanitizer_syscall_pre_impl_ioctl((long)(fd), (long)(cmd), (long)(arg)) #define __sanitizer_syscall_post_ioctl(res, fd, cmd, arg) \ __sanitizer_syscall_post_impl_ioctl(res, (long)(fd), (long)(cmd), (long)(arg)) #define __sanitizer_syscall_pre_flock(fd, cmd) \ __sanitizer_syscall_pre_impl_flock((long)(fd), (long)(cmd)) #define __sanitizer_syscall_post_flock(res, fd, cmd) \ __sanitizer_syscall_post_impl_flock(res, (long)(fd), (long)(cmd)) #define __sanitizer_syscall_pre_io_setup(nr_reqs, ctx) \ __sanitizer_syscall_pre_impl_io_setup((long)(nr_reqs), (long)(ctx)) #define __sanitizer_syscall_post_io_setup(res, nr_reqs, ctx) \ __sanitizer_syscall_post_impl_io_setup(res, (long)(nr_reqs), (long)(ctx)) #define __sanitizer_syscall_pre_io_destroy(ctx) \ __sanitizer_syscall_pre_impl_io_destroy((long)(ctx)) #define __sanitizer_syscall_post_io_destroy(res, ctx) \ __sanitizer_syscall_post_impl_io_destroy(res, (long)(ctx)) #define __sanitizer_syscall_pre_io_getevents(ctx_id, min_nr, nr, events, \ timeout) \ __sanitizer_syscall_pre_impl_io_getevents((long)(ctx_id), (long)(min_nr), \ (long)(nr), (long)(events), \ (long)(timeout)) #define __sanitizer_syscall_post_io_getevents(res, ctx_id, min_nr, nr, events, \ timeout) \ __sanitizer_syscall_post_impl_io_getevents(res, (long)(ctx_id), \ (long)(min_nr), (long)(nr), \ (long)(events), (long)(timeout)) #define __sanitizer_syscall_pre_io_submit(ctx_id, arg1, arg2) \ __sanitizer_syscall_pre_impl_io_submit((long)(ctx_id), 
(long)(arg1), \ (long)(arg2)) #define __sanitizer_syscall_post_io_submit(res, ctx_id, arg1, arg2) \ __sanitizer_syscall_post_impl_io_submit(res, (long)(ctx_id), (long)(arg1), \ (long)(arg2)) #define __sanitizer_syscall_pre_io_cancel(ctx_id, iocb, result) \ __sanitizer_syscall_pre_impl_io_cancel((long)(ctx_id), (long)(iocb), \ (long)(result)) #define __sanitizer_syscall_post_io_cancel(res, ctx_id, iocb, result) \ __sanitizer_syscall_post_impl_io_cancel(res, (long)(ctx_id), (long)(iocb), \ (long)(result)) #define __sanitizer_syscall_pre_sendfile(out_fd, in_fd, offset, count) \ __sanitizer_syscall_pre_impl_sendfile((long)(out_fd), (long)(in_fd), \ (long)(offset), (long)(count)) #define __sanitizer_syscall_post_sendfile(res, out_fd, in_fd, offset, count) \ __sanitizer_syscall_post_impl_sendfile(res, (long)(out_fd), (long)(in_fd), \ (long)(offset), (long)(count)) #define __sanitizer_syscall_pre_sendfile64(out_fd, in_fd, offset, count) \ __sanitizer_syscall_pre_impl_sendfile64((long)(out_fd), (long)(in_fd), \ (long)(offset), (long)(count)) #define __sanitizer_syscall_post_sendfile64(res, out_fd, in_fd, offset, count) \ __sanitizer_syscall_post_impl_sendfile64(res, (long)(out_fd), (long)(in_fd), \ (long)(offset), (long)(count)) #define __sanitizer_syscall_pre_readlink(path, buf, bufsiz) \ __sanitizer_syscall_pre_impl_readlink((long)(path), (long)(buf), \ (long)(bufsiz)) #define __sanitizer_syscall_post_readlink(res, path, buf, bufsiz) \ __sanitizer_syscall_post_impl_readlink(res, (long)(path), (long)(buf), \ (long)(bufsiz)) #define __sanitizer_syscall_pre_creat(pathname, mode) \ __sanitizer_syscall_pre_impl_creat((long)(pathname), (long)(mode)) #define __sanitizer_syscall_post_creat(res, pathname, mode) \ __sanitizer_syscall_post_impl_creat(res, (long)(pathname), (long)(mode)) #define __sanitizer_syscall_pre_open(filename, flags, mode) \ __sanitizer_syscall_pre_impl_open((long)(filename), (long)(flags), \ (long)(mode)) #define __sanitizer_syscall_post_open(res, filename, flags, mode) \ __sanitizer_syscall_post_impl_open(res, (long)(filename), (long)(flags), \ (long)(mode)) #define __sanitizer_syscall_pre_close(fd) \ __sanitizer_syscall_pre_impl_close((long)(fd)) #define __sanitizer_syscall_post_close(res, fd) \ __sanitizer_syscall_post_impl_close(res, (long)(fd)) #define __sanitizer_syscall_pre_access(filename, mode) \ __sanitizer_syscall_pre_impl_access((long)(filename), (long)(mode)) #define __sanitizer_syscall_post_access(res, filename, mode) \ __sanitizer_syscall_post_impl_access(res, (long)(filename), (long)(mode)) #define __sanitizer_syscall_pre_vhangup() __sanitizer_syscall_pre_impl_vhangup() #define __sanitizer_syscall_post_vhangup(res) \ __sanitizer_syscall_post_impl_vhangup(res) #define __sanitizer_syscall_pre_chown(filename, user, group) \ __sanitizer_syscall_pre_impl_chown((long)(filename), (long)(user), \ (long)(group)) #define __sanitizer_syscall_post_chown(res, filename, user, group) \ __sanitizer_syscall_post_impl_chown(res, (long)(filename), (long)(user), \ (long)(group)) #define __sanitizer_syscall_pre_lchown(filename, user, group) \ __sanitizer_syscall_pre_impl_lchown((long)(filename), (long)(user), \ (long)(group)) #define __sanitizer_syscall_post_lchown(res, filename, user, group) \ __sanitizer_syscall_post_impl_lchown(res, (long)(filename), (long)(user), \ (long)(group)) #define __sanitizer_syscall_pre_fchown(fd, user, group) \ __sanitizer_syscall_pre_impl_fchown((long)(fd), (long)(user), (long)(group)) #define __sanitizer_syscall_post_fchown(res, fd, user, group) \ 
__sanitizer_syscall_post_impl_fchown(res, (long)(fd), (long)(user), \ (long)(group)) #define __sanitizer_syscall_pre_chown16(filename, user, group) \ __sanitizer_syscall_pre_impl_chown16((long)(filename), (long)user, \ (long)group) #define __sanitizer_syscall_post_chown16(res, filename, user, group) \ __sanitizer_syscall_post_impl_chown16(res, (long)(filename), (long)user, \ (long)group) #define __sanitizer_syscall_pre_lchown16(filename, user, group) \ __sanitizer_syscall_pre_impl_lchown16((long)(filename), (long)user, \ (long)group) #define __sanitizer_syscall_post_lchown16(res, filename, user, group) \ __sanitizer_syscall_post_impl_lchown16(res, (long)(filename), (long)user, \ (long)group) #define __sanitizer_syscall_pre_fchown16(fd, user, group) \ __sanitizer_syscall_pre_impl_fchown16((long)(fd), (long)user, (long)group) #define __sanitizer_syscall_post_fchown16(res, fd, user, group) \ __sanitizer_syscall_post_impl_fchown16(res, (long)(fd), (long)user, \ (long)group) #define __sanitizer_syscall_pre_setregid16(rgid, egid) \ __sanitizer_syscall_pre_impl_setregid16((long)rgid, (long)egid) #define __sanitizer_syscall_post_setregid16(res, rgid, egid) \ __sanitizer_syscall_post_impl_setregid16(res, (long)rgid, (long)egid) #define __sanitizer_syscall_pre_setgid16(gid) \ __sanitizer_syscall_pre_impl_setgid16((long)gid) #define __sanitizer_syscall_post_setgid16(res, gid) \ __sanitizer_syscall_post_impl_setgid16(res, (long)gid) #define __sanitizer_syscall_pre_setreuid16(ruid, euid) \ __sanitizer_syscall_pre_impl_setreuid16((long)ruid, (long)euid) #define __sanitizer_syscall_post_setreuid16(res, ruid, euid) \ __sanitizer_syscall_post_impl_setreuid16(res, (long)ruid, (long)euid) #define __sanitizer_syscall_pre_setuid16(uid) \ __sanitizer_syscall_pre_impl_setuid16((long)uid) #define __sanitizer_syscall_post_setuid16(res, uid) \ __sanitizer_syscall_post_impl_setuid16(res, (long)uid) #define __sanitizer_syscall_pre_setresuid16(ruid, euid, suid) \ __sanitizer_syscall_pre_impl_setresuid16((long)ruid, (long)euid, (long)suid) #define __sanitizer_syscall_post_setresuid16(res, ruid, euid, suid) \ __sanitizer_syscall_post_impl_setresuid16(res, (long)ruid, (long)euid, \ (long)suid) #define __sanitizer_syscall_pre_getresuid16(ruid, euid, suid) \ __sanitizer_syscall_pre_impl_getresuid16((long)(ruid), (long)(euid), \ (long)(suid)) #define __sanitizer_syscall_post_getresuid16(res, ruid, euid, suid) \ __sanitizer_syscall_post_impl_getresuid16(res, (long)(ruid), (long)(euid), \ (long)(suid)) #define __sanitizer_syscall_pre_setresgid16(rgid, egid, sgid) \ __sanitizer_syscall_pre_impl_setresgid16((long)rgid, (long)egid, (long)sgid) #define __sanitizer_syscall_post_setresgid16(res, rgid, egid, sgid) \ __sanitizer_syscall_post_impl_setresgid16(res, (long)rgid, (long)egid, \ (long)sgid) #define __sanitizer_syscall_pre_getresgid16(rgid, egid, sgid) \ __sanitizer_syscall_pre_impl_getresgid16((long)(rgid), (long)(egid), \ (long)(sgid)) #define __sanitizer_syscall_post_getresgid16(res, rgid, egid, sgid) \ __sanitizer_syscall_post_impl_getresgid16(res, (long)(rgid), (long)(egid), \ (long)(sgid)) #define __sanitizer_syscall_pre_setfsuid16(uid) \ __sanitizer_syscall_pre_impl_setfsuid16((long)uid) #define __sanitizer_syscall_post_setfsuid16(res, uid) \ __sanitizer_syscall_post_impl_setfsuid16(res, (long)uid) #define __sanitizer_syscall_pre_setfsgid16(gid) \ __sanitizer_syscall_pre_impl_setfsgid16((long)gid) #define __sanitizer_syscall_post_setfsgid16(res, gid) \ __sanitizer_syscall_post_impl_setfsgid16(res, (long)gid) #define 
__sanitizer_syscall_pre_getgroups16(gidsetsize, grouplist) \ __sanitizer_syscall_pre_impl_getgroups16((long)(gidsetsize), \ (long)(grouplist)) #define __sanitizer_syscall_post_getgroups16(res, gidsetsize, grouplist) \ __sanitizer_syscall_post_impl_getgroups16(res, (long)(gidsetsize), \ (long)(grouplist)) #define __sanitizer_syscall_pre_setgroups16(gidsetsize, grouplist) \ __sanitizer_syscall_pre_impl_setgroups16((long)(gidsetsize), \ (long)(grouplist)) #define __sanitizer_syscall_post_setgroups16(res, gidsetsize, grouplist) \ __sanitizer_syscall_post_impl_setgroups16(res, (long)(gidsetsize), \ (long)(grouplist)) #define __sanitizer_syscall_pre_getuid16() \ __sanitizer_syscall_pre_impl_getuid16() #define __sanitizer_syscall_post_getuid16(res) \ __sanitizer_syscall_post_impl_getuid16(res) #define __sanitizer_syscall_pre_geteuid16() \ __sanitizer_syscall_pre_impl_geteuid16() #define __sanitizer_syscall_post_geteuid16(res) \ __sanitizer_syscall_post_impl_geteuid16(res) #define __sanitizer_syscall_pre_getgid16() \ __sanitizer_syscall_pre_impl_getgid16() #define __sanitizer_syscall_post_getgid16(res) \ __sanitizer_syscall_post_impl_getgid16(res) #define __sanitizer_syscall_pre_getegid16() \ __sanitizer_syscall_pre_impl_getegid16() #define __sanitizer_syscall_post_getegid16(res) \ __sanitizer_syscall_post_impl_getegid16(res) #define __sanitizer_syscall_pre_utime(filename, times) \ __sanitizer_syscall_pre_impl_utime((long)(filename), (long)(times)) #define __sanitizer_syscall_post_utime(res, filename, times) \ __sanitizer_syscall_post_impl_utime(res, (long)(filename), (long)(times)) #define __sanitizer_syscall_pre_utimes(filename, utimes) \ __sanitizer_syscall_pre_impl_utimes((long)(filename), (long)(utimes)) #define __sanitizer_syscall_post_utimes(res, filename, utimes) \ __sanitizer_syscall_post_impl_utimes(res, (long)(filename), (long)(utimes)) #define __sanitizer_syscall_pre_lseek(fd, offset, origin) \ __sanitizer_syscall_pre_impl_lseek((long)(fd), (long)(offset), (long)(origin)) #define __sanitizer_syscall_post_lseek(res, fd, offset, origin) \ __sanitizer_syscall_post_impl_lseek(res, (long)(fd), (long)(offset), \ (long)(origin)) #define __sanitizer_syscall_pre_llseek(fd, offset_high, offset_low, result, \ origin) \ __sanitizer_syscall_pre_impl_llseek((long)(fd), (long)(offset_high), \ (long)(offset_low), (long)(result), \ (long)(origin)) #define __sanitizer_syscall_post_llseek(res, fd, offset_high, offset_low, \ result, origin) \ __sanitizer_syscall_post_impl_llseek(res, (long)(fd), (long)(offset_high), \ (long)(offset_low), (long)(result), \ (long)(origin)) #define __sanitizer_syscall_pre_read(fd, buf, count) \ __sanitizer_syscall_pre_impl_read((long)(fd), (long)(buf), (long)(count)) #define __sanitizer_syscall_post_read(res, fd, buf, count) \ __sanitizer_syscall_post_impl_read(res, (long)(fd), (long)(buf), \ (long)(count)) #define __sanitizer_syscall_pre_readv(fd, vec, vlen) \ __sanitizer_syscall_pre_impl_readv((long)(fd), (long)(vec), (long)(vlen)) #define __sanitizer_syscall_post_readv(res, fd, vec, vlen) \ __sanitizer_syscall_post_impl_readv(res, (long)(fd), (long)(vec), \ (long)(vlen)) #define __sanitizer_syscall_pre_write(fd, buf, count) \ __sanitizer_syscall_pre_impl_write((long)(fd), (long)(buf), (long)(count)) #define __sanitizer_syscall_post_write(res, fd, buf, count) \ __sanitizer_syscall_post_impl_write(res, (long)(fd), (long)(buf), \ (long)(count)) #define __sanitizer_syscall_pre_writev(fd, vec, vlen) \ __sanitizer_syscall_pre_impl_writev((long)(fd), (long)(vec), (long)(vlen)) 
#define __sanitizer_syscall_post_writev(res, fd, vec, vlen) \ __sanitizer_syscall_post_impl_writev(res, (long)(fd), (long)(vec), \ (long)(vlen)) #ifdef _LP64 #define __sanitizer_syscall_pre_pread64(fd, buf, count, pos) \ __sanitizer_syscall_pre_impl_pread64((long)(fd), (long)(buf), (long)(count), \ (long)(pos)) #define __sanitizer_syscall_post_pread64(res, fd, buf, count, pos) \ __sanitizer_syscall_post_impl_pread64(res, (long)(fd), (long)(buf), \ (long)(count), (long)(pos)) #define __sanitizer_syscall_pre_pwrite64(fd, buf, count, pos) \ __sanitizer_syscall_pre_impl_pwrite64((long)(fd), (long)(buf), \ (long)(count), (long)(pos)) #define __sanitizer_syscall_post_pwrite64(res, fd, buf, count, pos) \ __sanitizer_syscall_post_impl_pwrite64(res, (long)(fd), (long)(buf), \ (long)(count), (long)(pos)) #else #define __sanitizer_syscall_pre_pread64(fd, buf, count, pos0, pos1) \ __sanitizer_syscall_pre_impl_pread64((long)(fd), (long)(buf), (long)(count), \ (long)(pos0), (long)(pos1)) #define __sanitizer_syscall_post_pread64(res, fd, buf, count, pos0, pos1) \ __sanitizer_syscall_post_impl_pread64( \ res, (long)(fd), (long)(buf), (long)(count), (long)(pos0), (long)(pos1)) #define __sanitizer_syscall_pre_pwrite64(fd, buf, count, pos0, pos1) \ __sanitizer_syscall_pre_impl_pwrite64( \ (long)(fd), (long)(buf), (long)(count), (long)(pos0), (long)(pos1)) #define __sanitizer_syscall_post_pwrite64(res, fd, buf, count, pos0, pos1) \ __sanitizer_syscall_post_impl_pwrite64( \ res, (long)(fd), (long)(buf), (long)(count), (long)(pos0), (long)(pos1)) #endif #define __sanitizer_syscall_pre_preadv(fd, vec, vlen, pos_l, pos_h) \ __sanitizer_syscall_pre_impl_preadv((long)(fd), (long)(vec), (long)(vlen), \ (long)(pos_l), (long)(pos_h)) #define __sanitizer_syscall_post_preadv(res, fd, vec, vlen, pos_l, pos_h) \ __sanitizer_syscall_post_impl_preadv(res, (long)(fd), (long)(vec), \ (long)(vlen), (long)(pos_l), \ (long)(pos_h)) #define __sanitizer_syscall_pre_pwritev(fd, vec, vlen, pos_l, pos_h) \ __sanitizer_syscall_pre_impl_pwritev((long)(fd), (long)(vec), (long)(vlen), \ (long)(pos_l), (long)(pos_h)) #define __sanitizer_syscall_post_pwritev(res, fd, vec, vlen, pos_l, pos_h) \ __sanitizer_syscall_post_impl_pwritev(res, (long)(fd), (long)(vec), \ (long)(vlen), (long)(pos_l), \ (long)(pos_h)) #define __sanitizer_syscall_pre_getcwd(buf, size) \ __sanitizer_syscall_pre_impl_getcwd((long)(buf), (long)(size)) #define __sanitizer_syscall_post_getcwd(res, buf, size) \ __sanitizer_syscall_post_impl_getcwd(res, (long)(buf), (long)(size)) #define __sanitizer_syscall_pre_mkdir(pathname, mode) \ __sanitizer_syscall_pre_impl_mkdir((long)(pathname), (long)(mode)) #define __sanitizer_syscall_post_mkdir(res, pathname, mode) \ __sanitizer_syscall_post_impl_mkdir(res, (long)(pathname), (long)(mode)) #define __sanitizer_syscall_pre_chdir(filename) \ __sanitizer_syscall_pre_impl_chdir((long)(filename)) #define __sanitizer_syscall_post_chdir(res, filename) \ __sanitizer_syscall_post_impl_chdir(res, (long)(filename)) #define __sanitizer_syscall_pre_fchdir(fd) \ __sanitizer_syscall_pre_impl_fchdir((long)(fd)) #define __sanitizer_syscall_post_fchdir(res, fd) \ __sanitizer_syscall_post_impl_fchdir(res, (long)(fd)) #define __sanitizer_syscall_pre_rmdir(pathname) \ __sanitizer_syscall_pre_impl_rmdir((long)(pathname)) #define __sanitizer_syscall_post_rmdir(res, pathname) \ __sanitizer_syscall_post_impl_rmdir(res, (long)(pathname)) #define __sanitizer_syscall_pre_lookup_dcookie(cookie64, buf, len) \ 
__sanitizer_syscall_pre_impl_lookup_dcookie((long)(cookie64), (long)(buf), \ (long)(len)) #define __sanitizer_syscall_post_lookup_dcookie(res, cookie64, buf, len) \ __sanitizer_syscall_post_impl_lookup_dcookie(res, (long)(cookie64), \ (long)(buf), (long)(len)) #define __sanitizer_syscall_pre_quotactl(cmd, special, id, addr) \ __sanitizer_syscall_pre_impl_quotactl((long)(cmd), (long)(special), \ (long)(id), (long)(addr)) #define __sanitizer_syscall_post_quotactl(res, cmd, special, id, addr) \ __sanitizer_syscall_post_impl_quotactl(res, (long)(cmd), (long)(special), \ (long)(id), (long)(addr)) #define __sanitizer_syscall_pre_getdents(fd, dirent, count) \ __sanitizer_syscall_pre_impl_getdents((long)(fd), (long)(dirent), \ (long)(count)) #define __sanitizer_syscall_post_getdents(res, fd, dirent, count) \ __sanitizer_syscall_post_impl_getdents(res, (long)(fd), (long)(dirent), \ (long)(count)) #define __sanitizer_syscall_pre_getdents64(fd, dirent, count) \ __sanitizer_syscall_pre_impl_getdents64((long)(fd), (long)(dirent), \ (long)(count)) #define __sanitizer_syscall_post_getdents64(res, fd, dirent, count) \ __sanitizer_syscall_post_impl_getdents64(res, (long)(fd), (long)(dirent), \ (long)(count)) #define __sanitizer_syscall_pre_setsockopt(fd, level, optname, optval, optlen) \ __sanitizer_syscall_pre_impl_setsockopt((long)(fd), (long)(level), \ (long)(optname), (long)(optval), \ (long)(optlen)) #define __sanitizer_syscall_post_setsockopt(res, fd, level, optname, optval, \ optlen) \ __sanitizer_syscall_post_impl_setsockopt(res, (long)(fd), (long)(level), \ (long)(optname), (long)(optval), \ (long)(optlen)) #define __sanitizer_syscall_pre_getsockopt(fd, level, optname, optval, optlen) \ __sanitizer_syscall_pre_impl_getsockopt((long)(fd), (long)(level), \ (long)(optname), (long)(optval), \ (long)(optlen)) #define __sanitizer_syscall_post_getsockopt(res, fd, level, optname, optval, \ optlen) \ __sanitizer_syscall_post_impl_getsockopt(res, (long)(fd), (long)(level), \ (long)(optname), (long)(optval), \ (long)(optlen)) #define __sanitizer_syscall_pre_bind(arg0, arg1, arg2) \ __sanitizer_syscall_pre_impl_bind((long)(arg0), (long)(arg1), (long)(arg2)) #define __sanitizer_syscall_post_bind(res, arg0, arg1, arg2) \ __sanitizer_syscall_post_impl_bind(res, (long)(arg0), (long)(arg1), \ (long)(arg2)) #define __sanitizer_syscall_pre_connect(arg0, arg1, arg2) \ __sanitizer_syscall_pre_impl_connect((long)(arg0), (long)(arg1), (long)(arg2)) #define __sanitizer_syscall_post_connect(res, arg0, arg1, arg2) \ __sanitizer_syscall_post_impl_connect(res, (long)(arg0), (long)(arg1), \ (long)(arg2)) #define __sanitizer_syscall_pre_accept(arg0, arg1, arg2) \ __sanitizer_syscall_pre_impl_accept((long)(arg0), (long)(arg1), (long)(arg2)) #define __sanitizer_syscall_post_accept(res, arg0, arg1, arg2) \ __sanitizer_syscall_post_impl_accept(res, (long)(arg0), (long)(arg1), \ (long)(arg2)) #define __sanitizer_syscall_pre_accept4(arg0, arg1, arg2, arg3) \ __sanitizer_syscall_pre_impl_accept4((long)(arg0), (long)(arg1), \ (long)(arg2), (long)(arg3)) #define __sanitizer_syscall_post_accept4(res, arg0, arg1, arg2, arg3) \ __sanitizer_syscall_post_impl_accept4(res, (long)(arg0), (long)(arg1), \ (long)(arg2), (long)(arg3)) #define __sanitizer_syscall_pre_getsockname(arg0, arg1, arg2) \ __sanitizer_syscall_pre_impl_getsockname((long)(arg0), (long)(arg1), \ (long)(arg2)) #define __sanitizer_syscall_post_getsockname(res, arg0, arg1, arg2) \ __sanitizer_syscall_post_impl_getsockname(res, (long)(arg0), (long)(arg1), \ (long)(arg2)) #define 
__sanitizer_syscall_pre_getpeername(arg0, arg1, arg2) \ __sanitizer_syscall_pre_impl_getpeername((long)(arg0), (long)(arg1), \ (long)(arg2)) #define __sanitizer_syscall_post_getpeername(res, arg0, arg1, arg2) \ __sanitizer_syscall_post_impl_getpeername(res, (long)(arg0), (long)(arg1), \ (long)(arg2)) #define __sanitizer_syscall_pre_send(arg0, arg1, arg2, arg3) \ __sanitizer_syscall_pre_impl_send((long)(arg0), (long)(arg1), (long)(arg2), \ (long)(arg3)) #define __sanitizer_syscall_post_send(res, arg0, arg1, arg2, arg3) \ __sanitizer_syscall_post_impl_send(res, (long)(arg0), (long)(arg1), \ (long)(arg2), (long)(arg3)) #define __sanitizer_syscall_pre_sendto(arg0, arg1, arg2, arg3, arg4, arg5) \ __sanitizer_syscall_pre_impl_sendto((long)(arg0), (long)(arg1), \ (long)(arg2), (long)(arg3), \ (long)(arg4), (long)(arg5)) #define __sanitizer_syscall_post_sendto(res, arg0, arg1, arg2, arg3, arg4, \ arg5) \ __sanitizer_syscall_post_impl_sendto(res, (long)(arg0), (long)(arg1), \ (long)(arg2), (long)(arg3), \ (long)(arg4), (long)(arg5)) #define __sanitizer_syscall_pre_sendmsg(fd, msg, flags) \ __sanitizer_syscall_pre_impl_sendmsg((long)(fd), (long)(msg), (long)(flags)) #define __sanitizer_syscall_post_sendmsg(res, fd, msg, flags) \ __sanitizer_syscall_post_impl_sendmsg(res, (long)(fd), (long)(msg), \ (long)(flags)) #define __sanitizer_syscall_pre_sendmmsg(fd, msg, vlen, flags) \ __sanitizer_syscall_pre_impl_sendmmsg((long)(fd), (long)(msg), (long)(vlen), \ (long)(flags)) #define __sanitizer_syscall_post_sendmmsg(res, fd, msg, vlen, flags) \ __sanitizer_syscall_post_impl_sendmmsg(res, (long)(fd), (long)(msg), \ (long)(vlen), (long)(flags)) #define __sanitizer_syscall_pre_recv(arg0, arg1, arg2, arg3) \ __sanitizer_syscall_pre_impl_recv((long)(arg0), (long)(arg1), (long)(arg2), \ (long)(arg3)) #define __sanitizer_syscall_post_recv(res, arg0, arg1, arg2, arg3) \ __sanitizer_syscall_post_impl_recv(res, (long)(arg0), (long)(arg1), \ (long)(arg2), (long)(arg3)) #define __sanitizer_syscall_pre_recvfrom(arg0, arg1, arg2, arg3, arg4, arg5) \ __sanitizer_syscall_pre_impl_recvfrom((long)(arg0), (long)(arg1), \ (long)(arg2), (long)(arg3), \ (long)(arg4), (long)(arg5)) #define __sanitizer_syscall_post_recvfrom(res, arg0, arg1, arg2, arg3, arg4, \ arg5) \ __sanitizer_syscall_post_impl_recvfrom(res, (long)(arg0), (long)(arg1), \ (long)(arg2), (long)(arg3), \ (long)(arg4), (long)(arg5)) #define __sanitizer_syscall_pre_recvmsg(fd, msg, flags) \ __sanitizer_syscall_pre_impl_recvmsg((long)(fd), (long)(msg), (long)(flags)) #define __sanitizer_syscall_post_recvmsg(res, fd, msg, flags) \ __sanitizer_syscall_post_impl_recvmsg(res, (long)(fd), (long)(msg), \ (long)(flags)) #define __sanitizer_syscall_pre_recvmmsg(fd, msg, vlen, flags, timeout) \ __sanitizer_syscall_pre_impl_recvmmsg((long)(fd), (long)(msg), (long)(vlen), \ (long)(flags), (long)(timeout)) #define __sanitizer_syscall_post_recvmmsg(res, fd, msg, vlen, flags, timeout) \ __sanitizer_syscall_post_impl_recvmmsg(res, (long)(fd), (long)(msg), \ (long)(vlen), (long)(flags), \ (long)(timeout)) #define __sanitizer_syscall_pre_socket(arg0, arg1, arg2) \ __sanitizer_syscall_pre_impl_socket((long)(arg0), (long)(arg1), (long)(arg2)) #define __sanitizer_syscall_post_socket(res, arg0, arg1, arg2) \ __sanitizer_syscall_post_impl_socket(res, (long)(arg0), (long)(arg1), \ (long)(arg2)) #define __sanitizer_syscall_pre_socketpair(arg0, arg1, arg2, arg3) \ __sanitizer_syscall_pre_impl_socketpair((long)(arg0), (long)(arg1), \ (long)(arg2), (long)(arg3)) #define 
__sanitizer_syscall_post_socketpair(res, arg0, arg1, arg2, arg3) \ __sanitizer_syscall_post_impl_socketpair(res, (long)(arg0), (long)(arg1), \ (long)(arg2), (long)(arg3)) #define __sanitizer_syscall_pre_socketcall(call, args) \ __sanitizer_syscall_pre_impl_socketcall((long)(call), (long)(args)) #define __sanitizer_syscall_post_socketcall(res, call, args) \ __sanitizer_syscall_post_impl_socketcall(res, (long)(call), (long)(args)) #define __sanitizer_syscall_pre_listen(arg0, arg1) \ __sanitizer_syscall_pre_impl_listen((long)(arg0), (long)(arg1)) #define __sanitizer_syscall_post_listen(res, arg0, arg1) \ __sanitizer_syscall_post_impl_listen(res, (long)(arg0), (long)(arg1)) #define __sanitizer_syscall_pre_poll(ufds, nfds, timeout) \ __sanitizer_syscall_pre_impl_poll((long)(ufds), (long)(nfds), (long)(timeout)) #define __sanitizer_syscall_post_poll(res, ufds, nfds, timeout) \ __sanitizer_syscall_post_impl_poll(res, (long)(ufds), (long)(nfds), \ (long)(timeout)) #define __sanitizer_syscall_pre_select(n, inp, outp, exp, tvp) \ __sanitizer_syscall_pre_impl_select((long)(n), (long)(inp), (long)(outp), \ (long)(exp), (long)(tvp)) #define __sanitizer_syscall_post_select(res, n, inp, outp, exp, tvp) \ __sanitizer_syscall_post_impl_select(res, (long)(n), (long)(inp), \ (long)(outp), (long)(exp), (long)(tvp)) #define __sanitizer_syscall_pre_old_select(arg) \ __sanitizer_syscall_pre_impl_old_select((long)(arg)) #define __sanitizer_syscall_post_old_select(res, arg) \ __sanitizer_syscall_post_impl_old_select(res, (long)(arg)) #define __sanitizer_syscall_pre_epoll_create(size) \ __sanitizer_syscall_pre_impl_epoll_create((long)(size)) #define __sanitizer_syscall_post_epoll_create(res, size) \ __sanitizer_syscall_post_impl_epoll_create(res, (long)(size)) #define __sanitizer_syscall_pre_epoll_create1(flags) \ __sanitizer_syscall_pre_impl_epoll_create1((long)(flags)) #define __sanitizer_syscall_post_epoll_create1(res, flags) \ __sanitizer_syscall_post_impl_epoll_create1(res, (long)(flags)) #define __sanitizer_syscall_pre_epoll_ctl(epfd, op, fd, event) \ __sanitizer_syscall_pre_impl_epoll_ctl((long)(epfd), (long)(op), (long)(fd), \ (long)(event)) #define __sanitizer_syscall_post_epoll_ctl(res, epfd, op, fd, event) \ __sanitizer_syscall_post_impl_epoll_ctl(res, (long)(epfd), (long)(op), \ (long)(fd), (long)(event)) #define __sanitizer_syscall_pre_epoll_wait(epfd, events, maxevents, timeout) \ __sanitizer_syscall_pre_impl_epoll_wait((long)(epfd), (long)(events), \ (long)(maxevents), (long)(timeout)) #define __sanitizer_syscall_post_epoll_wait(res, epfd, events, maxevents, \ timeout) \ __sanitizer_syscall_post_impl_epoll_wait(res, (long)(epfd), (long)(events), \ (long)(maxevents), (long)(timeout)) #define __sanitizer_syscall_pre_epoll_pwait(epfd, events, maxevents, timeout, \ sigmask, sigsetsize) \ __sanitizer_syscall_pre_impl_epoll_pwait( \ (long)(epfd), (long)(events), (long)(maxevents), (long)(timeout), \ (long)(sigmask), (long)(sigsetsize)) #define __sanitizer_syscall_post_epoll_pwait(res, epfd, events, maxevents, \ timeout, sigmask, sigsetsize) \ __sanitizer_syscall_post_impl_epoll_pwait( \ res, (long)(epfd), (long)(events), (long)(maxevents), (long)(timeout), \ (long)(sigmask), (long)(sigsetsize)) #define __sanitizer_syscall_pre_epoll_pwait2(epfd, events, maxevents, timeout, \ sigmask, sigsetsize) \ __sanitizer_syscall_pre_impl_epoll_pwait2( \ (long)(epfd), (long)(events), (long)(maxevents), (long)(timeout), \ (long)(sigmask), (long)(sigsetsize)) #define __sanitizer_syscall_post_epoll_pwait2(res, epfd, 
events, maxevents, \ timeout, sigmask, sigsetsize) \ __sanitizer_syscall_post_impl_epoll_pwait2( \ res, (long)(epfd), (long)(events), (long)(maxevents), (long)(timeout), \ (long)(sigmask), (long)(sigsetsize)) #define __sanitizer_syscall_pre_gethostname(name, len) \ __sanitizer_syscall_pre_impl_gethostname((long)(name), (long)(len)) #define __sanitizer_syscall_post_gethostname(res, name, len) \ __sanitizer_syscall_post_impl_gethostname(res, (long)(name), (long)(len)) #define __sanitizer_syscall_pre_sethostname(name, len) \ __sanitizer_syscall_pre_impl_sethostname((long)(name), (long)(len)) #define __sanitizer_syscall_post_sethostname(res, name, len) \ __sanitizer_syscall_post_impl_sethostname(res, (long)(name), (long)(len)) #define __sanitizer_syscall_pre_setdomainname(name, len) \ __sanitizer_syscall_pre_impl_setdomainname((long)(name), (long)(len)) #define __sanitizer_syscall_post_setdomainname(res, name, len) \ __sanitizer_syscall_post_impl_setdomainname(res, (long)(name), (long)(len)) #define __sanitizer_syscall_pre_newuname(name) \ __sanitizer_syscall_pre_impl_newuname((long)(name)) #define __sanitizer_syscall_post_newuname(res, name) \ __sanitizer_syscall_post_impl_newuname(res, (long)(name)) #define __sanitizer_syscall_pre_uname(arg0) \ __sanitizer_syscall_pre_impl_uname((long)(arg0)) #define __sanitizer_syscall_post_uname(res, arg0) \ __sanitizer_syscall_post_impl_uname(res, (long)(arg0)) #define __sanitizer_syscall_pre_olduname(arg0) \ __sanitizer_syscall_pre_impl_olduname((long)(arg0)) #define __sanitizer_syscall_post_olduname(res, arg0) \ __sanitizer_syscall_post_impl_olduname(res, (long)(arg0)) #define __sanitizer_syscall_pre_getrlimit(resource, rlim) \ __sanitizer_syscall_pre_impl_getrlimit((long)(resource), (long)(rlim)) #define __sanitizer_syscall_post_getrlimit(res, resource, rlim) \ __sanitizer_syscall_post_impl_getrlimit(res, (long)(resource), (long)(rlim)) #define __sanitizer_syscall_pre_old_getrlimit(resource, rlim) \ __sanitizer_syscall_pre_impl_old_getrlimit((long)(resource), (long)(rlim)) #define __sanitizer_syscall_post_old_getrlimit(res, resource, rlim) \ __sanitizer_syscall_post_impl_old_getrlimit(res, (long)(resource), \ (long)(rlim)) #define __sanitizer_syscall_pre_setrlimit(resource, rlim) \ __sanitizer_syscall_pre_impl_setrlimit((long)(resource), (long)(rlim)) #define __sanitizer_syscall_post_setrlimit(res, resource, rlim) \ __sanitizer_syscall_post_impl_setrlimit(res, (long)(resource), (long)(rlim)) #define __sanitizer_syscall_pre_prlimit64(pid, resource, new_rlim, old_rlim) \ __sanitizer_syscall_pre_impl_prlimit64((long)(pid), (long)(resource), \ (long)(new_rlim), (long)(old_rlim)) #define __sanitizer_syscall_post_prlimit64(res, pid, resource, new_rlim, \ old_rlim) \ __sanitizer_syscall_post_impl_prlimit64(res, (long)(pid), (long)(resource), \ (long)(new_rlim), (long)(old_rlim)) #define __sanitizer_syscall_pre_getrusage(who, ru) \ __sanitizer_syscall_pre_impl_getrusage((long)(who), (long)(ru)) #define __sanitizer_syscall_post_getrusage(res, who, ru) \ __sanitizer_syscall_post_impl_getrusage(res, (long)(who), (long)(ru)) #define __sanitizer_syscall_pre_umask(mask) \ __sanitizer_syscall_pre_impl_umask((long)(mask)) #define __sanitizer_syscall_post_umask(res, mask) \ __sanitizer_syscall_post_impl_umask(res, (long)(mask)) #define __sanitizer_syscall_pre_msgget(key, msgflg) \ __sanitizer_syscall_pre_impl_msgget((long)(key), (long)(msgflg)) #define __sanitizer_syscall_post_msgget(res, key, msgflg) \ __sanitizer_syscall_post_impl_msgget(res, (long)(key), 
(long)(msgflg)) #define __sanitizer_syscall_pre_msgsnd(msqid, msgp, msgsz, msgflg) \ __sanitizer_syscall_pre_impl_msgsnd((long)(msqid), (long)(msgp), \ (long)(msgsz), (long)(msgflg)) #define __sanitizer_syscall_post_msgsnd(res, msqid, msgp, msgsz, msgflg) \ __sanitizer_syscall_post_impl_msgsnd(res, (long)(msqid), (long)(msgp), \ (long)(msgsz), (long)(msgflg)) #define __sanitizer_syscall_pre_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg) \ __sanitizer_syscall_pre_impl_msgrcv((long)(msqid), (long)(msgp), \ (long)(msgsz), (long)(msgtyp), \ (long)(msgflg)) #define __sanitizer_syscall_post_msgrcv(res, msqid, msgp, msgsz, msgtyp, \ msgflg) \ __sanitizer_syscall_post_impl_msgrcv(res, (long)(msqid), (long)(msgp), \ (long)(msgsz), (long)(msgtyp), \ (long)(msgflg)) #define __sanitizer_syscall_pre_msgctl(msqid, cmd, buf) \ __sanitizer_syscall_pre_impl_msgctl((long)(msqid), (long)(cmd), (long)(buf)) #define __sanitizer_syscall_post_msgctl(res, msqid, cmd, buf) \ __sanitizer_syscall_post_impl_msgctl(res, (long)(msqid), (long)(cmd), \ (long)(buf)) #define __sanitizer_syscall_pre_semget(key, nsems, semflg) \ __sanitizer_syscall_pre_impl_semget((long)(key), (long)(nsems), \ (long)(semflg)) #define __sanitizer_syscall_post_semget(res, key, nsems, semflg) \ __sanitizer_syscall_post_impl_semget(res, (long)(key), (long)(nsems), \ (long)(semflg)) #define __sanitizer_syscall_pre_semop(semid, sops, nsops) \ __sanitizer_syscall_pre_impl_semop((long)(semid), (long)(sops), (long)(nsops)) #define __sanitizer_syscall_post_semop(res, semid, sops, nsops) \ __sanitizer_syscall_post_impl_semop(res, (long)(semid), (long)(sops), \ (long)(nsops)) #define __sanitizer_syscall_pre_semctl(semid, semnum, cmd, arg) \ __sanitizer_syscall_pre_impl_semctl((long)(semid), (long)(semnum), \ (long)(cmd), (long)(arg)) #define __sanitizer_syscall_post_semctl(res, semid, semnum, cmd, arg) \ __sanitizer_syscall_post_impl_semctl(res, (long)(semid), (long)(semnum), \ (long)(cmd), (long)(arg)) #define __sanitizer_syscall_pre_semtimedop(semid, sops, nsops, timeout) \ __sanitizer_syscall_pre_impl_semtimedop((long)(semid), (long)(sops), \ (long)(nsops), (long)(timeout)) #define __sanitizer_syscall_post_semtimedop(res, semid, sops, nsops, timeout) \ __sanitizer_syscall_post_impl_semtimedop(res, (long)(semid), (long)(sops), \ (long)(nsops), (long)(timeout)) #define __sanitizer_syscall_pre_shmat(shmid, shmaddr, shmflg) \ __sanitizer_syscall_pre_impl_shmat((long)(shmid), (long)(shmaddr), \ (long)(shmflg)) #define __sanitizer_syscall_post_shmat(res, shmid, shmaddr, shmflg) \ __sanitizer_syscall_post_impl_shmat(res, (long)(shmid), (long)(shmaddr), \ (long)(shmflg)) #define __sanitizer_syscall_pre_shmget(key, size, flag) \ __sanitizer_syscall_pre_impl_shmget((long)(key), (long)(size), (long)(flag)) #define __sanitizer_syscall_post_shmget(res, key, size, flag) \ __sanitizer_syscall_post_impl_shmget(res, (long)(key), (long)(size), \ (long)(flag)) #define __sanitizer_syscall_pre_shmdt(shmaddr) \ __sanitizer_syscall_pre_impl_shmdt((long)(shmaddr)) #define __sanitizer_syscall_post_shmdt(res, shmaddr) \ __sanitizer_syscall_post_impl_shmdt(res, (long)(shmaddr)) #define __sanitizer_syscall_pre_shmctl(shmid, cmd, buf) \ __sanitizer_syscall_pre_impl_shmctl((long)(shmid), (long)(cmd), (long)(buf)) #define __sanitizer_syscall_post_shmctl(res, shmid, cmd, buf) \ __sanitizer_syscall_post_impl_shmctl(res, (long)(shmid), (long)(cmd), \ (long)(buf)) #define __sanitizer_syscall_pre_ipc(call, first, second, third, ptr, fifth) \ __sanitizer_syscall_pre_impl_ipc((long)(call), 
(long)(first), \ (long)(second), (long)(third), (long)(ptr), \ (long)(fifth)) #define __sanitizer_syscall_post_ipc(res, call, first, second, third, ptr, \ fifth) \ __sanitizer_syscall_post_impl_ipc(res, (long)(call), (long)(first), \ (long)(second), (long)(third), \ (long)(ptr), (long)(fifth)) #define __sanitizer_syscall_pre_mq_open(name, oflag, mode, attr) \ __sanitizer_syscall_pre_impl_mq_open((long)(name), (long)(oflag), \ (long)(mode), (long)(attr)) #define __sanitizer_syscall_post_mq_open(res, name, oflag, mode, attr) \ __sanitizer_syscall_post_impl_mq_open(res, (long)(name), (long)(oflag), \ (long)(mode), (long)(attr)) #define __sanitizer_syscall_pre_mq_unlink(name) \ __sanitizer_syscall_pre_impl_mq_unlink((long)(name)) #define __sanitizer_syscall_post_mq_unlink(res, name) \ __sanitizer_syscall_post_impl_mq_unlink(res, (long)(name)) #define __sanitizer_syscall_pre_mq_timedsend(mqdes, msg_ptr, msg_len, \ msg_prio, abs_timeout) \ __sanitizer_syscall_pre_impl_mq_timedsend((long)(mqdes), (long)(msg_ptr), \ (long)(msg_len), (long)(msg_prio), \ (long)(abs_timeout)) #define __sanitizer_syscall_post_mq_timedsend(res, mqdes, msg_ptr, msg_len, \ msg_prio, abs_timeout) \ __sanitizer_syscall_post_impl_mq_timedsend( \ res, (long)(mqdes), (long)(msg_ptr), (long)(msg_len), (long)(msg_prio), \ (long)(abs_timeout)) #define __sanitizer_syscall_pre_mq_timedreceive(mqdes, msg_ptr, msg_len, \ msg_prio, abs_timeout) \ __sanitizer_syscall_pre_impl_mq_timedreceive( \ (long)(mqdes), (long)(msg_ptr), (long)(msg_len), (long)(msg_prio), \ (long)(abs_timeout)) #define __sanitizer_syscall_post_mq_timedreceive(res, mqdes, msg_ptr, msg_len, \ msg_prio, abs_timeout) \ __sanitizer_syscall_post_impl_mq_timedreceive( \ res, (long)(mqdes), (long)(msg_ptr), (long)(msg_len), (long)(msg_prio), \ (long)(abs_timeout)) #define __sanitizer_syscall_pre_mq_notify(mqdes, notification) \ __sanitizer_syscall_pre_impl_mq_notify((long)(mqdes), (long)(notification)) #define __sanitizer_syscall_post_mq_notify(res, mqdes, notification) \ __sanitizer_syscall_post_impl_mq_notify(res, (long)(mqdes), \ (long)(notification)) #define __sanitizer_syscall_pre_mq_getsetattr(mqdes, mqstat, omqstat) \ __sanitizer_syscall_pre_impl_mq_getsetattr((long)(mqdes), (long)(mqstat), \ (long)(omqstat)) #define __sanitizer_syscall_post_mq_getsetattr(res, mqdes, mqstat, omqstat) \ __sanitizer_syscall_post_impl_mq_getsetattr(res, (long)(mqdes), \ (long)(mqstat), (long)(omqstat)) #define __sanitizer_syscall_pre_pciconfig_iobase(which, bus, devfn) \ __sanitizer_syscall_pre_impl_pciconfig_iobase((long)(which), (long)(bus), \ (long)(devfn)) #define __sanitizer_syscall_post_pciconfig_iobase(res, which, bus, devfn) \ __sanitizer_syscall_post_impl_pciconfig_iobase(res, (long)(which), \ (long)(bus), (long)(devfn)) #define __sanitizer_syscall_pre_pciconfig_read(bus, dfn, off, len, buf) \ __sanitizer_syscall_pre_impl_pciconfig_read( \ (long)(bus), (long)(dfn), (long)(off), (long)(len), (long)(buf)) #define __sanitizer_syscall_post_pciconfig_read(res, bus, dfn, off, len, buf) \ __sanitizer_syscall_post_impl_pciconfig_read( \ res, (long)(bus), (long)(dfn), (long)(off), (long)(len), (long)(buf)) #define __sanitizer_syscall_pre_pciconfig_write(bus, dfn, off, len, buf) \ __sanitizer_syscall_pre_impl_pciconfig_write( \ (long)(bus), (long)(dfn), (long)(off), (long)(len), (long)(buf)) #define __sanitizer_syscall_post_pciconfig_write(res, bus, dfn, off, len, buf) \ __sanitizer_syscall_post_impl_pciconfig_write( \ res, (long)(bus), (long)(dfn), (long)(off), (long)(len), 
(long)(buf)) #define __sanitizer_syscall_pre_swapon(specialfile, swap_flags) \ __sanitizer_syscall_pre_impl_swapon((long)(specialfile), (long)(swap_flags)) #define __sanitizer_syscall_post_swapon(res, specialfile, swap_flags) \ __sanitizer_syscall_post_impl_swapon(res, (long)(specialfile), \ (long)(swap_flags)) #define __sanitizer_syscall_pre_swapoff(specialfile) \ __sanitizer_syscall_pre_impl_swapoff((long)(specialfile)) #define __sanitizer_syscall_post_swapoff(res, specialfile) \ __sanitizer_syscall_post_impl_swapoff(res, (long)(specialfile)) #define __sanitizer_syscall_pre_sysctl(args) \ __sanitizer_syscall_pre_impl_sysctl((long)(args)) #define __sanitizer_syscall_post_sysctl(res, args) \ __sanitizer_syscall_post_impl_sysctl(res, (long)(args)) #define __sanitizer_syscall_pre_sysinfo(info) \ __sanitizer_syscall_pre_impl_sysinfo((long)(info)) #define __sanitizer_syscall_post_sysinfo(res, info) \ __sanitizer_syscall_post_impl_sysinfo(res, (long)(info)) #define __sanitizer_syscall_pre_sysfs(option, arg1, arg2) \ __sanitizer_syscall_pre_impl_sysfs((long)(option), (long)(arg1), (long)(arg2)) #define __sanitizer_syscall_post_sysfs(res, option, arg1, arg2) \ __sanitizer_syscall_post_impl_sysfs(res, (long)(option), (long)(arg1), \ (long)(arg2)) #define __sanitizer_syscall_pre_syslog(type, buf, len) \ __sanitizer_syscall_pre_impl_syslog((long)(type), (long)(buf), (long)(len)) #define __sanitizer_syscall_post_syslog(res, type, buf, len) \ __sanitizer_syscall_post_impl_syslog(res, (long)(type), (long)(buf), \ (long)(len)) #define __sanitizer_syscall_pre_uselib(library) \ __sanitizer_syscall_pre_impl_uselib((long)(library)) #define __sanitizer_syscall_post_uselib(res, library) \ __sanitizer_syscall_post_impl_uselib(res, (long)(library)) #define __sanitizer_syscall_pre_ni_syscall() \ __sanitizer_syscall_pre_impl_ni_syscall() #define __sanitizer_syscall_post_ni_syscall(res) \ __sanitizer_syscall_post_impl_ni_syscall(res) #define __sanitizer_syscall_pre_ptrace(request, pid, addr, data) \ __sanitizer_syscall_pre_impl_ptrace((long)(request), (long)(pid), \ (long)(addr), (long)(data)) #define __sanitizer_syscall_post_ptrace(res, request, pid, addr, data) \ __sanitizer_syscall_post_impl_ptrace(res, (long)(request), (long)(pid), \ (long)(addr), (long)(data)) #define __sanitizer_syscall_pre_add_key(_type, _description, _payload, plen, \ destringid) \ __sanitizer_syscall_pre_impl_add_key((long)(_type), (long)(_description), \ (long)(_payload), (long)(plen), \ (long)(destringid)) #define __sanitizer_syscall_post_add_key(res, _type, _description, _payload, \ plen, destringid) \ __sanitizer_syscall_post_impl_add_key( \ res, (long)(_type), (long)(_description), (long)(_payload), \ (long)(plen), (long)(destringid)) #define __sanitizer_syscall_pre_request_key(_type, _description, \ _callout_info, destringid) \ __sanitizer_syscall_pre_impl_request_key( \ (long)(_type), (long)(_description), (long)(_callout_info), \ (long)(destringid)) #define __sanitizer_syscall_post_request_key(res, _type, _description, \ _callout_info, destringid) \ __sanitizer_syscall_post_impl_request_key( \ res, (long)(_type), (long)(_description), (long)(_callout_info), \ (long)(destringid)) #define __sanitizer_syscall_pre_keyctl(cmd, arg2, arg3, arg4, arg5) \ __sanitizer_syscall_pre_impl_keyctl((long)(cmd), (long)(arg2), (long)(arg3), \ (long)(arg4), (long)(arg5)) #define __sanitizer_syscall_post_keyctl(res, cmd, arg2, arg3, arg4, arg5) \ __sanitizer_syscall_post_impl_keyctl(res, (long)(cmd), (long)(arg2), \ (long)(arg3), (long)(arg4), \ 
(long)(arg5)) #define __sanitizer_syscall_pre_ioprio_set(which, who, ioprio) \ __sanitizer_syscall_pre_impl_ioprio_set((long)(which), (long)(who), \ (long)(ioprio)) #define __sanitizer_syscall_post_ioprio_set(res, which, who, ioprio) \ __sanitizer_syscall_post_impl_ioprio_set(res, (long)(which), (long)(who), \ (long)(ioprio)) #define __sanitizer_syscall_pre_ioprio_get(which, who) \ __sanitizer_syscall_pre_impl_ioprio_get((long)(which), (long)(who)) #define __sanitizer_syscall_post_ioprio_get(res, which, who) \ __sanitizer_syscall_post_impl_ioprio_get(res, (long)(which), (long)(who)) #define __sanitizer_syscall_pre_set_mempolicy(mode, nmask, maxnode) \ __sanitizer_syscall_pre_impl_set_mempolicy((long)(mode), (long)(nmask), \ (long)(maxnode)) #define __sanitizer_syscall_post_set_mempolicy(res, mode, nmask, maxnode) \ __sanitizer_syscall_post_impl_set_mempolicy(res, (long)(mode), \ (long)(nmask), (long)(maxnode)) #define __sanitizer_syscall_pre_migrate_pages(pid, maxnode, from, to) \ __sanitizer_syscall_pre_impl_migrate_pages((long)(pid), (long)(maxnode), \ (long)(from), (long)(to)) #define __sanitizer_syscall_post_migrate_pages(res, pid, maxnode, from, to) \ __sanitizer_syscall_post_impl_migrate_pages( \ res, (long)(pid), (long)(maxnode), (long)(from), (long)(to)) #define __sanitizer_syscall_pre_move_pages(pid, nr_pages, pages, nodes, \ status, flags) \ __sanitizer_syscall_pre_impl_move_pages((long)(pid), (long)(nr_pages), \ (long)(pages), (long)(nodes), \ (long)(status), (long)(flags)) #define __sanitizer_syscall_post_move_pages(res, pid, nr_pages, pages, nodes, \ status, flags) \ __sanitizer_syscall_post_impl_move_pages(res, (long)(pid), (long)(nr_pages), \ (long)(pages), (long)(nodes), \ (long)(status), (long)(flags)) #define __sanitizer_syscall_pre_mbind(start, len, mode, nmask, maxnode, flags) \ __sanitizer_syscall_pre_impl_mbind((long)(start), (long)(len), (long)(mode), \ (long)(nmask), (long)(maxnode), \ (long)(flags)) #define __sanitizer_syscall_post_mbind(res, start, len, mode, nmask, maxnode, \ flags) \ __sanitizer_syscall_post_impl_mbind(res, (long)(start), (long)(len), \ (long)(mode), (long)(nmask), \ (long)(maxnode), (long)(flags)) #define __sanitizer_syscall_pre_get_mempolicy(policy, nmask, maxnode, addr, \ flags) \ __sanitizer_syscall_pre_impl_get_mempolicy((long)(policy), (long)(nmask), \ (long)(maxnode), (long)(addr), \ (long)(flags)) #define __sanitizer_syscall_post_get_mempolicy(res, policy, nmask, maxnode, \ addr, flags) \ __sanitizer_syscall_post_impl_get_mempolicy(res, (long)(policy), \ (long)(nmask), (long)(maxnode), \ (long)(addr), (long)(flags)) #define __sanitizer_syscall_pre_inotify_init() \ __sanitizer_syscall_pre_impl_inotify_init() #define __sanitizer_syscall_post_inotify_init(res) \ __sanitizer_syscall_post_impl_inotify_init(res) #define __sanitizer_syscall_pre_inotify_init1(flags) \ __sanitizer_syscall_pre_impl_inotify_init1((long)(flags)) #define __sanitizer_syscall_post_inotify_init1(res, flags) \ __sanitizer_syscall_post_impl_inotify_init1(res, (long)(flags)) #define __sanitizer_syscall_pre_inotify_add_watch(fd, path, mask) \ __sanitizer_syscall_pre_impl_inotify_add_watch((long)(fd), (long)(path), \ (long)(mask)) #define __sanitizer_syscall_post_inotify_add_watch(res, fd, path, mask) \ __sanitizer_syscall_post_impl_inotify_add_watch(res, (long)(fd), \ (long)(path), (long)(mask)) #define __sanitizer_syscall_pre_inotify_rm_watch(fd, wd) \ __sanitizer_syscall_pre_impl_inotify_rm_watch((long)(fd), (long)(wd)) #define 
__sanitizer_syscall_post_inotify_rm_watch(res, fd, wd) \ __sanitizer_syscall_post_impl_inotify_rm_watch(res, (long)(fd), (long)(wd)) #define __sanitizer_syscall_pre_spu_run(fd, unpc, ustatus) \ __sanitizer_syscall_pre_impl_spu_run((long)(fd), (long)(unpc), \ (long)(ustatus)) #define __sanitizer_syscall_post_spu_run(res, fd, unpc, ustatus) \ __sanitizer_syscall_post_impl_spu_run(res, (long)(fd), (long)(unpc), \ (long)(ustatus)) #define __sanitizer_syscall_pre_spu_create(name, flags, mode, fd) \ __sanitizer_syscall_pre_impl_spu_create((long)(name), (long)(flags), \ (long)(mode), (long)(fd)) #define __sanitizer_syscall_post_spu_create(res, name, flags, mode, fd) \ __sanitizer_syscall_post_impl_spu_create(res, (long)(name), (long)(flags), \ (long)(mode), (long)(fd)) #define __sanitizer_syscall_pre_mknodat(dfd, filename, mode, dev) \ __sanitizer_syscall_pre_impl_mknodat((long)(dfd), (long)(filename), \ (long)(mode), (long)(dev)) #define __sanitizer_syscall_post_mknodat(res, dfd, filename, mode, dev) \ __sanitizer_syscall_post_impl_mknodat(res, (long)(dfd), (long)(filename), \ (long)(mode), (long)(dev)) #define __sanitizer_syscall_pre_mkdirat(dfd, pathname, mode) \ __sanitizer_syscall_pre_impl_mkdirat((long)(dfd), (long)(pathname), \ (long)(mode)) #define __sanitizer_syscall_post_mkdirat(res, dfd, pathname, mode) \ __sanitizer_syscall_post_impl_mkdirat(res, (long)(dfd), (long)(pathname), \ (long)(mode)) #define __sanitizer_syscall_pre_unlinkat(dfd, pathname, flag) \ __sanitizer_syscall_pre_impl_unlinkat((long)(dfd), (long)(pathname), \ (long)(flag)) #define __sanitizer_syscall_post_unlinkat(res, dfd, pathname, flag) \ __sanitizer_syscall_post_impl_unlinkat(res, (long)(dfd), (long)(pathname), \ (long)(flag)) #define __sanitizer_syscall_pre_symlinkat(oldname, newdfd, newname) \ __sanitizer_syscall_pre_impl_symlinkat((long)(oldname), (long)(newdfd), \ (long)(newname)) #define __sanitizer_syscall_post_symlinkat(res, oldname, newdfd, newname) \ __sanitizer_syscall_post_impl_symlinkat(res, (long)(oldname), \ (long)(newdfd), (long)(newname)) #define __sanitizer_syscall_pre_linkat(olddfd, oldname, newdfd, newname, \ flags) \ __sanitizer_syscall_pre_impl_linkat((long)(olddfd), (long)(oldname), \ (long)(newdfd), (long)(newname), \ (long)(flags)) #define __sanitizer_syscall_post_linkat(res, olddfd, oldname, newdfd, newname, \ flags) \ __sanitizer_syscall_post_impl_linkat(res, (long)(olddfd), (long)(oldname), \ (long)(newdfd), (long)(newname), \ (long)(flags)) #define __sanitizer_syscall_pre_renameat(olddfd, oldname, newdfd, newname) \ __sanitizer_syscall_pre_impl_renameat((long)(olddfd), (long)(oldname), \ (long)(newdfd), (long)(newname)) #define __sanitizer_syscall_post_renameat(res, olddfd, oldname, newdfd, \ newname) \ __sanitizer_syscall_post_impl_renameat(res, (long)(olddfd), (long)(oldname), \ (long)(newdfd), (long)(newname)) #define __sanitizer_syscall_pre_futimesat(dfd, filename, utimes) \ __sanitizer_syscall_pre_impl_futimesat((long)(dfd), (long)(filename), \ (long)(utimes)) #define __sanitizer_syscall_post_futimesat(res, dfd, filename, utimes) \ __sanitizer_syscall_post_impl_futimesat(res, (long)(dfd), (long)(filename), \ (long)(utimes)) #define __sanitizer_syscall_pre_faccessat(dfd, filename, mode) \ __sanitizer_syscall_pre_impl_faccessat((long)(dfd), (long)(filename), \ (long)(mode)) #define __sanitizer_syscall_post_faccessat(res, dfd, filename, mode) \ __sanitizer_syscall_post_impl_faccessat(res, (long)(dfd), (long)(filename), \ (long)(mode)) #define __sanitizer_syscall_pre_fchmodat(dfd, 
filename, mode) \ __sanitizer_syscall_pre_impl_fchmodat((long)(dfd), (long)(filename), \ (long)(mode)) #define __sanitizer_syscall_post_fchmodat(res, dfd, filename, mode) \ __sanitizer_syscall_post_impl_fchmodat(res, (long)(dfd), (long)(filename), \ (long)(mode)) #define __sanitizer_syscall_pre_fchownat(dfd, filename, user, group, flag) \ __sanitizer_syscall_pre_impl_fchownat((long)(dfd), (long)(filename), \ (long)(user), (long)(group), \ (long)(flag)) #define __sanitizer_syscall_post_fchownat(res, dfd, filename, user, group, \ flag) \ __sanitizer_syscall_post_impl_fchownat(res, (long)(dfd), (long)(filename), \ (long)(user), (long)(group), \ (long)(flag)) #define __sanitizer_syscall_pre_openat(dfd, filename, flags, mode) \ __sanitizer_syscall_pre_impl_openat((long)(dfd), (long)(filename), \ (long)(flags), (long)(mode)) #define __sanitizer_syscall_post_openat(res, dfd, filename, flags, mode) \ __sanitizer_syscall_post_impl_openat(res, (long)(dfd), (long)(filename), \ (long)(flags), (long)(mode)) #define __sanitizer_syscall_pre_newfstatat(dfd, filename, statbuf, flag) \ __sanitizer_syscall_pre_impl_newfstatat((long)(dfd), (long)(filename), \ (long)(statbuf), (long)(flag)) #define __sanitizer_syscall_post_newfstatat(res, dfd, filename, statbuf, flag) \ __sanitizer_syscall_post_impl_newfstatat(res, (long)(dfd), (long)(filename), \ (long)(statbuf), (long)(flag)) #define __sanitizer_syscall_pre_fstatat64(dfd, filename, statbuf, flag) \ __sanitizer_syscall_pre_impl_fstatat64((long)(dfd), (long)(filename), \ (long)(statbuf), (long)(flag)) #define __sanitizer_syscall_post_fstatat64(res, dfd, filename, statbuf, flag) \ __sanitizer_syscall_post_impl_fstatat64(res, (long)(dfd), (long)(filename), \ (long)(statbuf), (long)(flag)) #define __sanitizer_syscall_pre_readlinkat(dfd, path, buf, bufsiz) \ __sanitizer_syscall_pre_impl_readlinkat((long)(dfd), (long)(path), \ (long)(buf), (long)(bufsiz)) #define __sanitizer_syscall_post_readlinkat(res, dfd, path, buf, bufsiz) \ __sanitizer_syscall_post_impl_readlinkat(res, (long)(dfd), (long)(path), \ (long)(buf), (long)(bufsiz)) #define __sanitizer_syscall_pre_utimensat(dfd, filename, utimes, flags) \ __sanitizer_syscall_pre_impl_utimensat((long)(dfd), (long)(filename), \ (long)(utimes), (long)(flags)) #define __sanitizer_syscall_post_utimensat(res, dfd, filename, utimes, flags) \ __sanitizer_syscall_post_impl_utimensat(res, (long)(dfd), (long)(filename), \ (long)(utimes), (long)(flags)) #define __sanitizer_syscall_pre_unshare(unshare_flags) \ __sanitizer_syscall_pre_impl_unshare((long)(unshare_flags)) #define __sanitizer_syscall_post_unshare(res, unshare_flags) \ __sanitizer_syscall_post_impl_unshare(res, (long)(unshare_flags)) #define __sanitizer_syscall_pre_splice(fd_in, off_in, fd_out, off_out, len, \ flags) \ __sanitizer_syscall_pre_impl_splice((long)(fd_in), (long)(off_in), \ (long)(fd_out), (long)(off_out), \ (long)(len), (long)(flags)) #define __sanitizer_syscall_post_splice(res, fd_in, off_in, fd_out, off_out, \ len, flags) \ __sanitizer_syscall_post_impl_splice(res, (long)(fd_in), (long)(off_in), \ (long)(fd_out), (long)(off_out), \ (long)(len), (long)(flags)) #define __sanitizer_syscall_pre_vmsplice(fd, iov, nr_segs, flags) \ __sanitizer_syscall_pre_impl_vmsplice((long)(fd), (long)(iov), \ (long)(nr_segs), (long)(flags)) #define __sanitizer_syscall_post_vmsplice(res, fd, iov, nr_segs, flags) \ __sanitizer_syscall_post_impl_vmsplice(res, (long)(fd), (long)(iov), \ (long)(nr_segs), (long)(flags)) #define __sanitizer_syscall_pre_tee(fdin, fdout, len, 
flags) \ __sanitizer_syscall_pre_impl_tee((long)(fdin), (long)(fdout), (long)(len), \ (long)(flags)) #define __sanitizer_syscall_post_tee(res, fdin, fdout, len, flags) \ __sanitizer_syscall_post_impl_tee(res, (long)(fdin), (long)(fdout), \ (long)(len), (long)(flags)) #define __sanitizer_syscall_pre_get_robust_list(pid, head_ptr, len_ptr) \ __sanitizer_syscall_pre_impl_get_robust_list((long)(pid), (long)(head_ptr), \ (long)(len_ptr)) #define __sanitizer_syscall_post_get_robust_list(res, pid, head_ptr, len_ptr) \ __sanitizer_syscall_post_impl_get_robust_list( \ res, (long)(pid), (long)(head_ptr), (long)(len_ptr)) #define __sanitizer_syscall_pre_set_robust_list(head, len) \ __sanitizer_syscall_pre_impl_set_robust_list((long)(head), (long)(len)) #define __sanitizer_syscall_post_set_robust_list(res, head, len) \ __sanitizer_syscall_post_impl_set_robust_list(res, (long)(head), (long)(len)) #define __sanitizer_syscall_pre_getcpu(cpu, node, cache) \ __sanitizer_syscall_pre_impl_getcpu((long)(cpu), (long)(node), (long)(cache)) #define __sanitizer_syscall_post_getcpu(res, cpu, node, cache) \ __sanitizer_syscall_post_impl_getcpu(res, (long)(cpu), (long)(node), \ (long)(cache)) #define __sanitizer_syscall_pre_signalfd(ufd, user_mask, sizemask) \ __sanitizer_syscall_pre_impl_signalfd((long)(ufd), (long)(user_mask), \ (long)(sizemask)) #define __sanitizer_syscall_post_signalfd(res, ufd, user_mask, sizemask) \ __sanitizer_syscall_post_impl_signalfd(res, (long)(ufd), (long)(user_mask), \ (long)(sizemask)) #define __sanitizer_syscall_pre_signalfd4(ufd, user_mask, sizemask, flags) \ __sanitizer_syscall_pre_impl_signalfd4((long)(ufd), (long)(user_mask), \ (long)(sizemask), (long)(flags)) #define __sanitizer_syscall_post_signalfd4(res, ufd, user_mask, sizemask, \ flags) \ __sanitizer_syscall_post_impl_signalfd4(res, (long)(ufd), (long)(user_mask), \ (long)(sizemask), (long)(flags)) #define __sanitizer_syscall_pre_timerfd_create(clockid, flags) \ __sanitizer_syscall_pre_impl_timerfd_create((long)(clockid), (long)(flags)) #define __sanitizer_syscall_post_timerfd_create(res, clockid, flags) \ __sanitizer_syscall_post_impl_timerfd_create(res, (long)(clockid), \ (long)(flags)) #define __sanitizer_syscall_pre_timerfd_settime(ufd, flags, utmr, otmr) \ __sanitizer_syscall_pre_impl_timerfd_settime((long)(ufd), (long)(flags), \ (long)(utmr), (long)(otmr)) #define __sanitizer_syscall_post_timerfd_settime(res, ufd, flags, utmr, otmr) \ __sanitizer_syscall_post_impl_timerfd_settime( \ res, (long)(ufd), (long)(flags), (long)(utmr), (long)(otmr)) #define __sanitizer_syscall_pre_timerfd_gettime(ufd, otmr) \ __sanitizer_syscall_pre_impl_timerfd_gettime((long)(ufd), (long)(otmr)) #define __sanitizer_syscall_post_timerfd_gettime(res, ufd, otmr) \ __sanitizer_syscall_post_impl_timerfd_gettime(res, (long)(ufd), (long)(otmr)) #define __sanitizer_syscall_pre_eventfd(count) \ __sanitizer_syscall_pre_impl_eventfd((long)(count)) #define __sanitizer_syscall_post_eventfd(res, count) \ __sanitizer_syscall_post_impl_eventfd(res, (long)(count)) #define __sanitizer_syscall_pre_eventfd2(count, flags) \ __sanitizer_syscall_pre_impl_eventfd2((long)(count), (long)(flags)) #define __sanitizer_syscall_post_eventfd2(res, count, flags) \ __sanitizer_syscall_post_impl_eventfd2(res, (long)(count), (long)(flags)) #define __sanitizer_syscall_pre_old_readdir(arg0, arg1, arg2) \ __sanitizer_syscall_pre_impl_old_readdir((long)(arg0), (long)(arg1), \ (long)(arg2)) #define __sanitizer_syscall_post_old_readdir(res, arg0, arg1, arg2) \ 
__sanitizer_syscall_post_impl_old_readdir(res, (long)(arg0), (long)(arg1), \ (long)(arg2)) #define __sanitizer_syscall_pre_pselect6(arg0, arg1, arg2, arg3, arg4, arg5) \ __sanitizer_syscall_pre_impl_pselect6((long)(arg0), (long)(arg1), \ (long)(arg2), (long)(arg3), \ (long)(arg4), (long)(arg5)) #define __sanitizer_syscall_post_pselect6(res, arg0, arg1, arg2, arg3, arg4, \ arg5) \ __sanitizer_syscall_post_impl_pselect6(res, (long)(arg0), (long)(arg1), \ (long)(arg2), (long)(arg3), \ (long)(arg4), (long)(arg5)) #define __sanitizer_syscall_pre_ppoll(arg0, arg1, arg2, arg3, arg4) \ __sanitizer_syscall_pre_impl_ppoll((long)(arg0), (long)(arg1), (long)(arg2), \ (long)(arg3), (long)(arg4)) #define __sanitizer_syscall_post_ppoll(res, arg0, arg1, arg2, arg3, arg4) \ __sanitizer_syscall_post_impl_ppoll(res, (long)(arg0), (long)(arg1), \ (long)(arg2), (long)(arg3), \ (long)(arg4)) #define __sanitizer_syscall_pre_syncfs(fd) \ __sanitizer_syscall_pre_impl_syncfs((long)(fd)) #define __sanitizer_syscall_post_syncfs(res, fd) \ __sanitizer_syscall_post_impl_syncfs(res, (long)(fd)) #define __sanitizer_syscall_pre_perf_event_open(attr_uptr, pid, cpu, group_fd, \ flags) \ __sanitizer_syscall_pre_impl_perf_event_open((long)(attr_uptr), (long)(pid), \ (long)(cpu), (long)(group_fd), \ (long)(flags)) #define __sanitizer_syscall_post_perf_event_open(res, attr_uptr, pid, cpu, \ group_fd, flags) \ __sanitizer_syscall_post_impl_perf_event_open( \ res, (long)(attr_uptr), (long)(pid), (long)(cpu), (long)(group_fd), \ (long)(flags)) #define __sanitizer_syscall_pre_mmap_pgoff(addr, len, prot, flags, fd, pgoff) \ __sanitizer_syscall_pre_impl_mmap_pgoff((long)(addr), (long)(len), \ (long)(prot), (long)(flags), \ (long)(fd), (long)(pgoff)) #define __sanitizer_syscall_post_mmap_pgoff(res, addr, len, prot, flags, fd, \ pgoff) \ __sanitizer_syscall_post_impl_mmap_pgoff(res, (long)(addr), (long)(len), \ (long)(prot), (long)(flags), \ (long)(fd), (long)(pgoff)) #define __sanitizer_syscall_pre_old_mmap(arg) \ __sanitizer_syscall_pre_impl_old_mmap((long)(arg)) #define __sanitizer_syscall_post_old_mmap(res, arg) \ __sanitizer_syscall_post_impl_old_mmap(res, (long)(arg)) #define __sanitizer_syscall_pre_name_to_handle_at(dfd, name, handle, mnt_id, \ flag) \ __sanitizer_syscall_pre_impl_name_to_handle_at( \ (long)(dfd), (long)(name), (long)(handle), (long)(mnt_id), (long)(flag)) #define __sanitizer_syscall_post_name_to_handle_at(res, dfd, name, handle, \ mnt_id, flag) \ __sanitizer_syscall_post_impl_name_to_handle_at( \ res, (long)(dfd), (long)(name), (long)(handle), (long)(mnt_id), \ (long)(flag)) #define __sanitizer_syscall_pre_open_by_handle_at(mountdirfd, handle, flags) \ __sanitizer_syscall_pre_impl_open_by_handle_at( \ (long)(mountdirfd), (long)(handle), (long)(flags)) #define __sanitizer_syscall_post_open_by_handle_at(res, mountdirfd, handle, \ flags) \ __sanitizer_syscall_post_impl_open_by_handle_at( \ res, (long)(mountdirfd), (long)(handle), (long)(flags)) #define __sanitizer_syscall_pre_setns(fd, nstype) \ __sanitizer_syscall_pre_impl_setns((long)(fd), (long)(nstype)) #define __sanitizer_syscall_post_setns(res, fd, nstype) \ __sanitizer_syscall_post_impl_setns(res, (long)(fd), (long)(nstype)) #define __sanitizer_syscall_pre_process_vm_readv(pid, lvec, liovcnt, rvec, \ riovcnt, flags) \ __sanitizer_syscall_pre_impl_process_vm_readv( \ (long)(pid), (long)(lvec), (long)(liovcnt), (long)(rvec), \ (long)(riovcnt), (long)(flags)) #define __sanitizer_syscall_post_process_vm_readv(res, pid, lvec, liovcnt, \ rvec, riovcnt, flags) \ 
      __sanitizer_syscall_post_impl_process_vm_readv( \
      res, (long)(pid), (long)(lvec), (long)(liovcnt), (long)(rvec), \
      (long)(riovcnt), (long)(flags))
#define __sanitizer_syscall_pre_process_vm_writev(pid, lvec, liovcnt, rvec, \
                                                  riovcnt, flags) \
  __sanitizer_syscall_pre_impl_process_vm_writev( \
      (long)(pid), (long)(lvec), (long)(liovcnt), (long)(rvec), \
      (long)(riovcnt), (long)(flags))
#define __sanitizer_syscall_post_process_vm_writev(res, pid, lvec, liovcnt, \
                                                   rvec, riovcnt, flags) \
  __sanitizer_syscall_post_impl_process_vm_writev( \
      res, (long)(pid), (long)(lvec), (long)(liovcnt), (long)(rvec), \
      (long)(riovcnt), (long)(flags))
#define __sanitizer_syscall_pre_fork() __sanitizer_syscall_pre_impl_fork()
#define __sanitizer_syscall_post_fork(res) \
  __sanitizer_syscall_post_impl_fork(res)
#define __sanitizer_syscall_pre_vfork() __sanitizer_syscall_pre_impl_vfork()
#define __sanitizer_syscall_post_vfork(res) \
  __sanitizer_syscall_post_impl_vfork(res)
#define __sanitizer_syscall_pre_sigaction(signum, act, oldact) \
  __sanitizer_syscall_pre_impl_sigaction((long)signum, (long)act, (long)oldact)
#define __sanitizer_syscall_post_sigaction(res, signum, act, oldact) \
  __sanitizer_syscall_post_impl_sigaction(res, (long)signum, (long)act, \
                                          (long)oldact)
#define __sanitizer_syscall_pre_rt_sigaction(signum, act, oldact, sz) \
  __sanitizer_syscall_pre_impl_rt_sigaction((long)signum, (long)act, \
                                            (long)oldact, (long)sz)
#define __sanitizer_syscall_post_rt_sigaction(res, signum, act, oldact, sz) \
  __sanitizer_syscall_post_impl_rt_sigaction(res, (long)signum, (long)act, \
                                             (long)oldact, (long)sz)
#define __sanitizer_syscall_pre_sigaltstack(ss, oss) \
  __sanitizer_syscall_pre_impl_sigaltstack((long)ss, (long)oss)
#define __sanitizer_syscall_post_sigaltstack(res, ss, oss) \
  __sanitizer_syscall_post_impl_sigaltstack(res, (long)ss, (long)oss)

// And now a few syscalls we don't handle yet.
#define __sanitizer_syscall_pre_afs_syscall(...)
#define __sanitizer_syscall_pre_arch_prctl(...)
#define __sanitizer_syscall_pre_break(...)
#define __sanitizer_syscall_pre_chown32(...)
#define __sanitizer_syscall_pre_clone(...)
#define __sanitizer_syscall_pre_create_module(...)
#define __sanitizer_syscall_pre_epoll_ctl_old(...)
#define __sanitizer_syscall_pre_epoll_wait_old(...)
#define __sanitizer_syscall_pre_execve(...)
#define __sanitizer_syscall_pre_fadvise64(...)
#define __sanitizer_syscall_pre_fadvise64_64(...)
#define __sanitizer_syscall_pre_fallocate(...)
#define __sanitizer_syscall_pre_fanotify_init(...)
#define __sanitizer_syscall_pre_fanotify_mark(...)
#define __sanitizer_syscall_pre_fchown32(...)
#define __sanitizer_syscall_pre_ftime(...)
#define __sanitizer_syscall_pre_ftruncate64(...)
#define __sanitizer_syscall_pre_futex(...)
#define __sanitizer_syscall_pre_getegid32(...)
#define __sanitizer_syscall_pre_geteuid32(...)
#define __sanitizer_syscall_pre_getgid32(...)
#define __sanitizer_syscall_pre_getgroups32(...)
#define __sanitizer_syscall_pre_get_kernel_syms(...)
#define __sanitizer_syscall_pre_getpmsg(...)
#define __sanitizer_syscall_pre_getresgid32(...)
#define __sanitizer_syscall_pre_getresuid32(...)
#define __sanitizer_syscall_pre_get_thread_area(...)
#define __sanitizer_syscall_pre_getuid32(...)
#define __sanitizer_syscall_pre_gtty(...)
#define __sanitizer_syscall_pre_idle(...)
#define __sanitizer_syscall_pre_iopl(...)
#define __sanitizer_syscall_pre_lchown32(...)
#define __sanitizer_syscall_pre__llseek(...)
#define __sanitizer_syscall_pre_lock(...)
#define __sanitizer_syscall_pre_madvise1(...)
#define __sanitizer_syscall_pre_mmap(...) #define __sanitizer_syscall_pre_mmap2(...) #define __sanitizer_syscall_pre_modify_ldt(...) #define __sanitizer_syscall_pre_mpx(...) #define __sanitizer_syscall_pre__newselect(...) #define __sanitizer_syscall_pre_nfsservctl(...) #define __sanitizer_syscall_pre_oldfstat(...) #define __sanitizer_syscall_pre_oldlstat(...) #define __sanitizer_syscall_pre_oldolduname(...) #define __sanitizer_syscall_pre_oldstat(...) #define __sanitizer_syscall_pre_prctl(...) #define __sanitizer_syscall_pre_prof(...) #define __sanitizer_syscall_pre_profil(...) #define __sanitizer_syscall_pre_putpmsg(...) #define __sanitizer_syscall_pre_query_module(...) #define __sanitizer_syscall_pre_readahead(...) #define __sanitizer_syscall_pre_readdir(...) #define __sanitizer_syscall_pre_rt_sigreturn(...) #define __sanitizer_syscall_pre_rt_sigsuspend(...) #define __sanitizer_syscall_pre_security(...) #define __sanitizer_syscall_pre_setfsgid32(...) #define __sanitizer_syscall_pre_setfsuid32(...) #define __sanitizer_syscall_pre_setgid32(...) #define __sanitizer_syscall_pre_setgroups32(...) #define __sanitizer_syscall_pre_setregid32(...) #define __sanitizer_syscall_pre_setresgid32(...) #define __sanitizer_syscall_pre_setresuid32(...) #define __sanitizer_syscall_pre_setreuid32(...) #define __sanitizer_syscall_pre_set_thread_area(...) #define __sanitizer_syscall_pre_setuid32(...) #define __sanitizer_syscall_pre_sigreturn(...) #define __sanitizer_syscall_pre_sigsuspend(...) #define __sanitizer_syscall_pre_stty(...) #define __sanitizer_syscall_pre_sync_file_range(...) #define __sanitizer_syscall_pre__sysctl(...) #define __sanitizer_syscall_pre_truncate64(...) #define __sanitizer_syscall_pre_tuxcall(...) #define __sanitizer_syscall_pre_ugetrlimit(...) #define __sanitizer_syscall_pre_ulimit(...) #define __sanitizer_syscall_pre_umount2(...) #define __sanitizer_syscall_pre_vm86(...) #define __sanitizer_syscall_pre_vm86old(...) #define __sanitizer_syscall_pre_vserver(...) #define __sanitizer_syscall_post_afs_syscall(res, ...) #define __sanitizer_syscall_post_arch_prctl(res, ...) #define __sanitizer_syscall_post_break(res, ...) #define __sanitizer_syscall_post_chown32(res, ...) #define __sanitizer_syscall_post_clone(res, ...) #define __sanitizer_syscall_post_create_module(res, ...) #define __sanitizer_syscall_post_epoll_ctl_old(res, ...) #define __sanitizer_syscall_post_epoll_wait_old(res, ...) #define __sanitizer_syscall_post_execve(res, ...) #define __sanitizer_syscall_post_fadvise64(res, ...) #define __sanitizer_syscall_post_fadvise64_64(res, ...) #define __sanitizer_syscall_post_fallocate(res, ...) #define __sanitizer_syscall_post_fanotify_init(res, ...) #define __sanitizer_syscall_post_fanotify_mark(res, ...) #define __sanitizer_syscall_post_fchown32(res, ...) #define __sanitizer_syscall_post_ftime(res, ...) #define __sanitizer_syscall_post_ftruncate64(res, ...) #define __sanitizer_syscall_post_futex(res, ...) #define __sanitizer_syscall_post_getegid32(res, ...) #define __sanitizer_syscall_post_geteuid32(res, ...) #define __sanitizer_syscall_post_getgid32(res, ...) #define __sanitizer_syscall_post_getgroups32(res, ...) #define __sanitizer_syscall_post_get_kernel_syms(res, ...) #define __sanitizer_syscall_post_getpmsg(res, ...) #define __sanitizer_syscall_post_getresgid32(res, ...) #define __sanitizer_syscall_post_getresuid32(res, ...) #define __sanitizer_syscall_post_get_thread_area(res, ...) #define __sanitizer_syscall_post_getuid32(res, ...) #define __sanitizer_syscall_post_gtty(res, ...) 
#define __sanitizer_syscall_post_idle(res, ...) #define __sanitizer_syscall_post_iopl(res, ...) #define __sanitizer_syscall_post_lchown32(res, ...) #define __sanitizer_syscall_post__llseek(res, ...) #define __sanitizer_syscall_post_lock(res, ...) #define __sanitizer_syscall_post_madvise1(res, ...) #define __sanitizer_syscall_post_mmap2(res, ...) #define __sanitizer_syscall_post_mmap(res, ...) #define __sanitizer_syscall_post_modify_ldt(res, ...) #define __sanitizer_syscall_post_mpx(res, ...) #define __sanitizer_syscall_post__newselect(res, ...) #define __sanitizer_syscall_post_nfsservctl(res, ...) #define __sanitizer_syscall_post_oldfstat(res, ...) #define __sanitizer_syscall_post_oldlstat(res, ...) #define __sanitizer_syscall_post_oldolduname(res, ...) #define __sanitizer_syscall_post_oldstat(res, ...) #define __sanitizer_syscall_post_prctl(res, ...) #define __sanitizer_syscall_post_profil(res, ...) #define __sanitizer_syscall_post_prof(res, ...) #define __sanitizer_syscall_post_putpmsg(res, ...) #define __sanitizer_syscall_post_query_module(res, ...) #define __sanitizer_syscall_post_readahead(res, ...) #define __sanitizer_syscall_post_readdir(res, ...) #define __sanitizer_syscall_post_rt_sigreturn(res, ...) #define __sanitizer_syscall_post_rt_sigsuspend(res, ...) #define __sanitizer_syscall_post_security(res, ...) #define __sanitizer_syscall_post_setfsgid32(res, ...) #define __sanitizer_syscall_post_setfsuid32(res, ...) #define __sanitizer_syscall_post_setgid32(res, ...) #define __sanitizer_syscall_post_setgroups32(res, ...) #define __sanitizer_syscall_post_setregid32(res, ...) #define __sanitizer_syscall_post_setresgid32(res, ...) #define __sanitizer_syscall_post_setresuid32(res, ...) #define __sanitizer_syscall_post_setreuid32(res, ...) #define __sanitizer_syscall_post_set_thread_area(res, ...) #define __sanitizer_syscall_post_setuid32(res, ...) #define __sanitizer_syscall_post_sigreturn(res, ...) #define __sanitizer_syscall_post_sigsuspend(res, ...) #define __sanitizer_syscall_post_stty(res, ...) #define __sanitizer_syscall_post_sync_file_range(res, ...) #define __sanitizer_syscall_post__sysctl(res, ...) #define __sanitizer_syscall_post_truncate64(res, ...) #define __sanitizer_syscall_post_tuxcall(res, ...) #define __sanitizer_syscall_post_ugetrlimit(res, ...) #define __sanitizer_syscall_post_ulimit(res, ...) #define __sanitizer_syscall_post_umount2(res, ...) #define __sanitizer_syscall_post_vm86old(res, ...) #define __sanitizer_syscall_post_vm86(res, ...) #define __sanitizer_syscall_post_vserver(res, ...) #ifdef __cplusplus extern "C" { #endif // Private declarations. Do not call directly from user code. Use macros above. 
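// Usage sketch (not part of the original header): the hook macros defined
// above are intended to bracket a syscall issued directly from user code so
// the sanitizer runtime can model its side effects; the impl declarations
// below are the runtime entry points those macros expand to. The wrapper
// function name here is hypothetical; the macros and the <sys/syscall.h>
// interfaces are the real ones.
//
//   #include <sanitizer/linux_syscall_hooks.h>
//   #include <sys/syscall.h>
//   #include <unistd.h>
//
//   long my_read(int fd, void *buf, size_t count) {
//     __sanitizer_syscall_pre_read(fd, buf, count);
//     long res = syscall(SYS_read, fd, buf, count);
//     __sanitizer_syscall_post_read(res, fd, buf, count);
//     return res;
//   }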
void __sanitizer_syscall_pre_impl_time(long tloc); void __sanitizer_syscall_post_impl_time(long res, long tloc); void __sanitizer_syscall_pre_impl_stime(long tptr); void __sanitizer_syscall_post_impl_stime(long res, long tptr); void __sanitizer_syscall_pre_impl_gettimeofday(long tv, long tz); void __sanitizer_syscall_post_impl_gettimeofday(long res, long tv, long tz); void __sanitizer_syscall_pre_impl_settimeofday(long tv, long tz); void __sanitizer_syscall_post_impl_settimeofday(long res, long tv, long tz); void __sanitizer_syscall_pre_impl_adjtimex(long txc_p); void __sanitizer_syscall_post_impl_adjtimex(long res, long txc_p); void __sanitizer_syscall_pre_impl_times(long tbuf); void __sanitizer_syscall_post_impl_times(long res, long tbuf); void __sanitizer_syscall_pre_impl_gettid(); void __sanitizer_syscall_post_impl_gettid(long res); void __sanitizer_syscall_pre_impl_nanosleep(long rqtp, long rmtp); void __sanitizer_syscall_post_impl_nanosleep(long res, long rqtp, long rmtp); void __sanitizer_syscall_pre_impl_alarm(long seconds); void __sanitizer_syscall_post_impl_alarm(long res, long seconds); void __sanitizer_syscall_pre_impl_getpid(); void __sanitizer_syscall_post_impl_getpid(long res); void __sanitizer_syscall_pre_impl_getppid(); void __sanitizer_syscall_post_impl_getppid(long res); void __sanitizer_syscall_pre_impl_getuid(); void __sanitizer_syscall_post_impl_getuid(long res); void __sanitizer_syscall_pre_impl_geteuid(); void __sanitizer_syscall_post_impl_geteuid(long res); void __sanitizer_syscall_pre_impl_getgid(); void __sanitizer_syscall_post_impl_getgid(long res); void __sanitizer_syscall_pre_impl_getegid(); void __sanitizer_syscall_post_impl_getegid(long res); void __sanitizer_syscall_pre_impl_getresuid(long ruid, long euid, long suid); void __sanitizer_syscall_post_impl_getresuid(long res, long ruid, long euid, long suid); void __sanitizer_syscall_pre_impl_getresgid(long rgid, long egid, long sgid); void __sanitizer_syscall_post_impl_getresgid(long res, long rgid, long egid, long sgid); void __sanitizer_syscall_pre_impl_getpgid(long pid); void __sanitizer_syscall_post_impl_getpgid(long res, long pid); void __sanitizer_syscall_pre_impl_getpgrp(); void __sanitizer_syscall_post_impl_getpgrp(long res); void __sanitizer_syscall_pre_impl_getsid(long pid); void __sanitizer_syscall_post_impl_getsid(long res, long pid); void __sanitizer_syscall_pre_impl_getgroups(long gidsetsize, long grouplist); void __sanitizer_syscall_post_impl_getgroups(long res, long gidsetsize, long grouplist); void __sanitizer_syscall_pre_impl_setregid(long rgid, long egid); void __sanitizer_syscall_post_impl_setregid(long res, long rgid, long egid); void __sanitizer_syscall_pre_impl_setgid(long gid); void __sanitizer_syscall_post_impl_setgid(long res, long gid); void __sanitizer_syscall_pre_impl_setreuid(long ruid, long euid); void __sanitizer_syscall_post_impl_setreuid(long res, long ruid, long euid); void __sanitizer_syscall_pre_impl_setuid(long uid); void __sanitizer_syscall_post_impl_setuid(long res, long uid); void __sanitizer_syscall_pre_impl_setresuid(long ruid, long euid, long suid); void __sanitizer_syscall_post_impl_setresuid(long res, long ruid, long euid, long suid); void __sanitizer_syscall_pre_impl_setresgid(long rgid, long egid, long sgid); void __sanitizer_syscall_post_impl_setresgid(long res, long rgid, long egid, long sgid); void __sanitizer_syscall_pre_impl_setfsuid(long uid); void __sanitizer_syscall_post_impl_setfsuid(long res, long uid); void __sanitizer_syscall_pre_impl_setfsgid(long 
gid); void __sanitizer_syscall_post_impl_setfsgid(long res, long gid); void __sanitizer_syscall_pre_impl_setpgid(long pid, long pgid); void __sanitizer_syscall_post_impl_setpgid(long res, long pid, long pgid); void __sanitizer_syscall_pre_impl_setsid(); void __sanitizer_syscall_post_impl_setsid(long res); void __sanitizer_syscall_pre_impl_setgroups(long gidsetsize, long grouplist); void __sanitizer_syscall_post_impl_setgroups(long res, long gidsetsize, long grouplist); void __sanitizer_syscall_pre_impl_acct(long name); void __sanitizer_syscall_post_impl_acct(long res, long name); void __sanitizer_syscall_pre_impl_capget(long header, long dataptr); void __sanitizer_syscall_post_impl_capget(long res, long header, long dataptr); void __sanitizer_syscall_pre_impl_capset(long header, long data); void __sanitizer_syscall_post_impl_capset(long res, long header, long data); void __sanitizer_syscall_pre_impl_personality(long personality); void __sanitizer_syscall_post_impl_personality(long res, long personality); void __sanitizer_syscall_pre_impl_sigpending(long set); void __sanitizer_syscall_post_impl_sigpending(long res, long set); void __sanitizer_syscall_pre_impl_sigprocmask(long how, long set, long oset); void __sanitizer_syscall_post_impl_sigprocmask(long res, long how, long set, long oset); void __sanitizer_syscall_pre_impl_getitimer(long which, long value); void __sanitizer_syscall_post_impl_getitimer(long res, long which, long value); void __sanitizer_syscall_pre_impl_setitimer(long which, long value, long ovalue); void __sanitizer_syscall_post_impl_setitimer(long res, long which, long value, long ovalue); void __sanitizer_syscall_pre_impl_timer_create(long which_clock, long timer_event_spec, long created_timer_id); void __sanitizer_syscall_post_impl_timer_create(long res, long which_clock, long timer_event_spec, long created_timer_id); void __sanitizer_syscall_pre_impl_timer_gettime(long timer_id, long setting); void __sanitizer_syscall_post_impl_timer_gettime(long res, long timer_id, long setting); void __sanitizer_syscall_pre_impl_timer_getoverrun(long timer_id); void __sanitizer_syscall_post_impl_timer_getoverrun(long res, long timer_id); void __sanitizer_syscall_pre_impl_timer_settime(long timer_id, long flags, long new_setting, long old_setting); void __sanitizer_syscall_post_impl_timer_settime(long res, long timer_id, long flags, long new_setting, long old_setting); void __sanitizer_syscall_pre_impl_timer_delete(long timer_id); void __sanitizer_syscall_post_impl_timer_delete(long res, long timer_id); void __sanitizer_syscall_pre_impl_clock_settime(long which_clock, long tp); void __sanitizer_syscall_post_impl_clock_settime(long res, long which_clock, long tp); void __sanitizer_syscall_pre_impl_clock_gettime(long which_clock, long tp); void __sanitizer_syscall_post_impl_clock_gettime(long res, long which_clock, long tp); void __sanitizer_syscall_pre_impl_clock_adjtime(long which_clock, long tx); void __sanitizer_syscall_post_impl_clock_adjtime(long res, long which_clock, long tx); void __sanitizer_syscall_pre_impl_clock_getres(long which_clock, long tp); void __sanitizer_syscall_post_impl_clock_getres(long res, long which_clock, long tp); void __sanitizer_syscall_pre_impl_clock_nanosleep(long which_clock, long flags, long rqtp, long rmtp); void __sanitizer_syscall_post_impl_clock_nanosleep(long res, long which_clock, long flags, long rqtp, long rmtp); void __sanitizer_syscall_pre_impl_nice(long increment); void __sanitizer_syscall_post_impl_nice(long res, long increment); void 
__sanitizer_syscall_pre_impl_sched_setscheduler(long pid, long policy, long param); void __sanitizer_syscall_post_impl_sched_setscheduler(long res, long pid, long policy, long param); void __sanitizer_syscall_pre_impl_sched_setparam(long pid, long param); void __sanitizer_syscall_post_impl_sched_setparam(long res, long pid, long param); void __sanitizer_syscall_pre_impl_sched_getscheduler(long pid); void __sanitizer_syscall_post_impl_sched_getscheduler(long res, long pid); void __sanitizer_syscall_pre_impl_sched_getparam(long pid, long param); void __sanitizer_syscall_post_impl_sched_getparam(long res, long pid, long param); void __sanitizer_syscall_pre_impl_sched_setaffinity(long pid, long len, long user_mask_ptr); void __sanitizer_syscall_post_impl_sched_setaffinity(long res, long pid, long len, long user_mask_ptr); void __sanitizer_syscall_pre_impl_sched_getaffinity(long pid, long len, long user_mask_ptr); void __sanitizer_syscall_post_impl_sched_getaffinity(long res, long pid, long len, long user_mask_ptr); void __sanitizer_syscall_pre_impl_sched_yield(); void __sanitizer_syscall_post_impl_sched_yield(long res); void __sanitizer_syscall_pre_impl_sched_get_priority_max(long policy); void __sanitizer_syscall_post_impl_sched_get_priority_max(long res, long policy); void __sanitizer_syscall_pre_impl_sched_get_priority_min(long policy); void __sanitizer_syscall_post_impl_sched_get_priority_min(long res, long policy); void __sanitizer_syscall_pre_impl_sched_rr_get_interval(long pid, long interval); void __sanitizer_syscall_post_impl_sched_rr_get_interval(long res, long pid, long interval); void __sanitizer_syscall_pre_impl_setpriority(long which, long who, long niceval); void __sanitizer_syscall_post_impl_setpriority(long res, long which, long who, long niceval); void __sanitizer_syscall_pre_impl_getpriority(long which, long who); void __sanitizer_syscall_post_impl_getpriority(long res, long which, long who); void __sanitizer_syscall_pre_impl_shutdown(long arg0, long arg1); void __sanitizer_syscall_post_impl_shutdown(long res, long arg0, long arg1); void __sanitizer_syscall_pre_impl_reboot(long magic1, long magic2, long cmd, long arg); void __sanitizer_syscall_post_impl_reboot(long res, long magic1, long magic2, long cmd, long arg); void __sanitizer_syscall_pre_impl_restart_syscall(); void __sanitizer_syscall_post_impl_restart_syscall(long res); void __sanitizer_syscall_pre_impl_kexec_load(long entry, long nr_segments, long segments, long flags); void __sanitizer_syscall_post_impl_kexec_load(long res, long entry, long nr_segments, long segments, long flags); void __sanitizer_syscall_pre_impl_exit(long error_code); void __sanitizer_syscall_post_impl_exit(long res, long error_code); void __sanitizer_syscall_pre_impl_exit_group(long error_code); void __sanitizer_syscall_post_impl_exit_group(long res, long error_code); void __sanitizer_syscall_pre_impl_wait4(long pid, long stat_addr, long options, long ru); void __sanitizer_syscall_post_impl_wait4(long res, long pid, long stat_addr, long options, long ru); void __sanitizer_syscall_pre_impl_waitid(long which, long pid, long infop, long options, long ru); void __sanitizer_syscall_post_impl_waitid(long res, long which, long pid, long infop, long options, long ru); void __sanitizer_syscall_pre_impl_waitpid(long pid, long stat_addr, long options); void __sanitizer_syscall_post_impl_waitpid(long res, long pid, long stat_addr, long options); void __sanitizer_syscall_pre_impl_set_tid_address(long tidptr); void 
__sanitizer_syscall_post_impl_set_tid_address(long res, long tidptr); void __sanitizer_syscall_pre_impl_init_module(long umod, long len, long uargs); void __sanitizer_syscall_post_impl_init_module(long res, long umod, long len, long uargs); void __sanitizer_syscall_pre_impl_delete_module(long name_user, long flags); void __sanitizer_syscall_post_impl_delete_module(long res, long name_user, long flags); void __sanitizer_syscall_pre_impl_rt_sigprocmask(long how, long set, long oset, long sigsetsize); void __sanitizer_syscall_post_impl_rt_sigprocmask(long res, long how, long set, long oset, long sigsetsize); void __sanitizer_syscall_pre_impl_rt_sigpending(long set, long sigsetsize); void __sanitizer_syscall_post_impl_rt_sigpending(long res, long set, long sigsetsize); void __sanitizer_syscall_pre_impl_rt_sigtimedwait(long uthese, long uinfo, long uts, long sigsetsize); void __sanitizer_syscall_post_impl_rt_sigtimedwait(long res, long uthese, long uinfo, long uts, long sigsetsize); void __sanitizer_syscall_pre_impl_rt_tgsigqueueinfo(long tgid, long pid, long sig, long uinfo); void __sanitizer_syscall_post_impl_rt_tgsigqueueinfo(long res, long tgid, long pid, long sig, long uinfo); void __sanitizer_syscall_pre_impl_kill(long pid, long sig); void __sanitizer_syscall_post_impl_kill(long res, long pid, long sig); void __sanitizer_syscall_pre_impl_tgkill(long tgid, long pid, long sig); void __sanitizer_syscall_post_impl_tgkill(long res, long tgid, long pid, long sig); void __sanitizer_syscall_pre_impl_tkill(long pid, long sig); void __sanitizer_syscall_post_impl_tkill(long res, long pid, long sig); void __sanitizer_syscall_pre_impl_rt_sigqueueinfo(long pid, long sig, long uinfo); void __sanitizer_syscall_post_impl_rt_sigqueueinfo(long res, long pid, long sig, long uinfo); void __sanitizer_syscall_pre_impl_sgetmask(); void __sanitizer_syscall_post_impl_sgetmask(long res); void __sanitizer_syscall_pre_impl_ssetmask(long newmask); void __sanitizer_syscall_post_impl_ssetmask(long res, long newmask); void __sanitizer_syscall_pre_impl_signal(long sig, long handler); void __sanitizer_syscall_post_impl_signal(long res, long sig, long handler); void __sanitizer_syscall_pre_impl_pause(); void __sanitizer_syscall_post_impl_pause(long res); void __sanitizer_syscall_pre_impl_sync(); void __sanitizer_syscall_post_impl_sync(long res); void __sanitizer_syscall_pre_impl_fsync(long fd); void __sanitizer_syscall_post_impl_fsync(long res, long fd); void __sanitizer_syscall_pre_impl_fdatasync(long fd); void __sanitizer_syscall_post_impl_fdatasync(long res, long fd); void __sanitizer_syscall_pre_impl_bdflush(long func, long data); void __sanitizer_syscall_post_impl_bdflush(long res, long func, long data); void __sanitizer_syscall_pre_impl_mount(long dev_name, long dir_name, long type, long flags, long data); void __sanitizer_syscall_post_impl_mount(long res, long dev_name, long dir_name, long type, long flags, long data); void __sanitizer_syscall_pre_impl_umount(long name, long flags); void __sanitizer_syscall_post_impl_umount(long res, long name, long flags); void __sanitizer_syscall_pre_impl_oldumount(long name); void __sanitizer_syscall_post_impl_oldumount(long res, long name); void __sanitizer_syscall_pre_impl_truncate(long path, long length); void __sanitizer_syscall_post_impl_truncate(long res, long path, long length); void __sanitizer_syscall_pre_impl_ftruncate(long fd, long length); void __sanitizer_syscall_post_impl_ftruncate(long res, long fd, long length); void __sanitizer_syscall_pre_impl_stat(long filename, 
long statbuf); void __sanitizer_syscall_post_impl_stat(long res, long filename, long statbuf); void __sanitizer_syscall_pre_impl_statfs(long path, long buf); void __sanitizer_syscall_post_impl_statfs(long res, long path, long buf); void __sanitizer_syscall_pre_impl_statfs64(long path, long sz, long buf); void __sanitizer_syscall_post_impl_statfs64(long res, long path, long sz, long buf); void __sanitizer_syscall_pre_impl_fstatfs(long fd, long buf); void __sanitizer_syscall_post_impl_fstatfs(long res, long fd, long buf); void __sanitizer_syscall_pre_impl_fstatfs64(long fd, long sz, long buf); void __sanitizer_syscall_post_impl_fstatfs64(long res, long fd, long sz, long buf); void __sanitizer_syscall_pre_impl_lstat(long filename, long statbuf); void __sanitizer_syscall_post_impl_lstat(long res, long filename, long statbuf); void __sanitizer_syscall_pre_impl_fstat(long fd, long statbuf); void __sanitizer_syscall_post_impl_fstat(long res, long fd, long statbuf); void __sanitizer_syscall_pre_impl_newstat(long filename, long statbuf); void __sanitizer_syscall_post_impl_newstat(long res, long filename, long statbuf); void __sanitizer_syscall_pre_impl_newlstat(long filename, long statbuf); void __sanitizer_syscall_post_impl_newlstat(long res, long filename, long statbuf); void __sanitizer_syscall_pre_impl_newfstat(long fd, long statbuf); void __sanitizer_syscall_post_impl_newfstat(long res, long fd, long statbuf); void __sanitizer_syscall_pre_impl_ustat(long dev, long ubuf); void __sanitizer_syscall_post_impl_ustat(long res, long dev, long ubuf); void __sanitizer_syscall_pre_impl_stat64(long filename, long statbuf); void __sanitizer_syscall_post_impl_stat64(long res, long filename, long statbuf); void __sanitizer_syscall_pre_impl_fstat64(long fd, long statbuf); void __sanitizer_syscall_post_impl_fstat64(long res, long fd, long statbuf); void __sanitizer_syscall_pre_impl_lstat64(long filename, long statbuf); void __sanitizer_syscall_post_impl_lstat64(long res, long filename, long statbuf); void __sanitizer_syscall_pre_impl_setxattr(long path, long name, long value, long size, long flags); void __sanitizer_syscall_post_impl_setxattr(long res, long path, long name, long value, long size, long flags); void __sanitizer_syscall_pre_impl_lsetxattr(long path, long name, long value, long size, long flags); void __sanitizer_syscall_post_impl_lsetxattr(long res, long path, long name, long value, long size, long flags); void __sanitizer_syscall_pre_impl_fsetxattr(long fd, long name, long value, long size, long flags); void __sanitizer_syscall_post_impl_fsetxattr(long res, long fd, long name, long value, long size, long flags); void __sanitizer_syscall_pre_impl_getxattr(long path, long name, long value, long size); void __sanitizer_syscall_post_impl_getxattr(long res, long path, long name, long value, long size); void __sanitizer_syscall_pre_impl_lgetxattr(long path, long name, long value, long size); void __sanitizer_syscall_post_impl_lgetxattr(long res, long path, long name, long value, long size); void __sanitizer_syscall_pre_impl_fgetxattr(long fd, long name, long value, long size); void __sanitizer_syscall_post_impl_fgetxattr(long res, long fd, long name, long value, long size); void __sanitizer_syscall_pre_impl_listxattr(long path, long list, long size); void __sanitizer_syscall_post_impl_listxattr(long res, long path, long list, long size); void __sanitizer_syscall_pre_impl_llistxattr(long path, long list, long size); void __sanitizer_syscall_post_impl_llistxattr(long res, long path, long list, long 
size); void __sanitizer_syscall_pre_impl_flistxattr(long fd, long list, long size); void __sanitizer_syscall_post_impl_flistxattr(long res, long fd, long list, long size); void __sanitizer_syscall_pre_impl_removexattr(long path, long name); void __sanitizer_syscall_post_impl_removexattr(long res, long path, long name); void __sanitizer_syscall_pre_impl_lremovexattr(long path, long name); void __sanitizer_syscall_post_impl_lremovexattr(long res, long path, long name); void __sanitizer_syscall_pre_impl_fremovexattr(long fd, long name); void __sanitizer_syscall_post_impl_fremovexattr(long res, long fd, long name); void __sanitizer_syscall_pre_impl_brk(long brk); void __sanitizer_syscall_post_impl_brk(long res, long brk); void __sanitizer_syscall_pre_impl_mprotect(long start, long len, long prot); void __sanitizer_syscall_post_impl_mprotect(long res, long start, long len, long prot); void __sanitizer_syscall_pre_impl_mremap(long addr, long old_len, long new_len, long flags, long new_addr); void __sanitizer_syscall_post_impl_mremap(long res, long addr, long old_len, long new_len, long flags, long new_addr); void __sanitizer_syscall_pre_impl_remap_file_pages(long start, long size, long prot, long pgoff, long flags); void __sanitizer_syscall_post_impl_remap_file_pages(long res, long start, long size, long prot, long pgoff, long flags); void __sanitizer_syscall_pre_impl_msync(long start, long len, long flags); void __sanitizer_syscall_post_impl_msync(long res, long start, long len, long flags); void __sanitizer_syscall_pre_impl_munmap(long addr, long len); void __sanitizer_syscall_post_impl_munmap(long res, long addr, long len); void __sanitizer_syscall_pre_impl_mlock(long start, long len); void __sanitizer_syscall_post_impl_mlock(long res, long start, long len); void __sanitizer_syscall_pre_impl_munlock(long start, long len); void __sanitizer_syscall_post_impl_munlock(long res, long start, long len); void __sanitizer_syscall_pre_impl_mlockall(long flags); void __sanitizer_syscall_post_impl_mlockall(long res, long flags); void __sanitizer_syscall_pre_impl_munlockall(); void __sanitizer_syscall_post_impl_munlockall(long res); void __sanitizer_syscall_pre_impl_madvise(long start, long len, long behavior); void __sanitizer_syscall_post_impl_madvise(long res, long start, long len, long behavior); void __sanitizer_syscall_pre_impl_mincore(long start, long len, long vec); void __sanitizer_syscall_post_impl_mincore(long res, long start, long len, long vec); void __sanitizer_syscall_pre_impl_pivot_root(long new_root, long put_old); void __sanitizer_syscall_post_impl_pivot_root(long res, long new_root, long put_old); void __sanitizer_syscall_pre_impl_chroot(long filename); void __sanitizer_syscall_post_impl_chroot(long res, long filename); void __sanitizer_syscall_pre_impl_mknod(long filename, long mode, long dev); void __sanitizer_syscall_post_impl_mknod(long res, long filename, long mode, long dev); void __sanitizer_syscall_pre_impl_link(long oldname, long newname); void __sanitizer_syscall_post_impl_link(long res, long oldname, long newname); void __sanitizer_syscall_pre_impl_symlink(long old, long new_); void __sanitizer_syscall_post_impl_symlink(long res, long old, long new_); void __sanitizer_syscall_pre_impl_unlink(long pathname); void __sanitizer_syscall_post_impl_unlink(long res, long pathname); void __sanitizer_syscall_pre_impl_rename(long oldname, long newname); void __sanitizer_syscall_post_impl_rename(long res, long oldname, long newname); void __sanitizer_syscall_pre_impl_chmod(long filename, 
long mode); void __sanitizer_syscall_post_impl_chmod(long res, long filename, long mode); void __sanitizer_syscall_pre_impl_fchmod(long fd, long mode); void __sanitizer_syscall_post_impl_fchmod(long res, long fd, long mode); void __sanitizer_syscall_pre_impl_fcntl(long fd, long cmd, long arg); void __sanitizer_syscall_post_impl_fcntl(long res, long fd, long cmd, long arg); void __sanitizer_syscall_pre_impl_fcntl64(long fd, long cmd, long arg); void __sanitizer_syscall_post_impl_fcntl64(long res, long fd, long cmd, long arg); void __sanitizer_syscall_pre_impl_pipe(long fildes); void __sanitizer_syscall_post_impl_pipe(long res, long fildes); void __sanitizer_syscall_pre_impl_pipe2(long fildes, long flags); void __sanitizer_syscall_post_impl_pipe2(long res, long fildes, long flags); void __sanitizer_syscall_pre_impl_dup(long fildes); void __sanitizer_syscall_post_impl_dup(long res, long fildes); void __sanitizer_syscall_pre_impl_dup2(long oldfd, long newfd); void __sanitizer_syscall_post_impl_dup2(long res, long oldfd, long newfd); void __sanitizer_syscall_pre_impl_dup3(long oldfd, long newfd, long flags); void __sanitizer_syscall_post_impl_dup3(long res, long oldfd, long newfd, long flags); void __sanitizer_syscall_pre_impl_ioperm(long from, long num, long on); void __sanitizer_syscall_post_impl_ioperm(long res, long from, long num, long on); void __sanitizer_syscall_pre_impl_ioctl(long fd, long cmd, long arg); void __sanitizer_syscall_post_impl_ioctl(long res, long fd, long cmd, long arg); void __sanitizer_syscall_pre_impl_flock(long fd, long cmd); void __sanitizer_syscall_post_impl_flock(long res, long fd, long cmd); void __sanitizer_syscall_pre_impl_io_setup(long nr_reqs, long ctx); void __sanitizer_syscall_post_impl_io_setup(long res, long nr_reqs, long ctx); void __sanitizer_syscall_pre_impl_io_destroy(long ctx); void __sanitizer_syscall_post_impl_io_destroy(long res, long ctx); void __sanitizer_syscall_pre_impl_io_getevents(long ctx_id, long min_nr, long nr, long events, long timeout); void __sanitizer_syscall_post_impl_io_getevents(long res, long ctx_id, long min_nr, long nr, long events, long timeout); void __sanitizer_syscall_pre_impl_io_submit(long ctx_id, long arg1, long arg2); void __sanitizer_syscall_post_impl_io_submit(long res, long ctx_id, long arg1, long arg2); void __sanitizer_syscall_pre_impl_io_cancel(long ctx_id, long iocb, long result); void __sanitizer_syscall_post_impl_io_cancel(long res, long ctx_id, long iocb, long result); void __sanitizer_syscall_pre_impl_sendfile(long out_fd, long in_fd, long offset, long count); void __sanitizer_syscall_post_impl_sendfile(long res, long out_fd, long in_fd, long offset, long count); void __sanitizer_syscall_pre_impl_sendfile64(long out_fd, long in_fd, long offset, long count); void __sanitizer_syscall_post_impl_sendfile64(long res, long out_fd, long in_fd, long offset, long count); void __sanitizer_syscall_pre_impl_readlink(long path, long buf, long bufsiz); void __sanitizer_syscall_post_impl_readlink(long res, long path, long buf, long bufsiz); void __sanitizer_syscall_pre_impl_creat(long pathname, long mode); void __sanitizer_syscall_post_impl_creat(long res, long pathname, long mode); void __sanitizer_syscall_pre_impl_open(long filename, long flags, long mode); void __sanitizer_syscall_post_impl_open(long res, long filename, long flags, long mode); void __sanitizer_syscall_pre_impl_close(long fd); void __sanitizer_syscall_post_impl_close(long res, long fd); void __sanitizer_syscall_pre_impl_access(long filename, long mode); 
void __sanitizer_syscall_post_impl_access(long res, long filename, long mode); void __sanitizer_syscall_pre_impl_vhangup(); void __sanitizer_syscall_post_impl_vhangup(long res); void __sanitizer_syscall_pre_impl_chown(long filename, long user, long group); void __sanitizer_syscall_post_impl_chown(long res, long filename, long user, long group); void __sanitizer_syscall_pre_impl_lchown(long filename, long user, long group); void __sanitizer_syscall_post_impl_lchown(long res, long filename, long user, long group); void __sanitizer_syscall_pre_impl_fchown(long fd, long user, long group); void __sanitizer_syscall_post_impl_fchown(long res, long fd, long user, long group); void __sanitizer_syscall_pre_impl_chown16(long filename, long user, long group); void __sanitizer_syscall_post_impl_chown16(long res, long filename, long user, long group); void __sanitizer_syscall_pre_impl_lchown16(long filename, long user, long group); void __sanitizer_syscall_post_impl_lchown16(long res, long filename, long user, long group); void __sanitizer_syscall_pre_impl_fchown16(long fd, long user, long group); void __sanitizer_syscall_post_impl_fchown16(long res, long fd, long user, long group); void __sanitizer_syscall_pre_impl_setregid16(long rgid, long egid); void __sanitizer_syscall_post_impl_setregid16(long res, long rgid, long egid); void __sanitizer_syscall_pre_impl_setgid16(long gid); void __sanitizer_syscall_post_impl_setgid16(long res, long gid); void __sanitizer_syscall_pre_impl_setreuid16(long ruid, long euid); void __sanitizer_syscall_post_impl_setreuid16(long res, long ruid, long euid); void __sanitizer_syscall_pre_impl_setuid16(long uid); void __sanitizer_syscall_post_impl_setuid16(long res, long uid); void __sanitizer_syscall_pre_impl_setresuid16(long ruid, long euid, long suid); void __sanitizer_syscall_post_impl_setresuid16(long res, long ruid, long euid, long suid); void __sanitizer_syscall_pre_impl_getresuid16(long ruid, long euid, long suid); void __sanitizer_syscall_post_impl_getresuid16(long res, long ruid, long euid, long suid); void __sanitizer_syscall_pre_impl_setresgid16(long rgid, long egid, long sgid); void __sanitizer_syscall_post_impl_setresgid16(long res, long rgid, long egid, long sgid); void __sanitizer_syscall_pre_impl_getresgid16(long rgid, long egid, long sgid); void __sanitizer_syscall_post_impl_getresgid16(long res, long rgid, long egid, long sgid); void __sanitizer_syscall_pre_impl_setfsuid16(long uid); void __sanitizer_syscall_post_impl_setfsuid16(long res, long uid); void __sanitizer_syscall_pre_impl_setfsgid16(long gid); void __sanitizer_syscall_post_impl_setfsgid16(long res, long gid); void __sanitizer_syscall_pre_impl_getgroups16(long gidsetsize, long grouplist); void __sanitizer_syscall_post_impl_getgroups16(long res, long gidsetsize, long grouplist); void __sanitizer_syscall_pre_impl_setgroups16(long gidsetsize, long grouplist); void __sanitizer_syscall_post_impl_setgroups16(long res, long gidsetsize, long grouplist); void __sanitizer_syscall_pre_impl_getuid16(); void __sanitizer_syscall_post_impl_getuid16(long res); void __sanitizer_syscall_pre_impl_geteuid16(); void __sanitizer_syscall_post_impl_geteuid16(long res); void __sanitizer_syscall_pre_impl_getgid16(); void __sanitizer_syscall_post_impl_getgid16(long res); void __sanitizer_syscall_pre_impl_getegid16(); void __sanitizer_syscall_post_impl_getegid16(long res); void __sanitizer_syscall_pre_impl_utime(long filename, long times); void __sanitizer_syscall_post_impl_utime(long res, long filename, long times); void 
__sanitizer_syscall_pre_impl_utimes(long filename, long utimes); void __sanitizer_syscall_post_impl_utimes(long res, long filename, long utimes); void __sanitizer_syscall_pre_impl_lseek(long fd, long offset, long origin); void __sanitizer_syscall_post_impl_lseek(long res, long fd, long offset, long origin); void __sanitizer_syscall_pre_impl_llseek(long fd, long offset_high, long offset_low, long result, long origin); void __sanitizer_syscall_post_impl_llseek(long res, long fd, long offset_high, long offset_low, long result, long origin); void __sanitizer_syscall_pre_impl_read(long fd, long buf, long count); void __sanitizer_syscall_post_impl_read(long res, long fd, long buf, long count); void __sanitizer_syscall_pre_impl_readv(long fd, long vec, long vlen); void __sanitizer_syscall_post_impl_readv(long res, long fd, long vec, long vlen); void __sanitizer_syscall_pre_impl_write(long fd, long buf, long count); void __sanitizer_syscall_post_impl_write(long res, long fd, long buf, long count); void __sanitizer_syscall_pre_impl_writev(long fd, long vec, long vlen); void __sanitizer_syscall_post_impl_writev(long res, long fd, long vec, long vlen); #ifdef _LP64 void __sanitizer_syscall_pre_impl_pread64(long fd, long buf, long count, long pos); void __sanitizer_syscall_post_impl_pread64(long res, long fd, long buf, long count, long pos); void __sanitizer_syscall_pre_impl_pwrite64(long fd, long buf, long count, long pos); void __sanitizer_syscall_post_impl_pwrite64(long res, long fd, long buf, long count, long pos); #else void __sanitizer_syscall_pre_impl_pread64(long fd, long buf, long count, long pos0, long pos1); void __sanitizer_syscall_post_impl_pread64(long res, long fd, long buf, long count, long pos0, long pos1); void __sanitizer_syscall_pre_impl_pwrite64(long fd, long buf, long count, long pos0, long pos1); void __sanitizer_syscall_post_impl_pwrite64(long res, long fd, long buf, long count, long pos0, long pos1); #endif void __sanitizer_syscall_pre_impl_preadv(long fd, long vec, long vlen, long pos_l, long pos_h); void __sanitizer_syscall_post_impl_preadv(long res, long fd, long vec, long vlen, long pos_l, long pos_h); void __sanitizer_syscall_pre_impl_pwritev(long fd, long vec, long vlen, long pos_l, long pos_h); void __sanitizer_syscall_post_impl_pwritev(long res, long fd, long vec, long vlen, long pos_l, long pos_h); void __sanitizer_syscall_pre_impl_getcwd(long buf, long size); void __sanitizer_syscall_post_impl_getcwd(long res, long buf, long size); void __sanitizer_syscall_pre_impl_mkdir(long pathname, long mode); void __sanitizer_syscall_post_impl_mkdir(long res, long pathname, long mode); void __sanitizer_syscall_pre_impl_chdir(long filename); void __sanitizer_syscall_post_impl_chdir(long res, long filename); void __sanitizer_syscall_pre_impl_fchdir(long fd); void __sanitizer_syscall_post_impl_fchdir(long res, long fd); void __sanitizer_syscall_pre_impl_rmdir(long pathname); void __sanitizer_syscall_post_impl_rmdir(long res, long pathname); void __sanitizer_syscall_pre_impl_lookup_dcookie(long cookie64, long buf, long len); void __sanitizer_syscall_post_impl_lookup_dcookie(long res, long cookie64, long buf, long len); void __sanitizer_syscall_pre_impl_quotactl(long cmd, long special, long id, long addr); void __sanitizer_syscall_post_impl_quotactl(long res, long cmd, long special, long id, long addr); void __sanitizer_syscall_pre_impl_getdents(long fd, long dirent, long count); void __sanitizer_syscall_post_impl_getdents(long res, long fd, long dirent, long count); void 
__sanitizer_syscall_pre_impl_getdents64(long fd, long dirent, long count); void __sanitizer_syscall_post_impl_getdents64(long res, long fd, long dirent, long count); void __sanitizer_syscall_pre_impl_setsockopt(long fd, long level, long optname, long optval, long optlen); void __sanitizer_syscall_post_impl_setsockopt(long res, long fd, long level, long optname, long optval, long optlen); void __sanitizer_syscall_pre_impl_getsockopt(long fd, long level, long optname, long optval, long optlen); void __sanitizer_syscall_post_impl_getsockopt(long res, long fd, long level, long optname, long optval, long optlen); void __sanitizer_syscall_pre_impl_bind(long arg0, long arg1, long arg2); void __sanitizer_syscall_post_impl_bind(long res, long arg0, long arg1, long arg2); void __sanitizer_syscall_pre_impl_connect(long arg0, long arg1, long arg2); void __sanitizer_syscall_post_impl_connect(long res, long arg0, long arg1, long arg2); void __sanitizer_syscall_pre_impl_accept(long arg0, long arg1, long arg2); void __sanitizer_syscall_post_impl_accept(long res, long arg0, long arg1, long arg2); void __sanitizer_syscall_pre_impl_accept4(long arg0, long arg1, long arg2, long arg3); void __sanitizer_syscall_post_impl_accept4(long res, long arg0, long arg1, long arg2, long arg3); void __sanitizer_syscall_pre_impl_getsockname(long arg0, long arg1, long arg2); void __sanitizer_syscall_post_impl_getsockname(long res, long arg0, long arg1, long arg2); void __sanitizer_syscall_pre_impl_getpeername(long arg0, long arg1, long arg2); void __sanitizer_syscall_post_impl_getpeername(long res, long arg0, long arg1, long arg2); void __sanitizer_syscall_pre_impl_send(long arg0, long arg1, long arg2, long arg3); void __sanitizer_syscall_post_impl_send(long res, long arg0, long arg1, long arg2, long arg3); void __sanitizer_syscall_pre_impl_sendto(long arg0, long arg1, long arg2, long arg3, long arg4, long arg5); void __sanitizer_syscall_post_impl_sendto(long res, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5); void __sanitizer_syscall_pre_impl_sendmsg(long fd, long msg, long flags); void __sanitizer_syscall_post_impl_sendmsg(long res, long fd, long msg, long flags); void __sanitizer_syscall_pre_impl_sendmmsg(long fd, long msg, long vlen, long flags); void __sanitizer_syscall_post_impl_sendmmsg(long res, long fd, long msg, long vlen, long flags); void __sanitizer_syscall_pre_impl_recv(long arg0, long arg1, long arg2, long arg3); void __sanitizer_syscall_post_impl_recv(long res, long arg0, long arg1, long arg2, long arg3); void __sanitizer_syscall_pre_impl_recvfrom(long arg0, long arg1, long arg2, long arg3, long arg4, long arg5); void __sanitizer_syscall_post_impl_recvfrom(long res, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5); void __sanitizer_syscall_pre_impl_recvmsg(long fd, long msg, long flags); void __sanitizer_syscall_post_impl_recvmsg(long res, long fd, long msg, long flags); void __sanitizer_syscall_pre_impl_recvmmsg(long fd, long msg, long vlen, long flags, long timeout); void __sanitizer_syscall_post_impl_recvmmsg(long res, long fd, long msg, long vlen, long flags, long timeout); void __sanitizer_syscall_pre_impl_socket(long arg0, long arg1, long arg2); void __sanitizer_syscall_post_impl_socket(long res, long arg0, long arg1, long arg2); void __sanitizer_syscall_pre_impl_socketpair(long arg0, long arg1, long arg2, long arg3); void __sanitizer_syscall_post_impl_socketpair(long res, long arg0, long arg1, long arg2, long arg3); void __sanitizer_syscall_pre_impl_socketcall(long 
call, long args); void __sanitizer_syscall_post_impl_socketcall(long res, long call, long args); void __sanitizer_syscall_pre_impl_listen(long arg0, long arg1); void __sanitizer_syscall_post_impl_listen(long res, long arg0, long arg1); void __sanitizer_syscall_pre_impl_poll(long ufds, long nfds, long timeout); void __sanitizer_syscall_post_impl_poll(long res, long ufds, long nfds, long timeout); void __sanitizer_syscall_pre_impl_select(long n, long inp, long outp, long exp, long tvp); void __sanitizer_syscall_post_impl_select(long res, long n, long inp, long outp, long exp, long tvp); void __sanitizer_syscall_pre_impl_old_select(long arg); void __sanitizer_syscall_post_impl_old_select(long res, long arg); void __sanitizer_syscall_pre_impl_epoll_create(long size); void __sanitizer_syscall_post_impl_epoll_create(long res, long size); void __sanitizer_syscall_pre_impl_epoll_create1(long flags); void __sanitizer_syscall_post_impl_epoll_create1(long res, long flags); void __sanitizer_syscall_pre_impl_epoll_ctl(long epfd, long op, long fd, long event); void __sanitizer_syscall_post_impl_epoll_ctl(long res, long epfd, long op, long fd, long event); void __sanitizer_syscall_pre_impl_epoll_wait(long epfd, long events, long maxevents, long timeout); void __sanitizer_syscall_post_impl_epoll_wait(long res, long epfd, long events, long maxevents, long timeout); void __sanitizer_syscall_pre_impl_epoll_pwait(long epfd, long events, long maxevents, long timeout, long sigmask, long sigsetsize); void __sanitizer_syscall_post_impl_epoll_pwait(long res, long epfd, long events, long maxevents, long timeout, long sigmask, long sigsetsize); void __sanitizer_syscall_pre_impl_epoll_pwait2(long epfd, long events, long maxevents, long timeout, long sigmask, long sigsetsize); void __sanitizer_syscall_post_impl_epoll_pwait2(long res, long epfd, long events, long maxevents, long timeout, long sigmask, long sigsetsize); void __sanitizer_syscall_pre_impl_gethostname(long name, long len); void __sanitizer_syscall_post_impl_gethostname(long res, long name, long len); void __sanitizer_syscall_pre_impl_sethostname(long name, long len); void __sanitizer_syscall_post_impl_sethostname(long res, long name, long len); void __sanitizer_syscall_pre_impl_setdomainname(long name, long len); void __sanitizer_syscall_post_impl_setdomainname(long res, long name, long len); void __sanitizer_syscall_pre_impl_newuname(long name); void __sanitizer_syscall_post_impl_newuname(long res, long name); void __sanitizer_syscall_pre_impl_uname(long arg0); void __sanitizer_syscall_post_impl_uname(long res, long arg0); void __sanitizer_syscall_pre_impl_olduname(long arg0); void __sanitizer_syscall_post_impl_olduname(long res, long arg0); void __sanitizer_syscall_pre_impl_getrlimit(long resource, long rlim); void __sanitizer_syscall_post_impl_getrlimit(long res, long resource, long rlim); void __sanitizer_syscall_pre_impl_old_getrlimit(long resource, long rlim); void __sanitizer_syscall_post_impl_old_getrlimit(long res, long resource, long rlim); void __sanitizer_syscall_pre_impl_setrlimit(long resource, long rlim); void __sanitizer_syscall_post_impl_setrlimit(long res, long resource, long rlim); void __sanitizer_syscall_pre_impl_prlimit64(long pid, long resource, long new_rlim, long old_rlim); void __sanitizer_syscall_post_impl_prlimit64(long res, long pid, long resource, long new_rlim, long old_rlim); void __sanitizer_syscall_pre_impl_getrusage(long who, long ru); void __sanitizer_syscall_post_impl_getrusage(long res, long who, long ru); void 
__sanitizer_syscall_pre_impl_umask(long mask); void __sanitizer_syscall_post_impl_umask(long res, long mask); void __sanitizer_syscall_pre_impl_msgget(long key, long msgflg); void __sanitizer_syscall_post_impl_msgget(long res, long key, long msgflg); void __sanitizer_syscall_pre_impl_msgsnd(long msqid, long msgp, long msgsz, long msgflg); void __sanitizer_syscall_post_impl_msgsnd(long res, long msqid, long msgp, long msgsz, long msgflg); void __sanitizer_syscall_pre_impl_msgrcv(long msqid, long msgp, long msgsz, long msgtyp, long msgflg); void __sanitizer_syscall_post_impl_msgrcv(long res, long msqid, long msgp, long msgsz, long msgtyp, long msgflg); void __sanitizer_syscall_pre_impl_msgctl(long msqid, long cmd, long buf); void __sanitizer_syscall_post_impl_msgctl(long res, long msqid, long cmd, long buf); void __sanitizer_syscall_pre_impl_semget(long key, long nsems, long semflg); void __sanitizer_syscall_post_impl_semget(long res, long key, long nsems, long semflg); void __sanitizer_syscall_pre_impl_semop(long semid, long sops, long nsops); void __sanitizer_syscall_post_impl_semop(long res, long semid, long sops, long nsops); void __sanitizer_syscall_pre_impl_semctl(long semid, long semnum, long cmd, long arg); void __sanitizer_syscall_post_impl_semctl(long res, long semid, long semnum, long cmd, long arg); void __sanitizer_syscall_pre_impl_semtimedop(long semid, long sops, long nsops, long timeout); void __sanitizer_syscall_post_impl_semtimedop(long res, long semid, long sops, long nsops, long timeout); void __sanitizer_syscall_pre_impl_shmat(long shmid, long shmaddr, long shmflg); void __sanitizer_syscall_post_impl_shmat(long res, long shmid, long shmaddr, long shmflg); void __sanitizer_syscall_pre_impl_shmget(long key, long size, long flag); void __sanitizer_syscall_post_impl_shmget(long res, long key, long size, long flag); void __sanitizer_syscall_pre_impl_shmdt(long shmaddr); void __sanitizer_syscall_post_impl_shmdt(long res, long shmaddr); void __sanitizer_syscall_pre_impl_shmctl(long shmid, long cmd, long buf); void __sanitizer_syscall_post_impl_shmctl(long res, long shmid, long cmd, long buf); void __sanitizer_syscall_pre_impl_ipc(long call, long first, long second, long third, long ptr, long fifth); void __sanitizer_syscall_post_impl_ipc(long res, long call, long first, long second, long third, long ptr, long fifth); void __sanitizer_syscall_pre_impl_mq_open(long name, long oflag, long mode, long attr); void __sanitizer_syscall_post_impl_mq_open(long res, long name, long oflag, long mode, long attr); void __sanitizer_syscall_pre_impl_mq_unlink(long name); void __sanitizer_syscall_post_impl_mq_unlink(long res, long name); void __sanitizer_syscall_pre_impl_mq_timedsend(long mqdes, long msg_ptr, long msg_len, long msg_prio, long abs_timeout); void __sanitizer_syscall_post_impl_mq_timedsend(long res, long mqdes, long msg_ptr, long msg_len, long msg_prio, long abs_timeout); void __sanitizer_syscall_pre_impl_mq_timedreceive(long mqdes, long msg_ptr, long msg_len, long msg_prio, long abs_timeout); void __sanitizer_syscall_post_impl_mq_timedreceive(long res, long mqdes, long msg_ptr, long msg_len, long msg_prio, long abs_timeout); void __sanitizer_syscall_pre_impl_mq_notify(long mqdes, long notification); void __sanitizer_syscall_post_impl_mq_notify(long res, long mqdes, long notification); void __sanitizer_syscall_pre_impl_mq_getsetattr(long mqdes, long mqstat, long omqstat); void __sanitizer_syscall_post_impl_mq_getsetattr(long res, long mqdes, long mqstat, long omqstat); void 
__sanitizer_syscall_pre_impl_pciconfig_iobase(long which, long bus, long devfn); void __sanitizer_syscall_post_impl_pciconfig_iobase(long res, long which, long bus, long devfn); void __sanitizer_syscall_pre_impl_pciconfig_read(long bus, long dfn, long off, long len, long buf); void __sanitizer_syscall_post_impl_pciconfig_read(long res, long bus, long dfn, long off, long len, long buf); void __sanitizer_syscall_pre_impl_pciconfig_write(long bus, long dfn, long off, long len, long buf); void __sanitizer_syscall_post_impl_pciconfig_write(long res, long bus, long dfn, long off, long len, long buf); void __sanitizer_syscall_pre_impl_swapon(long specialfile, long swap_flags); void __sanitizer_syscall_post_impl_swapon(long res, long specialfile, long swap_flags); void __sanitizer_syscall_pre_impl_swapoff(long specialfile); void __sanitizer_syscall_post_impl_swapoff(long res, long specialfile); void __sanitizer_syscall_pre_impl_sysctl(long args); void __sanitizer_syscall_post_impl_sysctl(long res, long args); void __sanitizer_syscall_pre_impl_sysinfo(long info); void __sanitizer_syscall_post_impl_sysinfo(long res, long info); void __sanitizer_syscall_pre_impl_sysfs(long option, long arg1, long arg2); void __sanitizer_syscall_post_impl_sysfs(long res, long option, long arg1, long arg2); void __sanitizer_syscall_pre_impl_syslog(long type, long buf, long len); void __sanitizer_syscall_post_impl_syslog(long res, long type, long buf, long len); void __sanitizer_syscall_pre_impl_uselib(long library); void __sanitizer_syscall_post_impl_uselib(long res, long library); void __sanitizer_syscall_pre_impl_ni_syscall(); void __sanitizer_syscall_post_impl_ni_syscall(long res); void __sanitizer_syscall_pre_impl_ptrace(long request, long pid, long addr, long data); void __sanitizer_syscall_post_impl_ptrace(long res, long request, long pid, long addr, long data); void __sanitizer_syscall_pre_impl_add_key(long _type, long _description, long _payload, long plen, long destringid); void __sanitizer_syscall_post_impl_add_key(long res, long _type, long _description, long _payload, long plen, long destringid); void __sanitizer_syscall_pre_impl_request_key(long _type, long _description, long _callout_info, long destringid); void __sanitizer_syscall_post_impl_request_key(long res, long _type, long _description, long _callout_info, long destringid); void __sanitizer_syscall_pre_impl_keyctl(long cmd, long arg2, long arg3, long arg4, long arg5); void __sanitizer_syscall_post_impl_keyctl(long res, long cmd, long arg2, long arg3, long arg4, long arg5); void __sanitizer_syscall_pre_impl_ioprio_set(long which, long who, long ioprio); void __sanitizer_syscall_post_impl_ioprio_set(long res, long which, long who, long ioprio); void __sanitizer_syscall_pre_impl_ioprio_get(long which, long who); void __sanitizer_syscall_post_impl_ioprio_get(long res, long which, long who); void __sanitizer_syscall_pre_impl_set_mempolicy(long mode, long nmask, long maxnode); void __sanitizer_syscall_post_impl_set_mempolicy(long res, long mode, long nmask, long maxnode); void __sanitizer_syscall_pre_impl_migrate_pages(long pid, long maxnode, long from, long to); void __sanitizer_syscall_post_impl_migrate_pages(long res, long pid, long maxnode, long from, long to); void __sanitizer_syscall_pre_impl_move_pages(long pid, long nr_pages, long pages, long nodes, long status, long flags); void __sanitizer_syscall_post_impl_move_pages(long res, long pid, long nr_pages, long pages, long nodes, long status, long flags); void 
__sanitizer_syscall_pre_impl_mbind(long start, long len, long mode, long nmask, long maxnode, long flags); void __sanitizer_syscall_post_impl_mbind(long res, long start, long len, long mode, long nmask, long maxnode, long flags); void __sanitizer_syscall_pre_impl_get_mempolicy(long policy, long nmask, long maxnode, long addr, long flags); void __sanitizer_syscall_post_impl_get_mempolicy(long res, long policy, long nmask, long maxnode, long addr, long flags); void __sanitizer_syscall_pre_impl_inotify_init(); void __sanitizer_syscall_post_impl_inotify_init(long res); void __sanitizer_syscall_pre_impl_inotify_init1(long flags); void __sanitizer_syscall_post_impl_inotify_init1(long res, long flags); void __sanitizer_syscall_pre_impl_inotify_add_watch(long fd, long path, long mask); void __sanitizer_syscall_post_impl_inotify_add_watch(long res, long fd, long path, long mask); void __sanitizer_syscall_pre_impl_inotify_rm_watch(long fd, long wd); void __sanitizer_syscall_post_impl_inotify_rm_watch(long res, long fd, long wd); void __sanitizer_syscall_pre_impl_spu_run(long fd, long unpc, long ustatus); void __sanitizer_syscall_post_impl_spu_run(long res, long fd, long unpc, long ustatus); void __sanitizer_syscall_pre_impl_spu_create(long name, long flags, long mode, long fd); void __sanitizer_syscall_post_impl_spu_create(long res, long name, long flags, long mode, long fd); void __sanitizer_syscall_pre_impl_mknodat(long dfd, long filename, long mode, long dev); void __sanitizer_syscall_post_impl_mknodat(long res, long dfd, long filename, long mode, long dev); void __sanitizer_syscall_pre_impl_mkdirat(long dfd, long pathname, long mode); void __sanitizer_syscall_post_impl_mkdirat(long res, long dfd, long pathname, long mode); void __sanitizer_syscall_pre_impl_unlinkat(long dfd, long pathname, long flag); void __sanitizer_syscall_post_impl_unlinkat(long res, long dfd, long pathname, long flag); void __sanitizer_syscall_pre_impl_symlinkat(long oldname, long newdfd, long newname); void __sanitizer_syscall_post_impl_symlinkat(long res, long oldname, long newdfd, long newname); void __sanitizer_syscall_pre_impl_linkat(long olddfd, long oldname, long newdfd, long newname, long flags); void __sanitizer_syscall_post_impl_linkat(long res, long olddfd, long oldname, long newdfd, long newname, long flags); void __sanitizer_syscall_pre_impl_renameat(long olddfd, long oldname, long newdfd, long newname); void __sanitizer_syscall_post_impl_renameat(long res, long olddfd, long oldname, long newdfd, long newname); void __sanitizer_syscall_pre_impl_futimesat(long dfd, long filename, long utimes); void __sanitizer_syscall_post_impl_futimesat(long res, long dfd, long filename, long utimes); void __sanitizer_syscall_pre_impl_faccessat(long dfd, long filename, long mode); void __sanitizer_syscall_post_impl_faccessat(long res, long dfd, long filename, long mode); void __sanitizer_syscall_pre_impl_fchmodat(long dfd, long filename, long mode); void __sanitizer_syscall_post_impl_fchmodat(long res, long dfd, long filename, long mode); void __sanitizer_syscall_pre_impl_fchownat(long dfd, long filename, long user, long group, long flag); void __sanitizer_syscall_post_impl_fchownat(long res, long dfd, long filename, long user, long group, long flag); void __sanitizer_syscall_pre_impl_openat(long dfd, long filename, long flags, long mode); void __sanitizer_syscall_post_impl_openat(long res, long dfd, long filename, long flags, long mode); void __sanitizer_syscall_pre_impl_newfstatat(long dfd, long filename, long statbuf, long 
flag); void __sanitizer_syscall_post_impl_newfstatat(long res, long dfd, long filename, long statbuf, long flag); void __sanitizer_syscall_pre_impl_fstatat64(long dfd, long filename, long statbuf, long flag); void __sanitizer_syscall_post_impl_fstatat64(long res, long dfd, long filename, long statbuf, long flag); void __sanitizer_syscall_pre_impl_readlinkat(long dfd, long path, long buf, long bufsiz); void __sanitizer_syscall_post_impl_readlinkat(long res, long dfd, long path, long buf, long bufsiz); void __sanitizer_syscall_pre_impl_utimensat(long dfd, long filename, long utimes, long flags); void __sanitizer_syscall_post_impl_utimensat(long res, long dfd, long filename, long utimes, long flags); void __sanitizer_syscall_pre_impl_unshare(long unshare_flags); void __sanitizer_syscall_post_impl_unshare(long res, long unshare_flags); void __sanitizer_syscall_pre_impl_splice(long fd_in, long off_in, long fd_out, long off_out, long len, long flags); void __sanitizer_syscall_post_impl_splice(long res, long fd_in, long off_in, long fd_out, long off_out, long len, long flags); void __sanitizer_syscall_pre_impl_vmsplice(long fd, long iov, long nr_segs, long flags); void __sanitizer_syscall_post_impl_vmsplice(long res, long fd, long iov, long nr_segs, long flags); void __sanitizer_syscall_pre_impl_tee(long fdin, long fdout, long len, long flags); void __sanitizer_syscall_post_impl_tee(long res, long fdin, long fdout, long len, long flags); void __sanitizer_syscall_pre_impl_get_robust_list(long pid, long head_ptr, long len_ptr); void __sanitizer_syscall_post_impl_get_robust_list(long res, long pid, long head_ptr, long len_ptr); void __sanitizer_syscall_pre_impl_set_robust_list(long head, long len); void __sanitizer_syscall_post_impl_set_robust_list(long res, long head, long len); void __sanitizer_syscall_pre_impl_getcpu(long cpu, long node, long cache); void __sanitizer_syscall_post_impl_getcpu(long res, long cpu, long node, long cache); void __sanitizer_syscall_pre_impl_signalfd(long ufd, long user_mask, long sizemask); void __sanitizer_syscall_post_impl_signalfd(long res, long ufd, long user_mask, long sizemask); void __sanitizer_syscall_pre_impl_signalfd4(long ufd, long user_mask, long sizemask, long flags); void __sanitizer_syscall_post_impl_signalfd4(long res, long ufd, long user_mask, long sizemask, long flags); void __sanitizer_syscall_pre_impl_timerfd_create(long clockid, long flags); void __sanitizer_syscall_post_impl_timerfd_create(long res, long clockid, long flags); void __sanitizer_syscall_pre_impl_timerfd_settime(long ufd, long flags, long utmr, long otmr); void __sanitizer_syscall_post_impl_timerfd_settime(long res, long ufd, long flags, long utmr, long otmr); void __sanitizer_syscall_pre_impl_timerfd_gettime(long ufd, long otmr); void __sanitizer_syscall_post_impl_timerfd_gettime(long res, long ufd, long otmr); void __sanitizer_syscall_pre_impl_eventfd(long count); void __sanitizer_syscall_post_impl_eventfd(long res, long count); void __sanitizer_syscall_pre_impl_eventfd2(long count, long flags); void __sanitizer_syscall_post_impl_eventfd2(long res, long count, long flags); void __sanitizer_syscall_pre_impl_old_readdir(long arg0, long arg1, long arg2); void __sanitizer_syscall_post_impl_old_readdir(long res, long arg0, long arg1, long arg2); void __sanitizer_syscall_pre_impl_pselect6(long arg0, long arg1, long arg2, long arg3, long arg4, long arg5); void __sanitizer_syscall_post_impl_pselect6(long res, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5); void 
__sanitizer_syscall_pre_impl_ppoll(long arg0, long arg1, long arg2, long arg3, long arg4);
void __sanitizer_syscall_post_impl_ppoll(long res, long arg0, long arg1, long arg2, long arg3, long arg4);
void __sanitizer_syscall_pre_impl_fanotify_init(long flags, long event_f_flags);
void __sanitizer_syscall_post_impl_fanotify_init(long res, long flags, long event_f_flags);
void __sanitizer_syscall_pre_impl_fanotify_mark(long fanotify_fd, long flags, long mask, long fd, long pathname);
void __sanitizer_syscall_post_impl_fanotify_mark(long res, long fanotify_fd, long flags, long mask, long fd, long pathname);
void __sanitizer_syscall_pre_impl_syncfs(long fd);
void __sanitizer_syscall_post_impl_syncfs(long res, long fd);
void __sanitizer_syscall_pre_impl_perf_event_open(long attr_uptr, long pid, long cpu, long group_fd, long flags);
void __sanitizer_syscall_post_impl_perf_event_open(long res, long attr_uptr, long pid, long cpu, long group_fd, long flags);
void __sanitizer_syscall_pre_impl_mmap_pgoff(long addr, long len, long prot, long flags, long fd, long pgoff);
void __sanitizer_syscall_post_impl_mmap_pgoff(long res, long addr, long len, long prot, long flags, long fd, long pgoff);
void __sanitizer_syscall_pre_impl_old_mmap(long arg);
void __sanitizer_syscall_post_impl_old_mmap(long res, long arg);
void __sanitizer_syscall_pre_impl_name_to_handle_at(long dfd, long name, long handle, long mnt_id, long flag);
void __sanitizer_syscall_post_impl_name_to_handle_at(long res, long dfd, long name, long handle, long mnt_id, long flag);
void __sanitizer_syscall_pre_impl_open_by_handle_at(long mountdirfd, long handle, long flags);
void __sanitizer_syscall_post_impl_open_by_handle_at(long res, long mountdirfd, long handle, long flags);
void __sanitizer_syscall_pre_impl_setns(long fd, long nstype);
void __sanitizer_syscall_post_impl_setns(long res, long fd, long nstype);
void __sanitizer_syscall_pre_impl_process_vm_readv(long pid, long lvec, long liovcnt, long rvec, long riovcnt, long flags);
void __sanitizer_syscall_post_impl_process_vm_readv(long res, long pid, long lvec, long liovcnt, long rvec, long riovcnt, long flags);
void __sanitizer_syscall_pre_impl_process_vm_writev(long pid, long lvec, long liovcnt, long rvec, long riovcnt, long flags);
void __sanitizer_syscall_post_impl_process_vm_writev(long res, long pid, long lvec, long liovcnt, long rvec, long riovcnt, long flags);
void __sanitizer_syscall_pre_impl_fork();
void __sanitizer_syscall_post_impl_fork(long res);
void __sanitizer_syscall_pre_impl_vfork();
void __sanitizer_syscall_post_impl_vfork(long res);
void __sanitizer_syscall_pre_impl_sigaction(long signum, long act, long oldact);
void __sanitizer_syscall_post_impl_sigaction(long res, long signum, long act, long oldact);
void __sanitizer_syscall_pre_impl_rt_sigaction(long signum, long act, long oldact, long sz);
void __sanitizer_syscall_post_impl_rt_sigaction(long res, long signum, long act, long oldact, long sz);
void __sanitizer_syscall_pre_impl_sigaltstack(long ss, long oss);
void __sanitizer_syscall_post_impl_sigaltstack(long res, long ss, long oss);

#ifdef __cplusplus
} // extern "C"
#endif

#endif // SANITIZER_LINUX_SYSCALL_HOOKS_H
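/*
 * Illustrative usage sketch (not part of the header above). A program that
 * issues a raw syscall directly can keep sanitizers informed by calling the
 * matching pre/post hooks declared above around the syscall. The hook
 * signatures come from the declarations above; the include path, the use of
 * SYS_readlinkat, and this wrapper function are assumptions made only for
 * the example.
 */
#define _GNU_SOURCE
#include <sanitizer/linux_syscall_hooks.h> /* assumed install path of the hooks header */
#include <sys/syscall.h>
#include <unistd.h>

static long raw_readlinkat(int dfd, const char *path, char *buf, size_t bufsiz) {
  /* Tell the sanitizer which arguments the kernel is about to read. */
  __sanitizer_syscall_pre_impl_readlinkat((long)dfd, (long)path, (long)buf,
                                          (long)bufsiz);
  long res = syscall(SYS_readlinkat, dfd, path, buf, bufsiz);
  /* Report the result so the bytes written into buf are seen as initialized. */
  __sanitizer_syscall_post_impl_readlinkat(res, (long)dfd, (long)path,
                                           (long)buf, (long)bufsiz);
  return res;
}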
/*===---- __clang_hip_runtime_wrapper.h - HIP runtime support ---------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/*
 * WARNING: This header is intended to be directly -include'd by
 * the compiler and is not supposed to be included by users.
 *
 */

#ifndef __CLANG_HIP_RUNTIME_WRAPPER_H__
#define __CLANG_HIP_RUNTIME_WRAPPER_H__

#if __HIP__

#define __host__ __attribute__((host))
#define __device__ __attribute__((device))
#define __global__ __attribute__((global))
#define __shared__ __attribute__((shared))
#define __constant__ __attribute__((constant))
#define __managed__ __attribute__((managed))

#if !defined(__cplusplus) || __cplusplus < 201103L
#define nullptr NULL;
#endif

#ifdef __cplusplus
extern "C" {
__attribute__((__visibility__("default"))) __attribute__((weak))
__attribute__((noreturn)) __device__ void __cxa_pure_virtual(void) {
  __builtin_trap();
}
__attribute__((__visibility__("default"))) __attribute__((weak))
__attribute__((noreturn)) __device__ void __cxa_deleted_virtual(void) {
  __builtin_trap();
}
}
#endif //__cplusplus

#if !defined(__HIPCC_RTC__)
#if __has_include("hip/hip_version.h")
#include "hip/hip_version.h"
#endif // __has_include("hip/hip_version.h")
#endif // __HIPCC_RTC__

typedef __SIZE_TYPE__ __hip_size_t;

#ifdef __cplusplus
extern "C" {
#endif //__cplusplus

#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405
__device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
__device__ void __ockl_dm_dealloc(unsigned long long __addr);
#if __has_feature(address_sanitizer)
__device__ unsigned long long __asan_malloc_impl(unsigned long long __size,
                                                 unsigned long long __pc);
__device__ void __asan_free_impl(unsigned long long __addr,
                                 unsigned long long __pc);
__attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) {
  unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
  return (void *)__asan_malloc_impl(__size, __pc);
}
__attribute__((noinline, weak)) __device__ void free(void *__ptr) {
  unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
  __asan_free_impl((unsigned long long)__ptr, __pc);
}
#else // __has_feature(address_sanitizer)
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
  return (void *)__ockl_dm_alloc(__size);
}
__attribute__((weak)) inline __device__ void free(void *__ptr) {
  __ockl_dm_dealloc((unsigned long long)__ptr);
}
#endif // __has_feature(address_sanitizer)
#else // HIP version check
#if __HIP_ENABLE_DEVICE_MALLOC__
__device__ void *__hip_malloc(__hip_size_t __size);
__device__ void *__hip_free(void *__ptr);
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
  return __hip_malloc(__size);
}
__attribute__((weak)) inline __device__ void free(void *__ptr) {
  __hip_free(__ptr);
}
#else // __HIP_ENABLE_DEVICE_MALLOC__
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
  __builtin_trap();
  return (void *)0;
}
__attribute__((weak)) inline __device__ void free(void *__ptr) {
  __builtin_trap();
}
#endif // __HIP_ENABLE_DEVICE_MALLOC__
#endif // HIP version check

#ifdef __cplusplus
} // extern "C"
#endif //__cplusplus

#if !defined(__HIPCC_RTC__)
#include
#include
#include
#if __has_include("hip/hip_version.h")
#include "hip/hip_version.h"
#endif // __has_include("hip/hip_version.h")
#else
typedef __SIZE_TYPE__ size_t;
// Define macros which are needed to declare HIP device API's without standard
// C/C++ headers. This is for readability so that these API's can be written
// the same way as non-hipRTC use case. These macros need to be popped so that
// they do not pollute users' name space.
#pragma push_macro("NULL")
#pragma push_macro("uint32_t")
#pragma push_macro("uint64_t")
#pragma push_macro("CHAR_BIT")
#pragma push_macro("INT_MAX")
#define NULL (void *)0
#define uint32_t __UINT32_TYPE__
#define uint64_t __UINT64_TYPE__
#define CHAR_BIT __CHAR_BIT__
#define INT_MAX __INTMAX_MAX__
#endif // __HIPCC_RTC__

#include <__clang_hip_libdevice_declares.h>
#include <__clang_hip_math.h>
#include <__clang_hip_stdlib.h>

#if defined(__HIPCC_RTC__)
#include <__clang_hip_cmath.h>
#else
#include <__clang_cuda_math_forward_declares.h>
#include <__clang_hip_cmath.h>
#include <__clang_cuda_complex_builtins.h>
#include
#include
#include
#endif // __HIPCC_RTC__

#define __CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ 1

#if defined(__HIPCC_RTC__)
#pragma pop_macro("NULL")
#pragma pop_macro("uint32_t")
#pragma pop_macro("uint64_t")
#pragma pop_macro("CHAR_BIT")
#pragma pop_macro("INT_MAX")
#endif // __HIPCC_RTC__
#endif // __HIP__
#endif // __CLANG_HIP_RUNTIME_WRAPPER_H__

/*===---- __stdarg_va_arg.h - Definitions of va_start, va_arg, va_end-------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef va_arg

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
/* C23 does not require the second parameter for va_start. */
#define va_start(ap, ...) __builtin_va_start(ap, 0)
#else
/* Versions before C23 do require the second parameter. */
#define va_start(ap, param) __builtin_va_start(ap, param)
#endif

#define va_end(ap) __builtin_va_end(ap)
#define va_arg(ap, type) __builtin_va_arg(ap, type)

#endif

/*===---- xmmintrin.h - SSE intrinsics -------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __XMMINTRIN_H #define __XMMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif #include typedef int __v4si __attribute__((__vector_size__(16))); typedef float __v4sf __attribute__((__vector_size__(16))); typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16))); typedef float __m128_u __attribute__((__vector_size__(16), __aligned__(1))); /* Unsigned types */ typedef unsigned int __v4su __attribute__((__vector_size__(16))); /* This header should only be included in a hosted environment as it depends on * a standard library to provide allocation routines. */ #if __STDC_HOSTED__ #include #endif /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("sse,no-evex512"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS_MMX \ __attribute__((__always_inline__, __nodebug__, \ __target__("mmx,sse,no-evex512"), __min_vector_width__(64))) /// Adds the 32-bit float values in the low-order bits of the operands. /// /// \headerfile /// /// This intrinsic corresponds to the VADDSS / ADDSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the source operands. /// The lower 32 bits of this operand are used in the calculation. /// \param __b /// A 128-bit vector of [4 x float] containing one of the source operands. /// The lower 32 bits of this operand are used in the calculation. /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the sum /// of the lower 32 bits of both operands. The upper 96 bits are copied from /// the upper 96 bits of the first source operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ss(__m128 __a, __m128 __b) { __a[0] += __b[0]; return __a; } /// Adds two 128-bit vectors of [4 x float], and returns the results of /// the addition. /// /// \headerfile /// /// This intrinsic corresponds to the VADDPS / ADDPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the source operands. /// \param __b /// A 128-bit vector of [4 x float] containing one of the source operands. /// \returns A 128-bit vector of [4 x float] containing the sums of both /// operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ps(__m128 __a, __m128 __b) { return (__m128)((__v4sf)__a + (__v4sf)__b); } /// Subtracts the 32-bit float value in the low-order bits of the second /// operand from the corresponding value in the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VSUBSS / SUBSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing the minuend. The lower 32 bits /// of this operand are used in the calculation. /// \param __b /// A 128-bit vector of [4 x float] containing the subtrahend. The lower 32 /// bits of this operand are used in the calculation. /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// difference of the lower 32 bits of both operands. The upper 96 bits are /// copied from the upper 96 bits of the first source operand. 
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ss(__m128 __a, __m128 __b) { __a[0] -= __b[0]; return __a; } /// Subtracts each of the values of the second operand from the first /// operand, both of which are 128-bit vectors of [4 x float] and returns /// the results of the subtraction. /// /// \headerfile /// /// This intrinsic corresponds to the VSUBPS / SUBPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing the minuend. /// \param __b /// A 128-bit vector of [4 x float] containing the subtrahend. /// \returns A 128-bit vector of [4 x float] containing the differences between /// both operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ps(__m128 __a, __m128 __b) { return (__m128)((__v4sf)__a - (__v4sf)__b); } /// Multiplies two 32-bit float values in the low-order bits of the /// operands. /// /// \headerfile /// /// This intrinsic corresponds to the VMULSS / MULSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the source operands. /// The lower 32 bits of this operand are used in the calculation. /// \param __b /// A 128-bit vector of [4 x float] containing one of the source operands. /// The lower 32 bits of this operand are used in the calculation. /// \returns A 128-bit vector of [4 x float] containing the product of the lower /// 32 bits of both operands. The upper 96 bits are copied from the upper 96 /// bits of the first source operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ss(__m128 __a, __m128 __b) { __a[0] *= __b[0]; return __a; } /// Multiplies two 128-bit vectors of [4 x float] and returns the /// results of the multiplication. /// /// \headerfile /// /// This intrinsic corresponds to the VMULPS / MULPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the source operands. /// \param __b /// A 128-bit vector of [4 x float] containing one of the source operands. /// \returns A 128-bit vector of [4 x float] containing the products of both /// operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ps(__m128 __a, __m128 __b) { return (__m128)((__v4sf)__a * (__v4sf)__b); } /// Divides the value in the low-order 32 bits of the first operand by /// the corresponding value in the second operand. /// /// \headerfile /// /// This intrinsic corresponds to the VDIVSS / DIVSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing the dividend. The lower 32 /// bits of this operand are used in the calculation. /// \param __b /// A 128-bit vector of [4 x float] containing the divisor. The lower 32 bits /// of this operand are used in the calculation. /// \returns A 128-bit vector of [4 x float] containing the quotients of the /// lower 32 bits of both operands. The upper 96 bits are copied from the /// upper 96 bits of the first source operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ss(__m128 __a, __m128 __b) { __a[0] /= __b[0]; return __a; } /// Divides two 128-bit vectors of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VDIVPS / DIVPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing the dividend. /// \param __b /// A 128-bit vector of [4 x float] containing the divisor. /// \returns A 128-bit vector of [4 x float] containing the quotients of both /// operands. 
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ps(__m128 __a, __m128 __b) { return (__m128)((__v4sf)__a / (__v4sf)__b); } /// Calculates the square root of the value stored in the low-order bits /// of a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VSQRTSS / SQRTSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the calculation. /// \returns A 128-bit vector of [4 x float] containing the square root of the /// value in the low-order bits of the operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ss(__m128 __a) { return (__m128)__builtin_ia32_sqrtss((__v4sf)__a); } /// Calculates the square roots of the values stored in a 128-bit vector /// of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VSQRTPS / SQRTPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the square roots of the /// values in the operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ps(__m128 __a) { return __builtin_ia32_sqrtps((__v4sf)__a); } /// Calculates the approximate reciprocal of the value stored in the /// low-order bits of a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VRCPSS / RCPSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the calculation. /// \returns A 128-bit vector of [4 x float] containing the approximate /// reciprocal of the value in the low-order bits of the operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp_ss(__m128 __a) { return (__m128)__builtin_ia32_rcpss((__v4sf)__a); } /// Calculates the approximate reciprocals of the values stored in a /// 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VRCPPS / RCPPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the approximate /// reciprocals of the values in the operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp_ps(__m128 __a) { return (__m128)__builtin_ia32_rcpps((__v4sf)__a); } /// Calculates the approximate reciprocal of the square root of the value /// stored in the low-order bits of a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VRSQRTSS / RSQRTSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the calculation. /// \returns A 128-bit vector of [4 x float] containing the approximate /// reciprocal of the square root of the value in the low-order bits of the /// operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt_ss(__m128 __a) { return __builtin_ia32_rsqrtss((__v4sf)__a); } /// Calculates the approximate reciprocals of the square roots of the /// values stored in a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VRSQRTPS / RSQRTPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the approximate /// reciprocals of the square roots of the values in the operand. 
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt_ps(__m128 __a) { return __builtin_ia32_rsqrtps((__v4sf)__a); } /// Compares two 32-bit float values in the low-order bits of both /// operands and returns the lesser value in the low-order bits of the /// vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VMINSS / MINSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// minimum value between both operands. The upper 96 bits are copied from /// the upper 96 bits of the first source operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ss(__m128 __a, __m128 __b) { return __builtin_ia32_minss((__v4sf)__a, (__v4sf)__b); } /// Compares two 128-bit vectors of [4 x float] and returns the lesser /// of each pair of values. /// /// \headerfile /// /// This intrinsic corresponds to the VMINPS / MINPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. /// \returns A 128-bit vector of [4 x float] containing the minimum values /// between both operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ps(__m128 __a, __m128 __b) { return __builtin_ia32_minps((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands and returns the greater value in the low-order bits of a 128-bit /// vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VMAXSS / MAXSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// maximum value between both operands. The upper 96 bits are copied from /// the upper 96 bits of the first source operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ss(__m128 __a, __m128 __b) { return __builtin_ia32_maxss((__v4sf)__a, (__v4sf)__b); } /// Compares two 128-bit vectors of [4 x float] and returns the greater /// of each pair of values. /// /// \headerfile /// /// This intrinsic corresponds to the VMAXPS / MAXPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. /// \returns A 128-bit vector of [4 x float] containing the maximum values /// between both operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ps(__m128 __a, __m128 __b) { return __builtin_ia32_maxps((__v4sf)__a, (__v4sf)__b); } /// Performs a bitwise AND of two 128-bit vectors of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VANDPS / ANDPS instructions. /// /// \param __a /// A 128-bit vector containing one of the source operands. /// \param __b /// A 128-bit vector containing one of the source operands. /// \returns A 128-bit vector of [4 x float] containing the bitwise AND of the /// values between both operands. 
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_and_ps(__m128 __a, __m128 __b) { return (__m128)((__v4su)__a & (__v4su)__b); } /// Performs a bitwise AND of two 128-bit vectors of [4 x float], using /// the one's complement of the values contained in the first source /// operand. /// /// \headerfile /// /// This intrinsic corresponds to the VANDNPS / ANDNPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing the first source operand. The /// one's complement of this value is used in the bitwise AND. /// \param __b /// A 128-bit vector of [4 x float] containing the second source operand. /// \returns A 128-bit vector of [4 x float] containing the bitwise AND of the /// one's complement of the first operand and the values in the second /// operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_andnot_ps(__m128 __a, __m128 __b) { return (__m128)(~(__v4su)__a & (__v4su)__b); } /// Performs a bitwise OR of two 128-bit vectors of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VORPS / ORPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the source operands. /// \param __b /// A 128-bit vector of [4 x float] containing one of the source operands. /// \returns A 128-bit vector of [4 x float] containing the bitwise OR of the /// values between both operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_or_ps(__m128 __a, __m128 __b) { return (__m128)((__v4su)__a | (__v4su)__b); } /// Performs a bitwise exclusive OR of two 128-bit vectors of /// [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VXORPS / XORPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the source operands. /// \param __b /// A 128-bit vector of [4 x float] containing one of the source operands. /// \returns A 128-bit vector of [4 x float] containing the bitwise exclusive OR /// of the values between both operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_xor_ps(__m128 __a, __m128 __b) { return (__m128)((__v4su)__a ^ (__v4su)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands for equality. /// /// The comparison yields 0x0 for false, 0xFFFFFFFF for true, in the /// low-order bits of a vector [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPEQSS / CMPEQSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpeq_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpeqss((__v4sf)__a, (__v4sf)__b); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] for equality. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPEQPS / CMPEQPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. 
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpeq_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpeqps((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is less than the /// corresponding value in the second operand. /// /// The comparison yields 0x0 for false, 0xFFFFFFFF for true, in the /// low-order bits of a vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLTSS / CMPLTSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmplt_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpltss((__v4sf)__a, (__v4sf)__b); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are less than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLTPS / CMPLTPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmplt_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpltps((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is less than or /// equal to the corresponding value in the second operand. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true, in /// the low-order bits of a vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLESS / CMPLESS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmple_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpless((__v4sf)__a, (__v4sf)__b); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are less than or equal to those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLEPS / CMPLEPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. 
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmple_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpleps((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is greater than /// the corresponding value in the second operand. /// /// The comparison yields 0x0 for false, 0xFFFFFFFF for true, in the /// low-order bits of a vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLTSS / CMPLTSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpgt_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector((__v4sf)__a, (__v4sf)__builtin_ia32_cmpltss((__v4sf)__b, (__v4sf)__a), 4, 1, 2, 3); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are greater than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLTPS / CMPLTPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpgt_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpltps((__v4sf)__b, (__v4sf)__a); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is greater than /// or equal to the corresponding value in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true, in the /// low-order bits of a vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLESS / CMPLESS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpge_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector((__v4sf)__a, (__v4sf)__builtin_ia32_cmpless((__v4sf)__b, (__v4sf)__a), 4, 1, 2, 3); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are greater than or equal to those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLEPS / CMPLEPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. 
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpge_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpleps((__v4sf)__b, (__v4sf)__a); } /// Compares two 32-bit float values in the low-order bits of both operands /// for inequality. /// /// The comparison yields 0x0 for false, 0xFFFFFFFF for true, in the /// low-order bits of a vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNEQSS / CMPNEQSS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpneq_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpneqss((__v4sf)__a, (__v4sf)__b); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] for inequality. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNEQPS / CMPNEQPS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpneq_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpneqps((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is not less than /// the corresponding value in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true, in the /// low-order bits of a vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLTSS / CMPNLTSS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnlt_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpnltss((__v4sf)__a, (__v4sf)__b); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are not less than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLTPS / CMPNLTPS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnlt_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpnltps((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is not less than /// or equal to the corresponding value in the second operand. 
/// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true, in the /// low-order bits of a vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLESS / CMPNLESS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnle_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpnless((__v4sf)__a, (__v4sf)__b); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are not less than or equal to those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLEPS / CMPNLEPS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnle_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpnleps((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is not greater /// than the corresponding value in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true, in the /// low-order bits of a vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLTSS / CMPNLTSS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpngt_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector((__v4sf)__a, (__v4sf)__builtin_ia32_cmpnltss((__v4sf)__b, (__v4sf)__a), 4, 1, 2, 3); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are not greater than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLTPS / CMPNLTPS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpngt_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpnltps((__v4sf)__b, (__v4sf)__a); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is not greater /// than or equal to the corresponding value in the second operand. 
/// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true, in the /// low-order bits of a vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLESS / CMPNLESS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnge_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector((__v4sf)__a, (__v4sf)__builtin_ia32_cmpnless((__v4sf)__b, (__v4sf)__a), 4, 1, 2, 3); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are not greater than or equal to those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLEPS / CMPNLEPS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnge_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpnleps((__v4sf)__b, (__v4sf)__a); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is ordered with /// respect to the corresponding value in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true, in the /// low-order bits of a vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPORDSS / CMPORDSS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpord_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpordss((__v4sf)__a, (__v4sf)__b); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are ordered with respect to those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPORDPS / CMPORDPS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpord_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpordps((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is unordered /// with respect to the corresponding value in the second operand. 
/// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true, in the /// low-order bits of a vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPUNORDSS / CMPUNORDSS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpunord_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpunordss((__v4sf)__a, (__v4sf)__b); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are unordered with respect to those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPUNORDPS / CMPUNORDPS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpunord_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpunordps((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands for equality. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower floating-point values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISS / COMISS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_ss(__m128 __a, __m128 __b) { return __builtin_ia32_comieq((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the first operand is less than the second /// operand. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower floating-point values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISS / COMISS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_ss(__m128 __a, __m128 __b) { return __builtin_ia32_comilt((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the first operand is less than or equal to the /// second operand. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower floating-point values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISS / COMISS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. 
The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_ss(__m128 __a, __m128 __b) { return __builtin_ia32_comile((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the first operand is greater than the second /// operand. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower floating-point values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISS / COMISS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_ss(__m128 __a, __m128 __b) { return __builtin_ia32_comigt((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the first operand is greater than or equal to /// the second operand. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower floating-point values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISS / COMISS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_ss(__m128 __a, __m128 __b) { return __builtin_ia32_comige((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the first operand is not equal to the second /// operand. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower floating-point values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISS / COMISS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_ss(__m128 __a, __m128 __b) { return __builtin_ia32_comineq((__v4sf)__a, (__v4sf)__b); } /// Performs an unordered comparison of two 32-bit float values using /// the low-order bits of both operands to determine equality. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower floating-point values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISS / UCOMISS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. 
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_ss(__m128 __a, __m128 __b) { return __builtin_ia32_ucomieq((__v4sf)__a, (__v4sf)__b); } /// Performs an unordered comparison of two 32-bit float values using /// the low-order bits of both operands to determine if the first operand is /// less than the second operand. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower floating-point values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISS / UCOMISS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_ss(__m128 __a, __m128 __b) { return __builtin_ia32_ucomilt((__v4sf)__a, (__v4sf)__b); } /// Performs an unordered comparison of two 32-bit float values using /// the low-order bits of both operands to determine if the first operand is /// less than or equal to the second operand. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower floating-point values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISS / UCOMISS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_ss(__m128 __a, __m128 __b) { return __builtin_ia32_ucomile((__v4sf)__a, (__v4sf)__b); } /// Performs an unordered comparison of two 32-bit float values using /// the low-order bits of both operands to determine if the first operand is /// greater than the second operand. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower floating-point values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISS / UCOMISS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_ss(__m128 __a, __m128 __b) { return __builtin_ia32_ucomigt((__v4sf)__a, (__v4sf)__b); } /// Performs an unordered comparison of two 32-bit float values using /// the low-order bits of both operands to determine if the first operand is /// greater than or equal to the second operand. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower floating-point values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISS / UCOMISS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. 
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_ss(__m128 __a, __m128 __b) { return __builtin_ia32_ucomige((__v4sf)__a, (__v4sf)__b); } /// Performs an unordered comparison of two 32-bit float values using /// the low-order bits of both operands to determine inequality. /// /// The comparison returns 0 for false, 1 for true. If either of the two /// lower floating-point values is NaN, returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISS / UCOMISS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_ss(__m128 __a, __m128 __b) { return __builtin_ia32_ucomineq((__v4sf)__a, (__v4sf)__b); } /// Converts a float value contained in the lower 32 bits of a vector of /// [4 x float] into a 32-bit integer. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSS2SI / CVTSS2SI /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the conversion. /// \returns A 32-bit integer containing the converted value. static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtss_si32(__m128 __a) { return __builtin_ia32_cvtss2si((__v4sf)__a); } /// Converts a float value contained in the lower 32 bits of a vector of /// [4 x float] into a 32-bit integer. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSS2SI / CVTSS2SI /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the conversion. /// \returns A 32-bit integer containing the converted value. static __inline__ int __DEFAULT_FN_ATTRS _mm_cvt_ss2si(__m128 __a) { return _mm_cvtss_si32(__a); } #ifdef __x86_64__ /// Converts a float value contained in the lower 32 bits of a vector of /// [4 x float] into a 64-bit integer. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSS2SI / CVTSS2SI /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the conversion. /// \returns A 64-bit integer containing the converted value. static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtss_si64(__m128 __a) { return __builtin_ia32_cvtss2si64((__v4sf)__a); } #endif /// Converts two low-order float values in a 128-bit vector of /// [4 x float] into a 64-bit vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPS2PI instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 64-bit integer vector containing the converted values. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtps_pi32(__m128 __a) { return (__m64)__builtin_ia32_cvtps2pi((__v4sf)__a); } /// Converts two low-order float values in a 128-bit vector of /// [4 x float] into a 64-bit vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPS2PI instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 64-bit integer vector containing the converted values. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvt_ps2pi(__m128 __a) { return _mm_cvtps_pi32(__a); } /// Converts a float value contained in the lower 32 bits of a vector of /// [4 x float] into a 32-bit integer, truncating the result when it is /// inexact. 
/// /// \headerfile /// /// This intrinsic corresponds to the VCVTTSS2SI / CVTTSS2SI /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the conversion. /// \returns A 32-bit integer containing the converted value. static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttss_si32(__m128 __a) { return __builtin_ia32_cvttss2si((__v4sf)__a); } /// Converts a float value contained in the lower 32 bits of a vector of /// [4 x float] into a 32-bit integer, truncating the result when it is /// inexact. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTTSS2SI / CVTTSS2SI /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the conversion. /// \returns A 32-bit integer containing the converted value. static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtt_ss2si(__m128 __a) { return _mm_cvttss_si32(__a); } #ifdef __x86_64__ /// Converts a float value contained in the lower 32 bits of a vector of /// [4 x float] into a 64-bit integer, truncating the result when it is /// inexact. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTTSS2SI / CVTTSS2SI /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the conversion. /// \returns A 64-bit integer containing the converted value. static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvttss_si64(__m128 __a) { return __builtin_ia32_cvttss2si64((__v4sf)__a); } #endif /// Converts two low-order float values in a 128-bit vector of /// [4 x float] into a 64-bit vector of [2 x i32], truncating the result /// when it is inexact. /// /// \headerfile /// /// This intrinsic corresponds to the CVTTPS2PI / VTTPS2PI /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 64-bit integer vector containing the converted values. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvttps_pi32(__m128 __a) { return (__m64)__builtin_ia32_cvttps2pi((__v4sf)__a); } /// Converts two low-order float values in a 128-bit vector of [4 x /// float] into a 64-bit vector of [2 x i32], truncating the result when it /// is inexact. /// /// \headerfile /// /// This intrinsic corresponds to the CVTTPS2PI instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 64-bit integer vector containing the converted values. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtt_ps2pi(__m128 __a) { return _mm_cvttps_pi32(__a); } /// Converts a 32-bit signed integer value into a floating point value /// and writes it to the lower 32 bits of the destination. The remaining /// higher order elements of the destination vector are copied from the /// corresponding elements in the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSI2SS / CVTSI2SS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 32-bit signed integer operand containing the value to be converted. /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// converted value of the second operand. The upper 96 bits are copied from /// the upper 96 bits of the first operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsi32_ss(__m128 __a, int __b) { __a[0] = __b; return __a; } /// Converts a 32-bit signed integer value into a floating point value /// and writes it to the lower 32 bits of the destination. 
The remaining /// higher order elements of the destination are copied from the /// corresponding elements in the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSI2SS / CVTSI2SS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 32-bit signed integer operand containing the value to be converted. /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// converted value of the second operand. The upper 96 bits are copied from /// the upper 96 bits of the first operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvt_si2ss(__m128 __a, int __b) { return _mm_cvtsi32_ss(__a, __b); } #ifdef __x86_64__ /// Converts a 64-bit signed integer value into a floating point value /// and writes it to the lower 32 bits of the destination. The remaining /// higher order elements of the destination are copied from the /// corresponding elements in the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSI2SS / CVTSI2SS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 64-bit signed integer operand containing the value to be converted. /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// converted value of the second operand. The upper 96 bits are copied from /// the upper 96 bits of the first operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsi64_ss(__m128 __a, long long __b) { __a[0] = __b; return __a; } #endif /// Converts two elements of a 64-bit vector of [2 x i32] into two /// floating point values and writes them to the lower 64-bits of the /// destination. The remaining higher order elements of the destination are /// copied from the corresponding elements in the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPI2PS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 64-bit vector of [2 x i32]. The elements in this vector are converted /// and written to the corresponding low-order elements in the destination. /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the /// converted value of the second operand. The upper 64 bits are copied from /// the upper 64 bits of the first operand. static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32_ps(__m128 __a, __m64 __b) { return __builtin_ia32_cvtpi2ps((__v4sf)__a, (__v2si)__b); } /// Converts two elements of a 64-bit vector of [2 x i32] into two /// floating point values and writes them to the lower 64-bits of the /// destination. The remaining higher order elements of the destination are /// copied from the corresponding elements in the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPI2PS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 64-bit vector of [2 x i32]. The elements in this vector are converted /// and written to the corresponding low-order elements in the destination. /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the /// converted value from the second operand. The upper 64 bits are copied /// from the upper 64 bits of the first operand. static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvt_pi2ps(__m128 __a, __m64 __b) { return _mm_cvtpi32_ps(__a, __b); } /// Extracts a float value contained in the lower 32 bits of a vector of /// [4 x float]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. 
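The truncating and rounding scalar conversions above differ only in how they treat fractional values. A small illustrative sketch (not part of the header), assuming an SSE-capable target:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m128 v = _mm_set1_ps(9.75f);

  /* The truncating conversion always rounds toward zero. */
  printf("cvttss_si32(9.75) = %d\n", _mm_cvttss_si32(v)); /* 9 */

  /* Insert the integer 3 into the low lane; the upper lanes keep 9.75. */
  v = _mm_cvtsi32_ss(v, 3);
  printf("low lane after cvtsi32_ss = %f\n", _mm_cvtss_f32(v)); /* 3.000000 */
  return 0;
}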
/// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the extraction. /// \returns A 32-bit float containing the extracted value. static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtss_f32(__m128 __a) { return __a[0]; } /// Loads two packed float values from the address \a __p into the /// high-order bits of a 128-bit vector of [4 x float]. The low-order bits /// are copied from the low-order bits of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVHPD / MOVHPD instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. Bits [63:0] are written to bits [63:0] /// of the destination. /// \param __p /// A pointer to two packed float values. Bits [63:0] are written to bits /// [127:64] of the destination. /// \returns A 128-bit vector of [4 x float] containing the moved values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadh_pi(__m128 __a, const __m64 *__p) { typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8))); struct __mm_loadh_pi_struct { __mm_loadh_pi_v2f32 __u; } __attribute__((__packed__, __may_alias__)); __mm_loadh_pi_v2f32 __b = ((const struct __mm_loadh_pi_struct*)__p)->__u; __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1); return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5); } /// Loads two packed float values from the address \a __p into the /// low-order bits of a 128-bit vector of [4 x float]. The high-order bits /// are copied from the high-order bits of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVLPD / MOVLPD instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. Bits [127:64] are written to bits /// [127:64] of the destination. /// \param __p /// A pointer to two packed float values. Bits [63:0] are written to bits /// [63:0] of the destination. /// \returns A 128-bit vector of [4 x float] containing the moved values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadl_pi(__m128 __a, const __m64 *__p) { typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8))); struct __mm_loadl_pi_struct { __mm_loadl_pi_v2f32 __u; } __attribute__((__packed__, __may_alias__)); __mm_loadl_pi_v2f32 __b = ((const struct __mm_loadl_pi_struct*)__p)->__u; __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1); return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3); } /// Constructs a 128-bit floating-point vector of [4 x float]. The lower /// 32 bits of the vector are initialized with the single-precision /// floating-point value loaded from a specified memory location. The upper /// 96 bits are set to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVSS / MOVSS instruction. /// /// \param __p /// A pointer to a 32-bit memory location containing a single-precision /// floating-point value. /// \returns An initialized 128-bit floating-point vector of [4 x float]. The /// lower 32 bits contain the value loaded from the memory location. The /// upper 96 bits are set to zero. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_load_ss(const float *__p) { struct __mm_load_ss_struct { float __u; } __attribute__((__packed__, __may_alias__)); float __u = ((const struct __mm_load_ss_struct*)__p)->__u; return __extension__ (__m128){ __u, 0, 0, 0 }; } /// Loads a 32-bit float value and duplicates it to all four vector /// elements of a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VBROADCASTSS / MOVSS + shuffling /// instruction. 
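_mm_loadl_pi and _mm_loadh_pi fill the two halves of a [4 x float] vector from separate 64-bit memory locations. An illustrative sketch (not part of the header), assuming an SSE-capable target:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  float lo[2] = { 1.0f, 2.0f };
  float hi[2] = { 3.0f, 4.0f };

  __m128 v = _mm_setzero_ps();
  v = _mm_loadl_pi(v, (const __m64 *)lo); /* lanes 0..1 <- lo */
  v = _mm_loadh_pi(v, (const __m64 *)hi); /* lanes 2..3 <- hi */

  float out[4];
  _mm_storeu_ps(out, v);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 1 2 3 4 */
  return 0;
}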
/// /// \param __p /// A pointer to a float value to be loaded and duplicated. /// \returns A 128-bit vector of [4 x float] containing the loaded and /// duplicated values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_load1_ps(const float *__p) { struct __mm_load1_ps_struct { float __u; } __attribute__((__packed__, __may_alias__)); float __u = ((const struct __mm_load1_ps_struct*)__p)->__u; return __extension__ (__m128){ __u, __u, __u, __u }; } #define _mm_load_ps1(p) _mm_load1_ps(p) /// Loads a 128-bit floating-point vector of [4 x float] from an aligned /// memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPS / MOVAPS instruction. /// /// \param __p /// A pointer to a 128-bit memory location. The address of the memory /// location has to be 128-bit aligned. /// \returns A 128-bit vector of [4 x float] containing the loaded values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_load_ps(const float *__p) { return *(const __m128*)__p; } /// Loads a 128-bit floating-point vector of [4 x float] from an /// unaligned memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVUPS / MOVUPS instruction. /// /// \param __p /// A pointer to a 128-bit memory location. The address of the memory /// location does not have to be aligned. /// \returns A 128-bit vector of [4 x float] containing the loaded values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadu_ps(const float *__p) { struct __loadu_ps { __m128_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_ps*)__p)->__v; } /// Loads four packed float values, in reverse order, from an aligned /// memory location to 32-bit elements in a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPS / MOVAPS + shuffling /// instruction. /// /// \param __p /// A pointer to a 128-bit memory location. The address of the memory /// location has to be 128-bit aligned. /// \returns A 128-bit vector of [4 x float] containing the moved values, loaded /// in reverse order. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadr_ps(const float *__p) { __m128 __a = _mm_load_ps(__p); return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0); } /// Create a 128-bit vector of [4 x float] with undefined values. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \returns A 128-bit vector of [4 x float] containing undefined values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_undefined_ps(void) { return (__m128)__builtin_ia32_undef128(); } /// Constructs a 128-bit floating-point vector of [4 x float]. The lower /// 32 bits of the vector are initialized with the specified single-precision /// floating-point value. The upper 96 bits are set to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVSS / MOVSS instruction. /// /// \param __w /// A single-precision floating-point value used to initialize the lower 32 /// bits of the result. /// \returns An initialized 128-bit floating-point vector of [4 x float]. The /// lower 32 bits contain the value provided in the source operand. The /// upper 96 bits are set to zero. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set_ss(float __w) { return __extension__ (__m128){ __w, 0, 0, 0 }; } /// Constructs a 128-bit floating-point vector of [4 x float], with each /// of the four single-precision floating-point vector elements set to the /// specified single-precision floating-point value. 
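The aligned, unaligned, and reversed loads above differ only in their alignment requirements and element order. A short illustrative sketch, assuming an SSE-capable target:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  /* _mm_load_ps requires a 16-byte aligned address; _mm_loadu_ps does not. */
  _MM_ALIGN16 float aligned[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
  float plain[5] = { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f };

  __m128 a = _mm_load_ps(aligned);    /* aligned load */
  __m128 u = _mm_loadu_ps(&plain[1]); /* possibly unaligned load */
  __m128 r = _mm_loadr_ps(aligned);   /* elements loaded in reverse order */

  float out[4];
  _mm_storeu_ps(out, r);
  printf("reversed: %g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 4 3 2 1 */
  (void)a; (void)u;
  return 0;
}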
/// /// \headerfile /// /// This intrinsic corresponds to the VPERMILPS / PERMILPS instruction. /// /// \param __w /// A single-precision floating-point value used to initialize each vector /// element of the result. /// \returns An initialized 128-bit floating-point vector of [4 x float]. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set1_ps(float __w) { return __extension__ (__m128){ __w, __w, __w, __w }; } /* Microsoft specific. */ /// Constructs a 128-bit floating-point vector of [4 x float], with each /// of the four single-precision floating-point vector elements set to the /// specified single-precision floating-point value. /// /// \headerfile /// /// This intrinsic corresponds to the VPERMILPS / PERMILPS instruction. /// /// \param __w /// A single-precision floating-point value used to initialize each vector /// element of the result. /// \returns An initialized 128-bit floating-point vector of [4 x float]. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set_ps1(float __w) { return _mm_set1_ps(__w); } /// Constructs a 128-bit floating-point vector of [4 x float] /// initialized with the specified single-precision floating-point values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __z /// A single-precision floating-point value used to initialize bits [127:96] /// of the result. /// \param __y /// A single-precision floating-point value used to initialize bits [95:64] /// of the result. /// \param __x /// A single-precision floating-point value used to initialize bits [63:32] /// of the result. /// \param __w /// A single-precision floating-point value used to initialize bits [31:0] /// of the result. /// \returns An initialized 128-bit floating-point vector of [4 x float]. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set_ps(float __z, float __y, float __x, float __w) { return __extension__ (__m128){ __w, __x, __y, __z }; } /// Constructs a 128-bit floating-point vector of [4 x float], /// initialized in reverse order with the specified 32-bit single-precision /// float-point values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __z /// A single-precision floating-point value used to initialize bits [31:0] /// of the result. /// \param __y /// A single-precision floating-point value used to initialize bits [63:32] /// of the result. /// \param __x /// A single-precision floating-point value used to initialize bits [95:64] /// of the result. /// \param __w /// A single-precision floating-point value used to initialize bits [127:96] /// of the result. /// \returns An initialized 128-bit floating-point vector of [4 x float]. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setr_ps(float __z, float __y, float __x, float __w) { return __extension__ (__m128){ __z, __y, __x, __w }; } /// Constructs a 128-bit floating-point vector of [4 x float] initialized /// to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VXORPS / XORPS instruction. /// /// \returns An initialized 128-bit floating-point vector of [4 x float] with /// all elements set to zero. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void) { return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f }; } /// Stores the upper 64 bits of a 128-bit vector of [4 x float] to a /// memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VPEXTRQ / PEXTRQ instruction. 
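_mm_set_ps takes its arguments from the highest lane down, while _mm_setr_ps takes them from the lowest lane up, so the two calls below construct the same vector. Illustrative sketch only:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m128 a = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);  /* arguments high-to-low */
  __m128 b = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f); /* arguments low-to-high */

  float fa[4], fb[4];
  _mm_storeu_ps(fa, a);
  _mm_storeu_ps(fb, b);

  /* Both lines print 1 2 3 4: the two constructors are mirror images. */
  printf("%g %g %g %g\n", fa[0], fa[1], fa[2], fa[3]);
  printf("%g %g %g %g\n", fb[0], fb[1], fb[2], fb[3]);
  return 0;
}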
/// /// \param __p /// A pointer to a 64-bit memory location. /// \param __a /// A 128-bit vector of [4 x float] containing the values to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pi(__m64 *__p, __m128 __a) { typedef float __mm_storeh_pi_v2f32 __attribute__((__vector_size__(8))); struct __mm_storeh_pi_struct { __mm_storeh_pi_v2f32 __u; } __attribute__((__packed__, __may_alias__)); ((struct __mm_storeh_pi_struct*)__p)->__u = __builtin_shufflevector(__a, __a, 2, 3); } /// Stores the lower 64 bits of a 128-bit vector of [4 x float] to a /// memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVLPS / MOVLPS instruction. /// /// \param __p /// A pointer to a memory location that will receive the float values. /// \param __a /// A 128-bit vector of [4 x float] containing the values to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pi(__m64 *__p, __m128 __a) { typedef float __mm_storeh_pi_v2f32 __attribute__((__vector_size__(8))); struct __mm_storeh_pi_struct { __mm_storeh_pi_v2f32 __u; } __attribute__((__packed__, __may_alias__)); ((struct __mm_storeh_pi_struct*)__p)->__u = __builtin_shufflevector(__a, __a, 0, 1); } /// Stores the lower 32 bits of a 128-bit vector of [4 x float] to a /// memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVSS / MOVSS instruction. /// /// \param __p /// A pointer to a 32-bit memory location. /// \param __a /// A 128-bit vector of [4 x float] containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_store_ss(float *__p, __m128 __a) { struct __mm_store_ss_struct { float __u; } __attribute__((__packed__, __may_alias__)); ((struct __mm_store_ss_struct*)__p)->__u = __a[0]; } /// Stores a 128-bit vector of [4 x float] to an unaligned memory /// location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVUPS / MOVUPS instruction. /// /// \param __p /// A pointer to a 128-bit memory location. The address of the memory /// location does not have to be aligned. /// \param __a /// A 128-bit vector of [4 x float] containing the values to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_ps(float *__p, __m128 __a) { struct __storeu_ps { __m128_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_ps*)__p)->__v = __a; } /// Stores a 128-bit vector of [4 x float] into an aligned memory /// location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPS / MOVAPS instruction. /// /// \param __p /// A pointer to a 128-bit memory location. The address of the memory /// location has to be 16-byte aligned. /// \param __a /// A 128-bit vector of [4 x float] containing the values to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_store_ps(float *__p, __m128 __a) { *(__m128*)__p = __a; } /// Stores the lower 32 bits of a 128-bit vector of [4 x float] into /// four contiguous elements in an aligned memory location. /// /// \headerfile /// /// This intrinsic corresponds to VMOVAPS / MOVAPS + shuffling /// instruction. /// /// \param __p /// A pointer to a 128-bit memory location. /// \param __a /// A 128-bit vector of [4 x float] whose lower 32 bits are stored to each /// of the four contiguous elements pointed by \a __p. 
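A brief illustrative sketch of the store variants above (aligned, unaligned, and scalar), assuming an SSE-capable target:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m128 v = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);

  _MM_ALIGN16 float aligned[4];
  float unaligned[4];
  float scalar;

  _mm_store_ps(aligned, v);    /* requires 16-byte alignment */
  _mm_storeu_ps(unaligned, v); /* no alignment requirement */
  _mm_store_ss(&scalar, v);    /* stores only the low lane */

  printf("%g %g %g %g, low = %g\n",
         aligned[0], aligned[1], aligned[2], aligned[3], scalar);
  return 0;
}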
static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_ps(float *__p, __m128 __a) { __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0); _mm_store_ps(__p, __a); } /// Stores the lower 32 bits of a 128-bit vector of [4 x float] into /// four contiguous elements in an aligned memory location. /// /// \headerfile /// /// This intrinsic corresponds to VMOVAPS / MOVAPS + shuffling /// instruction. /// /// \param __p /// A pointer to a 128-bit memory location. /// \param __a /// A 128-bit vector of [4 x float] whose lower 32 bits are stored to each /// of the four contiguous elements pointed by \a __p. static __inline__ void __DEFAULT_FN_ATTRS _mm_store_ps1(float *__p, __m128 __a) { _mm_store1_ps(__p, __a); } /// Stores float values from a 128-bit vector of [4 x float] to an /// aligned memory location in reverse order. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPS / MOVAPS + shuffling /// instruction. /// /// \param __p /// A pointer to a 128-bit memory location. The address of the memory /// location has to be 128-bit aligned. /// \param __a /// A 128-bit vector of [4 x float] containing the values to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_ps(float *__p, __m128 __a) { __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0); _mm_store_ps(__p, __a); } #define _MM_HINT_ET0 7 #define _MM_HINT_ET1 6 #define _MM_HINT_T0 3 #define _MM_HINT_T1 2 #define _MM_HINT_T2 1 #define _MM_HINT_NTA 0 #ifndef _MSC_VER /* FIXME: We have to #define this because "sel" must be a constant integer, and Sema doesn't do any form of constant propagation yet. */ /// Loads one cache line of data from the specified address to a location /// closer to the processor. /// /// \headerfile /// /// \code /// void _mm_prefetch(const void *a, const int sel); /// \endcode /// /// This intrinsic corresponds to the PREFETCHNTA instruction. /// /// \param a /// A pointer to a memory location containing a cache line of data. /// \param sel /// A predefined integer constant specifying the type of prefetch /// operation: \n /// _MM_HINT_NTA: Move data using the non-temporal access (NTA) hint. The /// PREFETCHNTA instruction will be generated. \n /// _MM_HINT_T0: Move data using the T0 hint. The PREFETCHT0 instruction will /// be generated. \n /// _MM_HINT_T1: Move data using the T1 hint. The PREFETCHT1 instruction will /// be generated. \n /// _MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction will /// be generated. #define _mm_prefetch(a, sel) (__builtin_prefetch((const void *)(a), \ ((sel) >> 2) & 1, (sel) & 0x3)) #endif /// Stores a 64-bit integer in the specified aligned memory location. To /// minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the MOVNTQ instruction. /// /// \param __p /// A pointer to an aligned memory location used to store the register value. /// \param __a /// A 64-bit integer containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS_MMX _mm_stream_pi(void *__p, __m64 __a) { __builtin_ia32_movntq((__m64 *)__p, __a); } /// Moves packed float values from a 128-bit vector of [4 x float] to a /// 128-bit aligned memory location. To minimize caching, the data is flagged /// as non-temporal (unlikely to be used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the VMOVNTPS / MOVNTPS instruction. 
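_mm_prefetch is purely a hint; it never changes program results. A small illustrative sketch (the 16-element look-ahead distance is an arbitrary choice for this example, not a recommendation from the header):

#include <stdio.h>
#include <xmmintrin.h>

static float sum4(const float *p) {
  /* Hint that data one cache line ahead will be needed soon; advisory only. */
  _mm_prefetch((const char *)(p + 16), _MM_HINT_T0);

  __m128 v = _mm_loadu_ps(p);
  float out[4];
  _mm_storeu_ps(out, v);
  return out[0] + out[1] + out[2] + out[3];
}

int main(void) {
  float data[32];
  for (int i = 0; i < 32; ++i) data[i] = (float)i;
  printf("sum of first four = %g\n", sum4(data)); /* 0+1+2+3 = 6 */
  return 0;
}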
/// /// \param __p /// A pointer to a 128-bit aligned memory location that will receive the /// single-precision floating-point values. /// \param __a /// A 128-bit vector of [4 x float] containing the values to be moved. static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_ps(void *__p, __m128 __a) { __builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p); } #if defined(__cplusplus) extern "C" { #endif /// Forces strong memory ordering (serialization) between store /// instructions preceding this instruction and store instructions following /// this instruction, ensuring the system completes all previous stores /// before executing subsequent stores. /// /// \headerfile /// /// This intrinsic corresponds to the SFENCE instruction. /// void _mm_sfence(void); #if defined(__cplusplus) } // extern "C" #endif /// Extracts 16-bit element from a 64-bit vector of [4 x i16] and /// returns it, as specified by the immediate integer operand. /// /// \headerfile /// /// \code /// int _mm_extract_pi16(__m64 a, int n); /// \endcode /// /// This intrinsic corresponds to the VPEXTRW / PEXTRW instruction. /// /// \param a /// A 64-bit vector of [4 x i16]. /// \param n /// An immediate integer operand that determines which bits are extracted: \n /// 0: Bits [15:0] are copied to the destination. \n /// 1: Bits [31:16] are copied to the destination. \n /// 2: Bits [47:32] are copied to the destination. \n /// 3: Bits [63:48] are copied to the destination. /// \returns A 16-bit integer containing the extracted 16 bits of packed data. #define _mm_extract_pi16(a, n) \ ((int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n)) /// Copies data from the 64-bit vector of [4 x i16] to the destination, /// and inserts the lower 16-bits of an integer operand at the 16-bit offset /// specified by the immediate operand \a n. /// /// \headerfile /// /// \code /// __m64 _mm_insert_pi16(__m64 a, int d, int n); /// \endcode /// /// This intrinsic corresponds to the PINSRW instruction. /// /// \param a /// A 64-bit vector of [4 x i16]. /// \param d /// An integer. The lower 16-bit value from this operand is written to the /// destination at the offset specified by operand \a n. /// \param n /// An immediate integer operant that determines which the bits to be used /// in the destination. \n /// 0: Bits [15:0] are copied to the destination. \n /// 1: Bits [31:16] are copied to the destination. \n /// 2: Bits [47:32] are copied to the destination. \n /// 3: Bits [63:48] are copied to the destination. \n /// The remaining bits in the destination are copied from the corresponding /// bits in operand \a a. /// \returns A 64-bit integer vector containing the copied packed data from the /// operands. #define _mm_insert_pi16(a, d, n) \ ((__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n)) /// Compares each of the corresponding packed 16-bit integer values of /// the 64-bit integer vectors, and writes the greater value to the /// corresponding bits in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the PMAXSW instruction. /// /// \param __a /// A 64-bit integer vector containing one of the source operands. /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the comparison results. 
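_mm_extract_pi16 and _mm_insert_pi16 take the element index as an immediate constant. An illustrative sketch using the MMX intrinsics above; note the _mm_empty() call that is conventionally required when leaving MMX code:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m64 v = _mm_setr_pi16(10, 20, 30, 40);

  int second = _mm_extract_pi16(v, 1); /* 20 */
  v = _mm_insert_pi16(v, 99, 3);       /* replace element 3 with 99 */
  int last = _mm_extract_pi16(v, 3);   /* 99 */

  _mm_empty(); /* clear the MMX state before returning to normal FP code */
  printf("second = %d, last = %d\n", second, last);
  return 0;
}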
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_max_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b); } /// Compares each of the corresponding packed 8-bit unsigned integer /// values of the 64-bit integer vectors, and writes the greater value to the /// corresponding bits in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the PMAXUB instruction. /// /// \param __a /// A 64-bit integer vector containing one of the source operands. /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the comparison results. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_max_pu8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b); } /// Compares each of the corresponding packed 16-bit integer values of /// the 64-bit integer vectors, and writes the lesser value to the /// corresponding bits in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the PMINSW instruction. /// /// \param __a /// A 64-bit integer vector containing one of the source operands. /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the comparison results. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_min_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b); } /// Compares each of the corresponding packed 8-bit unsigned integer /// values of the 64-bit integer vectors, and writes the lesser value to the /// corresponding bits in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the PMINUB instruction. /// /// \param __a /// A 64-bit integer vector containing one of the source operands. /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the comparison results. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_min_pu8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b); } /// Takes the most significant bit from each 8-bit element in a 64-bit /// integer vector to create an 8-bit mask value. Zero-extends the value to /// 32-bit integer and writes it to the destination. /// /// \headerfile /// /// This intrinsic corresponds to the PMOVMSKB instruction. /// /// \param __a /// A 64-bit integer vector containing the values with bits to be extracted. /// \returns The most significant bit from each 8-bit element in \a __a, /// written to bits [7:0]. static __inline__ int __DEFAULT_FN_ATTRS_MMX _mm_movemask_pi8(__m64 __a) { return __builtin_ia32_pmovmskb((__v8qi)__a); } /// Multiplies packed 16-bit unsigned integer values and writes the /// high-order 16 bits of each 32-bit product to the corresponding bits in /// the destination. /// /// \headerfile /// /// This intrinsic corresponds to the PMULHUW instruction. /// /// \param __a /// A 64-bit integer vector containing one of the source operands. /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the products of both operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mulhi_pu16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b); } /// Shuffles the 4 16-bit integers from a 64-bit integer vector to the /// destination, as specified by the immediate value operand. 
/// /// \headerfile /// /// \code /// __m64 _mm_shuffle_pi16(__m64 a, const int n); /// \endcode /// /// This intrinsic corresponds to the PSHUFW instruction. /// /// \param a /// A 64-bit integer vector containing the values to be shuffled. /// \param n /// An immediate value containing an 8-bit value specifying which elements to /// copy from \a a. The destinations within the 64-bit destination are /// assigned values as follows: \n /// Bits [1:0] are used to assign values to bits [15:0] in the /// destination. \n /// Bits [3:2] are used to assign values to bits [31:16] in the /// destination. \n /// Bits [5:4] are used to assign values to bits [47:32] in the /// destination. \n /// Bits [7:6] are used to assign values to bits [63:48] in the /// destination. \n /// Bit value assignments: \n /// 00: assigned from bits [15:0] of \a a. \n /// 01: assigned from bits [31:16] of \a a. \n /// 10: assigned from bits [47:32] of \a a. \n /// 11: assigned from bits [63:48] of \a a. \n /// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. /// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form /// [b6, b4, b2, b0]. /// \returns A 64-bit integer vector containing the shuffled values. #define _mm_shuffle_pi16(a, n) \ ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))) /// Conditionally copies the values from each 8-bit element in the first /// 64-bit integer vector operand to the specified memory location, as /// specified by the most significant bit in the corresponding element in the /// second 64-bit integer vector operand. /// /// To minimize caching, the data is flagged as non-temporal /// (unlikely to be used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the MASKMOVQ instruction. /// /// \param __d /// A 64-bit integer vector containing the values with elements to be copied. /// \param __n /// A 64-bit integer vector operand. The most significant bit from each 8-bit /// element determines whether the corresponding element in operand \a __d /// is copied. If the most significant bit of a given element is 1, the /// corresponding element in operand \a __d is copied. /// \param __p /// A pointer to a 64-bit memory location that will receive the conditionally /// copied integer values. The address of the memory location does not have /// to be aligned. static __inline__ void __DEFAULT_FN_ATTRS_MMX _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p) { __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p); } /// Computes the rounded averages of the packed unsigned 8-bit integer /// values and writes the averages to the corresponding bits in the /// destination. /// /// \headerfile /// /// This intrinsic corresponds to the PAVGB instruction. /// /// \param __a /// A 64-bit integer vector containing one of the source operands. /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the averages of both operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_avg_pu8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b); } /// Computes the rounded averages of the packed unsigned 16-bit integer /// values and writes the averages to the corresponding bits in the /// destination. /// /// \headerfile /// /// This intrinsic corresponds to the PAVGW instruction. /// /// \param __a /// A 64-bit integer vector containing one of the source operands. /// \param __b /// A 64-bit integer vector containing one of the source operands. 
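A short illustrative sketch combining the packed 16-bit maximum and the PSHUFW-based shuffle above; the _MM_SHUFFLE macro used to build the immediate is defined later in this header:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m64 a = _mm_setr_pi16(1, 40, 3, 80);
  __m64 b = _mm_setr_pi16(2, 30, 4, 70);

  __m64 mx = _mm_max_pi16(a, b);                             /* 2 40 4 80 */
  __m64 rev = _mm_shuffle_pi16(mx, _MM_SHUFFLE(0, 1, 2, 3)); /* reverse the elements */

  int hi = _mm_extract_pi16(rev, 0); /* element 0 now holds the old element 3: 80 */
  _mm_empty(); /* clear the MMX state before returning to normal FP code */
  printf("highest element after reversal = %d\n", hi);
  return 0;
}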
/// \returns A 64-bit integer vector containing the averages of both operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_avg_pu16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b); } /// Subtracts the corresponding 8-bit unsigned integer values of the two /// 64-bit vector operands and computes the absolute value for each of the /// difference. Then sum of the 8 absolute differences is written to the /// bits [15:0] of the destination; the remaining bits [63:16] are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the PSADBW instruction. /// /// \param __a /// A 64-bit integer vector containing one of the source operands. /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector whose lower 16 bits contain the sums of the /// sets of absolute differences between both operands. The upper bits are /// cleared. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sad_pu8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b); } #if defined(__cplusplus) extern "C" { #endif /// Returns the contents of the MXCSR register as a 32-bit unsigned /// integer value. /// /// There are several groups of macros associated with this /// intrinsic, including: ///
/// - For checking exception states: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO, _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW, _MM_EXCEPT_INEXACT. There is a convenience wrapper _MM_GET_EXCEPTION_STATE().
/// - For checking exception masks: _MM_MASK_UNDERFLOW, _MM_MASK_OVERFLOW, _MM_MASK_INVALID, _MM_MASK_DENORM, _MM_MASK_DIV_ZERO, _MM_MASK_INEXACT. There is a convenience wrapper _MM_GET_EXCEPTION_MASK().
/// - For checking rounding modes: _MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO. There is a convenience wrapper _MM_GET_ROUNDING_MODE().
/// - For checking flush-to-zero mode: _MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_OFF. There is a convenience wrapper _MM_GET_FLUSH_ZERO_MODE().
/// - For checking denormals-are-zero mode: _MM_DENORMALS_ZERO_ON, _MM_DENORMALS_ZERO_OFF. There is a convenience wrapper _MM_GET_DENORMALS_ZERO_MODE().
/// /// For example, the following expression checks if an overflow exception has /// occurred: /// \code /// ( _mm_getcsr() & _MM_EXCEPT_OVERFLOW ) /// \endcode /// /// The following expression gets the current rounding mode: /// \code /// _MM_GET_ROUNDING_MODE() /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the VSTMXCSR / STMXCSR instruction. /// /// \returns A 32-bit unsigned integer containing the contents of the MXCSR /// register. unsigned int _mm_getcsr(void); /// Sets the MXCSR register with the 32-bit unsigned integer value. /// /// There are several groups of macros associated with this intrinsic, /// including: ///
/// - For setting exception states: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO, _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW, _MM_EXCEPT_INEXACT. There is a convenience wrapper _MM_SET_EXCEPTION_STATE(x) where x is one of these macros.
/// - For setting exception masks: _MM_MASK_UNDERFLOW, _MM_MASK_OVERFLOW, _MM_MASK_INVALID, _MM_MASK_DENORM, _MM_MASK_DIV_ZERO, _MM_MASK_INEXACT. There is a convenience wrapper _MM_SET_EXCEPTION_MASK(x) where x is one of these macros.
/// - For setting rounding modes: _MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO. There is a convenience wrapper _MM_SET_ROUNDING_MODE(x) where x is one of these macros.
/// - For setting flush-to-zero mode: _MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_OFF. There is a convenience wrapper _MM_SET_FLUSH_ZERO_MODE(x) where x is one of these macros.
/// - For setting denormals-are-zero mode: _MM_DENORMALS_ZERO_ON, _MM_DENORMALS_ZERO_OFF. There is a convenience wrapper _MM_SET_DENORMALS_ZERO_MODE(x) where x is one of these macros.
/// /// For example, the following expression causes subsequent floating-point /// operations to round up: /// _mm_setcsr(_mm_getcsr() | _MM_ROUND_UP) /// /// The following example sets the DAZ and FTZ flags: /// \code /// void setFlags() { /// _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); /// _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); /// } /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the VLDMXCSR / LDMXCSR instruction. /// /// \param __i /// A 32-bit unsigned integer value to be written to the MXCSR register. void _mm_setcsr(unsigned int __i); #if defined(__cplusplus) } // extern "C" #endif /// Selects 4 float values from the 128-bit operands of [4 x float], as /// specified by the immediate value operand. /// /// \headerfile /// /// \code /// __m128 _mm_shuffle_ps(__m128 a, __m128 b, const int mask); /// \endcode /// /// This intrinsic corresponds to the VSHUFPS / SHUFPS instruction. /// /// \param a /// A 128-bit vector of [4 x float]. /// \param b /// A 128-bit vector of [4 x float]. /// \param mask /// An immediate value containing an 8-bit value specifying which elements to /// copy from \a a and \a b. \n /// Bits [3:0] specify the values copied from operand \a a. \n /// Bits [7:4] specify the values copied from operand \a b. \n /// The destinations within the 128-bit destination are assigned values as /// follows: \n /// Bits [1:0] are used to assign values to bits [31:0] in the /// destination. \n /// Bits [3:2] are used to assign values to bits [63:32] in the /// destination. \n /// Bits [5:4] are used to assign values to bits [95:64] in the /// destination. \n /// Bits [7:6] are used to assign values to bits [127:96] in the /// destination. \n /// Bit value assignments: \n /// 00: Bits [31:0] copied from the specified operand. \n /// 01: Bits [63:32] copied from the specified operand. \n /// 10: Bits [95:64] copied from the specified operand. \n /// 11: Bits [127:96] copied from the specified operand. \n /// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. /// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form /// [b6, b4, b2, b0]. /// \returns A 128-bit vector of [4 x float] containing the shuffled values. #define _mm_shuffle_ps(a, b, mask) \ ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ (int)(mask))) /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of /// [4 x float] and interleaves them into a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKHPS / UNPCKHPS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. \n /// Bits [95:64] are written to bits [31:0] of the destination. \n /// Bits [127:96] are written to bits [95:64] of the destination. /// \param __b /// A 128-bit vector of [4 x float]. /// Bits [95:64] are written to bits [63:32] of the destination. \n /// Bits [127:96] are written to bits [127:96] of the destination. /// \returns A 128-bit vector of [4 x float] containing the interleaved values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpackhi_ps(__m128 __a, __m128 __b) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 2, 6, 3, 7); } /// Unpacks the low-order (index 0,1) values from two 128-bit vectors of /// [4 x float] and interleaves them into a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKLPS / UNPCKLPS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. 
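A small illustrative sketch of the MXCSR accessors and the rounding-mode wrapper macros described above; saving and restoring the register around the change is the usual pattern:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  unsigned int saved = _mm_getcsr(); /* remember the original MXCSR contents */

  _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);
  printf("2.7 -> %d (toward zero)\n",
         _mm_cvtss_si32(_mm_set_ss(2.7f))); /* 2 */

  _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);
  printf("2.7 -> %d (nearest)\n",
         _mm_cvtss_si32(_mm_set_ss(2.7f))); /* 3 */

  _mm_setcsr(saved); /* restore the previous rounding/exception state */
  return 0;
}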
\n /// Bits [31:0] are written to bits [31:0] of the destination. \n /// Bits [63:32] are written to bits [95:64] of the destination. /// \param __b /// A 128-bit vector of [4 x float]. \n /// Bits [31:0] are written to bits [63:32] of the destination. \n /// Bits [63:32] are written to bits [127:96] of the destination. /// \returns A 128-bit vector of [4 x float] containing the interleaved values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpacklo_ps(__m128 __a, __m128 __b) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 4, 1, 5); } /// Constructs a 128-bit floating-point vector of [4 x float]. The lower /// 32 bits are set to the lower 32 bits of the second parameter. The upper /// 96 bits are set to the upper 96 bits of the first parameter. /// /// \headerfile /// /// This intrinsic corresponds to the VBLENDPS / BLENDPS / MOVSS /// instruction. /// /// \param __a /// A 128-bit floating-point vector of [4 x float]. The upper 96 bits are /// written to the upper 96 bits of the result. /// \param __b /// A 128-bit floating-point vector of [4 x float]. The lower 32 bits are /// written to the lower 32 bits of the result. /// \returns A 128-bit floating-point vector of [4 x float]. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_move_ss(__m128 __a, __m128 __b) { __a[0] = __b[0]; return __a; } /// Constructs a 128-bit floating-point vector of [4 x float]. The lower /// 64 bits are set to the upper 64 bits of the second parameter. The upper /// 64 bits are set to the upper 64 bits of the first parameter. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKHPD / UNPCKHPD instruction. /// /// \param __a /// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are /// written to the upper 64 bits of the result. /// \param __b /// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are /// written to the lower 64 bits of the result. /// \returns A 128-bit floating-point vector of [4 x float]. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movehl_ps(__m128 __a, __m128 __b) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 6, 7, 2, 3); } /// Constructs a 128-bit floating-point vector of [4 x float]. The lower /// 64 bits are set to the lower 64 bits of the first parameter. The upper /// 64 bits are set to the lower 64 bits of the second parameter. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKLPD / UNPCKLPD instruction. /// /// \param __a /// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are /// written to the lower 64 bits of the result. /// \param __b /// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are /// written to the upper 64 bits of the result. /// \returns A 128-bit floating-point vector of [4 x float]. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movelh_ps(__m128 __a, __m128 __b) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 1, 4, 5); } /// Converts a 64-bit vector of [4 x i16] into a 128-bit vector of [4 x /// float]. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPI2PS + COMPOSITE instruction. /// /// \param __a /// A 64-bit vector of [4 x i16]. The elements of the destination are copied /// from the corresponding elements in this operand. /// \returns A 128-bit vector of [4 x float] containing the copied and converted /// values from the operand. 
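An illustrative sketch (not part of the header) contrasting _mm_shuffle_ps, _mm_unpacklo_ps, and _mm_movehl_ps on two known vectors:

#include <stdio.h>
#include <xmmintrin.h>

static void print_ps(const char *tag, __m128 v) {
  float f[4];
  _mm_storeu_ps(f, v);
  printf("%s: %g %g %g %g\n", tag, f[0], f[1], f[2], f[3]);
}

int main(void) {
  __m128 a = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
  __m128 b = _mm_setr_ps(5.0f, 6.0f, 7.0f, 8.0f);

  /* Lower two lanes picked from a, upper two lanes picked from b. */
  print_ps("shuffle ", _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 2, 1, 0))); /* 1 2 7 8 */
  print_ps("unpacklo", _mm_unpacklo_ps(a, b));                         /* 1 5 2 6 */
  print_ps("movehl  ", _mm_movehl_ps(a, b));                           /* 7 8 3 4 */
  return 0;
}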
static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpi16_ps(__m64 __a) { __m64 __b, __c; __m128 __r; __b = _mm_setzero_si64(); __b = _mm_cmpgt_pi16(__b, __a); __c = _mm_unpackhi_pi16(__a, __b); __r = _mm_setzero_ps(); __r = _mm_cvtpi32_ps(__r, __c); __r = _mm_movelh_ps(__r, __r); __c = _mm_unpacklo_pi16(__a, __b); __r = _mm_cvtpi32_ps(__r, __c); return __r; } /// Converts a 64-bit vector of 16-bit unsigned integer values into a /// 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPI2PS + COMPOSITE instruction. /// /// \param __a /// A 64-bit vector of 16-bit unsigned integer values. The elements of the /// destination are copied from the corresponding elements in this operand. /// \returns A 128-bit vector of [4 x float] containing the copied and converted /// values from the operand. static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpu16_ps(__m64 __a) { __m64 __b, __c; __m128 __r; __b = _mm_setzero_si64(); __c = _mm_unpackhi_pi16(__a, __b); __r = _mm_setzero_ps(); __r = _mm_cvtpi32_ps(__r, __c); __r = _mm_movelh_ps(__r, __r); __c = _mm_unpacklo_pi16(__a, __b); __r = _mm_cvtpi32_ps(__r, __c); return __r; } /// Converts the lower four 8-bit values from a 64-bit vector of [8 x i8] /// into a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPI2PS + COMPOSITE instruction. /// /// \param __a /// A 64-bit vector of [8 x i8]. The elements of the destination are copied /// from the corresponding lower 4 elements in this operand. /// \returns A 128-bit vector of [4 x float] containing the copied and converted /// values from the operand. static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpi8_ps(__m64 __a) { __m64 __b; __b = _mm_setzero_si64(); __b = _mm_cmpgt_pi8(__b, __a); __b = _mm_unpacklo_pi8(__a, __b); return _mm_cvtpi16_ps(__b); } /// Converts the lower four unsigned 8-bit integer values from a 64-bit /// vector of [8 x u8] into a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPI2PS + COMPOSITE instruction. /// /// \param __a /// A 64-bit vector of unsigned 8-bit integer values. The elements of the /// destination are copied from the corresponding lower 4 elements in this /// operand. /// \returns A 128-bit vector of [4 x float] containing the copied and converted /// values from the source operand. static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpu8_ps(__m64 __a) { __m64 __b; __b = _mm_setzero_si64(); __b = _mm_unpacklo_pi8(__a, __b); return _mm_cvtpi16_ps(__b); } /// Converts the two 32-bit signed integer values from each 64-bit vector /// operand of [2 x i32] into a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPI2PS + COMPOSITE instruction. /// /// \param __a /// A 64-bit vector of [2 x i32]. The lower elements of the destination are /// copied from the elements in this operand. /// \param __b /// A 64-bit vector of [2 x i32]. The upper elements of the destination are /// copied from the elements in this operand. /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the /// copied and converted values from the first operand. The upper 64 bits /// contain the copied and converted values from the second operand. 
static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32x2_ps(__m64 __a, __m64 __b) { __m128 __c; __c = _mm_setzero_ps(); __c = _mm_cvtpi32_ps(__c, __b); __c = _mm_movelh_ps(__c, __c); return _mm_cvtpi32_ps(__c, __a); } /// Converts each single-precision floating-point element of a 128-bit /// floating-point vector of [4 x float] into a 16-bit signed integer, and /// packs the results into a 64-bit integer vector of [4 x i16]. /// /// If the floating-point element is NaN or infinity, or if the /// floating-point element is greater than 0x7FFFFFFF or less than -0x8000, /// it is converted to 0x8000. Otherwise if the floating-point element is /// greater than 0x7FFF, it is converted to 0x7FFF. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPS2PI + COMPOSITE instruction. /// /// \param __a /// A 128-bit floating-point vector of [4 x float]. /// \returns A 64-bit integer vector of [4 x i16] containing the converted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtps_pi16(__m128 __a) { __m64 __b, __c; __b = _mm_cvtps_pi32(__a); __a = _mm_movehl_ps(__a, __a); __c = _mm_cvtps_pi32(__a); return _mm_packs_pi32(__b, __c); } /// Converts each single-precision floating-point element of a 128-bit /// floating-point vector of [4 x float] into an 8-bit signed integer, and /// packs the results into the lower 32 bits of a 64-bit integer vector of /// [8 x i8]. The upper 32 bits of the vector are set to 0. /// /// If the floating-point element is NaN or infinity, or if the /// floating-point element is greater than 0x7FFFFFFF or less than -0x80, it /// is converted to 0x80. Otherwise if the floating-point element is greater /// than 0x7F, it is converted to 0x7F. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPS2PI + COMPOSITE instruction. /// /// \param __a /// 128-bit floating-point vector of [4 x float]. /// \returns A 64-bit integer vector of [8 x i8]. The lower 32 bits contain the /// converted values and the uppper 32 bits are set to zero. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtps_pi8(__m128 __a) { __m64 __b, __c; __b = _mm_cvtps_pi16(__a); __c = _mm_setzero_si64(); return _mm_packs_pi16(__b, __c); } /// Extracts the sign bits from each single-precision floating-point /// element of a 128-bit floating-point vector of [4 x float] and returns the /// sign bits in bits [0:3] of the result. Bits [31:4] of the result are set /// to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVMSKPS / MOVMSKPS instruction. /// /// \param __a /// A 128-bit floating-point vector of [4 x float]. /// \returns A 32-bit integer value. Bits [3:0] contain the sign bits from each /// single-precision floating-point element of the parameter. Bits [31:4] are /// set to zero. 
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_ps(__m128 __a) { return __builtin_ia32_movmskps((__v4sf)__a); } /* Compare */ #define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */ #define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */ #define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */ #define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */ #define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */ #define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */ #define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */ #define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */ /// Compares each of the corresponding values of two 128-bit vectors of /// [4 x float], using the operation specified by the immediate integer /// operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. /// /// \headerfile /// /// \code /// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c); /// \endcode /// /// This intrinsic corresponds to the (V)CMPPS instruction. /// /// \param a /// A 128-bit vector of [4 x float]. /// \param b /// A 128-bit vector of [4 x float]. /// \param c /// An immediate integer operand, with bits [4:0] specifying which comparison /// operation to use: \n /// 0x00: Equal (ordered, non-signaling) \n /// 0x01: Less-than (ordered, signaling) \n /// 0x02: Less-than-or-equal (ordered, signaling) \n /// 0x03: Unordered (non-signaling) \n /// 0x04: Not-equal (unordered, non-signaling) \n /// 0x05: Not-less-than (unordered, signaling) \n /// 0x06: Not-less-than-or-equal (unordered, signaling) \n /// 0x07: Ordered (non-signaling) \n /// \returns A 128-bit vector of [4 x float] containing the comparison results. #define _mm_cmp_ps(a, b, c) \ ((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), (c))) /// Compares each of the corresponding scalar values of two 128-bit /// vectors of [4 x float], using the operation specified by the immediate /// integer operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. /// /// \headerfile /// /// \code /// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c); /// \endcode /// /// This intrinsic corresponds to the (V)CMPSS instruction. /// /// \param a /// A 128-bit vector of [4 x float]. /// \param b /// A 128-bit vector of [4 x float]. /// \param c /// An immediate integer operand, with bits [4:0] specifying which comparison /// operation to use: \n /// 0x00: Equal (ordered, non-signaling) \n /// 0x01: Less-than (ordered, signaling) \n /// 0x02: Less-than-or-equal (ordered, signaling) \n /// 0x03: Unordered (non-signaling) \n /// 0x04: Not-equal (unordered, non-signaling) \n /// 0x05: Not-less-than (unordered, signaling) \n /// 0x06: Not-less-than-or-equal (unordered, signaling) \n /// 0x07: Ordered (non-signaling) \n /// \returns A 128-bit vector of [4 x float] containing the comparison results. 
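_mm_cmp_ps produces an all-ones or all-zeros mask per lane, and _mm_movemask_ps collapses those masks into a 4-bit integer, which is the usual branchless way to test lane-wise conditions. Illustrative sketch only:

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m128 a = _mm_setr_ps(1.0f, 5.0f, 2.0f, 8.0f);
  __m128 b = _mm_set1_ps(4.0f);

  /* Lane-wise a < b yields 0xFFFFFFFF or 0x00000000 per lane. */
  __m128 lt = _mm_cmp_ps(a, b, _CMP_LT_OS);

  /* Collapse the four sign bits into a mask: lanes 0 and 2 compare true here. */
  int mask = _mm_movemask_ps(lt);
  printf("mask = 0x%x\n", mask); /* 0x5 */
  return 0;
}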
#define _mm_cmp_ss(a, b, c) \ ((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), (c))) #define _MM_ALIGN16 __attribute__((aligned(16))) #define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) #define _MM_EXCEPT_INVALID (0x0001U) #define _MM_EXCEPT_DENORM (0x0002U) #define _MM_EXCEPT_DIV_ZERO (0x0004U) #define _MM_EXCEPT_OVERFLOW (0x0008U) #define _MM_EXCEPT_UNDERFLOW (0x0010U) #define _MM_EXCEPT_INEXACT (0x0020U) #define _MM_EXCEPT_MASK (0x003fU) #define _MM_MASK_INVALID (0x0080U) #define _MM_MASK_DENORM (0x0100U) #define _MM_MASK_DIV_ZERO (0x0200U) #define _MM_MASK_OVERFLOW (0x0400U) #define _MM_MASK_UNDERFLOW (0x0800U) #define _MM_MASK_INEXACT (0x1000U) #define _MM_MASK_MASK (0x1f80U) #define _MM_ROUND_NEAREST (0x0000U) #define _MM_ROUND_DOWN (0x2000U) #define _MM_ROUND_UP (0x4000U) #define _MM_ROUND_TOWARD_ZERO (0x6000U) #define _MM_ROUND_MASK (0x6000U) #define _MM_FLUSH_ZERO_MASK (0x8000U) #define _MM_FLUSH_ZERO_ON (0x8000U) #define _MM_FLUSH_ZERO_OFF (0x0000U) #define _MM_GET_EXCEPTION_MASK() (_mm_getcsr() & _MM_MASK_MASK) #define _MM_GET_EXCEPTION_STATE() (_mm_getcsr() & _MM_EXCEPT_MASK) #define _MM_GET_FLUSH_ZERO_MODE() (_mm_getcsr() & _MM_FLUSH_ZERO_MASK) #define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK) #define _MM_SET_EXCEPTION_MASK(x) (_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (x))) #define _MM_SET_EXCEPTION_STATE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (x))) #define _MM_SET_FLUSH_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (x))) #define _MM_SET_ROUNDING_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (x))) #define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ do { \ __m128 tmp3, tmp2, tmp1, tmp0; \ tmp0 = _mm_unpacklo_ps((row0), (row1)); \ tmp2 = _mm_unpacklo_ps((row2), (row3)); \ tmp1 = _mm_unpackhi_ps((row0), (row1)); \ tmp3 = _mm_unpackhi_ps((row2), (row3)); \ (row0) = _mm_movelh_ps(tmp0, tmp2); \ (row1) = _mm_movehl_ps(tmp2, tmp0); \ (row2) = _mm_movelh_ps(tmp1, tmp3); \ (row3) = _mm_movehl_ps(tmp3, tmp1); \ } while (0) /* Aliases for compatibility. */ #define _m_pextrw _mm_extract_pi16 #define _m_pinsrw _mm_insert_pi16 #define _m_pmaxsw _mm_max_pi16 #define _m_pmaxub _mm_max_pu8 #define _m_pminsw _mm_min_pi16 #define _m_pminub _mm_min_pu8 #define _m_pmovmskb _mm_movemask_pi8 #define _m_pmulhuw _mm_mulhi_pu16 #define _m_pshufw _mm_shuffle_pi16 #define _m_maskmovq _mm_maskmove_si64 #define _m_pavgb _mm_avg_pu8 #define _m_pavgw _mm_avg_pu16 #define _m_psadbw _mm_sad_pu8 #define _m_ _mm_ #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_MMX /* Ugly hack for backwards-compatibility (compatible with gcc) */ #if defined(__SSE2__) && !__building_module(_Builtin_intrinsics) #include #endif #endif /* __XMMINTRIN_H */ /*- * Copyright (c) 2011 Ed Schouten * David Chisnall * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #pragma once #include #include #include /* * C: Do it ourselves. * Note that the runtime representation defined here should be compatible * with the C++ one, i.e. an _Atomic(T) needs to contain the same * bits as a T. */ #include /* For ptrdiff_t. */ #include // Include uchar.h only when available. Bionic's stdatomic.h is also used for // the host (via a copy in prebuilts/clang) and uchar.h is not available in the // glibc used for the host. #if defined(__BIONIC__) # include /* For char16_t and char32_t. */ #endif /* * 7.17.1 Atomic lock-free macros. */ #ifdef __GCC_ATOMIC_BOOL_LOCK_FREE #define ATOMIC_BOOL_LOCK_FREE __GCC_ATOMIC_BOOL_LOCK_FREE #endif #ifdef __GCC_ATOMIC_CHAR_LOCK_FREE #define ATOMIC_CHAR_LOCK_FREE __GCC_ATOMIC_CHAR_LOCK_FREE #endif #ifdef __GCC_ATOMIC_CHAR16_T_LOCK_FREE #define ATOMIC_CHAR16_T_LOCK_FREE __GCC_ATOMIC_CHAR16_T_LOCK_FREE #endif #ifdef __GCC_ATOMIC_CHAR32_T_LOCK_FREE #define ATOMIC_CHAR32_T_LOCK_FREE __GCC_ATOMIC_CHAR32_T_LOCK_FREE #endif #ifdef __GCC_ATOMIC_WCHAR_T_LOCK_FREE #define ATOMIC_WCHAR_T_LOCK_FREE __GCC_ATOMIC_WCHAR_T_LOCK_FREE #endif #ifdef __GCC_ATOMIC_SHORT_LOCK_FREE #define ATOMIC_SHORT_LOCK_FREE __GCC_ATOMIC_SHORT_LOCK_FREE #endif #ifdef __GCC_ATOMIC_INT_LOCK_FREE #define ATOMIC_INT_LOCK_FREE __GCC_ATOMIC_INT_LOCK_FREE #endif #ifdef __GCC_ATOMIC_LONG_LOCK_FREE #define ATOMIC_LONG_LOCK_FREE __GCC_ATOMIC_LONG_LOCK_FREE #endif #ifdef __GCC_ATOMIC_LLONG_LOCK_FREE #define ATOMIC_LLONG_LOCK_FREE __GCC_ATOMIC_LLONG_LOCK_FREE #endif #ifdef __GCC_ATOMIC_POINTER_LOCK_FREE #define ATOMIC_POINTER_LOCK_FREE __GCC_ATOMIC_POINTER_LOCK_FREE #endif /* * 7.17.2 Initialization. */ #define ATOMIC_VAR_INIT(value) (value) #define atomic_init(obj, value) __c11_atomic_init(obj, value) /* * Clang and recent GCC both provide predefined macros for the memory * orderings. If we are using a compiler that doesn't define them, use the * clang values - these will be ignored in the fallback path. */ #ifndef __ATOMIC_RELAXED #define __ATOMIC_RELAXED 0 #endif #ifndef __ATOMIC_CONSUME #define __ATOMIC_CONSUME 1 #endif #ifndef __ATOMIC_ACQUIRE #define __ATOMIC_ACQUIRE 2 #endif #ifndef __ATOMIC_RELEASE #define __ATOMIC_RELEASE 3 #endif #ifndef __ATOMIC_ACQ_REL #define __ATOMIC_ACQ_REL 4 #endif #ifndef __ATOMIC_SEQ_CST #define __ATOMIC_SEQ_CST 5 #endif /* * 7.17.3 Order and consistency. * * The memory_order_* constants that denote the barrier behaviour of the * atomic operations. * The enum values must be identical to those used by the * C++ header. 
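 *
 * Illustrative sketch (not part of the original header): a release store in
 * one thread pairs with an acquire load in another to publish data; `ready`
 * (an atomic_bool) and `data` (a plain int) are hypothetical names.
 *
 *   // writer thread: fill in the payload, then publish it
 *   data = 42;
 *   atomic_store_explicit(&ready, true, memory_order_release);
 *
 *   // reader thread: once the acquire load observes true, data is visible
 *   while (!atomic_load_explicit(&ready, memory_order_acquire)) { }
 *   // here data == 42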
*/ typedef enum { memory_order_relaxed = __ATOMIC_RELAXED, memory_order_consume = __ATOMIC_CONSUME, memory_order_acquire = __ATOMIC_ACQUIRE, memory_order_release = __ATOMIC_RELEASE, memory_order_acq_rel = __ATOMIC_ACQ_REL, memory_order_seq_cst = __ATOMIC_SEQ_CST } memory_order; /* * 7.17.4 Fences. */ static __inline void atomic_thread_fence(memory_order __order __attribute__((__unused__))) { __c11_atomic_thread_fence(__order); } static __inline void atomic_signal_fence(memory_order __order __attribute__((__unused__))) { __c11_atomic_signal_fence(__order); } /* * 7.17.5 Lock-free property. */ #define atomic_is_lock_free(obj) __c11_atomic_is_lock_free(sizeof(*(obj))) /* * 7.17.6 Atomic integer types. */ typedef _Atomic(bool) atomic_bool; typedef _Atomic(char) atomic_char; typedef _Atomic(signed char) atomic_schar; typedef _Atomic(unsigned char) atomic_uchar; typedef _Atomic(short) atomic_short; typedef _Atomic(unsigned short) atomic_ushort; typedef _Atomic(int) atomic_int; typedef _Atomic(unsigned int) atomic_uint; typedef _Atomic(long) atomic_long; typedef _Atomic(unsigned long) atomic_ulong; typedef _Atomic(long long) atomic_llong; typedef _Atomic(unsigned long long) atomic_ullong; #if defined(__BIONIC__) || (defined(__cplusplus) && __cplusplus >= 201103L) typedef _Atomic(char16_t) atomic_char16_t; typedef _Atomic(char32_t) atomic_char32_t; #endif typedef _Atomic(wchar_t) atomic_wchar_t; typedef _Atomic(int_least8_t) atomic_int_least8_t; typedef _Atomic(uint_least8_t) atomic_uint_least8_t; typedef _Atomic(int_least16_t) atomic_int_least16_t; typedef _Atomic(uint_least16_t) atomic_uint_least16_t; typedef _Atomic(int_least32_t) atomic_int_least32_t; typedef _Atomic(uint_least32_t) atomic_uint_least32_t; typedef _Atomic(int_least64_t) atomic_int_least64_t; typedef _Atomic(uint_least64_t) atomic_uint_least64_t; typedef _Atomic(int_fast8_t) atomic_int_fast8_t; typedef _Atomic(uint_fast8_t) atomic_uint_fast8_t; typedef _Atomic(int_fast16_t) atomic_int_fast16_t; typedef _Atomic(uint_fast16_t) atomic_uint_fast16_t; typedef _Atomic(int_fast32_t) atomic_int_fast32_t; typedef _Atomic(uint_fast32_t) atomic_uint_fast32_t; typedef _Atomic(int_fast64_t) atomic_int_fast64_t; typedef _Atomic(uint_fast64_t) atomic_uint_fast64_t; typedef _Atomic(intptr_t) atomic_intptr_t; typedef _Atomic(uintptr_t) atomic_uintptr_t; typedef _Atomic(size_t) atomic_size_t; typedef _Atomic(ptrdiff_t) atomic_ptrdiff_t; typedef _Atomic(intmax_t) atomic_intmax_t; typedef _Atomic(uintmax_t) atomic_uintmax_t; /* * 7.17.7 Operations on atomic types. */ /* * Compiler-specific operations. 
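 *
 * Usage sketch built on the wrappers defined below (illustrative only;
 * `counter` is a hypothetical atomic_int and LIMIT a hypothetical bound):
 *
 *   atomic_fetch_add_explicit(&counter, 1, memory_order_relaxed);
 *
 *   // compare-exchange retry loop: increment, but never past LIMIT
 *   int old = atomic_load_explicit(&counter, memory_order_relaxed);
 *   while (old < LIMIT &&
 *          !atomic_compare_exchange_weak_explicit(&counter, &old, old + 1,
 *                                                 memory_order_relaxed,
 *                                                 memory_order_relaxed)) {
 *     // on failure, `old` is reloaded with the current value automatically
 *   }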
*/ #define atomic_compare_exchange_strong_explicit(object, expected, \ desired, success, failure) \ __c11_atomic_compare_exchange_strong(object, expected, desired, \ success, failure) #define atomic_compare_exchange_weak_explicit(object, expected, \ desired, success, failure) \ __c11_atomic_compare_exchange_weak(object, expected, desired, \ success, failure) #define atomic_exchange_explicit(object, desired, order) \ __c11_atomic_exchange(object, desired, order) #define atomic_fetch_add_explicit(object, operand, order) \ __c11_atomic_fetch_add(object, operand, order) #define atomic_fetch_and_explicit(object, operand, order) \ __c11_atomic_fetch_and(object, operand, order) #define atomic_fetch_or_explicit(object, operand, order) \ __c11_atomic_fetch_or(object, operand, order) #define atomic_fetch_sub_explicit(object, operand, order) \ __c11_atomic_fetch_sub(object, operand, order) #define atomic_fetch_xor_explicit(object, operand, order) \ __c11_atomic_fetch_xor(object, operand, order) #define atomic_load_explicit(object, order) \ __c11_atomic_load(object, order) #define atomic_store_explicit(object, desired, order) \ __c11_atomic_store(object, desired, order) /* * Convenience functions. */ #define atomic_compare_exchange_strong(object, expected, desired) \ atomic_compare_exchange_strong_explicit(object, expected, \ desired, memory_order_seq_cst, memory_order_seq_cst) #define atomic_compare_exchange_weak(object, expected, desired) \ atomic_compare_exchange_weak_explicit(object, expected, \ desired, memory_order_seq_cst, memory_order_seq_cst) #define atomic_exchange(object, desired) \ atomic_exchange_explicit(object, desired, memory_order_seq_cst) #define atomic_fetch_add(object, operand) \ atomic_fetch_add_explicit(object, operand, memory_order_seq_cst) #define atomic_fetch_and(object, operand) \ atomic_fetch_and_explicit(object, operand, memory_order_seq_cst) #define atomic_fetch_or(object, operand) \ atomic_fetch_or_explicit(object, operand, memory_order_seq_cst) #define atomic_fetch_sub(object, operand) \ atomic_fetch_sub_explicit(object, operand, memory_order_seq_cst) #define atomic_fetch_xor(object, operand) \ atomic_fetch_xor_explicit(object, operand, memory_order_seq_cst) #define atomic_load(object) \ atomic_load_explicit(object, memory_order_seq_cst) #define atomic_store(object, desired) \ atomic_store_explicit(object, desired, memory_order_seq_cst) /* * 7.17.8 Atomic flag type and operations. * * XXX: Assume atomic_bool can be used as an atomic_flag. Is there some * kind of compiler built-in type we could use? 
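 *
 * For example (a minimal sketch, not part of the original header), an
 * atomic_flag makes a tiny test-and-set spinlock; `lock` is a hypothetical
 * atomic_flag initialized with ATOMIC_FLAG_INIT:
 *
 *   while (atomic_flag_test_and_set_explicit(&lock, memory_order_acquire)) {
 *     // spin until the previous holder clears the flag
 *   }
 *   // ... critical section ...
 *   atomic_flag_clear_explicit(&lock, memory_order_release);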
*/ typedef struct { atomic_bool __flag; } atomic_flag; #define ATOMIC_FLAG_INIT { ATOMIC_VAR_INIT(false) } static __inline bool atomic_flag_test_and_set_explicit(volatile atomic_flag * _Nonnull __object, memory_order __order) { return (atomic_exchange_explicit(&__object->__flag, 1, __order)); } static __inline void atomic_flag_clear_explicit(volatile atomic_flag * _Nonnull __object, memory_order __order) { atomic_store_explicit(&__object->__flag, 0, __order); } static __inline bool atomic_flag_test_and_set(volatile atomic_flag * _Nonnull __object) { return (atomic_flag_test_and_set_explicit(__object, memory_order_seq_cst)); } static __inline void atomic_flag_clear(volatile atomic_flag * _Nonnull __object) { atomic_flag_clear_explicit(__object, memory_order_seq_cst); }
/*===---- __stdarg___va_copy.h - Definition of __va_copy -------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __va_copy #define __va_copy(d, s) __builtin_va_copy(d, s) #endif
/*===----------- avx512fp16intrin.h - AVX512-FP16 intrinsics ---------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifdef __SSE2__ #ifndef __AVX512FP16INTRIN_H #define __AVX512FP16INTRIN_H /* Define the default attributes for the functions in this file. */ typedef _Float16 __v32hf __attribute__((__vector_size__(64), __aligned__(64))); typedef _Float16 __m512h __attribute__((__vector_size__(64), __aligned__(64))); typedef _Float16 __m512h_u __attribute__((__vector_size__(64), __aligned__(1))); /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS512 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512fp16,evex512"), __min_vector_width__(512))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512fp16,no-evex512"), \ __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512fp16,no-evex512"), \ __min_vector_width__(128))) static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_cvtsh_h(__m512h __a) { return __a[0]; } static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_setzero_ph(void) { return (__m128h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; } static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_setzero_ph(void) { return (__m256h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_undefined_ph(void) { return (__m256h)__builtin_ia32_undef256(); } static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_setzero_ph(void) { return (__m512h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_undefined_ph(void) { return (__m128h)__builtin_ia32_undef128(); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_undefined_ph(void) { return (__m512h)__builtin_ia32_undef512(); } static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_set1_ph(_Float16 __h) { return (__m512h)(__v32hf){__h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h}; } static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8, _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12, _Float16 __h13, _Float16 __h14, _Float16 __h15, _Float16 __h16, _Float16 __h17, _Float16 __h18, _Float16 __h19, _Float16 __h20, _Float16 __h21, _Float16 __h22, _Float16 __h23, _Float16 __h24, _Float16 __h25, _Float16 __h26, _Float16 __h27, _Float16 __h28, _Float16 __h29, _Float16 __h30, _Float16 __h31, _Float16 __h32) { return (__m512h)(__v32hf){__h32, __h31, __h30, __h29, __h28, __h27, __h26, __h25, __h24, __h23, __h22, __h21, __h20, __h19, __h18, __h17, __h16, __h15, __h14, __h13, __h12, __h11, __h10, __h9, __h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1}; } #define _mm512_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, \ h14, h15, h16, h17, h18, h19, h20, h21, h22, h23, h24, \ h25, h26, h27, h28, h29, h30, h31, h32) \ _mm512_set_ph((h32), (h31), (h30), (h29), (h28), (h27), (h26), (h25), (h24), \ (h23), (h22), (h21), (h20), (h19), (h18), (h17), (h16), (h15), \ (h14), (h13), (h12), (h11), (h10), (h9), (h8), (h7), (h6), \ (h5), (h4), (h3), (h2), (h1)) static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_set1_pch(_Float16 _Complex h) { return (__m512h)_mm512_set1_ps(__builtin_bit_cast(float, h)); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_castph_ps(__m128h __a) { return (__m128)__a; } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_castph_ps(__m256h __a) { return (__m256)__a; } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castph_ps(__m512h __a) { return (__m512)__a; } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_castph_pd(__m128h __a) { return (__m128d)__a; } static __inline__ __m256d __DEFAULT_FN_ATTRS256 
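/*
 * Usage sketch (not part of the original header): _mm512_set_ph lists lanes
 * from the highest element down to lane 0, while _mm512_setr_ph lists them in
 * memory order, so the same arguments build mirrored vectors; _mm512_set1_ph
 * broadcasts one value to every lane.
 *
 *   __m512h __ones = _mm512_set1_ph((_Float16)1.0);
 *   __m512h __idx  = _mm512_setr_ph(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
 *                                   13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
 *                                   23, 24, 25, 26, 27, 28, 29, 30, 31);
 *   // lane i of __idx holds (_Float16)i; the same arguments passed to
 *   // _mm512_set_ph would land in reverse lane order.
 */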
_mm256_castph_pd(__m256h __a) { return (__m256d)__a; }
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castph_pd(__m512h __a) { return (__m512d)__a; }
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_castph_si128(__m128h __a) { return (__m128i)__a; }
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_castph_si256(__m256h __a) { return (__m256i)__a; }
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castph_si512(__m512h __a) { return (__m512i)__a; }
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castps_ph(__m128 __a) { return (__m128h)__a; }
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castps_ph(__m256 __a) { return (__m256h)__a; }
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castps_ph(__m512 __a) { return (__m512h)__a; }
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castpd_ph(__m128d __a) { return (__m128h)__a; }
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castpd_ph(__m256d __a) { return (__m256h)__a; }
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castpd_ph(__m512d __a) { return (__m512h)__a; }
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castsi128_ph(__m128i __a) { return (__m128h)__a; }
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castsi256_ph(__m256i __a) { return (__m256h)__a; }
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castsi512_ph(__m512i __a) { return (__m512h)__a; }
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_castph256_ph128(__m256h __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7); }
static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_castph512_ph128(__m512h __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7); }
static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_castph512_ph256(__m512h __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); }
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castph128_ph256(__m128h __a) { return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); }
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castph128_ph512(__m128h __a) { __m256h __b = __builtin_nondeterministic_value(__b); return __builtin_shufflevector( __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), __b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); }
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castph256_ph512(__m256h __a) { return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); }
/// Constructs a 256-bit floating-point vector of [16 x half] from a
/// 128-bit floating-point vector of [8 x half]. The lower 128 bits
/// contain the value of the source vector. The upper 128 bits are set
/// to zero.
///
/// \headerfile
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 128-bit vector of [8 x half].
/// \returns A 256-bit floating-point vector of [16 x half]. The lower 128 bits
///    contain the value of the parameter. The upper 128 bits are set to zero.
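/*
 * Usage sketch (not part of the original header): the widening casts above
 * leave the new upper lanes undefined, while the zext variants defined below
 * guarantee they are zero; __lo is a hypothetical __m128h value.
 *
 *   __m512h __wide = _mm512_castph128_ph512(__lo);   // upper 384 bits undefined
 *   __m512h __wzer = _mm512_zextph128_ph512(__lo);   // upper 384 bits zeroed
 */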
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_zextph128_ph256(__m128h __a) { return __builtin_shufflevector(__a, (__v8hf)_mm_setzero_ph(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } /// Constructs a 512-bit floating-point vector of [32 x half] from a /// 128-bit floating-point vector of [8 x half]. The lower 128 bits /// contain the value of the source vector. The upper 384 bits are set /// to zero. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit vector of [8 x half]. /// \returns A 512-bit floating-point vector of [32 x half]. The lower 128 bits /// contain the value of the parameter. The upper 384 bits are set to zero. static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_zextph128_ph512(__m128h __a) { return __builtin_shufflevector( __a, (__v8hf)_mm_setzero_ph(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15); } /// Constructs a 512-bit floating-point vector of [32 x half] from a /// 256-bit floating-point vector of [16 x half]. The lower 256 bits /// contain the value of the source vector. The upper 256 bits are set /// to zero. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit vector of [16 x half]. /// \returns A 512-bit floating-point vector of [32 x half]. The lower 256 bits /// contain the value of the parameter. The upper 256 bits are set to zero. static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_zextph256_ph512(__m256h __a) { return __builtin_shufflevector(__a, (__v16hf)_mm256_setzero_ph(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); } #define _mm_comi_round_sh(A, B, P, R) \ __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, (int)(P), (int)(R)) #define _mm_comi_sh(A, B, pred) \ _mm_comi_round_sh((A), (B), (pred), _MM_FROUND_CUR_DIRECTION) static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sh(__m128h A, __m128h B) { return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_EQ_OS, _MM_FROUND_CUR_DIRECTION); } static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sh(__m128h A, __m128h B) { return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LT_OS, _MM_FROUND_CUR_DIRECTION); } static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sh(__m128h A, __m128h B) { return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LE_OS, _MM_FROUND_CUR_DIRECTION); } static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sh(__m128h A, __m128h B) { return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GT_OS, _MM_FROUND_CUR_DIRECTION); } static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sh(__m128h A, __m128h B) { return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); } static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sh(__m128h A, __m128h B) { return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_NEQ_US, _MM_FROUND_CUR_DIRECTION); } static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomieq_sh(__m128h A, __m128h B) { return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_EQ_OQ, _MM_FROUND_CUR_DIRECTION); } static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomilt_sh(__m128h A, __m128h B) { return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION); } static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomile_sh(__m128h A, __m128h B) { return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LE_OQ, _MM_FROUND_CUR_DIRECTION); } static 
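/*
 * Usage sketch (not part of the original header): the comi* scalar compares
 * use signaling predicates (they fault on NaN input, and the ordered forms
 * such as comilt return false), while the ucomi* forms use quiet predicates;
 * __a and __b are hypothetical __m128h values and only lane 0 participates.
 *
 *   if (_mm_comilt_sh(__a, __b)) {
 *     // lane 0 of __a is less than lane 0 of __b (never taken on NaN input)
 *   }
 *   int __eq = _mm_ucomieq_sh(__a, __b);   // quiet equality test of lane 0
 */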
__inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomigt_sh(__m128h A, __m128h B) { return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GT_OQ, _MM_FROUND_CUR_DIRECTION); } static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomige_sh(__m128h A, __m128h B) { return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GE_OQ, _MM_FROUND_CUR_DIRECTION); } static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomineq_sh(__m128h A, __m128h B) { return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_NEQ_UQ, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_add_ph(__m512h __A, __m512h __B) { return (__m512h)((__v32hf)__A + (__v32hf)__B); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_add_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_selectph_512( (__mmask32)__U, (__v32hf)_mm512_add_ph(__A, __B), (__v32hf)__W); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_add_ph(__mmask32 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)_mm512_add_ph(__A, __B), (__v32hf)_mm512_setzero_ph()); } #define _mm512_add_round_ph(A, B, R) \ ((__m512h)__builtin_ia32_addph512((__v32hf)(__m512h)(A), \ (__v32hf)(__m512h)(B), (int)(R))) #define _mm512_mask_add_round_ph(W, U, A, B, R) \ ((__m512h)__builtin_ia32_selectph_512( \ (__mmask32)(U), (__v32hf)_mm512_add_round_ph((A), (B), (R)), \ (__v32hf)(__m512h)(W))) #define _mm512_maskz_add_round_ph(U, A, B, R) \ ((__m512h)__builtin_ia32_selectph_512( \ (__mmask32)(U), (__v32hf)_mm512_add_round_ph((A), (B), (R)), \ (__v32hf)_mm512_setzero_ph())) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_sub_ph(__m512h __A, __m512h __B) { return (__m512h)((__v32hf)__A - (__v32hf)__B); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_sub_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_selectph_512( (__mmask32)__U, (__v32hf)_mm512_sub_ph(__A, __B), (__v32hf)__W); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_ph(__mmask32 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)_mm512_sub_ph(__A, __B), (__v32hf)_mm512_setzero_ph()); } #define _mm512_sub_round_ph(A, B, R) \ ((__m512h)__builtin_ia32_subph512((__v32hf)(__m512h)(A), \ (__v32hf)(__m512h)(B), (int)(R))) #define _mm512_mask_sub_round_ph(W, U, A, B, R) \ ((__m512h)__builtin_ia32_selectph_512( \ (__mmask32)(U), (__v32hf)_mm512_sub_round_ph((A), (B), (R)), \ (__v32hf)(__m512h)(W))) #define _mm512_maskz_sub_round_ph(U, A, B, R) \ ((__m512h)__builtin_ia32_selectph_512( \ (__mmask32)(U), (__v32hf)_mm512_sub_round_ph((A), (B), (R)), \ (__v32hf)_mm512_setzero_ph())) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mul_ph(__m512h __A, __m512h __B) { return (__m512h)((__v32hf)__A * (__v32hf)__B); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_mul_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_selectph_512( (__mmask32)__U, (__v32hf)_mm512_mul_ph(__A, __B), (__v32hf)__W); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_ph(__mmask32 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)_mm512_mul_ph(__A, __B), (__v32hf)_mm512_setzero_ph()); } #define _mm512_mul_round_ph(A, B, R) \ ((__m512h)__builtin_ia32_mulph512((__v32hf)(__m512h)(A), \ (__v32hf)(__m512h)(B), (int)(R))) #define _mm512_mask_mul_round_ph(W, U, A, B, R) \ 
((__m512h)__builtin_ia32_selectph_512( \ (__mmask32)(U), (__v32hf)_mm512_mul_round_ph((A), (B), (R)), \ (__v32hf)(__m512h)(W))) #define _mm512_maskz_mul_round_ph(U, A, B, R) \ ((__m512h)__builtin_ia32_selectph_512( \ (__mmask32)(U), (__v32hf)_mm512_mul_round_ph((A), (B), (R)), \ (__v32hf)_mm512_setzero_ph())) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_div_ph(__m512h __A, __m512h __B) { return (__m512h)((__v32hf)__A / (__v32hf)__B); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_div_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_selectph_512( (__mmask32)__U, (__v32hf)_mm512_div_ph(__A, __B), (__v32hf)__W); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_div_ph(__mmask32 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)_mm512_div_ph(__A, __B), (__v32hf)_mm512_setzero_ph()); } #define _mm512_div_round_ph(A, B, R) \ ((__m512h)__builtin_ia32_divph512((__v32hf)(__m512h)(A), \ (__v32hf)(__m512h)(B), (int)(R))) #define _mm512_mask_div_round_ph(W, U, A, B, R) \ ((__m512h)__builtin_ia32_selectph_512( \ (__mmask32)(U), (__v32hf)_mm512_div_round_ph((A), (B), (R)), \ (__v32hf)(__m512h)(W))) #define _mm512_maskz_div_round_ph(U, A, B, R) \ ((__m512h)__builtin_ia32_selectph_512( \ (__mmask32)(U), (__v32hf)_mm512_div_round_ph((A), (B), (R)), \ (__v32hf)_mm512_setzero_ph())) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_min_ph(__m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_minph512((__v32hf)__A, (__v32hf)__B, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_min_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_selectph_512( (__mmask32)__U, (__v32hf)_mm512_min_ph(__A, __B), (__v32hf)__W); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_min_ph(__mmask32 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)_mm512_min_ph(__A, __B), (__v32hf)_mm512_setzero_ph()); } #define _mm512_min_round_ph(A, B, R) \ ((__m512h)__builtin_ia32_minph512((__v32hf)(__m512h)(A), \ (__v32hf)(__m512h)(B), (int)(R))) #define _mm512_mask_min_round_ph(W, U, A, B, R) \ ((__m512h)__builtin_ia32_selectph_512( \ (__mmask32)(U), (__v32hf)_mm512_min_round_ph((A), (B), (R)), \ (__v32hf)(__m512h)(W))) #define _mm512_maskz_min_round_ph(U, A, B, R) \ ((__m512h)__builtin_ia32_selectph_512( \ (__mmask32)(U), (__v32hf)_mm512_min_round_ph((A), (B), (R)), \ (__v32hf)_mm512_setzero_ph())) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_max_ph(__m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_maxph512((__v32hf)__A, (__v32hf)__B, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_max_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_selectph_512( (__mmask32)__U, (__v32hf)_mm512_max_ph(__A, __B), (__v32hf)__W); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_max_ph(__mmask32 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)_mm512_max_ph(__A, __B), (__v32hf)_mm512_setzero_ph()); } #define _mm512_max_round_ph(A, B, R) \ ((__m512h)__builtin_ia32_maxph512((__v32hf)(__m512h)(A), \ (__v32hf)(__m512h)(B), (int)(R))) #define _mm512_mask_max_round_ph(W, U, A, B, R) \ ((__m512h)__builtin_ia32_selectph_512( \ (__mmask32)(U), (__v32hf)_mm512_max_round_ph((A), (B), (R)), \ (__v32hf)(__m512h)(W))) #define 
_mm512_maskz_max_round_ph(U, A, B, R) \ ((__m512h)__builtin_ia32_selectph_512( \ (__mmask32)(U), (__v32hf)_mm512_max_round_ph((A), (B), (R)), \ (__v32hf)_mm512_setzero_ph())) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_abs_ph(__m512h __A) { return (__m512h)_mm512_and_epi32(_mm512_set1_epi32(0x7FFF7FFF), (__m512i)__A); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_conj_pch(__m512h __A) { return (__m512h)_mm512_xor_ps((__m512)__A, _mm512_set1_ps(-0.0f)); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_conj_pch(__m512h __W, __mmask16 __U, __m512h __A) { return (__m512h)__builtin_ia32_selectps_512( (__mmask16)__U, (__v16sf)_mm512_conj_pch(__A), (__v16sf)__W); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_conj_pch(__mmask16 __U, __m512h __A) { return (__m512h)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_conj_pch(__A), (__v16sf)_mm512_setzero_ps()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_sh(__m128h __A, __m128h __B) { __A[0] += __B[0]; return __A; } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { __A = _mm_add_sh(__A, __B); return __builtin_ia32_selectsh_128(__U, __A, __W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_sh(__mmask8 __U, __m128h __A, __m128h __B) { __A = _mm_add_sh(__A, __B); return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph()); } #define _mm_add_round_sh(A, B, R) \ ((__m128h)__builtin_ia32_addsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_add_round_sh(W, U, A, B, R) \ ((__m128h)__builtin_ia32_addsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_add_round_sh(U, A, B, R) \ ((__m128h)__builtin_ia32_addsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_sh(__m128h __A, __m128h __B) { __A[0] -= __B[0]; return __A; } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { __A = _mm_sub_sh(__A, __B); return __builtin_ia32_selectsh_128(__U, __A, __W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sh(__mmask8 __U, __m128h __A, __m128h __B) { __A = _mm_sub_sh(__A, __B); return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph()); } #define _mm_sub_round_sh(A, B, R) \ ((__m128h)__builtin_ia32_subsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_sub_round_sh(W, U, A, B, R) \ ((__m128h)__builtin_ia32_subsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_sub_round_sh(U, A, B, R) \ ((__m128h)__builtin_ia32_subsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_sh(__m128h __A, __m128h __B) { __A[0] *= __B[0]; return __A; } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { __A = _mm_mul_sh(__A, __B); return __builtin_ia32_selectsh_128(__U, __A, __W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sh(__mmask8 __U, __m128h __A, __m128h __B) { __A = _mm_mul_sh(__A, __B); return 
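/*
 * Usage sketch (not part of the original header): the mask_ forms merge and
 * the maskz_ forms zero the lanes whose mask bit is clear. With the mask
 * 0x0000FFFF, the low 16 lanes receive __A + __B while the high 16 lanes come
 * from __W (merge) or are zeroed; __A, __B, __W are hypothetical __m512h.
 *
 *   __m512h __merged = _mm512_mask_add_ph(__W, 0x0000FFFF, __A, __B);
 *   __m512h __zeroed = _mm512_maskz_add_ph(0x0000FFFF, __A, __B);
 */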
__builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph()); } #define _mm_mul_round_sh(A, B, R) \ ((__m128h)__builtin_ia32_mulsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_mul_round_sh(W, U, A, B, R) \ ((__m128h)__builtin_ia32_mulsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_mul_round_sh(U, A, B, R) \ ((__m128h)__builtin_ia32_mulsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_sh(__m128h __A, __m128h __B) { __A[0] /= __B[0]; return __A; } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { __A = _mm_div_sh(__A, __B); return __builtin_ia32_selectsh_128(__U, __A, __W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_sh(__mmask8 __U, __m128h __A, __m128h __B) { __A = _mm_div_sh(__A, __B); return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph()); } #define _mm_div_round_sh(A, B, R) \ ((__m128h)__builtin_ia32_divsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_div_round_sh(W, U, A, B, R) \ ((__m128h)__builtin_ia32_divsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_div_round_sh(U, A, B, R) \ ((__m128h)__builtin_ia32_divsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_sh(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_minsh_round_mask( (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)__A, (__v8hf)__B, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_sh(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_minsh_round_mask( (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_min_round_sh(A, B, R) \ ((__m128h)__builtin_ia32_minsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_min_round_sh(W, U, A, B, R) \ ((__m128h)__builtin_ia32_minsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_min_round_sh(U, A, B, R) \ ((__m128h)__builtin_ia32_minsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_sh(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_maxsh_round_mask( (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_maxsh_round_mask((__v8hf)__A, (__v8hf)__B, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_sh(__mmask8 __U, __m128h __A, __m128h __B) 
{ return (__m128h)__builtin_ia32_maxsh_round_mask( (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_max_round_sh(A, B, R) \ ((__m128h)__builtin_ia32_maxsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_max_round_sh(W, U, A, B, R) \ ((__m128h)__builtin_ia32_maxsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_max_round_sh(U, A, B, R) \ ((__m128h)__builtin_ia32_maxsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U), (int)(R))) #define _mm512_cmp_round_ph_mask(A, B, P, R) \ ((__mmask32)__builtin_ia32_cmpph512_mask((__v32hf)(__m512h)(A), \ (__v32hf)(__m512h)(B), (int)(P), \ (__mmask32)-1, (int)(R))) #define _mm512_mask_cmp_round_ph_mask(U, A, B, P, R) \ ((__mmask32)__builtin_ia32_cmpph512_mask((__v32hf)(__m512h)(A), \ (__v32hf)(__m512h)(B), (int)(P), \ (__mmask32)(U), (int)(R))) #define _mm512_cmp_ph_mask(A, B, P) \ _mm512_cmp_round_ph_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) #define _mm512_mask_cmp_ph_mask(U, A, B, P) \ _mm512_mask_cmp_round_ph_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) #define _mm_cmp_round_sh_mask(X, Y, P, R) \ ((__mmask8)__builtin_ia32_cmpsh_mask((__v8hf)(__m128h)(X), \ (__v8hf)(__m128h)(Y), (int)(P), \ (__mmask8)-1, (int)(R))) #define _mm_mask_cmp_round_sh_mask(M, X, Y, P, R) \ ((__mmask8)__builtin_ia32_cmpsh_mask((__v8hf)(__m128h)(X), \ (__v8hf)(__m128h)(Y), (int)(P), \ (__mmask8)(M), (int)(R))) #define _mm_cmp_sh_mask(X, Y, P) \ ((__mmask8)__builtin_ia32_cmpsh_mask( \ (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(P), (__mmask8)-1, \ _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_cmp_sh_mask(M, X, Y, P) \ ((__mmask8)__builtin_ia32_cmpsh_mask( \ (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(P), (__mmask8)(M), \ _MM_FROUND_CUR_DIRECTION)) // loads with vmovsh: static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_load_sh(void const *__dp) { struct __mm_load_sh_struct { _Float16 __u; } __attribute__((__packed__, __may_alias__)); _Float16 __u = ((const struct __mm_load_sh_struct *)__dp)->__u; return (__m128h){__u, 0, 0, 0, 0, 0, 0, 0}; } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_load_sh(__m128h __W, __mmask8 __U, const void *__A) { __m128h src = (__v8hf)__builtin_shufflevector( (__v8hf)__W, (__v8hf)_mm_setzero_ph(), 0, 8, 8, 8, 8, 8, 8, 8); return (__m128h)__builtin_ia32_loadsh128_mask((const __v8hf *)__A, src, __U & 1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_load_sh(__mmask8 __U, const void *__A) { return (__m128h)__builtin_ia32_loadsh128_mask( (const __v8hf *)__A, (__v8hf)_mm_setzero_ph(), __U & 1); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_load_ph(void const *__p) { return *(const __m512h *)__p; } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_load_ph(void const *__p) { return *(const __m256h *)__p; } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_load_ph(void const *__p) { return *(const __m128h *)__p; } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_loadu_ph(void const *__p) { struct __loadu_ph { __m512h_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_ph *)__p)->__v; } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_loadu_ph(void const *__p) { struct __loadu_ph { __m256h_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_ph *)__p)->__v; } static 
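/*
 * Usage sketch (not part of the original header): _mm512_load_ph requires a
 * 64-byte aligned address, the loadu forms accept any alignment, and
 * _mm_load_sh reads a single _Float16 into lane 0 while zeroing lanes 1..7;
 * __buf is a hypothetical array of 32 _Float16 values.
 *
 *   __m128h __s = _mm_load_sh(__buf);       // __buf[0] in lane 0, lanes 1..7 zero
 *   __m512h __v = _mm512_loadu_ph(__buf);   // unaligned 32-element load
 */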
__inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_loadu_ph(void const *__p) { struct __loadu_ph { __m128h_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_ph *)__p)->__v; } // stores with vmovsh: static __inline__ void __DEFAULT_FN_ATTRS128 _mm_store_sh(void *__dp, __m128h __a) { struct __mm_store_sh_struct { _Float16 __u; } __attribute__((__packed__, __may_alias__)); ((struct __mm_store_sh_struct *)__dp)->__u = __a[0]; } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sh(void *__W, __mmask8 __U, __m128h __A) { __builtin_ia32_storesh128_mask((__v8hf *)__W, __A, __U & 1); } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_store_ph(void *__P, __m512h __A) { *(__m512h *)__P = __A; } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_store_ph(void *__P, __m256h __A) { *(__m256h *)__P = __A; } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_store_ph(void *__P, __m128h __A) { *(__m128h *)__P = __A; } static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_storeu_ph(void *__P, __m512h __A) { struct __storeu_ph { __m512h_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_ph *)__P)->__v = __A; } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_storeu_ph(void *__P, __m256h __A) { struct __storeu_ph { __m256h_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_ph *)__P)->__v = __A; } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_storeu_ph(void *__P, __m128h __A) { struct __storeu_ph { __m128h_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_ph *)__P)->__v = __A; } // moves with vmovsh: static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_move_sh(__m128h __a, __m128h __b) { __a[0] = __b[0]; return __a; } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_move_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return __builtin_ia32_selectsh_128(__U, _mm_move_sh(__A, __B), __W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_move_sh(__mmask8 __U, __m128h __A, __m128h __B) { return __builtin_ia32_selectsh_128(__U, _mm_move_sh(__A, __B), _mm_setzero_ph()); } // vmovw: static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsi16_si128(short __a) { return (__m128i)(__v8hi){__a, 0, 0, 0, 0, 0, 0, 0}; } static __inline__ short __DEFAULT_FN_ATTRS128 _mm_cvtsi128_si16(__m128i __a) { __v8hi __b = (__v8hi)__a; return __b[0]; } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_rcp_ph(__m512h __A) { return (__m512h)__builtin_ia32_rcpph512_mask( (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_rcp_ph(__m512h __W, __mmask32 __U, __m512h __A) { return (__m512h)__builtin_ia32_rcpph512_mask((__v32hf)__A, (__v32hf)__W, (__mmask32)__U); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp_ph(__mmask32 __U, __m512h __A) { return (__m512h)__builtin_ia32_rcpph512_mask( (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_rsqrt_ph(__m512h __A) { return (__m512h)__builtin_ia32_rsqrtph512_mask( (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt_ph(__m512h __W, __mmask32 __U, __m512h __A) { return (__m512h)__builtin_ia32_rsqrtph512_mask((__v32hf)__A, (__v32hf)__W, (__mmask32)__U); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt_ph(__mmask32 __U, __m512h __A) { return (__m512h)__builtin_ia32_rsqrtph512_mask( (__v32hf)__A, 
(__v32hf)_mm512_setzero_ph(), (__mmask32)__U); } #define _mm512_getmant_ph(A, B, C) \ ((__m512h)__builtin_ia32_getmantph512_mask( \ (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \ (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_getmant_ph(W, U, A, B, C) \ ((__m512h)__builtin_ia32_getmantph512_mask( \ (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), (__v32hf)(__m512h)(W), \ (__mmask32)(U), _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_getmant_ph(U, A, B, C) \ ((__m512h)__builtin_ia32_getmantph512_mask( \ (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \ (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION)) #define _mm512_getmant_round_ph(A, B, C, R) \ ((__m512h)__builtin_ia32_getmantph512_mask( \ (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \ (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, (int)(R))) #define _mm512_mask_getmant_round_ph(W, U, A, B, C, R) \ ((__m512h)__builtin_ia32_getmantph512_mask( \ (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), (__v32hf)(__m512h)(W), \ (__mmask32)(U), (int)(R))) #define _mm512_maskz_getmant_round_ph(U, A, B, C, R) \ ((__m512h)__builtin_ia32_getmantph512_mask( \ (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \ (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R))) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_getexp_ph(__m512h __A) { return (__m512h)__builtin_ia32_getexpph512_mask( (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ph(__m512h __W, __mmask32 __U, __m512h __A) { return (__m512h)__builtin_ia32_getexpph512_mask( (__v32hf)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ph(__mmask32 __U, __m512h __A) { return (__m512h)__builtin_ia32_getexpph512_mask( (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_getexp_round_ph(A, R) \ ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \ (__v32hf)_mm512_undefined_ph(), \ (__mmask32)-1, (int)(R))) #define _mm512_mask_getexp_round_ph(W, U, A, R) \ ((__m512h)__builtin_ia32_getexpph512_mask( \ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(W), (__mmask32)(U), (int)(R))) #define _mm512_maskz_getexp_round_ph(U, A, R) \ ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \ (__v32hf)_mm512_setzero_ph(), \ (__mmask32)(U), (int)(R))) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_scalef_ph(__m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_scalefph512_mask( (__v32hf)__A, (__v32hf)__B, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_scalefph512_mask((__v32hf)__A, (__v32hf)__B, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ph(__mmask32 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_scalefph512_mask( (__v32hf)__A, (__v32hf)__B, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_scalef_round_ph(A, B, R) \ ((__m512h)__builtin_ia32_scalefph512_mask( \ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), \ (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, (int)(R))) #define _mm512_mask_scalef_round_ph(W, U, A, B, R) \ 
((__m512h)__builtin_ia32_scalefph512_mask( \ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(W), \ (__mmask32)(U), (int)(R))) #define _mm512_maskz_scalef_round_ph(U, A, B, R) \ ((__m512h)__builtin_ia32_scalefph512_mask( \ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), \ (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R))) #define _mm512_roundscale_ph(A, B) \ ((__m512h)__builtin_ia32_rndscaleph_mask( \ (__v32hf)(__m512h)(A), (int)(B), (__v32hf)(__m512h)(A), (__mmask32)-1, \ _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_roundscale_ph(A, B, C, imm) \ ((__m512h)__builtin_ia32_rndscaleph_mask( \ (__v32hf)(__m512h)(C), (int)(imm), (__v32hf)(__m512h)(A), \ (__mmask32)(B), _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_roundscale_ph(A, B, imm) \ ((__m512h)__builtin_ia32_rndscaleph_mask( \ (__v32hf)(__m512h)(B), (int)(imm), (__v32hf)_mm512_setzero_ph(), \ (__mmask32)(A), _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_roundscale_round_ph(A, B, C, imm, R) \ ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(C), (int)(imm), \ (__v32hf)(__m512h)(A), \ (__mmask32)(B), (int)(R))) #define _mm512_maskz_roundscale_round_ph(A, B, imm, R) \ ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(B), (int)(imm), \ (__v32hf)_mm512_setzero_ph(), \ (__mmask32)(A), (int)(R))) #define _mm512_roundscale_round_ph(A, imm, R) \ ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(A), (int)(imm), \ (__v32hf)_mm512_undefined_ph(), \ (__mmask32)-1, (int)(R))) #define _mm512_reduce_ph(A, imm) \ ((__m512h)__builtin_ia32_reduceph512_mask( \ (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)_mm512_undefined_ph(), \ (__mmask32)-1, _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_reduce_ph(W, U, A, imm) \ ((__m512h)__builtin_ia32_reduceph512_mask( \ (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)(__m512h)(W), \ (__mmask32)(U), _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_reduce_ph(U, A, imm) \ ((__m512h)__builtin_ia32_reduceph512_mask( \ (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)_mm512_setzero_ph(), \ (__mmask32)(U), _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_reduce_round_ph(W, U, A, imm, R) \ ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \ (__v32hf)(__m512h)(W), \ (__mmask32)(U), (int)(R))) #define _mm512_maskz_reduce_round_ph(U, A, imm, R) \ ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \ (__v32hf)_mm512_setzero_ph(), \ (__mmask32)(U), (int)(R))) #define _mm512_reduce_round_ph(A, imm, R) \ ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \ (__v32hf)_mm512_undefined_ph(), \ (__mmask32)-1, (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rcp_sh(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_rcpsh_mask( (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rcp_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_rcpsh_mask((__v8hf)__A, (__v8hf)__B, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rcp_sh(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_rcpsh_mask( (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rsqrt_sh(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_rsqrtsh_mask( (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt_sh(__m128h 
__W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_rsqrtsh_mask((__v8hf)__A, (__v8hf)__B, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt_sh(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_rsqrtsh_mask( (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } #define _mm_getmant_round_sh(A, B, C, D, R) \ ((__m128h)__builtin_ia32_getmantsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \ (__v8hf)_mm_setzero_ph(), (__mmask8)-1, (int)(R))) #define _mm_getmant_sh(A, B, C, D) \ ((__m128h)__builtin_ia32_getmantsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \ (__v8hf)_mm_setzero_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_getmant_sh(W, U, A, B, C, D) \ ((__m128h)__builtin_ia32_getmantsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \ (__v8hf)(__m128h)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_getmant_round_sh(W, U, A, B, C, D, R) \ ((__m128h)__builtin_ia32_getmantsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \ (__v8hf)(__m128h)(W), (__mmask8)(U), (int)(R))) #define _mm_maskz_getmant_sh(U, A, B, C, D) \ ((__m128h)__builtin_ia32_getmantsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \ (__v8hf)_mm_setzero_ph(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_getmant_round_sh(U, A, B, C, D, R) \ ((__m128h)__builtin_ia32_getmantsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \ (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) #define _mm_getexp_round_sh(A, B, R) \ ((__m128h)__builtin_ia32_getexpsh128_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)-1, (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_sh(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_getexpsh128_round_mask( (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_getexpsh128_round_mask( (__v8hf)__A, (__v8hf)__B, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask_getexp_round_sh(W, U, A, B, R) \ ((__m128h)__builtin_ia32_getexpsh128_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sh(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_getexpsh128_round_mask( (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_maskz_getexp_round_sh(U, A, B, R) \ ((__m128h)__builtin_ia32_getexpsh128_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U), (int)(R))) #define _mm_scalef_round_sh(A, B, R) \ ((__m128h)__builtin_ia32_scalefsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)-1, (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_sh(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_scalefsh_round_mask( (__v8hf)__A, (__v8hf)(__B), (__v8hf)_mm_setzero_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 
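/*
 * Usage sketch (not part of the original header): getexp/getmant decompose
 * lane 0 so that x == mant * 2^exp (mantissa normalized to [1, 2) here), and
 * scalef rebuilds it. __x is a hypothetical __m128h; the _MM_MANT_* enumerators
 * come from the AVX-512F headers, not from this file.
 *
 *   __m128h __e = _mm_getexp_sh(__x, __x);    // floor(log2 |x|) as a _Float16
 *   __m128h __m = _mm_getmant_sh(__x, __x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
 *   __m128h __r = _mm_scalef_sh(__m, __e);    // lane 0 of __r equals lane 0 of __x
 */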
_mm_mask_scalef_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_scalefsh_round_mask((__v8hf)__A, (__v8hf)__B, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask_scalef_round_sh(W, U, A, B, R) \ ((__m128h)__builtin_ia32_scalefsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sh(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_scalefsh_round_mask( (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_maskz_scalef_round_sh(U, A, B, R) \ ((__m128h)__builtin_ia32_scalefsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U), (int)(R))) #define _mm_roundscale_round_sh(A, B, imm, R) \ ((__m128h)__builtin_ia32_rndscalesh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)-1, (int)(imm), (int)(R))) #define _mm_roundscale_sh(A, B, imm) \ ((__m128h)__builtin_ia32_rndscalesh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)-1, (int)(imm), _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_roundscale_sh(W, U, A, B, I) \ ((__m128h)__builtin_ia32_rndscalesh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_roundscale_round_sh(W, U, A, B, I, R) \ ((__m128h)__builtin_ia32_rndscalesh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ (__mmask8)(U), (int)(I), (int)(R))) #define _mm_maskz_roundscale_sh(U, A, B, I) \ ((__m128h)__builtin_ia32_rndscalesh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_roundscale_round_sh(U, A, B, I, R) \ ((__m128h)__builtin_ia32_rndscalesh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U), (int)(I), (int)(R))) #define _mm_reduce_sh(A, B, C) \ ((__m128h)__builtin_ia32_reducesh_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)-1, (int)(C), _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_reduce_sh(W, U, A, B, C) \ ((__m128h)__builtin_ia32_reducesh_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ (__mmask8)(U), (int)(C), _MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_reduce_sh(U, A, B, C) \ ((__m128h)__builtin_ia32_reducesh_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U), (int)(C), _MM_FROUND_CUR_DIRECTION)) #define _mm_reduce_round_sh(A, B, C, R) \ ((__m128h)__builtin_ia32_reducesh_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)-1, (int)(C), (int)(R))) #define _mm_mask_reduce_round_sh(W, U, A, B, C, R) \ ((__m128h)__builtin_ia32_reducesh_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ (__mmask8)(U), (int)(C), (int)(R))) #define _mm_maskz_reduce_round_sh(U, A, B, C, R) \ ((__m128h)__builtin_ia32_reducesh_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U), (int)(C), (int)(R))) #define _mm512_sqrt_round_ph(A, R) \ ((__m512h)__builtin_ia32_sqrtph512((__v32hf)(__m512h)(A), (int)(R))) #define _mm512_mask_sqrt_round_ph(W, U, A, R) \ ((__m512h)__builtin_ia32_selectph_512( \ (__mmask32)(U), 
(__v32hf)_mm512_sqrt_round_ph((A), (R)), \ (__v32hf)(__m512h)(W))) #define _mm512_maskz_sqrt_round_ph(U, A, R) \ ((__m512h)__builtin_ia32_selectph_512( \ (__mmask32)(U), (__v32hf)_mm512_sqrt_round_ph((A), (R)), \ (__v32hf)_mm512_setzero_ph())) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_sqrt_ph(__m512h __A) { return (__m512h)__builtin_ia32_sqrtph512((__v32hf)__A, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ph(__m512h __W, __mmask32 __U, __m512h __A) { return (__m512h)__builtin_ia32_selectph_512( (__mmask32)(__U), (__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)), (__v32hf)(__m512h)(__W)); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ph(__mmask32 __U, __m512h __A) { return (__m512h)__builtin_ia32_selectph_512( (__mmask32)(__U), (__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)), (__v32hf)_mm512_setzero_ph()); } #define _mm_sqrt_round_sh(A, B, R) \ ((__m128h)__builtin_ia32_sqrtsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_sqrt_round_sh(W, U, A, B, R) \ ((__m128h)__builtin_ia32_sqrtsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_sqrt_round_sh(U, A, B, R) \ ((__m128h)__builtin_ia32_sqrtsh_round_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_sh(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_sqrtsh_round_mask( (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)_mm_setzero_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sh(__m128h __W, __mmask32 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_sqrtsh_round_mask( (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)(__m128h)(__W), (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sh(__mmask32 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_sqrtsh_round_mask( (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)_mm_setzero_ph(), (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION); } #define _mm512_mask_fpclass_ph_mask(U, A, imm) \ ((__mmask32)__builtin_ia32_fpclassph512_mask((__v32hf)(__m512h)(A), \ (int)(imm), (__mmask32)(U))) #define _mm512_fpclass_ph_mask(A, imm) \ ((__mmask32)__builtin_ia32_fpclassph512_mask((__v32hf)(__m512h)(A), \ (int)(imm), (__mmask32)-1)) #define _mm_fpclass_sh_mask(A, imm) \ ((__mmask8)__builtin_ia32_fpclasssh_mask((__v8hf)(__m128h)(A), (int)(imm), \ (__mmask8)-1)) #define _mm_mask_fpclass_sh_mask(U, A, imm) \ ((__mmask8)__builtin_ia32_fpclasssh_mask((__v8hf)(__m128h)(A), (int)(imm), \ (__mmask8)(U))) #define _mm512_cvt_roundpd_ph(A, R) \ ((__m128h)__builtin_ia32_vcvtpd2ph512_mask( \ (__v8df)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) #define _mm512_mask_cvt_roundpd_ph(W, U, A, R) \ ((__m128h)__builtin_ia32_vcvtpd2ph512_mask((__v8df)(A), (__v8hf)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundpd_ph(U, A, R) \ ((__m128h)__builtin_ia32_vcvtpd2ph512_mask( \ (__v8df)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ph(__m512d __A) { return (__m128h)__builtin_ia32_vcvtpd2ph512_mask( (__v8df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static 
__inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m512d __A) { return (__m128h)__builtin_ia32_vcvtpd2ph512_mask( (__v8df)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_ph(__mmask8 __U, __m512d __A) { return (__m128h)__builtin_ia32_vcvtpd2ph512_mask( (__v8df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundph_pd(A, R) \ ((__m512d)__builtin_ia32_vcvtph2pd512_mask( \ (__v8hf)(A), (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), (int)(R))) #define _mm512_mask_cvt_roundph_pd(W, U, A, R) \ ((__m512d)__builtin_ia32_vcvtph2pd512_mask((__v8hf)(A), (__v8df)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundph_pd(U, A, R) \ ((__m512d)__builtin_ia32_vcvtph2pd512_mask( \ (__v8hf)(A), (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtph_pd(__m128h __A) { return (__m512d)__builtin_ia32_vcvtph2pd512_mask( (__v8hf)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_pd(__m512d __W, __mmask8 __U, __m128h __A) { return (__m512d)__builtin_ia32_vcvtph2pd512_mask( (__v8hf)__A, (__v8df)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_pd(__mmask8 __U, __m128h __A) { return (__m512d)__builtin_ia32_vcvtph2pd512_mask( (__v8hf)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_cvt_roundsh_ss(A, B, R) \ ((__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)(A), (__v8hf)(B), \ (__v4sf)_mm_undefined_ps(), \ (__mmask8)(-1), (int)(R))) #define _mm_mask_cvt_roundsh_ss(W, U, A, B, R) \ ((__m128)__builtin_ia32_vcvtsh2ss_round_mask( \ (__v4sf)(A), (__v8hf)(B), (__v4sf)(W), (__mmask8)(U), (int)(R))) #define _mm_maskz_cvt_roundsh_ss(U, A, B, R) \ ((__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)(A), (__v8hf)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtsh_ss(__m128 __A, __m128h __B) { return (__m128)__builtin_ia32_vcvtsh2ss_round_mask( (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_undefined_ps(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsh_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128h __B) { return (__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)__A, (__v8hf)__B, (__v4sf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsh_ss(__mmask8 __U, __m128 __A, __m128h __B) { return (__m128)__builtin_ia32_vcvtsh2ss_round_mask( (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_setzero_ps(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_cvt_roundss_sh(A, B, R) \ ((__m128h)__builtin_ia32_vcvtss2sh_round_mask((__v8hf)(A), (__v4sf)(B), \ (__v8hf)_mm_undefined_ph(), \ (__mmask8)(-1), (int)(R))) #define _mm_mask_cvt_roundss_sh(W, U, A, B, R) \ ((__m128h)__builtin_ia32_vcvtss2sh_round_mask( \ (__v8hf)(A), (__v4sf)(B), (__v8hf)(W), (__mmask8)(U), (int)(R))) #define _mm_maskz_cvt_roundss_sh(U, A, B, R) \ ((__m128h)__builtin_ia32_vcvtss2sh_round_mask((__v8hf)(A), (__v4sf)(B), \ (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtss_sh(__m128h __A, __m128 __B) { return (__m128h)__builtin_ia32_vcvtss2sh_round_mask( (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_undefined_ph(), 
(__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128 __B) { return (__m128h)__builtin_ia32_vcvtss2sh_round_mask( (__v8hf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sh(__mmask8 __U, __m128h __A, __m128 __B) { return (__m128h)__builtin_ia32_vcvtss2sh_round_mask( (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_cvt_roundsd_sh(A, B, R) \ ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask((__v8hf)(A), (__v2df)(B), \ (__v8hf)_mm_undefined_ph(), \ (__mmask8)(-1), (int)(R))) #define _mm_mask_cvt_roundsd_sh(W, U, A, B, R) \ ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask( \ (__v8hf)(A), (__v2df)(B), (__v8hf)(W), (__mmask8)(U), (int)(R))) #define _mm_maskz_cvt_roundsd_sh(U, A, B, R) \ ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask((__v8hf)(A), (__v2df)(B), \ (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtsd_sh(__m128h __A, __m128d __B) { return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask( (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128d __B) { return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask( (__v8hf)__A, (__v2df)__B, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsd_sh(__mmask8 __U, __m128h __A, __m128d __B) { return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask( (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_cvt_roundsh_sd(A, B, R) \ ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask((__v2df)(A), (__v8hf)(B), \ (__v2df)_mm_undefined_pd(), \ (__mmask8)(-1), (int)(R))) #define _mm_mask_cvt_roundsh_sd(W, U, A, B, R) \ ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask( \ (__v2df)(A), (__v8hf)(B), (__v2df)(W), (__mmask8)(U), (int)(R))) #define _mm_maskz_cvt_roundsh_sd(U, A, B, R) \ ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask((__v2df)(A), (__v8hf)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtsh_sd(__m128d __A, __m128h __B) { return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask( (__v2df)__A, (__v8hf)__B, (__v2df)_mm_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtsh_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128h __B) { return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask( (__v2df)__A, (__v8hf)__B, (__v2df)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsh_sd(__mmask8 __U, __m128d __A, __m128h __B) { return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask( (__v2df)__A, (__v8hf)__B, (__v2df)_mm_setzero_pd(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundph_epi16(A, R) \ ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), \ (__v32hi)_mm512_undefined_epi32(), \ (__mmask32)(-1), (int)(R))) #define _mm512_mask_cvt_roundph_epi16(W, U, A, R) \ ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), (__v32hi)(W), \ (__mmask32)(U), (int)(R))) #define _mm512_maskz_cvt_roundph_epi16(U, A, R) \ ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), \ (__v32hi)_mm512_setzero_epi32(), \ (__mmask32)(U), 
(int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtph_epi16(__m512h __A) { return (__m512i)__builtin_ia32_vcvtph2w512_mask( (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_epi16(__m512i __W, __mmask32 __U, __m512h __A) { return (__m512i)__builtin_ia32_vcvtph2w512_mask( (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_epi16(__mmask32 __U, __m512h __A) { return (__m512i)__builtin_ia32_vcvtph2w512_mask( (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvtt_roundph_epi16(A, R) \ ((__m512i)__builtin_ia32_vcvttph2w512_mask( \ (__v32hf)(A), (__v32hi)_mm512_undefined_epi32(), (__mmask32)(-1), \ (int)(R))) #define _mm512_mask_cvtt_roundph_epi16(W, U, A, R) \ ((__m512i)__builtin_ia32_vcvttph2w512_mask((__v32hf)(A), (__v32hi)(W), \ (__mmask32)(U), (int)(R))) #define _mm512_maskz_cvtt_roundph_epi16(U, A, R) \ ((__m512i)__builtin_ia32_vcvttph2w512_mask((__v32hf)(A), \ (__v32hi)_mm512_setzero_epi32(), \ (__mmask32)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttph_epi16(__m512h __A) { return (__m512i)__builtin_ia32_vcvttph2w512_mask( (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttph_epi16(__m512i __W, __mmask32 __U, __m512h __A) { return (__m512i)__builtin_ia32_vcvttph2w512_mask( (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttph_epi16(__mmask32 __U, __m512h __A) { return (__m512i)__builtin_ia32_vcvttph2w512_mask( (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundepi16_ph(A, R) \ ((__m512h)__builtin_ia32_vcvtw2ph512_mask((__v32hi)(A), \ (__v32hf)_mm512_undefined_ph(), \ (__mmask32)(-1), (int)(R))) #define _mm512_mask_cvt_roundepi16_ph(W, U, A, R) \ ((__m512h)__builtin_ia32_vcvtw2ph512_mask((__v32hi)(A), (__v32hf)(W), \ (__mmask32)(U), (int)(R))) #define _mm512_maskz_cvt_roundepi16_ph(U, A, R) \ ((__m512h)__builtin_ia32_vcvtw2ph512_mask( \ (__v32hi)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R))) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_ph(__m512i __A) { return (__m512h)__builtin_ia32_vcvtw2ph512_mask( (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_ph(__m512h __W, __mmask32 __U, __m512i __A) { return (__m512h)__builtin_ia32_vcvtw2ph512_mask( (__v32hi)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_ph(__mmask32 __U, __m512i __A) { return (__m512h)__builtin_ia32_vcvtw2ph512_mask( (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundph_epu16(A, R) \ ((__m512i)__builtin_ia32_vcvtph2uw512_mask( \ (__v32hf)(A), (__v32hu)_mm512_undefined_epi32(), (__mmask32)(-1), \ (int)(R))) #define _mm512_mask_cvt_roundph_epu16(W, U, A, R) \ ((__m512i)__builtin_ia32_vcvtph2uw512_mask((__v32hf)(A), (__v32hu)(W), \ (__mmask32)(U), (int)(R))) #define _mm512_maskz_cvt_roundph_epu16(U, A, R) \ ((__m512i)__builtin_ia32_vcvtph2uw512_mask((__v32hf)(A), \ (__v32hu)_mm512_setzero_epi32(), \ 
(__mmask32)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtph_epu16(__m512h __A) { return (__m512i)__builtin_ia32_vcvtph2uw512_mask( (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_epu16(__m512i __W, __mmask32 __U, __m512h __A) { return (__m512i)__builtin_ia32_vcvtph2uw512_mask( (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_epu16(__mmask32 __U, __m512h __A) { return (__m512i)__builtin_ia32_vcvtph2uw512_mask( (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvtt_roundph_epu16(A, R) \ ((__m512i)__builtin_ia32_vcvttph2uw512_mask( \ (__v32hf)(A), (__v32hu)_mm512_undefined_epi32(), (__mmask32)(-1), \ (int)(R))) #define _mm512_mask_cvtt_roundph_epu16(W, U, A, R) \ ((__m512i)__builtin_ia32_vcvttph2uw512_mask((__v32hf)(A), (__v32hu)(W), \ (__mmask32)(U), (int)(R))) #define _mm512_maskz_cvtt_roundph_epu16(U, A, R) \ ((__m512i)__builtin_ia32_vcvttph2uw512_mask((__v32hf)(A), \ (__v32hu)_mm512_setzero_epi32(), \ (__mmask32)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttph_epu16(__m512h __A) { return (__m512i)__builtin_ia32_vcvttph2uw512_mask( (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttph_epu16(__m512i __W, __mmask32 __U, __m512h __A) { return (__m512i)__builtin_ia32_vcvttph2uw512_mask( (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttph_epu16(__mmask32 __U, __m512h __A) { return (__m512i)__builtin_ia32_vcvttph2uw512_mask( (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundepu16_ph(A, R) \ ((__m512h)__builtin_ia32_vcvtuw2ph512_mask((__v32hu)(A), \ (__v32hf)_mm512_undefined_ph(), \ (__mmask32)(-1), (int)(R))) #define _mm512_mask_cvt_roundepu16_ph(W, U, A, R) \ ((__m512h)__builtin_ia32_vcvtuw2ph512_mask((__v32hu)(A), (__v32hf)(W), \ (__mmask32)(U), (int)(R))) #define _mm512_maskz_cvt_roundepu16_ph(U, A, R) \ ((__m512h)__builtin_ia32_vcvtuw2ph512_mask( \ (__v32hu)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R))) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_ph(__m512i __A) { return (__m512h)__builtin_ia32_vcvtuw2ph512_mask( (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_ph(__m512h __W, __mmask32 __U, __m512i __A) { return (__m512h)__builtin_ia32_vcvtuw2ph512_mask( (__v32hu)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_ph(__mmask32 __U, __m512i __A) { return (__m512h)__builtin_ia32_vcvtuw2ph512_mask( (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundph_epi32(A, R) \ ((__m512i)__builtin_ia32_vcvtph2dq512_mask( \ (__v16hf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)(-1), \ (int)(R))) #define _mm512_mask_cvt_roundph_epi32(W, U, A, R) \ ((__m512i)__builtin_ia32_vcvtph2dq512_mask((__v16hf)(A), (__v16si)(W), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvt_roundph_epi32(U, A, R) \ ((__m512i)__builtin_ia32_vcvtph2dq512_mask((__v16hf)(A), \ 
(__v16si)_mm512_setzero_epi32(), \ (__mmask16)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtph_epi32(__m256h __A) { return (__m512i)__builtin_ia32_vcvtph2dq512_mask( (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_epi32(__m512i __W, __mmask16 __U, __m256h __A) { return (__m512i)__builtin_ia32_vcvtph2dq512_mask( (__v16hf)__A, (__v16si)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_epi32(__mmask16 __U, __m256h __A) { return (__m512i)__builtin_ia32_vcvtph2dq512_mask( (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundph_epu32(A, R) \ ((__m512i)__builtin_ia32_vcvtph2udq512_mask( \ (__v16hf)(A), (__v16su)_mm512_undefined_epi32(), (__mmask16)(-1), \ (int)(R))) #define _mm512_mask_cvt_roundph_epu32(W, U, A, R) \ ((__m512i)__builtin_ia32_vcvtph2udq512_mask((__v16hf)(A), (__v16su)(W), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvt_roundph_epu32(U, A, R) \ ((__m512i)__builtin_ia32_vcvtph2udq512_mask((__v16hf)(A), \ (__v16su)_mm512_setzero_epi32(), \ (__mmask16)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtph_epu32(__m256h __A) { return (__m512i)__builtin_ia32_vcvtph2udq512_mask( (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_epu32(__m512i __W, __mmask16 __U, __m256h __A) { return (__m512i)__builtin_ia32_vcvtph2udq512_mask( (__v16hf)__A, (__v16su)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_epu32(__mmask16 __U, __m256h __A) { return (__m512i)__builtin_ia32_vcvtph2udq512_mask( (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundepi32_ph(A, R) \ ((__m256h)__builtin_ia32_vcvtdq2ph512_mask((__v16si)(A), \ (__v16hf)_mm256_undefined_ph(), \ (__mmask16)(-1), (int)(R))) #define _mm512_mask_cvt_roundepi32_ph(W, U, A, R) \ ((__m256h)__builtin_ia32_vcvtdq2ph512_mask((__v16si)(A), (__v16hf)(W), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvt_roundepi32_ph(U, A, R) \ ((__m256h)__builtin_ia32_vcvtdq2ph512_mask( \ (__v16si)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_ph(__m512i __A) { return (__m256h)__builtin_ia32_vcvtdq2ph512_mask( (__v16si)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_ph(__m256h __W, __mmask16 __U, __m512i __A) { return (__m256h)__builtin_ia32_vcvtdq2ph512_mask( (__v16si)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_ph(__mmask16 __U, __m512i __A) { return (__m256h)__builtin_ia32_vcvtdq2ph512_mask( (__v16si)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundepu32_ph(A, R) \ ((__m256h)__builtin_ia32_vcvtudq2ph512_mask((__v16su)(A), \ (__v16hf)_mm256_undefined_ph(), \ (__mmask16)(-1), (int)(R))) #define _mm512_mask_cvt_roundepu32_ph(W, U, A, R) \ ((__m256h)__builtin_ia32_vcvtudq2ph512_mask((__v16su)(A), (__v16hf)(W), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvt_roundepu32_ph(U, A, R) \ 
((__m256h)__builtin_ia32_vcvtudq2ph512_mask( \ (__v16su)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_ph(__m512i __A) { return (__m256h)__builtin_ia32_vcvtudq2ph512_mask( (__v16su)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_ph(__m256h __W, __mmask16 __U, __m512i __A) { return (__m256h)__builtin_ia32_vcvtudq2ph512_mask( (__v16su)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_ph(__mmask16 __U, __m512i __A) { return (__m256h)__builtin_ia32_vcvtudq2ph512_mask( (__v16su)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvtt_roundph_epi32(A, R) \ ((__m512i)__builtin_ia32_vcvttph2dq512_mask( \ (__v16hf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)(-1), \ (int)(R))) #define _mm512_mask_cvtt_roundph_epi32(W, U, A, R) \ ((__m512i)__builtin_ia32_vcvttph2dq512_mask((__v16hf)(A), (__v16si)(W), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvtt_roundph_epi32(U, A, R) \ ((__m512i)__builtin_ia32_vcvttph2dq512_mask((__v16hf)(A), \ (__v16si)_mm512_setzero_epi32(), \ (__mmask16)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttph_epi32(__m256h __A) { return (__m512i)__builtin_ia32_vcvttph2dq512_mask( (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttph_epi32(__m512i __W, __mmask16 __U, __m256h __A) { return (__m512i)__builtin_ia32_vcvttph2dq512_mask( (__v16hf)__A, (__v16si)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttph_epi32(__mmask16 __U, __m256h __A) { return (__m512i)__builtin_ia32_vcvttph2dq512_mask( (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvtt_roundph_epu32(A, R) \ ((__m512i)__builtin_ia32_vcvttph2udq512_mask( \ (__v16hf)(A), (__v16su)_mm512_undefined_epi32(), (__mmask16)(-1), \ (int)(R))) #define _mm512_mask_cvtt_roundph_epu32(W, U, A, R) \ ((__m512i)__builtin_ia32_vcvttph2udq512_mask((__v16hf)(A), (__v16su)(W), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvtt_roundph_epu32(U, A, R) \ ((__m512i)__builtin_ia32_vcvttph2udq512_mask( \ (__v16hf)(A), (__v16su)_mm512_setzero_epi32(), (__mmask16)(U), \ (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttph_epu32(__m256h __A) { return (__m512i)__builtin_ia32_vcvttph2udq512_mask( (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttph_epu32(__m512i __W, __mmask16 __U, __m256h __A) { return (__m512i)__builtin_ia32_vcvttph2udq512_mask( (__v16hf)__A, (__v16su)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttph_epu32(__mmask16 __U, __m256h __A) { return (__m512i)__builtin_ia32_vcvttph2udq512_mask( (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundepi64_ph(A, R) \ ((__m128h)__builtin_ia32_vcvtqq2ph512_mask( \ (__v8di)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) #define _mm512_mask_cvt_roundepi64_ph(W, U, A, R) \ ((__m128h)__builtin_ia32_vcvtqq2ph512_mask((__v8di)(A), (__v8hf)(W), \ (__mmask8)(U), (int)(R))) #define 
_mm512_maskz_cvt_roundepi64_ph(U, A, R) \ ((__m128h)__builtin_ia32_vcvtqq2ph512_mask( \ (__v8di)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_ph(__m512i __A) { return (__m128h)__builtin_ia32_vcvtqq2ph512_mask( (__v8di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m512i __A) { return (__m128h)__builtin_ia32_vcvtqq2ph512_mask( (__v8di)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_ph(__mmask8 __U, __m512i __A) { return (__m128h)__builtin_ia32_vcvtqq2ph512_mask( (__v8di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundph_epi64(A, R) \ ((__m512i)__builtin_ia32_vcvtph2qq512_mask((__v8hf)(A), \ (__v8di)_mm512_undefined_epi32(), \ (__mmask8)(-1), (int)(R))) #define _mm512_mask_cvt_roundph_epi64(W, U, A, R) \ ((__m512i)__builtin_ia32_vcvtph2qq512_mask((__v8hf)(A), (__v8di)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundph_epi64(U, A, R) \ ((__m512i)__builtin_ia32_vcvtph2qq512_mask( \ (__v8hf)(A), (__v8di)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtph_epi64(__m128h __A) { return (__m512i)__builtin_ia32_vcvtph2qq512_mask( (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_epi64(__m512i __W, __mmask8 __U, __m128h __A) { return (__m512i)__builtin_ia32_vcvtph2qq512_mask( (__v8hf)__A, (__v8di)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) { return (__m512i)__builtin_ia32_vcvtph2qq512_mask( (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundepu64_ph(A, R) \ ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask( \ (__v8du)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) #define _mm512_mask_cvt_roundepu64_ph(W, U, A, R) \ ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask((__v8du)(A), (__v8hf)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundepu64_ph(U, A, R) \ ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask( \ (__v8du)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_cvtepu64_ph(__m512i __A) { return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask( (__v8du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m512i __A) { return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask( (__v8du)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu64_ph(__mmask8 __U, __m512i __A) { return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask( (__v8du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvt_roundph_epu64(A, R) \ ((__m512i)__builtin_ia32_vcvtph2uqq512_mask( \ (__v8hf)(A), (__v8du)_mm512_undefined_epi32(), (__mmask8)(-1), \ (int)(R))) #define _mm512_mask_cvt_roundph_epu64(W, U, A, R) \ ((__m512i)__builtin_ia32_vcvtph2uqq512_mask((__v8hf)(A), (__v8du)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundph_epu64(U, A, R) \ 
((__m512i)__builtin_ia32_vcvtph2uqq512_mask( \ (__v8hf)(A), (__v8du)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtph_epu64(__m128h __A) { return (__m512i)__builtin_ia32_vcvtph2uqq512_mask( (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_epu64(__m512i __W, __mmask8 __U, __m128h __A) { return (__m512i)__builtin_ia32_vcvtph2uqq512_mask( (__v8hf)__A, (__v8du)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) { return (__m512i)__builtin_ia32_vcvtph2uqq512_mask( (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvtt_roundph_epi64(A, R) \ ((__m512i)__builtin_ia32_vcvttph2qq512_mask( \ (__v8hf)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8)(-1), \ (int)(R))) #define _mm512_mask_cvtt_roundph_epi64(W, U, A, R) \ ((__m512i)__builtin_ia32_vcvttph2qq512_mask((__v8hf)(A), (__v8di)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvtt_roundph_epi64(U, A, R) \ ((__m512i)__builtin_ia32_vcvttph2qq512_mask( \ (__v8hf)(A), (__v8di)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttph_epi64(__m128h __A) { return (__m512i)__builtin_ia32_vcvttph2qq512_mask( (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttph_epi64(__m512i __W, __mmask8 __U, __m128h __A) { return (__m512i)__builtin_ia32_vcvttph2qq512_mask( (__v8hf)__A, (__v8di)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) { return (__m512i)__builtin_ia32_vcvttph2qq512_mask( (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvtt_roundph_epu64(A, R) \ ((__m512i)__builtin_ia32_vcvttph2uqq512_mask( \ (__v8hf)(A), (__v8du)_mm512_undefined_epi32(), (__mmask8)(-1), \ (int)(R))) #define _mm512_mask_cvtt_roundph_epu64(W, U, A, R) \ ((__m512i)__builtin_ia32_vcvttph2uqq512_mask((__v8hf)(A), (__v8du)(W), \ (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvtt_roundph_epu64(U, A, R) \ ((__m512i)__builtin_ia32_vcvttph2uqq512_mask( \ (__v8hf)(A), (__v8du)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttph_epu64(__m128h __A) { return (__m512i)__builtin_ia32_vcvttph2uqq512_mask( (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttph_epu64(__m512i __W, __mmask8 __U, __m128h __A) { return (__m512i)__builtin_ia32_vcvttph2uqq512_mask( (__v8hf)__A, (__v8du)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) { return (__m512i)__builtin_ia32_vcvttph2uqq512_mask( (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_cvt_roundsh_i32(A, R) \ ((int)__builtin_ia32_vcvtsh2si32((__v8hf)(A), (int)(R))) static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvtsh_i32(__m128h __A) { return (int)__builtin_ia32_vcvtsh2si32((__v8hf)__A, _MM_FROUND_CUR_DIRECTION); } #define _mm_cvt_roundsh_u32(A, R) \ ((unsigned int)__builtin_ia32_vcvtsh2usi32((__v8hf)(A), (int)(R))) static 
__inline__ unsigned int __DEFAULT_FN_ATTRS128 _mm_cvtsh_u32(__m128h __A) { return (unsigned int)__builtin_ia32_vcvtsh2usi32((__v8hf)__A, _MM_FROUND_CUR_DIRECTION); } #ifdef __x86_64__ #define _mm_cvt_roundsh_i64(A, R) \ ((long long)__builtin_ia32_vcvtsh2si64((__v8hf)(A), (int)(R))) static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvtsh_i64(__m128h __A) { return (long long)__builtin_ia32_vcvtsh2si64((__v8hf)__A, _MM_FROUND_CUR_DIRECTION); } #define _mm_cvt_roundsh_u64(A, R) \ ((unsigned long long)__builtin_ia32_vcvtsh2usi64((__v8hf)(A), (int)(R))) static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvtsh_u64(__m128h __A) { return (unsigned long long)__builtin_ia32_vcvtsh2usi64( (__v8hf)__A, _MM_FROUND_CUR_DIRECTION); } #endif // __x86_64__ #define _mm_cvt_roundu32_sh(A, B, R) \ ((__m128h)__builtin_ia32_vcvtusi2sh((__v8hf)(A), (unsigned int)(B), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtu32_sh(__m128h __A, unsigned int __B) { __A[0] = __B; return __A; } #ifdef __x86_64__ #define _mm_cvt_roundu64_sh(A, B, R) \ ((__m128h)__builtin_ia32_vcvtusi642sh((__v8hf)(A), (unsigned long long)(B), \ (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtu64_sh(__m128h __A, unsigned long long __B) { __A[0] = __B; return __A; } #endif #define _mm_cvt_roundi32_sh(A, B, R) \ ((__m128h)__builtin_ia32_vcvtsi2sh((__v8hf)(A), (int)(B), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti32_sh(__m128h __A, int __B) { __A[0] = __B; return __A; } #ifdef __x86_64__ #define _mm_cvt_roundi64_sh(A, B, R) \ ((__m128h)__builtin_ia32_vcvtsi642sh((__v8hf)(A), (long long)(B), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti64_sh(__m128h __A, long long __B) { __A[0] = __B; return __A; } #endif #define _mm_cvtt_roundsh_i32(A, R) \ ((int)__builtin_ia32_vcvttsh2si32((__v8hf)(A), (int)(R))) static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsh_i32(__m128h __A) { return (int)__builtin_ia32_vcvttsh2si32((__v8hf)__A, _MM_FROUND_CUR_DIRECTION); } #ifdef __x86_64__ #define _mm_cvtt_roundsh_i64(A, R) \ ((long long)__builtin_ia32_vcvttsh2si64((__v8hf)(A), (int)(R))) static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvttsh_i64(__m128h __A) { return (long long)__builtin_ia32_vcvttsh2si64((__v8hf)__A, _MM_FROUND_CUR_DIRECTION); } #endif #define _mm_cvtt_roundsh_u32(A, R) \ ((unsigned int)__builtin_ia32_vcvttsh2usi32((__v8hf)(A), (int)(R))) static __inline__ unsigned int __DEFAULT_FN_ATTRS128 _mm_cvttsh_u32(__m128h __A) { return (unsigned int)__builtin_ia32_vcvttsh2usi32((__v8hf)__A, _MM_FROUND_CUR_DIRECTION); } #ifdef __x86_64__ #define _mm_cvtt_roundsh_u64(A, R) \ ((unsigned long long)__builtin_ia32_vcvttsh2usi64((__v8hf)(A), (int)(R))) static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvttsh_u64(__m128h __A) { return (unsigned long long)__builtin_ia32_vcvttsh2usi64( (__v8hf)__A, _MM_FROUND_CUR_DIRECTION); } #endif #define _mm512_cvtx_roundph_ps(A, R) \ ((__m512)__builtin_ia32_vcvtph2psx512_mask((__v16hf)(A), \ (__v16sf)_mm512_undefined_ps(), \ (__mmask16)(-1), (int)(R))) #define _mm512_mask_cvtx_roundph_ps(W, U, A, R) \ ((__m512)__builtin_ia32_vcvtph2psx512_mask((__v16hf)(A), (__v16sf)(W), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvtx_roundph_ps(U, A, R) \ ((__m512)__builtin_ia32_vcvtph2psx512_mask( \ (__v16hf)(A), (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtxph_ps(__m256h __A) { return (__m512)__builtin_ia32_vcvtph2psx512_mask( (__v16hf)__A, 
(__v16sf)_mm512_setzero_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtxph_ps(__m512 __W, __mmask16 __U, __m256h __A) { return (__m512)__builtin_ia32_vcvtph2psx512_mask( (__v16hf)__A, (__v16sf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtxph_ps(__mmask16 __U, __m256h __A) { return (__m512)__builtin_ia32_vcvtph2psx512_mask( (__v16hf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_cvtx_roundps_ph(A, R) \ ((__m256h)__builtin_ia32_vcvtps2phx512_mask((__v16sf)(A), \ (__v16hf)_mm256_undefined_ph(), \ (__mmask16)(-1), (int)(R))) #define _mm512_mask_cvtx_roundps_ph(W, U, A, R) \ ((__m256h)__builtin_ia32_vcvtps2phx512_mask((__v16sf)(A), (__v16hf)(W), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvtx_roundps_ph(U, A, R) \ ((__m256h)__builtin_ia32_vcvtps2phx512_mask( \ (__v16sf)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_cvtxps_ph(__m512 __A) { return (__m256h)__builtin_ia32_vcvtps2phx512_mask( (__v16sf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_mask_cvtxps_ph(__m256h __W, __mmask16 __U, __m512 __A) { return (__m256h)__builtin_ia32_vcvtps2phx512_mask( (__v16sf)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtxps_ph(__mmask16 __U, __m512 __A) { return (__m256h)__builtin_ia32_vcvtps2phx512_mask( (__v16sf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_fmadd_round_ph(A, B, C, R) \ ((__m512h)__builtin_ia32_vfmaddph512_mask( \ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \ (__mmask32)-1, (int)(R))) #define _mm512_mask_fmadd_round_ph(A, U, B, C, R) \ ((__m512h)__builtin_ia32_vfmaddph512_mask( \ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \ (__mmask32)(U), (int)(R))) #define _mm512_mask3_fmadd_round_ph(A, B, C, U, R) \ ((__m512h)__builtin_ia32_vfmaddph512_mask3( \ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \ (__mmask32)(U), (int)(R))) #define _mm512_maskz_fmadd_round_ph(U, A, B, C, R) \ ((__m512h)__builtin_ia32_vfmaddph512_maskz( \ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \ (__mmask32)(U), (int)(R))) #define _mm512_fmsub_round_ph(A, B, C, R) \ ((__m512h)__builtin_ia32_vfmaddph512_mask( \ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \ (__mmask32)-1, (int)(R))) #define _mm512_mask_fmsub_round_ph(A, U, B, C, R) \ ((__m512h)__builtin_ia32_vfmaddph512_mask( \ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \ (__mmask32)(U), (int)(R))) #define _mm512_maskz_fmsub_round_ph(U, A, B, C, R) \ ((__m512h)__builtin_ia32_vfmaddph512_maskz( \ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \ (__mmask32)(U), (int)(R))) #define _mm512_fnmadd_round_ph(A, B, C, R) \ ((__m512h)__builtin_ia32_vfmaddph512_mask( \ (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \ (__mmask32)-1, (int)(R))) #define _mm512_mask3_fnmadd_round_ph(A, B, C, U, R) \ ((__m512h)__builtin_ia32_vfmaddph512_mask3( \ -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \ (__mmask32)(U), (int)(R))) #define _mm512_maskz_fnmadd_round_ph(U, A, B, C, R) \ 
((__m512h)__builtin_ia32_vfmaddph512_maskz( \ -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \ (__mmask32)(U), (int)(R))) #define _mm512_fnmsub_round_ph(A, B, C, R) \ ((__m512h)__builtin_ia32_vfmaddph512_mask( \ (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \ (__mmask32)-1, (int)(R))) #define _mm512_maskz_fnmsub_round_ph(U, A, B, C, R) \ ((__m512h)__builtin_ia32_vfmaddph512_maskz( \ -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \ (__mmask32)(U), (int)(R))) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmadd_ph(__m512h __A, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) { return (__m512h)__builtin_ia32_vfmaddph512_mask3((__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddph512_maskz((__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmsub_ph(__m512h __A, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddph512_maskz( (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ph(__m512h __A, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B, (__v32hf)__C, (__mmask32)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) { return (__m512h)__builtin_ia32_vfmaddph512_mask3(-(__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddph512_maskz(-(__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ph(__m512h __A, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B, -(__v32hf)__C, (__mmask32)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddph512_maskz( -(__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, 
(__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_fmaddsub_round_ph(A, B, C, R) \ ((__m512h)__builtin_ia32_vfmaddsubph512_mask( \ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \ (__mmask32)-1, (int)(R))) #define _mm512_mask_fmaddsub_round_ph(A, U, B, C, R) \ ((__m512h)__builtin_ia32_vfmaddsubph512_mask( \ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \ (__mmask32)(U), (int)(R))) #define _mm512_mask3_fmaddsub_round_ph(A, B, C, U, R) \ ((__m512h)__builtin_ia32_vfmaddsubph512_mask3( \ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \ (__mmask32)(U), (int)(R))) #define _mm512_maskz_fmaddsub_round_ph(U, A, B, C, R) \ ((__m512h)__builtin_ia32_vfmaddsubph512_maskz( \ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \ (__mmask32)(U), (int)(R))) #define _mm512_fmsubadd_round_ph(A, B, C, R) \ ((__m512h)__builtin_ia32_vfmaddsubph512_mask( \ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \ (__mmask32)-1, (int)(R))) #define _mm512_mask_fmsubadd_round_ph(A, U, B, C, R) \ ((__m512h)__builtin_ia32_vfmaddsubph512_mask( \ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \ (__mmask32)(U), (int)(R))) #define _mm512_maskz_fmsubadd_round_ph(U, A, B, C, R) \ ((__m512h)__builtin_ia32_vfmaddsubph512_maskz( \ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \ (__mmask32)(U), (int)(R))) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ph(__m512h __A, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddsubph512_mask( (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddsubph512_mask( (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) { return (__m512h)__builtin_ia32_vfmaddsubph512_mask3( (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddsubph512_maskz( (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ph(__m512h __A, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddsubph512_mask( (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddsubph512_mask( (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddsubph512_maskz( (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_mask3_fmsub_round_ph(A, B, C, U, R) \ ((__m512h)__builtin_ia32_vfmsubph512_mask3( \ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \ (__mmask32)(U), (int)(R))) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_ph(__m512h 
__A, __m512h __B, __m512h __C, __mmask32 __U) { return (__m512h)__builtin_ia32_vfmsubph512_mask3((__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_mask3_fmsubadd_round_ph(A, B, C, U, R) \ ((__m512h)__builtin_ia32_vfmsubaddph512_mask3( \ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \ (__mmask32)(U), (int)(R))) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) { return (__m512h)__builtin_ia32_vfmsubaddph512_mask3( (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_mask_fnmadd_round_ph(A, U, B, C, R) \ ((__m512h)__builtin_ia32_vfmaddph512_mask( \ (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \ (__mmask32)(U), (int)(R))) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B, (__v32hf)__C, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_mask_fnmsub_round_ph(A, U, B, C, R) \ ((__m512h)__builtin_ia32_vfmaddph512_mask( \ (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \ (__mmask32)(U), (int)(R))) #define _mm512_mask3_fnmsub_round_ph(A, B, C, U, R) \ ((__m512h)__builtin_ia32_vfmsubph512_mask3( \ -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \ (__mmask32)(U), (int)(R))) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B, -(__v32hf)__C, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) { return (__m512h)__builtin_ia32_vfmsubph512_mask3(-(__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_sh(__m128h __W, __m128h __A, __m128h __B) { return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A, (__v8hf)__B, (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A, (__v8hf)__B, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_fmadd_round_sh(A, B, C, R) \ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \ (__mmask8)-1, (int)(R))) #define _mm_mask_fmadd_round_sh(W, U, A, B, R) \ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \ (__v8hf)(__m128h)(W), (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, (__v8hf)__B, (__v8hf)__C, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_maskz_fmadd_round_sh(U, A, B, C, R) \ ((__m128h)__builtin_ia32_vfmaddsh3_maskz( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) { return __builtin_ia32_vfmaddsh3_mask3((__v8hf)__W, (__v8hf)__X, (__v8hf)__Y, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define 
_mm_mask3_fmadd_round_sh(W, X, Y, U, R) \ ((__m128h)__builtin_ia32_vfmaddsh3_mask3( \ (__v8hf)(__m128h)(W), (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_sh(__m128h __W, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A, -(__v8hf)__B, (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A, -(__v8hf)__B, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_fmsub_round_sh(A, B, C, R) \ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \ (__mmask8)-1, (int)(R))) #define _mm_mask_fmsub_round_sh(W, U, A, B, R) \ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \ (__v8hf)(__m128h)(W), (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_maskz_fmsub_round_sh(U, A, B, C, R) \ ((__m128h)__builtin_ia32_vfmaddsh3_maskz( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \ (__mmask8)(U), (int)R)) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) { return __builtin_ia32_vfmsubsh3_mask3((__v8hf)__W, (__v8hf)__X, (__v8hf)__Y, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask3_fmsub_round_sh(W, X, Y, U, R) \ ((__m128h)__builtin_ia32_vfmsubsh3_mask3( \ (__v8hf)(__m128h)(W), (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_sh(__m128h __W, __m128h __A, __m128h __B) { return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, (__v8hf)__B, (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, (__v8hf)__B, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_fnmadd_round_sh(A, B, C, R) \ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \ (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \ (__mmask8)-1, (int)(R))) #define _mm_mask_fnmadd_round_sh(W, U, A, B, R) \ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \ (__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_maskz_fnmadd_round_sh(U, A, B, C, R) \ ((__m128h)__builtin_ia32_vfmaddsh3_maskz( \ (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) { return __builtin_ia32_vfmaddsh3_mask3((__v8hf)__W, -(__v8hf)__X, (__v8hf)__Y, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask3_fnmadd_round_sh(W, X, Y, U, R) \ ((__m128h)__builtin_ia32_vfmaddsh3_mask3( \ (__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(X), 
(__v8hf)(__m128h)(Y), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_sh(__m128h __W, __m128h __A, __m128h __B) { return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, -(__v8hf)__B, (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, -(__v8hf)__B, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_fnmsub_round_sh(A, B, C, R) \ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \ (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \ (__mmask8)-1, (int)(R))) #define _mm_mask_fnmsub_round_sh(W, U, A, B, R) \ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \ (__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_maskz_fnmsub_round_sh(U, A, B, C, R) \ ((__m128h)__builtin_ia32_vfmaddsh3_maskz( \ (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) { return __builtin_ia32_vfmsubsh3_mask3((__v8hf)__W, -(__v8hf)__X, (__v8hf)__Y, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask3_fnmsub_round_sh(W, X, Y, U, R) \ ((__m128h)__builtin_ia32_vfmsubsh3_mask3( \ (__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmadd_sch(__m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfcmaddcsh_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fcmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfcmaddcsh_round_mask( (__v4sf)__A, (__v4sf)(__B), (__v4sf)(__C), __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fcmadd_sch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfcmaddcsh_maskz((__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fcmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_vfcmaddcsh_round_mask3( (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_fcmadd_round_sch(A, B, C, R) \ ((__m128h)__builtin_ia32_vfcmaddcsh_mask( \ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \ (__mmask8)-1, (int)(R))) #define _mm_mask_fcmadd_round_sch(A, U, B, C, R) \ ((__m128h)__builtin_ia32_vfcmaddcsh_round_mask( \ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_fcmadd_round_sch(U, A, B, C, R) \ ((__m128h)__builtin_ia32_vfcmaddcsh_maskz( \ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \ (__mmask8)(U), (int)(R))) #define _mm_mask3_fcmadd_round_sch(A, B, C, U, R) \ ((__m128h)__builtin_ia32_vfcmaddcsh_round_mask3( \ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h 
__DEFAULT_FN_ATTRS128 _mm_fmadd_sch(__m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfmaddcsh_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfmaddcsh_round_mask( (__v4sf)__A, (__v4sf)(__B), (__v4sf)(__C), __U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfmaddcsh_maskz((__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_vfmaddcsh_round_mask3( (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_fmadd_round_sch(A, B, C, R) \ ((__m128h)__builtin_ia32_vfmaddcsh_mask( \ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \ (__mmask8)-1, (int)(R))) #define _mm_mask_fmadd_round_sch(A, U, B, C, R) \ ((__m128h)__builtin_ia32_vfmaddcsh_round_mask( \ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_fmadd_round_sch(U, A, B, C, R) \ ((__m128h)__builtin_ia32_vfmaddcsh_maskz( \ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \ (__mmask8)(U), (int)(R))) #define _mm_mask3_fmadd_round_sch(A, B, C, U, R) \ ((__m128h)__builtin_ia32_vfmaddcsh_round_mask3( \ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \ (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmul_sch(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_vfcmulcsh_mask( (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fcmul_sch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_vfcmulcsh_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fcmul_sch(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_vfcmulcsh_mask( (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_fcmul_round_sch(A, B, R) \ ((__m128h)__builtin_ia32_vfcmulcsh_mask( \ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), \ (__v4sf)(__m128h)_mm_undefined_ph(), (__mmask8)-1, (int)(R))) #define _mm_mask_fcmul_round_sch(W, U, A, B, R) \ ((__m128h)__builtin_ia32_vfcmulcsh_mask( \ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_fcmul_round_sch(U, A, B, R) \ ((__m128h)__builtin_ia32_vfcmulcsh_mask( \ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), \ (__v4sf)(__m128h)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmul_sch(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_vfmulcsh_mask( (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmul_sch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_vfmulcsh_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m128h 
__DEFAULT_FN_ATTRS128 _mm_maskz_fmul_sch(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_vfmulcsh_mask( (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm_fmul_round_sch(A, B, R) \ ((__m128h)__builtin_ia32_vfmulcsh_mask( \ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), \ (__v4sf)(__m128h)_mm_undefined_ph(), (__mmask8)-1, (int)(R))) #define _mm_mask_fmul_round_sch(W, U, A, B, R) \ ((__m128h)__builtin_ia32_vfmulcsh_mask( \ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(W), \ (__mmask8)(U), (int)(R))) #define _mm_maskz_fmul_round_sch(U, A, B, R) \ ((__m128h)__builtin_ia32_vfmulcsh_mask( \ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), \ (__v4sf)(__m128h)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fcmul_pch(__m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_vfcmulcph512_mask( (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_undefined_ph(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_fcmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_vfcmulcph512_mask((__v16sf)__A, (__v16sf)__B, (__v16sf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_fcmul_pch(__mmask16 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_vfcmulcph512_mask( (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_setzero_ph(), (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_fcmul_round_pch(A, B, R) \ ((__m512h)__builtin_ia32_vfcmulcph512_mask( \ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), \ (__v16sf)(__m512h)_mm512_undefined_ph(), (__mmask16)-1, (int)(R))) #define _mm512_mask_fcmul_round_pch(W, U, A, B, R) \ ((__m512h)__builtin_ia32_vfcmulcph512_mask( \ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(W), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_fcmul_round_pch(U, A, B, R) \ ((__m512h)__builtin_ia32_vfcmulcph512_mask( \ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), \ (__v16sf)(__m512h)_mm512_setzero_ph(), (__mmask16)(U), (int)(R))) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmul_pch(__m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_vfmulcph512_mask( (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_undefined_ph(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_fmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_vfmulcph512_mask((__v16sf)__A, (__v16sf)__B, (__v16sf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_fmul_pch(__mmask16 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_vfmulcph512_mask( (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_setzero_ph(), (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_fmul_round_pch(A, B, R) \ ((__m512h)__builtin_ia32_vfmulcph512_mask( \ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), \ (__v16sf)(__m512h)_mm512_undefined_ph(), (__mmask16)-1, (int)(R))) #define _mm512_mask_fmul_round_pch(W, U, A, B, R) \ ((__m512h)__builtin_ia32_vfmulcph512_mask( \ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(W), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_fmul_round_pch(U, A, B, R) \ ((__m512h)__builtin_ia32_vfmulcph512_mask( \ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), \ (__v16sf)(__m512h)_mm512_setzero_ph(), (__mmask16)(U), 
(int)(R))) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fcmadd_pch(__m512h __A, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfcmaddcph512_mask3( (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_fcmadd_pch(__m512h __A, __mmask16 __U, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfcmaddcph512_mask( (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask3_fcmadd_pch(__m512h __A, __m512h __B, __m512h __C, __mmask16 __U) { return (__m512h)__builtin_ia32_vfcmaddcph512_mask3( (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_fcmadd_pch(__mmask16 __U, __m512h __A, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfcmaddcph512_maskz( (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_fcmadd_round_pch(A, B, C, R) \ ((__m512h)__builtin_ia32_vfcmaddcph512_mask3( \ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_fcmadd_round_pch(A, U, B, C, R) \ ((__m512h)__builtin_ia32_vfcmaddcph512_mask( \ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \ (__mmask16)(U), (int)(R))) #define _mm512_mask3_fcmadd_round_pch(A, B, C, U, R) \ ((__m512h)__builtin_ia32_vfcmaddcph512_mask3( \ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_fcmadd_round_pch(U, A, B, C, R) \ ((__m512h)__builtin_ia32_vfcmaddcph512_maskz( \ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \ (__mmask16)(U), (int)(R))) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmadd_pch(__m512h __A, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddcph512_mask3((__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)-1, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_pch(__m512h __A, __mmask16 __U, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddcph512_mask((__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pch(__m512h __A, __m512h __B, __m512h __C, __mmask16 __U) { return (__m512h)__builtin_ia32_vfmaddcph512_mask3( (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pch(__mmask16 __U, __m512h __A, __m512h __B, __m512h __C) { return (__m512h)__builtin_ia32_vfmaddcph512_maskz( (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); } #define _mm512_fmadd_round_pch(A, B, C, R) \ ((__m512h)__builtin_ia32_vfmaddcph512_mask3( \ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_fmadd_round_pch(A, U, B, C, R) \ ((__m512h)__builtin_ia32_vfmaddcph512_mask( \ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \ (__mmask16)(U), (int)(R))) #define _mm512_mask3_fmadd_round_pch(A, B, C, U, R) \ ((__m512h)__builtin_ia32_vfmaddcph512_mask3( \ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \ (__mmask16)(U), (int)(R))) #define _mm512_maskz_fmadd_round_pch(U, A, B, C, R) \ 
((__m512h)__builtin_ia32_vfmaddcph512_maskz( \ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \ (__mmask16)(U), (int)(R))) static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ph(__m512h __W) { return __builtin_ia32_reduce_fadd_ph512(-0.0f16, __W); } static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ph(__m512h __W) { return __builtin_ia32_reduce_fmul_ph512(1.0f16, __W); } static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ph(__m512h __V) { return __builtin_ia32_reduce_fmax_ph512(__V); } static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ph(__m512h __V) { return __builtin_ia32_reduce_fmin_ph512(__V); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_blend_ph(__mmask32 __U, __m512h __A, __m512h __W) { return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)__W, (__v32hf)__A); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_permutex2var_ph(__m512h __A, __m512i __I, __m512h __B) { return (__m512h)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I, (__v32hi)__B); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_permutexvar_ph(__m512i __A, __m512h __B) { return (__m512h)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); } // intrinsics below are alias for f*mul_*ch #define _mm512_mul_pch(A, B) _mm512_fmul_pch(A, B) #define _mm512_mask_mul_pch(W, U, A, B) _mm512_mask_fmul_pch(W, U, A, B) #define _mm512_maskz_mul_pch(U, A, B) _mm512_maskz_fmul_pch(U, A, B) #define _mm512_mul_round_pch(A, B, R) _mm512_fmul_round_pch(A, B, R) #define _mm512_mask_mul_round_pch(W, U, A, B, R) \ _mm512_mask_fmul_round_pch(W, U, A, B, R) #define _mm512_maskz_mul_round_pch(U, A, B, R) \ _mm512_maskz_fmul_round_pch(U, A, B, R) #define _mm512_cmul_pch(A, B) _mm512_fcmul_pch(A, B) #define _mm512_mask_cmul_pch(W, U, A, B) _mm512_mask_fcmul_pch(W, U, A, B) #define _mm512_maskz_cmul_pch(U, A, B) _mm512_maskz_fcmul_pch(U, A, B) #define _mm512_cmul_round_pch(A, B, R) _mm512_fcmul_round_pch(A, B, R) #define _mm512_mask_cmul_round_pch(W, U, A, B, R) \ _mm512_mask_fcmul_round_pch(W, U, A, B, R) #define _mm512_maskz_cmul_round_pch(U, A, B, R) \ _mm512_maskz_fcmul_round_pch(U, A, B, R) #define _mm_mul_sch(A, B) _mm_fmul_sch(A, B) #define _mm_mask_mul_sch(W, U, A, B) _mm_mask_fmul_sch(W, U, A, B) #define _mm_maskz_mul_sch(U, A, B) _mm_maskz_fmul_sch(U, A, B) #define _mm_mul_round_sch(A, B, R) _mm_fmul_round_sch(A, B, R) #define _mm_mask_mul_round_sch(W, U, A, B, R) \ _mm_mask_fmul_round_sch(W, U, A, B, R) #define _mm_maskz_mul_round_sch(U, A, B, R) _mm_maskz_fmul_round_sch(U, A, B, R) #define _mm_cmul_sch(A, B) _mm_fcmul_sch(A, B) #define _mm_mask_cmul_sch(W, U, A, B) _mm_mask_fcmul_sch(W, U, A, B) #define _mm_maskz_cmul_sch(U, A, B) _mm_maskz_fcmul_sch(U, A, B) #define _mm_cmul_round_sch(A, B, R) _mm_fcmul_round_sch(A, B, R) #define _mm_mask_cmul_round_sch(W, U, A, B, R) \ _mm_mask_fcmul_round_sch(W, U, A, B, R) #define _mm_maskz_cmul_round_sch(U, A, B, R) \ _mm_maskz_fcmul_round_sch(U, A, B, R) #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #undef __DEFAULT_FN_ATTRS512 #endif #endif /*===---- avx512vlbitalgintrin.h - BITALG intrinsics -----------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __AVX512VLBITALGINTRIN_H #define __AVX512VLBITALGINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512bitalg,no-evex512"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512bitalg,no-evex512"), \ __min_vector_width__(256))) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi16(__m256i __A) { return (__m256i) __builtin_ia32_vpopcntw_256((__v16hi) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) { return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U, (__v16hi) _mm256_popcnt_epi16(__B), (__v16hi) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) { return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(), __U, __B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi16(__m128i __A) { return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) { return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U, (__v8hi) _mm_popcnt_epi16(__B), (__v8hi) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) { return _mm_mask_popcnt_epi16((__m128i) _mm_setzero_si128(), __U, __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi8(__m256i __A) { return (__m256i) __builtin_ia32_vpopcntb_256((__v32qi) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) { return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U, (__v32qi) _mm256_popcnt_epi8(__B), (__v32qi) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) { return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(), __U, __B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi8(__m128i __A) { return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) { return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U, (__v16qi) _mm_popcnt_epi8(__B), (__v16qi) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) { return _mm_mask_popcnt_epi8((__m128i) _mm_setzero_si128(), __U, __B); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_bitshuffle_epi64_mask(__mmask32 __U, __m256i __A, __m256i __B) { return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A, (__v32qi) __B, __U); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B) { return _mm256_mask_bitshuffle_epi64_mask((__mmask32) -1, __A, __B); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_mask_bitshuffle_epi64_mask(__mmask16 __U, __m128i __A, __m128i __B) { return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A, (__v16qi) __B, __U); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_bitshuffle_epi64_mask(__m128i 
__A, __m128i __B) { return _mm_mask_bitshuffle_epi64_mask((__mmask16) -1, __A, __B); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif /*===------------- avx512vnniintrin.h - VNNI intrinsics ------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __AVX512VNNIINTRIN_H #define __AVX512VNNIINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vnni,evex512"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_vpdpbusd512((__v16si)__S, (__v16si)__A, (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_dpbusd_epi32(__S, __A, __B), (__v16si)__S); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_dpbusd_epi32(__S, __A, __B), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_vpdpbusds512((__v16si)__S, (__v16si)__A, (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_dpbusds_epi32(__S, __A, __B), (__v16si)__S); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_dpbusds_epi32(__S, __A, __B), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_vpdpwssd512((__v16si)__S, (__v16si)__A, (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_dpwssd_epi32(__S, __A, __B), (__v16si)__S); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_dpwssd_epi32(__S, __A, __B), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_vpdpwssds512((__v16si)__S, (__v16si)__A, (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_dpwssds_epi32(__S, __A, __B), (__v16si)__S); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, 
(__v16si)_mm512_dpwssds_epi32(__S, __A, __B), (__v16si)_mm512_setzero_si512()); } #undef __DEFAULT_FN_ATTRS #endif /* * include/omp-tools.h.var */ //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef __OMPT__ #define __OMPT__ /***************************************************************************** * system include files *****************************************************************************/ #include <stdint.h> #include <stddef.h> #ifdef DEPRECATION_WARNINGS # ifdef __cplusplus # define DEPRECATED_51 [[deprecated("as of 5.1")]] # else # define DEPRECATED_51 __attribute__((deprecated("as of 5.1"))) #endif #else #define DEPRECATED_51 #endif /***************************************************************************** * iteration macros *****************************************************************************/ #define FOREACH_OMPT_INQUIRY_FN(macro) \ macro (ompt_enumerate_states) \ macro (ompt_enumerate_mutex_impls) \ \ macro (ompt_set_callback) \ macro (ompt_get_callback) \ \ macro (ompt_get_state) \ \ macro (ompt_get_parallel_info) \ macro (ompt_get_task_info) \ macro (ompt_get_task_memory) \ macro (ompt_get_thread_data) \ macro (ompt_get_unique_id) \ macro (ompt_finalize_tool) \ \ macro(ompt_get_num_procs) \ macro(ompt_get_num_places) \ macro(ompt_get_place_proc_ids) \ macro(ompt_get_place_num) \ macro(ompt_get_partition_place_nums) \ macro(ompt_get_proc_id) \ \ macro(ompt_get_target_info) \ macro(ompt_get_num_devices) #define FOREACH_OMPT_STATE(macro) \ \ /* first available state */ \ macro (ompt_state_undefined, 0x102) /* undefined thread state */ \ \ /* work states (0..15) */ \ macro (ompt_state_work_serial, 0x000) /* working outside parallel */ \ macro (ompt_state_work_parallel, 0x001) /* working within parallel */ \ macro (ompt_state_work_reduction, 0x002) /* performing a reduction */ \ \ /* barrier wait states (16..31) */ \ macro (ompt_state_wait_barrier, 0x010) /* waiting at a barrier */ \ macro (ompt_state_wait_barrier_implicit_parallel, 0x011) \ /* implicit barrier at the end of parallel region */\ macro (ompt_state_wait_barrier_implicit_workshare, 0x012) \ /* implicit barrier at the end of worksharing */ \ macro (ompt_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \ macro (ompt_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \ \ /* task wait states (32..63) */ \ macro (ompt_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \ macro (ompt_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */ \ \ /* mutex wait states (64..127) */ \ macro (ompt_state_wait_mutex, 0x040) \ macro (ompt_state_wait_lock, 0x041) /* waiting for lock */ \ macro (ompt_state_wait_critical, 0x042) /* waiting for critical */ \ macro (ompt_state_wait_atomic, 0x043) /* waiting for atomic */ \ macro (ompt_state_wait_ordered, 0x044) /* waiting for ordered */ \ \ /* target wait states (128..255) */ \ macro (ompt_state_wait_target, 0x080) /* waiting for target region */ \ macro (ompt_state_wait_target_map, 0x081) /* waiting for target data mapping operation */ \ macro (ompt_state_wait_target_update, 0x082) /* waiting for target update operation */ \ \ /* misc (256..511) */ \ macro (ompt_state_idle, 0x100) /* waiting for work */ \ macro 
(ompt_state_overhead, 0x101) /* overhead excluding wait states */ \ \ /* implementation-specific states (512..) */ #define FOREACH_KMP_MUTEX_IMPL(macro) \ macro (kmp_mutex_impl_none, 0) /* unknown implementation */ \ macro (kmp_mutex_impl_spin, 1) /* based on spin */ \ macro (kmp_mutex_impl_queuing, 2) /* based on some fair policy */ \ macro (kmp_mutex_impl_speculative, 3) /* based on HW-supported speculation */ #define FOREACH_OMPT_HOST_EVENT(macro) \ \ /*--- Mandatory Events ---*/ \ macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \ macro (ompt_callback_thread_end, ompt_callback_thread_end_t, 2) /* thread end */ \ \ macro (ompt_callback_parallel_begin, ompt_callback_parallel_begin_t, 3) /* parallel begin */ \ macro (ompt_callback_parallel_end, ompt_callback_parallel_end_t, 4) /* parallel end */ \ \ macro (ompt_callback_task_create, ompt_callback_task_create_t, 5) /* task begin */ \ macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \ macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \ \ macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \ \ /* Optional Events */ \ macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 16) /* sync region wait begin or end */ \ \ macro (ompt_callback_mutex_released, ompt_callback_mutex_t, 17) /* mutex released */ \ \ macro (ompt_callback_dependences, ompt_callback_dependences_t, 18) /* report task dependences */ \ macro (ompt_callback_task_dependence, ompt_callback_task_dependence_t, 19) /* report task dependence */ \ \ macro (ompt_callback_work, ompt_callback_work_t, 20) /* task at work begin or end */ \ \ macro (ompt_callback_masked, ompt_callback_masked_t, 21) /* task at masked begin or end */ \ \ macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 23) /* sync region begin or end */ \ \ macro (ompt_callback_lock_init, ompt_callback_mutex_acquire_t, 24) /* lock init */ \ macro (ompt_callback_lock_destroy, ompt_callback_mutex_t, 25) /* lock destroy */ \ \ macro (ompt_callback_mutex_acquire, ompt_callback_mutex_acquire_t, 26) /* mutex acquire */ \ macro (ompt_callback_mutex_acquired, ompt_callback_mutex_t, 27) /* mutex acquired */ \ \ macro (ompt_callback_nest_lock, ompt_callback_nest_lock_t, 28) /* nest lock */ \ \ macro (ompt_callback_flush, ompt_callback_flush_t, 29) /* after executing flush */ \ \ macro (ompt_callback_cancel, ompt_callback_cancel_t, 30) /* cancel innermost binding region */ \ \ macro (ompt_callback_reduction, ompt_callback_sync_region_t, 31) /* reduction */ \ \ macro (ompt_callback_dispatch, ompt_callback_dispatch_t, 32) /* dispatch of work */ \ macro (ompt_callback_error, ompt_callback_error_t, 37) /* error */ #define FOREACH_OMPT_DEVICE_EVENT(macro) \ /*--- Mandatory Events ---*/ \ macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \ macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \ \ macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \ macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ #define FOREACH_OMPT_NOEMI_EVENT(macro) \ /*--- Mandatory Events ---*/ \ macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \ macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \ macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 
10) /* target submit */ \ /* Optional Events */ \ macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ #define FOREACH_OMPT_EMI_EVENT(macro) \ /*--- Mandatory Events ---*/ \ macro (ompt_callback_target_emi, ompt_callback_target_emi_t, 33) /* target */ \ macro (ompt_callback_target_data_op_emi,ompt_callback_target_data_op_emi_t,34) /* target data op */ \ macro (ompt_callback_target_submit_emi, ompt_callback_target_submit_emi_t, 35) /* target submit */ \ /* Optional Events */ \ macro (ompt_callback_target_map_emi, ompt_callback_target_map_emi_t, 36) /* target map */ #define FOREACH_OMPT_50_TARGET_EVENT(macro) \ FOREACH_OMPT_DEVICE_EVENT(macro) \ FOREACH_OMPT_NOEMI_EVENT(macro) #define FOREACH_OMPT_51_TARGET_EVENT(macro) \ FOREACH_OMPT_DEVICE_EVENT(macro) \ FOREACH_OMPT_EMI_EVENT(macro) #define FOREACH_OMPT_EVENT(macro) \ FOREACH_OMPT_HOST_EVENT(macro) \ FOREACH_OMPT_DEVICE_EVENT(macro) \ FOREACH_OMPT_NOEMI_EVENT(macro) \ FOREACH_OMPT_EMI_EVENT(macro) #define FOREACH_OMPT_51_EVENT(macro) \ FOREACH_OMPT_HOST_EVENT(macro) \ FOREACH_OMPT_DEVICE_EVENT(macro) \ FOREACH_OMPT_EMI_EVENT(macro) /***************************************************************************** * implementation specific types *****************************************************************************/ typedef enum kmp_mutex_impl_t { #define kmp_mutex_impl_macro(impl, code) impl = code, FOREACH_KMP_MUTEX_IMPL(kmp_mutex_impl_macro) #undef kmp_mutex_impl_macro } kmp_mutex_impl_t; /***************************************************************************** * definitions generated from spec *****************************************************************************/ #if defined(__cplusplus) extern "C" { #endif typedef enum ompt_callbacks_t { ompt_callback_thread_begin = 1, ompt_callback_thread_end = 2, ompt_callback_parallel_begin = 3, ompt_callback_parallel_end = 4, ompt_callback_task_create = 5, ompt_callback_task_schedule = 6, ompt_callback_implicit_task = 7, ompt_callback_target = 8, ompt_callback_target_data_op = 9, ompt_callback_target_submit = 10, ompt_callback_control_tool = 11, ompt_callback_device_initialize = 12, ompt_callback_device_finalize = 13, ompt_callback_device_load = 14, ompt_callback_device_unload = 15, ompt_callback_sync_region_wait = 16, ompt_callback_mutex_released = 17, ompt_callback_dependences = 18, ompt_callback_task_dependence = 19, ompt_callback_work = 20, ompt_callback_master DEPRECATED_51 = 21, ompt_callback_masked = 21, ompt_callback_target_map = 22, ompt_callback_sync_region = 23, ompt_callback_lock_init = 24, ompt_callback_lock_destroy = 25, ompt_callback_mutex_acquire = 26, ompt_callback_mutex_acquired = 27, ompt_callback_nest_lock = 28, ompt_callback_flush = 29, ompt_callback_cancel = 30, ompt_callback_reduction = 31, ompt_callback_dispatch = 32, ompt_callback_target_emi = 33, ompt_callback_target_data_op_emi = 34, ompt_callback_target_submit_emi = 35, ompt_callback_target_map_emi = 36, ompt_callback_error = 37 } ompt_callbacks_t; typedef enum ompt_record_t { ompt_record_ompt = 1, ompt_record_native = 2, ompt_record_invalid = 3 } ompt_record_t; typedef enum ompt_record_native_t { ompt_record_native_info = 1, ompt_record_native_event = 2 } ompt_record_native_t; typedef enum ompt_set_result_t { ompt_set_error = 0, ompt_set_never = 1, ompt_set_impossible = 2, ompt_set_sometimes = 3, ompt_set_sometimes_paired = 4, ompt_set_always = 5 } ompt_set_result_t; typedef uint64_t ompt_id_t; typedef uint64_t ompt_device_time_t; typedef uint64_t ompt_buffer_cursor_t; 
typedef enum ompt_thread_t { ompt_thread_initial = 1, ompt_thread_worker = 2, ompt_thread_other = 3, ompt_thread_unknown = 4 } ompt_thread_t; typedef enum ompt_scope_endpoint_t { ompt_scope_begin = 1, ompt_scope_end = 2, ompt_scope_beginend = 3 } ompt_scope_endpoint_t; typedef enum ompt_dispatch_t { ompt_dispatch_iteration = 1, ompt_dispatch_section = 2, ompt_dispatch_ws_loop_chunk = 3, ompt_dispatch_taskloop_chunk = 4, ompt_dispatch_distribute_chunk = 5 } ompt_dispatch_t; typedef enum ompt_sync_region_t { ompt_sync_region_barrier DEPRECATED_51 = 1, ompt_sync_region_barrier_implicit DEPRECATED_51 = 2, ompt_sync_region_barrier_explicit = 3, ompt_sync_region_barrier_implementation = 4, ompt_sync_region_taskwait = 5, ompt_sync_region_taskgroup = 6, ompt_sync_region_reduction = 7, ompt_sync_region_barrier_implicit_workshare = 8, ompt_sync_region_barrier_implicit_parallel = 9, ompt_sync_region_barrier_teams = 10 } ompt_sync_region_t; typedef enum ompt_target_data_op_t { ompt_target_data_alloc = 1, ompt_target_data_transfer_to_device = 2, ompt_target_data_transfer_from_device = 3, ompt_target_data_delete = 4, ompt_target_data_associate = 5, ompt_target_data_disassociate = 6, ompt_target_data_alloc_async = 17, ompt_target_data_transfer_to_device_async = 18, ompt_target_data_transfer_from_device_async = 19, ompt_target_data_delete_async = 20 } ompt_target_data_op_t; typedef enum ompt_work_t { ompt_work_loop = 1, ompt_work_sections = 2, ompt_work_single_executor = 3, ompt_work_single_other = 4, ompt_work_workshare = 5, ompt_work_distribute = 6, ompt_work_taskloop = 7, ompt_work_scope = 8, ompt_work_loop_static = 10, ompt_work_loop_dynamic = 11, ompt_work_loop_guided = 12, ompt_work_loop_other = 13 } ompt_work_t; typedef enum ompt_mutex_t { ompt_mutex_lock = 1, ompt_mutex_test_lock = 2, ompt_mutex_nest_lock = 3, ompt_mutex_test_nest_lock = 4, ompt_mutex_critical = 5, ompt_mutex_atomic = 6, ompt_mutex_ordered = 7 } ompt_mutex_t; typedef enum ompt_native_mon_flag_t { ompt_native_data_motion_explicit = 0x01, ompt_native_data_motion_implicit = 0x02, ompt_native_kernel_invocation = 0x04, ompt_native_kernel_execution = 0x08, ompt_native_driver = 0x10, ompt_native_runtime = 0x20, ompt_native_overhead = 0x40, ompt_native_idleness = 0x80 } ompt_native_mon_flag_t; typedef enum ompt_task_flag_t { ompt_task_initial = 0x00000001, ompt_task_implicit = 0x00000002, ompt_task_explicit = 0x00000004, ompt_task_target = 0x00000008, ompt_task_taskwait = 0x00000010, ompt_task_undeferred = 0x08000000, ompt_task_untied = 0x10000000, ompt_task_final = 0x20000000, ompt_task_mergeable = 0x40000000, ompt_task_merged = 0x80000000 } ompt_task_flag_t; typedef enum ompt_task_status_t { ompt_task_complete = 1, ompt_task_yield = 2, ompt_task_cancel = 3, ompt_task_detach = 4, ompt_task_early_fulfill = 5, ompt_task_late_fulfill = 6, ompt_task_switch = 7, ompt_taskwait_complete = 8 } ompt_task_status_t; typedef enum ompt_target_t { ompt_target = 1, ompt_target_enter_data = 2, ompt_target_exit_data = 3, ompt_target_update = 4, ompt_target_nowait = 9, ompt_target_enter_data_nowait = 10, ompt_target_exit_data_nowait = 11, ompt_target_update_nowait = 12 } ompt_target_t; typedef enum ompt_parallel_flag_t { ompt_parallel_invoker_program = 0x00000001, ompt_parallel_invoker_runtime = 0x00000002, ompt_parallel_league = 0x40000000, ompt_parallel_team = 0x80000000 } ompt_parallel_flag_t; typedef enum ompt_target_map_flag_t { ompt_target_map_flag_to = 0x01, ompt_target_map_flag_from = 0x02, ompt_target_map_flag_alloc = 0x04, 
ompt_target_map_flag_release = 0x08, ompt_target_map_flag_delete = 0x10, ompt_target_map_flag_implicit = 0x20 } ompt_target_map_flag_t; typedef enum ompt_dependence_type_t { ompt_dependence_type_in = 1, ompt_dependence_type_out = 2, ompt_dependence_type_inout = 3, ompt_dependence_type_mutexinoutset = 4, ompt_dependence_type_source = 5, ompt_dependence_type_sink = 6, ompt_dependence_type_inoutset = 7, ompt_dependence_type_out_all_memory = 34, ompt_dependence_type_inout_all_memory = 35 } ompt_dependence_type_t; typedef enum ompt_severity_t { ompt_warning = 1, ompt_fatal = 2 } ompt_severity_t; typedef enum ompt_cancel_flag_t { ompt_cancel_parallel = 0x01, ompt_cancel_sections = 0x02, ompt_cancel_loop = 0x04, ompt_cancel_taskgroup = 0x08, ompt_cancel_activated = 0x10, ompt_cancel_detected = 0x20, ompt_cancel_discarded_task = 0x40 } ompt_cancel_flag_t; typedef uint64_t ompt_hwid_t; typedef uint64_t ompt_wait_id_t; typedef enum ompt_frame_flag_t { ompt_frame_runtime = 0x00, ompt_frame_application = 0x01, ompt_frame_cfa = 0x10, ompt_frame_framepointer = 0x20, ompt_frame_stackaddress = 0x30 } ompt_frame_flag_t; typedef enum ompt_state_t { ompt_state_work_serial = 0x000, ompt_state_work_parallel = 0x001, ompt_state_work_reduction = 0x002, ompt_state_wait_barrier DEPRECATED_51 = 0x010, ompt_state_wait_barrier_implicit_parallel = 0x011, ompt_state_wait_barrier_implicit_workshare = 0x012, ompt_state_wait_barrier_implicit DEPRECATED_51 = 0x013, ompt_state_wait_barrier_explicit = 0x014, ompt_state_wait_barrier_implementation = 0x015, ompt_state_wait_barrier_teams = 0x016, ompt_state_wait_taskwait = 0x020, ompt_state_wait_taskgroup = 0x021, ompt_state_wait_mutex = 0x040, ompt_state_wait_lock = 0x041, ompt_state_wait_critical = 0x042, ompt_state_wait_atomic = 0x043, ompt_state_wait_ordered = 0x044, ompt_state_wait_target = 0x080, ompt_state_wait_target_map = 0x081, ompt_state_wait_target_update = 0x082, ompt_state_idle = 0x100, ompt_state_overhead = 0x101, ompt_state_undefined = 0x102 } ompt_state_t; typedef uint64_t (*ompt_get_unique_id_t) (void); typedef uint64_t ompd_size_t; typedef uint64_t ompd_wait_id_t; typedef uint64_t ompd_addr_t; typedef int64_t ompd_word_t; typedef uint64_t ompd_seg_t; typedef uint64_t ompd_device_t; typedef uint64_t ompd_thread_id_t; typedef enum ompd_scope_t { ompd_scope_global = 1, ompd_scope_address_space = 2, ompd_scope_thread = 3, ompd_scope_parallel = 4, ompd_scope_implicit_task = 5, ompd_scope_task = 6 } ompd_scope_t; typedef uint64_t ompd_icv_id_t; typedef enum ompd_rc_t { ompd_rc_ok = 0, ompd_rc_unavailable = 1, ompd_rc_stale_handle = 2, ompd_rc_bad_input = 3, ompd_rc_error = 4, ompd_rc_unsupported = 5, ompd_rc_needs_state_tracking = 6, ompd_rc_incompatible = 7, ompd_rc_device_read_error = 8, ompd_rc_device_write_error = 9, ompd_rc_nomem = 10, ompd_rc_incomplete = 11, ompd_rc_callback_error = 12 } ompd_rc_t; typedef void (*ompt_interface_fn_t) (void); typedef ompt_interface_fn_t (*ompt_function_lookup_t) ( const char *interface_function_name ); typedef union ompt_data_t { uint64_t value; void *ptr; } ompt_data_t; typedef struct ompt_frame_t { ompt_data_t exit_frame; ompt_data_t enter_frame; int exit_frame_flags; int enter_frame_flags; } ompt_frame_t; typedef void (*ompt_callback_t) (void); typedef void ompt_device_t; typedef void ompt_buffer_t; typedef void (*ompt_callback_buffer_request_t) ( int device_num, ompt_buffer_t **buffer, size_t *bytes ); typedef void (*ompt_callback_buffer_complete_t) ( int device_num, ompt_buffer_t *buffer, size_t bytes, 
ompt_buffer_cursor_t begin, int buffer_owned ); typedef void (*ompt_finalize_t) ( ompt_data_t *tool_data ); typedef int (*ompt_initialize_t) ( ompt_function_lookup_t lookup, int initial_device_num, ompt_data_t *tool_data ); typedef struct ompt_start_tool_result_t { ompt_initialize_t initialize; ompt_finalize_t finalize; ompt_data_t tool_data; } ompt_start_tool_result_t; typedef struct ompt_record_abstract_t { ompt_record_native_t rclass; const char *type; ompt_device_time_t start_time; ompt_device_time_t end_time; ompt_hwid_t hwid; } ompt_record_abstract_t; typedef struct ompt_dependence_t { ompt_data_t variable; ompt_dependence_type_t dependence_type; } ompt_dependence_t; typedef struct ompt_dispatch_chunk_t { uint64_t start; uint64_t iterations; } ompt_dispatch_chunk_t; typedef int (*ompt_enumerate_states_t) ( int current_state, int *next_state, const char **next_state_name ); typedef int (*ompt_enumerate_mutex_impls_t) ( int current_impl, int *next_impl, const char **next_impl_name ); typedef ompt_set_result_t (*ompt_set_callback_t) ( ompt_callbacks_t event, ompt_callback_t callback ); typedef int (*ompt_get_callback_t) ( ompt_callbacks_t event, ompt_callback_t *callback ); typedef ompt_data_t *(*ompt_get_thread_data_t) (void); typedef int (*ompt_get_num_procs_t) (void); typedef int (*ompt_get_num_places_t) (void); typedef int (*ompt_get_place_proc_ids_t) ( int place_num, int ids_size, int *ids ); typedef int (*ompt_get_place_num_t) (void); typedef int (*ompt_get_partition_place_nums_t) ( int place_nums_size, int *place_nums ); typedef int (*ompt_get_proc_id_t) (void); typedef int (*ompt_get_state_t) ( ompt_wait_id_t *wait_id ); typedef int (*ompt_get_parallel_info_t) ( int ancestor_level, ompt_data_t **parallel_data, int *team_size ); typedef int (*ompt_get_task_info_t) ( int ancestor_level, int *flags, ompt_data_t **task_data, ompt_frame_t **task_frame, ompt_data_t **parallel_data, int *thread_num ); typedef int (*ompt_get_task_memory_t)( void **addr, size_t *size, int block ); typedef int (*ompt_get_target_info_t) ( uint64_t *device_num, ompt_id_t *target_id, ompt_id_t *host_op_id ); typedef int (*ompt_get_num_devices_t) (void); typedef void (*ompt_finalize_tool_t) (void); typedef int (*ompt_get_device_num_procs_t) ( ompt_device_t *device ); typedef ompt_device_time_t (*ompt_get_device_time_t) ( ompt_device_t *device ); typedef double (*ompt_translate_time_t) ( ompt_device_t *device, ompt_device_time_t time ); typedef ompt_set_result_t (*ompt_set_trace_ompt_t) ( ompt_device_t *device, unsigned int enable, unsigned int etype ); typedef ompt_set_result_t (*ompt_set_trace_native_t) ( ompt_device_t *device, int enable, int flags ); typedef int (*ompt_start_trace_t) ( ompt_device_t *device, ompt_callback_buffer_request_t request, ompt_callback_buffer_complete_t complete ); typedef int (*ompt_pause_trace_t) ( ompt_device_t *device, int begin_pause ); typedef int (*ompt_flush_trace_t) ( ompt_device_t *device ); typedef int (*ompt_stop_trace_t) ( ompt_device_t *device ); typedef int (*ompt_advance_buffer_cursor_t) ( ompt_device_t *device, ompt_buffer_t *buffer, size_t size, ompt_buffer_cursor_t current, ompt_buffer_cursor_t *next ); typedef ompt_record_t (*ompt_get_record_type_t) ( ompt_buffer_t *buffer, ompt_buffer_cursor_t current ); typedef void *(*ompt_get_record_native_t) ( ompt_buffer_t *buffer, ompt_buffer_cursor_t current, ompt_id_t *host_op_id ); typedef ompt_record_abstract_t * (*ompt_get_record_abstract_t) ( void *native_record ); typedef void (*ompt_callback_thread_begin_t) ( 
ompt_thread_t thread_type, ompt_data_t *thread_data ); typedef struct ompt_record_thread_begin_t { ompt_thread_t thread_type; } ompt_record_thread_begin_t; typedef void (*ompt_callback_thread_end_t) ( ompt_data_t *thread_data ); typedef void (*ompt_callback_parallel_begin_t) ( ompt_data_t *encountering_task_data, const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data, unsigned int requested_parallelism, int flags, const void *codeptr_ra ); typedef struct ompt_record_parallel_begin_t { ompt_id_t encountering_task_id; ompt_id_t parallel_id; unsigned int requested_parallelism; int flags; const void *codeptr_ra; } ompt_record_parallel_begin_t; typedef void (*ompt_callback_parallel_end_t) ( ompt_data_t *parallel_data, ompt_data_t *encountering_task_data, int flags, const void *codeptr_ra ); typedef struct ompt_record_parallel_end_t { ompt_id_t parallel_id; ompt_id_t encountering_task_id; int flags; const void *codeptr_ra; } ompt_record_parallel_end_t; typedef void (*ompt_callback_work_t) ( ompt_work_t work_type, ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, uint64_t count, const void *codeptr_ra ); typedef struct ompt_record_work_t { ompt_work_t work_type; ompt_scope_endpoint_t endpoint; ompt_id_t parallel_id; ompt_id_t task_id; uint64_t count; const void *codeptr_ra; } ompt_record_work_t; typedef void (*ompt_callback_dispatch_t) ( ompt_data_t *parallel_data, ompt_data_t *task_data, ompt_dispatch_t kind, ompt_data_t instance ); typedef struct ompt_record_dispatch_t { ompt_id_t parallel_id; ompt_id_t task_id; ompt_dispatch_t kind; ompt_data_t instance; } ompt_record_dispatch_t; typedef void (*ompt_callback_task_create_t) ( ompt_data_t *encountering_task_data, const ompt_frame_t *encountering_task_frame, ompt_data_t *new_task_data, int flags, int has_dependences, const void *codeptr_ra ); typedef struct ompt_record_task_create_t { ompt_id_t encountering_task_id; ompt_id_t new_task_id; int flags; int has_dependences; const void *codeptr_ra; } ompt_record_task_create_t; typedef void (*ompt_callback_dependences_t) ( ompt_data_t *task_data, const ompt_dependence_t *deps, int ndeps ); typedef struct ompt_record_dependences_t { ompt_id_t task_id; ompt_dependence_t dep; int ndeps; } ompt_record_dependences_t; typedef void (*ompt_callback_task_dependence_t) ( ompt_data_t *src_task_data, ompt_data_t *sink_task_data ); typedef struct ompt_record_task_dependence_t { ompt_id_t src_task_id; ompt_id_t sink_task_id; } ompt_record_task_dependence_t; typedef void (*ompt_callback_task_schedule_t) ( ompt_data_t *prior_task_data, ompt_task_status_t prior_task_status, ompt_data_t *next_task_data ); typedef struct ompt_record_task_schedule_t { ompt_id_t prior_task_id; ompt_task_status_t prior_task_status; ompt_id_t next_task_id; } ompt_record_task_schedule_t; typedef void (*ompt_callback_implicit_task_t) ( ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, unsigned int actual_parallelism, unsigned int index, int flags ); typedef struct ompt_record_implicit_task_t { ompt_scope_endpoint_t endpoint; ompt_id_t parallel_id; ompt_id_t task_id; unsigned int actual_parallelism; unsigned int index; int flags; } ompt_record_implicit_task_t; typedef void (*ompt_callback_masked_t) ( ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, const void *codeptr_ra ); typedef ompt_callback_masked_t ompt_callback_master_t DEPRECATED_51; typedef struct ompt_record_masked_t { ompt_scope_endpoint_t endpoint; ompt_id_t 
parallel_id; ompt_id_t task_id; const void *codeptr_ra; } ompt_record_masked_t; typedef void (*ompt_callback_sync_region_t) ( ompt_sync_region_t kind, ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, const void *codeptr_ra ); typedef struct ompt_record_sync_region_t { ompt_sync_region_t kind; ompt_scope_endpoint_t endpoint; ompt_id_t parallel_id; ompt_id_t task_id; const void *codeptr_ra; } ompt_record_sync_region_t; typedef void (*ompt_callback_mutex_acquire_t) ( ompt_mutex_t kind, unsigned int hint, unsigned int impl, ompt_wait_id_t wait_id, const void *codeptr_ra ); typedef struct ompt_record_mutex_acquire_t { ompt_mutex_t kind; unsigned int hint; unsigned int impl; ompt_wait_id_t wait_id; const void *codeptr_ra; } ompt_record_mutex_acquire_t; typedef void (*ompt_callback_mutex_t) ( ompt_mutex_t kind, ompt_wait_id_t wait_id, const void *codeptr_ra ); typedef struct ompt_record_mutex_t { ompt_mutex_t kind; ompt_wait_id_t wait_id; const void *codeptr_ra; } ompt_record_mutex_t; typedef void (*ompt_callback_nest_lock_t) ( ompt_scope_endpoint_t endpoint, ompt_wait_id_t wait_id, const void *codeptr_ra ); typedef struct ompt_record_nest_lock_t { ompt_scope_endpoint_t endpoint; ompt_wait_id_t wait_id; const void *codeptr_ra; } ompt_record_nest_lock_t; typedef void (*ompt_callback_flush_t) ( ompt_data_t *thread_data, const void *codeptr_ra ); typedef struct ompt_record_flush_t { const void *codeptr_ra; } ompt_record_flush_t; typedef void (*ompt_callback_cancel_t) ( ompt_data_t *task_data, int flags, const void *codeptr_ra ); typedef struct ompt_record_cancel_t { ompt_id_t task_id; int flags; const void *codeptr_ra; } ompt_record_cancel_t; typedef void (*ompt_callback_device_initialize_t) ( int device_num, const char *type, ompt_device_t *device, ompt_function_lookup_t lookup, const char *documentation ); typedef void (*ompt_callback_device_finalize_t) ( int device_num ); typedef void (*ompt_callback_device_load_t) ( int device_num, const char *filename, int64_t offset_in_file, void *vma_in_file, size_t bytes, void *host_addr, void *device_addr, uint64_t module_id ); typedef void (*ompt_callback_device_unload_t) ( int device_num, uint64_t module_id ); typedef void (*ompt_callback_target_data_op_emi_t) ( ompt_scope_endpoint_t endpoint, ompt_data_t *target_task_data, ompt_data_t *target_data, ompt_id_t *host_op_id, ompt_target_data_op_t optype, void *src_addr, int src_device_num, void *dest_addr, int dest_device_num, size_t bytes, const void *codeptr_ra ); typedef void (*ompt_callback_target_data_op_t) ( ompt_id_t target_id, ompt_id_t host_op_id, ompt_target_data_op_t optype, void *src_addr, int src_device_num, void *dest_addr, int dest_device_num, size_t bytes, const void *codeptr_ra ); typedef struct ompt_record_target_data_op_t { ompt_id_t host_op_id; ompt_target_data_op_t optype; void *src_addr; int src_device_num; void *dest_addr; int dest_device_num; size_t bytes; ompt_device_time_t end_time; const void *codeptr_ra; } ompt_record_target_data_op_t; typedef void (*ompt_callback_target_emi_t) ( ompt_target_t kind, ompt_scope_endpoint_t endpoint, int device_num, ompt_data_t *task_data, ompt_data_t *target_task_data, ompt_data_t *target_data, const void *codeptr_ra ); typedef void (*ompt_callback_target_t) ( ompt_target_t kind, ompt_scope_endpoint_t endpoint, int device_num, ompt_data_t *task_data, ompt_id_t target_id, const void *codeptr_ra ); typedef struct ompt_record_target_t { ompt_target_t kind; ompt_scope_endpoint_t endpoint; int device_num; ompt_id_t 
task_id; ompt_id_t target_id; const void *codeptr_ra; } ompt_record_target_t; typedef void (*ompt_callback_target_map_emi_t) ( ompt_data_t *target_data, unsigned int nitems, void **host_addr, void **device_addr, size_t *bytes, unsigned int *mapping_flags, const void *codeptr_ra ); typedef void (*ompt_callback_target_map_t) ( ompt_id_t target_id, unsigned int nitems, void **host_addr, void **device_addr, size_t *bytes, unsigned int *mapping_flags, const void *codeptr_ra ); typedef struct ompt_record_target_map_t { ompt_id_t target_id; unsigned int nitems; void **host_addr; void **device_addr; size_t *bytes; unsigned int *mapping_flags; const void *codeptr_ra; } ompt_record_target_map_t; typedef void (*ompt_callback_target_submit_emi_t) ( ompt_scope_endpoint_t endpoint, ompt_data_t *target_data, ompt_id_t *host_op_id, unsigned int requested_num_teams ); typedef void (*ompt_callback_target_submit_t) ( ompt_id_t target_id, ompt_id_t host_op_id, unsigned int requested_num_teams ); typedef struct ompt_record_target_kernel_t { ompt_id_t host_op_id; unsigned int requested_num_teams; unsigned int granted_num_teams; ompt_device_time_t end_time; } ompt_record_target_kernel_t; typedef int (*ompt_callback_control_tool_t) ( uint64_t command, uint64_t modifier, void *arg, const void *codeptr_ra ); typedef struct ompt_record_control_tool_t { uint64_t command; uint64_t modifier; const void *codeptr_ra; } ompt_record_control_tool_t; typedef void (*ompt_callback_error_t) ( ompt_severity_t severity, const char *message, size_t length, const void *codeptr_ra ); typedef struct ompt_record_error_t { ompt_severity_t severity; const char *message; size_t length; const void *codeptr_ra; } ompt_record_error_t; typedef struct ompd_address_t { ompd_seg_t segment; ompd_addr_t address; } ompd_address_t; typedef struct ompd_frame_info_t { ompd_address_t frame_address; ompd_word_t frame_flag; } ompd_frame_info_t; typedef struct _ompd_aspace_handle ompd_address_space_handle_t; typedef struct _ompd_thread_handle ompd_thread_handle_t; typedef struct _ompd_parallel_handle ompd_parallel_handle_t; typedef struct _ompd_task_handle ompd_task_handle_t; typedef struct _ompd_aspace_cont ompd_address_space_context_t; typedef struct _ompd_thread_cont ompd_thread_context_t; typedef struct ompd_device_type_sizes_t { uint8_t sizeof_char; uint8_t sizeof_short; uint8_t sizeof_int; uint8_t sizeof_long; uint8_t sizeof_long_long; uint8_t sizeof_pointer; } ompd_device_type_sizes_t; void ompd_dll_locations_valid(void); typedef ompd_rc_t (*ompd_callback_memory_alloc_fn_t)(ompd_size_t nbytes, void **ptr); typedef ompd_rc_t (*ompd_callback_memory_free_fn_t)(void *ptr); typedef ompd_rc_t (*ompd_callback_get_thread_context_for_thread_id_fn_t)( ompd_address_space_context_t *address_space_context, ompd_thread_id_t kind, ompd_size_t sizeof_thread_id, const void *thread_id, ompd_thread_context_t **thread_context); typedef ompd_rc_t (*ompd_callback_sizeof_fn_t)( ompd_address_space_context_t *address_space_context, ompd_device_type_sizes_t *sizes); typedef ompd_rc_t (*ompd_callback_symbol_addr_fn_t)( ompd_address_space_context_t *address_space_context, ompd_thread_context_t *thread_context, const char *symbol_name, ompd_address_t *symbol_addr, const char *file_name); typedef ompd_rc_t (*ompd_callback_memory_read_fn_t)( ompd_address_space_context_t *address_space_context, ompd_thread_context_t *thread_context, const ompd_address_t *addr, ompd_size_t nbytes, void *buffer); typedef ompd_rc_t (*ompd_callback_memory_write_fn_t)( ompd_address_space_context_t 
*address_space_context, ompd_thread_context_t *thread_context, const ompd_address_t *addr, ompd_size_t nbytes, const void *buffer); typedef ompd_rc_t (*ompd_callback_device_host_fn_t)( ompd_address_space_context_t *address_space_context, const void *input, ompd_size_t unit_size, ompd_size_t count, void *output); typedef ompd_rc_t (*ompd_callback_print_string_fn_t)(const char *string, int category); typedef struct ompd_callbacks_t { ompd_callback_memory_alloc_fn_t alloc_memory; ompd_callback_memory_free_fn_t free_memory; ompd_callback_print_string_fn_t print_string; ompd_callback_sizeof_fn_t sizeof_type; ompd_callback_symbol_addr_fn_t symbol_addr_lookup; ompd_callback_memory_read_fn_t read_memory; ompd_callback_memory_write_fn_t write_memory; ompd_callback_memory_read_fn_t read_string; ompd_callback_device_host_fn_t device_to_host; ompd_callback_device_host_fn_t host_to_device; ompd_callback_get_thread_context_for_thread_id_fn_t get_thread_context_for_thread_id; } ompd_callbacks_t; void ompd_bp_parallel_begin(void); void ompd_bp_parallel_end(void); void ompd_bp_task_begin(void); void ompd_bp_task_end(void); void ompd_bp_thread_begin(void); void ompd_bp_thread_end(void); void ompd_bp_device_begin(void); void ompd_bp_device_end(void); ompd_rc_t ompd_initialize(ompd_word_t api_version, const ompd_callbacks_t *callbacks); ompd_rc_t ompd_get_api_version(ompd_word_t *version); ompd_rc_t ompd_get_version_string(const char **string); ompd_rc_t ompd_finalize(void); ompd_rc_t ompd_process_initialize(ompd_address_space_context_t *context, ompd_address_space_handle_t **handle); ompd_rc_t ompd_device_initialize(ompd_address_space_handle_t *process_handle, ompd_address_space_context_t *device_context, ompd_device_t kind, ompd_size_t sizeof_id, void *id, ompd_address_space_handle_t **device_handle); ompd_rc_t ompd_rel_address_space_handle(ompd_address_space_handle_t *handle); ompd_rc_t ompd_get_omp_version(ompd_address_space_handle_t *address_space, ompd_word_t *omp_version); ompd_rc_t ompd_get_omp_version_string(ompd_address_space_handle_t *address_space, const char **string); ompd_rc_t ompd_get_thread_in_parallel(ompd_parallel_handle_t *parallel_handle, int thread_num, ompd_thread_handle_t **thread_handle); ompd_rc_t ompd_get_thread_handle(ompd_address_space_handle_t *handle, ompd_thread_id_t kind, ompd_size_t sizeof_thread_id, const void *thread_id, ompd_thread_handle_t **thread_handle); ompd_rc_t ompd_rel_thread_handle(ompd_thread_handle_t *thread_handle); ompd_rc_t ompd_thread_handle_compare(ompd_thread_handle_t *thread_handle_1, ompd_thread_handle_t *thread_handle_2, int *cmp_value); ompd_rc_t ompd_get_thread_id(ompd_thread_handle_t *thread_handle, ompd_thread_id_t kind, ompd_size_t sizeof_thread_id, void *thread_id); ompd_rc_t ompd_get_curr_parallel_handle(ompd_thread_handle_t *thread_handle, ompd_parallel_handle_t **parallel_handle); ompd_rc_t ompd_get_enclosing_parallel_handle( ompd_parallel_handle_t *parallel_handle, ompd_parallel_handle_t **enclosing_parallel_handle); ompd_rc_t ompd_get_task_parallel_handle(ompd_task_handle_t *task_handle, ompd_parallel_handle_t **task_parallel_handle); ompd_rc_t ompd_rel_parallel_handle(ompd_parallel_handle_t *parallel_handle); ompd_rc_t ompd_parallel_handle_compare(ompd_parallel_handle_t *parallel_handle_1, ompd_parallel_handle_t *parallel_handle_2, int *cmp_value); ompd_rc_t ompd_get_curr_task_handle(ompd_thread_handle_t *thread_handle, ompd_task_handle_t **task_handle); ompd_rc_t ompd_get_generating_task_handle(ompd_task_handle_t *task_handle, 
ompd_task_handle_t **generating_task_handle); ompd_rc_t ompd_get_scheduling_task_handle(ompd_task_handle_t *task_handle, ompd_task_handle_t **scheduling_task_handle); ompd_rc_t ompd_get_task_in_parallel(ompd_parallel_handle_t *parallel_handle, int thread_num, ompd_task_handle_t **task_handle); ompd_rc_t ompd_rel_task_handle(ompd_task_handle_t *task_handle); ompd_rc_t ompd_task_handle_compare(ompd_task_handle_t *task_handle_1, ompd_task_handle_t *task_handle_2, int *cmp_value); ompd_rc_t ompd_get_task_function(ompd_task_handle_t *task_handle, ompd_address_t *entry_point); ompd_rc_t ompd_get_task_frame(ompd_task_handle_t *task_handle, ompd_frame_info_t *exit_frame, ompd_frame_info_t *enter_frame); ompd_rc_t ompd_enumerate_states(ompd_address_space_handle_t *address_space_handle, ompd_word_t current_state, ompd_word_t *next_state, const char **next_state_name, ompd_word_t *more_enums); ompd_rc_t ompd_get_state(ompd_thread_handle_t *thread_handle, ompd_word_t *state, ompd_wait_id_t *wait_id); ompd_rc_t ompd_get_display_control_vars(ompd_address_space_handle_t *address_space_handle, const char *const **control_vars); ompd_rc_t ompd_rel_display_control_vars(const char *const **control_vars); ompd_rc_t ompd_enumerate_icvs(ompd_address_space_handle_t *handle, ompd_icv_id_t current, ompd_icv_id_t *next_id, const char **next_icv_name, ompd_scope_t *next_scope, int *more); ompd_rc_t ompd_get_icv_from_scope(void *handle, ompd_scope_t scope, ompd_icv_id_t icv_id, ompd_word_t *icv_value); ompd_rc_t ompd_get_icv_string_from_scope(void *handle, ompd_scope_t scope, ompd_icv_id_t icv_id, const char **icv_string); ompd_rc_t ompd_get_tool_data(void *handle, ompd_scope_t scope, ompd_word_t *value, ompd_address_t *ptr); typedef struct ompt_record_ompt_t { ompt_callbacks_t type; ompt_device_time_t time; ompt_id_t thread_id; ompt_id_t target_id; union { ompt_record_thread_begin_t thread_begin; ompt_record_parallel_begin_t parallel_begin; ompt_record_parallel_end_t parallel_end; ompt_record_work_t work; ompt_record_dispatch_t dispatch; ompt_record_task_create_t task_create; ompt_record_dependences_t dependences; ompt_record_task_dependence_t task_dependence; ompt_record_task_schedule_t task_schedule; ompt_record_implicit_task_t implicit_task; ompt_record_masked_t masked; ompt_record_sync_region_t sync_region; ompt_record_mutex_acquire_t mutex_acquire; ompt_record_mutex_t mutex; ompt_record_nest_lock_t nest_lock; ompt_record_flush_t flush; ompt_record_cancel_t cancel; ompt_record_target_t target; ompt_record_target_data_op_t target_data_op; ompt_record_target_map_t target_map; ompt_record_target_kernel_t target_kernel; ompt_record_control_tool_t control_tool; } record; } ompt_record_ompt_t; typedef ompt_record_ompt_t *(*ompt_get_record_ompt_t) ( ompt_buffer_t *buffer, ompt_buffer_cursor_t current ); #define ompt_id_none 0 #define ompt_data_none {0} #define ompt_time_none 0 #define ompt_hwid_none 0 #define ompt_addr_none ~0 #define ompt_mutex_impl_none 0 #define ompt_wait_id_none 0 #define ompd_segment_none 0 #if defined(__cplusplus) } // extern "C" #endif #endif /* __OMPT__ */ //===----- opencl-c-base.h - OpenCL C language base definitions -----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef _OPENCL_BASE_H_ #define _OPENCL_BASE_H_ // Define extension macros #if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) // For SPIR and SPIR-V all extensions are supported. #if defined(__SPIR__) || defined(__SPIRV__) #define cl_khr_subgroup_extended_types 1 #define cl_khr_subgroup_non_uniform_vote 1 #define cl_khr_subgroup_ballot 1 #define cl_khr_subgroup_non_uniform_arithmetic 1 #define cl_khr_subgroup_shuffle 1 #define cl_khr_subgroup_shuffle_relative 1 #define cl_khr_subgroup_clustered_reduce 1 #define cl_khr_subgroup_rotate 1 #define cl_khr_extended_bit_ops 1 #define cl_khr_integer_dot_product 1 #define __opencl_c_integer_dot_product_input_4x8bit 1 #define __opencl_c_integer_dot_product_input_4x8bit_packed 1 #define cl_ext_float_atomics 1 #ifdef cl_khr_fp16 #define __opencl_c_ext_fp16_global_atomic_load_store 1 #define __opencl_c_ext_fp16_local_atomic_load_store 1 #define __opencl_c_ext_fp16_global_atomic_add 1 #define __opencl_c_ext_fp16_local_atomic_add 1 #define __opencl_c_ext_fp16_global_atomic_min_max 1 #define __opencl_c_ext_fp16_local_atomic_min_max 1 #endif #ifdef cl_khr_fp64 #define __opencl_c_ext_fp64_global_atomic_add 1 #define __opencl_c_ext_fp64_local_atomic_add 1 #define __opencl_c_ext_fp64_global_atomic_min_max 1 #define __opencl_c_ext_fp64_local_atomic_min_max 1 #endif #define __opencl_c_ext_fp32_global_atomic_add 1 #define __opencl_c_ext_fp32_local_atomic_add 1 #define __opencl_c_ext_fp32_global_atomic_min_max 1 #define __opencl_c_ext_fp32_local_atomic_min_max 1 #define __opencl_c_ext_image_raw10_raw12 1 #endif // defined(__SPIR__) || defined(__SPIRV__) #endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) // Define feature macros for OpenCL C 2.0 #if (__OPENCL_CPP_VERSION__ == 100 || __OPENCL_C_VERSION__ == 200) #define __opencl_c_pipes 1 #define __opencl_c_generic_address_space 1 #define __opencl_c_work_group_collective_functions 1 #define __opencl_c_atomic_order_acq_rel 1 #define __opencl_c_atomic_order_seq_cst 1 #define __opencl_c_atomic_scope_device 1 #define __opencl_c_atomic_scope_all_devices 1 #define __opencl_c_device_enqueue 1 #define __opencl_c_read_write_images 1 #define __opencl_c_program_scope_global_variables 1 #define __opencl_c_images 1 #endif // Define header-only feature macros for OpenCL C 3.0. #if (__OPENCL_CPP_VERSION__ == 202100 || __OPENCL_C_VERSION__ == 300) // For the SPIR and SPIR-V target all features are supported. #if defined(__SPIR__) || defined(__SPIRV__) #define __opencl_c_work_group_collective_functions 1 #define __opencl_c_atomic_order_seq_cst 1 #define __opencl_c_atomic_scope_device 1 #define __opencl_c_atomic_scope_all_devices 1 #define __opencl_c_read_write_images 1 #endif // defined(__SPIR__) // Undefine any feature macros that have been explicitly disabled using // an __undef_ macro. 
#ifdef __undef___opencl_c_work_group_collective_functions #undef __opencl_c_work_group_collective_functions #endif #ifdef __undef___opencl_c_atomic_order_seq_cst #undef __opencl_c_atomic_order_seq_cst #endif #ifdef __undef___opencl_c_atomic_scope_device #undef __opencl_c_atomic_scope_device #endif #ifdef __undef___opencl_c_atomic_scope_all_devices #undef __opencl_c_atomic_scope_all_devices #endif #ifdef __undef___opencl_c_read_write_images #undef __opencl_c_read_write_images #endif #endif // (__OPENCL_CPP_VERSION__ == 202100 || __OPENCL_C_VERSION__ == 300) #if !defined(__opencl_c_generic_address_space) // Internal feature macro to provide named (global, local, private) address // space overloads for builtin functions that take a pointer argument. #define __opencl_c_named_address_space_builtins 1 #endif // !defined(__opencl_c_generic_address_space) #if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || defined(__opencl_c_subgroups) // Internal feature macro to provide subgroup builtins. #define __opencl_subgroup_builtins 1 #endif // built-in scalar data types: /** * An unsigned 8-bit integer. */ typedef unsigned char uchar; /** * An unsigned 16-bit integer. */ typedef unsigned short ushort; /** * An unsigned 32-bit integer. */ typedef unsigned int uint; /** * An unsigned 64-bit integer. */ typedef unsigned long ulong; /** * The unsigned integer type of the result of the sizeof operator. This * is a 32-bit unsigned integer if CL_DEVICE_ADDRESS_BITS * defined in table 4.3 is 32-bits and is a 64-bit unsigned integer if * CL_DEVICE_ADDRESS_BITS is 64-bits. */ typedef __SIZE_TYPE__ size_t; /** * A signed integer type that is the result of subtracting two pointers. * This is a 32-bit signed integer if CL_DEVICE_ADDRESS_BITS * defined in table 4.3 is 32-bits and is a 64-bit signed integer if * CL_DEVICE_ADDRESS_BITS is 64-bits. */ typedef __PTRDIFF_TYPE__ ptrdiff_t; /** * A signed integer type with the property that any valid pointer to * void can be converted to this type, then converted back to pointer * to void, and the result will compare equal to the original pointer. */ typedef __INTPTR_TYPE__ intptr_t; /** * An unsigned integer type with the property that any valid pointer to * void can be converted to this type, then converted back to pointer * to void, and the result will compare equal to the original pointer. 
*/ typedef __UINTPTR_TYPE__ uintptr_t; // built-in vector data types: typedef char char2 __attribute__((ext_vector_type(2))); typedef char char3 __attribute__((ext_vector_type(3))); typedef char char4 __attribute__((ext_vector_type(4))); typedef char char8 __attribute__((ext_vector_type(8))); typedef char char16 __attribute__((ext_vector_type(16))); typedef uchar uchar2 __attribute__((ext_vector_type(2))); typedef uchar uchar3 __attribute__((ext_vector_type(3))); typedef uchar uchar4 __attribute__((ext_vector_type(4))); typedef uchar uchar8 __attribute__((ext_vector_type(8))); typedef uchar uchar16 __attribute__((ext_vector_type(16))); typedef short short2 __attribute__((ext_vector_type(2))); typedef short short3 __attribute__((ext_vector_type(3))); typedef short short4 __attribute__((ext_vector_type(4))); typedef short short8 __attribute__((ext_vector_type(8))); typedef short short16 __attribute__((ext_vector_type(16))); typedef ushort ushort2 __attribute__((ext_vector_type(2))); typedef ushort ushort3 __attribute__((ext_vector_type(3))); typedef ushort ushort4 __attribute__((ext_vector_type(4))); typedef ushort ushort8 __attribute__((ext_vector_type(8))); typedef ushort ushort16 __attribute__((ext_vector_type(16))); typedef int int2 __attribute__((ext_vector_type(2))); typedef int int3 __attribute__((ext_vector_type(3))); typedef int int4 __attribute__((ext_vector_type(4))); typedef int int8 __attribute__((ext_vector_type(8))); typedef int int16 __attribute__((ext_vector_type(16))); typedef uint uint2 __attribute__((ext_vector_type(2))); typedef uint uint3 __attribute__((ext_vector_type(3))); typedef uint uint4 __attribute__((ext_vector_type(4))); typedef uint uint8 __attribute__((ext_vector_type(8))); typedef uint uint16 __attribute__((ext_vector_type(16))); typedef long long2 __attribute__((ext_vector_type(2))); typedef long long3 __attribute__((ext_vector_type(3))); typedef long long4 __attribute__((ext_vector_type(4))); typedef long long8 __attribute__((ext_vector_type(8))); typedef long long16 __attribute__((ext_vector_type(16))); typedef ulong ulong2 __attribute__((ext_vector_type(2))); typedef ulong ulong3 __attribute__((ext_vector_type(3))); typedef ulong ulong4 __attribute__((ext_vector_type(4))); typedef ulong ulong8 __attribute__((ext_vector_type(8))); typedef ulong ulong16 __attribute__((ext_vector_type(16))); typedef float float2 __attribute__((ext_vector_type(2))); typedef float float3 __attribute__((ext_vector_type(3))); typedef float float4 __attribute__((ext_vector_type(4))); typedef float float8 __attribute__((ext_vector_type(8))); typedef float float16 __attribute__((ext_vector_type(16))); #ifdef cl_khr_fp16 #pragma OPENCL EXTENSION cl_khr_fp16 : enable typedef half half2 __attribute__((ext_vector_type(2))); typedef half half3 __attribute__((ext_vector_type(3))); typedef half half4 __attribute__((ext_vector_type(4))); typedef half half8 __attribute__((ext_vector_type(8))); typedef half half16 __attribute__((ext_vector_type(16))); #endif #ifdef cl_khr_fp64 #if __OPENCL_C_VERSION__ < CL_VERSION_1_2 #pragma OPENCL EXTENSION cl_khr_fp64 : enable #endif typedef double double2 __attribute__((ext_vector_type(2))); typedef double double3 __attribute__((ext_vector_type(3))); typedef double double4 __attribute__((ext_vector_type(4))); typedef double double8 __attribute__((ext_vector_type(8))); typedef double double16 __attribute__((ext_vector_type(16))); #endif // An internal alias for half, for use by OpenCLBuiltins.td. 
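/*
 * Illustrative sketch, not part of opencl-c-base.h: how the ext_vector_type
 * typedefs above are typically used from OpenCL C kernel code. The kernel
 * name and buffer layout are hypothetical; get_global_id() is the standard
 * OpenCL C work-item builtin, declared elsewhere in the OpenCL headers.
 */
__kernel void scale_float4(__global float4 *buf, float factor) {
  size_t i = get_global_id(0);   /* one float4 element per work-item */
  float4 v = buf[i];
  v = v * factor;                /* vector * scalar broadcasts the scalar */
  buf[i] = v;
}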
#define __half half #if defined(__OPENCL_CPP_VERSION__) #define NULL nullptr #elif defined(__OPENCL_C_VERSION__) #define NULL ((void*)0) #endif /** * Value of maximum non-infinite single-precision floating-point * number. */ #define MAXFLOAT 0x1.fffffep127f /** * A positive float constant expression. HUGE_VALF evaluates * to +infinity. Used as an error value returned by the built-in * math functions. */ #define HUGE_VALF (__builtin_huge_valf()) /** * A positive double constant expression. HUGE_VAL evaluates * to +infinity. Used as an error value returned by the built-in * math functions. */ #define HUGE_VAL (__builtin_huge_val()) /** * A constant expression of type float representing positive or * unsigned infinity. */ #define INFINITY (__builtin_inff()) /** * A constant expression of type float representing a quiet NaN. */ #define NAN as_float(INT_MAX) #define FP_ILOGB0 INT_MIN #define FP_ILOGBNAN INT_MAX #define FLT_DIG 6 #define FLT_MANT_DIG 24 #define FLT_MAX_10_EXP +38 #define FLT_MAX_EXP +128 #define FLT_MIN_10_EXP -37 #define FLT_MIN_EXP -125 #define FLT_RADIX 2 #define FLT_MAX 0x1.fffffep127f #define FLT_MIN 0x1.0p-126f #define FLT_EPSILON 0x1.0p-23f #define M_E_F 2.71828182845904523536028747135266250f #define M_LOG2E_F 1.44269504088896340735992468100189214f #define M_LOG10E_F 0.434294481903251827651128918916605082f #define M_LN2_F 0.693147180559945309417232121458176568f #define M_LN10_F 2.30258509299404568401799145468436421f #define M_PI_F 3.14159265358979323846264338327950288f #define M_PI_2_F 1.57079632679489661923132169163975144f #define M_PI_4_F 0.785398163397448309615660845819875721f #define M_1_PI_F 0.318309886183790671537767526745028724f #define M_2_PI_F 0.636619772367581343075535053490057448f #define M_2_SQRTPI_F 1.12837916709551257389615890312154517f #define M_SQRT2_F 1.41421356237309504880168872420969808f #define M_SQRT1_2_F 0.707106781186547524400844362104849039f #define DBL_DIG 15 #define DBL_MANT_DIG 53 #define DBL_MAX_10_EXP +308 #define DBL_MAX_EXP +1024 #define DBL_MIN_10_EXP -307 #define DBL_MIN_EXP -1021 #define DBL_RADIX 2 #define DBL_MAX 0x1.fffffffffffffp1023 #define DBL_MIN 0x1.0p-1022 #define DBL_EPSILON 0x1.0p-52 #define M_E 0x1.5bf0a8b145769p+1 #define M_LOG2E 0x1.71547652b82fep+0 #define M_LOG10E 0x1.bcb7b1526e50ep-2 #define M_LN2 0x1.62e42fefa39efp-1 #define M_LN10 0x1.26bb1bbb55516p+1 #define M_PI 0x1.921fb54442d18p+1 #define M_PI_2 0x1.921fb54442d18p+0 #define M_PI_4 0x1.921fb54442d18p-1 #define M_1_PI 0x1.45f306dc9c883p-2 #define M_2_PI 0x1.45f306dc9c883p-1 #define M_2_SQRTPI 0x1.20dd750429b6dp+0 #define M_SQRT2 0x1.6a09e667f3bcdp+0 #define M_SQRT1_2 0x1.6a09e667f3bcdp-1 #ifdef cl_khr_fp16 #define HALF_DIG 3 #define HALF_MANT_DIG 11 #define HALF_MAX_10_EXP +4 #define HALF_MAX_EXP +16 #define HALF_MIN_10_EXP -4 #define HALF_MIN_EXP -13 #define HALF_RADIX 2 #define HALF_MAX ((0x1.ffcp15h)) #define HALF_MIN ((0x1.0p-14h)) #define HALF_EPSILON ((0x1.0p-10h)) #define M_E_H 2.71828182845904523536028747135266250h #define M_LOG2E_H 1.44269504088896340735992468100189214h #define M_LOG10E_H 0.434294481903251827651128918916605082h #define M_LN2_H 0.693147180559945309417232121458176568h #define M_LN10_H 2.30258509299404568401799145468436421h #define M_PI_H 3.14159265358979323846264338327950288h #define M_PI_2_H 1.57079632679489661923132169163975144h #define M_PI_4_H 0.785398163397448309615660845819875721h #define M_1_PI_H 0.318309886183790671537767526745028724h #define M_2_PI_H 0.636619772367581343075535053490057448h #define M_2_SQRTPI_H 
1.12837916709551257389615890312154517h #define M_SQRT2_H 1.41421356237309504880168872420969808h #define M_SQRT1_2_H 0.707106781186547524400844362104849039h #endif //cl_khr_fp16 #define CHAR_BIT 8 #define SCHAR_MAX 127 #define SCHAR_MIN (-128) #define UCHAR_MAX 255 #define CHAR_MAX SCHAR_MAX #define CHAR_MIN SCHAR_MIN #define USHRT_MAX 65535 #define SHRT_MAX 32767 #define SHRT_MIN (-32768) #define UINT_MAX 0xffffffff #define INT_MAX 2147483647 #define INT_MIN (-2147483647-1) #define ULONG_MAX 0xffffffffffffffffUL #define LONG_MAX 0x7fffffffffffffffL #define LONG_MIN (-0x7fffffffffffffffL-1) // OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions // Flag type and values for barrier, mem_fence, read_mem_fence, write_mem_fence typedef uint cl_mem_fence_flags; /** * Queue a memory fence to ensure correct * ordering of memory operations to local memory */ #define CLK_LOCAL_MEM_FENCE 0x01 /** * Queue a memory fence to ensure correct * ordering of memory operations to global memory */ #define CLK_GLOBAL_MEM_FENCE 0x02 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) typedef enum memory_scope { memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, #if defined(__opencl_c_atomic_scope_all_devices) memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) memory_scope_all_devices = memory_scope_all_svm_devices, #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif // defined(__opencl_c_atomic_scope_all_devices) /** * Subgroups have different requirements on forward progress, so just test * all the relevant macros. * CL 3.0 sub-groups "they are not guaranteed to make independent forward progress" * KHR subgroups "Subgroups within a workgroup are independent, make forward progress with respect to each other" */ #if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || defined(__opencl_c_subgroups) memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP #endif } memory_scope; /** * Queue a memory fence to ensure correct ordering of memory * operations between work-items of a work-group to * image memory. */ #define CLK_IMAGE_MEM_FENCE 0x04 #ifndef ATOMIC_VAR_INIT #define ATOMIC_VAR_INIT(x) (x) #endif //ATOMIC_VAR_INIT #define ATOMIC_FLAG_INIT 0 // enum values aligned with what clang uses in EmitAtomicExpr() typedef enum memory_order { memory_order_relaxed = __ATOMIC_RELAXED, memory_order_acquire = __ATOMIC_ACQUIRE, memory_order_release = __ATOMIC_RELEASE, memory_order_acq_rel = __ATOMIC_ACQ_REL, #if defined(__opencl_c_atomic_order_seq_cst) memory_order_seq_cst = __ATOMIC_SEQ_CST #endif } memory_order; #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions // These values need to match the runtime equivalent // // Addressing Mode. // #define CLK_ADDRESS_NONE 0 #define CLK_ADDRESS_CLAMP_TO_EDGE 2 #define CLK_ADDRESS_CLAMP 4 #define CLK_ADDRESS_REPEAT 6 #define CLK_ADDRESS_MIRRORED_REPEAT 8 // // Coordination Normalization // #define CLK_NORMALIZED_COORDS_FALSE 0 #define CLK_NORMALIZED_COORDS_TRUE 1 // // Filtering Mode. 
// #define CLK_FILTER_NEAREST 0x10 #define CLK_FILTER_LINEAR 0x20 #ifdef cl_khr_gl_msaa_sharing #pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable #endif //cl_khr_gl_msaa_sharing // // Channel Datatype. // #define CLK_SNORM_INT8 0x10D0 #define CLK_SNORM_INT16 0x10D1 #define CLK_UNORM_INT8 0x10D2 #define CLK_UNORM_INT16 0x10D3 #define CLK_UNORM_SHORT_565 0x10D4 #define CLK_UNORM_SHORT_555 0x10D5 #define CLK_UNORM_INT_101010 0x10D6 #define CLK_SIGNED_INT8 0x10D7 #define CLK_SIGNED_INT16 0x10D8 #define CLK_SIGNED_INT32 0x10D9 #define CLK_UNSIGNED_INT8 0x10DA #define CLK_UNSIGNED_INT16 0x10DB #define CLK_UNSIGNED_INT32 0x10DC #define CLK_HALF_FLOAT 0x10DD #define CLK_FLOAT 0x10DE #define CLK_UNORM_INT24 0x10DF #if __OPENCL_C_VERSION__ >= CL_VERSION_3_0 #define CLK_UNORM_INT_101010_2 0x10E0 #endif // __OPENCL_C_VERSION__ >= CL_VERSION_3_0 #ifdef __opencl_c_ext_image_raw10_raw12 #define CLK_UNSIGNED_INT_RAW10_EXT 0x10E3 #define CLK_UNSIGNED_INT_RAW12_EXT 0x10E4 #endif // __opencl_c_ext_image_raw10_raw12 // Channel order, numbering must be aligned with cl_channel_order in cl.h // #define CLK_R 0x10B0 #define CLK_A 0x10B1 #define CLK_RG 0x10B2 #define CLK_RA 0x10B3 #define CLK_RGB 0x10B4 #define CLK_RGBA 0x10B5 #define CLK_BGRA 0x10B6 #define CLK_ARGB 0x10B7 #define CLK_INTENSITY 0x10B8 #define CLK_LUMINANCE 0x10B9 #define CLK_Rx 0x10BA #define CLK_RGx 0x10BB #define CLK_RGBx 0x10BC #define CLK_DEPTH 0x10BD #define CLK_DEPTH_STENCIL 0x10BE #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 #define CLK_sRGB 0x10BF #define CLK_sRGBx 0x10C0 #define CLK_sRGBA 0x10C1 #define CLK_sBGRA 0x10C2 #define CLK_ABGR 0x10C3 #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 // OpenCL v2.0 s6.13.16 - Pipe Functions #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t)) // OpenCL v2.0 s6.13.17 - Enqueue Kernels #define CL_COMPLETE 0x0 #define CL_RUNNING 0x1 #define CL_SUBMITTED 0x2 #define CL_QUEUED 0x3 #define CLK_SUCCESS 0 #define CLK_ENQUEUE_FAILURE -101 #define CLK_INVALID_QUEUE -102 #define CLK_INVALID_NDRANGE -160 #define CLK_INVALID_EVENT_WAIT_LIST -57 #define CLK_DEVICE_QUEUE_FULL -161 #define CLK_INVALID_ARG_SIZE -51 #define CLK_EVENT_ALLOCATION_FAILURE -100 #define CLK_OUT_OF_RESOURCES -5 #define CLK_NULL_QUEUE 0 #define CLK_NULL_EVENT (__builtin_astype(((__SIZE_MAX__)), clk_event_t)) // execution model related definitions #define CLK_ENQUEUE_FLAGS_NO_WAIT 0x0 #define CLK_ENQUEUE_FLAGS_WAIT_KERNEL 0x1 #define CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP 0x2 typedef int kernel_enqueue_flags_t; typedef int clk_profiling_info; // Profiling info name (see capture_event_profiling_info) #define CLK_PROFILING_COMMAND_EXEC_TIME 0x1 #define MAX_WORK_DIM 3 #ifdef __opencl_c_device_enqueue typedef struct { unsigned int workDimension; size_t globalWorkOffset[MAX_WORK_DIM]; size_t globalWorkSize[MAX_WORK_DIM]; size_t localWorkSize[MAX_WORK_DIM]; } ndrange_t; #endif // __opencl_c_device_enqueue #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * OpenCL v1.1/1.2/2.0 s6.2.4.2 - as_type operators * Reinterprets a data type as another data type of the same size */ #define as_char(x) __builtin_astype((x), char) #define as_char2(x) __builtin_astype((x), char2) #define as_char3(x) __builtin_astype((x), char3) #define as_char4(x) __builtin_astype((x), char4) #define as_char8(x) __builtin_astype((x), char8) #define as_char16(x) __builtin_astype((x), char16) #define as_uchar(x) 
__builtin_astype((x), uchar) #define as_uchar2(x) __builtin_astype((x), uchar2) #define as_uchar3(x) __builtin_astype((x), uchar3) #define as_uchar4(x) __builtin_astype((x), uchar4) #define as_uchar8(x) __builtin_astype((x), uchar8) #define as_uchar16(x) __builtin_astype((x), uchar16) #define as_short(x) __builtin_astype((x), short) #define as_short2(x) __builtin_astype((x), short2) #define as_short3(x) __builtin_astype((x), short3) #define as_short4(x) __builtin_astype((x), short4) #define as_short8(x) __builtin_astype((x), short8) #define as_short16(x) __builtin_astype((x), short16) #define as_ushort(x) __builtin_astype((x), ushort) #define as_ushort2(x) __builtin_astype((x), ushort2) #define as_ushort3(x) __builtin_astype((x), ushort3) #define as_ushort4(x) __builtin_astype((x), ushort4) #define as_ushort8(x) __builtin_astype((x), ushort8) #define as_ushort16(x) __builtin_astype((x), ushort16) #define as_int(x) __builtin_astype((x), int) #define as_int2(x) __builtin_astype((x), int2) #define as_int3(x) __builtin_astype((x), int3) #define as_int4(x) __builtin_astype((x), int4) #define as_int8(x) __builtin_astype((x), int8) #define as_int16(x) __builtin_astype((x), int16) #define as_uint(x) __builtin_astype((x), uint) #define as_uint2(x) __builtin_astype((x), uint2) #define as_uint3(x) __builtin_astype((x), uint3) #define as_uint4(x) __builtin_astype((x), uint4) #define as_uint8(x) __builtin_astype((x), uint8) #define as_uint16(x) __builtin_astype((x), uint16) #define as_long(x) __builtin_astype((x), long) #define as_long2(x) __builtin_astype((x), long2) #define as_long3(x) __builtin_astype((x), long3) #define as_long4(x) __builtin_astype((x), long4) #define as_long8(x) __builtin_astype((x), long8) #define as_long16(x) __builtin_astype((x), long16) #define as_ulong(x) __builtin_astype((x), ulong) #define as_ulong2(x) __builtin_astype((x), ulong2) #define as_ulong3(x) __builtin_astype((x), ulong3) #define as_ulong4(x) __builtin_astype((x), ulong4) #define as_ulong8(x) __builtin_astype((x), ulong8) #define as_ulong16(x) __builtin_astype((x), ulong16) #define as_float(x) __builtin_astype((x), float) #define as_float2(x) __builtin_astype((x), float2) #define as_float3(x) __builtin_astype((x), float3) #define as_float4(x) __builtin_astype((x), float4) #define as_float8(x) __builtin_astype((x), float8) #define as_float16(x) __builtin_astype((x), float16) #ifdef cl_khr_fp64 #define as_double(x) __builtin_astype((x), double) #define as_double2(x) __builtin_astype((x), double2) #define as_double3(x) __builtin_astype((x), double3) #define as_double4(x) __builtin_astype((x), double4) #define as_double8(x) __builtin_astype((x), double8) #define as_double16(x) __builtin_astype((x), double16) #endif // cl_khr_fp64 #ifdef cl_khr_fp16 #define as_half(x) __builtin_astype((x), half) #define as_half2(x) __builtin_astype((x), half2) #define as_half3(x) __builtin_astype((x), half3) #define as_half4(x) __builtin_astype((x), half4) #define as_half8(x) __builtin_astype((x), half8) #define as_half16(x) __builtin_astype((x), half16) #endif // cl_khr_fp16 #define as_size_t(x) __builtin_astype((x), size_t) #define as_ptrdiff_t(x) __builtin_astype((x), ptrdiff_t) #define as_intptr_t(x) __builtin_astype((x), intptr_t) #define as_uintptr_t(x) __builtin_astype((x), uintptr_t) // C++ for OpenCL - __remove_address_space #if defined(__OPENCL_CPP_VERSION__) template struct __remove_address_space { using type = _Tp; }; #if defined(__opencl_c_generic_address_space) template struct __remove_address_space<__generic _Tp> { using 
type = _Tp; }; #endif template struct __remove_address_space<__global _Tp> { using type = _Tp; }; template struct __remove_address_space<__private _Tp> { using type = _Tp; }; template struct __remove_address_space<__local _Tp> { using type = _Tp; }; template struct __remove_address_space<__constant _Tp> { using type = _Tp; }; #endif // OpenCL v1.1 s6.9, v1.2/2.0 s6.10 - Function qualifiers #define __kernel_exec(X, typen) __kernel \ __attribute__((work_group_size_hint(X, 1, 1))) \ __attribute__((vec_type_hint(typen))) #define kernel_exec(X, typen) __kernel \ __attribute__((work_group_size_hint(X, 1, 1))) \ __attribute__((vec_type_hint(typen))) #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) // OpenCL v1.2 s6.12.13, v2.0 s6.13.13 - printf int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))); #endif #ifdef cl_intel_device_side_avc_motion_estimation #define CLK_AVC_ME_MAJOR_16x16_INTEL 0x0 #define CLK_AVC_ME_MAJOR_16x8_INTEL 0x1 #define CLK_AVC_ME_MAJOR_8x16_INTEL 0x2 #define CLK_AVC_ME_MAJOR_8x8_INTEL 0x3 #define CLK_AVC_ME_MINOR_8x8_INTEL 0x0 #define CLK_AVC_ME_MINOR_8x4_INTEL 0x1 #define CLK_AVC_ME_MINOR_4x8_INTEL 0x2 #define CLK_AVC_ME_MINOR_4x4_INTEL 0x3 #define CLK_AVC_ME_MAJOR_FORWARD_INTEL 0x0 #define CLK_AVC_ME_MAJOR_BACKWARD_INTEL 0x1 #define CLK_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2 #define CLK_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0 #define CLK_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E #define CLK_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D #define CLK_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B #define CLK_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77 #define CLK_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F #define CLK_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F #define CLK_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F #define CLK_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0 #define CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1 #define CLK_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2 #define CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0 #define CLK_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1 #define CLK_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2 #define CLK_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3 #define CLK_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4 #define CLK_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5 #define CLK_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6 #define CLK_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7 #define CLK_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8 #define CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 #define CLK_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2 #define CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 #define CLK_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 #define CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3 #define CLK_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0 #define CLK_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1 #define CLK_AVC_ME_COST_PRECISION_PEL_INTEL 0x2 #define CLK_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3 #define CLK_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10 #define CLK_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15 #define CLK_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20 #define CLK_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B #define CLK_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30 #define CLK_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0 #define CLK_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2 #define CLK_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4 #define CLK_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8 #define CLK_AVC_ME_INTRA_16x16_INTEL 0x0 #define CLK_AVC_ME_INTRA_8x8_INTEL 0x1 #define CLK_AVC_ME_INTRA_4x4_INTEL 0x2 #define CLK_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0 #define CLK_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000 #define 
CLK_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL (0x1 << 24) #define CLK_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL (0x2 << 24) #define CLK_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL (0x3 << 24) #define CLK_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL (0x55 << 24) #define CLK_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL (0xAA << 24) #define CLK_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL (0xFF << 24) #define CLK_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL (0x1 << 24) #define CLK_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL (0x2 << 24) #define CLK_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL (0x1 << 26) #define CLK_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL (0x2 << 26) #define CLK_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL (0x1 << 28) #define CLK_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL (0x2 << 28) #define CLK_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL (0x1 << 30) #define CLK_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL (0x2 << 30) #define CLK_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00 #define CLK_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80 #define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_ALL_INTEL 0x0 #define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6 #define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5 #define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3 #define CLK_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60 #define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10 #define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8 #define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4 #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 #define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 #define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 #define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 #define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 #define CLK_AVC_ME_FRAME_FORWARD_INTEL 0x1 #define CLK_AVC_ME_FRAME_BACKWARD_INTEL 0x2 #define CLK_AVC_ME_FRAME_DUAL_INTEL 0x3 #define CLK_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0 #define CLK_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1 #define CLK_AVC_ME_INITIALIZE_INTEL 0x0 #define CLK_AVC_IME_PAYLOAD_INITIALIZE_INTEL 0x0 #define CLK_AVC_REF_PAYLOAD_INITIALIZE_INTEL 0x0 #define CLK_AVC_SIC_PAYLOAD_INITIALIZE_INTEL 0x0 #define CLK_AVC_IME_RESULT_INITIALIZE_INTEL 0x0 #define CLK_AVC_REF_RESULT_INITIALIZE_INTEL 0x0 #define CLK_AVC_SIC_RESULT_INITIALIZE_INTEL 0x0 #define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0 #define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0 #define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0 #define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0 #endif // cl_intel_device_side_avc_motion_estimation // Disable any extensions we may have enabled previously. 
#pragma OPENCL EXTENSION all : disable #endif //_OPENCL_BASE_H_ /*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead." #endif #ifndef __AVX512ERINTRIN_H #define __AVX512ERINTRIN_H /* exp2a23 */ #define _mm512_exp2a23_round_pd(A, R) \ ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_exp2a23_round_pd(S, M, A, R) \ ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(S), (__mmask8)(M), \ (int)(R))) #define _mm512_maskz_exp2a23_round_pd(M, A, R) \ ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(M), (int)(R))) #define _mm512_exp2a23_pd(A) \ _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION) #define _mm512_mask_exp2a23_pd(S, M, A) \ _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) #define _mm512_maskz_exp2a23_pd(M, A) \ _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) #define _mm512_exp2a23_round_ps(A, R) \ ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_exp2a23_round_ps(S, M, A, R) \ ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(S), (__mmask16)(M), \ (int)(R))) #define _mm512_maskz_exp2a23_round_ps(M, A, R) \ ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(M), (int)(R))) #define _mm512_exp2a23_ps(A) \ _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION) #define _mm512_mask_exp2a23_ps(S, M, A) \ _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) #define _mm512_maskz_exp2a23_ps(M, A) \ _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) /* rsqrt28 */ #define _mm512_rsqrt28_round_pd(A, R) \ ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \ ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(S), (__mmask8)(M), \ (int)(R))) #define
_mm512_maskz_rsqrt28_round_pd(M, A, R) \ ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(M), (int)(R))) #define _mm512_rsqrt28_pd(A) \ _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION) #define _mm512_mask_rsqrt28_pd(S, M, A) \ _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) #define _mm512_maskz_rsqrt28_pd(M, A) \ _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) #define _mm512_rsqrt28_round_ps(A, R) \ ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \ ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(S), (__mmask16)(M), \ (int)(R))) #define _mm512_maskz_rsqrt28_round_ps(M, A, R) \ ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(M), (int)(R))) #define _mm512_rsqrt28_ps(A) \ _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION) #define _mm512_mask_rsqrt28_ps(S, M, A) \ _mm512_mask_rsqrt28_round_ps((S), (M), A, _MM_FROUND_CUR_DIRECTION) #define _mm512_maskz_rsqrt28_ps(M, A) \ _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) #define _mm_rsqrt28_round_ss(A, B, R) \ ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \ ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(S), \ (__mmask8)(M), (int)(R))) #define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \ ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(M), (int)(R))) #define _mm_rsqrt28_ss(A, B) \ _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) #define _mm_mask_rsqrt28_ss(S, M, A, B) \ _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) #define _mm_maskz_rsqrt28_ss(M, A, B) \ _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) #define _mm_rsqrt28_round_sd(A, B, R) \ ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \ ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(S), \ (__mmask8)(M), (int)(R))) #define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \ ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(M), (int)(R))) #define _mm_rsqrt28_sd(A, B) \ _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) #define _mm_mask_rsqrt28_sd(S, M, A, B) \ _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) #define _mm_maskz_rsqrt28_sd(M, A, B) \ _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) /* rcp28 */ #define _mm512_rcp28_round_pd(A, R) \ ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm512_mask_rcp28_round_pd(S, M, A, R) \ ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(S), (__mmask8)(M), \ (int)(R))) #define _mm512_maskz_rcp28_round_pd(M, A, R) \ ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(M), (int)(R))) #define _mm512_rcp28_pd(A) 
\ _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION) #define _mm512_mask_rcp28_pd(S, M, A) \ _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) #define _mm512_maskz_rcp28_pd(M, A) \ _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) #define _mm512_rcp28_round_ps(A, R) \ ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)-1, (int)(R))) #define _mm512_mask_rcp28_round_ps(S, M, A, R) \ ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(S), (__mmask16)(M), \ (int)(R))) #define _mm512_maskz_rcp28_round_ps(M, A, R) \ ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(M), (int)(R))) #define _mm512_rcp28_ps(A) \ _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION) #define _mm512_mask_rcp28_ps(S, M, A) \ _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) #define _mm512_maskz_rcp28_ps(M, A) \ _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) #define _mm_rcp28_round_ss(A, B, R) \ ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_rcp28_round_ss(S, M, A, B, R) \ ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)(__m128)(S), \ (__mmask8)(M), (int)(R))) #define _mm_maskz_rcp28_round_ss(M, A, B, R) \ ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(M), (int)(R))) #define _mm_rcp28_ss(A, B) \ _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) #define _mm_mask_rcp28_ss(S, M, A, B) \ _mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) #define _mm_maskz_rcp28_ss(M, A, B) \ _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) #define _mm_rcp28_round_sd(A, B, R) \ ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R))) #define _mm_mask_rcp28_round_sd(S, M, A, B, R) \ ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)(__m128d)(S), \ (__mmask8)(M), (int)(R))) #define _mm_maskz_rcp28_round_sd(M, A, B, R) \ ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(M), (int)(R))) #define _mm_rcp28_sd(A, B) \ _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) #define _mm_mask_rcp28_sd(S, M, A, B) \ _mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) #define _mm_maskz_rcp28_sd(M, A, B) \ _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) #endif /* __AVX512ERINTRIN_H */ /*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __AVX512VBMI2INTRIN_H #define __AVX512VBMI2INTRIN_H /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2,evex512"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) { return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, (__v32hi) __S, __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) { return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, (__v32hi) _mm512_setzero_si512(), __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D) { return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, (__v64qi) __S, __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) { return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, (__v64qi) _mm512_setzero_si512(), __U); } static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D) { __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D, __U); } static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D) { __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D, __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D) { return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, (__v32hi) __S, __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) { return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, (__v32hi) _mm512_setzero_si512(), __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D) { return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, (__v64qi) __S, __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) { return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, (__v64qi) _mm512_setzero_si512(), __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, (__v32hi) __S, __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, (__v32hi) _mm512_setzero_si512(), __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, (__v64qi) __S, __U); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, (__v64qi) _mm512_setzero_si512(), __U); } #define _mm512_shldi_epi64(A, B, I) \ ((__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \ (__v8di)(__m512i)(B), (int)(I))) #define _mm512_mask_shldi_epi64(S, U, A, B, I) \ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ (__v8di)(__m512i)(S))) #define _mm512_maskz_shldi_epi64(U, A, B, I) \ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ 
(__v8di)_mm512_setzero_si512())) #define _mm512_shldi_epi32(A, B, I) \ ((__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \ (__v16si)(__m512i)(B), (int)(I))) #define _mm512_mask_shldi_epi32(S, U, A, B, I) \ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ (__v16si)(__m512i)(S))) #define _mm512_maskz_shldi_epi32(U, A, B, I) \ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ (__v16si)_mm512_setzero_si512())) #define _mm512_shldi_epi16(A, B, I) \ ((__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \ (__v32hi)(__m512i)(B), (int)(I))) #define _mm512_mask_shldi_epi16(S, U, A, B, I) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ (__v32hi)(__m512i)(S))) #define _mm512_maskz_shldi_epi16(U, A, B, I) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ (__v32hi)_mm512_setzero_si512())) #define _mm512_shrdi_epi64(A, B, I) \ ((__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \ (__v8di)(__m512i)(B), (int)(I))) #define _mm512_mask_shrdi_epi64(S, U, A, B, I) \ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ (__v8di)(__m512i)(S))) #define _mm512_maskz_shrdi_epi64(U, A, B, I) \ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ (__v8di)_mm512_setzero_si512())) #define _mm512_shrdi_epi32(A, B, I) \ ((__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \ (__v16si)(__m512i)(B), (int)(I))) #define _mm512_mask_shrdi_epi32(S, U, A, B, I) \ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ (__v16si)(__m512i)(S))) #define _mm512_maskz_shrdi_epi32(U, A, B, I) \ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ (__v16si)_mm512_setzero_si512())) #define _mm512_shrdi_epi16(A, B, I) \ ((__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \ (__v32hi)(__m512i)(B), (int)(I))) #define _mm512_mask_shrdi_epi16(S, U, A, B, I) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ (__v32hi)(__m512i)(S))) #define _mm512_maskz_shrdi_epi16(U, A, B, I) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ (__v32hi)_mm512_setzero_si512())) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B, (__v8di)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectq_512(__U, (__v8di)_mm512_shldv_epi64(__A, __B, __C), (__v8di)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectq_512(__U, (__v8di)_mm512_shldv_epi64(__A, __B, __C), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B, (__v16si)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_shldv_epi32(__A, __B, __C), 
(__v16si)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_shldv_epi32(__A, __B, __C), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B, (__v32hi)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectw_512(__U, (__v32hi)_mm512_shldv_epi16(__A, __B, __C), (__v32hi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectw_512(__U, (__v32hi)_mm512_shldv_epi16(__A, __B, __C), (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B, (__v8di)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectq_512(__U, (__v8di)_mm512_shrdv_epi64(__A, __B, __C), (__v8di)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectq_512(__U, (__v8di)_mm512_shrdv_epi64(__A, __B, __C), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B, (__v16si)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { return (__m512i) __builtin_ia32_selectd_512(__U, (__v16si)_mm512_shrdv_epi32(__A, __B, __C), (__v16si)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i) __builtin_ia32_selectd_512(__U, (__v16si)_mm512_shrdv_epi32(__A, __B, __C), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B, (__v32hi)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectw_512(__U, (__v32hi)_mm512_shrdv_epi16(__A, __B, __C), (__v32hi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectw_512(__U, (__v32hi)_mm512_shrdv_epi16(__A, __B, __C), (__v32hi)_mm512_setzero_si512()); } #undef __DEFAULT_FN_ATTRS #endif avx512vpopcntdqintrin.h/*===--------------- avxvnniintrin.h - VNNI intrinsics --------------------=== * * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the 
following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __AVXVNNIINTRIN_H #define __AVXVNNIINTRIN_H /* Below intrinsics defined in avx512vlvnniintrin.h can be used for AVXVNNI */ /// \fn __m256i _mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) /// \fn __m256i _mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) /// \fn __m256i _mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) /// \fn __m256i _mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) /// \fn __m128i _mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) /// \fn __m128i _mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) /// \fn __m128i _mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) /// \fn __m128i _mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) /* Intrinsics with _avx_ prefix are for compatibility with msvc. */ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), __min_vector_width__(128))) /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed /// 16-bit results. Sum these 4 results with the corresponding 32-bit integer /// in \a __S, and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPBUSD instructions. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])) /// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])) /// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])) /// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])) /// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 /// ENDFOR /// DST[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed /// 16-bit results. Sum these 4 results with the corresponding 32-bit integer /// in \a __S using signed saturation, and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPBUSDS instructions. 
/// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])) /// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])) /// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])) /// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])) /// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) /// ENDFOR /// DST[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with /// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit /// results. Sum these 2 results with the corresponding 32-bit integer in \a __S, /// and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPWSSD instructions. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) /// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) /// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 /// ENDFOR /// DST[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with /// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit /// results. Sum these 2 results with the corresponding 32-bit integer in \a __S /// using signed saturation, and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPWSSDS instructions. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) /// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) /// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2) /// ENDFOR /// DST[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed /// 16-bit results. Sum these 4 results with the corresponding 32-bit integer /// in \a __S, and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPBUSD instructions. 
/// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])) /// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])) /// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])) /// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])) /// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 /// ENDFOR /// DST[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed /// 16-bit results. Sum these 4 results with the corresponding 32-bit integer /// in \a __S using signed saturation, and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPBUSDS instructions. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])) /// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])) /// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])) /// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])) /// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) /// ENDFOR /// DST[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with /// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit /// results. Sum these 2 results with the corresponding 32-bit integer in \a __S, /// and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPWSSD instructions. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) /// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) /// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 /// ENDFOR /// DST[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with /// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit /// results. Sum these 2 results with the corresponding 32-bit integer in \a __S /// using signed saturation, and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPWSSDS instructions. 
/// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) /// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) /// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2) /// ENDFOR /// DST[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A, (__v4si)__B); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif // __AVXVNNIINTRIN_H cet.hf16cintrin.h/* * include/omp.h.var */ //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef __OMP_H # define __OMP_H # include # include # include # define KMP_VERSION_MAJOR 5 # define KMP_VERSION_MINOR 0 # define KMP_VERSION_BUILD 20140926 # define KMP_BUILD_DATE "No_Timestamp" # ifdef __cplusplus extern "C" { # endif # define omp_set_affinity_format ompc_set_affinity_format # define omp_get_affinity_format ompc_get_affinity_format # define omp_display_affinity ompc_display_affinity # define omp_capture_affinity ompc_capture_affinity # if defined(_WIN32) # define __KAI_KMPC_CONVENTION __cdecl # ifndef __KMP_IMP # define __KMP_IMP __declspec(dllimport) # endif # else # define __KAI_KMPC_CONVENTION # ifndef __KMP_IMP # define __KMP_IMP # endif # endif /* schedule kind constants */ typedef enum omp_sched_t { omp_sched_static = 1, omp_sched_dynamic = 2, omp_sched_guided = 3, omp_sched_auto = 4, omp_sched_monotonic = 0x80000000 } omp_sched_t; /* set API functions */ extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int); extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int); extern void __KAI_KMPC_CONVENTION omp_set_nested (int); extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int); extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int); /* query API functions */ extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void); extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void); extern int __KAI_KMPC_CONVENTION omp_get_nested (void); extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void); extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void); extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void); extern int __KAI_KMPC_CONVENTION omp_in_parallel (void); extern int __KAI_KMPC_CONVENTION omp_in_final (void); extern int __KAI_KMPC_CONVENTION omp_get_active_level (void); extern int __KAI_KMPC_CONVENTION omp_get_level (void); extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int); extern int __KAI_KMPC_CONVENTION omp_get_team_size (int); extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void); extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void); extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *); extern int __KAI_KMPC_CONVENTION omp_get_max_task_priority (void); /* lock API functions */ typedef struct omp_lock_t { void * _lk; } omp_lock_t; extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *); extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *); extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *); extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *); 
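/*
 * Usage sketch (illustrative only, assuming an OpenMP-enabled compiler such
 * as clang -fopenmp): the plain lock API declared above follows an
 * init / set / unset / destroy lifetime around a critical section.
 *
 *   #include <omp.h>
 *   #include <stdio.h>
 *
 *   int main(void) {
 *     omp_lock_t lock;
 *     int counter = 0;
 *     omp_init_lock(&lock);
 *     #pragma omp parallel num_threads(4)
 *     {
 *       omp_set_lock(&lock);     // enter the critical section
 *       counter++;               // protected update
 *       omp_unset_lock(&lock);   // leave the critical section
 *     }
 *     omp_destroy_lock(&lock);
 *     printf("%d\n", counter);   // typically 4 when 4 threads run
 *     return 0;
 *   }
 */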
extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *); /* nested lock API functions */ typedef struct omp_nest_lock_t { void * _lk; } omp_nest_lock_t; extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *); extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *); extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *); extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *); extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *); /* OpenMP 5.0 Synchronization hints*/ typedef enum omp_sync_hint_t { omp_sync_hint_none = 0, omp_lock_hint_none = omp_sync_hint_none, omp_sync_hint_uncontended = 1, omp_lock_hint_uncontended = omp_sync_hint_uncontended, omp_sync_hint_contended = (1<<1), omp_lock_hint_contended = omp_sync_hint_contended, omp_sync_hint_nonspeculative = (1<<2), omp_lock_hint_nonspeculative = omp_sync_hint_nonspeculative, omp_sync_hint_speculative = (1<<3), omp_lock_hint_speculative = omp_sync_hint_speculative, kmp_lock_hint_hle = (1<<16), kmp_lock_hint_rtm = (1<<17), kmp_lock_hint_adaptive = (1<<18) } omp_sync_hint_t; /* lock hint type for dynamic user lock */ typedef omp_sync_hint_t omp_lock_hint_t; /* hinted lock initializers */ extern void __KAI_KMPC_CONVENTION omp_init_lock_with_hint(omp_lock_t *, omp_lock_hint_t); extern void __KAI_KMPC_CONVENTION omp_init_nest_lock_with_hint(omp_nest_lock_t *, omp_lock_hint_t); /* time API functions */ extern double __KAI_KMPC_CONVENTION omp_get_wtime (void); extern double __KAI_KMPC_CONVENTION omp_get_wtick (void); /* OpenMP 4.0 */ extern int __KAI_KMPC_CONVENTION omp_get_default_device (void); extern void __KAI_KMPC_CONVENTION omp_set_default_device (int); extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void); extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void); extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void); extern int __KAI_KMPC_CONVENTION omp_get_team_num (void); extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void); /* OpenMP 4.5 */ extern int __KAI_KMPC_CONVENTION omp_get_initial_device (void); extern void* __KAI_KMPC_CONVENTION omp_target_alloc(size_t, int); extern void __KAI_KMPC_CONVENTION omp_target_free(void *, int); extern int __KAI_KMPC_CONVENTION omp_target_is_present(const void *, int); extern int __KAI_KMPC_CONVENTION omp_target_memcpy(void *, const void *, size_t, size_t, size_t, int, int); extern int __KAI_KMPC_CONVENTION omp_target_memcpy_rect(void *, const void *, size_t, int, const size_t *, const size_t *, const size_t *, const size_t *, const size_t *, int, int); extern int __KAI_KMPC_CONVENTION omp_target_associate_ptr(const void *, const void *, size_t, size_t, int); extern int __KAI_KMPC_CONVENTION omp_target_disassociate_ptr(const void *, int); /* OpenMP 5.0 */ extern int __KAI_KMPC_CONVENTION omp_get_device_num (void); typedef void * omp_depend_t; /* OpenMP 5.1 interop */ typedef intptr_t omp_intptr_t; /* 0..omp_get_num_interop_properties()-1 are reserved for implementation-defined properties */ typedef enum omp_interop_property { omp_ipr_fr_id = -1, omp_ipr_fr_name = -2, omp_ipr_vendor = -3, omp_ipr_vendor_name = -4, omp_ipr_device_num = -5, omp_ipr_platform = -6, omp_ipr_device = -7, omp_ipr_device_context = -8, omp_ipr_targetsync = -9, omp_ipr_first = -9 } omp_interop_property_t; #define omp_interop_none 0 typedef enum omp_interop_rc { omp_irc_no_value = 1, omp_irc_success = 0, omp_irc_empty = -1, omp_irc_out_of_range = -2, omp_irc_type_int = -3, omp_irc_type_ptr = -4, 
omp_irc_type_str = -5, omp_irc_other = -6 } omp_interop_rc_t; typedef enum omp_interop_fr { omp_ifr_cuda = 1, omp_ifr_cuda_driver = 2, omp_ifr_opencl = 3, omp_ifr_sycl = 4, omp_ifr_hip = 5, omp_ifr_level_zero = 6, omp_ifr_last = 7 } omp_interop_fr_t; typedef void * omp_interop_t; /*! * The `omp_get_num_interop_properties` routine retrieves the number of implementation-defined properties available for an `omp_interop_t` object. */ extern int __KAI_KMPC_CONVENTION omp_get_num_interop_properties(const omp_interop_t); /*! * The `omp_get_interop_int` routine retrieves an integer property from an `omp_interop_t` object. */ extern omp_intptr_t __KAI_KMPC_CONVENTION omp_get_interop_int(const omp_interop_t, omp_interop_property_t, int *); /*! * The `omp_get_interop_ptr` routine retrieves a pointer property from an `omp_interop_t` object. */ extern void * __KAI_KMPC_CONVENTION omp_get_interop_ptr(const omp_interop_t, omp_interop_property_t, int *); /*! * The `omp_get_interop_str` routine retrieves a string property from an `omp_interop_t` object. */ extern const char * __KAI_KMPC_CONVENTION omp_get_interop_str(const omp_interop_t, omp_interop_property_t, int *); /*! * The `omp_get_interop_name` routine retrieves a property name from an `omp_interop_t` object. */ extern const char * __KAI_KMPC_CONVENTION omp_get_interop_name(const omp_interop_t, omp_interop_property_t); /*! * The `omp_get_interop_type_desc` routine retrieves a description of the type of a property associated with an `omp_interop_t` object. */ extern const char * __KAI_KMPC_CONVENTION omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t); /*! * The `omp_get_interop_rc_desc` routine retrieves a description of the return code associated with an `omp_interop_t` object. */ extern const char * __KAI_KMPC_CONVENTION omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t); /* OpenMP 5.1 device memory routines */ /*! * The `omp_target_memcpy_async` routine asynchronously performs a copy between any combination of host and device pointers. */ extern int __KAI_KMPC_CONVENTION omp_target_memcpy_async(void *, const void *, size_t, size_t, size_t, int, int, int, omp_depend_t *); /*! * The `omp_target_memcpy_rect_async` routine asynchronously performs a copy between any combination of host and device pointers. */ extern int __KAI_KMPC_CONVENTION omp_target_memcpy_rect_async(void *, const void *, size_t, int, const size_t *, const size_t *, const size_t *, const size_t *, const size_t *, int, int, int, omp_depend_t *); /* OpenMP 6.0 device memory routines */ extern void * __KAI_KMPC_CONVENTION omp_target_memset(void *, int, size_t, int); extern void * __KAI_KMPC_CONVENTION omp_target_memset_async(void *, int, size_t, int, int, omp_depend_t *); /*! * The `omp_get_mapped_ptr` routine returns the device pointer that is associated with a host pointer for a given device. 
*/ extern void * __KAI_KMPC_CONVENTION omp_get_mapped_ptr(const void *, int); extern int __KAI_KMPC_CONVENTION omp_target_is_accessible(const void *, size_t, int); /* kmp API functions */ extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void); extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int); extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void); extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t); extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void); extern int __KAI_KMPC_CONVENTION kmp_get_library (void); extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int); extern void __KAI_KMPC_CONVENTION kmp_set_library (int); extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void); extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void); extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void); extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *); extern void __KAI_KMPC_CONVENTION kmp_set_disp_num_buffers (int); /* Intel affinity API */ typedef void * kmp_affinity_mask_t; extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *); extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *); extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void); extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *); extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *); extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *); extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *); extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *); /* OpenMP 4.0 affinity API */ typedef enum omp_proc_bind_t { omp_proc_bind_false = 0, omp_proc_bind_true = 1, omp_proc_bind_master = 2, omp_proc_bind_close = 3, omp_proc_bind_spread = 4 } omp_proc_bind_t; extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void); /* OpenMP 4.5 affinity API */ extern int __KAI_KMPC_CONVENTION omp_get_num_places (void); extern int __KAI_KMPC_CONVENTION omp_get_place_num_procs (int); extern void __KAI_KMPC_CONVENTION omp_get_place_proc_ids (int, int *); extern int __KAI_KMPC_CONVENTION omp_get_place_num (void); extern int __KAI_KMPC_CONVENTION omp_get_partition_num_places (void); extern void __KAI_KMPC_CONVENTION omp_get_partition_place_nums (int *); extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t); extern void * __KAI_KMPC_CONVENTION kmp_aligned_malloc (size_t, size_t); extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t); extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t); extern void __KAI_KMPC_CONVENTION kmp_free (void *); extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void); extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void); /* OpenMP 5.0 Tool Control */ typedef enum omp_control_tool_result_t { omp_control_tool_notool = -2, omp_control_tool_nocallback = -1, omp_control_tool_success = 0, omp_control_tool_ignored = 1 } omp_control_tool_result_t; typedef enum omp_control_tool_t { omp_control_tool_start = 1, omp_control_tool_pause = 2, omp_control_tool_flush = 3, omp_control_tool_end = 4 } omp_control_tool_t; extern int __KAI_KMPC_CONVENTION omp_control_tool(int, int, void*); /* OpenMP 5.0 Memory Management */ typedef uintptr_t omp_uintptr_t; typedef enum { omp_atk_sync_hint = 1, omp_atk_alignment = 2, omp_atk_access = 3, omp_atk_pool_size = 4, omp_atk_fallback = 5, omp_atk_fb_data = 6, omp_atk_pinned = 7, 
omp_atk_partition = 8 } omp_alloctrait_key_t; typedef enum { omp_atv_false = 0, omp_atv_true = 1, omp_atv_contended = 3, omp_atv_uncontended = 4, omp_atv_serialized = 5, omp_atv_sequential = omp_atv_serialized, // (deprecated) omp_atv_private = 6, omp_atv_all = 7, omp_atv_thread = 8, omp_atv_pteam = 9, omp_atv_cgroup = 10, omp_atv_default_mem_fb = 11, omp_atv_null_fb = 12, omp_atv_abort_fb = 13, omp_atv_allocator_fb = 14, omp_atv_environment = 15, omp_atv_nearest = 16, omp_atv_blocked = 17, omp_atv_interleaved = 18 } omp_alloctrait_value_t; #define omp_atv_default ((omp_uintptr_t)-1) typedef struct { omp_alloctrait_key_t key; omp_uintptr_t value; } omp_alloctrait_t; # if defined(_WIN32) // On Windows cl and icl do not support 64-bit enum, let's use integer then. typedef omp_uintptr_t omp_allocator_handle_t; extern __KMP_IMP omp_allocator_handle_t const omp_null_allocator; extern __KMP_IMP omp_allocator_handle_t const omp_default_mem_alloc; extern __KMP_IMP omp_allocator_handle_t const omp_large_cap_mem_alloc; extern __KMP_IMP omp_allocator_handle_t const omp_const_mem_alloc; extern __KMP_IMP omp_allocator_handle_t const omp_high_bw_mem_alloc; extern __KMP_IMP omp_allocator_handle_t const omp_low_lat_mem_alloc; extern __KMP_IMP omp_allocator_handle_t const omp_cgroup_mem_alloc; extern __KMP_IMP omp_allocator_handle_t const omp_pteam_mem_alloc; extern __KMP_IMP omp_allocator_handle_t const omp_thread_mem_alloc; extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_host_mem_alloc; extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc; extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_device_mem_alloc; typedef omp_uintptr_t omp_memspace_handle_t; extern __KMP_IMP omp_memspace_handle_t const omp_default_mem_space; extern __KMP_IMP omp_memspace_handle_t const omp_large_cap_mem_space; extern __KMP_IMP omp_memspace_handle_t const omp_const_mem_space; extern __KMP_IMP omp_memspace_handle_t const omp_high_bw_mem_space; extern __KMP_IMP omp_memspace_handle_t const omp_low_lat_mem_space; extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_host_mem_space; extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_shared_mem_space; extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_device_mem_space; # else # if __cplusplus >= 201103 typedef enum omp_allocator_handle_t : omp_uintptr_t # else typedef enum omp_allocator_handle_t # endif { omp_null_allocator = 0, omp_default_mem_alloc = 1, omp_large_cap_mem_alloc = 2, omp_const_mem_alloc = 3, omp_high_bw_mem_alloc = 4, omp_low_lat_mem_alloc = 5, omp_cgroup_mem_alloc = 6, omp_pteam_mem_alloc = 7, omp_thread_mem_alloc = 8, llvm_omp_target_host_mem_alloc = 100, llvm_omp_target_shared_mem_alloc = 101, llvm_omp_target_device_mem_alloc = 102, KMP_ALLOCATOR_MAX_HANDLE = UINTPTR_MAX } omp_allocator_handle_t; # if __cplusplus >= 201103 typedef enum omp_memspace_handle_t : omp_uintptr_t # else typedef enum omp_memspace_handle_t # endif { omp_default_mem_space = 0, omp_large_cap_mem_space = 1, omp_const_mem_space = 2, omp_high_bw_mem_space = 3, omp_low_lat_mem_space = 4, llvm_omp_target_host_mem_space = 100, llvm_omp_target_shared_mem_space = 101, llvm_omp_target_device_mem_space = 102, KMP_MEMSPACE_MAX_HANDLE = UINTPTR_MAX } omp_memspace_handle_t; # endif extern omp_allocator_handle_t __KAI_KMPC_CONVENTION omp_init_allocator(omp_memspace_handle_t m, int ntraits, omp_alloctrait_t traits[]); extern void __KAI_KMPC_CONVENTION omp_destroy_allocator(omp_allocator_handle_t allocator); extern void 
__KAI_KMPC_CONVENTION omp_set_default_allocator(omp_allocator_handle_t a); extern omp_allocator_handle_t __KAI_KMPC_CONVENTION omp_get_default_allocator(void); # ifdef __cplusplus extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, omp_allocator_handle_t a = omp_null_allocator); extern void *__KAI_KMPC_CONVENTION omp_aligned_alloc(size_t align, size_t size, omp_allocator_handle_t a = omp_null_allocator); extern void *__KAI_KMPC_CONVENTION omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t a = omp_null_allocator); extern void *__KAI_KMPC_CONVENTION omp_aligned_calloc(size_t align, size_t nmemb, size_t size, omp_allocator_handle_t a = omp_null_allocator); extern void *__KAI_KMPC_CONVENTION omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator = omp_null_allocator, omp_allocator_handle_t free_allocator = omp_null_allocator); extern void __KAI_KMPC_CONVENTION omp_free(void * ptr, omp_allocator_handle_t a = omp_null_allocator); # else extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, omp_allocator_handle_t a); extern void *__KAI_KMPC_CONVENTION omp_aligned_alloc(size_t align, size_t size, omp_allocator_handle_t a); extern void *__KAI_KMPC_CONVENTION omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t a); extern void *__KAI_KMPC_CONVENTION omp_aligned_calloc(size_t align, size_t nmemb, size_t size, omp_allocator_handle_t a); extern void *__KAI_KMPC_CONVENTION omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, omp_allocator_handle_t free_allocator); extern void __KAI_KMPC_CONVENTION omp_free(void *ptr, omp_allocator_handle_t a); # endif /* OpenMP 5.0 Affinity Format */ extern void __KAI_KMPC_CONVENTION omp_set_affinity_format(char const *); extern size_t __KAI_KMPC_CONVENTION omp_get_affinity_format(char *, size_t); extern void __KAI_KMPC_CONVENTION omp_display_affinity(char const *); extern size_t __KAI_KMPC_CONVENTION omp_capture_affinity(char *, size_t, char const *); /* OpenMP 5.0 events */ # if defined(_WIN32) // On Windows cl and icl do not support 64-bit enum, let's use integer then. 
typedef omp_uintptr_t omp_event_handle_t;
# else
typedef enum omp_event_handle_t { KMP_EVENT_MAX_HANDLE = UINTPTR_MAX } omp_event_handle_t;
# endif
extern void __KAI_KMPC_CONVENTION omp_fulfill_event ( omp_event_handle_t event );

/* OpenMP 5.0 Pause Resources */
typedef enum omp_pause_resource_t {
  omp_pause_resume = 0,
  omp_pause_soft = 1,
  omp_pause_hard = 2
} omp_pause_resource_t;
extern int __KAI_KMPC_CONVENTION omp_pause_resource(omp_pause_resource_t, int);
extern int __KAI_KMPC_CONVENTION omp_pause_resource_all(omp_pause_resource_t);

extern int __KAI_KMPC_CONVENTION omp_get_supported_active_levels(void);

/* OpenMP 5.1 */
extern void __KAI_KMPC_CONVENTION omp_set_num_teams(int num_teams);
extern int __KAI_KMPC_CONVENTION omp_get_max_teams(void);
extern void __KAI_KMPC_CONVENTION omp_set_teams_thread_limit(int limit);
extern int __KAI_KMPC_CONVENTION omp_get_teams_thread_limit(void);

/* OpenMP 5.1 Display Environment */
extern void omp_display_env(int verbose);

# if defined(_OPENMP) && _OPENMP >= 201811
#pragma omp begin declare variant match(device={kind(host)})
static inline int omp_is_initial_device(void) { return 1; }
#pragma omp end declare variant
#pragma omp begin declare variant match(device={kind(nohost)})
static inline int omp_is_initial_device(void) { return 0; }
#pragma omp end declare variant
# endif

/* OpenMP 5.2 */
extern int __KAI_KMPC_CONVENTION omp_in_explicit_task(void);

/* LLVM Extensions */
extern void *llvm_omp_target_dynamic_shared_alloc(void);

# undef __KAI_KMPC_CONVENTION
# undef __KMP_IMP

/* Warning: The following typedefs are not standard, deprecated and will be removed in a future release. */
typedef int omp_int_t;
typedef double omp_wtime_t;

# ifdef __cplusplus
}
# endif

#endif /* __OMP_H */

/*===---- velintrin.h - VEL intrinsics for VE ------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __VEL_INTRIN_H__ #define __VEL_INTRIN_H__ // Vector registers typedef double __vr __attribute__((__vector_size__(2048))); // Vector mask registers #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // For C99 typedef _Bool __vm __attribute__((ext_vector_type(256))); typedef _Bool __vm256 __attribute__((ext_vector_type(256))); typedef _Bool __vm512 __attribute__((ext_vector_type(512))); #else #ifdef __cplusplus // For C++ typedef bool __vm __attribute__((ext_vector_type(256))); typedef bool __vm256 __attribute__((ext_vector_type(256))); typedef bool __vm512 __attribute__((ext_vector_type(512))); #else #error need C++ or C99 to use vector intrinsics for VE #endif #endif enum VShuffleCodes { VE_VSHUFFLE_YUYU = 0, VE_VSHUFFLE_YUYL = 1, VE_VSHUFFLE_YUZU = 2, VE_VSHUFFLE_YUZL = 3, VE_VSHUFFLE_YLYU = 4, VE_VSHUFFLE_YLYL = 5, VE_VSHUFFLE_YLZU = 6, VE_VSHUFFLE_YLZL = 7, VE_VSHUFFLE_ZUYU = 8, VE_VSHUFFLE_ZUYL = 9, VE_VSHUFFLE_ZUZU = 10, VE_VSHUFFLE_ZUZL = 11, VE_VSHUFFLE_ZLYU = 12, VE_VSHUFFLE_ZLYL = 13, VE_VSHUFFLE_ZLZU = 14, VE_VSHUFFLE_ZLZL = 15, }; // Use generated intrinsic name definitions #include // Use helper functions #include // pack #define _vel_pack_f32p __builtin_ve_vl_pack_f32p #define _vel_pack_f32a __builtin_ve_vl_pack_f32a static inline unsigned long int _vel_pack_i32(unsigned int a, unsigned int b) { return (((unsigned long int)a) << 32) | b; } #define _vel_extract_vm512u(vm) __builtin_ve_vl_extract_vm512u(vm) #define _vel_extract_vm512l(vm) __builtin_ve_vl_extract_vm512l(vm) #define _vel_insert_vm512u(vm512, vm) __builtin_ve_vl_insert_vm512u(vm512, vm) #define _vel_insert_vm512l(vm512, vm) __builtin_ve_vl_insert_vm512l(vm512, vm) #endif /*===---- xsavesintrin.h - XSAVES intrinsic --------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __XSAVESINTRIN_H #define __XSAVESINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xsaves"))) static __inline__ void __DEFAULT_FN_ATTRS _xsaves(void *__p, unsigned long long __m) { __builtin_ia32_xsaves(__p, __m); } static __inline__ void __DEFAULT_FN_ATTRS _xrstors(void *__p, unsigned long long __m) { __builtin_ia32_xrstors(__p, __m); } #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS _xrstors64(void *__p, unsigned long long __m) { __builtin_ia32_xrstors64(__p, __m); } static __inline__ void __DEFAULT_FN_ATTRS _xsaves64(void *__p, unsigned long long __m) { __builtin_ia32_xsaves64(__p, __m); } #endif #undef __DEFAULT_FN_ATTRS #endif openmp_wrappers/__clang_openmp_device_functions.h/*===---- x86intrin.h - Implementation of X86 intrinsics on PowerPC --------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef NO_WARN_X86_INTRINSICS /* This header is distributed to simplify porting x86_64 code that makes explicit use of Intel intrinsics to powerpc64le. It is the user's responsibility to determine if the results are acceptable and make additional changes as necessary. Note that much code that uses Intel intrinsics can be rewritten in standard C or GNU C extensions, which are more portable and better optimized across multiple targets. */ #error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." #endif #ifndef X86INTRIN_H_ #define X86INTRIN_H_ #ifdef __ALTIVEC__ #include #endif /* __ALTIVEC__ */ #endif /* X86INTRIN_H_ */ sanitizer/coverage_interface.h//===-- dfsan_interface.h -------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is a part of DataFlowSanitizer. // // Public interface header. //===----------------------------------------------------------------------===// #ifndef DFSAN_INTERFACE_H #define DFSAN_INTERFACE_H #include #include #include #ifdef __cplusplus extern "C" { #endif typedef uint8_t dfsan_label; typedef uint32_t dfsan_origin; /// Signature of the callback argument to dfsan_set_write_callback(). typedef void(SANITIZER_CDECL *dfsan_write_callback_t)(int fd, const void *buf, size_t count); /// Signature of the callback argument to dfsan_set_conditional_callback(). typedef void(SANITIZER_CDECL *dfsan_conditional_callback_t)( dfsan_label label, dfsan_origin origin); /// Signature of the callback argument to dfsan_set_reaches_function_callback(). /// The description is intended to hold the name of the variable. typedef void(SANITIZER_CDECL *dfsan_reaches_function_callback_t)( dfsan_label label, dfsan_origin origin, const char *file, unsigned int line, const char *function); /// Computes the union of \c l1 and \c l2, resulting in a union label. dfsan_label SANITIZER_CDECL dfsan_union(dfsan_label l1, dfsan_label l2); /// Sets the label for each address in [addr,addr+size) to \c label. void SANITIZER_CDECL dfsan_set_label(dfsan_label label, void *addr, size_t size); /// Sets the label for each address in [addr,addr+size) to the union of the /// current label for that address and \c label. void SANITIZER_CDECL dfsan_add_label(dfsan_label label, void *addr, size_t size); /// Retrieves the label associated with the given data. /// /// The type of 'data' is arbitrary. The function accepts a value of any type, /// which can be truncated or extended (implicitly or explicitly) as necessary. /// The truncation/extension operations will preserve the label of the original /// value. dfsan_label SANITIZER_CDECL dfsan_get_label(long data); /// Retrieves the immediate origin associated with the given data. The returned /// origin may point to another origin. /// /// The type of 'data' is arbitrary. dfsan_origin SANITIZER_CDECL dfsan_get_origin(long data); /// Retrieves the label associated with the data at the given address. dfsan_label SANITIZER_CDECL dfsan_read_label(const void *addr, size_t size); /// Return the origin associated with the first taint byte in the size bytes /// from the address addr. 
dfsan_origin SANITIZER_CDECL dfsan_read_origin_of_first_taint(const void *addr, size_t size); /// Returns whether the given label contains the label elem. int SANITIZER_CDECL dfsan_has_label(dfsan_label label, dfsan_label elem); /// Flushes the DFSan shadow, i.e. forgets about all labels currently associated /// with the application memory. Use this call to start over the taint tracking /// within the same process. /// /// Note: If another thread is working with tainted data during the flush, that /// taint could still be written to shadow after the flush. void SANITIZER_CDECL dfsan_flush(void); /// Sets a callback to be invoked on calls to write(). The callback is invoked /// before the write is done. The write is not guaranteed to succeed when the /// callback executes. Pass in NULL to remove any callback. void SANITIZER_CDECL dfsan_set_write_callback(dfsan_write_callback_t labeled_write_callback); /// Sets a callback to be invoked on any conditional expressions which have a /// taint label set. This can be used to find where tainted data influences /// the behavior of the program. /// These callbacks will only be added when -dfsan-conditional-callbacks=true. void SANITIZER_CDECL dfsan_set_conditional_callback(dfsan_conditional_callback_t callback); /// Conditional expressions occur during signal handlers. /// Making callbacks that handle signals well is tricky, so when /// -dfsan-conditional-callbacks=true, conditional expressions used in signal /// handlers will add the labels they see into a global (bitwise-or together). /// This function returns all label bits seen in signal handler conditions. dfsan_label SANITIZER_CDECL dfsan_get_labels_in_signal_conditional(); /// Sets a callback to be invoked when tainted data reaches a function. /// This could occur at function entry, or at a load instruction. /// These callbacks will only be added if -dfsan-reaches-function-callbacks=1. void SANITIZER_CDECL dfsan_set_reaches_function_callback(dfsan_reaches_function_callback_t callback); /// Making callbacks that handle signals well is tricky, so when /// -dfsan-reaches-function-callbacks=true, functions reached in signal /// handlers will add the labels they see into a global (bitwise-or together). /// This function returns all label bits seen during signal handlers. dfsan_label SANITIZER_CDECL dfsan_get_labels_in_signal_reaches_function(); /// Interceptor hooks. /// Whenever a dfsan's custom function is called the corresponding /// hook is called it non-zero. The hooks should be defined by the user. /// The primary use case is taint-guided fuzzing, where the fuzzer /// needs to see the parameters of the function and the labels. /// FIXME: implement more hooks. void SANITIZER_CDECL dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2, size_t n, dfsan_label s1_label, dfsan_label s2_label, dfsan_label n_label); void SANITIZER_CDECL dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2, size_t n, dfsan_label s1_label, dfsan_label s2_label, dfsan_label n_label); /// Prints the origin trace of the label at the address addr to stderr. It also /// prints description at the beginning of the trace. If origin tracking is not /// on, or the address is not labeled, it prints nothing. void SANITIZER_CDECL dfsan_print_origin_trace(const void *addr, const char *description); /// As above, but use an origin id from dfsan_get_origin() instead of address. /// Does not include header line with taint label and address information. 
void SANITIZER_CDECL dfsan_print_origin_id_trace(dfsan_origin origin); /// Prints the origin trace of the label at the address \p addr to a /// pre-allocated output buffer. If origin tracking is not on, or the address is /// not labeled, it prints nothing. /// /// Typical usage: /// \code /// char kDescription[] = "..."; /// char buf[1024]; /// dfsan_sprint_origin_trace(&tainted_var, kDescription, buf, sizeof(buf)); /// \endcode /// /// Typical usage that handles truncation: /// \code /// char buf[1024]; /// int len = dfsan_sprint_origin_trace(&var, nullptr, buf, sizeof(buf)); /// /// if (len < sizeof(buf)) { /// ProcessOriginTrace(buf); /// } else { /// char *tmpbuf = new char[len + 1]; /// dfsan_sprint_origin_trace(&var, nullptr, tmpbuf, len + 1); /// ProcessOriginTrace(tmpbuf); /// delete[] tmpbuf; /// } /// \endcode /// /// \param addr The tainted memory address whose origin we are printing. /// \param description A description printed at the beginning of the trace. /// \param [out] out_buf The output buffer to write the results to. /// \param out_buf_size The size of \p out_buf. /// /// \returns The number of symbols that should have been written to \p out_buf /// (not including trailing null byte '\0'). Thus, the string is truncated iff /// return value is not less than \p out_buf_size. size_t SANITIZER_CDECL dfsan_sprint_origin_trace(const void *addr, const char *description, char *out_buf, size_t out_buf_size); /// As above, but use an origin id from dfsan_get_origin() instead of address. /// Does not include header line with taint label and address information. size_t SANITIZER_CDECL dfsan_sprint_origin_id_trace(dfsan_origin origin, char *out_buf, size_t out_buf_size); /// Prints the stack trace leading to this call to a pre-allocated output /// buffer. /// /// For usage examples, see dfsan_sprint_origin_trace. /// /// \param [out] out_buf The output buffer to write the results to. /// \param out_buf_size The size of \p out_buf. /// /// \returns The number of symbols that should have been written to \p out_buf /// (not including trailing null byte '\0'). Thus, the string is truncated iff /// return value is not less than \p out_buf_size. size_t SANITIZER_CDECL dfsan_sprint_stack_trace(char *out_buf, size_t out_buf_size); /// Retrieves the very first origin associated with the data at the given /// address. dfsan_origin SANITIZER_CDECL dfsan_get_init_origin(const void *addr); /// Returns the value of -dfsan-track-origins. /// * 0: do not track origins. /// * 1: track origins at memory store operations. /// * 2: track origins at memory load and store operations. int SANITIZER_CDECL dfsan_get_track_origins(void); #ifdef __cplusplus } // extern "C" template void dfsan_set_label(dfsan_label label, T &data) { dfsan_set_label(label, (void *)&data, sizeof(T)); } #endif #endif // DFSAN_INTERFACE_H size', using JSONroot/clean-only//[:upper:]alt -> %d | %d[%02x-%02x] -> %d ||external/regex-re2/re2/regexp.cc\nAnatolian_HieroglyphsMongolianPPiZanabazar_SquareSignal %d raised at PC=%p while already in AbslFailureSignalHandler()external/abseil-cpp/absl/base/internal/sysinfo.ccexternal/abseil-cpp/absl/debugging/symbolize_elf.inczmL<<=Sainternal errorindex < GetNumSymbols()link_base_ < sym->st_valuethis->LockSlowWithDeadline(how, cond, KernelTimeout::Never(), flags)waitp->thread->waitp == nullptr || waitp->thread->suppress_fatal_errorsnew_h != nullptrpw->next == wReaderUnlock localtimeP-521external/boringssl/src/crypto/fipsmodule/ec/scalar.cHMAC-SHA-256 KATNIST P-224%02xAES_set_decrypt_key failed. 
OPENSSL_ia32capBN_LIBDH_LIBBUFX509 V3 routinesrandom number generatorDIGESTexternal/boringssl/src/crypto/evp/evp_ctx.calgorithm %dexternal/boringssl/src/crypto/ec_extra/ec_asn1.cCHECK failed: this->MapFieldBase::repeated_field_ != nullptr: -analyzer-configexternal/kythe/kythe/cxx/extractor/cxx_details.cc is not a normal directory.KYTHE_CANONICALIZE_VNAME_PATHSIndexHeaderMapadcintrin.h/*===---- altivec.h - Standard header for type generic math ---------------===*\ * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * \*===----------------------------------------------------------------------===*/ #ifndef __ALTIVEC_H #define __ALTIVEC_H #ifndef __ALTIVEC__ #error "AltiVec support not enabled" #endif /* Constants for mapping CR6 bits to predicate result. */ #define __CR6_EQ 0 #define __CR6_EQ_REV 1 #define __CR6_LT 2 #define __CR6_LT_REV 3 #define __CR6_GT 4 #define __CR6_GT_REV 5 #define __CR6_SO 6 #define __CR6_SO_REV 7 /* Constants for vec_test_data_class */ #define __VEC_CLASS_FP_SUBNORMAL_N (1 << 0) #define __VEC_CLASS_FP_SUBNORMAL_P (1 << 1) #define __VEC_CLASS_FP_SUBNORMAL (__VEC_CLASS_FP_SUBNORMAL_P | \ __VEC_CLASS_FP_SUBNORMAL_N) #define __VEC_CLASS_FP_ZERO_N (1<<2) #define __VEC_CLASS_FP_ZERO_P (1<<3) #define __VEC_CLASS_FP_ZERO (__VEC_CLASS_FP_ZERO_P | \ __VEC_CLASS_FP_ZERO_N) #define __VEC_CLASS_FP_INFINITY_N (1<<4) #define __VEC_CLASS_FP_INFINITY_P (1<<5) #define __VEC_CLASS_FP_INFINITY (__VEC_CLASS_FP_INFINITY_P | \ __VEC_CLASS_FP_INFINITY_N) #define __VEC_CLASS_FP_NAN (1<<6) #define __VEC_CLASS_FP_NOT_NORMAL (__VEC_CLASS_FP_NAN | \ __VEC_CLASS_FP_SUBNORMAL | \ __VEC_CLASS_FP_ZERO | \ __VEC_CLASS_FP_INFINITY) #define __ATTRS_o_ai __attribute__((__overloadable__, __always_inline__)) #include static __inline__ vector signed char __ATTRS_o_ai vec_perm( vector signed char __a, vector signed char __b, vector unsigned char __c); static __inline__ vector unsigned char __ATTRS_o_ai vec_perm(vector unsigned char __a, vector unsigned char __b, vector unsigned char __c); static __inline__ vector bool char __ATTRS_o_ai vec_perm(vector bool char __a, vector bool char __b, vector unsigned char __c); static __inline__ vector short __ATTRS_o_ai vec_perm(vector signed short __a, vector signed short __b, vector unsigned char __c); static __inline__ vector unsigned short __ATTRS_o_ai vec_perm(vector unsigned short __a, vector unsigned short __b, vector unsigned char __c); static __inline__ vector bool short __ATTRS_o_ai vec_perm( vector bool short __a, vector bool short __b, vector unsigned char __c); static __inline__ vector pixel __ATTRS_o_ai vec_perm(vector pixel __a, vector pixel __b, vector unsigned char __c); static __inline__ vector int __ATTRS_o_ai vec_perm(vector signed int __a, vector signed int __b, vector unsigned char __c); static __inline__ vector unsigned int __ATTRS_o_ai vec_perm( vector unsigned int __a, vector unsigned int __b, vector unsigned char __c); static __inline__ vector bool int __ATTRS_o_ai vec_perm(vector bool int __a, vector bool int __b, vector unsigned char __c); static __inline__ vector float __ATTRS_o_ai vec_perm(vector float __a, vector float __b, vector unsigned char __c); #ifdef __VSX__ static __inline__ vector long long __ATTRS_o_ai vec_perm(vector signed long long __a, vector signed long long __b, vector unsigned char __c); static __inline__ vector unsigned long long __ATTRS_o_ai vec_perm(vector unsigned long long 
__a, vector unsigned long long __b, vector unsigned char __c); static __inline__ vector bool long long __ATTRS_o_ai vec_perm(vector bool long long __a, vector bool long long __b, vector unsigned char __c); static __inline__ vector double __ATTRS_o_ai vec_perm(vector double __a, vector double __b, vector unsigned char __c); #endif static __inline__ vector unsigned char __ATTRS_o_ai vec_xor(vector unsigned char __a, vector unsigned char __b); /* vec_abs */ #define __builtin_altivec_abs_v16qi vec_abs #define __builtin_altivec_abs_v8hi vec_abs #define __builtin_altivec_abs_v4si vec_abs static __inline__ vector signed char __ATTRS_o_ai vec_abs(vector signed char __a) { return __builtin_altivec_vmaxsb(__a, -__a); } static __inline__ vector signed short __ATTRS_o_ai vec_abs(vector signed short __a) { return __builtin_altivec_vmaxsh(__a, -__a); } static __inline__ vector signed int __ATTRS_o_ai vec_abs(vector signed int __a) { return __builtin_altivec_vmaxsw(__a, -__a); } #ifdef __POWER8_VECTOR__ static __inline__ vector signed long long __ATTRS_o_ai vec_abs(vector signed long long __a) { return __builtin_altivec_vmaxsd(__a, -__a); } #endif static __inline__ vector float __ATTRS_o_ai vec_abs(vector float __a) { #ifdef __VSX__ return __builtin_vsx_xvabssp(__a); #else vector unsigned int __res = (vector unsigned int)__a & (vector unsigned int)(0x7FFFFFFF); return (vector float)__res; #endif } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_abs(vector double __a) { return __builtin_vsx_xvabsdp(__a); } #endif /* vec_abss */ #define __builtin_altivec_abss_v16qi vec_abss #define __builtin_altivec_abss_v8hi vec_abss #define __builtin_altivec_abss_v4si vec_abss static __inline__ vector signed char __ATTRS_o_ai vec_abss(vector signed char __a) { return __builtin_altivec_vmaxsb( __a, __builtin_altivec_vsubsbs((vector signed char)(0), __a)); } static __inline__ vector signed short __ATTRS_o_ai vec_abss(vector signed short __a) { return __builtin_altivec_vmaxsh( __a, __builtin_altivec_vsubshs((vector signed short)(0), __a)); } static __inline__ vector signed int __ATTRS_o_ai vec_abss(vector signed int __a) { return __builtin_altivec_vmaxsw( __a, __builtin_altivec_vsubsws((vector signed int)(0), __a)); } /* vec_absd */ #if defined(__POWER9_VECTOR__) static __inline__ vector unsigned char __ATTRS_o_ai vec_absd(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vabsdub(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_absd(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vabsduh(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_absd(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vabsduw(__a, __b); } #endif /* End __POWER9_VECTOR__ */ /* vec_add */ static __inline__ vector signed char __ATTRS_o_ai vec_add(vector signed char __a, vector signed char __b) { return __a + __b; } static __inline__ vector signed char __ATTRS_o_ai vec_add(vector bool char __a, vector signed char __b) { return (vector signed char)__a + __b; } static __inline__ vector signed char __ATTRS_o_ai vec_add(vector signed char __a, vector bool char __b) { return __a + (vector signed char)__b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_add(vector unsigned char __a, vector unsigned char __b) { return __a + __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_add(vector bool char __a, vector unsigned char __b) { return (vector unsigned char)__a + __b; } static __inline__ vector 
unsigned char __ATTRS_o_ai vec_add(vector unsigned char __a, vector bool char __b) { return __a + (vector unsigned char)__b; } static __inline__ vector short __ATTRS_o_ai vec_add(vector short __a, vector short __b) { return __a + __b; } static __inline__ vector short __ATTRS_o_ai vec_add(vector bool short __a, vector short __b) { return (vector short)__a + __b; } static __inline__ vector short __ATTRS_o_ai vec_add(vector short __a, vector bool short __b) { return __a + (vector short)__b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_add(vector unsigned short __a, vector unsigned short __b) { return __a + __b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_add(vector bool short __a, vector unsigned short __b) { return (vector unsigned short)__a + __b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_add(vector unsigned short __a, vector bool short __b) { return __a + (vector unsigned short)__b; } static __inline__ vector int __ATTRS_o_ai vec_add(vector int __a, vector int __b) { return __a + __b; } static __inline__ vector int __ATTRS_o_ai vec_add(vector bool int __a, vector int __b) { return (vector int)__a + __b; } static __inline__ vector int __ATTRS_o_ai vec_add(vector int __a, vector bool int __b) { return __a + (vector int)__b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_add(vector unsigned int __a, vector unsigned int __b) { return __a + __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_add(vector bool int __a, vector unsigned int __b) { return (vector unsigned int)__a + __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_add(vector unsigned int __a, vector bool int __b) { return __a + (vector unsigned int)__b; } #ifdef __POWER8_VECTOR__ static __inline__ vector signed long long __ATTRS_o_ai vec_add(vector signed long long __a, vector signed long long __b) { return __a + __b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_add(vector unsigned long long __a, vector unsigned long long __b) { return __a + __b; } #ifdef __SIZEOF_INT128__ static __inline__ vector signed __int128 __ATTRS_o_ai vec_add(vector signed __int128 __a, vector signed __int128 __b) { return __a + __b; } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_add(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __a + __b; } #endif static __inline__ vector unsigned char __attribute__((__always_inline__)) vec_add_u128(vector unsigned char __a, vector unsigned char __b) { return (vector unsigned char)__builtin_altivec_vadduqm(__a, __b); } #elif defined(__VSX__) static __inline__ vector signed long long __ATTRS_o_ai vec_add(vector signed long long __a, vector signed long long __b) { #ifdef __LITTLE_ENDIAN__ // Little endian systems on CPU's prior to Power8 don't really exist // so scalarizing is fine. 
return __a + __b; #else vector unsigned int __res = (vector unsigned int)__a + (vector unsigned int)__b; vector unsigned int __carry = __builtin_altivec_vaddcuw( (vector unsigned int)__a, (vector unsigned int)__b); __carry = (vector unsigned int)__builtin_shufflevector( (vector unsigned char)__carry, (vector unsigned char)__carry, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0); return (vector signed long long)(__res + __carry); #endif } static __inline__ vector unsigned long long __ATTRS_o_ai vec_add(vector unsigned long long __a, vector unsigned long long __b) { return (vector unsigned long long)vec_add((vector signed long long)__a, (vector signed long long)__b); } #endif // __POWER8_VECTOR__ static __inline__ vector float __ATTRS_o_ai vec_add(vector float __a, vector float __b) { return __a + __b; } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_add(vector double __a, vector double __b) { return __a + __b; } #endif // __VSX__ /* vec_adde */ #ifdef __POWER8_VECTOR__ #ifdef __SIZEOF_INT128__ static __inline__ vector signed __int128 __ATTRS_o_ai vec_adde(vector signed __int128 __a, vector signed __int128 __b, vector signed __int128 __c) { return (vector signed __int128)__builtin_altivec_vaddeuqm( (vector unsigned __int128)__a, (vector unsigned __int128)__b, (vector unsigned __int128)__c); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_adde(vector unsigned __int128 __a, vector unsigned __int128 __b, vector unsigned __int128 __c) { return __builtin_altivec_vaddeuqm(__a, __b, __c); } #endif static __inline__ vector unsigned char __attribute__((__always_inline__)) vec_adde_u128(vector unsigned char __a, vector unsigned char __b, vector unsigned char __c) { return (vector unsigned char)__builtin_altivec_vaddeuqm_c( (vector unsigned char)__a, (vector unsigned char)__b, (vector unsigned char)__c); } #endif static __inline__ vector signed int __ATTRS_o_ai vec_adde(vector signed int __a, vector signed int __b, vector signed int __c) { vector signed int __mask = {1, 1, 1, 1}; vector signed int __carry = __c & __mask; return vec_add(vec_add(__a, __b), __carry); } static __inline__ vector unsigned int __ATTRS_o_ai vec_adde(vector unsigned int __a, vector unsigned int __b, vector unsigned int __c) { vector unsigned int __mask = {1, 1, 1, 1}; vector unsigned int __carry = __c & __mask; return vec_add(vec_add(__a, __b), __carry); } /* vec_addec */ #ifdef __POWER8_VECTOR__ #ifdef __SIZEOF_INT128__ static __inline__ vector signed __int128 __ATTRS_o_ai vec_addec(vector signed __int128 __a, vector signed __int128 __b, vector signed __int128 __c) { return (vector signed __int128)__builtin_altivec_vaddecuq( (vector unsigned __int128)__a, (vector unsigned __int128)__b, (vector unsigned __int128)__c); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_addec(vector unsigned __int128 __a, vector unsigned __int128 __b, vector unsigned __int128 __c) { return __builtin_altivec_vaddecuq(__a, __b, __c); } #endif static __inline__ vector unsigned char __attribute__((__always_inline__)) vec_addec_u128(vector unsigned char __a, vector unsigned char __b, vector unsigned char __c) { return (vector unsigned char)__builtin_altivec_vaddecuq_c( (vector unsigned char)__a, (vector unsigned char)__b, (vector unsigned char)__c); } #ifdef __powerpc64__ static __inline__ vector signed int __ATTRS_o_ai vec_addec(vector signed int __a, vector signed int __b, vector signed int __c) { signed int __result[4]; for (int i = 0; i < 4; i++) { unsigned int __tempa = (unsigned int) __a[i]; unsigned 
int __tempb = (unsigned int) __b[i]; unsigned int __tempc = (unsigned int) __c[i]; __tempc = __tempc & 0x00000001; unsigned long long __longa = (unsigned long long) __tempa; unsigned long long __longb = (unsigned long long) __tempb; unsigned long long __longc = (unsigned long long) __tempc; unsigned long long __sum = __longa + __longb + __longc; unsigned long long __res = (__sum >> 32) & 0x01; unsigned long long __tempres = (unsigned int) __res; __result[i] = (signed int) __tempres; } vector signed int ret = { __result[0], __result[1], __result[2], __result[3] }; return ret; } static __inline__ vector unsigned int __ATTRS_o_ai vec_addec(vector unsigned int __a, vector unsigned int __b, vector unsigned int __c) { unsigned int __result[4]; for (int i = 0; i < 4; i++) { unsigned int __tempc = __c[i] & 1; unsigned long long __longa = (unsigned long long) __a[i]; unsigned long long __longb = (unsigned long long) __b[i]; unsigned long long __longc = (unsigned long long) __tempc; unsigned long long __sum = __longa + __longb + __longc; unsigned long long __res = (__sum >> 32) & 0x01; unsigned long long __tempres = (unsigned int) __res; __result[i] = (signed int) __tempres; } vector unsigned int ret = { __result[0], __result[1], __result[2], __result[3] }; return ret; } #endif // __powerpc64__ #endif // __POWER8_VECTOR__ /* vec_vaddubm */ #define __builtin_altivec_vaddubm vec_vaddubm static __inline__ vector signed char __ATTRS_o_ai vec_vaddubm(vector signed char __a, vector signed char __b) { return __a + __b; } static __inline__ vector signed char __ATTRS_o_ai vec_vaddubm(vector bool char __a, vector signed char __b) { return (vector signed char)__a + __b; } static __inline__ vector signed char __ATTRS_o_ai vec_vaddubm(vector signed char __a, vector bool char __b) { return __a + (vector signed char)__b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_vaddubm(vector unsigned char __a, vector unsigned char __b) { return __a + __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_vaddubm(vector bool char __a, vector unsigned char __b) { return (vector unsigned char)__a + __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_vaddubm(vector unsigned char __a, vector bool char __b) { return __a + (vector unsigned char)__b; } /* vec_vadduhm */ #define __builtin_altivec_vadduhm vec_vadduhm static __inline__ vector short __ATTRS_o_ai vec_vadduhm(vector short __a, vector short __b) { return __a + __b; } static __inline__ vector short __ATTRS_o_ai vec_vadduhm(vector bool short __a, vector short __b) { return (vector short)__a + __b; } static __inline__ vector short __ATTRS_o_ai vec_vadduhm(vector short __a, vector bool short __b) { return __a + (vector short)__b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_vadduhm(vector unsigned short __a, vector unsigned short __b) { return __a + __b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_vadduhm(vector bool short __a, vector unsigned short __b) { return (vector unsigned short)__a + __b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_vadduhm(vector unsigned short __a, vector bool short __b) { return __a + (vector unsigned short)__b; } /* vec_vadduwm */ #define __builtin_altivec_vadduwm vec_vadduwm static __inline__ vector int __ATTRS_o_ai vec_vadduwm(vector int __a, vector int __b) { return __a + __b; } static __inline__ vector int __ATTRS_o_ai vec_vadduwm(vector bool int __a, vector int __b) { return (vector int)__a + __b; } static __inline__ vector int __ATTRS_o_ai vec_vadduwm(vector int __a, 
vector bool int __b) { return __a + (vector int)__b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_vadduwm(vector unsigned int __a, vector unsigned int __b) { return __a + __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_vadduwm(vector bool int __a, vector unsigned int __b) { return (vector unsigned int)__a + __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_vadduwm(vector unsigned int __a, vector bool int __b) { return __a + (vector unsigned int)__b; } /* vec_vaddfp */ #define __builtin_altivec_vaddfp vec_vaddfp static __inline__ vector float __attribute__((__always_inline__)) vec_vaddfp(vector float __a, vector float __b) { return __a + __b; } /* vec_addc */ static __inline__ vector signed int __ATTRS_o_ai vec_addc(vector signed int __a, vector signed int __b) { return (vector signed int)__builtin_altivec_vaddcuw((vector unsigned int)__a, (vector unsigned int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_addc(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vaddcuw(__a, __b); } #ifdef __POWER8_VECTOR__ #ifdef __SIZEOF_INT128__ static __inline__ vector signed __int128 __ATTRS_o_ai vec_addc(vector signed __int128 __a, vector signed __int128 __b) { return (vector signed __int128)__builtin_altivec_vaddcuq( (vector unsigned __int128)__a, (vector unsigned __int128)__b); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_addc(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __builtin_altivec_vaddcuq(__a, __b); } #endif static __inline__ vector unsigned char __attribute__((__always_inline__)) vec_addc_u128(vector unsigned char __a, vector unsigned char __b) { return (vector unsigned char)__builtin_altivec_vaddcuq_c( (vector unsigned char)__a, (vector unsigned char)__b); } #endif // defined(__POWER8_VECTOR__) && defined(__powerpc64__) /* vec_vaddcuw */ static __inline__ vector unsigned int __attribute__((__always_inline__)) vec_vaddcuw(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vaddcuw(__a, __b); } /* vec_adds */ static __inline__ vector signed char __ATTRS_o_ai vec_adds(vector signed char __a, vector signed char __b) { return __builtin_altivec_vaddsbs(__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_adds(vector bool char __a, vector signed char __b) { return __builtin_altivec_vaddsbs((vector signed char)__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_adds(vector signed char __a, vector bool char __b) { return __builtin_altivec_vaddsbs(__a, (vector signed char)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_adds(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vaddubs(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_adds(vector bool char __a, vector unsigned char __b) { return __builtin_altivec_vaddubs((vector unsigned char)__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_adds(vector unsigned char __a, vector bool char __b) { return __builtin_altivec_vaddubs(__a, (vector unsigned char)__b); } static __inline__ vector short __ATTRS_o_ai vec_adds(vector short __a, vector short __b) { return __builtin_altivec_vaddshs(__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_adds(vector bool short __a, vector short __b) { return __builtin_altivec_vaddshs((vector short)__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_adds(vector short __a, vector bool short __b) { return __builtin_altivec_vaddshs(__a, (vector short)__b); } 
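/*
 * Usage sketch (illustrative only, assuming an AltiVec-enabled PowerPC
 * target, e.g. -maltivec): vec_adds is an element-wise saturating add, so
 * out-of-range sums clamp to the element type's limits instead of wrapping
 * the way vec_add does.
 *
 *   vector signed short a = vec_splats((signed short)32000);
 *   vector signed short b = vec_splats((signed short)2000);
 *   vector signed short s = vec_adds(a, b);  // every lane saturates to 32767
 *   vector signed short w = vec_add(a, b);   // vec_add wraps to -31536 instead
 */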
static __inline__ vector unsigned short __ATTRS_o_ai vec_adds(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vadduhs(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_adds(vector bool short __a, vector unsigned short __b) { return __builtin_altivec_vadduhs((vector unsigned short)__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_adds(vector unsigned short __a, vector bool short __b) { return __builtin_altivec_vadduhs(__a, (vector unsigned short)__b); } static __inline__ vector int __ATTRS_o_ai vec_adds(vector int __a, vector int __b) { return __builtin_altivec_vaddsws(__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_adds(vector bool int __a, vector int __b) { return __builtin_altivec_vaddsws((vector int)__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_adds(vector int __a, vector bool int __b) { return __builtin_altivec_vaddsws(__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_adds(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vadduws(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_adds(vector bool int __a, vector unsigned int __b) { return __builtin_altivec_vadduws((vector unsigned int)__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_adds(vector unsigned int __a, vector bool int __b) { return __builtin_altivec_vadduws(__a, (vector unsigned int)__b); } /* vec_vaddsbs */ static __inline__ vector signed char __ATTRS_o_ai vec_vaddsbs(vector signed char __a, vector signed char __b) { return __builtin_altivec_vaddsbs(__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_vaddsbs(vector bool char __a, vector signed char __b) { return __builtin_altivec_vaddsbs((vector signed char)__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_vaddsbs(vector signed char __a, vector bool char __b) { return __builtin_altivec_vaddsbs(__a, (vector signed char)__b); } /* vec_vaddubs */ static __inline__ vector unsigned char __ATTRS_o_ai vec_vaddubs(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vaddubs(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vaddubs(vector bool char __a, vector unsigned char __b) { return __builtin_altivec_vaddubs((vector unsigned char)__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vaddubs(vector unsigned char __a, vector bool char __b) { return __builtin_altivec_vaddubs(__a, (vector unsigned char)__b); } /* vec_vaddshs */ static __inline__ vector short __ATTRS_o_ai vec_vaddshs(vector short __a, vector short __b) { return __builtin_altivec_vaddshs(__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_vaddshs(vector bool short __a, vector short __b) { return __builtin_altivec_vaddshs((vector short)__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_vaddshs(vector short __a, vector bool short __b) { return __builtin_altivec_vaddshs(__a, (vector short)__b); } /* vec_vadduhs */ static __inline__ vector unsigned short __ATTRS_o_ai vec_vadduhs(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vadduhs(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vadduhs(vector bool short __a, vector unsigned short __b) { return __builtin_altivec_vadduhs((vector unsigned short)__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vadduhs(vector unsigned short __a, vector bool short __b) { return __builtin_altivec_vadduhs(__a, (vector 
unsigned short)__b); } /* vec_vaddsws */ static __inline__ vector int __ATTRS_o_ai vec_vaddsws(vector int __a, vector int __b) { return __builtin_altivec_vaddsws(__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_vaddsws(vector bool int __a, vector int __b) { return __builtin_altivec_vaddsws((vector int)__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_vaddsws(vector int __a, vector bool int __b) { return __builtin_altivec_vaddsws(__a, (vector int)__b); } /* vec_vadduws */ static __inline__ vector unsigned int __ATTRS_o_ai vec_vadduws(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vadduws(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vadduws(vector bool int __a, vector unsigned int __b) { return __builtin_altivec_vadduws((vector unsigned int)__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vadduws(vector unsigned int __a, vector bool int __b) { return __builtin_altivec_vadduws(__a, (vector unsigned int)__b); } #if defined(__POWER8_VECTOR__) && defined(__powerpc64__) && \ defined(__SIZEOF_INT128__) /* vec_vadduqm */ static __inline__ vector signed __int128 __ATTRS_o_ai vec_vadduqm(vector signed __int128 __a, vector signed __int128 __b) { return __a + __b; } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_vadduqm(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __a + __b; } /* vec_vaddeuqm */ static __inline__ vector signed __int128 __ATTRS_o_ai vec_vaddeuqm(vector signed __int128 __a, vector signed __int128 __b, vector signed __int128 __c) { return (vector signed __int128)__builtin_altivec_vaddeuqm( (vector unsigned __int128)__a, (vector unsigned __int128)__b, (vector unsigned __int128)__c); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_vaddeuqm(vector unsigned __int128 __a, vector unsigned __int128 __b, vector unsigned __int128 __c) { return __builtin_altivec_vaddeuqm(__a, __b, __c); } /* vec_vaddcuq */ static __inline__ vector signed __int128 __ATTRS_o_ai vec_vaddcuq(vector signed __int128 __a, vector signed __int128 __b) { return (vector signed __int128)__builtin_altivec_vaddcuq( (vector unsigned __int128)__a, (vector unsigned __int128)__b); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_vaddcuq(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __builtin_altivec_vaddcuq(__a, __b); } /* vec_vaddecuq */ static __inline__ vector signed __int128 __ATTRS_o_ai vec_vaddecuq(vector signed __int128 __a, vector signed __int128 __b, vector signed __int128 __c) { return (vector signed __int128)__builtin_altivec_vaddecuq( (vector unsigned __int128)__a, (vector unsigned __int128)__b, (vector unsigned __int128)__c); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_vaddecuq(vector unsigned __int128 __a, vector unsigned __int128 __b, vector unsigned __int128 __c) { return __builtin_altivec_vaddecuq(__a, __b, __c); } #endif // defined(__POWER8_VECTOR__) && defined(__powerpc64__) /* vec_and */ #define __builtin_altivec_vand vec_and static __inline__ vector signed char __ATTRS_o_ai vec_and(vector signed char __a, vector signed char __b) { return __a & __b; } static __inline__ vector signed char __ATTRS_o_ai vec_and(vector bool char __a, vector signed char __b) { return (vector signed char)__a & __b; } static __inline__ vector signed char __ATTRS_o_ai vec_and(vector signed char __a, vector bool char __b) { return __a & (vector signed char)__b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_and(vector unsigned char 
__a, vector unsigned char __b) { return __a & __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_and(vector bool char __a, vector unsigned char __b) { return (vector unsigned char)__a & __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_and(vector unsigned char __a, vector bool char __b) { return __a & (vector unsigned char)__b; } static __inline__ vector bool char __ATTRS_o_ai vec_and(vector bool char __a, vector bool char __b) { return __a & __b; } static __inline__ vector short __ATTRS_o_ai vec_and(vector short __a, vector short __b) { return __a & __b; } static __inline__ vector short __ATTRS_o_ai vec_and(vector bool short __a, vector short __b) { return (vector short)__a & __b; } static __inline__ vector short __ATTRS_o_ai vec_and(vector short __a, vector bool short __b) { return __a & (vector short)__b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_and(vector unsigned short __a, vector unsigned short __b) { return __a & __b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_and(vector bool short __a, vector unsigned short __b) { return (vector unsigned short)__a & __b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_and(vector unsigned short __a, vector bool short __b) { return __a & (vector unsigned short)__b; } static __inline__ vector bool short __ATTRS_o_ai vec_and(vector bool short __a, vector bool short __b) { return __a & __b; } static __inline__ vector int __ATTRS_o_ai vec_and(vector int __a, vector int __b) { return __a & __b; } static __inline__ vector int __ATTRS_o_ai vec_and(vector bool int __a, vector int __b) { return (vector int)__a & __b; } static __inline__ vector int __ATTRS_o_ai vec_and(vector int __a, vector bool int __b) { return __a & (vector int)__b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_and(vector unsigned int __a, vector unsigned int __b) { return __a & __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_and(vector bool int __a, vector unsigned int __b) { return (vector unsigned int)__a & __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_and(vector unsigned int __a, vector bool int __b) { return __a & (vector unsigned int)__b; } static __inline__ vector bool int __ATTRS_o_ai vec_and(vector bool int __a, vector bool int __b) { return __a & __b; } static __inline__ vector float __ATTRS_o_ai vec_and(vector float __a, vector float __b) { vector unsigned int __res = (vector unsigned int)__a & (vector unsigned int)__b; return (vector float)__res; } static __inline__ vector float __ATTRS_o_ai vec_and(vector bool int __a, vector float __b) { vector unsigned int __res = (vector unsigned int)__a & (vector unsigned int)__b; return (vector float)__res; } static __inline__ vector float __ATTRS_o_ai vec_and(vector float __a, vector bool int __b) { vector unsigned int __res = (vector unsigned int)__a & (vector unsigned int)__b; return (vector float)__res; } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_and(vector bool long long __a, vector double __b) { vector unsigned long long __res = (vector unsigned long long)__a & (vector unsigned long long)__b; return (vector double)__res; } static __inline__ vector double __ATTRS_o_ai vec_and(vector double __a, vector bool long long __b) { vector unsigned long long __res = (vector unsigned long long)__a & (vector unsigned long long)__b; return (vector double)__res; } static __inline__ vector double __ATTRS_o_ai vec_and(vector double __a, vector double __b) { vector unsigned long long __res = (vector unsigned long 
long)__a & (vector unsigned long long)__b; return (vector double)__res; } static __inline__ vector signed long long __ATTRS_o_ai vec_and(vector signed long long __a, vector signed long long __b) { return __a & __b; } static __inline__ vector signed long long __ATTRS_o_ai vec_and(vector bool long long __a, vector signed long long __b) { return (vector signed long long)__a & __b; } static __inline__ vector signed long long __ATTRS_o_ai vec_and(vector signed long long __a, vector bool long long __b) { return __a & (vector signed long long)__b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_and(vector unsigned long long __a, vector unsigned long long __b) { return __a & __b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_and(vector bool long long __a, vector unsigned long long __b) { return (vector unsigned long long)__a & __b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_and(vector unsigned long long __a, vector bool long long __b) { return __a & (vector unsigned long long)__b; } static __inline__ vector bool long long __ATTRS_o_ai vec_and(vector bool long long __a, vector bool long long __b) { return __a & __b; } #endif /* vec_vand */ static __inline__ vector signed char __ATTRS_o_ai vec_vand(vector signed char __a, vector signed char __b) { return __a & __b; } static __inline__ vector signed char __ATTRS_o_ai vec_vand(vector bool char __a, vector signed char __b) { return (vector signed char)__a & __b; } static __inline__ vector signed char __ATTRS_o_ai vec_vand(vector signed char __a, vector bool char __b) { return __a & (vector signed char)__b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_vand(vector unsigned char __a, vector unsigned char __b) { return __a & __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_vand(vector bool char __a, vector unsigned char __b) { return (vector unsigned char)__a & __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_vand(vector unsigned char __a, vector bool char __b) { return __a & (vector unsigned char)__b; } static __inline__ vector bool char __ATTRS_o_ai vec_vand(vector bool char __a, vector bool char __b) { return __a & __b; } static __inline__ vector short __ATTRS_o_ai vec_vand(vector short __a, vector short __b) { return __a & __b; } static __inline__ vector short __ATTRS_o_ai vec_vand(vector bool short __a, vector short __b) { return (vector short)__a & __b; } static __inline__ vector short __ATTRS_o_ai vec_vand(vector short __a, vector bool short __b) { return __a & (vector short)__b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_vand(vector unsigned short __a, vector unsigned short __b) { return __a & __b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_vand(vector bool short __a, vector unsigned short __b) { return (vector unsigned short)__a & __b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_vand(vector unsigned short __a, vector bool short __b) { return __a & (vector unsigned short)__b; } static __inline__ vector bool short __ATTRS_o_ai vec_vand(vector bool short __a, vector bool short __b) { return __a & __b; } static __inline__ vector int __ATTRS_o_ai vec_vand(vector int __a, vector int __b) { return __a & __b; } static __inline__ vector int __ATTRS_o_ai vec_vand(vector bool int __a, vector int __b) { return (vector int)__a & __b; } static __inline__ vector int __ATTRS_o_ai vec_vand(vector int __a, vector bool int __b) { return __a & (vector int)__b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_vand(vector 
unsigned int __a, vector unsigned int __b) { return __a & __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_vand(vector bool int __a, vector unsigned int __b) { return (vector unsigned int)__a & __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_vand(vector unsigned int __a, vector bool int __b) { return __a & (vector unsigned int)__b; } static __inline__ vector bool int __ATTRS_o_ai vec_vand(vector bool int __a, vector bool int __b) { return __a & __b; } static __inline__ vector float __ATTRS_o_ai vec_vand(vector float __a, vector float __b) { vector unsigned int __res = (vector unsigned int)__a & (vector unsigned int)__b; return (vector float)__res; } static __inline__ vector float __ATTRS_o_ai vec_vand(vector bool int __a, vector float __b) { vector unsigned int __res = (vector unsigned int)__a & (vector unsigned int)__b; return (vector float)__res; } static __inline__ vector float __ATTRS_o_ai vec_vand(vector float __a, vector bool int __b) { vector unsigned int __res = (vector unsigned int)__a & (vector unsigned int)__b; return (vector float)__res; } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai vec_vand(vector signed long long __a, vector signed long long __b) { return __a & __b; } static __inline__ vector signed long long __ATTRS_o_ai vec_vand(vector bool long long __a, vector signed long long __b) { return (vector signed long long)__a & __b; } static __inline__ vector signed long long __ATTRS_o_ai vec_vand(vector signed long long __a, vector bool long long __b) { return __a & (vector signed long long)__b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_vand(vector unsigned long long __a, vector unsigned long long __b) { return __a & __b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_vand(vector bool long long __a, vector unsigned long long __b) { return (vector unsigned long long)__a & __b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_vand(vector unsigned long long __a, vector bool long long __b) { return __a & (vector unsigned long long)__b; } static __inline__ vector bool long long __ATTRS_o_ai vec_vand(vector bool long long __a, vector bool long long __b) { return __a & __b; } #endif /* vec_andc */ #define __builtin_altivec_vandc vec_andc static __inline__ vector signed char __ATTRS_o_ai vec_andc(vector signed char __a, vector signed char __b) { return __a & ~__b; } static __inline__ vector signed char __ATTRS_o_ai vec_andc(vector bool char __a, vector signed char __b) { return (vector signed char)__a & ~__b; } static __inline__ vector signed char __ATTRS_o_ai vec_andc(vector signed char __a, vector bool char __b) { return __a & ~(vector signed char)__b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_andc(vector unsigned char __a, vector unsigned char __b) { return __a & ~__b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_andc(vector bool char __a, vector unsigned char __b) { return (vector unsigned char)__a & ~__b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_andc(vector unsigned char __a, vector bool char __b) { return __a & ~(vector unsigned char)__b; } static __inline__ vector bool char __ATTRS_o_ai vec_andc(vector bool char __a, vector bool char __b) { return __a & ~__b; } static __inline__ vector short __ATTRS_o_ai vec_andc(vector short __a, vector short __b) { return __a & ~__b; } static __inline__ vector short __ATTRS_o_ai vec_andc(vector bool short __a, vector short __b) { return (vector short)__a & ~__b; } static __inline__ vector short 
__ATTRS_o_ai vec_andc(vector short __a, vector bool short __b) { return __a & ~(vector short)__b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_andc(vector unsigned short __a, vector unsigned short __b) { return __a & ~__b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_andc(vector bool short __a, vector unsigned short __b) { return (vector unsigned short)__a & ~__b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_andc(vector unsigned short __a, vector bool short __b) { return __a & ~(vector unsigned short)__b; } static __inline__ vector bool short __ATTRS_o_ai vec_andc(vector bool short __a, vector bool short __b) { return __a & ~__b; } static __inline__ vector int __ATTRS_o_ai vec_andc(vector int __a, vector int __b) { return __a & ~__b; } static __inline__ vector int __ATTRS_o_ai vec_andc(vector bool int __a, vector int __b) { return (vector int)__a & ~__b; } static __inline__ vector int __ATTRS_o_ai vec_andc(vector int __a, vector bool int __b) { return __a & ~(vector int)__b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_andc(vector unsigned int __a, vector unsigned int __b) { return __a & ~__b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_andc(vector bool int __a, vector unsigned int __b) { return (vector unsigned int)__a & ~__b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_andc(vector unsigned int __a, vector bool int __b) { return __a & ~(vector unsigned int)__b; } static __inline__ vector bool int __ATTRS_o_ai vec_andc(vector bool int __a, vector bool int __b) { return __a & ~__b; } static __inline__ vector float __ATTRS_o_ai vec_andc(vector float __a, vector float __b) { vector unsigned int __res = (vector unsigned int)__a & ~(vector unsigned int)__b; return (vector float)__res; } static __inline__ vector float __ATTRS_o_ai vec_andc(vector bool int __a, vector float __b) { vector unsigned int __res = (vector unsigned int)__a & ~(vector unsigned int)__b; return (vector float)__res; } static __inline__ vector float __ATTRS_o_ai vec_andc(vector float __a, vector bool int __b) { vector unsigned int __res = (vector unsigned int)__a & ~(vector unsigned int)__b; return (vector float)__res; } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_andc(vector bool long long __a, vector double __b) { vector unsigned long long __res = (vector unsigned long long)__a & ~(vector unsigned long long)__b; return (vector double)__res; } static __inline__ vector double __ATTRS_o_ai vec_andc(vector double __a, vector bool long long __b) { vector unsigned long long __res = (vector unsigned long long)__a & ~(vector unsigned long long)__b; return (vector double)__res; } static __inline__ vector double __ATTRS_o_ai vec_andc(vector double __a, vector double __b) { vector unsigned long long __res = (vector unsigned long long)__a & ~(vector unsigned long long)__b; return (vector double)__res; } static __inline__ vector signed long long __ATTRS_o_ai vec_andc(vector signed long long __a, vector signed long long __b) { return __a & ~__b; } static __inline__ vector signed long long __ATTRS_o_ai vec_andc(vector bool long long __a, vector signed long long __b) { return (vector signed long long)__a & ~__b; } static __inline__ vector signed long long __ATTRS_o_ai vec_andc(vector signed long long __a, vector bool long long __b) { return __a & ~(vector signed long long)__b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_andc(vector unsigned long long __a, vector unsigned long long __b) { return __a & ~__b; } static 
__inline__ vector unsigned long long __ATTRS_o_ai vec_andc(vector bool long long __a, vector unsigned long long __b) { return (vector unsigned long long)__a & ~__b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_andc(vector unsigned long long __a, vector bool long long __b) { return __a & ~(vector unsigned long long)__b; } static __inline__ vector bool long long __ATTRS_o_ai vec_andc(vector bool long long __a, vector bool long long __b) { return __a & ~__b; } #endif /* vec_vandc */ static __inline__ vector signed char __ATTRS_o_ai vec_vandc(vector signed char __a, vector signed char __b) { return __a & ~__b; } static __inline__ vector signed char __ATTRS_o_ai vec_vandc(vector bool char __a, vector signed char __b) { return (vector signed char)__a & ~__b; } static __inline__ vector signed char __ATTRS_o_ai vec_vandc(vector signed char __a, vector bool char __b) { return __a & ~(vector signed char)__b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_vandc(vector unsigned char __a, vector unsigned char __b) { return __a & ~__b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_vandc(vector bool char __a, vector unsigned char __b) { return (vector unsigned char)__a & ~__b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_vandc(vector unsigned char __a, vector bool char __b) { return __a & ~(vector unsigned char)__b; } static __inline__ vector bool char __ATTRS_o_ai vec_vandc(vector bool char __a, vector bool char __b) { return __a & ~__b; } static __inline__ vector short __ATTRS_o_ai vec_vandc(vector short __a, vector short __b) { return __a & ~__b; } static __inline__ vector short __ATTRS_o_ai vec_vandc(vector bool short __a, vector short __b) { return (vector short)__a & ~__b; } static __inline__ vector short __ATTRS_o_ai vec_vandc(vector short __a, vector bool short __b) { return __a & ~(vector short)__b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_vandc(vector unsigned short __a, vector unsigned short __b) { return __a & ~__b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_vandc(vector bool short __a, vector unsigned short __b) { return (vector unsigned short)__a & ~__b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_vandc(vector unsigned short __a, vector bool short __b) { return __a & ~(vector unsigned short)__b; } static __inline__ vector bool short __ATTRS_o_ai vec_vandc(vector bool short __a, vector bool short __b) { return __a & ~__b; } static __inline__ vector int __ATTRS_o_ai vec_vandc(vector int __a, vector int __b) { return __a & ~__b; } static __inline__ vector int __ATTRS_o_ai vec_vandc(vector bool int __a, vector int __b) { return (vector int)__a & ~__b; } static __inline__ vector int __ATTRS_o_ai vec_vandc(vector int __a, vector bool int __b) { return __a & ~(vector int)__b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_vandc(vector unsigned int __a, vector unsigned int __b) { return __a & ~__b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_vandc(vector bool int __a, vector unsigned int __b) { return (vector unsigned int)__a & ~__b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_vandc(vector unsigned int __a, vector bool int __b) { return __a & ~(vector unsigned int)__b; } static __inline__ vector bool int __ATTRS_o_ai vec_vandc(vector bool int __a, vector bool int __b) { return __a & ~__b; } static __inline__ vector float __ATTRS_o_ai vec_vandc(vector float __a, vector float __b) { vector unsigned int __res = (vector unsigned int)__a & ~(vector unsigned int)__b; 
return (vector float)__res; } static __inline__ vector float __ATTRS_o_ai vec_vandc(vector bool int __a, vector float __b) { vector unsigned int __res = (vector unsigned int)__a & ~(vector unsigned int)__b; return (vector float)__res; } static __inline__ vector float __ATTRS_o_ai vec_vandc(vector float __a, vector bool int __b) { vector unsigned int __res = (vector unsigned int)__a & ~(vector unsigned int)__b; return (vector float)__res; } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai vec_vandc(vector signed long long __a, vector signed long long __b) { return __a & ~__b; } static __inline__ vector signed long long __ATTRS_o_ai vec_vandc(vector bool long long __a, vector signed long long __b) { return (vector signed long long)__a & ~__b; } static __inline__ vector signed long long __ATTRS_o_ai vec_vandc(vector signed long long __a, vector bool long long __b) { return __a & ~(vector signed long long)__b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_vandc(vector unsigned long long __a, vector unsigned long long __b) { return __a & ~__b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_vandc(vector bool long long __a, vector unsigned long long __b) { return (vector unsigned long long)__a & ~__b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_vandc(vector unsigned long long __a, vector bool long long __b) { return __a & ~(vector unsigned long long)__b; } static __inline__ vector bool long long __ATTRS_o_ai vec_vandc(vector bool long long __a, vector bool long long __b) { return __a & ~__b; } #endif /* vec_avg */ static __inline__ vector signed char __ATTRS_o_ai vec_avg(vector signed char __a, vector signed char __b) { return __builtin_altivec_vavgsb(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_avg(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vavgub(__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_avg(vector short __a, vector short __b) { return __builtin_altivec_vavgsh(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_avg(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vavguh(__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_avg(vector int __a, vector int __b) { return __builtin_altivec_vavgsw(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_avg(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vavguw(__a, __b); } /* vec_vavgsb */ static __inline__ vector signed char __attribute__((__always_inline__)) vec_vavgsb(vector signed char __a, vector signed char __b) { return __builtin_altivec_vavgsb(__a, __b); } /* vec_vavgub */ static __inline__ vector unsigned char __attribute__((__always_inline__)) vec_vavgub(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vavgub(__a, __b); } /* vec_vavgsh */ static __inline__ vector short __attribute__((__always_inline__)) vec_vavgsh(vector short __a, vector short __b) { return __builtin_altivec_vavgsh(__a, __b); } /* vec_vavguh */ static __inline__ vector unsigned short __attribute__((__always_inline__)) vec_vavguh(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vavguh(__a, __b); } /* vec_vavgsw */ static __inline__ vector int __attribute__((__always_inline__)) vec_vavgsw(vector int __a, vector int __b) { return __builtin_altivec_vavgsw(__a, __b); } /* vec_vavguw */ static __inline__ vector unsigned int __attribute__((__always_inline__)) 
vec_vavguw(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vavguw(__a, __b); } /* vec_ceil */ static __inline__ vector float __ATTRS_o_ai vec_ceil(vector float __a) { #ifdef __VSX__ return __builtin_vsx_xvrspip(__a); #else return __builtin_altivec_vrfip(__a); #endif } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_ceil(vector double __a) { return __builtin_vsx_xvrdpip(__a); } #endif /* vec_roundp */ static __inline__ vector float __ATTRS_o_ai vec_roundp(vector float __a) { return vec_ceil(__a); } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_roundp(vector double __a) { return vec_ceil(__a); } #endif /* vec_vrfip */ static __inline__ vector float __attribute__((__always_inline__)) vec_vrfip(vector float __a) { return __builtin_altivec_vrfip(__a); } /* vec_cmpb */ static __inline__ vector int __attribute__((__always_inline__)) vec_cmpb(vector float __a, vector float __b) { return __builtin_altivec_vcmpbfp(__a, __b); } /* vec_vcmpbfp */ static __inline__ vector int __attribute__((__always_inline__)) vec_vcmpbfp(vector float __a, vector float __b) { return __builtin_altivec_vcmpbfp(__a, __b); } /* vec_cmpeq */ static __inline__ vector bool char __ATTRS_o_ai vec_cmpeq(vector signed char __a, vector signed char __b) { return (vector bool char)__builtin_altivec_vcmpequb((vector char)__a, (vector char)__b); } static __inline__ vector bool char __ATTRS_o_ai vec_cmpeq(vector unsigned char __a, vector unsigned char __b) { return (vector bool char)__builtin_altivec_vcmpequb((vector char)__a, (vector char)__b); } static __inline__ vector bool char __ATTRS_o_ai vec_cmpeq(vector bool char __a, vector bool char __b) { return (vector bool char)__builtin_altivec_vcmpequb((vector char)__a, (vector char)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_cmpeq(vector short __a, vector short __b) { return (vector bool short)__builtin_altivec_vcmpequh(__a, __b); } static __inline__ vector bool short __ATTRS_o_ai vec_cmpeq(vector unsigned short __a, vector unsigned short __b) { return (vector bool short)__builtin_altivec_vcmpequh((vector short)__a, (vector short)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_cmpeq(vector bool short __a, vector bool short __b) { return (vector bool short)__builtin_altivec_vcmpequh((vector short)__a, (vector short)__b); } static __inline__ vector bool int __ATTRS_o_ai vec_cmpeq(vector int __a, vector int __b) { return (vector bool int)__builtin_altivec_vcmpequw(__a, __b); } static __inline__ vector bool int __ATTRS_o_ai vec_cmpeq(vector unsigned int __a, vector unsigned int __b) { return (vector bool int)__builtin_altivec_vcmpequw((vector int)__a, (vector int)__b); } static __inline__ vector bool int __ATTRS_o_ai vec_cmpeq(vector bool int __a, vector bool int __b) { return (vector bool int)__builtin_altivec_vcmpequw((vector int)__a, (vector int)__b); } #ifdef __POWER8_VECTOR__ static __inline__ vector bool long long __ATTRS_o_ai vec_cmpeq(vector signed long long __a, vector signed long long __b) { return (vector bool long long)__builtin_altivec_vcmpequd(__a, __b); } static __inline__ vector bool long long __ATTRS_o_ai vec_cmpeq(vector unsigned long long __a, vector unsigned long long __b) { return (vector bool long long)__builtin_altivec_vcmpequd( (vector long long)__a, (vector long long)__b); } static __inline__ vector bool long long __ATTRS_o_ai vec_cmpeq(vector bool long long __a, vector bool long long __b) { return (vector bool long long)__builtin_altivec_vcmpequd( (vector long long)__a, 
(vector long long)__b); } #elif defined(__VSX__) static __inline__ vector bool long long __ATTRS_o_ai vec_cmpeq(vector signed long long __a, vector signed long long __b) { vector bool int __wordcmp = vec_cmpeq((vector signed int)__a, (vector signed int)__b); #ifdef __LITTLE_ENDIAN__ __wordcmp &= __builtin_shufflevector(__wordcmp, __wordcmp, 3, 0, 1, 2); return (vector bool long long)__builtin_shufflevector(__wordcmp, __wordcmp, 1, 1, 3, 3); #else __wordcmp &= __builtin_shufflevector(__wordcmp, __wordcmp, 1, 2, 3, 0); return (vector bool long long)__builtin_shufflevector(__wordcmp, __wordcmp, 0, 0, 2, 2); #endif } static __inline__ vector bool long long __ATTRS_o_ai vec_cmpeq(vector unsigned long long __a, vector unsigned long long __b) { return vec_cmpeq((vector signed long long)__a, (vector signed long long)__b); } static __inline__ vector bool long long __ATTRS_o_ai vec_cmpeq(vector bool long long __a, vector bool long long __b) { return vec_cmpeq((vector signed long long)__a, (vector signed long long)__b); } #endif static __inline__ vector bool int __ATTRS_o_ai vec_cmpeq(vector float __a, vector float __b) { #ifdef __VSX__ return (vector bool int)__builtin_vsx_xvcmpeqsp(__a, __b); #else return (vector bool int)__builtin_altivec_vcmpeqfp(__a, __b); #endif } #ifdef __VSX__ static __inline__ vector bool long long __ATTRS_o_ai vec_cmpeq(vector double __a, vector double __b) { return (vector bool long long)__builtin_vsx_xvcmpeqdp(__a, __b); } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ vector bool __int128 __ATTRS_o_ai vec_cmpeq(vector signed __int128 __a, vector signed __int128 __b) { return (vector bool __int128)__builtin_altivec_vcmpequq( (vector unsigned __int128)__a, (vector unsigned __int128)__b); } static __inline__ vector bool __int128 __ATTRS_o_ai vec_cmpeq(vector unsigned __int128 __a, vector unsigned __int128 __b) { return (vector bool __int128)__builtin_altivec_vcmpequq( (vector unsigned __int128)__a, (vector unsigned __int128)__b); } static __inline__ vector bool __int128 __ATTRS_o_ai vec_cmpeq(vector bool __int128 __a, vector bool __int128 __b) { return (vector bool __int128)__builtin_altivec_vcmpequq( (vector unsigned __int128)__a, (vector unsigned __int128)__b); } #endif #ifdef __POWER9_VECTOR__ /* vec_cmpne */ static __inline__ vector bool char __ATTRS_o_ai vec_cmpne(vector bool char __a, vector bool char __b) { return (vector bool char)__builtin_altivec_vcmpneb((vector char)__a, (vector char)__b); } static __inline__ vector bool char __ATTRS_o_ai vec_cmpne(vector signed char __a, vector signed char __b) { return (vector bool char)__builtin_altivec_vcmpneb((vector char)__a, (vector char)__b); } static __inline__ vector bool char __ATTRS_o_ai vec_cmpne(vector unsigned char __a, vector unsigned char __b) { return (vector bool char)__builtin_altivec_vcmpneb((vector char)__a, (vector char)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_cmpne(vector bool short __a, vector bool short __b) { return (vector bool short)__builtin_altivec_vcmpneh((vector short)__a, (vector short)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_cmpne(vector signed short __a, vector signed short __b) { return (vector bool short)__builtin_altivec_vcmpneh((vector short)__a, (vector short)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_cmpne(vector unsigned short __a, vector unsigned short __b) { return (vector bool short)__builtin_altivec_vcmpneh((vector short)__a, (vector short)__b); } static __inline__ vector bool int 
__ATTRS_o_ai vec_cmpne(vector bool int __a, vector bool int __b) { return (vector bool int)__builtin_altivec_vcmpnew((vector int)__a, (vector int)__b); } static __inline__ vector bool int __ATTRS_o_ai vec_cmpne(vector signed int __a, vector signed int __b) { return (vector bool int)__builtin_altivec_vcmpnew((vector int)__a, (vector int)__b); } static __inline__ vector bool int __ATTRS_o_ai vec_cmpne(vector unsigned int __a, vector unsigned int __b) { return (vector bool int)__builtin_altivec_vcmpnew((vector int)__a, (vector int)__b); } static __inline__ vector bool int __ATTRS_o_ai vec_cmpne(vector float __a, vector float __b) { return (vector bool int)__builtin_altivec_vcmpnew((vector int)__a, (vector int)__b); } #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ vector bool __int128 __ATTRS_o_ai vec_cmpne(vector unsigned __int128 __a, vector unsigned __int128 __b) { return (vector bool __int128)~(__builtin_altivec_vcmpequq( (vector unsigned __int128)__a, (vector unsigned __int128)__b)); } static __inline__ vector bool __int128 __ATTRS_o_ai vec_cmpne(vector signed __int128 __a, vector signed __int128 __b) { return (vector bool __int128)~(__builtin_altivec_vcmpequq( (vector unsigned __int128)__a, (vector unsigned __int128)__b)); } static __inline__ vector bool __int128 __ATTRS_o_ai vec_cmpne(vector bool __int128 __a, vector bool __int128 __b) { return (vector bool __int128)~(__builtin_altivec_vcmpequq( (vector unsigned __int128)__a, (vector unsigned __int128)__b)); } #endif /* vec_cmpnez */ static __inline__ vector bool char __ATTRS_o_ai vec_cmpnez(vector signed char __a, vector signed char __b) { return (vector bool char)__builtin_altivec_vcmpnezb((vector char)__a, (vector char)__b); } static __inline__ vector bool char __ATTRS_o_ai vec_cmpnez(vector unsigned char __a, vector unsigned char __b) { return (vector bool char)__builtin_altivec_vcmpnezb((vector char)__a, (vector char)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_cmpnez(vector signed short __a, vector signed short __b) { return (vector bool short)__builtin_altivec_vcmpnezh((vector short)__a, (vector short)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_cmpnez(vector unsigned short __a, vector unsigned short __b) { return (vector bool short)__builtin_altivec_vcmpnezh((vector short)__a, (vector short)__b); } static __inline__ vector bool int __ATTRS_o_ai vec_cmpnez(vector signed int __a, vector signed int __b) { return (vector bool int)__builtin_altivec_vcmpnezw((vector int)__a, (vector int)__b); } static __inline__ vector bool int __ATTRS_o_ai vec_cmpnez(vector unsigned int __a, vector unsigned int __b) { return (vector bool int)__builtin_altivec_vcmpnezw((vector int)__a, (vector int)__b); } static __inline__ signed int __ATTRS_o_ai vec_cntlz_lsbb(vector signed char __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vctzlsbb((vector unsigned char)__a); #else return __builtin_altivec_vclzlsbb((vector unsigned char)__a); #endif } static __inline__ signed int __ATTRS_o_ai vec_cntlz_lsbb(vector unsigned char __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vctzlsbb((vector unsigned char)__a); #else return __builtin_altivec_vclzlsbb(__a); #endif } static __inline__ signed int __ATTRS_o_ai vec_cnttz_lsbb(vector signed char __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vclzlsbb((vector unsigned char)__a); #else return __builtin_altivec_vctzlsbb((vector unsigned char)__a); #endif } static __inline__ signed int __ATTRS_o_ai vec_cnttz_lsbb(vector unsigned 
char __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vclzlsbb(__a); #else return __builtin_altivec_vctzlsbb(__a); #endif } static __inline__ vector unsigned int __ATTRS_o_ai vec_parity_lsbb(vector unsigned int __a) { return __builtin_altivec_vprtybw(__a); } static __inline__ vector unsigned int __ATTRS_o_ai vec_parity_lsbb(vector signed int __a) { return __builtin_altivec_vprtybw((vector unsigned int)__a); } #ifdef __SIZEOF_INT128__ static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_parity_lsbb(vector unsigned __int128 __a) { return __builtin_altivec_vprtybq(__a); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_parity_lsbb(vector signed __int128 __a) { return __builtin_altivec_vprtybq((vector unsigned __int128)__a); } #endif static __inline__ vector unsigned long long __ATTRS_o_ai vec_parity_lsbb(vector unsigned long long __a) { return __builtin_altivec_vprtybd(__a); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_parity_lsbb(vector signed long long __a) { return __builtin_altivec_vprtybd((vector unsigned long long)__a); } #else /* vec_cmpne */ static __inline__ vector bool char __ATTRS_o_ai vec_cmpne(vector bool char __a, vector bool char __b) { return ~(vec_cmpeq(__a, __b)); } static __inline__ vector bool char __ATTRS_o_ai vec_cmpne(vector signed char __a, vector signed char __b) { return ~(vec_cmpeq(__a, __b)); } static __inline__ vector bool char __ATTRS_o_ai vec_cmpne(vector unsigned char __a, vector unsigned char __b) { return ~(vec_cmpeq(__a, __b)); } static __inline__ vector bool short __ATTRS_o_ai vec_cmpne(vector bool short __a, vector bool short __b) { return ~(vec_cmpeq(__a, __b)); } static __inline__ vector bool short __ATTRS_o_ai vec_cmpne(vector signed short __a, vector signed short __b) { return ~(vec_cmpeq(__a, __b)); } static __inline__ vector bool short __ATTRS_o_ai vec_cmpne(vector unsigned short __a, vector unsigned short __b) { return ~(vec_cmpeq(__a, __b)); } static __inline__ vector bool int __ATTRS_o_ai vec_cmpne(vector bool int __a, vector bool int __b) { return ~(vec_cmpeq(__a, __b)); } static __inline__ vector bool int __ATTRS_o_ai vec_cmpne(vector signed int __a, vector signed int __b) { return ~(vec_cmpeq(__a, __b)); } static __inline__ vector bool int __ATTRS_o_ai vec_cmpne(vector unsigned int __a, vector unsigned int __b) { return ~(vec_cmpeq(__a, __b)); } static __inline__ vector bool int __ATTRS_o_ai vec_cmpne(vector float __a, vector float __b) { return ~(vec_cmpeq(__a, __b)); } #endif #ifdef __POWER8_VECTOR__ static __inline__ vector bool long long __ATTRS_o_ai vec_cmpne(vector bool long long __a, vector bool long long __b) { return (vector bool long long) ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); } static __inline__ vector bool long long __ATTRS_o_ai vec_cmpne(vector signed long long __a, vector signed long long __b) { return (vector bool long long) ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); } static __inline__ vector bool long long __ATTRS_o_ai vec_cmpne(vector unsigned long long __a, vector unsigned long long __b) { return (vector bool long long) ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); } #elif defined(__VSX__) static __inline__ vector bool long long __ATTRS_o_ai vec_cmpne(vector bool long long __a, vector bool long long __b) { return (vector bool long long)~( vec_cmpeq((vector signed long long)__a, (vector signed long long)__b)); } static __inline__ vector bool long long __ATTRS_o_ai vec_cmpne(vector 
signed long long __a, vector signed long long __b) { return (vector bool long long)~( vec_cmpeq((vector signed long long)__a, (vector signed long long)__b)); } static __inline__ vector bool long long __ATTRS_o_ai vec_cmpne(vector unsigned long long __a, vector unsigned long long __b) { return (vector bool long long)~( vec_cmpeq((vector signed long long)__a, (vector signed long long)__b)); } #endif #ifdef __VSX__ static __inline__ vector bool long long __ATTRS_o_ai vec_cmpne(vector double __a, vector double __b) { return (vector bool long long) ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); } #endif /* vec_cmpgt */ static __inline__ vector bool char __ATTRS_o_ai vec_cmpgt(vector signed char __a, vector signed char __b) { return (vector bool char)__builtin_altivec_vcmpgtsb(__a, __b); } static __inline__ vector bool char __ATTRS_o_ai vec_cmpgt(vector unsigned char __a, vector unsigned char __b) { return (vector bool char)__builtin_altivec_vcmpgtub(__a, __b); } static __inline__ vector bool short __ATTRS_o_ai vec_cmpgt(vector short __a, vector short __b) { return (vector bool short)__builtin_altivec_vcmpgtsh(__a, __b); } static __inline__ vector bool short __ATTRS_o_ai vec_cmpgt(vector unsigned short __a, vector unsigned short __b) { return (vector bool short)__builtin_altivec_vcmpgtuh(__a, __b); } static __inline__ vector bool int __ATTRS_o_ai vec_cmpgt(vector int __a, vector int __b) { return (vector bool int)__builtin_altivec_vcmpgtsw(__a, __b); } static __inline__ vector bool int __ATTRS_o_ai vec_cmpgt(vector unsigned int __a, vector unsigned int __b) { return (vector bool int)__builtin_altivec_vcmpgtuw(__a, __b); } #ifdef __POWER8_VECTOR__ static __inline__ vector bool long long __ATTRS_o_ai vec_cmpgt(vector signed long long __a, vector signed long long __b) { return (vector bool long long)__builtin_altivec_vcmpgtsd(__a, __b); } static __inline__ vector bool long long __ATTRS_o_ai vec_cmpgt(vector unsigned long long __a, vector unsigned long long __b) { return (vector bool long long)__builtin_altivec_vcmpgtud(__a, __b); } #elif defined(__VSX__) static __inline__ vector bool long long __ATTRS_o_ai vec_cmpgt(vector signed long long __a, vector signed long long __b) { vector signed int __sgtw = (vector signed int)vec_cmpgt( (vector signed int)__a, (vector signed int)__b); vector unsigned int __ugtw = (vector unsigned int)vec_cmpgt( (vector unsigned int)__a, (vector unsigned int)__b); vector unsigned int __eqw = (vector unsigned int)vec_cmpeq( (vector signed int)__a, (vector signed int)__b); #ifdef __LITTLE_ENDIAN__ __ugtw = __builtin_shufflevector(__ugtw, __ugtw, 3, 0, 1, 2) & __eqw; __sgtw |= (vector signed int)__ugtw; return (vector bool long long)__builtin_shufflevector(__sgtw, __sgtw, 1, 1, 3, 3); #else __ugtw = __builtin_shufflevector(__ugtw, __ugtw, 1, 2, 3, 0) & __eqw; __sgtw |= (vector signed int)__ugtw; return (vector bool long long)__builtin_shufflevector(__sgtw, __sgtw, 0, 0, 2, 2); #endif } static __inline__ vector bool long long __ATTRS_o_ai vec_cmpgt(vector unsigned long long __a, vector unsigned long long __b) { vector unsigned int __ugtw = (vector unsigned int)vec_cmpgt( (vector unsigned int)__a, (vector unsigned int)__b); vector unsigned int __eqw = (vector unsigned int)vec_cmpeq( (vector signed int)__a, (vector signed int)__b); #ifdef __LITTLE_ENDIAN__ __eqw = __builtin_shufflevector(__ugtw, __ugtw, 3, 0, 1, 2) & __eqw; __ugtw |= __eqw; return (vector bool long long)__builtin_shufflevector(__ugtw, __ugtw, 1, 1, 3, 3); #else __eqw = 
__builtin_shufflevector(__ugtw, __ugtw, 1, 2, 3, 0) & __eqw; __ugtw |= __eqw; return (vector bool long long)__builtin_shufflevector(__ugtw, __ugtw, 0, 0, 2, 2); #endif } #endif static __inline__ vector bool int __ATTRS_o_ai vec_cmpgt(vector float __a, vector float __b) { #ifdef __VSX__ return (vector bool int)__builtin_vsx_xvcmpgtsp(__a, __b); #else return (vector bool int)__builtin_altivec_vcmpgtfp(__a, __b); #endif } #ifdef __VSX__ static __inline__ vector bool long long __ATTRS_o_ai vec_cmpgt(vector double __a, vector double __b) { return (vector bool long long)__builtin_vsx_xvcmpgtdp(__a, __b); } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ vector bool __int128 __ATTRS_o_ai vec_cmpgt(vector signed __int128 __a, vector signed __int128 __b) { return (vector bool __int128)__builtin_altivec_vcmpgtsq(__a, __b); } static __inline__ vector bool __int128 __ATTRS_o_ai vec_cmpgt(vector unsigned __int128 __a, vector unsigned __int128 __b) { return (vector bool __int128)__builtin_altivec_vcmpgtuq(__a, __b); } #endif /* vec_cmpge */ static __inline__ vector bool char __ATTRS_o_ai vec_cmpge(vector signed char __a, vector signed char __b) { return ~(vec_cmpgt(__b, __a)); } static __inline__ vector bool char __ATTRS_o_ai vec_cmpge(vector unsigned char __a, vector unsigned char __b) { return ~(vec_cmpgt(__b, __a)); } static __inline__ vector bool short __ATTRS_o_ai vec_cmpge(vector signed short __a, vector signed short __b) { return ~(vec_cmpgt(__b, __a)); } static __inline__ vector bool short __ATTRS_o_ai vec_cmpge(vector unsigned short __a, vector unsigned short __b) { return ~(vec_cmpgt(__b, __a)); } static __inline__ vector bool int __ATTRS_o_ai vec_cmpge(vector signed int __a, vector signed int __b) { return ~(vec_cmpgt(__b, __a)); } static __inline__ vector bool int __ATTRS_o_ai vec_cmpge(vector unsigned int __a, vector unsigned int __b) { return ~(vec_cmpgt(__b, __a)); } static __inline__ vector bool int __ATTRS_o_ai vec_cmpge(vector float __a, vector float __b) { #ifdef __VSX__ return (vector bool int)__builtin_vsx_xvcmpgesp(__a, __b); #else return (vector bool int)__builtin_altivec_vcmpgefp(__a, __b); #endif } #ifdef __VSX__ static __inline__ vector bool long long __ATTRS_o_ai vec_cmpge(vector double __a, vector double __b) { return (vector bool long long)__builtin_vsx_xvcmpgedp(__a, __b); } static __inline__ vector bool long long __ATTRS_o_ai vec_cmpge(vector signed long long __a, vector signed long long __b) { return ~(vec_cmpgt(__b, __a)); } static __inline__ vector bool long long __ATTRS_o_ai vec_cmpge(vector unsigned long long __a, vector unsigned long long __b) { return ~(vec_cmpgt(__b, __a)); } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ vector bool __int128 __ATTRS_o_ai vec_cmpge(vector signed __int128 __a, vector signed __int128 __b) { return ~(vec_cmpgt(__b, __a)); } static __inline__ vector bool __int128 __ATTRS_o_ai vec_cmpge(vector unsigned __int128 __a, vector unsigned __int128 __b) { return ~(vec_cmpgt(__b, __a)); } #endif /* vec_vcmpgefp */ static __inline__ vector bool int __attribute__((__always_inline__)) vec_vcmpgefp(vector float __a, vector float __b) { return (vector bool int)__builtin_altivec_vcmpgefp(__a, __b); } /* vec_vcmpgtsb */ static __inline__ vector bool char __attribute__((__always_inline__)) vec_vcmpgtsb(vector signed char __a, vector signed char __b) { return (vector bool char)__builtin_altivec_vcmpgtsb(__a, __b); } /* vec_vcmpgtub */ static __inline__ vector bool char 
__attribute__((__always_inline__)) vec_vcmpgtub(vector unsigned char __a, vector unsigned char __b) { return (vector bool char)__builtin_altivec_vcmpgtub(__a, __b); } /* vec_vcmpgtsh */ static __inline__ vector bool short __attribute__((__always_inline__)) vec_vcmpgtsh(vector short __a, vector short __b) { return (vector bool short)__builtin_altivec_vcmpgtsh(__a, __b); } /* vec_vcmpgtuh */ static __inline__ vector bool short __attribute__((__always_inline__)) vec_vcmpgtuh(vector unsigned short __a, vector unsigned short __b) { return (vector bool short)__builtin_altivec_vcmpgtuh(__a, __b); } /* vec_vcmpgtsw */ static __inline__ vector bool int __attribute__((__always_inline__)) vec_vcmpgtsw(vector int __a, vector int __b) { return (vector bool int)__builtin_altivec_vcmpgtsw(__a, __b); } /* vec_vcmpgtuw */ static __inline__ vector bool int __attribute__((__always_inline__)) vec_vcmpgtuw(vector unsigned int __a, vector unsigned int __b) { return (vector bool int)__builtin_altivec_vcmpgtuw(__a, __b); } /* vec_vcmpgtfp */ static __inline__ vector bool int __attribute__((__always_inline__)) vec_vcmpgtfp(vector float __a, vector float __b) { return (vector bool int)__builtin_altivec_vcmpgtfp(__a, __b); } /* vec_cmple */ static __inline__ vector bool char __ATTRS_o_ai vec_cmple(vector signed char __a, vector signed char __b) { return vec_cmpge(__b, __a); } static __inline__ vector bool char __ATTRS_o_ai vec_cmple(vector unsigned char __a, vector unsigned char __b) { return vec_cmpge(__b, __a); } static __inline__ vector bool short __ATTRS_o_ai vec_cmple(vector signed short __a, vector signed short __b) { return vec_cmpge(__b, __a); } static __inline__ vector bool short __ATTRS_o_ai vec_cmple(vector unsigned short __a, vector unsigned short __b) { return vec_cmpge(__b, __a); } static __inline__ vector bool int __ATTRS_o_ai vec_cmple(vector signed int __a, vector signed int __b) { return vec_cmpge(__b, __a); } static __inline__ vector bool int __ATTRS_o_ai vec_cmple(vector unsigned int __a, vector unsigned int __b) { return vec_cmpge(__b, __a); } static __inline__ vector bool int __ATTRS_o_ai vec_cmple(vector float __a, vector float __b) { return vec_cmpge(__b, __a); } #ifdef __VSX__ static __inline__ vector bool long long __ATTRS_o_ai vec_cmple(vector double __a, vector double __b) { return vec_cmpge(__b, __a); } static __inline__ vector bool long long __ATTRS_o_ai vec_cmple(vector signed long long __a, vector signed long long __b) { return vec_cmpge(__b, __a); } static __inline__ vector bool long long __ATTRS_o_ai vec_cmple(vector unsigned long long __a, vector unsigned long long __b) { return vec_cmpge(__b, __a); } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ vector bool __int128 __ATTRS_o_ai vec_cmple(vector signed __int128 __a, vector signed __int128 __b) { return vec_cmpge(__b, __a); } static __inline__ vector bool __int128 __ATTRS_o_ai vec_cmple(vector unsigned __int128 __a, vector unsigned __int128 __b) { return vec_cmpge(__b, __a); } #endif /* vec_cmplt */ static __inline__ vector bool char __ATTRS_o_ai vec_cmplt(vector signed char __a, vector signed char __b) { return vec_cmpgt(__b, __a); } static __inline__ vector bool char __ATTRS_o_ai vec_cmplt(vector unsigned char __a, vector unsigned char __b) { return vec_cmpgt(__b, __a); } static __inline__ vector bool short __ATTRS_o_ai vec_cmplt(vector short __a, vector short __b) { return vec_cmpgt(__b, __a); } static __inline__ vector bool short __ATTRS_o_ai vec_cmplt(vector unsigned short __a, vector 
unsigned short __b) { return vec_cmpgt(__b, __a); } static __inline__ vector bool int __ATTRS_o_ai vec_cmplt(vector int __a, vector int __b) { return vec_cmpgt(__b, __a); } static __inline__ vector bool int __ATTRS_o_ai vec_cmplt(vector unsigned int __a, vector unsigned int __b) { return vec_cmpgt(__b, __a); } static __inline__ vector bool int __ATTRS_o_ai vec_cmplt(vector float __a, vector float __b) { return vec_cmpgt(__b, __a); } #ifdef __VSX__ static __inline__ vector bool long long __ATTRS_o_ai vec_cmplt(vector double __a, vector double __b) { return vec_cmpgt(__b, __a); } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ vector bool __int128 __ATTRS_o_ai vec_cmplt(vector signed __int128 __a, vector signed __int128 __b) { return vec_cmpgt(__b, __a); } static __inline__ vector bool __int128 __ATTRS_o_ai vec_cmplt(vector unsigned __int128 __a, vector unsigned __int128 __b) { return vec_cmpgt(__b, __a); } #endif #ifdef __VSX__ static __inline__ vector bool long long __ATTRS_o_ai vec_cmplt(vector signed long long __a, vector signed long long __b) { return vec_cmpgt(__b, __a); } static __inline__ vector bool long long __ATTRS_o_ai vec_cmplt(vector unsigned long long __a, vector unsigned long long __b) { return vec_cmpgt(__b, __a); } #endif #ifdef __POWER8_VECTOR__ /* vec_popcnt */ static __inline__ vector unsigned char __ATTRS_o_ai vec_popcnt(vector signed char __a) { return (vector unsigned char)__builtin_altivec_vpopcntb( (vector unsigned char)__a); } static __inline__ vector unsigned char __ATTRS_o_ai vec_popcnt(vector unsigned char __a) { return __builtin_altivec_vpopcntb(__a); } static __inline__ vector unsigned short __ATTRS_o_ai vec_popcnt(vector signed short __a) { return (vector unsigned short)__builtin_altivec_vpopcnth( (vector unsigned short)__a); } static __inline__ vector unsigned short __ATTRS_o_ai vec_popcnt(vector unsigned short __a) { return __builtin_altivec_vpopcnth(__a); } static __inline__ vector unsigned int __ATTRS_o_ai vec_popcnt(vector signed int __a) { return __builtin_altivec_vpopcntw((vector unsigned int)__a); } static __inline__ vector unsigned int __ATTRS_o_ai vec_popcnt(vector unsigned int __a) { return __builtin_altivec_vpopcntw(__a); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_popcnt(vector signed long long __a) { return __builtin_altivec_vpopcntd((vector unsigned long long)__a); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_popcnt(vector unsigned long long __a) { return __builtin_altivec_vpopcntd(__a); } #define vec_vclz vec_cntlz /* vec_cntlz */ static __inline__ vector signed char __ATTRS_o_ai vec_cntlz(vector signed char __a) { return (vector signed char)__builtin_altivec_vclzb((vector unsigned char)__a); } static __inline__ vector unsigned char __ATTRS_o_ai vec_cntlz(vector unsigned char __a) { return __builtin_altivec_vclzb(__a); } static __inline__ vector signed short __ATTRS_o_ai vec_cntlz(vector signed short __a) { return (vector signed short)__builtin_altivec_vclzh( (vector unsigned short)__a); } static __inline__ vector unsigned short __ATTRS_o_ai vec_cntlz(vector unsigned short __a) { return __builtin_altivec_vclzh(__a); } static __inline__ vector signed int __ATTRS_o_ai vec_cntlz(vector signed int __a) { return (vector signed int)__builtin_altivec_vclzw((vector unsigned int)__a); } static __inline__ vector unsigned int __ATTRS_o_ai vec_cntlz(vector unsigned int __a) { return __builtin_altivec_vclzw(__a); } static __inline__ vector signed long long __ATTRS_o_ai 
vec_cntlz(vector signed long long __a) { return (vector signed long long)__builtin_altivec_vclzd( (vector unsigned long long)__a); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_cntlz(vector unsigned long long __a) { return __builtin_altivec_vclzd(__a); } #endif #ifdef __POWER9_VECTOR__ /* vec_cnttz */ static __inline__ vector signed char __ATTRS_o_ai vec_cnttz(vector signed char __a) { return (vector signed char)__builtin_altivec_vctzb((vector unsigned char)__a); } static __inline__ vector unsigned char __ATTRS_o_ai vec_cnttz(vector unsigned char __a) { return __builtin_altivec_vctzb(__a); } static __inline__ vector signed short __ATTRS_o_ai vec_cnttz(vector signed short __a) { return (vector signed short)__builtin_altivec_vctzh( (vector unsigned short)__a); } static __inline__ vector unsigned short __ATTRS_o_ai vec_cnttz(vector unsigned short __a) { return __builtin_altivec_vctzh(__a); } static __inline__ vector signed int __ATTRS_o_ai vec_cnttz(vector signed int __a) { return (vector signed int)__builtin_altivec_vctzw((vector unsigned int)__a); } static __inline__ vector unsigned int __ATTRS_o_ai vec_cnttz(vector unsigned int __a) { return __builtin_altivec_vctzw(__a); } static __inline__ vector signed long long __ATTRS_o_ai vec_cnttz(vector signed long long __a) { return (vector signed long long)__builtin_altivec_vctzd( (vector unsigned long long)__a); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_cnttz(vector unsigned long long __a) { return __builtin_altivec_vctzd(__a); } /* vec_first_match_index */ static __inline__ unsigned __ATTRS_o_ai vec_first_match_index(vector signed char __a, vector signed char __b) { vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)vec_cmpeq(__a, __b)); #else vec_cntlz((vector unsigned long long)vec_cmpeq(__a, __b)); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 3; } return __res[0] >> 3; } static __inline__ unsigned __ATTRS_o_ai vec_first_match_index(vector unsigned char __a, vector unsigned char __b) { vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)vec_cmpeq(__a, __b)); #else vec_cntlz((vector unsigned long long)vec_cmpeq(__a, __b)); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 3; } return __res[0] >> 3; } static __inline__ unsigned __ATTRS_o_ai vec_first_match_index(vector signed short __a, vector signed short __b) { vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)vec_cmpeq(__a, __b)); #else vec_cntlz((vector unsigned long long)vec_cmpeq(__a, __b)); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 4; } return __res[0] >> 4; } static __inline__ unsigned __ATTRS_o_ai vec_first_match_index(vector unsigned short __a, vector unsigned short __b) { vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)vec_cmpeq(__a, __b)); #else vec_cntlz((vector unsigned long long)vec_cmpeq(__a, __b)); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 4; } return __res[0] >> 4; } static __inline__ unsigned __ATTRS_o_ai vec_first_match_index(vector signed int __a, vector signed int __b) { vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)vec_cmpeq(__a, __b)); #else vec_cntlz((vector unsigned long long)vec_cmpeq(__a, __b)); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 5; } return __res[0] >> 5; } static __inline__ unsigned __ATTRS_o_ai vec_first_match_index(vector unsigned int 
__a, vector unsigned int __b) { vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)vec_cmpeq(__a, __b)); #else vec_cntlz((vector unsigned long long)vec_cmpeq(__a, __b)); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 5; } return __res[0] >> 5; } /* vec_first_match_or_eos_index */ static __inline__ unsigned __ATTRS_o_ai vec_first_match_or_eos_index(vector signed char __a, vector signed char __b) { /* OR together the lanes where the elements are equal and the lanes where either input element is zero: in unequal lanes the comparison result is zero, so comparing that result against each input flags the zero (end-of-string) elements. */ vector bool char __tmp1 = vec_cmpeq(__a, __b); vector bool char __tmp2 = __tmp1 | vec_cmpeq((vector signed char)__tmp1, __a) | vec_cmpeq((vector signed char)__tmp1, __b); vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)__tmp2); #else vec_cntlz((vector unsigned long long)__tmp2); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 3; } return __res[0] >> 3; } static __inline__ unsigned __ATTRS_o_ai vec_first_match_or_eos_index(vector unsigned char __a, vector unsigned char __b) { vector bool char __tmp1 = vec_cmpeq(__a, __b); vector bool char __tmp2 = __tmp1 | vec_cmpeq((vector unsigned char)__tmp1, __a) | vec_cmpeq((vector unsigned char)__tmp1, __b); vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)__tmp2); #else vec_cntlz((vector unsigned long long)__tmp2); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 3; } return __res[0] >> 3; } static __inline__ unsigned __ATTRS_o_ai vec_first_match_or_eos_index(vector signed short __a, vector signed short __b) { vector bool short __tmp1 = vec_cmpeq(__a, __b); vector bool short __tmp2 = __tmp1 | vec_cmpeq((vector signed short)__tmp1, __a) | vec_cmpeq((vector signed short)__tmp1, __b); vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)__tmp2); #else vec_cntlz((vector unsigned long long)__tmp2); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 4; } return __res[0] >> 4; } static __inline__ unsigned __ATTRS_o_ai vec_first_match_or_eos_index(vector unsigned short __a, vector unsigned short __b) { vector bool short __tmp1 = vec_cmpeq(__a, __b); vector bool short __tmp2 = __tmp1 | vec_cmpeq((vector unsigned short)__tmp1, __a) | vec_cmpeq((vector unsigned short)__tmp1, __b); vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)__tmp2); #else vec_cntlz((vector unsigned long long)__tmp2); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 4; } return __res[0] >> 4; } static __inline__ unsigned __ATTRS_o_ai vec_first_match_or_eos_index(vector signed int __a, vector signed int __b) { vector bool int __tmp1 = vec_cmpeq(__a, __b); vector bool int __tmp2 = __tmp1 | vec_cmpeq((vector signed int)__tmp1, __a) | vec_cmpeq((vector signed int)__tmp1, __b); vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)__tmp2); #else vec_cntlz((vector unsigned long long)__tmp2); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 5; } return __res[0] >> 5; } static __inline__ unsigned __ATTRS_o_ai vec_first_match_or_eos_index(vector unsigned int __a, vector unsigned int __b) { vector bool int __tmp1 = vec_cmpeq(__a, __b); vector bool int __tmp2 = __tmp1 | vec_cmpeq((vector unsigned int)__tmp1, __a) | vec_cmpeq((vector unsigned int)__tmp1, __b); vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ 
vec_cnttz((vector unsigned long long)__tmp2); #else vec_cntlz((vector unsigned long long)__tmp2); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 5; } return __res[0] >> 5; } /* vec_first_mismatch_index */ static __inline__ unsigned __ATTRS_o_ai vec_first_mismatch_index(vector signed char __a, vector signed char __b) { vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)vec_cmpne(__a, __b)); #else vec_cntlz((vector unsigned long long)vec_cmpne(__a, __b)); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 3; } return __res[0] >> 3; } static __inline__ unsigned __ATTRS_o_ai vec_first_mismatch_index(vector unsigned char __a, vector unsigned char __b) { vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)vec_cmpne(__a, __b)); #else vec_cntlz((vector unsigned long long)vec_cmpne(__a, __b)); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 3; } return __res[0] >> 3; } static __inline__ unsigned __ATTRS_o_ai vec_first_mismatch_index(vector signed short __a, vector signed short __b) { vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)vec_cmpne(__a, __b)); #else vec_cntlz((vector unsigned long long)vec_cmpne(__a, __b)); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 4; } return __res[0] >> 4; } static __inline__ unsigned __ATTRS_o_ai vec_first_mismatch_index(vector unsigned short __a, vector unsigned short __b) { vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)vec_cmpne(__a, __b)); #else vec_cntlz((vector unsigned long long)vec_cmpne(__a, __b)); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 4; } return __res[0] >> 4; } static __inline__ unsigned __ATTRS_o_ai vec_first_mismatch_index(vector signed int __a, vector signed int __b) { vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)vec_cmpne(__a, __b)); #else vec_cntlz((vector unsigned long long)vec_cmpne(__a, __b)); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 5; } return __res[0] >> 5; } static __inline__ unsigned __ATTRS_o_ai vec_first_mismatch_index(vector unsigned int __a, vector unsigned int __b) { vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)vec_cmpne(__a, __b)); #else vec_cntlz((vector unsigned long long)vec_cmpne(__a, __b)); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 5; } return __res[0] >> 5; } /* vec_first_mismatch_or_eos_index */ static __inline__ unsigned __ATTRS_o_ai vec_first_mismatch_or_eos_index(vector signed char __a, vector signed char __b) { vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)vec_cmpnez(__a, __b)); #else vec_cntlz((vector unsigned long long)vec_cmpnez(__a, __b)); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 3; } return __res[0] >> 3; } static __inline__ unsigned __ATTRS_o_ai vec_first_mismatch_or_eos_index(vector unsigned char __a, vector unsigned char __b) { vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)vec_cmpnez(__a, __b)); #else vec_cntlz((vector unsigned long long)vec_cmpnez(__a, __b)); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 3; } return __res[0] >> 3; } static __inline__ unsigned __ATTRS_o_ai vec_first_mismatch_or_eos_index(vector signed short __a, vector signed short __b) { vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned 
long long)vec_cmpnez(__a, __b)); #else vec_cntlz((vector unsigned long long)vec_cmpnez(__a, __b)); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 4; } return __res[0] >> 4; } static __inline__ unsigned __ATTRS_o_ai vec_first_mismatch_or_eos_index(vector unsigned short __a, vector unsigned short __b) { vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)vec_cmpnez(__a, __b)); #else vec_cntlz((vector unsigned long long)vec_cmpnez(__a, __b)); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 4; } return __res[0] >> 4; } static __inline__ unsigned __ATTRS_o_ai vec_first_mismatch_or_eos_index(vector signed int __a, vector signed int __b) { vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)vec_cmpnez(__a, __b)); #else vec_cntlz((vector unsigned long long)vec_cmpnez(__a, __b)); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 5; } return __res[0] >> 5; } static __inline__ unsigned __ATTRS_o_ai vec_first_mismatch_or_eos_index(vector unsigned int __a, vector unsigned int __b) { vector unsigned long long __res = #ifdef __LITTLE_ENDIAN__ vec_cnttz((vector unsigned long long)vec_cmpnez(__a, __b)); #else vec_cntlz((vector unsigned long long)vec_cmpnez(__a, __b)); #endif if (__res[0] == 64) { return (__res[1] + 64) >> 5; } return __res[0] >> 5; } static __inline__ vector double __ATTRS_o_ai vec_insert_exp(vector double __a, vector unsigned long long __b) { return __builtin_vsx_xviexpdp((vector unsigned long long)__a,__b); } static __inline__ vector double __ATTRS_o_ai vec_insert_exp(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_vsx_xviexpdp(__a,__b); } static __inline__ vector float __ATTRS_o_ai vec_insert_exp(vector float __a, vector unsigned int __b) { return __builtin_vsx_xviexpsp((vector unsigned int)__a,__b); } static __inline__ vector float __ATTRS_o_ai vec_insert_exp(vector unsigned int __a, vector unsigned int __b) { return __builtin_vsx_xviexpsp(__a,__b); } #if defined(__powerpc64__) static __inline__ vector signed char __ATTRS_o_ai vec_xl_len(const signed char *__a, size_t __b) { return (vector signed char)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned char __ATTRS_o_ai vec_xl_len(const unsigned char *__a, size_t __b) { return (vector unsigned char)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector signed short __ATTRS_o_ai vec_xl_len(const signed short *__a, size_t __b) { return (vector signed short)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned short __ATTRS_o_ai vec_xl_len(const unsigned short *__a, size_t __b) { return (vector unsigned short)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector signed int __ATTRS_o_ai vec_xl_len(const signed int *__a, size_t __b) { return (vector signed int)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned int __ATTRS_o_ai vec_xl_len(const unsigned int *__a, size_t __b) { return (vector unsigned int)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector float __ATTRS_o_ai vec_xl_len(const float *__a, size_t __b) { return (vector float)__builtin_vsx_lxvl(__a, (__b << 56)); } #ifdef __SIZEOF_INT128__ static __inline__ vector signed __int128 __ATTRS_o_ai vec_xl_len(const signed __int128 *__a, size_t __b) { return (vector signed __int128)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_xl_len(const unsigned __int128 *__a, size_t __b) { return (vector 
unsigned __int128)__builtin_vsx_lxvl(__a, (__b << 56)); } #endif static __inline__ vector signed long long __ATTRS_o_ai vec_xl_len(const signed long long *__a, size_t __b) { return (vector signed long long)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_xl_len(const unsigned long long *__a, size_t __b) { return (vector unsigned long long)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector double __ATTRS_o_ai vec_xl_len(const double *__a, size_t __b) { return (vector double)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned char __ATTRS_o_ai vec_xl_len_r(const unsigned char *__a, size_t __b) { vector unsigned char __res = (vector unsigned char)__builtin_vsx_lxvll(__a, (__b << 56)); vector unsigned char __mask = (vector unsigned char)__builtin_altivec_lvsr(16 - __b, (int *)NULL); return (vector unsigned char)__builtin_altivec_vperm_4si( (vector int)__res, (vector int)__res, __mask); } // vec_xst_len static __inline__ void __ATTRS_o_ai vec_xst_len(vector unsigned char __a, unsigned char *__b, size_t __c) { return __builtin_vsx_stxvl((vector int)__a, __b, (__c << 56)); } static __inline__ void __ATTRS_o_ai vec_xst_len(vector signed char __a, signed char *__b, size_t __c) { return __builtin_vsx_stxvl((vector int)__a, __b, (__c << 56)); } static __inline__ void __ATTRS_o_ai vec_xst_len(vector signed short __a, signed short *__b, size_t __c) { return __builtin_vsx_stxvl((vector int)__a, __b, (__c << 56)); } static __inline__ void __ATTRS_o_ai vec_xst_len(vector unsigned short __a, unsigned short *__b, size_t __c) { return __builtin_vsx_stxvl((vector int)__a, __b, (__c << 56)); } static __inline__ void __ATTRS_o_ai vec_xst_len(vector signed int __a, signed int *__b, size_t __c) { return __builtin_vsx_stxvl((vector int)__a, __b, (__c << 56)); } static __inline__ void __ATTRS_o_ai vec_xst_len(vector unsigned int __a, unsigned int *__b, size_t __c) { return __builtin_vsx_stxvl((vector int)__a, __b, (__c << 56)); } static __inline__ void __ATTRS_o_ai vec_xst_len(vector float __a, float *__b, size_t __c) { return __builtin_vsx_stxvl((vector int)__a, __b, (__c << 56)); } #ifdef __SIZEOF_INT128__ static __inline__ void __ATTRS_o_ai vec_xst_len(vector signed __int128 __a, signed __int128 *__b, size_t __c) { return __builtin_vsx_stxvl((vector int)__a, __b, (__c << 56)); } static __inline__ void __ATTRS_o_ai vec_xst_len(vector unsigned __int128 __a, unsigned __int128 *__b, size_t __c) { return __builtin_vsx_stxvl((vector int)__a, __b, (__c << 56)); } #endif static __inline__ void __ATTRS_o_ai vec_xst_len(vector signed long long __a, signed long long *__b, size_t __c) { return __builtin_vsx_stxvl((vector int)__a, __b, (__c << 56)); } static __inline__ void __ATTRS_o_ai vec_xst_len(vector unsigned long long __a, unsigned long long *__b, size_t __c) { return __builtin_vsx_stxvl((vector int)__a, __b, (__c << 56)); } static __inline__ void __ATTRS_o_ai vec_xst_len(vector double __a, double *__b, size_t __c) { return __builtin_vsx_stxvl((vector int)__a, __b, (__c << 56)); } static __inline__ void __ATTRS_o_ai vec_xst_len_r(vector unsigned char __a, unsigned char *__b, size_t __c) { vector unsigned char __mask = (vector unsigned char)__builtin_altivec_lvsl(16 - __c, (int *)NULL); vector unsigned char __res = (vector unsigned char)__builtin_altivec_vperm_4si( (vector int)__a, (vector int)__a, __mask); return __builtin_vsx_stxvll((vector int)__res, __b, (__c << 56)); } #endif #endif #if defined(__POWER9_VECTOR__) && 
defined(__powerpc64__) #define __vec_ldrmb(PTR, CNT) vec_xl_len_r((const unsigned char *)(PTR), (CNT)) #define __vec_strmb(PTR, CNT, VAL) \ vec_xst_len_r((VAL), (unsigned char *)(PTR), (CNT)) #else #define __vec_ldrmb __builtin_vsx_ldrmb #define __vec_strmb __builtin_vsx_strmb #endif /* vec_cpsgn */ #ifdef __VSX__ static __inline__ vector float __ATTRS_o_ai vec_cpsgn(vector float __a, vector float __b) { return __builtin_vsx_xvcpsgnsp(__b, __a); } static __inline__ vector double __ATTRS_o_ai vec_cpsgn(vector double __a, vector double __b) { return __builtin_vsx_xvcpsgndp(__b, __a); } #endif /* vec_ctf */ #ifdef __VSX__ // There are some functions that have different signatures with the XL compiler // from those in Clang/GCC and documented in the PVIPR. This macro ensures that // the XL-compatible signatures are used for those functions. #ifdef __XL_COMPAT_ALTIVEC__ #define vec_ctf(__a, __b) \ _Generic((__a), \ vector int: (vector float)__builtin_altivec_vcfsx((vector int)(__a), \ ((__b)&0x1F)), \ vector unsigned int: (vector float)__builtin_altivec_vcfux( \ (vector unsigned int)(__a), ((__b)&0x1F)), \ vector unsigned long long: ( \ vector float)(__builtin_vsx_xvcvuxdsp( \ (vector unsigned long long)(__a)) * \ (vector float)(vector unsigned)((0x7f - \ ((__b)&0x1F)) \ << 23)), \ vector signed long long: ( \ vector float)(__builtin_vsx_xvcvsxdsp( \ (vector signed long long)(__a)) * \ (vector float)(vector unsigned)((0x7f - \ ((__b)&0x1F)) \ << 23))) #else // __XL_COMPAT_ALTIVEC__ #define vec_ctf(__a, __b) \ _Generic( \ (__a), \ vector int: (vector float)__builtin_altivec_vcfsx((vector int)(__a), \ ((__b)&0x1F)), \ vector unsigned int: (vector float)__builtin_altivec_vcfux( \ (vector unsigned int)(__a), ((__b)&0x1F)), \ vector unsigned long long: ( \ vector float)(__builtin_convertvector( \ (vector unsigned long long)(__a), vector double) * \ (vector double)(vector unsigned long long)((0x3ffULL - \ ((__b)&0x1F)) \ << 52)), \ vector signed long long: ( \ vector float)(__builtin_convertvector( \ (vector signed long long)(__a), vector double) * \ (vector double)(vector unsigned long long)((0x3ffULL - \ ((__b)&0x1F)) \ << 52))) #endif // __XL_COMPAT_ALTIVEC__ #else #define vec_ctf(__a, __b) \ _Generic((__a), \ vector int: (vector float)__builtin_altivec_vcfsx((vector int)(__a), \ ((__b)&0x1F)), \ vector unsigned int: (vector float)__builtin_altivec_vcfux( \ (vector unsigned int)(__a), ((__b)&0x1F))) #endif /* vec_ctd */ #ifdef __VSX__ #define vec_ctd(__a, __b) \ _Generic((__a), \ vector signed int: ( \ vec_doublee((vector signed int)(__a)) * \ (vector double)(vector unsigned long long)((0x3ffULL - \ ((__b)&0x1F)) \ << 52)), \ vector unsigned int: ( \ vec_doublee((vector unsigned int)(__a)) * \ (vector double)(vector unsigned long long)((0x3ffULL - \ ((__b)&0x1F)) \ << 52)), \ vector unsigned long long: ( \ __builtin_convertvector((vector unsigned long long)(__a), \ vector double) * \ (vector double)(vector unsigned long long)((0x3ffULL - \ ((__b)&0x1F)) \ << 52)), \ vector signed long long: ( \ __builtin_convertvector((vector signed long long)(__a), \ vector double) * \ (vector double)(vector unsigned long long)((0x3ffULL - \ ((__b)&0x1F)) \ << 52))) #endif // __VSX__ /* vec_vcfsx */ #define vec_vcfux __builtin_altivec_vcfux /* vec_vcfux */ #define vec_vcfsx(__a, __b) __builtin_altivec_vcfsx((vector int)(__a), (__b)) /* vec_cts */ #ifdef __VSX__ #ifdef __XL_COMPAT_ALTIVEC__ #define vec_cts(__a, __b) \ _Generic((__a), \ vector float: (vector signed int)__builtin_altivec_vctsxs( \ (vector 
float)(__a), ((__b)&0x1F)), \ vector double: __extension__({ \ vector double __ret = \ (vector double)(__a) * \ (vector double)(vector unsigned long long)((0x3ffULL + \ ((__b)&0x1F)) \ << 52); \ (vector signed long long)__builtin_vsx_xvcvdpsxws(__ret); \ })) #else // __XL_COMPAT_ALTIVEC__ #define vec_cts(__a, __b) \ _Generic((__a), \ vector float: (vector signed int)__builtin_altivec_vctsxs( \ (vector float)(__a), ((__b)&0x1F)), \ vector double: __extension__({ \ vector double __ret = \ (vector double)(__a) * \ (vector double)(vector unsigned long long)((0x3ffULL + \ ((__b)&0x1F)) \ << 52); \ (vector signed long long)__builtin_convertvector( \ __ret, vector signed long long); \ })) #endif // __XL_COMPAT_ALTIVEC__ #else #define vec_cts __builtin_altivec_vctsxs #endif /* vec_vctsxs */ #define vec_vctsxs __builtin_altivec_vctsxs /* vec_ctu */ #ifdef __VSX__ #ifdef __XL_COMPAT_ALTIVEC__ #define vec_ctu(__a, __b) \ _Generic((__a), \ vector float: (vector unsigned int)__builtin_altivec_vctuxs( \ (vector float)(__a), ((__b)&0x1F)), \ vector double: __extension__({ \ vector double __ret = \ (vector double)(__a) * \ (vector double)(vector unsigned long long)((0x3ffULL + \ ((__b)&0x1F)) \ << 52); \ (vector unsigned long long)__builtin_vsx_xvcvdpuxws(__ret); \ })) #else // __XL_COMPAT_ALTIVEC__ #define vec_ctu(__a, __b) \ _Generic((__a), \ vector float: (vector unsigned int)__builtin_altivec_vctuxs( \ (vector float)(__a), ((__b)&0x1F)), \ vector double: __extension__({ \ vector double __ret = \ (vector double)(__a) * \ (vector double)(vector unsigned long long)((0x3ffULL + \ ((__b)&0x1F)) \ << 52); \ (vector unsigned long long)__builtin_convertvector( \ __ret, vector unsigned long long); \ })) #endif // __XL_COMPAT_ALTIVEC__ #else #define vec_ctu __builtin_altivec_vctuxs #endif #ifdef __LITTLE_ENDIAN__ /* vec_ctsl */ #ifdef __VSX__ #define vec_ctsl(__a, __b) \ _Generic( \ (__a), vector float \ : __extension__({ \ vector float __ret = \ (vector float)(__a) * \ (vector float)(vector unsigned)((0x7f + ((__b)&0x1F)) << 23); \ __builtin_vsx_xvcvspsxds(__builtin_vsx_xxsldwi(__ret, __ret, 1)); \ }), \ vector double \ : __extension__({ \ vector double __ret = \ (vector double)(__a) * \ (vector double)(vector unsigned long long)((0x3ffULL + \ ((__b)&0x1F)) \ << 52); \ __builtin_convertvector(__ret, vector signed long long); \ })) /* vec_ctul */ #define vec_ctul(__a, __b) \ _Generic( \ (__a), vector float \ : __extension__({ \ vector float __ret = \ (vector float)(__a) * \ (vector float)(vector unsigned)((0x7f + ((__b)&0x1F)) << 23); \ __builtin_vsx_xvcvspuxds(__builtin_vsx_xxsldwi(__ret, __ret, 1)); \ }), \ vector double \ : __extension__({ \ vector double __ret = \ (vector double)(__a) * \ (vector double)(vector unsigned long long)((0x3ffULL + \ ((__b)&0x1F)) \ << 52); \ __builtin_convertvector(__ret, vector unsigned long long); \ })) #endif #else // __LITTLE_ENDIAN__ /* vec_ctsl */ #ifdef __VSX__ #define vec_ctsl(__a, __b) \ _Generic((__a), \ vector float: __extension__({ \ vector float __ret = \ (vector float)(__a) * \ (vector float)(vector unsigned)((0x7f + ((__b)&0x1F)) << 23); \ __builtin_vsx_xvcvspsxds(__ret); \ }), \ vector double: __extension__({ \ vector double __ret = \ (vector double)(__a) * \ (vector double)(vector unsigned long long)((0x3ffULL + \ ((__b)&0x1F)) \ << 52); \ __builtin_convertvector(__ret, vector signed long long); \ })) /* vec_ctul */ #define vec_ctul(__a, __b) \ _Generic((__a), vector float \ : __extension__({ \ vector float __ret = \ (vector float)(__a) * \ (vector float)(vector 
unsigned)((0x7f + ((__b)&0x1F)) \ << 23); \ __builtin_vsx_xvcvspuxds(__ret); \ }), \ vector double \ : __extension__({ \ vector double __ret = \ (vector double)(__a) * \ (vector double)(vector unsigned long long)((0x3ffULL + \ ((__b)&0x1F)) \ << 52); \ __builtin_convertvector(__ret, vector unsigned long long); \ })) #endif #endif // __LITTLE_ENDIAN__ /* vec_vctuxs */ #define vec_vctuxs __builtin_altivec_vctuxs /* vec_signext */ #ifdef __POWER9_VECTOR__ static __inline__ vector signed int __ATTRS_o_ai vec_signexti(vector signed char __a) { return __builtin_altivec_vextsb2w(__a); } static __inline__ vector signed int __ATTRS_o_ai vec_signexti(vector signed short __a) { return __builtin_altivec_vextsh2w(__a); } static __inline__ vector signed long long __ATTRS_o_ai vec_signextll(vector signed char __a) { return __builtin_altivec_vextsb2d(__a); } static __inline__ vector signed long long __ATTRS_o_ai vec_signextll(vector signed short __a) { return __builtin_altivec_vextsh2d(__a); } static __inline__ vector signed long long __ATTRS_o_ai vec_signextll(vector signed int __a) { return __builtin_altivec_vextsw2d(__a); } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ vector signed __int128 __ATTRS_o_ai vec_signextq(vector signed long long __a) { return __builtin_altivec_vextsd2q(__a); } #endif /* vec_signed */ static __inline__ vector signed int __ATTRS_o_ai vec_sld(vector signed int, vector signed int, unsigned const int __c); static __inline__ vector signed int __ATTRS_o_ai vec_signed(vector float __a) { return __builtin_convertvector(__a, vector signed int); } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai vec_signed(vector double __a) { return __builtin_convertvector(__a, vector signed long long); } static __inline__ vector signed int __attribute__((__always_inline__)) vec_signed2(vector double __a, vector double __b) { return (vector signed int) { __a[0], __a[1], __b[0], __b[1] }; } static __inline__ vector signed int __ATTRS_o_ai vec_signede(vector double __a) { #ifdef __LITTLE_ENDIAN__ vector signed int __ret = __builtin_vsx_xvcvdpsxws(__a); return vec_sld(__ret, __ret, 12); #else return __builtin_vsx_xvcvdpsxws(__a); #endif } static __inline__ vector signed int __ATTRS_o_ai vec_signedo(vector double __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_vsx_xvcvdpsxws(__a); #else vector signed int __ret = __builtin_vsx_xvcvdpsxws(__a); return vec_sld(__ret, __ret, 12); #endif } #endif /* vec_unsigned */ static __inline__ vector unsigned int __ATTRS_o_ai vec_sld(vector unsigned int, vector unsigned int, unsigned const int __c); static __inline__ vector unsigned int __ATTRS_o_ai vec_unsigned(vector float __a) { return __builtin_convertvector(__a, vector unsigned int); } #ifdef __VSX__ static __inline__ vector unsigned long long __ATTRS_o_ai vec_unsigned(vector double __a) { return __builtin_convertvector(__a, vector unsigned long long); } static __inline__ vector unsigned int __attribute__((__always_inline__)) vec_unsigned2(vector double __a, vector double __b) { return (vector unsigned int) { __a[0], __a[1], __b[0], __b[1] }; } static __inline__ vector unsigned int __ATTRS_o_ai vec_unsignede(vector double __a) { #ifdef __LITTLE_ENDIAN__ vector unsigned int __ret = __builtin_vsx_xvcvdpuxws(__a); return vec_sld(__ret, __ret, 12); #else return __builtin_vsx_xvcvdpuxws(__a); #endif } static __inline__ vector unsigned int __ATTRS_o_ai vec_unsignedo(vector double __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_vsx_xvcvdpuxws(__a); #else 
vector unsigned int __ret = __builtin_vsx_xvcvdpuxws(__a); return vec_sld(__ret, __ret, 12); #endif } #endif /* vec_float */ static __inline__ vector float __ATTRS_o_ai vec_sld(vector float, vector float, unsigned const int __c); static __inline__ vector float __ATTRS_o_ai vec_float(vector signed int __a) { return __builtin_convertvector(__a, vector float); } static __inline__ vector float __ATTRS_o_ai vec_float(vector unsigned int __a) { return __builtin_convertvector(__a, vector float); } #ifdef __VSX__ static __inline__ vector float __ATTRS_o_ai vec_float2(vector signed long long __a, vector signed long long __b) { return (vector float) { __a[0], __a[1], __b[0], __b[1] }; } static __inline__ vector float __ATTRS_o_ai vec_float2(vector unsigned long long __a, vector unsigned long long __b) { return (vector float) { __a[0], __a[1], __b[0], __b[1] }; } static __inline__ vector float __ATTRS_o_ai vec_float2(vector double __a, vector double __b) { return (vector float) { __a[0], __a[1], __b[0], __b[1] }; } static __inline__ vector float __ATTRS_o_ai vec_floate(vector signed long long __a) { #ifdef __LITTLE_ENDIAN__ vector float __ret = __builtin_vsx_xvcvsxdsp(__a); return vec_sld(__ret, __ret, 12); #else return __builtin_vsx_xvcvsxdsp(__a); #endif } static __inline__ vector float __ATTRS_o_ai vec_floate(vector unsigned long long __a) { #ifdef __LITTLE_ENDIAN__ vector float __ret = __builtin_vsx_xvcvuxdsp(__a); return vec_sld(__ret, __ret, 12); #else return __builtin_vsx_xvcvuxdsp(__a); #endif } static __inline__ vector float __ATTRS_o_ai vec_floate(vector double __a) { #ifdef __LITTLE_ENDIAN__ vector float __ret = __builtin_vsx_xvcvdpsp(__a); return vec_sld(__ret, __ret, 12); #else return __builtin_vsx_xvcvdpsp(__a); #endif } static __inline__ vector float __ATTRS_o_ai vec_floato(vector signed long long __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_vsx_xvcvsxdsp(__a); #else vector float __ret = __builtin_vsx_xvcvsxdsp(__a); return vec_sld(__ret, __ret, 12); #endif } static __inline__ vector float __ATTRS_o_ai vec_floato(vector unsigned long long __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_vsx_xvcvuxdsp(__a); #else vector float __ret = __builtin_vsx_xvcvuxdsp(__a); return vec_sld(__ret, __ret, 12); #endif } static __inline__ vector float __ATTRS_o_ai vec_floato(vector double __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_vsx_xvcvdpsp(__a); #else vector float __ret = __builtin_vsx_xvcvdpsp(__a); return vec_sld(__ret, __ret, 12); #endif } #endif /* vec_double */ #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_double(vector signed long long __a) { return __builtin_convertvector(__a, vector double); } static __inline__ vector double __ATTRS_o_ai vec_double(vector unsigned long long __a) { return __builtin_convertvector(__a, vector double); } static __inline__ vector double __ATTRS_o_ai vec_doublee(vector signed int __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_vsx_xvcvsxwdp(vec_sld(__a, __a, 4)); #else return __builtin_vsx_xvcvsxwdp(__a); #endif } static __inline__ vector double __ATTRS_o_ai vec_doublee(vector unsigned int __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_vsx_xvcvuxwdp(vec_sld(__a, __a, 4)); #else return __builtin_vsx_xvcvuxwdp(__a); #endif } static __inline__ vector double __ATTRS_o_ai vec_doublee(vector float __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_vsx_xvcvspdp(vec_sld(__a, __a, 4)); #else return __builtin_vsx_xvcvspdp(__a); #endif } static __inline__ vector double __ATTRS_o_ai vec_doubleh(vector signed int __a) { vector double __ret = 
{__a[0], __a[1]}; return __ret; } static __inline__ vector double __ATTRS_o_ai vec_doubleh(vector unsigned int __a) { vector double __ret = {__a[0], __a[1]}; return __ret; } static __inline__ vector double __ATTRS_o_ai vec_doubleh(vector float __a) { vector double __ret = {__a[0], __a[1]}; return __ret; } static __inline__ vector double __ATTRS_o_ai vec_doublel(vector signed int __a) { vector double __ret = {__a[2], __a[3]}; return __ret; } static __inline__ vector double __ATTRS_o_ai vec_doublel(vector unsigned int __a) { vector double __ret = {__a[2], __a[3]}; return __ret; } static __inline__ vector double __ATTRS_o_ai vec_doublel(vector float __a) { vector double __ret = {__a[2], __a[3]}; return __ret; } static __inline__ vector double __ATTRS_o_ai vec_doubleo(vector signed int __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_vsx_xvcvsxwdp(__a); #else return __builtin_vsx_xvcvsxwdp(vec_sld(__a, __a, 4)); #endif } static __inline__ vector double __ATTRS_o_ai vec_doubleo(vector unsigned int __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_vsx_xvcvuxwdp(__a); #else return __builtin_vsx_xvcvuxwdp(vec_sld(__a, __a, 4)); #endif } static __inline__ vector double __ATTRS_o_ai vec_doubleo(vector float __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_vsx_xvcvspdp(__a); #else return __builtin_vsx_xvcvspdp(vec_sld(__a, __a, 4)); #endif } /* vec_cvf */ static __inline__ vector double __ATTRS_o_ai vec_cvf(vector float __a) { return vec_doublee(__a); } static __inline__ vector float __ATTRS_o_ai vec_cvf(vector double __a) { return vec_floate(__a); } #endif /* vec_div */ /* Integer vector divides (vectors are scalarized, elements divided and the vectors reassembled). */ static __inline__ vector signed char __ATTRS_o_ai vec_div(vector signed char __a, vector signed char __b) { return __a / __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_div(vector unsigned char __a, vector unsigned char __b) { return __a / __b; } static __inline__ vector signed short __ATTRS_o_ai vec_div(vector signed short __a, vector signed short __b) { return __a / __b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_div(vector unsigned short __a, vector unsigned short __b) { return __a / __b; } static __inline__ vector signed int __ATTRS_o_ai vec_div(vector signed int __a, vector signed int __b) { return __a / __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_div(vector unsigned int __a, vector unsigned int __b) { return __a / __b; } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai vec_div(vector signed long long __a, vector signed long long __b) { return __a / __b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_div(vector unsigned long long __a, vector unsigned long long __b) { return __a / __b; } static __inline__ vector float __ATTRS_o_ai vec_div(vector float __a, vector float __b) { return __a / __b; } static __inline__ vector double __ATTRS_o_ai vec_div(vector double __a, vector double __b) { return __a / __b; } #endif /* vec_dive */ #ifdef __POWER10_VECTOR__ static __inline__ vector signed int __ATTRS_o_ai vec_dive(vector signed int __a, vector signed int __b) { return __builtin_altivec_vdivesw(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_dive(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vdiveuw(__a, __b); } static __inline__ vector signed long long __ATTRS_o_ai vec_dive(vector signed long long __a, vector signed long long __b) { return __builtin_altivec_vdivesd(__a, __b); } static __inline__ 
vector unsigned long long __ATTRS_o_ai vec_dive(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vdiveud(__a, __b); } #ifdef __SIZEOF_INT128__ static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_dive(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __builtin_altivec_vdiveuq(__a, __b); } static __inline__ vector signed __int128 __ATTRS_o_ai vec_dive(vector signed __int128 __a, vector signed __int128 __b) { return __builtin_altivec_vdivesq(__a, __b); } #endif #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_div(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __a / __b; } static __inline__ vector signed __int128 __ATTRS_o_ai vec_div(vector signed __int128 __a, vector signed __int128 __b) { return __a / __b; } #endif /* __POWER10_VECTOR__ */ /* vec_xvtdiv */ #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_test_swdiv(vector double __a, vector double __b) { return __builtin_vsx_xvtdivdp(__a, __b); } static __inline__ int __ATTRS_o_ai vec_test_swdivs(vector float __a, vector float __b) { return __builtin_vsx_xvtdivsp(__a, __b); } #endif /* vec_dss */ #define vec_dss __builtin_altivec_dss /* vec_dssall */ static __inline__ void __attribute__((__always_inline__)) vec_dssall(void) { __builtin_altivec_dssall(); } /* vec_dst */ #define vec_dst(__PTR, __CW, __STR) \ __builtin_altivec_dst((const void *)(__PTR), (__CW), (__STR)) /* vec_dstst */ #define vec_dstst(__PTR, __CW, __STR) \ __builtin_altivec_dstst((const void *)(__PTR), (__CW), (__STR)) /* vec_dststt */ #define vec_dststt(__PTR, __CW, __STR) \ __builtin_altivec_dststt((const void *)(__PTR), (__CW), (__STR)) /* vec_dstt */ #define vec_dstt(__PTR, __CW, __STR) \ __builtin_altivec_dstt((const void *)(__PTR), (__CW), (__STR)) /* vec_eqv */ #ifdef __POWER8_VECTOR__ static __inline__ vector signed char __ATTRS_o_ai vec_eqv(vector signed char __a, vector signed char __b) { return (vector signed char)__builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_eqv(vector unsigned char __a, vector unsigned char __b) { return (vector unsigned char)__builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b); } static __inline__ vector bool char __ATTRS_o_ai vec_eqv(vector bool char __a, vector bool char __b) { return (vector bool char)__builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b); } static __inline__ vector signed short __ATTRS_o_ai vec_eqv(vector signed short __a, vector signed short __b) { return (vector signed short)__builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_eqv(vector unsigned short __a, vector unsigned short __b) { return (vector unsigned short)__builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_eqv(vector bool short __a, vector bool short __b) { return (vector bool short)__builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b); } static __inline__ vector signed int __ATTRS_o_ai vec_eqv(vector signed int __a, vector signed int __b) { return (vector signed int)__builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_eqv(vector unsigned int __a, vector unsigned int __b) { return __builtin_vsx_xxleqv(__a, __b); } static 
__inline__ vector bool int __ATTRS_o_ai vec_eqv(vector bool int __a, vector bool int __b) { return (vector bool int)__builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b); } static __inline__ vector signed long long __ATTRS_o_ai vec_eqv(vector signed long long __a, vector signed long long __b) { return (vector signed long long)__builtin_vsx_xxleqv( (vector unsigned int)__a, (vector unsigned int)__b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_eqv(vector unsigned long long __a, vector unsigned long long __b) { return (vector unsigned long long)__builtin_vsx_xxleqv( (vector unsigned int)__a, (vector unsigned int)__b); } static __inline__ vector bool long long __ATTRS_o_ai vec_eqv(vector bool long long __a, vector bool long long __b) { return (vector bool long long)__builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b); } static __inline__ vector float __ATTRS_o_ai vec_eqv(vector float __a, vector float __b) { return (vector float)__builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b); } static __inline__ vector double __ATTRS_o_ai vec_eqv(vector double __a, vector double __b) { return (vector double)__builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b); } #endif /* vec_expte */ static __inline__ vector float __attribute__((__always_inline__)) vec_expte(vector float __a) { return __builtin_altivec_vexptefp(__a); } /* vec_vexptefp */ static __inline__ vector float __attribute__((__always_inline__)) vec_vexptefp(vector float __a) { return __builtin_altivec_vexptefp(__a); } /* vec_floor */ static __inline__ vector float __ATTRS_o_ai vec_floor(vector float __a) { #ifdef __VSX__ return __builtin_vsx_xvrspim(__a); #else return __builtin_altivec_vrfim(__a); #endif } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_floor(vector double __a) { return __builtin_vsx_xvrdpim(__a); } #endif /* vec_roundm */ static __inline__ vector float __ATTRS_o_ai vec_roundm(vector float __a) { return vec_floor(__a); } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_roundm(vector double __a) { return vec_floor(__a); } #endif /* vec_vrfim */ static __inline__ vector float __attribute__((__always_inline__)) vec_vrfim(vector float __a) { return __builtin_altivec_vrfim(__a); } /* vec_ld */ static __inline__ vector signed char __ATTRS_o_ai vec_ld(long __a, const vector signed char *__b) { return (vector signed char)__builtin_altivec_lvx(__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_ld(long __a, const signed char *__b) { return (vector signed char)__builtin_altivec_lvx(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_ld(long __a, const vector unsigned char *__b) { return (vector unsigned char)__builtin_altivec_lvx(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_ld(long __a, const unsigned char *__b) { return (vector unsigned char)__builtin_altivec_lvx(__a, __b); } static __inline__ vector bool char __ATTRS_o_ai vec_ld(long __a, const vector bool char *__b) { return (vector bool char)__builtin_altivec_lvx(__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_ld(long __a, const vector short *__b) { return (vector short)__builtin_altivec_lvx(__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_ld(long __a, const short *__b) { return (vector short)__builtin_altivec_lvx(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_ld(long __a, const vector unsigned short *__b) { return (vector unsigned 
short)__builtin_altivec_lvx(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_ld(long __a, const unsigned short *__b) { return (vector unsigned short)__builtin_altivec_lvx(__a, __b); } static __inline__ vector bool short __ATTRS_o_ai vec_ld(long __a, const vector bool short *__b) { return (vector bool short)__builtin_altivec_lvx(__a, __b); } static __inline__ vector pixel __ATTRS_o_ai vec_ld(long __a, const vector pixel *__b) { return (vector pixel)__builtin_altivec_lvx(__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_ld(long __a, const vector int *__b) { return (vector int)__builtin_altivec_lvx(__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_ld(long __a, const int *__b) { return (vector int)__builtin_altivec_lvx(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_ld(long __a, const vector unsigned int *__b) { return (vector unsigned int)__builtin_altivec_lvx(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_ld(long __a, const unsigned int *__b) { return (vector unsigned int)__builtin_altivec_lvx(__a, __b); } static __inline__ vector bool int __ATTRS_o_ai vec_ld(long __a, const vector bool int *__b) { return (vector bool int)__builtin_altivec_lvx(__a, __b); } static __inline__ vector float __ATTRS_o_ai vec_ld(long __a, const vector float *__b) { return (vector float)__builtin_altivec_lvx(__a, __b); } static __inline__ vector float __ATTRS_o_ai vec_ld(long __a, const float *__b) { return (vector float)__builtin_altivec_lvx(__a, __b); } /* vec_lvx */ static __inline__ vector signed char __ATTRS_o_ai vec_lvx(long __a, const vector signed char *__b) { return (vector signed char)__builtin_altivec_lvx(__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_lvx(long __a, const signed char *__b) { return (vector signed char)__builtin_altivec_lvx(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_lvx(long __a, const vector unsigned char *__b) { return (vector unsigned char)__builtin_altivec_lvx(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_lvx(long __a, const unsigned char *__b) { return (vector unsigned char)__builtin_altivec_lvx(__a, __b); } static __inline__ vector bool char __ATTRS_o_ai vec_lvx(long __a, const vector bool char *__b) { return (vector bool char)__builtin_altivec_lvx(__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_lvx(long __a, const vector short *__b) { return (vector short)__builtin_altivec_lvx(__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_lvx(long __a, const short *__b) { return (vector short)__builtin_altivec_lvx(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_lvx(long __a, const vector unsigned short *__b) { return (vector unsigned short)__builtin_altivec_lvx(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_lvx(long __a, const unsigned short *__b) { return (vector unsigned short)__builtin_altivec_lvx(__a, __b); } static __inline__ vector bool short __ATTRS_o_ai vec_lvx(long __a, const vector bool short *__b) { return (vector bool short)__builtin_altivec_lvx(__a, __b); } static __inline__ vector pixel __ATTRS_o_ai vec_lvx(long __a, const vector pixel *__b) { return (vector pixel)__builtin_altivec_lvx(__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_lvx(long __a, const vector int *__b) { return (vector int)__builtin_altivec_lvx(__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_lvx(long __a, const int *__b) { return (vector int)__builtin_altivec_lvx(__a, __b); } 
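/* Illustrative usage sketch (an editorial addition, not part of the original
   header): vec_ld/vec_lvx, defined just above, map to the lvx instruction,
   which loads the 16-byte-aligned quadword containing the effective address;
   the low four address bits are ignored, so the caller receives the enclosing
   aligned block rather than an unaligned load. The helper name below is
   hypothetical and the block is kept under #if 0 so it has no effect on
   compilation. */
#if 0
static inline vector unsigned int
__example_load_enclosing_quadword(const unsigned int *__p) {
  /* Returns the 16-byte-aligned vector that contains *__p. */
  return vec_ld(0, __p);
}
#endif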
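/* Illustrative usage sketch (an editorial addition, not part of the original
   header): the length-controlled loads/stores defined earlier under
   __POWER9_VECTOR__ && __powerpc64__ (vec_xl_len / vec_xst_len) transfer only
   the first __n bytes, so a tail shorter than 16 bytes can be handled without
   reading or writing past the end of a buffer; bytes beyond the requested
   length are zeroed in the register by the underlying lxvl instruction. The
   helper names are hypothetical; the block is kept under #if 0. */
#if 0
static inline vector unsigned char
__example_load_tail(const unsigned char *__p, size_t __n) {
  /* __n must be at most 16; only __n bytes at __p are accessed. */
  return vec_xl_len(__p, __n);
}

static inline void
__example_store_tail(vector unsigned char __v, unsigned char *__p, size_t __n) {
  /* Stores only the first __n bytes of __v to __p. */
  vec_xst_len(__v, __p, __n);
}
#endif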
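/* Illustrative usage sketch (an editorial addition, not part of the original
   header): vec_first_match_index, defined earlier under __POWER9_VECTOR__,
   returns the element index of the first position at which the two operands
   compare equal, or the element count (16 for byte elements) when no position
   matches. Combined with vec_splats it can locate a byte within a 16-byte
   chunk. The helper name is hypothetical; the block is kept under #if 0. */
#if 0
static inline unsigned
__example_find_byte(vector unsigned char __chunk, unsigned char __c) {
  /* Index (0..15) of the first byte of __chunk equal to __c, or 16 if absent. */
  return vec_first_match_index(__chunk, vec_splats(__c));
}
#endif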
static __inline__ vector unsigned int __ATTRS_o_ai vec_lvx(long __a, const vector unsigned int *__b) { return (vector unsigned int)__builtin_altivec_lvx(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_lvx(long __a, const unsigned int *__b) { return (vector unsigned int)__builtin_altivec_lvx(__a, __b); } static __inline__ vector bool int __ATTRS_o_ai vec_lvx(long __a, const vector bool int *__b) { return (vector bool int)__builtin_altivec_lvx(__a, __b); } static __inline__ vector float __ATTRS_o_ai vec_lvx(long __a, const vector float *__b) { return (vector float)__builtin_altivec_lvx(__a, __b); } static __inline__ vector float __ATTRS_o_ai vec_lvx(long __a, const float *__b) { return (vector float)__builtin_altivec_lvx(__a, __b); } /* vec_lde */ static __inline__ vector signed char __ATTRS_o_ai vec_lde(long __a, const signed char *__b) { return (vector signed char)__builtin_altivec_lvebx(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_lde(long __a, const unsigned char *__b) { return (vector unsigned char)__builtin_altivec_lvebx(__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_lde(long __a, const short *__b) { return (vector short)__builtin_altivec_lvehx(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_lde(long __a, const unsigned short *__b) { return (vector unsigned short)__builtin_altivec_lvehx(__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_lde(long __a, const int *__b) { return (vector int)__builtin_altivec_lvewx(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_lde(long __a, const unsigned int *__b) { return (vector unsigned int)__builtin_altivec_lvewx(__a, __b); } static __inline__ vector float __ATTRS_o_ai vec_lde(long __a, const float *__b) { return (vector float)__builtin_altivec_lvewx(__a, __b); } /* vec_lvebx */ static __inline__ vector signed char __ATTRS_o_ai vec_lvebx(long __a, const signed char *__b) { return (vector signed char)__builtin_altivec_lvebx(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_lvebx(long __a, const unsigned char *__b) { return (vector unsigned char)__builtin_altivec_lvebx(__a, __b); } /* vec_lvehx */ static __inline__ vector short __ATTRS_o_ai vec_lvehx(long __a, const short *__b) { return (vector short)__builtin_altivec_lvehx(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_lvehx(long __a, const unsigned short *__b) { return (vector unsigned short)__builtin_altivec_lvehx(__a, __b); } /* vec_lvewx */ static __inline__ vector int __ATTRS_o_ai vec_lvewx(long __a, const int *__b) { return (vector int)__builtin_altivec_lvewx(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_lvewx(long __a, const unsigned int *__b) { return (vector unsigned int)__builtin_altivec_lvewx(__a, __b); } static __inline__ vector float __ATTRS_o_ai vec_lvewx(long __a, const float *__b) { return (vector float)__builtin_altivec_lvewx(__a, __b); } /* vec_ldl */ static __inline__ vector signed char __ATTRS_o_ai vec_ldl(long __a, const vector signed char *__b) { return (vector signed char)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_ldl(long __a, const signed char *__b) { return (vector signed char)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_ldl(long __a, const vector unsigned char *__b) { return (vector unsigned char)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_ldl(long __a, const 
unsigned char *__b) { return (vector unsigned char)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector bool char __ATTRS_o_ai vec_ldl(long __a, const vector bool char *__b) { return (vector bool char)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_ldl(long __a, const vector short *__b) { return (vector short)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_ldl(long __a, const short *__b) { return (vector short)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_ldl(long __a, const vector unsigned short *__b) { return (vector unsigned short)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_ldl(long __a, const unsigned short *__b) { return (vector unsigned short)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector bool short __ATTRS_o_ai vec_ldl(long __a, const vector bool short *__b) { return (vector bool short)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector pixel __ATTRS_o_ai vec_ldl(long __a, const vector pixel *__b) { return (vector pixel short)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_ldl(long __a, const vector int *__b) { return (vector int)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_ldl(long __a, const int *__b) { return (vector int)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_ldl(long __a, const vector unsigned int *__b) { return (vector unsigned int)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_ldl(long __a, const unsigned int *__b) { return (vector unsigned int)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector bool int __ATTRS_o_ai vec_ldl(long __a, const vector bool int *__b) { return (vector bool int)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector float __ATTRS_o_ai vec_ldl(long __a, const vector float *__b) { return (vector float)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector float __ATTRS_o_ai vec_ldl(long __a, const float *__b) { return (vector float)__builtin_altivec_lvxl(__a, __b); } /* vec_lvxl */ static __inline__ vector signed char __ATTRS_o_ai vec_lvxl(long __a, const vector signed char *__b) { return (vector signed char)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_lvxl(long __a, const signed char *__b) { return (vector signed char)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_lvxl(long __a, const vector unsigned char *__b) { return (vector unsigned char)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_lvxl(long __a, const unsigned char *__b) { return (vector unsigned char)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector bool char __ATTRS_o_ai vec_lvxl(long __a, const vector bool char *__b) { return (vector bool char)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_lvxl(long __a, const vector short *__b) { return (vector short)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_lvxl(long __a, const short *__b) { return (vector short)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_lvxl(long __a, const vector unsigned short *__b) { return (vector unsigned short)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector unsigned short 
__ATTRS_o_ai vec_lvxl(long __a, const unsigned short *__b) { return (vector unsigned short)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector bool short __ATTRS_o_ai vec_lvxl(long __a, const vector bool short *__b) { return (vector bool short)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector pixel __ATTRS_o_ai vec_lvxl(long __a, const vector pixel *__b) { return (vector pixel)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_lvxl(long __a, const vector int *__b) { return (vector int)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_lvxl(long __a, const int *__b) { return (vector int)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_lvxl(long __a, const vector unsigned int *__b) { return (vector unsigned int)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_lvxl(long __a, const unsigned int *__b) { return (vector unsigned int)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector bool int __ATTRS_o_ai vec_lvxl(long __a, const vector bool int *__b) { return (vector bool int)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector float __ATTRS_o_ai vec_lvxl(long __a, const vector float *__b) { return (vector float)__builtin_altivec_lvxl(__a, __b); } static __inline__ vector float __ATTRS_o_ai vec_lvxl(long __a, const float *__b) { return (vector float)__builtin_altivec_lvxl(__a, __b); } /* vec_loge */ static __inline__ vector float __attribute__((__always_inline__)) vec_loge(vector float __a) { return __builtin_altivec_vlogefp(__a); } /* vec_vlogefp */ static __inline__ vector float __attribute__((__always_inline__)) vec_vlogefp(vector float __a) { return __builtin_altivec_vlogefp(__a); } /* vec_lvsl */ #ifdef __LITTLE_ENDIAN__ static __inline__ vector unsigned char __ATTRS_o_ai __attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsl(int __a, const signed char *__b) { vector unsigned char mask = (vector unsigned char)__builtin_altivec_lvsl(__a, __b); vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; return vec_perm(mask, mask, reverse); } #else static __inline__ vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const signed char *__b) { return (vector unsigned char)__builtin_altivec_lvsl(__a, __b); } #endif #ifdef __LITTLE_ENDIAN__ static __inline__ vector unsigned char __ATTRS_o_ai __attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsl(int __a, const unsigned char *__b) { vector unsigned char mask = (vector unsigned char)__builtin_altivec_lvsl(__a, __b); vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; return vec_perm(mask, mask, reverse); } #else static __inline__ vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const unsigned char *__b) { return (vector unsigned char)__builtin_altivec_lvsl(__a, __b); } #endif #ifdef __LITTLE_ENDIAN__ static __inline__ vector unsigned char __ATTRS_o_ai __attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsl(int __a, const short *__b) { vector unsigned char mask = (vector unsigned char)__builtin_altivec_lvsl(__a, __b); vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; return vec_perm(mask, mask, reverse); } #else static __inline__ vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const short *__b) { return (vector unsigned 
char)__builtin_altivec_lvsl(__a, __b); } #endif #ifdef __LITTLE_ENDIAN__ static __inline__ vector unsigned char __ATTRS_o_ai __attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsl(int __a, const unsigned short *__b) { vector unsigned char mask = (vector unsigned char)__builtin_altivec_lvsl(__a, __b); vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; return vec_perm(mask, mask, reverse); } #else static __inline__ vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const unsigned short *__b) { return (vector unsigned char)__builtin_altivec_lvsl(__a, __b); } #endif #ifdef __LITTLE_ENDIAN__ static __inline__ vector unsigned char __ATTRS_o_ai __attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsl(int __a, const int *__b) { vector unsigned char mask = (vector unsigned char)__builtin_altivec_lvsl(__a, __b); vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; return vec_perm(mask, mask, reverse); } #else static __inline__ vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const int *__b) { return (vector unsigned char)__builtin_altivec_lvsl(__a, __b); } #endif #ifdef __LITTLE_ENDIAN__ static __inline__ vector unsigned char __ATTRS_o_ai __attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsl(int __a, const unsigned int *__b) { vector unsigned char mask = (vector unsigned char)__builtin_altivec_lvsl(__a, __b); vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; return vec_perm(mask, mask, reverse); } #else static __inline__ vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const unsigned int *__b) { return (vector unsigned char)__builtin_altivec_lvsl(__a, __b); } #endif #ifdef __LITTLE_ENDIAN__ static __inline__ vector unsigned char __ATTRS_o_ai __attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsl(int __a, const float *__b) { vector unsigned char mask = (vector unsigned char)__builtin_altivec_lvsl(__a, __b); vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; return vec_perm(mask, mask, reverse); } #else static __inline__ vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const float *__b) { return (vector unsigned char)__builtin_altivec_lvsl(__a, __b); } #endif /* vec_lvsr */ #ifdef __LITTLE_ENDIAN__ static __inline__ vector unsigned char __ATTRS_o_ai __attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsr(int __a, const signed char *__b) { vector unsigned char mask = (vector unsigned char)__builtin_altivec_lvsr(__a, __b); vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; return vec_perm(mask, mask, reverse); } #else static __inline__ vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const signed char *__b) { return (vector unsigned char)__builtin_altivec_lvsr(__a, __b); } #endif #ifdef __LITTLE_ENDIAN__ static __inline__ vector unsigned char __ATTRS_o_ai __attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsr(int __a, const unsigned char *__b) { vector unsigned char mask = (vector unsigned char)__builtin_altivec_lvsr(__a, __b); vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; return vec_perm(mask, mask, reverse); } #else static __inline__ vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const 
unsigned char *__b) { return (vector unsigned char)__builtin_altivec_lvsr(__a, __b); } #endif #ifdef __LITTLE_ENDIAN__ static __inline__ vector unsigned char __ATTRS_o_ai __attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsr(int __a, const short *__b) { vector unsigned char mask = (vector unsigned char)__builtin_altivec_lvsr(__a, __b); vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; return vec_perm(mask, mask, reverse); } #else static __inline__ vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const short *__b) { return (vector unsigned char)__builtin_altivec_lvsr(__a, __b); } #endif #ifdef __LITTLE_ENDIAN__ static __inline__ vector unsigned char __ATTRS_o_ai __attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsr(int __a, const unsigned short *__b) { vector unsigned char mask = (vector unsigned char)__builtin_altivec_lvsr(__a, __b); vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; return vec_perm(mask, mask, reverse); } #else static __inline__ vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const unsigned short *__b) { return (vector unsigned char)__builtin_altivec_lvsr(__a, __b); } #endif #ifdef __LITTLE_ENDIAN__ static __inline__ vector unsigned char __ATTRS_o_ai __attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsr(int __a, const int *__b) { vector unsigned char mask = (vector unsigned char)__builtin_altivec_lvsr(__a, __b); vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; return vec_perm(mask, mask, reverse); } #else static __inline__ vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const int *__b) { return (vector unsigned char)__builtin_altivec_lvsr(__a, __b); } #endif #ifdef __LITTLE_ENDIAN__ static __inline__ vector unsigned char __ATTRS_o_ai __attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsr(int __a, const unsigned int *__b) { vector unsigned char mask = (vector unsigned char)__builtin_altivec_lvsr(__a, __b); vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; return vec_perm(mask, mask, reverse); } #else static __inline__ vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const unsigned int *__b) { return (vector unsigned char)__builtin_altivec_lvsr(__a, __b); } #endif #ifdef __LITTLE_ENDIAN__ static __inline__ vector unsigned char __ATTRS_o_ai __attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsr(int __a, const float *__b) { vector unsigned char mask = (vector unsigned char)__builtin_altivec_lvsr(__a, __b); vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; return vec_perm(mask, mask, reverse); } #else static __inline__ vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const float *__b) { return (vector unsigned char)__builtin_altivec_lvsr(__a, __b); } #endif /* vec_madd */ static __inline__ vector signed short __ATTRS_o_ai vec_mladd(vector signed short, vector signed short, vector signed short); static __inline__ vector signed short __ATTRS_o_ai vec_mladd(vector signed short, vector unsigned short, vector unsigned short); static __inline__ vector signed short __ATTRS_o_ai vec_mladd(vector unsigned short, vector signed short, vector signed short); static __inline__ vector unsigned short __ATTRS_o_ai vec_mladd(vector unsigned short, vector unsigned 
short, vector unsigned short); static __inline__ vector signed short __ATTRS_o_ai vec_madd( vector signed short __a, vector signed short __b, vector signed short __c) { return vec_mladd(__a, __b, __c); } static __inline__ vector signed short __ATTRS_o_ai vec_madd(vector signed short __a, vector unsigned short __b, vector unsigned short __c) { return vec_mladd(__a, __b, __c); } static __inline__ vector signed short __ATTRS_o_ai vec_madd(vector unsigned short __a, vector signed short __b, vector signed short __c) { return vec_mladd(__a, __b, __c); } static __inline__ vector unsigned short __ATTRS_o_ai vec_madd(vector unsigned short __a, vector unsigned short __b, vector unsigned short __c) { return vec_mladd(__a, __b, __c); } static __inline__ vector float __ATTRS_o_ai vec_madd(vector float __a, vector float __b, vector float __c) { #ifdef __VSX__ return __builtin_vsx_xvmaddasp(__a, __b, __c); #else return __builtin_altivec_vmaddfp(__a, __b, __c); #endif } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_madd(vector double __a, vector double __b, vector double __c) { return __builtin_vsx_xvmaddadp(__a, __b, __c); } #endif /* vec_vmaddfp */ static __inline__ vector float __attribute__((__always_inline__)) vec_vmaddfp(vector float __a, vector float __b, vector float __c) { return __builtin_altivec_vmaddfp(__a, __b, __c); } /* vec_madds */ static __inline__ vector signed short __attribute__((__always_inline__)) vec_madds(vector signed short __a, vector signed short __b, vector signed short __c) { return __builtin_altivec_vmhaddshs(__a, __b, __c); } /* vec_vmhaddshs */ static __inline__ vector signed short __attribute__((__always_inline__)) vec_vmhaddshs(vector signed short __a, vector signed short __b, vector signed short __c) { return __builtin_altivec_vmhaddshs(__a, __b, __c); } /* vec_msub */ #ifdef __VSX__ static __inline__ vector float __ATTRS_o_ai vec_msub(vector float __a, vector float __b, vector float __c) { return __builtin_vsx_xvmsubasp(__a, __b, __c); } static __inline__ vector double __ATTRS_o_ai vec_msub(vector double __a, vector double __b, vector double __c) { return __builtin_vsx_xvmsubadp(__a, __b, __c); } #endif /* vec_max */ static __inline__ vector signed char __ATTRS_o_ai vec_max(vector signed char __a, vector signed char __b) { return __builtin_altivec_vmaxsb(__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_max(vector bool char __a, vector signed char __b) { return __builtin_altivec_vmaxsb((vector signed char)__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_max(vector signed char __a, vector bool char __b) { return __builtin_altivec_vmaxsb(__a, (vector signed char)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_max(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vmaxub(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_max(vector bool char __a, vector unsigned char __b) { return __builtin_altivec_vmaxub((vector unsigned char)__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_max(vector unsigned char __a, vector bool char __b) { return __builtin_altivec_vmaxub(__a, (vector unsigned char)__b); } static __inline__ vector short __ATTRS_o_ai vec_max(vector short __a, vector short __b) { return __builtin_altivec_vmaxsh(__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_max(vector bool short __a, vector short __b) { return __builtin_altivec_vmaxsh((vector short)__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_max(vector short 
__a, vector bool short __b) { return __builtin_altivec_vmaxsh(__a, (vector short)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_max(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vmaxuh(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_max(vector bool short __a, vector unsigned short __b) { return __builtin_altivec_vmaxuh((vector unsigned short)__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_max(vector unsigned short __a, vector bool short __b) { return __builtin_altivec_vmaxuh(__a, (vector unsigned short)__b); } static __inline__ vector int __ATTRS_o_ai vec_max(vector int __a, vector int __b) { return __builtin_altivec_vmaxsw(__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_max(vector bool int __a, vector int __b) { return __builtin_altivec_vmaxsw((vector int)__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_max(vector int __a, vector bool int __b) { return __builtin_altivec_vmaxsw(__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_max(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vmaxuw(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_max(vector bool int __a, vector unsigned int __b) { return __builtin_altivec_vmaxuw((vector unsigned int)__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_max(vector unsigned int __a, vector bool int __b) { return __builtin_altivec_vmaxuw(__a, (vector unsigned int)__b); } #ifdef __POWER8_VECTOR__ static __inline__ vector signed long long __ATTRS_o_ai vec_max(vector signed long long __a, vector signed long long __b) { return __builtin_altivec_vmaxsd(__a, __b); } static __inline__ vector signed long long __ATTRS_o_ai vec_max(vector bool long long __a, vector signed long long __b) { return __builtin_altivec_vmaxsd((vector signed long long)__a, __b); } static __inline__ vector signed long long __ATTRS_o_ai vec_max(vector signed long long __a, vector bool long long __b) { return __builtin_altivec_vmaxsd(__a, (vector signed long long)__b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_max(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vmaxud(__a, __b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_max(vector bool long long __a, vector unsigned long long __b) { return __builtin_altivec_vmaxud((vector unsigned long long)__a, __b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_max(vector unsigned long long __a, vector bool long long __b) { return __builtin_altivec_vmaxud(__a, (vector unsigned long long)__b); } #endif static __inline__ vector float __ATTRS_o_ai vec_max(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvmaxsp(__a, __b); #else return __builtin_altivec_vmaxfp(__a, __b); #endif } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_max(vector double __a, vector double __b) { return __builtin_vsx_xvmaxdp(__a, __b); } #endif /* vec_vmaxsb */ static __inline__ vector signed char __ATTRS_o_ai vec_vmaxsb(vector signed char __a, vector signed char __b) { return __builtin_altivec_vmaxsb(__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_vmaxsb(vector bool char __a, vector signed char __b) { return __builtin_altivec_vmaxsb((vector signed char)__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_vmaxsb(vector signed char __a, vector bool char __b) { return __builtin_altivec_vmaxsb(__a, (vector signed 
char)__b); } /* vec_vmaxub */ static __inline__ vector unsigned char __ATTRS_o_ai vec_vmaxub(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vmaxub(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vmaxub(vector bool char __a, vector unsigned char __b) { return __builtin_altivec_vmaxub((vector unsigned char)__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vmaxub(vector unsigned char __a, vector bool char __b) { return __builtin_altivec_vmaxub(__a, (vector unsigned char)__b); } /* vec_vmaxsh */ static __inline__ vector short __ATTRS_o_ai vec_vmaxsh(vector short __a, vector short __b) { return __builtin_altivec_vmaxsh(__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_vmaxsh(vector bool short __a, vector short __b) { return __builtin_altivec_vmaxsh((vector short)__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_vmaxsh(vector short __a, vector bool short __b) { return __builtin_altivec_vmaxsh(__a, (vector short)__b); } /* vec_vmaxuh */ static __inline__ vector unsigned short __ATTRS_o_ai vec_vmaxuh(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vmaxuh(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vmaxuh(vector bool short __a, vector unsigned short __b) { return __builtin_altivec_vmaxuh((vector unsigned short)__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vmaxuh(vector unsigned short __a, vector bool short __b) { return __builtin_altivec_vmaxuh(__a, (vector unsigned short)__b); } /* vec_vmaxsw */ static __inline__ vector int __ATTRS_o_ai vec_vmaxsw(vector int __a, vector int __b) { return __builtin_altivec_vmaxsw(__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_vmaxsw(vector bool int __a, vector int __b) { return __builtin_altivec_vmaxsw((vector int)__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_vmaxsw(vector int __a, vector bool int __b) { return __builtin_altivec_vmaxsw(__a, (vector int)__b); } /* vec_vmaxuw */ static __inline__ vector unsigned int __ATTRS_o_ai vec_vmaxuw(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vmaxuw(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vmaxuw(vector bool int __a, vector unsigned int __b) { return __builtin_altivec_vmaxuw((vector unsigned int)__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vmaxuw(vector unsigned int __a, vector bool int __b) { return __builtin_altivec_vmaxuw(__a, (vector unsigned int)__b); } /* vec_vmaxfp */ static __inline__ vector float __attribute__((__always_inline__)) vec_vmaxfp(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvmaxsp(__a, __b); #else return __builtin_altivec_vmaxfp(__a, __b); #endif } /* vec_mergeh */ static __inline__ vector signed char __ATTRS_o_ai vec_mergeh(vector signed char __a, vector signed char __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17)); } static __inline__ vector unsigned char __ATTRS_o_ai vec_mergeh(vector unsigned char __a, vector unsigned char __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17)); } static __inline__ vector bool char __ATTRS_o_ai vec_mergeh(vector bool char __a, vector bool char __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, 0x04, 0x14, 0x05, 
0x15, 0x06, 0x16, 0x07, 0x17)); } static __inline__ vector short __ATTRS_o_ai vec_mergeh(vector short __a, vector short __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); } static __inline__ vector unsigned short __ATTRS_o_ai vec_mergeh(vector unsigned short __a, vector unsigned short __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); } static __inline__ vector bool short __ATTRS_o_ai vec_mergeh(vector bool short __a, vector bool short __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); } static __inline__ vector pixel __ATTRS_o_ai vec_mergeh(vector pixel __a, vector pixel __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); } static __inline__ vector int __ATTRS_o_ai vec_mergeh(vector int __a, vector int __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17)); } static __inline__ vector unsigned int __ATTRS_o_ai vec_mergeh(vector unsigned int __a, vector unsigned int __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17)); } static __inline__ vector bool int __ATTRS_o_ai vec_mergeh(vector bool int __a, vector bool int __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17)); } static __inline__ vector float __ATTRS_o_ai vec_mergeh(vector float __a, vector float __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17)); } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai vec_mergeh(vector signed long long __a, vector signed long long __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17)); } static __inline__ vector signed long long __ATTRS_o_ai vec_mergeh(vector signed long long __a, vector bool long long __b) { return vec_perm(__a, (vector signed long long)__b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17)); } static __inline__ vector signed long long __ATTRS_o_ai vec_mergeh(vector bool long long __a, vector signed long long __b) { return vec_perm((vector signed long long)__a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17)); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_mergeh(vector unsigned long long __a, vector unsigned long long __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17)); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_mergeh(vector unsigned long long __a, vector bool long long __b) { return vec_perm(__a, (vector unsigned long long)__b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17)); } static __inline__ vector unsigned long long 
__ATTRS_o_ai vec_mergeh(vector bool long long __a, vector unsigned long long __b) { return vec_perm((vector unsigned long long)__a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17)); } static __inline__ vector bool long long __ATTRS_o_ai vec_mergeh(vector bool long long __a, vector bool long long __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17)); } static __inline__ vector double __ATTRS_o_ai vec_mergeh(vector double __a, vector double __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17)); } static __inline__ vector double __ATTRS_o_ai vec_mergeh(vector double __a, vector bool long long __b) { return vec_perm(__a, (vector double)__b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17)); } static __inline__ vector double __ATTRS_o_ai vec_mergeh(vector bool long long __a, vector double __b) { return vec_perm((vector double)__a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17)); } #endif /* vec_vmrghb */ #define __builtin_altivec_vmrghb vec_vmrghb static __inline__ vector signed char __ATTRS_o_ai vec_vmrghb(vector signed char __a, vector signed char __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17)); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vmrghb(vector unsigned char __a, vector unsigned char __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17)); } static __inline__ vector bool char __ATTRS_o_ai vec_vmrghb(vector bool char __a, vector bool char __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17)); } /* vec_vmrghh */ #define __builtin_altivec_vmrghh vec_vmrghh static __inline__ vector short __ATTRS_o_ai vec_vmrghh(vector short __a, vector short __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vmrghh(vector unsigned short __a, vector unsigned short __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); } static __inline__ vector bool short __ATTRS_o_ai vec_vmrghh(vector bool short __a, vector bool short __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); } static __inline__ vector pixel __ATTRS_o_ai vec_vmrghh(vector pixel __a, vector pixel __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); } /* vec_vmrghw */ #define __builtin_altivec_vmrghw vec_vmrghw static __inline__ vector int __ATTRS_o_ai vec_vmrghw(vector int __a, vector int __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17)); } static __inline__ vector 
unsigned int __ATTRS_o_ai vec_vmrghw(vector unsigned int __a, vector unsigned int __b) {
  return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17));
}
static __inline__ vector bool int __ATTRS_o_ai vec_vmrghw(vector bool int __a, vector bool int __b) {
  return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17));
}
static __inline__ vector float __ATTRS_o_ai vec_vmrghw(vector float __a, vector float __b) {
  return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17));
}

/* vec_mergel */

static __inline__ vector signed char __ATTRS_o_ai vec_mergel(vector signed char __a, vector signed char __b) {
  return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F));
}
static __inline__ vector unsigned char __ATTRS_o_ai vec_mergel(vector unsigned char __a, vector unsigned char __b) {
  return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F));
}
static __inline__ vector bool char __ATTRS_o_ai vec_mergel(vector bool char __a, vector bool char __b) {
  return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F));
}
static __inline__ vector short __ATTRS_o_ai vec_mergel(vector short __a, vector short __b) {
  return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F));
}
static __inline__ vector unsigned short __ATTRS_o_ai vec_mergel(vector unsigned short __a, vector unsigned short __b) {
  return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F));
}
static __inline__ vector bool short __ATTRS_o_ai vec_mergel(vector bool short __a, vector bool short __b) {
  return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F));
}
static __inline__ vector pixel __ATTRS_o_ai vec_mergel(vector pixel __a, vector pixel __b) {
  return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F));
}
static __inline__ vector int __ATTRS_o_ai vec_mergel(vector int __a, vector int __b) {
  return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F));
}
static __inline__ vector unsigned int __ATTRS_o_ai vec_mergel(vector unsigned int __a, vector unsigned int __b) {
  return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F));
}
static __inline__ vector bool int __ATTRS_o_ai vec_mergel(vector bool int __a, vector bool int __b) {
  return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F));
}
static __inline__ vector float __ATTRS_o_ai vec_mergel(vector float __a, vector float __b) {
  return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F));
}
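/*
 * Illustrative sketch (editorial addition; the helper name below is
 * hypothetical and not part of this header): vec_mergeh and vec_mergel zip
 * together corresponding elements from the first halves and second halves of
 * their operands, which is the usual first step when interleaving or
 * transposing vector data.
 */
static __inline__ void interleave_bytes_demo(vector unsigned char a, vector unsigned char b,
                                             vector unsigned char *hi, vector unsigned char *lo) {
  /* *hi receives elements drawn alternately from the first halves of a and b;
     *lo does the same for the second halves. */
  *hi = vec_mergeh(a, b);
  *lo = vec_mergel(a, b);
}

#ifdef __VSX__
static 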
__inline__ vector signed long long __ATTRS_o_ai vec_mergel(vector signed long long __a, vector signed long long __b) { return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x18, 0X19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F)); } static __inline__ vector signed long long __ATTRS_o_ai vec_mergel(vector signed long long __a, vector bool long long __b) { return vec_perm(__a, (vector signed long long)__b, (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x18, 0X19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F)); } static __inline__ vector signed long long __ATTRS_o_ai vec_mergel(vector bool long long __a, vector signed long long __b) { return vec_perm((vector signed long long)__a, __b, (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x18, 0X19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F)); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_mergel(vector unsigned long long __a, vector unsigned long long __b) { return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x18, 0X19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F)); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_mergel(vector unsigned long long __a, vector bool long long __b) { return vec_perm(__a, (vector unsigned long long)__b, (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x18, 0X19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F)); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_mergel(vector bool long long __a, vector unsigned long long __b) { return vec_perm((vector unsigned long long)__a, __b, (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x18, 0X19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F)); } static __inline__ vector bool long long __ATTRS_o_ai vec_mergel(vector bool long long __a, vector bool long long __b) { return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x18, 0X19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F)); } static __inline__ vector double __ATTRS_o_ai vec_mergel(vector double __a, vector double __b) { return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x18, 0X19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F)); } static __inline__ vector double __ATTRS_o_ai vec_mergel(vector double __a, vector bool long long __b) { return vec_perm(__a, (vector double)__b, (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x18, 0X19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F)); } static __inline__ vector double __ATTRS_o_ai vec_mergel(vector bool long long __a, vector double __b) { return vec_perm((vector double)__a, __b, (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x18, 0X19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F)); } #endif /* vec_vmrglb */ #define __builtin_altivec_vmrglb vec_vmrglb static __inline__ vector signed char __ATTRS_o_ai vec_vmrglb(vector signed char __a, vector signed char __b) { return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F)); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vmrglb(vector unsigned char __a, vector unsigned char __b) { return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F)); } static __inline__ vector bool char __ATTRS_o_ai vec_vmrglb(vector bool char __a, vector bool char __b) { return vec_perm(__a, __b, (vector unsigned 
char)(0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F)); } /* vec_vmrglh */ #define __builtin_altivec_vmrglh vec_vmrglh static __inline__ vector short __ATTRS_o_ai vec_vmrglh(vector short __a, vector short __b) { return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F)); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vmrglh(vector unsigned short __a, vector unsigned short __b) { return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F)); } static __inline__ vector bool short __ATTRS_o_ai vec_vmrglh(vector bool short __a, vector bool short __b) { return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F)); } static __inline__ vector pixel __ATTRS_o_ai vec_vmrglh(vector pixel __a, vector pixel __b) { return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F)); } /* vec_vmrglw */ #define __builtin_altivec_vmrglw vec_vmrglw static __inline__ vector int __ATTRS_o_ai vec_vmrglw(vector int __a, vector int __b) { return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vmrglw(vector unsigned int __a, vector unsigned int __b) { return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); } static __inline__ vector bool int __ATTRS_o_ai vec_vmrglw(vector bool int __a, vector bool int __b) { return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); } static __inline__ vector float __ATTRS_o_ai vec_vmrglw(vector float __a, vector float __b) { return vec_perm(__a, __b, (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); } #ifdef __POWER8_VECTOR__ /* vec_mergee */ static __inline__ vector bool int __ATTRS_o_ai vec_mergee(vector bool int __a, vector bool int __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B)); } static __inline__ vector signed int __ATTRS_o_ai vec_mergee(vector signed int __a, vector signed int __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B)); } static __inline__ vector unsigned int __ATTRS_o_ai vec_mergee(vector unsigned int __a, vector unsigned int __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B)); } static __inline__ vector bool long long __ATTRS_o_ai vec_mergee(vector bool long long __a, vector bool long long __b) { return vec_mergeh(__a, __b); } static __inline__ vector signed long long __ATTRS_o_ai vec_mergee(vector signed long long __a, vector signed long long __b) { return vec_mergeh(__a, __b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_mergee(vector unsigned long long __a, vector unsigned long long __b) { return vec_mergeh(__a, __b); } static __inline__ vector float 
__ATTRS_o_ai vec_mergee(vector float __a, vector float __b) { return vec_perm(__a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B)); } static __inline__ vector double __ATTRS_o_ai vec_mergee(vector double __a, vector double __b) { return vec_mergeh(__a, __b); } /* vec_mergeo */ static __inline__ vector bool int __ATTRS_o_ai vec_mergeo(vector bool int __a, vector bool int __b) { return vec_perm(__a, __b, (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17, 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); } static __inline__ vector signed int __ATTRS_o_ai vec_mergeo(vector signed int __a, vector signed int __b) { return vec_perm(__a, __b, (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17, 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); } static __inline__ vector unsigned int __ATTRS_o_ai vec_mergeo(vector unsigned int __a, vector unsigned int __b) { return vec_perm(__a, __b, (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17, 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); } static __inline__ vector bool long long __ATTRS_o_ai vec_mergeo(vector bool long long __a, vector bool long long __b) { return vec_mergel(__a, __b); } static __inline__ vector signed long long __ATTRS_o_ai vec_mergeo(vector signed long long __a, vector signed long long __b) { return vec_mergel(__a, __b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_mergeo(vector unsigned long long __a, vector unsigned long long __b) { return vec_mergel(__a, __b); } static __inline__ vector float __ATTRS_o_ai vec_mergeo(vector float __a, vector float __b) { return vec_perm(__a, __b, (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17, 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); } static __inline__ vector double __ATTRS_o_ai vec_mergeo(vector double __a, vector double __b) { return vec_mergel(__a, __b); } #endif /* vec_mfvscr */ static __inline__ vector unsigned short __attribute__((__always_inline__)) vec_mfvscr(void) { return __builtin_altivec_mfvscr(); } /* vec_min */ static __inline__ vector signed char __ATTRS_o_ai vec_min(vector signed char __a, vector signed char __b) { return __builtin_altivec_vminsb(__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_min(vector bool char __a, vector signed char __b) { return __builtin_altivec_vminsb((vector signed char)__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_min(vector signed char __a, vector bool char __b) { return __builtin_altivec_vminsb(__a, (vector signed char)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_min(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vminub(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_min(vector bool char __a, vector unsigned char __b) { return __builtin_altivec_vminub((vector unsigned char)__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_min(vector unsigned char __a, vector bool char __b) { return __builtin_altivec_vminub(__a, (vector unsigned char)__b); } static __inline__ vector short __ATTRS_o_ai vec_min(vector short __a, vector short __b) { return __builtin_altivec_vminsh(__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_min(vector bool short __a, vector short __b) { return __builtin_altivec_vminsh((vector short)__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_min(vector short __a, vector bool short __b) { return 
__builtin_altivec_vminsh(__a, (vector short)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_min(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vminuh(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_min(vector bool short __a, vector unsigned short __b) { return __builtin_altivec_vminuh((vector unsigned short)__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_min(vector unsigned short __a, vector bool short __b) { return __builtin_altivec_vminuh(__a, (vector unsigned short)__b); } static __inline__ vector int __ATTRS_o_ai vec_min(vector int __a, vector int __b) { return __builtin_altivec_vminsw(__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_min(vector bool int __a, vector int __b) { return __builtin_altivec_vminsw((vector int)__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_min(vector int __a, vector bool int __b) { return __builtin_altivec_vminsw(__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_min(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vminuw(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_min(vector bool int __a, vector unsigned int __b) { return __builtin_altivec_vminuw((vector unsigned int)__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_min(vector unsigned int __a, vector bool int __b) { return __builtin_altivec_vminuw(__a, (vector unsigned int)__b); } #ifdef __POWER8_VECTOR__ static __inline__ vector signed long long __ATTRS_o_ai vec_min(vector signed long long __a, vector signed long long __b) { return __builtin_altivec_vminsd(__a, __b); } static __inline__ vector signed long long __ATTRS_o_ai vec_min(vector bool long long __a, vector signed long long __b) { return __builtin_altivec_vminsd((vector signed long long)__a, __b); } static __inline__ vector signed long long __ATTRS_o_ai vec_min(vector signed long long __a, vector bool long long __b) { return __builtin_altivec_vminsd(__a, (vector signed long long)__b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_min(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vminud(__a, __b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_min(vector bool long long __a, vector unsigned long long __b) { return __builtin_altivec_vminud((vector unsigned long long)__a, __b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_min(vector unsigned long long __a, vector bool long long __b) { return __builtin_altivec_vminud(__a, (vector unsigned long long)__b); } #endif static __inline__ vector float __ATTRS_o_ai vec_min(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvminsp(__a, __b); #else return __builtin_altivec_vminfp(__a, __b); #endif } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_min(vector double __a, vector double __b) { return __builtin_vsx_xvmindp(__a, __b); } #endif /* vec_vminsb */ static __inline__ vector signed char __ATTRS_o_ai vec_vminsb(vector signed char __a, vector signed char __b) { return __builtin_altivec_vminsb(__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_vminsb(vector bool char __a, vector signed char __b) { return __builtin_altivec_vminsb((vector signed char)__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_vminsb(vector signed char __a, vector bool char __b) { return __builtin_altivec_vminsb(__a, (vector signed char)__b); } /* vec_vminub */ static 
__inline__ vector unsigned char __ATTRS_o_ai vec_vminub(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vminub(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vminub(vector bool char __a, vector unsigned char __b) { return __builtin_altivec_vminub((vector unsigned char)__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vminub(vector unsigned char __a, vector bool char __b) { return __builtin_altivec_vminub(__a, (vector unsigned char)__b); } /* vec_vminsh */ static __inline__ vector short __ATTRS_o_ai vec_vminsh(vector short __a, vector short __b) { return __builtin_altivec_vminsh(__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_vminsh(vector bool short __a, vector short __b) { return __builtin_altivec_vminsh((vector short)__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_vminsh(vector short __a, vector bool short __b) { return __builtin_altivec_vminsh(__a, (vector short)__b); } /* vec_vminuh */ static __inline__ vector unsigned short __ATTRS_o_ai vec_vminuh(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vminuh(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vminuh(vector bool short __a, vector unsigned short __b) { return __builtin_altivec_vminuh((vector unsigned short)__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vminuh(vector unsigned short __a, vector bool short __b) { return __builtin_altivec_vminuh(__a, (vector unsigned short)__b); } /* vec_vminsw */ static __inline__ vector int __ATTRS_o_ai vec_vminsw(vector int __a, vector int __b) { return __builtin_altivec_vminsw(__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_vminsw(vector bool int __a, vector int __b) { return __builtin_altivec_vminsw((vector int)__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_vminsw(vector int __a, vector bool int __b) { return __builtin_altivec_vminsw(__a, (vector int)__b); } /* vec_vminuw */ static __inline__ vector unsigned int __ATTRS_o_ai vec_vminuw(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vminuw(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vminuw(vector bool int __a, vector unsigned int __b) { return __builtin_altivec_vminuw((vector unsigned int)__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vminuw(vector unsigned int __a, vector bool int __b) { return __builtin_altivec_vminuw(__a, (vector unsigned int)__b); } /* vec_vminfp */ static __inline__ vector float __attribute__((__always_inline__)) vec_vminfp(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvminsp(__a, __b); #else return __builtin_altivec_vminfp(__a, __b); #endif } /* vec_mladd */ #define __builtin_altivec_vmladduhm vec_mladd static __inline__ vector short __ATTRS_o_ai vec_mladd(vector short __a, vector short __b, vector short __c) { return __a * __b + __c; } static __inline__ vector short __ATTRS_o_ai vec_mladd( vector short __a, vector unsigned short __b, vector unsigned short __c) { return __a * (vector short)__b + (vector short)__c; } static __inline__ vector short __ATTRS_o_ai vec_mladd(vector unsigned short __a, vector short __b, vector short __c) { return (vector short)__a * __b + __c; } static __inline__ vector unsigned short __ATTRS_o_ai vec_mladd(vector unsigned short __a, vector unsigned short __b, vector unsigned short __c) { return __a * __b + __c; } /* vec_vmladduhm */ static __inline__ vector short __ATTRS_o_ai vec_vmladduhm(vector 
short __a, vector short __b, vector short __c) { return __a * __b + __c; }
static __inline__ vector short __ATTRS_o_ai vec_vmladduhm(vector short __a, vector unsigned short __b, vector unsigned short __c) { return __a * (vector short)__b + (vector short)__c; }
static __inline__ vector short __ATTRS_o_ai vec_vmladduhm(vector unsigned short __a, vector short __b, vector short __c) { return (vector short)__a * __b + __c; }
static __inline__ vector unsigned short __ATTRS_o_ai vec_vmladduhm(vector unsigned short __a, vector unsigned short __b, vector unsigned short __c) { return __a * __b + __c; }

/* vec_mradds */

static __inline__ vector short __attribute__((__always_inline__)) vec_mradds(vector short __a, vector short __b, vector short __c) { return __builtin_altivec_vmhraddshs(__a, __b, __c); }

/* vec_vmhraddshs */

static __inline__ vector short __attribute__((__always_inline__)) vec_vmhraddshs(vector short __a, vector short __b, vector short __c) { return __builtin_altivec_vmhraddshs(__a, __b, __c); }

/* vec_msum */

static __inline__ vector int __ATTRS_o_ai vec_msum(vector signed char __a, vector unsigned char __b, vector int __c) { return __builtin_altivec_vmsummbm(__a, __b, __c); }
static __inline__ vector unsigned int __ATTRS_o_ai vec_msum(vector unsigned char __a, vector unsigned char __b, vector unsigned int __c) { return __builtin_altivec_vmsumubm(__a, __b, __c); }
static __inline__ vector int __ATTRS_o_ai vec_msum(vector short __a, vector short __b, vector int __c) { return __builtin_altivec_vmsumshm(__a, __b, __c); }
static __inline__ vector unsigned int __ATTRS_o_ai vec_msum(vector unsigned short __a, vector unsigned short __b, vector unsigned int __c) { return __builtin_altivec_vmsumuhm(__a, __b, __c); }

/* vec_msumc */

#if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__)
static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_msumc(vector unsigned long long __a, vector unsigned long long __b, vector unsigned __int128 __c) { return __builtin_altivec_vmsumcud(__a, __b, __c); }
#endif

/* vec_vmsummbm */

static __inline__ vector int __attribute__((__always_inline__)) vec_vmsummbm(vector signed char __a, vector unsigned char __b, vector int __c) { return __builtin_altivec_vmsummbm(__a, __b, __c); }

/* vec_vmsumubm */

static __inline__ vector unsigned int __attribute__((__always_inline__)) vec_vmsumubm(vector unsigned char __a, vector unsigned char __b, vector unsigned int __c) { return __builtin_altivec_vmsumubm(__a, __b, __c); }

/* vec_vmsumshm */

static __inline__ vector int __attribute__((__always_inline__)) vec_vmsumshm(vector short __a, vector short __b, vector int __c) { return __builtin_altivec_vmsumshm(__a, __b, __c); }

/* vec_vmsumuhm */

static __inline__ vector unsigned int __attribute__((__always_inline__)) vec_vmsumuhm(vector unsigned short __a, vector unsigned short __b, vector unsigned int __c) { return __builtin_altivec_vmsumuhm(__a, __b, __c); }

/* vec_msums */

static __inline__ vector int __ATTRS_o_ai vec_msums(vector short __a, vector short __b, vector int __c) { return __builtin_altivec_vmsumshs(__a, __b, __c); }
static __inline__ vector unsigned int __ATTRS_o_ai vec_msums(vector unsigned short __a, vector unsigned short __b, vector unsigned int __c) { return __builtin_altivec_vmsumuhs(__a, __b, __c); }

/* vec_vmsumshs */

static __inline__ vector int __attribute__((__always_inline__)) vec_vmsumshs(vector short __a, vector short __b, vector int __c) { return __builtin_altivec_vmsumshs(__a, __b, __c); }
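/*
 * Illustrative sketch for the vec_msum overloads above (editorial addition;
 * the helper name is hypothetical): vec_msum multiplies corresponding
 * elements, sums each group of products that share a 32-bit lane, and adds the
 * matching word from the accumulator, which makes it convenient for building
 * dot products across a buffer.
 */
static __inline__ vector unsigned int dot_accumulate_u8_demo(vector unsigned char a, vector unsigned char b,
                                                             vector unsigned int acc) {
  /* Each 32-bit lane of the result is acc plus the sum of the four byte
     products from the corresponding lane group of a and b. */
  return vec_msum(a, b, acc);
}

/* vec_vmsumuhs */

static __inline__ vector 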
unsigned int __attribute__((__always_inline__)) vec_vmsumuhs(vector unsigned short __a, vector unsigned short __b, vector unsigned int __c) { return __builtin_altivec_vmsumuhs(__a, __b, __c); }

/* vec_mtvscr */

static __inline__ void __ATTRS_o_ai vec_mtvscr(vector signed char __a) { __builtin_altivec_mtvscr((vector int)__a); }
static __inline__ void __ATTRS_o_ai vec_mtvscr(vector unsigned char __a) { __builtin_altivec_mtvscr((vector int)__a); }
static __inline__ void __ATTRS_o_ai vec_mtvscr(vector bool char __a) { __builtin_altivec_mtvscr((vector int)__a); }
static __inline__ void __ATTRS_o_ai vec_mtvscr(vector short __a) { __builtin_altivec_mtvscr((vector int)__a); }
static __inline__ void __ATTRS_o_ai vec_mtvscr(vector unsigned short __a) { __builtin_altivec_mtvscr((vector int)__a); }
static __inline__ void __ATTRS_o_ai vec_mtvscr(vector bool short __a) { __builtin_altivec_mtvscr((vector int)__a); }
static __inline__ void __ATTRS_o_ai vec_mtvscr(vector pixel __a) { __builtin_altivec_mtvscr((vector int)__a); }
static __inline__ void __ATTRS_o_ai vec_mtvscr(vector int __a) { __builtin_altivec_mtvscr((vector int)__a); }
static __inline__ void __ATTRS_o_ai vec_mtvscr(vector unsigned int __a) { __builtin_altivec_mtvscr((vector int)__a); }
static __inline__ void __ATTRS_o_ai vec_mtvscr(vector bool int __a) { __builtin_altivec_mtvscr((vector int)__a); }
static __inline__ void __ATTRS_o_ai vec_mtvscr(vector float __a) { __builtin_altivec_mtvscr((vector int)__a); }

/* vec_mul */

/* Integer vector multiplication will involve multiplication of the odd/even
   elements separately, then truncating the results and moving to the result
   vector. */

static __inline__ vector signed char __ATTRS_o_ai vec_mul(vector signed char __a, vector signed char __b) { return __a * __b; }
static __inline__ vector unsigned char __ATTRS_o_ai vec_mul(vector unsigned char __a, vector unsigned char __b) { return __a * __b; }
static __inline__ vector signed short __ATTRS_o_ai vec_mul(vector signed short __a, vector signed short __b) { return __a * __b; }
static __inline__ vector unsigned short __ATTRS_o_ai vec_mul(vector unsigned short __a, vector unsigned short __b) { return __a * __b; }
static __inline__ vector signed int __ATTRS_o_ai vec_mul(vector signed int __a, vector signed int __b) { return __a * __b; }
static __inline__ vector unsigned int __ATTRS_o_ai vec_mul(vector unsigned int __a, vector unsigned int __b) { return __a * __b; }
#ifdef __VSX__
static __inline__ vector signed long long __ATTRS_o_ai vec_mul(vector signed long long __a, vector signed long long __b) { return __a * __b; }
static __inline__ vector unsigned long long __ATTRS_o_ai vec_mul(vector unsigned long long __a, vector unsigned long long __b) { return __a * __b; }
#endif
static __inline__ vector float __ATTRS_o_ai vec_mul(vector float __a, vector float __b) { return __a * __b; }
#ifdef __VSX__
static __inline__ vector double __ATTRS_o_ai vec_mul(vector double __a, vector double __b) { return __a * __b; }
#endif
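/*
 * Illustrative sketch for the integer vec_mul overloads above (editorial
 * addition; the helper name is hypothetical): vec_mul keeps only the low-order
 * half of each element-wise product, so it behaves like vec_mladd with a zero
 * addend, while the widening forms vec_mule and vec_mulo defined below return
 * the full-width products of the even- and odd-numbered elements.
 */
static __inline__ vector unsigned short low_half_product_demo(vector unsigned short a, vector unsigned short b) {
  /* Each 16-bit lane holds only the low 16 bits of the corresponding 32-bit
     product, exactly as the element-wise C multiplication above specifies. */
  return vec_mul(a, b);
}

/* The vmulos* and vmules* instructions have a big endian bias, so we must
   reverse the meaning of "even" and "odd" for little endian.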
*/ /* vec_mule */ static __inline__ vector short __ATTRS_o_ai vec_mule(vector signed char __a, vector signed char __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmulosb(__a, __b); #else return __builtin_altivec_vmulesb(__a, __b); #endif } static __inline__ vector unsigned short __ATTRS_o_ai vec_mule(vector unsigned char __a, vector unsigned char __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmuloub(__a, __b); #else return __builtin_altivec_vmuleub(__a, __b); #endif } static __inline__ vector int __ATTRS_o_ai vec_mule(vector short __a, vector short __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmulosh(__a, __b); #else return __builtin_altivec_vmulesh(__a, __b); #endif } static __inline__ vector unsigned int __ATTRS_o_ai vec_mule(vector unsigned short __a, vector unsigned short __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmulouh(__a, __b); #else return __builtin_altivec_vmuleuh(__a, __b); #endif } #ifdef __POWER8_VECTOR__ static __inline__ vector signed long long __ATTRS_o_ai vec_mule(vector signed int __a, vector signed int __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmulosw(__a, __b); #else return __builtin_altivec_vmulesw(__a, __b); #endif } static __inline__ vector unsigned long long __ATTRS_o_ai vec_mule(vector unsigned int __a, vector unsigned int __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmulouw(__a, __b); #else return __builtin_altivec_vmuleuw(__a, __b); #endif } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ vector signed __int128 __ATTRS_o_ai vec_mule(vector signed long long __a, vector signed long long __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmulosd(__a, __b); #else return __builtin_altivec_vmulesd(__a, __b); #endif } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_mule(vector unsigned long long __a, vector unsigned long long __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmuloud(__a, __b); #else return __builtin_altivec_vmuleud(__a, __b); #endif } #endif /* vec_vmulesb */ static __inline__ vector short __attribute__((__always_inline__)) vec_vmulesb(vector signed char __a, vector signed char __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmulosb(__a, __b); #else return __builtin_altivec_vmulesb(__a, __b); #endif } /* vec_vmuleub */ static __inline__ vector unsigned short __attribute__((__always_inline__)) vec_vmuleub(vector unsigned char __a, vector unsigned char __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmuloub(__a, __b); #else return __builtin_altivec_vmuleub(__a, __b); #endif } /* vec_vmulesh */ static __inline__ vector int __attribute__((__always_inline__)) vec_vmulesh(vector short __a, vector short __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmulosh(__a, __b); #else return __builtin_altivec_vmulesh(__a, __b); #endif } /* vec_vmuleuh */ static __inline__ vector unsigned int __attribute__((__always_inline__)) vec_vmuleuh(vector unsigned short __a, vector unsigned short __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmulouh(__a, __b); #else return __builtin_altivec_vmuleuh(__a, __b); #endif } /* vec_mulh */ #ifdef __POWER10_VECTOR__ static __inline__ vector signed int __ATTRS_o_ai vec_mulh(vector signed int __a, vector signed int __b) { return __builtin_altivec_vmulhsw(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_mulh(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vmulhuw(__a, __b); } static __inline__ vector signed long long 
__ATTRS_o_ai vec_mulh(vector signed long long __a, vector signed long long __b) { return __builtin_altivec_vmulhsd(__a, __b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_mulh(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vmulhud(__a, __b); } #endif /* vec_mulo */ static __inline__ vector short __ATTRS_o_ai vec_mulo(vector signed char __a, vector signed char __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmulesb(__a, __b); #else return __builtin_altivec_vmulosb(__a, __b); #endif } static __inline__ vector unsigned short __ATTRS_o_ai vec_mulo(vector unsigned char __a, vector unsigned char __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmuleub(__a, __b); #else return __builtin_altivec_vmuloub(__a, __b); #endif } static __inline__ vector int __ATTRS_o_ai vec_mulo(vector short __a, vector short __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmulesh(__a, __b); #else return __builtin_altivec_vmulosh(__a, __b); #endif } static __inline__ vector unsigned int __ATTRS_o_ai vec_mulo(vector unsigned short __a, vector unsigned short __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmuleuh(__a, __b); #else return __builtin_altivec_vmulouh(__a, __b); #endif } #ifdef __POWER8_VECTOR__ static __inline__ vector signed long long __ATTRS_o_ai vec_mulo(vector signed int __a, vector signed int __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmulesw(__a, __b); #else return __builtin_altivec_vmulosw(__a, __b); #endif } static __inline__ vector unsigned long long __ATTRS_o_ai vec_mulo(vector unsigned int __a, vector unsigned int __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmuleuw(__a, __b); #else return __builtin_altivec_vmulouw(__a, __b); #endif } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ vector signed __int128 __ATTRS_o_ai vec_mulo(vector signed long long __a, vector signed long long __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmulesd(__a, __b); #else return __builtin_altivec_vmulosd(__a, __b); #endif } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_mulo(vector unsigned long long __a, vector unsigned long long __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmuleud(__a, __b); #else return __builtin_altivec_vmuloud(__a, __b); #endif } #endif /* vec_vmulosb */ static __inline__ vector short __attribute__((__always_inline__)) vec_vmulosb(vector signed char __a, vector signed char __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmulesb(__a, __b); #else return __builtin_altivec_vmulosb(__a, __b); #endif } /* vec_vmuloub */ static __inline__ vector unsigned short __attribute__((__always_inline__)) vec_vmuloub(vector unsigned char __a, vector unsigned char __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmuleub(__a, __b); #else return __builtin_altivec_vmuloub(__a, __b); #endif } /* vec_vmulosh */ static __inline__ vector int __attribute__((__always_inline__)) vec_vmulosh(vector short __a, vector short __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmulesh(__a, __b); #else return __builtin_altivec_vmulosh(__a, __b); #endif } /* vec_vmulouh */ static __inline__ vector unsigned int __attribute__((__always_inline__)) vec_vmulouh(vector unsigned short __a, vector unsigned short __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vmuleuh(__a, __b); #else return __builtin_altivec_vmulouh(__a, __b); #endif } /* vec_nand */ #ifdef __POWER8_VECTOR__ static __inline__ vector signed char __ATTRS_o_ai 
vec_nand(vector signed char __a, vector signed char __b) { return ~(__a & __b); } static __inline__ vector signed char __ATTRS_o_ai vec_nand(vector signed char __a, vector bool char __b) { return ~(__a & (vector signed char)__b); } static __inline__ vector signed char __ATTRS_o_ai vec_nand(vector bool char __a, vector signed char __b) { return (vector signed char)~(__a & (vector bool char)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_nand(vector unsigned char __a, vector unsigned char __b) { return ~(__a & __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_nand(vector unsigned char __a, vector bool char __b) { return ~(__a & (vector unsigned char)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_nand(vector bool char __a, vector unsigned char __b) { return (vector unsigned char)~(__a & (vector bool char)__b); } static __inline__ vector bool char __ATTRS_o_ai vec_nand(vector bool char __a, vector bool char __b) { return ~(__a & __b); } static __inline__ vector signed short __ATTRS_o_ai vec_nand(vector signed short __a, vector signed short __b) { return ~(__a & __b); } static __inline__ vector signed short __ATTRS_o_ai vec_nand(vector signed short __a, vector bool short __b) { return ~(__a & (vector signed short)__b); } static __inline__ vector signed short __ATTRS_o_ai vec_nand(vector bool short __a, vector signed short __b) { return (vector signed short)~(__a & (vector bool short)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_nand(vector unsigned short __a, vector unsigned short __b) { return ~(__a & __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_nand(vector unsigned short __a, vector bool short __b) { return ~(__a & (vector unsigned short)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_nand(vector bool short __a, vector bool short __b) { return ~(__a & __b); } static __inline__ vector signed int __ATTRS_o_ai vec_nand(vector signed int __a, vector signed int __b) { return ~(__a & __b); } static __inline__ vector signed int __ATTRS_o_ai vec_nand(vector signed int __a, vector bool int __b) { return ~(__a & (vector signed int)__b); } static __inline__ vector signed int __ATTRS_o_ai vec_nand(vector bool int __a, vector signed int __b) { return (vector signed int)~(__a & (vector bool int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_nand(vector unsigned int __a, vector unsigned int __b) { return ~(__a & __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_nand(vector unsigned int __a, vector bool int __b) { return ~(__a & (vector unsigned int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_nand(vector bool int __a, vector unsigned int __b) { return (vector unsigned int)~(__a & (vector bool int)__b); } static __inline__ vector bool int __ATTRS_o_ai vec_nand(vector bool int __a, vector bool int __b) { return ~(__a & __b); } static __inline__ vector float __ATTRS_o_ai vec_nand(vector float __a, vector float __b) { return (vector float)(~((vector unsigned int)__a & (vector unsigned int)__b)); } static __inline__ vector signed long long __ATTRS_o_ai vec_nand(vector signed long long __a, vector signed long long __b) { return ~(__a & __b); } static __inline__ vector signed long long __ATTRS_o_ai vec_nand(vector signed long long __a, vector bool long long __b) { return ~(__a & (vector signed long long)__b); } static __inline__ vector signed long long __ATTRS_o_ai vec_nand(vector bool long long __a, vector signed long long __b) { return (vector signed long long)~(__a & 
(vector bool long long)__b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_nand(vector unsigned long long __a, vector unsigned long long __b) { return ~(__a & __b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_nand(vector unsigned long long __a, vector bool long long __b) { return ~(__a & (vector unsigned long long)__b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_nand(vector bool long long __a, vector unsigned long long __b) { return (vector unsigned long long)~(__a & (vector bool long long)__b); } static __inline__ vector bool long long __ATTRS_o_ai vec_nand(vector bool long long __a, vector bool long long __b) { return ~(__a & __b); } static __inline__ vector double __ATTRS_o_ai vec_nand(vector double __a, vector double __b) { return (vector double)(~((vector unsigned long long)__a & (vector unsigned long long)__b)); } #endif /* vec_nmadd */ #ifdef __VSX__ static __inline__ vector float __ATTRS_o_ai vec_nmadd(vector float __a, vector float __b, vector float __c) { return __builtin_vsx_xvnmaddasp(__a, __b, __c); } static __inline__ vector double __ATTRS_o_ai vec_nmadd(vector double __a, vector double __b, vector double __c) { return __builtin_vsx_xvnmaddadp(__a, __b, __c); } #endif /* vec_nmsub */ static __inline__ vector float __ATTRS_o_ai vec_nmsub(vector float __a, vector float __b, vector float __c) { #ifdef __VSX__ return __builtin_vsx_xvnmsubasp(__a, __b, __c); #else return __builtin_altivec_vnmsubfp(__a, __b, __c); #endif } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_nmsub(vector double __a, vector double __b, vector double __c) { return __builtin_vsx_xvnmsubadp(__a, __b, __c); } #endif /* vec_vnmsubfp */ static __inline__ vector float __attribute__((__always_inline__)) vec_vnmsubfp(vector float __a, vector float __b, vector float __c) { return __builtin_altivec_vnmsubfp(__a, __b, __c); } /* vec_nor */ #define __builtin_altivec_vnor vec_nor static __inline__ vector signed char __ATTRS_o_ai vec_nor(vector signed char __a, vector signed char __b) { return ~(__a | __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_nor(vector unsigned char __a, vector unsigned char __b) { return ~(__a | __b); } static __inline__ vector bool char __ATTRS_o_ai vec_nor(vector bool char __a, vector bool char __b) { return ~(__a | __b); } static __inline__ vector short __ATTRS_o_ai vec_nor(vector short __a, vector short __b) { return ~(__a | __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_nor(vector unsigned short __a, vector unsigned short __b) { return ~(__a | __b); } static __inline__ vector bool short __ATTRS_o_ai vec_nor(vector bool short __a, vector bool short __b) { return ~(__a | __b); } static __inline__ vector int __ATTRS_o_ai vec_nor(vector int __a, vector int __b) { return ~(__a | __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_nor(vector unsigned int __a, vector unsigned int __b) { return ~(__a | __b); } static __inline__ vector bool int __ATTRS_o_ai vec_nor(vector bool int __a, vector bool int __b) { return ~(__a | __b); } static __inline__ vector float __ATTRS_o_ai vec_nor(vector float __a, vector float __b) { vector unsigned int __res = ~((vector unsigned int)__a | (vector unsigned int)__b); return (vector float)__res; } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_nor(vector double __a, vector double __b) { vector unsigned long long __res = ~((vector unsigned long long)__a | (vector unsigned long long)__b); return (vector double)__res; } #endif /* 
vec_vnor */ static __inline__ vector signed char __ATTRS_o_ai vec_vnor(vector signed char __a, vector signed char __b) { return ~(__a | __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vnor(vector unsigned char __a, vector unsigned char __b) { return ~(__a | __b); } static __inline__ vector bool char __ATTRS_o_ai vec_vnor(vector bool char __a, vector bool char __b) { return ~(__a | __b); } static __inline__ vector short __ATTRS_o_ai vec_vnor(vector short __a, vector short __b) { return ~(__a | __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vnor(vector unsigned short __a, vector unsigned short __b) { return ~(__a | __b); } static __inline__ vector bool short __ATTRS_o_ai vec_vnor(vector bool short __a, vector bool short __b) { return ~(__a | __b); } static __inline__ vector int __ATTRS_o_ai vec_vnor(vector int __a, vector int __b) { return ~(__a | __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vnor(vector unsigned int __a, vector unsigned int __b) { return ~(__a | __b); } static __inline__ vector bool int __ATTRS_o_ai vec_vnor(vector bool int __a, vector bool int __b) { return ~(__a | __b); } static __inline__ vector float __ATTRS_o_ai vec_vnor(vector float __a, vector float __b) { vector unsigned int __res = ~((vector unsigned int)__a | (vector unsigned int)__b); return (vector float)__res; } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai vec_nor(vector signed long long __a, vector signed long long __b) { return ~(__a | __b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_nor(vector unsigned long long __a, vector unsigned long long __b) { return ~(__a | __b); } static __inline__ vector bool long long __ATTRS_o_ai vec_nor(vector bool long long __a, vector bool long long __b) { return ~(__a | __b); } #endif /* vec_or */ #define __builtin_altivec_vor vec_or static __inline__ vector signed char __ATTRS_o_ai vec_or(vector signed char __a, vector signed char __b) { return __a | __b; } static __inline__ vector signed char __ATTRS_o_ai vec_or(vector bool char __a, vector signed char __b) { return (vector signed char)__a | __b; } static __inline__ vector signed char __ATTRS_o_ai vec_or(vector signed char __a, vector bool char __b) { return __a | (vector signed char)__b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_or(vector unsigned char __a, vector unsigned char __b) { return __a | __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_or(vector bool char __a, vector unsigned char __b) { return (vector unsigned char)__a | __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_or(vector unsigned char __a, vector bool char __b) { return __a | (vector unsigned char)__b; } static __inline__ vector bool char __ATTRS_o_ai vec_or(vector bool char __a, vector bool char __b) { return __a | __b; } static __inline__ vector short __ATTRS_o_ai vec_or(vector short __a, vector short __b) { return __a | __b; } static __inline__ vector short __ATTRS_o_ai vec_or(vector bool short __a, vector short __b) { return (vector short)__a | __b; } static __inline__ vector short __ATTRS_o_ai vec_or(vector short __a, vector bool short __b) { return __a | (vector short)__b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_or(vector unsigned short __a, vector unsigned short __b) { return __a | __b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_or(vector bool short __a, vector unsigned short __b) { return (vector unsigned short)__a | __b; } static __inline__ vector unsigned short __ATTRS_o_ai 
vec_or(vector unsigned short __a, vector bool short __b) { return __a | (vector unsigned short)__b; } static __inline__ vector bool short __ATTRS_o_ai vec_or(vector bool short __a, vector bool short __b) { return __a | __b; } static __inline__ vector int __ATTRS_o_ai vec_or(vector int __a, vector int __b) { return __a | __b; } static __inline__ vector int __ATTRS_o_ai vec_or(vector bool int __a, vector int __b) { return (vector int)__a | __b; } static __inline__ vector int __ATTRS_o_ai vec_or(vector int __a, vector bool int __b) { return __a | (vector int)__b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_or(vector unsigned int __a, vector unsigned int __b) { return __a | __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_or(vector bool int __a, vector unsigned int __b) { return (vector unsigned int)__a | __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_or(vector unsigned int __a, vector bool int __b) { return __a | (vector unsigned int)__b; } static __inline__ vector bool int __ATTRS_o_ai vec_or(vector bool int __a, vector bool int __b) { return __a | __b; } static __inline__ vector float __ATTRS_o_ai vec_or(vector float __a, vector float __b) { vector unsigned int __res = (vector unsigned int)__a | (vector unsigned int)__b; return (vector float)__res; } static __inline__ vector float __ATTRS_o_ai vec_or(vector bool int __a, vector float __b) { vector unsigned int __res = (vector unsigned int)__a | (vector unsigned int)__b; return (vector float)__res; } static __inline__ vector float __ATTRS_o_ai vec_or(vector float __a, vector bool int __b) { vector unsigned int __res = (vector unsigned int)__a | (vector unsigned int)__b; return (vector float)__res; } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_or(vector bool long long __a, vector double __b) { return (vector double)((vector unsigned long long)__a | (vector unsigned long long)__b); } static __inline__ vector double __ATTRS_o_ai vec_or(vector double __a, vector bool long long __b) { return (vector double)((vector unsigned long long)__a | (vector unsigned long long)__b); } static __inline__ vector double __ATTRS_o_ai vec_or(vector double __a, vector double __b) { return (vector double)((vector unsigned long long)__a | (vector unsigned long long)__b); } static __inline__ vector signed long long __ATTRS_o_ai vec_or(vector signed long long __a, vector signed long long __b) { return __a | __b; } static __inline__ vector signed long long __ATTRS_o_ai vec_or(vector bool long long __a, vector signed long long __b) { return (vector signed long long)__a | __b; } static __inline__ vector signed long long __ATTRS_o_ai vec_or(vector signed long long __a, vector bool long long __b) { return __a | (vector signed long long)__b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_or(vector unsigned long long __a, vector unsigned long long __b) { return __a | __b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_or(vector bool long long __a, vector unsigned long long __b) { return (vector unsigned long long)__a | __b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_or(vector unsigned long long __a, vector bool long long __b) { return __a | (vector unsigned long long)__b; } static __inline__ vector bool long long __ATTRS_o_ai vec_or(vector bool long long __a, vector bool long long __b) { return __a | __b; } #endif #ifdef __POWER8_VECTOR__ static __inline__ vector signed char __ATTRS_o_ai vec_orc(vector signed char __a, vector signed char __b) { return __a | ~__b; 
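  /* Illustrative note: vec_orc computes __a | ~__b.  On targets without
     __POWER8_VECTOR__ the same value can be formed from the intrinsics
     defined above, e.g. vec_or(__a, vec_nor(__b, __b)), since
     vec_nor(__b, __b) is simply ~__b. */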
} static __inline__ vector signed char __ATTRS_o_ai vec_orc(vector signed char __a, vector bool char __b) { return __a | (vector signed char)~__b; } static __inline__ vector signed char __ATTRS_o_ai vec_orc(vector bool char __a, vector signed char __b) { return (vector signed char)(__a | (vector bool char)~__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_orc(vector unsigned char __a, vector unsigned char __b) { return __a | ~__b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_orc(vector unsigned char __a, vector bool char __b) { return __a | (vector unsigned char)~__b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_orc(vector bool char __a, vector unsigned char __b) { return (vector unsigned char)(__a | (vector bool char)~__b); } static __inline__ vector bool char __ATTRS_o_ai vec_orc(vector bool char __a, vector bool char __b) { return __a | ~__b; } static __inline__ vector signed short __ATTRS_o_ai vec_orc(vector signed short __a, vector signed short __b) { return __a | ~__b; } static __inline__ vector signed short __ATTRS_o_ai vec_orc(vector signed short __a, vector bool short __b) { return __a | (vector signed short)~__b; } static __inline__ vector signed short __ATTRS_o_ai vec_orc(vector bool short __a, vector signed short __b) { return (vector signed short)(__a | (vector bool short)~__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_orc(vector unsigned short __a, vector unsigned short __b) { return __a | ~__b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_orc(vector unsigned short __a, vector bool short __b) { return __a | (vector unsigned short)~__b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_orc(vector bool short __a, vector unsigned short __b) { return (vector unsigned short)(__a | (vector bool short)~__b); } static __inline__ vector bool short __ATTRS_o_ai vec_orc(vector bool short __a, vector bool short __b) { return __a | ~__b; } static __inline__ vector signed int __ATTRS_o_ai vec_orc(vector signed int __a, vector signed int __b) { return __a | ~__b; } static __inline__ vector signed int __ATTRS_o_ai vec_orc(vector signed int __a, vector bool int __b) { return __a | (vector signed int)~__b; } static __inline__ vector signed int __ATTRS_o_ai vec_orc(vector bool int __a, vector signed int __b) { return (vector signed int)(__a | (vector bool int)~__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_orc(vector unsigned int __a, vector unsigned int __b) { return __a | ~__b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_orc(vector unsigned int __a, vector bool int __b) { return __a | (vector unsigned int)~__b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_orc(vector bool int __a, vector unsigned int __b) { return (vector unsigned int)(__a | (vector bool int)~__b); } static __inline__ vector bool int __ATTRS_o_ai vec_orc(vector bool int __a, vector bool int __b) { return __a | ~__b; } static __inline__ vector float __ATTRS_o_ai vec_orc(vector bool int __a, vector float __b) { return (vector float)(__a | ~(vector bool int)__b); } static __inline__ vector float __ATTRS_o_ai vec_orc(vector float __a, vector bool int __b) { return (vector float)((vector bool int)__a | ~__b); } static __inline__ vector float __ATTRS_o_ai vec_orc(vector float __a, vector float __b) { return (vector float)((vector unsigned int)__a | ~(vector unsigned int)__b); } static __inline__ vector signed long long __ATTRS_o_ai vec_orc(vector signed long long __a, vector signed long long __b) { return __a | 
~__b; } static __inline__ vector signed long long __ATTRS_o_ai vec_orc(vector signed long long __a, vector bool long long __b) { return __a | (vector signed long long)~__b; } static __inline__ vector signed long long __ATTRS_o_ai vec_orc(vector bool long long __a, vector signed long long __b) { return (vector signed long long)(__a | (vector bool long long)~__b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_orc(vector unsigned long long __a, vector unsigned long long __b) { return __a | ~__b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_orc(vector unsigned long long __a, vector bool long long __b) { return __a | (vector unsigned long long)~__b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_orc(vector bool long long __a, vector unsigned long long __b) { return (vector unsigned long long)(__a | (vector bool long long)~__b); } static __inline__ vector bool long long __ATTRS_o_ai vec_orc(vector bool long long __a, vector bool long long __b) { return __a | ~__b; } static __inline__ vector double __ATTRS_o_ai vec_orc(vector double __a, vector bool long long __b) { return (vector double)((vector bool long long)__a | ~__b); } static __inline__ vector double __ATTRS_o_ai vec_orc(vector bool long long __a, vector double __b) { return (vector double)(__a | ~(vector bool long long)__b); } static __inline__ vector double __ATTRS_o_ai vec_orc(vector double __a, vector double __b) { return (vector double)((vector unsigned long long)__a | ~(vector unsigned long long)__b); } #endif /* vec_vor */ static __inline__ vector signed char __ATTRS_o_ai vec_vor(vector signed char __a, vector signed char __b) { return __a | __b; } static __inline__ vector signed char __ATTRS_o_ai vec_vor(vector bool char __a, vector signed char __b) { return (vector signed char)__a | __b; } static __inline__ vector signed char __ATTRS_o_ai vec_vor(vector signed char __a, vector bool char __b) { return __a | (vector signed char)__b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_vor(vector unsigned char __a, vector unsigned char __b) { return __a | __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_vor(vector bool char __a, vector unsigned char __b) { return (vector unsigned char)__a | __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_vor(vector unsigned char __a, vector bool char __b) { return __a | (vector unsigned char)__b; } static __inline__ vector bool char __ATTRS_o_ai vec_vor(vector bool char __a, vector bool char __b) { return __a | __b; } static __inline__ vector short __ATTRS_o_ai vec_vor(vector short __a, vector short __b) { return __a | __b; } static __inline__ vector short __ATTRS_o_ai vec_vor(vector bool short __a, vector short __b) { return (vector short)__a | __b; } static __inline__ vector short __ATTRS_o_ai vec_vor(vector short __a, vector bool short __b) { return __a | (vector short)__b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_vor(vector unsigned short __a, vector unsigned short __b) { return __a | __b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_vor(vector bool short __a, vector unsigned short __b) { return (vector unsigned short)__a | __b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_vor(vector unsigned short __a, vector bool short __b) { return __a | (vector unsigned short)__b; } static __inline__ vector bool short __ATTRS_o_ai vec_vor(vector bool short __a, vector bool short __b) { return __a | __b; } static __inline__ vector int __ATTRS_o_ai vec_vor(vector int __a, vector int 
__b) { return __a | __b; } static __inline__ vector int __ATTRS_o_ai vec_vor(vector bool int __a, vector int __b) { return (vector int)__a | __b; } static __inline__ vector int __ATTRS_o_ai vec_vor(vector int __a, vector bool int __b) { return __a | (vector int)__b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_vor(vector unsigned int __a, vector unsigned int __b) { return __a | __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_vor(vector bool int __a, vector unsigned int __b) { return (vector unsigned int)__a | __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_vor(vector unsigned int __a, vector bool int __b) { return __a | (vector unsigned int)__b; } static __inline__ vector bool int __ATTRS_o_ai vec_vor(vector bool int __a, vector bool int __b) { return __a | __b; } static __inline__ vector float __ATTRS_o_ai vec_vor(vector float __a, vector float __b) { vector unsigned int __res = (vector unsigned int)__a | (vector unsigned int)__b; return (vector float)__res; } static __inline__ vector float __ATTRS_o_ai vec_vor(vector bool int __a, vector float __b) { vector unsigned int __res = (vector unsigned int)__a | (vector unsigned int)__b; return (vector float)__res; } static __inline__ vector float __ATTRS_o_ai vec_vor(vector float __a, vector bool int __b) { vector unsigned int __res = (vector unsigned int)__a | (vector unsigned int)__b; return (vector float)__res; } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai vec_vor(vector signed long long __a, vector signed long long __b) { return __a | __b; } static __inline__ vector signed long long __ATTRS_o_ai vec_vor(vector bool long long __a, vector signed long long __b) { return (vector signed long long)__a | __b; } static __inline__ vector signed long long __ATTRS_o_ai vec_vor(vector signed long long __a, vector bool long long __b) { return __a | (vector signed long long)__b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_vor(vector unsigned long long __a, vector unsigned long long __b) { return __a | __b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_vor(vector bool long long __a, vector unsigned long long __b) { return (vector unsigned long long)__a | __b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_vor(vector unsigned long long __a, vector bool long long __b) { return __a | (vector unsigned long long)__b; } static __inline__ vector bool long long __ATTRS_o_ai vec_vor(vector bool long long __a, vector bool long long __b) { return __a | __b; } #endif /* vec_pack */ /* The various vector pack instructions have a big-endian bias, so for little endian we must handle reversed element numbering. 
*/ static __inline__ vector signed char __ATTRS_o_ai vec_pack(vector signed short __a, vector signed short __b) { #ifdef __LITTLE_ENDIAN__ return (vector signed char)vec_perm( __a, __b, (vector unsigned char)(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E)); #else return (vector signed char)vec_perm( __a, __b, (vector unsigned char)(0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); #endif } static __inline__ vector unsigned char __ATTRS_o_ai vec_pack(vector unsigned short __a, vector unsigned short __b) { #ifdef __LITTLE_ENDIAN__ return (vector unsigned char)vec_perm( __a, __b, (vector unsigned char)(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E)); #else return (vector unsigned char)vec_perm( __a, __b, (vector unsigned char)(0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); #endif } static __inline__ vector bool char __ATTRS_o_ai vec_pack(vector bool short __a, vector bool short __b) { #ifdef __LITTLE_ENDIAN__ return (vector bool char)vec_perm( __a, __b, (vector unsigned char)(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E)); #else return (vector bool char)vec_perm( __a, __b, (vector unsigned char)(0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); #endif } static __inline__ vector short __ATTRS_o_ai vec_pack(vector int __a, vector int __b) { #ifdef __LITTLE_ENDIAN__ return (vector short)vec_perm( __a, __b, (vector unsigned char)(0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D, 0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D)); #else return (vector short)vec_perm( __a, __b, (vector unsigned char)(0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); #endif } static __inline__ vector unsigned short __ATTRS_o_ai vec_pack(vector unsigned int __a, vector unsigned int __b) { #ifdef __LITTLE_ENDIAN__ return (vector unsigned short)vec_perm( __a, __b, (vector unsigned char)(0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D, 0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D)); #else return (vector unsigned short)vec_perm( __a, __b, (vector unsigned char)(0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); #endif } static __inline__ vector bool short __ATTRS_o_ai vec_pack(vector bool int __a, vector bool int __b) { #ifdef __LITTLE_ENDIAN__ return (vector bool short)vec_perm( __a, __b, (vector unsigned char)(0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D, 0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D)); #else return (vector bool short)vec_perm( __a, __b, (vector unsigned char)(0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); #endif } #ifdef __VSX__ static __inline__ vector signed int __ATTRS_o_ai vec_pack(vector signed long long __a, vector signed long long __b) { #ifdef __LITTLE_ENDIAN__ return (vector signed int)vec_perm( __a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B, 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B)); #else return (vector signed int)vec_perm( __a, __b, (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F, 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F)); #endif } static __inline__ vector unsigned int __ATTRS_o_ai vec_pack(vector unsigned long long __a, vector unsigned long long __b) { #ifdef __LITTLE_ENDIAN__ return 
(vector unsigned int)vec_perm( __a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B, 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B)); #else return (vector unsigned int)vec_perm( __a, __b, (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F, 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F)); #endif } static __inline__ vector bool int __ATTRS_o_ai vec_pack(vector bool long long __a, vector bool long long __b) { #ifdef __LITTLE_ENDIAN__ return (vector bool int)vec_perm( __a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B, 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B)); #else return (vector bool int)vec_perm( __a, __b, (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F, 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F)); #endif } static __inline__ vector float __ATTRS_o_ai vec_pack(vector double __a, vector double __b) { return (vector float) (__a[0], __a[1], __b[0], __b[1]); } #endif #ifdef __POWER9_VECTOR__ static __inline__ vector unsigned short __ATTRS_o_ai vec_pack_to_short_fp32(vector float __a, vector float __b) { vector float __resa = __builtin_vsx_xvcvsphp(__a); vector float __resb = __builtin_vsx_xvcvsphp(__b); #ifdef __LITTLE_ENDIAN__ return (vector unsigned short)vec_mergee(__resa, __resb); #else return (vector unsigned short)vec_mergeo(__resa, __resb); #endif } #endif /* vec_vpkuhum */ #define __builtin_altivec_vpkuhum vec_vpkuhum static __inline__ vector signed char __ATTRS_o_ai vec_vpkuhum(vector signed short __a, vector signed short __b) { #ifdef __LITTLE_ENDIAN__ return (vector signed char)vec_perm( __a, __b, (vector unsigned char)(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E)); #else return (vector signed char)vec_perm( __a, __b, (vector unsigned char)(0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); #endif } static __inline__ vector unsigned char __ATTRS_o_ai vec_vpkuhum(vector unsigned short __a, vector unsigned short __b) { #ifdef __LITTLE_ENDIAN__ return (vector unsigned char)vec_perm( __a, __b, (vector unsigned char)(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E)); #else return (vector unsigned char)vec_perm( __a, __b, (vector unsigned char)(0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); #endif } static __inline__ vector bool char __ATTRS_o_ai vec_vpkuhum(vector bool short __a, vector bool short __b) { #ifdef __LITTLE_ENDIAN__ return (vector bool char)vec_perm( __a, __b, (vector unsigned char)(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E)); #else return (vector bool char)vec_perm( __a, __b, (vector unsigned char)(0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); #endif } /* vec_vpkuwum */ #define __builtin_altivec_vpkuwum vec_vpkuwum static __inline__ vector short __ATTRS_o_ai vec_vpkuwum(vector int __a, vector int __b) { #ifdef __LITTLE_ENDIAN__ return (vector short)vec_perm( __a, __b, (vector unsigned char)(0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D, 0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D)); #else return (vector short)vec_perm( __a, __b, (vector unsigned char)(0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); #endif } static __inline__ vector unsigned short __ATTRS_o_ai vec_vpkuwum(vector unsigned int __a, vector unsigned int 
__b) { #ifdef __LITTLE_ENDIAN__ return (vector unsigned short)vec_perm( __a, __b, (vector unsigned char)(0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D, 0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D)); #else return (vector unsigned short)vec_perm( __a, __b, (vector unsigned char)(0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); #endif } static __inline__ vector bool short __ATTRS_o_ai vec_vpkuwum(vector bool int __a, vector bool int __b) { #ifdef __LITTLE_ENDIAN__ return (vector bool short)vec_perm( __a, __b, (vector unsigned char)(0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D, 0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D)); #else return (vector bool short)vec_perm( __a, __b, (vector unsigned char)(0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); #endif } /* vec_vpkudum */ #ifdef __POWER8_VECTOR__ #define __builtin_altivec_vpkudum vec_vpkudum static __inline__ vector int __ATTRS_o_ai vec_vpkudum(vector long long __a, vector long long __b) { #ifdef __LITTLE_ENDIAN__ return (vector int)vec_perm( __a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B, 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B)); #else return (vector int)vec_perm( __a, __b, (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F, 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F)); #endif } static __inline__ vector unsigned int __ATTRS_o_ai vec_vpkudum(vector unsigned long long __a, vector unsigned long long __b) { #ifdef __LITTLE_ENDIAN__ return (vector unsigned int)vec_perm( __a, __b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B, 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B)); #else return (vector unsigned int)vec_perm( __a, __b, (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F, 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F)); #endif } static __inline__ vector bool int __ATTRS_o_ai vec_vpkudum(vector bool long long __a, vector bool long long __b) { #ifdef __LITTLE_ENDIAN__ return (vector bool int)vec_perm( (vector long long)__a, (vector long long)__b, (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B, 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B)); #else return (vector bool int)vec_perm( (vector long long)__a, (vector long long)__b, (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F, 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F)); #endif } #endif /* vec_packpx */ static __inline__ vector pixel __attribute__((__always_inline__)) vec_packpx(vector unsigned int __a, vector unsigned int __b) { #ifdef __LITTLE_ENDIAN__ return (vector pixel)__builtin_altivec_vpkpx(__b, __a); #else return (vector pixel)__builtin_altivec_vpkpx(__a, __b); #endif } /* vec_vpkpx */ static __inline__ vector pixel __attribute__((__always_inline__)) vec_vpkpx(vector unsigned int __a, vector unsigned int __b) { #ifdef __LITTLE_ENDIAN__ return (vector pixel)__builtin_altivec_vpkpx(__b, __a); #else return (vector pixel)__builtin_altivec_vpkpx(__a, __b); #endif } /* vec_packs */ static __inline__ vector signed char __ATTRS_o_ai vec_packs(vector short __a, vector short __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpkshss(__b, __a); #else return __builtin_altivec_vpkshss(__a, __b); #endif } static __inline__ vector unsigned char __ATTRS_o_ai vec_packs(vector unsigned short __a, vector unsigned short __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpkuhus(__b, __a); #else return __builtin_altivec_vpkuhus(__a, 
__b); #endif } static __inline__ vector signed short __ATTRS_o_ai vec_packs(vector int __a, vector int __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpkswss(__b, __a); #else return __builtin_altivec_vpkswss(__a, __b); #endif } static __inline__ vector unsigned short __ATTRS_o_ai vec_packs(vector unsigned int __a, vector unsigned int __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpkuwus(__b, __a); #else return __builtin_altivec_vpkuwus(__a, __b); #endif } #ifdef __POWER8_VECTOR__ static __inline__ vector int __ATTRS_o_ai vec_packs(vector long long __a, vector long long __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpksdss(__b, __a); #else return __builtin_altivec_vpksdss(__a, __b); #endif } static __inline__ vector unsigned int __ATTRS_o_ai vec_packs(vector unsigned long long __a, vector unsigned long long __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpkudus(__b, __a); #else return __builtin_altivec_vpkudus(__a, __b); #endif } #endif /* vec_vpkshss */ static __inline__ vector signed char __attribute__((__always_inline__)) vec_vpkshss(vector short __a, vector short __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpkshss(__b, __a); #else return __builtin_altivec_vpkshss(__a, __b); #endif } /* vec_vpksdss */ #ifdef __POWER8_VECTOR__ static __inline__ vector int __ATTRS_o_ai vec_vpksdss(vector long long __a, vector long long __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpksdss(__b, __a); #else return __builtin_altivec_vpksdss(__a, __b); #endif } #endif /* vec_vpkuhus */ static __inline__ vector unsigned char __attribute__((__always_inline__)) vec_vpkuhus(vector unsigned short __a, vector unsigned short __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpkuhus(__b, __a); #else return __builtin_altivec_vpkuhus(__a, __b); #endif } /* vec_vpkudus */ #ifdef __POWER8_VECTOR__ static __inline__ vector unsigned int __attribute__((__always_inline__)) vec_vpkudus(vector unsigned long long __a, vector unsigned long long __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpkudus(__b, __a); #else return __builtin_altivec_vpkudus(__a, __b); #endif } #endif /* vec_vpkswss */ static __inline__ vector signed short __attribute__((__always_inline__)) vec_vpkswss(vector int __a, vector int __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpkswss(__b, __a); #else return __builtin_altivec_vpkswss(__a, __b); #endif } /* vec_vpkuwus */ static __inline__ vector unsigned short __attribute__((__always_inline__)) vec_vpkuwus(vector unsigned int __a, vector unsigned int __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpkuwus(__b, __a); #else return __builtin_altivec_vpkuwus(__a, __b); #endif } /* vec_packsu */ static __inline__ vector unsigned char __ATTRS_o_ai vec_packsu(vector short __a, vector short __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpkshus(__b, __a); #else return __builtin_altivec_vpkshus(__a, __b); #endif } static __inline__ vector unsigned char __ATTRS_o_ai vec_packsu(vector unsigned short __a, vector unsigned short __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpkuhus(__b, __a); #else return __builtin_altivec_vpkuhus(__a, __b); #endif } static __inline__ vector unsigned short __ATTRS_o_ai vec_packsu(vector int __a, vector int __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpkswus(__b, __a); #else return __builtin_altivec_vpkswus(__a, __b); #endif } static __inline__ vector unsigned short __ATTRS_o_ai vec_packsu(vector unsigned int __a, vector unsigned int __b) { #ifdef 
__LITTLE_ENDIAN__ return __builtin_altivec_vpkuwus(__b, __a); #else return __builtin_altivec_vpkuwus(__a, __b); #endif } #ifdef __POWER8_VECTOR__ static __inline__ vector unsigned int __ATTRS_o_ai vec_packsu(vector long long __a, vector long long __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpksdus(__b, __a); #else return __builtin_altivec_vpksdus(__a, __b); #endif } static __inline__ vector unsigned int __ATTRS_o_ai vec_packsu(vector unsigned long long __a, vector unsigned long long __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpkudus(__b, __a); #else return __builtin_altivec_vpkudus(__a, __b); #endif } #endif /* vec_vpkshus */ static __inline__ vector unsigned char __ATTRS_o_ai vec_vpkshus(vector short __a, vector short __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpkshus(__b, __a); #else return __builtin_altivec_vpkshus(__a, __b); #endif } static __inline__ vector unsigned char __ATTRS_o_ai vec_vpkshus(vector unsigned short __a, vector unsigned short __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpkuhus(__b, __a); #else return __builtin_altivec_vpkuhus(__a, __b); #endif } /* vec_vpkswus */ static __inline__ vector unsigned short __ATTRS_o_ai vec_vpkswus(vector int __a, vector int __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpkswus(__b, __a); #else return __builtin_altivec_vpkswus(__a, __b); #endif } static __inline__ vector unsigned short __ATTRS_o_ai vec_vpkswus(vector unsigned int __a, vector unsigned int __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpkuwus(__b, __a); #else return __builtin_altivec_vpkuwus(__a, __b); #endif } /* vec_vpksdus */ #ifdef __POWER8_VECTOR__ static __inline__ vector unsigned int __ATTRS_o_ai vec_vpksdus(vector long long __a, vector long long __b) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vpksdus(__b, __a); #else return __builtin_altivec_vpksdus(__a, __b); #endif } #endif /* vec_perm */ // The vperm instruction is defined architecturally with a big-endian bias. // For little endian, we swap the input operands and invert the permute // control vector. Only the rightmost 5 bits matter, so we could use // a vector of all 31s instead of all 255s to perform the inversion. // However, when the PCV is not a constant, using 255 has an advantage // in that the vec_xor can be recognized as a vec_nor (and for P8 and // later, possibly a vec_nand). 
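// As a worked illustration of that inversion: under __LITTLE_ENDIAN__ the
// overloads below forward to __builtin_altivec_vperm_4si(__b, __a, __d) with
// __d = vec_xor(__c, {255, ...}).  Complementing a control byte k (k < 32)
// gives 255 - k, whose low five bits are 31 - k.  For example, a control byte
// of 31 complements to 224, whose low five bits are 0: index 31 named the
// final byte of the (__a, __b) pair in the caller's element numbering, and
// index 0 names that same byte in the hardware's big-endian numbering of the
// swapped (__b, __a) pair, so the instruction still picks the bytes the
// caller asked for.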
static __inline__ vector signed char __ATTRS_o_ai vec_perm( vector signed char __a, vector signed char __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; __d = vec_xor(__c, __d); return (vector signed char)__builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d); #else return (vector signed char)__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); #endif } static __inline__ vector unsigned char __ATTRS_o_ai vec_perm(vector unsigned char __a, vector unsigned char __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; __d = vec_xor(__c, __d); return (vector unsigned char)__builtin_altivec_vperm_4si( (vector int)__b, (vector int)__a, __d); #else return (vector unsigned char)__builtin_altivec_vperm_4si( (vector int)__a, (vector int)__b, __c); #endif } static __inline__ vector bool char __ATTRS_o_ai vec_perm(vector bool char __a, vector bool char __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; __d = vec_xor(__c, __d); return (vector bool char)__builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d); #else return (vector bool char)__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); #endif } static __inline__ vector short __ATTRS_o_ai vec_perm(vector signed short __a, vector signed short __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; __d = vec_xor(__c, __d); return (vector signed short)__builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d); #else return (vector signed short)__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); #endif } static __inline__ vector unsigned short __ATTRS_o_ai vec_perm(vector unsigned short __a, vector unsigned short __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; __d = vec_xor(__c, __d); return (vector unsigned short)__builtin_altivec_vperm_4si( (vector int)__b, (vector int)__a, __d); #else return (vector unsigned short)__builtin_altivec_vperm_4si( (vector int)__a, (vector int)__b, __c); #endif } static __inline__ vector bool short __ATTRS_o_ai vec_perm( vector bool short __a, vector bool short __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; __d = vec_xor(__c, __d); return (vector bool short)__builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d); #else return (vector bool short)__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); #endif } static __inline__ vector pixel __ATTRS_o_ai vec_perm(vector pixel __a, vector pixel __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; __d = vec_xor(__c, __d); return (vector pixel)__builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d); #else return (vector pixel)__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); #endif } static __inline__ vector int __ATTRS_o_ai vec_perm(vector signed int __a, 
vector signed int __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; __d = vec_xor(__c, __d); return (vector signed int)__builtin_altivec_vperm_4si(__b, __a, __d); #else return (vector signed int)__builtin_altivec_vperm_4si(__a, __b, __c); #endif } static __inline__ vector unsigned int __ATTRS_o_ai vec_perm(vector unsigned int __a, vector unsigned int __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; __d = vec_xor(__c, __d); return (vector unsigned int)__builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d); #else return (vector unsigned int)__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); #endif } static __inline__ vector bool int __ATTRS_o_ai vec_perm(vector bool int __a, vector bool int __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; __d = vec_xor(__c, __d); return (vector bool int)__builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d); #else return (vector bool int)__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); #endif } static __inline__ vector float __ATTRS_o_ai vec_perm(vector float __a, vector float __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; __d = vec_xor(__c, __d); return (vector float)__builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d); #else return (vector float)__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); #endif } #ifdef __VSX__ static __inline__ vector long long __ATTRS_o_ai vec_perm(vector signed long long __a, vector signed long long __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; __d = vec_xor(__c, __d); return (vector signed long long)__builtin_altivec_vperm_4si( (vector int)__b, (vector int)__a, __d); #else return (vector signed long long)__builtin_altivec_vperm_4si( (vector int)__a, (vector int)__b, __c); #endif } static __inline__ vector unsigned long long __ATTRS_o_ai vec_perm(vector unsigned long long __a, vector unsigned long long __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; __d = vec_xor(__c, __d); return (vector unsigned long long)__builtin_altivec_vperm_4si( (vector int)__b, (vector int)__a, __d); #else return (vector unsigned long long)__builtin_altivec_vperm_4si( (vector int)__a, (vector int)__b, __c); #endif } static __inline__ vector bool long long __ATTRS_o_ai vec_perm(vector bool long long __a, vector bool long long __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; __d = vec_xor(__c, __d); return (vector bool long long)__builtin_altivec_vperm_4si( (vector int)__b, (vector int)__a, __d); #else return (vector bool long long)__builtin_altivec_vperm_4si( (vector int)__a, (vector int)__b, __c); #endif } static __inline__ vector double __ATTRS_o_ai vec_perm(vector double __a, vector double __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ 
vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; __d = vec_xor(__c, __d); return (vector double)__builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d); #else return (vector double)__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); #endif } #endif /* vec_vperm */ static __inline__ vector signed char __ATTRS_o_ai vec_vperm( vector signed char __a, vector signed char __b, vector unsigned char __c) { return vec_perm(__a, __b, __c); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vperm(vector unsigned char __a, vector unsigned char __b, vector unsigned char __c) { return vec_perm(__a, __b, __c); } static __inline__ vector bool char __ATTRS_o_ai vec_vperm( vector bool char __a, vector bool char __b, vector unsigned char __c) { return vec_perm(__a, __b, __c); } static __inline__ vector short __ATTRS_o_ai vec_vperm(vector short __a, vector short __b, vector unsigned char __c) { return vec_perm(__a, __b, __c); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vperm(vector unsigned short __a, vector unsigned short __b, vector unsigned char __c) { return vec_perm(__a, __b, __c); } static __inline__ vector bool short __ATTRS_o_ai vec_vperm( vector bool short __a, vector bool short __b, vector unsigned char __c) { return vec_perm(__a, __b, __c); } static __inline__ vector pixel __ATTRS_o_ai vec_vperm(vector pixel __a, vector pixel __b, vector unsigned char __c) { return vec_perm(__a, __b, __c); } static __inline__ vector int __ATTRS_o_ai vec_vperm(vector int __a, vector int __b, vector unsigned char __c) { return vec_perm(__a, __b, __c); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vperm(vector unsigned int __a, vector unsigned int __b, vector unsigned char __c) { return vec_perm(__a, __b, __c); } static __inline__ vector bool int __ATTRS_o_ai vec_vperm(vector bool int __a, vector bool int __b, vector unsigned char __c) { return vec_perm(__a, __b, __c); } static __inline__ vector float __ATTRS_o_ai vec_vperm(vector float __a, vector float __b, vector unsigned char __c) { return vec_perm(__a, __b, __c); } #ifdef __VSX__ static __inline__ vector long long __ATTRS_o_ai vec_vperm( vector long long __a, vector long long __b, vector unsigned char __c) { return vec_perm(__a, __b, __c); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_vperm(vector unsigned long long __a, vector unsigned long long __b, vector unsigned char __c) { return vec_perm(__a, __b, __c); } static __inline__ vector double __ATTRS_o_ai vec_vperm(vector double __a, vector double __b, vector unsigned char __c) { return vec_perm(__a, __b, __c); } #endif /* vec_re */ static __inline__ vector float __ATTRS_o_ai vec_re(vector float __a) { #ifdef __VSX__ return __builtin_vsx_xvresp(__a); #else return __builtin_altivec_vrefp(__a); #endif } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_re(vector double __a) { return __builtin_vsx_xvredp(__a); } #endif /* vec_vrefp */ static __inline__ vector float __attribute__((__always_inline__)) vec_vrefp(vector float __a) { return __builtin_altivec_vrefp(__a); } /* vec_rl */ static __inline__ vector signed char __ATTRS_o_ai vec_rl(vector signed char __a, vector unsigned char __b) { return (vector signed char)__builtin_altivec_vrlb((vector char)__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_rl(vector unsigned char __a, vector unsigned char __b) { return (vector unsigned char)__builtin_altivec_vrlb((vector char)__a, __b); } static __inline__ 
vector short __ATTRS_o_ai vec_rl(vector short __a, vector unsigned short __b) { return __builtin_altivec_vrlh(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_rl(vector unsigned short __a, vector unsigned short __b) { return (vector unsigned short)__builtin_altivec_vrlh((vector short)__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_rl(vector int __a, vector unsigned int __b) { return __builtin_altivec_vrlw(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_rl(vector unsigned int __a, vector unsigned int __b) { return (vector unsigned int)__builtin_altivec_vrlw((vector int)__a, __b); } #ifdef __POWER8_VECTOR__ static __inline__ vector signed long long __ATTRS_o_ai vec_rl(vector signed long long __a, vector unsigned long long __b) { return __builtin_altivec_vrld(__a, __b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_rl(vector unsigned long long __a, vector unsigned long long __b) { return (vector unsigned long long)__builtin_altivec_vrld( (vector long long)__a, __b); } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ vector signed __int128 __ATTRS_o_ai vec_rl(vector signed __int128 __a, vector unsigned __int128 __b) { return (vector signed __int128)(((vector unsigned __int128)__b << (vector unsigned __int128)__a) | ((vector unsigned __int128)__b >> ((__CHAR_BIT__ * sizeof(vector unsigned __int128)) - (vector unsigned __int128)__a))); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_rl(vector unsigned __int128 __a, vector unsigned __int128 __b) { return (__b << __a)|(__b >> ((__CHAR_BIT__ * sizeof(vector unsigned __int128)) - __a)); } #endif /* vec_rlmi */ #ifdef __POWER9_VECTOR__ static __inline__ vector unsigned int __ATTRS_o_ai vec_rlmi(vector unsigned int __a, vector unsigned int __b, vector unsigned int __c) { return __builtin_altivec_vrlwmi(__a, __c, __b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_rlmi(vector unsigned long long __a, vector unsigned long long __b, vector unsigned long long __c) { return __builtin_altivec_vrldmi(__a, __c, __b); } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_rlmi(vector unsigned __int128 __a, vector unsigned __int128 __b, vector unsigned __int128 __c) { return __builtin_altivec_vrlqmi(__a, __c, __b); } static __inline__ vector signed __int128 __ATTRS_o_ai vec_rlmi(vector signed __int128 __a, vector signed __int128 __b, vector signed __int128 __c) { return (vector signed __int128)__builtin_altivec_vrlqmi( (vector unsigned __int128)__a, (vector unsigned __int128)__c, (vector unsigned __int128)__b); } #endif /* vec_rlnm */ #ifdef __POWER9_VECTOR__ static __inline__ vector unsigned int __ATTRS_o_ai vec_rlnm(vector unsigned int __a, vector unsigned int __b, vector unsigned int __c) { vector unsigned int OneByte = { 0x8, 0x8, 0x8, 0x8 }; return __builtin_altivec_vrlwnm(__a, ((__c << OneByte) | __b)); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_rlnm(vector unsigned long long __a, vector unsigned long long __b, vector unsigned long long __c) { vector unsigned long long OneByte = { 0x8, 0x8 }; return __builtin_altivec_vrldnm(__a, ((__c << OneByte) | __b)); } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_rlnm(vector unsigned __int128 __a, vector unsigned __int128 __b, vector unsigned __int128 __c) { // Merge __b and __c using an appropriate 
shuffle. vector unsigned char TmpB = (vector unsigned char)__b; vector unsigned char TmpC = (vector unsigned char)__c; vector unsigned char MaskAndShift = #ifdef __LITTLE_ENDIAN__ __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, -1, -1, -1, 16, 0, 1, -1, -1, -1, -1, -1); #else __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, 31, 30, 15, -1, -1, -1, -1, -1, -1, -1, -1); #endif return __builtin_altivec_vrlqnm(__a, (vector unsigned __int128) MaskAndShift); } static __inline__ vector signed __int128 __ATTRS_o_ai vec_rlnm(vector signed __int128 __a, vector signed __int128 __b, vector signed __int128 __c) { // Merge __b and __c using an appropriate shuffle. vector unsigned char TmpB = (vector unsigned char)__b; vector unsigned char TmpC = (vector unsigned char)__c; vector unsigned char MaskAndShift = #ifdef __LITTLE_ENDIAN__ __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, -1, -1, -1, 16, 0, 1, -1, -1, -1, -1, -1); #else __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, 31, 30, 15, -1, -1, -1, -1, -1, -1, -1, -1); #endif return (vector signed __int128)__builtin_altivec_vrlqnm( (vector unsigned __int128)__a, (vector unsigned __int128)MaskAndShift); } #endif /* vec_vrlb */ static __inline__ vector signed char __ATTRS_o_ai vec_vrlb(vector signed char __a, vector unsigned char __b) { return (vector signed char)__builtin_altivec_vrlb((vector char)__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vrlb(vector unsigned char __a, vector unsigned char __b) { return (vector unsigned char)__builtin_altivec_vrlb((vector char)__a, __b); } /* vec_vrlh */ static __inline__ vector short __ATTRS_o_ai vec_vrlh(vector short __a, vector unsigned short __b) { return __builtin_altivec_vrlh(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vrlh(vector unsigned short __a, vector unsigned short __b) { return (vector unsigned short)__builtin_altivec_vrlh((vector short)__a, __b); } /* vec_vrlw */ static __inline__ vector int __ATTRS_o_ai vec_vrlw(vector int __a, vector unsigned int __b) { return __builtin_altivec_vrlw(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vrlw(vector unsigned int __a, vector unsigned int __b) { return (vector unsigned int)__builtin_altivec_vrlw((vector int)__a, __b); } /* vec_round */ static __inline__ vector float __ATTRS_o_ai vec_round(vector float __a) { return __builtin_altivec_vrfin(__a); } #ifdef __VSX__ #ifdef __XL_COMPAT_ALTIVEC__ static __inline__ vector double __ATTRS_o_ai vec_rint(vector double __a); static __inline__ vector double __ATTRS_o_ai vec_round(vector double __a) { double __fpscr = __builtin_readflm(); __builtin_setrnd(0); vector double __rounded = vec_rint(__a); __builtin_setflm(__fpscr); return __rounded; } #else static __inline__ vector double __ATTRS_o_ai vec_round(vector double __a) { return __builtin_vsx_xvrdpi(__a); } #endif /* vec_rint */ static __inline__ vector float __ATTRS_o_ai vec_rint(vector float __a) { return __builtin_vsx_xvrspic(__a); } static __inline__ vector double __ATTRS_o_ai vec_rint(vector double __a) { return __builtin_vsx_xvrdpic(__a); } /* vec_roundc */ static __inline__ vector float __ATTRS_o_ai vec_roundc(vector float __a) { return __builtin_vsx_xvrspic(__a); } static __inline__ vector double __ATTRS_o_ai vec_roundc(vector double __a) { return __builtin_vsx_xvrdpic(__a); } /* vec_nearbyint */ static __inline__ vector float __ATTRS_o_ai vec_nearbyint(vector float __a) { return __builtin_vsx_xvrspi(__a); } static __inline__ vector double __ATTRS_o_ai 
vec_nearbyint(vector double __a) { return __builtin_vsx_xvrdpi(__a); } #endif /* vec_vrfin */ static __inline__ vector float __attribute__((__always_inline__)) vec_vrfin(vector float __a) { return __builtin_altivec_vrfin(__a); } /* vec_sqrt */ #ifdef __VSX__ static __inline__ vector float __ATTRS_o_ai vec_sqrt(vector float __a) { return __builtin_vsx_xvsqrtsp(__a); } static __inline__ vector double __ATTRS_o_ai vec_sqrt(vector double __a) { return __builtin_vsx_xvsqrtdp(__a); } #endif /* vec_rsqrte */ static __inline__ vector float __ATTRS_o_ai vec_rsqrte(vector float __a) { #ifdef __VSX__ return __builtin_vsx_xvrsqrtesp(__a); #else return __builtin_altivec_vrsqrtefp(__a); #endif } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_rsqrte(vector double __a) { return __builtin_vsx_xvrsqrtedp(__a); } #endif static vector float __ATTRS_o_ai vec_rsqrt(vector float __a) { return __builtin_ppc_rsqrtf(__a); } #ifdef __VSX__ static vector double __ATTRS_o_ai vec_rsqrt(vector double __a) { return __builtin_ppc_rsqrtd(__a); } #endif /* vec_vrsqrtefp */ static __inline__ __vector float __attribute__((__always_inline__)) vec_vrsqrtefp(vector float __a) { return __builtin_altivec_vrsqrtefp(__a); } /* vec_xvtsqrt */ #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_test_swsqrt(vector double __a) { return __builtin_vsx_xvtsqrtdp(__a); } static __inline__ int __ATTRS_o_ai vec_test_swsqrts(vector float __a) { return __builtin_vsx_xvtsqrtsp(__a); } #endif /* vec_sel */ #define __builtin_altivec_vsel_4si vec_sel static __inline__ vector signed char __ATTRS_o_ai vec_sel( vector signed char __a, vector signed char __b, vector unsigned char __c) { return (__a & ~(vector signed char)__c) | (__b & (vector signed char)__c); } static __inline__ vector signed char __ATTRS_o_ai vec_sel(vector signed char __a, vector signed char __b, vector bool char __c) { return (__a & ~(vector signed char)__c) | (__b & (vector signed char)__c); } static __inline__ vector unsigned char __ATTRS_o_ai vec_sel(vector unsigned char __a, vector unsigned char __b, vector unsigned char __c) { return (__a & ~__c) | (__b & __c); } static __inline__ vector unsigned char __ATTRS_o_ai vec_sel( vector unsigned char __a, vector unsigned char __b, vector bool char __c) { return (__a & ~(vector unsigned char)__c) | (__b & (vector unsigned char)__c); } static __inline__ vector bool char __ATTRS_o_ai vec_sel(vector bool char __a, vector bool char __b, vector unsigned char __c) { return (__a & ~(vector bool char)__c) | (__b & (vector bool char)__c); } static __inline__ vector bool char __ATTRS_o_ai vec_sel(vector bool char __a, vector bool char __b, vector bool char __c) { return (__a & ~__c) | (__b & __c); } static __inline__ vector short __ATTRS_o_ai vec_sel(vector short __a, vector short __b, vector unsigned short __c) { return (__a & ~(vector short)__c) | (__b & (vector short)__c); } static __inline__ vector short __ATTRS_o_ai vec_sel(vector short __a, vector short __b, vector bool short __c) { return (__a & ~(vector short)__c) | (__b & (vector short)__c); } static __inline__ vector unsigned short __ATTRS_o_ai vec_sel(vector unsigned short __a, vector unsigned short __b, vector unsigned short __c) { return (__a & ~__c) | (__b & __c); } static __inline__ vector unsigned short __ATTRS_o_ai vec_sel(vector unsigned short __a, vector unsigned short __b, vector bool short __c) { return (__a & ~(vector unsigned short)__c) | (__b & (vector unsigned short)__c); } static __inline__ vector bool short __ATTRS_o_ai vec_sel( vector bool short 
__a, vector bool short __b, vector unsigned short __c) { return (__a & ~(vector bool short)__c) | (__b & (vector bool short)__c); } static __inline__ vector bool short __ATTRS_o_ai vec_sel(vector bool short __a, vector bool short __b, vector bool short __c) { return (__a & ~__c) | (__b & __c); } static __inline__ vector int __ATTRS_o_ai vec_sel(vector int __a, vector int __b, vector unsigned int __c) { return (__a & ~(vector int)__c) | (__b & (vector int)__c); } static __inline__ vector int __ATTRS_o_ai vec_sel(vector int __a, vector int __b, vector bool int __c) { return (__a & ~(vector int)__c) | (__b & (vector int)__c); } static __inline__ vector unsigned int __ATTRS_o_ai vec_sel( vector unsigned int __a, vector unsigned int __b, vector unsigned int __c) { return (__a & ~__c) | (__b & __c); } static __inline__ vector unsigned int __ATTRS_o_ai vec_sel(vector unsigned int __a, vector unsigned int __b, vector bool int __c) { return (__a & ~(vector unsigned int)__c) | (__b & (vector unsigned int)__c); } static __inline__ vector bool int __ATTRS_o_ai vec_sel(vector bool int __a, vector bool int __b, vector unsigned int __c) { return (__a & ~(vector bool int)__c) | (__b & (vector bool int)__c); } static __inline__ vector bool int __ATTRS_o_ai vec_sel(vector bool int __a, vector bool int __b, vector bool int __c) { return (__a & ~__c) | (__b & __c); } static __inline__ vector float __ATTRS_o_ai vec_sel(vector float __a, vector float __b, vector unsigned int __c) { vector int __res = ((vector int)__a & ~(vector int)__c) | ((vector int)__b & (vector int)__c); return (vector float)__res; } static __inline__ vector float __ATTRS_o_ai vec_sel(vector float __a, vector float __b, vector bool int __c) { vector int __res = ((vector int)__a & ~(vector int)__c) | ((vector int)__b & (vector int)__c); return (vector float)__res; } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_sel(vector double __a, vector double __b, vector bool long long __c) { vector long long __res = ((vector long long)__a & ~(vector long long)__c) | ((vector long long)__b & (vector long long)__c); return (vector double)__res; } static __inline__ vector double __ATTRS_o_ai vec_sel(vector double __a, vector double __b, vector unsigned long long __c) { vector long long __res = ((vector long long)__a & ~(vector long long)__c) | ((vector long long)__b & (vector long long)__c); return (vector double)__res; } static __inline__ vector bool long long __ATTRS_o_ai vec_sel(vector bool long long __a, vector bool long long __b, vector bool long long __c) { return (__a & ~__c) | (__b & __c); } static __inline__ vector bool long long __ATTRS_o_ai vec_sel(vector bool long long __a, vector bool long long __b, vector unsigned long long __c) { return (__a & ~(vector bool long long)__c) | (__b & (vector bool long long)__c); } static __inline__ vector signed long long __ATTRS_o_ai vec_sel(vector signed long long __a, vector signed long long __b, vector bool long long __c) { return (__a & ~(vector signed long long)__c) | (__b & (vector signed long long)__c); } static __inline__ vector signed long long __ATTRS_o_ai vec_sel(vector signed long long __a, vector signed long long __b, vector unsigned long long __c) { return (__a & ~(vector signed long long)__c) | (__b & (vector signed long long)__c); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_sel(vector unsigned long long __a, vector unsigned long long __b, vector bool long long __c) { return (__a & ~(vector unsigned long long)__c) | (__b & (vector unsigned long long)__c); } 
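/* A minimal usage sketch of the bitwise select above: vec_sel takes each
   result bit from __b where the corresponding mask bit is 1 and from __a
   where it is 0, so feeding it a comparison mask gives a per-element blend.
   The helper name below is purely illustrative, and it assumes the VSX
   vec_cmpgt overload for vector double defined earlier in this header. */
static __inline__ vector double __attribute__((__always_inline__, __unused__))
__example_vec_max_dbl(vector double __x, vector double __y) {
  /* All-ones in each element where __y > __x, all-zeros elsewhere. */
  vector bool long long __gt = vec_cmpgt(__y, __x);
  /* Select __y in the all-ones elements, __x in the all-zeros elements. */
  return vec_sel(__x, __y, __gt);
}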
static __inline__ vector unsigned long long __ATTRS_o_ai vec_sel(vector unsigned long long __a, vector unsigned long long __b, vector unsigned long long __c) { return (__a & ~__c) | (__b & __c); } #endif /* vec_vsel */ static __inline__ vector signed char __ATTRS_o_ai vec_vsel( vector signed char __a, vector signed char __b, vector unsigned char __c) { return (__a & ~(vector signed char)__c) | (__b & (vector signed char)__c); } static __inline__ vector signed char __ATTRS_o_ai vec_vsel(vector signed char __a, vector signed char __b, vector bool char __c) { return (__a & ~(vector signed char)__c) | (__b & (vector signed char)__c); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsel(vector unsigned char __a, vector unsigned char __b, vector unsigned char __c) { return (__a & ~__c) | (__b & __c); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsel( vector unsigned char __a, vector unsigned char __b, vector bool char __c) { return (__a & ~(vector unsigned char)__c) | (__b & (vector unsigned char)__c); } static __inline__ vector bool char __ATTRS_o_ai vec_vsel(vector bool char __a, vector bool char __b, vector unsigned char __c) { return (__a & ~(vector bool char)__c) | (__b & (vector bool char)__c); } static __inline__ vector bool char __ATTRS_o_ai vec_vsel(vector bool char __a, vector bool char __b, vector bool char __c) { return (__a & ~__c) | (__b & __c); } static __inline__ vector short __ATTRS_o_ai vec_vsel(vector short __a, vector short __b, vector unsigned short __c) { return (__a & ~(vector short)__c) | (__b & (vector short)__c); } static __inline__ vector short __ATTRS_o_ai vec_vsel(vector short __a, vector short __b, vector bool short __c) { return (__a & ~(vector short)__c) | (__b & (vector short)__c); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsel(vector unsigned short __a, vector unsigned short __b, vector unsigned short __c) { return (__a & ~__c) | (__b & __c); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsel(vector unsigned short __a, vector unsigned short __b, vector bool short __c) { return (__a & ~(vector unsigned short)__c) | (__b & (vector unsigned short)__c); } static __inline__ vector bool short __ATTRS_o_ai vec_vsel( vector bool short __a, vector bool short __b, vector unsigned short __c) { return (__a & ~(vector bool short)__c) | (__b & (vector bool short)__c); } static __inline__ vector bool short __ATTRS_o_ai vec_vsel(vector bool short __a, vector bool short __b, vector bool short __c) { return (__a & ~__c) | (__b & __c); } static __inline__ vector int __ATTRS_o_ai vec_vsel(vector int __a, vector int __b, vector unsigned int __c) { return (__a & ~(vector int)__c) | (__b & (vector int)__c); } static __inline__ vector int __ATTRS_o_ai vec_vsel(vector int __a, vector int __b, vector bool int __c) { return (__a & ~(vector int)__c) | (__b & (vector int)__c); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsel( vector unsigned int __a, vector unsigned int __b, vector unsigned int __c) { return (__a & ~__c) | (__b & __c); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsel( vector unsigned int __a, vector unsigned int __b, vector bool int __c) { return (__a & ~(vector unsigned int)__c) | (__b & (vector unsigned int)__c); } static __inline__ vector bool int __ATTRS_o_ai vec_vsel(vector bool int __a, vector bool int __b, vector unsigned int __c) { return (__a & ~(vector bool int)__c) | (__b & (vector bool int)__c); } static __inline__ vector bool int __ATTRS_o_ai vec_vsel(vector bool int __a, vector bool 
int __b, vector bool int __c) { return (__a & ~__c) | (__b & __c); } static __inline__ vector float __ATTRS_o_ai vec_vsel(vector float __a, vector float __b, vector unsigned int __c) { vector int __res = ((vector int)__a & ~(vector int)__c) | ((vector int)__b & (vector int)__c); return (vector float)__res; } static __inline__ vector float __ATTRS_o_ai vec_vsel(vector float __a, vector float __b, vector bool int __c) { vector int __res = ((vector int)__a & ~(vector int)__c) | ((vector int)__b & (vector int)__c); return (vector float)__res; } /* vec_sl */ // vec_sl does modulo arithmetic on __b first, so __b is allowed to be more // than the length of __a. static __inline__ vector unsigned char __ATTRS_o_ai vec_sl(vector unsigned char __a, vector unsigned char __b) { return __a << (__b % (vector unsigned char)(sizeof(unsigned char) * __CHAR_BIT__)); } static __inline__ vector signed char __ATTRS_o_ai vec_sl(vector signed char __a, vector unsigned char __b) { return (vector signed char)vec_sl((vector unsigned char)__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_sl(vector unsigned short __a, vector unsigned short __b) { return __a << (__b % (vector unsigned short)(sizeof(unsigned short) * __CHAR_BIT__)); } static __inline__ vector short __ATTRS_o_ai vec_sl(vector short __a, vector unsigned short __b) { return (vector short)vec_sl((vector unsigned short)__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_sl(vector unsigned int __a, vector unsigned int __b) { return __a << (__b % (vector unsigned int)(sizeof(unsigned int) * __CHAR_BIT__)); } static __inline__ vector int __ATTRS_o_ai vec_sl(vector int __a, vector unsigned int __b) { return (vector int)vec_sl((vector unsigned int)__a, __b); } #ifdef __POWER8_VECTOR__ static __inline__ vector unsigned long long __ATTRS_o_ai vec_sl(vector unsigned long long __a, vector unsigned long long __b) { return __a << (__b % (vector unsigned long long)(sizeof(unsigned long long) * __CHAR_BIT__)); } static __inline__ vector long long __ATTRS_o_ai vec_sl(vector long long __a, vector unsigned long long __b) { return (vector long long)vec_sl((vector unsigned long long)__a, __b); } #elif defined(__VSX__) static __inline__ vector unsigned char __ATTRS_o_ai vec_vspltb(vector unsigned char __a, unsigned char __b); static __inline__ vector unsigned long long __ATTRS_o_ai vec_sl(vector unsigned long long __a, vector unsigned long long __b) { __b %= (vector unsigned long long)(sizeof(unsigned long long) * __CHAR_BIT__); // Big endian element one (the right doubleword) can be left shifted as-is. // The other element needs to be swapped into the right doubleword and // shifted. Then the right doublewords of the two result vectors are merged. 
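/* Usage note: as the comment above says, vec_sl reduces each element's shift
 * count modulo the element width, so out-of-range counts are well defined
 * (unlike scalar << in C). Minimal sketch, values illustrative:
 *
 *   vector unsigned int v = {1, 1, 1, 1};
 *   vector unsigned int s = {33, 1, 65, 1};
 *   vec_sl(v, s);   // counts taken mod 32, so every lane becomes 2
 *
 * The pre-POWER8 VSX path that follows cannot shift 64-bit elements directly;
 * vslo/vsl only shift the full 128-bit register, with the count taken from the
 * rightmost register byte. Each doubleword is therefore shifted on its own
 * (the second one after being swapped into the low doubleword) and the two
 * partial results are merged, as the code below spells out.
 */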
vector signed long long __rightelt = (vector signed long long)__builtin_altivec_vslo((vector signed int)__a, (vector signed int)__b); #ifdef __LITTLE_ENDIAN__ __rightelt = (vector signed long long)__builtin_altivec_vsl( (vector signed int)__rightelt, (vector signed int)vec_vspltb((vector unsigned char)__b, 0)); #else __rightelt = (vector signed long long)__builtin_altivec_vsl( (vector signed int)__rightelt, (vector signed int)vec_vspltb((vector unsigned char)__b, 15)); #endif __a = __builtin_shufflevector(__a, __a, 1, 0); __b = __builtin_shufflevector(__b, __b, 1, 0); vector signed long long __leftelt = (vector signed long long)__builtin_altivec_vslo((vector signed int)__a, (vector signed int)__b); #ifdef __LITTLE_ENDIAN__ __leftelt = (vector signed long long)__builtin_altivec_vsl( (vector signed int)__leftelt, (vector signed int)vec_vspltb((vector unsigned char)__b, 0)); return (vector unsigned long long)__builtin_shufflevector(__rightelt, __leftelt, 0, 2); #else __leftelt = (vector signed long long)__builtin_altivec_vsl( (vector signed int)__leftelt, (vector signed int)vec_vspltb((vector unsigned char)__b, 15)); return (vector unsigned long long)__builtin_shufflevector(__leftelt, __rightelt, 1, 3); #endif } static __inline__ vector long long __ATTRS_o_ai vec_sl(vector long long __a, vector unsigned long long __b) { return (vector long long)vec_sl((vector unsigned long long)__a, __b); } #endif /* __VSX__ */ /* vec_vslb */ #define __builtin_altivec_vslb vec_vslb static __inline__ vector signed char __ATTRS_o_ai vec_vslb(vector signed char __a, vector unsigned char __b) { return vec_sl(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vslb(vector unsigned char __a, vector unsigned char __b) { return vec_sl(__a, __b); } /* vec_vslh */ #define __builtin_altivec_vslh vec_vslh static __inline__ vector short __ATTRS_o_ai vec_vslh(vector short __a, vector unsigned short __b) { return vec_sl(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vslh(vector unsigned short __a, vector unsigned short __b) { return vec_sl(__a, __b); } /* vec_vslw */ #define __builtin_altivec_vslw vec_vslw static __inline__ vector int __ATTRS_o_ai vec_vslw(vector int __a, vector unsigned int __b) { return vec_sl(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vslw(vector unsigned int __a, vector unsigned int __b) { return vec_sl(__a, __b); } /* vec_sld */ #define __builtin_altivec_vsldoi_4si vec_sld static __inline__ vector signed char __ATTRS_o_ai vec_sld( vector signed char __a, vector signed char __b, unsigned const int __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } static __inline__ vector unsigned char __ATTRS_o_ai vec_sld(vector unsigned char __a, vector unsigned char __b, unsigned const int __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d 
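/* Usage note: vec_sld(__a, __b, __c) behaves as if __a and __b were
 * concatenated (32 bytes) and 16 consecutive bytes were taken starting at
 * offset __c; only the low four bits of __c participate (__c & 0x0F above).
 * It is built on vec_perm with a computed index vector, mirrored on
 * little-endian targets so that element-order semantics are preserved.
 * Minimal sketch in element terms (values illustrative):
 *
 *   vector unsigned char a = {0, 1, 2, 3, 4, 5, 6, 7,
 *                             8, 9, 10, 11, 12, 13, 14, 15};
 *   vector unsigned char b = {16, 17, 18, 19, 20, 21, 22, 23,
 *                             24, 25, 26, 27, 28, 29, 30, 31};
 *   vec_sld(a, b, 3);   // elements 3..15 of a followed by elements 0..2 of b
 */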
+ 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } static __inline__ vector bool char __ATTRS_o_ai vec_sld(vector bool char __a, vector bool char __b, unsigned const int __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } static __inline__ vector signed short __ATTRS_o_ai vec_sld( vector signed short __a, vector signed short __b, unsigned const int __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } static __inline__ vector unsigned short __ATTRS_o_ai vec_sld(vector unsigned short __a, vector unsigned short __b, unsigned const int __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } static __inline__ vector bool short __ATTRS_o_ai vec_sld(vector bool short __a, vector bool short __b, unsigned const int __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } static __inline__ vector pixel __ATTRS_o_ai vec_sld(vector pixel __a, vector pixel __b, unsigned const int __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } static __inline__ vector signed int __ATTRS_o_ai vec_sld(vector signed int __a, vector signed int __b, unsigned const int __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - 
__d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } static __inline__ vector unsigned int __ATTRS_o_ai vec_sld( vector unsigned int __a, vector unsigned int __b, unsigned const int __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } static __inline__ vector bool int __ATTRS_o_ai vec_sld(vector bool int __a, vector bool int __b, unsigned const int __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } static __inline__ vector float __ATTRS_o_ai vec_sld(vector float __a, vector float __b, unsigned const int __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } #ifdef __VSX__ static __inline__ vector bool long long __ATTRS_o_ai vec_sld(vector bool long long __a, vector bool long long __b, unsigned const int __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } static __inline__ vector signed long long __ATTRS_o_ai vec_sld(vector signed long long __a, vector signed long long __b, unsigned const int __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } static __inline__ vector unsigned long long __ATTRS_o_ai vec_sld(vector unsigned long long __a, vector unsigned long long __b, unsigned const int __c) { unsigned char __d = __c & 0x0F; #ifdef 
__LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } static __inline__ vector double __ATTRS_o_ai vec_sld(vector double __a, vector double __b, unsigned const int __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } #endif /* vec_sldw */ static __inline__ vector signed char __ATTRS_o_ai vec_sldw( vector signed char __a, vector signed char __b, unsigned const int __c) { return vec_sld(__a, __b, ((__c << 2) & 0x0F)); } static __inline__ vector unsigned char __ATTRS_o_ai vec_sldw(vector unsigned char __a, vector unsigned char __b, unsigned const int __c) { return vec_sld(__a, __b, ((__c << 2) & 0x0F)); } static __inline__ vector signed short __ATTRS_o_ai vec_sldw( vector signed short __a, vector signed short __b, unsigned const int __c) { return vec_sld(__a, __b, ((__c << 2) & 0x0F)); } static __inline__ vector unsigned short __ATTRS_o_ai vec_sldw(vector unsigned short __a, vector unsigned short __b, unsigned const int __c) { return vec_sld(__a, __b, ((__c << 2) & 0x0F)); } static __inline__ vector signed int __ATTRS_o_ai vec_sldw(vector signed int __a, vector signed int __b, unsigned const int __c) { return vec_sld(__a, __b, ((__c << 2) & 0x0F)); } static __inline__ vector unsigned int __ATTRS_o_ai vec_sldw( vector unsigned int __a, vector unsigned int __b, unsigned const int __c) { return vec_sld(__a, __b, ((__c << 2) & 0x0F)); } static __inline__ vector float __ATTRS_o_ai vec_sldw( vector float __a, vector float __b, unsigned const int __c) { return vec_sld(__a, __b, ((__c << 2) & 0x0F)); } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai vec_sldw(vector signed long long __a, vector signed long long __b, unsigned const int __c) { return vec_sld(__a, __b, ((__c << 2) & 0x0F)); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_sldw(vector unsigned long long __a, vector unsigned long long __b, unsigned const int __c) { return vec_sld(__a, __b, ((__c << 2) & 0x0F)); } static __inline__ vector double __ATTRS_o_ai vec_sldw( vector double __a, vector double __b, unsigned const int __c) { return vec_sld(__a, __b, ((__c << 2) & 0x0F)); } #endif #ifdef __POWER9_VECTOR__ /* vec_slv */ static __inline__ vector unsigned char __ATTRS_o_ai vec_slv(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vslv(__a, __b); } /* vec_srv */ static __inline__ vector unsigned char __ATTRS_o_ai vec_srv(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vsrv(__a, __b); } #endif /* vec_vsldoi */ static __inline__ vector signed char __ATTRS_o_ai vec_vsldoi(vector signed char __a, vector signed char __b, unsigned char __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned 
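/* Usage note: vec_sldw is the word-granular form of vec_sld; as the bodies
 * above show, vec_sldw(__a, __b, __c) is vec_sld(__a, __b, (__c << 2) & 0x0F),
 * so only counts 0..3 are distinct. The POWER9-only vec_slv/vec_srv simply
 * forward to the vslv/vsrv builtins. Minimal sketch (values illustrative):
 *
 *   vector signed int a = {10, 11, 12, 13};
 *   vector signed int b = {20, 21, 22, 23};
 *   vec_sldw(a, b, 1);   // same as vec_sld(a, b, 4) -> {11, 12, 13, 20}
 */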
char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsldoi( vector unsigned char __a, vector unsigned char __b, unsigned char __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } static __inline__ vector short __ATTRS_o_ai vec_vsldoi(vector short __a, vector short __b, unsigned char __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsldoi( vector unsigned short __a, vector unsigned short __b, unsigned char __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } static __inline__ vector pixel __ATTRS_o_ai vec_vsldoi(vector pixel __a, vector pixel __b, unsigned char __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } static __inline__ vector int __ATTRS_o_ai vec_vsldoi(vector int __a, vector int __b, unsigned char __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsldoi( vector unsigned int __a, vector unsigned int __b, unsigned char __c) { unsigned char __d = 
__c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } static __inline__ vector float __ATTRS_o_ai vec_vsldoi(vector float __a, vector float __b, unsigned char __c) { unsigned char __d = __c & 0x0F; #ifdef __LITTLE_ENDIAN__ return vec_perm( __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d, 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d, 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d, 31 - __d)); #else return vec_perm( __a, __b, (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d + 4, __d + 5, __d + 6, __d + 7, __d + 8, __d + 9, __d + 10, __d + 11, __d + 12, __d + 13, __d + 14, __d + 15)); #endif } /* vec_sll */ static __inline__ vector signed char __ATTRS_o_ai vec_sll(vector signed char __a, vector unsigned char __b) { return (vector signed char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector signed char __ATTRS_o_ai vec_sll(vector signed char __a, vector unsigned short __b) { return (vector signed char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector signed char __ATTRS_o_ai vec_sll(vector signed char __a, vector unsigned int __b) { return (vector signed char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_sll(vector unsigned char __a, vector unsigned char __b) { return (vector unsigned char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_sll(vector unsigned char __a, vector unsigned short __b) { return (vector unsigned char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_sll(vector unsigned char __a, vector unsigned int __b) { return (vector unsigned char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector bool char __ATTRS_o_ai vec_sll(vector bool char __a, vector unsigned char __b) { return (vector bool char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector bool char __ATTRS_o_ai vec_sll(vector bool char __a, vector unsigned short __b) { return (vector bool char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector bool char __ATTRS_o_ai vec_sll(vector bool char __a, vector unsigned int __b) { return (vector bool char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector short __ATTRS_o_ai vec_sll(vector short __a, vector unsigned char __b) { return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector short __ATTRS_o_ai vec_sll(vector short __a, vector unsigned short __b) { return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector short __ATTRS_o_ai vec_sll(vector short __a, vector unsigned int __b) { return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_sll(vector unsigned short __a, vector unsigned char __b) { return (vector unsigned short)__builtin_altivec_vsl((vector int)__a, 
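/* Usage note: unlike vec_sl, vec_sll ("shift left long") shifts the whole
 * 128-bit register, by a 0..7 bit count taken from the low three bits of __b;
 * per the vsl instruction description that count should be replicated in every
 * byte of __b. It is normally paired with vec_slo, which handles the whole-byte
 * part of a shift. Minimal sketch (assumes vec_splats; count 20 = 2 bytes plus
 * 4 bits; v stands for any vector operand accepted by these overloads):
 *
 *   vector unsigned char sh = vec_splats((unsigned char)20);
 *   v = vec_slo(v, sh);   // left by 20 >> 3 = 2 octets
 *   v = vec_sll(v, sh);   // then by 20 & 7 = 4 more bits
 */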
(vector int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_sll(vector unsigned short __a, vector unsigned short __b) { return (vector unsigned short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_sll(vector unsigned short __a, vector unsigned int __b) { return (vector unsigned short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_sll(vector bool short __a, vector unsigned char __b) { return (vector bool short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_sll(vector bool short __a, vector unsigned short __b) { return (vector bool short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_sll(vector bool short __a, vector unsigned int __b) { return (vector bool short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector pixel __ATTRS_o_ai vec_sll(vector pixel __a, vector unsigned char __b) { return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector pixel __ATTRS_o_ai vec_sll(vector pixel __a, vector unsigned short __b) { return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector pixel __ATTRS_o_ai vec_sll(vector pixel __a, vector unsigned int __b) { return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector int __ATTRS_o_ai vec_sll(vector int __a, vector unsigned char __b) { return (vector int)__builtin_altivec_vsl(__a, (vector int)__b); } static __inline__ vector int __ATTRS_o_ai vec_sll(vector int __a, vector unsigned short __b) { return (vector int)__builtin_altivec_vsl(__a, (vector int)__b); } static __inline__ vector int __ATTRS_o_ai vec_sll(vector int __a, vector unsigned int __b) { return (vector int)__builtin_altivec_vsl(__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_sll(vector unsigned int __a, vector unsigned char __b) { return (vector unsigned int)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_sll(vector unsigned int __a, vector unsigned short __b) { return (vector unsigned int)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_sll(vector unsigned int __a, vector unsigned int __b) { return (vector unsigned int)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector bool int __ATTRS_o_ai vec_sll(vector bool int __a, vector unsigned char __b) { return (vector bool int)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector bool int __ATTRS_o_ai vec_sll(vector bool int __a, vector unsigned short __b) { return (vector bool int)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector bool int __ATTRS_o_ai vec_sll(vector bool int __a, vector unsigned int __b) { return (vector bool int)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai vec_sll(vector signed long long __a, vector unsigned char __b) { return (vector signed long long)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_sll(vector unsigned long long __a, vector unsigned char __b) { return (vector unsigned 
long long)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } #endif /* vec_vsl */ static __inline__ vector signed char __ATTRS_o_ai vec_vsl(vector signed char __a, vector unsigned char __b) { return (vector signed char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector signed char __ATTRS_o_ai vec_vsl(vector signed char __a, vector unsigned short __b) { return (vector signed char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector signed char __ATTRS_o_ai vec_vsl(vector signed char __a, vector unsigned int __b) { return (vector signed char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsl(vector unsigned char __a, vector unsigned char __b) { return (vector unsigned char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsl(vector unsigned char __a, vector unsigned short __b) { return (vector unsigned char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsl(vector unsigned char __a, vector unsigned int __b) { return (vector unsigned char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector bool char __ATTRS_o_ai vec_vsl(vector bool char __a, vector unsigned char __b) { return (vector bool char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector bool char __ATTRS_o_ai vec_vsl(vector bool char __a, vector unsigned short __b) { return (vector bool char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector bool char __ATTRS_o_ai vec_vsl(vector bool char __a, vector unsigned int __b) { return (vector bool char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector short __ATTRS_o_ai vec_vsl(vector short __a, vector unsigned char __b) { return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector short __ATTRS_o_ai vec_vsl(vector short __a, vector unsigned short __b) { return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector short __ATTRS_o_ai vec_vsl(vector short __a, vector unsigned int __b) { return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsl(vector unsigned short __a, vector unsigned char __b) { return (vector unsigned short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsl(vector unsigned short __a, vector unsigned short __b) { return (vector unsigned short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsl(vector unsigned short __a, vector unsigned int __b) { return (vector unsigned short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_vsl(vector bool short __a, vector unsigned char __b) { return (vector bool short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_vsl(vector bool short __a, vector unsigned short __b) { return (vector bool short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_vsl(vector bool short __a, vector unsigned int __b) { return (vector bool short)__builtin_altivec_vsl((vector 
int)__a, (vector int)__b); } static __inline__ vector pixel __ATTRS_o_ai vec_vsl(vector pixel __a, vector unsigned char __b) { return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector pixel __ATTRS_o_ai vec_vsl(vector pixel __a, vector unsigned short __b) { return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector pixel __ATTRS_o_ai vec_vsl(vector pixel __a, vector unsigned int __b) { return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector int __ATTRS_o_ai vec_vsl(vector int __a, vector unsigned char __b) { return (vector int)__builtin_altivec_vsl(__a, (vector int)__b); } static __inline__ vector int __ATTRS_o_ai vec_vsl(vector int __a, vector unsigned short __b) { return (vector int)__builtin_altivec_vsl(__a, (vector int)__b); } static __inline__ vector int __ATTRS_o_ai vec_vsl(vector int __a, vector unsigned int __b) { return (vector int)__builtin_altivec_vsl(__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsl(vector unsigned int __a, vector unsigned char __b) { return (vector unsigned int)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsl(vector unsigned int __a, vector unsigned short __b) { return (vector unsigned int)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsl(vector unsigned int __a, vector unsigned int __b) { return (vector unsigned int)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector bool int __ATTRS_o_ai vec_vsl(vector bool int __a, vector unsigned char __b) { return (vector bool int)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector bool int __ATTRS_o_ai vec_vsl(vector bool int __a, vector unsigned short __b) { return (vector bool int)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } static __inline__ vector bool int __ATTRS_o_ai vec_vsl(vector bool int __a, vector unsigned int __b) { return (vector bool int)__builtin_altivec_vsl((vector int)__a, (vector int)__b); } /* vec_slo */ static __inline__ vector signed char __ATTRS_o_ai vec_slo(vector signed char __a, vector signed char __b) { return (vector signed char)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector signed char __ATTRS_o_ai vec_slo(vector signed char __a, vector unsigned char __b) { return (vector signed char)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_slo(vector unsigned char __a, vector signed char __b) { return (vector unsigned char)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_slo(vector unsigned char __a, vector unsigned char __b) { return (vector unsigned char)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector short __ATTRS_o_ai vec_slo(vector short __a, vector signed char __b) { return (vector short)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector short __ATTRS_o_ai vec_slo(vector short __a, vector unsigned char __b) { return (vector short)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_slo(vector unsigned short __a, vector signed char __b) { return (vector unsigned short)__builtin_altivec_vslo((vector int)__a, 
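/* Usage note: vec_slo ("shift left by octets") moves the whole register left
 * by a byte count; per the vslo instruction description the count is read from
 * bits 121:124 of __b, i.e. from the rightmost register byte with the count
 * placed as (count << 3). That rightmost-byte sourcing is also why the VSX
 * vec_sl path above splats byte 0 (little endian) or byte 15 (big endian) of
 * the shift vector before using vsl. Minimal sketch (assumes vec_splats):
 *
 *   vector unsigned char sh = vec_splats((unsigned char)(3 << 3));
 *   v = vec_slo(v, sh);   // whole register left by 3 bytes, zeros shifted in
 */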
(vector int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_slo(vector unsigned short __a, vector unsigned char __b) { return (vector unsigned short)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector pixel __ATTRS_o_ai vec_slo(vector pixel __a, vector signed char __b) { return (vector pixel)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector pixel __ATTRS_o_ai vec_slo(vector pixel __a, vector unsigned char __b) { return (vector pixel)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector int __ATTRS_o_ai vec_slo(vector int __a, vector signed char __b) { return (vector int)__builtin_altivec_vslo(__a, (vector int)__b); } static __inline__ vector int __ATTRS_o_ai vec_slo(vector int __a, vector unsigned char __b) { return (vector int)__builtin_altivec_vslo(__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_slo(vector unsigned int __a, vector signed char __b) { return (vector unsigned int)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_slo(vector unsigned int __a, vector unsigned char __b) { return (vector unsigned int)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector float __ATTRS_o_ai vec_slo(vector float __a, vector signed char __b) { return (vector float)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector float __ATTRS_o_ai vec_slo(vector float __a, vector unsigned char __b) { return (vector float)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai vec_slo(vector signed long long __a, vector signed char __b) { return (vector signed long long)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector signed long long __ATTRS_o_ai vec_slo(vector signed long long __a, vector unsigned char __b) { return (vector signed long long)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_slo(vector unsigned long long __a, vector signed char __b) { return (vector unsigned long long)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_slo(vector unsigned long long __a, vector unsigned char __b) { return (vector unsigned long long)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } #endif /* vec_vslo */ static __inline__ vector signed char __ATTRS_o_ai vec_vslo(vector signed char __a, vector signed char __b) { return (vector signed char)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector signed char __ATTRS_o_ai vec_vslo(vector signed char __a, vector unsigned char __b) { return (vector signed char)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vslo(vector unsigned char __a, vector signed char __b) { return (vector unsigned char)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vslo(vector unsigned char __a, vector unsigned char __b) { return (vector unsigned char)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector short __ATTRS_o_ai vec_vslo(vector short __a, vector signed char __b) { return (vector short)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector 
short __ATTRS_o_ai vec_vslo(vector short __a, vector unsigned char __b) { return (vector short)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vslo(vector unsigned short __a, vector signed char __b) { return (vector unsigned short)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vslo(vector unsigned short __a, vector unsigned char __b) { return (vector unsigned short)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector pixel __ATTRS_o_ai vec_vslo(vector pixel __a, vector signed char __b) { return (vector pixel)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector pixel __ATTRS_o_ai vec_vslo(vector pixel __a, vector unsigned char __b) { return (vector pixel)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector int __ATTRS_o_ai vec_vslo(vector int __a, vector signed char __b) { return (vector int)__builtin_altivec_vslo(__a, (vector int)__b); } static __inline__ vector int __ATTRS_o_ai vec_vslo(vector int __a, vector unsigned char __b) { return (vector int)__builtin_altivec_vslo(__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vslo(vector unsigned int __a, vector signed char __b) { return (vector unsigned int)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vslo(vector unsigned int __a, vector unsigned char __b) { return (vector unsigned int)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector float __ATTRS_o_ai vec_vslo(vector float __a, vector signed char __b) { return (vector float)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } static __inline__ vector float __ATTRS_o_ai vec_vslo(vector float __a, vector unsigned char __b) { return (vector float)__builtin_altivec_vslo((vector int)__a, (vector int)__b); } /* vec_splat */ static __inline__ vector signed char __ATTRS_o_ai vec_splat(vector signed char __a, unsigned const int __b) { return vec_perm(__a, __a, (vector unsigned char)(__b & 0x0F)); } static __inline__ vector unsigned char __ATTRS_o_ai vec_splat(vector unsigned char __a, unsigned const int __b) { return vec_perm(__a, __a, (vector unsigned char)(__b & 0x0F)); } static __inline__ vector bool char __ATTRS_o_ai vec_splat(vector bool char __a, unsigned const int __b) { return vec_perm(__a, __a, (vector unsigned char)(__b & 0x0F)); } static __inline__ vector signed short __ATTRS_o_ai vec_splat(vector signed short __a, unsigned const int __b) { unsigned char b0 = (__b & 0x07) * 2; unsigned char b1 = b0 + 1; return vec_perm(__a, __a, (vector unsigned char)(b0, b1, b0, b1, b0, b1, b0, b1, b0, b1, b0, b1, b0, b1, b0, b1)); } static __inline__ vector unsigned short __ATTRS_o_ai vec_splat(vector unsigned short __a, unsigned const int __b) { unsigned char b0 = (__b & 0x07) * 2; unsigned char b1 = b0 + 1; return vec_perm(__a, __a, (vector unsigned char)(b0, b1, b0, b1, b0, b1, b0, b1, b0, b1, b0, b1, b0, b1, b0, b1)); } static __inline__ vector bool short __ATTRS_o_ai vec_splat(vector bool short __a, unsigned const int __b) { unsigned char b0 = (__b & 0x07) * 2; unsigned char b1 = b0 + 1; return vec_perm(__a, __a, (vector unsigned char)(b0, b1, b0, b1, b0, b1, b0, b1, b0, b1, b0, b1, b0, b1, b0, b1)); } static __inline__ vector pixel __ATTRS_o_ai vec_splat(vector pixel __a, unsigned const int __b) { unsigned char b0 = (__b & 0x07) 
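/* Usage note: vec_splat(__a, __b) replicates one element of __a across the
 * whole vector; the index is reduced modulo the element count (the & 0x0F,
 * 0x07, 0x03, 0x01 masks here) and the replication is expressed as a vec_perm
 * whose index vector repeats the chosen element's byte positions. __b is
 * normally a compile-time constant. Minimal sketch (values illustrative):
 *
 *   vector signed int v = {10, 20, 30, 40};
 *   vec_splat(v, 2);   // {30, 30, 30, 30}
 *   vec_splat(v, 6);   // index taken mod 4, same result as vec_splat(v, 2)
 */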
* 2; unsigned char b1 = b0 + 1; return vec_perm(__a, __a, (vector unsigned char)(b0, b1, b0, b1, b0, b1, b0, b1, b0, b1, b0, b1, b0, b1, b0, b1)); } static __inline__ vector signed int __ATTRS_o_ai vec_splat(vector signed int __a, unsigned const int __b) { unsigned char b0 = (__b & 0x03) * 4; unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3; return vec_perm(__a, __a, (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0, b1, b2, b3, b0, b1, b2, b3)); } static __inline__ vector unsigned int __ATTRS_o_ai vec_splat(vector unsigned int __a, unsigned const int __b) { unsigned char b0 = (__b & 0x03) * 4; unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3; return vec_perm(__a, __a, (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0, b1, b2, b3, b0, b1, b2, b3)); } static __inline__ vector bool int __ATTRS_o_ai vec_splat(vector bool int __a, unsigned const int __b) { unsigned char b0 = (__b & 0x03) * 4; unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3; return vec_perm(__a, __a, (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0, b1, b2, b3, b0, b1, b2, b3)); } static __inline__ vector float __ATTRS_o_ai vec_splat(vector float __a, unsigned const int __b) { unsigned char b0 = (__b & 0x03) * 4; unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3; return vec_perm(__a, __a, (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0, b1, b2, b3, b0, b1, b2, b3)); } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_splat(vector double __a, unsigned const int __b) { unsigned char b0 = (__b & 0x01) * 8; unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4, b5 = b0 + 5, b6 = b0 + 6, b7 = b0 + 7; return vec_perm(__a, __a, (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7, b0, b1, b2, b3, b4, b5, b6, b7)); } static __inline__ vector bool long long __ATTRS_o_ai vec_splat(vector bool long long __a, unsigned const int __b) { unsigned char b0 = (__b & 0x01) * 8; unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4, b5 = b0 + 5, b6 = b0 + 6, b7 = b0 + 7; return vec_perm(__a, __a, (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7, b0, b1, b2, b3, b4, b5, b6, b7)); } static __inline__ vector signed long long __ATTRS_o_ai vec_splat(vector signed long long __a, unsigned const int __b) { unsigned char b0 = (__b & 0x01) * 8; unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4, b5 = b0 + 5, b6 = b0 + 6, b7 = b0 + 7; return vec_perm(__a, __a, (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7, b0, b1, b2, b3, b4, b5, b6, b7)); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_splat(vector unsigned long long __a, unsigned const int __b) { unsigned char b0 = (__b & 0x01) * 8; unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4, b5 = b0 + 5, b6 = b0 + 6, b7 = b0 + 7; return vec_perm(__a, __a, (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7, b0, b1, b2, b3, b4, b5, b6, b7)); } #endif /* vec_vspltb */ #define __builtin_altivec_vspltb vec_vspltb static __inline__ vector signed char __ATTRS_o_ai vec_vspltb(vector signed char __a, unsigned char __b) { return vec_perm(__a, __a, (vector unsigned char)(__b)); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vspltb(vector unsigned char __a, unsigned char __b) { return vec_perm(__a, __a, (vector unsigned char)(__b)); } static __inline__ vector bool char __ATTRS_o_ai vec_vspltb(vector bool char __a, unsigned char __b) { return vec_perm(__a, __a, (vector unsigned char)(__b)); } /* vec_vsplth */ #define __builtin_altivec_vsplth vec_vsplth static __inline__ 
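/* Usage note: vec_vspltb, vec_vsplth and vec_vspltw are the element-size
 * specific spellings of the same splat; __b is taken as a raw byte, halfword or
 * word index and is not masked here, so it should already be in range. The VSX
 * vec_sl/vec_sr fallbacks above rely on vec_vspltb(__b, 0 or 15) to replicate
 * the shift-count byte into every byte before vsl/vsr. Minimal sketch (assumes
 * vec_splats; values illustrative):
 *
 *   vector unsigned char v = vec_splats((unsigned char)7);
 *   vec_vspltb(v, 15);   // every byte becomes element 15 of v (here, 7)
 */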
vector short __ATTRS_o_ai vec_vsplth(vector short __a, unsigned char __b) { __b *= 2; unsigned char b1 = __b + 1; return vec_perm(__a, __a, (vector unsigned char)(__b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1)); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsplth(vector unsigned short __a, unsigned char __b) { __b *= 2; unsigned char b1 = __b + 1; return vec_perm(__a, __a, (vector unsigned char)(__b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1)); } static __inline__ vector bool short __ATTRS_o_ai vec_vsplth(vector bool short __a, unsigned char __b) { __b *= 2; unsigned char b1 = __b + 1; return vec_perm(__a, __a, (vector unsigned char)(__b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1)); } static __inline__ vector pixel __ATTRS_o_ai vec_vsplth(vector pixel __a, unsigned char __b) { __b *= 2; unsigned char b1 = __b + 1; return vec_perm(__a, __a, (vector unsigned char)(__b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1)); } /* vec_vspltw */ #define __builtin_altivec_vspltw vec_vspltw static __inline__ vector int __ATTRS_o_ai vec_vspltw(vector int __a, unsigned char __b) { __b *= 4; unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3; return vec_perm(__a, __a, (vector unsigned char)(__b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3)); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vspltw(vector unsigned int __a, unsigned char __b) { __b *= 4; unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3; return vec_perm(__a, __a, (vector unsigned char)(__b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3)); } static __inline__ vector bool int __ATTRS_o_ai vec_vspltw(vector bool int __a, unsigned char __b) { __b *= 4; unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3; return vec_perm(__a, __a, (vector unsigned char)(__b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3)); } static __inline__ vector float __ATTRS_o_ai vec_vspltw(vector float __a, unsigned char __b) { __b *= 4; unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3; return vec_perm(__a, __a, (vector unsigned char)(__b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3)); } /* vec_splat_s8 */ #define __builtin_altivec_vspltisb vec_splat_s8 // FIXME: parameter should be treated as 5-bit signed literal static __inline__ vector signed char __ATTRS_o_ai vec_splat_s8(signed char __a) { return (vector signed char)(__a); } /* vec_vspltisb */ // FIXME: parameter should be treated as 5-bit signed literal static __inline__ vector signed char __ATTRS_o_ai vec_vspltisb(signed char __a) { return (vector signed char)(__a); } /* vec_splat_s16 */ #define __builtin_altivec_vspltish vec_splat_s16 // FIXME: parameter should be treated as 5-bit signed literal static __inline__ vector short __ATTRS_o_ai vec_splat_s16(signed char __a) { return (vector short)(__a); } /* vec_vspltish */ // FIXME: parameter should be treated as 5-bit signed literal static __inline__ vector short __ATTRS_o_ai vec_vspltish(signed char __a) { return (vector short)(__a); } /* vec_splat_s32 */ #define __builtin_altivec_vspltisw vec_splat_s32 // FIXME: parameter should be treated as 5-bit signed literal static __inline__ vector int __ATTRS_o_ai vec_splat_s32(signed char __a) { return (vector int)(__a); } /* vec_vspltisw */ // FIXME: parameter should be treated as 5-bit signed literal static __inline__ vector int __ATTRS_o_ai vec_vspltisw(signed char __a) { return (vector int)(__a); } /* vec_splat_u8 */ // 
FIXME: parameter should be treated as 5-bit signed literal static __inline__ vector unsigned char __ATTRS_o_ai vec_splat_u8(unsigned char __a) { return (vector unsigned char)(__a); } /* vec_splat_u16 */ // FIXME: parameter should be treated as 5-bit signed literal static __inline__ vector unsigned short __ATTRS_o_ai vec_splat_u16(signed char __a) { return (vector unsigned short)(__a); } /* vec_splat_u32 */ // FIXME: parameter should be treated as 5-bit signed literal static __inline__ vector unsigned int __ATTRS_o_ai vec_splat_u32(signed char __a) { return (vector unsigned int)(__a); } /* vec_sr */ // vec_sr does modulo arithmetic on __b first, so __b is allowed to be more // than the length of __a. static __inline__ vector unsigned char __ATTRS_o_ai vec_sr(vector unsigned char __a, vector unsigned char __b) { return __a >> (__b % (vector unsigned char)(sizeof(unsigned char) * __CHAR_BIT__)); } static __inline__ vector signed char __ATTRS_o_ai vec_sr(vector signed char __a, vector unsigned char __b) { return (vector signed char)vec_sr((vector unsigned char)__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_sr(vector unsigned short __a, vector unsigned short __b) { return __a >> (__b % (vector unsigned short)(sizeof(unsigned short) * __CHAR_BIT__)); } static __inline__ vector short __ATTRS_o_ai vec_sr(vector short __a, vector unsigned short __b) { return (vector short)vec_sr((vector unsigned short)__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_sr(vector unsigned int __a, vector unsigned int __b) { return __a >> (__b % (vector unsigned int)(sizeof(unsigned int) * __CHAR_BIT__)); } static __inline__ vector int __ATTRS_o_ai vec_sr(vector int __a, vector unsigned int __b) { return (vector int)vec_sr((vector unsigned int)__a, __b); } #ifdef __POWER8_VECTOR__ static __inline__ vector unsigned long long __ATTRS_o_ai vec_sr(vector unsigned long long __a, vector unsigned long long __b) { return __a >> (__b % (vector unsigned long long)(sizeof(unsigned long long) * __CHAR_BIT__)); } static __inline__ vector long long __ATTRS_o_ai vec_sr(vector long long __a, vector unsigned long long __b) { return (vector long long)vec_sr((vector unsigned long long)__a, __b); } #elif defined(__VSX__) static __inline__ vector unsigned long long __ATTRS_o_ai vec_sr(vector unsigned long long __a, vector unsigned long long __b) { __b %= (vector unsigned long long)(sizeof(unsigned long long) * __CHAR_BIT__); // Big endian element zero (the left doubleword) can be right shifted as-is. // However the shift amount must be in the right doubleword. // The other element needs to be swapped into the left doubleword and // shifted. Then the left doublewords of the two result vectors are merged. 
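/* Usage note: the pre-POWER8 VSX vec_sr fallback below mirrors the vec_sl one
 * above, but with vsro/vsr. Because those instructions read the shift count
 * from the rightmost register byte, the count for the element that stays put
 * first has to be swapped into that position (__swapshift below), and the byte
 * picked by vec_vspltb differs between endiannesses for the same reason.
 * Minimal sketch of the user-visible behaviour (values illustrative):
 *
 *   vector unsigned long long v = {0x100, 0x100};
 *   vector unsigned long long s = {4, 68};
 *   vec_sr(v, s);   // counts taken mod 64 -> {0x10, 0x10}
 */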
vector unsigned long long __swapshift = __builtin_shufflevector(__b, __b, 1, 0); vector unsigned long long __leftelt = (vector unsigned long long)__builtin_altivec_vsro( (vector signed int)__a, (vector signed int)__swapshift); #ifdef __LITTLE_ENDIAN__ __leftelt = (vector unsigned long long)__builtin_altivec_vsr( (vector signed int)__leftelt, (vector signed int)vec_vspltb((vector unsigned char)__swapshift, 0)); #else __leftelt = (vector unsigned long long)__builtin_altivec_vsr( (vector signed int)__leftelt, (vector signed int)vec_vspltb((vector unsigned char)__swapshift, 15)); #endif __a = __builtin_shufflevector(__a, __a, 1, 0); vector unsigned long long __rightelt = (vector unsigned long long)__builtin_altivec_vsro((vector signed int)__a, (vector signed int)__b); #ifdef __LITTLE_ENDIAN__ __rightelt = (vector unsigned long long)__builtin_altivec_vsr( (vector signed int)__rightelt, (vector signed int)vec_vspltb((vector unsigned char)__b, 0)); return __builtin_shufflevector(__rightelt, __leftelt, 1, 3); #else __rightelt = (vector unsigned long long)__builtin_altivec_vsr( (vector signed int)__rightelt, (vector signed int)vec_vspltb((vector unsigned char)__b, 15)); return __builtin_shufflevector(__leftelt, __rightelt, 0, 2); #endif } static __inline__ vector long long __ATTRS_o_ai vec_sr(vector long long __a, vector unsigned long long __b) { return (vector long long)vec_sr((vector unsigned long long)__a, __b); } #endif /* __VSX__ */ /* vec_vsrb */ #define __builtin_altivec_vsrb vec_vsrb static __inline__ vector signed char __ATTRS_o_ai vec_vsrb(vector signed char __a, vector unsigned char __b) { return vec_sr(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsrb(vector unsigned char __a, vector unsigned char __b) { return vec_sr(__a, __b); } /* vec_vsrh */ #define __builtin_altivec_vsrh vec_vsrh static __inline__ vector short __ATTRS_o_ai vec_vsrh(vector short __a, vector unsigned short __b) { return vec_sr(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsrh(vector unsigned short __a, vector unsigned short __b) { return vec_sr(__a, __b); } /* vec_vsrw */ #define __builtin_altivec_vsrw vec_vsrw static __inline__ vector int __ATTRS_o_ai vec_vsrw(vector int __a, vector unsigned int __b) { return vec_sr(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsrw(vector unsigned int __a, vector unsigned int __b) { return vec_sr(__a, __b); } /* vec_sra */ static __inline__ vector signed char __ATTRS_o_ai vec_sra(vector signed char __a, vector unsigned char __b) { return (vector signed char)__builtin_altivec_vsrab((vector char)__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_sra(vector unsigned char __a, vector unsigned char __b) { return (vector unsigned char)__builtin_altivec_vsrab((vector char)__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_sra(vector short __a, vector unsigned short __b) { return __builtin_altivec_vsrah(__a, (vector unsigned short)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_sra(vector unsigned short __a, vector unsigned short __b) { return (vector unsigned short)__builtin_altivec_vsrah((vector short)__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_sra(vector int __a, vector unsigned int __b) { return __builtin_altivec_vsraw(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_sra(vector unsigned int __a, vector unsigned int __b) { return (vector unsigned int)__builtin_altivec_vsraw((vector int)__a, __b); } #ifdef __POWER8_VECTOR__ static 
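/* Usage note: vec_sra is an arithmetic right shift (the sign bit is
 * replicated), in contrast to vec_sr, which shifts zeros in; the unsigned
 * overloads just reinterpret the bits. The 64-bit overloads below use >> on
 * signed long long elements on POWER8 and, on earlier VSX targets, reduce the
 * count modulo 64 first. Minimal sketch (values illustrative; vsraw uses only
 * the low five bits of each count):
 *
 *   vector signed int   v = {-8, -8, 8, 8};
 *   vector unsigned int s = { 1, 33, 1, 33};
 *   vec_sra(v, s);   // {-4, -4, 4, 4}
 *   vec_sr(v, s);    // logical: -8 >> 1 gives 0x7FFFFFFC in those lanes
 */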
__inline__ vector signed long long __ATTRS_o_ai vec_sra(vector signed long long __a, vector unsigned long long __b) { return __a >> __b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_sra(vector unsigned long long __a, vector unsigned long long __b) { return (vector unsigned long long)((vector signed long long)__a >> __b); } #elif defined(__VSX__) static __inline__ vector signed long long __ATTRS_o_ai vec_sra(vector signed long long __a, vector unsigned long long __b) { __b %= (vector unsigned long long)(sizeof(unsigned long long) * __CHAR_BIT__); return __a >> __b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_sra(vector unsigned long long __a, vector unsigned long long __b) { __b %= (vector unsigned long long)(sizeof(unsigned long long) * __CHAR_BIT__); return (vector unsigned long long)((vector signed long long)__a >> __b); } #endif /* __VSX__ */ /* vec_vsrab */ static __inline__ vector signed char __ATTRS_o_ai vec_vsrab(vector signed char __a, vector unsigned char __b) { return (vector signed char)__builtin_altivec_vsrab((vector char)__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsrab(vector unsigned char __a, vector unsigned char __b) { return (vector unsigned char)__builtin_altivec_vsrab((vector char)__a, __b); } /* vec_vsrah */ static __inline__ vector short __ATTRS_o_ai vec_vsrah(vector short __a, vector unsigned short __b) { return __builtin_altivec_vsrah(__a, (vector unsigned short)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsrah(vector unsigned short __a, vector unsigned short __b) { return (vector unsigned short)__builtin_altivec_vsrah((vector short)__a, __b); } /* vec_vsraw */ static __inline__ vector int __ATTRS_o_ai vec_vsraw(vector int __a, vector unsigned int __b) { return __builtin_altivec_vsraw(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsraw(vector unsigned int __a, vector unsigned int __b) { return (vector unsigned int)__builtin_altivec_vsraw((vector int)__a, __b); } /* vec_srl */ static __inline__ vector signed char __ATTRS_o_ai vec_srl(vector signed char __a, vector unsigned char __b) { return (vector signed char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector signed char __ATTRS_o_ai vec_srl(vector signed char __a, vector unsigned short __b) { return (vector signed char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector signed char __ATTRS_o_ai vec_srl(vector signed char __a, vector unsigned int __b) { return (vector signed char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_srl(vector unsigned char __a, vector unsigned char __b) { return (vector unsigned char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_srl(vector unsigned char __a, vector unsigned short __b) { return (vector unsigned char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_srl(vector unsigned char __a, vector unsigned int __b) { return (vector unsigned char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector bool char __ATTRS_o_ai vec_srl(vector bool char __a, vector unsigned char __b) { return (vector bool char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector bool char __ATTRS_o_ai vec_srl(vector bool char __a, vector unsigned short __b) { return (vector bool 
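/* Usage note: vec_srl ("shift right long") is the right-shift counterpart of
 * vec_sll: the whole 128-bit register moves right by a 0..7 bit count taken
 * from the low three bits of __b, which should be replicated in every byte.
 * It is normally paired with vec_sro for the whole-byte part, mirroring the
 * vec_slo/vec_sll idiom above. Minimal sketch (assumes vec_splats and vec_sro
 * as provided elsewhere in this header):
 *
 *   vector unsigned char sh = vec_splats((unsigned char)20);
 *   v = vec_sro(v, sh);   // right by 20 >> 3 = 2 octets
 *   v = vec_srl(v, sh);   // then by 20 & 7 = 4 bits
 */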
char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector bool char __ATTRS_o_ai vec_srl(vector bool char __a, vector unsigned int __b) { return (vector bool char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector short __ATTRS_o_ai vec_srl(vector short __a, vector unsigned char __b) { return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector short __ATTRS_o_ai vec_srl(vector short __a, vector unsigned short __b) { return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector short __ATTRS_o_ai vec_srl(vector short __a, vector unsigned int __b) { return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_srl(vector unsigned short __a, vector unsigned char __b) { return (vector unsigned short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_srl(vector unsigned short __a, vector unsigned short __b) { return (vector unsigned short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_srl(vector unsigned short __a, vector unsigned int __b) { return (vector unsigned short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_srl(vector bool short __a, vector unsigned char __b) { return (vector bool short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_srl(vector bool short __a, vector unsigned short __b) { return (vector bool short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_srl(vector bool short __a, vector unsigned int __b) { return (vector bool short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector pixel __ATTRS_o_ai vec_srl(vector pixel __a, vector unsigned char __b) { return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector pixel __ATTRS_o_ai vec_srl(vector pixel __a, vector unsigned short __b) { return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector pixel __ATTRS_o_ai vec_srl(vector pixel __a, vector unsigned int __b) { return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector int __ATTRS_o_ai vec_srl(vector int __a, vector unsigned char __b) { return (vector int)__builtin_altivec_vsr(__a, (vector int)__b); } static __inline__ vector int __ATTRS_o_ai vec_srl(vector int __a, vector unsigned short __b) { return (vector int)__builtin_altivec_vsr(__a, (vector int)__b); } static __inline__ vector int __ATTRS_o_ai vec_srl(vector int __a, vector unsigned int __b) { return (vector int)__builtin_altivec_vsr(__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_srl(vector unsigned int __a, vector unsigned char __b) { return (vector unsigned int)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_srl(vector unsigned int __a, vector unsigned short __b) { return (vector unsigned int)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_srl(vector unsigned int __a, vector unsigned int __b) { return (vector unsigned 
int)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector bool int __ATTRS_o_ai vec_srl(vector bool int __a, vector unsigned char __b) { return (vector bool int)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector bool int __ATTRS_o_ai vec_srl(vector bool int __a, vector unsigned short __b) { return (vector bool int)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector bool int __ATTRS_o_ai vec_srl(vector bool int __a, vector unsigned int __b) { return (vector bool int)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai vec_srl(vector signed long long __a, vector unsigned char __b) { return (vector signed long long)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_srl(vector unsigned long long __a, vector unsigned char __b) { return (vector unsigned long long)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } #endif /* vec_vsr */ static __inline__ vector signed char __ATTRS_o_ai vec_vsr(vector signed char __a, vector unsigned char __b) { return (vector signed char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector signed char __ATTRS_o_ai vec_vsr(vector signed char __a, vector unsigned short __b) { return (vector signed char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector signed char __ATTRS_o_ai vec_vsr(vector signed char __a, vector unsigned int __b) { return (vector signed char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsr(vector unsigned char __a, vector unsigned char __b) { return (vector unsigned char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsr(vector unsigned char __a, vector unsigned short __b) { return (vector unsigned char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsr(vector unsigned char __a, vector unsigned int __b) { return (vector unsigned char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector bool char __ATTRS_o_ai vec_vsr(vector bool char __a, vector unsigned char __b) { return (vector bool char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector bool char __ATTRS_o_ai vec_vsr(vector bool char __a, vector unsigned short __b) { return (vector bool char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector bool char __ATTRS_o_ai vec_vsr(vector bool char __a, vector unsigned int __b) { return (vector bool char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector short __ATTRS_o_ai vec_vsr(vector short __a, vector unsigned char __b) { return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector short __ATTRS_o_ai vec_vsr(vector short __a, vector unsigned short __b) { return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector short __ATTRS_o_ai vec_vsr(vector short __a, vector unsigned int __b) { return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsr(vector unsigned short __a, vector unsigned char __b) { return (vector unsigned 
short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsr(vector unsigned short __a, vector unsigned short __b) { return (vector unsigned short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsr(vector unsigned short __a, vector unsigned int __b) { return (vector unsigned short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_vsr(vector bool short __a, vector unsigned char __b) { return (vector bool short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_vsr(vector bool short __a, vector unsigned short __b) { return (vector bool short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector bool short __ATTRS_o_ai vec_vsr(vector bool short __a, vector unsigned int __b) { return (vector bool short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector pixel __ATTRS_o_ai vec_vsr(vector pixel __a, vector unsigned char __b) { return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector pixel __ATTRS_o_ai vec_vsr(vector pixel __a, vector unsigned short __b) { return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector pixel __ATTRS_o_ai vec_vsr(vector pixel __a, vector unsigned int __b) { return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector int __ATTRS_o_ai vec_vsr(vector int __a, vector unsigned char __b) { return (vector int)__builtin_altivec_vsr(__a, (vector int)__b); } static __inline__ vector int __ATTRS_o_ai vec_vsr(vector int __a, vector unsigned short __b) { return (vector int)__builtin_altivec_vsr(__a, (vector int)__b); } static __inline__ vector int __ATTRS_o_ai vec_vsr(vector int __a, vector unsigned int __b) { return (vector int)__builtin_altivec_vsr(__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsr(vector unsigned int __a, vector unsigned char __b) { return (vector unsigned int)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsr(vector unsigned int __a, vector unsigned short __b) { return (vector unsigned int)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsr(vector unsigned int __a, vector unsigned int __b) { return (vector unsigned int)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector bool int __ATTRS_o_ai vec_vsr(vector bool int __a, vector unsigned char __b) { return (vector bool int)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector bool int __ATTRS_o_ai vec_vsr(vector bool int __a, vector unsigned short __b) { return (vector bool int)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } static __inline__ vector bool int __ATTRS_o_ai vec_vsr(vector bool int __a, vector unsigned int __b) { return (vector bool int)__builtin_altivec_vsr((vector int)__a, (vector int)__b); } /* vec_sro */ static __inline__ vector signed char __ATTRS_o_ai vec_sro(vector signed char __a, vector signed char __b) { return (vector signed char)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector signed char __ATTRS_o_ai vec_sro(vector signed char __a, vector unsigned char __b) { return 
(vector signed char)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_sro(vector unsigned char __a, vector signed char __b) { return (vector unsigned char)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_sro(vector unsigned char __a, vector unsigned char __b) { return (vector unsigned char)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector short __ATTRS_o_ai vec_sro(vector short __a, vector signed char __b) { return (vector short)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector short __ATTRS_o_ai vec_sro(vector short __a, vector unsigned char __b) { return (vector short)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_sro(vector unsigned short __a, vector signed char __b) { return (vector unsigned short)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_sro(vector unsigned short __a, vector unsigned char __b) { return (vector unsigned short)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector pixel __ATTRS_o_ai vec_sro(vector pixel __a, vector signed char __b) { return (vector pixel)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector pixel __ATTRS_o_ai vec_sro(vector pixel __a, vector unsigned char __b) { return (vector pixel)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector int __ATTRS_o_ai vec_sro(vector int __a, vector signed char __b) { return (vector int)__builtin_altivec_vsro(__a, (vector int)__b); } static __inline__ vector int __ATTRS_o_ai vec_sro(vector int __a, vector unsigned char __b) { return (vector int)__builtin_altivec_vsro(__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_sro(vector unsigned int __a, vector signed char __b) { return (vector unsigned int)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_sro(vector unsigned int __a, vector unsigned char __b) { return (vector unsigned int)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector float __ATTRS_o_ai vec_sro(vector float __a, vector signed char __b) { return (vector float)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector float __ATTRS_o_ai vec_sro(vector float __a, vector unsigned char __b) { return (vector float)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai vec_sro(vector signed long long __a, vector signed char __b) { return (vector signed long long)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector signed long long __ATTRS_o_ai vec_sro(vector signed long long __a, vector unsigned char __b) { return (vector signed long long)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_sro(vector unsigned long long __a, vector signed char __b) { return (vector unsigned long long)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_sro(vector unsigned long long __a, vector unsigned char __b) { return (vector unsigned long long)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } 
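/* Editorial sketch, not part of the original header: vec_sro shifts the
   entire 16-byte vector right by whole octets and vec_srl by the remaining
   0-7 bits, both taking their count from the low-order bits of the shift
   vector; splatting a 0-127 bit count into every byte satisfies vec_srl's
   requirement that all bytes agree and avoids any endian-dependent element
   indexing (the 64-bit shift emulation earlier in this header does the
   same).  The helper name below is hypothetical, and it sits inside this
   __VSX__ block only for illustration. */
static __inline__ vector unsigned char __attribute__((__always_inline__))
__example_srq_12bits(vector unsigned char __v) {
  /* 12 bits = 1 octet handled by vec_sro plus 4 bits handled by vec_srl */
  vector unsigned char __shift = vec_splat_u8(12);
  return vec_srl(vec_sro(__v, __shift), __shift);
}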
#endif /* vec_vsro */ static __inline__ vector signed char __ATTRS_o_ai vec_vsro(vector signed char __a, vector signed char __b) { return (vector signed char)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector signed char __ATTRS_o_ai vec_vsro(vector signed char __a, vector unsigned char __b) { return (vector signed char)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsro(vector unsigned char __a, vector signed char __b) { return (vector unsigned char)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsro(vector unsigned char __a, vector unsigned char __b) { return (vector unsigned char)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector short __ATTRS_o_ai vec_vsro(vector short __a, vector signed char __b) { return (vector short)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector short __ATTRS_o_ai vec_vsro(vector short __a, vector unsigned char __b) { return (vector short)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsro(vector unsigned short __a, vector signed char __b) { return (vector unsigned short)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsro(vector unsigned short __a, vector unsigned char __b) { return (vector unsigned short)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector pixel __ATTRS_o_ai vec_vsro(vector pixel __a, vector signed char __b) { return (vector pixel)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector pixel __ATTRS_o_ai vec_vsro(vector pixel __a, vector unsigned char __b) { return (vector pixel)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector int __ATTRS_o_ai vec_vsro(vector int __a, vector signed char __b) { return (vector int)__builtin_altivec_vsro(__a, (vector int)__b); } static __inline__ vector int __ATTRS_o_ai vec_vsro(vector int __a, vector unsigned char __b) { return (vector int)__builtin_altivec_vsro(__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsro(vector unsigned int __a, vector signed char __b) { return (vector unsigned int)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsro(vector unsigned int __a, vector unsigned char __b) { return (vector unsigned int)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector float __ATTRS_o_ai vec_vsro(vector float __a, vector signed char __b) { return (vector float)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } static __inline__ vector float __ATTRS_o_ai vec_vsro(vector float __a, vector unsigned char __b) { return (vector float)__builtin_altivec_vsro((vector int)__a, (vector int)__b); } /* vec_st */ static __inline__ void __ATTRS_o_ai vec_st(vector signed char __a, long __b, vector signed char *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector signed char __a, long __b, signed char *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector unsigned char __a, long __b, vector unsigned char *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai 
vec_st(vector unsigned char __a, long __b, unsigned char *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector bool char __a, long __b, signed char *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector bool char __a, long __b, unsigned char *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector bool char __a, long __b, vector bool char *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector short __a, long __b, vector short *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector short __a, long __b, short *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector unsigned short __a, long __b, vector unsigned short *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector unsigned short __a, long __b, unsigned short *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector bool short __a, long __b, short *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector bool short __a, long __b, unsigned short *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector bool short __a, long __b, vector bool short *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector pixel __a, long __b, short *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector pixel __a, long __b, unsigned short *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector pixel __a, long __b, vector pixel *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector int __a, long __b, vector int *__c) { __builtin_altivec_stvx(__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector int __a, long __b, int *__c) { __builtin_altivec_stvx(__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector unsigned int __a, long __b, vector unsigned int *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector unsigned int __a, long __b, unsigned int *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector bool int __a, long __b, int *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector bool int __a, long __b, unsigned int *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector bool int __a, long __b, vector bool int *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector float __a, long __b, vector float *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_st(vector float __a, long __b, float *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } /* vec_stvx */ static __inline__ void __ATTRS_o_ai vec_stvx(vector signed char __a, long __b, vector signed char *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai 
vec_stvx(vector signed char __a, long __b, signed char *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector unsigned char __a, long __b, vector unsigned char *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector unsigned char __a, long __b, unsigned char *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector bool char __a, long __b, signed char *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector bool char __a, long __b, unsigned char *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector bool char __a, long __b, vector bool char *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector short __a, long __b, vector short *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector short __a, long __b, short *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector unsigned short __a, long __b, vector unsigned short *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector unsigned short __a, long __b, unsigned short *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector bool short __a, long __b, short *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector bool short __a, long __b, unsigned short *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector bool short __a, long __b, vector bool short *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector pixel __a, long __b, short *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector pixel __a, long __b, unsigned short *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector pixel __a, long __b, vector pixel *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector int __a, long __b, vector int *__c) { __builtin_altivec_stvx(__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector int __a, long __b, int *__c) { __builtin_altivec_stvx(__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector unsigned int __a, long __b, vector unsigned int *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector unsigned int __a, long __b, unsigned int *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector bool int __a, long __b, int *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector bool int __a, long __b, unsigned int *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector bool int __a, long __b, vector bool int *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvx(vector float __a, long __b, vector float *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } 
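/* Editorial sketch, not part of the original header: vec_st and vec_stvx
   both lower to stvx, which stores all 16 bytes of the vector to the
   effective address __c + __b with its low four bits cleared, so the
   destination should be 16-byte aligned.  The helper name and buffer
   parameter below are hypothetical. */
static __inline__ void __attribute__((__always_inline__))
__example_store4i(vector int __v, int *__aligned16) {
  /* offset 0: writes __aligned16[0..3] when the pointer is 16-byte aligned */
  vec_st(__v, 0, __aligned16);
}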
static __inline__ void __ATTRS_o_ai vec_stvx(vector float __a, long __b, float *__c) { __builtin_altivec_stvx((vector int)__a, __b, __c); } /* vec_ste */ static __inline__ void __ATTRS_o_ai vec_ste(vector signed char __a, long __b, signed char *__c) { __builtin_altivec_stvebx((vector char)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_ste(vector unsigned char __a, long __b, unsigned char *__c) { __builtin_altivec_stvebx((vector char)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_ste(vector bool char __a, long __b, signed char *__c) { __builtin_altivec_stvebx((vector char)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_ste(vector bool char __a, long __b, unsigned char *__c) { __builtin_altivec_stvebx((vector char)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_ste(vector short __a, long __b, short *__c) { __builtin_altivec_stvehx(__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_ste(vector unsigned short __a, long __b, unsigned short *__c) { __builtin_altivec_stvehx((vector short)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_ste(vector bool short __a, long __b, short *__c) { __builtin_altivec_stvehx((vector short)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_ste(vector bool short __a, long __b, unsigned short *__c) { __builtin_altivec_stvehx((vector short)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_ste(vector pixel __a, long __b, short *__c) { __builtin_altivec_stvehx((vector short)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_ste(vector pixel __a, long __b, unsigned short *__c) { __builtin_altivec_stvehx((vector short)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_ste(vector int __a, long __b, int *__c) { __builtin_altivec_stvewx(__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_ste(vector unsigned int __a, long __b, unsigned int *__c) { __builtin_altivec_stvewx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_ste(vector bool int __a, long __b, int *__c) { __builtin_altivec_stvewx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_ste(vector bool int __a, long __b, unsigned int *__c) { __builtin_altivec_stvewx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_ste(vector float __a, long __b, float *__c) { __builtin_altivec_stvewx((vector int)__a, __b, __c); } /* vec_stvebx */ static __inline__ void __ATTRS_o_ai vec_stvebx(vector signed char __a, long __b, signed char *__c) { __builtin_altivec_stvebx((vector char)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvebx(vector unsigned char __a, long __b, unsigned char *__c) { __builtin_altivec_stvebx((vector char)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvebx(vector bool char __a, long __b, signed char *__c) { __builtin_altivec_stvebx((vector char)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvebx(vector bool char __a, long __b, unsigned char *__c) { __builtin_altivec_stvebx((vector char)__a, __b, __c); } /* vec_stvehx */ static __inline__ void __ATTRS_o_ai vec_stvehx(vector short __a, long __b, short *__c) { __builtin_altivec_stvehx(__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvehx(vector unsigned short __a, long __b, unsigned short *__c) { __builtin_altivec_stvehx((vector short)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvehx(vector bool short __a, long __b, short *__c) { __builtin_altivec_stvehx((vector short)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvehx(vector bool short 
__a, long __b, unsigned short *__c) { __builtin_altivec_stvehx((vector short)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvehx(vector pixel __a, long __b, short *__c) { __builtin_altivec_stvehx((vector short)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvehx(vector pixel __a, long __b, unsigned short *__c) { __builtin_altivec_stvehx((vector short)__a, __b, __c); } /* vec_stvewx */ static __inline__ void __ATTRS_o_ai vec_stvewx(vector int __a, long __b, int *__c) { __builtin_altivec_stvewx(__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvewx(vector unsigned int __a, long __b, unsigned int *__c) { __builtin_altivec_stvewx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvewx(vector bool int __a, long __b, int *__c) { __builtin_altivec_stvewx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvewx(vector bool int __a, long __b, unsigned int *__c) { __builtin_altivec_stvewx((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvewx(vector float __a, long __b, float *__c) { __builtin_altivec_stvewx((vector int)__a, __b, __c); } /* vec_stl */ static __inline__ void __ATTRS_o_ai vec_stl(vector signed char __a, int __b, vector signed char *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector signed char __a, int __b, signed char *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector unsigned char __a, int __b, vector unsigned char *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector unsigned char __a, int __b, unsigned char *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector bool char __a, int __b, signed char *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector bool char __a, int __b, unsigned char *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector bool char __a, int __b, vector bool char *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector short __a, int __b, vector short *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector short __a, int __b, short *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector unsigned short __a, int __b, vector unsigned short *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector unsigned short __a, int __b, unsigned short *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector bool short __a, int __b, short *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector bool short __a, int __b, unsigned short *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector bool short __a, int __b, vector bool short *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector pixel __a, int __b, short *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector pixel __a, int __b, unsigned short *__c) { 
__builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector pixel __a, int __b, vector pixel *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector int __a, int __b, vector int *__c) { __builtin_altivec_stvxl(__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector int __a, int __b, int *__c) { __builtin_altivec_stvxl(__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector unsigned int __a, int __b, vector unsigned int *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector unsigned int __a, int __b, unsigned int *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector bool int __a, int __b, int *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector bool int __a, int __b, unsigned int *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector bool int __a, int __b, vector bool int *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector float __a, int __b, vector float *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stl(vector float __a, int __b, float *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } /* vec_stvxl */ static __inline__ void __ATTRS_o_ai vec_stvxl(vector signed char __a, int __b, vector signed char *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector signed char __a, int __b, signed char *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector unsigned char __a, int __b, vector unsigned char *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector unsigned char __a, int __b, unsigned char *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector bool char __a, int __b, signed char *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector bool char __a, int __b, unsigned char *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector bool char __a, int __b, vector bool char *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector short __a, int __b, vector short *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector short __a, int __b, short *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector unsigned short __a, int __b, vector unsigned short *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector unsigned short __a, int __b, unsigned short *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector bool short __a, int __b, short *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector bool short __a, int __b, unsigned short *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai 
vec_stvxl(vector bool short __a, int __b, vector bool short *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector pixel __a, int __b, short *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector pixel __a, int __b, unsigned short *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector pixel __a, int __b, vector pixel *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector int __a, int __b, vector int *__c) { __builtin_altivec_stvxl(__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector int __a, int __b, int *__c) { __builtin_altivec_stvxl(__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector unsigned int __a, int __b, vector unsigned int *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector unsigned int __a, int __b, unsigned int *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector bool int __a, int __b, int *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector bool int __a, int __b, unsigned int *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector bool int __a, int __b, vector bool int *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector float __a, int __b, vector float *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvxl(vector float __a, int __b, float *__c) { __builtin_altivec_stvxl((vector int)__a, __b, __c); } /* vec_sub */ static __inline__ vector signed char __ATTRS_o_ai vec_sub(vector signed char __a, vector signed char __b) { return __a - __b; } static __inline__ vector signed char __ATTRS_o_ai vec_sub(vector bool char __a, vector signed char __b) { return (vector signed char)__a - __b; } static __inline__ vector signed char __ATTRS_o_ai vec_sub(vector signed char __a, vector bool char __b) { return __a - (vector signed char)__b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_sub(vector unsigned char __a, vector unsigned char __b) { return __a - __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_sub(vector bool char __a, vector unsigned char __b) { return (vector unsigned char)__a - __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_sub(vector unsigned char __a, vector bool char __b) { return __a - (vector unsigned char)__b; } static __inline__ vector short __ATTRS_o_ai vec_sub(vector short __a, vector short __b) { return __a - __b; } static __inline__ vector short __ATTRS_o_ai vec_sub(vector bool short __a, vector short __b) { return (vector short)__a - __b; } static __inline__ vector short __ATTRS_o_ai vec_sub(vector short __a, vector bool short __b) { return __a - (vector short)__b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_sub(vector unsigned short __a, vector unsigned short __b) { return __a - __b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_sub(vector bool short __a, vector unsigned short __b) { return (vector unsigned short)__a - __b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_sub(vector unsigned short __a, vector bool short __b) { return __a - (vector unsigned 
short)__b; } static __inline__ vector int __ATTRS_o_ai vec_sub(vector int __a, vector int __b) { return __a - __b; } static __inline__ vector int __ATTRS_o_ai vec_sub(vector bool int __a, vector int __b) { return (vector int)__a - __b; } static __inline__ vector int __ATTRS_o_ai vec_sub(vector int __a, vector bool int __b) { return __a - (vector int)__b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_sub(vector unsigned int __a, vector unsigned int __b) { return __a - __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_sub(vector bool int __a, vector unsigned int __b) { return (vector unsigned int)__a - __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_sub(vector unsigned int __a, vector bool int __b) { return __a - (vector unsigned int)__b; } #if defined(__POWER8_VECTOR__) && defined(__powerpc64__) && \ defined(__SIZEOF_INT128__) static __inline__ vector signed __int128 __ATTRS_o_ai vec_sub(vector signed __int128 __a, vector signed __int128 __b) { return __a - __b; } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_sub(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __a - __b; } #endif // defined(__POWER8_VECTOR__) && defined(__powerpc64__) && // defined(__SIZEOF_INT128__) #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai vec_sub(vector signed long long __a, vector signed long long __b) { return __a - __b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_sub(vector unsigned long long __a, vector unsigned long long __b) { return __a - __b; } static __inline__ vector double __ATTRS_o_ai vec_sub(vector double __a, vector double __b) { return __a - __b; } #endif static __inline__ vector float __ATTRS_o_ai vec_sub(vector float __a, vector float __b) { return __a - __b; } /* vec_vsububm */ #define __builtin_altivec_vsububm vec_vsububm static __inline__ vector signed char __ATTRS_o_ai vec_vsububm(vector signed char __a, vector signed char __b) { return __a - __b; } static __inline__ vector signed char __ATTRS_o_ai vec_vsububm(vector bool char __a, vector signed char __b) { return (vector signed char)__a - __b; } static __inline__ vector signed char __ATTRS_o_ai vec_vsububm(vector signed char __a, vector bool char __b) { return __a - (vector signed char)__b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsububm(vector unsigned char __a, vector unsigned char __b) { return __a - __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsububm(vector bool char __a, vector unsigned char __b) { return (vector unsigned char)__a - __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsububm(vector unsigned char __a, vector bool char __b) { return __a - (vector unsigned char)__b; } /* vec_vsubuhm */ #define __builtin_altivec_vsubuhm vec_vsubuhm static __inline__ vector short __ATTRS_o_ai vec_vsubuhm(vector short __a, vector short __b) { return __a - __b; } static __inline__ vector short __ATTRS_o_ai vec_vsubuhm(vector bool short __a, vector short __b) { return (vector short)__a - __b; } static __inline__ vector short __ATTRS_o_ai vec_vsubuhm(vector short __a, vector bool short __b) { return __a - (vector short)__b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsubuhm(vector unsigned short __a, vector unsigned short __b) { return __a - __b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsubuhm(vector bool short __a, vector unsigned short __b) { return (vector unsigned short)__a - __b; } static __inline__ vector unsigned short __ATTRS_o_ai 
vec_vsubuhm(vector unsigned short __a, vector bool short __b) { return __a - (vector unsigned short)__b; } /* vec_vsubuwm */ #define __builtin_altivec_vsubuwm vec_vsubuwm static __inline__ vector int __ATTRS_o_ai vec_vsubuwm(vector int __a, vector int __b) { return __a - __b; } static __inline__ vector int __ATTRS_o_ai vec_vsubuwm(vector bool int __a, vector int __b) { return (vector int)__a - __b; } static __inline__ vector int __ATTRS_o_ai vec_vsubuwm(vector int __a, vector bool int __b) { return __a - (vector int)__b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsubuwm(vector unsigned int __a, vector unsigned int __b) { return __a - __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsubuwm(vector bool int __a, vector unsigned int __b) { return (vector unsigned int)__a - __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsubuwm(vector unsigned int __a, vector bool int __b) { return __a - (vector unsigned int)__b; } /* vec_vsubfp */ #define __builtin_altivec_vsubfp vec_vsubfp static __inline__ vector float __attribute__((__always_inline__)) vec_vsubfp(vector float __a, vector float __b) { return __a - __b; } /* vec_subc */ static __inline__ vector signed int __ATTRS_o_ai vec_subc(vector signed int __a, vector signed int __b) { return (vector signed int)__builtin_altivec_vsubcuw((vector unsigned int)__a, (vector unsigned int) __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_subc(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vsubcuw(__a, __b); } #ifdef __POWER8_VECTOR__ #ifdef __SIZEOF_INT128__ static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_subc(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __builtin_altivec_vsubcuq(__a, __b); } static __inline__ vector signed __int128 __ATTRS_o_ai vec_subc(vector signed __int128 __a, vector signed __int128 __b) { return (vector signed __int128)__builtin_altivec_vsubcuq( (vector unsigned __int128)__a, (vector unsigned __int128)__b); } #endif static __inline__ vector unsigned char __attribute__((__always_inline__)) vec_subc_u128(vector unsigned char __a, vector unsigned char __b) { return (vector unsigned char)__builtin_altivec_vsubcuq_c( (vector unsigned char)__a, (vector unsigned char)__b); } #endif // __POWER8_VECTOR__ /* vec_vsubcuw */ static __inline__ vector unsigned int __attribute__((__always_inline__)) vec_vsubcuw(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vsubcuw(__a, __b); } /* vec_subs */ static __inline__ vector signed char __ATTRS_o_ai vec_subs(vector signed char __a, vector signed char __b) { return __builtin_altivec_vsubsbs(__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_subs(vector bool char __a, vector signed char __b) { return __builtin_altivec_vsubsbs((vector signed char)__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_subs(vector signed char __a, vector bool char __b) { return __builtin_altivec_vsubsbs(__a, (vector signed char)__b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_subs(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vsububs(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_subs(vector bool char __a, vector unsigned char __b) { return __builtin_altivec_vsububs((vector unsigned char)__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_subs(vector unsigned char __a, vector bool char __b) { return __builtin_altivec_vsububs(__a, (vector unsigned char)__b); } static 
__inline__ vector short __ATTRS_o_ai vec_subs(vector short __a, vector short __b) { return __builtin_altivec_vsubshs(__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_subs(vector bool short __a, vector short __b) { return __builtin_altivec_vsubshs((vector short)__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_subs(vector short __a, vector bool short __b) { return __builtin_altivec_vsubshs(__a, (vector short)__b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_subs(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vsubuhs(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_subs(vector bool short __a, vector unsigned short __b) { return __builtin_altivec_vsubuhs((vector unsigned short)__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_subs(vector unsigned short __a, vector bool short __b) { return __builtin_altivec_vsubuhs(__a, (vector unsigned short)__b); } static __inline__ vector int __ATTRS_o_ai vec_subs(vector int __a, vector int __b) { return __builtin_altivec_vsubsws(__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_subs(vector bool int __a, vector int __b) { return __builtin_altivec_vsubsws((vector int)__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_subs(vector int __a, vector bool int __b) { return __builtin_altivec_vsubsws(__a, (vector int)__b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_subs(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vsubuws(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_subs(vector bool int __a, vector unsigned int __b) { return __builtin_altivec_vsubuws((vector unsigned int)__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_subs(vector unsigned int __a, vector bool int __b) { return __builtin_altivec_vsubuws(__a, (vector unsigned int)__b); } /* vec_vsubsbs */ static __inline__ vector signed char __ATTRS_o_ai vec_vsubsbs(vector signed char __a, vector signed char __b) { return __builtin_altivec_vsubsbs(__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_vsubsbs(vector bool char __a, vector signed char __b) { return __builtin_altivec_vsubsbs((vector signed char)__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_vsubsbs(vector signed char __a, vector bool char __b) { return __builtin_altivec_vsubsbs(__a, (vector signed char)__b); } /* vec_vsububs */ static __inline__ vector unsigned char __ATTRS_o_ai vec_vsububs(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vsububs(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsububs(vector bool char __a, vector unsigned char __b) { return __builtin_altivec_vsububs((vector unsigned char)__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsububs(vector unsigned char __a, vector bool char __b) { return __builtin_altivec_vsububs(__a, (vector unsigned char)__b); } /* vec_vsubshs */ static __inline__ vector short __ATTRS_o_ai vec_vsubshs(vector short __a, vector short __b) { return __builtin_altivec_vsubshs(__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_vsubshs(vector bool short __a, vector short __b) { return __builtin_altivec_vsubshs((vector short)__a, __b); } static __inline__ vector short __ATTRS_o_ai vec_vsubshs(vector short __a, vector bool short __b) { return __builtin_altivec_vsubshs(__a, (vector short)__b); } /* vec_vsubuhs */ static __inline__ vector unsigned short __ATTRS_o_ai vec_vsubuhs(vector 
unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vsubuhs(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsubuhs(vector bool short __a, vector unsigned short __b) { return __builtin_altivec_vsubuhs((vector unsigned short)__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsubuhs(vector unsigned short __a, vector bool short __b) { return __builtin_altivec_vsubuhs(__a, (vector unsigned short)__b); } /* vec_vsubsws */ static __inline__ vector int __ATTRS_o_ai vec_vsubsws(vector int __a, vector int __b) { return __builtin_altivec_vsubsws(__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_vsubsws(vector bool int __a, vector int __b) { return __builtin_altivec_vsubsws((vector int)__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_vsubsws(vector int __a, vector bool int __b) { return __builtin_altivec_vsubsws(__a, (vector int)__b); } /* vec_vsubuws */ static __inline__ vector unsigned int __ATTRS_o_ai vec_vsubuws(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vsubuws(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsubuws(vector bool int __a, vector unsigned int __b) { return __builtin_altivec_vsubuws((vector unsigned int)__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsubuws(vector unsigned int __a, vector bool int __b) { return __builtin_altivec_vsubuws(__a, (vector unsigned int)__b); } #ifdef __POWER8_VECTOR__ /* vec_vsubuqm */ #ifdef __SIZEOF_INT128__ static __inline__ vector signed __int128 __ATTRS_o_ai vec_vsubuqm(vector signed __int128 __a, vector signed __int128 __b) { return __a - __b; } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_vsubuqm(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __a - __b; } #endif static __inline__ vector unsigned char __attribute__((__always_inline__)) vec_sub_u128(vector unsigned char __a, vector unsigned char __b) { return (vector unsigned char)__builtin_altivec_vsubuqm(__a, __b); } /* vec_vsubeuqm */ #ifdef __SIZEOF_INT128__ static __inline__ vector signed __int128 __ATTRS_o_ai vec_vsubeuqm(vector signed __int128 __a, vector signed __int128 __b, vector signed __int128 __c) { return (vector signed __int128)__builtin_altivec_vsubeuqm( (vector unsigned __int128)__a, (vector unsigned __int128)__b, (vector unsigned __int128)__c); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_vsubeuqm(vector unsigned __int128 __a, vector unsigned __int128 __b, vector unsigned __int128 __c) { return __builtin_altivec_vsubeuqm(__a, __b, __c); } static __inline__ vector signed __int128 __ATTRS_o_ai vec_sube(vector signed __int128 __a, vector signed __int128 __b, vector signed __int128 __c) { return (vector signed __int128)__builtin_altivec_vsubeuqm( (vector unsigned __int128)__a, (vector unsigned __int128)__b, (vector unsigned __int128)__c); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_sube(vector unsigned __int128 __a, vector unsigned __int128 __b, vector unsigned __int128 __c) { return __builtin_altivec_vsubeuqm(__a, __b, __c); } #endif static __inline__ vector unsigned char __attribute__((__always_inline__)) vec_sube_u128(vector unsigned char __a, vector unsigned char __b, vector unsigned char __c) { return (vector unsigned char)__builtin_altivec_vsubeuqm_c( (vector unsigned char)__a, (vector unsigned char)__b, (vector unsigned char)__c); } /* vec_vsubcuq */ #ifdef __SIZEOF_INT128__ static __inline__ vector signed __int128 __ATTRS_o_ai vec_vsubcuq(vector signed 
__int128 __a, vector signed __int128 __b) { return (vector signed __int128)__builtin_altivec_vsubcuq( (vector unsigned __int128)__a, (vector unsigned __int128)__b); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_vsubcuq(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __builtin_altivec_vsubcuq(__a, __b); } /* vec_vsubecuq */ static __inline__ vector signed __int128 __ATTRS_o_ai vec_vsubecuq(vector signed __int128 __a, vector signed __int128 __b, vector signed __int128 __c) { return (vector signed __int128)__builtin_altivec_vsubecuq( (vector unsigned __int128)__a, (vector unsigned __int128)__b, (vector unsigned __int128)__c); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_vsubecuq(vector unsigned __int128 __a, vector unsigned __int128 __b, vector unsigned __int128 __c) { return __builtin_altivec_vsubecuq(__a, __b, __c); } #endif #ifdef __powerpc64__ static __inline__ vector signed int __ATTRS_o_ai vec_subec(vector signed int __a, vector signed int __b, vector signed int __c) { return vec_addec(__a, ~__b, __c); } static __inline__ vector unsigned int __ATTRS_o_ai vec_subec(vector unsigned int __a, vector unsigned int __b, vector unsigned int __c) { return vec_addec(__a, ~__b, __c); } #endif #ifdef __SIZEOF_INT128__ static __inline__ vector signed __int128 __ATTRS_o_ai vec_subec(vector signed __int128 __a, vector signed __int128 __b, vector signed __int128 __c) { return (vector signed __int128)__builtin_altivec_vsubecuq( (vector unsigned __int128)__a, (vector unsigned __int128)__b, (vector unsigned __int128)__c); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_subec(vector unsigned __int128 __a, vector unsigned __int128 __b, vector unsigned __int128 __c) { return __builtin_altivec_vsubecuq(__a, __b, __c); } #endif static __inline__ vector unsigned char __attribute__((__always_inline__)) vec_subec_u128(vector unsigned char __a, vector unsigned char __b, vector unsigned char __c) { return (vector unsigned char)__builtin_altivec_vsubecuq_c( (vector unsigned char)__a, (vector unsigned char)__b, (vector unsigned char)__c); } #endif // __POWER8_VECTOR__ static __inline__ vector signed int __ATTRS_o_ai vec_sube(vector signed int __a, vector signed int __b, vector signed int __c) { vector signed int __mask = {1, 1, 1, 1}; vector signed int __carry = __c & __mask; return vec_adde(__a, ~__b, __carry); } static __inline__ vector unsigned int __ATTRS_o_ai vec_sube(vector unsigned int __a, vector unsigned int __b, vector unsigned int __c) { vector unsigned int __mask = {1, 1, 1, 1}; vector unsigned int __carry = __c & __mask; return vec_adde(__a, ~__b, __carry); } /* vec_sum4s */ static __inline__ vector int __ATTRS_o_ai vec_sum4s(vector signed char __a, vector int __b) { return __builtin_altivec_vsum4sbs(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_sum4s(vector unsigned char __a, vector unsigned int __b) { return __builtin_altivec_vsum4ubs(__a, __b); } static __inline__ vector int __ATTRS_o_ai vec_sum4s(vector signed short __a, vector int __b) { return __builtin_altivec_vsum4shs(__a, __b); } /* vec_vsum4sbs */ static __inline__ vector int __attribute__((__always_inline__)) vec_vsum4sbs(vector signed char __a, vector int __b) { return __builtin_altivec_vsum4sbs(__a, __b); } /* vec_vsum4ubs */ static __inline__ vector unsigned int __attribute__((__always_inline__)) vec_vsum4ubs(vector unsigned char __a, vector unsigned int __b) { return __builtin_altivec_vsum4ubs(__a, __b); } /* vec_vsum4shs */ static __inline__ vector 
int __attribute__((__always_inline__))
vec_vsum4shs(vector signed short __a, vector int __b) {
  return __builtin_altivec_vsum4shs(__a, __b);
}

/* vec_sum2s */

/* The vsum2sws instruction has a big-endian bias, so that the second
   input vector and the result always reference big-endian elements 1 and 3
   (little-endian element 0 and 2).  For ease of porting the programmer
   wants elements 1 and 3 in both cases, so for little endian we must
   perform some permutes. */

static __inline__ vector signed int __attribute__((__always_inline__))
vec_sum2s(vector int __a, vector int __b) {
#ifdef __LITTLE_ENDIAN__
  vector int __c = (vector signed int)vec_perm(
      __b, __b, (vector unsigned char)(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15,
                                       8, 9, 10, 11));
  __c = __builtin_altivec_vsum2sws(__a, __c);
  return (vector signed int)vec_perm(
      __c, __c, (vector unsigned char)(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15,
                                       8, 9, 10, 11));
#else
  return __builtin_altivec_vsum2sws(__a, __b);
#endif
}

/* vec_vsum2sws */

static __inline__ vector signed int __attribute__((__always_inline__))
vec_vsum2sws(vector int __a, vector int __b) {
#ifdef __LITTLE_ENDIAN__
  vector int __c = (vector signed int)vec_perm(
      __b, __b, (vector unsigned char)(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15,
                                       8, 9, 10, 11));
  __c = __builtin_altivec_vsum2sws(__a, __c);
  return (vector signed int)vec_perm(
      __c, __c, (vector unsigned char)(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15,
                                       8, 9, 10, 11));
#else
  return __builtin_altivec_vsum2sws(__a, __b);
#endif
}

/* vec_sums */

/* The vsumsws instruction has a big-endian bias, so that the second
   input vector and the result always reference big-endian element 3
   (little-endian element 0).  For ease of porting the programmer wants
   element 3 in both cases, so for little endian we must perform some
   permutes. */

static __inline__ vector signed int __attribute__((__always_inline__))
vec_sums(vector signed int __a, vector signed int __b) {
#ifdef __LITTLE_ENDIAN__
  __b = (vector signed int)vec_splat(__b, 3);
  __b = __builtin_altivec_vsumsws(__a, __b);
  return (vector signed int)(0, 0, 0, __b[0]);
#else
  return __builtin_altivec_vsumsws(__a, __b);
#endif
}

/* vec_vsumsws */

static __inline__ vector signed int __attribute__((__always_inline__))
vec_vsumsws(vector signed int __a, vector signed int __b) {
#ifdef __LITTLE_ENDIAN__
  __b = (vector signed int)vec_splat(__b, 3);
  __b = __builtin_altivec_vsumsws(__a, __b);
  return (vector signed int)(0, 0, 0, __b[0]);
#else
  return __builtin_altivec_vsumsws(__a, __b);
#endif
}

/* vec_trunc */

static __inline__ vector float __ATTRS_o_ai vec_trunc(vector float __a) {
#ifdef __VSX__
  return __builtin_vsx_xvrspiz(__a);
#else
  return __builtin_altivec_vrfiz(__a);
#endif
}

#ifdef __VSX__
static __inline__ vector double __ATTRS_o_ai vec_trunc(vector double __a) {
  return __builtin_vsx_xvrdpiz(__a);
}
#endif

/* vec_roundz */
static __inline__ vector float __ATTRS_o_ai vec_roundz(vector float __a) {
  return vec_trunc(__a);
}

#ifdef __VSX__
static __inline__ vector double __ATTRS_o_ai vec_roundz(vector double __a) {
  return vec_trunc(__a);
}
#endif

/* vec_vrfiz */

static __inline__ vector float __attribute__((__always_inline__))
vec_vrfiz(vector float __a) {
  return __builtin_altivec_vrfiz(__a);
}

/* vec_unpackh */

/* The vector unpack instructions all have a big-endian bias, so for
   little endian we must reverse the meanings of "high" and "low."
*/ #ifdef __LITTLE_ENDIAN__ #define vec_vupkhpx(__a) __builtin_altivec_vupklpx((vector short)(__a)) #define vec_vupklpx(__a) __builtin_altivec_vupkhpx((vector short)(__a)) #else #define vec_vupkhpx(__a) __builtin_altivec_vupkhpx((vector short)(__a)) #define vec_vupklpx(__a) __builtin_altivec_vupklpx((vector short)(__a)) #endif static __inline__ vector short __ATTRS_o_ai vec_unpackh(vector signed char __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vupklsb((vector char)__a); #else return __builtin_altivec_vupkhsb((vector char)__a); #endif } static __inline__ vector bool short __ATTRS_o_ai vec_unpackh(vector bool char __a) { #ifdef __LITTLE_ENDIAN__ return (vector bool short)__builtin_altivec_vupklsb((vector char)__a); #else return (vector bool short)__builtin_altivec_vupkhsb((vector char)__a); #endif } static __inline__ vector int __ATTRS_o_ai vec_unpackh(vector short __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vupklsh(__a); #else return __builtin_altivec_vupkhsh(__a); #endif } static __inline__ vector bool int __ATTRS_o_ai vec_unpackh(vector bool short __a) { #ifdef __LITTLE_ENDIAN__ return (vector bool int)__builtin_altivec_vupklsh((vector short)__a); #else return (vector bool int)__builtin_altivec_vupkhsh((vector short)__a); #endif } static __inline__ vector unsigned int __ATTRS_o_ai vec_unpackh(vector pixel __a) { #ifdef __LITTLE_ENDIAN__ return (vector unsigned int)__builtin_altivec_vupklpx((vector short)__a); #else return (vector unsigned int)__builtin_altivec_vupkhpx((vector short)__a); #endif } #ifdef __POWER8_VECTOR__ static __inline__ vector long long __ATTRS_o_ai vec_unpackh(vector int __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vupklsw(__a); #else return __builtin_altivec_vupkhsw(__a); #endif } static __inline__ vector bool long long __ATTRS_o_ai vec_unpackh(vector bool int __a) { #ifdef __LITTLE_ENDIAN__ return (vector bool long long)__builtin_altivec_vupklsw((vector int)__a); #else return (vector bool long long)__builtin_altivec_vupkhsw((vector int)__a); #endif } static __inline__ vector double __ATTRS_o_ai vec_unpackh(vector float __a) { return (vector double)(__a[0], __a[1]); } #endif /* vec_vupkhsb */ static __inline__ vector short __ATTRS_o_ai vec_vupkhsb(vector signed char __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vupklsb((vector char)__a); #else return __builtin_altivec_vupkhsb((vector char)__a); #endif } static __inline__ vector bool short __ATTRS_o_ai vec_vupkhsb(vector bool char __a) { #ifdef __LITTLE_ENDIAN__ return (vector bool short)__builtin_altivec_vupklsb((vector char)__a); #else return (vector bool short)__builtin_altivec_vupkhsb((vector char)__a); #endif } /* vec_vupkhsh */ static __inline__ vector int __ATTRS_o_ai vec_vupkhsh(vector short __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vupklsh(__a); #else return __builtin_altivec_vupkhsh(__a); #endif } static __inline__ vector bool int __ATTRS_o_ai vec_vupkhsh(vector bool short __a) { #ifdef __LITTLE_ENDIAN__ return (vector bool int)__builtin_altivec_vupklsh((vector short)__a); #else return (vector bool int)__builtin_altivec_vupkhsh((vector short)__a); #endif } static __inline__ vector unsigned int __ATTRS_o_ai vec_vupkhsh(vector pixel __a) { #ifdef __LITTLE_ENDIAN__ return (vector unsigned int)__builtin_altivec_vupklpx((vector short)__a); #else return (vector unsigned int)__builtin_altivec_vupkhpx((vector short)__a); #endif } /* vec_vupkhsw */ #ifdef __POWER8_VECTOR__ static __inline__ vector long long __ATTRS_o_ai vec_vupkhsw(vector int __a) { 
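/* Illustrative usage sketch (editor's addition, not part of the original
   header): vec_unpackh/vec_unpackl sign-extend the API-level "high"/"low"
   half of the source.  As the comment above notes, the underlying vupk*
   instructions are big-endian biased, so on little-endian targets the
   wrappers call the opposite instruction to keep element order consistent:

     vector signed char c  = vec_splats((signed char)-5);
     vector short       hi = vec_unpackh(c);   // eight shorts, each -5
     vector short       lo = vec_unpackl(c);   // eight shorts, each -5
*/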
#ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vupklsw(__a); #else return __builtin_altivec_vupkhsw(__a); #endif } static __inline__ vector bool long long __ATTRS_o_ai vec_vupkhsw(vector bool int __a) { #ifdef __LITTLE_ENDIAN__ return (vector bool long long)__builtin_altivec_vupklsw((vector int)__a); #else return (vector bool long long)__builtin_altivec_vupkhsw((vector int)__a); #endif } #endif /* vec_unpackl */ static __inline__ vector short __ATTRS_o_ai vec_unpackl(vector signed char __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vupkhsb((vector char)__a); #else return __builtin_altivec_vupklsb((vector char)__a); #endif } static __inline__ vector bool short __ATTRS_o_ai vec_unpackl(vector bool char __a) { #ifdef __LITTLE_ENDIAN__ return (vector bool short)__builtin_altivec_vupkhsb((vector char)__a); #else return (vector bool short)__builtin_altivec_vupklsb((vector char)__a); #endif } static __inline__ vector int __ATTRS_o_ai vec_unpackl(vector short __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vupkhsh(__a); #else return __builtin_altivec_vupklsh(__a); #endif } static __inline__ vector bool int __ATTRS_o_ai vec_unpackl(vector bool short __a) { #ifdef __LITTLE_ENDIAN__ return (vector bool int)__builtin_altivec_vupkhsh((vector short)__a); #else return (vector bool int)__builtin_altivec_vupklsh((vector short)__a); #endif } static __inline__ vector unsigned int __ATTRS_o_ai vec_unpackl(vector pixel __a) { #ifdef __LITTLE_ENDIAN__ return (vector unsigned int)__builtin_altivec_vupkhpx((vector short)__a); #else return (vector unsigned int)__builtin_altivec_vupklpx((vector short)__a); #endif } #ifdef __POWER8_VECTOR__ static __inline__ vector long long __ATTRS_o_ai vec_unpackl(vector int __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vupkhsw(__a); #else return __builtin_altivec_vupklsw(__a); #endif } static __inline__ vector bool long long __ATTRS_o_ai vec_unpackl(vector bool int __a) { #ifdef __LITTLE_ENDIAN__ return (vector bool long long)__builtin_altivec_vupkhsw((vector int)__a); #else return (vector bool long long)__builtin_altivec_vupklsw((vector int)__a); #endif } static __inline__ vector double __ATTRS_o_ai vec_unpackl(vector float __a) { return (vector double)(__a[2], __a[3]); } #endif /* vec_vupklsb */ static __inline__ vector short __ATTRS_o_ai vec_vupklsb(vector signed char __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vupkhsb((vector char)__a); #else return __builtin_altivec_vupklsb((vector char)__a); #endif } static __inline__ vector bool short __ATTRS_o_ai vec_vupklsb(vector bool char __a) { #ifdef __LITTLE_ENDIAN__ return (vector bool short)__builtin_altivec_vupkhsb((vector char)__a); #else return (vector bool short)__builtin_altivec_vupklsb((vector char)__a); #endif } /* vec_vupklsh */ static __inline__ vector int __ATTRS_o_ai vec_vupklsh(vector short __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vupkhsh(__a); #else return __builtin_altivec_vupklsh(__a); #endif } static __inline__ vector bool int __ATTRS_o_ai vec_vupklsh(vector bool short __a) { #ifdef __LITTLE_ENDIAN__ return (vector bool int)__builtin_altivec_vupkhsh((vector short)__a); #else return (vector bool int)__builtin_altivec_vupklsh((vector short)__a); #endif } static __inline__ vector unsigned int __ATTRS_o_ai vec_vupklsh(vector pixel __a) { #ifdef __LITTLE_ENDIAN__ return (vector unsigned int)__builtin_altivec_vupkhpx((vector short)__a); #else return (vector unsigned int)__builtin_altivec_vupklpx((vector short)__a); #endif } /* vec_vupklsw */ #ifdef 
__POWER8_VECTOR__ static __inline__ vector long long __ATTRS_o_ai vec_vupklsw(vector int __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vupkhsw(__a); #else return __builtin_altivec_vupklsw(__a); #endif } static __inline__ vector bool long long __ATTRS_o_ai vec_vupklsw(vector bool int __a) { #ifdef __LITTLE_ENDIAN__ return (vector bool long long)__builtin_altivec_vupkhsw((vector int)__a); #else return (vector bool long long)__builtin_altivec_vupklsw((vector int)__a); #endif } #endif /* vec_vsx_ld */ #ifdef __VSX__ static __inline__ vector bool int __ATTRS_o_ai vec_vsx_ld(int __a, const vector bool int *__b) { return (vector bool int)__builtin_vsx_lxvw4x(__a, __b); } static __inline__ vector signed int __ATTRS_o_ai vec_vsx_ld(int __a, const vector signed int *__b) { return (vector signed int)__builtin_vsx_lxvw4x(__a, __b); } static __inline__ vector signed int __ATTRS_o_ai vec_vsx_ld(int __a, const signed int *__b) { return (vector signed int)__builtin_vsx_lxvw4x(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsx_ld(int __a, const vector unsigned int *__b) { return (vector unsigned int)__builtin_vsx_lxvw4x(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_vsx_ld(int __a, const unsigned int *__b) { return (vector unsigned int)__builtin_vsx_lxvw4x(__a, __b); } static __inline__ vector float __ATTRS_o_ai vec_vsx_ld(int __a, const vector float *__b) { return (vector float)__builtin_vsx_lxvw4x(__a, __b); } static __inline__ vector float __ATTRS_o_ai vec_vsx_ld(int __a, const float *__b) { return (vector float)__builtin_vsx_lxvw4x(__a, __b); } static __inline__ vector signed long long __ATTRS_o_ai vec_vsx_ld(int __a, const vector signed long long *__b) { return (vector signed long long)__builtin_vsx_lxvd2x(__a, __b); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_vsx_ld(int __a, const vector unsigned long long *__b) { return (vector unsigned long long)__builtin_vsx_lxvd2x(__a, __b); } static __inline__ vector double __ATTRS_o_ai vec_vsx_ld(int __a, const vector double *__b) { return (vector double)__builtin_vsx_lxvd2x(__a, __b); } static __inline__ vector double __ATTRS_o_ai vec_vsx_ld(int __a, const double *__b) { return (vector double)__builtin_vsx_lxvd2x(__a, __b); } static __inline__ vector bool short __ATTRS_o_ai vec_vsx_ld(int __a, const vector bool short *__b) { return (vector bool short)__builtin_vsx_lxvw4x(__a, __b); } static __inline__ vector signed short __ATTRS_o_ai vec_vsx_ld(int __a, const vector signed short *__b) { return (vector signed short)__builtin_vsx_lxvw4x(__a, __b); } static __inline__ vector signed short __ATTRS_o_ai vec_vsx_ld(int __a, const signed short *__b) { return (vector signed short)__builtin_vsx_lxvw4x(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsx_ld(int __a, const vector unsigned short *__b) { return (vector unsigned short)__builtin_vsx_lxvw4x(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_vsx_ld(int __a, const unsigned short *__b) { return (vector unsigned short)__builtin_vsx_lxvw4x(__a, __b); } static __inline__ vector bool char __ATTRS_o_ai vec_vsx_ld(int __a, const vector bool char *__b) { return (vector bool char)__builtin_vsx_lxvw4x(__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_vsx_ld(int __a, const vector signed char *__b) { return (vector signed char)__builtin_vsx_lxvw4x(__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_vsx_ld(int __a, const signed char *__b) { return (vector signed 
char)__builtin_vsx_lxvw4x(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsx_ld(int __a, const vector unsigned char *__b) { return (vector unsigned char)__builtin_vsx_lxvw4x(__a, __b); } static __inline__ vector unsigned char __ATTRS_o_ai vec_vsx_ld(int __a, const unsigned char *__b) { return (vector unsigned char)__builtin_vsx_lxvw4x(__a, __b); } #endif /* vec_vsx_st */ #ifdef __VSX__ static __inline__ void __ATTRS_o_ai vec_vsx_st(vector bool int __a, int __b, vector bool int *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector bool int __a, int __b, signed int *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector bool int __a, int __b, unsigned int *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector signed int __a, int __b, vector signed int *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector signed int __a, int __b, signed int *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector unsigned int __a, int __b, vector unsigned int *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector unsigned int __a, int __b, unsigned int *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector float __a, int __b, vector float *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector float __a, int __b, float *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector signed long long __a, int __b, vector signed long long *__c) { __builtin_vsx_stxvd2x((vector double)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector unsigned long long __a, int __b, vector unsigned long long *__c) { __builtin_vsx_stxvd2x((vector double)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector double __a, int __b, vector double *__c) { __builtin_vsx_stxvd2x((vector double)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector double __a, int __b, double *__c) { __builtin_vsx_stxvd2x((vector double)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector bool short __a, int __b, vector bool short *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector bool short __a, int __b, signed short *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector bool short __a, int __b, unsigned short *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector signed short __a, int __b, vector signed short *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector signed short __a, int __b, signed short *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector unsigned short __a, int __b, vector unsigned short *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector unsigned short __a, int __b, unsigned short *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai 
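/* Illustrative usage sketch (editor's addition, not part of the original
   header), assuming a VSX-enabled target (e.g. -mvsx): vec_vsx_ld and
   vec_vsx_st lower to lxvw4x/lxvd2x and stxvw4x/stxvd2x, which, unlike
   lvx/stvx, do not silently mask the address down to a 16-byte boundary,
   so they can be used for unaligned accesses:

     float buf[8] = {0};
     vector float v = vec_vsx_ld(0, &buf[1]);  // load from an unaligned address
     vec_vsx_st(v, 0, &buf[1]);                // store back to the same address
*/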
vec_vsx_st(vector bool char __a, int __b, vector bool char *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector bool char __a, int __b, signed char *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector bool char __a, int __b, unsigned char *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector signed char __a, int __b, vector signed char *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector signed char __a, int __b, signed char *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector unsigned char __a, int __b, vector unsigned char *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } static __inline__ void __ATTRS_o_ai vec_vsx_st(vector unsigned char __a, int __b, unsigned char *__c) { __builtin_vsx_stxvw4x((vector int)__a, __b, __c); } #endif #ifdef __VSX__ #define vec_xxpermdi __builtin_vsx_xxpermdi #define vec_xxsldwi __builtin_vsx_xxsldwi #define vec_permi(__a, __b, __c) \ _Generic((__a), vector signed long long \ : __builtin_shufflevector((__a), (__b), (((__c) >> 1) & 0x1), \ (((__c)&0x1) + 2)), \ vector unsigned long long \ : __builtin_shufflevector((__a), (__b), (((__c) >> 1) & 0x1), \ (((__c)&0x1) + 2)), \ vector double \ : __builtin_shufflevector((__a), (__b), (((__c) >> 1) & 0x1), \ (((__c)&0x1) + 2))) #endif /* vec_xor */ #define __builtin_altivec_vxor vec_xor static __inline__ vector signed char __ATTRS_o_ai vec_xor(vector signed char __a, vector signed char __b) { return __a ^ __b; } static __inline__ vector signed char __ATTRS_o_ai vec_xor(vector bool char __a, vector signed char __b) { return (vector signed char)__a ^ __b; } static __inline__ vector signed char __ATTRS_o_ai vec_xor(vector signed char __a, vector bool char __b) { return __a ^ (vector signed char)__b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_xor(vector unsigned char __a, vector unsigned char __b) { return __a ^ __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_xor(vector bool char __a, vector unsigned char __b) { return (vector unsigned char)__a ^ __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_xor(vector unsigned char __a, vector bool char __b) { return __a ^ (vector unsigned char)__b; } static __inline__ vector bool char __ATTRS_o_ai vec_xor(vector bool char __a, vector bool char __b) { return __a ^ __b; } static __inline__ vector short __ATTRS_o_ai vec_xor(vector short __a, vector short __b) { return __a ^ __b; } static __inline__ vector short __ATTRS_o_ai vec_xor(vector bool short __a, vector short __b) { return (vector short)__a ^ __b; } static __inline__ vector short __ATTRS_o_ai vec_xor(vector short __a, vector bool short __b) { return __a ^ (vector short)__b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_xor(vector unsigned short __a, vector unsigned short __b) { return __a ^ __b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_xor(vector bool short __a, vector unsigned short __b) { return (vector unsigned short)__a ^ __b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_xor(vector unsigned short __a, vector bool short __b) { return __a ^ (vector unsigned short)__b; } static __inline__ vector bool short __ATTRS_o_ai vec_xor(vector bool short __a, vector bool short __b) { return __a ^ __b; } static __inline__ vector int 
__ATTRS_o_ai vec_xor(vector int __a, vector int __b) { return __a ^ __b; } static __inline__ vector int __ATTRS_o_ai vec_xor(vector bool int __a, vector int __b) { return (vector int)__a ^ __b; } static __inline__ vector int __ATTRS_o_ai vec_xor(vector int __a, vector bool int __b) { return __a ^ (vector int)__b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_xor(vector unsigned int __a, vector unsigned int __b) { return __a ^ __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_xor(vector bool int __a, vector unsigned int __b) { return (vector unsigned int)__a ^ __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_xor(vector unsigned int __a, vector bool int __b) { return __a ^ (vector unsigned int)__b; } static __inline__ vector bool int __ATTRS_o_ai vec_xor(vector bool int __a, vector bool int __b) { return __a ^ __b; } static __inline__ vector float __ATTRS_o_ai vec_xor(vector float __a, vector float __b) { vector unsigned int __res = (vector unsigned int)__a ^ (vector unsigned int)__b; return (vector float)__res; } static __inline__ vector float __ATTRS_o_ai vec_xor(vector bool int __a, vector float __b) { vector unsigned int __res = (vector unsigned int)__a ^ (vector unsigned int)__b; return (vector float)__res; } static __inline__ vector float __ATTRS_o_ai vec_xor(vector float __a, vector bool int __b) { vector unsigned int __res = (vector unsigned int)__a ^ (vector unsigned int)__b; return (vector float)__res; } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai vec_xor(vector signed long long __a, vector signed long long __b) { return __a ^ __b; } static __inline__ vector signed long long __ATTRS_o_ai vec_xor(vector bool long long __a, vector signed long long __b) { return (vector signed long long)__a ^ __b; } static __inline__ vector signed long long __ATTRS_o_ai vec_xor(vector signed long long __a, vector bool long long __b) { return __a ^ (vector signed long long)__b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_xor(vector unsigned long long __a, vector unsigned long long __b) { return __a ^ __b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_xor(vector bool long long __a, vector unsigned long long __b) { return (vector unsigned long long)__a ^ __b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_xor(vector unsigned long long __a, vector bool long long __b) { return __a ^ (vector unsigned long long)__b; } static __inline__ vector bool long long __ATTRS_o_ai vec_xor(vector bool long long __a, vector bool long long __b) { return __a ^ __b; } static __inline__ vector double __ATTRS_o_ai vec_xor(vector double __a, vector double __b) { return (vector double)((vector unsigned long long)__a ^ (vector unsigned long long)__b); } static __inline__ vector double __ATTRS_o_ai vec_xor(vector double __a, vector bool long long __b) { return (vector double)((vector unsigned long long)__a ^ (vector unsigned long long)__b); } static __inline__ vector double __ATTRS_o_ai vec_xor(vector bool long long __a, vector double __b) { return (vector double)((vector unsigned long long)__a ^ (vector unsigned long long)__b); } #endif /* vec_vxor */ static __inline__ vector signed char __ATTRS_o_ai vec_vxor(vector signed char __a, vector signed char __b) { return __a ^ __b; } static __inline__ vector signed char __ATTRS_o_ai vec_vxor(vector bool char __a, vector signed char __b) { return (vector signed char)__a ^ __b; } static __inline__ vector signed char __ATTRS_o_ai vec_vxor(vector signed char __a, vector bool 
char __b) { return __a ^ (vector signed char)__b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_vxor(vector unsigned char __a, vector unsigned char __b) { return __a ^ __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_vxor(vector bool char __a, vector unsigned char __b) { return (vector unsigned char)__a ^ __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_vxor(vector unsigned char __a, vector bool char __b) { return __a ^ (vector unsigned char)__b; } static __inline__ vector bool char __ATTRS_o_ai vec_vxor(vector bool char __a, vector bool char __b) { return __a ^ __b; } static __inline__ vector short __ATTRS_o_ai vec_vxor(vector short __a, vector short __b) { return __a ^ __b; } static __inline__ vector short __ATTRS_o_ai vec_vxor(vector bool short __a, vector short __b) { return (vector short)__a ^ __b; } static __inline__ vector short __ATTRS_o_ai vec_vxor(vector short __a, vector bool short __b) { return __a ^ (vector short)__b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_vxor(vector unsigned short __a, vector unsigned short __b) { return __a ^ __b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_vxor(vector bool short __a, vector unsigned short __b) { return (vector unsigned short)__a ^ __b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_vxor(vector unsigned short __a, vector bool short __b) { return __a ^ (vector unsigned short)__b; } static __inline__ vector bool short __ATTRS_o_ai vec_vxor(vector bool short __a, vector bool short __b) { return __a ^ __b; } static __inline__ vector int __ATTRS_o_ai vec_vxor(vector int __a, vector int __b) { return __a ^ __b; } static __inline__ vector int __ATTRS_o_ai vec_vxor(vector bool int __a, vector int __b) { return (vector int)__a ^ __b; } static __inline__ vector int __ATTRS_o_ai vec_vxor(vector int __a, vector bool int __b) { return __a ^ (vector int)__b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_vxor(vector unsigned int __a, vector unsigned int __b) { return __a ^ __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_vxor(vector bool int __a, vector unsigned int __b) { return (vector unsigned int)__a ^ __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_vxor(vector unsigned int __a, vector bool int __b) { return __a ^ (vector unsigned int)__b; } static __inline__ vector bool int __ATTRS_o_ai vec_vxor(vector bool int __a, vector bool int __b) { return __a ^ __b; } static __inline__ vector float __ATTRS_o_ai vec_vxor(vector float __a, vector float __b) { vector unsigned int __res = (vector unsigned int)__a ^ (vector unsigned int)__b; return (vector float)__res; } static __inline__ vector float __ATTRS_o_ai vec_vxor(vector bool int __a, vector float __b) { vector unsigned int __res = (vector unsigned int)__a ^ (vector unsigned int)__b; return (vector float)__res; } static __inline__ vector float __ATTRS_o_ai vec_vxor(vector float __a, vector bool int __b) { vector unsigned int __res = (vector unsigned int)__a ^ (vector unsigned int)__b; return (vector float)__res; } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai vec_vxor(vector signed long long __a, vector signed long long __b) { return __a ^ __b; } static __inline__ vector signed long long __ATTRS_o_ai vec_vxor(vector bool long long __a, vector signed long long __b) { return (vector signed long long)__a ^ __b; } static __inline__ vector signed long long __ATTRS_o_ai vec_vxor(vector signed long long __a, vector bool long long __b) { return __a ^ (vector signed 
long long)__b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_vxor(vector unsigned long long __a, vector unsigned long long __b) { return __a ^ __b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_vxor(vector bool long long __a, vector unsigned long long __b) { return (vector unsigned long long)__a ^ __b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_vxor(vector unsigned long long __a, vector bool long long __b) { return __a ^ (vector unsigned long long)__b; } static __inline__ vector bool long long __ATTRS_o_ai vec_vxor(vector bool long long __a, vector bool long long __b) { return __a ^ __b; } #endif /* ------------------------ extensions for CBEA ----------------------------- */ /* vec_extract */ static __inline__ signed char __ATTRS_o_ai vec_extract(vector signed char __a, signed int __b) { return __a[__b & 0xf]; } static __inline__ unsigned char __ATTRS_o_ai vec_extract(vector unsigned char __a, signed int __b) { return __a[__b & 0xf]; } static __inline__ unsigned char __ATTRS_o_ai vec_extract(vector bool char __a, signed int __b) { return __a[__b & 0xf]; } static __inline__ signed short __ATTRS_o_ai vec_extract(vector signed short __a, signed int __b) { return __a[__b & 0x7]; } static __inline__ unsigned short __ATTRS_o_ai vec_extract(vector unsigned short __a, signed int __b) { return __a[__b & 0x7]; } static __inline__ unsigned short __ATTRS_o_ai vec_extract(vector bool short __a, signed int __b) { return __a[__b & 0x7]; } static __inline__ signed int __ATTRS_o_ai vec_extract(vector signed int __a, signed int __b) { return __a[__b & 0x3]; } static __inline__ unsigned int __ATTRS_o_ai vec_extract(vector unsigned int __a, signed int __b) { return __a[__b & 0x3]; } static __inline__ unsigned int __ATTRS_o_ai vec_extract(vector bool int __a, signed int __b) { return __a[__b & 0x3]; } #ifdef __VSX__ static __inline__ signed long long __ATTRS_o_ai vec_extract(vector signed long long __a, signed int __b) { return __a[__b & 0x1]; } static __inline__ unsigned long long __ATTRS_o_ai vec_extract(vector unsigned long long __a, signed int __b) { return __a[__b & 0x1]; } static __inline__ unsigned long long __ATTRS_o_ai vec_extract(vector bool long long __a, signed int __b) { return __a[__b & 0x1]; } static __inline__ double __ATTRS_o_ai vec_extract(vector double __a, signed int __b) { return __a[__b & 0x1]; } #endif static __inline__ float __ATTRS_o_ai vec_extract(vector float __a, signed int __b) { return __a[__b & 0x3]; } #ifdef __POWER9_VECTOR__ #define vec_insert4b __builtin_vsx_insertword #define vec_extract4b __builtin_vsx_extractuword /* vec_extract_exp */ static __inline__ vector unsigned int __ATTRS_o_ai vec_extract_exp(vector float __a) { return __builtin_vsx_xvxexpsp(__a); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_extract_exp(vector double __a) { return __builtin_vsx_xvxexpdp(__a); } /* vec_extract_sig */ static __inline__ vector unsigned int __ATTRS_o_ai vec_extract_sig(vector float __a) { return __builtin_vsx_xvxsigsp(__a); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_extract_sig (vector double __a) { return __builtin_vsx_xvxsigdp(__a); } static __inline__ vector float __ATTRS_o_ai vec_extract_fp32_from_shorth(vector unsigned short __a) { vector unsigned short __b = #ifdef __LITTLE_ENDIAN__ __builtin_shufflevector(__a, __a, 0, -1, 1, -1, 2, -1, 3, -1); #else __builtin_shufflevector(__a, __a, -1, 0, -1, 1, -1, 2, -1, 3); #endif return __builtin_vsx_xvcvhpsp(__b); } static __inline__ vector float 
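/* Illustrative usage sketch (editor's addition, not part of the original
   header): vec_extract above and vec_insert (defined below) mask the element
   index to the element count, so an out-of-range index wraps around rather
   than addressing past the vector:

     vector int v = vec_splats(7);
     int e = vec_extract(v, 5);     // 5 & 0x3 == 1, so e == 7
     v     = vec_insert(9, v, 2);   // element 2 becomes 9
*/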
__ATTRS_o_ai vec_extract_fp32_from_shortl(vector unsigned short __a) { vector unsigned short __b = #ifdef __LITTLE_ENDIAN__ __builtin_shufflevector(__a, __a, 4, -1, 5, -1, 6, -1, 7, -1); #else __builtin_shufflevector(__a, __a, -1, 4, -1, 5, -1, 6, -1, 7); #endif return __builtin_vsx_xvcvhpsp(__b); } #endif /* __POWER9_VECTOR__ */ /* vec_insert */ static __inline__ vector signed char __ATTRS_o_ai vec_insert(signed char __a, vector signed char __b, int __c) { __b[__c & 0xF] = __a; return __b; } static __inline__ vector unsigned char __ATTRS_o_ai vec_insert(unsigned char __a, vector unsigned char __b, int __c) { __b[__c & 0xF] = __a; return __b; } static __inline__ vector bool char __ATTRS_o_ai vec_insert(unsigned char __a, vector bool char __b, int __c) { __b[__c & 0xF] = __a; return __b; } static __inline__ vector signed short __ATTRS_o_ai vec_insert(signed short __a, vector signed short __b, int __c) { __b[__c & 0x7] = __a; return __b; } static __inline__ vector unsigned short __ATTRS_o_ai vec_insert(unsigned short __a, vector unsigned short __b, int __c) { __b[__c & 0x7] = __a; return __b; } static __inline__ vector bool short __ATTRS_o_ai vec_insert(unsigned short __a, vector bool short __b, int __c) { __b[__c & 0x7] = __a; return __b; } static __inline__ vector signed int __ATTRS_o_ai vec_insert(signed int __a, vector signed int __b, int __c) { __b[__c & 0x3] = __a; return __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_insert(unsigned int __a, vector unsigned int __b, int __c) { __b[__c & 0x3] = __a; return __b; } static __inline__ vector bool int __ATTRS_o_ai vec_insert(unsigned int __a, vector bool int __b, int __c) { __b[__c & 0x3] = __a; return __b; } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai vec_insert(signed long long __a, vector signed long long __b, int __c) { __b[__c & 0x1] = __a; return __b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_insert(unsigned long long __a, vector unsigned long long __b, int __c) { __b[__c & 0x1] = __a; return __b; } static __inline__ vector bool long long __ATTRS_o_ai vec_insert(unsigned long long __a, vector bool long long __b, int __c) { __b[__c & 0x1] = __a; return __b; } static __inline__ vector double __ATTRS_o_ai vec_insert(double __a, vector double __b, int __c) { __b[__c & 0x1] = __a; return __b; } #endif static __inline__ vector float __ATTRS_o_ai vec_insert(float __a, vector float __b, int __c) { __b[__c & 0x3] = __a; return __b; } /* vec_lvlx */ static __inline__ vector signed char __ATTRS_o_ai vec_lvlx(int __a, const signed char *__b) { return vec_perm(vec_ld(__a, __b), (vector signed char)(0), vec_lvsl(__a, __b)); } static __inline__ vector signed char __ATTRS_o_ai vec_lvlx(int __a, const vector signed char *__b) { return vec_perm(vec_ld(__a, __b), (vector signed char)(0), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector unsigned char __ATTRS_o_ai vec_lvlx(int __a, const unsigned char *__b) { return vec_perm(vec_ld(__a, __b), (vector unsigned char)(0), vec_lvsl(__a, __b)); } static __inline__ vector unsigned char __ATTRS_o_ai vec_lvlx(int __a, const vector unsigned char *__b) { return vec_perm(vec_ld(__a, __b), (vector unsigned char)(0), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector bool char __ATTRS_o_ai vec_lvlx(int __a, const vector bool char *__b) { return vec_perm(vec_ld(__a, __b), (vector bool char)(0), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector short __ATTRS_o_ai vec_lvlx(int __a, const short *__b) { return 
vec_perm(vec_ld(__a, __b), (vector short)(0), vec_lvsl(__a, __b)); } static __inline__ vector short __ATTRS_o_ai vec_lvlx(int __a, const vector short *__b) { return vec_perm(vec_ld(__a, __b), (vector short)(0), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector unsigned short __ATTRS_o_ai vec_lvlx(int __a, const unsigned short *__b) { return vec_perm(vec_ld(__a, __b), (vector unsigned short)(0), vec_lvsl(__a, __b)); } static __inline__ vector unsigned short __ATTRS_o_ai vec_lvlx(int __a, const vector unsigned short *__b) { return vec_perm(vec_ld(__a, __b), (vector unsigned short)(0), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector bool short __ATTRS_o_ai vec_lvlx(int __a, const vector bool short *__b) { return vec_perm(vec_ld(__a, __b), (vector bool short)(0), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector pixel __ATTRS_o_ai vec_lvlx(int __a, const vector pixel *__b) { return vec_perm(vec_ld(__a, __b), (vector pixel)(0), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector int __ATTRS_o_ai vec_lvlx(int __a, const int *__b) { return vec_perm(vec_ld(__a, __b), (vector int)(0), vec_lvsl(__a, __b)); } static __inline__ vector int __ATTRS_o_ai vec_lvlx(int __a, const vector int *__b) { return vec_perm(vec_ld(__a, __b), (vector int)(0), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector unsigned int __ATTRS_o_ai vec_lvlx(int __a, const unsigned int *__b) { return vec_perm(vec_ld(__a, __b), (vector unsigned int)(0), vec_lvsl(__a, __b)); } static __inline__ vector unsigned int __ATTRS_o_ai vec_lvlx(int __a, const vector unsigned int *__b) { return vec_perm(vec_ld(__a, __b), (vector unsigned int)(0), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector bool int __ATTRS_o_ai vec_lvlx(int __a, const vector bool int *__b) { return vec_perm(vec_ld(__a, __b), (vector bool int)(0), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector float __ATTRS_o_ai vec_lvlx(int __a, const float *__b) { return vec_perm(vec_ld(__a, __b), (vector float)(0), vec_lvsl(__a, __b)); } static __inline__ vector float __ATTRS_o_ai vec_lvlx(int __a, const vector float *__b) { return vec_perm(vec_ld(__a, __b), (vector float)(0), vec_lvsl(__a, (unsigned char *)__b)); } /* vec_lvlxl */ static __inline__ vector signed char __ATTRS_o_ai vec_lvlxl(int __a, const signed char *__b) { return vec_perm(vec_ldl(__a, __b), (vector signed char)(0), vec_lvsl(__a, __b)); } static __inline__ vector signed char __ATTRS_o_ai vec_lvlxl(int __a, const vector signed char *__b) { return vec_perm(vec_ldl(__a, __b), (vector signed char)(0), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector unsigned char __ATTRS_o_ai vec_lvlxl(int __a, const unsigned char *__b) { return vec_perm(vec_ldl(__a, __b), (vector unsigned char)(0), vec_lvsl(__a, __b)); } static __inline__ vector unsigned char __ATTRS_o_ai vec_lvlxl(int __a, const vector unsigned char *__b) { return vec_perm(vec_ldl(__a, __b), (vector unsigned char)(0), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector bool char __ATTRS_o_ai vec_lvlxl(int __a, const vector bool char *__b) { return vec_perm(vec_ldl(__a, __b), (vector bool char)(0), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector short __ATTRS_o_ai vec_lvlxl(int __a, const short *__b) { return vec_perm(vec_ldl(__a, __b), (vector short)(0), vec_lvsl(__a, __b)); } static __inline__ vector short __ATTRS_o_ai vec_lvlxl(int __a, const vector short *__b) { return vec_perm(vec_ldl(__a, __b), (vector short)(0), 
vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector unsigned short __ATTRS_o_ai vec_lvlxl(int __a, const unsigned short *__b) { return vec_perm(vec_ldl(__a, __b), (vector unsigned short)(0), vec_lvsl(__a, __b)); } static __inline__ vector unsigned short __ATTRS_o_ai vec_lvlxl(int __a, const vector unsigned short *__b) { return vec_perm(vec_ldl(__a, __b), (vector unsigned short)(0), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector bool short __ATTRS_o_ai vec_lvlxl(int __a, const vector bool short *__b) { return vec_perm(vec_ldl(__a, __b), (vector bool short)(0), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector pixel __ATTRS_o_ai vec_lvlxl(int __a, const vector pixel *__b) { return vec_perm(vec_ldl(__a, __b), (vector pixel)(0), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector int __ATTRS_o_ai vec_lvlxl(int __a, const int *__b) { return vec_perm(vec_ldl(__a, __b), (vector int)(0), vec_lvsl(__a, __b)); } static __inline__ vector int __ATTRS_o_ai vec_lvlxl(int __a, const vector int *__b) { return vec_perm(vec_ldl(__a, __b), (vector int)(0), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector unsigned int __ATTRS_o_ai vec_lvlxl(int __a, const unsigned int *__b) { return vec_perm(vec_ldl(__a, __b), (vector unsigned int)(0), vec_lvsl(__a, __b)); } static __inline__ vector unsigned int __ATTRS_o_ai vec_lvlxl(int __a, const vector unsigned int *__b) { return vec_perm(vec_ldl(__a, __b), (vector unsigned int)(0), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector bool int __ATTRS_o_ai vec_lvlxl(int __a, const vector bool int *__b) { return vec_perm(vec_ldl(__a, __b), (vector bool int)(0), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector float __ATTRS_o_ai vec_lvlxl(int __a, const float *__b) { return vec_perm(vec_ldl(__a, __b), (vector float)(0), vec_lvsl(__a, __b)); } static __inline__ vector float __ATTRS_o_ai vec_lvlxl(int __a, vector float *__b) { return vec_perm(vec_ldl(__a, __b), (vector float)(0), vec_lvsl(__a, (unsigned char *)__b)); } /* vec_lvrx */ static __inline__ vector signed char __ATTRS_o_ai vec_lvrx(int __a, const signed char *__b) { return vec_perm((vector signed char)(0), vec_ld(__a, __b), vec_lvsl(__a, __b)); } static __inline__ vector signed char __ATTRS_o_ai vec_lvrx(int __a, const vector signed char *__b) { return vec_perm((vector signed char)(0), vec_ld(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector unsigned char __ATTRS_o_ai vec_lvrx(int __a, const unsigned char *__b) { return vec_perm((vector unsigned char)(0), vec_ld(__a, __b), vec_lvsl(__a, __b)); } static __inline__ vector unsigned char __ATTRS_o_ai vec_lvrx(int __a, const vector unsigned char *__b) { return vec_perm((vector unsigned char)(0), vec_ld(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector bool char __ATTRS_o_ai vec_lvrx(int __a, const vector bool char *__b) { return vec_perm((vector bool char)(0), vec_ld(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector short __ATTRS_o_ai vec_lvrx(int __a, const short *__b) { return vec_perm((vector short)(0), vec_ld(__a, __b), vec_lvsl(__a, __b)); } static __inline__ vector short __ATTRS_o_ai vec_lvrx(int __a, const vector short *__b) { return vec_perm((vector short)(0), vec_ld(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector unsigned short __ATTRS_o_ai vec_lvrx(int __a, const unsigned short *__b) { return vec_perm((vector unsigned short)(0), vec_ld(__a, __b), 
vec_lvsl(__a, __b)); } static __inline__ vector unsigned short __ATTRS_o_ai vec_lvrx(int __a, const vector unsigned short *__b) { return vec_perm((vector unsigned short)(0), vec_ld(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector bool short __ATTRS_o_ai vec_lvrx(int __a, const vector bool short *__b) { return vec_perm((vector bool short)(0), vec_ld(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector pixel __ATTRS_o_ai vec_lvrx(int __a, const vector pixel *__b) { return vec_perm((vector pixel)(0), vec_ld(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector int __ATTRS_o_ai vec_lvrx(int __a, const int *__b) { return vec_perm((vector int)(0), vec_ld(__a, __b), vec_lvsl(__a, __b)); } static __inline__ vector int __ATTRS_o_ai vec_lvrx(int __a, const vector int *__b) { return vec_perm((vector int)(0), vec_ld(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector unsigned int __ATTRS_o_ai vec_lvrx(int __a, const unsigned int *__b) { return vec_perm((vector unsigned int)(0), vec_ld(__a, __b), vec_lvsl(__a, __b)); } static __inline__ vector unsigned int __ATTRS_o_ai vec_lvrx(int __a, const vector unsigned int *__b) { return vec_perm((vector unsigned int)(0), vec_ld(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector bool int __ATTRS_o_ai vec_lvrx(int __a, const vector bool int *__b) { return vec_perm((vector bool int)(0), vec_ld(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector float __ATTRS_o_ai vec_lvrx(int __a, const float *__b) { return vec_perm((vector float)(0), vec_ld(__a, __b), vec_lvsl(__a, __b)); } static __inline__ vector float __ATTRS_o_ai vec_lvrx(int __a, const vector float *__b) { return vec_perm((vector float)(0), vec_ld(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } /* vec_lvrxl */ static __inline__ vector signed char __ATTRS_o_ai vec_lvrxl(int __a, const signed char *__b) { return vec_perm((vector signed char)(0), vec_ldl(__a, __b), vec_lvsl(__a, __b)); } static __inline__ vector signed char __ATTRS_o_ai vec_lvrxl(int __a, const vector signed char *__b) { return vec_perm((vector signed char)(0), vec_ldl(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector unsigned char __ATTRS_o_ai vec_lvrxl(int __a, const unsigned char *__b) { return vec_perm((vector unsigned char)(0), vec_ldl(__a, __b), vec_lvsl(__a, __b)); } static __inline__ vector unsigned char __ATTRS_o_ai vec_lvrxl(int __a, const vector unsigned char *__b) { return vec_perm((vector unsigned char)(0), vec_ldl(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector bool char __ATTRS_o_ai vec_lvrxl(int __a, const vector bool char *__b) { return vec_perm((vector bool char)(0), vec_ldl(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector short __ATTRS_o_ai vec_lvrxl(int __a, const short *__b) { return vec_perm((vector short)(0), vec_ldl(__a, __b), vec_lvsl(__a, __b)); } static __inline__ vector short __ATTRS_o_ai vec_lvrxl(int __a, const vector short *__b) { return vec_perm((vector short)(0), vec_ldl(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector unsigned short __ATTRS_o_ai vec_lvrxl(int __a, const unsigned short *__b) { return vec_perm((vector unsigned short)(0), vec_ldl(__a, __b), vec_lvsl(__a, __b)); } static __inline__ vector unsigned short __ATTRS_o_ai vec_lvrxl(int __a, const vector unsigned short *__b) { return vec_perm((vector unsigned short)(0), vec_ldl(__a, __b), vec_lvsl(__a, 
(unsigned char *)__b)); } static __inline__ vector bool short __ATTRS_o_ai vec_lvrxl(int __a, const vector bool short *__b) { return vec_perm((vector bool short)(0), vec_ldl(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector pixel __ATTRS_o_ai vec_lvrxl(int __a, const vector pixel *__b) { return vec_perm((vector pixel)(0), vec_ldl(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector int __ATTRS_o_ai vec_lvrxl(int __a, const int *__b) { return vec_perm((vector int)(0), vec_ldl(__a, __b), vec_lvsl(__a, __b)); } static __inline__ vector int __ATTRS_o_ai vec_lvrxl(int __a, const vector int *__b) { return vec_perm((vector int)(0), vec_ldl(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector unsigned int __ATTRS_o_ai vec_lvrxl(int __a, const unsigned int *__b) { return vec_perm((vector unsigned int)(0), vec_ldl(__a, __b), vec_lvsl(__a, __b)); } static __inline__ vector unsigned int __ATTRS_o_ai vec_lvrxl(int __a, const vector unsigned int *__b) { return vec_perm((vector unsigned int)(0), vec_ldl(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector bool int __ATTRS_o_ai vec_lvrxl(int __a, const vector bool int *__b) { return vec_perm((vector bool int)(0), vec_ldl(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } static __inline__ vector float __ATTRS_o_ai vec_lvrxl(int __a, const float *__b) { return vec_perm((vector float)(0), vec_ldl(__a, __b), vec_lvsl(__a, __b)); } static __inline__ vector float __ATTRS_o_ai vec_lvrxl(int __a, const vector float *__b) { return vec_perm((vector float)(0), vec_ldl(__a, __b), vec_lvsl(__a, (unsigned char *)__b)); } /* vec_stvlx */ static __inline__ void __ATTRS_o_ai vec_stvlx(vector signed char __a, int __b, signed char *__c) { return vec_st(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlx(vector signed char __a, int __b, vector signed char *__c) { return vec_st( vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlx(vector unsigned char __a, int __b, unsigned char *__c) { return vec_st(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlx(vector unsigned char __a, int __b, vector unsigned char *__c) { return vec_st( vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlx(vector bool char __a, int __b, vector bool char *__c) { return vec_st( vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlx(vector short __a, int __b, short *__c) { return vec_st(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlx(vector short __a, int __b, vector short *__c) { return vec_st( vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlx(vector unsigned short __a, int __b, unsigned short *__c) { return vec_st(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlx(vector unsigned short __a, int __b, vector unsigned short *__c) { return vec_st( vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlx(vector bool short __a, int __b, vector bool short *__c) { return vec_st( 
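/* Illustrative usage sketch (editor's addition, not part of the original
   header): the CBEA-style vec_lvlx/vec_lvrx pair above splits an unaligned
   load across the two aligned quadwords it straddles; one common idiom
   (the offsets here are only an illustrative assumption) is to OR the two
   halves back together:

     unsigned char buf[32] = {0};
     vector unsigned char left  = vec_lvlx(3, buf);
     vector unsigned char right = vec_lvrx(3 + 16, buf);
     vector unsigned char whole = vec_or(left, right);
*/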
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlx(vector pixel __a, int __b, vector pixel *__c) { return vec_st( vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlx(vector int __a, int __b, int *__c) { return vec_st(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlx(vector int __a, int __b, vector int *__c) { return vec_st( vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlx(vector unsigned int __a, int __b, unsigned int *__c) { return vec_st(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlx(vector unsigned int __a, int __b, vector unsigned int *__c) { return vec_st( vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlx(vector bool int __a, int __b, vector bool int *__c) { return vec_st( vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlx(vector float __a, int __b, vector float *__c) { return vec_st( vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } /* vec_stvlxl */ static __inline__ void __ATTRS_o_ai vec_stvlxl(vector signed char __a, int __b, signed char *__c) { return vec_stl(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlxl(vector signed char __a, int __b, vector signed char *__c) { return vec_stl( vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlxl(vector unsigned char __a, int __b, unsigned char *__c) { return vec_stl(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlxl(vector unsigned char __a, int __b, vector unsigned char *__c) { return vec_stl( vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlxl(vector bool char __a, int __b, vector bool char *__c) { return vec_stl( vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlxl(vector short __a, int __b, short *__c) { return vec_stl(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlxl(vector short __a, int __b, vector short *__c) { return vec_stl( vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlxl(vector unsigned short __a, int __b, unsigned short *__c) { return vec_stl(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlxl(vector unsigned short __a, int __b, vector unsigned short *__c) { return vec_stl( vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlxl(vector bool short __a, int __b, vector bool short *__c) { return vec_stl( vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlxl(vector pixel __a, int __b, vector pixel *__c) { return vec_stl( vec_perm(vec_lvrx(__b, __c), __a, 
vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlxl(vector int __a, int __b, int *__c) { return vec_stl(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlxl(vector int __a, int __b, vector int *__c) { return vec_stl( vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlxl(vector unsigned int __a, int __b, unsigned int *__c) { return vec_stl(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlxl(vector unsigned int __a, int __b, vector unsigned int *__c) { return vec_stl( vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlxl(vector bool int __a, int __b, vector bool int *__c) { return vec_stl( vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvlxl(vector float __a, int __b, vector float *__c) { return vec_stl( vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } /* vec_stvrx */ static __inline__ void __ATTRS_o_ai vec_stvrx(vector signed char __a, int __b, signed char *__c) { return vec_st(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrx(vector signed char __a, int __b, vector signed char *__c) { return vec_st( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrx(vector unsigned char __a, int __b, unsigned char *__c) { return vec_st(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrx(vector unsigned char __a, int __b, vector unsigned char *__c) { return vec_st( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrx(vector bool char __a, int __b, vector bool char *__c) { return vec_st( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrx(vector short __a, int __b, short *__c) { return vec_st(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrx(vector short __a, int __b, vector short *__c) { return vec_st( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrx(vector unsigned short __a, int __b, unsigned short *__c) { return vec_st(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrx(vector unsigned short __a, int __b, vector unsigned short *__c) { return vec_st( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrx(vector bool short __a, int __b, vector bool short *__c) { return vec_st( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrx(vector pixel __a, int __b, vector pixel *__c) { return vec_st( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrx(vector int __a, int __b, int *__c) { return vec_st(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b, __c); } static __inline__ void 
__ATTRS_o_ai vec_stvrx(vector int __a, int __b, vector int *__c) { return vec_st( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrx(vector unsigned int __a, int __b, unsigned int *__c) { return vec_st(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrx(vector unsigned int __a, int __b, vector unsigned int *__c) { return vec_st( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrx(vector bool int __a, int __b, vector bool int *__c) { return vec_st( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrx(vector float __a, int __b, vector float *__c) { return vec_st( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } /* vec_stvrxl */ static __inline__ void __ATTRS_o_ai vec_stvrxl(vector signed char __a, int __b, signed char *__c) { return vec_stl(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrxl(vector signed char __a, int __b, vector signed char *__c) { return vec_stl( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrxl(vector unsigned char __a, int __b, unsigned char *__c) { return vec_stl(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrxl(vector unsigned char __a, int __b, vector unsigned char *__c) { return vec_stl( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrxl(vector bool char __a, int __b, vector bool char *__c) { return vec_stl( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrxl(vector short __a, int __b, short *__c) { return vec_stl(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrxl(vector short __a, int __b, vector short *__c) { return vec_stl( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrxl(vector unsigned short __a, int __b, unsigned short *__c) { return vec_stl(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrxl(vector unsigned short __a, int __b, vector unsigned short *__c) { return vec_stl( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrxl(vector bool short __a, int __b, vector bool short *__c) { return vec_stl( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrxl(vector pixel __a, int __b, vector pixel *__c) { return vec_stl( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrxl(vector int __a, int __b, int *__c) { return vec_stl(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrxl(vector int __a, int __b, vector int *__c) { return vec_stl( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrxl(vector 
unsigned int __a, int __b, unsigned int *__c) { return vec_stl(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrxl(vector unsigned int __a, int __b, vector unsigned int *__c) { return vec_stl( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrxl(vector bool int __a, int __b, vector bool int *__c) { return vec_stl( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } static __inline__ void __ATTRS_o_ai vec_stvrxl(vector float __a, int __b, vector float *__c) { return vec_stl( vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)), __b, __c); } /* vec_promote */ static __inline__ vector signed char __ATTRS_o_ai vec_promote(signed char __a, int __b) { const vector signed char __zero = (vector signed char)0; vector signed char __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); __res[__b & 0xf] = __a; return __res; } static __inline__ vector unsigned char __ATTRS_o_ai vec_promote(unsigned char __a, int __b) { const vector unsigned char __zero = (vector unsigned char)(0); vector unsigned char __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); __res[__b & 0xf] = __a; return __res; } static __inline__ vector short __ATTRS_o_ai vec_promote(short __a, int __b) { const vector short __zero = (vector short)(0); vector short __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1); __res[__b & 0x7] = __a; return __res; } static __inline__ vector unsigned short __ATTRS_o_ai vec_promote(unsigned short __a, int __b) { const vector unsigned short __zero = (vector unsigned short)(0); vector unsigned short __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1); __res[__b & 0x7] = __a; return __res; } static __inline__ vector int __ATTRS_o_ai vec_promote(int __a, int __b) { const vector int __zero = (vector int)(0); vector int __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1); __res[__b & 0x3] = __a; return __res; } static __inline__ vector unsigned int __ATTRS_o_ai vec_promote(unsigned int __a, int __b) { const vector unsigned int __zero = (vector unsigned int)(0); vector unsigned int __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1); __res[__b & 0x3] = __a; return __res; } static __inline__ vector float __ATTRS_o_ai vec_promote(float __a, int __b) { const vector float __zero = (vector float)(0); vector float __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1); __res[__b & 0x3] = __a; return __res; } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_promote(double __a, int __b) { const vector double __zero = (vector double)(0); vector double __res = __builtin_shufflevector(__zero, __zero, -1, -1); __res[__b & 0x1] = __a; return __res; } static __inline__ vector signed long long __ATTRS_o_ai vec_promote(signed long long __a, int __b) { const vector signed long long __zero = (vector signed long long)(0); vector signed long long __res = __builtin_shufflevector(__zero, __zero, -1, -1); __res[__b & 0x1] = __a; return __res; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_promote(unsigned long long __a, int __b) { const vector unsigned long long __zero = (vector unsigned long long)(0); vector unsigned long long __res = __builtin_shufflevector(__zero, __zero, -1, -1); __res[__b & 0x1] = __a; 
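/* Illustrative usage sketch (editor's addition, not part of the original
   header): vec_promote places a scalar into one lane of an otherwise
   undefined vector, while vec_splats (below) replicates it into every lane:

     vector int one_lane = vec_promote(42, 0);  // element 0 == 42, others undefined
     vector int all      = vec_splats(42);      // every element == 42
*/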
return __res; } #endif /* vec_splats */ static __inline__ vector signed char __ATTRS_o_ai vec_splats(signed char __a) { return (vector signed char)(__a); } static __inline__ vector unsigned char __ATTRS_o_ai vec_splats(unsigned char __a) { return (vector unsigned char)(__a); } static __inline__ vector short __ATTRS_o_ai vec_splats(short __a) { return (vector short)(__a); } static __inline__ vector unsigned short __ATTRS_o_ai vec_splats(unsigned short __a) { return (vector unsigned short)(__a); } static __inline__ vector int __ATTRS_o_ai vec_splats(int __a) { return (vector int)(__a); } static __inline__ vector unsigned int __ATTRS_o_ai vec_splats(unsigned int __a) { return (vector unsigned int)(__a); } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai vec_splats(signed long long __a) { return (vector signed long long)(__a); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_splats(unsigned long long __a) { return (vector unsigned long long)(__a); } #if defined(__POWER8_VECTOR__) && defined(__powerpc64__) && \ defined(__SIZEOF_INT128__) static __inline__ vector signed __int128 __ATTRS_o_ai vec_splats(signed __int128 __a) { return (vector signed __int128)(__a); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_splats(unsigned __int128 __a) { return (vector unsigned __int128)(__a); } #endif static __inline__ vector double __ATTRS_o_ai vec_splats(double __a) { return (vector double)(__a); } #endif static __inline__ vector float __ATTRS_o_ai vec_splats(float __a) { return (vector float)(__a); } /* ----------------------------- predicates --------------------------------- */ /* vec_all_eq */ static __inline__ int __ATTRS_o_ai vec_all_eq(vector signed char __a, vector signed char __b) { return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector signed char __a, vector bool char __b) { return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned char __a, vector bool char __b) { return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool char __a, vector signed char __b) { return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool char __a, vector unsigned char __b) { return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool char __a, vector bool char __b) { return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector short __a, vector short __b) { return __builtin_altivec_vcmpequh_p(__CR6_LT, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector short __a, vector bool short __b) { return __builtin_altivec_vcmpequh_p(__CR6_LT, __a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned short __a, vector bool short __b) { return 
__builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool short __a, vector short __b) { return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool short __a, vector unsigned short __b) { return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool short __a, vector bool short __b) { return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector pixel __a, vector pixel __b) { return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector int __a, vector int __b) { return __builtin_altivec_vcmpequw_p(__CR6_LT, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector int __a, vector bool int __b) { return __builtin_altivec_vcmpequw_p(__CR6_LT, __a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned int __a, vector bool int __b) { return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool int __a, vector int __b) { return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool int __a, vector unsigned int __b) { return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool int __a, vector bool int __b) { return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a, (vector int)__b); } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_all_eq(vector signed long long __a, vector signed long long __b) { #ifdef __POWER8_VECTOR__ return __builtin_altivec_vcmpequd_p(__CR6_LT, __a, __b); #else // No vcmpequd on Power7 so we xor the two vectors and compare against zero as // 32-bit elements. 
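// (Sketch of why the fallback is equivalent: two 64-bit lanes are equal
// exactly when their xor is all zero bits, and an all-zero 64-bit lane is the
// same thing as two all-zero 32-bit words, so testing every 32-bit element of
// vec_xor(__a, __b) against zero yields the same answer a native 64-bit
// vcmpequd-based compare would.)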
return vec_all_eq((vector signed int)vec_xor(__a, __b), (vector signed int)0); #endif } static __inline__ int __ATTRS_o_ai vec_all_eq(vector long long __a, vector bool long long __b) { return vec_all_eq((vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned long long __a, vector unsigned long long __b) { return vec_all_eq((vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned long long __a, vector bool long long __b) { return vec_all_eq((vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool long long __a, vector long long __b) { return vec_all_eq((vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool long long __a, vector unsigned long long __b) { return vec_all_eq((vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool long long __a, vector bool long long __b) { return vec_all_eq((vector signed long long)__a, (vector signed long long)__b); } #endif static __inline__ int __ATTRS_o_ai vec_all_eq(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvcmpeqsp_p(__CR6_LT, __a, __b); #else return __builtin_altivec_vcmpeqfp_p(__CR6_LT, __a, __b); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_all_eq(vector double __a, vector double __b) { return __builtin_vsx_xvcmpeqdp_p(__CR6_LT, __a, __b); } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ int __ATTRS_o_ai vec_all_eq(vector signed __int128 __a, vector signed __int128 __b) { return __builtin_altivec_vcmpequq_p(__CR6_LT, (vector unsigned __int128)__a, (vector signed __int128)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __builtin_altivec_vcmpequq_p(__CR6_LT, __a, (vector signed __int128)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool __int128 __a, vector bool __int128 __b) { return __builtin_altivec_vcmpequq_p(__CR6_LT, (vector unsigned __int128)__a, (vector signed __int128)__b); } #endif /* vec_all_ge */ static __inline__ int __ATTRS_o_ai vec_all_ge(vector signed char __a, vector signed char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, __b, __a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector signed char __a, vector bool char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, (vector signed char)__b, __a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_EQ, __b, __a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector unsigned char __a, vector bool char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)__b, __a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool char __a, vector signed char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, __b, (vector signed char)__a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool char __a, vector unsigned char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_EQ, __b, (vector unsigned char)__a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool char __a, vector bool char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)__b, (vector unsigned char)__a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector short 
__a, vector short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, __b, __a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector short __a, vector bool short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, (vector short)__b, __a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, __b, __a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector unsigned short __a, vector bool short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)__b, __a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool short __a, vector short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, __b, (vector signed short)__a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool short __a, vector unsigned short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, __b, (vector unsigned short)__a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool short __a, vector bool short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)__b, (vector unsigned short)__a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector int __a, vector int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, __b, __a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector int __a, vector bool int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, (vector int)__b, __a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, __b, __a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector unsigned int __a, vector bool int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)__b, __a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool int __a, vector int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, __b, (vector signed int)__a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool int __a, vector unsigned int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, __b, (vector unsigned int)__a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool int __a, vector bool int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)__b, (vector unsigned int)__a); } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_all_ge(vector signed long long __a, vector signed long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_EQ, __b, __a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector signed long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_EQ, (vector signed long long)__b, __a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_EQ, __b, __a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector unsigned long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_EQ, (vector unsigned long long)__b, __a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool long long __a, vector signed long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_EQ, __b, (vector signed long long)__a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool long long __a, vector unsigned long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_EQ, __b, (vector unsigned long long)__a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_EQ, (vector 
unsigned long long)__b, (vector unsigned long long)__a); } #endif static __inline__ int __ATTRS_o_ai vec_all_ge(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvcmpgesp_p(__CR6_LT, __a, __b); #else return __builtin_altivec_vcmpgefp_p(__CR6_LT, __a, __b); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_all_ge(vector double __a, vector double __b) { return __builtin_vsx_xvcmpgedp_p(__CR6_LT, __a, __b); } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ int __ATTRS_o_ai vec_all_ge(vector signed __int128 __a, vector signed __int128 __b) { return __builtin_altivec_vcmpgtsq_p(__CR6_EQ, __b, __a); } static __inline__ int __ATTRS_o_ai vec_all_ge(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __builtin_altivec_vcmpgtuq_p(__CR6_EQ, __b, __a); } #endif /* vec_all_gt */ static __inline__ int __ATTRS_o_ai vec_all_gt(vector signed char __a, vector signed char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_LT, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector signed char __a, vector bool char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_LT, __a, (vector signed char)__b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_LT, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector unsigned char __a, vector bool char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_LT, __a, (vector unsigned char)__b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool char __a, vector signed char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_LT, (vector signed char)__a, __b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool char __a, vector unsigned char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)__a, __b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool char __a, vector bool char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)__a, (vector unsigned char)__b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector short __a, vector short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_LT, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector short __a, vector bool short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_LT, __a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_LT, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector unsigned short __a, vector bool short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_LT, __a, (vector unsigned short)__b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool short __a, vector short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_LT, (vector signed short)__a, __b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool short __a, vector unsigned short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)__a, __b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool short __a, vector bool short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)__a, (vector unsigned short)__b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector int __a, vector int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_LT, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector int __a, vector bool int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_LT, __a, 
(vector int)__b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_LT, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector unsigned int __a, vector bool int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_LT, __a, (vector unsigned int)__b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool int __a, vector int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_LT, (vector signed int)__a, __b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool int __a, vector unsigned int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)__a, __b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool int __a, vector bool int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)__a, (vector unsigned int)__b); } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_all_gt(vector signed long long __a, vector signed long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_LT, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector signed long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_LT, __a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_LT, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector unsigned long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_LT, __a, (vector unsigned long long)__b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool long long __a, vector signed long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_LT, (vector signed long long)__a, __b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool long long __a, vector unsigned long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_LT, (vector unsigned long long)__a, __b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_LT, (vector unsigned long long)__a, (vector unsigned long long)__b); } #endif static __inline__ int __ATTRS_o_ai vec_all_gt(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvcmpgtsp_p(__CR6_LT, __a, __b); #else return __builtin_altivec_vcmpgtfp_p(__CR6_LT, __a, __b); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_all_gt(vector double __a, vector double __b) { return __builtin_vsx_xvcmpgtdp_p(__CR6_LT, __a, __b); } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ int __ATTRS_o_ai vec_all_gt(vector signed __int128 __a, vector signed __int128 __b) { return __builtin_altivec_vcmpgtsq_p(__CR6_LT, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_gt(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __builtin_altivec_vcmpgtuq_p(__CR6_LT, __a, __b); } #endif /* vec_all_in */ static __inline__ int __attribute__((__always_inline__)) vec_all_in(vector float __a, vector float __b) { return __builtin_altivec_vcmpbfp_p(__CR6_EQ, __a, __b); } /* vec_all_le */ static __inline__ int __ATTRS_o_ai vec_all_le(vector signed char __a, vector signed char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector signed char __a, vector bool char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, __a, (vector signed char)__b); } static __inline__ int 
__ATTRS_o_ai vec_all_le(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_EQ, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector unsigned char __a, vector bool char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_EQ, __a, (vector unsigned char)__b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector bool char __a, vector signed char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, (vector signed char)__a, __b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector bool char __a, vector unsigned char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)__a, __b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector bool char __a, vector bool char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)__a, (vector unsigned char)__b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector short __a, vector short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector short __a, vector bool short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, __a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector unsigned short __a, vector bool short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, __a, (vector unsigned short)__b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector bool short __a, vector short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, (vector signed short)__a, __b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector bool short __a, vector unsigned short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)__a, __b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector bool short __a, vector bool short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)__a, (vector unsigned short)__b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector int __a, vector int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector int __a, vector bool int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, __a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector unsigned int __a, vector bool int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, __a, (vector unsigned int)__b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector bool int __a, vector int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, (vector signed int)__a, __b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector bool int __a, vector unsigned int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)__a, __b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector bool int __a, vector bool int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)__a, (vector unsigned int)__b); } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_all_le(vector signed long long __a, vector signed long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_EQ, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_EQ, __a, __b); 
} static __inline__ int __ATTRS_o_ai vec_all_le(vector signed long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_EQ, __a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector unsigned long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_EQ, __a, (vector unsigned long long)__b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector bool long long __a, vector signed long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_EQ, (vector signed long long)__a, __b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector bool long long __a, vector unsigned long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_EQ, (vector unsigned long long)__a, __b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector bool long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_EQ, (vector unsigned long long)__a, (vector unsigned long long)__b); } #endif static __inline__ int __ATTRS_o_ai vec_all_le(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvcmpgesp_p(__CR6_LT, __b, __a); #else return __builtin_altivec_vcmpgefp_p(__CR6_LT, __b, __a); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_all_le(vector double __a, vector double __b) { return __builtin_vsx_xvcmpgedp_p(__CR6_LT, __b, __a); } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ int __ATTRS_o_ai vec_all_le(vector signed __int128 __a, vector signed __int128 __b) { return __builtin_altivec_vcmpgtsq_p(__CR6_EQ, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_le(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __builtin_altivec_vcmpgtuq_p(__CR6_EQ, __a, __b); } #endif /* vec_all_lt */ static __inline__ int __ATTRS_o_ai vec_all_lt(vector signed char __a, vector signed char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_LT, __b, __a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector signed char __a, vector bool char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_LT, (vector signed char)__b, __a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_LT, __b, __a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector unsigned char __a, vector bool char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)__b, __a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool char __a, vector signed char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_LT, __b, (vector signed char)__a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool char __a, vector unsigned char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_LT, __b, (vector unsigned char)__a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool char __a, vector bool char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)__b, (vector unsigned char)__a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector short __a, vector short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_LT, __b, __a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector short __a, vector bool short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_LT, (vector short)__b, __a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_LT, __b, __a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector unsigned short __a, vector bool short 
__b) { return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)__b, __a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool short __a, vector short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_LT, __b, (vector signed short)__a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool short __a, vector unsigned short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_LT, __b, (vector unsigned short)__a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool short __a, vector bool short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)__b, (vector unsigned short)__a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector int __a, vector int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_LT, __b, __a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector int __a, vector bool int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_LT, (vector int)__b, __a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_LT, __b, __a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector unsigned int __a, vector bool int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)__b, __a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool int __a, vector int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_LT, __b, (vector signed int)__a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool int __a, vector unsigned int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_LT, __b, (vector unsigned int)__a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool int __a, vector bool int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)__b, (vector unsigned int)__a); } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_all_lt(vector signed long long __a, vector signed long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_LT, __b, __a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_LT, __b, __a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector signed long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_LT, (vector signed long long)__b, __a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector unsigned long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_LT, (vector unsigned long long)__b, __a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool long long __a, vector signed long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_LT, __b, (vector signed long long)__a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool long long __a, vector unsigned long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_LT, __b, (vector unsigned long long)__a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_LT, (vector unsigned long long)__b, (vector unsigned long long)__a); } #endif static __inline__ int __ATTRS_o_ai vec_all_lt(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvcmpgtsp_p(__CR6_LT, __b, __a); #else return __builtin_altivec_vcmpgtfp_p(__CR6_LT, __b, __a); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_all_lt(vector double __a, vector double __b) { return __builtin_vsx_xvcmpgtdp_p(__CR6_LT, __b, __a); } #endif #if defined(__POWER10_VECTOR__) && 
defined(__SIZEOF_INT128__) static __inline__ int __ATTRS_o_ai vec_all_lt(vector signed __int128 __a, vector signed __int128 __b) { return __builtin_altivec_vcmpgtsq_p(__CR6_LT, __b, __a); } static __inline__ int __ATTRS_o_ai vec_all_lt(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __builtin_altivec_vcmpgtuq_p(__CR6_LT, __b, __a); } #endif /* vec_all_nan */ static __inline__ int __ATTRS_o_ai vec_all_nan(vector float __a) { #ifdef __VSX__ return __builtin_vsx_xvcmpeqsp_p(__CR6_EQ, __a, __a); #else return __builtin_altivec_vcmpeqfp_p(__CR6_EQ, __a, __a); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_all_nan(vector double __a) { return __builtin_vsx_xvcmpeqdp_p(__CR6_EQ, __a, __a); } #endif /* vec_all_ne */ static __inline__ int __ATTRS_o_ai vec_all_ne(vector signed char __a, vector signed char __b) { return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector signed char __a, vector bool char __b) { return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector unsigned char __a, vector bool char __b) { return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool char __a, vector signed char __b) { return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool char __a, vector unsigned char __b) { return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool char __a, vector bool char __b) { return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector short __a, vector short __b) { return __builtin_altivec_vcmpequh_p(__CR6_EQ, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector short __a, vector bool short __b) { return __builtin_altivec_vcmpequh_p(__CR6_EQ, __a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector unsigned short __a, vector bool short __b) { return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool short __a, vector short __b) { return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool short __a, vector unsigned short __b) { return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool short __a, vector bool short __b) { return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector pixel __a, vector pixel __b) { return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector int __a, vector int __b) { return __builtin_altivec_vcmpequw_p(__CR6_EQ, __a, __b); } static 
__inline__ int __ATTRS_o_ai vec_all_ne(vector int __a, vector bool int __b) { return __builtin_altivec_vcmpequw_p(__CR6_EQ, __a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector unsigned int __a, vector bool int __b) { return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool int __a, vector int __b) { return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool int __a, vector unsigned int __b) { return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool int __a, vector bool int __b) { return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a, (vector int)__b); } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_all_ne(vector signed long long __a, vector signed long long __b) { return __builtin_altivec_vcmpequd_p(__CR6_EQ, __a, __b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vcmpequd_p(__CR6_EQ, (vector long long)__a, (vector long long)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector signed long long __a, vector bool long long __b) { return __builtin_altivec_vcmpequd_p(__CR6_EQ, __a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector unsigned long long __a, vector bool long long __b) { return __builtin_altivec_vcmpequd_p(__CR6_EQ, (vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool long long __a, vector signed long long __b) { return __builtin_altivec_vcmpequd_p(__CR6_EQ, (vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool long long __a, vector unsigned long long __b) { return __builtin_altivec_vcmpequd_p(__CR6_EQ, (vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool long long __a, vector bool long long __b) { return __builtin_altivec_vcmpequd_p(__CR6_EQ, (vector signed long long)__a, (vector signed long long)__b); } #endif static __inline__ int __ATTRS_o_ai vec_all_ne(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvcmpeqsp_p(__CR6_EQ, __a, __b); #else return __builtin_altivec_vcmpeqfp_p(__CR6_EQ, __a, __b); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_all_ne(vector double __a, vector double __b) { return __builtin_vsx_xvcmpeqdp_p(__CR6_EQ, __a, __b); } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ int __ATTRS_o_ai vec_all_ne(vector signed __int128 __a, vector signed __int128 __b) { return __builtin_altivec_vcmpequq_p(__CR6_EQ, (vector unsigned __int128)__a, __b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __builtin_altivec_vcmpequq_p(__CR6_EQ, __a, (vector signed __int128)__b); } static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool __int128 __a, vector bool __int128 __b) { return __builtin_altivec_vcmpequq_p(__CR6_EQ, (vector unsigned __int128)__a, (vector signed __int128)__b); } #endif /* vec_all_nge */ static __inline__ int 
__ATTRS_o_ai vec_all_nge(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvcmpgesp_p(__CR6_EQ, __a, __b); #else return __builtin_altivec_vcmpgefp_p(__CR6_EQ, __a, __b); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_all_nge(vector double __a, vector double __b) { return __builtin_vsx_xvcmpgedp_p(__CR6_EQ, __a, __b); } #endif /* vec_all_ngt */ static __inline__ int __ATTRS_o_ai vec_all_ngt(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvcmpgtsp_p(__CR6_EQ, __a, __b); #else return __builtin_altivec_vcmpgtfp_p(__CR6_EQ, __a, __b); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_all_ngt(vector double __a, vector double __b) { return __builtin_vsx_xvcmpgtdp_p(__CR6_EQ, __a, __b); } #endif /* vec_all_nle */ static __inline__ int __ATTRS_o_ai vec_all_nle(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvcmpgesp_p(__CR6_EQ, __b, __a); #else return __builtin_altivec_vcmpgefp_p(__CR6_EQ, __b, __a); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_all_nle(vector double __a, vector double __b) { return __builtin_vsx_xvcmpgedp_p(__CR6_EQ, __b, __a); } #endif /* vec_all_nlt */ static __inline__ int __ATTRS_o_ai vec_all_nlt(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvcmpgtsp_p(__CR6_EQ, __b, __a); #else return __builtin_altivec_vcmpgtfp_p(__CR6_EQ, __b, __a); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_all_nlt(vector double __a, vector double __b) { return __builtin_vsx_xvcmpgtdp_p(__CR6_EQ, __b, __a); } #endif /* vec_all_numeric */ static __inline__ int __ATTRS_o_ai vec_all_numeric(vector float __a) { #ifdef __VSX__ return __builtin_vsx_xvcmpeqsp_p(__CR6_LT, __a, __a); #else return __builtin_altivec_vcmpeqfp_p(__CR6_LT, __a, __a); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_all_numeric(vector double __a) { return __builtin_vsx_xvcmpeqdp_p(__CR6_LT, __a, __a); } #endif /* vec_any_eq */ static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b) { return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector bool char __b) { return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector unsigned char __a, vector bool char __b) { return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool char __a, vector signed char __b) { return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool char __a, vector unsigned char __b) { return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool char __a, vector bool char __b) { return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector short __a, vector short __b) { return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector short __a, vector bool short __b) { return 
__builtin_altivec_vcmpequh_p(__CR6_EQ_REV, __a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector unsigned short __a, vector bool short __b) { return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool short __a, vector short __b) { return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool short __a, vector unsigned short __b) { return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool short __a, vector bool short __b) { return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector pixel __a, vector pixel __b) { return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector int __a, vector int __b) { return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector int __a, vector bool int __b) { return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, __a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector unsigned int __a, vector bool int __b) { return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool int __a, vector int __b) { return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool int __a, vector unsigned int __b) { return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool int __a, vector bool int __b) { return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a, (vector int)__b); } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed long long __a, vector signed long long __b) { return __builtin_altivec_vcmpequd_p(__CR6_EQ_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vcmpequd_p(__CR6_EQ_REV, (vector long long)__a, (vector long long)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed long long __a, vector bool long long __b) { return __builtin_altivec_vcmpequd_p(__CR6_EQ_REV, __a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector unsigned long long __a, vector bool long long __b) { return __builtin_altivec_vcmpequd_p( __CR6_EQ_REV, (vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool long long __a, vector signed long long __b) { return __builtin_altivec_vcmpequd_p( __CR6_EQ_REV, (vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool long long __a, vector unsigned long long __b) { return __builtin_altivec_vcmpequd_p( 
__CR6_EQ_REV, (vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool long long __a, vector bool long long __b) { return __builtin_altivec_vcmpequd_p( __CR6_EQ_REV, (vector signed long long)__a, (vector signed long long)__b); } #endif static __inline__ int __ATTRS_o_ai vec_any_eq(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvcmpeqsp_p(__CR6_EQ_REV, __a, __b); #else return __builtin_altivec_vcmpeqfp_p(__CR6_EQ_REV, __a, __b); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_any_eq(vector double __a, vector double __b) { return __builtin_vsx_xvcmpeqdp_p(__CR6_EQ_REV, __a, __b); } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed __int128 __a, vector signed __int128 __b) { return __builtin_altivec_vcmpequq_p(__CR6_EQ_REV, (vector unsigned __int128)__a, __b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __builtin_altivec_vcmpequq_p(__CR6_EQ_REV, __a, (vector signed __int128)__b); } static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool __int128 __a, vector bool __int128 __b) { return __builtin_altivec_vcmpequq_p( __CR6_EQ_REV, (vector unsigned __int128)__a, (vector signed __int128)__b); } #endif /* vec_any_ge */ static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a, vector signed char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, __b, __a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a, vector bool char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, (vector signed char)__b, __a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, __b, __a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector unsigned char __a, vector bool char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)__b, __a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool char __a, vector signed char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, __b, (vector signed char)__a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool char __a, vector unsigned char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, __b, (vector unsigned char)__a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool char __a, vector bool char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)__b, (vector unsigned char)__a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector short __a, vector short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, __b, __a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector short __a, vector bool short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, (vector short)__b, __a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, __b, __a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector unsigned short __a, vector bool short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)__b, __a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool short __a, vector short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, __b, (vector signed short)__a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool short __a, vector unsigned short 
__b) { return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, __b, (vector unsigned short)__a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool short __a, vector bool short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)__b, (vector unsigned short)__a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector int __a, vector int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, __b, __a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector int __a, vector bool int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, (vector int)__b, __a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, __b, __a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector unsigned int __a, vector bool int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)__b, __a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool int __a, vector int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, __b, (vector signed int)__a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool int __a, vector unsigned int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, __b, (vector unsigned int)__a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool int __a, vector bool int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)__b, (vector unsigned int)__a); } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed long long __a, vector signed long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_LT_REV, __b, __a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV, __b, __a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_LT_REV, (vector signed long long)__b, __a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector unsigned long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV, (vector unsigned long long)__b, __a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool long long __a, vector signed long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_LT_REV, __b, (vector signed long long)__a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool long long __a, vector unsigned long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV, __b, (vector unsigned long long)__a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV, (vector unsigned long long)__b, (vector unsigned long long)__a); } #endif static __inline__ int __ATTRS_o_ai vec_any_ge(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvcmpgesp_p(__CR6_EQ_REV, __a, __b); #else return __builtin_altivec_vcmpgefp_p(__CR6_EQ_REV, __a, __b); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_any_ge(vector double __a, vector double __b) { return __builtin_vsx_xvcmpgedp_p(__CR6_EQ_REV, __a, __b); } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed __int128 __a, vector signed __int128 __b) { return __builtin_altivec_vcmpgtsq_p(__CR6_LT_REV, __b, __a); } static __inline__ int __ATTRS_o_ai vec_any_ge(vector unsigned __int128 __a, vector 
unsigned __int128 __b) { return __builtin_altivec_vcmpgtuq_p(__CR6_LT_REV, __b, __a); } #endif /* vec_any_gt */ static __inline__ int __ATTRS_o_ai vec_any_gt(vector signed char __a, vector signed char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector signed char __a, vector bool char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, __a, (vector signed char)__b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector unsigned char __a, vector bool char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, __a, (vector unsigned char)__b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool char __a, vector signed char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, (vector signed char)__a, __b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool char __a, vector unsigned char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)__a, __b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool char __a, vector bool char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)__a, (vector unsigned char)__b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector short __a, vector short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector short __a, vector bool short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, __a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector unsigned short __a, vector bool short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, __a, (vector unsigned short)__b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool short __a, vector short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, (vector signed short)__a, __b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool short __a, vector unsigned short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)__a, __b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool short __a, vector bool short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)__a, (vector unsigned short)__b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector int __a, vector int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector int __a, vector bool int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, __a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector unsigned int __a, vector bool int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, __a, (vector unsigned int)__b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool int __a, vector int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, (vector signed int)__a, __b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool int __a, vector unsigned int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)__a, 
__b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool int __a, vector bool int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)__a, (vector unsigned int)__b); } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_any_gt(vector signed long long __a, vector signed long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_EQ_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector signed long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_EQ_REV, __a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector unsigned long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV, __a, (vector unsigned long long)__b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool long long __a, vector signed long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_EQ_REV, (vector signed long long)__a, __b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool long long __a, vector unsigned long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV, (vector unsigned long long)__a, __b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV, (vector unsigned long long)__a, (vector unsigned long long)__b); } #endif static __inline__ int __ATTRS_o_ai vec_any_gt(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvcmpgtsp_p(__CR6_EQ_REV, __a, __b); #else return __builtin_altivec_vcmpgtfp_p(__CR6_EQ_REV, __a, __b); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_any_gt(vector double __a, vector double __b) { return __builtin_vsx_xvcmpgtdp_p(__CR6_EQ_REV, __a, __b); } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ int __ATTRS_o_ai vec_any_gt(vector signed __int128 __a, vector signed __int128 __b) { return __builtin_altivec_vcmpgtsq_p(__CR6_EQ_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_gt(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __builtin_altivec_vcmpgtuq_p(__CR6_EQ_REV, __a, __b); } #endif /* vec_any_le */ static __inline__ int __ATTRS_o_ai vec_any_le(vector signed char __a, vector signed char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector signed char __a, vector bool char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, __a, (vector signed char)__b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector unsigned char __a, vector bool char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, __a, (vector unsigned char)__b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector bool char __a, vector signed char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, (vector signed char)__a, __b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector bool char __a, vector unsigned char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)__a, __b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector bool char __a, vector bool char __b) { return 
__builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)__a, (vector unsigned char)__b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector short __a, vector short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector short __a, vector bool short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, __a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector unsigned short __a, vector bool short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, __a, (vector unsigned short)__b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector bool short __a, vector short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, (vector signed short)__a, __b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector bool short __a, vector unsigned short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)__a, __b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector bool short __a, vector bool short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)__a, (vector unsigned short)__b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector int __a, vector int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector int __a, vector bool int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, __a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector unsigned int __a, vector bool int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, __a, (vector unsigned int)__b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector bool int __a, vector int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, (vector signed int)__a, __b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector bool int __a, vector unsigned int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)__a, __b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector bool int __a, vector bool int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)__a, (vector unsigned int)__b); } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_any_le(vector signed long long __a, vector signed long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_LT_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector signed long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_LT_REV, __a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector unsigned long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV, __a, (vector unsigned long long)__b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector bool long long __a, vector signed long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_LT_REV, (vector signed long long)__a, __b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector bool long long __a, vector unsigned long long __b) { return 
__builtin_altivec_vcmpgtud_p(__CR6_LT_REV, (vector unsigned long long)__a, __b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector bool long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV, (vector unsigned long long)__a, (vector unsigned long long)__b); } #endif static __inline__ int __ATTRS_o_ai vec_any_le(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvcmpgesp_p(__CR6_EQ_REV, __b, __a); #else return __builtin_altivec_vcmpgefp_p(__CR6_EQ_REV, __b, __a); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_any_le(vector double __a, vector double __b) { return __builtin_vsx_xvcmpgedp_p(__CR6_EQ_REV, __b, __a); } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ int __ATTRS_o_ai vec_any_le(vector signed __int128 __a, vector signed __int128 __b) { return __builtin_altivec_vcmpgtsq_p(__CR6_LT_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_le(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __builtin_altivec_vcmpgtuq_p(__CR6_LT_REV, __a, __b); } #endif /* vec_any_lt */ static __inline__ int __ATTRS_o_ai vec_any_lt(vector signed char __a, vector signed char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, __b, __a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector signed char __a, vector bool char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, (vector signed char)__b, __a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, __b, __a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector unsigned char __a, vector bool char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)__b, __a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool char __a, vector signed char __b) { return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, __b, (vector signed char)__a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool char __a, vector unsigned char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, __b, (vector unsigned char)__a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool char __a, vector bool char __b) { return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)__b, (vector unsigned char)__a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector short __a, vector short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, __b, __a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector short __a, vector bool short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, (vector short)__b, __a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, __b, __a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector unsigned short __a, vector bool short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)__b, __a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool short __a, vector short __b) { return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, __b, (vector signed short)__a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool short __a, vector unsigned short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, __b, (vector unsigned short)__a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool short __a, vector bool short __b) { return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned 
short)__b, (vector unsigned short)__a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector int __a, vector int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, __b, __a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector int __a, vector bool int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, (vector int)__b, __a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, __b, __a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector unsigned int __a, vector bool int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)__b, __a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool int __a, vector int __b) { return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, __b, (vector signed int)__a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool int __a, vector unsigned int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, __b, (vector unsigned int)__a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool int __a, vector bool int __b) { return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)__b, (vector unsigned int)__a); } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_any_lt(vector signed long long __a, vector signed long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_EQ_REV, __b, __a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV, __b, __a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector signed long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_EQ_REV, (vector signed long long)__b, __a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector unsigned long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV, (vector unsigned long long)__b, __a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool long long __a, vector signed long long __b) { return __builtin_altivec_vcmpgtsd_p(__CR6_EQ_REV, __b, (vector signed long long)__a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool long long __a, vector unsigned long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV, __b, (vector unsigned long long)__a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool long long __a, vector bool long long __b) { return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV, (vector unsigned long long)__b, (vector unsigned long long)__a); } #endif static __inline__ int __ATTRS_o_ai vec_any_lt(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvcmpgtsp_p(__CR6_EQ_REV, __b, __a); #else return __builtin_altivec_vcmpgtfp_p(__CR6_EQ_REV, __b, __a); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_any_lt(vector double __a, vector double __b) { return __builtin_vsx_xvcmpgtdp_p(__CR6_EQ_REV, __b, __a); } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ int __ATTRS_o_ai vec_any_lt(vector signed __int128 __a, vector signed __int128 __b) { return __builtin_altivec_vcmpgtsq_p(__CR6_EQ_REV, __b, __a); } static __inline__ int __ATTRS_o_ai vec_any_lt(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __builtin_altivec_vcmpgtuq_p(__CR6_EQ_REV, __b, __a); } #endif /* vec_any_nan */ static __inline__ int __ATTRS_o_ai vec_any_nan(vector float __a) { #ifdef __VSX__ return __builtin_vsx_xvcmpeqsp_p(__CR6_LT_REV, __a, __a); #else 
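/* Illustrative usage sketch (added for clarity; not part of the original
 * header): every vec_any_* predicate above returns a nonzero int when the
 * relation holds for at least one element pair, so a typical call looks like
 * the hypothetical snippet below.
 *
 *   vector signed int a = {1, 2, 3, 4};
 *   vector signed int b = {4, 3, 2, 1};
 *   if (vec_any_gt(a, b)) {
 *     // taken: a[2] > b[2] and a[3] > b[3]
 *   }
 */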
return __builtin_altivec_vcmpeqfp_p(__CR6_LT_REV, __a, __a); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_any_nan(vector double __a) { return __builtin_vsx_xvcmpeqdp_p(__CR6_LT_REV, __a, __a); } #endif /* vec_any_ne */ static __inline__ int __ATTRS_o_ai vec_any_ne(vector signed char __a, vector signed char __b) { return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector signed char __a, vector bool char __b) { return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned char __a, vector bool char __b) { return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool char __a, vector signed char __b) { return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool char __a, vector unsigned char __b) { return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool char __a, vector bool char __b) { return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector short __a, vector short __b) { return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector short __a, vector bool short __b) { return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, __a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned short __a, vector bool short __b) { return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool short __a, vector short __b) { return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool short __a, vector unsigned short __b) { return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool short __a, vector bool short __b) { return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector pixel __a, vector pixel __b) { return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)__a, (vector short)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector int __a, vector int __b) { return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, __a, __b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector int __a, vector bool int __b) { return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, __a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned int __a, vector bool int __b) { return 
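/* Note added for clarity (not in the original header): vec_any_nan relies on
 * the self-comparison __a == __a, which is false only for NaN lanes, so a
 * hypothetical check could read:
 *
 *   vector float f = {1.0f, __builtin_nanf(""), 2.0f, 3.0f};
 *   if (vec_any_nan(f)) {
 *     // at least one lane is NaN
 *   }
 */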
__builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool int __a, vector int __b) { return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool int __a, vector unsigned int __b) { return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a, (vector int)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool int __a, vector bool int __b) { return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a, (vector int)__b); } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_any_ne(vector signed long long __a, vector signed long long __b) { #ifdef __POWER8_VECTOR__ return __builtin_altivec_vcmpequd_p(__CR6_LT_REV, __a, __b); #else // Take advantage of the optimized sequence for vec_all_eq when vcmpequd is // not available. return !vec_all_eq(__a, __b); #endif } static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned long long __a, vector unsigned long long __b) { return vec_any_ne((vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector signed long long __a, vector bool long long __b) { return vec_any_ne((vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned long long __a, vector bool long long __b) { return vec_any_ne((vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool long long __a, vector signed long long __b) { return vec_any_ne((vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool long long __a, vector unsigned long long __b) { return vec_any_ne((vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool long long __a, vector bool long long __b) { return vec_any_ne((vector signed long long)__a, (vector signed long long)__b); } #endif static __inline__ int __ATTRS_o_ai vec_any_ne(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvcmpeqsp_p(__CR6_LT_REV, __a, __b); #else return __builtin_altivec_vcmpeqfp_p(__CR6_LT_REV, __a, __b); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_any_ne(vector double __a, vector double __b) { return __builtin_vsx_xvcmpeqdp_p(__CR6_LT_REV, __a, __b); } #endif #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__) static __inline__ int __ATTRS_o_ai vec_any_ne(vector signed __int128 __a, vector signed __int128 __b) { return __builtin_altivec_vcmpequq_p(__CR6_LT_REV, (vector unsigned __int128)__a, __b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __builtin_altivec_vcmpequq_p(__CR6_LT_REV, __a, (vector signed __int128)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool __int128 __a, vector bool __int128 __b) { return __builtin_altivec_vcmpequq_p( __CR6_LT_REV, (vector unsigned __int128)__a, (vector signed __int128)__b); } #endif /* vec_any_nge */ static __inline__ int __ATTRS_o_ai vec_any_nge(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvcmpgesp_p(__CR6_LT_REV, __a, __b); #else return __builtin_altivec_vcmpgefp_p(__CR6_LT_REV, __a, __b); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_any_nge(vector double __a, vector double __b) { return __builtin_vsx_xvcmpgedp_p(__CR6_LT_REV, 
__a, __b); } #endif /* vec_any_ngt */ static __inline__ int __ATTRS_o_ai vec_any_ngt(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvcmpgtsp_p(__CR6_LT_REV, __a, __b); #else return __builtin_altivec_vcmpgtfp_p(__CR6_LT_REV, __a, __b); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_any_ngt(vector double __a, vector double __b) { return __builtin_vsx_xvcmpgtdp_p(__CR6_LT_REV, __a, __b); } #endif /* vec_any_nle */ static __inline__ int __ATTRS_o_ai vec_any_nle(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvcmpgesp_p(__CR6_LT_REV, __b, __a); #else return __builtin_altivec_vcmpgefp_p(__CR6_LT_REV, __b, __a); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_any_nle(vector double __a, vector double __b) { return __builtin_vsx_xvcmpgedp_p(__CR6_LT_REV, __b, __a); } #endif /* vec_any_nlt */ static __inline__ int __ATTRS_o_ai vec_any_nlt(vector float __a, vector float __b) { #ifdef __VSX__ return __builtin_vsx_xvcmpgtsp_p(__CR6_LT_REV, __b, __a); #else return __builtin_altivec_vcmpgtfp_p(__CR6_LT_REV, __b, __a); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_any_nlt(vector double __a, vector double __b) { return __builtin_vsx_xvcmpgtdp_p(__CR6_LT_REV, __b, __a); } #endif /* vec_any_numeric */ static __inline__ int __ATTRS_o_ai vec_any_numeric(vector float __a) { #ifdef __VSX__ return __builtin_vsx_xvcmpeqsp_p(__CR6_EQ_REV, __a, __a); #else return __builtin_altivec_vcmpeqfp_p(__CR6_EQ_REV, __a, __a); #endif } #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_any_numeric(vector double __a) { return __builtin_vsx_xvcmpeqdp_p(__CR6_EQ_REV, __a, __a); } #endif /* vec_any_out */ static __inline__ int __attribute__((__always_inline__)) vec_any_out(vector float __a, vector float __b) { return __builtin_altivec_vcmpbfp_p(__CR6_EQ_REV, __a, __b); } /* Power 8 Crypto functions Note: We diverge from the current GCC implementation with regard to cryptography and related functions as follows: - Only the SHA and AES instructions and builtins are disabled by -mno-crypto - The remaining ones are only available on Power8 and up so require -mpower8-vector The justification for this is that export requirements require that Category:Vector.Crypto is optional (i.e. compliant hardware may not provide support). As a result, we need to be able to turn off support for those. The remaining ones (currently controlled by -mcrypto for GCC) still need to be provided on compliant hardware even if Vector.Crypto is not provided. 
*/ #ifdef __CRYPTO__ #define vec_sbox_be __builtin_altivec_crypto_vsbox #define vec_cipher_be __builtin_altivec_crypto_vcipher #define vec_cipherlast_be __builtin_altivec_crypto_vcipherlast #define vec_ncipher_be __builtin_altivec_crypto_vncipher #define vec_ncipherlast_be __builtin_altivec_crypto_vncipherlast #ifdef __VSX__ static __inline__ vector unsigned char __attribute__((__always_inline__)) __builtin_crypto_vsbox(vector unsigned char __a) { return __builtin_altivec_crypto_vsbox(__a); } static __inline__ vector unsigned char __attribute__((__always_inline__)) __builtin_crypto_vcipher(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_crypto_vcipher(__a, __b); } static __inline__ vector unsigned char __attribute__((__always_inline__)) __builtin_crypto_vcipherlast(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_crypto_vcipherlast(__a, __b); } static __inline__ vector unsigned char __attribute__((__always_inline__)) __builtin_crypto_vncipher(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_crypto_vncipher(__a, __b); } static __inline__ vector unsigned char __attribute__((__always_inline__)) __builtin_crypto_vncipherlast(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_crypto_vncipherlast(__a, __b); } #endif /* __VSX__ */ #define __builtin_crypto_vshasigmad __builtin_altivec_crypto_vshasigmad #define __builtin_crypto_vshasigmaw __builtin_altivec_crypto_vshasigmaw #define vec_shasigma_be(X, Y, Z) \ _Generic((X), vector unsigned int \ : __builtin_crypto_vshasigmaw, vector unsigned long long \ : __builtin_crypto_vshasigmad)((X), (Y), (Z)) #endif #ifdef __POWER8_VECTOR__ static __inline__ vector bool char __ATTRS_o_ai vec_permxor(vector bool char __a, vector bool char __b, vector bool char __c) { return (vector bool char)__builtin_altivec_crypto_vpermxor( (vector unsigned char)__a, (vector unsigned char)__b, (vector unsigned char)__c); } static __inline__ vector signed char __ATTRS_o_ai vec_permxor(vector signed char __a, vector signed char __b, vector signed char __c) { return (vector signed char)__builtin_altivec_crypto_vpermxor( (vector unsigned char)__a, (vector unsigned char)__b, (vector unsigned char)__c); } static __inline__ vector unsigned char __ATTRS_o_ai vec_permxor(vector unsigned char __a, vector unsigned char __b, vector unsigned char __c) { return __builtin_altivec_crypto_vpermxor(__a, __b, __c); } static __inline__ vector unsigned char __ATTRS_o_ai __builtin_crypto_vpermxor(vector unsigned char __a, vector unsigned char __b, vector unsigned char __c) { return __builtin_altivec_crypto_vpermxor(__a, __b, __c); } static __inline__ vector unsigned short __ATTRS_o_ai __builtin_crypto_vpermxor(vector unsigned short __a, vector unsigned short __b, vector unsigned short __c) { return (vector unsigned short)__builtin_altivec_crypto_vpermxor( (vector unsigned char)__a, (vector unsigned char)__b, (vector unsigned char)__c); } static __inline__ vector unsigned int __ATTRS_o_ai __builtin_crypto_vpermxor( vector unsigned int __a, vector unsigned int __b, vector unsigned int __c) { return (vector unsigned int)__builtin_altivec_crypto_vpermxor( (vector unsigned char)__a, (vector unsigned char)__b, (vector unsigned char)__c); } static __inline__ vector unsigned long long __ATTRS_o_ai __builtin_crypto_vpermxor(vector unsigned long long __a, vector unsigned long long __b, vector unsigned long long __c) { return (vector unsigned long long)__builtin_altivec_crypto_vpermxor( 
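/* Illustrative usage sketch (added; not part of the original header). Under
 * __CRYPTO__ the vec_cipher_be wrapper above maps onto the vcipher AES round
 * instruction; one hypothetical round over a caller-supplied state and round
 * key would look like:
 *
 *   vector unsigned char state;      // 16-byte AES state (caller-provided)
 *   vector unsigned char round_key;  // 16-byte round key (caller-provided)
 *   state = vec_cipher_be(state, round_key);
 */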
(vector unsigned char)__a, (vector unsigned char)__b, (vector unsigned char)__c); } static __inline__ vector unsigned char __ATTRS_o_ai __builtin_crypto_vpmsumb(vector unsigned char __a, vector unsigned char __b) { return __builtin_altivec_crypto_vpmsumb(__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai __builtin_crypto_vpmsumb(vector unsigned short __a, vector unsigned short __b) { return __builtin_altivec_crypto_vpmsumh(__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai __builtin_crypto_vpmsumb(vector unsigned int __a, vector unsigned int __b) { return __builtin_altivec_crypto_vpmsumw(__a, __b); } static __inline__ vector unsigned long long __ATTRS_o_ai __builtin_crypto_vpmsumb(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_crypto_vpmsumd(__a, __b); } static __inline__ vector signed char __ATTRS_o_ai vec_vgbbd(vector signed char __a) { return (vector signed char)__builtin_altivec_vgbbd((vector unsigned char)__a); } #define vec_pmsum_be __builtin_crypto_vpmsumb #define vec_gb __builtin_altivec_vgbbd static __inline__ vector unsigned char __ATTRS_o_ai vec_vgbbd(vector unsigned char __a) { return __builtin_altivec_vgbbd(__a); } static __inline__ vector signed long long __ATTRS_o_ai vec_gbb(vector signed long long __a) { return (vector signed long long)__builtin_altivec_vgbbd( (vector unsigned char)__a); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_gbb(vector unsigned long long __a) { return (vector unsigned long long)__builtin_altivec_vgbbd( (vector unsigned char)__a); } static __inline__ vector long long __ATTRS_o_ai vec_vbpermq(vector signed char __a, vector signed char __b) { return (vector long long)__builtin_altivec_vbpermq((vector unsigned char)__a, (vector unsigned char)__b); } static __inline__ vector long long __ATTRS_o_ai vec_vbpermq(vector unsigned char __a, vector unsigned char __b) { return (vector long long)__builtin_altivec_vbpermq(__a, __b); } #if defined(__powerpc64__) && defined(__SIZEOF_INT128__) static __inline__ vector unsigned long long __ATTRS_o_ai vec_bperm(vector unsigned __int128 __a, vector unsigned char __b) { return __builtin_altivec_vbpermq((vector unsigned char)__a, (vector unsigned char)__b); } #endif static __inline__ vector unsigned char __ATTRS_o_ai vec_bperm(vector unsigned char __a, vector unsigned char __b) { return (vector unsigned char)__builtin_altivec_vbpermq(__a, __b); } #endif // __POWER8_VECTOR__ #ifdef __POWER9_VECTOR__ static __inline__ vector unsigned long long __ATTRS_o_ai vec_bperm(vector unsigned long long __a, vector unsigned char __b) { return __builtin_altivec_vbpermd(__a, __b); } #endif /* vec_reve */ static inline __ATTRS_o_ai vector bool char vec_reve(vector bool char __a) { return __builtin_shufflevector(__a, __a, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); } static inline __ATTRS_o_ai vector signed char vec_reve(vector signed char __a) { return __builtin_shufflevector(__a, __a, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); } static inline __ATTRS_o_ai vector unsigned char vec_reve(vector unsigned char __a) { return __builtin_shufflevector(__a, __a, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); } static inline __ATTRS_o_ai vector bool int vec_reve(vector bool int __a) { return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); } static inline __ATTRS_o_ai vector signed int vec_reve(vector signed int __a) { return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); } static inline __ATTRS_o_ai vector unsigned int 
vec_reve(vector unsigned int __a) { return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); } static inline __ATTRS_o_ai vector bool short vec_reve(vector bool short __a) { return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); } static inline __ATTRS_o_ai vector signed short vec_reve(vector signed short __a) { return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); } static inline __ATTRS_o_ai vector unsigned short vec_reve(vector unsigned short __a) { return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); } static inline __ATTRS_o_ai vector float vec_reve(vector float __a) { return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); } #ifdef __VSX__ static inline __ATTRS_o_ai vector bool long long vec_reve(vector bool long long __a) { return __builtin_shufflevector(__a, __a, 1, 0); } static inline __ATTRS_o_ai vector signed long long vec_reve(vector signed long long __a) { return __builtin_shufflevector(__a, __a, 1, 0); } static inline __ATTRS_o_ai vector unsigned long long vec_reve(vector unsigned long long __a) { return __builtin_shufflevector(__a, __a, 1, 0); } static inline __ATTRS_o_ai vector double vec_reve(vector double __a) { return __builtin_shufflevector(__a, __a, 1, 0); } #endif /* vec_revb */ static __inline__ vector bool char __ATTRS_o_ai vec_revb(vector bool char __a) { return __a; } static __inline__ vector signed char __ATTRS_o_ai vec_revb(vector signed char __a) { return __a; } static __inline__ vector unsigned char __ATTRS_o_ai vec_revb(vector unsigned char __a) { return __a; } static __inline__ vector bool short __ATTRS_o_ai vec_revb(vector bool short __a) { vector unsigned char __indices = { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }; return vec_perm(__a, __a, __indices); } static __inline__ vector signed short __ATTRS_o_ai vec_revb(vector signed short __a) { vector unsigned char __indices = { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }; return vec_perm(__a, __a, __indices); } static __inline__ vector unsigned short __ATTRS_o_ai vec_revb(vector unsigned short __a) { vector unsigned char __indices = { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }; return vec_perm(__a, __a, __indices); } static __inline__ vector bool int __ATTRS_o_ai vec_revb(vector bool int __a) { vector unsigned char __indices = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }; return vec_perm(__a, __a, __indices); } static __inline__ vector signed int __ATTRS_o_ai vec_revb(vector signed int __a) { vector unsigned char __indices = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }; return vec_perm(__a, __a, __indices); } static __inline__ vector unsigned int __ATTRS_o_ai vec_revb(vector unsigned int __a) { vector unsigned char __indices = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }; return vec_perm(__a, __a, __indices); } static __inline__ vector float __ATTRS_o_ai vec_revb(vector float __a) { vector unsigned char __indices = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }; return vec_perm(__a, __a, __indices); } #ifdef __VSX__ static __inline__ vector bool long long __ATTRS_o_ai vec_revb(vector bool long long __a) { vector unsigned char __indices = { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }; return vec_perm(__a, __a, __indices); } static __inline__ vector signed long long __ATTRS_o_ai vec_revb(vector signed long long __a) { vector unsigned char __indices = { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }; return vec_perm(__a, __a, __indices); } static __inline__ vector unsigned long long 
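/* Illustrative sketch (added; not part of the original header): vec_reve
 * reverses the order of the elements, while vec_revb reverses the bytes
 * inside each element (and is therefore a no-op for byte-sized elements).
 *
 *   vector unsigned int v = {0x00112233, 0x44556677, 0x8899AABB, 0xCCDDEEFF};
 *   vector unsigned int e = vec_reve(v); // {0xCCDDEEFF, 0x8899AABB, 0x44556677, 0x00112233}
 *   vector unsigned int b = vec_revb(v); // {0x33221100, 0x77665544, 0xBBAA9988, 0xFFEEDDCC}
 */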
__ATTRS_o_ai vec_revb(vector unsigned long long __a) { vector unsigned char __indices = { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }; return vec_perm(__a, __a, __indices); } static __inline__ vector double __ATTRS_o_ai vec_revb(vector double __a) { vector unsigned char __indices = { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }; return vec_perm(__a, __a, __indices); } #endif /* End __VSX__ */ #if defined(__POWER8_VECTOR__) && defined(__powerpc64__) && \ defined(__SIZEOF_INT128__) static __inline__ vector signed __int128 __ATTRS_o_ai vec_revb(vector signed __int128 __a) { vector unsigned char __indices = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; return (vector signed __int128)vec_perm((vector signed int)__a, (vector signed int)__a, __indices); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_revb(vector unsigned __int128 __a) { vector unsigned char __indices = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; return (vector unsigned __int128)vec_perm((vector signed int)__a, (vector signed int)__a, __indices); } #endif /* END __POWER8_VECTOR__ && __powerpc64__ */ /* vec_xl */ #define vec_xld2 vec_xl #define vec_xlw4 vec_xl typedef vector signed char unaligned_vec_schar __attribute__((aligned(1))); typedef vector unsigned char unaligned_vec_uchar __attribute__((aligned(1))); typedef vector signed short unaligned_vec_sshort __attribute__((aligned(1))); typedef vector unsigned short unaligned_vec_ushort __attribute__((aligned(1))); typedef vector signed int unaligned_vec_sint __attribute__((aligned(1))); typedef vector unsigned int unaligned_vec_uint __attribute__((aligned(1))); typedef vector float unaligned_vec_float __attribute__((aligned(1))); static inline __ATTRS_o_ai vector signed char vec_xl(ptrdiff_t __offset, const signed char *__ptr) { return *(unaligned_vec_schar *)(__ptr + __offset); } static inline __ATTRS_o_ai vector unsigned char vec_xl(ptrdiff_t __offset, const unsigned char *__ptr) { return *(unaligned_vec_uchar*)(__ptr + __offset); } static inline __ATTRS_o_ai vector signed short vec_xl(ptrdiff_t __offset, const signed short *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_sshort *)__addr; } static inline __ATTRS_o_ai vector unsigned short vec_xl(ptrdiff_t __offset, const unsigned short *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_ushort *)__addr; } static inline __ATTRS_o_ai vector signed int vec_xl(ptrdiff_t __offset, const signed int *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_sint *)__addr; } static inline __ATTRS_o_ai vector unsigned int vec_xl(ptrdiff_t __offset, const unsigned int *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_uint *)__addr; } static inline __ATTRS_o_ai vector float vec_xl(ptrdiff_t __offset, const float *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_float *)__addr; } #ifdef __VSX__ typedef vector signed long long unaligned_vec_sll __attribute__((aligned(1))); typedef vector unsigned long long unaligned_vec_ull __attribute__((aligned(1))); typedef vector double unaligned_vec_double __attribute__((aligned(1))); static inline __ATTRS_o_ai vector signed long long vec_xl(ptrdiff_t __offset, const signed long long *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_sll *)__addr; } static inline __ATTRS_o_ai vector unsigned long long vec_xl(ptrdiff_t __offset, 
const unsigned long long *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_ull *)__addr; } static inline __ATTRS_o_ai vector double vec_xl(ptrdiff_t __offset, const double *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_double *)__addr; } #endif #if defined(__POWER8_VECTOR__) && defined(__powerpc64__) && \ defined(__SIZEOF_INT128__) typedef vector signed __int128 unaligned_vec_si128 __attribute__((aligned(1))); typedef vector unsigned __int128 unaligned_vec_ui128 __attribute__((aligned(1))); static inline __ATTRS_o_ai vector signed __int128 vec_xl(ptrdiff_t __offset, const signed __int128 *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_si128 *)__addr; } static inline __ATTRS_o_ai vector unsigned __int128 vec_xl(ptrdiff_t __offset, const unsigned __int128 *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_ui128 *)__addr; } #endif /* vec_xl_be */ #ifdef __LITTLE_ENDIAN__ static __inline__ vector signed char __ATTRS_o_ai vec_xl_be(ptrdiff_t __offset, const signed char *__ptr) { vector signed char __vec = (vector signed char)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } static __inline__ vector unsigned char __ATTRS_o_ai vec_xl_be(ptrdiff_t __offset, const unsigned char *__ptr) { vector unsigned char __vec = (vector unsigned char)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } static __inline__ vector signed short __ATTRS_o_ai vec_xl_be(ptrdiff_t __offset, const signed short *__ptr) { vector signed short __vec = (vector signed short)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4); } static __inline__ vector unsigned short __ATTRS_o_ai vec_xl_be(ptrdiff_t __offset, const unsigned short *__ptr) { vector unsigned short __vec = (vector unsigned short)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4); } static __inline__ vector signed int __ATTRS_o_ai vec_xl_be(signed long long __offset, const signed int *__ptr) { return (vector signed int)__builtin_vsx_lxvw4x_be(__offset, __ptr); } static __inline__ vector unsigned int __ATTRS_o_ai vec_xl_be(signed long long __offset, const unsigned int *__ptr) { return (vector unsigned int)__builtin_vsx_lxvw4x_be(__offset, __ptr); } static __inline__ vector float __ATTRS_o_ai vec_xl_be(signed long long __offset, const float *__ptr) { return (vector float)__builtin_vsx_lxvw4x_be(__offset, __ptr); } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai vec_xl_be(signed long long __offset, const signed long long *__ptr) { return (vector signed long long)__builtin_vsx_lxvd2x_be(__offset, __ptr); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_xl_be(signed long long __offset, const unsigned long long *__ptr) { return (vector unsigned long long)__builtin_vsx_lxvd2x_be(__offset, __ptr); } static __inline__ vector double __ATTRS_o_ai vec_xl_be(signed long long __offset, const double *__ptr) { return (vector double)__builtin_vsx_lxvd2x_be(__offset, __ptr); } #endif #if defined(__POWER8_VECTOR__) && defined(__powerpc64__) && \ defined(__SIZEOF_INT128__) static __inline__ vector signed __int128 __ATTRS_o_ai vec_xl_be(signed long long __offset, const signed __int128 *__ptr) { 
return vec_xl(__offset, __ptr); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_xl_be(signed long long __offset, const unsigned __int128 *__ptr) { return vec_xl(__offset, __ptr); } #endif #else #define vec_xl_be vec_xl #endif #if defined(__POWER10_VECTOR__) && defined(__VSX__) && \ defined(__SIZEOF_INT128__) /* vec_xl_sext */ static __inline__ vector signed __int128 __ATTRS_o_ai vec_xl_sext(ptrdiff_t __offset, const signed char *__pointer) { return (vector signed __int128)*(__pointer + __offset); } static __inline__ vector signed __int128 __ATTRS_o_ai vec_xl_sext(ptrdiff_t __offset, const signed short *__pointer) { return (vector signed __int128)*(__pointer + __offset); } static __inline__ vector signed __int128 __ATTRS_o_ai vec_xl_sext(ptrdiff_t __offset, const signed int *__pointer) { return (vector signed __int128)*(__pointer + __offset); } static __inline__ vector signed __int128 __ATTRS_o_ai vec_xl_sext(ptrdiff_t __offset, const signed long long *__pointer) { return (vector signed __int128)*(__pointer + __offset); } /* vec_xl_zext */ static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_xl_zext(ptrdiff_t __offset, const unsigned char *__pointer) { return (vector unsigned __int128)*(__pointer + __offset); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_xl_zext(ptrdiff_t __offset, const unsigned short *__pointer) { return (vector unsigned __int128)*(__pointer + __offset); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_xl_zext(ptrdiff_t __offset, const unsigned int *__pointer) { return (vector unsigned __int128)*(__pointer + __offset); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_xl_zext(ptrdiff_t __offset, const unsigned long long *__pointer) { return (vector unsigned __int128)*(__pointer + __offset); } #endif /* vec_xlds */ #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai vec_xlds(ptrdiff_t __offset, const signed long long *__ptr) { signed long long *__addr = (signed long long*)((signed char *)__ptr + __offset); return (vector signed long long) *__addr; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_xlds(ptrdiff_t __offset, const unsigned long long *__ptr) { unsigned long long *__addr = (unsigned long long *)((signed char *)__ptr + __offset); return (unaligned_vec_ull) *__addr; } static __inline__ vector double __ATTRS_o_ai vec_xlds(ptrdiff_t __offset, const double *__ptr) { double *__addr = (double*)((signed char *)__ptr + __offset); return (unaligned_vec_double) *__addr; } /* vec_load_splats */ static __inline__ vector signed int __ATTRS_o_ai vec_load_splats(signed long long __offset, const signed int *__ptr) { signed int *__addr = (signed int*)((signed char *)__ptr + __offset); return (vector signed int)*__addr; } static __inline__ vector signed int __ATTRS_o_ai vec_load_splats(unsigned long long __offset, const signed int *__ptr) { signed int *__addr = (signed int*)((signed char *)__ptr + __offset); return (vector signed int)*__addr; } static __inline__ vector unsigned int __ATTRS_o_ai vec_load_splats(signed long long __offset, const unsigned int *__ptr) { unsigned int *__addr = (unsigned int*)((signed char *)__ptr + __offset); return (vector unsigned int)*__addr; } static __inline__ vector unsigned int __ATTRS_o_ai vec_load_splats(unsigned long long __offset, const unsigned int *__ptr) { unsigned int *__addr = (unsigned int*)((signed char *)__ptr + __offset); return (vector unsigned int)*__addr; } static __inline__ vector float __ATTRS_o_ai vec_load_splats(signed long long 
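/* Illustrative sketch (added; not part of the original header; requires the
 * Power10/VSX/__int128 guard above): vec_xl_sext and vec_xl_zext load one
 * scalar from (__pointer + __offset) and widen it to a one-element __int128
 * vector, sign- or zero-extended respectively.
 *
 *   signed int x = -7;
 *   vector signed __int128 v = vec_xl_sext(0, &x); // v[0] == -7
 */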
__offset, const float *__ptr) { float *__addr = (float*)((signed char *)__ptr + __offset); return (vector float)*__addr; } static __inline__ vector float __ATTRS_o_ai vec_load_splats(unsigned long long __offset, const float *__ptr) { float *__addr = (float*)((signed char *)__ptr + __offset); return (vector float)*__addr; } #endif /* vec_xst */ #define vec_xstd2 vec_xst #define vec_xstw4 vec_xst static inline __ATTRS_o_ai void vec_xst(vector signed char __vec, ptrdiff_t __offset, signed char *__ptr) { *(unaligned_vec_schar *)(__ptr + __offset) = __vec; } static inline __ATTRS_o_ai void vec_xst(vector unsigned char __vec, ptrdiff_t __offset, unsigned char *__ptr) { *(unaligned_vec_uchar *)(__ptr + __offset) = __vec; } static inline __ATTRS_o_ai void vec_xst(vector signed short __vec, ptrdiff_t __offset, signed short *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; *(unaligned_vec_sshort *)__addr = __vec; } static inline __ATTRS_o_ai void vec_xst(vector unsigned short __vec, ptrdiff_t __offset, unsigned short *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; *(unaligned_vec_ushort *)__addr = __vec; } static inline __ATTRS_o_ai void vec_xst(vector signed int __vec, ptrdiff_t __offset, signed int *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; *(unaligned_vec_sint *)__addr = __vec; } static inline __ATTRS_o_ai void vec_xst(vector unsigned int __vec, ptrdiff_t __offset, unsigned int *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; *(unaligned_vec_uint *)__addr = __vec; } static inline __ATTRS_o_ai void vec_xst(vector float __vec, ptrdiff_t __offset, float *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; *(unaligned_vec_float *)__addr = __vec; } #ifdef __VSX__ static inline __ATTRS_o_ai void vec_xst(vector signed long long __vec, ptrdiff_t __offset, signed long long *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; *(unaligned_vec_sll *)__addr = __vec; } static inline __ATTRS_o_ai void vec_xst(vector unsigned long long __vec, ptrdiff_t __offset, unsigned long long *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; *(unaligned_vec_ull *)__addr = __vec; } static inline __ATTRS_o_ai void vec_xst(vector double __vec, ptrdiff_t __offset, double *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; *(unaligned_vec_double *)__addr = __vec; } #endif #if defined(__POWER8_VECTOR__) && defined(__powerpc64__) && \ defined(__SIZEOF_INT128__) static inline __ATTRS_o_ai void vec_xst(vector signed __int128 __vec, ptrdiff_t __offset, signed __int128 *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; *(unaligned_vec_si128 *)__addr = __vec; } static inline __ATTRS_o_ai void vec_xst(vector unsigned __int128 __vec, ptrdiff_t __offset, unsigned __int128 *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; *(unaligned_vec_ui128 *)__addr = __vec; } #endif /* vec_xst_trunc */ #if defined(__POWER10_VECTOR__) && defined(__VSX__) && \ defined(__SIZEOF_INT128__) static inline __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec, ptrdiff_t __offset, signed char *__ptr) { *(__ptr + __offset) = (signed char)__vec[0]; } static inline __ATTRS_o_ai void vec_xst_trunc(vector unsigned __int128 __vec, ptrdiff_t __offset, unsigned char *__ptr) { *(__ptr + __offset) = (unsigned char)__vec[0]; } static inline __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec, ptrdiff_t __offset, signed short *__ptr) { *(__ptr + __offset) = (signed short)__vec[0]; } static inline __ATTRS_o_ai 
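/* Illustrative sketch (added; not part of the original header): vec_xl and
 * vec_xst above are unaligned vector load/store operations at a byte offset
 * from a scalar pointer, so a hypothetical 16-byte copy that tolerates any
 * alignment could be written as:
 *
 *   static void copy16(const unsigned char *src, unsigned char *dst) {
 *     vector unsigned char v = vec_xl(0, src);
 *     vec_xst(v, 0, dst);
 *   }
 */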
void vec_xst_trunc(vector unsigned __int128 __vec, ptrdiff_t __offset, unsigned short *__ptr) { *(__ptr + __offset) = (unsigned short)__vec[0]; } static inline __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec, ptrdiff_t __offset, signed int *__ptr) { *(__ptr + __offset) = (signed int)__vec[0]; } static inline __ATTRS_o_ai void vec_xst_trunc(vector unsigned __int128 __vec, ptrdiff_t __offset, unsigned int *__ptr) { *(__ptr + __offset) = (unsigned int)__vec[0]; } static inline __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec, ptrdiff_t __offset, signed long long *__ptr) { *(__ptr + __offset) = (signed long long)__vec[0]; } static inline __ATTRS_o_ai void vec_xst_trunc(vector unsigned __int128 __vec, ptrdiff_t __offset, unsigned long long *__ptr) { *(__ptr + __offset) = (unsigned long long)__vec[0]; } #endif /* vec_xst_be */ #ifdef __LITTLE_ENDIAN__ static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed char __vec, signed long long __offset, signed char *__ptr) { vector signed char __tmp = __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); typedef __attribute__((vector_size(sizeof(__tmp)))) double __vector_double; __builtin_vsx_stxvd2x_be((__vector_double)__tmp, __offset, __ptr); } static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned char __vec, signed long long __offset, unsigned char *__ptr) { vector unsigned char __tmp = __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); typedef __attribute__((vector_size(sizeof(__tmp)))) double __vector_double; __builtin_vsx_stxvd2x_be((__vector_double)__tmp, __offset, __ptr); } static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed short __vec, signed long long __offset, signed short *__ptr) { vector signed short __tmp = __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4); typedef __attribute__((vector_size(sizeof(__tmp)))) double __vector_double; __builtin_vsx_stxvd2x_be((__vector_double)__tmp, __offset, __ptr); } static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned short __vec, signed long long __offset, unsigned short *__ptr) { vector unsigned short __tmp = __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4); typedef __attribute__((vector_size(sizeof(__tmp)))) double __vector_double; __builtin_vsx_stxvd2x_be((__vector_double)__tmp, __offset, __ptr); } static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed int __vec, signed long long __offset, signed int *__ptr) { __builtin_vsx_stxvw4x_be(__vec, __offset, __ptr); } static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned int __vec, signed long long __offset, unsigned int *__ptr) { __builtin_vsx_stxvw4x_be((vector int)__vec, __offset, __ptr); } static __inline__ void __ATTRS_o_ai vec_xst_be(vector float __vec, signed long long __offset, float *__ptr) { __builtin_vsx_stxvw4x_be((vector int)__vec, __offset, __ptr); } #ifdef __VSX__ static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed long long __vec, signed long long __offset, signed long long *__ptr) { __builtin_vsx_stxvd2x_be((vector double)__vec, __offset, __ptr); } static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned long long __vec, signed long long __offset, unsigned long long *__ptr) { __builtin_vsx_stxvd2x_be((vector double)__vec, __offset, __ptr); } static __inline__ void __ATTRS_o_ai vec_xst_be(vector double __vec, signed long long __offset, double *__ptr) { __builtin_vsx_stxvd2x_be((vector double)__vec, __offset, __ptr); } #endif #if 
defined(__POWER8_VECTOR__) && defined(__powerpc64__) && \ defined(__SIZEOF_INT128__) static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed __int128 __vec, signed long long __offset, signed __int128 *__ptr) { vec_xst(__vec, __offset, __ptr); } static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned __int128 __vec, signed long long __offset, unsigned __int128 *__ptr) { vec_xst(__vec, __offset, __ptr); } #endif #else #define vec_xst_be vec_xst #endif #ifdef __POWER9_VECTOR__ #define vec_test_data_class(__a, __b) \ _Generic( \ (__a), vector float \ : (vector bool int)__builtin_vsx_xvtstdcsp((vector float)(__a), (__b)), \ vector double \ : (vector bool long long)__builtin_vsx_xvtstdcdp((vector double)(__a), \ (__b))) #endif /* #ifdef __POWER9_VECTOR__ */ static vector float __ATTRS_o_ai vec_neg(vector float __a) { return -__a; } #ifdef __VSX__ static vector double __ATTRS_o_ai vec_neg(vector double __a) { return -__a; } #endif #ifdef __VSX__ static vector long long __ATTRS_o_ai vec_neg(vector long long __a) { return -__a; } #endif static vector signed int __ATTRS_o_ai vec_neg(vector signed int __a) { return -__a; } static vector signed short __ATTRS_o_ai vec_neg(vector signed short __a) { return -__a; } static vector signed char __ATTRS_o_ai vec_neg(vector signed char __a) { return -__a; } static vector float __ATTRS_o_ai vec_nabs(vector float __a) { return - vec_abs(__a); } #ifdef __VSX__ static vector double __ATTRS_o_ai vec_nabs(vector double __a) { return - vec_abs(__a); } #endif #ifdef __POWER8_VECTOR__ static vector long long __ATTRS_o_ai vec_nabs(vector long long __a) { return __builtin_altivec_vminsd(__a, -__a); } #endif static vector signed int __ATTRS_o_ai vec_nabs(vector signed int __a) { return __builtin_altivec_vminsw(__a, -__a); } static vector signed short __ATTRS_o_ai vec_nabs(vector signed short __a) { return __builtin_altivec_vminsh(__a, -__a); } static vector signed char __ATTRS_o_ai vec_nabs(vector signed char __a) { return __builtin_altivec_vminsb(__a, -__a); } static vector float __ATTRS_o_ai vec_recipdiv(vector float __a, vector float __b) { return __builtin_ppc_recipdivf(__a, __b); } #ifdef __VSX__ static vector double __ATTRS_o_ai vec_recipdiv(vector double __a, vector double __b) { return __builtin_ppc_recipdivd(__a, __b); } #endif #ifdef __POWER10_VECTOR__ /* vec_extractm */ static __inline__ unsigned int __ATTRS_o_ai vec_extractm(vector unsigned char __a) { return __builtin_altivec_vextractbm(__a); } static __inline__ unsigned int __ATTRS_o_ai vec_extractm(vector unsigned short __a) { return __builtin_altivec_vextracthm(__a); } static __inline__ unsigned int __ATTRS_o_ai vec_extractm(vector unsigned int __a) { return __builtin_altivec_vextractwm(__a); } static __inline__ unsigned int __ATTRS_o_ai vec_extractm(vector unsigned long long __a) { return __builtin_altivec_vextractdm(__a); } #ifdef __SIZEOF_INT128__ static __inline__ unsigned int __ATTRS_o_ai vec_extractm(vector unsigned __int128 __a) { return __builtin_altivec_vextractqm(__a); } #endif /* vec_expandm */ static __inline__ vector unsigned char __ATTRS_o_ai vec_expandm(vector unsigned char __a) { return __builtin_altivec_vexpandbm(__a); } static __inline__ vector unsigned short __ATTRS_o_ai vec_expandm(vector unsigned short __a) { return __builtin_altivec_vexpandhm(__a); } static __inline__ vector unsigned int __ATTRS_o_ai vec_expandm(vector unsigned int __a) { return __builtin_altivec_vexpandwm(__a); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_expandm(vector unsigned long 
long __a) { return __builtin_altivec_vexpanddm(__a); } #ifdef __SIZEOF_INT128__ static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_expandm(vector unsigned __int128 __a) { return __builtin_altivec_vexpandqm(__a); } #endif /* vec_cntm */ #define vec_cntm(__a, __mp) \ _Generic((__a), vector unsigned char \ : __builtin_altivec_vcntmbb((vector unsigned char)(__a), \ (unsigned char)(__mp)), \ vector unsigned short \ : __builtin_altivec_vcntmbh((vector unsigned short)(__a), \ (unsigned char)(__mp)), \ vector unsigned int \ : __builtin_altivec_vcntmbw((vector unsigned int)(__a), \ (unsigned char)(__mp)), \ vector unsigned long long \ : __builtin_altivec_vcntmbd((vector unsigned long long)(__a), \ (unsigned char)(__mp))) /* vec_gen[b|h|w|d|q]m */ static __inline__ vector unsigned char __ATTRS_o_ai vec_genbm(unsigned long long __bm) { return __builtin_altivec_mtvsrbm(__bm); } static __inline__ vector unsigned short __ATTRS_o_ai vec_genhm(unsigned long long __bm) { return __builtin_altivec_mtvsrhm(__bm); } static __inline__ vector unsigned int __ATTRS_o_ai vec_genwm(unsigned long long __bm) { return __builtin_altivec_mtvsrwm(__bm); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_gendm(unsigned long long __bm) { return __builtin_altivec_mtvsrdm(__bm); } #ifdef __SIZEOF_INT128__ static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_genqm(unsigned long long __bm) { return __builtin_altivec_mtvsrqm(__bm); } #endif /* vec_pdep */ static __inline__ vector unsigned long long __ATTRS_o_ai vec_pdep(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vpdepd(__a, __b); } /* vec_pext */ static __inline__ vector unsigned long long __ATTRS_o_ai vec_pext(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vpextd(__a, __b); } /* vec_cfuge */ static __inline__ vector unsigned long long __ATTRS_o_ai vec_cfuge(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vcfuged(__a, __b); } /* vec_gnb */ #define vec_gnb(__a, __b) __builtin_altivec_vgnb(__a, __b) /* vec_ternarylogic */ #ifdef __VSX__ #ifdef __SIZEOF_INT128__ #define vec_ternarylogic(__a, __b, __c, __imm) \ _Generic((__a), vector unsigned char \ : (vector unsigned char)__builtin_vsx_xxeval( \ (vector unsigned long long)(__a), \ (vector unsigned long long)(__b), \ (vector unsigned long long)(__c), (__imm)), \ vector unsigned short \ : (vector unsigned short)__builtin_vsx_xxeval( \ (vector unsigned long long)(__a), \ (vector unsigned long long)(__b), \ (vector unsigned long long)(__c), (__imm)), \ vector unsigned int \ : (vector unsigned int)__builtin_vsx_xxeval( \ (vector unsigned long long)(__a), \ (vector unsigned long long)(__b), \ (vector unsigned long long)(__c), (__imm)), \ vector unsigned long long \ : (vector unsigned long long)__builtin_vsx_xxeval( \ (vector unsigned long long)(__a), \ (vector unsigned long long)(__b), \ (vector unsigned long long)(__c), (__imm)), \ vector unsigned __int128 \ : (vector unsigned __int128)__builtin_vsx_xxeval( \ (vector unsigned long long)(__a), \ (vector unsigned long long)(__b), \ (vector unsigned long long)(__c), (__imm))) #else #define vec_ternarylogic(__a, __b, __c, __imm) \ _Generic((__a), vector unsigned char \ : (vector unsigned char)__builtin_vsx_xxeval( \ (vector unsigned long long)(__a), \ (vector unsigned long long)(__b), \ (vector unsigned long long)(__c), (__imm)), \ vector unsigned short \ : (vector unsigned short)__builtin_vsx_xxeval( \ (vector unsigned long long)(__a), 
\ (vector unsigned long long)(__b), \ (vector unsigned long long)(__c), (__imm)), \ vector unsigned int \ : (vector unsigned int)__builtin_vsx_xxeval( \ (vector unsigned long long)(__a), \ (vector unsigned long long)(__b), \ (vector unsigned long long)(__c), (__imm)), \ vector unsigned long long \ : (vector unsigned long long)__builtin_vsx_xxeval( \ (vector unsigned long long)(__a), \ (vector unsigned long long)(__b), \ (vector unsigned long long)(__c), (__imm))) #endif /* __SIZEOF_INT128__ */ #endif /* __VSX__ */ /* vec_genpcvm */ #ifdef __VSX__ #define vec_genpcvm(__a, __imm) \ _Generic( \ (__a), vector unsigned char \ : __builtin_vsx_xxgenpcvbm((vector unsigned char)(__a), (int)(__imm)), \ vector unsigned short \ : __builtin_vsx_xxgenpcvhm((vector unsigned short)(__a), (int)(__imm)), \ vector unsigned int \ : __builtin_vsx_xxgenpcvwm((vector unsigned int)(__a), (int)(__imm)), \ vector unsigned long long \ : __builtin_vsx_xxgenpcvdm((vector unsigned long long)(__a), \ (int)(__imm))) #endif /* __VSX__ */ /* vec_clr_first */ static __inline__ vector signed char __ATTRS_o_ai vec_clr_first(vector signed char __a, unsigned int __n) { #ifdef __LITTLE_ENDIAN__ return (vector signed char)__builtin_altivec_vclrrb((vector unsigned char)__a, __n); #else return (vector signed char)__builtin_altivec_vclrlb((vector unsigned char)__a, __n); #endif } static __inline__ vector unsigned char __ATTRS_o_ai vec_clr_first(vector unsigned char __a, unsigned int __n) { #ifdef __LITTLE_ENDIAN__ return (vector unsigned char)__builtin_altivec_vclrrb( (vector unsigned char)__a, __n); #else return (vector unsigned char)__builtin_altivec_vclrlb( (vector unsigned char)__a, __n); #endif } /* vec_clr_last */ static __inline__ vector signed char __ATTRS_o_ai vec_clr_last(vector signed char __a, unsigned int __n) { #ifdef __LITTLE_ENDIAN__ return (vector signed char)__builtin_altivec_vclrlb((vector unsigned char)__a, __n); #else return (vector signed char)__builtin_altivec_vclrrb((vector unsigned char)__a, __n); #endif } static __inline__ vector unsigned char __ATTRS_o_ai vec_clr_last(vector unsigned char __a, unsigned int __n) { #ifdef __LITTLE_ENDIAN__ return (vector unsigned char)__builtin_altivec_vclrlb( (vector unsigned char)__a, __n); #else return (vector unsigned char)__builtin_altivec_vclrrb( (vector unsigned char)__a, __n); #endif } /* vec_cntlzm */ static __inline__ vector unsigned long long __ATTRS_o_ai vec_cntlzm(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vclzdm(__a, __b); } /* vec_cnttzm */ static __inline__ vector unsigned long long __ATTRS_o_ai vec_cnttzm(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vctzdm(__a, __b); } /* vec_mod */ static __inline__ vector signed int __ATTRS_o_ai vec_mod(vector signed int __a, vector signed int __b) { return __a % __b; } static __inline__ vector unsigned int __ATTRS_o_ai vec_mod(vector unsigned int __a, vector unsigned int __b) { return __a % __b; } static __inline__ vector signed long long __ATTRS_o_ai vec_mod(vector signed long long __a, vector signed long long __b) { return __a % __b; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_mod(vector unsigned long long __a, vector unsigned long long __b) { return __a % __b; } #ifdef __SIZEOF_INT128__ static __inline__ vector signed __int128 __ATTRS_o_ai vec_mod(vector signed __int128 __a, vector signed __int128 __b) { return __a % __b; } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_mod(vector unsigned __int128 
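/* Illustrative sketch (added; not part of the original header): vec_mod is
 * plain element-wise remainder, e.g.
 *
 *   vector unsigned int a = {10, 11, 12, 13};
 *   vector unsigned int b = { 4,  4,  4,  4};
 *   vector unsigned int r = vec_mod(a, b); // {2, 3, 0, 1}
 */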
__a, vector unsigned __int128 __b) { return __a % __b; } #endif /* vec_sldb */ #define vec_sldb(__a, __b, __c) \ _Generic( \ (__a), vector unsigned char \ : (vector unsigned char)__builtin_altivec_vsldbi( \ (vector unsigned char)__a, (vector unsigned char)__b, \ (__c & 0x7)), \ vector signed char \ : (vector signed char)__builtin_altivec_vsldbi( \ (vector unsigned char)__a, (vector unsigned char)__b, \ (__c & 0x7)), \ vector unsigned short \ : (vector unsigned short)__builtin_altivec_vsldbi( \ (vector unsigned char)__a, (vector unsigned char)__b, \ (__c & 0x7)), \ vector signed short \ : (vector signed short)__builtin_altivec_vsldbi( \ (vector unsigned char)__a, (vector unsigned char)__b, \ (__c & 0x7)), \ vector unsigned int \ : (vector unsigned int)__builtin_altivec_vsldbi( \ (vector unsigned char)__a, (vector unsigned char)__b, \ (__c & 0x7)), \ vector signed int \ : (vector signed int)__builtin_altivec_vsldbi((vector unsigned char)__a, \ (vector unsigned char)__b, \ (__c & 0x7)), \ vector unsigned long long \ : (vector unsigned long long)__builtin_altivec_vsldbi( \ (vector unsigned char)__a, (vector unsigned char)__b, \ (__c & 0x7)), \ vector signed long long \ : (vector signed long long)__builtin_altivec_vsldbi( \ (vector unsigned char)__a, (vector unsigned char)__b, (__c & 0x7))) /* vec_srdb */ #define vec_srdb(__a, __b, __c) \ _Generic( \ (__a), vector unsigned char \ : (vector unsigned char)__builtin_altivec_vsrdbi( \ (vector unsigned char)__a, (vector unsigned char)__b, \ (__c & 0x7)), \ vector signed char \ : (vector signed char)__builtin_altivec_vsrdbi( \ (vector unsigned char)__a, (vector unsigned char)__b, \ (__c & 0x7)), \ vector unsigned short \ : (vector unsigned short)__builtin_altivec_vsrdbi( \ (vector unsigned char)__a, (vector unsigned char)__b, \ (__c & 0x7)), \ vector signed short \ : (vector signed short)__builtin_altivec_vsrdbi( \ (vector unsigned char)__a, (vector unsigned char)__b, \ (__c & 0x7)), \ vector unsigned int \ : (vector unsigned int)__builtin_altivec_vsrdbi( \ (vector unsigned char)__a, (vector unsigned char)__b, \ (__c & 0x7)), \ vector signed int \ : (vector signed int)__builtin_altivec_vsrdbi((vector unsigned char)__a, \ (vector unsigned char)__b, \ (__c & 0x7)), \ vector unsigned long long \ : (vector unsigned long long)__builtin_altivec_vsrdbi( \ (vector unsigned char)__a, (vector unsigned char)__b, \ (__c & 0x7)), \ vector signed long long \ : (vector signed long long)__builtin_altivec_vsrdbi( \ (vector unsigned char)__a, (vector unsigned char)__b, (__c & 0x7))) /* vec_insertl */ static __inline__ vector unsigned char __ATTRS_o_ai vec_insertl(unsigned char __a, vector unsigned char __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vinsbrx(__b, __c, __a); #else return __builtin_altivec_vinsblx(__b, __c, __a); #endif } static __inline__ vector unsigned short __ATTRS_o_ai vec_insertl(unsigned short __a, vector unsigned short __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vinshrx(__b, __c, __a); #else return __builtin_altivec_vinshlx(__b, __c, __a); #endif } static __inline__ vector unsigned int __ATTRS_o_ai vec_insertl(unsigned int __a, vector unsigned int __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vinswrx(__b, __c, __a); #else return __builtin_altivec_vinswlx(__b, __c, __a); #endif } static __inline__ vector unsigned long long __ATTRS_o_ai vec_insertl(unsigned long long __a, vector unsigned long long __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return 
__builtin_altivec_vinsdrx(__b, __c, __a); #else return __builtin_altivec_vinsdlx(__b, __c, __a); #endif } static __inline__ vector unsigned char __ATTRS_o_ai vec_insertl(vector unsigned char __a, vector unsigned char __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vinsbvrx(__b, __c, __a); #else return __builtin_altivec_vinsbvlx(__b, __c, __a); #endif } static __inline__ vector unsigned short __ATTRS_o_ai vec_insertl(vector unsigned short __a, vector unsigned short __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vinshvrx(__b, __c, __a); #else return __builtin_altivec_vinshvlx(__b, __c, __a); #endif } static __inline__ vector unsigned int __ATTRS_o_ai vec_insertl(vector unsigned int __a, vector unsigned int __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vinswvrx(__b, __c, __a); #else return __builtin_altivec_vinswvlx(__b, __c, __a); #endif } /* vec_inserth */ static __inline__ vector unsigned char __ATTRS_o_ai vec_inserth(unsigned char __a, vector unsigned char __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vinsblx(__b, __c, __a); #else return __builtin_altivec_vinsbrx(__b, __c, __a); #endif } static __inline__ vector unsigned short __ATTRS_o_ai vec_inserth(unsigned short __a, vector unsigned short __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vinshlx(__b, __c, __a); #else return __builtin_altivec_vinshrx(__b, __c, __a); #endif } static __inline__ vector unsigned int __ATTRS_o_ai vec_inserth(unsigned int __a, vector unsigned int __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vinswlx(__b, __c, __a); #else return __builtin_altivec_vinswrx(__b, __c, __a); #endif } static __inline__ vector unsigned long long __ATTRS_o_ai vec_inserth(unsigned long long __a, vector unsigned long long __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vinsdlx(__b, __c, __a); #else return __builtin_altivec_vinsdrx(__b, __c, __a); #endif } static __inline__ vector unsigned char __ATTRS_o_ai vec_inserth(vector unsigned char __a, vector unsigned char __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vinsbvlx(__b, __c, __a); #else return __builtin_altivec_vinsbvrx(__b, __c, __a); #endif } static __inline__ vector unsigned short __ATTRS_o_ai vec_inserth(vector unsigned short __a, vector unsigned short __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vinshvlx(__b, __c, __a); #else return __builtin_altivec_vinshvrx(__b, __c, __a); #endif } static __inline__ vector unsigned int __ATTRS_o_ai vec_inserth(vector unsigned int __a, vector unsigned int __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vinswvlx(__b, __c, __a); #else return __builtin_altivec_vinswvrx(__b, __c, __a); #endif } /* vec_extractl */ static __inline__ vector unsigned long long __ATTRS_o_ai vec_extractl( vector unsigned char __a, vector unsigned char __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vextdubvrx(__a, __b, __c); #else vector unsigned long long __ret = __builtin_altivec_vextdubvlx(__a, __b, __c); return vec_sld(__ret, __ret, 8); #endif } static __inline__ vector unsigned long long __ATTRS_o_ai vec_extractl( vector unsigned short __a, vector unsigned short __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vextduhvrx(__a, __b, __c); #else vector unsigned long long __ret = __builtin_altivec_vextduhvlx(__a, __b, __c); return 
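/* Editorial usage sketch, not part of the original header: vec_insertl and
   vec_inserth insert a scalar (or a vector element) at byte offset __c and
   differ only in which end of the vector the offset is counted from, which is
   why the vins*lx/vins*rx builtins swap under __LITTLE_ENDIAN__ above.
   Hypothetical example, assuming a Power10 target:

     vector unsigned int demo_insert(vector unsigned int v, unsigned int x) {
       return vec_insertl(x, v, 4);   // place x at byte offset 4
     }
*/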
vec_sld(__ret, __ret, 8); #endif } static __inline__ vector unsigned long long __ATTRS_o_ai vec_extractl( vector unsigned int __a, vector unsigned int __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vextduwvrx(__a, __b, __c); #else vector unsigned long long __ret = __builtin_altivec_vextduwvlx(__a, __b, __c); return vec_sld(__ret, __ret, 8); #endif } static __inline__ vector unsigned long long __ATTRS_o_ai vec_extractl(vector unsigned long long __a, vector unsigned long long __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vextddvrx(__a, __b, __c); #else vector unsigned long long __ret = __builtin_altivec_vextddvlx(__a, __b, __c); return vec_sld(__ret, __ret, 8); #endif } /* vec_extracth */ static __inline__ vector unsigned long long __ATTRS_o_ai vec_extracth( vector unsigned char __a, vector unsigned char __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vextdubvlx(__a, __b, __c); #else vector unsigned long long __ret = __builtin_altivec_vextdubvrx(__a, __b, __c); return vec_sld(__ret, __ret, 8); #endif } static __inline__ vector unsigned long long __ATTRS_o_ai vec_extracth( vector unsigned short __a, vector unsigned short __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vextduhvlx(__a, __b, __c); #else vector unsigned long long __ret = __builtin_altivec_vextduhvrx(__a, __b, __c); return vec_sld(__ret, __ret, 8); #endif } static __inline__ vector unsigned long long __ATTRS_o_ai vec_extracth( vector unsigned int __a, vector unsigned int __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vextduwvlx(__a, __b, __c); #else vector unsigned long long __ret = __builtin_altivec_vextduwvrx(__a, __b, __c); return vec_sld(__ret, __ret, 8); #endif } static __inline__ vector unsigned long long __ATTRS_o_ai vec_extracth(vector unsigned long long __a, vector unsigned long long __b, unsigned int __c) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vextddvlx(__a, __b, __c); #else vector unsigned long long __ret = __builtin_altivec_vextddvrx(__a, __b, __c); return vec_sld(__ret, __ret, 8); #endif } #ifdef __VSX__ /* vec_permx */ #define vec_permx(__a, __b, __c, __d) \ _Generic( \ (__a), vector unsigned char \ : (vector unsigned char)__builtin_vsx_xxpermx( \ (vector unsigned char)__a, (vector unsigned char)__b, __c, __d), \ vector signed char \ : (vector signed char)__builtin_vsx_xxpermx( \ (vector unsigned char)__a, (vector unsigned char)__b, __c, __d), \ vector unsigned short \ : (vector unsigned short)__builtin_vsx_xxpermx( \ (vector unsigned char)__a, (vector unsigned char)__b, __c, __d), \ vector signed short \ : (vector signed short)__builtin_vsx_xxpermx( \ (vector unsigned char)__a, (vector unsigned char)__b, __c, __d), \ vector unsigned int \ : (vector unsigned int)__builtin_vsx_xxpermx( \ (vector unsigned char)__a, (vector unsigned char)__b, __c, __d), \ vector signed int \ : (vector signed int)__builtin_vsx_xxpermx( \ (vector unsigned char)__a, (vector unsigned char)__b, __c, __d), \ vector unsigned long long \ : (vector unsigned long long)__builtin_vsx_xxpermx( \ (vector unsigned char)__a, (vector unsigned char)__b, __c, __d), \ vector signed long long \ : (vector signed long long)__builtin_vsx_xxpermx( \ (vector unsigned char)__a, (vector unsigned char)__b, __c, __d), \ vector float \ : (vector float)__builtin_vsx_xxpermx( \ (vector unsigned char)__a, (vector unsigned char)__b, __c, __d), \ vector double \ : (vector double)__builtin_vsx_xxpermx( \ (vector unsigned 
char)__a, (vector unsigned char)__b, __c, __d)) /* vec_blendv */ static __inline__ vector signed char __ATTRS_o_ai vec_blendv(vector signed char __a, vector signed char __b, vector unsigned char __c) { return (vector signed char)__builtin_vsx_xxblendvb( (vector unsigned char)__a, (vector unsigned char)__b, __c); } static __inline__ vector unsigned char __ATTRS_o_ai vec_blendv(vector unsigned char __a, vector unsigned char __b, vector unsigned char __c) { return __builtin_vsx_xxblendvb(__a, __b, __c); } static __inline__ vector signed short __ATTRS_o_ai vec_blendv(vector signed short __a, vector signed short __b, vector unsigned short __c) { return (vector signed short)__builtin_vsx_xxblendvh( (vector unsigned short)__a, (vector unsigned short)__b, __c); } static __inline__ vector unsigned short __ATTRS_o_ai vec_blendv(vector unsigned short __a, vector unsigned short __b, vector unsigned short __c) { return __builtin_vsx_xxblendvh(__a, __b, __c); } static __inline__ vector signed int __ATTRS_o_ai vec_blendv(vector signed int __a, vector signed int __b, vector unsigned int __c) { return (vector signed int)__builtin_vsx_xxblendvw( (vector unsigned int)__a, (vector unsigned int)__b, __c); } static __inline__ vector unsigned int __ATTRS_o_ai vec_blendv(vector unsigned int __a, vector unsigned int __b, vector unsigned int __c) { return __builtin_vsx_xxblendvw(__a, __b, __c); } static __inline__ vector signed long long __ATTRS_o_ai vec_blendv(vector signed long long __a, vector signed long long __b, vector unsigned long long __c) { return (vector signed long long)__builtin_vsx_xxblendvd( (vector unsigned long long)__a, (vector unsigned long long)__b, __c); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_blendv(vector unsigned long long __a, vector unsigned long long __b, vector unsigned long long __c) { return (vector unsigned long long)__builtin_vsx_xxblendvd(__a, __b, __c); } static __inline__ vector float __ATTRS_o_ai vec_blendv(vector float __a, vector float __b, vector unsigned int __c) { return (vector float)__builtin_vsx_xxblendvw((vector unsigned int)__a, (vector unsigned int)__b, __c); } static __inline__ vector double __ATTRS_o_ai vec_blendv(vector double __a, vector double __b, vector unsigned long long __c) { return (vector double)__builtin_vsx_xxblendvd( (vector unsigned long long)__a, (vector unsigned long long)__b, __c); } #define vec_replace_unaligned(__a, __b, __c) \ _Generic((__a), vector signed int \ : __builtin_altivec_vinsw((vector unsigned char)__a, \ (unsigned int)__b, __c), \ vector unsigned int \ : __builtin_altivec_vinsw((vector unsigned char)__a, \ (unsigned int)__b, __c), \ vector unsigned long long \ : __builtin_altivec_vinsd((vector unsigned char)__a, \ (unsigned long long)__b, __c), \ vector signed long long \ : __builtin_altivec_vinsd((vector unsigned char)__a, \ (unsigned long long)__b, __c), \ vector float \ : __builtin_altivec_vinsw((vector unsigned char)__a, \ (unsigned int)__b, __c), \ vector double \ : __builtin_altivec_vinsd((vector unsigned char)__a, \ (unsigned long long)__b, __c)) #define vec_replace_elt(__a, __b, __c) \ _Generic((__a), vector signed int \ : (vector signed int)__builtin_altivec_vinsw_elt( \ (vector unsigned char)__a, (unsigned int)__b, __c), \ vector unsigned int \ : (vector unsigned int)__builtin_altivec_vinsw_elt( \ (vector unsigned char)__a, (unsigned int)__b, __c), \ vector unsigned long long \ : (vector unsigned long long)__builtin_altivec_vinsd_elt( \ (vector unsigned char)__a, (unsigned long long)__b, __c), \ vector 
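/* Editorial usage sketch, not part of the original header: vec_blendv selects
   between __a and __b under control of the mask __c via the xxblendv*
   builtins; masks are typically produced by a vector compare. Hypothetical
   example for the float overload:

     vector float demo_blend(vector float a, vector float b,
                             vector unsigned int sel) {
       return vec_blendv(a, b, sel);   // per-lane selection controlled by sel
     }
*/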
signed long long \ : (vector signed long long)__builtin_altivec_vinsd_elt( \ (vector unsigned char)__a, (unsigned long long)__b, __c), \ vector float \ : (vector float)__builtin_altivec_vinsw_elt( \ (vector unsigned char)__a, (unsigned int)__b, __c), \ vector double \ : (vector double)__builtin_altivec_vinsd_elt( \ (vector unsigned char)__a, (unsigned long long)__b, __c)) /* vec_splati */ #define vec_splati(__a) \ _Generic((__a), signed int \ : ((vector signed int)__a), unsigned int \ : ((vector unsigned int)__a), float \ : ((vector float)__a)) /* vec_spatid */ static __inline__ vector double __ATTRS_o_ai vec_splatid(const float __a) { return ((vector double)((double)__a)); } /* vec_splati_ins */ static __inline__ vector signed int __ATTRS_o_ai vec_splati_ins( vector signed int __a, const unsigned int __b, const signed int __c) { const unsigned int __d = __b & 0x01; #ifdef __LITTLE_ENDIAN__ __a[1 - __d] = __c; __a[3 - __d] = __c; #else __a[__d] = __c; __a[2 + __d] = __c; #endif return __a; } static __inline__ vector unsigned int __ATTRS_o_ai vec_splati_ins( vector unsigned int __a, const unsigned int __b, const unsigned int __c) { const unsigned int __d = __b & 0x01; #ifdef __LITTLE_ENDIAN__ __a[1 - __d] = __c; __a[3 - __d] = __c; #else __a[__d] = __c; __a[2 + __d] = __c; #endif return __a; } static __inline__ vector float __ATTRS_o_ai vec_splati_ins(vector float __a, const unsigned int __b, const float __c) { const unsigned int __d = __b & 0x01; #ifdef __LITTLE_ENDIAN__ __a[1 - __d] = __c; __a[3 - __d] = __c; #else __a[__d] = __c; __a[2 + __d] = __c; #endif return __a; } /* vec_test_lsbb_all_ones */ static __inline__ int __ATTRS_o_ai vec_test_lsbb_all_ones(vector unsigned char __a) { return __builtin_vsx_xvtlsbb(__a, 1); } /* vec_test_lsbb_all_zeros */ static __inline__ int __ATTRS_o_ai vec_test_lsbb_all_zeros(vector unsigned char __a) { return __builtin_vsx_xvtlsbb(__a, 0); } #endif /* __VSX__ */ /* vec_stril */ static __inline__ vector unsigned char __ATTRS_o_ai vec_stril(vector unsigned char __a) { #ifdef __LITTLE_ENDIAN__ return (vector unsigned char)__builtin_altivec_vstribr( (vector unsigned char)__a); #else return (vector unsigned char)__builtin_altivec_vstribl( (vector unsigned char)__a); #endif } static __inline__ vector signed char __ATTRS_o_ai vec_stril(vector signed char __a) { #ifdef __LITTLE_ENDIAN__ return (vector signed char)__builtin_altivec_vstribr( (vector unsigned char)__a); #else return (vector signed char)__builtin_altivec_vstribl( (vector unsigned char)__a); #endif } static __inline__ vector unsigned short __ATTRS_o_ai vec_stril(vector unsigned short __a) { #ifdef __LITTLE_ENDIAN__ return (vector unsigned short)__builtin_altivec_vstrihr( (vector signed short)__a); #else return (vector unsigned short)__builtin_altivec_vstrihl( (vector signed short)__a); #endif } static __inline__ vector signed short __ATTRS_o_ai vec_stril(vector signed short __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vstrihr(__a); #else return __builtin_altivec_vstrihl(__a); #endif } /* vec_stril_p */ static __inline__ int __ATTRS_o_ai vec_stril_p(vector unsigned char __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vstribr_p(__CR6_EQ, (vector unsigned char)__a); #else return __builtin_altivec_vstribl_p(__CR6_EQ, (vector unsigned char)__a); #endif } static __inline__ int __ATTRS_o_ai vec_stril_p(vector signed char __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vstribr_p(__CR6_EQ, (vector unsigned char)__a); #else return __builtin_altivec_vstribl_p(__CR6_EQ, (vector 
unsigned char)__a); #endif } static __inline__ int __ATTRS_o_ai vec_stril_p(vector unsigned short __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vstrihr_p(__CR6_EQ, (vector signed short)__a); #else return __builtin_altivec_vstrihl_p(__CR6_EQ, (vector signed short)__a); #endif } static __inline__ int __ATTRS_o_ai vec_stril_p(vector signed short __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vstrihr_p(__CR6_EQ, __a); #else return __builtin_altivec_vstrihl_p(__CR6_EQ, __a); #endif } /* vec_strir */ static __inline__ vector unsigned char __ATTRS_o_ai vec_strir(vector unsigned char __a) { #ifdef __LITTLE_ENDIAN__ return (vector unsigned char)__builtin_altivec_vstribl( (vector unsigned char)__a); #else return (vector unsigned char)__builtin_altivec_vstribr( (vector unsigned char)__a); #endif } static __inline__ vector signed char __ATTRS_o_ai vec_strir(vector signed char __a) { #ifdef __LITTLE_ENDIAN__ return (vector signed char)__builtin_altivec_vstribl( (vector unsigned char)__a); #else return (vector signed char)__builtin_altivec_vstribr( (vector unsigned char)__a); #endif } static __inline__ vector unsigned short __ATTRS_o_ai vec_strir(vector unsigned short __a) { #ifdef __LITTLE_ENDIAN__ return (vector unsigned short)__builtin_altivec_vstrihl( (vector signed short)__a); #else return (vector unsigned short)__builtin_altivec_vstrihr( (vector signed short)__a); #endif } static __inline__ vector signed short __ATTRS_o_ai vec_strir(vector signed short __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vstrihl(__a); #else return __builtin_altivec_vstrihr(__a); #endif } /* vec_strir_p */ static __inline__ int __ATTRS_o_ai vec_strir_p(vector unsigned char __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vstribl_p(__CR6_EQ, (vector unsigned char)__a); #else return __builtin_altivec_vstribr_p(__CR6_EQ, (vector unsigned char)__a); #endif } static __inline__ int __ATTRS_o_ai vec_strir_p(vector signed char __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vstribl_p(__CR6_EQ, (vector unsigned char)__a); #else return __builtin_altivec_vstribr_p(__CR6_EQ, (vector unsigned char)__a); #endif } static __inline__ int __ATTRS_o_ai vec_strir_p(vector unsigned short __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vstrihl_p(__CR6_EQ, (vector signed short)__a); #else return __builtin_altivec_vstrihr_p(__CR6_EQ, (vector signed short)__a); #endif } static __inline__ int __ATTRS_o_ai vec_strir_p(vector signed short __a) { #ifdef __LITTLE_ENDIAN__ return __builtin_altivec_vstrihl_p(__CR6_EQ, __a); #else return __builtin_altivec_vstrihr_p(__CR6_EQ, __a); #endif } /* vs[l | r | ra] */ #ifdef __SIZEOF_INT128__ static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_sl(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __a << (__b % (vector unsigned __int128)(sizeof(unsigned __int128) * __CHAR_BIT__)); } static __inline__ vector signed __int128 __ATTRS_o_ai vec_sl(vector signed __int128 __a, vector unsigned __int128 __b) { return __a << (__b % (vector unsigned __int128)(sizeof(unsigned __int128) * __CHAR_BIT__)); } static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_sr(vector unsigned __int128 __a, vector unsigned __int128 __b) { return __a >> (__b % (vector unsigned __int128)(sizeof(unsigned __int128) * __CHAR_BIT__)); } static __inline__ vector signed __int128 __ATTRS_o_ai vec_sr(vector signed __int128 __a, vector unsigned __int128 __b) { return ( vector signed __int128)(((vector unsigned __int128)__a) >> (__b % (vector unsigned 
__int128)(sizeof( unsigned __int128) * __CHAR_BIT__))); }
static __inline__ vector unsigned __int128 __ATTRS_o_ai vec_sra(vector unsigned __int128 __a, vector unsigned __int128 __b) { return ( vector unsigned __int128)(((vector signed __int128)__a) >> (__b % (vector unsigned __int128)(sizeof( unsigned __int128) * __CHAR_BIT__))); }
static __inline__ vector signed __int128 __ATTRS_o_ai vec_sra(vector signed __int128 __a, vector unsigned __int128 __b) { return __a >> (__b % (vector unsigned __int128)(sizeof(unsigned __int128) * __CHAR_BIT__)); }
#endif /* __SIZEOF_INT128__ */
#endif /* __POWER10_VECTOR__ */
#ifdef __POWER8_VECTOR__
#define __bcdadd(__a, __b, __ps) __builtin_ppc_bcdadd((__a), (__b), (__ps))
#define __bcdsub(__a, __b, __ps) __builtin_ppc_bcdsub((__a), (__b), (__ps))
static __inline__ long __bcdadd_ofl(vector unsigned char __a, vector unsigned char __b) { return __builtin_ppc_bcdadd_p(__CR6_SO, __a, __b); }
static __inline__ long __bcdsub_ofl(vector unsigned char __a, vector unsigned char __b) { return __builtin_ppc_bcdsub_p(__CR6_SO, __a, __b); }
static __inline__ long __bcd_invalid(vector unsigned char __a) { return __builtin_ppc_bcdsub_p(__CR6_SO, __a, __a); }
static __inline__ long __bcdcmpeq(vector unsigned char __a, vector unsigned char __b) { return __builtin_ppc_bcdsub_p(__CR6_EQ, __a, __b); }
static __inline__ long __bcdcmplt(vector unsigned char __a, vector unsigned char __b) { return __builtin_ppc_bcdsub_p(__CR6_LT, __a, __b); }
static __inline__ long __bcdcmpgt(vector unsigned char __a, vector unsigned char __b) { return __builtin_ppc_bcdsub_p(__CR6_GT, __a, __b); }
static __inline__ long __bcdcmple(vector unsigned char __a, vector unsigned char __b) { return __builtin_ppc_bcdsub_p(__CR6_GT_REV, __a, __b); }
static __inline__ long __bcdcmpge(vector unsigned char __a, vector unsigned char __b) { return __builtin_ppc_bcdsub_p(__CR6_LT_REV, __a, __b); }
#endif // __POWER8_VECTOR__
#undef __ATTRS_o_ai
#endif /* __ALTIVEC_H */
/*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------===
 *
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __VBMIVLINTRIN_H
#define __VBMIVLINTRIN_H
/* Define the default attributes for the functions in this file.
*/ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vbmi,avx512vl,no-evex512"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vbmi,avx512vl,no-evex512"), \ __min_vector_width__(256))) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A, (__v16qi)__I, (__v16qi)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128(__U, (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), (__v16qi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128(__U, (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), (__v16qi)__I); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128(__U, (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), (__v16qi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I, (__v32qi)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256(__U, (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), (__v32qi)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256(__U, (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), (__v32qi)__I); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256(__U, (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), (__v32qi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutexvar_epi8 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_permutexvar_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_permutexvar_epi8(__A, __B), (__v16qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi8 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_permutexvar_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 
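/* Editorial usage sketch, not part of the original header:
   _mm256_permutexvar_epi8 performs a full 32-byte table lookup (VPERMB), and
   the mask/maskz wrappers merge or zero lanes under a __mmask32. Hypothetical
   example, assuming AVX512VBMI + AVX512VL and <immintrin.h>:

     __m256i demo_shuffle_bytes(__m256i table, __m256i idx) {
       return _mm256_permutexvar_epi8(idx, table);   // result byte i = table[idx[i] & 31]
     }
*/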
(__v32qi)_mm256_permutexvar_epi8(__A, __B), (__v32qi)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_multishift_epi64_epi8(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_vpmultishiftqb128((__v16qi)__X, (__v16qi)__Y); }
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_multishift_epi64_epi8(__X, __Y), (__v16qi)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_multishift_epi64_epi8(__X, __Y), (__v16qi)_mm_setzero_si128()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_vpmultishiftqb256((__v32qi)__X, (__v32qi)__Y); }
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y), (__v32qi)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y), (__v32qi)_mm256_setzero_si256()); }
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#endif
/*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------===
 *
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512vlvbmi2intrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512VLVBMI2INTRIN_H
#define __AVX512VLVBMI2INTRIN_H
/* Define the default attributes for the functions in this file.
*/ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512vbmi2,no-evex512"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512vbmi2,no-evex512"), \ __min_vector_width__(256))) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) { return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, (__v8hi) __S, __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi16(__mmask8 __U, __m128i __D) { return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, (__v8hi) _mm_setzero_si128(), __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) { return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, (__v16qi) __S, __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) { return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, (__v16qi) _mm_setzero_si128(), __U); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D) { __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D, __U); } static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D) { __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D, __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) { return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, (__v8hi) __S, __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi16(__mmask8 __U, __m128i __D) { return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, (__v8hi) _mm_setzero_si128(), __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) { return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, (__v16qi) __S, __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi8(__mmask16 __U, __m128i __D) { return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, (__v16qi) _mm_setzero_si128(), __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, (__v8hi) __S, __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi16(__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, (__v8hi) _mm_setzero_si128(), __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, (__v16qi) __S, __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi8(__mmask16 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, (__v16qi) _mm_setzero_si128(), __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D) { return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, (__v16hi) __S, __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D) { return (__m256i) 
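/* Editorial usage sketch, not part of the original header: the compress
   intrinsics pack the mask-selected elements toward element 0 (expand does the
   inverse), and the *_maskz_* forms zero the unused tail. Hypothetical
   example:

     __m128i demo_compress(__m128i v, __mmask16 keep) {
       return _mm_maskz_compress_epi8(keep, v);   // kept bytes packed low, rest zeroed
     }
*/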
__builtin_ia32_compresshi256_mask ((__v16hi) __D, (__v16hi) _mm256_setzero_si256(), __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D) { return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, (__v32qi) __S, __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) { return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, (__v32qi) _mm256_setzero_si256(), __U); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D) { __builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D, __U); } static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D) { __builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D, __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D) { return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, (__v16hi) __S, __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D) { return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, (__v16hi) _mm256_setzero_si256(), __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D) { return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, (__v32qi) __S, __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D) { return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, (__v32qi) _mm256_setzero_si256(), __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, (__v16hi) __S, __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, (__v16hi) _mm256_setzero_si256(), __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, (__v32qi) __S, __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, (__v32qi) _mm256_setzero_si256(), __U); } #define _mm256_shldi_epi64(A, B, I) \ ((__m256i)__builtin_ia32_vpshldq256((__v4di)(__m256i)(A), \ (__v4di)(__m256i)(B), (int)(I))) #define _mm256_mask_shldi_epi64(S, U, A, B, I) \ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_shldi_epi64((A), (B), (I)), \ (__v4di)(__m256i)(S))) #define _mm256_maskz_shldi_epi64(U, A, B, I) \ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_shldi_epi64((A), (B), (I)), \ (__v4di)_mm256_setzero_si256())) #define _mm_shldi_epi64(A, B, I) \ ((__m128i)__builtin_ia32_vpshldq128((__v2di)(__m128i)(A), \ (__v2di)(__m128i)(B), (int)(I))) #define _mm_mask_shldi_epi64(S, U, A, B, I) \ ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ (__v2di)_mm_shldi_epi64((A), (B), (I)), \ (__v2di)(__m128i)(S))) #define _mm_maskz_shldi_epi64(U, A, B, I) \ ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ (__v2di)_mm_shldi_epi64((A), (B), 
(I)), \ (__v2di)_mm_setzero_si128())) #define _mm256_shldi_epi32(A, B, I) \ ((__m256i)__builtin_ia32_vpshldd256((__v8si)(__m256i)(A), \ (__v8si)(__m256i)(B), (int)(I))) #define _mm256_mask_shldi_epi32(S, U, A, B, I) \ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_shldi_epi32((A), (B), (I)), \ (__v8si)(__m256i)(S))) #define _mm256_maskz_shldi_epi32(U, A, B, I) \ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_shldi_epi32((A), (B), (I)), \ (__v8si)_mm256_setzero_si256())) #define _mm_shldi_epi32(A, B, I) \ ((__m128i)__builtin_ia32_vpshldd128((__v4si)(__m128i)(A), \ (__v4si)(__m128i)(B), (int)(I))) #define _mm_mask_shldi_epi32(S, U, A, B, I) \ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm_shldi_epi32((A), (B), (I)), \ (__v4si)(__m128i)(S))) #define _mm_maskz_shldi_epi32(U, A, B, I) \ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm_shldi_epi32((A), (B), (I)), \ (__v4si)_mm_setzero_si128())) #define _mm256_shldi_epi16(A, B, I) \ ((__m256i)__builtin_ia32_vpshldw256((__v16hi)(__m256i)(A), \ (__v16hi)(__m256i)(B), (int)(I))) #define _mm256_mask_shldi_epi16(S, U, A, B, I) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \ (__v16hi)(__m256i)(S))) #define _mm256_maskz_shldi_epi16(U, A, B, I) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \ (__v16hi)_mm256_setzero_si256())) #define _mm_shldi_epi16(A, B, I) \ ((__m128i)__builtin_ia32_vpshldw128((__v8hi)(__m128i)(A), \ (__v8hi)(__m128i)(B), (int)(I))) #define _mm_mask_shldi_epi16(S, U, A, B, I) \ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_shldi_epi16((A), (B), (I)), \ (__v8hi)(__m128i)(S))) #define _mm_maskz_shldi_epi16(U, A, B, I) \ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_shldi_epi16((A), (B), (I)), \ (__v8hi)_mm_setzero_si128())) #define _mm256_shrdi_epi64(A, B, I) \ ((__m256i)__builtin_ia32_vpshrdq256((__v4di)(__m256i)(A), \ (__v4di)(__m256i)(B), (int)(I))) #define _mm256_mask_shrdi_epi64(S, U, A, B, I) \ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \ (__v4di)(__m256i)(S))) #define _mm256_maskz_shrdi_epi64(U, A, B, I) \ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \ (__v4di)_mm256_setzero_si256())) #define _mm_shrdi_epi64(A, B, I) \ ((__m128i)__builtin_ia32_vpshrdq128((__v2di)(__m128i)(A), \ (__v2di)(__m128i)(B), (int)(I))) #define _mm_mask_shrdi_epi64(S, U, A, B, I) \ ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ (__v2di)_mm_shrdi_epi64((A), (B), (I)), \ (__v2di)(__m128i)(S))) #define _mm_maskz_shrdi_epi64(U, A, B, I) \ ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ (__v2di)_mm_shrdi_epi64((A), (B), (I)), \ (__v2di)_mm_setzero_si128())) #define _mm256_shrdi_epi32(A, B, I) \ ((__m256i)__builtin_ia32_vpshrdd256((__v8si)(__m256i)(A), \ (__v8si)(__m256i)(B), (int)(I))) #define _mm256_mask_shrdi_epi32(S, U, A, B, I) \ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \ (__v8si)(__m256i)(S))) #define _mm256_maskz_shrdi_epi32(U, A, B, I) \ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \ (__v8si)_mm256_setzero_si256())) #define _mm_shrdi_epi32(A, B, I) \ ((__m128i)__builtin_ia32_vpshrdd128((__v4si)(__m128i)(A), \ (__v4si)(__m128i)(B), (int)(I))) #define _mm_mask_shrdi_epi32(S, U, A, B, I) \ 
((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm_shrdi_epi32((A), (B), (I)), \ (__v4si)(__m128i)(S))) #define _mm_maskz_shrdi_epi32(U, A, B, I) \ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm_shrdi_epi32((A), (B), (I)), \ (__v4si)_mm_setzero_si128())) #define _mm256_shrdi_epi16(A, B, I) \ ((__m256i)__builtin_ia32_vpshrdw256((__v16hi)(__m256i)(A), \ (__v16hi)(__m256i)(B), (int)(I))) #define _mm256_mask_shrdi_epi16(S, U, A, B, I) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \ (__v16hi)(__m256i)(S))) #define _mm256_maskz_shrdi_epi16(U, A, B, I) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \ (__v16hi)_mm256_setzero_si256())) #define _mm_shrdi_epi16(A, B, I) \ ((__m128i)__builtin_ia32_vpshrdw128((__v8hi)(__m128i)(A), \ (__v8hi)(__m128i)(B), (int)(I))) #define _mm_mask_shrdi_epi16(S, U, A, B, I) \ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ (__v8hi)(__m128i)(S))) #define _mm_maskz_shrdi_epi16(U, A, B, I) \ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ (__v8hi)_mm_setzero_si128())) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_vpshldvq256((__v4di)__A, (__v4di)__B, (__v4di)__C); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shldv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectq_256(__U, (__v4di)_mm256_shldv_epi64(__A, __B, __C), (__v4di)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectq_256(__U, (__v4di)_mm256_shldv_epi64(__A, __B, __C), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpshldvq128((__v2di)__A, (__v2di)__B, (__v2di)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shldv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectq_128(__U, (__v2di)_mm_shldv_epi64(__A, __B, __C), (__v2di)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectq_128(__U, (__v2di)_mm_shldv_epi64(__A, __B, __C), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_vpshldvd256((__v8si)__A, (__v8si)__B, (__v8si)__C); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shldv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_shldv_epi32(__A, __B, __C), (__v8si)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_shldv_epi32(__A, __B, __C), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpshldvd128((__v4si)__A, (__v4si)__B, (__v4si)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shldv_epi32(__m128i __A, __mmask8 __U, 
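/* Editorial usage sketch, not part of the original header: the shldi/shrdi and
   shldv/shrdv intrinsics are funnel shifts over the concatenation of the
   corresponding elements of A and B, taking either an immediate count or a
   per-element count. Hypothetical example:

     __m128i demo_rotl7_epi32(__m128i v) {
       return _mm_shldi_epi32(v, v, 7);   // funnel shift of v with itself == rotate left by 7
     }
*/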
__m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_shldv_epi32(__A, __B, __C), (__v4si)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_shldv_epi32(__A, __B, __C), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_vpshldvw256((__v16hi)__A, (__v16hi)__B, (__v16hi)__C); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shldv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectw_256(__U, (__v16hi)_mm256_shldv_epi16(__A, __B, __C), (__v16hi)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectw_256(__U, (__v16hi)_mm256_shldv_epi16(__A, __B, __C), (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpshldvw128((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shldv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectw_128(__U, (__v8hi)_mm_shldv_epi16(__A, __B, __C), (__v8hi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectw_128(__U, (__v8hi)_mm_shldv_epi16(__A, __B, __C), (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_vpshrdvq256((__v4di)__A, (__v4di)__B, (__v4di)__C); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectq_256(__U, (__v4di)_mm256_shrdv_epi64(__A, __B, __C), (__v4di)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectq_256(__U, (__v4di)_mm256_shrdv_epi64(__A, __B, __C), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpshrdvq128((__v2di)__A, (__v2di)__B, (__v2di)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shrdv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectq_128(__U, (__v2di)_mm_shrdv_epi64(__A, __B, __C), (__v2di)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectq_128(__U, (__v2di)_mm_shrdv_epi64(__A, __B, __C), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_vpshrdvd256((__v8si)__A, (__v8si)__B, (__v8si)__C); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_shrdv_epi32(__A, __B, __C), (__v8si)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 
_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_shrdv_epi32(__A, __B, __C), (__v8si)_mm256_setzero_si256()); }
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpshrdvd128((__v4si)__A, (__v4si)__B, (__v4si)__C); }
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shrdv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_shrdv_epi32(__A, __B, __C), (__v4si)__A); }
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_shrdv_epi32(__A, __B, __C), (__v4si)_mm_setzero_si128()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_vpshrdvw256((__v16hi)__A, (__v16hi)__B, (__v16hi)__C); }
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectw_256(__U, (__v16hi)_mm256_shrdv_epi16(__A, __B, __C), (__v16hi)__A); }
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectw_256(__U, (__v16hi)_mm256_shrdv_epi16(__A, __B, __C), (__v16hi)_mm256_setzero_si256()); }
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpshrdvw128((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); }
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shrdv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectw_128(__U, (__v8hi)_mm_shrdv_epi16(__A, __B, __C), (__v8hi)__A); }
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectw_128(__U, (__v8hi)_mm_shrdv_epi16(__A, __B, __C), (__v8hi)_mm_setzero_si128()); }
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#endif
/*===---- clflushoptintrin.h - CLFLUSHOPT intrinsic ------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <clflushoptintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __CLFLUSHOPTINTRIN_H
#define __CLFLUSHOPTINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("clflushopt")))
/// Invalidates all levels of the cache hierarchy and flushes modified data to
/// memory for the cache line specified by the address \a __m.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c CLFLUSHOPT instruction.
///
/// \param __m
///    An address within the cache line to flush and invalidate.
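///
/// Usage sketch (editorial addition, not part of the original header); the
/// buffer walk below is illustrative and assumes 64-byte cache lines:
/// \code
///   void flush_buffer(const char *buf, unsigned long len) {
///     for (unsigned long off = 0; off < len; off += 64)
///       _mm_clflushopt(buf + off);   // flush one cache line per iteration
///   }
/// \endcode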
static __inline__ void __DEFAULT_FN_ATTRS _mm_clflushopt(void const * __m) { __builtin_ia32_clflushopt(__m); } #undef __DEFAULT_FN_ATTRS #endif //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // Automatically generated file, do not edit! //===----------------------------------------------------------------------===// #ifndef _HVX_HEXAGON_PROTOS_H_ #define _HVX_HEXAGON_PROTOS_H_ 1 #ifdef __HVX__ #if __HVX_LENGTH__ == 128 #define __BUILTIN_VECTOR_WRAP(a) a ## _128B #else #define __BUILTIN_VECTOR_WRAP(a) a #endif #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rd32=vextract(Vu32,Rs32) C Intrinsic Prototype: Word32 Q6_R_vextract_VR(HVX_Vector Vu, Word32 Rs) Instruction Type: LD Execution Slots: SLOT0 ========================================================================== */ #define Q6_R_vextract_VR(Vu,Rs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_extractw)(Vu,Rs) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=hi(Vss32) C Intrinsic Prototype: HVX_Vector Q6_V_hi_W(HVX_VectorPair Vss) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_hi_W(Vss) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_hi)(Vss) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=lo(Vss32) C Intrinsic Prototype: HVX_Vector Q6_V_lo_W(HVX_VectorPair Vss) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_lo_W(Vss) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lo)(Vss) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vsplat(Rt32) C Intrinsic Prototype: HVX_Vector Q6_V_vsplat_R(Word32 Rt) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_V_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplatw)(Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=and(Qs4,Qt4) C Intrinsic Prototype: HVX_VectorPred Q6_Q_and_QQ(HVX_VectorPred Qs, HVX_VectorPred Qt) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_and_QQ(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=and(Qs4,!Qt4) C Intrinsic Prototype: HVX_VectorPred Q6_Q_and_QQn(HVX_VectorPred Qs, HVX_VectorPred Qt) Instruction Type: CVI_VA_DV Execution Slots: 
SLOT0123 ========================================================================== */ #define Q6_Q_and_QQn(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_and_n)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=not(Qs4) C Intrinsic Prototype: HVX_VectorPred Q6_Q_not_Q(HVX_VectorPred Qs) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_not_Q(Qs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_not)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=or(Qs4,Qt4) C Intrinsic Prototype: HVX_VectorPred Q6_Q_or_QQ(HVX_VectorPred Qs, HVX_VectorPred Qt) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_or_QQ(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=or(Qs4,!Qt4) C Intrinsic Prototype: HVX_VectorPred Q6_Q_or_QQn(HVX_VectorPred Qs, HVX_VectorPred Qt) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_or_QQn(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_or_n)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vsetq(Rt32) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vsetq_R(Word32 Rt) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vsetq_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_scalar2)(Rt)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=xor(Qs4,Qt4) C Intrinsic Prototype: HVX_VectorPred Q6_Q_xor_QQ(HVX_VectorPred Qs, HVX_VectorPred Qt) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_xor_QQ(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) 
vmem(Rt32+#s4)=Vs32 C Intrinsic Prototype: void Q6_vmem_QnRIV(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs) Instruction Type: CVI_VM_ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_vmem_QnRIV(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nqpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) vmem(Rt32+#s4):nt=Vs32 C Intrinsic Prototype: void Q6_vmem_QnRIV_nt(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs) Instruction Type: CVI_VM_ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_vmem_QnRIV_nt(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) vmem(Rt32+#s4):nt=Vs32 C Intrinsic Prototype: void Q6_vmem_QRIV_nt(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs) Instruction Type: CVI_VM_ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_vmem_QRIV_nt(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) vmem(Rt32+#s4)=Vs32 C Intrinsic Prototype: void Q6_vmem_QRIV(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs) Instruction Type: CVI_VM_ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_vmem_QRIV(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_qpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vabsdiff(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vabsdiff_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vuh_vabsdiff_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vabsdiff(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vub_vabsdiff_VubVub(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vub_vabsdiff_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vabsdiff(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vabsdiff_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vuh_vabsdiff_VuhVuh(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uw=vabsdiff(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vabsdiff_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vuw_vabsdiff_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vabs(Vu32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vabs_Vh(HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vabs_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vabs(Vu32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vabs_Vh_sat(HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vabs_Vh_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsh_sat)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vabs(Vu32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vabs_Vw(HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vabs_Vw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsw)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vabs(Vu32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vabs_Vw_sat(HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vabs_Vw_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsw_sat)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vadd(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vadd_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vadd_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.b=vadd(Vuu32.b,Vvv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vadd_WbWb(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wb_vadd_WbWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddb_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) Vx32.b+=Vu32.b C Intrinsic 
Prototype: HVX_Vector Q6_Vb_condacc_QnVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_condacc_QnVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) Vx32.b+=Vu32.b C Intrinsic Prototype: HVX_Vector Q6_Vb_condacc_QVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_condacc_QVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vadd(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vadd_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vadd_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vadd(Vuu32.h,Vvv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vadd_WhWh(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vadd_WhWh(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddh_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) Vx32.h+=Vu32.h C Intrinsic Prototype: HVX_Vector Q6_Vh_condacc_QnVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_condacc_QnVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) Vx32.h+=Vu32.h C Intrinsic Prototype: HVX_Vector Q6_Vh_condacc_QVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_condacc_QVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vadd(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vadd_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vadd_VhVh_sat(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vadd(Vuu32.h,Vvv32.h):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vadd_WhWh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vadd_WhWh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vadd(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vadd_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vadd(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vadd_VubVub(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vadd_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vadd(Vu32.ub,Vv32.ub):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vadd_VubVub_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vadd_VubVub_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.ub=vadd(Vuu32.ub,Vvv32.ub):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wub_vadd_WubWub_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wub_vadd_WubWub_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vadd(Vu32.uh,Vv32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vadd_VuhVuh_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vadd_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uh=vadd(Vuu32.uh,Vvv32.uh):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vadd_WuhWuh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define 
Q6_Wuh_vadd_WuhWuh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vadd(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vadd_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vadd(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vadd_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vadd(Vuu32.w,Vvv32.w) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_WwWw(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Ww_vadd_WwWw(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddw_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) Vx32.w+=Vu32.w C Intrinsic Prototype: HVX_Vector Q6_Vw_condacc_QnVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_condacc_QnVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) Vx32.w+=Vu32.w C Intrinsic Prototype: HVX_Vector Q6_Vw_condacc_QVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_condacc_QVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vadd(Vu32.w,Vv32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vadd_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vadd(Vuu32.w,Vvv32.w):sat C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_WwWw_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 
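   Usage example (editor's illustrative sketch, not part of the generated
   header; xx, yy and sum are assumed HVX_VectorPair values, and an
   HVX-enabled build with this header included is assumed):
     HVX_VectorPair sum = Q6_Ww_vadd_WwWw_sat(xx, yy);
   Each 32-bit lane of sum is the saturating sum of the corresponding
   lanes of xx and yy.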
========================================================================== */ #define Q6_Ww_vadd_WwWw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=valign(Vu32,Vv32,Rt8) C Intrinsic Prototype: HVX_Vector Q6_V_valign_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_valign_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valignb)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=valign(Vu32,Vv32,#u3) C Intrinsic Prototype: HVX_Vector Q6_V_valign_VVI(HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_valign_VVI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valignbi)(Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vand(Vu32,Vv32) C Intrinsic Prototype: HVX_Vector Q6_V_vand_VV(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vand_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vand)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vand(Qu4,Rt32) C Intrinsic Prototype: HVX_Vector Q6_V_vand_QR(HVX_VectorPred Qu, Word32 Rt) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_V_vand_QR(Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32|=vand(Qu4,Rt32) C Intrinsic Prototype: HVX_Vector Q6_V_vandor_VQR(HVX_Vector Vx, HVX_VectorPred Qu, Word32 Rt) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_V_vandor_VQR(Vx,Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt_acc)(Vx,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vand(Vu32,Rt32) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vand_VR(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Q_vand_VR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)(Vu,Rt)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vand(Vu32,Rt32) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vandor_QVR(HVX_VectorPred Qx, HVX_Vector Vu, Word32 Rt) Instruction 
Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Q_vandor_QVR(Qx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt_acc)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Rt)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasl(Vu32.h,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vh_vasl_VhR(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vasl_VhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasl(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vasl_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vasl_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslhv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vasl(Vu32.w,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vw_vasl_VwR(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vasl_VwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslw)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vasl(Vu32.w,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vw_vaslacc_VwVwR(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vaslacc_VwVwR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslw_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vasl(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vasl_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vasl_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslwv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasr(Vu32.h,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VhR(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vasr_VhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vasr(Vu32.h,Vv32.h,Rt8):rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vb_vasr_VhVhR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: 
SLOT0123 ========================================================================== */ #define Q6_Vb_vasr_VhVhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhbrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vasr(Vu32.h,Vv32.h,Rt8):rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_VhVhR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vasr_VhVhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhubrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vasr(Vu32.h,Vv32.h,Rt8):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_VhVhR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vasr_VhVhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhubsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasr(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vasr_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vasr(Vu32.w,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vw_vasr_VwR(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vasr_VwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrw)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vasr(Vu32.w,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vw_vasracc_VwVwR(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vasracc_VwVwR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrw_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasr(Vu32.w,Vv32.w,Rt8) C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VwVwR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vasr_VwVwR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwh)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasr(Vu32.w,Vv32.w,Rt8):rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VwVwR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: 
SLOT0123 ========================================================================== */ #define Q6_Vh_vasr_VwVwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwhrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasr(Vu32.w,Vv32.w,Rt8):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VwVwR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vasr_VwVwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwhsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vasr(Vu32.w,Vv32.w,Rt8):sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_VwVwR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vasr_VwVwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwuhsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vasr(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vasr_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vasr_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=Vu32 C Intrinsic Prototype: HVX_Vector Q6_V_equals_V(HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_equals_V(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassign)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32=Vuu32 C Intrinsic Prototype: HVX_VectorPair Q6_W_equals_W(HVX_VectorPair Vuu) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_W_equals_W(Vuu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassignp)(Vuu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vavg(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vavg_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vavg_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vavg(Vu32.h,Vv32.h):rnd C Intrinsic Prototype: HVX_Vector Q6_Vh_vavg_VhVh_rnd(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vavg_VhVh_rnd(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavghrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vavg(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vub_vavg_VubVub(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vavg_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vavg(Vu32.ub,Vv32.ub):rnd C Intrinsic Prototype: HVX_Vector Q6_Vub_vavg_VubVub_rnd(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vavg_VubVub_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgubrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vavg(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vavg_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vavg_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vavg(Vu32.uh,Vv32.uh):rnd C Intrinsic Prototype: HVX_Vector Q6_Vuh_vavg_VuhVuh_rnd(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vavg_VuhVuh_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguhrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vavg(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vavg_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vavg_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vavg(Vu32.w,Vv32.w):rnd C Intrinsic Prototype: HVX_Vector Q6_Vw_vavg_VwVw_rnd(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vavg_VwVw_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgwrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vcl0(Vu32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vcl0_Vuh(HVX_Vector Vu) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vcl0_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcl0h)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* 
========================================================================== Assembly Syntax: Vd32.uw=vcl0(Vu32.uw) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vcl0_Vuw(HVX_Vector Vu) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuw_vcl0_Vuw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcl0w)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32=vcombine(Vu32,Vv32) C Intrinsic Prototype: HVX_VectorPair Q6_W_vcombine_VV(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_W_vcombine_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcombine)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=#0 C Intrinsic Prototype: HVX_Vector Q6_V_vzero() Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vzero() __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vd0)() #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vdeal(Vu32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vdeal_Vb(HVX_Vector Vu) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vdeal_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealb)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vdeale(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vdeale_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vdeale_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealb4w)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vdeal(Vu32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vdeal_Vh(HVX_Vector Vu) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vdeal_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32=vdeal(Vu32,Vv32,Rt8) C Intrinsic Prototype: HVX_VectorPair Q6_W_vdeal_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_W_vdeal_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealvdd)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vdelta(Vu32,Vv32) C Intrinsic Prototype: HVX_Vector Q6_V_vdelta_VV(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 
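   Usage example (editor's illustrative sketch, not part of the generated
   header; data and control are assumed HVX_Vector values):
     HVX_Vector permuted = Q6_V_vdelta_VV(data, control);
   The bytes of data are routed through the delta permutation network as
   directed by the per-byte steering values in control.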
========================================================================== */ #define Q6_V_vdelta_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdelta)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vdmpy(Vu32.ub,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vh_vdmpy_VubRb(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vdmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.h+=vdmpy(Vu32.ub,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vh_vdmpyacc_VhVubRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vdmpyacc_VhVubRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vdmpy(Vuu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vdmpy_WubRb(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vdmpy_WubRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_dv)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h+=vdmpy(Vuu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vdmpyacc_WhWubRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vdmpyacc_WhWubRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_dv_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRb(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpy_VhRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vu32.h,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpyacc_VwVhRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vdmpy(Vuu32.h,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vdmpy_WhRb(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 
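   Usage example (editor's illustrative sketch, not part of the generated
   header; samples is an assumed HVX_VectorPair and coeffs an assumed
   Word32 holding packed signed-byte coefficients):
     HVX_VectorPair prods = Q6_Ww_vdmpy_WhRb(samples, coeffs);
   Roughly: halfword lanes of samples are dual-multiplied by the byte
   coefficients packed in coeffs, with the products summed into word lanes.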
========================================================================== */ #define Q6_Ww_vdmpy_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_dv)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w+=vdmpy(Vuu32.h,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vdmpyacc_WwWhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vdmpyacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_dv_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vuu32.h,Rt32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_WhRh_sat(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpy_WhRh_sat(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhisat)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vuu32.h,Rt32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwWhRh_sat(HVX_Vector Vx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpyacc_VwWhRh_sat(Vx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhisat_acc)(Vx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Rt32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRh_sat(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpy_VhRh_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsat)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vu32.h,Rt32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRh_sat(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpyacc_VwVhRh_sat(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsat_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vuu32.h,Rt32.uh,#1):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_WhRuh_sat(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpy_WhRuh_sat(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsuisat)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vuu32.h,Rt32.uh,#1):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwWhRuh_sat(HVX_Vector 
Vx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpyacc_VwWhRuh_sat(Vx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsuisat_acc)(Vx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Rt32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRuh_sat(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpy_VhRuh_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsusat)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vu32.h,Rt32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRuh_sat(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpyacc_VwVhRuh_sat(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsusat_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpy_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhvsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhVh_sat(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpyacc_VwVhVh_sat(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhvsat_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uw=vdsad(Vuu32.uh,Rt32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vdsad_WuhRuh(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuw_vdsad_WuhRuh(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdsaduh)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.uw+=vdsad(Vuu32.uh,Rt32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vdsadacc_WuwWuhRuh(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuw_vdsadacc_WuwWuhRuh(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdsaduh_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: 
Qd4=vcmp.eq(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eq_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.eq(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eqand_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.eq(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eqor_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.eq(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eqxacc_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.eq(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eq_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.eq(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eqand_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* 
========================================================================== Assembly Syntax: Qx4|=vcmp.eq(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eqor_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.eq(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eqxacc_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.eq(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eq_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.eq(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eqand_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.eq(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eqor_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.eq(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eqxacc_QVwVw(Qx,Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gt_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtand_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtor_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gt_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 
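   Usage example (editor's illustrative sketch, not part of the generated
   header; x, lo and hi are assumed HVX_Vector values of halfword lanes):
     HVX_VectorPred pred = Q6_Q_vcmp_gt_VhVh(x, lo);
     pred = Q6_Q_vcmp_gtand_QVhVh(pred, hi, x);
   After these two calls, pred is set only in lanes where lo < x and x < hi;
   the AND-accumulating compare narrows the existing predicate.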
========================================================================== */ #define Q6_Q_vcmp_gtand_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtor_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VubVub(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gt_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVubVub(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtand_QVubVub(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVubVub(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtor_QVubVub(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* 
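/* --------------------------------------------------------------------------
   Editor's note: illustrative usage sketch, not part of the original header.
   It assumes only the HVX_Vector / HVX_VectorPred typedefs from
   hexagon_types.h and the unsigned-byte compare macros defined above.  It
   builds a per-byte predicate that is set wherever EITHER of two input
   vectors exceeds a threshold, showing how the Qx4|= (OR-accumulate)
   compare forms fold several tests into a single predicate.
   -------------------------------------------------------------------------- */
static inline HVX_VectorPred
example_mask_above_threshold(HVX_Vector frame0, HVX_Vector frame1,
                             HVX_Vector threshold)
{
    /* Q = (frame0.ub > threshold.ub), one predicate bit per byte lane */
    HVX_VectorPred q = Q6_Q_vcmp_gt_VubVub(frame0, threshold);
    /* Q |= (frame1.ub > threshold.ub), OR-accumulated into the same predicate */
    q = Q6_Q_vcmp_gtor_QVubVub(q, frame1, threshold);
    return q;
}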
========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVubVub(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVubVub(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gt_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVuhVuh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtand_QVuhVuh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVuhVuh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtor_QVuhVuh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVuhVuh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVuhVuh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.uw,Vv32.uw) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VuwVuw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gt_VuwVuw(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.uw,Vv32.uw) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVuwVuw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtand_QVuwVuw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.uw,Vv32.uw) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVuwVuw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtor_QVuwVuw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.uw,Vv32.uw) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVuwVuw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVuwVuw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gt_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtand_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA 
Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtor_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w=vinsert(Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vw_vinsert_VwR(HVX_Vector Vx, Word32 Rt) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vinsert_VwR(Vx,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vinsertwr)(Vx,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vlalign(Vu32,Vv32,Rt8) C Intrinsic Prototype: HVX_Vector Q6_V_vlalign_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vlalign_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlalignb)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vlalign(Vu32,Vv32,#u3) C Intrinsic Prototype: HVX_Vector Q6_V_vlalign_VVI(HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vlalign_VVI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlalignbi)(Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vlsr(Vu32.uh,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vlsr_VuhR(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vlsr_VuhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vlsr(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vlsr_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vlsr_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrhv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly 
Syntax: Vd32.uw=vlsr(Vu32.uw,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vlsr_VuwR(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuw_vlsr_VuwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrw)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vlsr(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vlsr_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vlsr_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrwv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vlut32(Vu32.b,Vv32.b,Rt8) C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32_VbVbR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vlut32_VbVbR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.b|=vlut32(Vu32.b,Vv32.b,Rt8) C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32or_VbVbVbR(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vlut32or_VbVbVbR(Vx,Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_oracc)(Vx,Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vlut16(Vu32.b,Vv32.h,Rt8) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16_VbVhR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vlut16_VbVhR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h|=vlut16(Vu32.b,Vv32.h,Rt8) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16or_WhVbVhR(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vlut16or_WhVbVhR(Vxx,Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_oracc)(Vxx,Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmax(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vmax_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vmax_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* 
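/* --------------------------------------------------------------------------
   Editor's note: illustrative usage sketch, not part of the original header.
   Assumes the hexagon_types.h typedefs.  Clamps every signed halfword lane
   to [lo, hi] using Q6_Vh_vmax_VhVh (defined above) and Q6_Vh_vmin_VhVh
   (documented just below).  The caller passes lo and hi already splatted
   across all lanes, e.g. with the scalar-splat intrinsics declared elsewhere
   in this header.
   -------------------------------------------------------------------------- */
static inline HVX_Vector
example_clamp_h(HVX_Vector x, HVX_Vector lo, HVX_Vector hi)
{
    x = Q6_Vh_vmax_VhVh(x, lo);    /* raise lanes that are below lo */
    return Q6_Vh_vmin_VhVh(x, hi); /* lower lanes that are above hi */
}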
========================================================================== Assembly Syntax: Vd32.ub=vmax(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vub_vmax_VubVub(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vmax_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vmax(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vmax_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vmax_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmax(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmax_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vmax_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmin(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vmin_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vmin_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vmin(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vub_vmin_VubVub(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vmin_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vmin(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vmin_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vmin_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmin(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmin_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vmin_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vmpa(Vuu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair 
Q6_Wh_vmpa_WubRb(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpa_WubRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabus)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h+=vmpa(Vuu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpaacc_WhWubRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpaacc_WhWubRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabus_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vmpa(Vuu32.ub,Vvv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpa_WubWb(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpa_WubWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabusv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vmpa(Vuu32.ub,Vvv32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpa_WubWub(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpa_WubWub(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuuv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vmpa(Vuu32.h,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpa_WhRb(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpa_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahb)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w+=vmpa(Vuu32.h,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpaacc_WwWhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpaacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahb_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vmpy(Vu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpy_VubRb(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybus)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h+=vmpy(Vu32.ub,Rt32.b) C Intrinsic Prototype: 
HVX_VectorPair Q6_Wh_vmpyacc_WhVubRb(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpyacc_WhVubRb(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybus_acc)(Vxx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vmpy(Vu32.ub,Vv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpy_VubVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpy_VubVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybusv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h+=vmpy(Vu32.ub,Vv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpyacc_WhVubVb(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpyacc_WhVubVb(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybusv_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vmpy(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpy_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpy_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h+=vmpy(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpyacc_WhVbVb(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpyacc_WhVbVb(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybv_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpye(Vu32.w,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpye_VwVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpye_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyewuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vmpy(Vu32.h,Rt32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpy_VhRh(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpy_VhRh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w+=vmpy(Vu32.h,Rt32.h):sat C Intrinsic Prototype: 
HVX_VectorPair Q6_Ww_vmpyacc_WwVhRh_sat(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpyacc_WwVhRh_sat(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhsat_acc)(Vxx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhRh_s1_rnd_sat(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vmpy_VhRh_s1_rnd_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhsrs)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhRh_s1_sat(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vmpy_VhRh_s1_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhss)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vmpy(Vu32.h,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpy_VhVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpy_VhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhus)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w+=vmpy(Vu32.h,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhVuh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpyacc_WwVhVuh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhus_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vmpy(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpy_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpy_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w+=vmpy(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhVh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpyacc_WwVhVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhv_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: 
Vd32.h=vmpy(Vu32.h,Vv32.h):<<1:rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhVh_s1_rnd_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vmpy_VhVh_s1_rnd_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhvsrs)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyieo(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyieo_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyieo_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyieoh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vmpyie(Vu32.w,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyieacc_VwVwVh(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyieacc_VwVwVh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewh_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyie(Vu32.w,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyie_VwVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyie_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vmpyie(Vu32.w,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyieacc_VwVwVuh(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyieacc_VwVwVuh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewuh_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmpyi(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyi_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vmpyi_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyih)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.h+=vmpyi(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyiacc_VhVhVh(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vmpyiacc_VhVhVh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyih_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: 
Vd32.h=vmpyi(Vu32.h,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyi_VhRb(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vmpyi_VhRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyihb)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.h+=vmpyi(Vu32.h,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyiacc_VhVhRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vmpyiacc_VhVhRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyihb_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyio(Vu32.w,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyio_VwVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyio_VwVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiowh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyi(Vu32.w,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyi_VwRb(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyi_VwRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwb)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vmpyi(Vu32.w,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyiacc_VwVwRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyiacc_VwVwRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwb_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyi(Vu32.w,Rt32.h) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyi_VwRh(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyi_VwRh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vmpyi(Vu32.w,Rt32.h) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyiacc_VwVwRh(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyiacc_VwVwRh(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwh_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyo(Vu32.w,Vv32.h):<<1:sat C Intrinsic Prototype: HVX_Vector 
Q6_Vw_vmpyo_VwVh_s1_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyo_VwVh_s1_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyo(Vu32.w,Vv32.h):<<1:rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyo_VwVh_s1_rnd_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyo_VwVh_s1_rnd_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_rnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vmpyo(Vu32.w,Vv32.h):<<1:rnd:sat:shift C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyoacc_VwVwVh_s1_rnd_sat_shift(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyoacc_VwVwVh_s1_rnd_sat_shift(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_rnd_sacc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vmpyo(Vu32.w,Vv32.h):<<1:sat:shift C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_sacc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uh=vmpy(Vu32.ub,Rt32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpy_VubRub(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuh_vmpy_VubRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyub)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.uh+=vmpy(Vu32.ub,Rt32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpyacc_WuhVubRub(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuh_vmpyacc_WuhVubRub(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyub_acc)(Vxx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uh=vmpy(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpy_VubVub(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuh_vmpy_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyubv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* 
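/* --------------------------------------------------------------------------
   Editor's note: illustrative usage sketch, not part of the original header.
   Assumes the hexagon_types.h typedefs and vector-aligned input arrays with
   nvec >= 1.  Q6_Wuh_vmpy_VubVub (defined above) produces a vector pair of
   16-bit products of two unsigned-byte vectors; the accumulating form
   Q6_Wuh_vmpyacc_WuhVubVub (documented just below) adds further products
   into the same pair.  The assembly syntax shows no :sat modifier, so the
   16-bit accumulator lanes are expected to wrap on overflow; keep nvec small
   enough for the data range.
   -------------------------------------------------------------------------- */
static inline HVX_VectorPair
example_u8_product_accumulate(const HVX_Vector *a, const HVX_Vector *b, int nvec)
{
    HVX_VectorPair acc = Q6_Wuh_vmpy_VubVub(a[0], b[0]);  /* acc.uh  = a*b */
    for (int i = 1; i < nvec; ++i)
        acc = Q6_Wuh_vmpyacc_WuhVubVub(acc, a[i], b[i]);  /* acc.uh += a*b */
    return acc;
}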
========================================================================== Assembly Syntax: Vxx32.uh+=vmpy(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpyacc_WuhVubVub(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuh_vmpyacc_WuhVubVub(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyubv_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uw=vmpy(Vu32.uh,Rt32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpy_VuhRuh(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuw_vmpy_VuhRuh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.uw+=vmpy(Vu32.uh,Rt32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpyacc_WuwVuhRuh(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuw_vmpyacc_WuwVuhRuh(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuh_acc)(Vxx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uw=vmpy(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpy_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuw_vmpy_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.uw+=vmpy(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpyacc_WuwVuhVuh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuw_vmpyacc_WuwVuhVuh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhv_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vmux(Qt4,Vu32,Vv32) C Intrinsic Prototype: HVX_Vector Q6_V_vmux_QVV(HVX_VectorPred Qt, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vmux_QVV(Qt,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmux)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1),Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vnavg(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vnavg_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vnavg_VhVh(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vnavg(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vb_vnavg_VubVub(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vnavg_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vnavg(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vnavg_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vnavg_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vnormamt(Vu32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vnormamt_Vh(HVX_Vector Vu) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vnormamt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnormamth)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vnormamt(Vu32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vnormamt_Vw(HVX_Vector Vu) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vnormamt_Vw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnormamtw)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vnot(Vu32) C Intrinsic Prototype: HVX_Vector Q6_V_vnot_V(HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vnot_V(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnot)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vor(Vu32,Vv32) C Intrinsic Prototype: HVX_Vector Q6_V_vor_VV(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vor_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vor)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vpacke(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vb_vpacke_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vpacke_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackeb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vpacke(Vu32.w,Vv32.w) C Intrinsic Prototype: 
HVX_Vector Q6_Vh_vpacke_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vpacke_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackeh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vpack(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vb_vpack_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vpack_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackhb_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vpack(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vpack_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vpack_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackhub_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vpacko(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vb_vpacko_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vpacko_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackob)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vpacko(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vh_vpacko_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vpacko_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackoh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vpack(Vu32.w,Vv32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vpack_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vpack_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackwh_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vpack(Vu32.w,Vv32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vpack_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vpack_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackwuh_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vpopcount(Vu32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vpopcount_Vh(HVX_Vector Vu) Instruction Type: CVI_VS Execution Slots: 
SLOT0123 ========================================================================== */ #define Q6_Vh_vpopcount_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpopcounth)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vrdelta(Vu32,Vv32) C Intrinsic Prototype: HVX_Vector Q6_V_vrdelta_VV(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vrdelta_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrdelta)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vrmpy(Vu32.ub,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpy_VubRb(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vrmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybus)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vrmpy(Vu32.ub,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVubRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vrmpyacc_VwVubRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybus_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vrmpy(Vuu32.ub,Rt32.b,#u1) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vrmpy_WubRbI(HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vrmpy_WubRbI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusi)(Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w+=vrmpy(Vuu32.ub,Rt32.b,#u1) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vrmpyacc_WwWubRbI(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vrmpyacc_WwWubRbI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusi_acc)(Vxx,Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vrmpy(Vu32.ub,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpy_VubVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vrmpy_VubVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vrmpy(Vu32.ub,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVubVb(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX 
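/* --------------------------------------------------------------------------
   Editor's note: illustrative usage sketch, not part of the original header.
   Assumes the hexagon_types.h typedefs.  With the vrmpy (reduce-multiply)
   forms above, each 32-bit lane of the result holds the sum of the products
   of the unsigned bytes in that lane with the four signed-byte coefficients
   packed into the Word32 operand.  Chaining the accumulating form extends
   this to a second group of taps on a second input vector, a common building
   block for small FIR-style filters.
   -------------------------------------------------------------------------- */
static inline HVX_Vector
example_rmpy_two_groups(HVX_Vector pix0, HVX_Vector pix1,
                        Word32 coeffs0, Word32 coeffs1)
{
    HVX_Vector acc = Q6_Vw_vrmpy_VubRb(pix0, coeffs0);   /* first 4 taps     */
    return Q6_Vw_vrmpyacc_VwVubRb(acc, pix1, coeffs1);   /* += second 4 taps */
}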
Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vrmpyacc_VwVubVb(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusv_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vrmpy(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpy_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vrmpy_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vrmpy(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVbVb(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vrmpyacc_VwVbVb(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybv_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uw=vrmpy(Vu32.ub,Rt32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpy_VubRub(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vuw_vrmpy_VubRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyub)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.uw+=vrmpy(Vu32.ub,Rt32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpyacc_VuwVubRub(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vuw_vrmpyacc_VuwVubRub(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyub_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uw=vrmpy(Vuu32.ub,Rt32.ub,#u1) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrmpy_WubRubI(HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuw_vrmpy_WubRubI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubi)(Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.uw+=vrmpy(Vuu32.ub,Rt32.ub,#u1) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrmpyacc_WuwWubRubI(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuw_vrmpyacc_WuwWubRubI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubi_acc)(Vxx,Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uw=vrmpy(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector 
Q6_Vuw_vrmpy_VubVub(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vuw_vrmpy_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.uw+=vrmpy(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpyacc_VuwVubVub(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vuw_vrmpyacc_VuwVubVub(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubv_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vror(Vu32,Rt32) C Intrinsic Prototype: HVX_Vector Q6_V_vror_VR(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vror_VR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vror)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vround(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vb_vround_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vround_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundhb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vround(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vround_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vround_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundhub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vround(Vu32.w,Vv32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vround_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vround_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundwh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vround(Vu32.w,Vv32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vround_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vround_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundwuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uw=vrsad(Vuu32.ub,Rt32.ub,#u1) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrsad_WubRubI(HVX_VectorPair Vuu, Word32 Rt, 
Word32 Iu1) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuw_vrsad_WubRubI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrsadubi)(Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.uw+=vrsad(Vuu32.ub,Rt32.ub,#u1) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrsadacc_WuwWubRubI(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuw_vrsadacc_WuwWubRubI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrsadubi_acc)(Vxx,Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vsat(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vub_vsat_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vsat_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsathub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vsat(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vh_vsat_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vsat_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatwh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vsxt(Vu32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsxt_Vb(HVX_Vector Vu) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vsxt_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsb)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vsxt(Vu32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsxt_Vh(HVX_Vector Vu) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Ww_vsxt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vshuffe(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vshuffe_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vshuffe_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufeh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vshuff(Vu32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vshuff_Vb(HVX_Vector Vu) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ 
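/* --------------------------------------------------------------------------
   Editor's illustrative sketch (not part of the original header): widening
   byte arithmetic built from the pair-producing forms documented in this
   header.  The helper name is hypothetical; it assumes an HVX-enabled build
   (-mhvx) with HVX_Vector/HVX_VectorPair in scope, and uses the saturating
   pair subtract documented a little further down.
   -------------------------------------------------------------------------- */
static inline HVX_VectorPair example_widen_sub_sat(HVX_Vector a, HVX_Vector b)
{
  /* Vdd32.h=vsxt(Vu32.b): sign-extend byte lanes into a halfword vector pair. */
  HVX_VectorPair wa = Q6_Wh_vsxt_Vb(a);
  HVX_VectorPair wb = Q6_Wh_vsxt_Vb(b);
  /* Vdd32.h=vsub(Vuu32.h,Vvv32.h):sat: saturating halfword subtract on pairs. */
  return Q6_Wh_vsub_WhWh_sat(wa, wb);
}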
#define Q6_Vb_vshuff_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffb)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vshuffe(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vshuffe_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vshuffe_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffeb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vshuff(Vu32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vshuff_Vh(HVX_Vector Vu) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vshuff_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vshuffo(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vshuffo_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vshuffo_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffob)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32=vshuff(Vu32,Vv32,Rt8) C Intrinsic Prototype: HVX_VectorPair Q6_W_vshuff_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_W_vshuff_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffvdd)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.b=vshuffoe(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vshuffoe_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wb_vshuffoe_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoeb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vshuffoe(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vshuffoe_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vshuffoe_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoeh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vshuffo(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vshuffo_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vshuffo_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 
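/* --------------------------------------------------------------------------
   Editor's illustrative sketch (not part of the original header): separating
   interleaved byte data into its even- and odd-numbered lanes with the
   vshuffe/vshuffo forms above.  The helper name and output convention are
   hypothetical; exact lane placement follows the assembly syntax shown above.
   -------------------------------------------------------------------------- */
static inline void example_split_even_odd_b(HVX_Vector a, HVX_Vector b,
                                            HVX_Vector *even, HVX_Vector *odd)
{
  *even = Q6_Vb_vshuffe_VbVb(a, b);  /* even byte lanes of the two inputs */
  *odd  = Q6_Vb_vshuffo_VbVb(a, b);  /* odd byte lanes of the two inputs  */
}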
60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vsub(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vsub_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vsub_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.b=vsub(Vuu32.b,Vvv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vsub_WbWb(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wb_vsub_WbWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubb_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) Vx32.b-=Vu32.b C Intrinsic Prototype: HVX_Vector Q6_Vb_condnac_QnVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_condnac_QnVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) Vx32.b-=Vu32.b C Intrinsic Prototype: HVX_Vector Q6_Vb_condnac_QVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_condnac_QVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vsub(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vsub_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vsub_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vsub(Vuu32.h,Vvv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsub_WhWh(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vsub_WhWh(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubh_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) Vx32.h-=Vu32.h C Intrinsic Prototype: HVX_Vector Q6_Vh_condnac_QnVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_condnac_QnVhVh(Qv,Vx,Vu) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) Vx32.h-=Vu32.h C Intrinsic Prototype: HVX_Vector Q6_Vh_condnac_QVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_condnac_QVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vsub(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vsub_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vsub_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vsub(Vuu32.h,Vvv32.h):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsub_WhWh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vsub_WhWh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vsub(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vsub_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vsub(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsub_VubVub(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vsub_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vsub(Vu32.ub,Vv32.ub):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vsub_VubVub_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vsub_VubVub_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.ub=vsub(Vuu32.ub,Vvv32.ub):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wub_vsub_WubWub_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 
========================================================================== */ #define Q6_Wub_vsub_WubWub_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vsub(Vu32.uh,Vv32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vsub_VuhVuh_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vsub_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uh=vsub(Vuu32.uh,Vvv32.uh):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vsub_WuhWuh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wuh_vsub_WuhWuh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vsub(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vsub_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vsub(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vsub_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vsub_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vsub(Vuu32.w,Vvv32.w) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_WwWw(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Ww_vsub_WwWw(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubw_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) Vx32.w-=Vu32.w C Intrinsic Prototype: HVX_Vector Q6_Vw_condnac_QnVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_condnac_QnVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) Vx32.w-=Vu32.w C Intrinsic Prototype: HVX_Vector Q6_Vw_condnac_QVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) 
Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_condnac_QVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vsub(Vu32.w,Vv32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vsub_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vsub_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vsub(Vuu32.w,Vvv32.w):sat C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_WwWw_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Ww_vsub_WwWw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32=vswap(Qt4,Vu32,Vv32) C Intrinsic Prototype: HVX_VectorPair Q6_W_vswap_QVV(HVX_VectorPred Qt, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_W_vswap_QVV(Qt,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vswap)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1),Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vtmpy(Vuu32.b,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpy_WbRb(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vtmpy_WbRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyb)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h+=vtmpy(Vuu32.b,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpyacc_WhWbRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vtmpyacc_WhWbRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyb_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vtmpy(Vuu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpy_WubRb(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vtmpy_WubRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpybus)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: 
Vxx32.h+=vtmpy(Vuu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpyacc_WhWubRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vtmpyacc_WhWubRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpybus_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vtmpy(Vuu32.h,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vtmpy_WhRb(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vtmpy_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyhb)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w+=vtmpy(Vuu32.h,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vtmpyacc_WwWhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vtmpyacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyhb_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vunpack(Vu32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vunpack_Vb(HVX_Vector Vu) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vunpack_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackb)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vunpack(Vu32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vunpack_Vh(HVX_Vector Vu) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Ww_vunpack_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h|=vunpacko(Vu32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vunpackoor_WhVb(HVX_VectorPair Vxx, HVX_Vector Vu) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vunpackoor_WhVb(Vxx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackob)(Vxx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w|=vunpacko(Vu32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vunpackoor_WwVh(HVX_VectorPair Vxx, HVX_Vector Vu) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Ww_vunpackoor_WwVh(Vxx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackoh)(Vxx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uh=vunpack(Vu32.ub) C 
Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vunpack_Vub(HVX_Vector Vu) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wuh_vunpack_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackub)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uw=vunpack(Vu32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vunpack_Vuh(HVX_Vector Vu) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wuw_vunpack_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackuh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vxor(Vu32,Vv32) C Intrinsic Prototype: HVX_Vector Q6_V_vxor_VV(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vxor_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vxor)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uh=vzxt(Vu32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vzxt_Vub(HVX_Vector Vu) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wuh_vzxt_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vzb)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uw=vzxt(Vu32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vzxt_Vuh(HVX_Vector Vu) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wuw_vzxt_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vzh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.b=vsplat(Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vb_vsplat_R(Word32 Rt) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vb_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplatb)(Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.h=vsplat(Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vh_vsplat_R(Word32 Rt) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplath)(Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Qd4=vsetq2(Rt32) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vsetq2_R(Word32 Rt) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vsetq2_R(Rt) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_scalar2v2)(Rt)),-1) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Qd4.b=vshuffe(Qs4.h,Qt4.h) C Intrinsic Prototype: HVX_VectorPred Q6_Qb_vshuffe_QhQh(HVX_VectorPred Qs, HVX_VectorPred Qt) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Qb_vshuffe_QhQh(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_shuffeqh)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Qd4.h=vshuffe(Qs4.w,Qt4.w) C Intrinsic Prototype: HVX_VectorPred Q6_Qh_vshuffe_QwQw(HVX_VectorPred Qs, HVX_VectorPred Qt) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Qh_vshuffe_QwQw(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_shuffeqw)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.b=vadd(Vu32.b,Vv32.b):sat C Intrinsic Prototype: HVX_Vector Q6_Vb_vadd_VbVb_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vadd_VbVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vdd32.b=vadd(Vuu32.b,Vvv32.b):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vadd_WbWb_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wb_vadd_WbWb_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.w=vadd(Vu32.w,Vv32.w,Qx4):carry C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_VwVwQ_carry(HVX_Vector Vu, HVX_Vector Vv, HVX_VectorPred* Qx) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vadd_VwVwQ_carry(Vu,Vv,Qx) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddcarry)(Vu,Vv,Qx) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.h=vadd(vclb(Vu32.h),Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vadd_vclb_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vadd_vclb_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddclbh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 
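/* --------------------------------------------------------------------------
   Editor's illustrative sketch (not part of the original header): one common
   use of the :carry add documented above is multi-word arithmetic, chaining
   the carry predicate from the low word into the high word.  The helper name
   is hypothetical; the caller supplies the initial carry-in predicate (for
   example an all-false Q register), whose construction is outside this sketch.
   -------------------------------------------------------------------------- */
static inline void example_add64_lanes(HVX_Vector a_lo, HVX_Vector a_hi,
                                       HVX_Vector b_lo, HVX_Vector b_hi,
                                       HVX_VectorPred carry_in,
                                       HVX_Vector *sum_lo, HVX_Vector *sum_hi)
{
  HVX_VectorPred q = carry_in;
  *sum_lo = Q6_Vw_vadd_VwVwQ_carry(a_lo, b_lo, &q);  /* q now holds carry-out */
  *sum_hi = Q6_Vw_vadd_VwVwQ_carry(a_hi, b_hi, &q);  /* carry-in consumed here */
}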
*/ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.w=vadd(vclb(Vu32.w),Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_vclb_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vadd_vclb_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddclbw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vxx32.w+=vadd(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vaddacc_WwVhVh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vaddacc_WwVhVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhw_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vxx32.h+=vadd(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vaddacc_WhVubVub(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vaddacc_WhVubVub(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubh_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.ub=vadd(Vu32.ub,Vv32.b):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vadd_VubVb_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vadd_VubVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddububb_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vxx32.w+=vadd(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vaddacc_WwVuhVuh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vaddacc_WwVuhVuh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhw_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.uw=vadd(Vu32.uw,Vv32.uw):sat C Intrinsic Prototype: HVX_Vector Q6_Vuw_vadd_VuwVuw_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuw_vadd_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduwsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vdd32.uw=vadd(Vuu32.uw,Vvv32.uw):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vadd_WuwWuw_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wuw_vadd_WuwWuw_sat(Vuu,Vvv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduwsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32=vand(!Qu4,Rt32) C Intrinsic Prototype: HVX_Vector Q6_V_vand_QnR(HVX_VectorPred Qu, Word32 Rt) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_V_vand_QnR(Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandnqrt)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vx32|=vand(!Qu4,Rt32) C Intrinsic Prototype: HVX_Vector Q6_V_vandor_VQnR(HVX_Vector Vx, HVX_VectorPred Qu, Word32 Rt) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_V_vandor_VQnR(Vx,Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandnqrt_acc)(Vx,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32=vand(!Qv4,Vu32) C Intrinsic Prototype: HVX_Vector Q6_V_vand_QnV(HVX_VectorPred Qv, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vand_QnV(Qv,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvnqv)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vu) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32=vand(Qv4,Vu32) C Intrinsic Prototype: HVX_Vector Q6_V_vand_QV(HVX_VectorPred Qv, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vand_QV(Qv,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvqv)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vu) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.b=vasr(Vu32.h,Vv32.h,Rt8):sat C Intrinsic Prototype: HVX_Vector Q6_Vb_vasr_VhVhR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vasr_VhVhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhbsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.uh=vasr(Vu32.uw,Vv32.uw,Rt8):rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_VuwVuwR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vasr_VuwVuwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruwuhrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.uh=vasr(Vu32.w,Vv32.w,Rt8):rnd:sat C Intrinsic Prototype: HVX_Vector 
Q6_Vuh_vasr_VwVwR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vasr_VwVwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwuhrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.ub=vlsr(Vu32.ub,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vub_vlsr_VubR(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vlsr_VubR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrb)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.b=vlut32(Vu32.b,Vv32.b,Rt8):nomatch C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32_VbVbR_nomatch(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vlut32_VbVbR_nomatch(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_nm)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vx32.b|=vlut32(Vu32.b,Vv32.b,#u3) C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32or_VbVbVbI(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vlut32or_VbVbVbI(Vx,Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_oracci)(Vx,Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.b=vlut32(Vu32.b,Vv32.b,#u3) C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32_VbVbI(HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vlut32_VbVbI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvbi)(Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vdd32.h=vlut16(Vu32.b,Vv32.h,Rt8):nomatch C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16_VbVhR_nomatch(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vlut16_VbVhR_nomatch(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_nm)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vxx32.h|=vlut16(Vu32.b,Vv32.h,#u3) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16or_WhVbVhI(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vlut16or_WhVbVhI(Vxx,Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_oracci)(Vxx,Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ 
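/* --------------------------------------------------------------------------
   Editor's illustrative sketch (not part of the original header): a typical
   end-of-pipeline step with the :rnd:sat shift above, packing two vectors of
   32-bit fixed-point results into one vector of unsigned 16-bit lanes.  The
   helper name is hypothetical; how the two inputs interleave into the result
   follows the instruction's packing rule (see the assembly syntax above).
   -------------------------------------------------------------------------- */
static inline HVX_Vector example_narrow_w_to_uh(HVX_Vector acc_a, HVX_Vector acc_b,
                                                Word32 frac_bits)
{
  /* Vd32.uh=vasr(Vu32.w,Vv32.w,Rt8):rnd:sat: shift right by frac_bits,
     round, saturate to the unsigned 16-bit range, and pack. */
  return Q6_Vuh_vasr_VwVwR_rnd_sat(acc_a, acc_b, frac_bits);
}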
>= 62 /* ========================================================================== Assembly Syntax: Vdd32.h=vlut16(Vu32.b,Vv32.h,#u3) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16_VbVhI(HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vlut16_VbVhI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwhi)(Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.b=vmax(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vmax_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vmax_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.b=vmin(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vmin_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vmin_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vdd32.w=vmpa(Vuu32.uh,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpa_WuhRb(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpa_WuhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhb)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vxx32.w+=vmpa(Vuu32.uh,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpaacc_WwWuhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpaacc_WwWuhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhb_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vdd32=vmpye(Vu32.w,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_W_vmpye_VwVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_W_vmpye_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyewuh_64)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyi(Vu32.w,Rt32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyi_VwRub(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyi_VwRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwub)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* 
========================================================================== Assembly Syntax: Vx32.w+=vmpyi(Vu32.w,Rt32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyiacc_VwVwRub(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyiacc_VwVwRub(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwub_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vxx32+=vmpyo(Vu32.w,Vv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_W_vmpyoacc_WVwVh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_W_vmpyoacc_WVwVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_64_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.ub=vround(Vu32.uh,Vv32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vround_VuhVuh_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vround_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrounduhub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.uh=vround(Vu32.uw,Vv32.uw):sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vround_VuwVuw_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vround_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrounduwuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.uh=vsat(Vu32.uw,Vv32.uw) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vsat_VuwVuw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vsat_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatuwuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.b=vsub(Vu32.b,Vv32.b):sat C Intrinsic Prototype: HVX_Vector Q6_Vb_vsub_VbVb_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vsub_VbVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vdd32.b=vsub(Vuu32.b,Vvv32.b):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vsub_WbWb_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wb_vsub_WbWb_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if 
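/* --------------------------------------------------------------------------
   Editor's illustrative sketch (not part of the original header): clamping
   signed byte lanes to a scalar range with the vmax/vmin forms above, using
   Q6_Vb_vsplat_R (HVX >= 62, documented earlier in this header) to broadcast
   the bounds.  The helper name is hypothetical.
   -------------------------------------------------------------------------- */
static inline HVX_Vector example_clamp_b(HVX_Vector x, Word32 lo, Word32 hi)
{
  HVX_Vector vlo = Q6_Vb_vsplat_R(lo);
  HVX_Vector vhi = Q6_Vb_vsplat_R(hi);
  /* Per lane: min(max(x, lo), hi). */
  return Q6_Vb_vmin_VbVb(Q6_Vb_vmax_VbVb(x, vlo), vhi);
}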
__HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.w=vsub(Vu32.w,Vv32.w,Qx4):carry C Intrinsic Prototype: HVX_Vector Q6_Vw_vsub_VwVwQ_carry(HVX_Vector Vu, HVX_Vector Vv, HVX_VectorPred* Qx) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vsub_VwVwQ_carry(Vu,Vv,Qx) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubcarry)(Vu,Vv,Qx) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.ub=vsub(Vu32.ub,Vv32.b):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vsub_VubVb_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vsub_VubVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubububb_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.uw=vsub(Vu32.uw,Vv32.uw):sat C Intrinsic Prototype: HVX_Vector Q6_Vuw_vsub_VuwVuw_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuw_vsub_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuwsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vdd32.uw=vsub(Vuu32.uw,Vvv32.uw):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vsub_WuwWuw_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wuw_vsub_WuwWuw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuwsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.b=vabs(Vu32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vabs_Vb(HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vabs_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsb)(Vu) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.b=vabs(Vu32.b):sat C Intrinsic Prototype: HVX_Vector Q6_Vb_vabs_Vb_sat(HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vabs_Vb_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsb_sat)(Vu) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vx32.h+=vasl(Vu32.h,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vh_vaslacc_VhVhR(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vaslacc_VhVhR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslh_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* 
========================================================================== Assembly Syntax: Vx32.h+=vasr(Vu32.h,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vh_vasracc_VhVhR(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vasracc_VhVhR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrh_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.ub=vasr(Vu32.uh,Vv32.uh,Rt8):rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_VuhVuhR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vasr_VuhVuhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruhubrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.ub=vasr(Vu32.uh,Vv32.uh,Rt8):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_VuhVuhR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vasr_VuhVuhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruhubsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.uh=vasr(Vu32.uw,Vv32.uw,Rt8):sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_VuwVuwR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vasr_VuwVuwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruwuhsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.b=vavg(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vavg_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vavg_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.b=vavg(Vu32.b,Vv32.b):rnd C Intrinsic Prototype: HVX_Vector Q6_Vb_vavg_VbVb_rnd(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vavg_VbVb_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgbrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.uw=vavg(Vu32.uw,Vv32.uw) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vavg_VuwVuw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuw_vavg_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if 
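/* --------------------------------------------------------------------------
   Editor's illustrative sketch (not part of the original header): blending
   two rows of signed-byte samples with the rounded byte average added in
   HVX v65 and documented above.  The helper name is hypothetical; the
   non-rounding form Q6_Vb_vavg_VbVb truncates instead.
   -------------------------------------------------------------------------- */
static inline HVX_Vector example_blend_rows_b(HVX_Vector row0, HVX_Vector row1)
{
  /* Vd32.b=vavg(Vu32.b,Vv32.b):rnd: per-lane rounded average. */
  return Q6_Vb_vavg_VbVb_rnd(row0, row1);
}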
__HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.uw=vavg(Vu32.uw,Vv32.uw):rnd C Intrinsic Prototype: HVX_Vector Q6_Vuw_vavg_VuwVuw_rnd(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuw_vavg_VuwVuw_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguwrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vdd32=#0 C Intrinsic Prototype: HVX_VectorPair Q6_W_vzero() Instruction Type: MAPPING Execution Slots: SLOT0123 ========================================================================== */ #define Q6_W_vzero() __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdd0)() #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: vtmp.h=vgather(Rt32,Mu2,Vv32.h).h C Intrinsic Prototype: void Q6_vgather_ARMVh(HVX_Vector* Rs, Word32 Rt, Word32 Mu, HVX_Vector Vv) Instruction Type: CVI_GATHER Execution Slots: SLOT01 ========================================================================== */ #define Q6_vgather_ARMVh(Rs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermh)(Rs,Rt,Mu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: if (Qs4) vtmp.h=vgather(Rt32,Mu2,Vv32.h).h C Intrinsic Prototype: void Q6_vgather_AQRMVh(HVX_Vector* Rs, HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_Vector Vv) Instruction Type: CVI_GATHER Execution Slots: SLOT01 ========================================================================== */ #define Q6_vgather_AQRMVh(Rs,Qs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: vtmp.h=vgather(Rt32,Mu2,Vvv32.w).h C Intrinsic Prototype: void Q6_vgather_ARMWw(HVX_Vector* Rs, Word32 Rt, Word32 Mu, HVX_VectorPair Vvv) Instruction Type: CVI_GATHER_DV Execution Slots: SLOT01 ========================================================================== */ #define Q6_vgather_ARMWw(Rs,Rt,Mu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhw)(Rs,Rt,Mu,Vvv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: if (Qs4) vtmp.h=vgather(Rt32,Mu2,Vvv32.w).h C Intrinsic Prototype: void Q6_vgather_AQRMWw(HVX_Vector* Rs, HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_VectorPair Vvv) Instruction Type: CVI_GATHER_DV Execution Slots: SLOT01 ========================================================================== */ #define Q6_vgather_AQRMWw(Rs,Qs,Rt,Mu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhwq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vvv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: vtmp.w=vgather(Rt32,Mu2,Vv32.w).w C Intrinsic Prototype: void Q6_vgather_ARMVw(HVX_Vector* Rs, Word32 Rt, Word32 Mu, HVX_Vector Vv) Instruction Type: CVI_GATHER Execution Slots: SLOT01 
========================================================================== */ #define Q6_vgather_ARMVw(Rs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermw)(Rs,Rt,Mu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: if (Qs4) vtmp.w=vgather(Rt32,Mu2,Vv32.w).w C Intrinsic Prototype: void Q6_vgather_AQRMVw(HVX_Vector* Rs, HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_Vector Vv) Instruction Type: CVI_GATHER Execution Slots: SLOT01 ========================================================================== */ #define Q6_vgather_AQRMVw(Rs,Qs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermwq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.h=vlut4(Vu32.uh,Rtt32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vlut4_VuhPh(HVX_Vector Vu, Word64 Rtt) Instruction Type: CVI_VX_DV Execution Slots: SLOT2 ========================================================================== */ #define Q6_Vh_vlut4_VuhPh(Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlut4)(Vu,Rtt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vdd32.h=vmpa(Vuu32.ub,Rt32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpa_WubRub(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpa_WubRub(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuu)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vxx32.h+=vmpa(Vuu32.ub,Rt32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpaacc_WhWubRub(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpaacc_WhWubRub(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuu_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vx32.h=vmpa(Vx32.h,Vu32.h,Rtt32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpa_VhVhVhPh_sat(HVX_Vector Vx, HVX_Vector Vu, Word64 Rtt) Instruction Type: CVI_VX_DV Execution Slots: SLOT2 ========================================================================== */ #define Q6_Vh_vmpa_VhVhVhPh_sat(Vx,Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahhsat)(Vx,Vu,Rtt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vx32.h=vmpa(Vx32.h,Vu32.uh,Rtt32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpa_VhVhVuhPuh_sat(HVX_Vector Vx, HVX_Vector Vu, Word64 Rtt) Instruction Type: CVI_VX_DV Execution Slots: SLOT2 ========================================================================== */ #define Q6_Vh_vmpa_VhVhVuhPuh_sat(Vx,Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhuhsat)(Vx,Vu,Rtt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: 
Vx32.h=vmps(Vx32.h,Vu32.uh,Rtt32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmps_VhVhVuhPuh_sat(HVX_Vector Vx, HVX_Vector Vu, Word64 Rtt) Instruction Type: CVI_VX_DV Execution Slots: SLOT2 ========================================================================== */ #define Q6_Vh_vmps_VhVhVuhPuh_sat(Vx,Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpsuhuhsat)(Vx,Vu,Rtt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vxx32.w+=vmpy(Vu32.h,Rt32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhRh(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpyacc_WwVhRh(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyh_acc)(Vxx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.uw=vmpye(Vu32.uh,Rt32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vmpye_VuhRuh(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vuw_vmpye_VuhRuh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhe)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vx32.uw+=vmpye(Vu32.uh,Rt32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vmpyeacc_VuwVuhRuh(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vuw_vmpyeacc_VuwVuhRuh(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhe_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.b=vnavg(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vnavg_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vnavg_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.b=prefixsum(Qv4) C Intrinsic Prototype: HVX_Vector Q6_Vb_prefixsum_Q(HVX_VectorPred Qv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqb)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1)) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.h=prefixsum(Qv4) C Intrinsic Prototype: HVX_Vector Q6_Vh_prefixsum_Q(HVX_VectorPred Qv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqh)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1)) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* 
========================================================================== Assembly Syntax: Vd32.w=prefixsum(Qv4) C Intrinsic Prototype: HVX_Vector Q6_Vw_prefixsum_Q(HVX_VectorPred Qv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqw)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1)) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: vscatter(Rt32,Mu2,Vv32.h).h=Vw32 C Intrinsic Prototype: void Q6_vscatter_RMVhV(Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw) Instruction Type: CVI_SCATTER Execution Slots: SLOT0 ========================================================================== */ #define Q6_vscatter_RMVhV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermh)(Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: vscatter(Rt32,Mu2,Vv32.h).h+=Vw32 C Intrinsic Prototype: void Q6_vscatteracc_RMVhV(Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw) Instruction Type: CVI_SCATTER Execution Slots: SLOT0 ========================================================================== */ #define Q6_vscatteracc_RMVhV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermh_add)(Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: if (Qs4) vscatter(Rt32,Mu2,Vv32.h).h=Vw32 C Intrinsic Prototype: void Q6_vscatter_QRMVhV(HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw) Instruction Type: CVI_SCATTER Execution Slots: SLOT0 ========================================================================== */ #define Q6_vscatter_QRMVhV(Qs,Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: vscatter(Rt32,Mu2,Vvv32.w).h=Vw32 C Intrinsic Prototype: void Q6_vscatter_RMWwV(Word32 Rt, Word32 Mu, HVX_VectorPair Vvv, HVX_Vector Vw) Instruction Type: CVI_SCATTER_DV Execution Slots: SLOT0 ========================================================================== */ #define Q6_vscatter_RMWwV(Rt,Mu,Vvv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhw)(Rt,Mu,Vvv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: vscatter(Rt32,Mu2,Vvv32.w).h+=Vw32 C Intrinsic Prototype: void Q6_vscatteracc_RMWwV(Word32 Rt, Word32 Mu, HVX_VectorPair Vvv, HVX_Vector Vw) Instruction Type: CVI_SCATTER_DV Execution Slots: SLOT0 ========================================================================== */ #define Q6_vscatteracc_RMWwV(Rt,Mu,Vvv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhw_add)(Rt,Mu,Vvv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: if (Qs4) vscatter(Rt32,Mu2,Vvv32.w).h=Vw32 C Intrinsic Prototype: void Q6_vscatter_QRMWwV(HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_VectorPair Vvv, HVX_Vector Vw) Instruction 
Type: CVI_SCATTER_DV Execution Slots: SLOT0 ========================================================================== */ #define Q6_vscatter_QRMWwV(Qs,Rt,Mu,Vvv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vvv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: vscatter(Rt32,Mu2,Vv32.w).w=Vw32 C Intrinsic Prototype: void Q6_vscatter_RMVwV(Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw) Instruction Type: CVI_SCATTER Execution Slots: SLOT0 ========================================================================== */ #define Q6_vscatter_RMVwV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermw)(Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: vscatter(Rt32,Mu2,Vv32.w).w+=Vw32 C Intrinsic Prototype: void Q6_vscatteracc_RMVwV(Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw) Instruction Type: CVI_SCATTER Execution Slots: SLOT0 ========================================================================== */ #define Q6_vscatteracc_RMVwV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermw_add)(Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: if (Qs4) vscatter(Rt32,Mu2,Vv32.w).w=Vw32 C Intrinsic Prototype: void Q6_vscatter_QRMVwV(HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw) Instruction Type: CVI_SCATTER Execution Slots: SLOT0 ========================================================================== */ #define Q6_vscatter_QRMVwV(Qs,Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 66 /* ========================================================================== Assembly Syntax: Vd32.w=vadd(Vu32.w,Vv32.w,Qs4):carry:sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_VwVwQ_carry_sat(HVX_Vector Vu, HVX_Vector Vv, HVX_VectorPred Qs) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vadd_VwVwQ_carry_sat(Vu,Vv,Qs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddcarrysat)(Vu,Vv,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1)) #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HVX_ARCH__ >= 66 /* ========================================================================== Assembly Syntax: Vxx32.w=vasrinto(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vasrinto_WwVwVw(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Ww_vasrinto_WwVwVw(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasr_into)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HVX_ARCH__ >= 66 /* ========================================================================== Assembly Syntax: Vd32.uw=vrotr(Vu32.uw,Vv32.uw) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrotr_VuwVuw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuw_vrotr_VuwVuw(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrotr)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HVX_ARCH__ >= 66 /* ========================================================================== Assembly Syntax: Vd32.w=vsatdw(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vsatdw_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vsatdw_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatdw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):h C Intrinsic Prototype: HVX_VectorPair Q6_Ww_v6mpy_WubWbI_h(HVX_VectorPair Vuu, HVX_VectorPair Vvv, Word32 Iu2) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_v6mpy_WubWbI_h(Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyhubs10)(Vuu,Vvv,Iu2) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):h C Intrinsic Prototype: HVX_VectorPair Q6_Ww_v6mpyacc_WwWubWbI_h(HVX_VectorPair Vxx, HVX_VectorPair Vuu, HVX_VectorPair Vvv, Word32 Iu2) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_v6mpyacc_WwWubWbI_h(Vxx,Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyhubs10_vxx)(Vxx,Vuu,Vvv,Iu2) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):v C Intrinsic Prototype: HVX_VectorPair Q6_Ww_v6mpy_WubWbI_v(HVX_VectorPair Vuu, HVX_VectorPair Vvv, Word32 Iu2) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_v6mpy_WubWbI_v(Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10)(Vuu,Vvv,Iu2) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):v C Intrinsic Prototype: HVX_VectorPair Q6_Ww_v6mpyacc_WwWubWbI_v(HVX_VectorPair Vxx, HVX_VectorPair Vuu, HVX_VectorPair Vvv, Word32 Iu2) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_v6mpyacc_WwWubWbI_v(Vxx,Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10_vxx)(Vxx,Vuu,Vvv,Iu2) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vabs(Vu32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vabs_Vhf(HVX_Vector Vu) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vabs_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_hf)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=vabs(Vu32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vabs_Vsf(HVX_Vector Vu) Instruction Type: CVI_VX_LATE Execution 
Slots: SLOT23 ========================================================================== */ #define Q6_Vsf_vabs_Vsf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_sf)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf16=vadd(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vadd_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf16_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vadd(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vadd_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_hf_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf16=vadd(Vu32.qf16,Vv32.qf16) C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vadd_Vqf16Vqf16(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf16_vadd_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf16)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf16=vadd(Vu32.qf16,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vadd_Vqf16Vhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf16_vadd_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf16_mix)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf32=vadd(Vu32.qf32,Vv32.qf32) C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vadd_Vqf32Vqf32(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf32_vadd_Vqf32Vqf32(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf32)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf32=vadd(Vu32.qf32,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vadd_Vqf32Vsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf32_vadd_Vqf32Vsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf32_mix)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf32=vadd(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vadd_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 
========================================================================== */ #define Q6_Vqf32_vadd_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.sf=vadd(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vadd_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wsf_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=vadd(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vadd_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vsf_vadd_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.w=vfmv(Vu32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vfmv_Vw(HVX_Vector Vu) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vfmv_Vw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassign_fp)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=Vu32.qf16 C Intrinsic Prototype: HVX_Vector Q6_Vhf_equals_Vqf16(HVX_Vector Vu) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vhf_equals_Vqf16(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_hf_qf16)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=Vuu32.qf32 C Intrinsic Prototype: HVX_Vector Q6_Vhf_equals_Wqf32(HVX_VectorPair Vuu) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vhf_equals_Wqf32(Vuu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_hf_qf32)(Vuu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=Vu32.qf32 C Intrinsic Prototype: HVX_Vector Q6_Vsf_equals_Vqf32(HVX_Vector Vu) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vsf_equals_Vqf32(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_sf_qf32)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.b=vcvt(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vb_vcvt_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vb_vcvt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_b_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if 
__HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.h=vcvt(Vu32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vh_vcvt_Vhf(HVX_Vector Vu) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vcvt_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_h_hf)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.hf=vcvt(Vu32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt_Vb(HVX_Vector Vu) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Whf_vcvt_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_b)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vcvt(Vu32.h) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_Vh(HVX_Vector Vu) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vcvt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_h)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vcvt(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vcvt_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.hf=vcvt(Vu32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt_Vub(HVX_Vector Vu) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Whf_vcvt_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_ub)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vcvt(Vu32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_Vuh(HVX_Vector Vu) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vcvt_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_uh)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.sf=vcvt(Vu32.hf) C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vcvt_Vhf(HVX_Vector Vu) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wsf_vcvt_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_sf_hf)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.ub=vcvt(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vub_vcvt_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 
========================================================================== */ #define Q6_Vub_vcvt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_ub_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.uh=vcvt(Vu32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vcvt_Vhf(HVX_Vector Vu) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vuh_vcvt_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_uh_hf)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=vdmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vdmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vsf_vdmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpy_sf_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vx32.sf+=vdmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vdmpyacc_VsfVhfVhf(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vsf_vdmpyacc_VsfVhfVhf(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpy_sf_hf_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vfmax(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfmax_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vfmax_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmax_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=vfmax(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vfmax_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vsf_vfmax_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmax_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vfmin(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfmin_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vfmin_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmin_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=vfmin(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vfmin_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ 
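/* --------------------------------------------------------------------------
   Illustrative usage sketch (editor's addition, not part of the original
   header): assuming HVX is enabled, __HVX_ARCH__ >= 68, and the HVX_Vector
   typedef from the Hexagon toolchain headers is in scope, the half-float
   vfmax/vfmin wrappers defined above can be composed to clamp every 16-bit
   float lane of a vector into a [lo, hi] range. The helper name
   example_vclamp_hf is hypothetical.
   -------------------------------------------------------------------------- */
#if __HVX_ARCH__ >= 68
static inline HVX_Vector example_vclamp_hf(HVX_Vector x, HVX_Vector lo,
                                           HVX_Vector hi) {
  /* vfmax(x, lo) raises lanes below the floor, then vfmin(..., hi) caps them. */
  return Q6_Vhf_vfmin_VhfVhf(Q6_Vhf_vfmax_VhfVhf(x, lo), hi);
}
#endif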
#define Q6_Vsf_vfmin_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmin_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vfneg(Vu32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfneg_Vhf(HVX_Vector Vu) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vfneg_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfneg_hf)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=vfneg(Vu32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vfneg_Vsf(HVX_Vector Vu) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vsf_vfneg_Vsf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfneg_sf)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVhfVhf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtand_QVhfVhf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVhfVhf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtor_QVhfVhf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVhfVhf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVhfVhf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 68 */ #if 
__HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gt_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVsfVsf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtand_QVsfVsf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVsfVsf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtor_QVsfVsf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVsfVsf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVsfVsf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vmax(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmax_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vhf_vmax_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmax_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=vmax(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vmax_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vsf_vmax_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmax_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* 
========================================================================== Assembly Syntax: Vd32.hf=vmin(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmin_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vhf_vmin_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmin_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=vmin(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vmin_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vsf_vmin_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmin_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vx32.hf+=vmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmpyacc_VhfVhfVhf(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vmpyacc_VhfVhfVhf(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_hf_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf16=vmpy(Vu32.qf16,Vv32.qf16) C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vmpy_Vqf16Vqf16(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vqf16_vmpy_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf16=vmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vqf16_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf16=vmpy(Vu32.qf16,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vmpy_Vqf16Vhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vqf16_vmpy_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16_mix_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* 
========================================================================== Assembly Syntax: Vd32.qf32=vmpy(Vu32.qf32,Vv32.qf32) C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vmpy_Vqf32Vqf32(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vqf32_vmpy_Vqf32Vqf32(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.qf32=vmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPair Q6_Wqf32_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wqf32_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.qf32=vmpy(Vu32.qf16,Vv32.hf) C Intrinsic Prototype: HVX_VectorPair Q6_Wqf32_vmpy_Vqf16Vhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wqf32_vmpy_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_mix_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.qf32=vmpy(Vu32.qf16,Vv32.qf16) C Intrinsic Prototype: HVX_VectorPair Q6_Wqf32_vmpy_Vqf16Vqf16(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wqf32_vmpy_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_qf16)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf32=vmpy(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vmpy_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vqf32_vmpy_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.sf=vmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wsf_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vxx32.sf+=vmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpyacc_WsfVhfVhf(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wsf_vmpyacc_WsfVhfVhf(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_hf_acc)(Vxx,Vu,Vv) #endif /* 
__HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=vmpy(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vmpy_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vsf_vmpy_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf16=vsub(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vsub_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf16_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vsub(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vsub_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf16=vsub(Vu32.qf16,Vv32.qf16) C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vsub_Vqf16Vqf16(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf16_vsub_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf16)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf16=vsub(Vu32.qf16,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vsub_Vqf16Vhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf16_vsub_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf16_mix)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf32=vsub(Vu32.qf32,Vv32.qf32) C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vsub_Vqf32Vqf32(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf32_vsub_Vqf32Vqf32(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf32)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf32=vsub(Vu32.qf32,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vsub_Vqf32Vsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf32_vsub_Vqf32Vsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf32_mix)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if 
__HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf32=vsub(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vsub_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf32_vsub_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.sf=vsub(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vsub_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wsf_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=vsub(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vsub_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vsf_vsub_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 69 /* ========================================================================== Assembly Syntax: Vd32.ub=vasr(Vuu32.uh,Vv32.ub):rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_WuhVub_rnd_sat(HVX_VectorPair Vuu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vasr_WuhVub_rnd_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvuhubrndsat)(Vuu,Vv) #endif /* __HEXAGON_ARCH___ >= 69 */ #if __HVX_ARCH__ >= 69 /* ========================================================================== Assembly Syntax: Vd32.ub=vasr(Vuu32.uh,Vv32.ub):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_WuhVub_sat(HVX_VectorPair Vuu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vasr_WuhVub_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvuhubsat)(Vuu,Vv) #endif /* __HEXAGON_ARCH___ >= 69 */ #if __HVX_ARCH__ >= 69 /* ========================================================================== Assembly Syntax: Vd32.uh=vasr(Vuu32.w,Vv32.uh):rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_WwVuh_rnd_sat(HVX_VectorPair Vuu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vasr_WwVuh_rnd_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvwuhrndsat)(Vuu,Vv) #endif /* __HEXAGON_ARCH___ >= 69 */ #if __HVX_ARCH__ >= 69 /* ========================================================================== Assembly Syntax: Vd32.uh=vasr(Vuu32.w,Vv32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_WwVuh_sat(HVX_VectorPair Vuu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vasr_WwVuh_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvwuhsat)(Vuu,Vv) #endif /* __HEXAGON_ARCH___ 
>= 69 */
#if __HVX_ARCH__ >= 69
/* ==========================================================================
   Assembly Syntax:       Vd32.uh=vmpy(Vu32.uh,Vv32.uh):>>16
   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vmpy_VuhVuh_rs16(HVX_Vector Vu, HVX_Vector Vv)
   Instruction Type:      CVI_VX
   Execution Slots:       SLOT23
   ========================================================================== */
#define Q6_Vuh_vmpy_VuhVuh_rs16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhvs)(Vu,Vv)
#endif /* __HEXAGON_ARCH___ >= 69 */

#endif /* __HVX__ */

#endif
/*===---- iso646.h - Standard header for alternate spellings of operators---===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __ISO646_H
#define __ISO646_H

#ifndef __cplusplus

#define and    &&
#define and_eq &=
#define bitand &
#define bitor  |
#define compl  ~
#define not    !
#define not_eq !=
#define or     ||
#define or_eq  |=
#define xor    ^
#define xor_eq ^=

#endif

#endif /* __ISO646_H */
lasxintrin.h
/*===---- mm_malloc.h - Allocating and Freeing Aligned Memory Blocks -------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __MM_MALLOC_H
#define __MM_MALLOC_H

#include <stdlib.h>

#ifdef _WIN32
#include <malloc.h>
#else
#ifndef __cplusplus
extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size);
#else
// Some systems (e.g. those with GNU libc) declare posix_memalign with an
// exception specifier. Via an "egregious workaround" in
// Sema::CheckEquivalentExceptionSpec, Clang accepts the following as a valid
// redeclaration of glibc's declaration.
extern "C" int posix_memalign(void **__memptr, size_t __alignment, size_t __size);
#endif
#endif

#if !(defined(_WIN32) && defined(_mm_malloc))
static __inline__ void *__attribute__((__always_inline__, __nodebug__,
                                       __malloc__, __alloc_size__(1),
                                       __alloc_align__(2)))
_mm_malloc(size_t __size, size_t __align) {
  if (__align == 1) {
    return malloc(__size);
  }

  if (!(__align & (__align - 1)) && __align < sizeof(void *))
    __align = sizeof(void *);

  void *__mallocedMemory;
#if defined(__MINGW32__)
  __mallocedMemory = __mingw_aligned_malloc(__size, __align);
#elif defined(_WIN32)
  __mallocedMemory = _aligned_malloc(__size, __align);
#else
  if (posix_memalign(&__mallocedMemory, __align, __size))
    return 0;
#endif

  return __mallocedMemory;
}

static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_free(void *__p) {
#if defined(__MINGW32__)
  __mingw_aligned_free(__p);
#elif defined(_WIN32)
  _aligned_free(__p);
#else
  free(__p);
#endif
}
#endif

#endif /* __MM_MALLOC_H */
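/* --------------------------------------------------------------------------
   Illustrative usage sketch (editor's addition, not part of the original
   header): _mm_malloc returns storage aligned to the requested power-of-two
   boundary and must be released with _mm_free, not free(). The function and
   buffer names below are hypothetical.
   -------------------------------------------------------------------------- */
static void example_use_mm_malloc(void) {
  /* Request room for 1024 floats on a 64-byte (cache-line) boundary. */
  float *buf = (float *)_mm_malloc(1024 * sizeof(float), 64);
  if (buf) {
    buf[0] = 1.0f;
    _mm_free(buf); /* pair with _mm_free, never plain free() */
  }
}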
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __RISCV_BITMANIP_H #define __RISCV_BITMANIP_H #include #if defined(__cplusplus) extern "C" { #endif #if defined(__riscv_zbb) static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_orc_b_32(uint32_t __x) { return __builtin_riscv_orc_b_32(__x); } static __inline__ unsigned __attribute__((__always_inline__, __nodebug__)) __riscv_clz_32(uint32_t __x) { return __builtin_riscv_clz_32(__x); } static __inline__ unsigned __attribute__((__always_inline__, __nodebug__)) __riscv_ctz_32(uint32_t __x) { return __builtin_riscv_ctz_32(__x); } static __inline__ unsigned __attribute__((__always_inline__, __nodebug__)) __riscv_cpop_32(uint32_t __x) { return __builtin_popcount(__x); } #if __riscv_xlen == 64 static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __riscv_orc_b_64(uint64_t __x) { return __builtin_riscv_orc_b_64(__x); } static __inline__ unsigned __attribute__((__always_inline__, __nodebug__)) __riscv_clz_64(uint64_t __x) { return __builtin_riscv_clz_64(__x); } static __inline__ unsigned __attribute__((__always_inline__, __nodebug__)) __riscv_ctz_64(uint64_t __x) { return __builtin_riscv_ctz_64(__x); } static __inline__ unsigned __attribute__((__always_inline__, __nodebug__)) __riscv_cpop_64(uint64_t __x) { return __builtin_popcountll(__x); } #endif #endif // defined(__riscv_zbb) #if defined(__riscv_zbb) || defined(__riscv_zbkb) static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_rev8_32(uint32_t __x) { return __builtin_bswap32(__x); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_rol_32(uint32_t __x, uint32_t __y) { return __builtin_rotateleft32(__x, __y); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_ror_32(uint32_t __x, uint32_t __y) { return __builtin_rotateright32(__x, __y); } #if __riscv_xlen == 64 static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __riscv_rev8_64(uint64_t __x) { return __builtin_bswap64(__x); } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __riscv_rol_64(uint64_t __x, uint32_t __y) { return __builtin_rotateleft64(__x, __y); } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __riscv_ror_64(uint64_t __x, uint32_t __y) { return __builtin_rotateright64(__x, __y); } #endif #endif // defined(__riscv_zbb) || defined(__riscv_zbkb) #if defined(__riscv_zbkb) static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_brev8_32(uint32_t __x) { return __builtin_riscv_brev8_32(__x); } #if __riscv_xlen == 64 static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __riscv_brev8_64(uint64_t __x) { return __builtin_riscv_brev8_64(__x); } #endif #if __riscv_xlen == 32 static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_unzip_32(uint32_t __x) { return __builtin_riscv_unzip_32(__x); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_zip_32(uint32_t __x) { return __builtin_riscv_zip_32(__x); } #endif #endif // defined(__riscv_zbkb) #if defined(__riscv_zbc) #if __riscv_xlen == 32 static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_clmulr_32(uint32_t __x, uint32_t __y) { return __builtin_riscv_clmulr_32(__x, __y); } #endif #if __riscv_xlen == 64 static __inline__ uint64_t 
__attribute__((__always_inline__, __nodebug__)) __riscv_clmulr_64(uint64_t __x, uint64_t __y) { return __builtin_riscv_clmulr_64(__x, __y); } #endif #endif // defined(__riscv_zbc) #if defined(__riscv_zbkc) || defined(__riscv_zbc) static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_clmul_32(uint32_t __x, uint32_t __y) { return __builtin_riscv_clmul_32(__x, __y); } #if __riscv_xlen == 32 static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_clmulh_32(uint32_t __x, uint32_t __y) { return __builtin_riscv_clmulh_32(__x, __y); } #endif #if __riscv_xlen == 64 static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __riscv_clmul_64(uint64_t __x, uint64_t __y) { return __builtin_riscv_clmul_64(__x, __y); } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __riscv_clmulh_64(uint64_t __x, uint64_t __y) { return __builtin_riscv_clmulh_64(__x, __y); } #endif #endif // defined(__riscv_zbkc) || defined(__riscv_zbc) #if defined(__riscv_zbkx) #if __riscv_xlen == 32 static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_xperm4_32(uint32_t __x, uint32_t __y) { return __builtin_riscv_xperm4_32(__x, __y); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __riscv_xperm8_32(uint32_t __x, uint32_t __y) { return __builtin_riscv_xperm8_32(__x, __y); } #endif #if __riscv_xlen == 64 static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __riscv_xperm4_64(uint64_t __x, uint64_t __y) { return __builtin_riscv_xperm4_64(__x, __y); } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __riscv_xperm8_64(uint64_t __x, uint64_t __y) { return __builtin_riscv_xperm8_64(__x, __y); } #endif #endif // defined(__riscv_zbkx) #if defined(__cplusplus) } #endif #endif xopintrin.h/*===---- xopintrin.h - XOP intrinsics -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86INTRIN_H #error "Never use directly; include instead." #endif #ifndef __XOPINTRIN_H #define __XOPINTRIN_H #include /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(256))) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddw_epi8(__m128i __A) { return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddd_epi8(__m128i __A) { return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddq_epi8(__m128i __A) { return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddd_epi16(__m128i __A) { return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddq_epi16(__m128i __A) { return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddq_epi32(__m128i __A) { return (__m128i)__builtin_ia32_vphadddq((__v4si)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddw_epu8(__m128i __A) { return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddd_epu8(__m128i __A) { return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A); } static __inline__ 
__m128i __DEFAULT_FN_ATTRS _mm_haddq_epu8(__m128i __A) { return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddd_epu16(__m128i __A) { return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddq_epu16(__m128i __A) { return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddq_epu32(__m128i __A) { return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsubw_epi8(__m128i __A) { return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsubd_epi16(__m128i __A) { return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsubq_epi32(__m128i __A) { return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) { return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C)); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rot_epi8(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rot_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rot_epi32(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rot_epi64(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B); } #define _mm_roti_epi8(A, N) \ ((__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N))) #define _mm_roti_epi16(A, N) \ ((__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N))) #define _mm_roti_epi32(A, N) \ ((__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N))) #define _mm_roti_epi64(A, N) \ ((__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N))) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shl_epi8(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shl_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shl_epi32(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shl_epi64(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha_epi8(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha_epi32(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B); } static __inline__ __m128i 
__DEFAULT_FN_ATTRS _mm_sha_epi64(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B); } #define _mm_com_epu8(A, B, N) \ ((__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (N))) #define _mm_com_epu16(A, B, N) \ ((__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \ (__v8hi)(__m128i)(B), (N))) #define _mm_com_epu32(A, B, N) \ ((__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \ (__v4si)(__m128i)(B), (N))) #define _mm_com_epu64(A, B, N) \ ((__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \ (__v2di)(__m128i)(B), (N))) #define _mm_com_epi8(A, B, N) \ ((__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (N))) #define _mm_com_epi16(A, B, N) \ ((__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \ (__v8hi)(__m128i)(B), (N))) #define _mm_com_epi32(A, B, N) \ ((__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \ (__v4si)(__m128i)(B), (N))) #define _mm_com_epi64(A, B, N) \ ((__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \ (__v2di)(__m128i)(B), (N))) #define _MM_PCOMCTRL_LT 0 #define _MM_PCOMCTRL_LE 1 #define _MM_PCOMCTRL_GT 2 #define _MM_PCOMCTRL_GE 3 #define _MM_PCOMCTRL_EQ 4 #define _MM_PCOMCTRL_NEQ 5 #define _MM_PCOMCTRL_FALSE 6 #define _MM_PCOMCTRL_TRUE 7 static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE); } static __inline__ 
__m128i __DEFAULT_FN_ATTRS _mm_comlt_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT); } static __inline__ __m128i __DEFAULT_FN_ATTRS 
_mm_comle_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE); } #define _mm_permute2_pd(X, Y, C, I) \ ((__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), \ (__v2di)(__m128i)(C), (I))) #define _mm256_permute2_pd(X, Y, C, I) \ ((__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \ (__v4df)(__m256d)(Y), \ (__v4di)(__m256i)(C), (I))) #define _mm_permute2_ps(X, Y, C, I) \ 
((__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \ (__v4si)(__m128i)(C), (I))) #define _mm256_permute2_ps(X, Y, C, I) \ ((__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \ (__v8sf)(__m256)(Y), \ (__v8si)(__m256i)(C), (I))) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_frcz_ss(__m128 __A) { return (__m128)__builtin_ia32_vfrczss((__v4sf)__A); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_frcz_sd(__m128d __A) { return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_frcz_ps(__m128 __A) { return (__m128)__builtin_ia32_vfrczps((__v4sf)__A); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_frcz_pd(__m128d __A) { return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_frcz_ps(__m256 __A) { return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_frcz_pd(__m256d __A) { return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A); } #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS256 #endif /* __XOPINTRIN_H */ xsavesintrin.h// CUDA headers define __noinline__ which interferes with libstdc++'s use of // `__attribute((__noinline__))`. In order to avoid compilation error, // temporarily unset __noinline__ when we include affected libstdc++ header. #pragma push_macro("__noinline__") #undef __noinline__ #include_next "bits/shared_ptr_base.h" #pragma pop_macro("__noinline__") /*===- c_api.h - C API for the ORC runtime ------------------------*- C -*-===*\ |* *| |* Part of the LLVM Project, under the Apache License v2.0 with LLVM *| |* Exceptions. *| |* See https://llvm.org/LICENSE.txt for license information. *| |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *| |* *| |*===----------------------------------------------------------------------===*| |* *| |* This file defines the C API for the ORC runtime *| |* *| \*===----------------------------------------------------------------------===*/ #ifndef ORC_RT_C_API_H #define ORC_RT_C_API_H #include #include #include #include /* Helper to suppress strict prototype warnings. */ #ifdef __clang__ #define ORC_RT_C_STRICT_PROTOTYPES_BEGIN \ _Pragma("clang diagnostic push") \ _Pragma("clang diagnostic error \"-Wstrict-prototypes\"") #define ORC_RT_C_STRICT_PROTOTYPES_END _Pragma("clang diagnostic pop") #else #define ORC_RT_C_STRICT_PROTOTYPES_BEGIN #define ORC_RT_C_STRICT_PROTOTYPES_END #endif /* Helper to wrap C code for C++ */ #ifdef __cplusplus #define ORC_RT_C_EXTERN_C_BEGIN \ extern "C" { \ ORC_RT_C_STRICT_PROTOTYPES_BEGIN #define ORC_RT_C_EXTERN_C_END \ ORC_RT_C_STRICT_PROTOTYPES_END \ } #else #define ORC_RT_C_EXTERN_C_BEGIN ORC_RT_C_STRICT_PROTOTYPES_BEGIN #define ORC_RT_C_EXTERN_C_END ORC_RT_C_STRICT_PROTOTYPES_END #endif ORC_RT_C_EXTERN_C_BEGIN typedef union { char *ValuePtr; char Value[sizeof(char *)]; } orc_rt_CWrapperFunctionResultDataUnion; /** * orc_rt_CWrapperFunctionResult is a kind of C-SmallVector with an * out-of-band error state. * * If Size == 0 and Data.ValuePtr is non-zero then the value is in the * 'out-of-band error' state, and Data.ValuePtr points at a malloc-allocated, * null-terminated string error message. * * If Size <= sizeof(orc_rt_CWrapperFunctionResultData) then the value is in * the 'small' state and the content is held in the first Size bytes of * Data.Value. 
* * If Size > sizeof(OrtRTCWrapperFunctionResultData) then the value is in the * 'large' state and the content is held in the first Size bytes of the * memory pointed to by Data.ValuePtr. This memory must have been allocated by * malloc, and will be freed with free when this value is destroyed. */ typedef struct { orc_rt_CWrapperFunctionResultDataUnion Data; size_t Size; } orc_rt_CWrapperFunctionResult; typedef struct orc_rt_CSharedOpaqueJITProcessControl *orc_rt_SharedJITProcessControlRef; /** * Zero-initialize an orc_rt_CWrapperFunctionResult. */ static inline void orc_rt_CWrapperFunctionResultInit(orc_rt_CWrapperFunctionResult *R) { R->Size = 0; R->Data.ValuePtr = 0; } /** * Create an orc_rt_CWrapperFunctionResult with an uninitialized buffer of * size Size. The buffer is returned via the DataPtr argument. */ static inline orc_rt_CWrapperFunctionResult orc_rt_CWrapperFunctionResultAllocate(size_t Size) { orc_rt_CWrapperFunctionResult R; R.Size = Size; // If Size is 0 ValuePtr must be 0 or it is considered an out-of-band error. R.Data.ValuePtr = 0; if (Size > sizeof(R.Data.Value)) R.Data.ValuePtr = (char *)malloc(Size); return R; } /** * Create an orc_rt_WrapperFunctionResult from the given data range. */ static inline orc_rt_CWrapperFunctionResult orc_rt_CreateCWrapperFunctionResultFromRange(const char *Data, size_t Size) { orc_rt_CWrapperFunctionResult R; R.Size = Size; if (R.Size > sizeof(R.Data.Value)) { char *Tmp = (char *)malloc(Size); memcpy(Tmp, Data, Size); R.Data.ValuePtr = Tmp; } else memcpy(R.Data.Value, Data, Size); return R; } /** * Create an orc_rt_CWrapperFunctionResult by copying the given string, * including the null-terminator. * * This function copies the input string. The client is responsible for freeing * the ErrMsg arg. */ static inline orc_rt_CWrapperFunctionResult orc_rt_CreateCWrapperFunctionResultFromString(const char *Source) { return orc_rt_CreateCWrapperFunctionResultFromRange(Source, strlen(Source) + 1); } /** * Create an orc_rt_CWrapperFunctionResult representing an out-of-band * error. * * This function copies the input string. The client is responsible for freeing * the ErrMsg arg. */ static inline orc_rt_CWrapperFunctionResult orc_rt_CreateCWrapperFunctionResultFromOutOfBandError(const char *ErrMsg) { orc_rt_CWrapperFunctionResult R; R.Size = 0; char *Tmp = (char *)malloc(strlen(ErrMsg) + 1); strcpy(Tmp, ErrMsg); R.Data.ValuePtr = Tmp; return R; } /** * This should be called to destroy orc_rt_CWrapperFunctionResult values * regardless of their state. */ static inline void orc_rt_DisposeCWrapperFunctionResult(orc_rt_CWrapperFunctionResult *R) { if (R->Size > sizeof(R->Data.Value) || (R->Size == 0 && R->Data.ValuePtr)) free(R->Data.ValuePtr); } /** * Get a pointer to the data contained in the given * orc_rt_CWrapperFunctionResult. */ static inline char * orc_rt_CWrapperFunctionResultData(orc_rt_CWrapperFunctionResult *R) { assert((R->Size != 0 || R->Data.ValuePtr == NULL) && "Cannot get data for out-of-band error value"); return R->Size > sizeof(R->Data.Value) ? R->Data.ValuePtr : R->Data.Value; } /** * Safely get the size of the given orc_rt_CWrapperFunctionResult. * * Asserts that we're not trying to access the size of an error value. 
*/ static inline size_t orc_rt_CWrapperFunctionResultSize(const orc_rt_CWrapperFunctionResult *R) { assert((R->Size != 0 || R->Data.ValuePtr == NULL) && "Cannot get size for out-of-band error value"); return R->Size; } /** * Returns 1 if this value is equivalent to a value just initialized by * orc_rt_CWrapperFunctionResultInit, 0 otherwise. */ static inline size_t orc_rt_CWrapperFunctionResultEmpty(const orc_rt_CWrapperFunctionResult *R) { return R->Size == 0 && R->Data.ValuePtr == 0; } /** * Returns a pointer to the out-of-band error string for this * orc_rt_CWrapperFunctionResult, or null if there is no error. * * The orc_rt_CWrapperFunctionResult retains ownership of the error * string, so it should be copied if the caller wishes to preserve it. */ static inline const char *orc_rt_CWrapperFunctionResultGetOutOfBandError( const orc_rt_CWrapperFunctionResult *R) { return R->Size == 0 ? R->Data.ValuePtr : 0; } ORC_RT_C_EXTERN_C_END #endif /* ORC_RT_C_API_H */ ppc_wrappers/bmi2intrin.hppc_wrappers/bmiintrin.hppc_wrappers/nmmintrin.h//===-- msan_interface.h --------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is a part of MemorySanitizer. // // Public interface header. //===----------------------------------------------------------------------===// #ifndef MSAN_INTERFACE_H #define MSAN_INTERFACE_H #include #ifdef __cplusplus extern "C" { #endif /* Set raw origin for the memory range. */ void SANITIZER_CDECL __msan_set_origin(const volatile void *a, size_t size, uint32_t origin); /* Get raw origin for an address. */ uint32_t SANITIZER_CDECL __msan_get_origin(const volatile void *a); /* Test that this_id is a descendant of prev_id (or they are simply equal). * "descendant" here means they are part of the same chain, created with * __msan_chain_origin. */ int SANITIZER_CDECL __msan_origin_is_descendant_or_same(uint32_t this_id, uint32_t prev_id); /* Returns non-zero if tracking origins. */ int SANITIZER_CDECL __msan_get_track_origins(void); /* Returns the origin id of the latest UMR in the calling thread. */ uint32_t SANITIZER_CDECL __msan_get_umr_origin(void); /* Make memory region fully initialized (without changing its contents). */ void SANITIZER_CDECL __msan_unpoison(const volatile void *a, size_t size); /* Make a null-terminated string fully initialized (without changing its contents). */ void SANITIZER_CDECL __msan_unpoison_string(const volatile char *a); /* Make first n parameters of the next function call fully initialized. */ void SANITIZER_CDECL __msan_unpoison_param(size_t n); /* Make memory region fully uninitialized (without changing its contents). This is a legacy interface that does not update origin information. Use __msan_allocated_memory() instead. */ void SANITIZER_CDECL __msan_poison(const volatile void *a, size_t size); /* Make memory region partially uninitialized (without changing its contents). */ void SANITIZER_CDECL __msan_partial_poison(const volatile void *data, void *shadow, size_t size); /* Returns the offset of the first (at least partially) poisoned byte in the memory range, or -1 if the whole range is good. 
*/ intptr_t SANITIZER_CDECL __msan_test_shadow(const volatile void *x, size_t size); /* Checks that memory range is fully initialized, and reports an error if it * is not. */ void SANITIZER_CDECL __msan_check_mem_is_initialized(const volatile void *x, size_t size); /* For testing: __msan_set_expect_umr(1); ... some buggy code ... __msan_set_expect_umr(0); The last line will verify that a UMR happened. */ void SANITIZER_CDECL __msan_set_expect_umr(int expect_umr); /* Change the value of keep_going flag. Non-zero value means don't terminate program execution when an error is detected. This will not affect error in modules that were compiled without the corresponding compiler flag. */ void SANITIZER_CDECL __msan_set_keep_going(int keep_going); /* Print shadow and origin for the memory range to stderr in a human-readable format. */ void SANITIZER_CDECL __msan_print_shadow(const volatile void *x, size_t size); /* Print shadow for the memory range to stderr in a minimalistic human-readable format. */ void SANITIZER_CDECL __msan_dump_shadow(const volatile void *x, size_t size); /* Returns true if running under a dynamic tool (DynamoRio-based). */ int SANITIZER_CDECL __msan_has_dynamic_component(void); /* Tell MSan about newly allocated memory (ex.: custom allocator). Memory will be marked uninitialized, with origin at the call site. */ void SANITIZER_CDECL __msan_allocated_memory(const volatile void *data, size_t size); /* Tell MSan about newly destroyed memory. Mark memory as uninitialized. */ void SANITIZER_CDECL __sanitizer_dtor_callback(const volatile void *data, size_t size); void SANITIZER_CDECL __sanitizer_dtor_callback_fields(const volatile void *data, size_t size); void SANITIZER_CDECL __sanitizer_dtor_callback_vptr(const volatile void *data); /* This function may be optionally provided by user and should return a string containing Msan runtime options. See msan_flags.h for details. */ const char *SANITIZER_CDECL __msan_default_options(void); /* Deprecated. Call __sanitizer_set_death_callback instead. */ void SANITIZER_CDECL __msan_set_death_callback(void(SANITIZER_CDECL *callback)(void)); /* Update shadow for the application copy of size bytes from src to dst. Src and dst are application addresses. This function does not copy the actual application memory, it only updates shadow and origin for such copy. Source and destination regions can overlap. */ void SANITIZER_CDECL __msan_copy_shadow(const volatile void *dst, const volatile void *src, size_t size); /* Disables uninitialized memory checks in interceptors. */ void SANITIZER_CDECL __msan_scoped_disable_interceptor_checks(void); /* Re-enables uninitialized memory checks in interceptors after a previous call to __msan_scoped_disable_interceptor_checks. 
*/ void SANITIZER_CDECL __msan_scoped_enable_interceptor_checks(void); void SANITIZER_CDECL __msan_start_switch_fiber(const void *bottom, size_t size); void SANITIZER_CDECL __msan_finish_switch_fiber(const void **bottom_old, size_t *size_old); #ifdef __cplusplus } // extern "C" #endif #endif /*===---- __clang_hip_libdevice_declares.h - HIP device library decls -------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_HIP_LIBDEVICE_DECLARES_H__ #define __CLANG_HIP_LIBDEVICE_DECLARES_H__ #if !defined(__HIPCC_RTC__) && __has_include("hip/hip_version.h") #include "hip/hip_version.h" #endif // __has_include("hip/hip_version.h") #ifdef __cplusplus extern "C" { #endif // BEGIN FLOAT __device__ __attribute__((const)) float __ocml_acos_f32(float); __device__ __attribute__((pure)) float __ocml_acosh_f32(float); __device__ __attribute__((const)) float __ocml_asin_f32(float); __device__ __attribute__((pure)) float __ocml_asinh_f32(float); __device__ __attribute__((const)) float __ocml_atan2_f32(float, float); __device__ __attribute__((const)) float __ocml_atan_f32(float); __device__ __attribute__((pure)) float __ocml_atanh_f32(float); __device__ __attribute__((pure)) float __ocml_cbrt_f32(float); __device__ __attribute__((const)) float __ocml_ceil_f32(float); __device__ __attribute__((const)) __device__ float __ocml_copysign_f32(float, float); __device__ float __ocml_cos_f32(float); __device__ float __ocml_native_cos_f32(float); __device__ __attribute__((pure)) __device__ float __ocml_cosh_f32(float); __device__ float __ocml_cospi_f32(float); __device__ float __ocml_i0_f32(float); __device__ float __ocml_i1_f32(float); __device__ __attribute__((pure)) float __ocml_erfc_f32(float); __device__ __attribute__((pure)) float __ocml_erfcinv_f32(float); __device__ __attribute__((pure)) float __ocml_erfcx_f32(float); __device__ __attribute__((pure)) float __ocml_erf_f32(float); __device__ __attribute__((pure)) float __ocml_erfinv_f32(float); __device__ __attribute__((pure)) float __ocml_exp10_f32(float); __device__ __attribute__((pure)) float __ocml_native_exp10_f32(float); __device__ __attribute__((pure)) float __ocml_exp2_f32(float); __device__ __attribute__((pure)) float __ocml_exp_f32(float);
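/* Illustrative sketch, not part of the upstream header: how device code
 * might consume the exponential declaration above. The helper name
 * __example_sigmoid_f32 is hypothetical; __ocml_native_exp_f32 (declared
 * just below) is the faster, reduced-accuracy variant one could swap in. */
static __device__ inline float __example_sigmoid_f32(float __x) {
  /* 1 / (1 + e^(-x)) via the full-accuracy OCML exponential. */
  return 1.0f / (1.0f + __ocml_exp_f32(-__x));
}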
__device__ __attribute__((pure)) float __ocml_native_exp_f32(float); __device__ __attribute__((pure)) float __ocml_expm1_f32(float); __device__ __attribute__((const)) float __ocml_fabs_f32(float); __device__ __attribute__((const)) float __ocml_fdim_f32(float, float); __device__ __attribute__((const)) float __ocml_floor_f32(float); __device__ __attribute__((const)) float __ocml_fma_f32(float, float, float); __device__ __attribute__((const)) float __ocml_fmax_f32(float, float); __device__ __attribute__((const)) float __ocml_fmin_f32(float, float); __device__ __attribute__((const)) __device__ float __ocml_fmod_f32(float, float); __device__ float __ocml_frexp_f32(float, __attribute__((address_space(5))) int *); __device__ __attribute__((const)) float __ocml_hypot_f32(float, float); __device__ __attribute__((const)) int __ocml_ilogb_f32(float); __device__ __attribute__((const)) int __ocml_isfinite_f32(float); __device__ __attribute__((const)) int __ocml_isinf_f32(float); __device__ __attribute__((const)) int __ocml_isnan_f32(float); __device__ float __ocml_j0_f32(float); __device__ float __ocml_j1_f32(float); __device__ __attribute__((const)) float __ocml_ldexp_f32(float, int); __device__ float __ocml_lgamma_f32(float); __device__ __attribute__((pure)) float __ocml_log10_f32(float); __device__ __attribute__((pure)) float __ocml_native_log10_f32(float); __device__ __attribute__((pure)) float __ocml_log1p_f32(float); __device__ __attribute__((pure)) float __ocml_log2_f32(float); __device__ __attribute__((pure)) float __ocml_native_log2_f32(float); __device__ __attribute__((const)) float __ocml_logb_f32(float); __device__ __attribute__((pure)) float __ocml_log_f32(float); __device__ __attribute__((pure)) float __ocml_native_log_f32(float); __device__ float __ocml_modf_f32(float, __attribute__((address_space(5))) float *); __device__ __attribute__((const)) float __ocml_nearbyint_f32(float); __device__ __attribute__((const)) float __ocml_nextafter_f32(float, float); __device__ __attribute__((const)) float __ocml_len3_f32(float, float, float); __device__ __attribute__((const)) float __ocml_len4_f32(float, float, float, float); __device__ __attribute__((pure)) float __ocml_ncdf_f32(float); __device__ __attribute__((pure)) float __ocml_ncdfinv_f32(float); __device__ __attribute__((pure)) float __ocml_pow_f32(float, float); __device__ __attribute__((pure)) float __ocml_pown_f32(float, int); __device__ __attribute__((pure)) float __ocml_rcbrt_f32(float); __device__ __attribute__((const)) float __ocml_remainder_f32(float, float); __device__ float __ocml_remquo_f32(float, float, __attribute__((address_space(5))) int *); __device__ __attribute__((const)) float __ocml_rhypot_f32(float, float); __device__ __attribute__((const)) float __ocml_rint_f32(float); __device__ __attribute__((const)) float __ocml_rlen3_f32(float, float, float); __device__ __attribute__((const)) float __ocml_rlen4_f32(float, float, float, float); __device__ __attribute__((const)) float __ocml_round_f32(float); __device__ __attribute__((pure)) float __ocml_rsqrt_f32(float); __device__ __attribute__((const)) float __ocml_scalb_f32(float, float); __device__ __attribute__((const)) float __ocml_scalbn_f32(float, int); __device__ __attribute__((const)) int __ocml_signbit_f32(float); __device__ float __ocml_sincos_f32(float, __attribute__((address_space(5))) float *); __device__ float __ocml_sincospi_f32(float, __attribute__((address_space(5))) float *); __device__ float __ocml_sin_f32(float); __device__ float __ocml_native_sin_f32(float); 
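/* Hypothetical usage sketch only: rotating a 2-D point with the sin, cos
 * and fma declarations above. The function and parameter names are invented
 * for illustration; HIP programs normally reach these entry points through
 * the hip_runtime math wrappers rather than calling __ocml_* directly. */
static __device__ inline void __example_rotate2d_f32(float __angle, float *__x,
                                                     float *__y) {
  float __s = __ocml_sin_f32(__angle);
  float __c = __ocml_cos_f32(__angle);
  float __nx = __ocml_fma_f32(__c, *__x, -__s * *__y); /* c*x - s*y */
  float __ny = __ocml_fma_f32(__s, *__x, __c * *__y);  /* s*x + c*y */
  *__x = __nx;
  *__y = __ny;
}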
__device__ __attribute__((pure)) float __ocml_sinh_f32(float); __device__ float __ocml_sinpi_f32(float); __device__ __attribute__((const)) float __ocml_sqrt_f32(float); __device__ __attribute__((const)) float __ocml_native_sqrt_f32(float); __device__ float __ocml_tan_f32(float); __device__ __attribute__((pure)) float __ocml_tanh_f32(float); __device__ float __ocml_tgamma_f32(float); __device__ __attribute__((const)) float __ocml_trunc_f32(float); __device__ float __ocml_y0_f32(float); __device__ float __ocml_y1_f32(float); // BEGIN INTRINSICS __device__ __attribute__((const)) float __ocml_add_rte_f32(float, float); __device__ __attribute__((const)) float __ocml_add_rtn_f32(float, float); __device__ __attribute__((const)) float __ocml_add_rtp_f32(float, float); __device__ __attribute__((const)) float __ocml_add_rtz_f32(float, float); __device__ __attribute__((const)) float __ocml_sub_rte_f32(float, float); __device__ __attribute__((const)) float __ocml_sub_rtn_f32(float, float); __device__ __attribute__((const)) float __ocml_sub_rtp_f32(float, float); __device__ __attribute__((const)) float __ocml_sub_rtz_f32(float, float); __device__ __attribute__((const)) float __ocml_mul_rte_f32(float, float); __device__ __attribute__((const)) float __ocml_mul_rtn_f32(float, float); __device__ __attribute__((const)) float __ocml_mul_rtp_f32(float, float); __device__ __attribute__((const)) float __ocml_mul_rtz_f32(float, float); __device__ __attribute__((const)) float __ocml_div_rte_f32(float, float); __device__ __attribute__((const)) float __ocml_div_rtn_f32(float, float); __device__ __attribute__((const)) float __ocml_div_rtp_f32(float, float); __device__ __attribute__((const)) float __ocml_div_rtz_f32(float, float); __device__ __attribute__((const)) float __ocml_sqrt_rte_f32(float); __device__ __attribute__((const)) float __ocml_sqrt_rtn_f32(float); __device__ __attribute__((const)) float __ocml_sqrt_rtp_f32(float); __device__ __attribute__((const)) float __ocml_sqrt_rtz_f32(float); __device__ __attribute__((const)) float __ocml_fma_rte_f32(float, float, float); __device__ __attribute__((const)) float __ocml_fma_rtn_f32(float, float, float); __device__ __attribute__((const)) float __ocml_fma_rtp_f32(float, float, float); __device__ __attribute__((const)) float __ocml_fma_rtz_f32(float, float, float); // END INTRINSICS // END FLOAT // BEGIN DOUBLE __device__ __attribute__((const)) double __ocml_acos_f64(double); __device__ __attribute__((pure)) double __ocml_acosh_f64(double); __device__ __attribute__((const)) double __ocml_asin_f64(double); __device__ __attribute__((pure)) double __ocml_asinh_f64(double); __device__ __attribute__((const)) double __ocml_atan2_f64(double, double); __device__ __attribute__((const)) double __ocml_atan_f64(double); __device__ __attribute__((pure)) double __ocml_atanh_f64(double); __device__ __attribute__((pure)) double __ocml_cbrt_f64(double); __device__ __attribute__((const)) double __ocml_ceil_f64(double); __device__ __attribute__((const)) double __ocml_copysign_f64(double, double); __device__ double __ocml_cos_f64(double); __device__ __attribute__((pure)) double __ocml_cosh_f64(double); __device__ double __ocml_cospi_f64(double); __device__ double __ocml_i0_f64(double); __device__ double __ocml_i1_f64(double); __device__ __attribute__((pure)) double __ocml_erfc_f64(double); __device__ __attribute__((pure)) double __ocml_erfcinv_f64(double); __device__ __attribute__((pure)) double __ocml_erfcx_f64(double); __device__ __attribute__((pure)) double __ocml_erf_f64(double); 
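/* Reading aid for the rounded-arithmetic intrinsics declared above: the
 * suffixes name the IEEE-754 rounding direction (_rte = to nearest even,
 * _rtn = toward -infinity, _rtp = toward +infinity, _rtz = toward zero).
 * The interval-style helper below is a hedged sketch; __example_add_interval_f32
 * is not part of this header. */
static __device__ inline void __example_add_interval_f32(float __a, float __b,
                                                          float *__lo, float *__hi) {
  *__lo = __ocml_add_rtn_f32(__a, __b); /* rounded toward -infinity: lower bound */
  *__hi = __ocml_add_rtp_f32(__a, __b); /* rounded toward +infinity: upper bound */
}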
__device__ __attribute__((pure)) double __ocml_erfinv_f64(double); __device__ __attribute__((pure)) double __ocml_exp10_f64(double); __device__ __attribute__((pure)) double __ocml_exp2_f64(double); __device__ __attribute__((pure)) double __ocml_exp_f64(double); __device__ __attribute__((pure)) double __ocml_expm1_f64(double); __device__ __attribute__((const)) double __ocml_fabs_f64(double); __device__ __attribute__((const)) double __ocml_fdim_f64(double, double); __device__ __attribute__((const)) double __ocml_floor_f64(double); __device__ __attribute__((const)) double __ocml_fma_f64(double, double, double); __device__ __attribute__((const)) double __ocml_fmax_f64(double, double); __device__ __attribute__((const)) double __ocml_fmin_f64(double, double); __device__ __attribute__((const)) double __ocml_fmod_f64(double, double); __device__ double __ocml_frexp_f64(double, __attribute__((address_space(5))) int *); __device__ __attribute__((const)) double __ocml_hypot_f64(double, double); __device__ __attribute__((const)) int __ocml_ilogb_f64(double); __device__ __attribute__((const)) int __ocml_isfinite_f64(double); __device__ __attribute__((const)) int __ocml_isinf_f64(double); __device__ __attribute__((const)) int __ocml_isnan_f64(double); __device__ double __ocml_j0_f64(double); __device__ double __ocml_j1_f64(double); __device__ __attribute__((const)) double __ocml_ldexp_f64(double, int); __device__ double __ocml_lgamma_f64(double); __device__ __attribute__((pure)) double __ocml_log10_f64(double); __device__ __attribute__((pure)) double __ocml_log1p_f64(double); __device__ __attribute__((pure)) double __ocml_log2_f64(double); __device__ __attribute__((const)) double __ocml_logb_f64(double); __device__ __attribute__((pure)) double __ocml_log_f64(double); __device__ double __ocml_modf_f64(double, __attribute__((address_space(5))) double *); __device__ __attribute__((const)) double __ocml_nearbyint_f64(double); __device__ __attribute__((const)) double __ocml_nextafter_f64(double, double); __device__ __attribute__((const)) double __ocml_len3_f64(double, double, double); __device__ __attribute__((const)) double __ocml_len4_f64(double, double, double, double); __device__ __attribute__((pure)) double __ocml_ncdf_f64(double); __device__ __attribute__((pure)) double __ocml_ncdfinv_f64(double); __device__ __attribute__((pure)) double __ocml_pow_f64(double, double); __device__ __attribute__((pure)) double __ocml_pown_f64(double, int); __device__ __attribute__((pure)) double __ocml_rcbrt_f64(double); __device__ __attribute__((const)) double __ocml_remainder_f64(double, double); __device__ double __ocml_remquo_f64(double, double, __attribute__((address_space(5))) int *); __device__ __attribute__((const)) double __ocml_rhypot_f64(double, double); __device__ __attribute__((const)) double __ocml_rint_f64(double); __device__ __attribute__((const)) double __ocml_rlen3_f64(double, double, double); __device__ __attribute__((const)) double __ocml_rlen4_f64(double, double, double, double); __device__ __attribute__((const)) double __ocml_round_f64(double); __device__ __attribute__((pure)) double __ocml_rsqrt_f64(double); __device__ __attribute__((const)) double __ocml_scalb_f64(double, double); __device__ __attribute__((const)) double __ocml_scalbn_f64(double, int); __device__ __attribute__((const)) int __ocml_signbit_f64(double); __device__ double __ocml_sincos_f64(double, __attribute__((address_space(5))) double *); __device__ double __ocml_sincospi_f64(double, __attribute__((address_space(5))) double *); 
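/* Illustrative sketch only: the double-precision FMA declared above can
 * recover the rounding error of a product (a "two-product" step). The
 * helper name __example_two_prod_f64 is hypothetical. */
static __device__ inline double __example_two_prod_f64(double __a, double __b,
                                                       double *__err) {
  double __p = __a * __b;
  /* For finite values without overflow or underflow, fma(a, b, -p) is the
   * exact value of a*b - p, i.e. the rounding error of the product. */
  *__err = __ocml_fma_f64(__a, __b, -__p);
  return __p;
}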
__device__ double __ocml_sin_f64(double); __device__ __attribute__((pure)) double __ocml_sinh_f64(double); __device__ double __ocml_sinpi_f64(double); __device__ __attribute__((const)) double __ocml_sqrt_f64(double); __device__ double __ocml_tan_f64(double); __device__ __attribute__((pure)) double __ocml_tanh_f64(double); __device__ double __ocml_tgamma_f64(double); __device__ __attribute__((const)) double __ocml_trunc_f64(double); __device__ double __ocml_y0_f64(double); __device__ double __ocml_y1_f64(double); // BEGIN INTRINSICS __device__ __attribute__((const)) double __ocml_add_rte_f64(double, double); __device__ __attribute__((const)) double __ocml_add_rtn_f64(double, double); __device__ __attribute__((const)) double __ocml_add_rtp_f64(double, double); __device__ __attribute__((const)) double __ocml_add_rtz_f64(double, double); __device__ __attribute__((const)) double __ocml_sub_rte_f64(double, double); __device__ __attribute__((const)) double __ocml_sub_rtn_f64(double, double); __device__ __attribute__((const)) double __ocml_sub_rtp_f64(double, double); __device__ __attribute__((const)) double __ocml_sub_rtz_f64(double, double); __device__ __attribute__((const)) double __ocml_mul_rte_f64(double, double); __device__ __attribute__((const)) double __ocml_mul_rtn_f64(double, double); __device__ __attribute__((const)) double __ocml_mul_rtp_f64(double, double); __device__ __attribute__((const)) double __ocml_mul_rtz_f64(double, double); __device__ __attribute__((const)) double __ocml_div_rte_f64(double, double); __device__ __attribute__((const)) double __ocml_div_rtn_f64(double, double); __device__ __attribute__((const)) double __ocml_div_rtp_f64(double, double); __device__ __attribute__((const)) double __ocml_div_rtz_f64(double, double); __device__ __attribute__((const)) double __ocml_sqrt_rte_f64(double); __device__ __attribute__((const)) double __ocml_sqrt_rtn_f64(double); __device__ __attribute__((const)) double __ocml_sqrt_rtp_f64(double); __device__ __attribute__((const)) double __ocml_sqrt_rtz_f64(double); __device__ __attribute__((const)) double __ocml_fma_rte_f64(double, double, double); __device__ __attribute__((const)) double __ocml_fma_rtn_f64(double, double, double); __device__ __attribute__((const)) double __ocml_fma_rtp_f64(double, double, double); __device__ __attribute__((const)) double __ocml_fma_rtz_f64(double, double, double); __device__ __attribute__((const)) _Float16 __ocml_ceil_f16(_Float16); __device__ _Float16 __ocml_cos_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_cvtrtn_f16_f32(float); __device__ __attribute__((const)) _Float16 __ocml_cvtrtp_f16_f32(float); __device__ __attribute__((const)) _Float16 __ocml_cvtrtz_f16_f32(float); __device__ __attribute__((pure)) _Float16 __ocml_exp_f16(_Float16); __device__ __attribute__((pure)) _Float16 __ocml_exp10_f16(_Float16); __device__ __attribute__((pure)) _Float16 __ocml_exp2_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_floor_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_fma_f16(_Float16, _Float16, _Float16); __device__ __attribute__((const)) _Float16 __ocml_fmax_f16(_Float16, _Float16); __device__ __attribute__((const)) _Float16 __ocml_fmin_f16(_Float16, _Float16); __device__ __attribute__((const)) _Float16 __ocml_fabs_f16(_Float16); __device__ __attribute__((const)) int __ocml_isinf_f16(_Float16); __device__ __attribute__((const)) int __ocml_isnan_f16(_Float16); __device__ __attribute__((pure)) _Float16 __ocml_log_f16(_Float16); __device__ __attribute__((pure)) 
_Float16 __ocml_log10_f16(_Float16); __device__ __attribute__((pure)) _Float16 __ocml_log2_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_rint_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_rsqrt_f16(_Float16); __device__ _Float16 __ocml_sin_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_sqrt_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_trunc_f16(_Float16); __device__ __attribute__((pure)) _Float16 __ocml_pown_f16(_Float16, int); typedef _Float16 __2f16 __attribute__((ext_vector_type(2))); typedef short __2i16 __attribute__((ext_vector_type(2))); // We need to match C99's bool and get an i1 in the IR. #ifdef __cplusplus typedef bool __ockl_bool; #else typedef _Bool __ockl_bool; #endif __device__ __attribute__((const)) float __ockl_fdot2(__2f16 a, __2f16 b, float c, __ockl_bool s); __device__ __attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_fabs_2f16(__2f16); __device__ __2f16 __ocml_cos_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_exp_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_exp10_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_exp2_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_floor_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_fma_2f16(__2f16, __2f16, __2f16); __device__ __attribute__((const)) __2i16 __ocml_isinf_2f16(__2f16); __device__ __attribute__((const)) __2i16 __ocml_isnan_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_log_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_log10_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_log2_2f16(__2f16); #if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 560 #define __DEPRECATED_SINCE_HIP_560(X) __attribute__((deprecated(X))) #else #define __DEPRECATED_SINCE_HIP_560(X) #endif // Deprecated, should be removed when rocm releases using it are no longer // relevant. __DEPRECATED_SINCE_HIP_560("use ((_Float16)1.0) / ") __device__ inline _Float16 __llvm_amdgcn_rcp_f16(_Float16 x) { return ((_Float16)1.0f) / x; } __DEPRECATED_SINCE_HIP_560("use ((__2f16)1.0) / ") __device__ inline __2f16 __llvm_amdgcn_rcp_2f16(__2f16 __x) { return ((__2f16)1.0f) / __x; } #undef __DEPRECATED_SINCE_HIP_560 __device__ __attribute__((const)) __2f16 __ocml_rint_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_rsqrt_2f16(__2f16); __device__ __2f16 __ocml_sin_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_sqrt_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_trunc_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_pown_2f16(__2f16, __2i16); #ifdef __cplusplus } // extern "C" #endif #endif // __CLANG_HIP_LIBDEVICE_DECLARES_H__ arm64intr.harm_neon.havx512erintrin.havxvnniint16intrin.hclflushoptintrin.hemmintrin.h/* ===-------- ia32intrin.h ---------------------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86INTRIN_H #error "Never use directly; include instead." #endif #ifndef __IA32INTRIN_H #define __IA32INTRIN_H /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) #define __DEFAULT_FN_ATTRS_CRC32 __attribute__((__always_inline__, __nodebug__, __target__("crc32"))) #if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS_CAST __attribute__((__always_inline__)) constexpr #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr #else #define __DEFAULT_FN_ATTRS_CAST __attribute__((__always_inline__)) #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS #endif /// Finds the first set bit starting from the least significant bit. The result /// is undefined if the input is 0. /// /// \headerfile /// /// This intrinsic corresponds to the \c BSF instruction or the /// \c TZCNT instruction. /// /// \param __A /// A 32-bit integer operand. /// \returns A 32-bit integer containing the bit number. /// \see _bit_scan_forward static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bsfd(int __A) { return __builtin_ctz((unsigned int)__A); } /// Finds the first set bit starting from the most significant bit. The result /// is undefined if the input is 0. /// /// \headerfile /// /// This intrinsic corresponds to the \c BSR instruction or the /// \c LZCNT instruction and an \c XOR. /// /// \param __A /// A 32-bit integer operand. /// \returns A 32-bit integer containing the bit number. /// \see _bit_scan_reverse static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bsrd(int __A) { return 31 - __builtin_clz((unsigned int)__A); } /// Swaps the bytes in the input, converting little endian to big endian or /// vice versa. /// /// \headerfile /// /// This intrinsic corresponds to the \c BSWAP instruction. /// /// \param __A /// A 32-bit integer operand. /// \returns A 32-bit integer containing the swapped bytes. static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bswapd(int __A) { return (int)__builtin_bswap32((unsigned int)__A); } /// Swaps the bytes in the input, converting little endian to big endian or /// vice versa. /// /// \headerfile /// /// This intrinsic corresponds to the \c BSWAP instruction. /// /// \param __A /// A 32-bit integer operand. /// \returns A 32-bit integer containing the swapped bytes. static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _bswap(int __A) { return (int)__builtin_bswap32((unsigned int)__A); } /// Finds the first set bit starting from the least significant bit. The result /// is undefined if the input is 0. /// /// \headerfile /// /// \code /// int _bit_scan_forward(int A); /// \endcode /// /// This intrinsic corresponds to the \c BSF instruction or the /// \c TZCNT instruction. /// /// \param A /// A 32-bit integer operand. /// \returns A 32-bit integer containing the bit number. /// \see __bsfd #define _bit_scan_forward(A) __bsfd((A)) /// Finds the first set bit starting from the most significant bit. The result /// is undefined if the input is 0. /// /// \headerfile /// /// \code /// int _bit_scan_reverse(int A); /// \endcode /// /// This intrinsic corresponds to the \c BSR instruction or the /// \c LZCNT instruction and an \c XOR. /// /// \param A /// A 32-bit integer operand. /// \returns A 32-bit integer containing the bit number. /// \see __bsrd #define _bit_scan_reverse(A) __bsrd((A)) #ifdef __x86_64__ /// Finds the first set bit starting from the least significant bit. The result /// is undefined if the input is 0. /// /// \headerfile /// /// This intrinsic corresponds to the \c BSF instruction or the /// \c TZCNT instruction. /// /// \param __A /// A 64-bit integer operand. 
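A small, hypothetical usage sketch for the 32-bit bit-scan and byte-swap helpers declared above (host code on an x86 target with <x86intrin.h> included); note that both scans are undefined when the input is zero.

#include <stdio.h>
#include <x86intrin.h>

int main(void) {
  int v = 0x00F0;                                   /* bits 4..7 set */
  printf("lowest set bit:  %d\n", __bsfd(v));       /* 4 */
  printf("highest set bit: %d\n", __bsrd(v));       /* 7 */
  printf("byte swap: 0x%08X\n", (unsigned)_bswap(0x11223344)); /* 0x44332211 */
  /* _bit_scan_forward / _bit_scan_reverse are the macro spellings of the same helpers. */
  printf("%d %d\n", _bit_scan_forward(v), _bit_scan_reverse(v));
  return 0;
}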
/// \returns A 32-bit integer containing the bit number. static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bsfq(long long __A) { return (long long)__builtin_ctzll((unsigned long long)__A); } /// Finds the first set bit starting from the most significant bit. The result /// is undefined if input is 0. /// /// \headerfile /// /// This intrinsic corresponds to the \c BSR instruction or the /// \c LZCNT instruction and an \c XOR. /// /// \param __A /// A 64-bit integer operand. /// \returns A 32-bit integer containing the bit number. static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bsrq(long long __A) { return 63 - __builtin_clzll((unsigned long long)__A); } /// Swaps the bytes in the input, converting little endian to big endian or /// vice versa. /// /// \headerfile /// /// This intrinsic corresponds to the \c BSWAP instruction. /// /// \param __A /// A 64-bit integer operand. /// \returns A 64-bit integer containing the swapped bytes. /// \see _bswap64 static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR __bswapq(long long __A) { return (long long)__builtin_bswap64((unsigned long long)__A); } /// Swaps the bytes in the input, converting little endian to big endian or /// vice versa. /// /// \headerfile /// /// \code /// long long _bswap64(long long A); /// \endcode /// /// This intrinsic corresponds to the \c BSWAP instruction. /// /// \param A /// A 64-bit integer operand. /// \returns A 64-bit integer containing the swapped bytes. /// \see __bswapq #define _bswap64(A) __bswapq((A)) #endif /* __x86_64__ */ /// Counts the number of bits in the source operand having a value of 1. /// /// \headerfile /// /// This intrinsic corresponds to the \c POPCNT instruction or a /// sequence of arithmetic and logic operations to calculate it. /// /// \param __A /// An unsigned 32-bit integer operand. /// \returns A 32-bit integer containing the number of bits with value 1 in the /// source operand. /// \see _popcnt32 static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __popcntd(unsigned int __A) { return __builtin_popcount(__A); } /// Counts the number of bits in the source operand having a value of 1. /// /// \headerfile /// /// \code /// int _popcnt32(int A); /// \endcode /// /// This intrinsic corresponds to the \c POPCNT instruction or a /// sequence of arithmetic and logic operations to calculate it. /// /// \param A /// An unsigned 32-bit integer operand. /// \returns A 32-bit integer containing the number of bits with value 1 in the /// source operand. /// \see __popcntd #define _popcnt32(A) __popcntd((A)) #ifdef __x86_64__ /// Counts the number of bits in the source operand having a value of 1. /// /// \headerfile /// /// This intrinsic corresponds to the \c POPCNT instruction or a /// sequence of arithmetic and logic operations to calculate it. /// /// \param __A /// An unsigned 64-bit integer operand. /// \returns A 64-bit integer containing the number of bits with value 1 in the /// source operand. /// \see _popcnt64 static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR __popcntq(unsigned long long __A) { return __builtin_popcountll(__A); } /// Counts the number of bits in the source operand having a value of 1. /// /// \headerfile /// /// \code /// long long _popcnt64(unsigned long long A); /// \endcode /// /// This intrinsic corresponds to the \c POPCNT instruction or a /// sequence of arithmetic and logic operations to calculate it. /// /// \param A /// An unsigned 64-bit integer operand. 
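A hypothetical sketch of the population-count helpers declared above; the 64-bit form is only available when compiling for x86_64.

#include <stdio.h>
#include <x86intrin.h>

int main(void) {
  unsigned int word = 0xF0F0F0F0u;
  printf("bits set: %d\n", __popcntd(word));        /* 16 */
  printf("bits set: %d\n", _popcnt32((int)word));   /* same value via the macro form */
#ifdef __x86_64__
  unsigned long long mask = 0xFFFF0000FFFF0000ull;
  printf("bits set: %lld\n", __popcntq(mask));      /* 32 */
#endif
  return 0;
}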
/// \returns A 64-bit integer containing the number of bits with value 1 in the /// source operand. /// \see __popcntq #define _popcnt64(A) __popcntq((A)) #endif /* __x86_64__ */ #ifdef __x86_64__ /// Returns the program status-and-control \c RFLAGS register with the \c VM /// and \c RF flags cleared. /// /// \headerfile /// /// This intrinsic corresponds to the \c PUSHFQ + \c POP instruction sequence. /// /// \returns The 64-bit value of the RFLAGS register. static __inline__ unsigned long long __DEFAULT_FN_ATTRS __readeflags(void) { return __builtin_ia32_readeflags_u64(); } /// Writes the specified value to the program status-and-control \c RFLAGS /// register. Reserved bits are not affected. /// /// \headerfile /// /// This intrinsic corresponds to the \c PUSH + \c POPFQ instruction sequence. /// /// \param __f /// The 64-bit value to write to \c RFLAGS. static __inline__ void __DEFAULT_FN_ATTRS __writeeflags(unsigned long long __f) { __builtin_ia32_writeeflags_u64(__f); } #else /* !__x86_64__ */ /// Returns the program status-and-control \c EFLAGS register with the \c VM /// and \c RF flags cleared. /// /// \headerfile /// /// This intrinsic corresponds to the \c PUSHFD + \c POP instruction sequence. /// /// \returns The 32-bit value of the EFLAGS register. static __inline__ unsigned int __DEFAULT_FN_ATTRS __readeflags(void) { return __builtin_ia32_readeflags_u32(); } /// Writes the specified value to the program status-and-control \c EFLAGS /// register. Reserved bits are not affected. /// /// \headerfile /// /// This intrinsic corresponds to the \c PUSH + \c POPFD instruction sequence. /// /// \param __f /// The 32-bit value to write to \c EFLAGS. static __inline__ void __DEFAULT_FN_ATTRS __writeeflags(unsigned int __f) { __builtin_ia32_writeeflags_u32(__f); } #endif /* !__x86_64__ */ /// Casts a 32-bit float value to a 32-bit unsigned integer value. /// /// \headerfile /// /// This intrinsic corresponds to the \c VMOVD / \c MOVD instruction in x86_64, /// and corresponds to the \c VMOVL / \c MOVL instruction in ia32. /// /// \param __A /// A 32-bit float value. /// \returns A 32-bit unsigned integer containing the converted value. static __inline__ unsigned int __DEFAULT_FN_ATTRS_CAST _castf32_u32(float __A) { return __builtin_bit_cast(unsigned int, __A); } /// Casts a 64-bit float value to a 64-bit unsigned integer value. /// /// \headerfile /// /// This intrinsic corresponds to the \c VMOVQ / \c MOVQ instruction in x86_64, /// and corresponds to the \c VMOVL / \c MOVL instruction in ia32. /// /// \param __A /// A 64-bit float value. /// \returns A 64-bit unsigned integer containing the converted value. static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CAST _castf64_u64(double __A) { return __builtin_bit_cast(unsigned long long, __A); } /// Casts a 32-bit unsigned integer value to a 32-bit float value. /// /// \headerfile /// /// This intrinsic corresponds to the \c VMOVQ / \c MOVQ instruction in x86_64, /// and corresponds to the \c FLDS instruction in ia32. /// /// \param __A /// A 32-bit unsigned integer value. /// \returns A 32-bit float value containing the converted value. static __inline__ float __DEFAULT_FN_ATTRS_CAST _castu32_f32(unsigned int __A) { return __builtin_bit_cast(float, __A); } /// Casts a 64-bit unsigned integer value to a 64-bit float value. /// /// \headerfile /// /// This intrinsic corresponds to the \c VMOVQ / \c MOVQ instruction in x86_64, /// and corresponds to the \c FLDL instruction in ia32. /// /// \param __A /// A 64-bit unsigned integer value. 
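A hypothetical sketch of the bit-cast helpers above, which reinterpret a value's bits without converting it; the constants are just well-known IEEE-754 encodings used for illustration.

#include <stdio.h>
#include <x86intrin.h>

int main(void) {
  unsigned int bits = _castf32_u32(1.0f);      /* IEEE-754 encoding of 1.0f */
  printf("1.0f bits: 0x%08X\n", bits);         /* 0x3F800000 */
  float pi = _castu32_f32(0x40490FDBu);        /* bit pattern of (float)pi */
  printf("pi approx: %f\n", pi);
  return 0;
}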
/// \returns A 64-bit float value containing the converted value. static __inline__ double __DEFAULT_FN_ATTRS_CAST _castu64_f64(unsigned long long __A) { return __builtin_bit_cast(double, __A); } /// Adds the unsigned integer operand to the CRC-32C checksum of the /// unsigned char operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c CRC32B instruction. /// /// \param __C /// An unsigned integer operand to add to the CRC-32C checksum of operand /// \a __D. /// \param __D /// An unsigned 8-bit integer operand used to compute the CRC-32C checksum. /// \returns The result of adding operand \a __C to the CRC-32C checksum of /// operand \a __D. static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32 __crc32b(unsigned int __C, unsigned char __D) { return __builtin_ia32_crc32qi(__C, __D); } /// Adds the unsigned integer operand to the CRC-32C checksum of the /// unsigned short operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c CRC32W instruction. /// /// \param __C /// An unsigned integer operand to add to the CRC-32C checksum of operand /// \a __D. /// \param __D /// An unsigned 16-bit integer operand used to compute the CRC-32C checksum. /// \returns The result of adding operand \a __C to the CRC-32C checksum of /// operand \a __D. static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32 __crc32w(unsigned int __C, unsigned short __D) { return __builtin_ia32_crc32hi(__C, __D); } /// Adds the unsigned integer operand to the CRC-32C checksum of the /// second unsigned integer operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c CRC32D instruction. /// /// \param __C /// An unsigned integer operand to add to the CRC-32C checksum of operand /// \a __D. /// \param __D /// An unsigned 32-bit integer operand used to compute the CRC-32C checksum. /// \returns The result of adding operand \a __C to the CRC-32C checksum of /// operand \a __D. static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32 __crc32d(unsigned int __C, unsigned int __D) { return __builtin_ia32_crc32si(__C, __D); } #ifdef __x86_64__ /// Adds the unsigned integer operand to the CRC-32C checksum of the /// unsigned 64-bit integer operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c CRC32Q instruction. /// /// \param __C /// An unsigned integer operand to add to the CRC-32C checksum of operand /// \a __D. /// \param __D /// An unsigned 64-bit integer operand used to compute the CRC-32C checksum. /// \returns The result of adding operand \a __C to the CRC-32C checksum of /// operand \a __D. static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CRC32 __crc32q(unsigned long long __C, unsigned long long __D) { return __builtin_ia32_crc32di(__C, __D); } #endif /* __x86_64__ */ /// Reads the specified performance-monitoring counter. Refer to your /// processor's documentation to determine which performance counters are /// supported. /// /// \headerfile /// /// This intrinsic corresponds to the \c RDPMC instruction. /// /// \param __A /// The performance counter to read. /// \returns The 64-bit value read from the performance counter. /// \see _rdpmc static __inline__ unsigned long long __DEFAULT_FN_ATTRS __rdpmc(int __A) { return __builtin_ia32_rdpmc(__A); } /// Reads the processor's time-stamp counter and the \c IA32_TSC_AUX MSR /// \c (0xc0000103). /// /// \headerfile /// /// This intrinsic corresponds to the \c RDTSCP instruction. /// /// \param __A /// The address of where to store the 32-bit \c IA32_TSC_AUX value. 
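A hypothetical sketch that folds a byte buffer into a CRC-32C checksum with the helpers above; it assumes an x86 host whose CPU supports the CRC32 instruction, and the init/final-xor convention shown is the usual CRC-32C (Castagnoli) one, not something mandated by the header.

#include <stddef.h>
#include <stdio.h>
#include <x86intrin.h>

static unsigned int crc32c_bytes(const unsigned char *p, size_t n) {
  unsigned int crc = 0xFFFFFFFFu;          /* conventional initial value */
  for (size_t i = 0; i < n; ++i)
    crc = __crc32b(crc, p[i]);             /* fold in one byte per step */
  return crc ^ 0xFFFFFFFFu;                /* conventional final inversion */
}

int main(void) {
  const unsigned char msg[] = "123456789";
  printf("crc32c: 0x%08X\n", crc32c_bytes(msg, sizeof(msg) - 1)); /* expected 0xE3069283 */
  return 0;
}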
/// \returns The 64-bit value of the time-stamp counter. static __inline__ unsigned long long __DEFAULT_FN_ATTRS __rdtscp(unsigned int *__A) { return __builtin_ia32_rdtscp(__A); } /// Reads the processor's time-stamp counter. /// /// \headerfile /// /// \code /// unsigned long long _rdtsc(); /// \endcode /// /// This intrinsic corresponds to the \c RDTSC instruction. /// /// \returns The 64-bit value of the time-stamp counter. #define _rdtsc() __rdtsc() /// Reads the specified performance monitoring counter. Refer to your /// processor's documentation to determine which performance counters are /// supported. /// /// \headerfile /// /// \code /// unsigned long long _rdpmc(int A); /// \endcode /// /// This intrinsic corresponds to the \c RDPMC instruction. /// /// \param A /// The performance counter to read. /// \returns The 64-bit value read from the performance counter. /// \see __rdpmc #define _rdpmc(A) __rdpmc(A) static __inline__ void __DEFAULT_FN_ATTRS _wbinvd(void) { __builtin_ia32_wbinvd(); } /// Rotates an 8-bit value to the left by the specified number of bits. /// This operation is undefined if the number of bits exceeds the size of /// the value. /// /// \headerfile /// /// This intrinsic corresponds to the \c ROL instruction. /// /// \param __X /// The unsigned 8-bit value to be rotated. /// \param __C /// The number of bits to rotate the value. /// \returns The rotated value. static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR __rolb(unsigned char __X, int __C) { return __builtin_rotateleft8(__X, __C); } /// Rotates an 8-bit value to the right by the specified number of bits. /// This operation is undefined if the number of bits exceeds the size of /// the value. /// /// \headerfile /// /// This intrinsic corresponds to the \c ROR instruction. /// /// \param __X /// The unsigned 8-bit value to be rotated. /// \param __C /// The number of bits to rotate the value. /// \returns The rotated value. static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR __rorb(unsigned char __X, int __C) { return __builtin_rotateright8(__X, __C); } /// Rotates a 16-bit value to the left by the specified number of bits. /// This operation is undefined if the number of bits exceeds the size of /// the value. /// /// \headerfile /// /// This intrinsic corresponds to the \c ROL instruction. /// /// \param __X /// The unsigned 16-bit value to be rotated. /// \param __C /// The number of bits to rotate the value. /// \returns The rotated value. /// \see _rotwl static __inline__ unsigned short __DEFAULT_FN_ATTRS_CONSTEXPR __rolw(unsigned short __X, int __C) { return __builtin_rotateleft16(__X, __C); } /// Rotates a 16-bit value to the right by the specified number of bits. /// This operation is undefined if the number of bits exceeds the size of /// the value. /// /// \headerfile /// /// This intrinsic corresponds to the \c ROR instruction. /// /// \param __X /// The unsigned 16-bit value to be rotated. /// \param __C /// The number of bits to rotate the value. /// \returns The rotated value. /// \see _rotwr static __inline__ unsigned short __DEFAULT_FN_ATTRS_CONSTEXPR __rorw(unsigned short __X, int __C) { return __builtin_rotateright16(__X, __C); } /// Rotates a 32-bit value to the left by the specified number of bits. /// This operation is undefined if the number of bits exceeds the size of /// the value. /// /// \headerfile /// /// This intrinsic corresponds to the \c ROL instruction. /// /// \param __X /// The unsigned 32-bit value to be rotated. 
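A hypothetical sketch of a coarse cycle measurement with the time-stamp counter helpers above (x86 host code). TSC ticks are CPU-specific and only meaningful as a relative measure.

#include <stdio.h>
#include <x86intrin.h>

int main(void) {
  unsigned int aux;                              /* receives IA32_TSC_AUX */
  unsigned long long start = _rdtsc();
  volatile unsigned long long sink = 0;
  for (int i = 0; i < 1000000; ++i)
    sink += (unsigned long long)i;               /* some work to time */
  unsigned long long end = __rdtscp(&aux);       /* also reads IA32_TSC_AUX */
  printf("elapsed ticks: %llu (tsc_aux=%u)\n", end - start, aux);
  return 0;
}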
/// \param __C /// The number of bits to rotate the value. /// \returns The rotated value. /// \see _rotl static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR __rold(unsigned int __X, int __C) { return __builtin_rotateleft32(__X, (unsigned int)__C); } /// Rotates a 32-bit value to the right by the specified number of bits. /// This operation is undefined if the number of bits exceeds the size of /// the value. /// /// \headerfile /// /// This intrinsic corresponds to the \c ROR instruction. /// /// \param __X /// The unsigned 32-bit value to be rotated. /// \param __C /// The number of bits to rotate the value. /// \returns The rotated value. /// \see _rotr static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR __rord(unsigned int __X, int __C) { return __builtin_rotateright32(__X, (unsigned int)__C); } #ifdef __x86_64__ /// Rotates a 64-bit value to the left by the specified number of bits. /// This operation is undefined if the number of bits exceeds the size of /// the value. /// /// \headerfile /// /// This intrinsic corresponds to the \c ROL instruction. /// /// \param __X /// The unsigned 64-bit value to be rotated. /// \param __C /// The number of bits to rotate the value. /// \returns The rotated value. static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR __rolq(unsigned long long __X, int __C) { return __builtin_rotateleft64(__X, (unsigned long long)__C); } /// Rotates a 64-bit value to the right by the specified number of bits. /// This operation is undefined if the number of bits exceeds the size of /// the value. /// /// \headerfile /// /// This intrinsic corresponds to the \c ROR instruction. /// /// \param __X /// The unsigned 64-bit value to be rotated. /// \param __C /// The number of bits to rotate the value. /// \returns The rotated value. static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR __rorq(unsigned long long __X, int __C) { return __builtin_rotateright64(__X, (unsigned long long)__C); } #endif /* __x86_64__ */ #ifndef _MSC_VER /* These are already provided as builtins for MSVC. */ /* Select the correct function based on the size of long. */ #ifdef __LP64__ /// Rotates a 64-bit value to the left by the specified number of bits. /// This operation is undefined if the number of bits exceeds the size of /// the value. /// /// \headerfile /// /// \code /// unsigned long long _lrotl(unsigned long long a, int b); /// \endcode /// /// This intrinsic corresponds to the \c ROL instruction. /// /// \param a /// The unsigned 64-bit value to be rotated. /// \param b /// The number of bits to rotate the value. /// \returns The rotated value. /// \see __rolq #define _lrotl(a,b) __rolq((a), (b)) /// Rotates a 64-bit value to the right by the specified number of bits. /// This operation is undefined if the number of bits exceeds the size of /// the value. /// /// \headerfile /// /// \code /// unsigned long long _lrotr(unsigned long long a, int b); /// \endcode /// /// This intrinsic corresponds to the \c ROR instruction. /// /// \param a /// The unsigned 64-bit value to be rotated. /// \param b /// The number of bits to rotate the value. /// \returns The rotated value. /// \see __rorq #define _lrotr(a,b) __rorq((a), (b)) #else // __LP64__ /// Rotates a 32-bit value to the left by the specified number of bits. /// This operation is undefined if the number of bits exceeds the size of /// the value. 
/// /// \headerfile /// /// \code /// unsigned int _lrotl(unsigned int a, int b); /// \endcode /// /// This intrinsic corresponds to the \c ROL instruction. /// /// \param a /// The unsigned 32-bit value to be rotated. /// \param b /// The number of bits to rotate the value. /// \returns The rotated value. /// \see __rold #define _lrotl(a,b) __rold((a), (b)) /// Rotates a 32-bit value to the right by the specified number of bits. /// This operation is undefined if the number of bits exceeds the size of /// the value. /// /// \headerfile /// /// \code /// unsigned int _lrotr(unsigned int a, int b); /// \endcode /// /// This intrinsic corresponds to the \c ROR instruction. /// /// \param a /// The unsigned 32-bit value to be rotated. /// \param b /// The number of bits to rotate the value. /// \returns The rotated value. /// \see __rord #define _lrotr(a,b) __rord((a), (b)) #endif // __LP64__ /// Rotates a 32-bit value to the left by the specified number of bits. /// This operation is undefined if the number of bits exceeds the size of /// the value. /// /// \headerfile /// /// \code /// unsigned int _rotl(unsigned int a, int b); /// \endcode /// /// This intrinsic corresponds to the \c ROL instruction. /// /// \param a /// The unsigned 32-bit value to be rotated. /// \param b /// The number of bits to rotate the value. /// \returns The rotated value. /// \see __rold #define _rotl(a,b) __rold((a), (b)) /// Rotates a 32-bit value to the right by the specified number of bits. /// This operation is undefined if the number of bits exceeds the size of /// the value. /// /// \headerfile /// /// \code /// unsigned int _rotr(unsigned int a, int b); /// \endcode /// /// This intrinsic corresponds to the \c ROR instruction. /// /// \param a /// The unsigned 32-bit value to be rotated. /// \param b /// The number of bits to rotate the value. /// \returns The rotated value. /// \see __rord #define _rotr(a,b) __rord((a), (b)) #endif // _MSC_VER /* These are not builtins so need to be provided in all modes. */ /// Rotates a 16-bit value to the left by the specified number of bits. /// This operation is undefined if the number of bits exceeds the size of /// the value. /// /// \headerfile /// /// \code /// unsigned short _rotwl(unsigned short a, int b); /// \endcode /// /// This intrinsic corresponds to the \c ROL instruction. /// /// \param a /// The unsigned 16-bit value to be rotated. /// \param b /// The number of bits to rotate the value. /// \returns The rotated value. /// \see __rolw #define _rotwl(a,b) __rolw((a), (b)) /// Rotates a 16-bit value to the right by the specified number of bits. /// This operation is undefined if the number of bits exceeds the size of /// the value. /// /// \headerfile /// /// \code /// unsigned short _rotwr(unsigned short a, int b); /// \endcode /// /// This intrinsic corresponds to the \c ROR instruction. /// /// \param a /// The unsigned 16-bit value to be rotated. /// \param b /// The number of bits to rotate the value. /// \returns The rotated value. /// \see __rorw #define _rotwr(a,b) __rorw((a), (b)) #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_CAST #undef __DEFAULT_FN_ATTRS_CRC32 #undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif /* __IA32INTRIN_H */ intrin.hstdalign.h/* ===-------- vadefs.h ---------------------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
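A hypothetical sketch of the rotate macros defined above; the rotate count must be smaller than the width of the operand, and _lrotl/_lrotr operate on 64 bits under __LP64__ and on 32 bits otherwise.

#include <stdio.h>
#include <x86intrin.h>

int main(void) {
  printf("0x%08X\n", _rotl(0x80000001u, 1));                        /* 0x00000003 */
  printf("0x%08X\n", _rotr(0x00000003u, 1));                        /* 0x80000001 */
  printf("0x%04X\n", (unsigned)_rotwl((unsigned short)0x8001, 1));  /* 0x0003 */
  printf("0x%llX\n", (unsigned long long)_lrotl(1, 8));             /* 0x100 */
  return 0;
}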
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* Only include this if we are aiming for MSVC compatibility. */ #ifndef _MSC_VER #include_next #else #ifndef __clang_vadefs_h #define __clang_vadefs_h #include_next /* Override macros from vadefs.h with definitions that work with Clang. */ #ifdef _crt_va_start #undef _crt_va_start #define _crt_va_start(ap, param) __builtin_va_start(ap, param) #endif #ifdef _crt_va_end #undef _crt_va_end #define _crt_va_end(ap) __builtin_va_end(ap) #endif #ifdef _crt_va_arg #undef _crt_va_arg #define _crt_va_arg(ap, type) __builtin_va_arg(ap, type) #endif /* VS 2015 switched to double underscore names, which is an improvement, but now * we have to intercept those names too. */ #ifdef __crt_va_start #undef __crt_va_start #define __crt_va_start(ap, param) __builtin_va_start(ap, param) #endif #ifdef __crt_va_end #undef __crt_va_end #define __crt_va_end(ap) __builtin_va_end(ap) #endif #ifdef __crt_va_arg #undef __crt_va_arg #define __crt_va_arg(ap, type) __builtin_va_arg(ap, type) #endif #endif #endif /*===------------ vpclmulqdqintrin.h - VPCLMULQDQ intrinsics ---------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __VPCLMULQDQINTRIN_H #define __VPCLMULQDQINTRIN_H #define _mm256_clmulepi64_epi128(A, B, I) \ ((__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \ (__v4di)(__m256i)(B), \ (char)(I))) #ifdef __AVX512FINTRIN_H #define _mm512_clmulepi64_epi128(A, B, I) \ ((__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \ (__v8di)(__m512i)(B), \ (char)(I))) #endif // __AVX512FINTRIN_H #endif /* __VPCLMULQDQINTRIN_H */ /*===-------------- wbnoinvdintrin.h - wbnoinvd intrinsic-------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __WBNOINVDINTRIN_H #define __WBNOINVDINTRIN_H static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("wbnoinvd"))) _wbnoinvd (void) { __builtin_ia32_wbnoinvd (); } #endif /* __WBNOINVDINTRIN_H */ xmmintrin.h/*===---- xmmintrin.h - Implementation of SSE intrinsics on PowerPC --------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* Implemented from the specification included in the Intel C++ Compiler User Guide and Reference, version 9.0. */ #ifndef NO_WARN_X86_INTRINSICS /* This header file is to help porting code using Intel intrinsics explicitly from x86_64 to powerpc64/powerpc64le. Since X86 SSE intrinsics mainly handles __m128 type, PowerPC VMX/VSX ISA is a good match for vector float SIMD operations. 
However scalar float operations in vector (XMM) registers require the POWER8 VSX ISA (2.07) level. There are differences for data format and placement of float scalars in the vector register, which require extra steps to match SSE scalar float semantics on POWER. It should be noted that there's much difference between X86_64's MXSCR and PowerISA's FPSCR/VSCR registers. It's recommended to use portable instead of access MXSCR directly. Most SSE scalar float intrinsic operations can be performed more efficiently as C language float scalar operations or optimized to use vector SIMD operations. We recommend this for new applications. */ #error \ "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." #endif #ifndef XMMINTRIN_H_ #define XMMINTRIN_H_ #if defined(__powerpc64__) && \ (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) /* Define four value permute mask */ #define _MM_SHUFFLE(w, x, y, z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z)) #include /* Avoid collisions between altivec.h and strict adherence to C++ and C11 standards. This should eventually be done inside altivec.h itself, but only after testing a full distro build. */ #if defined(__STRICT_ANSI__) && \ (defined(__cplusplus) || \ (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)) #undef vector #undef pixel #undef bool #endif /* We need type definitions from the MMX header file. */ #include /* Get _mm_malloc () and _mm_free (). */ #if __STDC_HOSTED__ #include #endif /* The Intel API is flexible enough that we must allow aliasing with other vector types, and their scalar components. */ typedef vector float __m128 __attribute__((__may_alias__)); /* Unaligned version of the same type. */ typedef vector float __m128_u __attribute__((__may_alias__, __aligned__(1))); /* Internal data types for implementing the intrinsics. */ typedef vector float __v4sf; /* Create an undefined vector. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_undefined_ps(void) { __m128 __Y = __Y; return __Y; } /* Create a vector of zeros. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setzero_ps(void) { return __extension__(__m128){0.0f, 0.0f, 0.0f, 0.0f}; } /* Load four SPFP values from P. The address must be 16-byte aligned. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load_ps(float const *__P) { return ((__m128)vec_ld(0, (__v4sf *)__P)); } /* Load four SPFP values from P. The address need not be 16-byte aligned. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadu_ps(float const *__P) { return (vec_vsx_ld(0, __P)); } /* Load four SPFP values in reverse order. The address must be aligned. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadr_ps(float const *__P) { __v4sf __tmp; __m128 __result; static const __vector unsigned char __permute_vector = { 0x1C, 0x1D, 0x1E, 0x1F, 0x18, 0x19, 0x1A, 0x1B, 0x14, 0x15, 0x16, 0x17, 0x10, 0x11, 0x12, 0x13}; __tmp = vec_ld(0, (__v4sf *)__P); __result = (__m128)vec_perm(__tmp, __tmp, __permute_vector); return __result; } /* Create a vector with all four elements equal to F. 
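A small usage sketch for the load helpers defined above, assuming this PowerPC compatibility header is compiled on powerpc64le with -DNO_WARN_X86_INTRINSICS; the buffers are illustrative, and _mm_storeu_ps, used for output, is defined a little further down in the same header.

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  float aligned[4] __attribute__((aligned(16))) = {1.0f, 2.0f, 3.0f, 4.0f};
  float buffer[5] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f};

  __m128 a = _mm_load_ps(aligned);       /* address must be 16-byte aligned */
  __m128 u = _mm_loadu_ps(buffer + 1);   /* no alignment requirement */
  __m128 r = _mm_loadr_ps(aligned);      /* elements loaded in reverse order */
  __m128 z = _mm_setzero_ps();

  float out[4];
  _mm_storeu_ps(out, r);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);   /* 4 3 2 1 */
  (void)a; (void)u; (void)z;
  return 0;
}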
*/ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_ps(float __F) { return __extension__(__m128)(__v4sf){__F, __F, __F, __F}; } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_ps1(float __F) { return _mm_set1_ps(__F); } /* Create the vector [Z Y X W]. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_ps(const float __Z, const float __Y, const float __X, const float __W) { return __extension__(__m128)(__v4sf){__W, __X, __Y, __Z}; } /* Create the vector [W X Y Z]. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setr_ps(float __Z, float __Y, float __X, float __W) { return __extension__(__m128)(__v4sf){__Z, __Y, __X, __W}; } /* Store four SPFP values. The address must be 16-byte aligned. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_ps(float *__P, __m128 __A) { vec_st((__v4sf)__A, 0, (__v4sf *)__P); } /* Store four SPFP values. The address need not be 16-byte aligned. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storeu_ps(float *__P, __m128 __A) { *(__m128_u *)__P = __A; } /* Store four SPFP values in reverse order. The address must be aligned. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storer_ps(float *__P, __m128 __A) { __v4sf __tmp; static const __vector unsigned char __permute_vector = { 0x1C, 0x1D, 0x1E, 0x1F, 0x18, 0x19, 0x1A, 0x1B, 0x14, 0x15, 0x16, 0x17, 0x10, 0x11, 0x12, 0x13}; __tmp = (__m128)vec_perm(__A, __A, __permute_vector); _mm_store_ps(__P, __tmp); } /* Store the lower SPFP value across four words. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store1_ps(float *__P, __m128 __A) { __v4sf __va = vec_splat((__v4sf)__A, 0); _mm_store_ps(__P, __va); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_ps1(float *__P, __m128 __A) { _mm_store1_ps(__P, __A); } /* Create a vector with element 0 as F and the rest zero. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_ss(float __F) { return __extension__(__m128)(__v4sf){__F, 0.0f, 0.0f, 0.0f}; } /* Sets the low SPFP value of A from the low value of B. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_move_ss(__m128 __A, __m128 __B) { static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; return (vec_sel((__v4sf)__A, (__v4sf)__B, __mask)); } /* Create a vector with element 0 as *P and the rest zero. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load_ss(float const *__P) { return _mm_set_ss(*__P); } /* Stores the lower SPFP value. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_ss(float *__P, __m128 __A) { *__P = ((__v4sf)__A)[0]; } /* Perform the respective operation on the lower SPFP (single-precision floating-point) values of A and B; the upper three SPFP values are passed through from A. 
*/ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_ss(__m128 __A, __m128 __B) { #ifdef _ARCH_PWR7 __m128 __a, __b, __c; static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; /* PowerISA VSX does not allow partial (for just lower double) results. So to insure we don't generate spurious exceptions (from the upper double values) we splat the lower double before we to the operation. */ __a = vec_splat(__A, 0); __b = vec_splat(__B, 0); __c = __a + __b; /* Then we merge the lower float result with the original upper float elements from __A. */ return (vec_sel(__A, __c, __mask)); #else __A[0] = __A[0] + __B[0]; return (__A); #endif } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_ss(__m128 __A, __m128 __B) { #ifdef _ARCH_PWR7 __m128 __a, __b, __c; static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; /* PowerISA VSX does not allow partial (for just lower double) results. So to insure we don't generate spurious exceptions (from the upper double values) we splat the lower double before we to the operation. */ __a = vec_splat(__A, 0); __b = vec_splat(__B, 0); __c = __a - __b; /* Then we merge the lower float result with the original upper float elements from __A. */ return (vec_sel(__A, __c, __mask)); #else __A[0] = __A[0] - __B[0]; return (__A); #endif } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mul_ss(__m128 __A, __m128 __B) { #ifdef _ARCH_PWR7 __m128 __a, __b, __c; static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; /* PowerISA VSX does not allow partial (for just lower double) results. So to insure we don't generate spurious exceptions (from the upper double values) we splat the lower double before we to the operation. */ __a = vec_splat(__A, 0); __b = vec_splat(__B, 0); __c = __a * __b; /* Then we merge the lower float result with the original upper float elements from __A. */ return (vec_sel(__A, __c, __mask)); #else __A[0] = __A[0] * __B[0]; return (__A); #endif } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_div_ss(__m128 __A, __m128 __B) { #ifdef _ARCH_PWR7 __m128 __a, __b, __c; static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; /* PowerISA VSX does not allow partial (for just lower double) results. So to insure we don't generate spurious exceptions (from the upper double values) we splat the lower double before we to the operation. */ __a = vec_splat(__A, 0); __b = vec_splat(__B, 0); __c = __a / __b; /* Then we merge the lower float result with the original upper float elements from __A. */ return (vec_sel(__A, __c, __mask)); #else __A[0] = __A[0] / __B[0]; return (__A); #endif } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sqrt_ss(__m128 __A) { __m128 __a, __c; static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; /* PowerISA VSX does not allow partial (for just lower double) * results. So to insure we don't generate spurious exceptions * (from the upper double values) we splat the lower double * before we to the operation. */ __a = vec_splat(__A, 0); __c = vec_sqrt(__a); /* Then we merge the lower float result with the original upper * float elements from __A. */ return (vec_sel(__A, __c, __mask)); } /* Perform the respective operation on the four SPFP values in A and B. 
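A hypothetical sketch of the scalar-only semantics implemented above: each *_ss operation combines only element 0 and carries the upper three lanes through from the first operand.

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m128 a = _mm_set_ps(40.0f, 30.0f, 20.0f, 10.0f);   /* elements 0..3 are 10,20,30,40 */
  __m128 b = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);       /* elements 0..3 are 1,2,3,4 */
  __m128 sum = _mm_add_ss(a, b);                       /* element 0 = 11, rest copied from a */

  float out[4];
  _mm_storeu_ps(out, sum);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  /* 11 20 30 40 */
  return 0;
}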
*/ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_ps(__m128 __A, __m128 __B) { return (__m128)((__v4sf)__A + (__v4sf)__B); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_ps(__m128 __A, __m128 __B) { return (__m128)((__v4sf)__A - (__v4sf)__B); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mul_ps(__m128 __A, __m128 __B) { return (__m128)((__v4sf)__A * (__v4sf)__B); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_div_ps(__m128 __A, __m128 __B) { return (__m128)((__v4sf)__A / (__v4sf)__B); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sqrt_ps(__m128 __A) { return (vec_sqrt((__v4sf)__A)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_rcp_ps(__m128 __A) { return (vec_re((__v4sf)__A)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_rsqrt_ps(__m128 __A) { return (vec_rsqrte(__A)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_rcp_ss(__m128 __A) { __m128 __a, __c; static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; /* PowerISA VSX does not allow partial (for just lower double) * results. So to insure we don't generate spurious exceptions * (from the upper double values) we splat the lower double * before we to the operation. */ __a = vec_splat(__A, 0); __c = _mm_rcp_ps(__a); /* Then we merge the lower float result with the original upper * float elements from __A. */ return (vec_sel(__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_rsqrt_ss(__m128 __A) { __m128 __a, __c; static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; /* PowerISA VSX does not allow partial (for just lower double) * results. So to insure we don't generate spurious exceptions * (from the upper double values) we splat the lower double * before we to the operation. */ __a = vec_splat(__A, 0); __c = vec_rsqrte(__a); /* Then we merge the lower float result with the original upper * float elements from __A. */ return (vec_sel(__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_ss(__m128 __A, __m128 __B) { __v4sf __a, __b, __c; static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; /* PowerISA VSX does not allow partial (for just lower float) * results. So to insure we don't generate spurious exceptions * (from the upper float values) we splat the lower float * before we to the operation. */ __a = vec_splat((__v4sf)__A, 0); __b = vec_splat((__v4sf)__B, 0); __c = vec_min(__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. */ return (vec_sel((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_ss(__m128 __A, __m128 __B) { __v4sf __a, __b, __c; static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; /* PowerISA VSX does not allow partial (for just lower float) * results. So to insure we don't generate spurious exceptions * (from the upper float values) we splat the lower float * before we to the operation. 
*/ __a = vec_splat(__A, 0); __b = vec_splat(__B, 0); __c = vec_max(__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. */ return (vec_sel((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_ps(__m128 __A, __m128 __B) { __vector __bool int __m = vec_cmpgt((__v4sf)__B, (__v4sf)__A); return vec_sel(__B, __A, __m); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_ps(__m128 __A, __m128 __B) { __vector __bool int __m = vec_cmpgt((__v4sf)__A, (__v4sf)__B); return vec_sel(__B, __A, __m); } /* Perform logical bit-wise operations on 128-bit values. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_and_ps(__m128 __A, __m128 __B) { return ((__m128)vec_and((__v4sf)__A, (__v4sf)__B)); // return __builtin_ia32_andps (__A, __B); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_andnot_ps(__m128 __A, __m128 __B) { return ((__m128)vec_andc((__v4sf)__B, (__v4sf)__A)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_or_ps(__m128 __A, __m128 __B) { return ((__m128)vec_or((__v4sf)__A, (__v4sf)__B)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_xor_ps(__m128 __A, __m128 __B) { return ((__m128)vec_xor((__v4sf)__A, (__v4sf)__B)); } /* Perform a comparison on the four SPFP values of A and B. For each element, if the comparison is true, place a mask of all ones in the result, otherwise a mask of zeros. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_ps(__m128 __A, __m128 __B) { return ((__m128)vec_cmpeq((__v4sf)__A, (__v4sf)__B)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_ps(__m128 __A, __m128 __B) { return ((__m128)vec_cmplt((__v4sf)__A, (__v4sf)__B)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmple_ps(__m128 __A, __m128 __B) { return ((__m128)vec_cmple((__v4sf)__A, (__v4sf)__B)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_ps(__m128 __A, __m128 __B) { return ((__m128)vec_cmpgt((__v4sf)__A, (__v4sf)__B)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpge_ps(__m128 __A, __m128 __B) { return ((__m128)vec_cmpge((__v4sf)__A, (__v4sf)__B)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpneq_ps(__m128 __A, __m128 __B) { __v4sf __temp = (__v4sf)vec_cmpeq((__v4sf)__A, (__v4sf)__B); return ((__m128)vec_nor(__temp, __temp)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnlt_ps(__m128 __A, __m128 __B) { return ((__m128)vec_cmpge((__v4sf)__A, (__v4sf)__B)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnle_ps(__m128 __A, __m128 __B) { return ((__m128)vec_cmpgt((__v4sf)__A, (__v4sf)__B)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpngt_ps(__m128 __A, __m128 __B) { return ((__m128)vec_cmple((__v4sf)__A, (__v4sf)__B)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnge_ps(__m128 __A, __m128 __B) { return 
((__m128)vec_cmplt((__v4sf)__A, (__v4sf)__B)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpord_ps(__m128 __A, __m128 __B) { __vector unsigned int __a, __b; __vector unsigned int __c, __d; static const __vector unsigned int __float_exp_mask = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000}; __a = (__vector unsigned int)vec_abs((__v4sf)__A); __b = (__vector unsigned int)vec_abs((__v4sf)__B); __c = (__vector unsigned int)vec_cmpgt(__float_exp_mask, __a); __d = (__vector unsigned int)vec_cmpgt(__float_exp_mask, __b); return ((__m128)vec_and(__c, __d)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpunord_ps(__m128 __A, __m128 __B) { __vector unsigned int __a, __b; __vector unsigned int __c, __d; static const __vector unsigned int __float_exp_mask = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000}; __a = (__vector unsigned int)vec_abs((__v4sf)__A); __b = (__vector unsigned int)vec_abs((__v4sf)__B); __c = (__vector unsigned int)vec_cmpgt(__a, __float_exp_mask); __d = (__vector unsigned int)vec_cmpgt(__b, __float_exp_mask); return ((__m128)vec_or(__c, __d)); } /* Perform a comparison on the lower SPFP values of A and B. If the comparison is true, place a mask of all ones in the result, otherwise a mask of zeros. The upper three SPFP values are passed through from A. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_ss(__m128 __A, __m128 __B) { static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; __v4sf __a, __b, __c; /* PowerISA VMX does not allow partial (for just element 0) * results. So to insure we don't generate spurious exceptions * (from the upper elements) we splat the lower float * before we to the operation. */ __a = vec_splat((__v4sf)__A, 0); __b = vec_splat((__v4sf)__B, 0); __c = (__v4sf)vec_cmpeq(__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. */ return ((__m128)vec_sel((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_ss(__m128 __A, __m128 __B) { static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; __v4sf __a, __b, __c; /* PowerISA VMX does not allow partial (for just element 0) * results. So to insure we don't generate spurious exceptions * (from the upper elements) we splat the lower float * before we to the operation. */ __a = vec_splat((__v4sf)__A, 0); __b = vec_splat((__v4sf)__B, 0); __c = (__v4sf)vec_cmplt(__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. */ return ((__m128)vec_sel((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmple_ss(__m128 __A, __m128 __B) { static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; __v4sf __a, __b, __c; /* PowerISA VMX does not allow partial (for just element 0) * results. So to insure we don't generate spurious exceptions * (from the upper elements) we splat the lower float * before we to the operation. */ __a = vec_splat((__v4sf)__A, 0); __b = vec_splat((__v4sf)__B, 0); __c = (__v4sf)vec_cmple(__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. 
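A hypothetical sketch of how the packed comparisons above are typically consumed: each lane of the result is all-ones or all-zero, so it can drive a branch-free per-lane select with the logical operations.

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m128 a = _mm_set_ps(8.0f, 1.0f, 6.0f, 3.0f);
  __m128 b = _mm_set_ps(2.0f, 7.0f, 4.0f, 5.0f);

  __m128 mask = _mm_cmpgt_ps(a, b);                 /* all-ones lanes where a > b */
  __m128 sel  = _mm_or_ps(_mm_and_ps(mask, a),      /* keep a where the mask is set */
                          _mm_andnot_ps(mask, b));  /* keep b where it is clear */

  float out[4];
  _mm_storeu_ps(out, sel);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  /* per-lane max: 5 6 7 8 */
  return 0;
}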
*/ return ((__m128)vec_sel((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_ss(__m128 __A, __m128 __B) { static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; __v4sf __a, __b, __c; /* PowerISA VMX does not allow partial (for just element 0) * results. So to insure we don't generate spurious exceptions * (from the upper elements) we splat the lower float * before we to the operation. */ __a = vec_splat((__v4sf)__A, 0); __b = vec_splat((__v4sf)__B, 0); __c = (__v4sf)vec_cmpgt(__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. */ return ((__m128)vec_sel((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpge_ss(__m128 __A, __m128 __B) { static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; __v4sf __a, __b, __c; /* PowerISA VMX does not allow partial (for just element 0) * results. So to insure we don't generate spurious exceptions * (from the upper elements) we splat the lower float * before we to the operation. */ __a = vec_splat((__v4sf)__A, 0); __b = vec_splat((__v4sf)__B, 0); __c = (__v4sf)vec_cmpge(__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. */ return ((__m128)vec_sel((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpneq_ss(__m128 __A, __m128 __B) { static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; __v4sf __a, __b, __c; /* PowerISA VMX does not allow partial (for just element 0) * results. So to insure we don't generate spurious exceptions * (from the upper elements) we splat the lower float * before we to the operation. */ __a = vec_splat((__v4sf)__A, 0); __b = vec_splat((__v4sf)__B, 0); __c = (__v4sf)vec_cmpeq(__a, __b); __c = vec_nor(__c, __c); /* Then we merge the lower float result with the original upper * float elements from __A. */ return ((__m128)vec_sel((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnlt_ss(__m128 __A, __m128 __B) { static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; __v4sf __a, __b, __c; /* PowerISA VMX does not allow partial (for just element 0) * results. So to insure we don't generate spurious exceptions * (from the upper elements) we splat the lower float * before we to the operation. */ __a = vec_splat((__v4sf)__A, 0); __b = vec_splat((__v4sf)__B, 0); __c = (__v4sf)vec_cmpge(__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. */ return ((__m128)vec_sel((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnle_ss(__m128 __A, __m128 __B) { static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; __v4sf __a, __b, __c; /* PowerISA VMX does not allow partial (for just element 0) * results. So to insure we don't generate spurious exceptions * (from the upper elements) we splat the lower float * before we to the operation. */ __a = vec_splat((__v4sf)__A, 0); __b = vec_splat((__v4sf)__B, 0); __c = (__v4sf)vec_cmpgt(__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. 
*/ return ((__m128)vec_sel((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpngt_ss(__m128 __A, __m128 __B) { static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; __v4sf __a, __b, __c; /* PowerISA VMX does not allow partial (for just element 0) * results. So to insure we don't generate spurious exceptions * (from the upper elements) we splat the lower float * before we to the operation. */ __a = vec_splat((__v4sf)__A, 0); __b = vec_splat((__v4sf)__B, 0); __c = (__v4sf)vec_cmple(__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. */ return ((__m128)vec_sel((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnge_ss(__m128 __A, __m128 __B) { static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; __v4sf __a, __b, __c; /* PowerISA VMX does not allow partial (for just element 0) * results. So to insure we don't generate spurious exceptions * (from the upper elements) we splat the lower float * before we do the operation. */ __a = vec_splat((__v4sf)__A, 0); __b = vec_splat((__v4sf)__B, 0); __c = (__v4sf)vec_cmplt(__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. */ return ((__m128)vec_sel((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpord_ss(__m128 __A, __m128 __B) { __vector unsigned int __a, __b; __vector unsigned int __c, __d; static const __vector unsigned int __float_exp_mask = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000}; static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; __a = (__vector unsigned int)vec_abs((__v4sf)__A); __b = (__vector unsigned int)vec_abs((__v4sf)__B); __c = (__vector unsigned int)vec_cmpgt(__float_exp_mask, __a); __d = (__vector unsigned int)vec_cmpgt(__float_exp_mask, __b); __c = vec_and(__c, __d); /* Then we merge the lower float result with the original upper * float elements from __A. */ return ((__m128)vec_sel((__v4sf)__A, (__v4sf)__c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpunord_ss(__m128 __A, __m128 __B) { __vector unsigned int __a, __b; __vector unsigned int __c, __d; static const __vector unsigned int __float_exp_mask = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000}; static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; __a = (__vector unsigned int)vec_abs((__v4sf)__A); __b = (__vector unsigned int)vec_abs((__v4sf)__B); __c = (__vector unsigned int)vec_cmpgt(__a, __float_exp_mask); __d = (__vector unsigned int)vec_cmpgt(__b, __float_exp_mask); __c = vec_or(__c, __d); /* Then we merge the lower float result with the original upper * float elements from __A. */ return ((__m128)vec_sel((__v4sf)__A, (__v4sf)__c, __mask)); } /* Compare the lower SPFP values of A and B and return 1 if true and 0 if false. 
*/ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comieq_ss(__m128 __A, __m128 __B) { return (__A[0] == __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comilt_ss(__m128 __A, __m128 __B) { return (__A[0] < __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comile_ss(__m128 __A, __m128 __B) { return (__A[0] <= __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comigt_ss(__m128 __A, __m128 __B) { return (__A[0] > __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comige_ss(__m128 __A, __m128 __B) { return (__A[0] >= __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comineq_ss(__m128 __A, __m128 __B) { return (__A[0] != __B[0]); } /* FIXME * The __mm_ucomi??_ss implementations below are exactly the same as * __mm_comi??_ss because GCC for PowerPC only generates unordered * compares (scalar and vector). * Technically __mm_comieq_ss et al should be using the ordered * compare and signal for QNaNs. * The __mm_ucomieq_sd et all should be OK, as is. */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomieq_ss(__m128 __A, __m128 __B) { return (__A[0] == __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomilt_ss(__m128 __A, __m128 __B) { return (__A[0] < __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomile_ss(__m128 __A, __m128 __B) { return (__A[0] <= __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomigt_ss(__m128 __A, __m128 __B) { return (__A[0] > __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomige_ss(__m128 __A, __m128 __B) { return (__A[0] >= __B[0]); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomineq_ss(__m128 __A, __m128 __B) { return (__A[0] != __B[0]); } extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtss_f32(__m128 __A) { return ((__v4sf)__A)[0]; } /* Convert the lower SPFP value to a 32-bit integer according to the current rounding mode. */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtss_si32(__m128 __A) { int __res; #ifdef _ARCH_PWR8 double __dtmp; __asm__( #ifdef __LITTLE_ENDIAN__ "xxsldwi %x0,%x0,%x0,3;\n" #endif "xscvspdp %x2,%x0;\n" "fctiw %2,%2;\n" "mfvsrd %1,%x2;\n" : "+wa"(__A), "=r"(__res), "=f"(__dtmp) :); #else __res = __builtin_rint(__A[0]); #endif return __res; } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvt_ss2si(__m128 __A) { return _mm_cvtss_si32(__A); } /* Convert the lower SPFP value to a 32-bit integer according to the current rounding mode. */ /* Intel intrinsic. 
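A hypothetical sketch of the comi/ucomi helpers above: they compare only element 0 and return an ordinary int, so they are the natural way to branch on a scalar SSE value.

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  __m128 a = _mm_set_ss(1.5f);
  __m128 b = _mm_set_ss(2.5f);
  if (_mm_comilt_ss(a, b))
    printf("a[0] < b[0]\n");
  printf("a[0] = %g\n", _mm_cvtss_f32(a));   /* extract element 0 as a plain float */
  return 0;
}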
/* Convert the lower SPFP value to a 64-bit integer according to the current rounding mode. */ /* Intel intrinsic. */ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtss_si64(__m128 __A) { long long __res; #if defined(_ARCH_PWR8) && defined(__powerpc64__) double __dtmp; __asm__( #ifdef __LITTLE_ENDIAN__ "xxsldwi %x0,%x0,%x0,3;\n" #endif "xscvspdp %x2,%x0;\n" "fctid %2,%2;\n" "mfvsrd %1,%x2;\n" : "+wa"(__A), "=r"(__res), "=f"(__dtmp) :); #else __res = __builtin_llrint(__A[0]); #endif return __res; } /* Microsoft intrinsic. */ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtss_si64x(__m128 __A) { return _mm_cvtss_si64((__v4sf)__A); } /* Constants for use with _mm_prefetch. */ enum _mm_hint { /* _MM_HINT_ET is _MM_HINT_T with set 3rd bit. */ _MM_HINT_ET0 = 7, _MM_HINT_ET1 = 6, _MM_HINT_T0 = 3, _MM_HINT_T1 = 2, _MM_HINT_T2 = 1, _MM_HINT_NTA = 0 }; /* Loads one cache line from address P to a location "closer" to the processor. The selector I specifies the type of prefetch operation. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_prefetch(const void *__P, enum _mm_hint __I) { /* Current PowerPC implementations ignore the hint parameter. */ __builtin_prefetch(__P); } /* Convert the two lower SPFP values to 32-bit integers according to the current rounding mode. Return the integers in packed form. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtps_pi32(__m128 __A) { __v4sf __temp, __rounded; __vector unsigned long long __result; /* Splat two lower SPFP values to both halves. */ __temp = (__v4sf)vec_splat((__vector long long)__A, 0); __rounded = vec_rint(__temp); __result = (__vector unsigned long long)vec_cts(__rounded, 0); return (__m64)((__vector long long)__result)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvt_ps2pi(__m128 __A) { return _mm_cvtps_pi32(__A); } /* Truncate the lower SPFP value to a 32-bit integer. */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttss_si32(__m128 __A) { /* Extract the lower float element. */ float __temp = __A[0]; /* truncate to 32-bit integer and return. */ return __temp; } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtt_ss2si(__m128 __A) { return _mm_cvttss_si32(__A); } /* Intel intrinsic. */ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttss_si64(__m128 __A) { /* Extract the lower float element. */ float __temp = __A[0]; /* truncate to 64-bit integer and return. */ return __temp; } /* Microsoft intrinsic. */ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttss_si64x(__m128 __A) { /* Extract the lower float element. */ float __temp = __A[0]; /* truncate to 64-bit integer and return. */ return __temp; } /* Truncate the two lower SPFP values to 32-bit integers. Return the integers in packed form. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttps_pi32(__m128 __A) { __v4sf __temp; __vector unsigned long long __result; /* Splat two lower SPFP values to both halves. 
*/ __temp = (__v4sf)vec_splat((__vector long long)__A, 0); __result = (__vector unsigned long long)vec_cts(__temp, 0); return (__m64)((__vector long long)__result)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtt_ps2pi(__m128 __A) { return _mm_cvttps_pi32(__A); } /* Convert B to a SPFP value and insert it as element zero in A. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi32_ss(__m128 __A, int __B) { float __temp = __B; __A[0] = __temp; return __A; } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvt_si2ss(__m128 __A, int __B) { return _mm_cvtsi32_ss(__A, __B); } /* Convert B to a SPFP value and insert it as element zero in A. */ /* Intel intrinsic. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi64_ss(__m128 __A, long long __B) { float __temp = __B; __A[0] = __temp; return __A; } /* Microsoft intrinsic. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi64x_ss(__m128 __A, long long __B) { return _mm_cvtsi64_ss(__A, __B); } /* Convert the two 32-bit values in B to SPFP form and insert them as the two lower elements in A. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpi32_ps(__m128 __A, __m64 __B) { __vector signed int __vm1; __vector float __vf1; __vm1 = (__vector signed int)(__vector unsigned long long){__B, __B}; __vf1 = (__vector float)vec_ctf(__vm1, 0); return ((__m128)(__vector unsigned long long){ ((__vector unsigned long long)__vf1)[0], ((__vector unsigned long long)__A)[1]}); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvt_pi2ps(__m128 __A, __m64 __B) { return _mm_cvtpi32_ps(__A, __B); } /* Convert the four signed 16-bit values in A to SPFP form. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpi16_ps(__m64 __A) { __vector signed short __vs8; __vector signed int __vi4; __vector float __vf1; __vs8 = (__vector signed short)(__vector unsigned long long){__A, __A}; __vi4 = vec_vupklsh(__vs8); __vf1 = (__vector float)vec_ctf(__vi4, 0); return (__m128)__vf1; } /* Convert the four unsigned 16-bit values in A to SPFP form. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpu16_ps(__m64 __A) { const __vector unsigned short __zero = {0, 0, 0, 0, 0, 0, 0, 0}; __vector unsigned short __vs8; __vector unsigned int __vi4; __vector float __vf1; __vs8 = (__vector unsigned short)(__vector unsigned long long){__A, __A}; __vi4 = (__vector unsigned int)vec_mergel #ifdef __LITTLE_ENDIAN__ (__vs8, __zero); #else (__zero, __vs8); #endif __vf1 = (__vector float)vec_ctf(__vi4, 0); return (__m128)__vf1; } /* Convert the low four signed 8-bit values in A to SPFP form. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpi8_ps(__m64 __A) { __vector signed char __vc16; __vector signed short __vs8; __vector signed int __vi4; __vector float __vf1; __vc16 = (__vector signed char)(__vector unsigned long long){__A, __A}; __vs8 = vec_vupkhsb(__vc16); __vi4 = vec_vupkhsh(__vs8); __vf1 = (__vector float)vec_ctf(__vi4, 0); return (__m128)__vf1; } /* Convert the low four unsigned 8-bit values in A to SPFP form. 
*/ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpu8_ps(__m64 __A) { const __vector unsigned char __zero = {0, 0, 0, 0, 0, 0, 0, 0}; __vector unsigned char __vc16; __vector unsigned short __vs8; __vector unsigned int __vi4; __vector float __vf1; __vc16 = (__vector unsigned char)(__vector unsigned long long){__A, __A}; #ifdef __LITTLE_ENDIAN__ __vs8 = (__vector unsigned short)vec_mergel(__vc16, __zero); __vi4 = (__vector unsigned int)vec_mergeh(__vs8, (__vector unsigned short)__zero); #else __vs8 = (__vector unsigned short)vec_mergel(__zero, __vc16); __vi4 = (__vector unsigned int)vec_mergeh((__vector unsigned short)__zero, __vs8); #endif __vf1 = (__vector float)vec_ctf(__vi4, 0); return (__m128)__vf1; } /* Convert the four signed 32-bit values in A and B to SPFP form. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpi32x2_ps(__m64 __A, __m64 __B) { __vector signed int __vi4; __vector float __vf4; __vi4 = (__vector signed int)(__vector unsigned long long){__A, __B}; __vf4 = (__vector float)vec_ctf(__vi4, 0); return (__m128)__vf4; } /* Convert the four SPFP values in A to four signed 16-bit integers. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtps_pi16(__m128 __A) { __v4sf __rounded; __vector signed int __temp; __vector unsigned long long __result; __rounded = vec_rint(__A); __temp = vec_cts(__rounded, 0); __result = (__vector unsigned long long)vec_pack(__temp, __temp); return (__m64)((__vector long long)__result)[0]; } /* Convert the four SPFP values in A to four signed 8-bit integers. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtps_pi8(__m128 __A) { __v4sf __rounded; __vector signed int __tmp_i; static const __vector signed int __zero = {0, 0, 0, 0}; __vector signed short __tmp_s; __vector signed char __res_v; __rounded = vec_rint(__A); __tmp_i = vec_cts(__rounded, 0); __tmp_s = vec_pack(__tmp_i, __zero); __res_v = vec_pack(__tmp_s, __tmp_s); return (__m64)((__vector long long)__res_v)[0]; } /* Selects four specific SPFP values from A and B based on MASK. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shuffle_ps(__m128 __A, __m128 __B, int const __mask) { unsigned long __element_selector_10 = __mask & 0x03; unsigned long __element_selector_32 = (__mask >> 2) & 0x03; unsigned long __element_selector_54 = (__mask >> 4) & 0x03; unsigned long __element_selector_76 = (__mask >> 6) & 0x03; static const unsigned int __permute_selectors[4] = { #ifdef __LITTLE_ENDIAN__ 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C #else 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F #endif }; __vector unsigned int __t; __t[0] = __permute_selectors[__element_selector_10]; __t[1] = __permute_selectors[__element_selector_32]; __t[2] = __permute_selectors[__element_selector_54] + 0x10101010; __t[3] = __permute_selectors[__element_selector_76] + 0x10101010; return vec_perm((__v4sf)__A, (__v4sf)__B, (__vector unsigned char)__t); } /* Selects and interleaves the upper two SPFP values from A and B. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpackhi_ps(__m128 __A, __m128 __B) { return (__m128)vec_vmrglw((__v4sf)__A, (__v4sf)__B); } /* Selects and interleaves the lower two SPFP values from A and B. 
*/ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpacklo_ps(__m128 __A, __m128 __B) { return (__m128)vec_vmrghw((__v4sf)__A, (__v4sf)__B); } /* Sets the upper two SPFP values with 64-bits of data loaded from P; the lower two values are passed through from A. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadh_pi(__m128 __A, __m64 const *__P) { __vector unsigned long long __a = (__vector unsigned long long)__A; __vector unsigned long long __p = vec_splats(*__P); __a[1] = __p[1]; return (__m128)__a; } /* Stores the upper two SPFP values of A into P. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storeh_pi(__m64 *__P, __m128 __A) { __vector unsigned long long __a = (__vector unsigned long long)__A; *__P = __a[1]; } /* Moves the upper two values of B into the lower two values of A. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movehl_ps(__m128 __A, __m128 __B) { return (__m128)vec_mergel((__vector unsigned long long)__B, (__vector unsigned long long)__A); } /* Moves the lower two values of B into the upper two values of A. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movelh_ps(__m128 __A, __m128 __B) { return (__m128)vec_mergeh((__vector unsigned long long)__A, (__vector unsigned long long)__B); } /* Sets the lower two SPFP values with 64-bits of data loaded from P; the upper two values are passed through from A. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadl_pi(__m128 __A, __m64 const *__P) { __vector unsigned long long __a = (__vector unsigned long long)__A; __vector unsigned long long __p = vec_splats(*__P); __a[0] = __p[0]; return (__m128)__a; } /* Stores the lower two SPFP values of A into P. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storel_pi(__m64 *__P, __m128 __A) { __vector unsigned long long __a = (__vector unsigned long long)__A; *__P = __a[0]; } #ifdef _ARCH_PWR8 /* Intrinsic functions that require PowerISA 2.07 minimum. */ /* Creates a 4-bit mask from the most significant bits of the SPFP values. */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movemask_ps(__m128 __A) { #ifdef _ARCH_PWR10 return vec_extractm((__vector unsigned int)__A); #else __vector unsigned long long __result; static const __vector unsigned int __perm_mask = { #ifdef __LITTLE_ENDIAN__ 0x00204060, 0x80808080, 0x80808080, 0x80808080 #else 0x80808080, 0x80808080, 0x80808080, 0x00204060 #endif }; __result = ((__vector unsigned long long)vec_vbpermq( (__vector unsigned char)__A, (__vector unsigned char)__perm_mask)); #ifdef __LITTLE_ENDIAN__ return __result[1]; #else return __result[0]; #endif #endif /* !_ARCH_PWR10 */ } #endif /* _ARCH_PWR8 */ /* Create a vector with all four elements equal to *P. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load1_ps(float const *__P) { return _mm_set1_ps(*__P); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load_ps1(float const *__P) { return _mm_load1_ps(__P); } /* Extracts one of the four words of A. The selector N must be immediate. 
*/ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_pi16(__m64 const __A, int const __N) { unsigned int __shiftr = __N & 3; #ifdef __BIG_ENDIAN__ __shiftr = 3 - __shiftr; #endif return ((__A >> (__shiftr * 16)) & 0xffff); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pextrw(__m64 const __A, int const __N) { return _mm_extract_pi16(__A, __N); } /* Inserts word D into one of four words of A. The selector N must be immediate. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_insert_pi16(__m64 const __A, int const __D, int const __N) { const int __shiftl = (__N & 3) * 16; const __m64 __shiftD = (const __m64)__D << __shiftl; const __m64 __mask = 0xffffUL << __shiftl; __m64 __result = (__A & (~__mask)) | (__shiftD & __mask); return __result; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pinsrw(__m64 const __A, int const __D, int const __N) { return _mm_insert_pi16(__A, __D, __N); } /* Compute the element-wise maximum of signed 16-bit values. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_pi16(__m64 __A, __m64 __B) { #if _ARCH_PWR8 __vector signed short __a, __b, __r; __vector __bool short __c; __a = (__vector signed short)vec_splats(__A); __b = (__vector signed short)vec_splats(__B); __c = (__vector __bool short)vec_cmpgt(__a, __b); __r = vec_sel(__b, __a, __c); return (__m64)((__vector long long)__r)[0]; #else __m64_union __m1, __m2, __res; __m1.as_m64 = __A; __m2.as_m64 = __B; __res.as_short[0] = (__m1.as_short[0] > __m2.as_short[0]) ? __m1.as_short[0] : __m2.as_short[0]; __res.as_short[1] = (__m1.as_short[1] > __m2.as_short[1]) ? __m1.as_short[1] : __m2.as_short[1]; __res.as_short[2] = (__m1.as_short[2] > __m2.as_short[2]) ? __m1.as_short[2] : __m2.as_short[2]; __res.as_short[3] = (__m1.as_short[3] > __m2.as_short[3]) ? __m1.as_short[3] : __m2.as_short[3]; return (__m64)__res.as_m64; #endif } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pmaxsw(__m64 __A, __m64 __B) { return _mm_max_pi16(__A, __B); } /* Compute the element-wise maximum of unsigned 8-bit values. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_pu8(__m64 __A, __m64 __B) { #if _ARCH_PWR8 __vector unsigned char __a, __b, __r; __vector __bool char __c; __a = (__vector unsigned char)vec_splats(__A); __b = (__vector unsigned char)vec_splats(__B); __c = (__vector __bool char)vec_cmpgt(__a, __b); __r = vec_sel(__b, __a, __c); return (__m64)((__vector long long)__r)[0]; #else __m64_union __m1, __m2, __res; long __i; __m1.as_m64 = __A; __m2.as_m64 = __B; for (__i = 0; __i < 8; __i++) __res.as_char[__i] = ((unsigned char)__m1.as_char[__i] > (unsigned char)__m2.as_char[__i]) ? __m1.as_char[__i] : __m2.as_char[__i]; return (__m64)__res.as_m64; #endif } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pmaxub(__m64 __A, __m64 __B) { return _mm_max_pu8(__A, __B); } /* Compute the element-wise minimum of signed 16-bit values. 
*/ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_pi16(__m64 __A, __m64 __B) { #if _ARCH_PWR8 __vector signed short __a, __b, __r; __vector __bool short __c; __a = (__vector signed short)vec_splats(__A); __b = (__vector signed short)vec_splats(__B); __c = (__vector __bool short)vec_cmplt(__a, __b); __r = vec_sel(__b, __a, __c); return (__m64)((__vector long long)__r)[0]; #else __m64_union __m1, __m2, __res; __m1.as_m64 = __A; __m2.as_m64 = __B; __res.as_short[0] = (__m1.as_short[0] < __m2.as_short[0]) ? __m1.as_short[0] : __m2.as_short[0]; __res.as_short[1] = (__m1.as_short[1] < __m2.as_short[1]) ? __m1.as_short[1] : __m2.as_short[1]; __res.as_short[2] = (__m1.as_short[2] < __m2.as_short[2]) ? __m1.as_short[2] : __m2.as_short[2]; __res.as_short[3] = (__m1.as_short[3] < __m2.as_short[3]) ? __m1.as_short[3] : __m2.as_short[3]; return (__m64)__res.as_m64; #endif } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pminsw(__m64 __A, __m64 __B) { return _mm_min_pi16(__A, __B); } /* Compute the element-wise minimum of unsigned 8-bit values. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_pu8(__m64 __A, __m64 __B) { #if _ARCH_PWR8 __vector unsigned char __a, __b, __r; __vector __bool char __c; __a = (__vector unsigned char)vec_splats(__A); __b = (__vector unsigned char)vec_splats(__B); __c = (__vector __bool char)vec_cmplt(__a, __b); __r = vec_sel(__b, __a, __c); return (__m64)((__vector long long)__r)[0]; #else __m64_union __m1, __m2, __res; long __i; __m1.as_m64 = __A; __m2.as_m64 = __B; for (__i = 0; __i < 8; __i++) __res.as_char[__i] = ((unsigned char)__m1.as_char[__i] < (unsigned char)__m2.as_char[__i]) ? __m1.as_char[__i] : __m2.as_char[__i]; return (__m64)__res.as_m64; #endif } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pminub(__m64 __A, __m64 __B) { return _mm_min_pu8(__A, __B); } /* Create an 8-bit mask of the signs of 8-bit values. */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movemask_pi8(__m64 __A) { #ifdef __powerpc64__ unsigned long long __p = #ifdef __LITTLE_ENDIAN__ 0x0008101820283038UL; // permute control for sign bits #else 0x3830282018100800UL; // permute control for sign bits #endif return __builtin_bpermd(__p, __A); #else #ifdef __LITTLE_ENDIAN__ unsigned int __mask = 0x20283038UL; unsigned int __r1 = __builtin_bpermd(__mask, __A) & 0xf; unsigned int __r2 = __builtin_bpermd(__mask, __A >> 32) & 0xf; #else unsigned int __mask = 0x38302820UL; unsigned int __r1 = __builtin_bpermd(__mask, __A >> 32) & 0xf; unsigned int __r2 = __builtin_bpermd(__mask, __A) & 0xf; #endif return (__r2 << 4) | __r1; #endif } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pmovmskb(__m64 __A) { return _mm_movemask_pi8(__A); } /* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values in B and produce the high 16 bits of the 32-bit results. 
*/ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mulhi_pu16(__m64 __A, __m64 __B) { __vector unsigned short __a, __b; __vector unsigned short __c; __vector unsigned int __w0, __w1; __vector unsigned char __xform1 = { #ifdef __LITTLE_ENDIAN__ 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, 0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F #else 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15, 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15 #endif }; __a = (__vector unsigned short)vec_splats(__A); __b = (__vector unsigned short)vec_splats(__B); __w0 = vec_vmuleuh(__a, __b); __w1 = vec_vmulouh(__a, __b); __c = (__vector unsigned short)vec_perm(__w0, __w1, __xform1); return (__m64)((__vector long long)__c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pmulhuw(__m64 __A, __m64 __B) { return _mm_mulhi_pu16(__A, __B); } /* Return a combination of the four 16-bit values in A. The selector must be an immediate. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shuffle_pi16(__m64 __A, int const __N) { unsigned long __element_selector_10 = __N & 0x03; unsigned long __element_selector_32 = (__N >> 2) & 0x03; unsigned long __element_selector_54 = (__N >> 4) & 0x03; unsigned long __element_selector_76 = (__N >> 6) & 0x03; static const unsigned short __permute_selectors[4] = { #ifdef __LITTLE_ENDIAN__ 0x0908, 0x0B0A, 0x0D0C, 0x0F0E #else 0x0607, 0x0405, 0x0203, 0x0001 #endif }; __m64_union __t; __vector unsigned long long __a, __p, __r; #ifdef __LITTLE_ENDIAN__ __t.as_short[0] = __permute_selectors[__element_selector_10]; __t.as_short[1] = __permute_selectors[__element_selector_32]; __t.as_short[2] = __permute_selectors[__element_selector_54]; __t.as_short[3] = __permute_selectors[__element_selector_76]; #else __t.as_short[3] = __permute_selectors[__element_selector_10]; __t.as_short[2] = __permute_selectors[__element_selector_32]; __t.as_short[1] = __permute_selectors[__element_selector_54]; __t.as_short[0] = __permute_selectors[__element_selector_76]; #endif __p = vec_splats(__t.as_m64); __a = vec_splats(__A); __r = vec_perm(__a, __a, (__vector unsigned char)__p); return (__m64)((__vector long long)__r)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pshufw(__m64 __A, int const __N) { return _mm_shuffle_pi16(__A, __N); } /* Conditionally store byte elements of A into P. The high bit of each byte in the selector N determines whether the corresponding byte from A is stored. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskmove_si64(__m64 __A, __m64 __N, char *__P) { __m64 __hibit = 0x8080808080808080UL; __m64 __mask, __tmp; __m64 *__p = (__m64 *)__P; __tmp = *__p; __mask = _mm_cmpeq_pi8((__N & __hibit), __hibit); __tmp = (__tmp & (~__mask)) | (__A & __mask); *__p = __tmp; } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_maskmovq(__m64 __A, __m64 __N, char *__P) { _mm_maskmove_si64(__A, __N, __P); } /* Compute the rounded averages of the unsigned 8-bit values in A and B. 
*/ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_avg_pu8(__m64 __A, __m64 __B) { __vector unsigned char __a, __b, __c; __a = (__vector unsigned char)vec_splats(__A); __b = (__vector unsigned char)vec_splats(__B); __c = vec_avg(__a, __b); return (__m64)((__vector long long)__c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pavgb(__m64 __A, __m64 __B) { return _mm_avg_pu8(__A, __B); } /* Compute the rounded averages of the unsigned 16-bit values in A and B. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_avg_pu16(__m64 __A, __m64 __B) { __vector unsigned short __a, __b, __c; __a = (__vector unsigned short)vec_splats(__A); __b = (__vector unsigned short)vec_splats(__B); __c = vec_avg(__a, __b); return (__m64)((__vector long long)__c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pavgw(__m64 __A, __m64 __B) { return _mm_avg_pu16(__A, __B); } /* Compute the sum of the absolute differences of the unsigned 8-bit values in A and B. Return the value in the lower 16-bit word; the upper words are cleared. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sad_pu8(__m64 __A, __m64 __B) { __vector unsigned char __a, __b; __vector unsigned char __vmin, __vmax, __vabsdiff; __vector signed int __vsum; const __vector unsigned int __zero = {0, 0, 0, 0}; __m64_union __result = {0}; __a = (__vector unsigned char)(__vector unsigned long long){0UL, __A}; __b = (__vector unsigned char)(__vector unsigned long long){0UL, __B}; __vmin = vec_min(__a, __b); __vmax = vec_max(__a, __b); __vabsdiff = vec_sub(__vmax, __vmin); /* Sum four groups of bytes into integers. */ __vsum = (__vector signed int)vec_sum4s(__vabsdiff, __zero); /* Sum across four integers with integer result. */ __vsum = vec_sums(__vsum, (__vector signed int)__zero); /* The sum is in the right most 32-bits of the vector result. Transfer to a GPR and truncate to 16 bits. */ __result.as_short[0] = __vsum[3]; return __result.as_m64; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psadbw(__m64 __A, __m64 __B) { return _mm_sad_pu8(__A, __B); } /* Stores the data in A to the address P without polluting the caches. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_stream_pi(__m64 *__P, __m64 __A) { /* Use the data cache block touch for store transient. */ __asm__(" dcbtstt 0,%0" : : "b"(__P) : "memory"); *__P = __A; } /* Likewise. The address must be 16-byte aligned. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_stream_ps(float *__P, __m128 __A) { /* Use the data cache block touch for store transient. */ __asm__(" dcbtstt 0,%0" : : "b"(__P) : "memory"); _mm_store_ps(__P, __A); } /* Guarantees that every preceding store is globally visible before any subsequent store. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sfence(void) { /* Generate a light weight sync. */ __atomic_thread_fence(__ATOMIC_RELEASE); } /* The execution of the next instruction is delayed by an implementation specific amount of time. The instruction does not modify the architectural state. This is after the pop_options pragma because it does not require SSE support in the processor--the encoding is a nop on processors that do not support it. 
*/ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_pause(void) { /* There is no exact match with this construct, but the following is close to the desired effect. */ #if _ARCH_PWR8 /* On power8 and later processors we can depend on Program Priority (PRI) and the associated "very low" PRI setting. Since we don't know what PRI this thread is running at we: 1) save the current PRI from the PPR SPR into a local GPR, 2) set the PRI to "very low" via the special or 31,31,31 encoding, 3) issue an "isync" to ensure the PRI change takes effect before we execute any more instructions. Now we can execute a lwsync (release barrier) while we execute this thread at "very low" PRI. Finally we restore the original PRI and continue execution. */ unsigned long __PPR; __asm__ volatile(" mfppr %0;" " or 31,31,31;" " isync;" " lwsync;" " isync;" " mtppr %0;" : "=r"(__PPR) : : "memory"); #else /* For older processors where we may not even have Program Priority controls we can only depend on Heavy Weight Sync. */ __atomic_thread_fence(__ATOMIC_SEQ_CST); #endif } /* Transpose the 4x4 matrix composed of row[0-3]. */ #define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ do { \ __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3); \ __v4sf __t0 = vec_vmrghw(__r0, __r1); \ __v4sf __t1 = vec_vmrghw(__r2, __r3); \ __v4sf __t2 = vec_vmrglw(__r0, __r1); \ __v4sf __t3 = vec_vmrglw(__r2, __r3); \ (row0) = (__v4sf)vec_mergeh((__vector long long)__t0, \ (__vector long long)__t1); \ (row1) = (__v4sf)vec_mergel((__vector long long)__t0, \ (__vector long long)__t1); \ (row2) = (__v4sf)vec_mergeh((__vector long long)__t2, \ (__vector long long)__t3); \ (row3) = (__v4sf)vec_mergel((__vector long long)__t2, \ (__vector long long)__t3); \ } while (0) /* For backward source compatibility. */ //# include #else #include_next <xmmintrin.h> #endif /* defined(__powerpc64__) && \ * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* XMMINTRIN_H_ */
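/* Illustrative usage sketch (not part of the original header): a separate
 * translation unit built for powerpc64 could exercise the 4x4 transpose
 * macro like this.  The function name __example_transpose4x4 is
 * hypothetical; _mm_loadu_ps/_mm_storeu_ps are the standard xmmintrin.h
 * unaligned load/store intrinsics provided elsewhere in this header. */
#include <xmmintrin.h>

static void __example_transpose4x4(float __out[16], const float __in[16]) {
  __m128 __r0 = _mm_loadu_ps(__in + 0);
  __m128 __r1 = _mm_loadu_ps(__in + 4);
  __m128 __r2 = _mm_loadu_ps(__in + 8);
  __m128 __r3 = _mm_loadu_ps(__in + 12);
  /* Rows become columns via the vec_vmrghw/vec_vmrglw merge network above. */
  _MM_TRANSPOSE4_PS(__r0, __r1, __r2, __r3);
  _mm_storeu_ps(__out + 0, __r0);
  _mm_storeu_ps(__out + 4, __r1);
  _mm_storeu_ps(__out + 8, __r2);
  _mm_storeu_ps(__out + 12, __r3);
}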
ECDSA KeyGen failed ECDSA-sign signatureplatform: n/aunknown librarysystem libraryRSAX509V3TRUST_TOKENexternal/boringssl/src/crypto/ex_data.c'((\(9ST]SUbUS~TAVAVTUpsrp@q$%$ %L$%4&N%&$w&89D998FGVGGG2QJRQQxQno.oo/kythe/edge/childof/kythe/edge/extends/kythe/edge/extends/private/kythe/edge/extends/private/virtual/kythe/edge/extends/protected/kythe/edge/extends/protected/virtual/kythe/edge/extends/public/kythe/edge/extends/public/virtual/kythe/edge/extends/virtual/kythe/edge/generates/kythe/edge/named/kythe/edge/overrides/kythe/edge/param/kythe/edge/satisfies/kythe/edge/typed/kythe/edge/completes/kythe/edge/completes/uniquely/kythe/edge/defines/kythe/edge/defines/binding/kythe/edge/documents/kythe/edge/ref/kythe/edge/ref/call/kythe/edge/ref/imports/kythe/edge/ref/init/kythe/edge/taggedݶӺSӺӺӺӺӺӺӺEӺӺӺekythe.io/proto/kythe.proto.CxxCompilationUnitDetailsZc Zc S vT T T p p ]m m p m - F  ƺ + n /kythe_builtinskythe.io/proto/kythe.proto.BuildDetails0123456789abcdef 5& % & $ & U' & ' u% ' $ ]% =' =$ % # e& m' E( & %' ( ( % % ) # ( ( ' M& ' U$ # }# # # ( -( % }& u( " %$ ' }) 5# m$ $ " ) e) # ) M) M# ]( $ ( -% & ' % E% & " $ ) # ]" " 5) ) ) e# & u" ) E" " $ $ ' e g ؜ !    P z z ş 0 \ " ڞ Ý E z 8 t Ǜ z z Z N z ¡ Ҡ #  z z 6 z K z K z z ij ij ij ij ij ij ij ij  ڴ 6 A ˴ v ij  ij ` ij  ij ij ij + k  ij ij ý [ [ [ [ y y  S S S S ( (  I /= U +0 s T u  [ ʨ 1 V 3 ˅ l P fw 3 a w y 1 C. ] _  z Nx Y X W i @ ӣ 2 f Շ 56 87 4  7 jr Z  }    :t  x \ " \ /* [ #9 6y $ W b ^d  n a  vc Rs   ? \ π ق  x A h t ؈ > n <  ñ P   F  x    + Z d 6 $ ܲ , Ȳ 2 ɱ  @ б J " ޱ ñ Ҳ ױ <  ]  1 & ~ R G s h  g! " " " " " " " " " " ! " " " " " " " ! " " " " " ! " " " " " " }! @0BחAeAG06q&{Gz?@@u@NAV33P@Ņ1B3)\(?L~B vj@?Cod( yPD??O8M3#I9[%nF@mn.AؗҜ<9B..A20HwZ@@:0yE>L]? -C6?C=D2cA"/\ ?$@Y@@@@j@.AcAחAeA _BvH7BmB@0BļB4&k C7yAC؅W4vCNgmC=`XC@xDPKDMDJ-DyCxD(,* E52TEqىE/'E!1Eꌠ9Y>)F$_FnFF"F|FMraB3G yhGiWCG*GJH\)c=H7]rHaxHyֲHL}YI\CkFI3T|I\'Isȡ1I:~^Jd~QJvaJ0}GJ>nllJ$KAjZK=P1PKMZ>dKW`M}Kmn/LDcL^LpuLafirM?O8MrbnMG9Mz)M:Ft NdȋBN=ֺ.wN 9iNCNuOILLO֯OO[пOE!P/'%UP_QP6PbDP{U[*QmUx`Q*4VQz5߼QlX R.4R9mr"iRY) kRعeR$N(Sa򮌮>S }W-sSO\]ScbuSp] T%L9hGT.B}T}Ô%IT\nTsqUFQU`RUxӫU?+dpU5=%VN=@[Vҟ&VG0JV=:YVf$0W&sdWW)>W]3sMXk5!a9XBioX)8ӣX*4X5AHxY(-\CYr%4xYv/AYiY?ZOMZ20HwZ~$|7Z-[bZXC}"[;/V[ ;C-[SJ[= \[M"4+\0IΕ2a\|AH\[R\ysKp]WPM4]mH=j]Į]-f]u8W]am ^|M$D@^`-Ut^x^WUH^P.5_[ypH_r]~_':__ k_EW`RVR`'.N`(:W"`Yv5`o%&a.s\ax}?5ȑa\,C:a 4az]1beb"b _Sb72cϢER:ckpc2gFxc@BXVch)5,dtC7Ddx0REydVfYd6 6dCCuesTNNeGe1eeax~Ze= "f β̈Wf_jfbf8jfD},gJ#agZqg$gwWӈhה,5h :7ekhHDbhZսghJzg iN@iZbti: iDhTi VBijkzIjsYH j7-4j 8jL%k0V(wSkk21Ukjk*do^k5= 6~'l ]]l8l@4l7#l#s:V!2mOBɫfm㒻Tmp;5m б!nr-3;ngRJqn眥na}!n,}ovk*:Eobzo=$qE}om͖o\Ȁp9}UPpCD pT&)p4osp%#qVA1/XqkYqz42q܍qSr-rCbrnr1{Jr _|sNsv[06sTrlst"sRyXsW tquAtztUutGtc2 tXSTv/Ngvazjv}+vZ/v(wp-T_w&2bw~Ù:w\4@Iw!2xT):gx0xg^Jp5|x\By3tᮺvU0 5]JB-;eUkE=ƚpOܼw kAV<ЍU1(\Qӵɦqˋ#w"mSx@I̮Wζ]y<7VM6OH8oꖐ:%˅tφ* 45*g8;?Ȅ'DŖ%Οkb}$l_ Xf&ޓJ|l_bS04`U&N~)p$wߏ帟ߦ}t_ϛpDk11eU%ͬ{?;+*\Ӓsi$$d̈Po ̼,eX@bx x9?{Ηp\{2~h髤8E"&'O'1cȌ8eްeǃqB]X,iMpdJwmk}{x wyTś[[=]S5ȳ\*_F%94›\rξTSܷA"x\ӛ S!{Z:0ܵ△\S٨ > > 5> ] ] ] ] ] ] ] ] 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ x^ 6^ 6^ 6^ 6^ k^ 6^ 6^ 6^ 6^ 6^ 6^ 6^ ^ 6^ 6^ 6^ )^ 6^ ^ 6^ ^ 6^ ^ Ai j j j j j j j j j j j j j j j j j j j i j j j i j j j j j i n q q q o n o o q q n q q q q q q q q q q q q q q q q ao q q q q q q q q q q q q q q q q q q q q q q q q q q q ho bp q p q q q q q q q q q q q q q q q q q q q q q q q q q q q q n n 09 09AZ__az09AZazAZaz 09!~az ~!/:@[`{~ AZ09AZ__az09AFaf  2 s ! 
x  < v ȸ _  ( Y f K ;   : I < < < < b\\\N 9U #!:!:!z!:!:!:!:!:!:!:!:!:!:!:!:!:!:!:!:!:!:!:!:!!:!:!:!:!n!76l776l7yh4h$$$9`Զ11F1ż0&WuGG555yyyyyyyyyyyyy!!!!!Y$$$##!#$!!$$$$$$!(((())$)$)$)++(((((((()(==>====A>@AAApA@/BBABAB@@B@[A@ABCAHHHHHAZ ajkk lrss tz F y/279HJwxxy~Oa8  ~"3::+*;<==]>>(*?@?*ABCC=DDEEEGFOPP*QQ*RR*SS.TT2VW3YY6[[5\\O``3aaKcc1ee(ffDhh/ii-jjDkk)llAoo-qq)rr+uu*}})&&*&'%EETpsvw{}t&%@?  @60]#~P/ 0122N3344M5=>>D?@ABBCIJJ<KOP_`bcc$d/1V0a```  З‰@@yy}}`aa:bA '(/078?@EHMQQSSUUWWYY[[]]__`ghopqJruVvwdxyz{p|}~   &!&!*!*!!+!+!2!2!N!N!`!o!p!!!!$$$$,.,00,^,`,a,b,b, c,c,d,d,e,e,f,f,g,l,m,m,n,n,o,o,p,p,r,s,u,v,~,,,,,,,,-%-'-'-----@JKK=vLm"/2oy|}}u~ZZZZZZZZZSS`p0h!: AZ'((O(  @   @n_n `nn!""C]AZ  .@26@9G@Jv@xxy}@@O@@@@@@  ~"2@::+*;;==]>>(*AACC=DDEEEGFN@EEtpr@vvt&%@?  @~P/ `@@@.@1V0```É@@@A@(/8?HMYY[[]]__ho&!&!*!*!A+!+!2!2!`!o!!!$$,.,0`,`,b,b, c,c,d,d,g,k,@m,m,n,n,o,o,p,p,r,r,u,u,~,,,,@,,@,,@l@@".@2n@y{@}}u~@Z@@ZZZZZZZZ@p0h!: '((  @ @n_n !"JPY^_+0?DFF   ?AJVoqPP=Pptv`~!"$$'')24799;;BBGGIIKKMOQRTTWWYY[[]]__abddgjlrtwy|~~1VY 5 9 ? KP|h8jjjjj  68EPl1/111MRo((@S  * . ` d f o sz 0coo  * . ` d f o sz 46F6@MPY\_p@[`{tt~~  @@d e ??56  d f p t ~ !%!'!)!,!1!3!M!O!_!!!!&$@$J$`$')s+v++++++.N.//00000 00070<0?00000001111 2_222X33MM!09..ϩϩ[[>?0RTfhk ;@[epp37?&)fjzV`xTV  9;>@DFFJPRq+0 kp;@HPQ`es GPY` >@psvzz|`m ,,,, #$n$p$t$$C% 578<<??/++xx--@./O P S c f  ;jp|0.4'HJMPVXXZ]`Z]|------------------  &(.-%-'-'-----,.,0,^,!#$&*0J (*02359<DGHKMPPWW]cflptpsuwz}&*]afj EHMPWYY[[]]_}&!&!ee@E `eghj      ( * 0 2 3 5 6 8 9 < < > B G H K M Q Q Y \ ^ ^ f v ....//0000!0)080;04MNmp֦4@ .0/011122`2~2`|װנ ' 0 9  468<>>@ACDFOA0000@UW_oKUppQ R  *0-000 -;;gi{` r x  @ U X _ ͩЩ٩ީߩ 000011223W3foq-//         5 8 : ? H P X >AZazptvwz}/1VYY` Jnoq/M$$((@X`j 9 = = P P X a q     ( * 0 2 3 5 6 8 9 Y \ ^ ^ r t     ( * 0 2 3 5 9 = = \ ] _ a q q     ( * 9 = = X Z ` a     : = = N N T V _ a z  023@F@GIl*??PUZ]aaefnpuHJMPVXXZ]`Zlo  1@Q`lnp xPmpt T3EK#MOZ} EHMPWYY[[]]_}q q   !!!! !!!!!!$!$!&!&!(!(!*!-!/!9!>@ACDF=Pptv!:AZf  &(:<=?MP]-@BIPu'0c6@U`g 578<<?U`v   9            5 ` |        5 @ U ` r    H      # ''0E7&DDPrvv+ (*02359==PP]a4GJ/DD+ 2::PP\ .@@r  0FF`eghj #$C%0.4DFFh8j@j^jjjk/k@kCkckwk}kk@nnoDoPoPoooooppjp|TV  9;>@DFFJPR46NPnpC!"$$'')24799;;BBGGIIKKMOQRTTWWYY[[]]__abddgjlrtwy|~~֦4@ AZaz%,\bekwyq q   *!+!2!2!N!N!`!!`,,"0Z\d!:AZ7;IMO +0;@@DO6@U`g  &(:<=?MP]Фaz      !!##%%''))++--//11335578::<<>>@@BBDDFFHIKKMMOOQQSSUUWWYY[[]]__aacceeggiikkmmooqqssuuwwzz||~      !!##%%''))++--//1139<<?@BBGGIIKKMMOqqssww{}0_aacceeggiikkmmooqqssuuwwyy{{}}      !!##%%''))++--//`+kwy      !!##%%''))++--//1133557799;;==??AACCEEGGIIKKMMOOQQSSUUWWYY[[]]__aacceeggiikkmmooqqssuuwwyy{{}} '07@EPW`gp} ! !!!!!/!/!4!4!9!9!>@ACDF=Pptvfoq  &(:<=?MP]-@BIPuP'0c6@U`g 578<<?U`v   9            5 ` |        5 @ U ` r    H  # ''0E7&DDPrvv+ (*02359==PP]a4GJ/DD+ 2::PP\ .@@r  0FF`eghj #$C%0.4DFFh8j@j^jjjk/kckwk}kkoDoPoPoppjp|!"$$'')24799;;BBGGIIKKMOQRTTWWYY[[]]__abddgjlrtwy|~~֦4@ AZ      ""$$&&((**,,..0022446699;;==??AACCEEGGJJLLNNPPRRTTVVXXZZ\\^^``bbddffhhjjllnnpprrttvvxy{{}}      ""$$&&((**,,..0022:;=>AACFHHJJLLNNpprrvv/``bbddffhhjjllnnpprrttvvxxzz||~~      ""$$&&((**,,..1V      ""$$&&((**,,..0022446688::<<>>@@BBDDFFHHJJLLNNPPRRTTVVXXZZ\\^^``bbddffhhjjllnnpprrttvvxxzz||~~(/8?HMYY[[]]__ho!!!! ! 
!!!!!!!$!$!&!&!(!(!*!-!0!3!>!?!E!E!!!,.,`,`,b,d,g,g,i,i,k,k,m,p,r,r,u,u,~,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,@@BBDDFFHHJJLLNNPPRRTTVVXXZZ\\^^``bbddffhhjjll""$$&&((**,,..22446688::<<>>@@BBDDFFHHJJLLNNPPRRTTVVXXZZ\\^^``bbddffhhjjllnnyy{{}~!:'  @n_n4Mh  89;>@DFFJPl!<Up4Vn! 9 ? ? oK_pp0J#%')-Y[ : < > O Q W b c   < < > B G H K M Q Q p q u u   < < > D G H K M V W b c  > D F H J M U V b c  ; < > D F H J M W W b c 114:GN557799>?q+>VY^`bdgmqt]_24RSrs   +0;U^`|4Dks$7 ,,----*0/000ort} #'Ũ&-GS)6CCLM{} /vz      8 : ? ?   $ ' FP8F'4EFss,7>>;<>DGHKMWWbcflpt5F^^0@+,: 39;>GGQ[/68?16::<=?EGGjj0k6kQo~oooeimr{BD6;luu!#$&*DJPv      D F H J O T c f  @[^^    p  6::<=?GPY  ; ; > @ I L N O   > @   > > @ @ G H K L W W   A D   > @ F H J L W W >?+,1188;<VWbdgm#&)+0138UUWWaacdmr55;;=ACD$+45.0/0#$''èRS/034MM{{}},,EF,.2355>?ADGHKMWWbc57@AEE02;<>> !&&,.8899WX//>>Qo~oefmr pr@nn        oDoPo~ooooK_pp0J#%')-Y[ : : < < A H M M Q W b c   < < A B G H K M Q Q p q u u   < < ? ? A D M M V V b c   > @ F H J M U V b c  ; < A D M M b c 114:GN557799q~-0279:=>XY^`qt]_24RSrs   "'(229;VVX^``bbels|446:<<BBks,367 ,,----*0-000oot} %&ĨŨ&-GQ).1256CCLL|| /vz      8 : ? ?   $ ' FP8F'+-4ss/14467>>;<@@flpt8?BDFF^^3:==?@"%'+/79: 38;>GGQVY[068=??16::<=?EGGjj0k6koogi{BD6;luu!#$&*DJDPY x`l@j^j`jijnjoj`09`if o f o f o r w f o x ~ X ^ f x PY 3@Ii|FOPY@IPYp p t y P!!!!`$$$$v'',,00!0)080:011 2)2H2O2Q2_22222 )05Ш٨ Щ٩PY3@x #AAJJX_y        @ H } ~     X _ x      0 9 `~&QTRo6?PYPY0;PlPY$n$`jijPkYk[kaknn`xPYq 09`if o f o f o f o f o PY )@IFOPY@IPY )Ш٨ Щ٩PY0 9 fo6?PYPY09PYPY`jijPkYkPYY[[]^`!!!!00!0)080:0@tAAJJ$n$ r w x ~ X ^ p x *3i|p p t y P!_!!!`$$$$v'',,11 2)2H2O2Q2_22222053ux #X_y        @ H } ~     X _ x      `~&QTRe:;Zl[kaknn`xq oopP      #-/  Pz'`   H       ( * 0 2 3 5 9 < D G H K M V W \ ] _ c f w !#%*,/:;?@[]__{{}}~~Z_    jm 0>^^d e p p v v OOZ[:=JO`hmn56 DEZ`;?~ ' 0 C E Q S ^ } ~ # #)#*#h'u''''')))))),,,,p-p-...0.N.0000000000=0=00000 ss~~twΨϨ./__ͩީߩ\_ުߪ>?0RTacchhjk  ;=??[[]]_eooWW  ? ? P X     9 ?   UYGM@Ctu8=KO[[]]AC`l<>;;?FAEpqp$t$njojjj7k;kDkDknn^_kEkPkYk[kakckwk}kk`__? @ T T 34MO??--  ....:.;.@.@.0000000012XXcc ))]]}};;==F F ~ ~ # # # #*#*#i'i'k'k'm'm'o'o'q'q's's'u'u'''''''''''''))))))))))))))))))))))))))))#.#.%.%.'.'.).). 0 0 0 0 0 000000000000000>>6688::<<>>@@BBDDHHZZ\\^^ ==]]``cc    : : .... . . . ...!.!.@w          9 9 .... . . . ... . .!#%'**,,./:;?@\\~~Z_    jm 0>^^d e p p v v OOZ[JO`hmn56 DEZ`;?~  ' 0 8 ; > A C G Q S S U ^ ,,,,p-p-.... . .........*...0.9.<.?.A.A.C.N.00=0=000 ss~~twΨϨ./__ͩީߩ\_ުߪ00EFILPRTW_ahhjk  <;;?FAEpqp$t$njojjj7k;kDkDknn^_(([[{{::<<    E E } } ## # #)#)#h'h'j'j'l'l'n'n'p'p'r'r't't'''''''''''''))))))))))))))))))))))))))))".".$.$.&.&.(.(.B.B.00 0 0 0 000000000000000??557799;;==??AACCGGYY[[]];;[[__bb      0S__$$++<>^^``||~~uu   p p   O O y y ??446688@@ajt|D D R R z | !!!!! !!!!!!#!%!%!'!'!)!)!.!.!:!;!@!D!J!M!O!O!!!!# #(#+#&$@$J$$$%g''''''))))))s+v++++++,,....////0000 0 06070>0?00011111122*2G2P2P2`22222233MMƤ !(+69wy[[))bbdfii >>@@\\^^7?ywx  ??@psvzz|`m-0>ŨΨ٨$$   ?? 88iiP ^^``uu00 ![[>>@@++<>||~~D D R R z | !!@!D!K!K!!!!!!!!!!!!!!!!!!!!" #!#|#|#####%%%%%%o&o&'''''')))))))*0+D+G+L+))bbdf \\^^55OOoo p p   O O y y 446688@@ajt|!!!!! 
!!!!!!#!%!%!'!'!)!)!.!.!:!;!J!J!L!M!O!O!!!!!!!!!!!!!!!!!!!!!## ##"#(#+#{#}#####&$@$J$$$%%%%%%&n&p&g'''((+/+E+F+M+s+v++++++,,....////0000 0 06070>0?011111122*2G2P2P2`22222233MMƤ(+6799wy7?ywx  ??@psvzz|`m0YP+ JMO`j `lnprsPmpt ^`|ª۪ߪ oop    ( * 9 = D F H J M U V X Z ` c f o x  :@[GIlq0-g-o-p---+Ƥ  ( ) / / _ _ 00G( ( ) )  / / _ _ 00 ... (message truncated) {e{e{e{e...{e{e{e{er99N9~9N9N9N9N99N9N9N9N9N99N9N9N99%[Rj?$Dsp.1)"8 lN.w8!(E͙l[LB"͘{՛l 3yI]i'֠F"Οb9                   Status accessed after move.{e{eG:): :9::::ʼnLLLLΉLLLLLLLLLLLLLLLшLLLLLLLLLLLLLLLLLLLLL׉LLZLLLϋLLLLƋLLLLLLLLLLȈLLLLڈLZ%6YY|YYYYYYYYYYYYYYYYYYYYYYYYp.44f>?456789:;<=  !"#$%&'()*+,-./0123>456789:;<= ? !"#$%&'()*+,-./0123  @@@@@@@@@hHHHH@@@@@@@@@@@@@@@@@@(@  !"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\]^_`abcdefghijklmnopqrstuvwxyz{|}~  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~lHe10123456789abcdef000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff}q5 =-1eQJ$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ $$$$$$$  !"#$$$$$$  !"#$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$?*UUU$I8 I$ qfffa]tUUUQN/$IXiDDDB!>:8VUUgff櫪n۶r433F]VUUwb'mxwwyxx9Q^Cz.MoG<^Bܶm=<<}Wq?*UUUUUUUI$I$I888 袋. ؉؉ $I$I$ qqk(fffffffaatE]t YBUUUUUUUQNNKh/$I$IFXiDDDDDDD!B!>::88VUUUUUUgffffff櫪m۶m۶qq4333333]tE]VUUUUUU(vb'vb'ܶm۶mxwwwwwwyxxxxxx885P^Cyy.袋.7MoHzG;;%^Bn۶m۶m|a{|=<<<<<>>:::::88888VUUUUUUUUUUUUUUgffffffffffffff櫪n۶m۶m۶m۶m۶rqqqq433333333333333uE]tE]tE]VUUUUUUUUUUUUUUwb'vb'vb'vb'vb'm۶m۶m۶m۶mxwwwwwwwwwwwwwwyxxxxxxxxxxxxxx988886P^Cy 5P^Czyyyy袋.袋.袋.Mozӛ7Mo{GzGzG<;;;;&^B{ %^Bܶm۶m۶m۶m۶m{a{a|{{}||=<<<<<<<<<<<<<<}W|W|W|W|WqqqqqUUUU?3333*$I$qtEUUU;$I8^Cy 0 ,d! p= ^B{ I$ B|uPqUUUUUUUU?33333333*$I$I$qqqE]tEUUUUUUU;;I$I$I8885P^Cy 0 0 0 袋. ,d! p= ףp= ؉؉ %^B{ $I$I$ =B!B|PuPuPqqUUUUUUUUUUUUUUUU?3333333333333333*$I$I$I$I$I$qqqqq]tE]tE]tEUUUUUUUUUUUUUUU;;;;;$I$I$I$I$I88888P^Cy 5P^Cy 0 0 0 0 0 .袋.袋. ,d! YB,d! ףp= ףp= ףp= ؉؉؉؉ ^B{ %^B{ I$I$I$I$I$ ==!B!B!B|||uPuPuPuPuPqqqqqZ;SeefXv>ᮺSoΊߙZ+ؑFq6NMDzrxjmO\#Gf+vU0 k<ܭx݅KbSk3o=qԇˌɩH;mtv>~;Υ5]JBϲzΕIB롦`f8Gc^s b/6{+ջC-;eUkԜv,n% DHdW*͖>'ukq~X1[DLhrE=k0bЏij<)).%I qo(TJ2qn1'ƚp}x=MNF!&8#Xlo(,nGEyۤ̂}f;^@J6VchN"uO>ZS U1(\Qӑf-@s]Ύhr#j9NDGC"ɦq"|+TO9zc%C1X=c5u|&<%ˋ#w"_uv6]IYyX7 1 ?jgν߽B`A֋mSx@I̮Hh[-^zyx2_`&;\oJW`Wζ]y<$ˢhm} T]&Tpƌ$gDבv@6VM6+zCZ[l1r'0N1J<6^F&t~WOH8oꖐcZ mM`34,9gAwG`I,9b7]:%˅t׼dqӨMSf-bg`=ހx SaVfQp[y2Fqk󗿗φ}l]=c1Ü`w;մ-xF\* 4zWF׬XҘ#Tw,)Udwsj=>*b5*g81~8<<^p8G vڍԨ +eṗt;?Ȅ w UyϿՓ/8';J0F.D.>vJ'D(0Tx>ݔ20:?巧`ޥV֑f%Οk;i‡F k)XsۓPҸsUrsONdP#b}$lr]ΖKO|UraօuERAQ;H=_ Xfѷ?ϻӛa՟ȂS|n{ch i->!Qai&ޓv:k\me 3RYg+@8H۔PFfנ`2$^`#㏜ūksd|F ,.JDw`zd؋J|l_b-]:y4yx`K6C>'=z·)f&4*cR04`g|A8?,6)1c}e5C.?9ϛdpÂz}U&N(&۲oOFkȒ˻zܝYTWdeT-~)p$w6vkwąYڛSu᪢R|Dե-帟ߦ&/xs$^;V-k4y˼׷&62$C1]?Ɣ}t_|Hi"RD¢Ag k`ŗ`鸶8>G#g$ v6ΛpD’s繹;Hw(J2R l (_S#Y7yHD"'k6-w꺔R̆9'd҉>+Zꌤ11eU%>_Un* du.=Ķ{sLuZ(eMq3{?_xۏ|Vsﭚ'vcY~S|qޝh b!q&Ui ;+*\[z@g7._⼺;1awl}9U"Suu\TҒsi$$wÿ-ԴJbڗ;5 H~tڝXv%QӮR;uD'm,{tPd弥a}J]I62w[Mľ٬:|\ 5$KB.ˈPo ̼$ 믾ۗ6A_paw̫BzՔֵil7#GGŧ+eX;On ףp= ף@P$ (k@C*焑 1_.@v:k #NJbxz&n2xW ?hӠ@aQYȥo: '΄ x9? 
6Ngɖ"E@|op+ŝL67V߄\l: {Η H½ Pv1P?%O7и'ƫCưo\{2~]n0b/5E=!֕C)L{QF$+v؈Sﶓh髤8AqfcQlN@< ePKПDǕh"!9oj%p E"&'O֪"Nj+w 3;LզIxŠ'V:qCN'1c$_E^69uD]ȩdLJ:݈MXd슠p`~Ȍ8eްݫ--{9.ڇz贙#"׬+֫* eǃR?Vʸ&ϫ ^xa Z^9wuܠLqm_:JIxX{-KqB]e wet _jhvjE¿;JЕ(aʩ]D&r<uX,iM.wzU?7ߎ~S^=lX![qgpdڲX4A4>QԞ2#Ek0SF|n_OIwmU c*O~MQ5Fe B‹_FiYW7/-k}{x 2cPME fzBΨ?]ߌG#٨l*CwyT^Tj4'RE]dB!ܺ~IriE֒Pś[[26hd#Dvja5ӨŹ7hdZk"!"k*=]S5Ȍ:Bxri %ь[e8y/JXg].\*߈=9tauGѹ"+_vI}Azdұ_F;#J ,(ن"y(+EWAu-/ӷ .|]|%94›.²}DKa2(x5˲az®k[rc? <<Ϗ( CYxķapeFYy "WˇuξTSܷ.$*(VyuUD`גjU'9pbCFTfAXjiQ.A"ZҪ/ iyMh, XEa75 .|BǼ]R5VS!{Z֨Y dp>ԅ.7JŇԱ YJ^M:0ܵ$~sީq]V ukP/ԾDwZ6q>D[Z † yXr9Mnb-!= 4f|N bf'bK=`?wo!M8UHj`FS*~mBDt.SU'~U5yX4/UKoy^F {fg@'᷂g$m\,nsXjPu9BRm;Ӧ{d JU\HH/&$ڔ;X=vnd KvT }]/L|]C5;ӯmJ{ A5bKc]BcyBY{[o>B,S*xP(d$5V6w?nY{U(ω/3!v]?Skuz(YI\ٻ-qdϪMyUDrĆ"u(1k62c}1]{$crmYGBtRl'.gSۣ(Ͳ"a_ Wky;t60˜D۾D-HUJM-u]cxZb4|qM=5] Y`tK?p8+'Fc{BձL;J_ r{~TC#OaԔdӍ@EHP$W $֭; ץLL!Lϟ^gi GvsA)XRqhf@q0U@HL|jPZ٦cmJ2NY)?@|Y+X0t<6Rj'Dܹ^:)Ht!d4[I4 B@OQ]=Q* rGVieg!Y,S>ih0:BA9JFE߃2kde2?/n{S 6__,t7#8H,,Z"sM`>&1RpIFw3k.dsƣz=N\ b OInHãۉZuZF)#X񗳻,.X}jtWN¨labM w.t V$ ilc?ʲ|0]|k:BzkSҘl{qrIdGϛ= NJ9QX*r(eN2)b"=s_R?Z}5&ϰʵݓCx LKKe^y JFMENKPq/^LZN'sv]oj:6U .tE+ɋ ;D7@n j B-#JF"GV+~xYZUhaέ[ù?Brgɟ Th;#)!e ri>[0 Ho^+Ʊ 68c%8Z~HWQZ-elE1Xd˞?/"=~Frj[ŸG$bGט#?: 쎬$0hS-JyR] X`Unp" ŗ{`=jP|}B`wY78Uf/Flk=;K## ^!J5T.wAP~ dDKN^> ;ZꊭJgpꀍMy%0CXn Snʋ}4Ud^w*J6"݃:B5yrjē<u8c$S^ӄ2l1+ϟCb.2:‡ yh.L K7Soe(E.D?H9iMZDs00 h1=|6+ Liv2=I?Sr3܀N@a&1Zdp= ףp=@PM (l2@<KԆDU]ʡZ@=J͜m#, 7 4"&EA+p@_v <ji zEz ꎀ֘ErPG+ڦG$g_mA!z j+R-DvSG6e,Bb~:MBާe/눟Uc%kqkD+Sq`1FUAck-~<ƢDRs\dj:EE='WTKxT6ݧjWQDme%Jd ]=U B$ /Sm mhDi}n܁7{S⻅xtm'z81{D&v't#qINgV-ae(R63s4aF]'A1qRuqg~ӄbhx{RVp @v`5ГjCMĸJpz3zr\L.YOtsyob+EV݊6+>m7̶Ȣ" @K ж%dD.\G_,>%tw(N/opk UMlcNTGtT%k$MT¶)s$t-ɒez|/~e>"t*U_':7h*-Ciu+-s)b);{Is!6p$ӌ#@J286>C &|ca/5_([cрyb2üק+GٍQO3VnG/ X'a'͜x8Vc GtlXC 7T%`i *.GATW3LR?#wXHv W}q5 =-1eQJsH d'@Bʚ;egIٗ-$df)) *{4g4_6`ށV]dAzs@Oo s-סv=-h1??w{kqgfz̹sJ!@zG}'([#ӂ2W%ei,m$M]>fzZx6Td4@rjA+œx}++lKuu{Xb4z U/";Oe[i.5_ /lu8]-"R5*&3a*hT}]1u^I1W)C᢫!bCc6#W%BTfAScd1 $7e3&DĦ㋴yi&ag9}9 P&th[ PwY\Ҟm#u14*)2AU*zZp@ *<'qmV%SʓNЃ"sU8=?0`Xj!faILѹ}K2i\ȞQи! ]Y곉ՄER7i%3<M5\iФļ[L" O(@9HW~ uN^PkLk5D70u'_B'+,]Rܚ揱w,uK@I9˘O.xoǜsLݺCzO .O/ [d„Z 6 z}d7Ab3)M}T630[gwkGR,L("Y)ʝL1 =}).SG2ܜMAw $' wIil\\+`J 0߶fO;<A+h{]˱B@+ ZqW>/CE I4M*LLB5$:g18];> ̨xz"%QY?ˡ=AU:dDe# vrbd3jc"#*TnGpڌ@V߻39}{5p)Db87tޝUcr[p'x:`D^sՇ pƎT"JV|`̑]V Pu iEL#fI w}:-C#Eo&F*X*S.öe[e+ @ȻX\:\ pFIDE}Bs=<|F or<^\^I l\襐k_Wlk_ 7o>WfHH!T9 f f  xww@xABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_ьFьь3e&ߌߌߌߌߌߌߌߌ++F+uP̑`PPؔvvv[[[[[[[[vH וו4X  וt>>>ؘe########tt>ٜYwwwwwwwwٜٜuM~DMMJJZ=11111111J\::ҫͪͪͪͪͪͪͪͪ\\:   csdiouxXfFeEgGaAnpv{[e'4''XcsdiouxXfFeEgGaAnpv! !2!!@!G!9!N!! !csdiouxXfFeEgGaAnpv----.-J-C----------------- ---------5--Q-X-t-m---<-----'--_---f---{-- -  h{e{e{e{e;Zx0NFixed/UTC0123456789TVFhXzIZ*\ ^f_aͰcetg;Zx0Nm<[y1On0123456789\UddO}}(~(~(~}(~(~(~ ~wRRRRS;T)T2T*FFFFFFF9]FFFFFFFFFF3YY\(a8Bk&JƲ} .;soL;Ã2Ora/4(4pm ȧ rcVM9ޣBOh`jȲKa$ zy:HOʬ056Xrډwp O5W^W=Uecn_3$aezaĮu cc/}>$Ν3dA6ᩕ6-΃Ź<=' VܯJXT0 0 *H @0!0 +0-0  `He 010  `He 00A0  `He0@0Q0  `He@3`|M;>{_#p^xF(yAά?vJOҲ.3޽Ab"?{^*3/526o4ϸQ(ռdB >,ضJ9J@.Xn/M1Cc*鷋3N|/r FE{3`SNB. h'=6xJ{C^t9ۃoâ:J3L;`oJu}Yd3 ょX^'^fϹ`ؒWFu!%ҭ"xlMiڡ)<5Nx6z5Ad&/+Ʋ]FvZȵ%RGmmq=L; [u|*|5?[6_[nSsms ӼKFHtls13  @6 %)+/5;=CGIOSYaegkmq %379=KQ[]agou{  #-39;AKQWY_eikw)+57;=GUY[_mqsw %'-?CEIOU]ci  ')/QW]ew #+/=AGIMSU[ey '7EKOQUWamsy!#')3?AQSY]_iq   # % + / 5 C I M O U Y _ k q w   ! 1 9 = I W a c g o u {      # ) - ? G Q W ] e o {   % / 1 A [ _ a m s w      ! + - = ? 
O U i y !'/5;KWY]kqu}  %)1CGMOSY[gk!%+9=?Qisy{ '-9EGY_cio #)+17AGS_qsy} '-7CEIOW]gim{!/3;EMYkoqu %)+7=ACI_egk} %39=EOUimou #'3A]cw{57;CIMUgqw}13EIQ[y!#-/5?MQik{}#%/17;AGOUYeks '+-3=EKOUs !#59?AKS]ciqu{} %+/=IMOmq 9IKQgu{@+!=*\\E)>}Q_iz)K?1YȫVHq|_jayQ%cʹyL$oIyFYk+9E( V-f+"s)jzH X-7Mc $1-=G):H>JI ԕh&(z[?!9+ (Ifu=8֠x nMT:YTd+Na#/)­:MK=#{P.V筋hR@i9&Z«x+-Aq2Lzح).  w⊓K7 !+O`K+#@ d8oGɵ;Х Hk/QM|~TܺsEy8^WrI.#n>nuBϼgam[E -I-=c1r?M!Yoѓ;Pݞ5Wڟ@2N!et&ZJF(O;J_D a9QC8֟\b zL9K3 4ͫ$z>/E.cXOe5ꏢx﹚؝^`)I? xDZ-9mQwB[MM<0y_u%bw+syU7v_ VWS\J%!݈҅]%qҠ/O4bl\ ,W:Afai^/ \a`M =R7{oH7GhV jF4c4ya'ı=Z}L| &CL=W7xb sok[r% D¨bg܏Úcp8եZ$!ԇasqsvewS#>D!F,+#<[Ŋg=жܾLnX '5 <RV֏'Op34wWbݽ;gž|~WfQmYR:0t:ӯ'<уś]a`8jX6I+e_ 뮲'U sz1F:Ȼ/[qn5mVJ$jAVj&{$mk@ B>?i _к;;sr gPD4yVjhlYD1 0_2Pu]1uÙg/|?rH0#+Z%4Yoob"& Cj!yٴP>~Ƒ7@,Be6 AI+9ŵhpYûܚѪDq U/t4q@<*2CzA`Q!K_Ujg]J/Zc~Hq)%N<7}ުu} }8w lI̞tlFϥήz~{j䶖5#8kKmNS(Zȕr b;a)cA WXЕݖ=D@z+i.O4Ût0BVZ BH'IU*seFL_RY)?Ok[o_A~,3UvcQGA( r&녧ʧԩX SُYacDkbrd"a󈲞~>4=1OOr`43I1H:UepIVJ yRFEOTmiډ;"@(bx'cMYKz3Jy! PgJEGM'r&s^,DݩsعsnlSq!y;lrv]pνwʓZ m1d†IU^n ^I> S9J5{A'k8 Db-&CLA,Υ+bCg7Ɇ&BzG g A$MD vb7GeO %en6ӂ!‚D߂5l]V/~/k" QQv1 ԇ_ ypCkㅝl-"l\u@VegoCmY^´<a9.Gg^? a+UB:R9>3 MdܖH3!#wp(zpVia +8J}iQ v#=?eǔ 81K6 [=Shɞunb TBGuʲnJ xE^,vۡ>7ΒYR !+"D +xYQ@Z8Gaё*#Քy-ּ;X_W Ć۷ln}q/]GeTл`h$>30_e-ͶqbH #+$3>&KQUkqHZ؄ !/wP“[klMJȮTQwą6h3ێ^(ZD"2kr.:+^$l6'W `'_BoringCrypto Key Ŭ%Dih/wcj6OUVFAbAz&Ɔ0_WP:^@4z$WeX=OG` {QkylHAVӷ%oo@uʯHUNpy} x{Ս>Kd@/a~VR}Zc. XH- 45W` pļרhr4R !v7O9 9SؖxY<=D^²ЩjE^V@HE/ٺbcU X)8 m@( I1mF/J?LxzO@µ JS m 'BCMPersonalizationBCM DRBG KAT AD +IvQV}Dv^e5lo"CN-9C;W#?q ոgz9n%l# 酛lmI5!N.~kFEv5v:ylwQπ &K W K| j4JЃɁ*FIPS self test 趐mQycQe}$IA{ʕkT^*`#A|s ϫ$j*av(:cmsu.٨y}5b >}tjN),M{5/K~$fӓ*tK__D&.zde␔ a }rg e/m7x!6ύ"=S=kӝHS4ݿ N,HnlA >p]\f1!uZtL!`Dn4\S~C\\mU\Rz;jcٺ_a7w b8.~ L_)6(_H& U2*3Lti7yԎ=q8u1%1Zd.)R}EɠNm:-mW2##3Q}1R2eM4\S%gD9:bwɢ:&[8H!b@<&0#?h146:мNr~uߡpHQAt>/4e4MUKET u~M 3V9Utґỗ* 8Gv?\ I;L>\X'5 zAR߭L %bGwHr-_ƞZƢ k0|cCT^, ^FCSNsU/,=,!θ_O!866Gu.uX鎣i\;UdwSxu :w7"Tg^>D7)Vl0MualbƨNJ`bLt1-1Xk?>'FzߧfAex@\և)DOΈxI'N 0s2C/`ڮ+0 {JyeE*n0HWy}gGR cN=.6ے Ӆ]Rch'Oh4]0bLk ţԛTOLLzŸȎuu%;̸rlI DM@ak> w&9v=L>[XvxvI\KA'u(\0iP)һDOWQL 4P2{5j&LJgO:`HY9|4", ‘i*Ln@>ii}\xmA-r?qcOF0pQ PCX{P}ݵ*M#"ApMq >Z->NoY<s/T 0#+N+2Ej#/*$H_d~)N\75 U&#\CQ+ KV$䥨ĹH}* ܾ Ry| +`D&[-n,pl㿳Ta{cRhvsr402"7*no6!1۔c/I|A@.J aNfz4Dݲai=ҭ妁⯣ƘdIlPVv?P~Ynva,l$.Ft0RwOk{Xd9/ "~p*?tjlXn?㷚/mLp7q&VC7W_ `h؄Ht+74|@tuĈubS'$ ՗X4Dik_$K~w]7gȽOt&(=o#|pj[m\LNPkלV1j-6n纂pܽs|W4WP+5*ӦXS]= vW78b%9`n'T⽻ۇv! d F7;5e"IyܜYpSBoN%K-Z'Cfz ^:xM2,8|Cظ$ gzT~N,&q0l̬%7?$*^=C6@j n.pWS*NZ˫'و@{_Yxn& ^Ok{Xd9/ "~p*?YchzQRȘ=/<0y_u%bw+syU7v_.kZ3+gxũGaJ m&R`6%km`^ VWS\J%!݈҅]%q߽)bߜؐ0x.! 4H0lmmmimDRe0tH2 2 3 3 4 4 5 5 / 6 =6 M 7 Z7 j 8 u8 9 9 : : ; !; 2T2`3y344 556ܖ677)8G8V9r9::;ӗ;<< ===>L>c?v? 
@@AABBCC&D;DOEaE5FFvGGHHIIJs 22 3344 522334425J5r667Ԑ788 99.:T: ;;<<==>>-?F?^@ @wABAaBBCC(DD_E2 2 2$2$3$Ò3$4$4$5$5$6$)6$77$E7$N8$[8$n9$ 2(2( 3(3( 4(4(5(5( 6(22,2,33,3,.34,@4,[35,m5,36,:6,37,ֳ7,48,+8,I49,Y9,k4:,:,4;,;,<,<,4=,Ɠ=,5>,,>,D5?,\?,5@,@,5A,A,^B,B,mC,C,3D,iD,3E,E,94F,ôF,4G,G,3H,202030830W40q40x5050606070Հ70808090290G:0[:0o;0;0<0<0=0؁=0>0>0?0$?03@0?@0TA0dA0{B0B0C0C0D0D0E0E0F0:F0rG0G0H0H0I0ڃI0J0J0K0+K06L0GL0SM0iM0wN0N0O0O0P0P0Q0Q0 R05R0MS0aS0yT0T0U0ȅU0V0V0W0 W0X05X0>Y0SY0fZ0uZ0[0[0\0\0]0 ]0)^0M^0c_0m_0`0J`0Wa0:a0b0 24ċ24 3434 44]44A 54 542<(2< ?<6?<@<@< A<ӍA<B@g>@?@?@@@Ҝ@@ A@(A@AB@TB@hC@C@D@D@E@ԝE@F@F@&G@MG@H@xH@I@I@J@ޟJ@K@#K@9 L@KL@l M@M@ N@ƠN@w!O@O@\"P@pP@"Q@"R@עR@"S@S@#T@:T@e#U@U@#V@V@#W@ W@ $X@GX@v$Y@Y@Z@$[@[@$\@2\@?%]@d]@%^@^@%_@Х_@%`@`@a&a@a@&b@b@'c@#c@:'d@fd@'e@e@'f@ڧf@(g@Jg@(h@h@(i@i@4)j@Tj@*k@k@+l@l@N.m@vm@.n@n@J/o@o@/p@p@Mq@įq@/r@ r@%0s@s@90t@St@d0u@xu@0v@[v@0w@w@1x@?x@V1y@ly@1z@z@1{@ұ{@2|@ |@(}@}@1~@W~@:@ @!@b@"@@.@@2$@K@v@@%@(@%@/@.@v@@]@B@ϛ@M+@Ǚ@1@_@%@@^@@ @@q(@D@&(@V@5+@%@@@/@@@@f@&@x@.@@"@ݠ@@榘@,/@@H#@㙚@@@0@7@\!@*@@ա@!@מ@!@@E!@*A*A)ABAp-A *BB*B~Bp) B B) B B) B` B- Bv B), B` B,BS-BլB,B,B.!B6-&B-+B+B-,B,B+-B-B+.B-0Bz0Bx2D2D3D3D4DŇ4D5Dև5D6D6D7D7D-8D58DM9D[9Dn:D2H2H3HƓ3H2L2L&3LF3L4L4LR5L`5L|6L6L7L7L8L8L9L*9LC:L\:L5;Lu;LL>L>?L[?L@L@L ALALBLדBL52Pܵ2P53P3P64P*4P865PN5P`66Pv6P67P7P68P˶8P69P9P7:P*:PB7;PT;Pp7Pٷ>P7?P?P7@P@P(8AP7APQ8BPnBP~8CPCP8DPiDP8EPݸEP8FPFP9GP-GPS9HPaHPt9IPIP9JPJP9KPKP:LP3LP`:MP}MP:NPNP:OPݺOP:PP)PP:QPQPE:RP2X2h2h 3h3h4h4h 5hY2l2ld3l}3lf 2t2t 3t 2x2x 3x3x 4x4x/ 5xQ5xf 6x|6x 7x7x 8xҊ8x 9x9x :x!:x+ ;x?;xV x>xt2|2223324@4^25526O627-728o829ASN1_LENGTH_MISMATCHAUX_ERRORBAD_GET_ASN1_OBJECT_CALLBAD_OBJECT_HEADERBAD_TEMPLATEBMPSTRING_IS_WRONG_LENGTHBN_LIBBOOLEAN_IS_WRONG_LENGTHBUFFER_TOO_SMALLCONTEXT_NOT_INITIALISEDDECODE_ERRORDEPTH_EXCEEDEDDIGEST_AND_KEY_TYPE_NOT_SUPPORTEDENCODE_ERRORERROR_GETTING_TIMEEXPECTING_AN_ASN1_SEQUENCEEXPECTING_AN_INTEGEREXPECTING_AN_OBJECTEXPECTING_A_BOOLEANEXPECTING_A_TIMEEXPLICIT_LENGTH_MISMATCHEXPLICIT_TAG_NOT_CONSTRUCTEDFIELD_MISSINGFIRST_NUM_TOO_LARGEHEADER_TOO_LONGILLEGAL_BITSTRING_FORMATILLEGAL_BOOLEANILLEGAL_CHARACTERSILLEGAL_FORMATILLEGAL_HEXILLEGAL_IMPLICIT_TAGILLEGAL_INTEGERILLEGAL_NESTED_TAGGINGILLEGAL_NULLILLEGAL_NULL_VALUEILLEGAL_OBJECTILLEGAL_OPTIONAL_ANYILLEGAL_OPTIONS_ON_ITEM_TEMPLATEILLEGAL_TAGGED_ANYILLEGAL_TIME_VALUEINTEGER_NOT_ASCII_FORMATINTEGER_TOO_LARGE_FOR_LONGINVALID_BIT_STRING_BITS_LEFTINVALID_BIT_STRING_PADDINGINVALID_BMPSTRINGINVALID_DIGITINVALID_INTEGERINVALID_MODIFIERINVALID_NUMBERINVALID_OBJECT_ENCODINGINVALID_SEPARATORINVALID_TIME_FORMATINVALID_UNIVERSALSTRINGINVALID_UTF8STRINGLIST_ERRORMISSING_ASN1_EOSMISSING_EOCMISSING_SECOND_NUMBERMISSING_VALUEMSTRING_NOT_UNIVERSALMSTRING_WRONG_TAGNESTED_ASN1_ERRORNESTED_ASN1_STRINGNESTED_TOO_DEEPNON_HEX_CHARACTERSNOT_ASCII_FORMATNOT_ENOUGH_DATANO_MATCHING_CHOICE_TYPENULL_IS_WRONG_LENGTHOBJECT_NOT_ASCII_FORMATODD_NUMBER_OF_CHARSSECOND_NUMBER_TOO_LARGESEQUENCE_LENGTH_MISMATCHSEQUENCE_NOT_CONSTRUCTEDSEQUENCE_OR_SET_NEEDS_CONFIGSHORT_LINESTREAMING_NOT_SUPPORTEDSTRING_TOO_LONGSTRING_TOO_SHORTTAG_VALUE_TOO_HIGHTIME_NOT_ASCII_FORMATTOO_LONGTYPE_NOT_CONSTRUCTEDTYPE_NOT_PRIMITIVEUNEXPECTED_EOCUNIVERSALSTRING_IS_WRONG_LENGTHUNKNOWN_FORMATUNKNOWN_MESSAGE_DIGEST_ALGORITHMUNKNOWN_SIGNATURE_ALGORITHMUNKNOWN_TAGUNSUPPORTED_ANY_DEFINED_BY_TYPEUNSUPPORTED_PUBLIC_KEY_TYPEUNSUPPORTED_TYPEWRONG_INTEGER_TYPEWRONG_PUBLIC_KEY_TYPEWRONG_TAGWRONG_TYPEBAD_FOPEN_MODEBROKEN_PIPECONNECT_ERRORERROR_SETTING_NBIOINVALID_ARGUMENTIN_USEKEEPALIVENBIO_CONNECT_ERRORNO_HOSTNAME_SPECIFIEDNO_PORT_SPECIFIEDNO_SUCH_
FILENULL_PARAMETERSYS_LIBUNABLE_TO_CREATE_SOCKETUNINITIALIZEDUNSUPPORTED_METHODWRITE_TO_READ_ONLY_BIOARG2_LT_ARG3BAD_ENCODINGBAD_RECIPROCALBIGNUM_TOO_LONGBITS_TOO_SMALLCALLED_WITH_EVEN_MODULUSDIV_BY_ZEROEXPAND_ON_STATIC_BIGNUM_DATAINPUT_NOT_REDUCEDINVALID_INPUTINVALID_RANGENEGATIVE_NUMBERNOT_A_SQUARENOT_INITIALIZEDNO_INVERSEPRIVATE_KEY_TOO_LARGEP_IS_NOT_PRIMETOO_MANY_ITERATIONSTOO_MANY_TEMPORARY_VARIABLESAES_KEY_SETUP_FAILEDBAD_DECRYPTBAD_KEY_LENGTHCTRL_NOT_IMPLEMENTEDCTRL_OPERATION_NOT_IMPLEMENTEDDATA_NOT_MULTIPLE_OF_BLOCK_LENGTHINITIALIZATION_ERRORINPUT_NOT_INITIALIZEDINVALID_AD_SIZEINVALID_KEY_LENGTHINVALID_NONCEINVALID_NONCE_SIZEINVALID_OPERATIONIV_TOO_LARGENO_CIPHER_SETNO_DIRECTION_SETOUTPUT_ALIASES_INPUTTAG_TOO_LARGETOO_LARGEUNSUPPORTED_AD_SIZEUNSUPPORTED_INPUT_SIZEUNSUPPORTED_KEY_SIZEUNSUPPORTED_NONCE_SIZEUNSUPPORTED_TAG_SIZEWRONG_FINAL_BLOCK_LENGTHLIST_CANNOT_BE_NULLMISSING_CLOSE_SQUARE_BRACKETMISSING_EQUAL_SIGNNO_CLOSE_BRACEUNABLE_TO_CREATE_NEW_SECTIONVARIABLE_EXPANSION_NOT_SUPPORTEDVARIABLE_EXPANSION_TOO_LONGVARIABLE_HAS_NO_VALUEBAD_GENERATORINVALID_PARAMETERSINVALID_PUBKEYMODULUS_TOO_LARGENO_PRIVATE_VALUEUNKNOWN_HASHBAD_Q_VALUEBAD_VERSIONMISSING_PARAMETERSNEED_NEW_SETUP_VALUESBIGNUM_OUT_OF_RANGECOORDINATES_OUT_OF_RANGED2I_ECPKPARAMETERS_FAILUREEC_GROUP_NEW_BY_NAME_FAILUREGROUP2PKPARAMETERS_FAILUREGROUP_MISMATCHI2D_ECPKPARAMETERS_FAILUREINCOMPATIBLE_OBJECTSINVALID_COFACTORINVALID_COMPRESSED_POINTINVALID_COMPRESSION_BITINVALID_ENCODINGINVALID_FIELDINVALID_FORMINVALID_GROUP_ORDERINVALID_PRIVATE_KEYINVALID_SCALARMISSING_PRIVATE_KEYNON_NAMED_CURVEPKPARAMETERS2GROUP_FAILUREPOINT_AT_INFINITYPOINT_IS_NOT_ON_CURVEPUBLIC_KEY_VALIDATION_FAILEDSLOT_FULLUNDEFINED_GENERATORUNKNOWN_GROUPUNKNOWN_ORDERWRONG_CURVE_PARAMETERSWRONG_ORDERKDF_FAILEDPOINT_ARITHMETIC_FAILUREUNKNOWN_DIGEST_LENGTHBAD_SIGNATURENOT_IMPLEMENTEDRANDOM_NUMBER_GENERATION_FAILEDOPERATION_NOT_SUPPORTEDCOMMAND_NOT_SUPPORTEDDIFFERENT_KEY_TYPESDIFFERENT_PARAMETERSEMPTY_PSKEXPECTING_AN_EC_KEY_KEYEXPECTING_AN_RSA_KEYEXPECTING_A_DH_KEYEXPECTING_A_DSA_KEYILLEGAL_OR_UNSUPPORTED_PADDING_MODEINVALID_BUFFER_SIZEINVALID_DIGEST_LENGTHINVALID_DIGEST_TYPEINVALID_KEYBITSINVALID_MGF1_MDINVALID_PADDING_MODEINVALID_PEER_KEYINVALID_PSS_SALTLENINVALID_SIGNATUREKEYS_NOT_SETMEMORY_LIMIT_EXCEEDEDNOT_A_PRIVATE_KEYNOT_XOF_OR_INVALID_LENGTHNO_DEFAULT_DIGESTNO_KEY_SETNO_MDC2_SUPPORTNO_NID_FOR_CURVENO_OPERATION_SETNO_PARAMETERS_SETOPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPEOPERATON_NOT_INITIALIZEDUNKNOWN_PUBLIC_KEY_TYPEUNSUPPORTED_ALGORITHMOUTPUT_TOO_LARGEINVALID_OID_STRINGUNKNOWN_NIDBAD_BASE64_DECODEBAD_END_LINEBAD_IV_CHARSBAD_PASSWORD_READCIPHER_IS_NULLERROR_CONVERTING_PRIVATE_KEYNOT_DEK_INFONOT_ENCRYPTEDNOT_PROC_TYPENO_START_LINEREAD_KEYSHORT_HEADERUNSUPPORTED_CIPHERUNSUPPORTED_ENCRYPTIONBAD_PKCS7_VERSIONNOT_PKCS7_SIGNED_DATANO_CERTIFICATES_INCLUDEDNO_CRLS_INCLUDEDAMBIGUOUS_FRIENDLY_NAMEBAD_ITERATION_COUNTBAD_PKCS12_DATABAD_PKCS12_VERSIONCIPHER_HAS_NO_OBJECT_IDENTIFIERCRYPT_ERRORENCRYPT_ERRORERROR_SETTING_CIPHER_PARAMSINCORRECT_PASSWORDINVALID_CHARACTERSKEYGEN_FAILUREKEY_GEN_ERRORMETHOD_NOT_SUPPORTEDMISSING_MACMULTIPLE_PRIVATE_KEYS_IN_PKCS12PKCS12_PUBLIC_KEY_INTEGRITY_NOT_SUPPORTEDPKCS12_TOO_DEEPLY_NESTEDPRIVATE_KEY_DECODE_ERRORPRIVATE_KEY_ENCODE_ERRORUNKNOWN_ALGORITHMUNKNOWN_CIPHERUNKNOWN_CIPHER_ALGORITHMUNKNOWN_DIGESTUNSUPPORTED_KEYLENGTHUNSUPPORTED_KEY_DERIVATION_FUNCTIONUNSUPPORTED_OPTIONSUNSUPPORTED_PRFUNSUPPORTED_PRIVATE_KEY_ALGORITHMUNSUPPORTED_SALT_TYPEBAD_E_VALUEBAD_FIXED_HEADER_DECRYPTBAD_PAD_BYTE_COUNTBAD_RSA_PARAMETERSBLOCK_TYPE_IS_NOT_01BLOCK_TY
PE_IS_NOT_02BN_NOT_INITIALIZEDCANNOT_RECOVER_MULTI_PRIME_KEYCRT_PARAMS_ALREADY_GIVENCRT_VALUES_INCORRECTDATA_LEN_NOT_EQUAL_TO_MOD_LENDATA_TOO_LARGEDATA_TOO_LARGE_FOR_KEY_SIZEDATA_TOO_LARGE_FOR_MODULUSDATA_TOO_SMALLDATA_TOO_SMALL_FOR_KEY_SIZEDIGEST_TOO_BIG_FOR_RSA_KEYD_E_NOT_CONGRUENT_TO_1D_OUT_OF_RANGEEMPTY_PUBLIC_KEYFIRST_OCTET_INVALIDINCONSISTENT_SET_OF_CRT_VALUESINTERNAL_ERRORINVALID_MESSAGE_LENGTHKEY_SIZE_TOO_SMALLLAST_OCTET_INVALIDMUST_HAVE_AT_LEAST_TWO_PRIMESNO_PUBLIC_EXPONENTNULL_BEFORE_BLOCK_MISSINGN_NOT_EQUAL_P_QOAEP_DECODING_ERRORONLY_ONE_OF_P_Q_GIVENOUTPUT_BUFFER_TOO_SMALLPADDING_CHECK_FAILEDPKCS_DECODING_ERRORSLEN_CHECK_FAILEDSLEN_RECOVERY_FAILEDUNKNOWN_ALGORITHM_TYPEUNKNOWN_PADDING_TYPEVALUE_MISSINGWRONG_SIGNATURE_LENGTHALPN_MISMATCH_ON_EARLY_DATAALPS_MISMATCH_ON_EARLY_DATAAPPLICATION_DATA_INSTEAD_OF_HANDSHAKEAPPLICATION_DATA_ON_SHUTDOWNAPP_DATA_IN_HANDSHAKEATTEMPT_TO_REUSE_SESSION_IN_DIFFERENT_CONTEXTBAD_ALERTBAD_CHANGE_CIPHER_SPECBAD_DATA_RETURNED_BY_CALLBACKBAD_DH_P_LENGTHBAD_DIGEST_LENGTHBAD_ECC_CERTBAD_ECPOINTBAD_HANDSHAKE_RECORDBAD_HELLO_REQUESTBAD_LENGTHBAD_PACKET_LENGTHBAD_RSA_ENCRYPTBAD_SRTP_MKI_VALUEBAD_SRTP_PROTECTION_PROFILE_LISTBAD_SSL_FILETYPEBAD_WRITE_RETRYBIO_NOT_SETBLOCK_CIPHER_PAD_IS_WRONGCANNOT_HAVE_BOTH_PRIVKEY_AND_METHODCANNOT_PARSE_LEAF_CERTCA_DN_LENGTH_MISMATCHCA_DN_TOO_LONGCCS_RECEIVED_EARLYCERTIFICATE_AND_PRIVATE_KEY_MISMATCHCERTIFICATE_VERIFY_FAILEDCERT_CB_ERRORCERT_DECOMPRESSION_FAILEDCERT_LENGTH_MISMATCHCHANNEL_ID_NOT_P256CHANNEL_ID_SIGNATURE_INVALIDCIPHER_MISMATCH_ON_EARLY_DATACIPHER_OR_HASH_UNAVAILABLECLIENTHELLO_PARSE_FAILEDCLIENTHELLO_TLSEXTCONNECTION_REJECTEDCONNECTION_TYPE_NOT_SETCOULD_NOT_PARSE_HINTSCUSTOM_EXTENSION_ERRORDATA_LENGTH_TOO_LONGDECRYPTION_FAILEDDECRYPTION_FAILED_OR_BAD_RECORD_MACDH_PUBLIC_VALUE_LENGTH_IS_WRONGDH_P_TOO_LONGDIGEST_CHECK_FAILEDDOWNGRADE_DETECTEDDTLS_MESSAGE_TOO_BIGDUPLICATE_EXTENSIONDUPLICATE_KEY_SHAREDUPLICATE_SIGNATURE_ALGORITHMEARLY_DATA_NOT_IN_USEECC_CERT_NOT_FOR_SIGNINGECH_REJECTEDECH_SERVER_CONFIG_AND_PRIVATE_KEY_MISMATCHECH_SERVER_CONFIG_UNSUPPORTED_EXTENSIONECH_SERVER_WOULD_HAVE_NO_RETRY_CONFIGSEMPTY_HELLO_RETRY_REQUESTEMS_STATE_INCONSISTENTENCRYPTED_LENGTH_TOO_LONGERROR_ADDING_EXTENSIONERROR_IN_RECEIVED_CIPHER_LISTERROR_PARSING_EXTENSIONEXCESSIVE_MESSAGE_SIZEEXCESS_HANDSHAKE_DATAEXTRA_DATA_IN_MESSAGEFRAGMENT_MISMATCHGOT_NEXT_PROTO_WITHOUT_EXTENSIONHANDSHAKE_FAILURE_ON_CLIENT_HELLOHANDSHAKE_NOT_COMPLETEHTTPS_PROXY_REQUESTHTTP_REQUESTINAPPROPRIATE_FALLBACKINCONSISTENT_CLIENT_HELLOINCONSISTENT_ECH_NEGOTIATIONINVALID_ALPN_PROTOCOLINVALID_ALPN_PROTOCOL_LISTINVALID_ALPS_CODEPOINTINVALID_CLIENT_HELLO_INNERINVALID_COMMANDINVALID_COMPRESSION_LISTINVALID_DELEGATED_CREDENTIALINVALID_ECH_CONFIG_LISTINVALID_ECH_PUBLIC_NAMEINVALID_MESSAGEINVALID_OUTER_EXTENSIONINVALID_OUTER_RECORD_TYPEINVALID_SCT_LISTINVALID_SIGNATURE_ALGORITHMINVALID_SSL_SESSIONINVALID_TICKET_KEYS_LENGTHKEY_USAGE_BIT_INCORRECTLENGTH_MISMATCHMISSING_EXTENSIONMISSING_KEY_SHAREMISSING_RSA_CERTIFICATEMISSING_TMP_DH_KEYMISSING_TMP_ECDH_KEYMIXED_SPECIAL_OPERATOR_WITH_GROUPSMTU_TOO_SMALLNEGOTIATED_ALPS_WITHOUT_ALPNNEGOTIATED_BOTH_NPN_AND_ALPNNEGOTIATED_TB_WITHOUT_EMS_OR_RINESTED_GROUPNO_APPLICATION_PROTOCOLNO_CERTIFICATES_RETURNEDNO_CERTIFICATE_ASSIGNEDNO_CERTIFICATE_SETNO_CIPHERS_AVAILABLENO_CIPHERS_PASSEDNO_CIPHERS_SPECIFIEDNO_CIPHER_MATCHNO_COMMON_SIGNATURE_ALGORITHMSNO_COMPRESSION_SPECIFIEDNO_GROUPS_SPECIFIEDNO_METHOD_SPECIFIEDNO_PRIVATE_KEY_ASSIGNEDNO_RENEGOTIATIONNO_REQUIRED_DIGESTNO_SHARED_CIPHERNO_SHARED_GROUPNO_SUPPORTED_VERSIONS_ENABLEDNULL_SSL_CTXNUL
L_SSL_METHOD_PASSEDOCSP_CB_ERROROLD_SESSION_CIPHER_NOT_RETURNEDOLD_SESSION_PRF_HASH_MISMATCHOLD_SESSION_VERSION_NOT_RETURNEDPARSE_TLSEXTPATH_TOO_LONGPEER_DID_NOT_RETURN_A_CERTIFICATEPEER_ERROR_UNSUPPORTED_CERTIFICATE_TYPEPRE_SHARED_KEY_MUST_BE_LASTPRIVATE_KEY_OPERATION_FAILEDPROTOCOL_IS_SHUTDOWNPSK_IDENTITY_BINDER_COUNT_MISMATCHPSK_IDENTITY_NOT_FOUNDPSK_NO_CLIENT_CBPSK_NO_SERVER_CBQUIC_INTERNAL_ERRORQUIC_TRANSPORT_PARAMETERS_MISCONFIGUREDREAD_TIMEOUT_EXPIREDRECORD_LENGTH_MISMATCHRECORD_TOO_LARGERENEGOTIATION_EMS_MISMATCHRENEGOTIATION_ENCODING_ERRRENEGOTIATION_MISMATCHREQUIRED_CIPHER_MISSINGRESUMED_EMS_SESSION_WITHOUT_EMS_EXTENSIONRESUMED_NON_EMS_SESSION_WITH_EMS_EXTENSIONSCSV_RECEIVED_WHEN_RENEGOTIATINGSECOND_SERVERHELLO_VERSION_MISMATCHSERVERHELLO_TLSEXTSERVER_CERT_CHANGEDSERVER_ECHOED_INVALID_SESSION_IDSESSION_ID_CONTEXT_UNINITIALIZEDSESSION_MAY_NOT_BE_CREATEDSHUTDOWN_WHILE_IN_INITSIGNATURE_ALGORITHMS_EXTENSION_SENT_BY_SERVERSRTP_COULD_NOT_ALLOCATE_PROFILESSRTP_UNKNOWN_PROTECTION_PROFILESSL3_EXT_INVALID_SERVERNAMESSLV3_ALERT_BAD_CERTIFICATESSLV3_ALERT_BAD_RECORD_MACSSLV3_ALERT_CERTIFICATE_EXPIREDSSLV3_ALERT_CERTIFICATE_REVOKEDSSLV3_ALERT_CERTIFICATE_UNKNOWNSSLV3_ALERT_CLOSE_NOTIFYSSLV3_ALERT_DECOMPRESSION_FAILURESSLV3_ALERT_HANDSHAKE_FAILURESSLV3_ALERT_ILLEGAL_PARAMETERSSLV3_ALERT_NO_CERTIFICATESSLV3_ALERT_UNEXPECTED_MESSAGESSLV3_ALERT_UNSUPPORTED_CERTIFICATESSL_CTX_HAS_NO_DEFAULT_SSL_VERSIONSSL_HANDSHAKE_FAILURESSL_SESSION_ID_CONTEXT_TOO_LONGSSL_SESSION_ID_TOO_LONGTICKET_ENCRYPTION_FAILEDTLS13_DOWNGRADETLSV1_ALERT_ACCESS_DENIEDTLSV1_ALERT_BAD_CERTIFICATE_HASH_VALUETLSV1_ALERT_BAD_CERTIFICATE_STATUS_RESPONSETLSV1_ALERT_CERTIFICATE_REQUIREDTLSV1_ALERT_CERTIFICATE_UNOBTAINABLETLSV1_ALERT_DECODE_ERRORTLSV1_ALERT_DECRYPTION_FAILEDTLSV1_ALERT_DECRYPT_ERRORTLSV1_ALERT_ECH_REQUIREDTLSV1_ALERT_EXPORT_RESTRICTIONTLSV1_ALERT_INAPPROPRIATE_FALLBACKTLSV1_ALERT_INSUFFICIENT_SECURITYTLSV1_ALERT_INTERNAL_ERRORTLSV1_ALERT_NO_APPLICATION_PROTOCOLTLSV1_ALERT_NO_RENEGOTIATIONTLSV1_ALERT_PROTOCOL_VERSIONTLSV1_ALERT_RECORD_OVERFLOWTLSV1_ALERT_UNKNOWN_CATLSV1_ALERT_UNKNOWN_PSK_IDENTITYTLSV1_ALERT_UNRECOGNIZED_NAMETLSV1_ALERT_UNSUPPORTED_EXTENSIONTLSV1_ALERT_USER_CANCELLEDTLS_PEER_DID_NOT_RESPOND_WITH_CERTIFICATE_LISTTLS_RSA_ENCRYPTED_VALUE_LENGTH_IS_WRONGTOO_MANY_EMPTY_FRAGMENTSTOO_MANY_KEY_UPDATESTOO_MANY_WARNING_ALERTSTOO_MUCH_READ_EARLY_DATATOO_MUCH_SKIPPED_EARLY_DATAUNABLE_TO_FIND_ECDH_PARAMETERSUNCOMPRESSED_CERT_TOO_LARGEUNEXPECTED_COMPATIBILITY_MODEUNEXPECTED_EXTENSIONUNEXPECTED_EXTENSION_ON_EARLY_DATAUNEXPECTED_MESSAGEUNEXPECTED_OPERATOR_IN_GROUPUNEXPECTED_RECORDUNKNOWN_ALERT_TYPEUNKNOWN_CERTIFICATE_TYPEUNKNOWN_CERT_COMPRESSION_ALGUNKNOWN_CIPHER_RETURNEDUNKNOWN_CIPHER_TYPEUNKNOWN_KEY_EXCHANGE_TYPEUNKNOWN_PROTOCOLUNKNOWN_SSL_VERSIONUNKNOWN_STATEUNSAFE_LEGACY_RENEGOTIATION_DISABLEDUNSUPPORTED_COMPRESSION_ALGORITHMUNSUPPORTED_ECH_SERVER_CONFIGUNSUPPORTED_ELLIPTIC_CURVEUNSUPPORTED_PROTOCOLUNSUPPORTED_PROTOCOL_FOR_CUSTOM_KEYWRONG_CERTIFICATE_TYPEWRONG_CIPHER_RETURNEDWRONG_CURVEWRONG_ENCRYPTION_LEVEL_RECEIVEDWRONG_MESSAGE_TYPEWRONG_SIGNATURE_TYPEWRONG_SSL_VERSIONWRONG_VERSION_NUMBERWRONG_VERSION_ON_EARLY_DATAX509_LIBX509_VERIFICATION_SETUP_PROBLEMSBAD_VALIDITY_CHECKDECODE_FAILUREINVALID_KEY_IDINVALID_METADATAINVALID_METADATA_KEYINVALID_PROOFINVALID_TOKENNO_KEYS_CONFIGUREDNO_SRR_KEY_CONFIGUREDOVER_BATCHSIZESRR_SIGNATURE_ERRORTOO_MANY_KEYSAKID_MISMATCHBAD_X509_FILETYPEBASE64_DECODE_ERRORCANT_CHECK_DH_KEYCERT_ALREADY_IN_HASH_TABLECRL_ALREADY_DELTACRL_VERIFY_FAILUREDELTA_CRL_WITHOUT_CRL_NUMBERIDP_MISMATCHINVALID_
DIRECTORYINVALID_FIELD_FOR_VERSIONINVALID_FIELD_NAMEINVALID_PARAMETERINVALID_POLICY_EXTENSIONINVALID_PSS_PARAMETERSINVALID_TRUSTINVALID_VERSIONISSUER_MISMATCHKEY_TYPE_MISMATCHKEY_VALUES_MISMATCHLOADING_CERT_DIRLOADING_DEFAULTSNAME_TOO_LONGNEWER_CRL_NOT_NEWERNO_CERTIFICATE_FOUNDNO_CERTIFICATE_OR_CRL_FOUNDNO_CERT_SET_FOR_US_TO_VERIFYNO_CRL_FOUNDNO_CRL_NUMBERPUBLIC_KEY_DECODE_ERRORPUBLIC_KEY_ENCODE_ERRORSHOULD_RETRYSIGNATURE_ALGORITHM_MISMATCHUNKNOWN_KEY_TYPEUNKNOWN_PURPOSE_IDUNKNOWN_TRUST_IDWRONG_LOOKUP_TYPEBAD_IP_ADDRESSBAD_OBJECTBN_DEC2BN_ERRORBN_TO_ASN1_INTEGER_ERRORCANNOT_FIND_FREE_FUNCTIONDIRNAME_ERRORDISTPOINT_ALREADY_SETDUPLICATE_ZONE_IDERROR_CONVERTING_ZONEERROR_CREATING_EXTENSIONERROR_IN_EXTENSIONEXPECTED_A_SECTION_NAMEEXTENSION_EXISTSEXTENSION_NAME_ERROREXTENSION_NOT_FOUNDEXTENSION_SETTING_NOT_SUPPORTEDEXTENSION_VALUE_ERRORILLEGAL_EMPTY_EXTENSIONILLEGAL_HEX_DIGITINCORRECT_POLICY_SYNTAX_TAGINVALID_BOOLEAN_STRINGINVALID_EXTENSION_STRINGINVALID_MULTIPLE_RDNSINVALID_NAMEINVALID_NULL_ARGUMENTINVALID_NULL_NAMEINVALID_NULL_VALUEINVALID_NUMBERSINVALID_OBJECT_IDENTIFIERINVALID_OPTIONINVALID_POLICY_IDENTIFIERINVALID_PROXY_POLICY_SETTINGINVALID_PURPOSEINVALID_SECTIONINVALID_SYNTAXINVALID_VALUEISSUER_DECODE_ERRORNEED_ORGANIZATION_AND_NUMBERSNO_CONFIG_DATABASENO_ISSUER_CERTIFICATENO_ISSUER_DETAILSNO_POLICY_IDENTIFIERNO_PROXY_CERT_POLICY_LANGUAGE_DEFINEDNO_PUBLIC_KEYNO_SUBJECT_DETAILSODD_NUMBER_OF_DIGITSOPERATION_NOT_DEFINEDOTHERNAME_ERRORPOLICY_LANGUAGE_ALREADY_DEFINEDPOLICY_PATH_LENGTHPOLICY_PATH_LENGTH_ALREADY_DEFINEDPOLICY_WHEN_PROXY_LANGUAGE_REQUIRES_NO_POLICYSECTION_NOT_FOUNDTRAILING_DATA_IN_EXTENSIONUNABLE_TO_GET_ISSUER_DETAILSUNABLE_TO_GET_ISSUER_KEYIDUNKNOWN_BIT_STRING_ARGUMENTUNKNOWN_EXTENSIONUNKNOWN_EXTENSION_NAMEUNKNOWN_OPTIONUNSUPPORTED_OPTIONUSER_TOO_LONG*H=0123456789abcdefhz(<K((ixYMn;()`5~x`K-"bSW+TO|&k%NB%agpx:2b,P;}O)F^Q囕 gѵZ,->G0$`G#Ʌ映)N9-,E<'LkK[ĤQА:&}mR,_ h)1S`Ì+UdnЯ{d*5%RŞX96EQ9jXˤQ<8ʫCcASpS^ K4m4:ᔟ]L]ISX7b`cZ}Rp j2s!{|I%ro1g66T@q(2{>;I.(G=Dz&}ޝz}~7-|^83H1Y'c~I]y^z/fU a8h̅LeeSotLVFz)Yuc/`gvc-3f B'fWcMiXOwŋdH'". rTM rFznV kG|.D04Q]R 2[ʸm:Ա:_ k82vʔqz5*ޟBI|d?X=YXn ෑJ . 2L2K=|>@9s(|'(>43)OhzyՓXyg0f  -0LUqj ȜO`zde̪O7OMZ/@1UuX ?BLCPb1mƢrhc+j3DBTx#A5-u n\GÂ6d)ST\x>T*? 8 ZM:N\cMsV%PA)8?Ƀa!2,MtDwgd';N1m0^ԧ} η, NSƱ/cge؈7yBY׭8ӞKd~!m~3bzseGɭEE:[-!Mwfffff AES-NI GCM module for x86_64, CRYPTOGAMS by       AES for Intel AES-NI, CRYPTOGAMS by   GHASH for x86_64, CRYPTOGAMS by yZyZyZyZyZyZyZyZnnnnnnnnܼܼܼܼܼܼܼܼbbbbbbbb     SHA1 block transform for x86_64, CRYPTOGAMS by /BD7q۵/BD7q۵[V9Y?^[V9Y?^[1$} U[1$} Ut]rހܛtt]rހܛtiGƝ̡ $iGƝ̡ $o,-tJܩ\ڈvo,-tJܩ\ڈvRQ>m1'YRQ>m1'Y GQcg)) GQcg)) '8!.m,M 8S '8!.m,M 8STs e jv.,rTs e jv.,r迢KfpK£Qlǡ迢KfpK£Ql$օ5pj$օ5pjl7LwH'4l7LwH'4 9JNOʜ[o.h 9JNOʜ[o.htocxxȄnjtocxxȄnjlPxqlPxq        SHA256 block transform for x86_64, CRYPTOGAMS by "(ט/Be#D7q"(ט/Be#D7q/;Mۉ۵/;Mۉ۵8H[V9Y8H[V9YO?m^O?m^BؾopE[BؾopE[N1$} UN1$} Uo{t]r;ހo{t]r;ހ5%ܛ&it5%ܛ&itJi%O8GJi%O8GՌƝew̡ $ՌƝew̡ $u+Yo,-ntJu+Yo,-ntJAܩ\SڈvAܩ\SڈvfRQ>2-m1fRQ>2-m1?!'Y?!'Y= % G= % GoQcpn g))oQcpn g))/F '&&\8!./F '&&\8!.*Zm,M߳ 8S*Zm,M߳ 8ScTs ew< jvcTs ew< jvG.;5,rG.;5,rdL迢0BKfdL迢0BKfpK0TQlǑpK0TQlReU$ReU$* qW5ѻ2pj* qW5ѻ2pjҸSAQl7ҸSAQl7LwH'Hᵼ4LwH'Hᵼ4cZų 9ˊAJNcZų 9ˊAJNscwOʜ[o.hscwOʜ[o.h]t`/Cocx]t`/CocxrxȄ9dnjrxȄ9dnj(c#齂lP(c#齂lPyƲ+SrxqyƲ+SrxqƜa&>'!Ǹќa&>'!Ǹ}xnO}}xnO}orgȢ}c orgȢ}c ?G5 q ?G5 q}#w($@{2}#w($@{2 ˾L*~e)YB>˾L*~e)Y:o_XGJDl:o_XGJDl  SHA512 block transform for x86_64, CRYPTOGAMS by        p*Zx"RM|1}0L>PˏᛱD*nzߥ#6. 
[Further constant data: additional non-text tables, two more attribution strings, "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)" and "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS", and then serialized protobuf file descriptors for the Kythe API. The recoverable descriptor contents are:

- kythe/proto/analysis.proto (package kythe.proto; imports google/protobuf/any.proto, google/protobuf/timestamp.proto, kythe/proto/storage.proto): AnalysisRequest, AnalysisOutput, AnalysisResult (Status), CompilationUnit (FileInput, Env), KzipInfo (CorpusInfo, RequiredInputs), BuildMetadata, FilesRequest, FileInfo, FileData, CompilationBundle, IndexedCompilation (Index).
- kythe/proto/buildinfo.proto: BuildDetails (build_target, rule_type, build_config).
- kythe/proto/common.proto (package kythe.proto.common): Fact, Point, Span, NodeInfo, Diagnostic, ResolvedDiagnostic, CorpusPath, Link, MarkedSource (Kind), SymbolInfo, Origin, Language (Support), Hash (HashType).
- kythe/proto/cxx.proto: CxxCompilationUnitDetails (HeaderSearchDir, HeaderSearchInfo, SystemHeaderPrefix, StatPath).
- kythe/proto/filecontext.proto: ContextDependentVersion (Row, Column).
- kythe/proto/metadata.proto (package kythe.proto.metadata): GeneratedCodeInfo (Type), MappingRule (Type, Semantic).
- kythe/proto/storage.proto: VName, VNameMask, Entry, Entries, ReadRequest, WriteRequest (Update), WriteReply, ScanRequest, CountRequest, CountReply, ShardRequest, VNameRewriteRule, VNameRewriteRules.

All descriptors carry the Java package option com.google.devtools.kythe.proto and Go package suffixes (analysis_go_proto, buildinfo_go_proto, storage_go_proto, etc.) and are marked proto3; the bytes surrounding the descriptors are non-text offset data.]
.ȵP.`./4/T/t 0P00p1101P 234p454 5T 606`6ȸ667<8h88@9;;@<l@=P=>>>4>T0?t0@@ػ0A@AAD0DtEFȼ0G@GG@G`GHHPH@I @J<`J\pJ|PKMNO8PhP Q0QпQQQ0QPR|ST T U 0WPWXYY$ZLZl`ZZZ[\,p\P\p`]_aa0bH@bhccd d0d@d<pd\ePfff(@h\jjkPm<`m\mmmm`nn, oPotpqPqqr( tLtpu@vvww$wH`xlxpyy<y`zpz0{(@{@|8||X}~ذP@pP4  0@P`p0PpЉ0 P0p@P`p0Pt 0@4P\ 0@ P@p`@8ГXxPP@p`(`Ll@`0,\Т|8d (PTP|  @(pLt P0|0p p8\Pp(T @Pp 8d0@p(`Xpx@p H0pp  Dd  Lx``(@T 0@l ` , \@  @(pHpPp4 \0  8`  ,!\"#$$$$%D@%d&' '0''0+`-P.P/`//,/L0l000000001P 1p01P1`112( 3T3x33 67$@8D8t9P9`9999<P:`:; ;P;`=(?X?x??`@p@@0@P@p@ ABC8pC\C|DF GH8JlJ@JPJJJJ4KTK0KK0LpMt`NNN@O@QTV0pWdW@XPXY0Y@Y8YX [[\\$]X`cdff0@fXPfxffff ghDphhhjkDpltmn nPn`n8nXnxn0pppp<0qdt@wxyy8`y`pyyy zz`{@{d{|0p(\| `0`Іp,0p0Pp`8X004T@|P$`DtФpDxЮ`p< \@|<hPл( мH | н 0 P  @( T  x 0 p P $ D t  @ P  ( L `p    p  @$ H l      `t@PpPpd @` 0 @@P``p pPpp<l 4XP< hP8h@@l``P p@`<d8pl`Dh0   L    P p ( H` t @P <@p 040TPt@ 08p```0@0d @P 0 `   p!0"4!0#d!$!0&!@&!p&"&0"&P"'p"p'"'"@("(#)\ )$#)D#*d#*# *#0*#@*#*$`+@$+l$p,$-$-$-%-<%-\%.%/%/%/%/ &02P&3& 4&4&4&`5('p5H'5h'5' 6'@7'7(7$(8D(8x(;(@=(>)p?@)?`)@@)P@)@)@)A*AD*`Bp*B*B*0C*D +pE<+Fh+F+F+G+G,G0,GP,PHt,H,PI,`I,pI,I-JH-Kt-K- L-@L-`L.pL0.LP.Lp.L.L.L.M.@M/N<>\>>>И>>?@0? \??К??`?,@\@@`@p@ A,APLAplAAСA`AB0(B@HBФ|B0BBC04C@TCp|CCCCC@D@HDtDDDDEHEPtEPE`EEF4FTF@tFFFFFG@LGxGGGGHPHtHHH`Ip I@I`IIpII0I@J`s?s@tAtBHuBduChvCvCvCv EvPG$wHTwIwLwMwPMxT0xTPx UtxUxVx@VxVy0X@yYly@Zyelz0hz@izPjz`k{pl<{md{0n{Po{p{q|r<|sd|Pu|v|`x|y}z<}`|h}p}}~}} ~4~@`~~P~p~8Ј`0Ћ p8dpp0@ph0 @8` @؂`(Px̃Щ(X Є`$T лP<h8Pdp܇ `8``܈`@T|Ԋ4d` ܋@,\ `0p DlppHpĎP@ph0 0p0X4`P P8\P`ԓ<pd0 ؔ  (@ P x@  ̕  $` P |  Ԗ@  , X   @  8 d @   p @` l   @  H` p     @ 8 ` `  ؛0  ( PP x \ P   P 8 d  P   <p h      @ pP  ̠   p" 0# \$ y% y@& z' Dz0( H) p) * + , - 8`. `P/ P0 @1 آ@2 3 3 4 5 06 X`7 8 ? @ PA 8B hD D E pF `G <G dG 0O R PT U <@\ ld @d pd  e e Le lf h Ȩi `j `k @`l l0m  n ԩq t <v tp  ̪ `  H t  ̫0 P  < \   @  @ p H`  ̮ `  L` x  $ L0 tp p ijp 4 `   t Я @ lЬ  Ȱ  (@ Xp    0 <к lP  Ȳ` p   dP  ̴  4 \ |  ص  00 TP p  Զ0 p 4 \0 @ ȷ  $ L t  и  ,` P t   ԹP  4@ d` `    8p d  Ļ0 p $ Tp P  ܼP  ! 4P" `" P# % ܽ`& & 0& P& tP' ' `( 1 p2 @03 l3 P6 08 @8 h9 9 : : < 4`= \= `> > PB pB B DPC l E E `T Z P` La |`b e Pf `f f 8f Xf xg 0g h i Pj @j h0k k l Pn `n 8pp hp q pq q Pr (r P0s xs t t t `u @u h@v v  w w w (w Hw hw w w x x  x (0x H@x hPx `x px x x @y `0| | | ~ ~  P  $ L t 0    XP   P   H p   @ 8P h    P $ L0 l P P  ( X    (@ X  p @ л D xP   P 4 `@  0   ` p  $ DP t  0 `   @  L0 lp 0 ` p  ( H@ x     4P T    0 , X |P 0     P x 0   ` $p D lP    , L l 0  " $ LP& |P' (  ( @( + < + \p+ + + + . `/ D3 p9 ? ? ? (? 
H@ h @ @@ P@ `@ PA A ,A T C 0D E G G @ H hK L  N P PR @pS lW @^ pd h Lo  p 0p |q z ~  @@ p   P 8 h     0` \`  `  @ D t @ P p $ H lЮ P p б  h 0 P p   (в H h0    л @ d     4  0  80 ``  $ PP    4 d @  `  P     p $ 0' 8' X( `+ 5 `7 9 L09 l; ; ; < > @@A xD E E F (F HG h G 0J PJ pJ J J <@K hK L  M M Q 4T dpU W X X Y `Z ^ ^  b 4b \b |c `d f pk @k `pl l Pm m 4 n n o o $q T`r x s  u v pv $ x P z |  ~ 0 4@ T@ ` P   8 h@ 0   @ h@  0 `  $p H0 l   `  D tp   p 4 X@ x   0  4з Xp  p     @ p0 P    ,@ L` l      80 XP x  p  $p P p P  0  0 P p  @ P    < @ h   P    @ ` p   @   p @ P p    p ,  \     ( px   4L n  8XxP0 T` @ PP0PPt `'*$,L -tp-.0 13$7T8x9BDE@F@FdGQTVX,pZX@[|\ _`cf(fHfhgj mmm<`ohooppppp,r`r0sss0tt u0uPPulvvw0w@w`w<xlxxx`{@| |x}L}0Ї8h@L0l`0`<\ |P@<p\|Ж0` < \  | P    !@(>ЋT>t> >0>>Е?T?t?P??0?0$?@,@T@@ @0@@Ap@A`A0APAAAФ B,BpLBpBBШB B`C,CPPCtC`CCpCдD 8D`DDDpDмEP4EпlEEE EPFHFhFFFFF(G`TG@GPGPGPG@$H0PH0|H0H0H0I(IPPI0|IIIJp,JPXJ0JJ`JK0@KpKK`KL4LXL@LLL0M8M`MMMPMMN4NP`N N0NNO00O`O O Op O P,PLP`xPpPPP0 Q,QLQPpQQQQ(RPR RRRRp! S0$LS&xSP&S'S`(T)HTP*S.hT1T 2T2T2TU7U<V@>@V?VAVpCVCWC8W`DhW@LWPLWpLWN X@Rtt?tBtCtG,uRlu@UuVuVuW vX,v0^\v`v avpdvdwpe0AA@ClD`DDE JDJlK`LLMN4POPPRSW4 XTbkl l`n0o\puĹvwx<yhȺ8@lАБ̻@,P@P0Լ@4P\D0tpP@@`p@ܿ0\8X pP,X@`|p$ T x`P80%h'0(02 2@2`306; < <4>h@@@PAO0VH@VhPV`VpVV`WX@@XdpXX0ZP[ ]<]d]^p^^_p_4_X@aae`fpff<0gd@gPgijl<ndnno o0o@o4PoT`otpoooop p40pT@ptPp`ppqPqpq<q\q|qqttt<0u\Pu|uuv0vvv<w\ w|pwwwx`xx<x\x|@y`yyy0zzDzl {@{{{`||4}T }t}}@~`~~4Tt @ 4Tt@`0`@$pPPp$Dd 0P  ,pXx00P|p$Dpе 0@PpжP8d0p @8\ 0@(`Hh@`@(`Hh@`@(`Hhп p P@`8hp0 4P\`pP<l00Hh0``0P0Tt`P$`Tt `@ `  ,T t0Hpl@0 @ x@ 04$d0%@'''(((H(h- /167@7`89P:`;>0@\ A0ApAA0BB(`FLGlJMQQQ(QHQhQQRRSSDTl UVWWX(0XHXpXYY Y0\0]H^x_def$PhP j|pmmp0q,PqL`qlpqqqqpruHvly0z`z0H p@L`|0$D lPP@`8X0(PHx@P 4`0 ,0 X!$%%)4)T*@++P,,,0,P-x@-- ...(0/H0p 0 6p77 88@9h :0:0;<@?<AhBCC@DE<PH| L0PS W0 X\  Y Z  [ \ \0 p^\ _ _ a b d, eL ep `f f f g 0g0 PgP gt g g h i Pj0 jd j l l mn,`oP`qvwp}p~<~\  @HtБ`@`p(ЕHh @P4\Pp8ЛXp 0,\ 0`p,p\Pp  ,0L@l(Hx 0@P `@p``$Tt (H ` @` ,L@lPp<h p @p<\P" '+ .L `.l . . 0 3 !38!@7d!7!7!P8!8! 9"90":P"p:p"0;"@;";";"?$#DT#Ex#`F#F#G#H $PID$0Jp$K$ K$N$0Q%VH%Xx%X%Y%Y%0Z&PZ(&ZH&Zh&Z&Z&0[&P[&['[('[H'0\h'p\'\'0]'@]'`](^((^H(0^h(^(^(_(`_(_)_4)`T)pd|)f)j)n *w<*yd*y*@* *0+L+Д++ +,з4,к`,,,, -0,-PL-`l-p----. <.0\.@|.P.`.p../`0>P>t> >>$?L???`?@8@X@0x@@@@(APLAptAAAAAp,BHBhBBB BB C0DChC CC0CD(DPTDxD Dp D D@ E DE hE E0 E Ep F0F\F|F0F@F`FG LGppGGGG H00H@PHtHHHH I`(IHIhIIIIIJ(J@HJhJJJJJ@KP@K hKPKKKKPL4LXLxL`LLLMHM`!tM!M"M $M`$N$4N$TN$tN$N %N%N &OP&0O&TO&xO'OP'O*OP4 P@7PP8|P8P9P:Q@;(Q;TQ =|Q=Q?Qp?R?$R@LR@@pRp@R@R@RAR0ASpA0SAPSApSPBSBSCS0CSpCTC0TDPTpDpTDTET`FTFU I@UpIdU0JUJUPKULVM0V0N\VN|VPOVPVQWR0WSXWSxW0TWTWWWWX]DX]dX]X]X0^Xp^X^YP_DY0atYbY bYPbYb Zb8ZcdZdZdZ0fZf[g<[hd[h[ i[i[j[j$\ kL\kt\`l\ m\`m\pm]n4]nT]p]q]r]`u^x@^pxd^`z^|^ }^P}^~,_~\_@__@_ `08`\`Љ```aЋ$aLappaa0aPaЍ bP8bЎ`bPbЏbb`c0cPctccPccd0dГPdpdddd d0e@0ePPe`peeeee`fHf tfpfffg(g TgpxggPgg`4hpThthОhhh h0i@8iPXi`xipiiiij8jПXjxjjj jk8kdkkkkk0 l@@l``lllllm  m@@m`mmPmmm$n XnnnЭnPo 0oXopoooPoP$pDphpppPp q04q\qpqqзq@q0(rPr0xr rrrп s,sLsPpsssst@0tXttttt uDupuuuuv`@v lv`v0vpvw`4wlww0wwxp>P??@AԒBC IP0PYPYd[[0\P\p\Г\`_Pac,e@fTmhn|n`u`~p~~ L ȕ@`$8 lԖ@,p&7<8Pp;d=xp>>P??Ș@ܘ@@A0A,A@BTBl E`JLHR@X@Y$ a8b|@kkltv@`Th |PԜЇ Ld|0Pԝ6p(@|zRx *zRx  AC H <pAC V \p AC G |` AC E PCAC ~ ,AC J` A (lAC BZ E F (AC CJ A K  DmAC Ca hD AC D 04AC G E H A ,AC M A 0AC GQ A [   AC G  @ XC C ,daAC C$ A S E  AC H $'FC CU EAC C VAC Q @AC V <@ AC E \0;AC v ,|PqAC JU A , AC F E J p!AC BQ p! 
AC D 0`!AC G E H A ,P "AC J A ,#AC G A k < AC D  <$XC C 0%TAC G A W E (D& AC H $H4&'FC CU Ep<&AC C ,&6AC q L&AC V L&4AC o (l&AC G A (&mAC B[ E F  H4'nAC Cb l' AC D $p'8AC B_ A M ,'VAC M, A 0)xAC G% A k E +iAC ` ,8T+}AC CX A S E h+ AC H 0+AC G A S E ,AC C ,2AC m  ,AC V  , AC E <, AC E $\,MQC Cp A$(,BAC Gv P,7AC r (p,5AC J& (-lAC BZ E F ($ AC C_ A U  P -mAC Ca t - AC D 0 -HAC G E H A , .mAC MH A , 41AC G A V (  AC D (H 1sAC J` 4t 3kAC J A ] E  P4 AC H $ @4'FC CU E H4AC C  84FAC A 4 h4AC V T h47AC r 4t 4AC J E U A ( P5vAC Bd E F  5wAC Ck  6 AC D $ 5:AC Ba A M ,D 6AC M A 0t 8AC Gj A C  :AC { , :AC C A S E  ; AC H $ :DFC Cr E@ ;AC C ` ;6AC q  ,;AC V  ,; AC H $ ;SQC Cv A$ T;RAC GF ;AC  ,0;AC J} A (`|?lAC BZ E F (PAC B E F  ?mAC Ca ? AC G 4?AC J E N A ,4XBvAC MQ A 4dJAC Mi A d E  AC D (`P[AC JH 4SPAC J A ] E  V AC H $@V'FC CU EhVAC C VlAC g WAC V W)AC d  WEAC C} (  XTAC CC E D  8TXYAC CM \X AC D $|XAC N A E ,xXRAC J, A ,YAC Cs A L E (Z/AC f $$8Z,AC U A M L@Z AC H ,l0ZmAC FH A U pZAC C `Z"AC ] pZAC V pZ2AC m Z.AC i <Z AC A 4\PLC F A XZAC V XZ2AC m xZ.AC i Z AC A xZAC V $4xZAC  A N  \`[AC C ([AC Js  6AC Cj ( ]AC C E D ]AC BQ ] AC G 0<]AC J A U (p|jAC JW ,^AC J A $AC G A $hAC G A ,_\AC M A (LZAC KE A ,xDAC Jv E ,cAC M A ,dfAC G~ A W f AC H $(f'FC CU EPfAC C  pfVAC CN hAC V h2AC m (h.AC i 8h AC A 4LC F A LgAC V $lgAC  A Q  hNRC Bn (hAC J  6AC Cj (DjAC B E F 4kAC BQ Tk AC G 0tjAC J A U (jAC JW ,lAC M A $(AC G A ,,pAC M A (\ZAC KE A ,AC Jy E ,4u,AC M A 04w_AC J. A ] `x AC H $<Px'FC CU EdXxAC C  Hx8AC C0 dyAC V dy AC E $TyMQC Cp A|y0AC k $0yAC F} (XyAC Cp E D XzAC BQ Xz AC D 0HzxAC FY E J A ,zeAC J? A (({gAC CH A R  T|d_C Bl ,xd|AC CL A S E | AC H 0|AC G A S E }AC C }&AC a <}AC V \}3AC n 0|}AC J A ] (L~{AC Bi E F  ~|AC Cp ~ AC D 0 ~AC Jq A U ,TXAC J A ,uAC GN A V (8AC J 0,AC Jn A ]   AC H 04 "AC J A Y h AC C  t6AC q  AC V  7AC r 4 AC J E U A ( !|vAC Bd E F  L!ЄwAC Ck p!, AC D $!:AC Ba A M ,!4AC M A 0!AC Gj A C "@AC { ,<"AC C A S E l"0 AC H $" DFC Cr E"HAC C "86AC q "XAC V #X AC E 4#H?AC z (T#hAC G A ,#,AC F E J #AC BQ # AC D 0#AC G E H A ,$$CAC J A 0T$+AC G A k E  $XC C ,$AC C A S E $p AC H $$`'FC CU E$%hAC C D%XDAC  d%AC V % AC E %x0AC k (%AC J ,%\AC F E J  &<AC BQ @&< AC D 0`&,AC G E H A ,& AC J A ,&ؕAC G A V & AC D ('HAC J 4@'AC J A ] E x' AC H $''FC CU E'AC C 'FAC A (̗AC V  (̗3AC n 0@(AC J A ] (t(mAC B[ E F  (̘nAC Cb ( AC D (NC  ,)șgAC MB A 04)>AC M  A ] h)sHC S 0)tAC Jn A ] ) AC H 0)НeAC J8 A Y * AC C 0*6AC q P*AC V p* AC E *  AC E */AC j (* lAC J] (*PlAC BZ E F ,(+dAC F A R  X+dmAC Ca |+ AC D 0+cAC G5 E H A ,+ܡAC J A ,,lAC F{ A X 0,l AC D $P,̣AC F 4x,AC MN A ] E ,  AC H $,'FC CU E,AC C -&AC a 8-AC V  X-eAC B^  |-P]AC BV  -VAC BO  -ȦrAC Bk  -$YAC BR  .`AC B  0.<KAC BD  T.h]AC BV  x.]AC BV  . 
AC C  .̪]AC BV  .[AC CS  /DRAC BK  ,/jAC Bc  P/̬YAC BR  t/aAC BZ  /TnAC Bg  /jAC Bc  /QAC BJ 0 AC D $0 AC D D0AC A d0x AC D 0h AC D 0X AC D 0H AC D 08 AC D 1( AC D $1 AC D D1 AC D d1 AC G 1 AC D 1ؽ AC G 1Ƚ AC D 1 AC D 2 AC D $2 AC D D2 AC D d2x AC D 2h AC D 2X AC D  2H7UC BX 2d8AC Bm 3AC I (3t AC D H3d<AC Bu h3AC I 3t AC A $3dAC F $3AC F~ ,3T<AC J A ((4dAC G E T4(AC N t4(AC C 4AC I 4AC S  47UC BX 4$8AC Bm 5D AC D ,854<AC J A  h5D7UC BX 5`8AC Bm 5 AC D 5p+AC Bd 5 AC A  6pjAC Cb $06AC F ,X6$<AC J A (64AC G E 6AC S $6AC E A (6AC I A ((7$AC I A $T7AC J A |7AC P  7AC C 7 *AC Bc  7)AC C]  8(wAC Co (8AC BQ $H8AC G ,p8*AC M9 F ,8bAC MK A 8<+AC Bd $8LAC I E A (9DAC J  D9+AC Cb h9AC BU  9#AC C[ (9AC I A 9TAC O 9T AC A ,:D*AC M9 F $H:DAC E A (p:AC I A (:`AC I A $:AC J A  :)AC C] $;AC G ,<;PAC M9 F ,l; AC M A  ;+AC Cb $;AC F A E ;AC 0<AC F E J A $8<AC Gv 0`<AC F E J A ,<AC Cp E D A (<(AC M $<uMC C\ E8=qAC  F P A O A / (T=H>AC Cj A G (=\AC M  =0;ZC BW (=L6AC ^ E A A $=`AC F ($>:AC Ci E D (P>AC Mr |>@AC J (>@ AC M ,>$AC J A ,>AC J A (?AC D?xAC (`?|AC Mr ?AC J (?AC Mr ?DAC J ,?D?AC M( A ,(@TAC M} A (X@DOAC K A ,@htAC M] A ,@AC I. A (@8AC J ,AAC M A ,@AAC M A (pA|AC Gg E ,AQAC M: A AAC E (AAC I A (B!AC M DBkNC Y ,dB#AC M  A $BAC G A $BpnAC O E U  B:AC Bn A $CSAC G: A ,0C BAC M( A ,`C,AC M A $CAC E A Ct*AC Bc ,C#AC M  A XDAC MK A \ E  E ` E ] E U E W ,dD qYC MG (DX &AC Mf A (D\ AC M (DP AC M (EAC M (DEAC Mq (pEiAC MW (E0AC M ,EKC M (EAC M ,$F>KC M" ,TF(AC M A ,FAC Mk A (F ^AC K A ,F"AC Mm A ,G|#AC M A ,@G,%AC M A ,pG%WAC M@ A ,G,'#AC M  A $G,*AC G A  G*:AC Bn A $H*AC G A ,DH+ AC M A $tH-AC E A H/0AC Bi $H /JC CL (H0AC Mr I0AC J (0I0AC Mr \I`1AC J (|I`1 AC K A $ID2AC G A (I2AC I A ,I3AC Mi A ,,J4cAC ML A (\J06AC K A  J7NC q A $J7AC N E A (J7AC G A KL8AC J $ KL8AC I C A (HKD8AC G A (tK9AC I A ,K9YAC MB A ,K;<AC M% A (L,<AC K A (,L=AC G A XL=AC J xL=AC O L= AC H L=;AC v 4L=@AC J E U A (M>AC Bm E F AC M  A ] |d`sHC S 0dAC Jn A ] d, AC H 0deAC J8 A Y $eXAC C DeH6AC q dehAC V eh^AC Y ,eAC J A (eAC B E F fAC BQ  f AC D ,@f AC Gp A O ,pflAC M A 4f\AC G A L E f4 AC D (fJAC J7 4$gȓAC J A ] E \g AC H $|gp'FC CU EgxAC C ghAC  gAC V h7AC r 4$h(AC J E U A (\hvAC Bd E F  hDwAC Ck h AC D $h:AC Ba A M ,hAC M A 0$i(AC Gj A C XiAC { ,xi$AC C A S E i AC H $iDFC Cr EiAC C j6AC q 0j̜AC V Pj̜ AC E $pjMQC Cp AjCAC ~ ,jAC Jz A (jAC C E D kAC BQ 4k AC D 0Tk,AC F  E J A ,k\AC M7 A 0k$!AC M A O  k )AC C 4l,AC J  A ] E Hl AC H $hlt'FC CU El|AC C llfAC a lAC V l;AC v ,mܩ AC J A (@mvAC Bd E F  lmwAC Ck ml AC D (m\AC B A M ,m AC M A 4 nAC M; A v E Dn(AC  4dnزAC J A ] E n AC H 0nFC Cw A  n<AC C o,RAC M 0olAC V Pol4AC o (poAC G A (omAC B[ E F  oTnAC Cb o AC D $ p8AC B_ A M ,4pAC J A 0dpX>AC G A k E pdiAC ` ,p}AC CX A S E p AC H 0qAC G A S E AC y (|8AC G A (@|mAC B[ E F  l|nAC Cb |\ AC D $|L<AC Bc A M ,|dAC M A 0}$AC GO A n E <}tAC k ,\}AC Cc A S E }p AC H 0}`AC G A S E }AC C ~@AC {  ~ AC V @~ 0AC k (`~AC J (~{AC Bi E F  ~|AC Cp ~p AC D 0~`AC J} A U ,0AC Mg A 0`<mAC G A V E (xEAC J2 0AC J A ] ( AC H $'FC CU E< AC C \FAC A |@AC V @7AC r (`AC G A (mAC B[ E F  (nAC Cb 8t AC D $Xd5AC B\ A M ,|vAC ML A 00AC G A r E TAC K ,}AC CX A S E 4X AC H 0THAC G A S E AC C 4AC o ȂAC V 3AC n 0AC F} E Q A (<pmAC B[ E F  hnAC Cb  AC D $1AC BX A M ,ԃAC M A ,AC G A k 4IAC @ ,TrAC CM A S E  AC H 0AC Gu A S E ؄tAC C d&AC a tAC V 8t AC E Xd8AC s (xWAC JH (lAC BZ E F (ЅAC C] A Z  mAC Ca   AC D 4@ AC J] E N A ,xdAC M A ,AC G A V (؆AC J 4HMAC J A ] E <` AC H $\P'FC CU EXAC C HnAC i ćAC V  `AC BY  nAC Bg  , YAC BR  P\UAC BN  toAC Bh  AC D  AC D ؈ AC D  AC D t AC D 8d 
AC D (XTAC Mr ,AC Cc A S E (AC Mr ,|}AC CX A S E (AC Mr ,<0rAC CM A S E  lAC O | AC H l4AC o (ЊAC G A ( mAC B[ E F  (T nAC Cb L  AC D $l 8AC B_ A M , VAC M, A 0ċ xAC G% A k E $ hAC _ ,t }AC CX A S E H  AC H 0h AC G A S E 0AC C  2AC m ܌@AC V @BAC } ,pAC J A (L`AC Br E F xAC BQ  AC D 0AC J} A U ,0AC M A 0 nAC G& A e E (P\7AC J$ 4|pAC J A ] E ( AC H ,ԎJC J EAC C $TAC O DAC V d3AC n 0AC J A ] ({AC Bi E F  |AC Cp D AC D $(41AC BX A M ,PLAC J A ,uAC GN A V (,AC Jv 0ܐAC Jn A ]  AC H 00AC J A Y dAC C x6AC q AC V  đVAC BO  AC B}  @jAC Bc 0 AC D P| AC D pl AC D (\AC M (0AC Mr ,}AC CX A S E (AC Mr DHAC J dHAC O H AC H 8AC V ē8 AC G (AC V ( AC G $AC V D AC G d4AC o ((AC J ({AC Bi E F  ܔ|AC Cp l AC D , \cAC GC A O ,PAC M A 0!AC G A V E (X"AC J 0"AC Jy A ] X# AC H 04H#AC J A Y h$AC C $BAC } 4$AC V Ȗ4$ AC E $$ AC E $$MQC Cp A$0<$MQC Cp AXd$BAC } ,x$7AC J  A (%AC C E D ԗ(&AC BQ (& AC D 0&AC F E J A ,H&AC M~ A 0xD,,AC G5 A n E $@1AC F 0Ԙ2AC F  A Y E d4 AC H $(T4'FC CU EP\4AC C pL4VAC Q 4AC V  4kAC Bd  ԙ4dAC B] $5 AC D 5 AC D 85 AC D (X4AC Mr X5AC J  X5AC O ȚT5 AC H D5CAC ~ 4t5AC J E U A (@,7AC B E F l7AC BQ 7 AC D $7UAC B| A M ,ԛ7AC M A 0<AC G~ A  8t@5AC , ,XAAC C= A S E B AC H $B_FC CM EМCAC C  CfAC a \CAC V 0\C3AC n  P|C`AC CX (tCTAC CC E D  CYAC CM ĝ(D AC D $D!AC R A E , DAC J A ,<GQAC C A L E l0HaAC X $HTAC } A M H AC H ,ԞHAC Fw A U IAC C $I0AC k DIAC V dI AC E I AC E HBAC } ,ğ(IAC J A (JAC C E D  LKAC BQ @LK AC D 0`AC Bs h AC H XAC W (XAC K A ( ܵAC I A LAC BV lAC L AC BV AC A ,AC Fu E c  AC C ( AC J $LPBFC Cp E(txAC M (lAC K A , 1AC M A ,PCAC M A ,,G@AC M$ A  \JAC B  |JAC B AC C <xMC M A ,((AC M A (4hJAC J `LAC BQ ,LAC P A ,PAC M A ,AC Gf A $?AC G (0 @AC GK A $\@AC G $AAC G (BAC K A $CAC G $DAC G $(EMAC GA ,PFAC MQ A (HAC GW A $ JAC G $KAC G $LAC G ($MAC I A $P|MAC E A LxTNAC M A R E g E g E g E j E j E j E j E j E j E j E j E j E j E j E j E g E j E j E j E j E j E j E j E L E j E j E j E j E g E r E j E j E h E w E f E $TYAC FJ $UYAC FJ ,TUzAC F^ E L ,HUzAC F^ E L ,xUzAC F^ E L ,DVzAC F^ E L $VZAC FK $VZAC FK $(WUAC FF $PAC M' A p,AC O ,*AC B_ <AC E $,cAC ET A (tAC Ip A ,$hwAC M A $TAC E A (|0SAC N A (4=AC G A ,HAC J* A (AC N A ,0CAC M, A (`AC G A , /AC P A ( AC I A $VPC Ch A ,AC Cm C D A ,@AC M? A  p IAC Ez A (8vAC Ka A (vAC Ic A ,BAC M A $AC J A lDAC Gs E t A M E E E b E P E N E N E H E (h;AC G A ,|AC J| E (,=AC G A ,<@AC Jx E ,lAC M~ A ,4AC J A , #AC J A , 4AC J A ,, #AC J A $\ AC B A ( PAC F" A , AC M A ,\ ~AC MG A ( AC I 4<  lAC  A  A  A  tX AC C $t`AC EQ A $+AC b C A  ?`C BU ( AC N A (8AC N A (dx AC IF A D AC Bf E N E F E F E 1 A , CAC M A 4 AC J A  E 4@  AC J A  E ,x  AC M A $t( 4AC G( ) AC T ,) AC J A ( ) AAC Cm A G (L * AC Co A o (x* AC K A  + YAC CQ ,+ AC F E J , AC BQ ,, AC P A ,H41 AC P A ,x$4 1AC M A $45 jAC F_ ,|5 IAC M A 86 MC G A ,<p7 ;AC M$ A (l< SAC FC A $> AC C A $|@ AC E A (TA AC L A ,C @AC M  A (D(E AC I  A ,pF AC M A  G hAC Ba  H gAC B` ,TH OAC P) A ,tK AC Pp A ,HO TAC P A ,xDS nAC M A ,U tAC M A $V AC L A C ( V AC F A $, W KAC F@ ,T W WAC M A $ X AC H A ( Y AC G[ A , Z AC M A ,![ UAC @ A C A K ,8!\ LAC { A C A G ,h!4\ <AC M% A ,!D] 1AC M A (!T^ AC M (!_ AC F A , "` UAC M A (P"b AC P E { A (|"@c AC J ("c %AC J  "d JAC BC ("e AC B A J ($#e AC M 0P#g VAC M? 
A #Hh AC W #Hh AC H  #Hh JAC F (#th RAC FG $h :AC u $4$h AC G A ,\$pi & AC P A $$pr AC G A ($(s AC I A $s hAC _ ,% t AC P A (0% )AC M $\% AC Gw ,%8v AC M A $%w xAC Ei A (%@x AC C A h &4y JC D $(&4y AC G A $P&y AC J B F ,x&y AC M A $&{ AC G A $&{ {AC El A $&T| {AC El A  '| AC W ,@'| AC P/ A p'L AC T $'L eAC EV A $' bAC ES A ,'܀ AC P A (| TJC C ,0( AC P A ,`(l AC Px A ,(ܖ KAC P^ A ,( 7AC P A $( AC G ,) ;AC M A  H)ġ AC B l)P AC G )@ FC R )@ AC O )@ #AC ^ )P #AC ^  *` #AC ^ ,,*p OAC M% A ,\* UAC M+ A $* dAC EU A $* dAC EU A $*P bAC ES A $+ cAC ET A ,,+ OAC M% A ,\+ UAC M+ A +0 AC F ,+ AC P A + AC I $+ iAC EZ A $$,H iAC EZ A $L, iAC EZ A $t,ت oAC G^ A $, iAC EZ A $,h iAC EZ A $, iAC EZ A $- nAC G] A $<-@ gAC EX A $d- gAC EX A $-Ь gAC EX A $- mAC G\ A $-` hAC EY A $. hAC EY A $,. hAC EY A $T.8 nAC G] A |. AC F .p AC F .` AC F .P AC C .@ AC F /0 AC F l AC Jp A (<>P AC C E D h> AC BQ > AC C > 3XC  (>! AC J (>" AC J ( ?L# AC J (L?$ AC J $x?$ LAC FA (?$ hAC JY  ?@% FAC F{ $?l% QAC GE (@% AC J  D@X& AC C (h@' AC J ,@' AC M A $@) {AC r A C $@+ lOC CS (AH+ 1AC J" ,@A\/ AC P A $pA2 8AC o A C A42 AC E $A$2 AC J E D 4A2 AC Mr A  E (B6 =AC J. (DB7 AC K A ,pB8 AC P A Bl: AC G B\: AC L ,B\: AC Ma A ,C,< UAC P A @C\A [AC V ,`CA AC P? A ,CD AC Mt A ,C,F AC Mx A (CG AC J $DH JC C DD8I AC F dD(I AC I (D(I AC N A DK AC A $DK BAC Bp E F DK AC D EK AC D 8EK AC D ,XEK AC P A ,EtN AC F E F (EO =AC J. ,E(S AC P A ,FX bAC M A 4DF(^ AC q A e A T A T |F^ AC P F^ !AC \ F^ AC S F^ AC S F^ AC N G^ AC K AC y $`r AC BY A 4@`s JAC M E N A x`t AC K ,`t AC P A  `v pAC Cd (`v AC I A ,atw AC P A (HaDy AC K A  tay pAC Cd (aDz AC M ,a{ AC P A $ap SAC BG A ,bp} AC P} A (Lb ^AC IK A ,xb AC PQ A b AC D ,b AC P A 0b AC J E @ $,c AC F 4Tch AC P< E ` A c AC D ,c AC P A $c AC M E C 4d AC P_ E K A HNC 0}|X _C I }HZ JC D }HZ WgC ~Z RbC ,~Z RbC H~[ >NC d~8[ WgC  ~|[ lAC z A ~[ AC E ~[ >NC ,~[ CAC M  A \ 2AC Bk 0] +AC Bd P,] AC W p,] AC D ] AC BV ,] AC M A  ^ AC H ] AC D  ] AC W (@] AC CP A $l`` AC G (a AC J (a AC B A I `a AC O $ @b yC A@ ,4f AC I A  d8k OAC BH  dk GAC C ,k AC I ܁ m AC   $ o AC E{ A $q QTC r Dq OTC p (d~ AC G A ,q AC J A r ?KC Bn $r #AC H A $u AC E A 0AC G A  8>:AC Be E (\>AC Fl A 4? AC G $?#AC B\ 4? 
AC G  $?AC Q $  ?AC G A 4?EAC @ $T(@AC G A |AAC L A AC E ,@AC F| A W (pAhAC CD A W ,AAC G A U ,HDBAC C C i E  xB5TC B[ ,C!AC P A C A G ,CZFC B^ C m (0CAC J  (DEgC BT LEAC BQ  lE$AC C\ (E-AC CY A G  EAC L  EAC O ( EAC I A 0(HJC F C s A ,\IAC J A ,JAC CW E D A ,KcAC M A PMAC BQ  PM`JC ] A 40MAC C] C D A M E ,htNAC M^ A (PAC G A ,PAC M` A 8RGAC B $hRAC C A $<0ThAC _ A C dxT AC A $hTAC G UyAC t 4eAC Kp A j E ((UAC I A (0V,AC M 4\gAC KZ A j E 4j)AC K A T E 4lAC K A i E 4xnAC K A i E 4<pAC K A f E 4thsAC K A f E 4uAC K A f E 4XxAC K A f E z"AC B[ $<W AC G A dz`eC U ,{>AC g E I A C XX AC F ,z>AC g E I A C X AC F ,$z>AC g E I A C TW AC F ,tz<AC f E H A C W AC F ,z<AC f E H A C XW AC F ,z6AC c E E A C DW AC F ,dzDAC c E S A C V AC F ,zXAC c E g A C V AC F ,zFAC c E U A C 4XV AC F THV AC F t8V AC F $xz;AC n E C V AC F $pz;AC n E C U AC F  $hzAC u A  HU\AC R A  lzAC v A  U\AC R A  zAC x A  UdAC Z A  {5`C N (UAC J H{5`C N (hVAC GF A 4{AC JX E m A ( WAC Gv A 4{AC JA A d E (0lWAC J~ $\{OAC B E C W3AC n ${;AC n E C W AC F  WAC C ,,XAC P= A  @[AC C d\ AC F 4H{AC M A 4 E 4~AC M A 5 E ,JC J ,$hrAC M; A ,TAC P A ,HAC Pv A  aAC BZ $`AC Ft A  ,ZAC Bv A $$hAC Em A $LAC G A $tAC G ,03AC P A ,@oAC M8 A @ AC A 0 AC A <  AC A $\AC F (AC J ,| AC P: A < AC E ,,+ AC P A 0, AC A <P[ JC I| A ,<^AC M A $lAC  A o ($AC M  AC  A ,8AC M} A Dh]AC h B G E G E G E G B A (hAC LR A (`hAC LR A (hAC LR A (4hAC LR A (`,hAC LR A (p-AC L A ,tAC M A (gAC KR A (8AC J @@AC ; <`AC M E  A v B <LV JC I A 0lZAC M A  AC G  8DAC G  \AC G 'AC B` ,AC P A ,IAC M2 A $AC G A ,(dMAC M A  X<AC Fq 0|JC J $,AC F E A $AC z (AC Ju ,$AC M A ,TAC M A $AC G] A (AC K, A SC m ƴDAC b A F A F A F A F A F A F A F A F A F A F A F A F A F A F A F A F A F A F A J (<.AC J ,@AC Mi A , AC Mu A < AC A \AC Q |AC Q AC Q  AC C | AC D l AC D  \AC O $@\AC G A ,h%AC M A 'AC A  AC H ,AC Fy A l dAC A $(TgC  A (PLmAC y A W A ,|BAC J A )AC Bb AC A AC A $ XAC V A x (4>&AC Gy A $`3AC ] A L *AC a ,sAC M* A ($`AC KE A X AC A 4$%AC M B N A 4\ AC Jq A g E , *AC W A G A A  6AC Bo , AC Pj A $HGAC | A A  <pKAC BD $`3AC f A C $?AC r A C  oAC U A ,AC MX A JIC B AC H 4@|AC Jf E N A x4JIC B$hAC C A (AC CW A f (dAC J ,HAC M~ A ,D#AC G  E H 0tAC J A V  AC Bz A P*AC C  AC [  AC H ,'AC b L AC D (lKAC W A W A )AC A )AC A t)AC A (AC I A ($8)"AC L A P<*AC W p<*AC A $,*AC N A G 0$*pAC FV E J `* AC A  P*0AC Bi $,`*MAC B A ,T+AC B` A [  +(AC BZ A  +AC R A $+7AC l E A ,AC E + AC H 4+AC E $T+zAC Ch A (|0,AC I A (,AC Fb A  X-`AC l A  -SAC _ A - AC A  <-YAC CQ $`-iAC G] $D.iAC F^ .DAC B} $.AC F ,D/nJC BQ L W 0(/kAC IQ E  A  \0jAC Cb ( 1XC Fz 1DAC B} 12AC m  1AC  A  2~AC G A ,43AC M A ,d3AC Mp A 7AC ~ 8AC  (x8AC M (,:AC I A $,;=AC G1 (T<AC I A \>)AC Bb ,l>yAC n A @ A D $>AC G A ,t?AC M A ,(4AAC M A XBAC  ,xCAC S A | A U  DdAC Z A  PDlAC b A  DlAC b A DAC W 4DEAC @ TEBAC } $tHE=AC f A Q $`E=AC f A Q $xE=AC f A Q $E=AC f A Q $E=AC f A Q $<E=AC f A Q $dERAC F A F FAC   FAC v A  FAC ~ A  HGAC ~ A $G8AC \ A V $@GYAC } A V $hHSAC w A V , IAC D  0><JC B E T>ȴ^AC CV x>%AC \ (>(AC Gs A ,>AC M& A ,>ȹAC M A $?5AC Bn (D?8AC CK A  $p?AC  A C ? AC E ? AC E ? AC D ? 
AC E @t AC E 8@dAC Y $X@dAC G @)AC d (@HAC J9 @AC I ,@AC M A ,A}AC M  A $LAAC  A C ,tAAC M A Ah AC A (AX AC J ,A<AC M A  B AC D ,@BAC M A $pB|AC  A C BDAC O (BDAC Gy A B8AC C C(AC C $C AC D DC AC D dCAC K CAC Z CAC Z C7AC Bp CAAC Bz  D8?FC Bn E (DT?FC Bn ELDp&AC a lD AC D DpAC A (D`AC G A  DTMAC CE DAC K E7AC Bp $AC M A $WDAC  A ] WE AC A (WxEAC G` A (XEyAC FP A $,X@FHAC Cz A TXhGC t ,tXXHOAC P A (XxKAC M  XLMpAC Ch XM AC A YM4OC a  4YM}AC Cu XYN4OC a  xY$N}AC Cu  YNQC A Y R3AC .  Y,SeEC Ao (ZxUAC I (0ZXAC L A \Z\AC H |Z\AC A Z\ AC F Zp\ AC F Z`\ AC F ZP\GC [D\ AC C (8[4\AC F A $d[\MAC FB ([\AC Cy E D $[T]AC F  [,^AC Cw $\^AC F ,,\@_AC J A \\` AC E |\`AC O \`JC G $\`AC ] A  \ha AC D ]Xa AC D $]Ha AC D D]8a AC D (d](aAC N A (] dAC I A ,]dUAC JA A (]f,AC J ,^gAC Pp A H^kHcC a ,h^lnAC JM A (^TmAC F A (^nAC K A ,^pMAC J A ( _swAC Jh (L_tAC J $x_vAC F _vAC Q _vAC E _vAC E `vAC A  `v$AC [ @`vAC U (``vAC K} A ,`PwMAC J A  `pzDAC C| (`zAC G A $ a ~kAC F\ 4ah~!AC X 4Tax~ AC P E h A ,aF0AC P,0 A $a0AC Z A v  ahAC ^ A bAC   (bAC H HbC t $hb4AC f A  ,b<AC M% A ,b AC P A 4bAC Po A  E ,(ct]AC P A Xc AC [ xcAC  cd3MC \ $cOAC F@ c(AC _  dvAC l A  $dvAC l A  HdtAC  A ld3AC . d AC U d !AC X $d0C Cw dAC K ,eAC P A $De8AC G  A ,leoAC PU A eAC H ,eKAC P1 A (e AC J ,fAC P A ,HftAC P A ,xfAC PO A (fD aAC IS f;AC v (fAC I , g\AC P A ,Pg AC P A g-C p (gLAC I A ,gAC P~ A ,g@AC P A (,h"AC K A Xh"(AC c ,xh"AC P A ht&AC Q ,ht&AC M A  h$'AC C i'7AC r  AC y T>AC y t7AC r 8AC I 87AC r ԁXAC G H AC F <8AC Gg E H E Q E Q (TUAC JF  AC A AC  LAC  AC K AC I $ 9AC J% A HAC K hAC I $AC G (<TAC Ct E O 0܃p4AC M A  ,|DAC P# A ,@AC P A p<*AC % LgAC b  AC G  Є[AC CS ,AC M A 0$XAC Mg E  8X4AC Mg E V E  ( KAC J<  ZAC U  MAC H 8AC s , <MC J ,P,oAC M A (lMAC Fw E F (fAC GS A ؆fAC a $,LC W 4fAC a (8^AC JO dfAC a fAC a XfAC a ćfAC a  AC C AC A (GAC B H4AC o ,hAC Pu A ,AC P# A  Ȉ AC C ,!BAC M+ A $"AC K A ~  D#AC C (h#AC G` A  X$AC C (%AC G A $%AC E A  p&AC A ,,`&AC M A ,\ )AC M A ,,AC M A ,0AC M A $2AC E A $83AC E A <3+AC f \43AC n | 4AC K  4+AC f 04AC E ܋ 4AC K  42AC m @43AC n <`4AC K \`4=AC x |43AC n 4AC P 4=AC x ܌43AC n 4AC N 4AC  <`5AC E \P5AC P |P5AC  5AC E 5AC Q ܍5AC  @6AC E 06AC P <06SAC N $\p6PAC FE  60AC Cd 6"AC ] $Ȏ6[AC GJ A ,:AC P A , <ZAC P@ A ,P AAC P A ,DAC P A $M"AC H A $؏NAC E A ,,OAC Pf A ,0|VAC P A ,`,Y AC P A , `AC Pm A 0ld AC Ms E  0HiAC Pp E  ,(mcAC PI A ,X4}a AC PG A t1AC , (AC M $ԑhAC G A P"AC ] ,`zMC Gb 0LeAC M# A /AC Bh AC O AC A AC C ܔ AC F  ̔"AC Y $@ܔ!AC S A H $h!AC S A H  AC F ܔ AC F Г̔ AC F  AC F AC A 0 AC F PAC A p| AC F lAC A (\AC Cx B D (ܔAC G A  tAC L A  ,pAC J A  PlAC J A  thAC J A  dAC J A ` AC A  ܕPAC J A $LNAC BB A (t&AC a (HAC G A 0t(AC Jf A   qAC Bj ,̖AC Cc E h A (AC C E h 8($cAC CS E D A q E D dX"AC B[ (hqAC G& A 8cAC CS E D A q E D AC C  AC C ,КAC C  LqAC Bj 8pcAC CS E D A q E D $P)AC W A   $ԘXAC _ A   AC B  l*AC e (@|AC CY E d (l}AC CL E d 4AC O 4AC L ؙ4 AC [ 41AC l T"AC ] 8dAC O XdAC U ,xdAC } A F A C 4ĞlAC U A C A F A F  AC  A AC S ,$TAC M= A TȠ AC C  tAC  A $DAC G   eAC B^  XAC B  ԢqAC Bj ,0 AC A 8L cAC CS E D A q E D TAC R (TAC Mo ԜAC T AC T  qAC Ci $8AC G `AC Q  lAC Cd $AC G ̝AC T $GAC B A $AC F A A  <7AC Co `̨ AC A AC L 2AC m $ܨ%AC K A T  AC H ԨAC V ((ԨAC N E  A  T8AC U A xAC T ,AC M A ,ȟAC P A AC K AC N ,8AC J A ,hdDAC P* A (QAC L; A 0ĠAC Jy A P  drAC Cj $AC F DxAC A dhAC A XAC A HAC A ġ8AC A ,(tFC G^ E,xFAC M A (DAC Jz 0pAC Gp E M A $JC B E ̢AC Z  WAC CO  ȸYAC BR $8AC I A C  `EWC BZ (KC I (KC L  ģ()FC BX E,4jFC GT E(tiFC FT EPDFC Gq C  A $ddqC Cf 4NC Cr C B (tAC Je ,$XCLC M& ,Tx,AC Q A Q A C 
$x\gC Cb  AC D ̥%NC P 4{FC CS C X ($AC M PAC U (p%AC J  YAC BR $AC I A C KC I  )FC BX E, AC D L%NC P 0ljFC GT E<RC @AC I  ܧ0AC N , AC [  ,6nC (@LAC G A $lAC C A  EFC Bt E AC C ܨ AC E (AC t B a A  (XAC BQ (LP`AC ] B a A  xXAC BQ (`AC ] B a A  ȩQAC BJ (0`AC ] B a A  dQAC BJ (<cAC ` B a A  hQAC BJ ( `AC ] B a A  TQAC BJ (ܪAC G% A ($cAC ` B a A  4hQAC BJ $XAC C A  \QAC BJ (AC C A ЫLAC W (LUAC BC E F AC K $<pAC S B C (dh]AC CO B D ,]AC F B _  EAC C} $ AC V B C $ 0AC h B A $4"AC X B C $\%AC [ B C  AC D AC C  ĭ{mC Bx A DAC C  $ @AC G $4AC G \5PC a |5PC a 0AC V 0AC V ܮ0AC C  AC A AC A <AC A \AC A |AC A 5PC a 5PC a  ܯ AC D  KAC CC $(AC E DAC E  d9AC Cq ($AC J $AC G ܰ!AC BZ  8AC Cp  *AC Cb D<AC Bu  dMAC BF 4,AC Be  D6AC Cn ̱`!AC BZ ,phAC FD E T ,OAC J} A L?ZC a  lAC V (AC GO A (pAC L A $dAC FY (,AC FI A (<AC Ly A h$6QC a D>YC a dAC E ȳTAC E $D$AC R E H L%JC Q (0\AC C} E  $\hAC F] !AC BZ  8AC Cp  ȴ4*AC Cb @?AC Bx  `PAC BI (0|AC I8 A 4\w AC P[ A g E ,(AC P A (ĵAC I A ,|AC M A $  "AC E A (HAC I A $tAC G~ A (AC I A $ȶ(AC G A (AC F A ,0AC M A (L aAC GU xdAC X $dcAC GW  8AC Cp 'JC V IC R $-JC \ D3MC b d/MC ^ @MC o 8/MC ^ ĸH-MC \ XvMC e yMC h $%JC T D(+AC f d89dC Q XBLC r MMC | Ĺ\MC K (eAC JV ,<AC J A $@ wAC Gf A hd AC [ (dAC I  FAC | A ,غAC M A $oAC E` A (0NC  F E $\`DAC G3 A bJC C (AC I A л[LC K $ AC G A ,AC M A ,HAC M A (xAC Ix A $HAC G A ̼ BLC r P_LC O ( QJC v D I ,8#AC M  A hAC I ,'AC M8 A AC F $ؽAC G  AC O   iAC Ca DX8AC s $dxAC M A E (pAC Gp A ,AC M A (AC J XKC O 4X$GC T (Th3AC C` E F (|3AC C` E F (3AC C` E F ,ؿqAC MZ A $ \AC EM A (0, 2AC M  (\@ vAC Ic A $ AC G $\ nAC G] A $ AC G L 5AC p $ l cAC ET A $H gAC EX A $p AC G A $tyAC Ej A ${AC El A $$AC E A $AC E A 8T5VC [ XtAC F ,xdbAC MK A AC F 4AC Ms E v A (,AC I A $,AC G A ,TAC M A ,HAC MX A  cXC Bo $AC C A  WcC A/ $$YAC EJ A L@#AC B\ 0lPAC I A _ E ,AC K A $loAC G^ A $AC G , <"CAC M A ,P\$>AC Mf A (l&AC M  'vAC Cn ,L(AC J A  <)9AC Cq $$X)|AC Fq $L)AC G  t*7EC Bf $*7EC Bf  *]AC CU $*AC G $ p+AC R A C 4h+AC O (Th+{FC Fk (+{FC Fk $,AC BZ A $h,AC BZ A $,AC BZ A ($-AC G A $P-AC C A (xT.OFC ] A d x.FYC f  .~AC t A  /yAC o A ( `/OFC ] A d (8/OFC ] A d d/ AC D / AC F / AC G x/ AC G h/ AC G X/ AC G $H/ AC D D8/ AC D d(/ AC F / AC E / AC E . AC G .AC L . 
AC F $.+AC f D.>YC _ (d/QFC b A a (AC Cv ,8WAC F E   L:DVC B\ $p:AC G A 0;8AC KV B  A (<FC q D F $d=AC G A , =,AC M A $P>yAC Ej A (xD?AC K A $?AC F A x $P@AC G A (@~AC Ik A $ AC Bw (dAC Bx E F ȅAC O ȅ AC D  AC E 4 AC E T AC D t AC E x AC E hAC Y h)AC d $xVAC FK (lC G ,HT* AC P= A  xTAC C ,.AC J A AC F  1FC B` E̐AC X $0̐PAC FE ,XAC P@ A  TWAC M A ,AC P A $`AC Gw A  șKAC A A ,(AC M A  XAC B |`AC Q `AC E PAC E  @AC C  ̛AC C $$XfAC JR A L=AC x $lAC GX A $AC J A $@AC G A  ȠyAC Cq  $AC B{ ,QLC a LС AC F l AC F  !AC BZ ,AC P A (hAC IZ , k5AC PQ5 A (<04AC M" hDAC   LAC F} (fAC JS ,D_AC PE A ,taAC PG A ,8AC P A ,hd AC P A TAC H ,TAC P A ,$AC P A ,AC P A (Ht> AC I0 (tiAC IV A ,AC P A , UAC P; A ,)AC P A  0IHH H?EtfDt[I7@tEHHDLHaK)9stMS\HU$H6JLHMtADcPLH[A\A]A^A_]1HKHHEȅHOUHAWAVATSHIIHWHD"At LbMu MAMtEIHL)HL9|vA I~EftHRHHLMIHI9IwH uV LIqII@u!L[A\A^A_]H߾LxIHHLH[A\A^A_]hHOHtHAUHAVSHtɍ ɃIHH1HBH9HGH ʀHPH t#HJHt!ʃҍ҃IH4H Hu1HcHIHtHHɍ ɃIHHHPHHÃIHHLHW H[A^]yfUHAWAVATSIHHvHt HFHu #HtH{HCHHH3I9IFH ʀL{Mu#HCHH菩IHCIFHL% LEIt$Ht HFHu HtIIGHHuh\AD$tAGIt$@tIHHLIv@u [A\A^A_]HHHH[A\A^A_]HHH?8fff.UHH]H9t!UHAVSHILH[A^]jfUH]UHHGHNHOHFHFHOHNHGHGHNHOHF]f.UHH=H5nH]qUHHHHDHGHTHHGHR~HG]fff.UHAWAVATSIHLgHGH HLWGHv@tHHL4H}IIvHt HFHu HtI$HHuLbAFC[A\A^A_]Hffff.UHSPHHGu HHuH{Hu H{MH^}HHH[tHtHHH[] H[]UHAVSHHGu HHuH{JHu H{H|HHLsAtItLLH[A^]fUHw]UHSPHHCCuH[]HHH[]UHAWAVAUATSH(HILH}HGHEIMHEPL;+|EuMeEy0A$AAƀx IMLDIA֐DteAI$Ml$HyAM4ƀaLHeIQffffff.AuJIHHH}IHLHHIE&AtTIvSfEDI7@HHDLHEIHDI~HxRIAIDHDCD)9stxS\HI`HH6H}HHuH5UwH}HuL HPMtADsPMLH([A\A]A^A_]E1HCHIEŅLOf.UHAWAVATSHIIGt`L93=AAFwIDHcȉ€ʀAVHANI=@r%HANHAIH?HwIHtHwHuHtztHHH TDIWHD"AtLbAIQHL)HL9|BAI~EftHRHHLMII@u!L[A\A^A_]H߾LIHHLH[A\A^A_]oHLIAGUHHOHt#HAHt!ɍ ɃIH4H Hu1HcGHtHHIHHHW]UHAVSIHHvHt HFHu HtH{HCHHu7zAFtCIv@u[A^]HHHH[A^]HUHH ]UHAWAVATSH9tyHILgL^AGMwAGtLHsHt HFHu HtIHHu>L輿CtAGHs@u [A\A^A_]HHL[A\A^A_]H뽐UH]UHHGHNHOHFHFHOHNHGGNOF]fffff.UHH=uH5NHw]QUHHG(]fDUHHG0]fDHOHHu8UHAVSHuH_(HtIHHLHG([A^]HUHAWAVATSLcwM~%HL E1fK|IM9uC[A\A^A_]fffff.UHHHHDHGHHHwWGG(G8]fUHAWAVAUATSPIHHGHHWGHG D~EtJLcMn ILD;HK DD+CLHLDWD{D{HC D98}D8C(C8Iv@tH{HHL-M9M~(M u'HC(HEп@BIHLTHEL M9Mv0M u( IHL>nL{0H[A\A]A^A_]ff.UHSPHHGu HHuH{KHuHNHsHHH[tHtH虼HH[]雼H[]@UHAVSHHGu HHuH{HuHݧH~sHHLsAtItL'L/H[A^]#UHw8]UHAWAVATSHLcwM~"L{ E1fK|I#M9uCHCHHHHLsHu%L{(MtL蕚L蝻IȃHHC(HHuxL{0MtoIGu HHu"IHuIXIOH`rHIMgAtItL LL IHC0HtL[A\A^A_][A\A^A_]HH HuMUHAWAVAUATSHHIILHGHEL}+/HIF0HLźIHL;+E}MeEy2A$AAǀx IMLDiIADtLA Iv(HlHEHHHBHIF(EDAIv0H)HEHHHH?ff.A"uzIML}IF HtIcN;}QAVHtI?踺LH轺HIHL|HIH9A}"tff.EtoDtdHEH0@tAHHDLHMSCD)9stUS\H@ID*H}yHH?Mt AD{PME1LH[A\A]A^A_]HCHIEŅLOUHAWAVATSHHIL=L9Hw(H ʀuV8HI軸M9Iv0H ʀuV HI葸E~Et/E1DIN McJtV HIaAE9uI~@u [A\A^A_]HHHH[A\A^A_]kff.UHAWAVAUATSPHLcwHG LxHLDMt@N$E1fff.KUHAVSHIHuI~lAFt I~LH[A^]fff.UH]UHHGHNHOHFHFHOHNHGHFHOHNHG]f.UHH=H5H]UHH]HOHHu>UHAVSHu"HHtIH]HeLHLJ[A^]Hffff.UHAWAVATSLcwxM~5HLE1ffffff.K|ICM9uCx[A\A^A_]fffff.UHHHHDHGHHHwWGHw(G0Hw@GHHwXG`HwpLJGxƇH`HHH]fUHAWAVAUATSPIHHGHHWGHG D~EtJLcMn ILDKHK DD+CLHLD?WD{D{HC D98}D8C(HC8E~0EtGLc(Mn8ILDHK8DD+C0LHLDD{0D{0HC8D98}D8WC@HCPE~HEtJLc@MnPILD荩HKPDD+CHLHLDWD{HD{HHCPD98}D8CXHChE~`EtGLcXMnhILD.HKhDD+C`LHLDD{`D{`HChD98}D8WCpHǃE~xEtSLcpMILDɨHDD+CxLHLD WD{xD{xHD98}D8L{LǃƃIv@tHHL\H ^IMIHtHFLHt!IHHL聦H ]I $IHtHFLHt!IHHL6H ]IMIHt HFHu HtIHHuuLH{I9MM u@֥IHLLAH[A\A]A^A_]HHFHffffff.UHSPHHGu HHuH{HuHΑH\HHH[tHtH9HH[];H[]@UHAVSHHGu HHuH{zHuH]H\HHLsAtItLǤLϤH[A^]äUH剷]@UHAWAVAUATSPHLcwM~/L{ E1ffffff.K|IcM9uCHcC0HHK8u1ҨHDuifqƁ1f.fHH9tH|ufLGAHGH|tLGAHGHHH@C0HcCHHHKPu1ҨyHDu`fhƁ1fHH9tH|ufLGAHGH|tLGAHGHHH@CHLc{`M~PLchE1fff.IM9t,OtI~ 
I~AFtILnC`LcsxM~%LE1fK|IM9uCxH諢H蟢H蓢HCHHHugLsHu"LMtL5L=IHǃƃHtLH[A\A]A^A_]H[A\A]A^A_]H fDUHAWAVAUATSHXHIIHGpHEHGXHEHGHEHHEHHEHHEHG@HEHG(HEHGHE0IuI}H:IAIDH*I苡L;;E/MgEy=A$AAŀx IMLDTIAfffff.Dȃ H "HcHA IHu%HEHHHHIHLA SI$M|$Hy#A4ƀIMHAA:HEHHHH}IHLH~HcIEeApDjAIMfIF HtIcN;}QAVHtLeI<$賀LHHIHLןHIH9A?tAB&HEHHHH}螟IHLH蝟HIEeA+Iu'AJHEHHHH}@IHLH?H$IEeAIuA*jIMff.IF8HtIcN0;}QAV0LdXHEH8Ht$1HH HJWH@H}H蜟IILLHxH]IE,$AtIt$f.DI|$Hx7IAIDHL9;VA?*GH}HHuH5ow#H}HuA28IMfIFPHtIcNH;}QAVHLdXHEH8Ht$1؞HH ~HJWH@H}HlIILLHHH-IE,$AtIt$f.DI|$Hx7IAIDH^躜L9;&A?2H}HHuH5knwH}HuAZIMfIHt$IcNx;}QAVxHt)fffff.LeI<$cLHhHIHL'H,IH9oA?ZtdARuzIMff.IFhHtIcN`;}QAV`HtLeI<$}LHHIHL觛HIH9A?RtE{DlHEH0@BHHDLHxIHEDI}HIAIDHd^DI}HIAIDHSM5L譛IHA눋CD)9sS\HI H6 H6H6OH}HHuH59lwH}HuH}HHuH5lw蝚H}HuH}HHuH5kwyH}Hu H}HH?Mt ADkPME1LHX[A\A]A^A_]HCHIEDžLOf.UHAWAVAUATSHIIHoH9H}HH uV8LMљIHEЋXtIE1Lmfff.IE McJtV8LM葙IAD9uAu&LmAtM9<$$A AGILeLeIcD$0HEHE1ID$8NlAuMe@tI}Auffff.LH ;A]tI] f.HOHEHL)HH9|8I$HD"AtLbAIZIEL)HL9FA:IEgtHRHHLMILeI$HtHwHuHtHHH _I$HD"AtLbAIIEL)HL9ABIEgtHRHHL-MILeI$HtHwHuHtHHH .NMƕI$HD"AtLbAIIEL)HL9AJIEgtHRHHLtMILeA\$`t;E1ffff.ID$hMcJtV LMIAD9uA\$xt:E1fI$McJtV LM͔IAD9uI|$@uZLH[A\A]A^A_]LLŔILL譔IoL L蕔IHHLLH[A\A]A^A_]|LL豔IHEfffff.UHAWAVAUATSPHLcwHG LxHLDMt@N$E1fff.KLeMIT$LLIIlIA4$@uII $La(LeLmHEML-%L}9f.At$Md$LLfzLLeMIT$LLIkIA4$@uIHHLHAzIt{@UHAWAVAUATSH(HdH%(HELwL{L{XDcXL{Hs`HCpH9vAHSxH HuH~DH9t#HLHHtHHH; uH HI(H1HML}HELIzMLL}HH4ƉHzHJAAADȉȃIȃLcH,3yɍ ɃIHLỈȃIIL LyHUHtHcB(H_1lLLyLDLyHHH9vKHH Hu"H~fff.H9t#HLHHtHHH; uH HI(H1MHMLmHELI9yMLLe@HH4ƉHzHJAAADȉȃIȃLcH,wɍ ɃIHLIΉȃIILLxHUHtHcB(H_1lHHLfwdH %(H;MuH([A\A]A^A_]sxUHAWAVATSIHLLfLxLxH{XIvXʩLxLMLwLwHI葩LwIv@u [A\A^A_]HHHH[A\A^A_]f.UHH}]H9t!UHAVSHILH[A^]fUH]UHAVSHIHGHNHOHFHHvwIFXHKXINXHCXIF`HK`IN`HC`IFhHKhINhHChIFpHKpINpHCpIFxHKxINxHCxIHIHIHvIHIHIHIHIHIHIHIHIHIHIHIH[A^]f.UHH=H5^Hg_]tUHHGH*HGHGG HâH]fffff.UHHwH*HGHGG HH]fUH]vgfDUHH=UH5.^H^]1tUHHHHDHOH HHwWG G0G@GPH HOHGXHG`HGhHGpH )HOxHHHHt]HHHH]EtDHu;H`t4UHSPHGXHH蚝HHu Hx$rHH[]HtfUHAWAVAUATSPIHHGH͡HWGG(G8GDHHGHGXHG`HGhHGpH(HGxDEt_LMILDrHDD+LHLD訠WDDHD98}D8HǃEEt\LMILD=rHDD+LHLD+DDHD98}D8L{LcXHǃǃIv@tH{HH֝MnLjrLbrIvXL6L^rIHH[A\A]A^A_]ÐUHSPHHGuH H{耓HH{`Ht+uH{XIHu H{xoH{qPuH{XHu H{xoH{qH&HHCH'qHqH&HHH[tHtHFoHH[]HoH[]ÐUHSPHHH[]$o@UH剷]@UHAWAVAUATSPHLwL MtAMcgM~8LuMoE1fffff.K|IHP M9uAGLuH{XLpHcHHu1ҨtyHDu`fhƁ1fHH9tH|ufLGAHGH|tLGAHGHHH@ǃHcHHu1ҨHDugfoƁ1fHH9tH|ufLGAHGH|tLGAHGHHH@ǃHǃCuH[A\A]A^A_]HHH[A\A]A^A_]鱚UHAWAVAUATSH8HIIHHEHHEHGHEHGHEIMIfffff.L;+EeM}Ey9AAAĀx IMLDlIAff.Dȃ?H \"HcHA %IMffffff.IHHuLaH{IH9IA} t=A*IMffff.IHt$Ic;}QAL|_HEH8Ht$18mHH ލHJFkWH@H}HlIILLHkHIE'AtIwfff.DIHx8IAIDH%kYL9+*A}*H}HHuH5UHAVSHIHFI~FAFt I~BHLH@BOff.AIvH"IHHH?ffffff.AujIMIEHy4ƀx{ILHAF H;K)9sS\HAtEtYDtNI7@t8HHDLHaA`LAHAF KH6LnHMtADcPLH[A\A]A^A_]1HKHHEȅHOffff.UHAWAVATSHIIHWHD"At LbMu QAMtIIHL)HL9A I~EftHRHHL@MIH7I9IwH uV LIm@IA tL93vRAAFII@u!L[A\A^A_]H߾LZ@IHHLH[A\A^A_]J@HL@IAG DHOHtHAUHAVSHtɍ ɃIHH1HJH9HGH ʀHPH t#HJHt!ʃҍ҃IH4H Hu1HPH tHJIHtʃҍ҃IHHHHP H]?HÃIHHLHs HDHW$[A^])?fUHAVSIHHvHt HFHu #HtH{HCHH=HI9IFH ʀu;H{HuHCHHu];&HHCIFHH5HEA~ tC Iv@u[A^]HHHH[A^]jHhH?fffff.UHHms]H9t!UHAVSHIiLH[A^]fUH]UHHGHNHOHFHFHOHNHGHGHNHOHFG N O F ]fff.UHH=]H5~'Hw(]=UHHG(]fDUHHHHDHGHDoHHwWGG$]UHAWAVAUATSPIHHGH oHWGHG D~EtGLcMn ILD+=HK DD+CLHLDoD{D{HC D98}D8HC(C0Iv@tH{HHhHI9Mv(M u9;IHLL{(H[A\A]A^A_]UHAWAVSPHHGuHHH{s^HH{ tH{u H{C<HTH9thLs(Mt_IFu HHuI~"^HuL'HHIM~AtItLo:Lw:Lo:HHHH[t HtH::HH[A^A_]8:H[A^A_]ffff.UHSPHHH[]:@UHw0]UHAWAVATSHLcwM~"L{ E1fK|I#M9uCHCHHHLsHunL{(MteIGu HHuI\HuL%HHIMgAtItL19L99L19IHC(HtL[A\A^A_]f[A\A^A_]H [ffff.UHAWAVAUATSPHIILHGHE&HIF(LHH9IHL;+DEeIMEy.AAĀx ILLD8HAԐDtEA Iv(HyHEHHHĮQH?I@AuzHI@IF HtIcN;}QAVHtI? 
LHM9HIHL 8HIH9A}tff.EtmDtbHEH0@tAHHDHH7CD)9stSS\H7I(H}IeLHHt ADcPIE1LH[A\A]A^A_]HCHIEŅLODUHAWAVATSHHIH =H9Hw(H ʀuHIH7E~Et6E1fff.IN McJtV$HI7AE9uI~@u [A\A^A_]HHHH[A\A^A_]7ff.UHAWAVAUATSPHLcwHG LxHLDMt@N$E1fff.KNH?H?fffff.UHH-W]H9t!UHAVSHIYLH[A^]*fUH]UHHGHNHOHFGNOF]f.UHH=UAH5. HW ]1!UHSPHt8H1"HX8H@H HHH@(@0H HHHHHH H[]ff.UHSPHt(H1V"HX(fH@H HHH@@ H ~HHH[]UHSPHt H1!HX H@H 'IHH@H %HHH[]f.UHSPHt5@H1!HXH {IHHXW@@(@8+@H@H GIHW@@ @,H[]fffff.UHSPHt(H1!HX(&H@H IH@ H FHHHHH[]UHSPHtiH1 HXH IHHXW@HX(@0HX@@HHXX@`HXpǀ@xƀZrH@H sIHǀW@@ @0@@@P@`@pH [HHHH[]ffffff.UHSPHtH1HXH@H gIHH@H[]DUHSPHt(H1fHX(vH@H HHH@@ H IHH[]UHSPHt(H1HX(H@H GHHH@@ H IHH[]UHAVSHH1IHXHcJII^WAF AF0AF@AFPHCRIFIFXIF`IFhIFpHIFxIIAAAAdžH RIIdžIdžIdžIdžIIAdžH1HLLH@H UIHW@@(@8@DH @QHHH@XH@`H@hH@pH HHxHǀHQHHǀHǀHǀHǀHHǀǀ[A^]UHSPHt(H1HX(H@H HHH@@ H HHH[]UHAVSHH11IHXHCIII^WAF AF0AF@AFPHsQIFIFXIF`IFhIFpH@IFxIIAIAAHxHLLH@H HHW@@(@8@DH PHHH@XH@`H@hH@pH HHxǀ[A^]DUHSPHt H1HX H@H wHHH@@H[]fffff.UHSPHt10H1vHXH HHHXW@@('0jH@H HHW@@H[]fDUHSPHt(H1HX(H@H HH@ H 6HHHHH[]UHSPHt(H1HX(H@H 'IH@$H@@ H HHH[]ffffff.UHSPHt.8H16HXH kIHHXW@@$.8-H@H >IHW@@ @0H[]fUHSPHt10H1HXH IHHXW@@('0H@H kIHW@@H[]fDUHSPHt(H1VHX(fH@H IHW@@ H[]UHAVSHHMH{DH{ ;HH9tH[(HtHCuHHu[A^]H{e8HuHHH HHLsAtItLLH[A^]UH勇]UHG]UHAVSHH tH{u H{HH9t5Ls(MtL2L:H[0HtHCuHHu'[A^]H{7HuH{H{ HHHLsAtItLLH[A^]UHG ]UHSPHH tH{u H{H{(H{@H{htH{Xu H{XHtH{pu H{pHAH5H)HH9t"HHtHHH[]H[]UHG8]̋F UHAWAVSPIHtRL{HCHHLILHKHHLH7K AF tXHCHHudH{u 0HCILH{HtOHp@tHHHg?K H[A^A_]H6VHvH?H{uUHAVSHIHI~HHHHfEL[A^]UHAWAVAUATSPHLwMt1Mc~M~(MfE1fDK|IHP M9uAFH{H6HH[A\A]A^A_]6UHAVSHPIHdH%(HEEHEHH HtcIHWELuLuEHEH}HHXH}HtHWHHu]Ht`EtH}IL1CX{Pu2KMK!HcHKH dH %(H;Mu)HP[A^]1H HuHIQLEuRUHAVSHPIHdH%(HEEHEHH HtcIHWELuLuEHEH}HHbH}HtHWHHu]Ht`EtH}IL1CX{Pu2KMK!HcHKH dH %(H;Mu)HP[A^]1H HuHIQLEuRUHAWAVATSH IdH%(HEL&L'MILH=J 1HHH HIFW)EHEIwIOH9IW HHuHfH9tzHDHHtHHH;tHEL}HM"IFrHH@(HEL}HMHt0L}ffff.HHL1_HEHuM&I^J4Ĩ?H~MIDHUHAcdH%(H;Eu H [A\A^A_]UHAWAVAUATSPLIIAI9BAwIBAɀAAEgI=@r#DfDAOAIA?AwtFɍ ɃIAV׃UHAWAVSHHHIdH%(HEHIL,|uHHHM)EHPHpH}7H}HUL1-L}LMuH}HULw)HEHHEt H}]MdH %(H;Mu HH[A^A_]hUHAWAVSHHHIdH%(HEHIL\{uHHHM)EHPHpH}E6H}HUL13,LuMtIFHEt H}MdH %(H;Mu HH[A^A_]UHAVSHHdH%(HEHzuHHHM)EHPHpH}5HHHH}HUH1r+LuMtJHEHMLuHEHMHMHEHEHEHEH}CHEHD$(E$HAEt H}MdH %(H;Mu HĀ[A^]UHHH HH;]UHAVSHIHHxHHH9vHHp HHu"LGfff.I9t#HTHHtHHH;uHHR(H1H3HHFHNIHLH[A^]UHSPHHHWH@H[]UHAVSHI~IHHHLH[A^]gzUHAVSHIIFHHKHINHHCHIFPHKPINPHCPIFXHKXINXHCXIF`HK`IN`HC`IFhHKhINhHChIFpHKpINpHCp[A^]UHSPHCHH[]UHAWAVATSHLwMtWAFIWH@LsMc~M~#MfE1K|IHP M9uAFW)EHEHSPHC`H9vbHKhH4HuHfH9tDHtHHtHSHHHH;4tHuHUHEOHHI(HMHUHEHu7dH%(H;EH([A\A]A^A_]DH}?H}tHsH5H=.PIHPut HH1IH9tfLLK}LeAO IGHHWLHL9A $upIL$HHA$IFHtHc0A;v }AN9}HcHLHLANHcL|AFIFcfffff.It$IEt$IET$HIT$It$Hfff.LmAO IGHHuxMgMu JIIGIEI9AD$M|$AD$tLAE(tAD$Iu @MHHL]8H6H?UHAWAVAUATSHHdH%(HELoHHH}Mu@HMLuLXH5[LH}HLH}A}IELpHLD,fff.IIEHHHHDIcEHI9tI>HII>HH}HuH H]HCL9tC(Lc C tLoAGtC(Iw@vHHLadH%(H;EuHH[A\A]A^A_]UHSPH=WH@HH[]UHH>Ht].]UHAWAVAUATSPIIHHHHPHQI~\sLAO(9t;L~ uAtH~ IADLF( uWAIGAF8F8HHHH[A\A]A^A_]UHAVSHIH>{;IHLH[A^]UHSPHBHHHH[]UHAVSH!LsHH%L[A^]UHAWAVSH(dH%(HEHL0MtfHAuIFHEA)EIVIvH},L{{( uAtH{ C( HEIG(EAILs0dH%(H;Eu H([A^A_]UHH}HH]UH]&UHAWAVAUATSH(HdH%(HELwMuUL{Mt-L1IHHwHBM>WAFIWH@LsMc~M~#MfE1K|IHP M9uAFW)EHEHSPHC`H9vbHKhH4HuHfH9tDHtHHtHSHHHH;4tHuHUHEOHHI(HMHUHEHu7dH%(H;EH([A\A]A^A_]DH}8H}tHsHmH=fPIHPut HH1IH9tfLLwLeAO IGHHWLHL9A $upIL$HHA$IFHtHc0A;v }AN9}HcHLHLANHcL|AFIFcfffff.It$IEt$IET$HIT$It$H fff.LmAO IGHHuxMgMu ZIIGIEI9AD$M|$AD$tLAE(tAD$Iu @MHHLm8H6H?&UHSHHHdH%(HEHHH+muHHHM)EHPHpH}(H}HUH1[H]Et H}HdH %(H;MuHH[]UHAWAVSHHHIdH%(HEHIL|luHHHM)EHPHpH}e'H}HUL1cZL}LMuH}HULUHEHHEt H}MdH %(H;Mu 
LL0@HXHHHHCXHu#HQ 9PtPHIHHLHGxH;1H[A^A_]UHAWAVSPHOxHt4HHAHtH!lHKxHAHHAH_`HCXuUHLx>Mt2HIHǺjLL0@HXHHHHCXHu#HQ 9PtPHIHHLHOxH41H[A^A_]UHAWAVSPHGxHt-HH@HtH!kHCxHH@ H_`HCXuUHLx>Mt2HIHǺqLL0@HXHHHHCXHu#HQ 9PtPHIHHLHGxH;1H[A^A_]UHAWAVSPHOxHt4HHAHtH1jHKxHAHHA H_`HCXuUHLx>Mt2HIHǺzLL0@HXHHHHCXHu#HQ 9PtPHIHHLHOxH41H[A^A_]UHAWAVSPH_hHC`uUHLx>Mt2HIHǺLL0@HXHHHHC`Hu#HQ 9PtPHIHHLHHGHu[A^A_][A^A_] iUHAWAVATSHLshIN`uZHLx>Mt6HHϺIHLL9ALqHLHIN`H@@u H9W 9QtQHLILHIH IH[A\A^A_]UHAWAVATSHL7I^Htxt1@I^HuZHLx>Mt6HHߺIHLL;CLsHLHI^HHu$H;O 9KtKHLILH[1H;XHEH[A\A^A_]UHAWAVATSHL7I^htxt1@I^huZHLx>Mt6HHߺIHLL;CLsHLHI^hHu$H;O 9KtKHLILH[1H;XHEH[A\A^A_]UHAWAVATSHL7I^`txt1@I^`uZHLx>Mt6HHߺI7HLL;CLsHLHI^`Hu$H;O 9KtKHLILH[1H;XHEH[A\A^A_]UHAWAVSPLwhI^`uPtzHLx>Mt0HHߺ{HL8@LpHLHI^`HuH;G 9CtCHLH[Het%E1ff.H[`IEHduHH[A^A_]UHAWAVAUATSH8LMIIH}dH%(HEHzX1dIC$tI I}XD1|dIM)ILeLuL5H}L1`dH]H9HCH)HEHEH]H}LH>dHHMH9HCL}II)IL9LBMt'ILmIs)C6AEMeMuP\WLmAEIEKLHHLHHHDHHUUIIEHI]MuLLLXC4dH%(H;EuLH8[A\A]A^A_]YWLQUHAWAVAUATSHLhMIH}LmdH%(HEIEHuMuL/HIHtHL9x wEHCHuLs(TLx WHXIIMH HtIMIH]I}H˚IEWHCLpAGHHpLiH@HLHPfIL;PUAEHfrHs8IL$8IT$XLuLLE1-bHLEtH}S롃tM}HhMHp@HH]HpHIH1fL9ILXsC?EHEMuH^DHLHHHDHHL}IQLL}HEHH]H]L}HEHH}LnUHEB8H`BW)EHEH}LH}HEHH„uHtjHMHDL@MthEHUHEHDHEMHEH$LuLLE1$HLiEt H}*QLxHt YHuHHu5Et H}PLXZMH]W)EHEIwD6IsC6EHEMuD[DHLHHHDHHHxdPHxHEHH]LuHHHLSHH]B0AGW)EHEH}LLfH}HEHH„uxHtcHMHDL@MtaEHUHEHDHEMHEH$LuLLE1HLEt'H}OHt ZXHuHHu蹿E_H}vOQHt&HH]Hs9 HHuudH%(H;EHEWH@wHHHIIILDILIHINLHHID$IM4$IL$HHHIOL H]dH%(H;EuHHĘ[A\A]A^A_]PH蹀fUHAVSH0HdH%(HEHEWEHELuL4$)HuLdH%(H;Eu HH0[A^]MPffff.UHAWAVAUATSH(HHdH %(HMHN8HVXH1E1w\HEHUL5GH}L1[L}L9ICI)HEHEL}H}LH[HHMH9HCIM)IL9LBIL}IsC6LcMu9ELHHMIILDILLIHCIL+LsLLLPC4dH%(H;EuHH([A\A]A^A_]H~NUHAWAVAUATSHHdH%(HEHUBu&dH%(H;EWHCIHpHu IH@HuHIHtfH uHHu1HEH1҄HDHUHULLUHt'HHE1AHr'uHAIE1E1 HL)DaHULLHt @1HMH HLkLcdH%(H;EuHH[A\A]A^A_]MUHAVSHdH%(HEOtvHLDEA A1DQE!EAJN4M9E1IIM LDAE!MIJ2N42M9tT1HEWDD IE9sYAAAD+G AAE9vB‰WH8tO HHH@dH %(H;Mu?H[A^]MIEHI)HUHLHLSHEFLUHAWAVSPI_L7F  ȉ  ȉ A@CAHYSIMt-HILL^LHH[A^A_]SIGAOHt$H1ffff.HHH9uH[A^A_]UHAWAVSPHIIHGGHtQIH1@HHH9u1MMEfDII~IFI@IFAGII9IHHHtIAW 1!։L9L9L9tE1IrIM MD!IINNL9SH[A^A_]UHAVSL7AIuCI^HuH;HCH9tNLHGI^Ht[A^]H;HCH9tLH[A^]GUHAWAVSPHIIHH$sLH(VHIF8@IpH0tIH@@uOI OIHHH@@@H@HHIIpI0;HtLHHHUH1H[A^A_]Ht0UHAVSHHIHIvHL[A^]{FUHH]UHAWAVAUATSHHIdH%(HEtH{H{H5 TIHL1TLTIHL11T[WAIGMtLL1TAtIILLTH>LT`dH%(H;E{LH[A\A]A^A_]ALeL-Ŝ L=LH5mE1A.JL6JHH5qHH5oGHLHH5 HLHH5PvHH5jtrHDLHH5Ny>SH1iLHH5'5HL%HHLbILeL- L=Iff.LH5lE1AHLIHH5qHH5HLHH5zHLjHH5 uVHH5BHLRHH5x>#H19KHH5U& HLHHL2HH5kLuLE1AGLGH5\HH52 HH5j}HsHHLGfAL}L- L5fDLH5kE1ANGLVGHH57qHH5HLHH5 HLHH5psHH5%HDIHH5nv>sH1IHH5$UHLEHHLFILeL- L=RLH5i E1AFL&FHH5q#HH5HLHH5r HLHH5^v&HH50bHLHHH5h}CHǾVHHH5r#"HLHHLOEL}L5 L%3#L-QLH5h!E1ADLDHH5qHH5HL~HH5g}jHLZHH5FHH51h2HǾEGHH53HǾ$GHLHLHHL!DAUHAWAVAUATSHHXHxdH%(HEHHpHFH`H9rIH)"f.HCXH/H;`HGCLcHCIDHEHEKHMH}LACHCHDHEHEKHMH}!ACHSHC'HhHDHEHEKHMH}E9EOA9DLDW)EHEuE1E1?ffffff.AaLcILg=IHEH1LSAMH{HXHELmM)ID<$Hu1LAMu5ML&=HpHLxLWHxG0G G11Ct HCHu gHt`LkHxH@HH[HxHx<KLEcLeHESHUHULHLECtHCLxHtWLkID$HHI|$ <KHKHDHMHESHUHULHLECCt HCHu bHt[LkID$HHI|$(<KHhHEK'HMHESHUHULHLEMtLq;dH%(H;Eu6LHĈ[A\A]A^A_]H6H6 H6tH}W=UHAWAVAUATSH8IIHdH%(HEWG0G GHK(HtHAH HHCIIIHC(HEIsC$ELmMuBNLHHLuMIILDIL3:IHEILuLuLeLLL:CD%HuH}L9Et H} :dH%(H;Eu(HH8[A\A]A^A_]M6I?H}l;ffff.UHAVSH0HHHHdH%(HEHEH}{LuIHu2H}L;t H{W9HEHCELuAuLRIdH %(H;Mu H0[A^]B;fUHAWAVAUATSHhIIHxHdH%(HEW))))))HDž(}8HH@W@H@ HHLLK'H L(HHMMt:A?u ILI9t%A?u ILI9tA?u ILHHugHcHtH CLctIfILM!HHHHfDfAXHLHq>1HIOL!AHHH9@ HHW)P)@)0) )HDž`PL`LM!LE6IW@HIEMuLL8LLLMt%HID$HuI$LPL=Ht9H@xHpHEpHEPH=AIH}LHjP/LeIGIH!AHHI9fA|$IW)E)EHAH}LH @dHEHH8t HH5HMHAH8HBfEW)E)pHAHpLH 1vHpH t H0 
5HxHBH8HAAfDžxW))HAHLH HHAPt H`4HHBH8HA(AfDžHxHW E1#HH5;APt H`,48Ht HH4 t H03LMt%HID$HuI$LPLJ;EIHL!苍HHI9eHAu_ZHAu!HAuH HpHuxtH}93 @uLH}HuEH}3@LL53L7LHH)[}HEHE HLuLGL)LpHxHHHUHpHHEHEH9tGHC29H5I&#H"H5(" H5HH9H\dH%(H;EuHHh[A\A]A^A_]4UHAWAVAUATSHHdH%(HEW)EHH1HHH HuDELbHI!HHMHHHHA A)HH!HH@uCH=0茫H}S\N,UHAVSHLwL;wsLIX HILs[A^]UHHH?Hu@tHx])@u]DC]UHAWAVSPHLwMthM>MM;~u)fDLM-M>INI9MtI9uM uIGI~ Ht*)L")H{0y-H{(HHt [A^A_])[A^A_]UHAWAVSH8HdH%(HE(IH;H}Ht-1HE؋sL}LHuLLLsdH%(H;Eu H8[A^A_]*UHHHH@H8]!UHAWAVAUATSPLoHML~AALE~LEfH}IAE I}!IE}0IEU(1L9ICL+1ɅEÄMDMlMuH}I9t@AF Iv!IEv0IEV(1I9IBLP+H}1ɅEـuILH[A\A]A^A_]UHAWAVAUATSHH袋.HLwI)IH.袋.LM~I9H}HH)HHH ?L9LGH]tE]tH9LCMtI9nIIkX&ILE1MkXK.IkXLHEHMHML!LyLL)H)H]K.HXM9HH]tE]tHHkXIM1WI ILILILAIL IL ALALADID IL8IL8AL(AL(AD(ID8ILPILPAL@AL@AD@IDPI HXHXL9d@LIXM9uHML9HEHHYHEHAMtLx%HH[A\A]A^A_]!茦UHAVSIHHHHFHGHtH@H{AFuIFHHHO IVIv ogH{(AF(uIF(HHHO IV0Iv8CgH{@AF@uI@IFHGA IVHIvPgH[A^]UHSHXHdH%(HEWEHEHG HEEEHEHEEH}HCPHSXuHC8HHHK8H9t f@H{0H{8`(WC0HC@H}H(H}Ht#dH%(H;Eu HHX[]%UHAWAVSPIHG0HG8HH1I&H;qt3ff.H wIsHH1IH1H;quߋC0umH;qt~>tyHL}C0uKIH1H;qt2f.H wIsHH1IH1H;quߋC0u 1H;qt>uHKHS8HKH[A^A_]øH+qC0Hs8HHH;HP D EAA wL OcMA A"" 1{HHHHH;Htt9uuoHHHHH;Ht]9luXHHHHH;HtF9luAUHSPHHHB8HJ@H)H~4HHHJ8WH[] 5 H+HG0HO8Hz HHHC8HHH;HHHHHL@L99rHHHHH;Htq9uulHHHHH;HtZ9euUUHSPHHHB8HJ@H)H~HHHJ8Wf@ H[]Hz HgHHC8LH+HG0HO8HHH;HHHHHL@L99aHHHHH;H9lHHHHH;Htq9sulHHHHH;HtZ9euUUHSPHHHB8HJ@H)H~HHHJ8Wf@ H[]Hz HxHHC8LH+HG0HO8UHAWAVAUATSH8HIIdH%(HEHHH;PtHHI$HPHELuH0H9hH=P\tU"HVHD>LmIEIU H)HHPIUD8EI$H0H;puIL+hHH0I$H0H;pD<EteHH0LmIEIU H)HLM.H=M&LLuIމ H=uLLuIEPuHH0LLLAA~0\%=tH}DH= HuLuI$HH;H9\HHI$HH;H9u~HHLuLLLA~0 IHu Ɓ$LuYHH0LeID$IT$ H)HHPIT$EA~0uqEDpHULjMI)LbH{8HC@H)HHGHC8HSWMLeDuHu!HH+pAF0 Iv8dH%(H;Eu]H8[A\A]A^A_]AF0 Mn8ԾLA0DID$0H{ %H{8_L%enwH+pAF0 rHE@0 Lh8bUHAWAVAUATSPHHH;HtHHHB8HJ@H)HHHHJ8Wf@HHH;Ht-I&D I wMsHHHHH;Huރ0u+H;Ht`9}u[HHHB8HHHH@H[A\A]A^A_]HB HIHI HLLIG8G1I&0uH;H9"IIIԹXA~0uLLI7HH;Vt:ffffff.:H wIsHHH1HH;Vu߃x08H;Vf::]HHfff.HH2H;rt>H wIsHH2݃x0LLLA~0LLLIHH;At1DDI wMsHHHHH;Auރ0H;ADA,u>HHHHH;HDI MHHA}u=HHHJHz8HHH)Hr8HHHH[A\A]A^A_]H+AG0HG8H+HG0HO8H+V@0HP8UHAWAVAUATSPHHH;HtHHHB8HJ@H)HHHHJ8Wf@HHH;Ht-I&D I wMsHHHHH;Huރ0u+H;Ht`9]u[HHHB8HHHH@H[A\A]A^A_]HB HIHI HLLIG8G1I& @0uIIIA~0uLLLIHH;At:DI w&Ms HHHHH;Aufffff.0IH;AÀ8,uEHHffffff.HHH;H`DI RMHHHЀ8]u=HHHJHz8HHH)Hr8HHHH[A\A]A^A_]^H+AG0HG8UHAWAVAUATSH(LIMXL9H}IM€-@H}uLHMLM MZM9bI+@EA0-AABMAMALMMPfWA-ugM9<E1EEsA 3 s*DIYIECXALMM;HuukA9uλ ^M9E1ҐEEsA s*DIYIECXALMM;Huu A5vλAMXfWA-M9|H ALff.E!AL$Ѐ \M9w*K MqM0EEM4HALMM;Hu-I9A9uI{E1E1E1M9HALyE!A|$@ M9w*KuI> LIIFIF(1M~I)HLH9HGHt H|1IFIM~HI^ H[A^A_]UHAWAVATSLMM;H5A DQиA r"DQArDQAIMLMM;ZEAXA r"AXArAXAIMHL3L;sEEzAA r EzAArEzAAwkIL3H6HH;^tYD3EfAA r EfAArEfAAw%DDDDDDHH G0HW81[A\A^A_]UHAWAVATSw+HHCHK H)HHHHK@0HwAAAHHCHK H)Aw A AuIN LIIFIF(1M~I)LH9HGHt H 1IFIM~HI^ H[A^A_]UHAWAVAUATSPHI~ wfAF *CAF fAF CAAIIL"MtIL$J9I;$vZLjM9MFHzHUujHUHBHB LHt7Ht-L(H@L`HMH1II HIM|$1ANH0H INIH3SLCAH[A\A]A^A_]UHAWAVAUATSHIIfGAIL!MtIT$J:I;$v^]HuHYL9IFHyHMumHMHAHA H{HH@L`HMH1IHu]I<HLIT$M#nI MnLM!nA^AH[A\A]A^A_]UHAWAVAUATSHIIfGAIL!MtIT$J:I;$v^]HuHYL9IFHyHMu}HMHAHA H{HH@L`HMH1IHu]I<HLIT$M#nI MnLM!nA^AH[A\A]A^A_]UHAWAVAUATSH(IIHdH%(HEIH W)EHL HMȉEIVL!E.MH]HILHULHA A)M!HfDžHLHH$HLLL3 E11WZHD H5ѾLLE1A;LCILHL LL菉H5ٞHˍH0Hlt HHD#E1UE1HpdH%(H;EDH@[A\A^A_]E1PuTHH1 At HIHu'0u3PnH`]H 0tH@Pu. 
fUHAWAVAUATSHHdH%(HEW)p)`HEHEHEH`HEEEHEHEEDH>H5 }HuHHxHMH)HTHHHxHxLpLHCIHsC6LcMu9ELHHMIILDILIHCIL+LsLLL"C4H}H}HtvHpHhHtYdH%(H;EHHĈ[A\A]A^A_]H5LXLE1AL H50H̊H5cN H踊LH` HxH(UHAWAVSH(IHdH%(HE L9t>LIHeIfEH}L-IFEtH}:t,L5L3dH%(H;EuHH([A^A_]1H=tĿIoLHdL5H=affffff.UHAVSHH?HHt`tH9=Mt H[A^]`[A^]H=IHLtϿ1HHHHH=LfDUHAWAVAUATSHxIIIH`dH%(HEHEHHH@HpH}-W)EHE)ELmHuLW)E)EH]ILHPHH}HLMMrDuEt]t7L}7HuLL`IEAALeMHELDMt(IIs(C?hLiMuS_ƅhLiRLHHLHHHDHHIHxHHhLpLLLMAEhHiHExHEpL`LD!huiEEt H}H}HEHtH9=tHPdH%(H;EuwLHx[A\A]A^A_]HxEt%H5" 1L`LEhH}E[_H5pLTH.IHLIHHHHH=LHh4$@UHSHHHdH%(HEfEHUdH%(H;Eu HH[]UHAWAVATSH0MIHdH%(HEW)E)ELeLHLLL>dH%(H;EuHH0[A\A^A_]jf.UHAWAVSH8IHdH%(HEfEW)E)EL}LHUHLLdH%(H;EuHH8[A^A_]f.UHAWAVAUATSH8IIIdH%(HEHAIIIIFHEIsBmEH]MuHQLHHL}MIILDILHHHHEIL}L}LmHLB+HuH}LEt H}IFHHuLILLHdH%(H;Eu+H8[A\A]A^A_]M$$IH}!H6fffff.UHAVSHHdH%(HEW)E)EHwHt VHvHLuLW)E)E)E)pLuHEHEEEHhEE)ELpLHLLdH%(H;EuHĀ[A^]fDUHSH(HdH%(HEH]HfEH}HCEt H}+dH%(H;EuH([]@UHHH J]HHH@HtH]`]DH!]HHGHGHUHSPHHHQHH[]UHAWAVATSHIHdH%(HE:HB@HEBHEJHt#HyfHtHW|8/Hu1HH)L~HGHUHEHxHML`HuHxL8IHLLLP`t HpdH%(H;EuHHĀ[A\A^A_]UHAWAVATSHIHdH%(HE:HB@HEBHEJHt#HyfHtHW|8/Hu1HH)L~HGHUHEHxHML`HuHxL7IHLLLP`t HpdH%(H;EuHHĀ[A\A^A_]UHAWAVATSLw L;wIFHt[A~Ht!HCHK H)HHHHK,0,A:DDHCHK H)HHHHKD8IFLw HG(L)H~CIFHG IAFHHCHK H)H~9HHHK{[A\A^A_]G<HGHHHLs 렾HHC봾IHLHCIHLHC5UHAWAVAUATSPLw L;wCIFHt[A~Ht!HCHK H)HMHHHK,0,A:DDHCHK H)HCHHHKD8IFHHH H+HH9HHHHHrqHH)H7DLNMQLVEH7D@LNMQLVEH7D@LNMQLVEH7D@LNMQLVEHHH9rH[A\A]A^A_]HLDHMPMZMXE HuRG<HHIHI4LHLHIIHI LLLHCHuоIHIHuLLHCUHAWAVAUATSH(IHdH%(HEIH W)EHL HMȉELkM!HHLHLHA A)M!HtDIM~M!fDLHIIFL!AHHI9uHC HHCHK H)HHHHK]H߾HA>IMfM!ffffff.AL$It$u L!A$@H )L H,I|$HI IFL!AHHI9uHC HHCHK H)HHHHK}[A\A^A_]H1[A\A^A_]6IFu>HH!AI7uH uX@ufI6H߄xo[A\A^A_] H ) HL[A\A^A_]AH[A\A^A_]A6H[A\A^A_]A6H[A\A^A_][A\A^A_]1%HHCHHCUHAWAVATSLw L;wIFHt[A~Ht!HCHK H)HHHHK,0,A:DDHCHK H)HHHHKD8IFHHHHP H)H~RHQHPnHHHHQHPuHHHHQHPlHHHHQHPl[A\A^A_]G<뚾HHHHHH뒾IHLHC!IHLHC7UHAWAVAUATSPLw L;wtwIFHtSA~HtHCHK H)H~ZHHHK,,,A:DDHCHK H)H~KHHHKD8IF@H[A\A]A^A_]a G<AIHDLHCAIHDLHCUHAWAVATSLw L;wIFHt[A~Ht!HCHK H)HHHHK,0,A:DDHCHK H)HHHHKD8IFLw HG(L)H~CIFHG IAFHHCHK H)H~9HHHK[[A\A^A_]G<HGHHHLs 렾HHC봾IH{LHCIH_LHC5UHAWAVAUATSPLw L;wtsIFHtSA~HtHCHK H)H~VHHHK,,,A:DDHCHK H)H~MHHHKD8IFH[A\A]A^A_]EG<IIHALLDHCHuоIHALHuDHCUHAWAVAUATSH8dH%(HELw L;wdIFHt[A~Ht!HCHK H)HlHHHK,0,A:DDHCHK H)H_HHHKD8IFIfI~MM!M9HLkHC L)HIEHCfWf.u"z LMy IEAE-f0.@0HU_8f.H}vMuAE-fWTMEEHuHULűUL H}HL)HQHHHAM9dH %(H;MH8[A\A]A^A_]G<IHfEfoELLk IH)EfoELHCoIH)ExfoELHC|BUHAWAVAUATSPAHLg L;gID$Ht\A|$L;t!IGIO H)HHHIO,0,A:DDIGIO H)HHHIOD(ID$L;MgIG L)H ~TID$ IGExL It$A$-AD'H L)HQHHHAH[A\A]A^A_]C<똾 L^Mg뙾LKIG-L5IGIUHAWAVAUATSPAHLg L;gID$Ht\A|$L;t!IGIO H)HHHIO,0,A:DDIGIO H)HHHIOD(ID$L#M|$ID$ L)H ~@IG ID$DL H L)HQHHHAH[A\A]A^A_]C<몾 L0M|$묾LIG>LIGZUHAWAVAUATSPIHLg L;gID$Ht\A|$L;t!IGIO H)HHHIO,0,A:DDIGIO H)HHHIOD(ID$L;MgIG L)H~TID$IGMxL It$A$-IL'H L)HQHHHAH[A\A]A^A_]C<똾LMg뙾LIG-LIGIUHAWAVAUATSPIHLg L;gID$Ht\A|$L;t!IGIO H)HHHIO,0,A:DDIGIO H)HHHIOD(ID$L#M|$ID$ L)H~@IGID$LL H L)HQHHHAH[A\A]A^A_]C<몾LM|$묾LIG>LIGZUHSPHHHHP H)ʅt;HHQHPtHHHHQHPrHHHHQHPuGH~{HQHPfHHHHQHPaHHHHQHPlHHHHQHPsHHHHQHPeH[]þHHHHHHNHHHHHHfUHAWAVSPH [ MHPL@ I)I9HJHH"1HeLNfffff.DEF L7I^L{M~Eu DHH9r\LI[LsMsDAuuLMZI[IZA0LMZI[IZA0LEAGIZLsMrDLAG MZI[IZE wHHHHQHP"H[A^A_]IHIHBLLIHPUHAWAVAUATSH8IHuH}fH~HHWH!H4H:HDADEL4ARdH4%(HuL]t$HL!Hɉ΃р6II AJIH9DH?IE) H)W*YXڑ,f\fW1f. 
A\A)E1H 6EH HHHILI LAIH HH HII II IJLHHMIITH}HuHMLEdH%(H;EuH8[A\A]A^A_]uUHAWAVAUATSHAAAHG,<AuAEt'IcHAGA9AOAAIǾ0LIcf.0HHAUwBDHEH<HH4DHcHu2HE.EIcHHAED)HEH<IcHf0.Ey H{HEЃCЃ0AE9AAAB<30A}AE9}df0.C0AH}|/AMEIcHHD<0AHA9HHHEDHHAuCexEHOH{HsAGIHcLC.IcDexHHJ3HEHC-HD)drAAr/AA@Br AArAA ALH}H)HLML!AL-0fDA0EaE McFEEIIIM)E!AAOcLMAE1EAYMI-Ei'EMiQI%EkdEMiMbI&EiEMik_kI6EiـhEAMI#G GMEAMiZ| I'Ei٠2EMiɃCI2Ei@BEMiɉ;UI9EifDE)EEEEEEEEE1EAL )ffff.IIKH!AIII)w4HHHYHH-Di'D)LiCI1Ai{kd)Di{AC EkdD)HH=B<@>H}HWx/e9HHH3Ho#HH)ǃ 0HdHL HH  HH mDHI@zZL9~HB|;@>HH=0B<@>HALA|;@>HH=B<@>HALA|8@>HH=ޖ<@>HH=ʖDFDFFDFBFADFBFADFH H@BDLXA|8@>HH=E<@>HH=1DFDFFDFH.cH AfH KVHH HsHHH HQJII III IHʚ;Hiw- 0ikd)H )AfFH2kd)H BfJHɈFDFHHaw̫HHHHiH)ljйYHH-Di'D)HiCH1DAEi{AEkdD)Di{AEkdD)LiϻI(Ei'AE)Hi{cH.Ei{AEkdE)EEi{AAkdA)EHfKfFBCfFSfF;fFBKfF BSfF B[fFHH[A^]UHAWAVSHXIHdH%(HEWEHEHG HEEEHEEEHEHI&fDH w IsHI8H}LHMȅIH w IsHI8HEHMHECPHKXHC8HHHK8H9t f@H{0H{8WC0HC@H}H}HtidH%(H;Eu:HHX[A^A_]ùI+FMHEHEHUHEKPHSXKUHSPHDAH wLیIc LH[]A"u 1H[]H[]Q H[]HHHxu!HHHxlHHHxlHHHB8HJ@H)HHHHJ8WHHHxaHHHxlHHHxsHHHxeHHHB8HJ@H)HHHHJ8Wf@ SHHHxruMHHHxuu@HHHxeu3HHHB8HJ@H)H~IHHHJ8Wf@ H[]H+NG0HO8Hz H³HHC8Hz H襳HHC8Hz H苳HHC8+UHAWAVAUATSHXAHIdH%(HE)EHuHEHHEHEH}HHuID$DHHIL$D(EHE\td"HHHMD(LeID$IL$ H)HLL}IDI藶HuLALL}wIL+eHHHMHD, Et?HHELeID$IL$ H)HBL5HuH~uHHELILVA0T%=tH}HEHI~LHuL}HE8\HHHMxuyHHEL}LLLA0 BHu Ɓ$L}nHHELeID$IL$ H)HHHIL$EA0u}EDxHMLiMI)LaH{8HC@H)EHHGHC8HSWMLeD}HugH+Et AG0 AG0 IG8HE(EdH%(H;EuIHX[A\A]A^A_]þLqID$-H{ iH{8]L%|lAG0 Mg8HE@0 L`8vUHAWAVAUATSPHHB8HJ@H)H~HHHJ8Wf@HI&fH w IsHH0u(}u[HHHB8HHHH@H[A\A]A^A_]HB HIHIbLHLIG8V"11IIIԹA~0uLLI7Hffffff.:H wIsHHHx0a::8HHffff.2H w IsHHx0$LLL1A~0 LLLIHDI wMsHHH0DA,uMHHfH w IsHH08"H+FG0HG8wA}u=HHHJHz8HHH)Hr8HHHH[A\A]A^A_]_H+NG0HO8 H+q@0Hp8 UHAWAVAUATSPHHB8HJ@H)H~HHHJ8Wf@HI&ffff.H w IsHH0u%8]u[HHHB8HHHH@H[A\A]A^A_]HB HIHIլLHLIG8U1ffffff.H0uIII)A~0uLLLIHDI wMsHHH0UDA,uHHH wIsHA]u=HHHJHz8HHH)Hr8HHHH[A\A]A^A_]yH+NG0HO8UHAWAVAUATSH8HUH}HuHI<-uLqyHUHZ@0DGA;DAEFIAxfWA<-uS@ E1A s*CDyEgA rEgArEgAw&DDDDDDHH G0HW81[A\A^A_]UH勇9]UHAWAVAUATSPMMIIHIH觳LLHE1!uLLLu L#t2(IH]HLH菵HH[A\A]A^A_]H@UHAVSIHHHHLH1E1蔾t[A^]H[A^]韾ffffff.UHAVS1u`IƇH_H5&{H1E17u5HKu)H5 H1E1u H"t[A^]Aȃu0H5n/ H1E1ϽuHuA1uH5dX H1E1蕽uH[A^]饽DUHAWAVATSHAHHdH %(HMH5cFH輻Ht+Iǿ躬IHLDLcH;跱8PIHuH}LH&HEHHE7HtdH%(H;Eu)HH[A\A^A_]HzH}@ut@UHAVSIHHHHwHɼAƆAWAAAdž?[A^]UHAWAVSPHH4HHLMHHǃHt虫HHt H聫HHǃHtCHt H{XULs0Mt~L{8LM9uI]fffff.L(MMgM>AFtI~ IM9tAGtIH{0Ls8H[A^A_]ΪH[A^A_]UHSPHHH[]餪@UHAWAVAUATSHIIHdH%(HEH^tbIH(H}LHYHEHHE7HdH%(H;EHH[A\A]A^A_]W)E)EH}L聩HEHZW)pHEELmH}HEuID)E)ELeL Hp L讲W)E)EH5*HP{HPHEHDžP7HAH}pAALExLqLEEELEmHEML@HH@$H*LP LLH}LLeHPHuHh苨0HH;H艼@uIAW) HDž0H L8ADŽpAALqLEELEx H!HE0HE(HH$HRLP LLH}LHPHuqXtsHhaeH}}HEHtyHH5|HP$?HPHHDžP7HuH,HP@u t H0H}EttHu7HEHCECWEHEHEtH}衦H;pt H}芦H}Hu7EFH}m8Ht@uHmH}@ڿH}LHP@趿H}3UHAWAVAUATSHMMIHdH%(HEW)PHDž`HUHMLpLxLPHuHpL~L(IİLLLHMHDžpPL0t H`HQLiI:H8H@H1L}IsBmEMuNWLHHLHMIILDILI谤LIHEILeLHLmLL8C/EAALELEELEMHpH(H8oEt H}`HpL0IOIsC6EL}Mu:FLHHMIILDILIHEILeLuLLL賤C7HEHCECHEHC ECEfCECHbH}LH@f H}HEHt#}tGtHG IHjLbHpHHDžp7Ht9Pt H`2dH%(H;Eu:HHĸ[A\A]A^A_]H&Hp@u蔼H}@UHAWAVAUATSHhIIIHdH%(HEHtW3IHhH}LH虩HEHHE7HH舶H}@~W)E)ELmLLL& H} LEEAALELEELEMLeL}E$H HLEt H}աdH%(H;EuHHh[A\A]A^A_]ߣffffff.UHAWAVSPIHH~GAI蛰IGEt#DIH+HLH]HHH[A^A_]fDUHAWAVAUATSHdH%(HEH=eFHøHta;t\H58EsH赱t6H5H袱t*H5H菱t1蟥u5 dH %(H;MH[A\A]A^A_]qID(H50WFL}LE1AUL]H5)H 4IHLHH3H58m H3L$E.AHH9UHAWAVATSHHuCH^LpH8uoAtHxII蝟LLHKINAfFH8u@tHxHIbLHIH7H8H9uGtKWHKINAWHCH8Ht7H@u*H訸HH@uH蒸HH Ht [A\A^A_]HHUHAWAVAUATSH8HUIdH%(HEL~HvIDIIEUH}uHI\$HtoLeHHHUUUUUUUUH!HH)H33333333HH!HH!HHHHHH!ILI8IwLCI!5IE1RIH9r"HH H t H1HI 1AHEHJH L0MEMAt 
MMM}ALSH}LMLU"f.L!L9M6MIFH9uOANIVHHDL9uuR1ATA:uHH9uAvH9rHH H t81HHoI~ LM蝠LULMMH}R219MILe(肜IHLhHxHMuHAHG HQHqID$HLx WH*HHH WH*XHH؃H HxH*AL$ uH*XAL$ t Y.HAHrHKE1HAI ^?H,HH?\wH,H!H I9IGAHtHGHu I1II\$I9vLLID$Hx WH*HHH WH*XA^D$ 蘤H,H\H,H?H!H HHHHUUUUUUUUH!HH)H33333333HHH!H!HHHHHH!HHH8wHGHȃ?ٸHHHBI9LFI9I\$HSHLuH!!H9sHHH H t1H1I $HHtHIYID$It$I6Mt$HIHt>H@HKHuH!!H9rHH H t 1HH1HI$L0ID$dH%(H;EuLH8[A\A]A^A_]śUHAWAVSPHHtKIIIGH9"J<7H;HHtGLsDIs%1kH;HHtHCM!1ffff.HHHHDHHDHHDHI9uHtHHHHuHsHSLCHNLHHUUUUUUUUH!LH)H33333333HH!HH!HHHHHH!HHH8Hw IFH!'L9r"HL H t H1IH 1AHLLMMN>ffff.IHHHHIHHLIHLMHMtbI@wL!H9t1fff.L9rHL H t$1IHH9tHH<uH41AH9tH[A^A_]mUHAVSHLNLBLHHUUUUUUUUH!LH)H33333333II!HH!LHHHHH!HHH8Hw IAI!)M9r$LL H t L1II D1AAL6KIHH9uL^M9t9IBwIQH!L9u$kL9rHL H 1IHL9tIHHt9H@wIQH!L9u$*L9rHL H 1IHL9tKL1MtIFwIL!L9u%-E1(L9rHL H tJ1IHL9t HLL1M2HHNHL_GH[A^]1AL9;1AL9u1AL9\_UHSHHHdH%(HEW)E)EH}H}HuuEHMHMHMHMHMWEHEHUHuHUHuCHEHUHC HSHKHEtBH}dH%(H;EuH薔7H;@u@udH%(H;Eu dH%(H;Eu HHH[]~fffff.UHAWAVSHIIHdH%(HEHt1A?/u+H}LL HEHCECHhW)@)0H0'H0Hu88H9HEHHE@Ht"HQ|/HEH1HEHUHFHEHEMtA?/u IIE1LPLXHHuHUHPHHE(HE H} HEHC(ECW)EHEHt H(諒H0Hu8tHH艒 @udH%(H;EuHH[A^A_]臔UHAWAVAUATSHHIHdH%(HEW)E)EH}HHH}HutH]ELeLEeHE]AM~AAME~MEnMtvL9LLLvu{IM)K ,IuTA?/uNcH;@u H@dH%(H;EC'LIH9/u K ,HILIIIsC6ELeMH]teI?ILHHMIILDILIHEIL}LuH]LLLKELeE1H]E1C4HEHCECHEHC ECEfCECHH}HEudH%(H;Et*H}dH%(H;Eu0HHH[A\A]A^A_]IH}@UHAWAVSHHHdH%(HEW)E)EH}H}HDuHEHEHEHEL}WEHEHEHMHEHM׿(oHH HHHW@H@ DsHMHUHK HSL{HC HEt,H}8!H;@u H}Ht@u赨dH%(H;EuHHH[A^A_]#UHAWAVAUATSHHdH%(HEW)EHEHIHL}HsC6EMMu@ILHHMIILDILIPLIHEILmLuLLC4 IHEHE HEH1DmALuMELLޞHt&LH=rKHH}H1՜>Β0H}S&HEHHE7HtgL覍Au06HEAHEHCHEHCHEHCHAtLhdH%(H;Eu4HHH[A\A]A^A_]H_H}@uЦH}UPUHSPHuHFHCHVHvHb(ȌHH jHHHW@H@ HCH[]UHSPHH9tOuu2HFHC2HFHEFHEVHHuHVHvHüHH[]fUHAWAVAUATSHIIIH8dH%(HELvH@HHHLW)EHEL0IvH GH}H!}W)E)EH}LLHEHumEAALmLEmLEuAI_AAIE_MEgMtKM9rfLHLuTMM)K ,Iu-;/u(=HX LMM9/u K ,HILMIIsC?xLyMtaI>ILHHLHHHDHHvIHEHHxL}LLLƅxLyE1C<HEHpx`HyHaEiEfmEoHDžXH}HuEtH} @u腣H}HHXHXHu`tHp诉 @uBHEHHHu%H8H{@ u(H HHHO!H8H uHP(Hp0HH0車@t HP&dH%(H;EuHHĨ[A\A]A^A_]-Hx!UHAWAVAUATSHIHdH%(HEHtA>/4 Dž4W)PHDž`LpHxE/)E)E)EHpHEE/MH t$H}HxHHE}EHU1HHL(HuH;u E1E1vE1H]E1fL}H}HuA=..H`I9sM>I~IM@M)MIIMHH9NLL)HHH9HFHH9HCHtUHH9HH%IH0M9tMnI~uvIE=..uh4;cE1IHLO<,K|,MILLLL詇LPLXH`MtL跆H]LXMHQ }HH9EHAGH tKH(9/u?HFH t+H(z/uH HzS1HEHMH ֳFH8ALLA8H9HEHHE@HpHxHuHpHH8t HH蠅HPHt菅dH%(H;EuPHH[A\A]A^A_]1H(x/HFHRHDHHP`[ff.UHAWAVAUATSHAIIHdH%(HEEEW)E)pHpLpIuTDxyE|EHUHEH8HEHEAE4s[E111Au ALL;AAuAH;V1IALH(W)E)pHEHpLLLpIDxHyHEHEHEHuH}H(EMhkEEƅ@ƅ`4sH HL(E1`D@HEHMHAHHHPHEHEHXƅ`A1H(1H1H DpLxAu0Dxhky|HUH8HELH8D`LuEEEtH@LeEpCIEHC AECWAEIEC(CHEtxfffff.UHH1HtEHffffff.Ht.HO|8/HuHt Hu ]1]H9HBH]ff.UHHHt4HHt)HN|0/HuHtHtHH9HBHH)]1]HH]@UHAWAVAUATSHHIIHdH%(HEW)E)EH}HL_LuIufDmHEHEHEHELuHuDmLuHEH}LL H}HuVHEHCEWEHEFAu AL轎fAu/dH%(H;EuZL蛎>f@u艎AudH%(H;Et)dH%(H;EuLtHHH[A\A]A^A_]vUHAWAVAUATSH(IIIHHHULuMvHELk8IGKD.AC.WAIGLeLxHIMHsC$AF M~!Mu:GLHHMIILDILsIIF0IMn Mf(LHuLOwC'IFLuUHEI6HHk8INH3HKSHH([A\A]A^A_]K<.H 蟥UHAWAVAUATSHXIHHHNAAHvHMHDME}H=L4LHi-8ߝHII1HMIHH Dfnf`pEeAtMeIMHMAHsH]HKI1fp1fo ]fEI!Bo:ftfHUHMH}HEDMI!Ik8>tHL>L9u:HfUHtHfHHuLuHfo ԫfoUt>HMAf!HUHMH}~ftf…uIIHfoE71H}LIŲLHX[A\A]A^A_]UHAWAVSPHH_HHHH H1H!ofvfdfu%ALH!ofvfdfItHH!Hyu<u=HG1Ҁ<H)Q@HHW@4HxH!׃H@4:H[A^A_]IILHLLLIUHAWAVAUATSH(HdH%(HEHEHHEHGHEG$ELwHwH}HMHLH}t(tFH}HUHLVH}}H5pdH%(H;EH([A\A]A^A_]HELhE1fDIL;}tHEB<8xIk8AAAItIEtMEdH=CLLHi-8ߝHH1HMHHIHHHH H1H!o0fvfdfu3Afffff.LH!o0fvfdfItLHH!π8HwH!΃Hƈ1Hk8J)HSIT oAD f HCHKHuHHK0HH0oC @ K HC0H@H{Hu:HC tH{0nHHHC7H{n}@upUHH@dH%(HEHGHrHOHHHHH9v!dH %(H;MuNH4EH@]GW)E)E)EHEH5HU耓dH%(H;EuH@]oUHAWAVAUATSPHILvMfIIk8I<HHkmLLxL;ILcLHLkLHHLH)HHGIL9AA 
DHuLLMLkIVLpC7ILkDH[A\A]A^A_]UHAWAVAUATSPLMHILfIIE1Wffff.IH8M;nIB<(xLL1Hk8I HSIT A  HCHKHuQHK0HH0C @ K HC0H@H{Hu1HC t6H{0!l(ffffff.HHHC7@u藅W;H{kW*H[A\A]A^A_]UHSPHFHEFHE^H=HHHHi-8ߝHH1H[]UHSPHBHFWHBHBHuEHB0HF0J N B HB0HFHzHu%HB t-Hz0HkHFHB7@u H胄HuH[]HzH[]jUHAWAVAUATSHfH}HGH9`LIIIHvMhI9LL)HH@s LLHH‰ЃHEH)HHJ (LfHnfHnfpDIp8fffff.foffofoFoVflfoo^flfH@HuffpffH~ƐHHpHHL9uHtvLeLLEHEH0HPA$tMd$ILDjM9HEt9L`LLH"jIIuIULjMeIM9uHEH[A\A]A^A_]UHAWAVAUATSHH(D/HHHHAHEtLw AAAHL)H9s'LHH)HH)H9tHOLgLHOHMH}HHH9w2HH9HGƻHrHHHHHHDHHIHhIMtLHuLkAt H}hH}LgHHLwLHGIEu 6Hw0H[A\A]A^A_]UH}w H 7]@UHl8]ffffff.UHAWAVSPIAljwHHlLDHHhLH[A^A_]fDUHAWAVSPHlD0DSADwIHkHDLHgHH[A^A_]UH1GGGHGHHG]fDUHAVSH0HdH%(HEWHGGfGGHG G(G,HLJG-G8HLJHuHt Hj1HELuL1 HuHL!dH%(H;Eu H0[A^]g@UHAWAVAUATSHIIHdH%(HEHoUHt6HHHHH=FUHOHtM'Mt3MwILXIs4C6LMu_kW)HDžucjLHHMIILDILdIHILLLLL-hCD5LXt H{dHHC(H{IEHC(AECWCPHC`ChHCpH+THCxǃ))LsHCIDHHEKHs HHHCPHt6HDžxHs0HSHHxH30&Hx){ƅHDžhL%Ml$@L8HH HAHIHH@HHLxIMthIC6IIMdH1zkHtVIH]IC 6ML}MdH%(H;E"WHCW)EHEDxHMIILDILVIHCIL+LsLLL7ZC4LHHMIILDILIUVILL}ILeLuLHLYC7H5|sH}ZHHHKWH@Et H}VdH%(H;EuHH([A\A]A^A_]XH}fUHSH8HdH%(HEH}HHt/HHEHEHEHEHEHEH@HudHCpdH %(H;MuH8[]WUHAVSHHXHtH{PHtLs`MtLL!ULspMtLLULsxMt L;5DtAt I~TLTLMtL;5DtIvLLTLMtL;5DtIvLeLTuC0uu%[A^]HhTC0tH{@YTtH{[A^]GTHG`Ht UH@]øUHAVSH0dH%(HEH`tOIH}HHt/IHEHEHEHEHEHEH>HubIFpHtXdH%(H;Eu H0[A^]Ufffff.UHH`Ht]]fUHAWAVAUATSHHdH%(HEHc_HIIW)E)EHEH<RHE]HHRHE]HuLI_IwL) IWAGH}EIL}2fDIIGH}EAF IHcH I9AUAf.AǾD9|H II @IMHtI΋IA9|~INHuMfLu(QIDx@ WHEIFM4$L}IHLH7II4$+HHtHH@HufHH[H9t[HEHtQH}HEHtQdH%(H;EuHH[A\A]A^A_]Sffffff.UHAVSH0dH%(HEH`t_HH}HHt=HHEHEHEHEHEHEH;HEIH#`LH{pHtmdH %(H;Mu H0[A^]RUHSH8HdH%(HEH}HHt/HHEHEHEHEHEHEH>Hu_HdH %(H;MuH8[]eRDUHSH8HdH%(HEH}HHt/HHEHEHEHEHEHEHP>Hu_HdH %(H;MuH8[]QDUHAIHHHƺ1]UHAWAVAUATSHHIdH%(HEЃAƅHDžhL-I]@H8HL5IFINHH@HHL`I<,H_JDŽ%PBDŽ%XLILH8HL_L-]ILW Dž0H5FHKH5iH7HǾ JVH5$HH5${ HIOx9Hq@HEqHEQHu Hh>L8IINHH@HLt H ML^IHL^L^E1DLE9Oh}+E1dH%(H;EDHH[A\A]A^A_]AHCADDHHDELL1PA}1HHND4$E1L1EMZAuUgEILOHH1LPHHND4$LL1EMtKMHHtIUIUHH+HLMtAMLlLKME1ANHfffff.L9t5K7K|uKtuHPIuML.NE1rAMfLNYMUHAIHHH11]UHSHAIHdH%(HEHEHMHH޺t HMH H)KdH %(H;MuH[]kMff.UHSHAIHdH%(HEHEHMHH1ht HMH H)KdH %(H;MuH[]Lfffff.UHAWAVAUATSHHIIHdH%(HEHE11`NIFH~WIH*ffffff.D9DO DBHH9s9\uHH9sAvE1AAHSHCHHDHHEKHD$$E1HLL1E14W)HDžHHLLEAńtOHtHCHH)AAHHEHLEHZt H IdH%(H;EuDHH[A\A]A^A_]'KHOH~HUHHH1!f9ƉO BHH9s:\uH¾H9s2]1fff.UHAWAVAUATSHIdH%(HEЃA{SƅHDžhL-I]@H8LL=IGIOHH@HHLpI<.HLLhXJDŽ5PBDŽ5XLILH8LXL-ZILW Dž0H5ELHH5bH4HǾCGOH5!HH5{ LHhHIx9Hq@HEqHEQHu H^7H8IIOHH@HLt H FHWILLWHxH9wH~HH)A{]ƅHDžhHL{@L8LL5IFI~HH@HHL`J%HpLHxHhVJDŽ%PBDŽ%XHHL8LLVHYHHW Dž0H5ELGH5`H3HǾIFMH5 HH5ΘH%LH5s HHHx ML-oFHLH50HHHhLHLH5r H}HpHqHLH5sHVu H4L8IINHH@HHt H $DH8UILL9ULAUE1dH%(H;E( DHĘ[A\A]A^A_]HX])LILLI)ILE1LMDW)AChH9MHxI{`H]t AuAEDAEDAC0tIK8Ht#E1HI) IC@~H`ICHHECLxAAE"slOP1HH@ƅAtAAEtsHX IC1LhA{HH`H(H`HLDEPEA AEECD8*HHH9rE1HXMMLHHHHp1DhMjAToH?9LHHHXHHpAIjATroHt1MHPHHpLHM EHPMHp3L`LLLhICHHAu>LxADGAPE11AHH@A>ESLxADGAPE11AEHH@AKlƅHXt+AHwAH AAPt'H;@AtAƅPMMLHLHHHpDhjARmHtJMH`xHpH`AAE1HMLx7AHHxH5rEH.HH5h|IM`qH*GH5*jFHH5EHIM`qHFHH5iFH5 HIM`q HFH.M0(ALxE1䀽}MA}H5vEH-HH5g|IM`qH.FL%.iFHLH5DHIM`qHEHLH5 HIM`q HEH-M1H`E1,LxHpMMA1H`HpMLxDhAH)EAAII{`HHHHƹLAU"HEHHxH5EHHߺ+HH5csIHH@H9P I{`HHubDLAUHufEfHHxUH5_EHHߺ*HH5jDLAUfHILH`~ HtI)IAAA9IcHcD)Hʃt'HHLH1WHHH9uHH)H)HH)HJ HpWAAAAAAAHHu^ERHHxAH5KEHHߺ)HH5uH*AEMMH5EH)HH5}c|LXAt$HBH5eFHjH5 AHVAt$HiBHH5aeF5H5Z H!At$ H4BH*MM1H`AE1HpLxDhH5EHHߺ(HH5;fffff.UHAWAVAUATSHHHdH%(HELzM5EIHHL"MHFHD"ff.LJIM9A$<\uIM9A$XЃ wGD9HHH\HtIMHHHAEHDtMu<\L\qAAAHHHL)H9s"HH)LLLME1PS>HJHtIUHJ<2HH;IAEu C6AEMuHB0DHxƅHDžhHLs@L8LL=yIGIOHH@HHLhJ<-HLHJDŽ-PBDŽ-XHHL8LnHL5ILW 
Dž0H5:EHHH5QHHǾ>H5HH5EHHHL(L;HLHu H'H8IIOHH@HLt H X6HlGIHHxƅHDžhL5Mn@L8HL=IGIOHH@HHL`I<,HFJDŽ%PBDŽ%XLILL8HFL57ILW Dž0H5bEL%H5OHHǾ$=H5HH5LHlj<H5_F HHHL(L9HLHu H&%H8IIOHH@HLt H i4H}EILL~EHE1dH %(H;MuHH[A\A]A^A_]N6fffff.UHAWAVAUATSHHH`IdH%(HEH1Hp17HCH~WH HE1fD9DO DBHH9s9\uHH9s1AHXA HXAAINM~HHLDIE^W)HDžH;L1HpE1LxHhLP.HHIE1AWj9HII9AIFHDHHIENHLH)HH`HE1LAUH7LM9HHHHHDtHfM)HH)L9s+LH)HHHIE1AWAT)8HNLtLI<LLO5Lu HALxHhHPH9HH`xuoHL)HHMLdtPDžHL藕|Džƒ fI9HHHHHDtHH9H9HtHAHu:6HHHHHDtHH^LcHH)L9s(LH)HHHIE1AWAT86HPtLLtLI<LL\3Lu HALxMHhI9]ffff.H`HHXHELLHpHpMI96 Mpt=I9L)IHHHHHEtSHQ1Hpt H.HpdH %(H;MHĘ[A\A]A^A_]ÉHH)L9s(LH)HHHIE1ASAW4HMt LLI<LL1Lu HALxHH()INHA)AIF/DUHAWAVAUATSH(IHHIdH%(HEHE11Y1HCH~MH H#D9DO DBHH9s9\uHH9sAv E1qAAIMD$$E1LHL1E1btNLHHMIILDIL*IHEIL}LmL}LHxL-C,At I)HEIGEALpAxHDMt?E~:IF1fDHH9t$AHtIV4 ~@w@@4 W)EHE)EHEMD)~/Ix`HuHU0gtHuLpHuLdAtQIGHtOL"Et H})Et H}(dH%(H;EupHh[A\A]A^A_]HuAu AI IFMvAAu AI IGMA1Ew{H}Z*DUHSPHNH~RH6H@D9ANHH9s7>\uHH9tJDA\tAA rH5zH׺=4Oh9~-H5x1H׉1%H5{HH׺-u1ۉH[]ff.UHH]@HtUHHHHHVu]fUHHtH:Hr]fff.UHHHuH]ffff.UHHHuH]ffff.UHHHuH]ffff.UHAWAVAUATSHXdH%(HEHIIMHUW)E)EEH8HL=LPGDX 1-J (MI)Ir190u,y0u&MJ4(Hy0uIHIwAHII w,HHL}!LL7-uE-BD%MHE*IHuLUy7ML9uuA<$t 1dH %(H;Mu"HX[A\A]A^A_]HHUHtH 'UHAWAVAUATSHXdH%(HEHIIMHUW)E)EEH6HL=NGDX 1-J (MI)Ir190u,y0u&MJ4(Hy0uIHIwAHII w,HHL}!LLP6-uE-BD%MA?-t5HE)IHuLU36ML9uuA<$t 1dH %(H;Mu"HX[A\A]A^A_]HHUHtH k&ff.UHSHHdH%(HEHEHU&tHMHH9u Htf 1dH %(H;MuH[]%fff.UHSHHdH%(HEHEHUtHMHw Htf 1dH %(H;MuH[]%fff.UHSHHdH%(HEHEHUFtHMHcH9u Ht 1dH %(H;MuH[]%fffff.UHSHHdH%(HEHEHU&tHMHH u Ht 1dH %(H;MuH[]$ffff.UHAWAVAUATSHXdH%(HEHIIMHUW)E)EEH3HL=KGDX 1-J (MI)Ir190u,y0u&MJ4(Hy0uIHIwAHII w,HHL}!LL03-uE-BD%MHE&IHuLU)3ML9uuA<$t 1dH %(H;Mu"HX[A\A]A^A_]HHUHtH Q#UHAWAVAUATSHXdH%(HEHIIMHUW)E)EEHT2HL=JGDX 1-J (MI)Ir190u,y0u&MJ4(Hy0uIHIwAHII w,HHL}!LL1-uE-BD%MA?-t5HE+%IHuLU1ML9uuA<$t 1dH %(H;Mu"HX[A\A]A^A_]HHUHtH !ff.UHH1]@UHAWAVAUATSHdH%(HEHAIIHWE)E)E)E)E)p)`)P)@)0) ))H0HDX uvM1-I I)IrF90uAy0u;1Ҁ-HLIffffff.y0uIHIwAIIIv4L%bHGgMff.I$DX oIIuE1)HIHLL/-uƅ-BƄ-LMHDž'#IHLEt /Z/ML9uA}t#1dH %(H;Mu5H[A\A]A^A_]ðHHtEt ZUHHѺ]fffff.UHSHHdH%(HEHEHU qtHMHH9u Htf 1dH %(H;MuH[]GUHSHHdH%(HEHEHUtHMHH9u Htf 1dH %(H;MuH[]UHSHHdH%(HEHEHUtHMHH9u Htf 1dH %(H;MuH[]gUHAVSHIdH%(HEHEHU11 tHEHH9u MtfA1dH%(H;Eu H[A^]UHSHHdH%(HEHEHU tHMHw Htf 1dH %(H;MuH[]UHSHHdH%(HEHEHUtHMHw Htf 1dH %(H;MuH[]UHSHHdH%(HEHEHU!tHMHw Htf 1dH %(H;MuH[]UHAVSHIdH%(HEHEHU11tHEH=w MtfA1dH%(H;Eu H[A^]2fUHSHHdH%(HEHEHU tHMHcH9u Ht 1dH %(H;MuH[]fUHSHHdH%(HEHEHUtHMHcH9u Ht 1dH %(H;MuH[]YfUHSHHdH%(HEHEHUtHMHcH9u Ht 1dH %(H;MuH[]fUHAVSHIdH%(HEHEHU11tHEHcH9u MtA1dH%(H;Eu H[A^]sUHSHHdH%(HEHEHU tHMHH u Ht 1dH %(H;MuH[]UHSHHdH%(HEHEHUtHMHH u Ht 1dH %(H;MuH[]UHSHHdH%(HEHEHUtHMHH u Ht 1dH %(H;MuH[](UHAVSHIdH%(HEHEHU110tHEHH u MtA1dH%(H;Eu H[A^]fUH ]UH]UH]qUH1]d@UH ]UH]UH]UH1]t@UH ]UH]qUH]aUH1]T@UH ]UH]UH]qUH1]d@UHRWH@HU3HHW@HH>HHW@HH']ffff.UHAWAVAUATSHHdH%(HEHH@L(I}XHVUUUUUUUIm HH?HԮIEpHA}<ƅHDžhH;Lc@L8LH *HAHIHH@HHLxIH([A\A]A^A_]H}dH%(H;EuH([A\A]A^A_]H}@@;UH7GHOHG G(@ DȉO,]f.UHAWAVSPLw H L`Mt0LMvHG?uH_ HtItH{m H[A^A_]HHtPH@8uCUHAVSHX Ht/H@ HIIFH{HsgH L[A^]UHAWAVSPIH1A<I~ Hs,I~ Gt7uoHGDxLb(x IƋHǾvE~=HGDxAGw,A DtL(- IƋA<w LCAFHC IFLs H[A^A_]fDUHAWAVAUATSPH_ HLsMHADAAEFfD3CAH}AAԀu#AvAAFIF L4s<u L"I~(E1fHC B4LIHcCI9|CH{ Hto HC EHExDkfDcLp H»1H[A\A]A^A_]ÐUH]@UHH~RE1AH)f.ABA)A9AOH@H EOE~EAKRDA9H9P|]HcH@H1H9HB]ffff.UHO? t t9*@t&@u+u)u]]É+u u]]ffffff.UHH HcHH4RHH5L11+ffff.EAD)A9DOO@NAOх~%LAAOIGA9NA9@|MI9s`ED)|VK4@L? 
tt1&@t"@uu%u]]u u]]fDUHAWAVAUATSPAHD'AtDD9AA u(IHǾD萳2HDDu+(bINjHǾ`EwHLaH[A\A]A^A_]ÿ(&IA?HǾD(IHMg AE+I DDDAD9qA u׋D!ufffff.UHAWAVATSHD7EA(~IAAHDDuHLz[A\A^A_]ffffff.UHAWAVSPI('IAHlj'LL,H[A^A_]ffffff.UHAWAVSPI(IAHljԱLLH[A^A_]fff.UHAWAVATSHD7Au*(IHǾD|HL8EA D;(HIHǾDEHLJD3[A\A^A_]fff.UHAWAVATSHD?EA(IAuL DPA?LDɰ(IH4M~ L1 QI~ S, @HL[A\A^A_]f.UHAWAVATSHH Ht DArHCHC@1AA@DDA9uOA9AAwGA9u(AIHDDׯfAFLc ID$IFID$A<$u5M|$ Mt+ID$ LHID$IIwLfA~rIFLHL L~AFLs [A\A^A_]ff.UHЀ> uNyN~]fUHEDAs1ʁfnfp%H1fofff.Ao0Ao\0fofffffoffofffffofH H9ufoffffffpfoffffffpUfoffffff~H9tff.A49LHH9u]ffff.UHAWAVAUATSHdH%(HEƅHDžhL%Ml$@L8LL5|IFINHH@HHHXH<+HLHDŽPDŽXHILL8LkL- ILW Dž0H5HH5HHǾ1 H5ӦHŕH50#H豕u HutIFHC AFA~~A fAFID$A<$u5M|$ Mt+ID$ L&ID$IIwĴLlfA~r4IF2HCHCC@1[A\A^A_]LMIFL L1AFHLfffff.UHSPHrH{ HGHC 衫H߾H[]^fffff.UHAVSH1H{ HGHC `H߾#HC HtHxtHCHCC@1[A^]HC H@8uHX HtH@ HIIFH{HstHLt%HufxrUHH@H19HE]1UHAWAVSPHtquBf{r;LsI>?tYnICfuQM~IFHIL*DsH7(MHHǾDJHH[A^A_]fCIvLfUHH?t0H t4u:H]fH@H8uHftfuHHH@ ]1]f.UHAWAVAUATSHxdH%(HEW)E)EE1ffffff.HH?u!A IcAH|ŰfrHH?׃tfukO)~99'HG Hc΋IH^IG A_AE7HG HtHH0HHG GEH{HHhHHHpHHHxH@HEIH;HH@HHHH.HHRfHtHHpHmHqIEAEII.NlfA}r Mu@MuI>?u诧IAEƅHDžHHHHpHH@HxHHLpHJ<3LLtJDŽ50BDŽ58HHHHLHLLWCXCHDžHH5HH5͉HǾNHH5謉 HH5s蘉AuHu HHhHH@HHLHUu$MfIFLLGLfAEIvLGH HcH4HcHEdH%(H;EuHx[A\A]A^A_]UHAWAVAUATSHdH%(HEHW)HDžHHHmHIHHHHPHHHXHHH`H@HhH«HH@HHHpHHHxH)HiЫAT$LMD$Ml$IL$I9IcD$L)Hiɫ9}iH@H4LHLL;PH I$AL$WAD$ AD$I0LLwfffff.A|$!LHE1E1DA9ID$IcHI;Us9McHfffff.HJIID$HHAHI;UrAL$܍Qs_W)IEHI}AuHH蛮IL$IcAHE}uIMIcAH E}@ƅHDžhHH8HXHH@H`HHHXHH<HHDŽPDŽXHpHHH8HHxHWHGXGHDž0H5 CHH5 /HǾBHH5HH5At$H u HtHPHH@HhHHt H HHTHpHH8IAD$M;l$TA9KMcMcDID$J IJ IIcD$I9|LHM("H6DHLt?I7H}<Ḿ|Eƒ΁ A WAFLm{ubL}1HIIHELmHELHLHII)OH}L55HMIHMHQIII)HUHULmH)MABeHEMteHE8^uAAHHEILmDHEIu=L=H^HMH9H5AnIyAf ugE1UH8HcH~eL=P fI0HtQM'MtLL9u1L9uMtHuI9tLLuUH}LM1A(EAFdH %(H;MuLHX[A\A]A^A_]H}LtA(EAFHcII)WaUHAWAVSPHIIH>HFHHB$<t.I6Le:A| Aƒ u WCH[A^A_]HcII)NfUHAWAVATSHHdH%(HEL7HOEL}ffffff.IHt]IIBLc;t7LL9Mԁ!ƒ tHcILH)хy  WCMdH %(H;Mu H[A\A^A_]@UHAWAVAUATSH8AIHdH%(HEЃu}A~3E1f.IGB4BTHDIIcGI9|A(E1fff.IG B4BTHDHIIcG(I9|AuXAG/IOE11!f.BtIHcI9B9sHDIOAGW)E)EHEH}A~?\u,H,HLLH[A^A_]AA@VMHHB7t.H3L<6A| Aƒ uA WAG1H[A^A_]HcH H)Kfff.UHAWAVAUATSH(dH%(HELGMtL/A}\uIu9WB1dH %(H;MBH([A\A]A^A_]AHHuEEIEHIILGIIBH6t+I6H}"5LceID u WC]LcI>LI>MVMM)MFHE7LDD}AL$ЃHLlIcLM9ȀM9tRDDMDʀ0u?A ɃLOLLHHVt%E Dʀ0uA ɃHH>ILFU9w  xANLHMAT$Ѓ r8HB`%u)D!C 6)M9u3HL)LkHC  H}H}{H}LH1҅E1Uċ}̍Gƃw߃AD ADE9LVH~KH}HUU1yQLI~H}H)-}̍GƃHELw߃MȍQƃw߃I} A}}HELD UHAWAVAUATSPLIIIIHHEWtlIFHrI9-uy]u AAGKHIHIFIWLLLIt#AOA;}IHMH)HKHC1H[A\A]A^A_]fDUHAWAVAUATSHIdH%(HE)EH~'IH8[H`H(Iƺ?H]#HǾw(rHHI^ IIGHQIIIMgU^uAHIHIGHU# IătH߾ ƜMgMxHHEHe8IEHhLuLmHXM/AE<]‰@@d<-tyI<[uA}:uI| IfI<\H]u[AE puN3IV LLmL:Xu,H]CuIA}]u M'LmM'IGHEHLmHXA<$\H;Iffffff.H0ItXL3MtLwHuM9t AfA;$uIM'HEHIGLuI~ SHEHLmeLuLmH]HEHLHUHMMIGHrI9-uy]u,fUUI~  Mg1M EHIHIGHLHUHMM+uU9}HM MIN HpK ,HfJ+HHH9A|:uA|]u݃}bL5:HEfDI0HBHEI>HtHxHxHx1H9HEuHtL9tLHHEuIM/I)MgAVHpLMLuLmAEWAEAE(EAELHy1dH %(H;MHĈ[A\A]A^A_]IIM/Mg}^u I~ ŠH`L0IIEIIBL5.tJH}Lu,M|$E*HELhHXH}Iƒ u7HE WHhAEIL)MeIEHUIHJHBUHAWAVAUATSHHdH%(HE)D/AHLIH8(x?LxLILIHA?Px<HH>HIHHHCIOHHHrHHIHCHH[LHHHH)HHƅHDžhH}Lk@L8LL=}IGIOHH@HHLpI<.HLJDŽ5PBDŽ5XLHHL8LyH}HHW Dž0H5LYH5߱HXHǾH5iHXH5o&LXu HJL8IIOHH@HHt H LILLLHH@1dH %(H;M|HH[A\A]A^A_]DžMDžE1fDH_IIBL*3HLP(ƒ΁ HcILI)L=T~<HH_HȖHcHuA@-HHE1Mu'A@u*A?.uAA?A? 
AA?AM WCMHsH5HC HKHA)tB:uKH(:IċHǾ8mAD$HL4HHEtHM9{1HH%q~Ѓ H@@0@@uqЃ 1E1f|1DGA AGF\tPEMH`H]HH0ZuLpL谯E1E1L FL%(H(fffff.I0HtM,$MtL6HuM9t AEfA;uIL`HHHh( HA?HǾDb(IHjL{ AT$pLLLpLHΪE1E1L ?LpLL說E1E1"(uIHǾDrbLpLLpH`HhHHhE1E1H]HH`HHhIH]LpIt MmH`?\uEu H`EILCDZH`H}2u; &HcH`H)hdL}LhIOjH1HHU,uWH1HHUHH)HHHMH9:}t2HH\7HMH €uXHUHHDH\:}u;LHHUHHMHEH`HEHhquEL`1vLLpL{藬H`HhE1E1MIH]L`HhHtA>?uIL`HHh1ML@M)LH\DHpDIH@ԳyHH`ILhLHL}LpEH`HHCtbH`LuH t7HcH`H)hx@LYHhHuHp聹 IFMH0 IDW1H8HpLH]Htt H{HdH%(H;EH8H[A\A]A^A_]IFMH0 HMHDyL)MH0 HEIELhHHWIFMH0 HD4Ld-M)HH0 HEHELhLpQUHAWAVAUATSHAdH%(HEAGЃ AGw ADAGw AǩDƅHDžhL-TjI]@H8HL5CjIFINHH@HHL`I<,H]JDŽ%PBDŽ%XLILH8HL-L-iILW Dž0H5HEH5̱HEHǾ軹H5VHEH5yHsEHD船u HL8IINHH@HLt H 6LNIHLOLW1dH %(H;MuH[A\A]A^A_]ffffff.HGHt7UHHDBA wHA0DuQЃ s1]1HH1DDAEHA w"ՍAPHHGHHrω]UHSPHH@ HHH[HtHcHH[]H[]UHAVSHHHHcLsMtLLʯH[A^]龯UH]UH]UH]UHAWAVAUATSHdH%(HEHGHjHx(_HƅHDžhL5fMn@L8HL=fIGIOHH@HHL`I<,HܿJDŽ%PBDŽ%XLILL8H诿L5PfILW Dž0H5#L>BH5ɱH*BHǾ=H5SH BH5rLAu H耞L8IIOHH@HLt H íH׾ILLؾLL{IW(HE1f.L{IW(HIOIG HHHHH4H|HtAL{IOIG IWIw(HH)HHH9IDHNIO(HH)HH{HzIGhdH%(H;EuH[A\A]A^A_]UHAWAVSPHLwLHG(LL)HHr+I>般LsL{ILsLL)HHwHt Hu@HC M9t7I>8IM9uHCHKH9tH)HHHHCH;HHt [A^A_][A^A_]UHAWAVAUATSH(HUUUUUUUL/LwMM)LHHILMgI9LGM)ILK L9LGHI9LCMt;HuHULMH}I9TLHH<@*H}LMHUHu1K HLO$dIIL2LtWD DL<I0M)M9L}LEIHUUUUUUUUHH[HHH1AL  DIT HTIT HTIT HT AD ID AT (T(J)H0H0L9uI0M9tI}HtI}ELM7LEL}LLLgMtLLH([A\A]A^A_],1+UHAWAVAUATSH(AIIdH%(HEKM\MnIEIMHH)HH1H9HEIM(IM H9uLIEIM IM(HHHЃL<DDd HDIE(E14ffff.HHD!@IFHH(HPH@ HHHHH ʃHL,DLAt\AMtIEA9S KLCILLPAfA}jH{H]֪SAFHAN(ƅS ILLHPCCHCEMEGAHCHCE1-fIEA96E uAIHDH;>MfIL$IT$HH)HHH9IcL,Ћ[HEID$(ID$ H9uLIL$ID$ ID$(HHH уL,D\ HDID$(/AFS ILLP(AI^HCHK(HSHH)HHH9IDHQHS(HK H)HHrHxHCIFHH(HHPH@ HHHHHƒH HL HHcPH vHC4ILP HKHcSCYALèHCDKzƅHDžhH]Lk@L8HL5]IFINHH@HHLxIHHHH?HHHIIHMI)M)tLLIL~LLsML{KHCLsHHsMI)HSHH)I9s(L99耣HEHuHpHuHH9ALEHCHELH=.N$L1HEJ (HMHMJ IHMHMM9u+MHMlIIIHUL)IHUHMLmHULH)HHMI9ALELH=LHHEJ<臢LMJJ LHL)H4M)IIHLGHEHuHLcL{I$Mt$LsL;{KH LH)HULH)HHMI9ALELH=LHHEJ<šLMJJ LHL)H4M)IIHZHEHII9AMEI9OJ<UIJIO,tLeLmHEHELNHKLsMHEIEIEHEI9t6L}f.ILLL;suL}HKoEHUfInfHnflIHUH;L;H}LuCHMHCHSHEL9tI)IIILuH蛠JtB|t*@uDDAsA DLAV|H[A\A]A^A_]DUHAWAVAUATSH(AdH%(HEIcH H DHBAI LUH_PLwXM#LHHUUUUUUUUH!LH)H33333333II!HH!LHHHHH!ILI8Iw M^M!,MM9r$LL H t L1II D1AAHJH|L MttMnfM9T$M$$MtQID$L9tAw L!7L9r+HL H t 1IH1Afffff.L9t@AkAHuHuH)`HMLEH*HDDqdH %(H;MuH([A\A]A^A_]AD$EDUHHcHw0LGX1ɀ|MQDLDTHA I KQH LHHUUUUUUUUH!LH)H33333333HH!HH!HHHHHH!ILI8IwHLH!*HL9r"HL H t H1IH 1AHGPHH|H8HttMPff.H9OtaH?HtUHGH9tAwL!H9t%;EuI%;EAu9EAā}ujA?@?uE}?tNAt-A HDDbADD9pEAā}iAT$HD+DdH%(H;EHH8[A\A]A^A_]AH߾mAAAAJ<H{0DE1.I!M uDuEEH}HuBAH}Hu2{(EzUuH1E1A]E~EAH1AIM92BT%BL%8@M9@@uH߉1AE1LH C|t:t>HS0|t*@uDDAsA DLC|{ sx{$HDADcxE~mABT5Bt5H1E1EtNI1AIIv6BL5BT58@I@@Hu1A1{  sx{$uAH߉.tH߾?xUHH{0sxDA,EDcxAH߾x)HH{0sxDAk,DE1Dcx|1Cxr|ff.UHAWAVAUATSPA9AAHx>AAALAEJ<H{0AE1+I!M E1LH C|tAtEHS0|t*@uDDAsA DLC|{ uOsxtD{$uHDA.H߾xHH{0sxDA+E1DsxH[A\A]A^A_]UHAWAVATSH@HdH%(HEW)E)E)EL=OE1H߉CxII I IcGAHxDDEwAAH1BDAAt{ usxt{$tAH߾x HH{0sxDA**Df1_dH%(H;Eu H@[A\A^A_]zf.UHAWAVAUATSH(HdH%(HEƅHDžhH/Lk@L8LL=/IGIOHH@HHLpI<.HL݈JDŽ5PBDŽ5XLHHL8L贈HU/HHW Dž0H5r!LC H5 H/ HǾ,BH5H H5mL u HgL8IIOHH@HHt H vLILLLH@ dH%(H;Eu1H([A\A]A^A_]xfDUHG 1]@UH t1]ffffff.UHAWAVAUATSH(AAdH%(HEЉuȋG$tMIAJ<I~0ADDE1p'H!L H}IALLmLAJ<I}0ADDE1'H!L jE11pEH}HuADeLuL7xjAJ<I~0DD1E1&H!L A|B1II!dH%(H;EL H([A\A]A^A_]11A|DHEAQfAAJ<HEHx0DD1E1*&I!M LuLHLqHIL9m`Fd-LNyE1pvUHAWAVAUATSHHdH%(HEЀ t E11IȃH IHcHFVLgH^{D3L{L{IHcCHHCI9A7Et A| A[|ӃB} AWy~AWA{t1[aD A|$$Lt1A|$(L3L11bID$xA|$(ovLjA$]LILHL:LIII APDʻLH(MDLH H HMH9LM$MI tEEHHx{HHHx0HDp#EtaEtfHH@0fDlEtADAuAAsAD  ,E11HMD|HL11}HrLLHHƺ_HH5k3HyL`AD$ IV@LHvIV@LHYLGL 
:FHF VLH>HII yLAHQ B4QIHII DL HH+HII IHcQI9|LIADAfKLHMMIL9uSVIxILHefffff.UH]&UHSPHHHHdH[HtHCHH[]bH[]UHH]UHH]UHH]UHAWAVAUATSHdH%(HEHGHHx(HƅHDžhL5Mn@L8HL=IGIOHH@HHL`I<,HrJDŽ%PBDŽ%XLILL8HrL5@ILW Dž0H5ΐ#L.H5{HHǾ-iH5HH5zrLu HpQL8IIOHH@HLt H `HqILLqLqLcID$(HIE1!ffffff.LcID$(HMD$It$ H 0HHIHIHkUH)H IHH|(Ht`LcMD$It$ ID$IL$(HL)HHkUHL9IDHyI|$(HH)HHbHx_ID$NdH%(H;EuH[A\A]A^A_]aUHAWAVSPHLwLHG(LL)HHr+I>H_LsL{ILsLL)HHwHt HuU*HC M9t7I>^IM9uHCHKH9tH)HHHHCH;HHt [A^A_]^[A^A_]UHAWAVAUATSPLLoMLMtkLHHUUUUUUUUH!LH)H33333333HH!HH!HHHHHH!ILI8IwMeM!:IHE1MM9r$LL H t L1II D1AAHJHttL0MtlIuM9NM6MtQIFL9tAwH!L9t8DL9rHL H t1IHL9t 1AL9tIH˿ =]IHHEIFHHIFAFIGHxH*HHH H*XLHDH MxI*AO uH*XAO t Y.JmAIrIME1IAI ^fH,HH?\MH,H!H I9IGAHtHGHuI eIMoHMM9vLLeHMIGHxWH*1HHH WH*XA^G ieH,H\H,H?H!H ILHHUUUUUUUUH!LH)H33333333HHH!H!HHHHHH!HHH8wHGHȃ?ٸHHHBdHMI9LFM9MoMeMuI!-L9sI#HL H t H1II 1AAIJHtHIVIGIWIMwJIHt>H@IMIuH!"L9rHL H t 1IH1AHIL0IGLH[A\A]A^A_]UHAWAVAUATSH(IIIdH%(HEMMnIuIEHH)HHkUH1H9HEIM(IM H9uLwIuIM IM(HHHHHHkUH)H IHL<IDLd WDHD(IE(4ff.HHL!@IvHF(HN HHHIHFHHHkUH)H IHHL,DdAt`AMtIEA9HS HKLC(ILLEPIfA}H{(H}[sAFHANHƅHS ILLHPHC.CHC(EeEAAHCHC(E1'IEA92E uAL$HDH;sMnIuIMHH)HHkUHH9IcLHHHH?HHHIIHMI)M)tLLILpWLLsML{KHCLsHHsMI)HSHH)I9s(L99rSHEHuHbHuHH9ALEHCHELH=0N$L#SHEJ (HMHMJ IHMHMRM9u+MHMlIIIHUL)IHUHMLmHULH)HHMI9ALELH=LHHEJ<yRLMJJ LHL)H4M)IIHN9RHEHuHLcL{I$Mt$LsL;{MH LH)HULH)HHMI9ALELH=LHHEJ<QLMJJ LHL)H4M)IIH\HEHII9AMEI9QJ<GQIJIO,tLeLmHEHEL@QHKLsMHEIEIEHEI9t8L}fff.ILL豳L;suL}HKoEHUfInfHnflIHUH;L;H}LuCHMHCHSHEL9tI)IIILuHPJA9 DH9V@@ƿ@@ƍz@ @ǀ_@@@@ƿ@@ƍx@ @<_@@0ЍQDщuHHtgHH9lr$1<@ƍB < B_D Ή)€߀¿qB΍PЀ B<_D ]UHAWAVAUATSH5 HLw`HGhI9sA6AVIMH{XLM)MIMELH=HL)IIM9MFHH9LCMLMt#uЉUI9w{J<N@IċŰuE1KHEC4CTOLA McIF B4LA)|HcLIĉL?ADAt4tuIFHt LA@IFHtLÀIFHtLAtAF8HMЉH[A\A]A^A_]fffff.UHAWAVAUATSHIdH%(HEH}W)p)E)E)E)E)E)`HHxXHEǀEA~E1L`1HH1HPLX&LXL`LIIcFI9I^HBuFtELFdDDvAarGAzwAB|t9AbaDBAzzDCE9wAAH`DD)BJLXINHBLʃL`310#BDH ʀu(L LBDHH0P 1ffffff.Aމ$<F< @_ȁADMA_D)1AAtHDD EQAA¿AAAA AA]AEED8tAωӄXLDHL1fff.Aމ$<F< @_ȁADMA_D)1AAtHDD EQAA¿AAAA AA]AEED8tAωӄXLDHHPIvxIH`L#H}Ht H}6H}Ht H}6H}Ht H}6dH%(H;EuHĘ[A\A]A^A_]8UHAWAVAUATSH8dH%(HEЀ9GW)E)EHEHLcM/N4L5HED}L5HED}W) HDž0Et!L5J H H(H0W)E)pHEL5HxD}LHHo5HED}W)P)@HDž`LC5HHDPH-5HXD`W)HDžHpH@HLEL H.Le1MLcmAEŅDpMcMÉH4HEHuILL78HJ4HHIǃ?H~1EHEHHAHJHH9JLpL@LL}Hffff.Ht94;w t;wtH H$LLMMA HHDpMcHW)HDžEtUJHn3IHN$LH1HL7LLLHW 1HE1)HDžHc_Ht:;H<2IHLLHHE1HMLEMIE1IH HH+HKcL'ADC4'HpHLEI HHHIM9uLHLIHL)HO WG$IG4HL)HH~|II$1,ffffff.HHL)HHHcH9}HA4AAAtH AA9sF9t(HcAHML1ILqHAHt H.HMLeI?LqL91LMLUL]HHMLqL91AvILqLu1HcƉ4IR AIBIRHctLEHEAIAHcHtH?H)III9LFHH9LCMHutDH}I9J<&IH}$MtJL1!O4LsiE1H<LKDHEJI1 Offfff.Lufff.M9sILLtff.M9LmsGM@IEIIMIfILLuILLtM9rI DIHMI9tIEHHEIEL9ur6H}LHAM}LHuH EE1}AH}LHHxgIEMHMLHutMILLt'IMf.IH;MsLLIOtHEII9s[LmILLu>fffff.IIMIIEfILLtILLuM9rIGHUH9tHH HMHLmEL}H #HcHH}LIEHUH IMH{M}LLAH}LEIM#HUHIEH ;M}MuMeLLLLIH}L I$HUH I $HLLAofpNALLAoEfpNAEIuIULHMIOHMI9IEH9р}oE1M)f.LmHEIEID$IH;E>LIHtI$HEMM@IEIEMtIuH}IIuL;mLHUHHIEHUH IMHLLAoEfpNAEMeL}IEIH;EtoLIHtIEHEMIFIFLLIuHEIFIEIEIMH}LtIHUH I}dH%(H;EuHh[A\A]A^A_]PUHAWAVAUATSH(IIdH%(HEHH)HAH!H *HcHHHHLA$IH IHI_HHILA$LAH}HA$E IHUHIH IGIIWI_ILHEHHUHMLHA$gHIMH IEHH]HA$CofpNHH]HA$ AoGfpNAGHuMoMwLLA$LmLLA$t~IIGIIOH]HIwIWHLMIGHUH IOHHLA$vAofpNAbH]AoGfpNAGLLA$tpAofpNA_IGIIOL}LHA$HIH IIGIIOH}LA$H]tAofpNAL}II9E1IHuLIGIIH9~LA$tDmLIHEMMf.IEIEMt#IuH}IIuff.LmHEIEDmAAtIH]kA IL;}AdH%(H;EuDH([A\A]A^A_]UHAWAVAUATSHMHMIHIHLAALHAELutItGI$II $Lm^LmtVHI $H I$HHuAt:HUHH H H(HIH LHALmtHI $H I$LLAtII$IMI $IELHAt,HI $H I$HHuAtHUHH H 
HH[A\A]A^A_]UHAWAVAUATSHhdH%(HEH9HIIHUHuH)HxHHHEHHMH}+HEHHHEH$HEIELmL}HUHBHHH9U|LL<H?I!MwJHIL;}HU}LcHLAUHUIEMEL}M$HLAUHUuI$HE)LMHHuHEMM?IHI$L9u+K6NtdILnL=#FL%L.fHH5w1Hq rIIcHIFI9tAMA9N(HL1LHH[A\A]A^A_]fDUHAWAVAUATSPHHtIHt Muf_ fXf*CWHCH5u$EHL1A~~iL=#FL%rE1LH51LIIcFI9}6IB H߃tԃuH5fDH5Z1LAV H5LGH1HH[A\A]A^A_]fDUHAWAVAUATSH8AHUHIdH%(HELcn(HcF,LHHHHIHEHcHLuD}HKAVI6D E1EAE9IcLFHEEAAuntiuXEAGDۃtBuJEAGAAAAu':AE11H}EADAL]t E EAAAEAD~AH}E~D1Ƀ|A)̋MȋEAADEDD D}ĉMHExIcHEuiE~dH}HEL4L}/ffffff.AEHL+ ILM9s#Hffff.;tHL9rE1HUH|ALuHEAHcHDUȋMteLZHUHH1ff.HH9t>Ic<3LBHE AAuA|McACHEdH %(H;MuH8[A\A]A^A_]fDB(B0~UHAWAVATSHIIE1{4t/IIcFI9}RIBttAAN LH=ыK C0PS0H{t9vHcHcHsHK[A\A^A_]UHAWAVAUATSHhIIdH%(HELOXAHHHHPHHPHH@HH_HHALoLA~4BDEiAC*fffff.tąttMFMtBA9F vt^UHAWAVAUATSPIIILnfAUA9W(NLLsIIcHIGI9uH[A\A]A^A_]UHAWAVAUATSHDdH%(HEB(B0>MIIL~Aȃ HHH`HPHhHPHpH|Hr@HHHxHoHHH@HAHLHLXfHHHpHH8HLHAIIcEHIEI9IcA9](~^HLpHAމփAr't xtuA$u`fA<$LAx4jAp A@0PAP0IxP9HLcIcIpBIpA+ƅHDžhHH8HhHH@HpHHL`HJ< HJDŽ%PBDŽ%XHxHHH8HLLWHGXGHDž0H5l]~HH5I~HǾ\HH5.(~HH5R~A4ރH!u HH`HH@HHLLXSH BA|ECAt9hAt9ZLƋiAHLH)dH%(H;EuHĈ[A\A]A^A_]cUHAWAVATSAIIH_Hu)LLD.IHu L[A\A^A_].fffff.UHAWAVAUATSH8HdH%(HEHvAIHP@PHBH.HYƅHDžhL%Ml$@L8LL5IFINHH@HHHXH<+HLHDŽPDŽXHILL8LL-yILW Dž0H5iHg{H50HS{HǾfH5@H2{H58TIcDxHHDH=MeHA$AD$(AD$0{~~E1A|$4t.IHcCI9}]HBttLK LLAL$ AD$0PAT$0I|$t9vHcIc$It$IL$A$뗋S AAE1A @DEDƃ AAEHE1RƅHDžhL%͝Ml$@L8LL5IFINHH@HHHXH<+HLHDŽPDŽXHILL8LL-KILW Dž0H5rgH9yH5H%yHǾ8H5HyH5u6&ƅHDžhL%Ml$@L8LL5IFINHH@HHHXH<+HLHDŽPDŽXHILL8LL- ILW Dž0H5GfHxH5HwHǾ H5HwH5*Hwu HPL8IINHH@HLt H LIHLL1EAAAAEOA AA_AEEALDD0DAAA !MeHLMmPDt{LAEAE(AE0A<$tFM|$ffff.AA9T$(NHLDIIc$HID$I9uLkHLcPLcHLkPLƅLHLLDLEHSHHsPHsHHSPA1Ƀ{HEрADHDэDHdH AuIIcLxHHcHDdH %(H;MuH8[A\A]A^A_]fDUHH7GHMu]fUHSPuHH?uH;u CH[]fUHSPH_HۅuH[]fUHAVSH~u(IH>I>AFHǃǃHǃǃHǃǃHǃǃHǃǃ Hǃ(ǃ0Hǃ8ǃ@HǃHǃPHHt@L0Hx4LMuHHHtfL7LMuHǃHH|ЃHs1NH1HHHHDHHDHHDHH9uHtHHHHuHǃHH[A^]fDH7HvcUHAWAVSPGHG B GHcBHHGHHIIHI@IFI7HHH[A^A_]dWGGHW @UHt]HHt]DUHAWAVAUATSH(dH%(HEЀt HG IHHHI>IvAVANHHƅHDžhL%Ml$@L8HL=IGIOHH@HHLpI<.HJDŽ5PBDŽ5XLILL8HL%=ILW Dž0H5d_L+qH5HqHǾ*H5HpH5#Lpu HmL8IIOHH@HLt H HILLLHHu&dH %(H;MuH([A\A]A^A_]ffffff.UH11E1]ffffff.UH11A]fff.UH1ҹE1]lfff.UH1ҹA]IfUH1E1],fff.UH1A] fUHE1]fUHA]fDUHV0N!DF"]DF0N!ɍN" HHRHt UHHHD]fDUHAWAVAUATSH(HdH%(HEHHNH9rHSHHsHH9HƅHDžhL5!Mn@L8HL=IGIOHH@HHL`I<,HH'JDŽ%PBDŽ%XLILL8HL5ILW Dž0H5[LmH5VHymHǾiH5f~HXmH5ڽLDmu HL8IIOHH@HLHt H HILL L(HC(dH %(H;MH([A\A]A^A_]À{"tH9tV@< uAIH9t<< t߀PЀ <_D4HA A1K L<IIHLDu!Hs8LLHLDtIHC(AGC0ƅHDžhL5Mn@L8HL=qIGIOHH@HHL`I<,HJDŽ%PBDŽ%XLILL8H^L5ILW Dž0H5&YLjH5HjHǾH5{HjH5Lju H/L8IIOHH@HLt H rHILLLC@1foffffff.UHAWAVAUATSPBAAHIIHH}zCAID$HH(H0I$It$H1A TLDIt$HE1L1DHHHQHr+Ht,I<$tA u H y rCAH}uDH[A\A]A^A_]UHAWAVAUATSHIHH}HUdH%(HEH{ t 1W)EEEIHshH}\LPAE)`W)E)EHEA)pHEEHEHEHE\EDmD}HE HEH`Hjt1LuHEHt*HuVE0HPHBHMHE1H}dH%(H;EHĈ[A\A]A^A_]ËEMɍM HHJHtH HDH`HЀ}t"A11HMHPHHiHEHMHWfffff.UHSH8HdH%(HEtAH]HHthHHEHEHEHEHEHEH7H]HHt/HHEHEHEHEHEHEHHuhEH]HHt/HHEHEHEHEHEHEHHupHdH %(H;MuH8[]vfDHtUHSPHHHH[]ff.UHAWAVAUATSH8IHHudH%(HE)EH}uA)E{CDD@t H}I;>u@t8HuHuI>I~H9t$1dH %(H;MHH8[A\A]A^A_]At AAAHMALMt'AuIHAAE2AAMDeItE1EDDAHDHEE1ɀ{AHLUAEHUHLAWARL}AWH 1A?HM}t HU1{uIv1I6H9HIIvHր{H}HDHDH)H9HqE1bfUHAWAVAUATSHhdH%(HE1ۀ BIHW)EHwhH]H"W) )P)@)0HDž`DžPHXhƅ@H L1ۄHHHHW)E)EHEE?)))DžH}HHHHHH)HH1H9HEHHH9u!H#HHHHH HЁHHHHILc`MIW)HDžEHtZAJH<HN,LHH1HLLLH1H1HI1HcLxrf.H}#Hz8LyHtH4|HIHc@HW)HDžEtEAJHMIHJHL1H+HE1LMnE1;ffffff.I HEHHuHP0AHHHHHH H4ׁH4HHHHHHrH8HHHH;9LxH&BL8xMHcAHH;MLc;HDžL{HHD3ILRLIMH}HLH BHH1Hi-8ߝHH1H/H1HHH/H1HHHHUUUUUUUUH!HH)HH33333333H!HH!HIIIHI!HLI8Iw6HwH!@fffff.HHAHHH9r H1HHHEHHLMLWff.M9aM Mt^IAH9tAwL!H9tEDH9rHH H t1HHH9t1H9tfffff.EEH}HHHMHHHH)HHH9HEHHH9u!HHHHHH 
HЁHHHHHuH}H_HMLE#@HH Au I@BL9xDHLx HMAH{ HH!AIDH]MtLHHtLLHDžLL)HHr8I>LLILLL)HHwHt HuHM9tDff.I>HIM9uHHH9tH)HHHHHHtH}Htffff.L7LMuH}HEHtH}adH%(H;EuHh[A\A]A^A_]HHJHHdZWLEHEP IRSE~{I0AuE1KDˁE1Nc4ILHI-I MNctLHLH-L LIL9uAtJc4ILHI-I ILL_MgHHH-H LHHUUUUUUUUH!LH)H33333333HH!HH!HHHHHH!HHH8Hw IsH!*HL9r"HL H t H1IH 1AHHHHHI{#ffffff.H!H9HHt~HPH9uCHPL9tnD9R uD9JuE~]HM0E1F$G;$uIM9u@fff.vL9rIHL H t H1I1ALq1[A\A^A_]UHAWAVAUATSPHUHILJ>HdZWHqy HWSLuE1QAAE1OcIMHI-I MOc\LILH-L LIM9u@tOcIMHI-I MLLHI-I LcMtlLHHUUUUUUUUH!LH)H33333333II!HH!LHHHHH!ILI8IwMl$M!4E1MM9r$LL H t L1II D1AAHJHL8MML$L!L9M?MIGL9uOIGH9w9x u9puԅeHHE1FF;uIL9u>fAvL9rHL H t 1IHo1AcFIHLpHEHIGHCHxH*HHH H*XLHDH MxI*K uH*XK t Y.K$AIrIL$E1IAI ^.H,HH?\fH,H!H I9IGAHt HGHuIM9v(HLW1ߺILcM9wHCHx WH*HHH WH*X^C 舺H,H\H,H?H!H ILHHUUUUUUUUH!LH)H33333333HHH!H!HHHHHH!HHH8wHGHȃ?ٸHHHBI9LFM9LcMl$MuM!.M9sM$LL H t L1II D1AAH JHtHIWHCHSIL{JIHt?H@IL$IuH!"L9rHL H t 1IH1AHHL8HCLH[A\A]A^A_]UHAWAVAUATSHILLH BHH1Hi-8ߝHH1H/H1HII/I1LLWMLUtkLHHUUUUUUUUH!LH)H33333333HH!HH!HHHHHH!HHH8HwMzM!'MIH1HEMM9r L1IIHJHL}yL0MtqMJM9FM6MtQIFL9twL!L9t9fDL9rHL H t1IHL9t 1AL9tMIH˿ ڭIHL`IIFAFLIEHxH*HHH H*XLmLHDH MMxI* H*XHUK t Y.JmAIrIME1IAI ^趶H,HH?\H,H!H I9IGAHt HGHuIM9v!HLSnILkM9wHCHxWH*1wHHH WH*X^C H,H\LH,H?H!H ILHHUUUUUUUUH!LH)H33333333HHH!H!HHHHHH!HHH8wHGHȃ?ٸHHHBrI9LFM9LkIUIuL!M9sLL1IH HHtHIVHCHsI6LsHIHt>H@IMIuH!"L9rHL H t 1IH1AHHL0HCLH[A\A]A^A_]UHAWAVAUATSHHHdH%(HEHG H=HHC LcL{I$Mt$LsL;{IH LH)>HHHH?HHHIIHMI)M)tLLILLLsML{KHCLsHHsMI)HSHH)I9s(L99HEHuH HuHH9ALEHCHELH=,N$LϩHEJ (HMHMJ IHMHM覩M9u+MHMlIIIHUL)IHUHMLmHULH)HHMI9ALELH=LHHEJ<%LMJJ LHL)}H4M)IIHJHEHuH LcL{I$Mt$LsL;{IH LH)HULH)HHMI9ALELH=LHHEJ<cLMJJ LHL)H4M)IIHXHEHII9AMEI9MJ<IJIO,tLeLmHEHELHKLsMHEIEIEHEI9t4L}ILLa L;suL}HKoEHUfInfHnflIHUH;L;H}LuCHMHCHSHEL9tI)IIILuH;JH@IMIuH!"L9rHL H t 1IH1AHHL0HCLH[A\A]A^A_]UHAVSHH(C(HC0C8HC@CHLsPCPHCXC`HChCpHCxǃH3FCC fCWCv#HpL#HB0J<B4~BD4ADH膠H{xHCxHt蔠DWƃ[A^]!fUHAWAVSPHHHt蒢LMH{xHCxHt,H{hHChHtH{XHCXHtH{@HC@HtH{0HC0HHt<[A^A_]ʟf.L踟MMjM>I~Htۡ[A^A_]fUHAWAVAUATSHLLPdH%(HEЅ=AIIH]HxHGEHAA HVHHXHHH`HHHhH@HpHVHALHDDH;LHDDffffff.LEtALLHHLLc fHI1Ht%HtPIHIHEtIL$IT$Ht^E9|$ vWJc4A$9sHD;<2gHcBID$HD<A$ID$IL$JcHHDIL$IT$IELpHCƃ΃tJc HHHLuIcLC|DAEAACCt9|Ct9 HCu'AGHHcHHHDCDHPHZЅ.CLHDDAGHHcHHHDXu'AGHHcHHHDOc\E;] HcHH\IHt%HIHBAM LHIcE HxH<HHHH*HxHLDDHLHB~;HHK1@H4H4HtHtHIcu H9|HHBHHHJ HC1ƅHDžhHRH@HH8H`HH@HhHHHHHHH<HūHHDŽPDŽXHHRHHHH8H苫HHWHGXGHDž0H5? 
.HH5ᴰ .HǾHH5>- HH5>q-C4HHH52E-u H;HXHH@HpHHHLt H pH脪H QHpH}H8聪LHDDEdH%(H;EuHĈ[A\A]A^A_]&fDUHAWAVAUATSHLhHdH%(HEHp>MIHCIIHH =PHHHQHHQHHIHH PHHIHxL(ffff.IHCHc HHI9PM|$MtANA@@@uIHIH;6Mc$ILJHC׃MA~tHI;F IIW@t HH9 A~ ~11fH4H4HtHtHIcv H9|IHHHAƆrL<$LHpLhM\GI9>Aщփ ]1 ƅHDžhHHNLh@L8HHH@HHHHXHH<HLXL`DHDŽPDŽXHMHHL8HHHWHGXGHDž0IH5E;)HH5k)HǾ]詝HH5{:u) LH5a)HXH`4ʃHeHH5:1)u H輅HHH@HHHHxt H H HLHpHH8 L Aу„uIuEAHA^IIMHuHH;HA~ ~;IIW1ffffff.HDI9DuǀtH9uAFIEIEIF AUtLAF EFu AF ADHHfIAƆM~IN(MfPAF(AFPH1HH軒M}HLIǃ;IHLIEIEI9sALIHHLMA$IUIMHхI9LAutI9wHI>GGI>GHx4;u/I]I]I9s"A9tHL)L$IHLDIHt!HIHCAN M蓍HIcF HH<HHHHԏHHC~8I1fff.H4H4HtHtHIcv H9|HCL8AVIEIEI9sAH$LHLMsH IHIƅHDžhL%lDMl$@L8LL5ZDIFINHH@HHHXH<+HLqHDŽPDŽXHILL8LIL-CILW Dž0H5{1HH5HHǾדH50HH51HHNju褓vHu7IHcLzHA߉ƒuIcTA;V }IH AɃuIHHAƆƅHDžhL-BIE@HH8HH BHAHIHH@HHL`I<,HJDŽ%PBDŽ%XLILHH8H荛L-.BILW Dž0H5/LH5夰HHǾH5.HH5F$LA4߃Hu HOzH8L=qAIIOHH@HLt H 苉H蟚ILL蠚H訚H;tHHCfDHHc HHKH9t(HHHtr1IHIċuAEM} 1Lf%1Lf.ILD7H|7L)MD1I|1IL7H|7L)M1I|1HH H9uIHH4HLH)I4ILt@UHAWAVAUATSHhdH%(HEHcGHUIL<LH؇HL͇LAMAA9LHH o?HHHQHHQHHIHAH ??HHH=_IHpLxHoHoH>HpHhH8lLxLHDDH=_fIIcL M9Mc'MuHC ʃʃHcHu0AT$A9v&HcHcH9ȋuDž\I}uL(HPHHX@LpLHn4LDXMЃHH1HHHIH@DII9DAHPDxEdEA?tCH`L5H@uAt#A0E1Ƀ\uD%0fHHL9D tLHHD tL1HHIA0t"AA?tMH`L3uAt4H1dH %(H;MQH[A\A]A^A_]Ã|K%tBfDHHL9t*ډ tLHH tL˃|.HHXMЃH;vuLxGu 1L(k΁1L(ffffff.L=XL=`M)MD:ML:L=hL=pM)MD:M :HH H9uHHpHxH)I ItvUHAWAVAUATSHdH%(HEЀt HXAXGHc_HtHHcw,H;HGD HO`HAHHIHMcHL H t5HIH9}5E1dH%(H;En DH[A\A]A^A_]1AH9|HG4G0HPG<4 DL@HHsH8HPHc@H L,HLVsIHǾLCwW)HDžL&sILsILsHHLsHp9LhL0AA$AƅHH1H@HF Ho*HHHHHHHHH@HAH?*HHLHPMLdH Hc IcHHHHӃx~1fD0HHcpH9|0L8A ACDžE1LHHE$H(ALxE,DMcHpB AAMcLxHCуqL IIc4LulA@D9@@u2IcIcI~H9t1H[A\A]A^A_]҄ɹDʈK(EuHK)HK*LK0Dc8E~$1W1fHS0HHcS8HH9|HANHADi>DIYAEILWH{@HC@Ht WHC@DkHH1L[EAAEMN<LWH{PHCPHt WHCPDkXH1Ls[iWH{`HC`HtwWCh{(t&IHCPHHpHH[A\A]A^A_]I~M>fDH;x4M&MfM9s(A9t LL)L0]IHMDfDHCPL8HpHL(6IIINI9v"UHAWAVAUATSHMEljRfE[A^]fUHAWAVAUATSPG=IHBHdL% BID$HH]uMl$M-f.IMHtIL9p ws\IGHuMoL0QILp @(WHXIEI$HLHtI$IuI|$×ID$A_(H}c؅uH[A\A]A^A_]cDUHAWAVAUATSH(HdH%(HEGfHAHt'HEHEHEHEH=@H0Hu `L5@L>c@L%@Ml$f{ID$MufDIMHtIH9X wsdIGHuMoLuM0ID$HuMl$M+IMHtIL9p ws\IGHuMoL0pNILp @(WHXIEI$HLHtI$IuI|$賔ID$AG(H=|fANHH>HxHHHIHf1L9r @LCHHuI9M9w IWHfHHHuL-=IEHuI]I*ILHtIL9p ws^IGHuI_MM0MMILp @(WL`HIEHLHtIEH3I}蓓IEMAO(H}_uEH[A\A]A^A_]LfHJH9HuL98uHHHLtLL!_UHAWAVAUATSHHHdH%(HEfHCLH;HHHHHHHHH@HHHH5fff.fCLHKH#LLMnLcf{LƅHDžhHLp@L8HHH@HHHLxK|=MLL\JDŽ=PBDŽ=XH#HHL8LLMg\LWAD$XAD$HDž0 LH5HH5eHǾSHH5LH5sH]u H9;HHH@HHLt H |JH[HHpL[H8[LCf ftLkLkE1MgMICI9sTO|MtAGftfAGfuLAGfufAuLLI@fvLKidH%(H;EuBHH[A\A]A^A_]HdH%(H;EuHHH[A\A]A^A_]XIKUHAWAVSPIHt~%UUUU)%33333333%iwLM~ H:KIF 1@A IV HIcNH9|LKA~ KIF 1IF OANHcωH[A^A_]ffffff.UHAWAVSPA(GHIHǾEEwLH[A^A_]UHAWAVATSHA9u K9u C9uFru<C9u4(AGIHǾDfAFf{r@HC>(AGIHDDfAD$I\$LH[A\A^A_]HCL8LPfA~rIFIFL8H#Lfffff.UHH]fff.UHH]fff.UHH ]fff.UHAWAVAUATSH(˅tAIu)MuA(FIHAub`}ԃAAuZMcIEHIIHIHLLILDQ(M7O.FE1AL}LeA$A޻HH/(EIHNjuDuD<.HIIGL}fA_E1LmMf(EIHNjuԋUGIEfAEHLPFO,IIL9u)HHMH4HU)‹}ԋME1XHcIH}HtnsGLu7AMA(DIHNjuԉDA|H<7GIFfEf,fEffArIFLuLHA1Ar;HL)H r/ف1fADALLHH9uH9tmHHHt"Hfffff.I|HDs(BHHǾDHH[A\A]A^A_]ÿ(AAHHǾDEE1) 6DIHC 1GCHcE4IM9tHEF4{HtL{ |҉%UUUU)%33333333%iwHCHC 1A HS HHcKH9|LC{L{ XDUHAWAVSPI(@IHǾMwLH[A^A_]UHH0dH%(HEHG HEO)M)EHF NOHG (E(MHEHF NdH%(H;EuH0]Bffff.UHAWAVAUATSHhIHdH%(HEHHu L9A]HL0HA8W)EHE,f.Md$HM$$HLIHs#f{hH[fA|$sgfDdCfWHKHMIL$Hx1H]Le*MuIMHULuHCH9 HMfrHEHfA|$HxrID$L,HUL4LLpL}HEI9LusM/IMHuI)MIIL$HH9H)III9LFHH9LCMHutI9J<I>H1N4N,N,IHLeLL?H]LmLuMtL'>H]LeLuLmHEI9L}M)MIINHH9=L)III9wIHH9LCMtI9J<=H1N$HEJN4IHLLN>H]LuLeMtLe=H]LeHfA|$OIKfDH}HEHH)tnHXL`HwH}H,HHEE1H}E1Ht H}<dH%(H;Eu*DHh[A\A]A^A_]AHuH}_ʽ>DUHAWAVAUATSH(dH%(HE:ɃHHc 
[Binary ELF content: compiled x86-64 machine code and data sections, not recoverable as readable text. Embedded string constants visible in this region include status-code names such as OK, CANCELLED, UNKNOWN, NOT_FOUND, ALREADY_EXISTS, OUT_OF_RANGE, UNIMPLEMENTED, INTERNAL, UNAVAILABLE, DATA_LOSS, UNAUTHENTICATED, and ABORTED.]
1fLIHH?HHD)@0@t I HIIwHUHHLHȍP|"HuH)L4IL0 L4)I1fffff.|0H[tHEL)HIMHHHAEHEtM} AAAHL)H9s1LH)HUL)HHHLLME1AVP HmH+HL9t`tMeMeMLLH HEL)AEuHMD(AEE}IJ ;HIMLDHULrIMHHHAEHEtM} AAAHHL)L9s#LH)LLLME1PAV H@Mt;tI]I]J<;HL4 MAEu C?AEM}B;dH%(H;EuH8[A\A]A^A_]蔳 @UHAWAVAUATSH(HdH%(HEW)EELmE1Hgffffffffffff.HHHH?HHDD)0BD=H IHHwAW|-LeI)O4HLHH (ff.HH)Ht5H mtputQnuE1ɀ{sAp E1htɃst mtEAFA>E1ɀ{sA${suA =E1E1A<H}HuHUHHELMLMDEDEH}HM&AuADELMHEMȃHUtpHuH H H4DA(kI)EؔE1I9AEOILLH H}EAHHux(HH9DELM|(=DELMHH9DELM~HH HUHEADMtTL]LuLMDEH}L.LmEȃM„u=1M1ILA HUHuIA(k(k)MILH?L1(kHHHLH?LH?L1HHH1賥 IM1Ht+HH5wIs`(kL1胥 Hu)LH H& .HHH HuIiؔAMHUL]A(k6H5wI IGMII DAHUHuA(kL]Au AAtH}H MH DEA(kM)EؔE1I9EOAIILH H}EEHLmx/LI9|6TEA(kt[Hй(kD)AjHI9~ HH HMHEAfffff.H]HH9jHE1E1dH%(H;Eu]DHX[A\A]A^A_]HHMEHcH}HHHED]HDZAHMH EH ߪ ffffff.UH]vfDUHSPHHH[]UHHGHEGHEOHHH]%DUHSPHHH[]UHAWAVATSH@dH%(HEHHDFH9AAAE AuBHG0H(GG;HGHHG$G,mH Nl^AAHH9D uCHG0H]GGHGHHG$G,HMl1I1II諽 H4ׂCHHH?HHIHUW)E)ELeH}HuHU9LHMHuIG0I@AGAAEEG AGAGHHAGENIGH ףp= ףHHEO$HHH?HHHiII)1@ALH)H` DHEAEi{AAEi> ALH5^Jc4MIMH4H` L)LH%I$I$IHHHH?HHHH)HH,^DPC‰G(D <u;H)\(\HHQHHH\(\H9w1MHF^DB_,EG0EĈG4HEHG8dH%(H;EuHH@[A\A^A_]C UHH dH%(HEHEE =H}HHQdH %(H;MuH ] UHH dH%(HEH& .HHHH?HHHiʚ;H)ǍHH=HH(kIHMUH}d H0PH nHEEH}HEUdH %(H;MuH ]; ff.UHH HdH%(HEHH!uHiʚ;HJEHELM1A9HPHH9HDH}HIHHOdH%(H;Eu HH ]藥 UHH HdH%(HEHH+uHi@BHiMbH(HLEHELM1AHHHH9HDH}HIHHOdH %(H;MuH ] fffff.UHH HdH%(HEHH5uHiHiCH4HLEHELM1A =HHHH9HDH}HIHHOdH %(H;MuH ]> fffff.UHH]UHHHAHHIHE]fffff.UHHHAE1HAɚ;MHHIIDHE]UH1ҹ =]ffffff.UHH dH%(HEH}uH}H n1oH0PEHELM1AHHHH9HDH}HIHHOdH %(H;MuH ] UH]fDUH]&fDUHH1HAɚ;DHHBHIƒDEHEIiMbH&]fffff.UHSPH1 H H)H4ׂCHHHH?HHHi@BH)Hi(kHH?HHIH[]fffff.UHAWAVSPHtIyL1ҹIƉE11( Iǃt L6HHAMHILH[A^A_]ÐUHAWAVATSH@HdH%(HEIL9@@ @u8HC $;L 1AA;HHS,IL9@@ @u)HC L3HCLsHCH(Ely1AII' H4ׂCHHH?HHLHUW)E)EH}HULHC (EHCD{ECEĈC HEHC(dH%(H;EuHH@[A\A^A_]F fDUHAWAVATSH@IHdH%(HEHUHMW)E)ELeH}L}LWEH1҃BЉH}LLbHCS LLLMHCSH}LL7HCS$dH%(H;EuHH@[A\A^A_]| fff.UHAWAVAUATSHHIHdH%(HEIL}MoLmHL9W)E)EH}HEHUHH(IH9E} AuDAF8E|u8AF 8E|u,AF 8E|u AF 8E|uEAA:F HHUL9unW)E)EH}HEHHHEI9} AMgu:EA8F|u.EA8F |u"EA8F |uEA8F |u AF :E|E11z H%I(}!HIHH?IIL;dH%(H;EuLDHH[A\A]A^A_] UHAWAVSH8HIIdH%(HE1 H4ׂCHHH?HHLHUWEHEHEfEHEвfEHuHULt(E(ECdH %(H;Mu H8[A^A_]' UHAWAVSH8HIIdH%(HE1F H4ׂCHHH?HHLHUWEHEHEfEHEвfEHuHULyt(E(ECdH %(H;Mu H8[A^A_]g UHAWAVAUATSHxHEdL%(LUHEHdEH9|HGWGGH.HG&H9oHGWGGHHHOG HHO HOHH@GHG(dH%(H;EoHHx[A\A]A^A_]ÉUHc‰MHcDEIcDMMcLcMHpHhHHHEHEHxHUW)E)EH}H]L}HLEHE1DBH}LHIAH}LHHAH}HuHUhHpHG$L/DwH_ DHGW AD$wDg$HG(G(HMH;hHx@;U ʀ;M3M3U H;u~- ffff.UHAWAVAUATSHHHdH%(HEHuHcGOQ1@H0lEHcHcS HcKLcCLc nHEHUW)E)EH}L}LeLL H}LLIAH}LLH}LL{ IEAEdH %(H;MuHH[A\A]A^A_]2 fUHAWAVSHHHdH%(HEHHMWC CHC0H9 uI$;E1 1AA;HHщHHHII HH9t1@1E1E1o1I踬 H4ׂCHHH?HHLHUW)E)EH}HuHUJ H}HMȉHHHHII DuED҉SCDDC ȉCAHl|AHDH ףp= ףHHHHH?HHHiII)L1LH)H` DHHH` Di{H)HMJcHHAAi> HHH%I$I$IHHD[HH?HHHH)HH5MHcDHNC1@u@|;H)\(\HHQHHH\(\H9w1MH TMB ABȉCDs dH%(H;EuHHH[A^A_]^ UHAWAVATSHMHI;w9I;HHCHHVH „71ILIIMLH?IIIkHI>muHJHL9LHL9HmHEHLmD]HgEI)\(\IQI\(\IQIףp= ףE1@u7u0HIN3IAM9wLHE1L9ADLNcDLII)~@1@ @MHLD]LmEL+mIH AH AH H @H L[A\A]A^A_]HpH:HnEUHSPHHHtuHHHKtHPHpHPHH[]fDUHHHt]]Lfff.UHAVSHHHu I&LHpHHPH[A^]fffff.UHAVSHHHu ILHpHHPH[A^]fffff.UHAVSHHuHILHHxHH@ [A^]ffffff.UHAVSHHuHIfLHHxHH@([A^]ffffff.UHSPHHHu*HpHHP0HH[]UHSPHHHuHpHHP8HH[]UH]fDUH]fDUHSH(HdH%(HEHEH]HHuHGEt H} HEdH %(H;MuH([] fUHAWAVATSH0dH%(HEH=%5؏ HH ҸHE1ۀ9:HH5XH uH=ru蠏 HHc5HEW)EHEH HIHsC6EL}Mu:FLHHMIILDILч IHEILeLuLHL` C7HEH}HuEt H}觇 HEdH %(H;MuH0[A\A^A_]H}讹詉 ٟ tHƟ UHSPH= ۏ t(  HHbH H= 迏 H[]Hz Ðy tHf UHSPH=` { t( 轆 HHH3 H=4 _ H[]H ÐUHAWAVAUATSH(HIdH%(HE L-ޞ HEHuLR tH}u L+@ƞ L= Lo H=x Ht(L3HtHH(H L9h(Lp Lc Ʌ IHL+T mL%= L H= 
Hu%(臅 HW@@ ?Hܝ LuHQ\HMLELHH(Hu1ILIDHH(LEH L9h(L谉 Mt.IIGHtHPAt I L dH%(H;EuRH([A\A]A^A_]H=V q \ 评 IHL=% H=& Q /׆ H= + U(i W@H@ H H=  !H=؜ (! W@H@ H H= 輌 KUHAWAVATS~ Hg H' H=/ d2 L= MgMt~E1,DHH HЁIT$(HIF(M$$MtFL5ڛ IFINHH)HHH9IDIN(IN H9uL!IFIN IN(L= IMwMu8IGIWHЃHs-1l@L MMtM&AFtI~ ݂ H1DI7HI7HDI7HDI7HDHH9uHtIHHHuIGH[A\A^A_]Ն H=ޚ [(' W@H@ H H= Š 'H=v 衊 ?0߁ W@@ HB H=C ~ fUHGUTCH ]UHSPHuHFHCHVHvHH{H H[]UHAWAVAUATSH8IIdH%(HELnHvIDHIEWH}H]H[HHEtrHLeHHHUUUUUUUUH!HH)H33333333HH!HH!HHHHHH!ILI8IwLCI!2E1LIH9r"HH H t H1HI 1AHEHJHL0MEAt MOMoALSLMLU$fff.L!L9M6MIFH9uOANIVHHDL9uuR1ATA:T uHH9ufDAvH9rHH H t81HHoI~ LM- LULMMH}R?19MLe0 IHHEIFLHI$LmuHHHO HPHpbIF(IEHLx WH*HHH WH*XHH؃H HxH*AM uH*XAM t Y.HAHrHKE1HAI ^‡ H,HH?\H,H!H I9IGAHtHGHu I1膇 II]I9vLL$IEHx WH*HHH WH*XA^E  H,H\ZH,H?H!H HHHHUUUUUUUUH!HH)H33333333HHH!H!HHHHHH!HHH8wHGHȃ?ٸHHHB耆 I9LFI9I]HSHHEuH!!H9sHHH H t1H1IMHHtHIVIEIuI6MuHIHt>H@HKHuH!!H9rHH H t 1HH1HIEL0IEdH%(H;EuLH8[A\A]A^A_]O~ tHWUHAWAVAUATSPHt#HtHuZtZHWHtH t"HHH5Ƀo 1uEH;HHFixed/UTH3OHCH t21HUTWC1f uHH[A\A]A^A_]D A-tA+u :u:uIIH gtI} WtALHCukfffff.UHAVSHWHGtHCHtH H1tHCHHtHCHHLsLtHKy0ugy0uatHCHvWHZtLsA~0u+A~0u$tHCHvHH[A^]HHt|UHAWAVATSt H_LwLwHH)H9HBv(H)LH IHHIHy LLAH)uH_A[A\A^A_]UHSHHdH%(HEH}NHEHdH%(H;Eu HH[]x fUHAWAVAUATSH(IHdH%(HED6At Mt$M|$AM|$IIBH5AL>y D <u&IIIs'C6ELeMuO_H}L HEHzLHHMIILDILt IHEILmLuILLL}x C4H}Hu[HEHHEEt H}t dH%(H;EuHH([A\A]A^A_]v H}辦fffff.UH]f.UH UHAWAVAUATSHXMIȉUHuIG@tIL$HHMMl$ IL$(HML)HMLEtL}tID$P 1ID$AHEH}HE8ALet MxIHAIHHMHMHHMI)1HI0H9]thEeHEN4 LNx I9uH}LL*w HEIDHEIcEH;EuEA8EuL9euHL}1H]L}Le1HwtH}sjI|$ Hu\DIHEAEAF(AD$@t ID$HH9Et 'H9EuI@LHuL1譁 HEAF)AHX[A\A]A^A_]Ð@8tCUH@HO H@HR4;4uHHH(:J(u @):B)1]DUHAWAVAUATSHHIdH%(HEƇGpkIGxHiIpW)P)@)0) )))HcƅHE1HLL1'WHHHƅH(LL0LH8>H@0H#HPmHH+(HXH=QIGpL AHAH IG@A8tBIW H@H I4;4 u%HHP(:Q(u@):A)At E1t H p t Ho dH%(H;EDHH[A\A]A^A_]IIwI+wHHHH$HCAƇIGHP@H @IO LuLLHH}I1A@uVHH)\(\HHQHHAH\(\H9w"HQHHHףp= ףH9AH}HEM~CIH}DMMDhDCW)E)E)EEHEfEEHEfEEhxHEhHDžpfDžxƅ{HEfEEIHHLIL^DE1HH +HcIH&,D,IcLi$II ADAAB A)AI0AуtHt3H8H<DH)H8@LH91HIԉуH8HHHRL'LHLAIcLHH%I$I$IHHH?HHHH)HH:Hu4)Bi%Iȍ )ЃHLE1F)ЍHi%Iэ)ȃHH H)LH<HIL⋅HHtDt/HPH<DʃH) @HP@MLLQ1IуHPHHHRL&LHLAIcLHH%I$I$IHHH?HHHH)HHRIu4)Bi%Iȍ )ЃHLD1E)ЍHi%Iэ)ȃHJ L)LHHIMHiπQHiQH@LHH+HUHXLHH+(H`MH`H9| L`LL;MwIGM;$yI9s+A$AL$AT$ AV ANAI0L>HH I)MIMI_HUUUUUUUH9)HH)HIH H9HGHH9HCHLLHtH9HHH<@i 1K HH<L<[IIA$AL$AT$ T LHH0L)HLLLDj HHHHHXLxIMtLJi ILLLHI_IGII9s4OW AV ANAI0LLu6ff.HH1I)MIMIOHUUUUUUUH9H)HIHH9wHHHH9HCHLLt!H9hIHHH<@3h L1K HL<L$[IIOW T LHH0M)LHLh HLxIHXL`HHtg ILLuLLI_IH;t}H@AAHH)\(\HHQHHAH\(\H9HUHQHHHףp= ףH9A.A:i H)fUHAWAVSPIIIHHHHN LH?IIIkx H E1HHK8HHHsH~y DvH 1HDpHsH L8L9C8L,@ALk E1E1-IL{HCL)HHHI0I9K7H}HLKH}Hu2KD7 KT7(CD7L,@ALk K7H}HL ECD7MlMH HJ40HH}L5CE1H0HH8Y Ls L{(M9tVHLeLhLHHLsEAFLHLLYEAFI0M9uH 2AhN[ fffff.UHF ȅx@HF$ȅx4HGF(ȅx'HGFȅxHGFȅx HG Fȅy1]HG(]UHAWAVSHHIHdH%(HEHEHuLltHuH|HEHB HH l HML}L}H}LLH}L9t (Hu  LHHuHt#HH}HEHt HP1dH%(H;Eu HH[A^A_]Y fUHSHHdH%(HE}W H V HHW@@@(@@@P@`@pHǀHHEHuHWdH%(H;Eu HH[]YY fUHAWAVSPIHV IH HIWAFAFAF(AF@AFPAF`AFpIdžAL3LLuHILPHH[A^A_]fff.UHAWAVATSIIHAL~ L$@AHyLYL+ IDLDHDLHLHLHH?HHII IHkx@8z :u.x@8z *ux@8z uz @:x M;uHHtL9sH<HL8L;t89IMI9DRDZ z @}z @}z @}ILH0HLIIHtOHL$IK O;t |uD:S|uD:[|uDeD:c|uDeD:c|uDeD:c}LH)HHHLMA}@8zMA}@8z MA}@8z MA}@8z Muqz MA:}^uRAx@8zuAAx@8z u0Ax@8z uAx@8z uz A:xsM;phI9t9M9H:I9x dH%(H;E$HL L2L9p }#dH %(H;MHHu5J8H(|u*J 8H)|uJ 8H*|uJ 8H+|u H,:J |F8HN H4@H1L;t1}%HHHHCHCHCu5H 8J|u*H!8J |uH"8J |uH#8J |u J :H$|LbHcHiȉH LcAk<)Lcȿ1bIMIH]HHEDAALH+DD)HcH@HAAE)IcHkB A8@)u.B A8@*uB A8@+uA@,:B IpI9xH]HZMpIHMhuNJA8Mu>J A8Mu.J A8MuJ A8M}uAM:J mdH %(H;MHHg+AMHF H IH9|HHuNBA8@3u>B A8@#u.B A8@uB 
A8@uA@:B dH%(H;EHAx@8zAx@8z nAx@8z YAx@8z Dz A:x+uPHHJ8Hu:J 8H u+J 8H uJ 8H u @ :B H]HZMuIMMmIIMIEAE'D)HcH@HAD)HcHk1U H4ׂCHHH?HHLI;t HI9uI9II)tMIHIIHf.HHt!HH@HM9|}HHII0I9LEI@ I8 IH9IMIUH9IDAu@8tHRH4v<;<0uHHz(@:~(u R):V)tI9tQI}IEHHщHAAMH LI軩HMHHQAEA1H[A\A]A^A_]fUH]= fDUHr= H H]fDUHH H]UH]f.UH]V= fDUHAWAVAUATSHHHdH%(HED:LrAHLt LzLrAE1IIBH5dlAL@ 1ۅKHDW)PHDž`L9LtA</tDH=C Ht8H5yHEH5yLPLA L/M HPLHHR Pt H`HQLH5J Ht3Iƿ ; HHk HLsH} HCHC1I$Pt H`; HI$Ht HAHy HHHBH5cl>?  HH1Le7LL14J [fff.LXJ HHH H<H5I HtIW)EHELHI HuEtzda1Mta uDuALHH ЋMII պH9LBEIINI9EELL1NI &W)p)`)PDžLL)HHNNNHHHk4H9HH4HD4HPLH H4x|IHI ־H9LBMHH H9HBHL UHƅxHHt HHxHHPM HAHHHt HXHx HE1HHBH5alAI< 1ɅKHD )E )EHb@HEH9HuHE1A</HMLD@LHHH1HLffff.IL;#LMW)0HDž@HL0L< HVIHsBm0H1MuFRLHHMIILDIL7 HH@IL0L8HLL1; B+W)HDž D0Au)H@H (0)H8MH8H@MLHyEAALEMLtLH5k; LHHHM HtH H5ʣE Hu0t H 6 AGH@6 6HW)HDžMt]H`@1B: W)HDžI_ HHw2ۈE1iLI$1E1LsHHHIIILDIL5 IILHHDHLDAtH@HLHL9 HrevisionK>CD>.txtCD> HDž8H@ L{@LL56 IFINHPH@HPHDžXHPL`I<,HPH`F JDŽ%BDŽ%HHPLH`K HtHH`xK HLu%HPH@H<(HPp9 t Hb4 HHtmHPHpHPHLL}9 H5 L~9 HHǾ Q8ALLn9 AHHHIINHPH@HPH`J IHPLJ HD DH8j3 HHHH!HP(HHHH HHH@H _ HDp HX0I$t H +3 At H@3 dH%(H;EPLHH[A\A]A^A_]ÀHEL%\@LD82 HL7 HIHLsC60L1MLuJZLHHMIILDILH2 IH@IL0L8LLHL3 CD5L{H HCHHCH H0C H1HC!9C)=fC-?C/H@HC0I$H0c3 HcDUHSPHH HHHCHtSHH[]K,ff.UHSPHH HHHCHtSH,HH[]#1 UHSPHHHOHwH9HB־H@ H)CH[]ffff.UHAVSHIHIFH9HBH޺? uI)^[A^]f.UHHWHG]fUHSPHH( HG t H{0^0 H HH{HCHtSHH[]+fUHSPHH HG t H{00 Ho HH{HCHtSH*HH[]/ f.F uH HFHGHUHSPHV(Hv0HrHH[]@UHSPHH HG t H{0n/ H HH{HCHtSHH[]"*fUHSPHH@ HG t H{0/ H HH{HCHtSH)HH[]. f.F uH HFHGHUHSPHV(Hv0HqHH[]UHAWAVAUATSH8IIdH%(HELgHHOI9M9I0IL$LL9s!ID$0IQ AT$ AL$A$IFM9tLL)I)HLH1 HHCfCCfCHCfC HC"pM.HUUUUUUUM)IHLII9hL)HHH L9LGIVHH9LCHUMtaI93LHH<@4- H>WAAGAG IGfAGIGfAG I0M~1LL)HHHEH IHHHMHMK dHHHMH} LeH}IVL)H- IFL)HEI^H]I6I)L)HL_- I>IH}IFEH}AFHEH}Htb, LdH%(H;EuHH8[A\A]A^A_]i. L\UHAWAVAUATSPHHGH)HHHH9soHVUUUUUUH9soLwI)HL L IcH HHH` HL)HH%I$I$IHHHH?HHHH)HHID]UHHHGH)HHHHH)v H] sHvHHHO]UHAWAVAUATSH8IIdH%(HELgHHOI9M9I0IL$LL9s!ID$0IQ AT$ AL$A$IFM9tLL)I)LH, WHCfCCfCHC fC(C*fC.pM.HUUUUUUUM)IHLII9hL)HHH L9LGIVHH9LCHUMtaI93LHH<@D( H>WAGAG AIGfAGIG fAG(I0M~1LL)HHHEH IHHHMHMK dHHHMH}] LeH}IVL)H( IFL)HEI^H]I6I)L)HLo( I>IH}IFEH}AFHEH}Htr' LdH%(H;EuHH8[A\A]A^A_]y) LlUHSPIIDDDIIHHLM9uSH"""""""II MIH貒HLщHɉLH LIHLZHwwwwwwwwLHL)HH?HHII HLHILII?HLLkAL>AT> T: L::H0I9uHHSHKHsH}Ht ! HKWAA HAfAHAfA HC0H[A\A]A^A_]ĢUHAWAVAUATSIAH ףp= ףHHHHH?HHLiL)LHLHH?HHHiMI)LL)LV E1@ALL)ߍqHIi{HcHipHiigf~fIDEiۅEAAEEAE1AE@ AELM)EqMEIEEi{EAAEMcMipMEiřAEEiEAAEEl$fEEIEAi{AADL)L)HimHiұ:HAHHIcMcL)HHIcHHHIcHHHcHH~HyHbuHHyH~HƞH HH ףp= ףHHHH?HHHi±:H[A\A]A^A_]UHAWAVAUATSPHLHGHL)HHHH9suHUUUUUUUH;I)ILM$7I9MH)HHHL9LGHH9LCMtzI9 ILHH<@} ILYHtHHvHLWfAAGAG IGfAGIGfAG I0I9uIL{E1O<IML,vIMWL@@ H@f@H@f@ H0L9uKdHIL#HSL)I)LL L;LkLsMtLH[A\A]A^A_] H[A\A]A^A_]HP軞UHAWAVAUATSPHLwLL;w7HL{LH)v_HHHHBH?HHHHL,@IO$/M)tLLL HKMLcLHKLLH)HHVUUUUUUUHHUI9ָHEHVUUUUUUH9IIHL,@L~ KdHHHLHM)t=I 1ffffff.A?AL?AT? T: L::H0I9uHHSHKHsH}Ht + HKWAA HAfAHA fA(HC0H[A\A]A^A_]UHAWAVAUATSPHLHGHL)HHHH9suHUUUUUUUH;I)ILM$7I9MH)HHHL9LGHH9LCMtzI9 ILHH<@ ILYHtHHvHLWfAGAG AIGfAGIG fAG(I0I9uIL{E1O<IML,vIMWL@@ H@f@H@ f@(H0L9uKdHIL#HSL)I)LL= L;LkLsMtLH[A\A]A^A_]G H[A\A]A^A_]H[UHAWAVATSHAIHdH%(HEEH}H޺0 }AufAIGIGE1fff.HH@H|(HGH;G tHHHOffff.HPPt+D8t"L* IAtIu11M4HH@H< t  dH%(H;EuHH[A\A^A_]3 UHAWAVSPHI IHH M>LH[A^A_]UHAWAVATSH`IIHdH%(HEWHGfGGHkHG1. 
H%I(}!HIHH?IIM'LeW)E)E)EHEA~tH}Hu./ H}Hu// IHtAIcHlIcwHIcW IcOMcGMcHHSAG(CA~t;IG0uHoH8HHf@AAIDLAEAu!@IAAAu KHÄuHH)H}1 HH_ZHH[]DHUHAWAVAUATSH(LAIuЉUD/A-tA+uDA-El$Md$DDAH=h Z E1Ht6DuH]H G)L ~1L9D;}D;}E1t}1H([A\A]A^A_]1L5LE1ff.A EC 9AADkHA L HsD) |fA:esH=  H[AL5nE1D) EA /EC 9AABt+Iź L HuIEA<E1J+8:HEpH=  HJ +HHMAHE1)؃ IA wEC 9`AAHEBt(Iź H\ HuIEA<H]LE1AkA RECL9>AACt&Iĺ L HuIEAntE1\MDHCHC A>/u>dH%(H;EuZHILYIH([A\A]A^A_]dH%(H;EuLH([A\A]A^A_]MI, fff.UHAWAVAUATSHHAAI7H= HtvDuH]HE1MD)؃ yCDoA(A($NfnffAfoMfofrfrffs ffofsfofsffofs fffAfsfs ffofs ffA$AIH0LMI1dH %(H;MKHX[A\A]A^A_]ǃ)EG)EC(E)EA0L%a(E)ELuL}f.W)ELLxA$fnfEfoMfofrfrffs ffofsfofsffofs fff](EBD+I f]LHuwfoEfs foUffofsfofsffofs fffUfU(EB+I I.fffff.UHAWAVSPHAItAtAuk*uLDHHu([A^A_]^LDHکuu [A^A_]u H1H[A^A_]UHAWAVSPHAI*LDHکu u u H߉3H[A^A_]fDUH]fDUHH@fDo7fD2fD2foGfBfofBfoG fB fB fog0fb0fb0fDoG@fDB@fDB@fDoWPfDRPfDRPfo`fz`fz`fDo_pfDZpHf.fofAfofofufAfDofEfDfEofEfAofDMfAf]fEfDPfoffUfof`ffffDofEfDofEfAofAfofpfAofAfffAofAffo]fAfDof]fEfDEfDefofAf}fofufAfffo}fAfAffEfEofMfDfAfffAffEfD@fAofAfofAfAfAffAffDoffofAfEfDofDffEofEfEofDfDfAffofAfAfAffAfAfffDofufDfofAfoUfAfUfDEfDfofmfDofEfEfDMfo]ff]f}fAfAfD۝@fDofDfDfەPfopfAfDۥ`fDfefAfDfoffAofAfAfDofAfEfDfDfAfDffouffDfAfAfDoEfDfAffDfEfvfDfEfffAofAfAfAfAfffAf fDJfj fz0fofffB@fbPfJ`fRp1fo%,fo-'fo5+fo=)ffffff.fo foffp9fffpȓffofsfsfffff HHufo fDojfob fDoB0foZ@fojPfDoJ`fDoZpfofrfofrffAofrfAofrffofrfDofArfDfAofrfEofArfDfof]frfrffUfofmfofrfrffAofrfEofArfDfAofrfAofrfffofrfDofArfDfDfAfofrfofrffAfDfAfAofArfrfAfEfDffAffAofrfEofArfDfEfEfAfAfEfAofrfArfDfEfDfAofEfArfrfAfDoMfDfAfEofAfArfArfEfoufo]ffDfofDfrfrfffDfD2fJfB fb0fDB@fDRPfz`fDZpHHfD4fD2fLfofJfD fB fd0fb0fDD@fDB@fDTPfDRPf|`fz`fD\pfDZpHH9fofAfofofofAfofUfDofEfDfAofAfofDofAf]fEfDPfoffUfDof`ffffofAfDofEfAoff]fofpfAofAfAffofAffAofDofDfEofDMfEfDmfAofDefDofEfofufAofEofAfffDomfEfDfDfEfAofoMffAoffffffDofofEfDMfAofAfofAfAfDmfDfffoffAfAfoffAfDofDfEofDfEfDfDfEofDfDfDfAfEfEfDffofufAfAoffDfDef}fEfEofDMfAofAfDfUfAfMfDmfEfofAfD]fDofEfEfDۅPfopfAfDۥ`fomffefDffofDfofAfAfDofAfEffofDfEfAfEfomfDfofAfDffDfAfDfofoufAfEfvffffAffAfAffffEfAf fRfDJ fr0ffB@fbPfj`fDBp1fo$fo fo$fo"ffo$foffp9fffpffofsfsfffff,HH=uH(7W)(D7WB)B(D7 WB )B (D70WB0)B0(D7@WB@)B@(D7PWBP)BP(D7`WB`)B`(D7pWBp)BpH@]ffffff.UHAWAVSH(HIIHH(W)(DWB)B(D WB )B (D0WB0)B0(D@WB@)B@(DPWBP)BP(D`WB`)B`foDpfBpfBp1ffofof o#fpГfBffp9f#fofsfsff '!fff HH=u[fLHfAo,f+f+fAodfcfcfEo\ fD[ fD[ fAoT0fS0fS0fEoD@fDC@fDC@fAo|Pf{Pf{PfEoL`fDK`fDK`fAoLpfKpfofrfofrffofrfDofArfDfAofrfAofrffofrfDofArfDfAofrfEofArfDfEfofrfDofArfDfDfEofEfAofffAfEofAfDfAfEfEfArfArfEfDofffrfArfDffAfDfEfAfAfEfAfAofrfEofArfDfofrfDofArfDfofrfDofArfDfofrfDofArfDfAofrfAofrff]fofrfofrff]fAofrfEofArfDfofrfofrffEffAofrfofAfArfDfEfAfDfDfofrfDofDfrffAfAfoffrfrffAffAfCfk fofrfoffrffAfAfc0fofrfrffEffAofAfArfrfAfouffAfoffrfrffoefDffAofAfArfrfAfffS@f[PfC`fKp1fo%+fo-fo5fo=fofoffpГfffp9ffofsfsfffff HH=uHiIA(W)A(FWC)CA(F WC )C A(F0WC0)C0A(F@WC@)C@A(FPWCP)CPA(F`WC`)C`A(FpWCp)CpH([A^A_]ffff.UHAWAVAUATSH dH%(HEHMIIIHHH1ۺ1NAHHfo5fo=ffffff.Aofofrfffoffffofrfffofrfffofrfffofrfffofrfffofrfffffofrfffofrffff fofrfffofrffff0fofrfffofrffff@HH9L@ff@f0f 
fffffAE)PAE)`AE)pAE)AE)AE)AE)AoEf\HHHHHALBEAL)Hȉ\HCЍCȉlCȉ|CȉCȉCȉCȉCȉJ<-PH1vHPHPLPfoPfo`fopfofofrffoffofoffffofrfffDofoffffofofrfoffAfoffffofofrfDofAfAfDofDfDfAfofrffDofAfEofDofArfDffofrffAfofrfffofrfAfAfDofArfDfAfofrffAfofrfffAofrfAfo-ZffDofofrfAfPfAffofrffAfofrff`fffofrffAfofrffpfffofrffAfofrffffH@HHP5HH(P(`(p()P)`)p)]()E()E()E()EfoPfofrfo`ffo-Hffoffffopfofrfo]fffDofoffffoMfofrfomffAfoffffoMfofrfDoEfAfAfDofDfDfAfofrffDoMfAfDofArfDffofrffAfEofofrfffofrfAfAfDofArfDfAfofrffAfofrfffAofrfAfDo fAfofrfAfPfAf]fofrffAfEofofrffAof`ff]fofrffAfofrffpff]fofrffAfofrff]ffMHtCD1Dfo@foPffHH H9uAtHfoPf1Lfffff.AofAHHuL)tII}dH%(H;EuHĘ [A\A]A^A_]UHAWAVAUATSHL@dH%(HEHDIIIHPE11HHHHHHfo5fo=pBo1fofrfffoffffofrfffofrfffofrfffofrfffofrfffofrffBPffBfofrfffofrffB`ffBfofrfffofrffBpffBfofrfffofrffBffBIL9IH@o qE1Hff.CoD5ffofrf 4fofrffofrfDofAfofrffrffffofrffofrfAfofrffrffofrfAfofrffffofrfAfofrfff ff0ff@HHHPHfo5fofofo fo@ffo=;ffof߅fffDo fAfofDo tfAfffofߍffffAfo0fAffofrfffrffofrfffrfofrfffrf%KfC$4IIH@ dH%(H;EOHĨ[A\A]A^A_]Ip%fffff.fBo5III)tf0IIBff@f0f fffffAAHLLfffffffpf`fPfffffEffEMHPtWHLLHPfDoPfo`fDopfofofofofofAofrffDov fAfoffAffAofrffAfDofDfEffofrffAfDofDfDffofrffAfDofDfDffofrfAfDo% fAfDofArfDfAfofrffAfofrfffAofrfAfAfofrfAfAfofrffAfDofArfDffAofrffDo fAfofrfAfPfffofrfAfAfofrff`fAffofrffAfofrffpfffofrffAfofrffffHHHPMfoPfDo`fopfDofofDofofDofofrfAfo=ffofffofrfAffofffDofArfEfDfAofAffofrfAffDofDfDfofrffo%3 ffDofArfDfofrfAffofrffAofrff-fDofArfEE|$IYfAfAfEfAfDofArfDfDo fEfEofArfDfAofrffAfDofArfEfEofArfEfD+fEofArfEEl$IffAfofrfffDofArfDED$ItfDffAofrfffofrfAAT$ItEfAl$It4fEE\$It"fA\$ItfA$AoD$f0AD$IAoD$fAD$IAoD$fAD$IpAoD$fAD$IPAoD$fAD$I0AoD$fAD$IAoD$f AD$IAo$f0A$fUHAWAVAUATSH8IH}dH%(HEI@H=A@Ǹ@HHƊHEI~HLMW)EHHELmI1LeDHEHLHHEE1:HU|J;0Elj0MƉ0M0EHEKDIM9tKDHEȿLLuuHUyHU藀HEIHEdH %(H;MuH8[A\A]A^A_]@UHAWAVSHLIdH%(HEHEHEHHYAt&MH5mIEH}غ uAdH%(H;EuDH[A^A_]'UHAWAVAUATSHHMIIdH%(HEI@H=@A@1@HUW)EHHEHIVLHIKvHEL)HELmLu$HEHH]H]LuHEH]$f.zHEKDIHIt0]lj0EƉ0Eʼn0EKDHEȿLLLuuf.~HEHMHdH %(H;MuHH[A\A]A^A_]fDUHAWAVAUATSH(HdH%(HEMaIHHIHH9ID$H91IIW)EEYYDȉEIw=H}ȹLLH}LLumumfkLELMLI1Mt^KD%LHuHUQHuLLLMAL11ExDx)}HdH %(H;MuH([A\A]A^A_]IfUHAWAVAUATSH8dH%(HEH1I MaI9HUHEIufIM)HuLMIHuMIILCEM9IGƹDGI)LLHMLE<HUDȉB EuB B J J B ȈB JJBȈBJJBȈBJJBȈBJJBȈBID$HuA2T AIfff.A2T AKA2T AHuKMLuDq AIM*MIWH}HHMLEwHUAADr B B J J B ȈB JJBȈBJJBȈBJJBȈBJJBȈBALHuI(KIM)HuLMIHuMIILCEM9IGƹDGI)LLHMLElvHUDȉB EuB B J J B ȈB JJBȈBJJBȈBJJBȈBJJBȈBID$HuA2T AIt(A2T AKA2T AHuHEH([A\A]A^A_]UHAWAVAUATSH(MHMHEHu*A2HLjHLrÃtHLuILME|$ AI?MH}HuWLmAELLHMMUAAE|$ AD$ AD$ AL$ AL$ AD$ AD$ AL$AL$AD$AD$AL$AL$AD$AD$AL$AL$AD$AD$AL$AL$AD$AD$A$AHuH}ILIZ{DIM)HuLH}LIMIILCC/I9ŹHGAǸDGI)H}HuLHMMUDAD$ EuAD$ AD$ AL$ AL$ AD$ AD$ AL$AL$AD$AD$AL$AL$AD$AD$AL$AL$AD$AD$AL$AL$AD$AD$A$IFAT 2It*@AT 2KAT 2HuHEH([A\A]A^A_]DUHAWAVATSAHIILLHAuu/uB[A\A^A_]u!u4[A\A^A_]:[A\A^A_]l[A\A^A_]m[A\A^A_]q[A\A^A_]qfDUHAWAVAUATSPDMMIIHIu-DMԩuHLHLLMH[A\A]A^A_]1LHLLMDMH[A\A]A^A_]KEt)L ϸLHLLMH[A\A]A^A_]-L LHLLMH[A\A]A^A_]T@UHAWAVAUATSH(HMMIHIHE1LeLIHUILEDLUJ<;HK3L=J ;H@K3D=JD;HLALULEILIIIwJ;HL;}MLLeIw.LIIv"HL)H HH)H DуIs 1Hu_Dփ 1fD<A2|@<|A2|@||A2|@||A2|@|HH9uHrHt*HHfffff.T0A2T5T3HHuHwXIw9HH)Hr-L)щʃJ4J4Ht JtJtH9tIIfBLBLIIrHHLA?fAD AL TWWT  H HLeA$H([A\A]A^A_]UHAWAVAUATSH8dH%(HEH|MMIIIHW)EH HOH9H9LEri ueIHHuLALEHIH3MI $IHCINH3MIL$IFIHIIwMu{Ir]LuLmffffff.IHLLULEII1$IFI1D$IHIMIwHCLuLmLAMjHuHLAI 1HHHAtA2T4AA4HI9tVLHHHDAT2TtATAtTAt@2tAtATHHHHuH1L)HCHHLH)HwH11HHI7J<;MLMHH9uH9:TKJ4;I1f.:>HH9uH9t-ILHfDADHHrdH%(H;EuH8[A\A]A^A_]MBfUHSHLdH%(HEAEH:H$LM-EdH%(H;EuH[]ffff.UHAWAVAUATSHMHMIILMAHu6fff.AA2$IAILjÃtHLuIHEIrFfLLHUHII3$IIFI3D$IGHIIIIw1MtfLLHUAu LIu#IIEAA2 AIt.fDAA2 
AKAA2 AHuHEȉH[A\A]A^A_]UHSHLˋEdL %(LMD DMLLMARP*HEdH%(H;EuH[]FfDUHAWAVAUATSHMIHMIILUA@H@ }LMA A2 $IA AILjÃt HLuILLLA҃w+I I3 I I wILI3LILILIII1IHULUrCfLLULUHUI$I3IIID$I3FIFIGIIIIwM$fA $IA4@0A7IA LjÃt HLuIXLLLA҃w1I IH1II wILITH1ITILIII1IHULUrOLLULUHUI$IH1IIID$INH1IOIFIIIIwMIIDLMLLAAuLAA2 IEAAH}IAA2 AAKAA2 AAHuIILMtLLAAu L#H}dIEA A4@0A4AH}It=DA A4@0A4AKA A4@0փA4AHuȉH[A\A]A^A_]UHAWAVSPHDvD;ruHaDsH[A^A_]EILDHDI>IcFHIHcNH8H߅x?LL txKE1~HH|u q1ɉKLL t9CA~3H fDH|u P1C<19+fff.UHHLcGHHcNHL]7fUHSPH" t+C~(H @H|u'PC 1uCCH[]UHAWAVAUATSPI׋F;BILLLLHMcnIcGHEMeLst_DcLMEtYII6H;E1LH)fffff.JJJM@HuHE9}DD)uL-11E9|H^I1JHJIQHIuL9t;HIH HH IHDHH3HDHI9uHJ C~.H fffff.H|u#PCuCCH[A\A]A^A_]fff.UHAWAVAUATSPI׋F;BILLLLHMcnIcGHEMeLt_DcLMEtVII6H;E1LH)fffff.JJJM@HuHE9}DD)uL*1y1E9|HaI1JHJIQHIuL9t>Hf.IH HH IHDHH3HDHI9uHJ H[A\A]A^A_]@UHAWAVATSHcG H9Hr"H rx1ۿ1fAczzGuPIHIHtZILIcT$HtHH0L軭LH8IM4$E|$ $H Krx1ۿ1jAh1ۉ[A\A^A_]Ht8UHE1H)ffffff.JJJM@HuH]1UHG~"HfH|u PGt]G]ÉG]ÐHUHAWAVSPHHcOHHs1E1YHHfE1fBofBoTfIL9uffpffI~H9tff.L HH9uMt7{tgCHZHcKH9H11IH CHL0CDsEH H1@LcsL9HHHItHHf1fofoTfHH9uffpffH~H9tffff.H LE1HH)JJKMIHuH҃ 11ANA9}'IH H)@1@H3H HIcNH9|Ht H ]>1ۿ1dAKCIII0L IfE1fCoTfCofIM9uffpffI~L9tff.L HH9uMH \>1ۿ1dA [A\A^A_]HcOII)UHHIsE1HfLHH7H4HfE1fBoTfBofIL9uffpffI~I9tff.L HH9u1M]f.Ht8UHE1H)ffffff.JJJM@HuH]1ÅHcGHUHHs11YHHf1fofoTfHH9uffpffH~H9tffff.H 4HH9uH]tG1Gffffff.G~ UHHfH|u P1]UHHIE1H|HH&IHLHIHHfIHLOHIHHfIHLOHIHHfIHLOHH H IIIwMtZHH&IHLHIIt=HHfIHLOHIItHHfIHLOHH]E1L]H UHIE1HHHEHHeIHILHFHEHHeIHILWHFHEHHeIHILOHFHEHHeIHLWH H IIIxMtaHHEHHeIHLIIt@HFHEHHeIHLOIItHFHEHHeIHLOI]LE1LfDHUHHHr[f.HHHHWHFHHGHWHFHHG HW(HFHHG0HW8H H@HHwHt7HHHHWHt$HFHHGHWHtHFHHG HW(]fDUHHHH"E1E1E1E1IIILHHaE1IIIHFH!IIIL_HFH!E1IIIHFHaIIIHHaIIILOHHaE1IIIHFHaIIIHFHaIIIHFH!IIILWHF H!E1IIIHFHaIIIHFHaIIIHFHaIIIHHa IIIL_ HHa(E1IIIHFHa IIIHFHaIIIHFHaIIIHF HaIIIHF(H!IIILO(HF0H!E1IIIHF(HaIIIHF HaIIIHFHaIIIHFHa IIIHFHa(IIIHHa0IIILW0HHa8E1IIIHFHa0IIIHFHa(IIIHFHa IIIHF HaIIIHF(HaIIIHF0HaIIIHF8H!IIIL_8HF8HaE1IIIHF0HaIIIHF(HaIIIHF Ha IIIHFHa(IIIHFHa0IIIHFHa8IIILO@HFHa8E1IIIHFHa0IIIHF Ha(IIIHF(Ha IIIHF0HaIIIHF8HaIIILWHHF8HaE1IIIHF0Ha IIIHF(Ha(IIIHF Ha0IIIHFHa8IIIL_PHF Ha8E1IIIHF(Ha0IIIHF0Ha(IIIHF8Ha IIILOXHF8Ha(E1IIIHF0Ha0IIIHF(Ha8IIILW`HF0Ha8E1IIIHF8Ha0IIIL_hHF8Ha8IIILOpLWx]DUHHHH"E1E1E1E1IIILHHaE1IIIHFH!IIILOHFH!E1IIIHFHaIIIHHaIIIL_HHaE1IIIHFHaIIIHFHaIIIHFH!IIILWHFHaE1IIIHFHaIIIHFHaIIILO HFHaE1IIIHFHaIIIL_(HFHaIIILW0LO8]DUHHH1E1E1E1IIILHFH&E1IIIIIILWHFHE1IIIHFH&IIIIIILGHFH&E1IIIIIIHFHfIIIIIILOHFHE1IIIHFHfIIIIIIHF H&IIIIIILW HF(H&E1IIIIIIHF HfIIIIIIHFHfIIIIIILG(HFHE1IIIHF HfIIIIIIHF(HfIIIIIIHF0H&IIIIIILO0HF8H&E1IIIIIIHF0HfIIIIIIHF(HfIIIIIIHF HfIIIIIILW8HF HE1IIIHF(HfIIIIIIHF0HfIIIIIIHF8HfIIIIIILG@HF8HfE1IIIIIIHF0HfIIIIIIHF(Hf IIIIIILOHHF(HE1IIIHF0Hf IIIIIIHF8HfIIIIIILWPHF8Hf E1IIIIIIHF0Hf(IIIIIILGXHF0HE1IIIHF8Hf(IIIIIILO`HF8Hf0E1IIIIIILWhHF8HIIHLGpLOx]fUHHH1E1E1E1IIILHFH&E1IIIIIILGHFHE1IIIHFH&IIIIIILWHFH&E1IIIIIIHFHfIIIIIILOHFHE1IIIHFHfIIIIIILG HFHfE1IIIIIILW(HFHIIHLO0LG8]fUHRHtWH@@]fDUH"HtWH@@]fDUHWHG]fff.Ht6Gu UHSPHHHHߋCH[]H@Ht6Gu UHSPHHHpHߋCH[]\H@HtjUHAVSHGHWH@@H9t6HcsIHt(HcSHtFHH3I>LK41H>LAFuH?LAFuH1LHKH1[A^]fDUHAVSIH9t:HIcv51Ʌt-IcVHtHI6H;WANKAFCL1H[A^]UHSPHH?HtHcS Ht H1CCH[]fDUHH=ժ H5Hʪ ]UHH]RH ( ]fffff.UH1H1HH HD HH1H@HB HH1H=HBHH1HHBHH1HPHBH]fUHO~HfH|uQHHcHT1H 1HH HDHH1HHBII1H@LBLHE1IAIBIIE1HALBE эBD I]1]UHO~HfH|uQHHcHT1H 1HH HDHH1HHBII1H@LBLHE1IAIBIIE1HLBAE эBD I]1]ÐUHGG]fff.UHSPHtCHHC1H[]ÐUHAVSHHt(IHt'CHL0 C1C1[A^]ffffff.UHAWAVSPHIIH5t+HHt I>LA^AF1H[A^A_]fUHAWAVSPIIHGu H;CL;DsDs CCH[A^A_]UHAWAVSPzt&H ySxE11mA=/HcZII)HIsE1HiMII 1LIfE1fff.CoTfCofIM9uffpffI~M9tff.L HH9uHMt#H 
RxE11fADk1HHHLIfE1fCoTfCofIL9uffpffI~H9tff.L HH9uMt"H Qx1fA1 A^[A^]ffffff.UHMHHIr!II)I AII)I AEtnE1MAtNI!NI!M NMIMXM9N I!NI!M NNLI!N\I!M N\IM9uyMIfHnfpDfHnfpDE1f.BoBo\Bo$BolffffffB$BlIM9uM9]ffff.UHHHH<H9 HrLAIxsHH1H< H9v!HHHAAII9sHIIMINJHtXHfH@@IMkC+Mt!AEu I}AEu8IE1K>H9s/LHHDLH9sHL1HHHHLHIHHHIIwHMDIs11PI11fffff.A<6HH AT6H A|6HH AT6H HI9uHt%I1fff.HA<6H HH9uHHIHIMt 1L܈HH[A\A]A^A_]5DUHAWAVAUATSPIHtMH1MtuIH}MfIMl$HL1tbDkHJH;HuLOrIPHt^HWH@@LMuCC0Mt)AGu I?AGu IL1HH[A\A]A^A_]fUH]fDUH]f.UHAWAVATSHH9HHBHL7Hr$II)ILM9AI9AEE1IMIt8MIH>HN4 E1ffff.G$>F$IIM9uII)IwCMIMF EFLEHFLEHFLEHIIL9uH9sH)H1[A\A^A_]鳆[A\A^A_]H sE1IILRL>ILHE1ffff.AoJAofof`fpNppfhfpNppfgC 3fof`fpNppfhfpNppfgCT3I IL9{L9}MIINH>HIMIfA~ f`fpNppfgfB IIM9uL9UHAWAVATSHcGH~%H1ۉDH|uWHHcLD1M 1MI @MDMIE1IAMBAMIE1IAMBMI1IMBMIE1IMBAE D BύD I1HH9HHBHHHr$II)IL 9M9AH9AEE1IMIt.MIL3IN4E1G$>G$ IIM9uII)IwCMILF DFD DBFD DBFD DBIHL9uH9sHH)H1蘃H[A\A^A_]H sE1AALIL3IIIIE1fAoIAofof`fpNppfhfpNppfgC 2fof`fpNppfhfpNppfgCT2I IM9{L9@MAAN L3IIIIIffffff.A~ f`fpNppfgfC IIM9uL9UHAWAVATSHH2LcbILH)Hs 1IH s 11{HHH3HfE1ffBoTfBofI L9uffpffpUffofrffofqff~H9taIIMfnHff.~ fHI9ufpUffofrffofqff~L9uHIf.B IM9u1uHO4E1DG$>F$IIM9uII)IwCMILG DGLDHGLDHGLDHIHL9u͸H9sH)H1~[A\A^A_]H sE1IIMPL>ILHE1ff.AoJAofof`fpNppfhfpNppfgC 3fof`fpNppfhfpNppfgCT3I IL9{L9MIIOH>HIMIffDA~ f`fpNppfgfB IIM9uL9UHO~H1H|u q]Åt u HH]É]H]fff.UHO~H1AJ|uDADt1uHHH]H9ILBMtXUHAVS1E1DNJMI)L1LsM1I HI!I?M1I?IL1L!L1IM9u[A^]1II)sII)Is#E1H{*IE1HMII<2H4HfE1ffBoTfBofIM9uffpffI~M9tff.L HH9uLHII!I?AD MII H HfE1fffffff.BoTfBofIM9uffpffI~M9tff.L HH9uLHII!I?D!f.HHu1H1H UHSP_;^u$HLcGHHcNHL؅D 1H[]fff.UHHHH]fGHUHHH31|qr[HxIIIPfHnf1oTfoTfHI9uffpffH~L9tfff.H 4HH9u]1Hf.UHHdH%(HEHuHt(u"1HHHcwHUHdH %(H;MuH]wff.1tËGHUHHH1H|tr^HxIIIPfHnf1@oTfoTfHI9uffpffH~L9tfff.H 4HH9u]1HUHGIHHLI1|{reLHMIIRfInfE1f.BoTfBoTfIM9uffpffI~M9tf.L HH9u1Mt]Ht 1]ø]@~UHH]1fUHO~HfH|uQ t 1t]1]ÍQH7|11fH<uHH9uHcH HQ1H]fff.UHSHcOHcF9}LII)Is 1Ii1qMIM IHf1foTfofHI9uffpffH~M9tffff.K IL9u9LII)IsIhMIM fHnIHf1f.oTfofHI9uffpffH~M9tffff.K IL9u9LHLAs1uD%fHnDHfE1ҐBoBo\Co$ffCoTffI L9uffpffH~L9tfDML3L HI9uF3GHc1H []fUH02HtW@@]ffff.Ht9UHSPHH5HHHH{ЭHH[]­Ð(tUHAWAVSPHOH_ H;OuUHIHHA LEI9LH=u;HGJ4IHLHtHGLwHOHGHHGfG(H[A^A_]ffff.UHAVSH(t3{)H 61tAh1C)H;Hu`HHHLs I9u7蹪HtHIWH@@H;HHt@H;Hs @@HC mH 61tAGH 51tAAFu I>AFu ILC(1[A^]f.(tUHHGHOHQHWHDHG ]f.UHAWAVAUATSHhMIz~+Hffffff.AJ|uDGD1HcAAH~&L ADEK|uEPAEE1~ L I|t+E~H9EJ|t H9s.E1E1yH E11oAX\IIfE1fɐBofBoTfIM9uffpffI~I9tff.N IL9uMtpA}(HuIHUIMI] I;MurHIHHA LEI9LH=uXI}J4貪HtBIEMuIM*H E11iA^bIEHIEfAE(LHELHELHMu LtILmH]HLeMLAO~HH|uQHH}HcHT1H 1II LDMI1IMBMI1IMBMIE1IAMBMIE1IAMBE эBD I1H}Ѓ?@)LEA@HuH}~!I@H|u P1A@A@)ډUH}G~)HHuDH|uPGHuu1GGHcvN9~7Hc芽%H}HHcOH_LHMLEXHTH}HcWLEAX9LHM})fffff.H7HHIcXHH9|_McxAE)IIu1JtHuH7HuNdI@M1H}GAFHc踼SANHEHMLcIL(LHH}Hx肼L}HExtI@H}LMLUL]AoIcHcHULHHE1!H}L]HM}AID;u3LEHIL9tXHCLpHpHIIHsH9HHs.LILIL9rL)HN!IH9HLrI9HEH0LLLULMLEII JHx9I1H) fI4H4I4HRHuH@H}L]IEHEH1LH) fI4H4I4HRHuH@HHuF~$HLEH|u"PFLEu FFHUt#HEHcHHLEHs>11|E1Hu~(uHFHNHQHVHDHF DHh[A\A]A^A_]HHf1fofoTfHH9uffpffH~H9t H 1u]HEȋ@AFAFHcSHtHI?1K]DcEgAGHEȋHEtk9Mt-HcHcHHHMH1H)I?AYDHMȋI1HuЀ~(!HFHNHQHVHDHF AIBEI?%fMAAIIN HrHcKHt+I1M)LLLHvHuMAfE1HEHJ1HIH LccMtM/HEHH31LM)MDLLHHuMAM)LIIпAJ4LJH´LuLmAD$~(I $DH|u"PAD$uAD$AD$1LLLIt3AA}t)A~u HHdLLLЅuE1{(uHCHKHQHSHDHC DH[A\A]A^A_]ÐUHSPHt+C~(H @H|u'PC 1uCCH[]UHSPH肭t+C~(H @H|u'PC 1uCCH[]UHAWAVSPHIIHt/AG~4IfDH|u-PAG1H[A^A_]uAGAG1LLLIH[A^A_]HUHAWAVAUATSPMIAHI11LA|$t*A}u H9HLLLЅA}tPL.HtmI@LLDHMkAEu I}HAEu,IE*LLDLMH[A\A]A^A_]$L H[A\A]A^A_]ff.UHAWAVAUATSHMIΉUHIH9t2HcsLHcSHtHH3I<$HC HtwC1AD$KAL$EnIc9vHI $HH)HsME1HLH t,Ic|$A9H)HHI<$1H2K1BHHHLIfE1fCoTfCofIL9uffpffI~H9tff.L 
HH9uMt%H H x1ۿ1fAEl$A(uoIOI_ I;OuLHIHHA LEI9LH=u2IJ4ǃHtIGMoIOIGHIGfAG(LE~H}1HIIcH薘AEE}]ԅ1 9IcFHtI4$I}ME1HM)NNNMRHuMAE1HM) fNONMRHuMAM)LHAL LL9AL9AEtE1IIfInfpDfHnfpDE1ff.BoBo\Bo$BolffffffB$BlIM9uI9ffffff.MʨtNM!NI!M NMILXM9N M!NI!M NNLM!N\I!M N\IL9uUAD$HuȀ~(uHFHNHQHVHDHF H[A\A]A^A_]UHAWAVATSHAII0~HtTW@@LLDHIHAH;H5hH,H{SHKD1[A\A^A_]ff.UHAWAVATSIHIIH9t0AD$AFIct$HL軕AD$AFbIcvHL蕕tAF~GII$11fffff.HANKAFC1LuC11HH[A\A]A^A_]D{UHAWAVSPIHDHcKH4Hs11SHHf1ffofoTfHH9uffpffH~H9tffff.H \7AD$1AFAD$AF{(uHsHKHQHSHLHK H[A\A]A^A_]1AD$AFZfffff.1x%HcWH9sUHHH 1H]@UHAWAVAUATSHxdH%(HEЃytIH ȋE11mAfdH%(H;E DHx[A\A]A^A_]LIIIHh~uI<$Ict$IIcO蒼xQE11LLLIXtA~Mt-Au H|HÍLLLMUA~IuWfffffHhH~dHfH|uWQdH%(H;EHhLLLIE1Hx[A\A]A^A_]SHHcHT1H 1HH HDHH1HHBII1H@LBLHE1IAIBIIE1HALBE эBD Iffff{(HHKHS H;KHIHHA LEI9LH=H{J4HpHfHtZHCLkHKBL肅t%AFIHAFA!E1HCHHCfC(fEfEfEfEfEfpf`fPf@f0f fffffHIHLHM@H@ E1AHpHtEL9tkIcwHmIcWHtBHI7HH82HAG(HLzHpu6Q1HBBHHHHp E11LLIIcA|$Lt.Au HHɉHLHpHcFHHHs1E1GHHfE1fBofBoTfIL9uffpffI~H9t L HH9uM4Aw.Aw AOwAAHHHInE1LpE|$AH蹿JHJHHHIIDDtHpAǾL9D|HHAFIHAFpE1HHAIE1IIIAD9HDx/HhHc@J HHH9sHhHH4HrFAtLLLHLIMMLHjLE1A|MHLHHHE1H9sH4E1HAF$mH\HJ4 HHH1H9sH 1H@DZD(EAAEEDE都|HHJ4 HHH1H9sH 1H@DEAAEEDE都|HHJ4 HHH1H9sH 1H@DZD(EAAEEDE都|tKHtFHLHHH1H9sH1H@ƱD(DDEDEEpu)E}LLLHIAuAIcHLLHIɷHD)Džp+HAAAFAFA{(uHCHKHQHSHDHC uHhu HDž HhuHhHhE1{(uk-fUHAWAVAUATSHHdH%(HELcqM~ L)M}Au!H XE11hAzytH 1E11mA~}~u[HLcVHHHLHLHLLL HHHxIH E11kAdH%(H;EDHH[A\A]A^A_]ËV~HH|urHHcHt1H 1II LDMI1I@MBMIE1IAMBMIE1IAMBMIE1IAMBE BBD IffEfEfEfEfEfpf`fPf@f0f fffff(WHOLw H;O1HIHHA LEI9LH=HJ46fHHHGLHOIAthArPIVHHHFfInf1AoTfAoTfHH9uffpffI~H9t M |HI9uMt;HztCH@HH@AaAAAHE1@HGL4HGfG(IfIL[IHMME1tE1HHt E1L$HLLMHIHtw&wOw1EHLHHHE1.LLLLrHIHLLHLHDpALL"JHJHLHLPsIDDLtE1HHLLH؃x!IcAHsIII9s INIrMAt9LLLLLHLHHAAALAHKHHE1H9sHIuI$MIOI0I!nAFA^AIHAx(LEIPMp I;PuaHRHHA LEI9LH=uGI@J4H}HI@^LH}LEHtI@MxIPI@L4I@fA@(IIL?E1Ht_L9t3{u LA}tvH 3E11mA {'IHHHU轉tLLL+wAHEЀx(hHHHPHrHpHLHH JIHHHMjuE1*H -=E11DAff.UHAWAVAUATSPHHUIIy(unHKLs H;KuLHIHHA LEI9LH=u2H{J4\HtHCLkHKHCL4HCfC(HɮE1Ht\IL9tBIct$LqtCIcT$HtHI4$I}AD$1AEAD$AELLHUuAƀ{(uHCHKHQHSHDHC DH[A\A]A^A_]@Ht(UHSPHGu H;3[Cu H H[C,u H{ [C,H{u HZHH[]ZUHAWAVAUATSHdH%(HELH:PHH CIcA H96MIMt4ILH@H)AAI)fKLHuEH@I@IuIdH%(H;ELHHMH[A\A]A^A_]HL1HH HDHH1H@HB HH1H@ƍ4HBHH1H@Ǎ4HBHH1H@Ǎ4~HB1HHHHH)Hw)HwHOw1HEBȉHP1. 
HHHtHPHH+1HLLLW)0) ))HDž@HHPHHL(DAtTAfDAEHHE1@E111HD$xE1 H$bMH$H;I4$HD$HHHD$(HHD$PLLH0H$H$ CCAH$LL蕁K AHp1HwIcxHH9sIH H= At{Hx1H9s%LD$HMc@HL9sHT$HHHH< A|:H1H9s%Ht$HHcvHH9sHL$HH H H; H\$(A>E)H$LHLL$`A.EH$AAELAE)AH\$hLd$XDDt$0A H$HHLIMH$HHLIMH$HHLIMH$HHLIMH$HHLIMpH$D<$HHT$(LILL$`L A|$01DLL$Hx(Icy1H9sIH41HAu?D<1EIcQDH9DI H1HAvMcQ1L9sIH<1HDHHUȋB1AFAFAELuHcIus1E1GHHfE1fBofBoTfIL9uffpffI~H9t L HH9uME1CCIAEHcLHH9sIMHL0C~H tHHECHcHHHcCHHUtH HHH9L@IsE1UIIE1@NTLN\JIcvIUIcM|vxRL[Ht9I1HHLM5A~Let%A}u#HT=!E1s1LeE1MHGLLLMtEA}~ IEuHuHLLLEHuHLLLEaMu3Mt)AGu I?5+AGu IL+1MMt-AD$uI<$+AD$u I$L*dH%(H;EuHH[A\A]A^A_]ffffff.UHAWAVAUATSHzu^IHL*LczL!HcYH0LLLIHLEH(LXtHXL(H}L0x6H B@E11kAHDH[A\A]A^A_]H]EAs11SLHf1ffAoTfAoTfHH9uffpffH~L9tf.I LHI9uHtIE~ AE }~ A$AH A@E11pAH}ȅzI $H|~rhHWHHHFfHnf1fffff.AoTfAoTfHH9uffpffH~H9tf.I HH9uHAH A@E11pAMHUA9DC(HOLg H;OukHIHHA LEI9LH=uQHJ4(H}Ht7HGLoHOA@A@AHGL$HGfG(IzH`LzHhL{zHELozHpL`zHELTzHLIzIL>zMIHUHpH`LmHhfInfHnflfHnfInflfInfHnflfHnfHnflffvfpffvfpffkfvfpѱffvfpȱffkfkfE1t,HuЀ~(HFHNHQHVHDHF sH8LMLEL9tDIIcvH!(II?IH LLE&(HEHEH8H LLmLeLMR)I?H HuLLE<)HHH; HELe\H`HHHLhI2LAII!I<$E1LM)NNNMIHuMLAAMDLI M $HH׸ALUJML9AI9At1ffHnfpDfHnfpD1LUAoAo\o$olffffff$lHI9uLM9tdIAtMI!LI!M LIIH;xt6KH!NI!I NKDH!N\I!I N\IM9uL}IAII$HHhLE1LM)NHHcKLKL~tAFCAFAD[A\A^A_]fffff.UHAWAVAUATSHLEHIHuIy(unHKLs H;KuLHIHHA LEI9LH=u2H{J4HtHCLcHKHCL4HCfC(HgE1HtzIL9t?IcwL*tbIcWHtHI7I<$AG1AD$AGAD$L.t"LHuLLILME1Aƀ{(uHCHKHQHSHDHC DH[A\A]A^A_]ffffff.UHAWAVAUATSHLEHIHuIy(unHKLs H;KuLHIHHA LEI9LH=u2H{J4aHtHCLcHKHCL4HCfC(HrfE1HttIL9t?IcwL6)t\IcWHtHI7I<$YAG1AD$AGAD$LV-tLHuLLILMIAƀ{(uHCHKHQHSHDHC DH[A\A]A^A_]DUHSHIHHHdH%(HEEHuNt#UdH%(H;Eu)HHH[]ndH%(H;Eu 1H[]f.UHAWAVAUATSHhBY9O؅IIHxHpAx(LEIHMp I;HuoHIHHA LEI9LH=uUIxJ4rLEHt;I@MhIH#GGA5I@L4I@fA@(]LMYdHLNdILCdMIHMME1IL9LUH]]t[Ict$L&IcT$Ht%HI4$H]H;Iۋ]AD$LU 1LUL]ACAD$ACM9LutOIcwL{&)IcWHtHI7LuI>MAG1LUL]ABAGABLcIcC9vI HL)Hs[E1LLLL&Hc{E9LUIۉLH)HHI;1L]LUxHHJ7NIfE1fDCoTfCofIL9uffpffI~H9tff.L HH9uMA[IcB9vI HL)HsWE1LLL%LUIcz9L]LH)HHI:16L]LU~HHJ7NIfE1fff.CoTfCofIL9uffpffI~H9tff.L HH9uMAZH]HcC9vH HL)HsTE1LHL$$HuHc{9}L]LH)HHH;1?L]HHJ7NIfE1fCoTfCofIL9uffpffI~H9tff.L HH9uMH]tVH TwE11fA ,Hu~(uHFHNHQHVHDHF DHh[A\A]A^A_]ËEԉCAL$AO9sH %@E11fAN닅KIvJHEIFHELHIII~HH}H}1E1ɉME!DME)HU‹M9L]HUDMIH HH}H?LAII!L}MLE1LM)@N,N,O,M[HuMAIDLIIIA}LMK< IM9AH9@AtE1lfHnfpDfInfpD1fDoo\Ao$AolffffffA$AlHH9uIL9thfLAtJcLAL$1AN@AFAO MILH]0LLLЅMGAFALDMIHcIs11THHf1fofoTfHH9uffpffH~H9tffff.H Ic~A9I)IHI>1LaHHJ?NIfE1ffffff.CoTfCofIL9uffpffI~H9tff.L HH9uMt#H xwwE11fA.  
EnAH]Ht'H;H5;HSH{zHrDH[A\A]A^A_]UHAWAVSPHHcvHHs1E1WHHfE1fDBofBoTfIL9uffpffI~H9tff.L HH9uMt&~uJH Gi=1ۿ1hAH *i=1ۿ1iA H[A^A_]ÃxtH h=1ۿ1mAˁLIs E1fMIIfE1fffff.Bo fBo fIM9uffpffI~M9t@L HH9uMt H @h=1ۿ1fAH_H9t6IIHHt)LIcWHt=HH1H;iLAG'G 0H g=1ۿ1DA1LAF IAN(~$H @H|uPG u G(G HHA@1HI@fffff.EAIIM!LL1HM!IDۃHII!MM1IM!MH!HM!L H HuHO0HG81UHAWAVATSAAD9wAA^IH1LIcD$A9D|tD)Qt!ffffff.LIHurAD)fHHHHDHHDHHDHuȉ_?HHDH Ѹ[A\A^A_]UHAWAVAUATSHHLgLc M~(I$DH|uQEI$DHcHT1H 1II LDLH1IIBIIE1HALBLHE1IAIBIIE1HLBAE BF,E IAAAuPCCHLHc{A9tLH)HHH;1*D{1EALeHuEAD9cAt$HHuWHcCA9|kD)QtH3HHur@D)HHHHDHHDHHDHuHEЉCA?DHH DH DE)AHHHMLuM#cHHHHM7zHHHHM^HHHHMBHHHHM&HHHHMLj HHHHM諈D{ McHcCD9vH HL)HsME1LHLHc{A9I)IHH;1L HHJ7NIfE1fCoTfCofIL9uffpffI~H9t L HH9uMt/H HAMu5MLeHxI>uLL藯IE1HAHDH[A\A]A^A_]UHAWAVATS~t+H @a=1ۿ1mAC[A\A^A_]HcZ Ht%HHcF9v(HII)IstE1IGIIIHH LIcD$9LLHH)HHHH1LLLMIMLIfE1fffffff.CoTfCofIM9uffpffI~M9tff.N IL9uMt H (mw1ۿ1fAI^HHcr I t*IHcs AwAGI?IIcN[A\A^A_]1vD1tLcN HcOD9vUHHHL)HsE1LjHHJNIfE1ffff.CoTfCofIL9uffpffI~H9tff.L HH9u1M]f.UHIHI]ffff.UHAWAVAUATSH(IMcH LH1K4 H1H ELMH}IXI@0HEE1HUIMI $HMLHL K LHH9L  DKIIu}HUN1E1LHEM) fff.O N N M@HuMAM)LHILHHH9@NM9ADt?1H ]=1ۿ1BA&dH([A\A]A^A_]ûLHfInfpDfHnfpDNIE1fDCoTCoBo$BolffffffB$BlIL9uL9kIAtI4L!LI!I LIIIwH99JHDJtL!J5CE1E11ɁHB1H=@ƍ HBЉ1H HB1HLQHBƃAMAL)E1H%E9‰D}D)рHEuu]HcH$L}HcH]HLEH;I7HEHMA)HEHH$EDM:-ECD}DHcHtlDHcL}LLEtOI?HEH0HEHMA))HEHH$EA1EAGL1AFCAFLEAx(uI@IHHQIPHDI@ DHH[A\A]A^A_]UHAWAVAUATSHHIL9MHM HMH IHLHEHKILO$I L)E1@KH;HHt'IHcsHHNt(HHcKQSL4ȸCC1[A^]ff.UHAWAVAUATSHdH%(HEDfE~xHIz(HHKLs H;KukHIHHA LEI9LH=uQH{J4hHHt4HCL{HKGGHCL4HCfC(II9u HR%IHG%IMM 1yK4$LH\ItAu"I?IuI?IuwAwtW)E)E)E)E)E)p)`)P)@)0) )))))I?IuHSAD$Au,B4L.I?IuIL%LHtkI?IuILAGHAwHI9t*IcWHtaHI7LI>ݔLAGK1{(uHKHSHrHsHLHK dH %(H;Mu2H[A\A]A^A_]1HAAGA{(u͕ffff.HUHAWAVAUATSH(HIIL<HHHDHHIHIt$IUI $OLKIL}H]M}LmI]H}AE1LeLeDK<4HEJ4(HJL(LIHIIIuLe1L}LH}H) fHHHH@HuHLmIAH]&1LH)@HHHH@HuHHI$HHHQIt&ID$HHAHQItID$HHA HQ(1LH)HHHH@HuHH([A\A]A^A_]H]HfI$HHHQID$HHAHQID$HHA HQ(ID$HHA0HQ8I H@IIwM2gfUHAWAVAUATSH(HuH([A\A]A^A_]IHwLH([A\A]A^A_]HHL<L$IE1E1HM)NONMRHuMAE1HLeM)DONOMRHuMIHHfInMIfpDfInfpD@CoCo\Bo$BolffffffC$ClIL9uHuIH9t)fff.I4L!HtH}HHU*AuA~(uINIVHrIvHLIN  MtE11HuLAT$HEHE}IcuH}IcEHHMH IUHHr#HH5, HUI~LdH%(H;EDHh[A\A]A^A_]ÃtAA#H 3E11gA6A7"CuAE H <3E11gAS轂ffff.UHHMAHщHdH%(HEEH}M1 EdH %(H;MuH]WUHAWAVAUATSHLcE1IH}о3D1ffAu^L}AA IL=4@E1HI9t$E,_AH}C1fHCtE1L9L}AHEHMH H1H1A|}ArfIIIIPfHnf1fff.oTfoTfHI9uffpffH~L9tfff.H 4HI9uAHuE1Ht HEЃxADH[A\A]A^A_]UHAWAVAUATSPIIHHAHL6HCL*HCLHCL+MH{H{HM~H=H5+ֶHLLL+LC$H{LLM~AN ~&If.H|uQIHcHT1H 1HH HDHH1HHBII1H@LBLHE1IAIBIIE1HALBE эBD I1C H{Icv H~ IH|xLLL0]uWsIbtgIF1H+LI$H AV |#IVHHH0HHIcV H9|P@HSH{LH[A\A]A^A_]1H[A\A]A^A_]fDHcOHwUHAWAVAUATSHH111fHII MIIM!I?II ME!II!M MI0MIIM!I?MIMM1M!M1MI8MIIM!I?MIMM1M!M1L[MIIM!IHL!H?IEAE1E!E1I?MIIM!A AE AE AE I?D MII D I?A AD! 
HH@LH9[A\A]A^A_]1fUHAWAVAUATSHLHMIIH}Ax(unHKL{ H;KuLHIHHA LEI9LH=u2H{J4HtHCLkHKHCLII)Is 1Ip1xMIMH HfE1ffffff.BoTfBofIM9uffpffH~M9L]tJ IL9u9I}II)IsIgMIMfHnH HfE1BoTfBofIM9uffpffH~M9L]tJ IL9u9LЅI}LAуs1DʁfHnDHfE1fff.BoBo\Co$ffCoTffI L9uffpffH~L9tfDML3L HI9uAUDNA1M{Ic9Le}MII)Is E1IkE1rMMIMMIf1ffDAoTfAofHI9uffpffI~M9MLet@O IL9uMc9MUII)IsIdMMIM?fInMIf1AoTfAofHI9uffpffI~M9MLet@O IL9uL 9L~IEMAs1L]L]DׁfInEAAIfE1fffff.BoBo\Co$ffCoTffI M9uffpffI~L9L]tfI4H34I HI9uHA3WHcL DIA{ eA1fffff.LHE9{$HEH8LLLHMISZL]ISIcEHcJ9}H:II)IsI1rf1xfff.MIMH4HfE1ffBoTfBofIM9uffpffH~M9L]tJ 4IL9u9I}II)Is IgMIM fHnH4HfE1ېBoTfBofIM9uffpffH~M9L]tJ 4IL9u9LȅI}LAɃs1DɁfHnDHfE1ff.BoBo\Co$ffCoTffI L9uffpffH~L9tfDML3L HI9uAMR1HcH HLDISHcr9}LII)IsI1q1xfff.MIM ILHuH9vHMHuHUH}LELʺtlLHL HHH HHH HHH HHH II I LHHu-H;Es'H 41ۿ1lACM1KHEHEH8J4HuL)HB%HEHHMN!dH8HEHHEHtE1HMHHu1AHMO>IIs A1gMIMQHf1ffffff.oTfoTfHH9uffpffH~M9tfff.J 4IL9uIIHL!LMI1MI)M1M M1I!I?AHD!HMHHHH#MHUH2H HMH H EJ!DBEDrUHSHIHщHdH%(HEEH}1E1  ]dH%(H;Eu H[]hDUHAWAVAUATSHxdH%(HELc~M~L&A$tHE~tFH %3E11wA'dH%(H;EDHx[A\A]A^A_]HEHMH}LHuLHLpoH}~م7DAOI fff.DI|uAAWH"H11H1II LDMI1I@MBMIE1IAMBMIE1IMBAMI1IMB BBZ I¹Q@D7*Z37"MȉM(urHOLw H;OuPHIHHA LEI9LH=u6HJ4H}HtHGLHOHGL4HGfG(HIHEI9tIHcpLյHEHcPHtHH0I<$cHEH1AL$@AD$LIcD$E1I $H}ILHH9sHLsrHOHHLD6H`L}L=HEL1IL&HELHLHMIfHnHxfHnH]flfHnLufInflfHfvfvfoЈTPʅH}Hu_hH|LhHE}LuMHEE1Ict$HD<H;I$IcL$L_CAD$CELHuHHMLEʋULLdHEA~H}u!I>IcvHUTH}HH`HMLEHLMZ5r{HLLMALx|LxMtZA9\MM9IcqLLHcSHIHI1I>aLMAAAAHtI H1H|drNHxIIIPfHnf1oTfoTfHI9uffpffH~L9t H 4HH9uHM1AFAAAFLLLHMLEM=A^A;\$MI9IcqI$IcL$LMمDȅGHcHH1H|drNLHMIIPfHnf1oTfoTfHI9uffpffH~M9t H 4HH9uHHpH]tsHpRt(C;]HELuH]HEAH}E11?HEx(HHHPHrHpHLHH LLLMcHtLHIHHMLEtLLMuL'HnLLhL HSL覴>H]HLHUHM臇H1ɅHEHE_f.UHAWAVAUATSHE1HAAGw&H Ɠ4E11BAVur&H 4E11BA+JAt/A|9H e4E11fAGGMAF?AN~IAFEn>DIMHcH}HAAC.ʍJIIHHUȃ?ILMLeI<$H4H'I $L!lADEԋ}tCHAA@@uHMȅtXɺ HMHDEHI$H AuI $H AD$A|$ADH[A\A]A^A_]I$H I$HHHUHH`]fUH]fDUHSPHt1HHuE1LIIs E1^MIIBfE1fBoTfBoTfIM9uffpffI~M9tf.L HH9uLHII!HII1HH)H1L H1L!H?H!H[]UHAWAVAUATSH8LEHuHMHt!IHEL`E1JHu;IIrH 41ۿ1lAH8[A\A]A^A_]Mu HMH9 vHUHHH HHH HHH HHH III LH HEMtJ<L1L4]I\$L }HEHL)HELHHHHMAdHE'AHULHHHD!ALHuHUHEK!DH}tE1HHUtHu1xff.Is 1Qf1fHuAoTfAoTfHH9uffpffH~HMI9tfDI DHH9uHHHH!IMHH}H1II)I1I I1I!I?AH 41ۿ1sAUHAWAVAUATSHdH%(HEH IHIH=H5?/JL=MLtAt6H Hn1=*HDȀ=߄HDHHH=ۄ~H DŽAuHAHHH AHH=肏 W)E)EҊ@t$1Ċ@tH} G<H uW)E)Eq@u H} 1H=#H50B[IL}A M=u=ffff.>LL1iHu5AEt9=LLiHu AEtII)HAE 4W)E)EH} EuH} -HHWEWM)E)MLH8L1'Y HtHHt ~uHIIHFHfH@@LL~HL0AIHDACHL0IIfE1fffffff.CofCoTfIM9uffpffI~I9tff.O IL9uM,~MI}ArfLXLHLKfInfE1ff.CoTfCoTfIL9uffpffI~I9tf.O IL9uMHUH}Ѐy(HMHQHY H;QHRHHA LEI9LH=HyJIH耆LHMHHALqHQ?HHt ~CII諃H HfH@@LL~FHHH1H|yrcL@MIIQfHnf1foTfoTfHI9uffpffH~M9tfff.H 4HH9uE1HAtRC1CHHX[A\A]A^A_]L9tCu H;薄Cu9H1HAHHA2IH&rEHN1fA(IHHIH HEHHEHIHIHHEHHEHHu菲HHLLHUHMLeHEHcHAHL}ffff.ILHH9sH4LsALHA AGLFHLLHMLEE1L}LHHUHMvLLH nj1nA=LLLHLEspLHuȺWAGLeLLLHMLEE1*L}LLHUHM LLLHMLEL+L}LLLHMLELLLHMLEHL(HHqAGAH}LmLmLuIL}wLL)HU-CL11%I}EuMcHEHHcHLx3HUȃzHuHLuLLЅEvMcEHULuLmtfIEIs11ALHf1fofoTfHH9uffpffH~H 4HI9uHLmuLHu%HUt2LL}LN|ufL}IIRH}|1HEH9tH芯1HEx(NHHHPHrHpHLHH 0L9>VLmtGu H kj1rA3H jj1sA cLLDMLL}LLHMLEE1(Lt#H jj1rA詝MLHuuPIcEHIMs11DHHf1fofoTfHH9uffpffH~H9t H 4HH9uHLmtLH}LLHMLEE1KHEHc@HHMH 11LLHUHM9HEHc@HHMH s11DHHf1fofoTfHH9uffpffH~H9t H 4HH9uHNH}gtHMAHHAYHHf1fofoTfHH9uffpffH~H9t H 4HH9uHH}HuHUHMH}HLHMLEH}HLHMLEH}hA(H}HuHUHMjH}\AH}HuH=DD~(E)H}HHUHMAA}H}HUHHMLEH}HHUHMLEH}HHUHMLEEAt>AH}HHHMLExH}jAE9uH fj1nAy81L#dH %(H;MuaHP[A\A^A_]Hu(L] HEK'Ht$0L\$(WD$Ld$HL$L $HMLMIAR(tLeи6@UHAWAVAUATSPMHIILm HEJ (J.H9t L9vH9wUL9@K7L9AAu>L9H9t.L]0LMt|AzuuH $`=1pAH `=1sA脎Mt L1L7Mt 
L1L6H1H[A\A]A^A_]HLLHMu@u8ASu(AUPuAR(H@ufUHAWAVAUATSHIHILmHEH9tYK 7H9vPJ (L9vGH >_=1sA蹍E1Mt L1L461L#H[A\A]A^A_]L](LU HLa Mt$LHLASARAUPAH u@MI)sH ^=1eAmM9vH ^=1gAJHUJ4 L9t)L9v$Kt'tMH V>E11BAlH V>E11lAg#DH[A^A_]IHHHtH@(HtHHHGIHgjWAAFpAF`AFPAF@AF0AF AFAIdžLML.LMIGHt ApHtHpHCHt/LMAA@41HAP0HHE1 ffff.UHSPHHHtH@(HtHH{xiWCpC`CPC@C0C CHǃH[]UHAWAVAUATSHMIIHdH%(HEAtE1EADcDcHMtwHtOH@(HtHH{hWCCpC`CPC@C0C HǃDcL+A}Ht:fHCHH.HuAH 'T>1rA5HCLAMKC @H?HHc HChMoH?EH@0HHMH߾1Ѓ4IEGH@0Ht(H111Ѓt3tMHSH1H -S>1hAqH S>1iAw赅HH R>1kA萅ChMH?EH@0HtEHMH߾1ЃEH qR>1hAq4H VR>1hAqSH ;R>1iAwބH@ H{4LH R>1iAw褄H@ tH{$L *HH?uhEH@0HtHMH߾1Ѓt%u>E?H Q>1hAqH sQ>1iAwH@ tH{4Hs$})HMu@tHLLDPtCdClǃ1dH %(H;MuH[A\A]A^A_]x*UHSPHHt/H@0HtCЉÃuZH P>1ۿ1iAw8H P>1ۿ1rAlH uP>1ۿ1hAqH[]ffff.UHH@]ÐUHH@?]ÐUHSHHdH%(HEHH?uhEH@0HtHMH߾1Ѓt%u>E?H O>1hAqH O>1iAwQH@ dH %(H;MuH[])UHA]pUHE1]cUHAWAVAUATSHt%H 5O>1ۿ1BAցLJLIcYHAAE9AAu%H N>1ۿ1EA胁AAucEHcGdHAA)LwDEE)IDHLHIHE&D{dAǃHIcIHHAQ Adž1E1DE\HMHUMc9H}tLHIHuL&LH}LILHAQ HMLLHH}HULAXD!A)~#IDIIHHAP t?E.LLtHGDIcHHcIHH%L_dLJ1ۉH[A\A]A^A_]HEDIHHAQ tD3ALJ1fff.UHAWAVAUATSPt"H L>1ۿ1BA`i5L7AFuA^Hu2I111AV xAH[A\A]A^A_]ËGdG!uGI9v,)HHDHIHIH{'LLHWDHAV t3AHuH L>1ۿ1jAz~q1jfff.UHAWAVAUATSHt&H K>E11BAU~2LAYHAAE9AA Au&H gK>E11EA~AAEG!DlEIt9Ht1HGpHHuHHUHIE3#ELLHuHUHIItoLrdtvGlAEtRA$LIcHHHAQ A1EAOAA%DH[A\A]A^A_]E1DH[A\A]A^A_]A)$GlHpIc$LHHk"kfDUHAVSt'H J>1ۿ1BA|[A^]H@u6G!uJ@r݃dtgH I>1ۿ1{AI111P xA렃dt H I>1ۿ1jAupltHDDpEAD)AEt H 7I>1ۿ1eA)EAF8DpAu~|rII)II E1IMItMF\pFIIuI)Iw4FDpFFDqFDFDrFDFDsFDIL9ủH xH>1ۿ1eAj sE1>AAE1BDpBBBLI M9uI9t3MAANTpNIM9uI9i@UHHH@ ]fUHt]p]f.UHt]]zf.UHH]UHH]DUHG]UHH@]@UHG]UHG?]@UHHG]fDUHHw]fDUH1#O ȉG ]fffff.9wt6t HA@u&UHH F>1nAgy1]Éwffffff.UH]UHG]UHG]UHG ]UHG]@UHEIHHt4WGpG`GPG@G0G GHLJ]@UHIHHt4WGpG`GPG@G0G GHLJA]UHIHHt4WGpG`GPG@G0G GHLJE1]d@UHt]`]f.UH]FfDUH]fDUH]DUH]f.UHAWAVAUATSH(MIIHIdH%(HEпPALDL HtZ H1HHW)ELeLLLvHHHMHM9H7MQH ;BLDLHMt H,IHH H1IHHW)ELuLLLHHHMHMsLoHts H1HDHW)ELeLLL$EHHHMHM}H`Mt H IdH %(H;MuH([A\A]A^A_]Nfffff.UHAWAVAUATSH(AIIHdH%(HEк 1LW)ELmLLLAEHHHMHM(}E $ȈdH%(H;EuH([A\A]A^A_]DUHH=%2H5QH"2]UHW:2(O)2HH2HH1H޸H1]DUHH=1H5yQH1]UHW 2(KO)1HH1H,H1HθH1]DUHH=1H5 QH1]UHW1(Q)1HH1HH1HH1]DUHH=1H5PH1]UH(P)~1HH}1HHw1HHq1H Hk1HLHe1]UHH=]1H5PHR1]UHWj1(M)41HH31H̴H-1HH'1]DUHH=-1H5OH"1]UHW:1(+R)1HH1H\H0HH0]DUHH=0H59OH0]UHW 1(KQ)0HH0HH0H޶H0]DUHH=0H5NH0]UH(5P)0HH0HƶH0HطH0H:H0H|H0]UHH=0H5INH0]UHW0(M)d0HHc0HH]0H>HW0]DUHH=]0H5MHR0]UHWj0(L)40HH30HH-0H.H'0]DUHH=-0H5iMH"0]UHW:0(M)0HH0HH/HH/]DUHH=/H5LH/]UH(uN)/HH/HH/HH/HjH/HH/]UHGuH=/H5hLH/]H=/H5ѼLLH/]UHrGuH=0H5}LH0]H=/H5KH/]UH"GuH=L0H5 KHA0]H=/H5KH/]UHH=M0H5KHB0]UHWB0K0T0HY0H H0HH0HH0HH0HH0]fffff.UHH=/H5JH/]UHW//0H 0H H/HQH/HH/HH/HGH/]fffff.UHH=/H5IJH/]UHW///H/H Hh/HHb/HcHd/HeHf/HH`/]fffff.UHH=]/H5IHR/]UHWR/[/d/Hi/HH/HH/HýH/HH/HH/]fffff.UHH= /H5 IH/]UHW/ //H/H H.HH.H#H.HH.H7H.]fffff.UHH=.H5iHH.]UHW...H.H Hx.HQHr.HHt.HHv.HǼHp.]fffff.UHH=m.H5GHb.]UHWb.k.t.Hy.H H(.HH".HH$.HeH&.H'H .]fffff.UHH=.H5)GH.]UHW..$.H).H H-HH-HCH-HUH-HH-]fffff.UHH=-H5FH-]UHW---H-H H-HH-HH-HH-HH-]fffff.UH2A!ȃ]@UHA]ff.UHH==-H5EH2-]UHW"-+-4-=-HB-, HH,HH,HH,HH,]fffff.UHH=,H5 EH,]UHW,,,,H,, HH,HH,HH,HH,]fffff.UHH=,H5iDH,]UHW,,,,H,X, HAHR,HcHT,HeHV,HGHP,]fffff.UHAWAVAUATSHHIIIdH%(HEH t0E1ILH=#H5CH #!LH=%H5(cCH %W)E)E)E)E)E)p)`)P)@)0) ))HLL+E1HHHLuLm/HMtdHUL9t5DuȍVL9t#HEHH)H<0H1H HLHs1LuLmA s1Mʃ1ffffff.D5L50WD0WA7AD7H H9uH9tCt)H։ʃH<0H3|5I<7HH9uH9tD4@2tA4HH9uHHLLP E1AHHtH@(Ht HHEW)E)p)`)P)@)0) ))HELELEH}EdH%(H;EuDH[A\A]A^A_]Y fUHAWAVSH(HdH%(HEDqE1It A0AH9$LIHHHHAbffEHHHuHLP LE1H11AEfnMfqf 
>fo]fofff~MˆEɉ@@@}ALEfIn@fnfo@ffofsffqfo=ffff֋I8Ef~$D0fofrfnfrfo-?fffqffofff~@@@򈓬foh>foffnfs0fofffo%<ffnfofs8fffofsfofs8fffs0fffqfffpԓf֓fMM}ljш@@Ϲ@f~$@0fofrDfAnfrfof>fffqfa<fofff~AADˆ@@@@@Ϲ@<3f~@@ @0@4 ǃAdH%(H;EuDH([A^A_]f.UHAWAVAUATSH(HIIdH%(HEHD`W)EHt]MI)I9LCIMHEtHLLAB/AL)tmIHuLHULP t\ML9v/Lmf.ILLLLP t0ML)L9wHtILHxA1dH %(H;MuH([A\A]A^A_]DHHH HUHAWAVATSHA9tGƄPA9t,HAHL)HHIH1ILLHJ71IHItHfD 0D27D 3HIuH)HwUI>HfDT0273T027T3T027T3027T3HH9uHH@ HH[A\A^A_]HH)1H IHH)HHH 2 s1cʃI4>HE1f.BBBTWBWBBDI L9uH9WH։ʃffffff.L0L37L3HH9uH9fUH2<Ht4W@p@`@P@@@0@ @Hǀ]HUHSPHHHtH@(HtHH{=WCpC`CPC@C0C CHǃH=H=H=HH[]:=fUHAVSIHtAAAAA1[A^]fUHLJL[11A]{ff.UHAWAVATSHAExt%H Q1ۿ1jZ[A\A^A_]HcXH~L0AtɉKI Ήfffff.I|uʍrHH11H1II LDMI1I@MBMIE1IAMBMIE1IAMBMIE1IAMB BBBb I'HOHt9AKyHHcqIHLH謅LAKHGARxHcpHH8s11LHHf1fofoTfHH9uffpffH~H9tH HH9uHLHAR UHAWAVAUATSHIIIE10F8HHf@@1:HtHCHC HKHHCfC(H=H5k5LuMtDHEDpD;5u*H8HcpHHc  EDȅ EtAH[E1HIL}M<$I9t@IcwLOIcWHtHI7I>3AG1AFAGAFL2SL}tzH}t7HEDxE;~H8HcpIIcNNEDȅL}xAIT$Ht(I $E1LHuIE1EtA~ttAA{(uHCHKHQHSHDHC H;H5gHg+eH{R8HJ8DH[A\A]A^A_]EL}cZAFHtIH1H|drNHxIIIPfHnf1oTfoTfHI9uffpffH~L9t H 4HH9uHfff.UHAWAVAUATSH(IIdH%(HEE1n0\5H[Hf@@17HtHCHC HKHHCfC(HщE1HILuH趉HHEIMwtbLeH=H5g2MEfD;%I>IcvHwHc xEDȅAFHtIHII|tr^LHMIIrfInfE1BoTfBoTfIM9uffpffI~M9tf.L HH9uMu A~H|wraL@MIIqfHnf1oTfoTfHI9uffpffH~M9tfff.H HH9uHUA~LuJILcHM5LIA1M1$EM7MgMMuMu7hEl$E;nu%I<$Ict$IIcN EDȅy:Et3IWE1H}LILIE1 3L)uHEIwEE1H}̺@HE1E1Y E̅uHEIIOLeLLuLI> A~LuA >I7EE1H}̺@HE1E1ۅ E̅xlt\IuXI7LtOEE1H}L@HE1E1艅 E̅xAuLuA A{(uHCHKHQHSHDHC H;H5H.b_H{2H2dH%(H;EYDH([A\A]A^A_]AFHLLuH H1H|drNL@MIIQfHnf1oTfoTfHI9uffpffH~M9t H 4HH9uHa_ILcHMCLIAu 1M1|LeEAE1M1OlKtHH=HHHHIHHRH)L=IHHHIHR)AH LIu1LeAt0K tHH=HHHHIHR)H E1H L LukcMEA1MOdKtHH?HHHHIHHH)L?IHHHIH)AI tIu1LuMAt-K tHH?HHHHIH)H N UHSPx.HtHH0H|+HCtH[]ff.UHAVSHtBHHtht2H{hL3Mt.AFu I>/AFuI [A^]L/LsMt)AFu I>/AFu IL/LsMt)AFu I>/AFu ILq/LsMt)AFu I>X/AFu IL?/Ls Mt)AFu I>&/AFu IL /H{0*H[A^].UHHH~+Hffffff.H|uQHHcHT1H 1HH HDHH1HHBII1H@LBLHE1IAIBIIE1HALBE эBD I]1]UHHG]fDUHHG ]fDUHH]UHHG]fDUHHG]fDUHHtHGHHtHG H]fUHAWAVATSHIHt=IMfMt-AD$uI<$\-AD$u I$LA-M~Ht6M~ Mt)AGu I?-AGu IL-I^ [A\A^A_]@UHHtHHHtHGHHtHGH]UHAWAVAUATSPHL71HL H{H  HtUMtMAFuI>IIIn,LLLAFu ILIIIC,LLLH3HtNLsMtAAFuI>II,LLAFu ILII+LLHSHtBLsMt5AFuI>I+LAFu ILI+LHKH{hDHChH[A\A]A^A_]UHw(]fUHAWAVAUATSH(I0E)IE1HWAD$AD$LeA$Mg LMu()HWH@@HEMoMu'(Ht|IWH@@I_0IHEH%MwhH&MurLuH%Iht;H&UE1E11HE1E11HEE11LeLHuIIGhH%MTMtJ1LeIIwMOhLMLmLMMwMo AMI_HLeaHcsH}c?AAHEH8HHcKLvHMACA1QME1H ,1AGMt2Iu+AEu I})AEu IEL(LmMt(I u!AEu I}(AEu6IEHt6Cu H;(Cu HH( L(HuMt)I<$H5uHWCUI|$i(La(DH([A\A]A^A_]ÿU&HAHWH@@AG(It&HωEHMYHMu9s H< HHBH}Hl1E1ufUHAWAVAUATSHIII0%HtDHf@@1(Ht)HCHC HKHHCAfC(IEHLuL}~(Hfff.H|uQHHcHT1H 1HH HDHH1HHBII1H@LBLHE1IAIBIIE1HLBAE эF$AE IAAAE1HyAHQILHHUH4DIIcFHHH)%Hs E1IH s E11HHL IfE1fDCoTfCofI L9uffpffpUffofrffofqffA~H9@t`IIM EfAnLfDA~ 8fHI9ufpUffofrffofqffA~L9uHIF  IL9uEDeH9HHBHH}L$Hr$II)IL 1M9AL9AEE1IMIt>MIL:IN4E1ffffff.G,>G, IIM9uIH}I)IwCMIMF E$FD ED$FD ED$FD ED$IIL9uH9s H)1D}Ѐ{(uHCHKHQHSHDHC H;H5pHRqPH{#H#DH[A\A]A^A_]H sE1AALIL:IIIIE1ffAoIAofof`fpNppfhfpNppfgC 2fof`fpNppfhfpNppfgCT2I IM9{L9@)MAAN L:IIIIIffffff.A~ f`fpNppfgfC IIM9uL9OUHHH~+Hffffff.H|uQHHcHT1H 1HH HDHH1HHBII1H@LBLHE1IAIBIIE1HLBAE эBD I]1]ÐUHAWAVAUATSH8HIIIdH%(HEqI EHULLx}{(LuHKLs H;KHIHHA LEI9LH=H{J4!HtpHCLkHKXE1H :E11gA/>H E11eA5>HCL4HCfC(HrE1HEHXMw0IHEL#H]I_hLSHH]u@L"Iht LS'H}H5HEIGhL5H}IIW MOhLmLLILeM7M9tCIcvL4IcVHtHI6I<$AF1AD$AFAD$L9tFLHl~H}HukAu?H E11eANH E11AG={(uHCHKHQHSHDHC dH%(H;EuDH8[A\A]A^A_]fUH]fDUHAWAVAUATSPIII0HtDHW@@1HLut)HCHC HKHHCAfC(HpAHt'ILHLHNtLHu_Iǀ{(uHCHKHQHSHDHC 
H;H5RjH{LJH{=H5DH[A\A]A^A_]UHAWAVAUATSH8dH%(HEHAYE1H9^IHHLMHuHULE~'Hff.H|uyHHcHT1H 1HH HDHH1HHBII1H@LBLHE1IAIBIIE1HLBAE эBD I1ALIEE1HtALHuLD9u-HMLLHULE[t;]u HEHALdH%(H;EuDH8[A\A]A^A_]UHG]UHAWAVAUATSH8MHHUIIdH%(HEW)E)EMt A},Ht`I1LuLmLuLAUHELLLPHELHuP HEHtH H}p,H}11HEHtH}dH%(H;EuH8[A\A]A^A_]UHHH~+Hffffff.H|uQHHcHT1H 1HH HDHH1HHBII1H@LBLHE1IAIBIIE1HALBE эBD I]1]UHHtCQ]fff.UHAWAVATSKIHtWA$ID$AD$IHtWAIGAGHHtWHCCxOIHtI~0AFtfInfInflfInfHnflffvfvfoʈTPtM AD$uI<$HAD$H I $H IL$AD$AD$ L-IcD$H*II$HHr#Ht#A,bI1MtfI~? MfM>LAW<1EINH H|(t"H "?1wAmw(1HtINH [A\A^A_]fUHA]UHSPHGHHx(t HP H r"?1ۿ1}A'H[]fDUHSPHGHHx8t HP H ""?1ۿ1}A'H[]fDUHAWAVAUATSHhHdH%(HEHHHx(HIIW)E)E)E)E)p)EHpHqHpHpLeLP HpHxDhp,H{LLLMD1ۅQH ,!?1ۿ1}A&JHD@dH%(H;EuT11Hh[A\A]A^A_]ZD1HxHEHtH}dH%(H;EuHh[A\A]A^A_](UHAWAVAUATSHhdH%(HEHGHHx8HIIW)E)E)E)E)p)EHpHtrHpHpLeLP HpHxDhp,H{LLLMD1ۅ$H ?1ۿ1}Af%1HxHEHtH}dH%(H;EuHh[A\A]A^A_]fUHAWAVSPHGLIy(t2HtHHIHILPHLLH[A^A_]6MI0MtHH[A^A_]AH ?1}A$1H[A^A_]fUHAWAVSPHHLIy8t.L HIHILAQHLLH[A^A_]IA@Ht H[A^A_]H ?1}A$1H[A^A_]UHAWAVAUATSHHdH%(HELyMt;IHL1IH2L_t@H 3=1ۿ1jA]xH =1ۿ1eAWXIW)E)E)E)E)p)`)P)@)0) )))HE))))fDžHDžLHILLLLP LLRteHHLBLt]II>I HJc HHHH3>H /t1DAN>"H =1ۿ1fAg"dH%(H;EHH[A\A]A^A_]HHHQHHHdHHHWH =1ۿ1gAz][ff.UHH]1H9t;u tHH;tÃttUHAVSIHHHNu9I@H@Nu"IhHhIcHHt [A^]IHIcHHuLHIvH#=fffff.UHAVSHHADuH *r1ۿ1CAE;HHIP HLOu H q1ۿ1DAN[A^]UHAWAVAUATSHXHIIdH%(HEЋH~(I@E1DH|urI@HcHt1H 1HH HDII1HLB@MI1IMB@MIE1IAMBMIE1IAMBE F,AE IAAAI9v&H pE11dARE1W)E)E)E)EHEIE1HULL1tIHMLLHPhL+AdH%(H;EuDHX[A\A]A^A_]fffff.UHAVSHHHIdH %(HMLMWE)E)E)E)E()p()EEHUHMLEH=>2LpLPARljHHL2pL{dH%(H;EuHHĀ[A^]HUHAVSHHHIdH %(HMLMWE)E)E)E)E(j)p(<)EE HUHMLEH=n1LpLPARiHuHL1pLdH%(H;EuHHĀ[A^]zf.UHAVSHHHHdH%(HEWEEEEEEp`H؞]HH|6*)bHHp0ZYH H9Y/H(H1 g&3gH0HXhJH8Hd . H@HOHGHHH0HEPLL0HL4LjdH%(H;EuHH[A^]7UHAVSHHHHdH%(HEWEEEEEEp`Hɼg jHH;ʄgHH+rn+hH8HkAكH@Hy!~[HHH@HEPLLWuHL3L,dH%(H;EuHH[A^]fUHSPHtjHÿ~HtWH@@HWHt!WH@@HCH;uHCH1HH[]DHt3UHAVSHL7Mt*AFu I>AFu I LLsMt)AFu I>AFu ILH[A^]UHH]UHHG]fDUHHtHHHtHGH]UHAWAVATSHHH1u}IIL'Mt-AD$uI<$AD$u I$LMfMt-AD$uI<$AD$u I$LM>I^[A\A^A_]f.UHAWAVAUATSHdH%(HEH0HH$LqMffEfEfEfEHEf`fPf@f0HDžpffffHDž ffffHDžfpf`fPf@HDžf fffHDž0HHcHH Ls(E1E1vH Q?1ۿ1eAIIE1fCofCoTfIM9uffpffI~I9tff.O IL9uMpIIHHcH}H,*HLcH}LLAIWHcBHH s11\HHf1fff.ofoTfHH9uffpffH~H9tffff.H u H Æ1ۿ1A [[A^]@UHHH]ffffff.UHAWAVATSHPHIdH%(HEHcH~.IfAJ|u"DGDILcND1M@ 1MI @MDMIE1IAMBAMIE1IAMBMIE1IAMBMIE1IAMBE D B4B4D I1AMgII9LCH L9~IrsIt$Hr+J "H9pD?HDHH9RLHfffff.Ht HHH7HHHwHuHLH΃HsE1E1THE1E1fffff.F IAM FD M FT IAM FD M IL9uHt#L1f.ID M HH9uLHHHHt 1HOIAM9v!HcAD)HHbAffEfEfEfEHEHcM1LME1HH) ff.J4K4K4MRHuHHHHуrwHHfHnfpDfHnfpD1f.oo\fffodfffoTff$THH9uH9t&HHcHP tI~HcHOLnE1?UH]FfDUHAWAVAUATSHHHHPdH%(HEHB(Ht(Hx(t!H ,)E11gA8tL:MLLbM?IWEEEExhXH))))Hɼg jHH;ʄgHH+rn+hH HkAكH(Hy!~[H0E8E@IcHHHLNHHPHH8uLLHW)))p)`HDžIIcH`MWIA!H`fDž\LH\LHLPLHIH\AW)))p)`HDžIIcHLH]E1!H &E11CA>dH%(H;Eu0LHĘ[A\A]A^A_]H n&1jAl蓤UHH]UHHɼg jHH;ʄgHGH+rn+hHG(HkAكHG0Hy!~[HG8H@HWG@]fffff.HeUHAWAVAUATSPIHHHH=HG@HOHLPHt.AI)IM)s.ILLIDIIs{tLLLǃMt1@uHL&IsgHL8IrNMIt1@uHLLt&HLLd8MAM)MtLLL'DH[A\A]A^A_]fff.UHH]ffff.UHIHHHcH]Ef.UHH= H5yH]UHAVSH+HHoH!uH=FH ijH IL5uH=H ;jH |L5}{vH=uH='H jH L5uH=eH iH HL5HjpBHH=pH5HeHn(p( r(((((3(5HHoHH[A^]fUHH=H5)H]UHAVSHTvqHH*H=HuH=H hH IL5uH=jsH hhH QL5RPKH\uH=VH 9hH L5uH=H hH ~HL5xvqHOH|H=H5HHCHD(M>(O@(j(l(g(g((H\mHH[A^]fDUHH=H5H]UHAWAVSPH+HHdH"uH=H fH IL=auH=BUKH fH )L=*(#IL5.uH=H fH L=uH=irH fH PHaL=JHCHE܏`nHNH=H5CHHH(gf(if(kf(mf.(of0(qf2(sfL(ufN(wfP(fR({f4(]fHjL5kH[A^A_]fff.UHH=UH5IHJ]UHAVSH+H0HZ^vH##!buH=CLH eH *I L5!uH=H eH L5HuH=fyoH eH ML5NLG)uH= 5H fH 
HL5HǕy\/HH=@H5H5HH(e(e(e(eHt(e(e(f(fH(f(lf(Nf(0fH(e(e(e|(e^HMHgHH[A^]ffff.UHAWAVAUATSHHIIIO~%IfH|uQIHcHT1H 1HH HDHH1HHBII1H@LBLHE1IAIBIIE1HLBAE эBD I=r&H AE11nAHtL}1HEȀ{(20XHHL}W@@H]Ȁ{(uwHKL{ H;KuRMHIHHA LEI9LH=u5H{J4vHtHCLcHKMHCLtIH ;:E11BAGdH%(H;E~DHĨ[A\A]A^A_]DBE~#HEJ|uEHAEHMcANL1MD E1MI AMDAMIE1IAMBALHE1IAIBIIE1HALBLHE1IIBAEA E CBD H=r!H 9E11pAyt!H 8E11A%AHtH L I|hArQLPMIMCfInfE1BoTfBoTfIM9uffpffI~M9t N IL9uMWIIIHE1HHLHpI$@HH80ffEfEfEfEfEfpf`fPf@LHLI$LmH@LI$E1L1,H8L51ɅA$(@(P(`(pAD$AL$ AT$0A\$@HEID$PIE@I$AoEAoMAoU A]0A$AT$xAL$hAD$XADŽ$A!H 6E11pA5Hko葋UHHHLHNHHH]ADUH,truHH=UH5VHJ]ÁtbuH=H5ZH]H 51{AV1]H=H5LǴ1[A^]UHSPHH6ϱt"H $1ۿ1jA eWC8C(CCHCHCPC`CpHǃHǃH[]ff.UHWF0F FHF@FHFXFhFxHdžHdž]fffff.UHAVSHIH6諰t%H #1ۿ1jAIcHH~ s 11k|Hf1fo3fo3fH H9uffpffH~H9tfDH ˘HH9u1HÉ[A^]ÐUHHcHH~ s 11u]ÉHf1fffff.o>fo>fH H9uffpffH~H9tfDH ΐHH9u1H]fDUHAVSHIH6t%H J"1jA"Y1[A^]HLH[A^]ffffff.UHAWAVAUATSHIIdH%(HEHLpXHX`ff`fPf@f0HDžpH0LfffffHDž ffffHDžfpf`fPf@HDžL8I$LLLH(LLLLH0LH@LLLAA7IcHHffEfEfEfEHEI@1LLE1HH) f.K4K4K4MRHuHLME1HM)fDONOMRHuMAL)HHуHHfHnfpDfHnfpD1DooodolffffffHH9uH9t0f.HH!LDՀI!I LHH9uIcHHL0L8RffEfEfEfEHEI@1LLE1HH) f.K4K4K4MRHuHLME1HM)fDONOMRHuMAL)HHуHHfHnfpDfHnfpD1DooodolffffffHH9uH9t0f.HH!LDՀI!I LHH9uIcHHffEfEfEfEHEI@1L0LE1HH)K4K4K4MRHuHLME1HM)fDONOMRHuMHHHуHHfHnfpDfHnfpD1foTo\o0o@ffffff0@HH9uH9fDH|ՀH!L0I!I L0HH9uIhHHLIcHHL0L8cffEfEfEfEHEI@1L0LE1HH) ff.K4K4K4MRHuHLME1HM)fDONOMRHuMAL)HHуHHfHnfpDfHnfpD1Do0o@odolffffff0@HH9uH9t@f.H0H!LDՀI!I L0HH9uL0L8H0LHLIHH@LIcHHTffEfEfEfEHEI@1L0LE1HH) fff.K4K4K4MRHuHLME1HM)fDONOMRHuMAL)HHуHHfHnfpDfHnfpD1Do0o@odolffffff0@HH9uH9t0f.H0H!LDՀI!I L0HH9uIVHHLAIcHHmffEfEfEfEHEI@1LL0E1HH)K4K4K4MRHuHLME1HM)fDONOMRHuMHHHуHHfHnfpDfHnfpD1foTo\ooffffffHH9uH9t0f.H|ՀH!LI!I LHH9uIcHH~s11H(|11Hf1fH(ff.o5fo5fH H9uffpffH~H9tfDH HH9us11Uf1fAofAofHH9uffpffH~H9t@H 4HH9u1H1HdH%(H;Uu!ȃHĸ[A\A]A^A_]kfUHAWAVSPHIIH65uH3L&t.H U1jA,dH[A^A_]IHLLHfUHAWAVAUATSHdH%(HEHH8t6HIIIH6蒢tgH E11jA<H E11BA8dH%(H;EkDH[A\A]A^A_]W)`)P)@)0HDžp))))HDž ILHMH0IDHHHDLE1QMt]W)E)E)E)EfEHDžILmHH0LLPhHLLgHHt]W)E)E)E)EfEHDžILeHHLLPhHLHHA ifff.UHAVSH`HHdH%(HEW)E)E)E)EfEHEHLuHULPhHuLHH1HdH%(H;Uu H`[A^]whUH]VfDUHHB@HF@JR Z0^0V NBHJXRhZxFHNXVh^xHHHH]UHH]HH@HtUHH 1BA_1]fff.UHAWAVAUATSHIHIdH%(HEHLhXLx`ff`fPf@f0HDžpffffHDž H0HHAHLLAIcHHSIhffEfEfEfEHEM@1LE1HH) ff.K4J4K4MRHuHH}E1HM)fDOONMRHuMAL)HHуHHfHnfpDfHnfpD1DooodolffffffHH9uH9t0f.HH!LDՀI!I LHH9uHLHLAIcHHZIffEfEfEfEHEM@1LE1HH)DK4J4K4MRHuHH}E1HM)fDOONMRHuMAL)HHуHHfHnfpDfHnfpD1DooodolffffffHH9uH9t0f.HH!LDՀI!I LHH9uIcHH1H0HH E11xAt迼AIFPHC@AFAN AV0A^@[0S KAoFXAoNhAoVxAoCHKXSh[xIHnID$@HC@A$AL$AT$ A\$0[0S KHooHoP oX0CHKXSh[xH@@HAdH%(H;EuDH[A\A]A^A_]bUHHPdH%(HEHcHH)IffEfEfEfEHEL@1E1HM)@N O N M[HuMAHUE1HM)DNONM@HuMAM)LHуrvHHfInfpDfHnfpD1foo\odհolffffff$lHH9uH9t&HL!LDI!I LHH9udH%(H;EuHP](aUHAWAVATSH0IIHIdH%(HEH6識t%H 1ۿ1jAMMu%H 1ۿ1CA费W)E)E)E)EHE)p)`)P)@HE)0) )))))))HuLLBH@LL(HHUH@LVHHCH(((([8S(KC(CPK`SpH8HAAAAIHHLHj1dH%(H;EuH0[A\A^A_]^fUHAWAVAUATSHhdH%(HEffEfEfEfEfEHcHH~?H@1Afff.EJ|u.EHAEzH@AMcANL1MD E1MI AMDAMIE1IAMBALHE1IAIBIIE1HALBMIE1IMBAEA E CBD IzuA1ۃzu7Pu.L:LcrH@HxLILHHxxDH 91ۿ1eA%-dH%(H;EsHh[A\A]A^A_]LILH)Hs 1IH s11yHHJ;HfE1fBoTfBofI L9uffpffpUffofrffofqff~H9taIIN fnIff.~ fHI9ufpUffofrffofqff~L9uHIf.C IM9uI9HIBHHr-H +HHH)HH€ML9I91IHIt/HHLMIIME1ېG$E$IHM9uHH)HwCHHHHHEDEDDAEDDAEDDAHHH9uI9s HH)H}B1oHxLHHUHPpH s1IWLEIIIIIE1foJofof`fpNppfhfpNppfgC fof`fpNppfhfpNppfgCTH IM9uH9 {HʉMLMIIHIIIfA~f`fpNppfgfA IHI9uH9YfUH僿HGPHF@GO W0_@^0V NGXOhWxFHNXVh^xHHHH]WF0F FHF@FHFXFhFxHdžHdž]ÐUH]FfDUHAWAVATSHIIIH6uI6LuH3Lt+H 1ۿ1jA[A\A^A_]I$IIHLLLHPfffff.UHAWAVSPHIIH6euH3LVt-H 1ۿ1jA蒰H[A^A_]IIHLLHPUHAVSIHH6t)H 
E11jA&D[A^]IFPHcHH~ s 11s1ʁHf1fff.AoT>PfAoT>`fH H9uffpffH~H9tf.H 4HH9u1HHAHH@1M)LLLHHuMH1fH!HHcHH9|HFHHcHH~ s11u1҅ʁAAAIfE1fɐBoTHfBoTXfI M9uffpffH~H9tf.H 4HH9u1HH҅t[UHH@E1M)DN N N M@HuMɃH]~!1fff.H!HHcHH9|fff.UHAWAVAUATSHXIIdH%(HEMHL M@@0@@uH E11CA?LMHHH6LtIH IE11jAUdH%(H;EDHX[A\A]A^A_]MtI7L轋uE1HtH1#0萌HtHW@@HHHW))))HDžHLH{HL`IHLLP(LL6HAM7W)E)E)E)EHE)`)P)@)0) ))))))))HDžpHuLLIILHMLLLP LLWtQHIHHHLHPHuJH 1DAcH 1DANE1HH;H5~HBH{iHaHHH@PHuaQUHAWAVAUATSHHIIIHcL$t)IIcLHWLuH 1HEп1A /ڟ{(unHKLs H;KuLHIHHA LEI9LH=u2H{J4HtHCLcHKHCL4HCfC(HHIM1HE1LLLIRA~t)Au HrHLLLЅteIcH}L׿t*IIcH}H HMЅu$H Q1HEп1A {(uHCHKHQHSHDHC HMЉH[A\A]A^A_]ÐUHAVSHt=HHIP(HL]u=H W1ۿ1DAcH :1ۿ1CAXG[A^]UH]6fDHH@@HtUHH 1BA:1]fff.UHAVSHH@0HtCHIuuHHLu=H 1ۿ1DAyH m1ۿ1BApz[A^]HH@HHtUHH 11BA@1]fff.UHAVSHH@PHtCHIuuHHLu=H 1ۿ1DAH 1ۿ1BA躦[A^]LcHMUHSHHAr!II)I AIM)I AEtvE1MAtNI!KH!L JMIIYI9fNI!KH!L JNTI!K\H!L J\IM9uyMIfHnfpDfHnfpDE1f.BoBo\Co$ColffffffB$BlIM9uM9LcHMeAr!II)I AIM)I AEtsE1MAtN\HI!K\HH!L J\HMIIYI9NTHI!K\HH!L J\HNTPI!K\PH!L J\PIM9uyMIfHnfpDfHnfpDE1BoTHBo\XCodHColXffffffBdHBlXIM9uM9HcHHBr%II)I AIM)I AEE1M@t(NɐI!OȐI!M NΐMIL_M9NѐI!OАI!M N֐NјI!OИI!M N֘IL9uIIfHnfpDfHnfpDE1fBoѐBoѠCoАCoРffffffB֐B֠IM9uI9[]UHHcHHHH׃r!II)I AIM)I AEtxE1MʨtNI!OI!M NMILXM9ffff.N I!OI!M NNLI!O\I!M N\IL9uyIIfHnfpDfHnfpDE1f.BoBo\Co$ColffffffB$BlIM9uI9]ffff.LcHMTHHAr%II)I AIM)I AE-E1UHSMAtNI!KH!L JMIIYI9t?fNI!KH!L JNTI!K\H!L J\IM9u[]HcHHr%II)I AIM)I AEE1M@tNTHI!O\HI!M N\HMIL_M9:fDNLHI!O\HI!M N\HNLPI!O\PI!M N\PIL9uMIfHnfpDfHnfpDE1@BoBo\Co$ColffffffB$BlIM9uM9^IIfHnfpDfHnfpDE1fff.BoTHBo\XCodHColXffffffBdHBlXIM9uI9ff.UHAWAVAUATSHUH}HH)HL)H H ˆULIMPL^IXLqHAXM`XLpIxPHxLyP1HMHusfffff.HUHIIIÐHƐHÐIƐHEHLeIĐHǐIǐHHUHLeHEHELcHMtL}H}HMHH HEL<HxHHEHpHMLELHA Ut1q@LHfInfpDfHnfpD1AoTAoAodAo,ffffffAdA,HH9uL9tyHAt$HEL$M!HEHNUHHH@h]fUH]f.UH]DUH4]DUH帖]DuUHqNUH1]@UHAWAVSPI8YwHIHMLHC(HtHHHCI0L~OHC(Ht8L`wH{WwH[A^]Kw[A^]fDUHAWAVSPHtRIE11DHHI6HtrH;HtL#ttaH 1Aa~H }E11CA>fuHI脮LH3IvHt Ht(IvHt HtAFCAFCI HE1LH[A^A_]UHAVSHHt.HIstXH 1ۿ1A腔;Ht$uHHHIHLH71H[A^]ÐUHAWAVSPHH?HIHt)I6rtH 11ۿ1A yL{MtI?5L]uMt/H;Ht"IHLmuI?L)uE1L{1M H 1ۿ1rAxH[A^A_]ff.UHAWAVAUATSPL/MII`FsHHIIL8ACC CM.IcLL藩tcIIcLHʾtCIHcH~_sd11H E11rA蕒H 1A rE1Hf1ff.oT3foT3(fH H9uffpffH~H9tfff.I HH9u1Ht;H E11qA跑H/sDH[A\A]A^A_]I~sI^AUHHG]fDUHH ê]fff.HG(Ht UH@0]1UHHcH~ s 11u]ÉHf1fffff.o>foT>fH H9uffpffH~H9tffff.H HH9u1H]f.UHG]UHw]UHG]UHw]UHAWAVATSHdH%(HEHHL7ML{MI7LnH 1jAˏL3L{I7LYnH 1jA"蓏H E11xA1H E11CA&TdH%(H;E DH[A\A^A_]IcHH11\ILL;bHSAHtffEfEfEfEfEfpf`fPf@f0f ffHEL;HILLLP(LL,H;HSHHݪH SE11qADHf1fAo7fAo7fH H9uffpffH~H9t@I ϘHH9uHH E11wA+H IE11DAcUH s1A=4fUHAWAVSHIdH%(HEHG(Ht@0ubLtsI~W)EH}оLHtJH}оHLItAL)sEuJ(H 1ۿ1AR&1'1rH 1ۿ1AdZdH%(H;Eu H[A^A_]3fUHAWAVAUATSPHHHHL/MII׿xkHIAu I&M.WAFAFAF(AF8IFHAFPAF`AFpAIdžAAAAIdžH;LLLE1tHLt HAI>mLl!H 1E11CAzDH[A\A]A^A_]E1ff.UHAWAVAUATSPL?MIIIHMп/jHHAu IݣL;WCCC(C8HCHCPC`CpHǃHǃI>HLLLEet8LHA+H E11rAЉE1E1H;H6kDH[A\A]A^A_]@UHAWAVAUATSHMIIHIdH%(HEH6htIH >E11jACdH%(H;E|DHĸ[A\A]A^A_]ME$A+W)E)E)E)E)E)p)`)P)@H@LLLqHsHEHF@(@(P(`(p^0V NHEHEMU]SpK`CPIHAAAAAH >E11dAAHL8~?I@E1fff.H|u,QLE1DI@HcHT1H 1HH HDHH1HHBII1H@LBLHE1IAIBIIE1HLBAE эFE11mA50fH'W@@H8H(H8(HOHW H;OuhHIHHA LEI9LH=uNH8HxJ4H hH Ht#H8HGLoHOHGHHG H8fG(E1HtcILLIHKHL8toI<$Ict$I@IcHSx,H >E11mA>,L8#4LHLMrE1AA(uIGIOHQIWHDIG H(HH;H5VHH{AfH9fq+ffffff.HtIHщHwHt H?H.UHH 1rA\1]UHAWAVAUATSPHAIIHH6bt!H ~>E11jAIc$HHs11fHf1fDAo7fAo7fH H9uffpffH~H9t@I ϘHH9uHtDLDiHtgIHbHtWLLDIHM4nHt@IL;!H }>E11wALH[A\A]A^A_]E1LgdE1fUHAWAVSPHHL1AK|DIDH93I1HH1:H8ILHAFu I>cAFIH W 1ۿ1rALHcMD1M 1MI MDMIE1IAMBAMIE1IAMBMI1IMBMIE1IMBAE D BɍD IH9H w1ۿ1A9H[A^A_]Lb1H9DUHAWAVATSHHdH %(HMH?HtFHHHt=DE~tH1ff.EJ|ubEHAEH 
1ۿ1rA胀dH%(H;EHH[A\A^A_]HMcANL1MD E1MI AMDAMIE1IAMBALHE1IAIBIIE1HALBMIE1IMBAEA E CBD IHH9v H 1ۿ1dAHHU%1Hu/&ffffff.UHAWAVAUATSPHHcH~/HE1AJ|u"DGDHLcND1M 1MI @MDMIE1IAMBAMIE1IAMBMIE1IAMBMIE1IMBAE D BF4AE IAAAE1HL9LHBHJ6Hr$MI)IL 9M9AH9AEE1IMIt/MIM6IN<E1@G,'G, IIM9uII)IwCMILF DFD DBFD DBFD DBIHL9uL9sLH)H1%L3H[A\A]A^A_]H sE1AALIM6IIIIE1fAoIAofof`fpNppfhfpNppfgC :fof`fpNppfhfpNppfgCT:I IM9{L9@MAAN M6IIIIIffffff.A~ f`fpNppfgfC IIM9uL9UHAWAVAUATSPHIHE111Ht9IHI[Ht&ILHLHtIL# L]E1LH[A\A]A^A_]fUHAWAVAUATSPHt6L7Mt.A~HIH|uAQH E1ۿ1CA{IHcHT1H 1HH HDII1HLBMI1I@MBMIE1IAMBMIE1IMBAE эBD I=%H k1ۿ1pA-z9H}п`*ZHHtHCHACC CHAHEH8IHDtMHEHIIHHcLLHEL MII$LLLP(LL7tQLyZHqZ1ۉH[A\A]A^A_]fffff.UHAWAVSPHtYHH?tPH@tHtAuSL{LsMtI>ՎLYH{YWAE1!H E11CA GxDH[A^A_]fUHLHHH=H].fDUHH0]/fUHH0]20fUH]f.UHHcHL(HHHH]fff.UHHcHL(HH]fUHAWAVATSHPHIIdH%(HEW)E)E)E)EHEL(HcHLeLHH`LLHL/dH%(H;Eu HP[A\A^A_]fffff.UHAWAVAUATSPHHcHH~/H@E1AJ|u"DGDH@LcND1M 1MI @MDMIE1IAMBAMIE1IAMBMIE1IAMBMIE1IMBAE D BF4AE IAAAE1HL9LHBHJ6Hr$MI)IL 9M9AH9AEE1IMIt/MIM6IN<E1@G,'G, IIM9uII)IwCMILF DFD DBFD DBFD DBIHL9uL9sLH)H1L3H[A\A]A^A_]H sE1AALIM6IIIIE1fAoIAofof`fpNppfhfpNppfgC :fof`fpNppfhfpNppfgCT:I IM9{L9@MAAN M6IIIIIffffff.A~ f`fpNppfgfC IIM9uL9UHAVSHI>t'M(IcHI(HH1[A^]fff.UHAWAVATSHcHH~6L@E1Afff.EK|u$EZAEL@AMcAO\E1MAE E1LH AIDAIIE1HALBALHE1IAIBIIE1HALBLHE1IIBAEE E GG AE HAAAE1ADI9GLI9bHrLIIxsII/L I9v!IIIAAIM9sIIIMINNfHnHHL)fE1fvfo`o(fof`fpNppfhfpNppfgfof`fpNppfhfpNppfgB,BdffIHM9uffpffH~M9t*fffff.LL HIMIHHwHAAHsE1E1PHE1E1fff.BIL FTI B\IL FTI IL9uMt%L1fff.ID M HI9uMIHHHtHLI1HHLH@HcHHHכAKx%H Ep21ۿ1o[A\A^A_]û&fff.UHAWAVSPHIL(HcHHMIcHI(HHMIcHI(HHMH[A^A_]eDUHHcHL(HHH]ffffff.UHAWAVAUATSHIIdH%(HEH9Iff`fPf@f0HDžpffffHDž ffffHDžMIcHHLHL~>sWE11dH%(H;ELLH[A\A]A^A_]M1H(1H AA%H1ffDAo;fAo;fH H9uffpffH~I9t@K DIL9uMs11aρf1fffffff.AofAofHH9uffpffH~H9t@I 4HH9u1HظHH(HHH ffpf`fPf@HDžI(H@LLIff fffHDž0IcHLLLLIJfffffHDžIcHHHLHIfffpf`fPHDžIcHHLTfEfEfEfEHELI@1LPE1HH)KtK4K4MIHuHLME1HM)DONOMRHuMAL)HHуLrHHfHnfpDfHnfpD1foPo`odolffffffP`HH9uH9t0f.HPH!LDՀI!I LPHH9uLAH 1MLHcHPHHL2IcHHffEfEfEfEHEI@1LPL@E1HH)K4K4K4MRHuHLME1HM)fDONOMRHuMHHHуHHfHnfpDfHnfpD1foTo\oPo`ffffffP`HH9uH9t0f.H|ՀH!LPI!I LPHH9uIcHHYffEfEfEfEHEI@1LPLE1HH)K4K4K4MRHuHLME1HM)fDONOMRHuMHHHуHHfHnfpDfHnfpD1foTo\oPo`ffffffP`HH9uH9t0f.H|ՀH!LPI!I LPHH9uAH1ff0f ffHDž@HcLHLLHIHHPHIcHLLHIfffffHDžIcHHH@LIffffpf`HDžIcHHbfEfEfEfEHEI@1L`LLE1HH)K4K4K4M[HuHLME1HM)fDONOMRHuMHHHуHHfHnfpDfHnfpD1foTo\o`opffffff`pHH9uH9t0f.H|ՀH!L`I!I L`HH9uIcHH~ s11s1E1Hf1fo5`fo5pfH H9uffpffH~H9tfDH `HH9uE1HMHcHH`HPHIPff@f0f fHDžPIcHLH@LHIfffffHDžIt$HIcHHHLIfffffpHDžIcHHfEfEfEfEHEI@1LpLLE1HH)fK4K4K4M[HuHLME1HM)fDONOMRHuMHHHуHHfHnfpDfHnfpD1foTo\opoffffffpHH9uH9t0f.H|ՀH!LpI!I LpHH9uIcHHxffEfEfEfEHEI@1LpE1HH)ffffff.K4K4K4MIHuHLME1HM)fDONOMRHuMAL)HHуHHfHnfpDfHnfpD1DopoodolffffffpHH9uH9t0f.HpH!LDՀI!I LpHH9uIcHHH L(~%s'1111H L(1|%Hf1f@o5pfo5fH H9uffpffH~H9tfDH pHH9u1HHL HLH!H!Ht;LHHndH%(H;EKH[A\A]A^A_]ffPf@f0f HDž`QHcfEfEfEfEHEM@1L L`E1HH)KH@t+HŐL!HHfoT>fH H9uffpffH~H9tffff.H HH9u1HH]f.UHHPdH%(HEHcHH)IffEfEfEfEHEL@1E1HM)@N O N M[HuMAHUE1HM)DNONM@HuMILHуrzHHfInfpDfHnfpD1ffff.oTհo\o$olffffff$lHH9uH9t&HTL!LI!I LHH9udH%(H;EuHP]UHHՖHHHHHHkHHHHO(HHa(HzHEH|HE6HvH8HpHYHjH{HdHH^HoHXHaHRHëHLHHFHH@HYH:]UHHH@p]fHcHH~ s1E1x1%AAAIfE1fDBo fBoT fI M9uffpffI~H9tff.L HH9u1IHt[UHL@E1M)DONNMIHuM҃H]~!1fff.H!HHcHH9|fff.UHtu;H~SH@1ffffff.H|u>yH >1oAO91]H@HcHT1H 1HH HDII1HLBMI1I@MBMIE1IAMBMIE1IMBAE эBD I11ɃHDHH]fff.UHAWAVAUATSHxMHAIIdH%(HEЉHpL9v%H >1ۿ1dAaHDžxHhLsIEHxLLLPhIHAu#LxIEHxLLLPhAFW)E)E)E)EfEIELuHxLLLPhHxBD0$AHpHhD8dH%(H;EuHHx[A\A]A^A_]@UHAWAVAUATSHdH%(HEDHE~0H@1f.EJ|uEHAEH@McANL1MD E1MI 
AMDAMIE1IAMBALHE1IAIBIIE1HALBMIE1IMBAEA E CBD I1H]H9:IW)E)E)E)EHE)`)P)@)0HDžpHHHuIIHPpE1tZLHLI$H0HPptE11mAz)dH%(H;EuDHĨ[A\A]A^A_]UHAWAVAUATSHMỈIIdH%(HEH6itIH Y>E11jAdH%(H;E[LH[A\A]A^A_]Mffpf`fPf@f0f fffIIH8HLLtNL\L9vGH >E11dAa-IcHHso11E1 IHDžMl$IHHLLPhLIHLLH8PhHf1fɐAo7fAo7fH H9uffpffH~H9t@I ϘHH9uHt.dH%(H;ELH[A\A]A^A_] H A >E11wAffEfEfEfEfEILmHLLH8PhHBD($A$UHAWAVAUATSHMƉIIIdH%(HEH6tH >E11jAKAu(APuI?IcwI@IcH=xIH < >E11kA dH%(H;EDHĨ[A\A]A^A_] LE1HtH1H8+0Htf@@HH8Iƅwx(INIV I;Nu`LHIHHA LEI9LH=uCH{J4HhHhHtHCLsIHKIFHIF IfC(LDHXLDHPLDHLDH@LhLDE1HHHHtaIhffEfEfEfEfEHDžxIEHuHxLPhHxH}H,H|H0H@tgIffEfEfEfEfEHDžxIEH]HxLHPhHxHH@ ,HI@HPLHHh#jHXHPLH`HLhTgAttHPHLLH`Lh^xHHLH`Lh]THXHHH`HLh cRHPHH0LH`LhfHXHHH`HLhl]HXHH@HLhD]LHLHXHHheHt1wIcFH~AI9%=nuWCH '>E11kAH.19uIIs'11eH >E11AJHHf1fofoTfHH9uffpffH~H9t H 4HH9uHHHtdH`HLHAF619uLLLLCRA?H >E11DAYH >E11lAQ? Hh~(uHFHNHQHVHDHF H8HH;H5:HCH{jHbVE1賲UHH=EH5H:]UHHH&HH(H)H"HۛHHMHHHHqH*HH$HHHHHyHHۛH HHHHHH]HcHt!UHHHHH1]øffff.UHAWAVSPHILHcHM藖IcIHHMH[A^A_]qUHLHcHHHH]HUHHPdH%(HEffEfEfEfEHEHcH IL1E1HM)@N O N M[HuMAHUE1HM)DNONM@HuMAM)LHуrvHHfInfpDfHnfpD1foo\fffodհfffoTff$THH9uH9t&HL!LDI!I LHH9uH}HdH%(H;EuHP]zf.UHHPdH%(HEffEfEfEfEHEHcH IL1E1HM)@N O N M[HuMAHUE1HM)DNONM@HuMILHуrzHHfInfpDfHnfpD1ffff.oo\fodհffolfffff\HH9uH9t&HTL!LI!I LHH9uH}H$dH%(H;EuHP]f.UHHdH%(HEffEfEfpf`HEfEfEfEfEHEHcHL1L`E1HM) ff.ONNM[HuMAHUE1HM)DNONMRHuMILHуrzHHfInfpDfHnfpD1ffff.oo\fodհffolfffff\HH9uH9t&HTL!L I!I L HH9uH}HtdH%(H;Eu HĠ]GUHHcHHH׃r!II)I AIM)I AEtxE1MʨtNI!OI!M NMILXM9ffff.N I!OI!M NNLI!O\I!M N\IL9uyIIfHnfpDfHnfpDE1f.BoBo\Co$ColffffffB$BlIM9uI9]ffff.UHHcLHH]2fUHHcHHHH]ffffff.HcHs1E1tAAAIfE1fffffff.Bo fBoT fI M9uffpffI~H9tff.L HH9uMt?UHAVSHHIHcHLHLI*[A^]1ffff.UHHH]ffffff.UHAWAVAUATSHIHIdH%(HEHLhXH@`ff`fPf@f0HDžpffffHDž HLLHH0LHHLLAIcHHfffEfEfEfEHEI@1L0E1HH) ff.K4K4K4MIHuHLME1HM)fDONOMRHuMHHHуHHHfHnfpDfHnfpD1foTo\o0o@ffffff0@HH9uH9t0f.H|ՀH!L0I!I L0HH9uIcHH~+s511DžHHDžHf1fo50fo5@fH H9uffpffH~H9tfDH 0HH9u1HHHHL0HLLLALLLHAIcHHmI~HffEfEfEfEHEM@1L0E1HH) ff.K4J4K4MRHuHH}E1HM)fDOONMRHuMHHHуHHfHnfpDfHnfpD1foTo\o0o@ffffff0@HH9uH9t0f.H|ՀH!L0I!I L0HH9uIcHH~s11}1ҋHf1fffffff.o50fo5@fH H9uffpffH~H9tfDH 0HH9us11Uf1fAofAofHH9uffpffH~H9t@H 4HH9u1HE1HdH %(H;Mu!H[A\A]A^A_]̣fff.UHAVSHPdH %(HMHcHHHs1E1i΁AAAIfE1f@BofBofI M9uffpffI~H9t@L HH9uMt]HffEfEfEfEHEHuIHI`t'IcHt5HH}HI11dH %(H;MuHP[A^]ø舢UHAWAVAUATSHHIHIdH%(HELH1ID$@HA$AL$AT$ A\$0AD$HAL$XAT$hA\$x))) )I$H0Ao$Ao$Ao$Ao$hXHI$Hx8ALffEfEfEfEHEHcHH~s#11Hx{1Hx%HE1fBoTHfBoTXfI L9uffpffH~H9HxtI HH9u1HHrH@E1LUM)NOOMIHuMHcHHs1oʁfHnfpDHE1ff.BoL BoT ffBL BT I L9uH9tfffff.H!DՀHH9uHHHЃs1vHHfHnfpDfHnfpD1foTo\odHolXffffffdHlXHH9uH9t$fDHtՀH!I]@HUHAWAVAUATSPHIHHGOLXHtrH@H H@ILgIH@tLLLHVLL<LLL)AFXWAG0AG AGAH@r"HHLIy<LHHHƃ?Ht#A^XLHUILHUA^XH[A\A]A^A_]ffff.UHAWAVSHIHL~F)EЋFXHxDH8r+?t?H)L1gXLL;1H8t8H)L1;X(EAFPLL;AFXWAG0AG AGAAAFCAFCAF C H[A^A_]f.UH]1;HUHAWAVAUATSH8MHE1HUHuH}ȐMIB?DM׀ɉMAAA)ELLHUH}HuAE2&}AEfoUfofsfnfsffqf afoffAB>UЀM B>HUIL9CH8[A\A]A^A_]@UHAWAVAUATSH(dH%(HEHLIIIE1HUfDHHECEC fECECEHHLUC& 0}EC 'HUH HRHSHUH CIL9udH%(H;EuH([A\A]A^A_]ETDUHAWAVAUATSPMIIIHEHu*AA2IAILjÃtHLuIHEIWfDL}LLLLMMMUMMML}IAD$AD$AL$AL$AD$ AD$ AL$ AL$ AD$ AD$ AL$ AL$ AD$ AD$ AL$AL$AD$AD$AL$AL$AD$AD$AL$AL$AD$AD$AL$AL$AD$AD$A$HI3IHCI3GIFHEIIII1M\LLLLMAD$AD$AL$AL$AD$ AD$ AL$ AL$ AD$ AD$ AL$ AL$ AD$ AD$ AL$AL$AD$AD$AL$AL$AD$AD$AL$AL$AD$AD$AL$AL$AD$AD$A$AuLHuIu DIEHuA2AIt&A2AKA2AHuHEH[A\A]A^A_]DUHAWAVAUATSWHHMHFHHIHEЃHHH!HL?IIL1Hw@HWHAAII?I!IIM1LG L_(DHH!M?MII1LWLOMM1Lo0LL1HO8LH1H_PMI1LwXMI1L`MI1LghLH1HEHGpHH1HEHGxL3ULLUM1LLMM1LM1LM1LL1HL1HL1HL1HM1LM1LM1LHEL1HHOHEL1HHG1fHHHHHtHu1Nffff.III1@DT0EYD\0EQD0ED0EHI I9utIIIL 
E0IICA0G^[A\A]A^A_]UHAWAVAUATSH(IIHuIdH%(HEM(ILmIXHH]ȿuBurLHLI_I?HL!H1IHIHLlHuL@uF*HN,IHd,HMHA$0L@IMl$t IMuЅH{@AMM"h<u*HC@H$LLLHMILMIII)HEDc AI riHC@HEHuf H}LUȺLLHMIUHuADȉC HEI I II wHuLHHMLHHUH{@LLLHUHMIUDeDȉC HMILAMtdHMHEHsHHuHU`AADc HJ)H 1H9s+LL]LELLMu1^J+H@HMJ<)M>LHML9AL]I9AH9LeI9@I9M9M@E@ @ HL!DHuI7HLH1fD: :T;\; d;@l;PWWd;@l;PWWT>>H H9uI9tzuHHu8jLtHQA<0@0|3@@2|3A<3HtAffff.A4@0t @@2t A4 HA4@0t @@2t A4 HủpH8[A\A]A^A_]fUHAWAVAUATSPHUIILoPHPpu Att I@LMg@fAoG0fsffofhfpNppf`fpNppfgAoO@fAO@LLAG AO@WAO@MHUH@1@uLLu1H[A\A]A^A_]f.UHAWAVSPIHI11Mt&IIBI@HLH[A^A_]5H[A^A_]fffff.UHAWAVAUATSH8IHdH%(HEHEHEL?LfH}HuLLb^HEHELkM6H}HuLL8^HEHEM1M1H}HuLL^HEHMHUH1H1HuH}II?II>M1II9M1H1L1IIIIIM1IIII1M1II?L3UI>I9I1M1I1I1I1M1I1L L[dH%(H;EuH8[A\A]A^A_]5fff.UHSH(dH%(HE)EHEHMƒHHHH8H1HHHUHHEHHHSHMLEWdH%(H;EuH([]-5ffff.UHAWAVAUATSHHdH%(HEH/HILL16HHH;ffffff.ML)HHMLLHHALBMLLL=GHrDu 1O>H1@HrHHzHrHHzH2HHzHrHH:HH H9uA?HHHHHHHdH%(H;EuH[A\A]A^A_]3f.UHHHNHHHHF]UHAWAVSH8HIIdH%(HE kHH1tHm1%AAOAW )U)M)EHHs1vHs)1Hʉك8fDI4H1tHH9uH9uIXك0AWE)EHtAFWE)E t AF WE)EH9tuA0T HH9u(E(M(UWh)EW iWj)M)UHHHMA H1HHHH(EHǃdH %(H;Mu H8[A^A_]1ffffff.UHSH8dH%(HE1H0NV )U)M)EHHs1tHs'1HƉȃ8@L2L1D5HH9uH9uIXȃ0WE)EHtBWE)EЃ t B WE)EH9t uf.4@0tHH9u(E(M(UW,g)EW ah)MWvi)UHHMA H1'H(EHǃdH %(H;MuH8[]z0f.UH]kfDUHSH8HdH%(HEW)E)E)EH1H0NV )U)M)EHs1nHs%1HƉȃ8fH<2H1|5HH9uH9t:Cȃ0WE)EHtBWE)EЃ t B WE)EH9uHuuHu<@0|HH9u0HBtHǃ1dH %(H;MuH8[]?/ffffff.UHAWAVAUATSHHdH%(HE1H0dIIHW)E)E)ELuHHELȉHuLHȉLLLuHȉLLHMIs1}Is11HD8fff.IT H1T HH9uL9uHXD0AEWE)EHtAEWE)E t AE WE)EL9tAuAL0LHI9uHMA H1HU&HAAdH %(H;MuHH[A\A]A^A_]-ffffff.UHAWAVAUATSH8dH%(HE1HIHH9IMt*LHHULMIHULMxHMLEHMefff.IAALLLLMAADAIHUH)HH A LBD?IHUtHvL1H-d@HtE1AAK4,LLAII9rbHtDW)EAAIH]HILALHLD*LHuHUdt IdH%(H;EuH8[A\A]A^A_]Z+f.UH ]QfUHAVSH=zVH5aHoVHtt(HmVi1Ƀ=^V*HD=JVHDJH=MV`H9V tIHHHHIHVH=V`L[A^]ffff.UHAVS=U;H~zH1Hƺ"AE1;HtTIHH޺d?tLH޺P?tLH[A^]<AL5NUHSU[A^]fffff.UH-U=+U]ffff.UH2t]H=o>9ff.UHAVSHI1u<,8 uHt L1H+1[A^]H=QR>]9ffff.UHHt]c]ffffff.UHHu]fUHHsYt]8H=o=8ffff.UH"t]H=o=8ff.UHH=TH5]^UHAVSHdH%(HEELu>L19HuK*HËt҃ tD&uhL5kL19u*;tH=M:<7Hu0mSx׉`SdH%(H;EuH[A^]H=bP<7'f.UHAWAVAUATSPHAIIH=RH5]E1EAtH=RH5Q]E)IMtp=Ru:f.>LLD1k8Hu5A}t7fff.=rRLLo8Hu A}t II)H1ۉH[A\A]A^A_]fUHSP_HttHÿ_HHtPWH@@H^Ht"WH@@HCCHHHH1HH[]ff.Ht3UHAVSHL7Mt*AFu I>i`AFu I LO`LsMt)AFu I>6`AFu IL`H[A^]`UHG]UHAWAVAUATSHLIIIdH%(HEЋFF HUM.Icw LruI}IWIcO LAEAG AEI6I~LHI~HuHLIGIOI>HHUIMI>HLLIHtlAF1I>HHLI&thI~HHLI tOIdH%(H;EueLLLIH[A\A]A^A_]H 51DA|AFdH%(H;Eu1H[A\A]A^A_]#ffff.UHIHHVH]ifUHAWAVATSH w%H o1ۿ1~ANo|HFH9s"H o1ۿ1vASA|XIfHH)HHGHHII$HLHHAMtLLL)Lg![A\A^A_]fff.UHAVSIwH 1o1ۿ1tAeZ9u:yu4It_MH1ffffff.DTAu1HI9u1H o1ۿ1kAkG{[A^]Eu"I9u:H o1ۿ1A}H o1ۿ1fAwLHI wH `o1ۿ1gAILH)H9v H 4o1ۿ1qAZI9tHHIHH LH5fff.UHSPH9v"H o1ۿ1rAEz9s"H o1ۿ1tA!zHt HHH[]ÐUHAWAVAUATSHHHHPdH%(HEW)p)`HeIIA@HXE1fffff.DȉEL9`tIL@MAx,MXHHHhZHhL`MML@H`HMAPH`HHPHHPH`HHuPL;XrFH`HLP H`Hhp,ZHXIAI)MXW)E)E)E)EH`H`H]HP H`Hhp,TZLHL1HhYHxHt HpdH%(H;EuHĘ[A\A]A^A_]@UHAWAVAUATSHHdH%(HEHHDW)E)pDRAt3EAt-EA$H oE11AwEHOALHH~'H ff.AJ|u)DHD tMH IMAMIHHL1HHH HDHHHHBHHHHBHH1HHBHHE1HHBA1H@)F4AAA/AUDtH oE11zA`LL E1EAHL)LHID$H9rDDdExsHH9skH oE11qAv1WHx{WHEHtH}dH%(H;EaDH[A\A]A^A_]J.|8wHPH(HXH0LHH8HIULE1HdHPLHhHLH@ILHLHMHhLHLPtUIr8H0HXJ4.L8IIIL9J :I91@A4@04 HL9rEtD(@ 2IO1L fffff.<2Lv@uH9Lr@uhdHXH(HHL)L9tlH oE11A=H oE11A H oE11AsHhW)E)E)E)EHpHCM)HpH5!HpHPHpHLLPHhLHpHLPHpHuHP HpHxp,;ULA>H}H@&H oE11iA+I s 1VE1LHJ4/H1D> >:WD:W:D:H H9uI9AHLHI<0H1<2HH9uI9SUHHH@ Ht]HGH~*Hfffff.H|uQHHcHT1H 1HH HDHH1HHBII1H@LBLHE1IAIBIIE1HLBAE эBD I]1]ÐUHAWAVAUATSHhIdH%(MLDHEHwHcFHEH6sE1E1TIIfE1ffBofBoTfIM9uffpffI~I9tff.N IL9uMtFDyHMLxHU~PH|u>H1H oE11xAIvHHL1HHH 
HDHHHHBHHHHBHH1HHBHHE1HAHB1H@)F4AAe1҉At1DA$IH@IGH9s!H oE11rAVMAtAu IM)I EjELeIM)ID$L9s!H oE11rAkNuUHME1Mt)LNH0HVHEHLǻ1HEffEfEHEHt'x,`NHH1=PH]HEHEHMLHHMIH]HPHEH5HPHEHHuLPHEHHuLPHEHHuP HEH}p,0PH}OHEHtH}ffEfEH]HLHULLx\K>LMLH)€tMH}H HIr)IqII)IJ7I9L)HH9 1HLHHt!ffff.DD0BHHHuL)HwCI)H9H1fDt@0t t@0t t@0t 4@0t HI9uЀ}u*M ADA#H loE11Adl1VNdH%(H;EDHh[A\A]A^A_]E11'NffEfEH}I s1QLHHH1o7oL7oT2fo2fT22H H9uI9At.HLHHL7L1D1HH9uI9vHfffff.UHAWAVATSII1XHHMtvLcMt-AD$uI<$,MAD$u I$LML|HCHtMMt*L{Mt]AGu I?LAGu=I_EAFuI [A^]LAELsMt)AFu I>(EAFu ILELsMt)AFu I>DAFu ILDLs Mt)AFu I>DAFu ILDLs(Mt)AFu I>DAFu ILyDLs0Mt)AFu I>`DAFu ILGDLs8Mt)AFu I>.DAFu ILDLs@Mt)AFu I>CAFu ILCH H{X?H[A^]CfDUHAWAVAUATSHMMIHUHuI1 HHMuLuMMLcMt-AD$uI<$ZCAD$u I$L?CLrHCH>H}MML{Mt)AGu I?CAGu ILBH}rHCHH}LsMt)AFu I>BAFu ILBH};rHCHHEHgLs Mt)AFu I>XBAFu IL?BH}qHC H=MLs(Mt)AFu I>BAFu ILALqHC(HMLs0Mt)AFu I>AAFu ILALFqHC0HLuMtvL{8Mt)AGu I?gAAGu ILNALpHC8HtQLuMt*L{@MtcAGu I?AAGuCIBH d1CAXl_H$1HH[A\A]A^A_]L@LppHC@HtHOufUHAWAVAUATSPIII1HHMLkMt+AEu I}S@AEu IEL9@LoHCHMtvLcMt-AD$uI<$@AD$u I$L?LoHCHtMMt*L{MtcAGu I??AGuCIBH d1CAX ^H1HH[A\A]A^A_]Lg?LoHCHtHuUHAWAVATSIIE11%HHÀHT@MtvLcMt-AD$uI<$>AD$u I$L>LnHCHtMMt*L{Mt^AGu I?>AGu>I=H d1CAX\HE1L[A\A^A_]L]>LnHCHtHtIff.UHAWAVATSIIE11HHÀHTMtvLcMt-AD$uI<$=AD$u I$L=LrmHCHtMMt*L{Mt^AGu I?=AGu>I=H d1CAX[HE1L[A\A^A_]LM=LlHCHtHtIff.UHAWAVAUATSHLMLEHMIIIE11HHÀHTMcLcMt-AD$uI<$<AD$u I$L<LIlHCH6ML{Mt)AGu I?j<AGu ILQ<LkHCHMLsMt)AFu I><AFu IL<LkHCHH}mLs Mt)AFu I>;AFu IL;H}VkHC HCH}Ls(Mt)AFu I>u;AFu IL\;H}kHC(HH}Ls0Mt)AFu I>";AFu IL ;H}jHC0HLuMtvL{8Mt)AGu I?:AGu IL:L`jHC8HtQLuMt*L{@MtdAGu I?:AGuDICH yd1CAXXHE1LH[A\A]A^A_]L1:LiHC@HtHtIffffff.UHAWAVSPIy8HIHMLyHHHuH=7+H5B5H=,+H;茞CPH@HCTM~XL4IHLtHH@Ht5HЅu,H=h*HL]L5H;=H591HH[A^A_]H;HxV@UHH=*H5A4Hz*]UHAWAVSPHHt}Ht-ILpMtOAFu I>8AFu/I.H d1CAXHWH%Ly8H!hHLIOHtHT@ H1H[A^A_]UHAWAVSPHHHǃHHǃHHǃLMt)AFu I>7AFu IL7HǃLMt)AFu I>7AFu ILr7HǃLMt)AFu I>K7AFu IL27HǃLMt)AFu I> 7AFu IL6HǃLHt"E1HJ<0IL;rH6WAH6WH[A^A_]UHHPSn]fff.UHHGH~*Hfffff.H|uQHHcHT1H 1HH HDHH1HHBII1H@LBLHE1IAIBIIE1HALBE эBD I]1]UHHG]fDUHHG]fDUHHG]fDUHHG ]fDUHHG(]fDUHHG0]fDUHHG8]fDUHHG@]fDUHHtHGHHtHGHHtHGH]fDUHHtHG HHtHG(H]fUH1]UHHtHG0HHtHG8HHtHG@H]fDUHAWAVAUATSPH_1II 3LGI &HtcHtZCu&HIHIII 4LLLLCu H IHIIH3HLLLHwHt[H_HtNCu HIHII3LLLCu HIHHIx3LHLHWHtOH_HtBCuHIHIE3LLCu HIHH!3HLHOH[A\A]A^A_]fffff.UHAWAVATSH_ 1HH HO(H HtWHtNCu HIHII2LLLCu HIHHIw2LHLHw HtOH_(HtBCuHIHID2LLCu HIHH 2HLHW([A\A^A_]UHAWAVAUATSPH_01II @LG8I 3LG@I &HtcHtZCu&HIHIII1LLLLCu H IHIIHn1HLLLHw0Ht[H_8HtNCu HIHII51LLLCu HIHHI 1LHLHW8HtOH_@HtBCuHIHI0LLCu HIHH0HLHO@eH[A\A]A^A_]ÐHH@0HtUHHE$H]fUHHGH~'H1fff.H|uq]HHcHT1H 1HH HDHH1HHBII1H@LBLHE1IAIBIIE1HLBAE эBD I]1]@HHt UH@H]1fUHLHHH=L ]vfDUHHH]BfUHHH]fUHAWAVAUATSH(MLEAIIH}L1ۅ^A~1A`Aw~LmHJc HLeL-At(A@tsAruHHEHMHM}A$LmLeL-`OL-FL-=L-4H Hd1ۿ1AQLLmLeL-+EeMMs"H d1ۿ1A>bLgL+HtZHMtIHHLLHMtHHLHuLHHEHHEL0HEH([A\A]A^A_]UHSP~'`w{H0Hc HHt6@trruPH$H d1ۿ1}A qH?Hl6H-H>$H d1ۿ1A0Hĭ@H9t H d1ۿ1}AJH[]ÐUHAWAVAUATSHHMLEIIIdH%(HEIHx(tOLI$H@(dH %(H;M.LDLLEMHH[A\A]A^A_]LEHEHEEHEH}HuHUMME1t~MLELMI$H@0Ht$HuLLЅu!ME1W$HuLLt.HEHH tQH EdE11EAsI}t H}+dH%(H;Eu&DHH[A\A]A^A_]HMA}u6fDUH]VfDUHAWAVAUATSH(L}AGL9usMIHuHUHMAL(HtoHDM LELHLLotTIH@0HtM$LHuHUHMIMNH 4dE11}AHH5qH(TQI~$L$H;]hH$[ff.UH]fDUHAWAVAUATSH(dH%(HEЋAH9IIILEDMALeLq"Ht|HLMLE$HuLHL8tYLeL8I9uKLLLHMIDMANH dE11}AbB3E1.E1!H ۷dE11DA5BH#dH%(H;EuDH([A\A]A^A_]UH]vfDUHAWAVAUATSPHOHt6HcqH~MHAf.DH|u@APAAH t1ۿ1ANHAIcALL1MD E1MI AMDAMIE1IAMBALHE1IAIBIIE1HALBLHE1IIBAEA E CBD H@rH t1ۿ1AV#w1H t1ۿ1~A]{@H[A\A]A^A_]Å~ tytH t1ۿ1hAdLGMt,IcHH~NIAEJ|uKEJAEGT@uH Xt1ۿ1A^IAMcAN\E1MAE E1LH AIDAIIE1HALBAMI1IMBMIE1IAMBMIE1IMBAEE E E G E IAAAs H 
ut1ۿ1eAn{~AxutGTu/A"[H 0t1ۿ1eA6Hj&H t1ۿ1eAuUHAWAVAUATSH(zubHtWyuNHLcRLHcYH}HILILIHLEjLLLEH}I1tH([A\A]A^A_]IAy(H}IqIY I;qujHvHHA LEI9LH=@@uNIyJ4LMEHMILHMELMHtIAMqIqIAHIAfAA(DEIIMLq1Ht.AuA}tLH L1ۿ1mA =MA}(IEIMHQIUHDIE LIHLMLDt11LLLDEM2Mt1AHMAWHtxI7LI|orQLJMIIzfInfE1BoTfBoTfIM9uffpffI~M9tL HH9uA1M‰fUHAWAVAUATSHXHdH%(HEHHt@HuzHE1y04HfIf@@fEHEEHCH~FHfH|u@VH ̰dE11A&;HHcH|1H 1II @LDMI1I@MBMIE1IAMBMIE1IAMBMIE1IMBAE BBD I΅~fH|uVHcH|1H 1II @LDMI1I@MBMIE1IAMBMIE1IAMBMIE1IMBAE BBD Iw#L{A~~IH t aH dE11AP9ADmAu H}Au HE H}I>H5gHI[GI~LzEH{H{ ffEfEHUEKHtSIHuLE HIt4DEHu LIEAu.A E1)AH ݭdE11D=8LdH%(H;E DHX[A\A]A^A_]H= H5 ]H H}LLn}dEHXHMH1H|drNHxIIIPfHnf1oTfoTfHI9uffpffH~L9t H 4HH9uHHsE1H}1LE1v}AfUHHH@@Ht]]UHAWAVAUATSH8dH%(HEHttHHtjIHuHMHE04HIf@@1HtiIFIF INHIFRH tE11AT66dH%(H;EDH8[A\A]A^A_]E1fAF(L]jILRjIMMu[LHuLRHtGHCI?IcwHHcHaH tE11sAt5E1E1A~(uIFINHQIVHDIF I>H5cHECI~LMLeIu L LsXLHB L$HLDt9STHH{ uBH "tE11AH tE11DAyUHE1HC H>HS(H1H{&H{0H{8H{@zHLcQ HcJD9HHL)HsfE1LHuHyHt)IHSHLHMHI:H tE11DAIIK4N IfE1fCoTfCofIM9uffpffI~L9t L HH9uMu xHHKLLLMH{tGLUgHHKHSLHEH}LMH}toLdtcEHu!HIULLMWH}HuL_ZAH ˜tE11DAH tE11DA22HLcI HcHD9vHHL)HsE1LSHHJNIfE1fCoTfCofIL9uffpffI~H9t L HH9uMnLLHL }E1T@UHHH@@Ht]]gUHGT]UH#GT]DUH]DUHAWAVAUATSHMMHHUHuI膾AI9v%H Ht1ۿ1A0LQH1HEt*uYM9s|H o1I1rARH}LLLϳtlIH@@LHtcHuHULdH t1I1A*0LPH}v`H o1ۿ1tA/H}$HuHULH}t HEL PH[A\A]A^A_]MKH}LL'7fUHAWAVAUATSHIIH_XHM EH} AAHE ALOIHuIL?IHtIwHtIP TII HI(MMuLIIH IHuI(L轰IHIw0HtSI8tLHEI@tKIAU IIw8HEȋP ItKAB>HHtjIW Iw(MH}H}L;H}ЅtCI@Iw0t>IHt&Iw@ILHMpt MLK>E1H DH[A\A]A^A_]fffff.UHAWAVAUATSHIHH=zH5 L5oMtAt)L=la1=]A*LD=HLDAH=K L=7AuIGHIL=AH= LkXL LL9t:Mt+1fDHH AHLL9rE1LMtRH1L=HtH+I$HLLuI~:Icv H}=AUHE1]fDUHAWAVAUATSHHMdH%(HEH IHHDdAADHHPHMHXNHMlHE)hE1D}HUf1dH, IA/1~H tA1$fff.AM~!IE@H|uQIEHcHT1H 1HH HDHH1HHBII1H@LBLHE1IAIBIIE1HLBAE эBD I!r'1eH tA<#@0&IHWAGAGA1HL@D`tIGIG IOHIGfAG(LWHpL~WHLsWILhWIL]WILRWLLmfInLefInfl~pH]fHnflM@HxHffvfvfoЈTP҅@L@D`1H ŒtAV"LmMtGA(uIGIOHQIWHDIG I?H5PH2e0ILAL謽E1A3D%= AAAD}LPD9GH}HuH} HcOLPD9HE)HȃtfH7HHuD)ʃrLHM)fffff.HHHHDHHDHHDHuHEGHJ LXHxD9GHu9HxHcOLXD9zHE)HȃtfH7HHuD)ʃrLHM)fffff.HHHHDHHDHHDHuHEGHJ I~tCI~toI~I~ I~(I~0I~8sBEHWH@@IFI~uHWH@@IFI~eHgWH@@IFI~ 9H=WH@@IF I~( mHWH@@IF(I~06H4WH@@IF0I~8u,HWH@@IF8I~Hu40H H}H5{4E=w`H}Hh_IFtIFgIFZIF MIF(@=t=H}/#H}HlU[ ffff.IVI~ Hu1LELMuAWHH}t1HURI~(IVIN HuLELMuAW5HH}tHUReI~ Iv(HIvJxIMn I^(H=H5W/H}LH I^(H=H5%/H}HHHpHuHULnIVI~HuHpMI~HxI1ɅI~Iv IV(LdIVIv01HMLEMNdBIVIv81HMLEM,d IFH~:HH|u8q@IF0IF8u @HI~.;Eu&LLL8t%11DH [tA1|H =tAvH +t1ۿ1CA E11LbdH%(H;EHĘ[A\A]A^A_]Ãdt LztLHLI_Ht'Cu H;Cu HHIFIGIFI_Ht'Cu H;Cu HHgIFIGIFI_Ht'Cu H;?Cu HH'IFIGIFI_ Ht'Cu H;Cu HHIF IG IF I_(Ht'Cu H;Cu HHIF(IG(IF(I_0Ht'Cu H;Cu HHgIF0IG0IF0I_8Ht'Cu H;?Cu HH'IF8IG8IF8I_@Ht'Cu H;Cu HHIF@IG@IF@I{IIIdžIVIIIdžI1IIIdžIHt'Cu H;MCu HH5IIIdžIHt'Cu H;Cu HHIIIdžIHt'Cu H;Cu HHIIIdžIHt'Cu H;iCu HHQIIIdžA$AAkff.UHAWAVATSIAItA t AuqHt]HWH@@H^ t\CHHCLDHLA[A+E1MH ;tE11hA1*E1Cu H;,Cu HHD[A\A^A_]UHWzn[]ffff.UH1]UH1]UHHa]UH帥4]DUH1]UH1]UHAVSHdH%(HEW)`)PEEE()p()EHEH HE(EHPLp HL)pL3H=xH  H(dH %(H;Mu HĠ[A^]UHAWAVAUATSPAHHIIHMHL;HoH8H=1HM1L%;E1HoH8C /L1IL9uL-koI}H ;1lL=E1fI}C &L1HIL9uI}HE11&I}-DH[A\A]A^A_]ff.UHAVSH dH%(HEW))))EEEEEEp`Hɼg jHH;ʄgHH+rn+hH8HkAكH@Hy!~[HHH@HEPH5\vHHuLLH#+HH=/vH n:@LdH %(H;Mu H [A^]Kff.UHAVSHdH%(HEW)E)E)p)`H=H5yW)@)0) )))HDžPH5uH HBMHHH5uuHPW)E)E)E)EHHuHP HHDpp,LH8HtuHHuHLPHH`HP HHXp,HLQ1 u/H=tH kH` HQ1dH %(H;Mu H[A^]膶fDUHAWAVAUATSHdH%(HEHH1ff`fPf@f0f ffffffffffDžpf`fPf@f0f fffffffffffpfp!Džp fo,f0ffoLE1HL0L-rDfPffHL2C,fnfPfofofrfrffs ffofsfofsffofs fff0fo0AIII `XH=EHu3]t3HH=H%du J}uͿH=u.uNHpHLp AMHpHLp As&L HpHLp H=H i:Hp DAffpH=&Hչt HH=Du+uHHpHLp E1JHpHLp 
E1s&L ׯHpHLp HH=H 1Hp YHDžhffPHDž`H=@H5|H=H@=%NH HH5H1Ѕ?H=H5D H fD$H2H$HD$ HHpHhLPyH Hg:H=H Y\HpHӺ0:HڅH=@H5|D 5H fD$H߇H$HD$0HHpHhLP{H Hw8H=‡H ^HpHӺ Hڅ H=HpH1H=H FoHVi\H0 H1衱H5jHH~H~dfHH H0Hp@A/} H=pH ݺHp@V H5H2H0~/ H H0Hp@A贁 H=eH UHp@HH0 HL L1xH Ps LHafffH=FH5HL='L5H=)H "L H A jAWjAVj"H0HDo H=H H (fffH=H5YHH=H džL PH A jAWjAVj?"H0yH=H ۺH HfffHDž`ffH= H5LHHH UL nHH`A jAFu IL|E1Hff fƅ/*H=H A H$Iff0f@fPf`L-=}oH IHHcQH H!IIt&I s811L-bKE1H1 @ <0HH9uLH1foT8 foT80fH H9uffpffpUffofrffofqff~I9tN=@fnH ~ 0fHH9ufpUffofrffofqff~@H HBLH)H H<0H9swHPH9sk1HHHHODDDDIIF PDDIIF PDDIIF PHHH9uH s1 HxIIIE1foOofof`fpNppfhfpNppfgfB @fof`fpNppfhfpNppfgfB 0H IM9vH9tK@HHHHHf~ f`fpNppfgfHHH9uHw H)H0@1蟸INHHcQH H!IItI s!111 @ <0HH9uLHf1foT8 foT80fH H9uffpffpUffofrffofqff~I9tN@fnH ~ 0fHH9ufpUffofrffofqff~@3H HBI)I@H0I9swHpH9sk1HHHHoDDDDIIF pDDIIF pDDIIF pHHH9uH s1 HxIIIE1foOofof`fpNppfhfpNppfgfB `fof`fpNppfhfpNppfgfB PH IM9vH9tK@HHHHhf~ f`fpNppfgfHHH9uHw H) HP1ֵH=߁H EH0@NL'IHIH= BHIVH= $HH= LH4H=EH5nIH=$u H=7^HIfAGAGAG(AG8IGHAGPAG`AGpAILJAAAAILJIH=vu H=HaI$fAD$AD$AD$(AD$8ID$HAD$PAD$`AD$pA$IDŽ$A$A$A$A$IDŽ$CIH}MMfAEIEAEffffƅSH=cHALE1m-H=% LHH= L1LME1HH=HAALHH=H w;HAHD`DžPLL-H/H =L-HHbRH8L1hH_LH fffffƅHRH8H0DžP1H_MtI? L;MtI<$ L%Mt+AEu I}AEu IELLPHHtAfHCC1H~HH HCCK8HIƿHIfH@@1HHIHIGAGAOMf M+AD$uI<$AD$I$E1L誥ffpf`fPf@f0f ffffffffffE1HOH8H;E11ӯLHTCu H;Cu H2H%LM~ ffpf`fPf@f0f ffffffffffHIH~HH|uQHHcHT1H 1HH HDHH1HHBII1HLB@LHE1IAIBIIE1HALBE эBD I=&HHL跫= H=|H L]HA腘DUH1]UH]DUHWGGPG@G0G (NGø]ffff.UHAVSHpHHIdH %(HMLMWEEEEE()EEHUHMLEH=LuLPAR=HHLq`LdH%(H;Eu HHp[A^]t@UHHIHHWHO\LGLOH=PAR$=H]fUHAWAVAUATSPIHL~DfDnF\HxDH8r+?t?H)L1藘LL1H8t8H)L1kAEnTAEfXLLmAF\WAG0AG AGAAȉAFȉCAFȉCAF ȉC AFȉCH[A\A]A^A_]UH]@UHAWAVATSHIIAĩ tucAA(uLLH[A\A^A_]Aĩt1z@uBLLHAu#[A\A^A_]锟LLH[A\A^A_]鞯[A\A^A_]LLH[A\A^A_];ff.UHAWAVAUATSHdH%(HEWEEE)EBEH>IHLDLeIfDWAE(GGl]fUHWG`GPG@G0G ((GGl ]fUHHIHHW(HOhLG$LO H=PAR8H]fUHH]fffff.UHHIHHW(HOhLG$LO H=<PARt8H]fUHHvl]UH]@UHAWAVATSHII tuNAĩt1@uBLLHAu [A\A^A_])[A\A^A_]LLH[A\A^A_]LLH[A\A^A_]pUHAWAVAUATSPIIH}Lb(Dj Z$BhHxD(H8r+?t?H)L1LL1H8t8H)L1A^`AEndLLAFhWAD$0AD$ AD$A$1I IrxIAHUЉ ItcANɉJItTANɉJItEAN ɉJ It6ANɉJIt'ANɉJItANɉJIt ANɉJH[A\A]A^A_]ff.UH]fDUHH؞]HH|6*)bHGHp0ZYHGH9Y/HGH1 g&3gHG HXhJHG(Hd . 
HG0HOHGHG8H0HWG@]fffff.UHH,+!1"HHdLȣ_UHGHQSok#HGH@Yw8HGH>(HG H9S%^HG(H,+HG0H,Ł-HG8H HWG@]fffff.UH]UHH0]nfffff.UHAVSHHHHdH%(HEWEEEEEEp`H,+!1"HHdLȣ_UHHQSok#H H@Yw8H(H>(H0H9S%^H8H,+H@H,Ł-HHH HEPLL HLWLdH%(H;EuHH[A^]藍UH]UHH ]UHAWAVATSIIHLbPHxDPHprHtH)L1܎t1Ծ@uLLHpu$LL"1pH)L1IF@INHHIHIit1[@u<LLDHt<IIIWDHs1cLL"Hu1rI1fI4HH3ItHHsItHHsItHHsH HI9uHt#I1f.H@rLAD$DIcAIT1HD 1HH HDHH1HHBII1H@LBLHE1IAIBIIE1HALBE эBD I1LuAN09NȉMAA;N4{(HKLs H;KuLHIHHA LEI9LH=uNH{J4趭Ht8HCL{HKHCL4HC"1HMЉAAD$AAfC(HIHDMEAEAE9VArLHuLUEDM̅IcFLUA9|gD)QtI6HHur?D)IHIHDIHDIHDHuHEAFLEIxDɃ?HIDH E11LLIED }̀{(uHCHKHQHSHDHC LuE~4ATAV0L}LL 9IVH}LHL}AG~/IH}UH|u+pL}AG H}UuAGL}AGA+V0L7 HUBLLHAG~!IH|u PAGuAGAGH}LLfCHMЋA~ H H|u P1HMЉAAE1 HMЉAAǾH}?HEL0LchHEHHcHLLvxkAtzLuLLHUE1AF~,IH|{PLuAFdAFWEt6AHus<11uH E11eAo!1HMHUHuaLHW1WAVATVHH9uVfpffH~L9t I HI9uHtAD$1HMHUABA3D$FUHAWAVAUATSHxEEID4 ZA HcDLcILLRA D)HHcIcIHcHIcH<1Hx[A\A]A^A_]coH}H]AJAE)HMDuEN4HHuHUDLMMLeM~HxJHAu H #1Hy H \<1ۿ1dAvH[A\A]A^A_]DUHHGLMtDOH4IHHHA&LG4LHHHt蔢ݤ]fDUHIHHOLLG4LODHhHLMt ASP謝 P^ H]UHHHGLG4LOhHH<$HHH胨H]HH UHAWAVAUATSPHGÃHHtkIԺIHI1bLHsPEEL1LaBLHMtIHcHLǃ|ǃxjxt/HHcH5|tHHuHcHtHHH_ǃ|ǃH[A\A]A^A_]fff.UHAWAVAUATSHLgEAAC,xHK,|HtEHt~HLIIHHLLMM02E1IH11 'C@AGDǃǃ|H{8HC0H@H H9@@L{PLPLXHC0tH0LMH{@HD AF0 HLvt HLuޅLHMHUAHUHMLMeMt誗K>H4HH9t [A^A_][A^A_]UHAWAVATS=AHWӃHӉH5HcHHǃxH@ H4HH1EAr>D9}5HH4H9tHqDyHHDAM1AxD)fHEIHHLZA|$#LHcIcH)HL&ǃIHAAAIƺILHYHI4L9Hc胜IH11xIHHHcHE~D9} AƅdHcIcHH)LXHcHHTHHHHTǃ|1xEtIcHcHH)HXHHHcH ǃ|AV1x~HDDHH&XeAV1r_uYHDDHXD<2HcHtHHWǃ I4M[A\A^A_]1f.UHWrrrHHrrHHrH!HrHH|r]f.H@H9wUHHGDGHHH ]@UHWZrcrlrHH3r1rHH(rHH"rHHr]f.UHAWAVAUATSPIHD`M)r-IHHH}E1fK<7J43HUMM9vH[A\A]A^A_]fUHWqqqHHqqHHqHHqHHq]f.UHWqqqHHsqqqHHhqHAHbqHH\q]f.UHWjqsq|qHHCqAq HH8qHH2qHH,q]f.UHW:qCqLqHHqq HHqHaHqHHp]f.UHAVSIHH!Ht/HL!H t"H <1ۿ1fA覮eHɻHEHr"H X<1ۿ1tAr1HGHIHHH16I A@[A^]UH]f.UHH@(E(M (U0HE@D@HLT$8HD$0T$ L$$OH@]fUHH0(E(M @HHD$ L$$2H0]fff.UHAWAVAUATSHLmHdH%(HELm0s%H +<1ۿ1uAEM9v%H <1ۿ1gAH}H@HHH8H]@LHPIH1M_U HL;RHPMLLHMHHu!HHH H9t,1:H -<1ۿ1oAGL}8HELEHMątEHA05ILct HLu܅HLIILHt!HHLHAIALMIr-HI,HI4LH9H9„q1LHHtH<@0HHuL)Hw@J;HL0L0L0 0HI9uD1ɉMLE HUMHPLMtH@ H@ LE0Mt8HU(MHPLMtHH[ HHm tLHP11HEHHt$HHH}0HHBHOH8L(dH%(H;EHĨ[A\A]A^A_]I s1VLHJ;H1D2 2W5W5)5)5H H9uI9A1HLHH4H1HH9uI9uTP@UHAWAVAUATSHdH%(HEHHEH;E0H(L8L0W)ELL@IԺHL1IQ HLbNLHLLHxu'HE(HpHH H9@@th1=H b<1ۿ1oAH E<1ۿ1eA _dH%(H;EHĸ[A\A]A^A_]Lm LML}HpMtgHL0H8fHAuAB0ILpt HLuمH)AH8L0IL0H8MIt/HHLLAH8L0K,ALMIr/HI4.HƀK<&LH9H9@@H1LHHt$Hfff.D 0D05HHuL)HwJK,Hf.L005L005L005 005HI9uD1ɉMLH@HMtH( H(. 
LuH]H@11Mt"IIBHH}_H}HLCt H ՠ<1ۿ1eA)I s1VLHK4,H1D> >W=W=)=)=H H9uI9AZHLHH<0H15HH9uI9-3LUHAVSIHt+H w)H <1ۿ1gAr1HHLH!Ht/HL!H t"H <1ۿ1fAˤhHɻHEHr"H }<1ۿ1tA藤4HGHIHHH1,I  A@[A^]f.UHAWAVAUATSHdH%(HEH}t%H <1ۿ1oA EHEI w%H <1ۿ1gAѣHLMI ILIHILrHA@IHLLMLMLILMPu@u8u0u(u uj H@tIMALHMI I 1dH%(H;EuH[A\A]A^A_]Ifffff.UHHtH <1oA#HEH w"H <1eA财1]LUJHHD@HIH ASu(u PARH0]ÐUHAVSIHLJ(HH!Ht/HL!H t"H <1ۿ1fAeHɻHEHr"H ͜<1ۿ1tA1HGHIHHH1*I A@[A^]ffff.UHAWAVATSH} u1IAHt HH;(sDH ><1}AH #<1yA ?1[A\A^A_]LU@L]8H]0Lu(L} LeHH(@HPARASSAVAWATj dH@fffff.UHAVSIHLJ(Ƈ8HH!Ht/HL!H t"H j<1ۿ1fA脠eHɻHEHr"H 6<1ۿ1tAP1HGHIHHH1(I A@[A^]fDUHAWAVATSH} u#MQIʀ8t.L0Ƈ8L"H <1yAk2H0L1Ht H;(s)H m<1}A艟1[A\A^A_]LU@L]8H]0Lu(L} LeHH(@HPARASSAVAWATj H@UHSHdH%(HEHH9u+HtDH d1ۿ1tA(tH d1ۿ1fAĞRIHGHEHUHHH1&ƃ@HMHHHHdH%(H;Eu H[]3EUH]f.UHAWAVAUATSHXIILu dH%(HEЋ LHw*Ht%H d1ۿ1uAD۝\D@M9v%H d1ۿ1gAI詝*LEH@L9LUHuLU@HE8LH}HW)E)EHuHLLIAVARHHEHHuHLLMIIAV HtlHEDMHu ARwpD҃HEHtHEL9uQH d1ۿ1oAN譜.H `d1ۿ1uAU舜 1LIHt8AA)LMMIHE1ff.CIL9uIM)L)HwE L)fff.zD=zD=zD=D=I<HHH uEW)EEHHtLULELMLLLPAR~HELELMLLL3P5HHE@HMHdH%(H;EuHX[A\A]A^A_]5BDUHAWAVAUATSHhHdH %(HMDB MIAw%Mt H d1ۿ1uAgHIH9@H;MLHxL](LU LH}LW)E)E)ELmLLHLILMIPASJHHEHLHx DAAwgAAHEItHEI9uHH d1ۿ1oAlH d1ۿ1eAqsE1IMIt4AE)MIIE1ff.CIM9uMM)I)IwMA M)ff.EHBD EHBD EHBD EBD N III uEW)EEHHtLULELMHLPAR{HELELMHLA4$PHHHuHMLLLELxAVyHt9HuHE@H}}t]H ]d1ۿ1eAH @d1ۿ1uAzhdH%(H;EuHh[A\A]A^A_]û?fff.UHAWAVAUATSH(MIH]DoIAB IIHUMD)1L9AIHuHHEЋOfAAFAU> щʀ@IHDAMtI~LL<MAEAfHnfpDfofsfofsfofsfofs fofs(fs0fs8f( sfTfTfgfTfTfgfgfgfWf`fpNppfgA'1HAt)AHHIUH9H)fDHHHHHJA4A<HJ *HHHuDfHnfpDfofsfofsfofsfofs fofs(fs0fs8f( qfTfTfgfTfTfgfgfgfWf`fpNppfgfAFL9MnLLHUUMLut%MIwsDA0EAHEHv17HHHIH I9u HE H([A\A]A^A_]LH AVANu1ANDA0FDA0FDA0FSANLH8A0NLH0A0NLH(A0NA0FDA0FDA0FDA0F E0INHMA&DLLHUUIIظEIpH)H9HHBHr&J<(H9HBI4HH9s\HuHHI9sMLHAA0TIHYHAHnHHHu]ffff.HHHLH)AoIAoLfALIAH9Hsffff.UHAWAVAUATSH8IdH%(HEЋw1L9MHMLELeH}HIHEr=I_ffff.II1GIFI1GHHIULIIIwMLmIIrK'HK &H9I9tg1MHItHA4@04HIuL)Hw`fDAA0DADA0DADA0DADA0DHI9u$D1fI4H14HH9uI9wHHAHH}t%1fA7HwHH9rW)EHuLAAGWEAGHUHtIH}L7dH %(H;MuH8[A\A]A^A_]K8ff.UHSHdH%(HEHH9u+HtDH d1ۿ1tA(tH d1ۿ1fARIHGHEHUHHH1ƃ@HMHHHHdH%(H;Eu H[]c7UHSHdH%(HEHH9u+HtDH d1ۿ1tA(6tH d1ۿ1fARIHGHEHUHHH1?ƃ@HMHHHHdH%(H;Eu H[]6UHHGW@L@@@0@ @(jk]DUHHIHwHVHNXLFLNH= PARH]ffff.UHHHwH]fff.UHHGW@L@@@0@ @(j]DUHH]fUHHHwH]fff.UHHGW@@P@@@0@ (zj@]fffff.UHHIHwHVHN\LFLNH=PARH]ffff.UHHHwH]̝fff.UHHGW@`@P@@@0@ (m(k@@l]UHHIHwHV(HNhLF$LN H=+PARcH]ffff.UHHHWH]駢UHHGW@`@P@@@0@ (l(pk@@l ]UHHIHwHV(HNhLF$LN H=PARH]ffff.UHHHWrlH] fUHHGH؞]HH|6*)bHHHp0ZYHHH9Y/HHH1 g&3gHH HXhJHH(Hd . 
HH0HOHGHH8H0HW@@]ffffff.UHH]"fUHHHW0H]wUHHGHɼg jHH;ʄgHHH+rn+hHH(HkAكHH0Hy!~[HH8H@HW@@]ffffff.UHH]RfUHHHWH]馤fDUHHGH,+!1"HHdLȣ_UHHHQSok#HHH@Yw8HHH>(HH H9S%^HH(H,+HH0H,Ł-HH8H HW@@]ffffff.UHH]邍fUHHHW H]ףUHHGW@L@@@0@ @( *f@l@|H\@l]ffff.UHAWAVSPHILLIw\IWxIMGtIpH=vMSAV{H[A^A_]UHAVSHLwHLwHI\HL[A^]0UHHHFQH7H@Q]fffff.UHAVSHPdH%(HEHLJHcHH~s11}E1%HfE1ffff.BofBofI L9uffpffH~H9t@H 4HH9uE1HMaHhH@E1M) ffff.NNNMIHuMHcHHs1ofInfpDAAAIE1BohBoxffBhBxI M9uH9tfL!HH9uffEfEfEfEHEL@E1E1HM)N N N M[HuMAL]1HM)fDL4M4M4H[HuMILHуIIfInfpDfHnfpDE1BoTՠBo\հBohBoxffffffBhBxIM9uI9t"@NTŠM!NI!M NIL9uHcHH@ffEfEfEfEHEL@E1E1HM) ffff.N N N M[HuMAHUE1HH)DJKJM[HuHILHуIIfInfpDfHnfpD1foTՠo\հohoxffffffhxHI9uI9t(f.JTŠL!JfAo>fH H9uffpffH~H9t@I ΐHH9uHIffEfEfEfEHEHuHLHcHHH}L%1Y%HfE1ffffff.CofCofI L9uffpffH~H9t@H fAo>fH H9uffpffH~M9t@J 4IM9u1H@Džt%H L;1ۿ1wAyHHfffooHfffUfUfUfUfUfpf`fPf@f0f fffLmHLH`$LLHBL}LLC$LLH%HpHL#$HH$HH $HHLLPLH#LL#LL#LL#LL#LL#LLHH0HL#HH#HH#HHLbLLH`#LLU#LLJ#LL?#LL4#LL)#LL#LL#LL#LL"LL"LL"LL"LL"LL"LLHHHL"HH"HHL}fffLLHg"LL\"LLQ"LLF"LL;"LL0"LL%"LL"LL"LL"LL!LL!LL!LL!LL!LL!LL!LL!LL!LL!LL!LL!LLu!LLj!LL_!LLT!LLI!LL>!LL3!LL(!LL!LL!LLHff.LL uLLLLLLL LL LL LL LL LL LLx LLm LLb LLW LLL LLA LL6 LL+ LL LL LL LLLLLLLLLLLLLLLLLLLLLLLLLL{LLpLLeLLLGLLLLLALL6LL+LL LLLL LLLLLLLLLLLLLLLLLLLLLLLLLL{LLpLLeLLZLLOLLDLL9LL.LL#LLLL HLLLLHLHHLt7AAN)M)EH}HHfoEfoMKMt]AFHANX)M)EHHHnLuHLLHLLH:foEfoMAOAdH%(H;EuHX[A\A]A^A_]xUHSHHdH%(HEJ)M)EBHJX)M)E)M)EI)M)pAHIX)P)`)0)@H0LPLpHMHuHUHIIPARASj!H (E(MK(E(MCHKX(E(MdH%(H;Eu H[]Nfffff.UHSHhHdH%(HEJ)M)EBHJX)E)M)E)MHMHuHUHII/(E(MK(E(MCHKX(E(MdH%(H;EuHh[]UHAWAVAUATSHxHIHhdH%(HEHp`1AEAM))AEHAMX))AA)) AAAAA`iLHH@HH (HpL(IL (Il.II`H`I`HH`LI`I`Ht}J<-pHI4/HpH+HpAHLaI,HpNpL pHLLASARPjVH QW)`)P)@)0) )))))))AAHuHLL L(L0L8L@LHHPHXH`HhE1HLpIIEu#HHH0HPII,Ak<3wAvA1ANHH ANHHH ANHHH ANH41HILHHDHAvD|v1H<1HLpI҃ ADIظ?)L`D!! 
։HHL1H1H111E1E11HE1E1E111ffffff.H1H9HHII!HIUH!L MI!MeI!M MI!M}I!M MI!MUI!M LI!M]I!M LMI!M]I!M MI!MMI!M II!I]H!L II!MEI!M II!I}H!L LI!IuH!L HHLMMMH#I#MH HHHI`HHHHH1LHHHD1MIAIAAE1HAHAMAAMLIL!IHLELILILL`LH!ILM!I I!M!M HH!M!I H#I!I LLLHHHHHHHLLLLL|HHH0HPIIHPHPHPj)H Av'AA$41(( HhHH0I@HHXHPIdH%(H;EuHx[A\A]A^A_]UHAWAVAUATSHHHdH%(HEW)E)E)E)E)E)pLuLe)`)P)@)0) )A1LpHf.(( (0(@)p)M)U)](P)E(`)EHDIAAIIACABp1L@H H5HH4LpLLuLLeLLMMHPPH0PSjH IͰI H tLLLLMM9&HL{AM LHLcLHLsLHLp1LI@H H5HA3 HpHHuHUIIHPPH0PHPjH L{LcHLs(p(MHHAAN@HHXA$AL$dH%(H;EuH[A\A]A^A_], fff.UHAWAVAUATSHLIIHHdH%(HEI))AHIX))LH0E11P AA)) W))))))pHHHpHLMM$HPHpH0HMMHPLATLpAWjTH HHHH0LPLpHPATAWjH HHH0HLLHPATAWjH HPHpHHLL0HPATAWjH HHHHPLpLHPATAWjPH HH0HPHLLHPATAWjH HpHuHUHL0LPHPATAWjH H`1 HHcIItOu 1DpƒIB֨D)B-`IuHH1H9sH<1HW)P)@)0) ))L AAHTD()(@)A) AB)0(@@)@(@P)PE1IEt Iv+BHLH@HMI IIFAM HHIVHHDIvIII>AGHD1LL t\HHHHLHLH@HMILEASARPjH IFIVI>IvE1DHHH p1LH tHHH ,LIHDHLH@HMILASPARjVH E1B-`0(H@H*HW))x(L*IEffffff.1H+H H1HP(@HڿH@@1Hp0@HAIAAE1Hx8AHAMAAMMIM!LEIHIHLHLLE5LX@HLH@HMIASARPjH H((HHA$AL$@HHXH@IdH%(H;EuH[A\A]A^A_]DUHAWAVAUATSHdH%(HEHcHH.HIHs11gʁHfE1fDCofCofI L9uffpffH~H9t@H 4HH9uHHH)M)EL}LLLHK)M)EH}HL~AAF)pHpIHIHH׺HHIIIHIIHMI1Lx@LIIILHIILILLJ:HHE1IALDLMI)EE1HuAIIIMHHHIHIIHHIK 0HLIHIE1LAMDIIOIE1LEALHIILIHHLHHILIIMMLHIM HLLH1LHII)ȹHMII)ʹHII)˹HAMHHI!HH!L H!I!I H!I!I H!L!H HpLxLUHEfoEfoMftMftpff=ffPf@f0f HDž`HHcHHtPH1H LM) ffff.MLLHvHuMAuqHcH1H@H HyL( (0)M)EH}HUHWfoEfoMftMftpff=t#1dH %(H;MuH[A\A]A^A_]øfUHAWAVAUATSHhHII2ЩuLLHHh[A\A]A^A_]I7HCHEHHEHUHCHEHIIHULLEHCHEHIHLHHHHHHHEHxHHLpIIHHHIMIHHIIHMEHUMLIHLM@MIIOHEHHUL}IHEHHIHEHHKIIIMLLAHLAIIMHEHHAHEHEHUHUL]I@HULxHEALIHAHغHHHHHHIHHLLHUKHI?HIAHHHIIEIMDIIHAMHHEHpHEHIHEHEHIIHEHIHHEHHHHELHML]IIIHHLMMIAHHIHHHHHHHHLHUJ7HHHI@HHHHMDIMHLDIAIHEL@HEIIHEHEIHIHEIHIHEIIIHLLH}HUH IHLMILLHMALHHILHHHHLK MMLJ1HHALIILEHIIHDLIM AI1LH@@DHL)AMLAAAMI)ڻHDHL)AMALAAEM9AMAHIM!HH!L L!H!H M!I!M I!H!L HpHHxLPHHHh[A\A]A^A_]UHAWAVAUATSHHI -ЩuLHHĈ[A\A]A^A_]7H L{HsHuHSHUHHHUHHXHHIHpHHhHIIHUIHHHIHHHEIкHHIHUHHHLPIHHLLULMLLLELEEHK.IHEE1H}AHEIIHUHH`HEIIHxIHELIL}MILLIMHuDEIJ<3L]DmMEILUMILIIIIAHHIHHHHIHHLHHUI7HIHI@HHHIMDIMDIMHADuIHEH}HIHUHHEHHIHLpLHMHLxHUJ<HIHLhLILLMLMAHHIIHHHIHHLMJ HKHIHHHMMHHDIMHAHHEHIHLuLL`LLELELMOTHLHXHMILIMHLAIHILHHHHLK>MLJHIALIILEIIMHDHII<6I1HH@@DLL)AMLAAAMI)ٻHDHL)AMLAAEM9AMAHIL!HH!H LL!H!H M!I!M I!H!L HPHHpLHHHHĈ[A\A]A^A_]f.UHAWAVAUATSHHMHHHHLm(dH%(HEIffEfEfEfEfEfpIAI I AI AHIIEI EI EI EHf`fPHPLLWff@f0f fff}LH)0C)@HLHqHAHHLLYL?I?1HLAEEIM)ȻHIAMIII)IAAI9KAMNUAHIL!HI!I L!H!H L!I!I I!L!L LHLHA$)AoD$f ffLLLH0HLIuIELI2IRH@1IBMEE1MB@AIUHLI1HHAAAEIM)AMIMAAEIM)AMAEIAMIIM)IAEM9AMAAMML!IL!H L!M!I L!M!I I!M!M HLLLHHHHH+PH1HXH)ϾH@H@1H`@H)ѺHL1Hh@I)кHھHHIL!HHHLH+H@@E1HAH)L@@E1HAH)L@@E1HAH)L@HHL!HAIHHHLHHLLLHLLL fffHHPHuLHL+0H1H8H)ȹHH1H@H)˾H@H@1HH@H)ѺHںHIHI!IAIHILLHLHpHHfffLLHHPfffpHpHu LHpHxH+H1H@H)ѺHL1H @I)кHH1H(@H)ֺHڿHHIL!HHIHILIHI?HHHιIDDLL)AMAEIAMIIM)IAEM9AMAL,LTHIM!HI!M L!H!H M!H!L I!L!L M I M LPHXH`M H L H HHHH!LLHIt$H!LLHI~H!H?H HhHHHt4HHHHLLHHHHHHL6LL?I?H4uE1HHӿIDDHL)AMAEAIIM)IAEM9AMAAMMM!IL!L L!L!H L!L!H I!M!M H0H8H@LHL0LLff fHHLafffHH0L7H}HP7HMHUH+H1H@H)¿H@HE@1H @H)AIAHuAE1H(AH)L@߿HIIM!HAIHIHH)ѿHH@@E1HAI)AMAHEE1IAL)LLD1MM)AIAڻHIM!HىLHMH)AMAEE1IAL)AMAEE1IAL)AMAEE1MAM)AMAAMMM!LEIHMHML]H)ʹHHEE1LAH)ιLE1HAH)ϸL1LI)HظHLMHL!H‰HHLHUHEH}HMH}HHPfffHHHHEHH)йHH1H@L]I)˹HLE1LAHMH)L@@LE1LAH]H)L@AMLL!LEMHHH)кHE1HAI)ҺL1L@H)ѺH1L@H)׺HںHI!HЉLHIIܺHHHILEAo8Ao`LAo(AopMMIfHnfpDfoUffHnfpDfffInfpDffInfpDfffo}ffffffLA)AqH!LU M I!I H!IBH!H H!IrH!H L!I#RH M!LI L!L L!IzL!H L!IBL!H L!M#jI HH HzHBLjHE(o 
ohHo0oxfDopfDffAfffEffffffH0xdH%(H;EuHH[A\A]A^A_]-ffff.UHAWAVAUATSHLMLIHHHdH%(HEW)E)E)E)E)E)p) )H}L.H]HLHpLHMUMMLuHMHLL)IĸH1HMI)HI]L]1LHH)H@MmH}@1H@MI)׺HڸHHHH!ILLHHHLHMIE1IAE1LALIIIMILH)ǸHHHHLH)ùHٺLHкHH)HI9ϹHAMLI!IM!M I!L!L I!L!L L!IMI!III LH?MIHMIɹ@IMI)˹HLƺHֺHH)HH9ȸHظHHKH!I!I!L!HNUI!I I!M I!M H!H MHLDIMDMIDIMIHH)øHHHDLH)ȹHLALHH)H@@I9HHPHHXHH`HHhHںHHI!HI!M H!H!H I!H!L L!H!H L0H8H@HHHHPH0HDHHEHpLxLHuHLIHMH?H?HELILAEEIM)AMAAIMAII)IAAH9ٹHٹHHI!HH!L I!H!L H!I!I H!L!H HLIHLH?L,UHLMHMI)AMAHALAIEL)IAEL9ϿH@AMLI!IM!M H!L!H I!M!M H!I!HHIMI LHLLLH?LLDHL)AMAEIM)AMAMAAEL9ѹHٹHINmM!HN4mI!M M!H!L L!H!H I!L!L LI2IzL)AMAEE1IAL)AMAIREE1IAL)AMAIBEE1IAL)LٹHIIM!HΉHHII2IJIRMBHuHMHuHUH@1HMLEE1LE@AHUHLIHHAAAEIM)AMMHAAEIM)AMAEI۸IøHM)HI9¸HAMML!IL!H L!M!I L!M!I I!M!M H}LELMLUHH1HAIHH1HQIH@1ICMAE1MC@AIQHLIHHAAAEIM)AMIMIEAEIM)AMAAIAMAII)IAAI9AMAAMML!IL!H L!M!I L!M!I I!M!M HPLXL`LhHHHP HHKH+EH1HU@H)ѿH@HS@1Hu@H)AIAHsAE1H}AH)L@߿HIIM!HHHIHH{HSLCHL+.H1HFHH)¸H1HFI)ĸH1HFI)ǸHظHHL!IʼnHILLM.IFMfINHUHEHHLMLLUL?I?HILIDDHL)AMAIIػHEM)O OL HI9AMAAMLI!IM!M H!L!H H!L!H H!M!I LMHuH}LUH}HLHLHMH}HUHHHLML?I?IDDHL)AMAEHAIL)L ILMAAI9ɹHٹHIM!HI!M L!MI!I L!H!H I!H!L IL)¾H@@E1LIvAH)L@@E1HIFAH)L@@E1HINAH)L@߿HII!HHHIII~IFMFdH%(H;EuH[A\A]A^A_]ffffff.UHfBPB@B0B B1@E1H9AMIL!ILM!L fInL#JfInL#FM oVfpDoZob oj0ffpDffH RLBfoV o^0fffffR Z0HH@HY1HHHHHJPH!H fHnfHnH#BXH oR@fpDf[fpDffJ@HJPHBX]ffffff.UHAWAVAUATSH IIH8HL`I!H""""""""HQL!IHUHLLH!HUH!HuIHIH@HHIIHHHPI1DȃHL!HXLH>H?L!MI=I?M!HDDDDDDDDLPM!MwM!IH1HEH=H1M!I!M M!M H!L H1H8HH [A\A]A^A_]@UHAVSHdH%(HE==ELufD>L1HHËt΃ uZHH 8yfHEHH8Hf1Lu>L11*Hu;tH=aHudH%(H;Eu H[A^]kff.UHHHV(P]fffff.H?tUHAWAVATSAIH[>HAHMcHcPD9v"H3IM)ILsXE1LHL&$Hc{D9LI)IHH;1LALMIK<9NIfE1ffDCoTfCofIM9uffpffI~M9tff.L HH9uMt=H Xs1fA,Cu H; CuHDsH Ha 1[A\A^A_]fDUHAWAVAUATSHHAdH%(HEA?t&H sE11DA+A|&H ԕsE11AY+HHMH}BHLEH LIrr\L@MIIsfInfE1ҐBoTfBoTfIM9uffpffI~M9tf.L HH9uMtGdzBGdDD(LMHUurHOLw H;OuPHIHHA LEI9LH=u6HJ4^ H}HtHGLHOHGL4HGfG(n^H#IH1H}ˆU1HH ʉUADLADeEE1HD1ҹ quQH;HcsHHcHUED1E1HD1ҹpAƅwH}tE11DHURYH}taLHHUHM+4H}tEgHED;`u&I?IcwHHcHTEDȅeE\fM}>DcHED;`AܹɃH'MLeu|EH=H5*;LHH&t}H}LHUHMteE̅t-EH}H1HME1M[t;}u;utpMutLAT$Aƅ%E1Hu~(uHFHNHQHVHDHF dH%(H;Eu=DHH[A\A]A^A_]H sE11Az'AKUHSPH=H5HcH H[]ÐUHH=H5^]xUH]f.UH1]UH]DUHH=EH5)$]^fffff.UHwH uH]H8:]ffffff.UHwH tH]H:]ffffff.UH]DUH]DUH]DUH]DUH]f.UH]DUH]f.UH]DUH]UHAVS11ށGenuׁineI uE1ҁntelAAAE1 Authenti ځcAMD AA1r 1Ή߸1AAEtNu;Ƀr. % Ѓp = <uAAE Au11ЉШu A A?щ%DH D DAAq H=EnHt:HHHI4L:HtHHHH[A^][A^]f.UHAVSHHdH%(HED6AN1<0u-|0xHEu&1A~H<HH51HU%HE1A~HH_9HUHH1t/HEA~u! 
H !CA|u H CHdH%(H;Eu H[A^]6UH15u]:f.UHwu]fff.UH7u]fff.UH'u]fff.UHu]fff.UH]fDUHu]fff.UHSPH=H5:u*=t=]HtH1H[]2fUHH=H51Ʌ ]f.UHAWAVATSIHAH=eH5)=Nty=JIHt5H=> uVEH[N4H= K9(HtIċ=HKtL_HA1[A\A^A_]9fUHAWAVAUATSPHtHH=ntH[A\A]A^A_]L5L-L%L=HHEH=]8MtH;AMtH{AMtH{AMtH{AHEHtH{ HH[A\A]A^A_]UHSPHH=}tHHH„u=HwH{DHt HH H ~1AA 1H[]ffffff.UHAVSHH=|t'HdIHHu@MtdHu%UHw.H{Ht IHIHt2L1H[A^]H ٦E11AA`L[A^]E1UHAVSHHt)HHq!H E11EAHH={t%HIHHu>Mu#E1UHw.H{Ht IHIHt2L1H[A^]AH E11AAL[A^]UHAVSHtH=4{t[A^][A^]H_HHtI1HLH={tH1[A^][A^]-ffff.UHSPHHt HH1H[]fffff.UHAWAVATSIHtJIH=ztkLxIH=mzudIIHHL8H`H=;ztLHHMMfH= ztLHHMuRHM9MBHLLH=ytVL@IwIHt HL8H H 21ۿ1AAH[A\A^A_]M~IIt L1L#LH=XytL1,1ffffff.UHAVSHtH=yt[A^][A^]H_HHtI1HLH=xtH1[A^][A^] ffff.UH1]UH1]UH1]UHSPHH=oxtH5HH„u=HwH{Ht HH H ΢1AAX1H[]ffffff.UHAVSHtH=wt[A^][A^]H_HHtI1HLH=wtH1[A^][A^]ffff.UHHtHs 111]H s 11HHf1f@oo\o$ffoTffH H9uffpffpUffofrffofqff~H9t|tcIHHfnfJ J3 fHnfIL9ufpUffofrffofqff~H9tf.DD2DHH9u]UHHtHsŝ1[ŝ]Hŝ1ҐDA1AiDDA1AiDDA1AiDDA1AiHH9uHt'H1fffff.41iƓHH9u]f.UHSPHbHtHʃHsŝ1RŝsHŝ1@<31iǓ|31iǓ|31iǓ|31iǓHH9uHt#H1f.4 1iƓHH9uH[]DUHH1Htf<t HH9uHH9HB]fDUHAVSHtwH|IItgH=ttLrHu 0Iw*H HHtL0HHHL[A^]+H 1AA1[A^]fDUHAVSHtjHIH=&ttHHu-Hw'H{VHtHHHLH[A^]騼H 1AA 1[A^]UH߃ǿ1]fff.UH1 ]ffffff.UHGЃ ߃ǿ]UHFƱЃwFr V1Ƀw @@7]ffffff.UH߃Ѓ ]fffff.UHO C]fff.UHG ]fDUHP CPAA DCD9|MD9=t@DEHD AACD EQEA AECHD9}]ø]1]UHHtQH1fDHAA ADCDHAA ADCE9| 1Et LAH9Lu]1]ø]ø]@UHHIHHLPLXt&)`)p)U)])e)m)u)}dH%(HEW)EH0HEH0HEHEHELMHM6dH %(H;Mu H]fUHIIкH]@UHAWAVAUATSH8IIH}dH%(HEЅH'pHDwpHoHDppL%pLD%ip@HIW)EHEA)EIGHEMA@LM@LHLE=xZ@r5AALLAHtAHLHLEMII9 ʀu HML1#E1LHEH蚼 dH %(H;MuH8[A\A]A^A_]蜹fff.UH1]@UHHH@HHLPLXt&)`)p)U)])e)m)u)}dH%(HEW)EH0HEH0HEHEHEHU1dH %(H;Mu H]ҸfUHAWAVATSHI1HtA<t HH9uHH9HBHt8LcH=entHL+IHt]HtLLHAdH јE11AA'AHwH{ \HtIL IHuH E11AAL[A\A^A_]fUHSPHHr,H1fff. t HH9uHHHۻu HHHH%HH[]ff.UHSPHt11ې<tHH9uH_11YHBH9uH1FHHH1fff.tLAHH9uHHH HHHHHH胹HH[]fUHSPHH=ltHeHH„u=HwH{Ht HH H 1AA1H[]ffffff.UH]fDUHAVSHtH=lt[A^][A^]H_HHtI1H豷LH=ktH1[A^][A^] UH111E1E1]UHAWAVAUATSH(LMLEHMIA}11!IHtgEAD9EDDKM,A\LMLeMHMHUu^IEHtEHAEGHIH11HcHk9H1A$Ht\IEHt>HHt}teI}tI¶IEIIE1H:HHt}t I}臶WAEIEEH([A\A]A^A_]ffffff.UHHH1E1E1]SUHIIHH1]3UHAVS11pHt";tBH I\3:HtH.11HILu[A^]ffffff.UHAWAVATSHIE11HtIA;tyH IDtMH@@uYHHHt?I@BHt0H|E11HILuHi9I1D[A\A^A_]UHIIHH11]fDUHAVS11Ht9t=H I\3HtH11HIDLu[A^]@UHAWAVATSHIE11HtDA9ttH IDtMH@@uYHHHt?I@B=Ht0H1E11HILuH;h9I1D[A\A^A_]fff.UHIIHH1]UHSP1HHH{>WHCH{ 'WCHC(H{8WC0HC@H{PWCHHCXH{h߲WC`HCpHIJWCxHǃH覲WHǃH腲WHǃHdWHǃHCWHǃH"WHǃHWHǃH(W Hǃ0H@迱W8HǃHHX螱WPHǃ`Hp}WhHǃxH\WH[]ÿHtHH1HfUHHt]]ffffff.UHAVSHHD5AFHD[A^]fUH]FfDUH7]ffffff.UH!v1HH UH HHSHE]fffff.UH!v1]HH UHD]ff.UHHĄn]UH1HH fffff.UHHHrHDx]UHAWAVAUATSHXdH%(HEHtI։HuAA!v E11MDH@H {RL$1IW)E)E)E)EM4M[LCnsH}LHA1AUATH[HIL9N<3IH߾:HL9@ƺ@IHHǾ:׹M~HL9ʺuUHHǾ:讹M~HL9ʺu,HHǾ:腹IHL9ȺMt L:,dH%(H;EuuHHX[A\A]A^A_]LfLe LE14MAAL#Lm L1rUHAWAVAUATSHHIdH%(HE1bIHu6KHtH? 
E11HIMEE1W)E)E)E)E)p)`)PHELPL1HDžHHDž@Dž<Dž8LP]ff.D<LL7M1ARASHAUʽH L^LHHAօ~[1HHH1[1 $AGH[A^A_]fDUH]f.UH]f.UH]f.UH]f.UH]f.HtnUHAWAVATSHHtAAE1ffff.L#K<4蔡WCD4KD4IIL;{rH;mH[A\A^A_]]fff.UHAWAVAUATSH1IHtjEEE9HtxHEgE9ECDD)HEHH<@H}ݫHHtWE9u^HEHC6'Ht IH11Hd1HH[A\A]A^A_]Hv1IE1H1HUޟHEHCHIAfADH@I4L#IK,HIHuzf.UHAWAVAUATSPIHHWHCIHM~Mt0L.IIL谪IHtLLL LkAFCAFfCH[A\A]A^A_]UHAWAVATSHtcHHGHtWH1IHtKH{trE1E1fffff.K<>H3LIHCII9r@[A\A^A_]QHt1IHB1HuAAdž[A\A^A_]荬ffff.HUHSPHGHHrWHCH{ [WCHC(H{8CWC0HC@H{P+WCHHCXH{hWC`HCpHWCxHǃHڝWHǃH蹝WHǃH蘝WHǃHwWHǃHVWHǃH5WHǃH(W Hǃ0H@W8HǃHHXҜWPHǃ`Hp豜WhHǃxH萜HH[]邜ÐUH19¸C]UHAVSH(LHt7IHLINHtHX H@ H1[A^]fUHSP(Ht;HyHHHKHtH@ H@ H 1H[]UHHtH]1]fffff.Ht?HHt7HHt UHSPHGHH1ӚHH[]HGff.HtH97vUHHGH]1DHtH97vUHHHOH]1fHt#UHSPHGHHFHH[]8HtdUHAWAVATSHHHt5IIE1f.II9sHKJ4HtLAHH{H[A\A^A_]fDHtcUHAWAVSPHHHt3IE1fffff.II9sHKJMt9E1Lmffffff.IM9tHCJ4HtLAHCHH1HH[A\A]A^A_]ff.UHSP(pHt;HHHHKHtH@ H@ H1H[]UHHtH]1]fffff.HtH97vUHHGH]1DHt#UHSPHGHH6HH[](UHH]HtHHtUHHGHDHH]1ff.HtdUHAWAVATSHHHt5IIE1f.II9sHKJ4HtLAHH{H[A\A^A_]w̃u UH֝fDUH1l]fUH ^1]UHf.t Hu]Ëfffff.ttHu11UHJUHAWAVATSHtoIHGHtcLxMI0Ht@HL`L8@ L LsIGHt>HЅ5H{= HE1 H Z1ۿ1CAtH[A\A^A_]H Z1ۿ1AzuIF0H=Z1ffffff.UHAVSHL5o:A9>t0L5C9A9>t$L5<A9>tL53=A9>t L5=A9>uO0Ht9HXL0@ INHt$HHх~HH{: HB1[A^]H Y1AH=Y1fHt?UHSPHHHtH@HtHH{ H{ HH[]ff.UHAVSHHHxI0HHIHIFHCAF C I~Ht IFHCI~Ht IFHCIHLPEHH{ H{ HH X1ۿ1Ag1H[A^]ff.UHHG]fDUHHt1LMt)IBpHt tA92u8w tLtcDL]H X1eA1]H W1}AH W1{AH W1rAUHSPHt$HHtHx(uHx0tG  H nW1ۿ1}AAH[]UHHtHHtH@(Ht  u%]H W1}A1]H V1~AUHSPHt$HHtHx8uHx@tG  H V1ۿ1}AH[]UHHtHHtH@8Ht  u%]H ZV1}A/1]H 8V1~AUHSPHtHHtHxPtG @ H U1ۿ1}AH[]ffffff.UHHtHHtH@PHt  @u%]H U1}Ao1]H xU1~AUHSPHtHHtHxXtG  H 5U1ۿ1}AH[]ffffff.UHHtHHtH@XHt  u%]H T1}A&1]H T1~A*ffff.UHSPHtHHtHxHtG  H eT1ۿ1}A28H[]ffffff.UHHtHHtH@HHt  u%]H T1}A<1]H S1~A@UHSPHtHHtHx`tG  H S1ۿ1}AHxH[]ffffff.UHAWAVATSHHHHx`uHxPu HxXH@pHO @tt 1IH1IЅLID$H@A;FLuI|$LMI|$LMt$I$LPpL, H gR1ۿ1}AT:[A\A^A_]H I1 H jP1ۿ1}A=[A^]H CP1ۿ1~AH &P1ۿ1Afff.UHSPHtHHtHxhtG  H O1ۿ1}AH[]ffffff.UHAVSHtjHHtbH@hHtY uw1HtiIH6Hu HHLI6HtlHHH@hлu3I>I1 H 'O1ۿ1}A[A^]H O1ۿ1~AH N1ۿ1AUH貹HtH]@UHSPHtCHt7HCHt HHtHHCCHH[]ºH[]ff.UH]UHHGHt H@pHt]1]UHAVSO;NuDHOHt6HHtIH҅~LINH޸HI(Ht[A^][A^]UHAWAVSPHIGt&;CtZH xCsE11gA L{IFHt!HHtLIFAFM~AAFHCHt6HHt*HЅt!H CsE11vAIFHt?HHt3LЅt*HCE1HtpHHtdLHH[A^A_]AF;Cu(IFHtHHtLHAt!H fBsE11hAQDH[A^A_]UHAVSIHHGHt HHtHHCCLsAC[A^]ff.UHHGHtHHt]1]UHO;NuHOHtHHt]]UHHtHGHt H@xHt]1]fffff.UHHtHGHtHHt]1]ff.UHG]UH]stauH9ttWuH$UHSPH @s1AH=K11H[]H靬HDUHAVSHt9Ht2HGHt)HHtHADHHCC~!t,t-uuL5$(,t tubL5*L5h+L5,L5)HthHOHt!HHtHѸHHCLwAO2H ?s1A H=I11[A^]ffffff.UHAWAVATSIIׁtL%*L%+QHHHH@HtHHt HHCLcA$CHLLAT$@uaHgt1HCHt HHtHHCCH21 H >s1ۿ1A"H[A\A^A_]fUHAWAVATSIIׁtL%)L%*QHHHH@HtHHt HHCLcA$CHLLAT$HuaHgt1HCHt HHtHHCCH21 H =s1ۿ1ADH[A\A^A_]fHGH@PHtUHH a=s1}A\O1]ff.HGH@XHtUHH !=s1}Af1]ff.UHI8E1]UHI8E1]UH1]UH]f.UH]f.UH]f.UH]f.UH]f.HGH@`HtUHH AtH5LrI^1UHAWAVSHHdH%(HEL~LIL虶IW)E)E)E)E)E)E)p)`)P)@)0) HuHߺ BH}Hu BH}HPBH5HPEtjH}LtZH}H @BtAH 1Ct/H LLE1tHD>u H 61ۿ1iAU+dH%(H;EuH[A^A_]nUHAWAVSPH_LvLvIH HLLHH11Ƀ MEH[A^A_]fff.UHAWAVSPIIHHHtI~t-H 51ۿ1fAIH[A^A_]LH!IHtIt*H 51ۿ1fAL0H5HLsfffff.UHAWAVSHHdH%(HELvL$6AW)E)E)E)E)E)E)p)`)P)@)0) HuHߺ ?H}18JH}Hu ?H}HP?txH5HPBt\LiH}HtDH}H d?t+AH LDtH~;u H 31ۿ1iAedH%(H;EuH[A^A_]!lHHt1`:UHH 31xA1]f.HHtH1H@UHH \31xA1]fUHH]b3fUHH]bfUHH#Ht H]01]fff.HHtUHH1H]øff.UHAVSHH~Ht6踢HtGH{HuI,HLH{HtEH[A^]7/H o21xAH T21vA1[A^]UHAWAVSPHL~HMu4IL MHuLH11ۅÉH[A^A_]UHSPHH,HCH[]ffffff.UHAVSHIH5I^HtHv11[A^]UHAVSHIH5wI^1H[A^]fDUHSPuH_ H 11ۿ1jA@kHH[]ÐUHSPuH_Ht,H0$H 01ۿ1jA@1HH[]UHAWAVSHhIdH%(HEW)E)EHEHuк !H}Hu#H}H}Hu!H}о"W)EH}Huq!H}HMLH1џH o61AuH Q61ۿ1A\dH%(H;E?HHh[A^A_]M?L)HHHL+kH}Hu1NILHCMDH>HL-+H}о!GW)E)EEH}Hu3 H}Hu 
H}Hu$}HMHHsHULE1"6H}HECH 41AjIH 41ۿ1rAb.IH}H 41Ay41E1H(L1-H ^41A@HsHSHHL5tKH}t H 41A踾Hn.wLNte@UHAWAVAUATSHXHdH%(HEо :W)))))))E)p)`)P)@)0Dž,HDž HuHߺ twH}H tcH uYH}Hp t@HpH`t$HhuH`*H3=3P tqH 21A G1dH %(H;M_HX[A\A]A^A_]dH%(H;E:HHX[A\A]A^A_]T HpHoH [HxMH}HP 0LHPL LHPLgHP11ҹ*"HXH}H@LH}LLrH}H0H,!NH}C,tH8H08ƅH@HHHHH@HHHHHEH9H-H%IHfHnLfInfl~~flfHHfvfvfoЈTPʅta1%H /1{A(H /1oA0H /1A5E1ίIIH BHHGHA)pHxHt)Hp8u!HpHxHu1HCsW)E)E)E)EfEH}HeZHxHpHuv@1HHHHE1()pHxHtHHp8u,HpHHxHtHp8tHCr V_1W)E)E)E)EfEH}HHxHpHuA)pHxHtCHp8u,HpHUHxHtHp8tHCr1W)E)E)E)EfEH}HmHxHpHuSAE)pHxHtCHp8u,HpH HxHtHp8tHCr1W)E)E)E)EfEH}H*HxHpHu;LL1LHHHE1H)pHxHtDHp8u,HpH HxHtHp8tHC1W)E)E)E)EfEH}HBHxHpHuS H@)pHxHt@Hp8u,HpH HxHtHp8tHCsb1W)E)E)E)EfEH}Ht,HxHpHu tHHHu'-H *1ۿ1{A]HHHHHHfUHAWAVAUATSHdH%(HEHH>H~ AIW)E)E)E)E)E)pHuH L.H}8H}Hp.MI<$襶HAL"HpLHI}DuaW)`)P)@H}H@-I4$H@lH})[DI|$W)`)P)@)0) )H}H@-H@H,H1d.I4$IT$AL$HE1H}(H(uZH +(1ۿ1A8H (1ۿ1CAH '1ۿ1A苲dH%(H;EuNH[A\A]A^A_]H '1ۿ1AH '1ۿ1A YfUHAWAVSH8dH%(HE tYIW)E)E)EHuI+1ۅtPLA HH}.t1L'1ۅ H &1ۿ1{Ak脱dH%(H;Eu H8[A^A_]CXUHAWAVAUATSH(MAIIH}dH%(HEE1HH։1E1LMHtGHHEHuH}HI.t%HMLLDILMܛE1H9AE1dH%(H;EuDH([A\A]A^A_]WUHSHdH%(HEW)EHu8H HH}^aH HH}7芹H HH}u`H HH}u=H %1ۿ1{AeH %1ۿ1AY蜯dH%(H;Eu HH[]^Vfffff.UHAWAVSHIHIdH%(HEHt8I>Ht0ˍHMy*H y$E11A:1MxHHEL}H}IHtMt I>M>HEHdH%(H;EuLH[A^A_]U@UHAVSH@HIdH%(HEW)E)E)EH}1"t(LH}LNtH}HmH}S#dH %(H;Mu H@[A^]UUHAWAVSHdH%(HEHxgHIHHEHUH}WHtHIjHt3HLILtMtI>aLM>HMH HJ1dH %(H;Mu H[A^A_]YTfUHAVSHpdH%(HEHH?IHW)E)E)EH}1!M6A tW)E)E)EH}Hu&txA IH}L)tYH}"tLH}HSlLH !1CA蓬'H !1{AksH}z!dH %(H;Mu Hp[A^](SUHAWAVSPHt\HHtTH;HtLIHCHuIHCHtzH;LIHHLE1J"t?ICM7 H !1ۿ1CA赫HH[A^A_]H 1ۿ1A1fDUHAWAVAUATSHXdH%(HEHHHuW)E)E)EH}1tzL;LcDkLLD1E1E1HtTHHEH}HuHC(t5HME1LLDIE1ԕH9uH}HubjDO2H}A!H E11CA"航dH%(H;EuDHX[A\A]A^A_]BQfUHAWAVATSHALBHt5HIE1L%C'B {HJD{IM9uݸ[A\A^A_]UHHO1H)r H7HO]UHAWAVATSHIIH>4I$HIwAHtI?I$Ht IGHE1D[A\A^A_]ffff.UHAVSHIH>HtƊI>IvJH1H[A^]HWHtUHH?1SH1H]1f1H9WuUHH? 
1]ÐHGHt!UHHHQHHHG]1HO1Hr&UHHHPHHHOff]ffff.HO1Hr"UHHHPHHHOf]ÐHO1Hr3UHHHPHHHOP @ Љ]HO1Hr"UHHHPHHHOȉ]ÐHO1Hr UHHHPHHHO]HO1HriUHHHPHHHOP H PHH HH PHH HH H@H H]f.HO1HrlUHHHPHHHOP H PHH HH PHH HH H@H HH]HGHtUHHDHO]1ff.UHHO1H)rHLLHOHHV]DLG1I)r-HHHLGHtUHHH~K]HO1Ht6UHLIPHHHOEL)rJHHOHLF]fffff.HO1Hr@UHLIPHHHOEfAEL)rJHHOHLF]HO1HrOUHLIPHHHOEEPAAE E@M L)rJHHOHLF]@UHAWAVAUATSPLg1Mt;IIL/LLOHtL)I)rJ (IMgM.IFH[A\A]A^A_]ff.UHAWAVAUATSPHHIHD0DrIGHIIAHIG~H]HfffffffHXID D!tLIOHtIHIO 1HfffffffI1H9r*MKIMIrHuH]E1E1L3DH[A\A]A^A_]IA@UHAVSH IdH%(HEHE$1HME1E1etHEINH)r IINdH%(H;Eu H [A^]IfUHH$E1E1H]UHAWAVAUATSDULLwEtAAM[IE;IDua11I7II9*E#MII IIIDL ExLIAAA AHtD:MIIHWL)HxDHHtHHOH)HHHHOHHtHHVg؃EtJA tAu=HtHAAHOH)(HLLHOHuDHAI9HXAAHsE11PAAE11Gd3HAI C\3L Gd3HAI C\3L IM9uMt"MIE1DHG<3L IM9uHw&Et`IA IIMu&Iɍ IIMu Et"AH1HrLHdb1[A\A]A^A_]@UHH dH%(HEMHELD$4dH %(H;MuH ]Ff.UHAWAVAUATSH8AIdH%(HEHEEW)EHLmIMD$1HUHMLE1E1t&D9eu MHEMEIMH)r IIMdH%(H;EuH8[A\A]A^A_]Ff.UHSH8dH%(HEEHHEHD$HUHME1E1 9]dH%(H;Uu H8[]ELGMUHH?ʃu`A11DM9tlII9uaFMII tMIID҃L IExL1Ir" щ%@E 1111]9@ UHAVSH@HdH%(HEW)EHEE$E1HuHUHME1E1}HEHuH9HMHHMHH)HU9E1AHt*DA@AEAEujEA@AE @@DuLHHH11IHHH;B4H H3IxL9t II8tE1H9AdH%(H;Eu DH@[A^]Cfff.HwHtIUHH?Ht-A@A1Eu@@ƀ@]11@ fffff.UHAWAVSH8HdH%(HEW)EHEE$E1HuHUL}LE1E1t~}uxHEHUH9rkHuHHuH)HUtWHu9HHHMI)ILAHEHHAN@DŽADtdH%(H;Eu*DH8[A^A_]Ä<@Hw@{BfHW1HtOUHH?HtHt-W@Ƅ@Ǹ@u]DUHAVSH@HdH%(HEW)EHEE$E1HuHUHME1E1t?}u9HEHMH)r,HEHEHMHuHr1Ʉ AdH%(H;Eu DH@[A^]@ff.UHAWAVAUATSH8dH%(HELO1MLEEAAuZ1E1DI9tpMI9ugE4MIM tSMIEAM HExMIr*AAE D%AAE9Dȸt0HtdH%(H;EH8[A\A]A^A_]AHUHEEW)EHLeIIMD$1HUHMLE1E1tD9muMHEMEIL$H)lIIL$HUJp?UHAWAVAUATSHXHIdH%(HEW)EEHuHUADmEH]HEEW)EMH]MLD$E1H}HUHMLE1E1tI}uCMHEIEHKH)r/I$HKH}H]u E1WAAHtD+dH%(H;EuDHX[A\A]A^A_]]>ffff.UHAWAVSH(IωIdH%(HEW)EEHuHU#1ۅt'}tH}L tH}tM>dH%(H;Eu H([A^A_]=DUHAVSHPAΉHdH%(HEW)E)EEHuHU}HEE$E1H}HuHUHME1E1{tQ}uKHEHMH)r>HEHEHMHu,H}u%t =uE1 D3AdH%(H;Eu DHP[A^]<fDHW1Ht6UHH7w&tHtT1]fffff.HW1HtSUHH?wCtHt9AALADu#HH9v @@փ1]ÐHGHt;UHH11fff.H9t"@4@@AHDt1]11@]ffffff.UHAWAVAUATSHhHdH%(HEW)E)E)EH} IL;Ls1IL9AHHH kIHH ˄xHPrkH5H}5HðW)EHEL8LeLI1PLN=H}LE1H(AW)EHELI8LeL1dPL<H}LH=H}.tII(LBW)EHEL8H]Hߺ1OH<H}HHH8ME1ffffff.IM9AHHL ILHAAI ̄xH}. 
W)EHEH}IM1)OLeL;H}LH;IHDžxHDžpH}13 t$H}HxHpt Hx H}1dH %(H;MuHh[A\A]A^A_]8ffff.UHH]UHAWAVAUATSHhdH%(HE)ELm1ۅMUHMLeE4$D\zIE|$DHEH}Hubt 1AHMHuHC{tHHH1dH %(H;Mu H@[A^]UHAWAVAUATSH8IIHdH%(HEtGL}LuHEHEH}HuE1EH}Hukt}.H}H}HuEH}Hu,t}.H}HEHLeI( IHHE1ItLLAHHHwAAE}F4E)ADHEHHuLDH$1AHEȈAE9rH}L}HuLE1ELHu!t}.H}LeE1MtLLfDAHHHwAAEoF4E)AHEHHutCLDH$1AHEȈAE9rH}L}Hu)AE1dH%(H;EuDH8[A\A]A^A_]fff.UHAWAVAUATSH8IdH%(HEt'A|$ID$t"AT$ IT$HHHHH) E1IL$HEHMAHIH]H111rt@IH}uIGHEHA|$t?ID$A|$ I|$HXH)H8-H xCoE11BA sPI|$I\$Hy[IƾH}SLuMHEHE1t7H}DTH};TdH%(H;E:DH8[A\A]A^A_]HEHEH]LuH]fE1H}L11mtIHuH H]HHuD/A|$IL$tHID$AL$ HHMMtmLcHeE111LII L9utOIT$HtIt$HEH<^Ml$E1II$HtIt$HEJ<(6I$E11AAIHEJT0HLH0H}HIfHOLFL9LHBHt6UHSP1L9ۃL9H6H?D؅EH[]1L9L9DUHAVSHc_Ll1I'uLc_EhO ]A SLcGLcOHc Ds v3(D);A;ALL H IIIAC AIBAqMAI˜i{AADHLipMiDigfDAAIimA[fEAIMcHEEiۅEAAEMcHi:LHHHLHiQIiIkIcHiQH}HHHH1L9„uHy'H)HL9~dH%(H;Eu11H[A^A_]dH%(H;EuLHH[A^A_]Z5DUHAWAVATSH IIHdH%(HEHEHuH'E1tPHEHuL t8HMH+MHW)QΠEHHHH?H i€Q)AAdH%(H;EuDH [A\A^A_]|UHAVSHHIdH%(HEHEHuLtH}LH+11dH %(H;Mu H[A^]UHAWAVATSHkAHcIHz{It II-IHvdtA0IELBDH1H=`fffff.AA4A74AwIH3Ht@<A?4AwIHHH3Htu"HH8uHH0uPE1rff.HH8H<4A74AwIH3HtH0@ffff.H<A?4AwIH3HtAHH(DEu)Eu$HH AAuEI1A-uIĸHEAE1C<H1fHu<HHUHU@<HHUHHHH9rHtHE1HtH}1dH %(H;MHX[A\A]A^A_]LuH:^HH#NJ0HtIH^AŅMuE1f.LHHIIC?D)0H}N9EI A]AMuNLuk fUHAWAVAUATSH(HXID&Et1A-H}uIƸIE1fDC<&KM|$IŅtJ#HMHuM$IHEHH8Ht H}swlrHEHLeIwH_Cy 5LHH ҍJ JD)E1D9ADE1H]L}1E1HH]IL9tcJ LK&L,JIuHLH#NJHotLLZuE1HEH8u6L r,E1'LuLYL[uA^HEL0LeDH([A\A]A^A_]UHAVSIH1>-HA<0u F xuHHu12H tA>-uH;r[u H@[A^]f.UHAWAVAUATSPHI~tH5Lr1ɃH[tH56LG1ɃHaAE1ffffff.H}|wLmIA<6f.H4HALu)IItHJ4DHEuE1Ht1ɉH[A\A]A^A_]ùff.UHAVSH1HtIHHLr1[A^]UHAWAVSPHIrLqI IHt3Dɉ HtCH4HLAt MtH ILH[A^A_]@UHAWAVATSHw%H 1ۿ1uA^D7AHI9ufHHuHInLHHHt~MtbIHILH{HtRA$KyPHqpH;H 9 1ۿ1uA^H+sMuHn1H[A\A^A_]fffff.x-UHSPHHHΉH葅1 H[]øfx-UHSPHHHΉH1 H[]øUHAVSIHL=Ht:HL0@@$HHIF8HtHЅu H}>1H[A^]UHAWAVATSHtqIL5,ffffff.IT$LL@L(>Mt9MI$vt'I$M|$0ID$0HtH@@HtL1ۉ[A\A^A_]ffffff.UHHtHG0HG0]1]DUHH$u]fff.HtuUHAWAVSPIH'!DIWHLqLY=Mt4MI~$Hut$IMw0IG0HtH@@HtLH[A^A_]DHtuUHAWAVSPIH!DIWHLLHHt H@0HtUHH %:1sAvWH]1f.UHHdH%(HEHEHt@HHtH@0HtHMH~$HE H _%:1sAV1dH %(H;MuH]fUHHdH%(HEMHt@HHtH@0HtHM)H $:1sAVH1dH %(H;MuH]GHtGUHHHtH@0Ht 11]H |$:1sAV]1ÐHtGUHHHtH@0Ht 11]H ,$:1sAU]1ÐUH w]UH#G]DUHG]@UHG]@UHG]@UHG]@UHG]UHw]UH!w]DUHO ]fDUHO ]fDUHG]@UHgG]ffffff.UHH]DUHGGGHO0Q ‰WAG]DHt6HHt H@HHtUHH ":1sA9fT]1fHtIUHHHtH@0Ht 11Hx ]H w":1sAT1]1ffffff.HtIUHHHtH@0Ht 11Hx ]H ":1sAS1]1ffffff.HtIUHHHtH@0Ht 11Hx ]H !:1sAVS1]1ffffff.HtHUHHHtH@0HtHc־ 1]H [!:1sAR]1UHHG8]fDUHHG@]fDHHtUHHHHR0HuHq0]fffff.UHHtHG0]1]ffff.Ht1UHHfuH@0HtHHt @t9u]1]1UHAWAVSPI9BL=QNtLLQt1H[A^A_]UHHH=]Offf.UHAWAVATSHtIHtWIIffff.IIBIHt,H@Ht#A~tGLLЅ~XIF@II)u H :1ۿ1sAE11[HtIuI>LHUADH[A\A]A^A_]E1fUHAVSHH?HuA UL9vH;L[A^]hU1[A^]ÐUHH]UHAWAVAUATSHHUH:GHE̅ILw8E1Hu21ILLHLHUDPIHHAID9}tRIHxtHDgLHEH8HEuE@TL9vHEH8LTH1LmH]H;THH[A\A]A^A_]ffff.UH]UHAWAVSPHIkHt5IH= HHúHǾjLvH 11AnFH5H lL CA;LI1F8u)H 61ۿ1nAr'L1 H 1ۿ1pAtEHH[A^A_]fffff.UHAVSAHH=HtIcHǾjHHH[A^]@UHH]UHHHcҾj][ff.UHHk1]IHW)E)E)ELLeLLH観HELLStH}HU1t DeA)H :E11iAX>E1E1D#L$dH%(H;EuDHP[A\A^A_](HtvUHHG(Ht H@ HtHtNH_@HWRHPHrHHHHHwHHH9t1]1H H@r&HHffff.HHHHwH H1H9HC]ff.UHAVSH@IHdH%(HEW)E)E)EHu t2I6H}t"IvH}tHu H {81ۿ1iAHIHHEHUH}Ht!MtI>I"LM>HMH 1dH %(H;Mu H[A^A_]RfUHAVSH@HIdH%(HEW)E)E)EH}1蘬tH}LtH}HH}dH %(H;Mu H@[A^]UHSH(HdH%(HEW)EEHu軗t4H}Hujt#}tUH 1ۿ1mAH 1ۿ1uAS7dH%(H;Eu#H([]H}HuHS1Hfff.UHAVSH@IHdH%(HEHNt H 1ۿ1mA)W)E)E)EHuHߺVtFL JuH}1軱t+LJH}LtHUu H u1ۿ1vA4<6dH%(H;Eu H@[A^]UHSPH Ht HC(1H[]@UHAVSHIHtIF(HK(HHHIHH1[A^]UHH(Ht] ]ff.UHAWAVSPHHG(LxMuHGHt[HxIܝIHt-LLytL轹tHLNr L连1H[A^A_]H 1|A4fUHAWAVAUATSHHdH%(HEHGLxHt=IILEL3L]I9s-H "E11dAxw4H裛IHt"IwL?tHL 
p,L葜1 H g1ۿ1|A2H[A^A_]UHAWAVSPLw(tNu'IHct^ uMHtHIFDH 1ۿ1eAG2"IH`r@uM>1ۉH[A^A_]H 1ۿ1oAfUHA E1]NUHSPt H J1ۿ1A1H[]UHAWAVAUATSHXdH%(HEHLaMtJILHIHII7H1tH}Ht.H}u'HݼuCH em1hAjH dm1dAd'H81dH%(H;Eu HH [A^]6fDUHAVSIHn7IHtHH[A^]G1[A^]UHSHdH%(HEH}HuH}HtH}t+H @dm1dAwH&H1dH %(H;MuH[]UHAVSH@IHdH%(HEW)E)E)EHu t]IvHt6H}?tGIvHt H})t1Hu@H ucm1AT&H Wcm1ۿ1yA%dH%(H;Eu H@[A^]DHUHH cm1AT%1]ÐUHAWAVATSH@IHIdH%(HEW)E)E)ELeL萙L1覙t*H}LtH}LHSu)H kbm1yA$H}1dH %(H;Mu H@[A\A^A_]fffff.UHAVSH0IdH%(HE谹HHW)EHEHuL 脄t3H}HuSt"H}tbH am1jAH am1dA$H׿1dH%(H;E:HH0[A^]!4HCHtH}Ht4HCHtH}Ht3HCHtH}Hy3HC HgH}HSHs(H}>Hs0H})Hs8H}HH@H}H}tH g`m1dAHH 9`m1hAUHSHdH%(HEH}HuH}HtH}t+H _m1dAHl"H$1dH %(H;MuH[]'UHAVSH@IHdH%(HEW)E)E)EHu 赛 H}1IvHH}IvHH}IvHH}Iv HtwH}nIv(Ht]H}TtnIv0HtGH}>tXIv8Ht1H}(tBIv@H}gt1H u@H c^m1AT H E^m1ۿ1yA dH%(H;Eu H@[A^]UHAWAVATSH@IHIdH%(HEW)E)E)ELeL谔L1Ɣt*H}LtH}LHsu)H ]m1yA H}!1dH %(H;Mu H@[A\A^A_]fffff.UHAWAVSHdH%(HEHx>HIHHEHUH}WHt!MtI>IRLM>HMH 1dH %(H;Mu H[A^A_]BfUHAVSH@HIdH%(HEW)E)E)EH}1舓tH}LxtH}HH} dH %(H;Mu H@[A^]ff.UHAWAVSHdH%(HEHx>HIHHEHUH}Ht!MtI>IBLM>HMH 1dH %(H;Mu H[A^A_]2fUHAVSH@HIdH%(HEW)E)E)EH}1xtH}LtH}HH}dH %(H;Mu H@[A^]ff.UHSH(HdH%(HEHEHEH}Hut[HEHMHEHMH}HHtH}t(H Zm1dAw@H1H}1dH%(H;Eu HH([]ff.UHSH(HdH%(HEHEHEH}Hut[HEHMHEHMH}HHtH}t(H Ym1dAH81H}1dH%(H;Eu HH([]+UHSPHH=Ht@@(HC(1H[]UHAWAVATSIIH1HI@@(ID$(M(AAIHt,IFHtSAGAFIGIFIG IF AG(AF(I8t'I~8I8Iw@IF8Ht IG@IF@[A\A^A_]ffffff.UHSPH_(Ht)H{+H{0H{8HH[]xH[]ÐUHAWAVATSHLg(I|$t:HtUIA4$IT$E1H1t=HLA2*ID$E1Ht Hu E1L*D[A\A^A_]fffff.UHAWAVAUATSHLEIHIdH%(HEHGLo(LpHqCHMtwH H9s#H KfE11dAUIEHtmEAUE1u0HLẺLHULMJt EHAdH%(H;EuuDH[A\A]A^A_]AEdH%(H;UuPHLHLMLMP,H(EU(HLHLMLMARAu PH A|UHAWAVAUATSH(IIHdH%(HEHLs(LoMVMt]AV1LLIdH %(H;MLHډLMMH([A\A]A^A_]'HMLEHEAIV0Hu!H{AHcH‰IV0Ht~HcAFHHuLMMP H1tVHUH9UuLIv0H}14LMF EN(dH%(H;Eu8LHLAWATH81dH %(H;MuH([A\A]A^A_]eDUHAWAVAUATSHXLEHMIIIdH%(HEHI](HGHE@LcMt+IL9sQH ucE11dA  M'AdH%(H;EzDHX[A\A]A^A_]H{CHL}E1uIHEHEEH{0uI}?HcHC0HH{LkH}HuHUMHEHS0$HuH}LLELM"t"HUH;UuH{0HuL)4AM}t H}gE1EMMtHs0HuL)LL=HEL8dH%(H;Uu/$H}LLLELME1{E1zNfffff.UHAWAVAUATSH(LEHMHIIHM|$(HGHE>LcMt-H L9s2H aa1dA=1L+AGuwI0HuI|$*>HcRHIG0HtMG8MO@AG$LHUHMtH MG0$H}HLM1$H}HLLELMH([A\A]A^A_]ffff.UHAWAVAUATSHHLEHMHIIdH%(HEHMe(HGHEL=LcMt'L9;sLH (`1dA]1L;dH %(H;MHH[A\A]A^A_]AD$HEIT$0HuI}tu+G u1%G zHu IFAV[A^]ffffff.UHAE1]!,UHIE1],UH1]UH1]UH1]UHAE1]+UHIE1]+UHAE1]q+UHIE1]Q+UHIE1]1+UHIE1]+UHI E1]*UHI E1]*UHH dH%(HEHuHULM E1*dH %(H;MuH ]螴fffff.UHSHHdH%(HEW)ELM E15*t8HEHt%H Y1EA@ HMH dH %(H;MuH[]UHAWAVAUATSHxLMMHHpIIHMLmdH%(HEMuILIHIDH`LH EL9v&H ΤE11~Ad| LhH IIK&HHH9s&H BΤE11rAj6 oIVHxH9HhHEHHLHuHU1ME1E1ALxHI)ML}tHEI<1LGHLhI\$HuI 4HHMIB(HtMI)I)LHpHuIHTsLpHxH+]HE1HXIHHLHML`GHuH9xHUHHHHr'HpHHJ<(H9@I9@1HIHtIGD F0 IHuH)HwtI~t I~AE1W)E)E)E)E)E)E)p)`)P)@)0) HuHߺ sH}Hu sH}HPrH5NHPuteEtH}L tPH}H rt7H 1tt%IvH tHnu H vm1ۿ1iAydH%(H;EuH[A^A_]GUHAVSH_H~LIHALH%1Ʌ[A^]UHAWAVATSIIIH HAHtI|$t6H umE11fLK+H1[A\A^A_]HC HLH軿AtIuHBti*IHCMtKHtH HsHS HME1oL*H5LLI^\E1BAf.UHAVSHdH%(HEH^H6H{ +W)E)E)E)E)E)E)p)`)P)@)0) HuI 'pH}1dzH}Hu oH}HPotjH5KHPrtNH}H t>H}H ot%Hs H ľtLku=H sm1ۿ1iAH {sm1ۿ1vAdH%(H;EuH[A^]@UHH]+fUHHcH]f.UHAVSLwL=HtL;HtL>1HÉ[A^]ffffff.UHAWAVATSLLfI<$1HtQII?bM7I|$Ht4IIDMwI|$HtII%Mw[A\A^A_]ÐUHAWAVATSLwH^LeIHZLH!E1uILPIHELH!u'L>IH3LH!E1AD[A\A^A_]UHSPHHHCH[]ffffff.UHH uqm1BA,~1]f.UHH Eqm1BA2N1]f.UHAVSHIH5HI^HtH1[A^]UHAVSHIH5HI^1H[A^]fDUHSPtuH_ H pm1ۿ1lAFHH[]@UHSPtuH_Ht,HC$H Jpm1ۿ1lAFQ1HH[]UHAVSHH?Ht]HxtVHxtOI u&I~ uI>u I~tDH ]1ۿ 1kAZH y]1ۿ 1eAI[A^]I>b"tI~U"tI6I~5yI~h uI~ uI6I~aI~=t=t=u~I>='r H \1ۿ 1fAhLI~HtfuI~uI6I~x@H {\1ۿ 1kApH [\1ۿ 1dAaI~ Hlu'I~ uIvI~ H [1ۿ 1kA{{UHAVSH IdH%(HEHHW)EHuL OtBHHt5H}Ht%HCHtH}HѷtH}t(H N[ 1iAHP1dH%(H;Eu HH 
[A^]>fffff.UHAVSIHnIHtHH[A^]G1[A^]UHAVSH@IHdH%(HEW)E)E)EHu ht\I6Ht6H}tGIvHt H}誷t1Hdu@H KZ 1CAH -Z1ۿ 1jAedH%(H;Eu H@[A^]&fDUHAVSH IdH%(HE HHW)EHuL NHCHt~H}HtnHHtaH}HݵtQHCHtCH}H迵t3HCHt%H}H衵tH}uH.u*H Y 1iAJH 1dH%(H;Eu HH [A^]UHAVSH@IHdH%(HEW)E)E)EHu fIvHtaH}軵trI6HtLH}覵t]IvHt6H}萵tGIvHt H}zt1Hnbu@H X 1CAUH W1ۿ 1jA5dH%(H;Eu H@[A^]fDUHAVSH IdH%(HEHHW)EHuL KtnHHtaH}HϳtQHCHtCH}H豳t3HCHt%H}H蓳tH}uH u*H W 1iAHCHt^H}HtN HCHt@H}Ht0HH H}wtH}uHt7(H OT 1iAd@UHAVSH@IHdH%(HEW)E)E)EHu aH}12lI6Ht|H} IvHtbH}tsIvHtLH}ٰt]IvHt6H}ðtGIv Ht H}議t1H]u@H NS 1CAH 0S1ۿ 1jA/hdH%(H;Eu H@[A^])fUHAWAVSHdH%(HEHx>HIHHEHUH}Ht!MtI>ILM>HMH 1dH %(H;Mu H[A^A_]袍fUHAVSH@HIdH%(HEW)E)E)EH}1ZtH}LXtH}HH}m[dH %(H;Mu H@[A^]ff.UHAWAVSHdH%(HEHx>HIHHEHUH}Ht!MtI>I"LM>HMH 1dH %(H;Mu H[A^A_]蒌fUHAVSH@HIdH%(HEW)E)E)EH}1YtH}LHtH}HH}]ZdH %(H;Mu H@[A^] ff.UHAWAVSHdH%(HEHx>HIHHEHUH}Ht!MtI>ILM>HMH 1dH %(H;Mu H[A^A_]肋fUHAVSH@HIdH%(HEW)E)E)EH}1XtH}LtH}HH}MYdH %(H;Mu H@[A^]ff.UHAWAVSHdH%(HEHx>HIHHEHUH}Ht!MtI>ILM>HMH 1dH %(H;Mu H[A^A_]rfUHAVSH@HIdH%(HEW)E)E)EH}1WtH}L8tH}HآH}=XdH %(H;Mu H@[A^]UHSPHHtCpH{(舿HHxLHH[]fUHSPHtxHHp)thHSxH=H"H;zH{qH{hH{_H{ VH{`hH{hhH{(蛿HH[]H[]fDUHHp]fff.UHH?]sUHHG]fDUHHG ]fDUHH]UHHG]fDUHHG]fDUHHtHGHHtHG H]fUHHtHHHtHGHHtHGH]UHAWAVSPIH1HH t0HHt IM~HtI~ I^ H[A^A_]f.L1IM LOI LOI UHAWAVATSHHtLIIIaLLHL#HtHIIALHL{HtHI&HLsHG`HHfHC`H{hfHCh[A\A^A_]fUHAWAVAUATSH8dH%(HEЁ'r&H VbE1 1kAHIAHLLW)E)E)E)E)E)p)`)PrjcHH蔥IAAECMt%E1I9IMtH} LLHHHHuHHfHHWIHLH(H=HH.HHH0HH.LA?AIAL8LHDELeIGHDHH'HBMLHHLHHIGH1҈DE1L1H DLetLL0'DMtX HpLLl HPLLUHfffff.qu HHL9HrE1LLHU1LME1z E1HpLHP1ME1QML(LeH}Is1I s1L1H(D(LWPW`)D)LH H9uI9tRHHAt.HH PH1D HH9uHI9tfP0LHI9uMBL=LLHH H DL2HMAvE1L1'DDLE1I1GHHEtE1L1DnLHE1DDf.HfL9s pH@tE1HpLH]H1LE1iHLLLHALLHHkHHLQAD$D;A@LLD2"LLLHLLH8LH(<LME11LLLI LLHL0LHL^tvLH8LexL2HڹMduAMLAıAH MDDH zE1E1RE1E1PE1E1ULtL8LLHtE1H1LH(IH0HHIžH8HH8HH0LHMQtuALHHL L8LLHxt+ALLHH0IMbQE1uE1 E1E1HHEHtH H L6^dH%(H;EDH8[A\A]A^A_]H}AtLI<$cI|$YI|$OH0I$H(ID$HID$I<$1HHyHHtHHHDHH}fDUHAVSHHtm@pHx(I脳LHxHH;IH{IFH{HLINI>tHxHрu H1[A^]UHAWAVAUATSPH IHL{ MucIHHSL薡E1t|LkMu1IHH{`Hs(HLtNH HsLK`LLMuet1L{ LkAH{u#OE1+E1E1E1H{u 8E1H{t.H{ t6LDH[A\A]A^A_]E1E1H{uLH{ uLfUH]AHt+UHSPHHHH{HH[]逶ffffff.UHHtHHHtHGH]UHAWAVSPHHH1u&IHIHPIGM7I_H[A^A_]UHAWAVAUATSHIIHdH%(HEHI} $W)EHE)pHEH}Hp3IHBHIL8H(IE`H0IE(HHIEhH@!1H`1HhLX1AˉTW)EHEH}HIHMIUH}貞IUH0HHL@IUH@HHLIMIuMM`HHUM{btwIM1HHMkIUMEhLHuL脞LH`pHhdE1L`Hh1E1-fff. 
1H HbA2AH}HLE#I}TL8I9LXv I}6AH(LH]HHI}[HHcHHpHL8H}IEHHp1IMe I]hLLHXIHLHLS<LpLLHhHMSLIMLLXLHUM0 &H@HLLHIHLHLQSLLH`HMASLHhTL 1H FbAE1E1E1sH Fb1ۿ 1eAWLL`LX/LrH dFb 1lAL`LhH 8Fb1ۿ 1AL,L$LH}SHpGL?dH%(H;EuBHHĸ[A\A]A^A_]ÿ1HfHHhHLcL`ufUHHIHHHdH%(HEEH}/1 EdH %(H;MuH]yufUHAWAVAUATSHMHIIIdH%(HELI}H`W)EHE)EHE)pHEH}H}Hp;HhHLXL`I?eI?I?IuI+IjIIuuwIwIUH}Hh5HLX/I}L9LBHULLHIMH}HUHLhO%H`H0IMH}HLh&%I}`Iu(IUHhMMIuIMHpHULEAu`hTvHtfIMHuHp1LhtEH`H0H}H1ɅAF1lH Bb1ۿ 1eAJH Bb1ۿ 1AHhZH}H}xHpldH%(H;EuHĈ[A\A]A^A_]tr@UHAVSHLHdH%(HEHMHLHt1IHuHI>I~L贬1dH %(H;Mu H[A^]qUHHdH%(HEEH}+1 EdH %(H;MuH]qDUHAWAVAUATSH8LMMIHUIIdH%(HEHE|HE1HttL}H}HuL Ht[K7H9EuQH}HuqL9 <u1MtHuLLzruHMLLHULE_H}dLuMtI>I~zLBdH%(H;EuH8[A\A]A^A_]}pffff.UHSPH1H}¹rHBHHHH=HwHHH9uHt HHtEI~HtzHCHt.I~HtcHCHtI~ HtLHC Hu H[y1H[A^]UHAVSH~t"H ۃ1ۿ1fAwlHz uAHL2AIHtNAANH0@ @@H{ILs$H qۃ1ۿ1fA7h1ۉ[A^]ffff.UHAVSHHdH%(HELvW)E)E)E)E)E)E)p)`)P)@)0) Hu @H}Hu q@H}HPT@ttH5HPXCtXH}H @t?H 1At-I H LCtH%<u H ڃ1ۿ1iA dH%(H;EuH[A^]lf.UHHGHNo@ oH0oQ ftoA0ftff1]UHAVSH@dH%(HEW)EH~t>H cك1ۿ1fAZdH%(H;EH@[A^]IHHuH׺5%tI~uH} uLLuA蕤HtYW)E)EH}HLInAF@H{ULsmH ؃1ۿ1fA!H1F{kff.UHAVSHdH%(HEH^{@PW)E)E)E)E)E)E)p)`)P)@)0) )))HuI =H}1GH}Hu =H}HPb=tzH5HPf@t^H}H -=tEH H=t)H H@tL-9u=H ׃1ۿ1iAH ׃1ۿ1AdH%(H;EuH[A^]iDUHAVSH0dH%(HEH uKIHA2HtXW)E)EH}HLI lAF@H{Ls$H Pփ1ۿ1fA!G1dH%(H;Eu H0[A^]i@UHAVSH uAIHA蓡HtNAANH0@ @@H{I]Ls$H Ճ1ۿ1fA71ۉ[A^]UHSPHGx@t-HtXH:wDH nՃ1ۿ1dAVe@s?dH%(H;EuHH[A\A]A^A_]ff.UHAWAVAUATSHX dH%(HEE1z?FIIHW))))))))))pHpIHwIICIL+pLH3H+xLHH3H+LHH3H+ILHH3H+LM!II3OO QMMI3L!LM!IM!H3L!HL!L!LpLxHHHH+HH3H+LHH3H+LHH3H+LII3L+HML!MI3OO QILH3L!HM!HL!H3L!HL!M!LHHHLA$AL$)`)PAD$ AL$0)@)0E1H9H u>H@H֜H98uH\cXH90W)))))))))))p)`)PHDž LPLT4 LL4 LL4LLH4W)@)0) )HHL6H}W)))))))HDž))))))))p)`)P)@)0) )))~-F 1~ fo fo| fo fo% fff. fnf`pfofqffEfDdfEofDfqfAffAffEfDdfEofDfqfAffAffEfDdfAoffqfAfffִHfHH=H1"ffffff.HHHHw䀼tڹHHffffff.D=E~ADGADƄ=VfDE)AsDIBtBƄIIrBƄDH&HHLIHIIfffffffffpf`fPf@f0f fff~- H01ffffff.fnf`pfofqffEfDdfEofDfqfAffAffEfDdfEofDfqfAffAffEfDdfAoffqfAfffִ HfHHG1!fffff.HHHHw䀼tڹHHffffff.D=E~ADGADƄ=VfDE)AsDIBtBƄIIrBƄDH&HHLIHIIH`1HLpLxI 8HM1HLIHLHM<LHHLLI HILHL)LL)LL)LL)LL)HffPf`fpfffffffH Hffffffff f0f@H(ffffpf`fPf@f0f fH0H8HH`(()P)@HhHHXHH(p())(HH8()@()PHH`LPHLLLLLLWH8LxLHWH`HLWHLLWHLL藥HLLuWHLHcWHHLQWL(LLLQLLML-QHLLI48HMHLI HLHM$LHH LL(I2H0HHHL)HL)HL)HL)HL)H8H@HHHPLHXH H(()p)`HL(LHhPLHLHLLLOHLxLHHOHHLOLLLOHLLI48HMHLI HLHM$LHHLLI2HILHL)LL)LL)LL)LL)HHHHLHH H (())H(L(LHNLHL茜HLLLcNHLxLHHCNHHL1NLHPLNHLLI48HMHLI HLHPM$LXHH`LLhI2HpLHL)LL)LL)LL)LL)HxHHHHH H(())HH(HnMW)))))HDžHDžD u4 u1AB=u*B=u"HuAω IAsILPLHHHffffff.HHPHHLHLxLMLLKHLMLPHKH(LLKAIkxH4HLHbLLLHHKHHxLLiKHLHWKIXLLCF=EHLHHKHLxLLLLJHLLHJH(LLJAKH(HLH貘ffff.EHHPHHjJHLxLMLLGJHLMH2JH(HPHLJAAAHH(HHH^fffff.F=EELPHLHHIHLxLLLL~IHLHlIH(LLZIHLLI8HMHLM<3HLHPM,LXHL`LLhM<LpILHL)LL)LL)LL)LL)HxHHAAALkxHHN$3W))HDž)E)EHE)E)pHEHIH(H}HPHHHxLHI4HPHpH(GLEHuK<(HHHH)N4.I)LMO<)HI)LUO$*HPLM)LXLuL`K<.LhLL)HpHLLHHHLLML MHxHHLLHH3N@LH3HpHH3J IHH3JRH#HH3L J4NHHH3IM!IL!LL!I3L!LL!L!N.ILpLxM)N/M)N4(L}M)LN)LL]M)LN4*LPLLUM)LLLLLLLHHHHHTW))))HDž))HDž)`)PHDžpHHA HI))HHHMLHLHDELPLHH+EHHLWW))HLyW0HP HE1AdH%(H;EuDHX [A\A]A^A_]CUHAVSHI H_ C$? CLH[A^] fUHAWAVATSHPHdH%(HEN)Ee)ME$? @EW)E)E)E)p)`)P)@)0) )HYHHu詘H`L8L@I<HhM4HpLHM<HxLPHM$1LH}LLXLM<>LILL)LL)LL)LL)LL)HHHHHHW))HDžLHLHLLBHL1UdH%(H;EuHP[A\A^A_]-ffff.UHAWAVAUATSHXHdH%(HEIfffHDžffHDžfpf`HDžf@f0HDžPffHDž ffHDžoVf)$? 
@DBDJDZrzDBZJH H JH I(H0L DZ L DR I%I L EA?I-M DJ IM DJ AM DJ A M DBI HHI IH!zH zA L DBDbN4LHAAM IH!L H H AI HxLmLuHHuLeXhHDžPf f0HDž@HDž1IWHf.AȸA1HPHXIHH1L!H1IHpHHL1L!H1HHH1HHL H(L1IHHHL1L!I1LH1HHH1L!H1HH1LPHXH`IM1M!L1IM1L HHHHH H(H0HII1M!L1IHI1HH HIH)HIH)HL`LHhIHI1M!L1I1J)L)HL0LH8HHH1L!H1HH1HhHpIM1M!H8HL@LHHxL1L!K1H)HLLpO7ILM)L`MLL1HpM1LM$HI)LHL4IHI)HHHHH)HHLLHH1H3xH HMLHpHpH@N,:I)HHHK8H)LL0H8L@H HHHPL`LLLHHH`LHHhHHpHHxHHHH`<H H0H;JLHELH IHpHH4KIk&HHHHHk&LHHHHxHIH H(HIHXHPK46LIHH@HIHhILIIIMHIH0HLIH8HHLMIHHO4$LIIHHILIILHIIHHHXHPLHhH HH!L@LIILxLL(L IIM I!LHL8LLMIM I!HL0HpLLII H!HMIM I!LpK I IHHH3LH!HHHH3LHLJLHELLK IH HI4IIk&HIH(HxIk&O HIHHHLIH0HHIHhHO LIHHHXHHHHHLHIIMLHH8H@HHIHPH`LHHIIO,6LHHHLHIILHIIHH!HHHHhHHHH H!LXLHMMLLLL0IIM I!L`LPL(LxMIM I!HH@HH8H HLHH H!HHLII HK I IHHIH3LHHH3LIHHH HHHHHH)IHPLH(J H II)LH0J 8HxII)HH8H H(HH)LH@J 0H0II)LLHHHHHHHpHHH!HI!LHH!HLHLHHL0L8L@HHLPHPHH5HHHHL)HHHH+HHHH+HHHH+HHKIFIH8HHHu H l1ۿ1iAdH%(H;EuH[A^]nfffff.UHHGHNooHoftoAftff1]DUHAVSH dH%(HEW)EH~t>H l1ۿ1fAZdH%(H;EH [A^]IHHuH׺5'tI~uH} uRLuпA蕦Ht_HH AANH0@ HI轰AF@H{OLsgH Gl1ۿ1fA!B1@umDUHAVSHdH%(HEH^{@TW)E)E)E)E)E)E)p)`)P)@)0) )))HuI ?H}1IH}Hu ?H}HPb?t~H5HPfBtbH}H -?tIH H?t-H H HBtL);u=H l1ۿ1iAH l1ۿ1AdH%(H;EuH[A^]kUHAVSH uQIHACHt^HH AANH0@ HIkAF@H{Ls$H l1ۿ1fA!R1ۉ[A^]UHAVSH u@IHA賣HtMAANH@@H{I~Ls$H yl1ۿ1fA51ۉ[A^]UHSPHGx@t-HtYH:wDH +l1ۿ1dAT=H l1ۿ1AJc@ H0NH H[]ffff.Ht>H:w&UHH l1dAf1]HGHNH UHAVSH u@IHASHtMAANH@@H{ILs$H l1ۿ1fA5s1ۉ[A^]UHAVSHHtI ê1HI H k1ۿ1xAxH[A^]ff.UH ]DUH]DUHSPHHNHCH[]UH]DUHAVSHAHt:IH5H5LH LAF@H{آLs1[A^]fDUHSPHGHtEHOHtl1ۿ1uA.H[]Àx@t(Ht\H:w:H >l1ۿ1dA@H >l1ۿ1A:H HHHH#HڅtH H V>l1ۿ1AD]fffff.UHSPt H >l1ۿ1eAUH[]UHAWAVSPH`ٟHt$IHH0E114tL{(A E1LDH[A^A_]DUHAWAVAUATSPII`rHIHH0E1H13Mu(M|$(AAIGIFIwHtI葧IFHt[IGIFIw(HtI nIF Ht8IG(IF(I0Lq7IL7HLH;E1A E1L DH[A\A]A^A_]fffff.UHAVSLw(Mt.HI~I~ ۟I~0b3LʟHC([A^]ffff.UHAWAVAUATSH8L(I_HMwMAHNHHEIGHEIG HEIG(HEI0LIM6ILb6HD$Ld$HEH$LHuHHMMLMH dE11vAdH dE11xAhAHH5HIIL*HI9H WdE11dA|dHHEIGHEI0LIW5ILl5H$LHuHHMMM$H dE11DAlDH8[A\A]A^A_]IWIOMGMO IG(H$LLDUHSPwwHW(H=Hc4Hr~H hd1ۿ1rAZH0H1HAHH8HrHHr H(H,HJ$H d1ۿ1eA舻H[]UHAE1]UHIE1]UHH dH%(HEHuHULME1dH %(H;MuH ]afffff.UHH dH%(HEHuHULME1XdH %(H;MuH ]^afffff.UHH dH%(HEHuHULME1dH %(H;MuH ]`UH]UHw]UH1HPu~tH7]@UHH]UH1H8u~t Hw]UHHG]fDUH]f.UH]zoS Hzoyq)!d$ f.zoYʼnA)A!zADAQADM1M9b1oD$8AADb)A AADb!zoYIbyD9bI`xyyDb yDO4&B19D$yDoD$HB)M8nXB!M8fPBLl$(BLd$0zoiB xyyDB1yDB)yDB!yDoD$XBBzoI B xyyDB1yDB)M8nHyDB!M8f@yDoD$hBLl$8BLd$@zoQ@B xyyDB1yDB)M8n8yD9D$xB!M8f0yDBLl$HBLd$PzoYPB xy9DB19DB)M8n(9DB!M8f c9DBLl$XBLd$`B xyszo[B1ŹB)M8nB!M8fYYDLl$hBLd$pB B b1xyb)sb!bM8nbM8&b I A B1B)B!BBB b1b)b!bbxy0b I@B1B)B!BBB b1b)b!bbxyPb I`mf.zoqzok0AIS@IzoY)b)!b!A)bA!őbb qB1|$cYB)YDB!GBo Bw0B @_Pzob1zoS b)Ll$xH`@b!L$Hv`zoybbb H`HrAxNxVyox^yoxfyoxnyoxvyoo|$(9D$9f.f.@H1H`UHSATAUAVAWwLezoHĀAX L+LqIǀAzo$HzoHMI DQpb9M!I!M)r IsL)oPIog@L@oo0HH1ow AoWYoQd$0Il$@it$PaT$`\$pvLexNxVx^xfxnxvB9Az$wHeA_A^A]A\[]f.f.oazoS MjxyLaq́q))!!ʼn f.B1B)B!BBB Ax<$Md$Auzo$B1'B)oB!w BaG0BW@B _PH`b1b)b!Bbb xxVx^ xf0xn@xvPHv`f.qzok0AIS@I)b)!b!)b!őbb q @H1H UHSATAUAVAWwzoHĀAX LlLqIǀHzoHDQpM!I!M)r IsL)IL@Hb1)zD$p!T$`d$Pl$@ t$0LeMI Azo$H Hb9o|$ zozoYmAzo9xNb1xVb)x^b!xfbxnbxvb zL$ot$0zoAmADADqDzoL$@IDzoY1mIDűiDAzoy0oL$P1DzoA mc1D1QDoT$`qDzoY@mqDűYDAzoy`9D$piDzoAPA9miDA1AD9DzoY mc9DʼnC1DAzo9ŹűzoAA Dűmc DőűsqD1sDzoYAmcDA1AC9iDAzoy0DzoA !mcDšAW|$C1D1C9DCA8W!DzoY@)mc!DũC9A!qDAzoy`űWC9DCA8W)DzoAPA9mc)DA1A)iD9D9D1Dũszo[sQ9c9D99c9D9LeB9Az$wHeA_A^A]A\[]JHR Wf8 HRuf8fff@JHR Wf8 HRuf8fff@IWWA HL HHf8f8 H f8f8Duf8f8f8f8IWWA HL HHf8f8 H f8f8Duf8f8f8f8IWWWA HL HHf8f8f8 H f8f8f8Duf8f8f8f8f8f8ÐIWWWA HL HHf8f8f8 H 
f8f8f8Duf8f8f8f8f8f8ÐIWWWWA HL HHf8f8f8f8 H f8f8f8f8Duf8f8f8f8f8f8f8f8IWWWWA HL HHf8f8f8f8 H f8f8f8f8Duf8f8f8f8f8f8f8f8IWfff8HL Hf8fff8fHf.@f8f8f8f8f8f8 H f8f8f8f8f8f8Duf8f8f8f8f8f8f8f8f8f8f8f8IWfff8HL Hf8fff8fHf.@f8f8f8f8f8f8 H f8f8f8f8f8f8Duf8f8f8f8f8f8f8f8f8f8f8f8IWWfffHL Hf8ffDf8fDHf.f8f8f8f8f8f8fD8fD8 H f8f8f8f8f8f8fD8fD8Duf8f8f8f8f8f8fD8fD8f8f8f8f8f8f8fD8fD8IWWfffHL Hf8ffDf8fDHf.f8f8f8f8f8f8fD8fD8 H f8f8f8f8f8f8fD8fD8Duf8f8f8f8f8f8fD8fD8f8f8f8f8f8f8fD8fD8HJIAENHoo_og oo0ow@oPDoG`DoOpHHfLoD^o_f og n0oo0v@ow@~PoPDF`DoG`DNpHDoOpH HsL^Df n0v@~PDF`DNpHH€=H rp_g H@o0w@H`PDoG`EWw^f n0v@~PDF`fDIHI Wf8 HIuf8D^f.D;^f _f.^f n0;W^f n0v@^f n0v@~Pf.DHoo_og oo0ow@oPDoG`DoOpHHhf.LoD^o_f og n0oo0v@ow@~PoPDF`DoG`DNpHDoOpHAHsfL^fDf fn0fv@f~PfDF`fEDNpfEHH€H _g H@o0w@H`PLDG`EW f^ff fn0fv@f~PfDF`fEfE,IHI Wf8 HIuf8f[f^f f^ff ff.Df^ff fn0f~f.Wf^ff fn0fv@ff?f.f^ff fn0fv@f~PfWf@HuJAIHI Wf8 HIuf8ffWfW]L$UHHAooE@ fЋi f$AfofofofT$@fT$PfT$`IfT$pI@IP11f:"I@f\$f:"LMPfd$ 1Af:"A1fl$0MHDT$LAMPA1ADL$\A1MHDT$lAA1DL$|Ifot$@fo|$PHJHHf.f.AfDoD$`f8EfDoL$pf8AAf8A1f8DL$ MHf8f8fD8fD8IAf8f8A1ff8f8DL$MHf8f8fD8fD8AAf8f8A1ff8f8DL$,MHf8f8fD8fD8IAf8f8A1ff8f8DL$s?sssssomyDyDaDsss9s>s?ssssspNpN/GH0IaowÐ;f.f.wzoLBZHv@zo-&ZB)HHzorpovB o~A mzoz` DA1B DovAmzorP1DA9B DA mDovA1zoz@9Do~B DAm Do61DA9zor0DB Dov A m9Do~@A1zoz DB Dov0Am1DA9zorDB DovPA m9Do~pA1zo: DB Dov`1DHH/AHAmzorpA9cDB cDovA mc9Do~A1zoz` D)B D WovAm1DA8WzorPADA1sDsA)ovB A WA m9Do~A1zoz@C) DB Do6Am1DA8Wzor0C)DDB Dov A m9Do~@A1zoz DB Dov0Am1DA9A(WzorC)DB DovPC)DAWA m9Do~pA1zo: DB Dov`A1DAHHHf.f.fzot H ovo~B oooHAmDA9zorDovB 9DsHkAmDA9zorDovB 9Do~H%AmDA9zorDo6B 9DsHAmDA9zorDov B 9Do~@HAmDA9zorDov0B 9DsHtXAmDA9zorDovPB 9D~~xHf.f.fAAmDA9D9DAzo"aYũšűssA)!C)DC)A)C)DC)A)A)HB)zwUSATAVAWHHH<E]MU H9AAAADAA1BxjA!A1DVDAA1BVA!A1DVD AA1Bp $A!A1DV DAA1BνA!A1DVDAA1B|A!A1DVDAA1B*ƇGA!A1DVD AA1BF0A!A1DVDAA1BFA!A1DV DAA1BؘiA!A1DV$DAA1BDA!A1DV(D AA1B[A!A1DV,DAA1B\A!A1DV0DAA1B"kA!A1DV4DAA1BqA!A1DV8D AA1BCyA!A1DVA ~fŵsHHFLILVLT$IA%IL^L\$ILfLd$ŭFyn-fB}Yŝ-nŅŕŭ-f ŝ-n@ŕŭF`Cɓ-B}YUA=HHAIHHAIHHAIIHQIM%ia~ŕťA%aŝ%)ŕťA %a@ŝ%i`C5ŕťŝ=HHFI~ofHHFI~onLC5Aŵ%H^IAynoFŝAB}Y~ofŕŭ~onAoF8ŝA~ofXŕŭ~onxA~oŝAA=A5B}YUHHAIoAHHAI~oaIHQIMťa~~oiAoAŝA~oaŕť~oi8AoAXŝA~oaxŕť~oAA=AA5oFHHFL~ofIA%H^Iŭyn~onAB}YoFŝA~ofŕŭ~on0AoFPŝA~ofpŕŭ~oAA=AB}YUA5oAHHAI~oaHQIIťa~~oiAoAŝA~oaŕť~oi0AoAPŝA~oapŕť~oAoFA=A~ofA5MH^ILA%ŭyn~onAB}YoFŝA~ofŕŭ~on(AoFHŝA~ofhŕŭ~oAA=AB}YU A5I oAHAII~oaťa~~oiA$ŝoAA~oaŕť~oi(AoAHŝA~oahŕť~oMAL $=AAnA5Aŝ$ŝsŅŕsŅŭsCŅťsCŅCCғCCۓŵC-ŝC%ŕC ŭťŝsŅŕsŅŭsCŅťsCŅCғCCۓCŵC-ŝC%ŕC ŭťGOW_ŝsŅŕsŅŭsCŅťsCŅ}sCғA=CۓCCŵC-ŝC%ŕC}ŭA=ŝsŅŕsŅŭsCŅťsCŅ}sCғA=CۓCCŵC-ŝC%ŕC}ŭA='o w@`~wHLxLpLhL`HhHXH f.f.fHH1LFLNLVIIMI:ILLLIHLLFLNIMI4I LLIHGLL^LFIMI.ILLIHGLLVL^I MI(ILLIHGLLNLVL^II"MI?ILLLIHG LLNLVIMI9ILLIHG(LLFLNIMI3I LLIHG0LLLFIMI-ILLIHG8LLVL^I MI'ILLIHG@LLN LV(L^0II!MI>ILLLIHGHLLN8LV@IMI8ILLIHGPLLFHLNPIMI2ILLIHGXLL^XLF`IMI,ILLIHG`LLVhL^pI MI&ILLIHGhLLNxLLII MI=ILLLIHGpLLLIMI7I LLIHGxLHLLNMII!L_MII!LWM:I!LGLVMII!LGM4I!LOL^MII!LOM.I!LWLF MI I!LWM(I!L_LN(MII!L_MI"I!LWM?I!LGLV0MII!LGM9I!LOL^8MII!LOM3I!LWLF@MII!LM-I!L_LNHMI I!L_M'I!LGLVPMII!LG MI!I!L_(M>I!LO0L^XMII!LO8M8I!LW@LF`MII!LWHM2I!L_PLNhMII!L_XM,I!LG`LVpMI I!LGhM&I!LOpL^xMII!LOxMI I!LM=I!LM1MII!LM7I!LLLLLf.f.wo-1H< oHv U6HuwwIH$HL1HD$n}o}oJ }oj@}Xvvvvvvmv ev@==v`55v--v%%vvv vv}ozH oFoNoVo^ۀۈېۘooN oV@o^`ۀۈ ې@ۘ`Žۆŵێŭۖť۞ŝۆŕێ ōۖ@Ņ۞`H}9E6/H wI#HSUATAUAVIHHIHIHD$@A0AxEXE` Eh f.AAiD$D1!F*yZAD1AEqD؉l$DA1!F%yZAD1AAQ Dt$D1D!GyZA1AAAiT$ DD1D!:yZ1AEqDl$AD1D!؍5yZD1AAQDDt$D1!G.yZAD1AAiD؉T$D1!F"yZAD1AEq l$DA1D!FyZA1AAAQ$Dt$ 
DD1D!A>yZ1AAi(DT$$D1D!؍2yZD1AEq,Dl$(AD1!F-yZAD1AAQ0DDt$,D1!G&yZAD1AAi4T$0D1D!FyZA1AAEq8l$4DAD1D!=yZ1AAQnD1A3l$$D؉T$ 3l$,D13l$2nD1AD3t$(l$$D3t$0D1D3t$F-nD1AAA3T$,Dt$(D3T$4D13T$ G&n1AA3l$0DT$,D3l$813l$Fn1AAAD3t$4Dl$0DD3t$<1D3t$=nD1AA3T$8DDt$43$D13T$A6nD1A3l$nD1A3l$ D؉T$3l$(D13,$2nD1AD3t$$Dl$ DD3t$,D!؉D3t$F-ܼD1AA!AA3T$(DDt$$D3T$0!D3T$G&ܼ1A!AA3l$,T$(3l$4!D3l$ Fܼ1AD!AAAD3t$0l$,D3t$8D!DD3t$=ܼD1AD!A3T$4DDt$0D3T$ܼD1D!A3l$DT$D3l$$D!3l$<2ܼD1D!AD3t$ Dl$DD3t$(D!؉D34$F-ܼD1AA!AA3T$$DDt$ D3T$,!D3T$G&ܼ1A!AA3l$(T$$3l$0!D3l$Fܼ1AD!AAAD3t$,l$(D3t$4D!DD3t$ =ܼD1AD!A3T$0DDt$,D3T$8D!3T$A6ܼD1D!A3l$4T$03l$bD1A3l$D؉T$3l$ D13l$82bD1AD3t$l$D3t$$D1D3t$HHs2f.@ofoffo>HHsA0fDHHAofDo֔H)fof85Ht!o?ffA>HHthfDofDofEfo pfAofAof8f8ooWfoffffDfD>t>HH uf.@LfEoRfEoZfEo fEoj0fEob@fEozPfEor`EHSUATAUAVAWIINTIIM)IK$L$L9wfH$L$L9wJDIMI$HM1M1LHIHIIHIHFHIM*f.IJHMMHNlIHIJHMIHM9uIHHMHNlIMH1MHNlJMvKM1LL$HIHHIIHIHFHLT$IM fIJHMNHNlIHIJHMIIMHM9uIHHMNHNlIH1MHMHNlJMvM9:M1H$MJJJDMvIuHHH1M1MJ JH!H!N H JMvIuJtHL~LvLnLfHnH^H&f.EHSUATAUAVAWIINTIIM)IK$L$L9wH$L$L9wJDJ|IMI$HM1M1LHIHIIHIHFHHHIHAHIHHHFHLMHH<$I f.HIJDHIHIJDHMHNlHHIJDHIHHJHLHJ|IHIJHIHIJDHMHNlHHIJDHMIHHJDHLHJ|IM95HIJDHIHIJDHMHNlHHIJDHIHHHHLHJ|IH1MHNlJf.@EHfnl$SUATAUAVAWIINIIM)IK$L$L9wH$L$L9wLJDLfAofAoJNT̨IfpfofoffvgfoffvfABpfoffvfAfoffvfAfoffvfAfoffvfAfoffvfAfoffvfAfoffvfAfoffvfAfoffvfAfoffvfAfoffvfA foffvfA0fgfvfA@fvfAPfAD$@fAL$PfAT$`fA`fA\$pfffAod$fAol$fAoT$fAbpfAo\$fA۪ffAےffAۚfffAod$fAol$fAoT$fAۢfAo\$fA۪ffAےffAۚfffAo$$fAol$fAoT$ fAۢfAo\$0fA۪ffAےffAۚ ffffpNfM$fH~MHM1M1LHIHIIHIHFHIM"DIJHMMHNlIHIJHMIHM9uIHMHNlIMH1MHNlJMv@J̘HfffAoD$fAoL$fAoT$fAo\$fBfJffRffZfffAoD$fAoL$fAoT$fAo\$fBfJffRffZfffAo$fAoL$fAoT$ fAo\$0ffJffR ffZ0fffAoD$@fAoL$PfAoT$`fAo\$pfB@fJPffR`ffZpffffpNfM$HfH~M1LL$HIHHIIHIHFHLT$IM%IJHMNHNlIHIJHMIIMHM9uIHMNHNlIH1MHMHNlJMvM9 M1H$H4$M f.JJJDMvIuHHH1M1MJ JH!H!N4H JMvIuJtHL~LvLnLfHnH^H&f.f.gHSUATAUAVAWgAOIINLHI)IM9r,L)JM@f.f.f.NMJMM)IMBL)HII)II$+L$H9wH$L$H9wIHD$(AHt$(HL~LvLnLfHnH^H&f.f.DIfnhH}N IfofoHNT LfpfoggfoffvgfoffvfABpfoffvfAfoffvfAfoffvfAfoffvfAfoffvfAfoffvfAfoffvfAfoffvfAfoffvfAfoffvfAfoffvfA foffvfA0fgfvfA@fvfAPfAD$@fAL$PfAT$`fA`fA\$pfffAod$fAol$fAoT$fAbpfAo\$fA۪ffAےffAۚfffAod$fAol$fAoT$fAۢfAo\$fA۪ffAےffAۚfffAo$$fAol$fAoT$ fAۢfAo\$0fA۪ffAےffAۚ ffffpNfM$fH~Ll$H|$@MHJ4ILHIHILt$HIHIJDHHHIHAHIHHJDHLMy HI HI>If.f.f.HIHAMv HIHIJD>HMHMnHHIHAHIHHJ>HLHI~IHIHHIHIJD>HMHMnHHIHAHIHHJD>HLHI HI>II 9HIHAMv HIHIHFHMHMnHHIHAHIHHJHLHI~IJ H1MHMnIfffAoD$fAoL$fAoT$fAo\$fBfJffRffZfffAoD$fAoL$fAoT$fAo\$fBfJffRffZfffAo$fAoL$fAoT$ fAo\$0ffJffR ffZ0fffAoD$@fAoL$PfAoT$`fAo\$pfB@fJPffR`ffZpffffpNfM$fH~OLHIHHIII>O4HIJDHHHIHAHM^HIHHJDHLMy HI HI fHIHAHMVMv HIHIJD>HMHI~HHIHAHM^HIHHJ>HLHMnIHIHHMHIHIJD>HMHI~HHIHAHM^HIHHJD>HLHI HMnII HIHAHMVMv HIHIHFHMHI~HHIHHiHM^HIHHJHLHMnII~J H1MHM.HMnL;d$H1L)ML H)KL!H)LHH|$@IM1LmLuL}d @HSUATAUAVAWAGIIMNLHI)IM9r+L)JM?f.f.f.NMJMM)IMBL)HII)II$+L$H9wH$L$H9wMILD$ HD$(fHnfHnfInfHn@ {6 q, g" ] fH~fH~HHD$(LD$ Ht$(HL~LvLnLfHnH^H&f.f.Ij J4LLt.J|L8HD.H|/H\.IIIHILT/IIHHL\/IH\.IIHIHMIIHIIMILTHIIHIIIIHH\IIMIIIHLIIIIHH\IIMIIIHLTIIIIHH\IIMIIIHL\IIHI IIHIIMILTH0IIHmHMHL/IHWf.DLt.J|L8HD.H|/H\.IILT/IHHLT/IIIHHL\/IIL\/M1H\.IIHHLd/IIIIHHMIILT/HM fHIIHIIL,IgIIHH\IIMIIILHIILdHIIIIHHMIILTHugIIHMHL/IHWHLvJ|L8HFH|/H^IIIHIIIIHLWIIMH^IIIHL_IIIIHIIMILWIIHFHMHL/IHWHHM1L)M1LHHGHWLHD.H|$8M1L_O$VI?N,YI?M LWMHIL_IHD.L'IKVLoMI?NYI?M LW MHIL_(HH.H_IHmLGMH@f.f.@O$VI?N,YI?M LWMHIL_IHD.LgIKVLoMI?NYI?M LMHIL_HH.H_IO$VLGMI?N,YI?M LWMHIL_IHD.L'IKVLoMI?NYI?M LW MHIL_(HHD.H_ILGMH@H  O$VgI?N,YI?M LWMHIL_IHFLgIKVLoMI?NYI?M HIHIH_LGfH~H1I )JTL8HL$J| 8HT$IJ<fHLOLWL_Lg Lo(Lw0L8HH@gIH\$(HEHHEIIIHIHEHMH\0IIHIHEHMHt$(IIHIHE 
HIMIIHIHE(HMIIHIHE0HMIIHIHE8HMIIHHIHEHMII.Hm@H1HT$H;l$fLLOLWL_Lg Lo(Lw0L8HH\$pHE@HIHELIIHIHEHMHIIHIHEHMIIHIHE HMIIHIHE(HMIIHIHE0HMIIHIHE8HMIIHH\(IHMHEII3Hm@HT$H;l$sCH\$pHHELLOLWL_Lg Lo(Lw0L8Hf.H1LIIIIIIIHHLLOLWL_Lg Lo(Lw0L8HHMH1fH~LLOfI~LWL_Lg Lo(Lw0L8H@H9f.f.fLeJLfH~HfH~HIM1LmLuL}LeLmLuL}Hm IIIII!I!I!I!IL#LkLsL{L'H[ LoMLwLH HuMIf.fHSUATAUAVAWAOIIMNLHI)IM9r L)JM!NMJMM)IMBL)HII)II$+L$H9wH$L$H9wLD$ HD$(=Ht$(HL~LvLnLfHnH^H&f.f.LL$MIIIN I fnhIHRjLl$LL$ H|$@fofoHNTHfpfogfogffvfoffvfABpfoffvfAfoffvfAfoffvfAfoffvfAfoffvfAfoffvfAfoffvfAfoffvfAfoffvfAfoffvfAfoffvfA fogffvfA0ffvfA@fvfAPfG@fOPfW`fA`f_pfffogfoofoWfAbpfo_fA۪ffAےffAۚfffogfoofoWfAۢfo_fA۪ffAےffAۚfffo'foofoW fAۢfo_0fA۪ffAےffAۚ ffffpNfHfH~H\$hIbfIbnIIbvMLD$(H1LH|$Hv fL8fL8bfL8M8bYfL8M8baH|$ LSfL8M8byLL[fL8L8HI Lc f.fL8fM8bvfL8FfM8bvggLfL8fL8Hv H[ M8b9fL8M8byfL8M8byLSfL8L[M8byLLcfL8L8HI LkHKHD$IH4MH|$HLsf.@LfggffoGfoOfoWfAۂfo_fAۊffAے ffAۚ0fffoGfoOfoWfAۂ@fo_fAۊPffAے`ffAۚpfffofoOfoW fAۂfo_0fAۊffAےffAۚfffoG@foOPfoW`fAۂfo_pfAۊffAےffAۚffffpNfHfH~H+H\ bH1IbfL8CfM8bnL8[fM8bvL8cfL8H Hv L8kfL8L8MLD$(LH1H|$bfL8M8bYfL8M8bafL8M8byLH|$ LSfL8L[L8LcHI f.f.fL8M8bvfL8L8FfL8[M8bvLfL8cL8fL8kL8Hv H[ fL8M8b9fL8M8byfL8M8byLSfL8M8L[byLHI LcfL8L8LkH,HD$IH+;H|$LT$MH4HLsL9LQIL$H,HH<1M1M)MM HL)HT$@ILmM1LuL} f.f.DHSUATAUAVAWAOIIMNLHI)IM9r+L)JM?f.f.f.NMJMM)IMBL)HII)II$+L$H9wH$L$H9wMIffHnfHnfInfHnLD$ HD$( w m c Y MHfH~fH~HD$(Ht$(HL~LvLnLfHnH^H&f.H|$8J,LL$Hl$+f.fff.>ffGfG fG0fG@fGPfG`fGpHI@uHM1M1M1M1M1M1H|$8H1FfM8L8FfM8L8fM8L8 fM8L8F(fM8L8F0fM8I8b~8HVfL8L8L@LGLOHH1^FfM8L8^ fM8L8(fM8L80fM8M8b8HVfL8L8fM8L8fL8LGLO ^F fM8L8^(fM8L80fM8M8b8>HVfL8L8fM8LG(LO0F L8fL8^(fM8L8F0fM8M8bf8HV Lv(fL8L8L~0fM8L8fL8LG8LO@³LF8fM8«L8fM8£LL8fM8fH8‹BLHv@fM8L8fL8L8ggBfM8fL8H;t$ HHILG@fL8OHfL8WPfL8_XLg`LohLwpLxH.HHHVHD$H|$ 1f.f.LbEfH8M8bMfL8M8bUfL8M8b]fL8M8b fL8M8bm(fL8M8bu0HϻfL8M8b8HTfL8L8fL8gHIHm@HH;l$tWH+\$fHVfL8fL8OLWL_Lg Lo(Lw0L8H@gH1HD$f.f.H1H+\$fL8HL$ fL8HILIIIIIH9LOLILWLQL_LYLg La Lo(Li(Lw0Lq0L8Ly8HLOHfH~LWPL_XLg`LohLwpH|$8HL_M1LL$M8LgLoM8fI8HL M8fI8L_(HH_M8fI8HTLg0M8fI8Lo8HGH_M8fI8HTHI LW@M8fI8L_HHG H_(M8fI8.HM8fI8LgPLoXHG0H_8H@ fI8HG0H_8H@fH~1H\$(HT$8JL HL$H|$H|$8LOLWL_Lg IHLo(Lw0L8HD$ H@H1HLbEfH8M8bMfL8M8bUfL8M8b]fL8M8b HLfL8M8T$(HHDxbm(fL8M8bu0fL8M8b}8HfL8L8fL8gggH@HH;l$HT$8LHm@HfL8OfL8WL_Lg Lo(Lw0L8H@HH1HD$ f.fLbEfH8M8bMfL8M8bUfL8M8b]fL8M8b fL8M8bm(fL8M8bu0fL8M8b}8H̀fL8L8HLfL8HLH;l$sEH+t$HT$8Hm@LLOLWL_Lg Lo(Lw0L8H@HHH1HD$H1LD$ IIIIIIIHH+t$LfH~LOHu8fH~LWL_Lg Lo(Lw0L8HH\$(HT@LLG@LOLWL_Lg Lo(Lw0L8H|@L;D$@LeIIHHfH~fH~ILmM1LuL}LeLmLuL}bHm bbbIL'LoLwLL"H LjMLrLzHR HuItHHHHHuf.fDL$HHWHfnfofoHLH$fpfofoffvfoffvf@foffvfHfoffvfPfoffvfXfoffvf@foffvfHfoffvfPfoffvfXfoffvffoffvfHfoffvfP foffvfX0foffvf@@foffvfHPfoffvfP`fofXpf.fDfffAoCfAoKfAoSf@fAo[fHffPffXfffAoCfAoKfAoSf@fAo[fHffPffXfffAofAoKfAoS ffAo[0fHffP ffX0fffAoC@fAoKPfAoS`f@@fAo[pfHPffP`ffXpfffMfpNffHI"UHHHbH\$XLt$ML|$bxLd$IbXLl$MHVIM1fM8I8H|$xfL8I8XfL8I8bhfL8HbM8fM8H1M8BM8fI8M8HbfM8M8BHVbPfI8M8bIfM8H8HM1fL8M8HVbxL8HHVfM8pfM8M8fI8I8fI8b8HMIM1fM8M8fM8H8b@L8HfL8I8HfI8HƒH1fI8HM1M8BI8HM8fL8BfM8L8“fM8H8MI8fL8HHILL)IMMLLHLHLIBLT$IBLl$MBIBIZH\$I:MbIBLd$Lt$L|$]UHHbVbNH\$H1M8Ld$bfHVLl$Lt$bL|$H|$~fM8L\$b^fL8HfL8fI8H8fI8H8fM8M8HVbffM8fL8bfI8fL8fL8M8fM8HVM8fM8Ht$fM8HHbfH8H8M1H8M8fI8I8HbM8fL8bM8MM8fM8MfM8LfI8ML8HMfM8MM8fM8I8bILI8M8bfL8L8fM8I8LLH8fM8IfL8fI8ILL)HMIMILHHIBMBMBIBH|$HwHL_LGH\$Ld$Ll$Lt$L|$]HH tBAWAVSHIII?L1M1M)LLHJIthJuLMYWÍHI1@IDdI(LHHH1@HEHEH7LI@IELE1I H IHDL HHH7r H> H=HI!)H4HH!L H fHn[A^A_11HHDH HɃ? 
E1H9HHs1M1HLA?LA?EE)HDHHH1A@HEHEAEfffff.IIIIIILI?MI!I!L)LHHAuMt91I8IpH9s-HHHH1HMtJ1HI8Ip1Mu1HIH H t H1IH 1AHIHMuHHIH$HT$HHl$Ld$H-?L%H\$Ll$Lt$L|$H8L)AIHI1mHt@LLDAHH9uH\$Hl$Ld$Ll$ Lt$(L|$0H8UHSH+RHHtHX[]HHHt#HHc#HH!UHH5,lH 4LpL 8n107UHAWAVSHdH%(HEHH_W)@)0) )))))HDžPHX$Ht\)E)E)E)E)E)E)p)`L`ILH1LLHXuwƒujH&~ ~}ȉ HX77sMtH1uodH%(H;Eu.H[A^A_]IH]uLvuOH5 +lH L[L Pb15H5*lH j-A1`5=u H5\UHSPHHvHH1HH[]H5&5%@%h%h%h%h%h%h%h%zhp%rh`%jh P%bh @%Zh 0%Rh %Jh %Bh%:h%2h%*h%"h%h%h% h%h%hp%h`%hP%h@%ڷh0%ҷh %ʷh%·h%h%h %h!%h"%h#%h$%h%%h&%zh'p%rh(`%jh)P%bh*@%Zh+0%Rh, %Jh-%Bh.%:h/%2h0%*h1%"h2%h3%h4% h5%h6%h7p%h8`%h9P%h:@%ڶh;0%Ҷh< %ʶh=%¶h>%h?%h@%hA%hB%hC%hD%hE%hF%zhGp%rhH`%jhIP%bhJ@%ZhK0%RhL %JhM%BhN%:hO%2hP%*hQ%"hR%hS%hT% hU%hV%hWp%hX`%hYP%hZ@%ڵh[0%ҵh\ %ʵh]%µh^%h_%h`%ha%hb%hc%hd%he%hf%zhgp%rhh`%jhiP%bhj@%Zhk0%Rhl %Jhm%Bhn%:ho%2hp%*hq%"hr%hs%ht% hu%hv%hwp%hx`%hyP%hz@%ڴh{0%Ҵh| %ʴh}%´h~%h%h%h%h%h%h%h%h%zhp%rh`%jhP%bh@%Zh0%Rh %Jh%Bh%:h%2h%*h%"h%h%h% h%h%hp%h`%hP%h@%ڳh0%ҳh %ʳh%³h%h%h%h%h%h%h%h%h%zhp%rh`%jhP%bh@%Zh0%Rh %Jh%Bh%:h%2h%*h%"h%h%h% h%h%hp%h`%hP%h@%ڲh0%Ҳh %ʲh%²h%h%h%h%h%h%h%h%h%zhp%rh`%jhP%bh@%Zh0%Rh %Jh%Bh%:h%2h%*h%"h%h%h% h%h%hp%h`%hP%h@%ڱh0%ұh %ʱh%±h%h%h%h%h%h%h%h%h%zhp%rh`%jhP%bh@%Zh0%Rh %Jh%Bh%:h%2h%*h%"h%h%h% h%h%hp%h`%hP%h@%ڰh0%Ұh %ʰh%°h%h%h%h%h%h%h%h%h%zhp%rh`%jh P%bh @%Zh 0%Rh  %Jh %Bh%:h%2h%*h%"h%h%h% h%h%hp%h`%hP%h@%گh0%үh %ʯh%¯h%h%h %h!%h"%h#%h$%h%%h&%zh'p%rh(`%jh)P%bh*@%Zh+0%Rh, %Jh-%Bh.%:h/%2h0%*h1%"h2%h3%h4% h5%h6%h7p%h8`%h9P%h:@%ڮh;0%Үh< %ʮh=%®h>%h?%h@%hA%hB%hC%hD%hE%hF%zhGp%rhH`%jhIP%bhJ@%ZhK0%RhL %JhM%BhN%:hO%2hP%*hQ%"hR%hS%hT% hU%hV%hWp%hX`%hYP%hZ@%ڭh[0%ҭh\ %ʭh]%­h^%h_%h`%ha%hb%hc%hd%he%hf%zhgp%rhh`%jhiP%bhj@%Zhk0%Rhl %Jhm%Bhn%:ho%2hp%*hq%"hr%hs%ht% hu%hv%hwp%hx`%hyP%hz@%ڬh{0%Ҭh| %ʬh}%¬h~%h%h%h%h%h%h%h%h%zhp%rh`%jhP%bh@%Zh0%Rh %Jh%Bh%:h8# ZL  !   \"6' R%Xn8iC:%&0H  0KI 5 p>  Y@j@חA vH7B*H=*H  t*H8+ep+enc[csc~ccccnbbbcbbo@ o_ (%@  6 co5h8  (o0o4o&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv&6FVfv5ff ?P"Scrt1.ocrti.ofcrtn.o:RU.init_array.text.got.got.plt.rela.plt.init.tbss.bss.dtors.ctors.dynstr.eh_frame_hdr.jcr.gnu.version_r.interp.data.rel.ro.rela.dyn.gnu.version.dynsymmalloc_hook.gnu_debuglink.fini.gnu.hash.relro_padding.note.ABI-tag.eh_frame.tm_clone_tableprotodesc_cold.dynamic.shstrtab.tdata.rodata.gnu_debugdata.data7zXZi"6!x8]?Eh=(aK"a4`3i-5sK nR!R<"'o$U%XS+Vzsl, *J@G<JټXJLԹxڭܕ3r0$ 89}vvS K~Y([uD}a{ w!2|݄TڦKtng-c -$ BdW!!Tк}λd/{C,.L)Kvm~LBT$/GQ!(Hq35`qn%XYYP0Rjt͹푷E(%Ȭi(TahNw7:MNu nU4=~W`,cgS"\n1q793Pit;ѽ~h8'!:!bzm~%sm9 |=6)E4GW pvc -O&uc!kؾk7WUh>%&5hXf_{CV}@鱸 ҹYݒc#_`'b8+ g `̍K6I K4yш>E0t 9aYuQUj-%7U42SJcEZwrXn*.F}}`,-cХ[:8j98#!jx߁8n4w!uܺ-H'ƒer|1 W:RȁK񁡘#@C8 p۲IK|Z]5R9 >K1wMH6gh]>Jt)jKՕno2"oSW V+~,gh;)>rsmRLjy'-+$]ԡh9K=Z#n!zg~#3a.vl[Y+gXFTjIcYS`ca@'VZtϸɔHOAXd6p Ԟ!1ߌ)#" q- d[R\؉ť?ZXn8gp'; i ~ba5Ƽ[kn3;a F(Ж;.mř3v;~avW/$~Fà1p-fy3wW4>q5`m-QHnhB"9M"a{BܥIGcЖ¿m}fs+{w=^'VtRr@}̦E1RAGJN5xâ7`K-+77H6Kڮp!N_O1wM'lFd[M[47]\[p(yC*r (DEKJCg3T.$~ t_삍6,/i>uʷ<>fVMnpFλ'_v ?V 9}!hoO}cȴK&<8k*+r|i}LJuU[F6~t?8VTV'#IoCuV=UqzŗV;0V!eL/* .>+lr-TM! 
rC׷ wfJe%,-{%Tʅ f=E)FiI}oA#?.7yJAǰE5xXkbvZ{2_yvda X.nsn龪\1+aBnך0g)ߛxdLD[%Zas믃`d}H\λ vzY-ҭ pA-@T~EqDi>_IoPrteZ̯ )ƥƞos(.u"/Ks'G)1D/[Jx3egx,@YIR\ydH/qpV\U0p)7%C$lp CN\ridS2Gh$Vߘ4Y췡1Ycz!9fdI ^Ki\OաnBERuw[|מU9hG?iṀͪ99p!~qњ_=wV~8%ucu2#HpFsg"Xy*S~txj7ڹkQt6K Q: @2[s"ui1h~so{ФnrI5udLn\rR H,tނ)Lmz;ww7 `1, FRz5 Bތs'kNq0`͇` *7:wFNDaxtէVZ|\5Amw+Fa ;<94PB߈/3ePURu+!i%0ym@z=TKpE'C΅`DC .("y2a^pǭC瞴u Zr ]B|^ Ϡ&]$1yJ-,ܖuT͆6fgL T 5]|~Y t%>T"1QE"0v-=H؂Y(n92ƳN/TI[Hd*!Gĉ((vd ܞ_7ɺGP;\}O3rq+jrETJP }>re0ư<ĢŖ0HD~97S0 ~;OksЦE΂0eĜ8XHn vfj!M bJk7D^V_ll2Cut膻ŹD?kKy]*XeB00 vz5,*T_㊹DjLj>|AZףd[xEMf/~I?397ynK M (H:bEV(Љ^vLQJXkkŋ8GJ*X Kxߺ{qEઇ!|vtp"oLcQʎh&i[;?Nq"R*ad3W4q& jQ; ((OBXżRL`;șar/!.8t E@N}=; N QmrC__tyy}_Lca/I=aͬdogڏV+~ H&1j(SY;)OD{WEb}Ҁ L&_)=6@a H͊]T^(^>[!Kvb4>oP'b {;E0!Ue! .i N  Y/:y# z&n*b)"`@*2 xqӭ-Û|CD7DImACLx?Jg^G:=bHuYy*Ip2ԣ09nFDgU+7XvjGXJhSl@YA#H jY? uCÙ :Fo&'8f52ډv$plPH ~X")z3'^~ߣJ:y Ed3ؐl}@Ίuj찞z?.JL`?b)dnxPOiWa 8*AhS2tSM=kfy.)S[Hi](>S ^|xw*6;qT{r'{MǤ, k@4缰&W2J 88jVV|ݢ㥺r'aTaWKa^k9գ}^ W:i"(U@ {/2e7ɂ{ =ȧc4^6Z'b4J=Iׂk1F{Toǁw%Cgn)!6EXW9CU(Sxi7w.VG\.L_2p3471:Xʚ/E'h8=} wtDj CCjA_0j9AWwpXߦq=қP< Y<{9 Hi͸WOMTF&zhÑHyB)[l8Lo)=RI͂Q=欨ѢE~nmX0!h.* ˦LYxnΔ @NR? gp#D](M O@V?O[2o=8h e#O z2t!32?ݔ7k} rH wHr;@'c^hIU Q5}\Ge\cNY^/MK!"ײɡDO_~u7;B7 L_&"e-QTwQ! y"\^&NKQfH[F/µDK7o IoڳLM {͠*/DVVjl8M^S2mh7K礕 * eB]ȣ/Y/O}U8F@qJu9dWī͖ܣ:Ԋ2gI1:JhzaW^0!2Rl){T c)8TdtE̤=K}v ރa®/v$eՂ5r]XH`VRJpr3 `lv{l edi=E?MY&WEYq|9g" {Ulw,!v"{׆N曫*aɝ+K1N{/)GH)mO<f`Rvq6MbT6Aӆ5"%wRE@؝C1i_0Nxћb}K=k~9DE%tsyL[.0XmjCA\,PC\l/hgi}&w,?2\N:&2|~{k3ÍӘJkx$fD#Ur RRA4Ak -)o.0V`hIkNͅd`is A쯠洄҅ޜAQu.9{6esV%)q huCBtc"anZ<é_hOsEa#-8n-.1k팾/{{6/?Z W` Q $}4e!7n6!A܅\W\D4I,hi&+H=>2]_gs7󧱂m$6GTLjwx-\$TQDfc64eȐJ2pp &bMk=mP,Fe9 %k*OϝW HNϞMM.XZ¬L~#=$^3>~jd[zo PJ@sx]L g{\TSSl,}Ppӡ2A#=hBh\L3j0KBCѣN>z}!x~]!F5) aQ̈T!w3"=Gi@y[2ՍvmdNڹNPYWH$vAX zE}/TAMs*3X掄p+J{NTҍt޼6D_mʳ ӏ-^r[I ~&غe 59*`Ʋ'n\Z`1L `91zY9i Q[7^'Ug+CxZ3abk;I}{5k4`~2 Y7gU]hvC`NN, ¸+Tw@%]]q 09Ы!Z'yKtMLM?)㔹Gbptld6:ԔSSq_XCT sqP{x3)mKz)@ZM]6SOå [khQSR6܂61|c [Au<!b)Wciʶߟf|&7<-6I ïw5n MQ~=Jh"kbӪ&Ax L&Lז'*1\< Ci&>왬Xh*V@%x`Pxis=GΘ@ԆAYr//UKhuMY8.)@Sp4EF?Wo]lgpވZCt,3bL/7䝭C&++Q$r ZPR:Pi-"x#t:2?:ҿ"q.(50DZ0þĭl&v cYui6H!3V(!䯖"m/" >%hEoﺌ\KDh5zEa 3fIऺ¨6"~ MX5DЍ" r< )ְ+7|eq~Pl8"A"K1?R<5njP𑡤((\0YR>5_YT aK.wcشec3sX]Gax0/"ԩlA$|dDdubA aXs$u6r"rv -kN,DAQ5ỴHS r aDv@%AЩ EE Wg/8;Ϫ`#Mf7c+iߧZ6]-Y27'e*qraUBb^'Hi>>?przߋo;0#a 0v &:QpG<~SS3"#YR,Ϋi3fjkhPN G&"eq8z(Zb fAu+7bH/8:3ʌPƛ]Ǵ)w; *n:|P,M0]ob$V}I,SU$ALLyPRvPJDB_]|E:@YӦ`]o?礝,OMLbEyǧʟ׵ȸʇX^Wu*9XpcT=*^8֛#3=2wvu7ij[@AO"tE[3ӑ7BX|"710e J[)W{iDQ/H.ý}"<Y} l7;VXҹZL^-'}ا*vֹSǚ/B9R_mw[]JFCqTpH]>JawL?5HYkɝ\홱}g#ѰU,zхJ4jeGrTC# M/;f 7,<o|H[$|a#,NV"98)]jLL3OyafhJp's o*R ~؇-|S) hf_/Z=2+UJPJŵI}r8gW KW )feq rE,ė~RbY=/RO"nE#M@jǭEߛRG Ǐ)\GSD+[4x}"I"Kƚ9l=+:ˤo;>33&" {HEJY(*ͼ56%2CiO8{NDfpanƯE<}Aۄ". 
n PdڻnMn9{$n#gִ ,J͆haN`Ofֺi<@K0O\C!/hzbP6즏b ]应&;zWHyix2_yE;R~R'­R?:FDg9:żѝ ֍plcn)fQI8>iC(6(SlYp~Lxi}l%/Mp ^$#Kf)*ꍗٓ\4֚|m'PffO±YQ(f}'eExw*#WVZ -j/M,֘H5mm<"b`xSk Ɍ =V)$KK/rce%@8h28nht9YN)wԵ 6HNBM<KAy dvH cadS3$ PM'k}MHY"= JL!Դ@7֜Y3RF-)'tIN:$!j!e2dj)2Kn5VnZ ވKE->\@/I8Xn9i!͆m '%}Aǜ)nY;X 9ߩwB8X&-_2t8Sšy p ͘%ĕŊeS_O-ڠ& MԌ23BNq1TAqaeH`Ҹ{ztb}9esˁD:OHr&zc2^w*kg!DY=rGi 8PM֧?EU٘!MipkzRܽ5VI5f<ɞi Pq.M>$)na K!k.D[?9,* ]_nzCNlؒ8lFsN ƷA^Sw;ACkcpJwyiN L8nrGr`h^gFtGW.;b#(Q"!hk%gPτ#vv`^q)Uy՝TniERz8+gT?WVгi]_YsvgYqzVsTcgmeg?4e2*݅$HHG%"(M4PDH{QT?^Oa83il'Y 3WvجR5mˮe-"˪U1ZJe`{DhYDY!hCHw^B*9S6mx!+NsTITH 쩩9MJo!7W+E7Z4U*+5/(:K] ^pOGطҔSؔAT"&߮S9+6'x".𘟨-4"wp( AaHF B7۪M4e7RƎuxHp;(^ <ӫdq= S4(DҳeP )ch*cD;2ȝz{+}Sиy+X]Ǣ)Yi؈sH ߟo=os$a A`ZJ&ct%i _nPQH];Z]3t[(.YG&iՃ$L},!ѫVM9+'ZcmY**tYS2 P{},7[Xg]z=g Jn\lQEkkE jA{v8*.eYw/9a%&DaZ-r3, ң*C.&O3mNRYP~M%Ҥ N¶{!8:t9tnh7#ܟܙ$Avc')ӂ}V%7uLv*BhF"rjõnP/5OA]>BrȡrmK;bŒTZ)Ót?@wd pݕS Ursl^g~2^ @}e.a+ Q~%ba|/i|efhp:ZIr]c HE3*$S]^-_5^N%i V*v'+5i@XAAtLnkvϡʕK]XTnHf OJa$P 5^Qff77迶qN =r'% @PeX*Еh޹m\-,7,֊ƃT:@7 ui )9g* :xʧ,?c8`A_W&mBYQ7ݼt3&c۩ZW+K&D Dl3J GJ''2a71ِ4v1t'TU0 a:jJ48 Rtxu̳;\N ;m#6Ar񆼎IWFx^naCo2tƽrCs7hN` J .!5@iQz"7ϳI=!<#Za3*?})*U3qwzhcSPVJnp*H[9 y@Bq>xBwCڤdޣYJގ*;deg$TL.U:M8xrJ$Ӎ_nĻvi:0Ze *P}͍5|;7U/j ?wܑ}#0A\łlm鲱zOP5>.;ķ"Q? JEI_WwcTN:ْoQ+Zu>>86󇀥CrE@CSHVGԺ8%ɷ"\*u[,۟/Fj:d+b$M<;B*d bŜ_Ěޖ&14Xe8uV3Yb"IV|J4/j˒8¼'X>^gp/67m/B?~y[Gc۰dA`k7nׅ):޾3R,PL%Q.BJ `澦d*O!hm/`UN;~bV+K|)cssU4@'*J*`^EE/&>ߘ1+G-9z`շ-itC 7WjHj$|B/ |yKz+_+ MS8oFY"ec\:Xu׀{>4oFi!F숋3z rp8[&bR!tm(XAlz"҃{rC#MnHF}h&CvD[Qq72l0-\tE>9](ɹ29e.݄WRȗe$WF ^eyu}6!s~%/CIA,\yVL:<2踴rxFn4w9s!+ʟ^ bD陼i*3a-?Ҭ).Ix;|Uo{6ǂ! fsC>FdBQa6]؊\\:*q'?>%ОsΙ:~9L.hv`+/iͬ R['{OɱMڥ:+ۘwiƮdȸGE8+첳x MTuYmC^O6QLtZKnumX7alNp6Yn3k\lcy]Ϸʐ$)3hZK J!I !b dzF R<)H"$b3R)P{L*W u-v N<0nr٘M$lC# q" aoMM? QBS*n,c,~#s'"ۙl/@cMEhNkNDh]1Ӷ8Qb  '>[-,'&lb؛q(c޲xwUv,T <+ʋj+nd>@gֽζm }4Y"?gĂu0 ql[E*` ~]f9M[3 fԥ ze@8Hq[=rۂ;PRDI egW+m('76m_95btoz\Ddž-OOSG*+؛ Q*ͽp#}!x]4 ?n su{XyG Us.k)`F~*ʅzOS3gZ SH5'a *$'f;>~)aʙ4 3 I`Ӥ=`3m;FPH/BPcFu4RpٯNH'|6)#!pmLkc ?.IP\lW5_:XXy^ >TIgP!TҬw- ^U+Vr*f-P+F:QIf$wq$v{CƐ\9e; ^P7jsH/p{o q:tZ܂LV xRBB'#=RB̖0b2ҥ; R0}mڗ] }ZS5WnooN RbH;GK`i[Ǭ ?N_U{(꧃B{;МesQeE~eըO#a8|Lc_v,?{i=k.I'כl;[bK;七y%;[Ʈ^$ga@4H@p[/$c.oO\ߑy@ah ,I#yŘn*CEh>mIA֍_h9 g_栍s)J)tfzpA/zZƼ^o. G"`v5wK%F!IOȈgےl}tqAk(9'tCY^pjCl6B}kL1-}y5g}l%o/—S%kRqƌrAfS Oo\/~0g]qqnuX<)RLxj88c]u`')@,rbP)XU5# 5Zm5'㱨\PbAd6>WϽ^P<`'ΒӧxZ| 3 Z`CNHT8z]pNg)7EB]Q(zyeTya:Y4['߆)弾.2t >S @cX3hbda5mE2{$]Xv./y%0vigeR#R [gF<8 ƶ>'0Pr&D_n ~;zZܼQ4nSNA%K9CĒ[zʮ#r@$y%S1f7MV6/0X,wX<3$;SfOϬl*9ܢ{QG28?X@U!2rd_纆n!P!ڙ]=X6([y!dq/a mMHR3 ʴADR_A i"W'UH}Nkp6$gu_ 2z3'Q'jbh &_D]}f'@pdA P<\څ"7p:Fˮ)I6~P퉽kҎ8άa6idb2B_w"5[P9x0`fnEuK7lfUݮRt,t~]#6P?H9 idMn+)DN$^JDXgו/eazY39; )V.0;ySn+RET- yV {Dg^ BKxD>'WPuǭEƌ[x<ĵYoR'F#etki%$ȉY ZÜ'j ' x$"Eh`-c6:4Vm/ 3xdlƑ1 {iXigF֢4D TI0BvvBm$Ĺ_/>Q[X#/ћ4MhΐTjD`t9死_"Ov~VЮT3-[M2eąk}lQ0oޙNݳov58w6oV~5!.ykͰ ^:G!C1c@!ƆyzD3I@kAL~MH噤clgu79r3 ս=nʩ9ks琯PvK2 -Lm s xj8>Iͺ> g`GIN N dϬwU/m]sY駘&"fץ57j/R@]y Ѝ=I$o {r=vƒ u$cN5 mKӐJ=jS_)핸P-, ;k_l &44 (|gc |4Gd( 5l|Q)L޳`v`t eђXٴi<ξf7#Ubm1?KSJ e"Nl>`ɺJ\f$gUm4VrtAVX & Z!ʠ55I6TtL0t{݌u奃6&;4BNikv wv7JŒ[fV C@ &~?)͒ Є\0WAuu؈/t``yQMB9%e|Le*}v|\YC gz3o"SՐՄ(^|ź6/CIRjO7"'x5͆co؛|%ۺȌ+|l')$7+H%OF /{رwSw_:}F9T`L՞jA<+ Y YHC@qT;}~/T9wA3E {,Ҫ!1?fEJ־8 ki>fp\ҿ apf^U9ݺ"VDz8 -~/to,.G@8r7u}3-`{tColfBP^f5%-I,wƼVϻx !x]eyey-pMoXJϴvlc&iz{D)Z"y:O(,9`̎v2"TXkXvrxVi) q  V54Q_00Òl:k+{?ľߙX̊՗/i o@jDuZ9x}I@f u*1:{&n=䮰ag(%[˔:YM5/}yf66p,}*rcs':%s+GmBBX-'::{bZܦ,wGg2 YT:0) d)cf P$bBtY&Rzt*ts8% yCtK #; Q<ÅFghʦEW-p)I? "JckMQiCi$x^c_26 ZuA)f |whj|mA*YC? 
1TMs:^G[e'R ZPM bcmbrX~Uv18\|d( ɀ2〢LTQ ϼ7JQ<>Y[A+2()K{K (HL|JekvnA:Y|v0x>{TVzHa셗UJFT鈏y^7(MSH\j[&9 S޼,t9A-bCTx] ԧk 9JW4' (hc_^ngo++2tteHch]0{PG%^5ذ'eұYXכjNv~ًӛ^:q.k ˿#6 KyqhfKA qvy-czxG w#i`r1XE7Or1װR4rfJqdbvcSq; ×/pZ _QV?9CCMNIm֐Jp^ߞy G~}^6#e~ZHH} nUtM|U夎G .=xg.?*E)cX_=bS6eAu!Hα5۲&+xDO5]e$/iM\97Ϣ6md wu}yZ[5RX:_]#cNz7WMͼʂ?m5)M lp}+5hw64*PǾmȌiQW?kӇ wj((9<_SC+p; z)!x]2|/^Q+}!lM8!ZMՁa&k~=*i|KӔ=_)=؋}_7~=(?nQg[ݏZ$U[`ğO/Ou/D{D=Gtg-_![jS7IjyhN5`!1X"ngNŖl0Nb]%,9_[04]=͎zVLdQ)D愞{#z|rc^S4#C(l=3$Yq_ElAw1ieɈpt'P3nRH>3 _ F/ag")I.+wtzq.Zr%e`j] h~wgmgP#JT!ǥO]1f%kbB4GtJoPyrKYx&; pɆ ",(<*%x%q$ѬR kRb$GJ kO/uhI腓:g I2 (.=l_Zyul\all9Bɨ|n(cܐ:튞*dRNkKtk@k_$>/nX68c*܁x+2o bqwaP'yߺʍ2`d08 V&rI;r `~9-)zb4-z%Li^`J7^MԥJg7i})x҇&S8Go>]x%h;+ݍM{4+g"i)!V>ݔ({8` oĝpNF621 `ʏGzBUG 2`8TҘ.2qְyZ8+Myfct9<"Dk?dX@TnR#C YRI]ɺUwձ CI=21W ! 7L 3Dw 2:D Xݸƍ$nG@#8F č> N ԅ~ug.iWtc/G'w$<uZ j5JcBɫJQ`Ȯ~e[e9InOsSb)9"Jo0I LrVv7 مTwe}#AT}͊gXI V,ɗPY.|>9 - S?RݗUL,ZjJ{ΧRH)X6-'+hD:=>R8cdZHsw"#Etk(CCGI5\p %7}oK*~*TQ c&MZ@ [{QkdMC\L߃{Me{&ykUMOW9\f#Kx #b(C6x ~y,uM%)!;&ݛۿEIjIsW;߲*wYSegu\xi4j[p}(7`=dߌ:M8:J D-Bk}8UizRf}=kQczsHsxA k.%~"d٨ENw=q.?sɺSzo{@wegfWY. ~ vtl8 N_ko=W\>SjL?x), L?Wma*'vFMZ6`U[BߦXZOR/rnphz];CV'f|;3\qxC7 |_GNj2a 8vp-EmKn:?d)$Y$ԪQ"JnPt~9!&?1ln *9 @7s/f93TU/uPYm3QOlC5@v PkJΞuçf&>ΰiѽ$r!>@f֘jɔ(1n㬎Yɣf3hu!uN&y`K9.;, }e /0k2k.Mc_{/]61#6Jj`H/u- ~5Ul=t*QM}WrFܮ1iPW@=E%xub26Bp~e&lfiB<"#зsp i sW O΢>ş>UBmAm%Mn]"a%e"1'"mhdUM.aoƳߑ!|6"pZsbΘ|\ &=a[0=phx I)ocLC㈀X(teKdDy%DO=ʹ"@[\;{zc0LxAGuշ EqrN(C}.\,ťXbIި<4L-K j?T0ɷ;8{WN~ݕ:ô]ͽ<06)4S"}ai4 )iaÜ [EϕTNQ թAҢ~΂)"܆J=Sto} ,ed0juݖz  Z3^n4+RѵݏMrOv0 P2=AHAk0o {.1Z&$h2Z?u?758sf.p$nR}$P%Nt8(Zx=N&&OPg>|[9 핡/sU?J0HRyb @CN_SoK>b~|erf,Vo:(/gOh q6|97g" 0.PM{e#K$YJB`XKwn7#\_rk6w d ;sV%8z>!|iKrzcCmCy~რ#&xPll; [onMrsYُiVas =#iWQ: y&=H!Ӗla-Go,Z0t!x ]4-҈',Drc8bgaxw٫Ţx Wq&ӝ\?dX%REܸ>PHxWf>wX. %-, cl2-Z?I_ Rq҅#RB-@*/v6VDh*ؠgW:M%Ftg9퓁tvz8Vչ5k|-B^" d {Y>M4oWN0 2[מL5t#"?50GtgVF8Rūv᪑l@w֝5F~EYV)]\JG(9L^ my&uX`ʾ!yQCKUAc'w\Ӝo~0OUQ!yBm@u >Z-5yakfS)WI.>U.2 U@$dWhpZՊ,* x[.IQ뛋@OZcb32 Gkg2dHY%t7PNaz'^dz z 䘲zx_~3y'^rPa o VS|Xvʉ-Y@r O ͘[Ylutq|yG߲+%Hݹ(6 H4~ɳ%@ .-$TZbjΰ̽(*:|9nv by-غj-oe|I} 8Sa_dv hA?a&oF]dMRM"݉3P_~@]*?\73GY#A"#P PkMkDb-A`Ɣ7Ti}dmvfxmTLURaVKyn&:͚{&`ިL,` -lA]{^p0 MObwc^ ?QDϿEw8] 竁Yq䮀9dXG(6"[߿xGٿ̓ AڸAO#yo ^VwNyŻQ@^to&BkՀjEW\A:ԩ' [2 H+5,P! xD7fH[\/Hmڒ?ďcg*$:ҳo`G3\FFNQ 7mz 5ekGb4CAlzPD}ڒXi]ȍ°_j2{EqY 4-a;CjkM6fxC(˴[ŗ(G;PbLj0ґP߰e\8Z5{wبTP:6 x# ]2 3Y W4czLbD48{ lcuf(~etԗ/ -J1gm^ _p%otuL.ʘ[O0^V@b!dgDg=ߕi>.L:D~,r`bz?~D%$.6P 1FUc#h;#K[Wu8ߖ)D9 ,>upS#r`!x Q]!%zW3F56I7/&JqƗǵ&\y)H{nSksVW^)q7y|;)Fw&/V£fT:*<~BɕWOMX;&ٍxd!l-ME`d N2F?aAh]=cdjük`^l!\z1)404".X_Q510X-F@'v~SuQ41g!ZH:k8bp)YDhNuQ@,w#t\7 2u&k]578I J(a], 3?BaKDEe(5vԧ`MH\eV'$#i78TMޮQacSBwl{ɮ8sGZVH< ۄۮ$bAC'6Q}m޿m:\c%0ݰ6W7V\fp-J'̗}(wht -cZ/.|03V| H S`^%E'x2d1˯tȊ$\]'[t4c7ok?1S ~ꫪ[DilEt|UA|-8=~L)8-pqBeq+3T˖VgG'g:ѣ}q\W';2m)13 Մħ"Km"j hl`qJKsK47~RCd߉TWXQ~K*T)1)N' z2C|(G`tZz߮Μ9X?Yx׉d5=U5Ij>]X+A<6:u~CIꌥ*ixV['LFɳ)۩&C/hs;Ԫ9A+c!Pk$ѓ W뼈=9P1 v@Ip _{<;$>e~uܦ.j[ick֝bF,Fs mIO+n񦹅.Q*KK>x Gyru![),c%/Įg>W YRMP>r4!!_*9*aSp;ܥi`4%nOrhiS|keme]j N73˽frJLYNĉ5r8 R/3dBhzW ,m4UضZzhdqJ9?(߳r<{/'Jo5:fp7vmH{5.҅¼N"ư|[H|(Sȓ*"EK%YCs>puWab`>+.qL7#{Qd:]@X<]et6tػS1𓖟tAK9&]42|`?H߭ Lj W)a ~mw 3Nܠoa* j-ܶ3"{Tks@"kKB>> ^"! 
:~Yi~*}R-@|(nUq:yOO.-*]#(jnԿ\@:pXVm,6_tO㲫\H!v8uK} 3 *Ib=cJE6&ĿH0՚+?,IkD:أbl!c[Ga <=*+~$(6G7I)* GL  ǚȄË5).r0T _u~l= MfvŷҒOo2|iAz"n9LʭPVfOkZHZjдH}w+|U d=m;77e pX"EU9 鹋9D9!1U٘MS RCƕR#5VdVX.F_0?ujf]-?b!+%ufjE?P򷢪4YO94iPk^R0w1:>區_y 3pkV߭WELyK ǨvRD "F&1vǷ<.1* wwe|ZqixI\b?'Mf_jМ_*n->{CqۈF t!pӝ8Wc>3*cADZ7$?<]gk86e?1`Eo+X F^ MV-ˣ{̈́mź;3AdRb?ܵ[WH$\\2JKRU9/?)Ϧ+beɧ8턀QO̖"¾F])1VC$Eow+0Bh^ux O" jQV[cnp1,stB es[✈ᒯ):nl!gc1J"h"IHxwވu9-8-'*Z(~HVXaՒ4|c/jI!x]'@/閃d 3:r8A\ˍnkϵƶ5nzY5i?_~OPX"z;ن,ndB XS}eS+K%T spJ8ŏ!2W9ꞁ~^"dcDψv\DVAɃ7ec)okvY]JeԅӪ)pifMat8hTt*I%2X Q`PIPpr:3!OS5u<0& ^QΫ-(h=e =?[;/?kYd)aRBZ4LOHJ##c#s, Ya5 ={^w֍;@iċ{\-HOMHD B I<9f$<%lXkukY]׽O$Lꯁ8 %AZ[X/r, tBo\Nt4et$3JaV=/zm¤gFs˸GߍuvYdޓvFͩݾ1de[cdp ~ k](J O=I[+9 / X0u\EWj5wRꮷY5OӣxrK 0Ւb?q.bC]/IWW%Tsb~oj?3t4 M:"@2S'KKXhﺇ*]IybU59G;bewf֕+EQmFT3O;"L2 tOS__<8pMy_]ĜP+'bmT3N+E֋igYUS}h$컪 %ȿz|=[h.w> quw!(L#@5^5&F|^wӆ~vML\N1c(8Pі5{^ݱs?G&A>2&v! 33|P} ApE=#bɔ?+C Fy>$Z *Htjp T^7#M39$W|<A6yh!苠-D{1Ce*rHh)v!iMCA'#<Ԟe5\IN})4#(Օ}5kEiQQhzJZAH|;7/\5ʄyM?Μ HdXk(3+6Ԧd|70H\ۉ@nv݁ BM\(T_@L"#}()^U,JRt[{Zuc|YM|hDϝOΖ߃³eE}ۥ?898YKwt/"!z כ o.#j =W}ؔyLB*r-;.WGPJ @BNM,uƀŔ5I[Jpu2ٴkn5a<_:r_6,k館;:>ST\vχIm3~ж]@ӷ2}1nO3 %u2L9\&$. Ίn{yOuo$XYGbwbQAY;ƢPR'҆{9Wڤ:־9awV#z')vZ\g]ͮ D1^uƃƏB́ AqkP5Ȯqɴe=IP'0_)huqjjT'GfRU9&zzB<׃ 428_'n)-ҏRtIG,x +srLz~T8A_zqL'*J7ut3նz=EZQ=:W%IB`i N3dKi$sQ)]6V4XDqm|&1(C৻H+{l{пqs&o\t^؛lj&Qkܚ_6:3gj?v~ -uu_:a;NTߊ]0AxZ U!ȏDM W6YLmM]lˆ2.6H%1(->GG/̨j[Yܷ{v:5x!~maP咂Y D,_e/r=Q w$ {9plOv9>TXOV@"EH׋.Po Q=m^WW{$y@Pt3>m'A+1;d--ٰ rƁ,Gj9rRd1IN={>""/9MQQS7wQՓr˺g@9N;Ɂ/)2"jCy;\o 2~w>pzzkx[@Ґ庝P:sP/ 04nOOOqJǍRܑ}8[n0 eZ~!\ BQ-qՕIߺw?m4dX}׭RuU@Nёb\#gvWrҍtlobK'~:5VU˲נl鸵zDajE5u; ѓr&-i@ld=5 SS=;T97j --,XĻ yEc^kwh] C. T8Q$&O2Tw82(1&]_練U}HJ%0U =\[Q[-&F fordȫhC;ҦZ]ŏi 363?D&~͍Bk D(K^iHbS߿tqL 7merC,A8,V'8$+xA԰uǶl{\ff8mϻ9SI!^tGU<"RxK#.} @LhwE6O-Wί%z!;-?~!K=TACv"ٟI,LQNQ$mcgIȅ5ΐCC[50LU6xCW3Nˆ0/0gɪʁjk*Y*},QZR՝r1wL08EHrrau,f}\uV1es;@=RYw s,wui¿ y4檯y~ɠ*7c: $g2mjvgH`|(|BUd6G%U4ZѱUaܷBM=Di=~G^h5d"q&HV;)fc׏+)z@C`1 EkUT9I)QA,kl,2)]7ڈoɧ!=8r"%V8rKPXa NvCۙ, ;ί:k|w5bXÙ|SGhq*Z{3*W*N5wWJ!gSd bBDc),V8b9#щUHxnYwEmfӓB⩰οwvz`EWcL{HhIbv˺3oI."w;wS/C B҆.9&Ʒ4t첫m97#h`]¶Wz;("ICpMo.ffF1n4AD!>%Ht[vCiN%Ai/# č}"~ $:H.{ѩs9o+RB7cu:Aq(1_ ]ֱF{6To`vyϬxk8~2p}C=(C ېRQ' Qjdvd['tӀKZsHߐ@oTmp6@^pKtT. ѳAf !t- ¦U4] +DW~18As\sM&^hRnaRiP*aovXS4W- h&QVD\ӕ^37/vc>l;xVhȕٰt]^bS%Ϊ Η$8;!t> Yq4tթjK.!x14 >E ϝ"c{86q[Joҁ۪\Imû?+.>:෰)_n^ehfL7Y r[B4+9Ns,"mE(Bw5,~AINl`ߙhJg ֎*'`+%i9 ͧSo PF`vq>8+wn65j T zbSM i$U:$*Dsؤuj|\k[mžh]k-&g8iȭC~5pt>%y htcၗ*InIfP*۹FXG9EٍNcP*(1<3 /# }{\yu +ʗLr3D:vH4\DS/CdGJ0rfD5v:!@Od)Oc] N'RYr/]^#x9))55lXr͘|rؤeT̢-~9j!+0P4Ƶ!_OBbP@-iې\_s`0J*y `nQqmϞW,׎0YuDI38~0>5Wu (jro9פ%vm~dcXspFIC5[R룩)0ǰ󂊱>܀Y*5-wӁۘ5ϝ􈷁@>[2<.mvNCf/KŻpUkv<D!Q#V`% uELz@CTe1 b>3S72cP񙿾~sM4,t*\}P퓅woW쓥Kxmw'eiKRxBC"&J &ҞL?BH`̅Wx4a!)cٻ*r,~{v+[YZh%u˳<9{;oF7;#NO1: z a{mлr}i.)!xb]3p 3wŐ2̦ y}lOZXK>z6`0+y0AE^Uertkk/,ur{ڦDA]Po?(Ѯ ytl爖 -! 
'=T "ɡXvm菦ɗ^=HVb;<7]#2֮uByi<*O)hsz"A_/ =^i*Mh\4Ȓ5h,Tb2 WŋZMm(Igm-q, +5os/ B7(}tDyKg*K͓sih\߮PIl:p3SΛY̎wW3T~@TU" rblk~m(d  cI&z9L%mY3 먏+ K3u&#f _,c2W0ODV)t9YA$Ǫgr2=oH}e)BT"&}#my#K FWƉQ'si`/ER陟\U9'] ^e%AwAKr>;$ods`9õTn8\X꾆r@ʤ\= aYhlqs!C5R3\#;ի|et|x<]QVԂ8m,_dmd.4tgX,R690 ր@&W_` a qOOcfc2YyOj\,T`/jlb78U&R rz\=tS}xcQAIǤ(B יb<mwn9DNb~"𾗂E}x+ɝoSCUiXص Hܡ *Ns5kLG#I0)I͘7(Y?i_މpr_t-lD+\[Ҷh~ eݡ.JM&lRB*`q̗AL-R8n.'zm/̌bȑs#&IzJQ'T当D(N _i^&=ô A逆A(ܫ\Gu;T[2pc~<נdRO=x2},ng_$5M`鯕 2,pZbK z"A•}-:Ѹ5Uo&fh31&ew1YیU>'-!_i" 4(Eh;#c ^ ҭ =特̙Ҵ γpܿXࡇ01k=.ܐDK)- Wg^,揬Smpv@KEsH0D.p,TDžikX[c*mdh ?8;_fyuZğ=;2J 6NzPXrdO-Y- <{>jaba$k2"Kuf_}jI7<oGl/dC\U;gd2<6'FQ  @"^Y |lO>_o{;IF_xş~U~۵{kpywo9n(fؘ AG{>Gxn}PxĹɕ9$;`#Ll#_p= ovkUezMl c$!w|ЅY+zB l(cu:F45%0i$$JmeL8T-%eJO~45ʶƔWs^BȻ WAk{ }P+*V!kOxmE 5Ī'dص8tJ7Z^b[)sp` 41nGu#"w6q2EDH#> vf47wˈrqIr" B~ (V7;U _-Z3-ɥl>.t&:/6>GN7FZƠoWwQQ, qdHR4t$.L[Wɋ'tj1*RDoΩr3$xfyZ1ao/~` vAƛU e LIH VcPЌND\G#;?~*> p;ω{AM Xj[7卫*o_y&#[ˆcSS Q_+f}bƤ_2~Z@6!/5Dz%eh.wi^a1R}F\ #d"GU]2K0?gt /R5x£"ef8(9yp&MiDED[7u;vu!dZBĬb9bg3msyԐ[U-M 3ioo |LNBn0DPKLLd|h"W6hVLqav_cw䐑dy5J7|emQ) opO~9 >xtYUl> [;6$:R'hdNIzAzw Êax+RK]Q2}uW)B<4M0C|u`6qQ*=\׵y4/#w<Ň!ҳP1sr2$iGvn,YW@pn)ÕV#hКW!2}b}+dDC e;pWUs0%,^zr!A+Qag/ۨia&RwEF58ɰ3lNyoŖZd=?VYRۓe8˫}l(_^ėj97ϖӕGu FP1=Oir҉p/d#44JZ^?;U;K}}mV3|8&Xݽi/% 09^j-e:GW m(vo.#[`e,o;!p%$UxZߌpmX)y~²HVC$ B䃖 KynUR*dhLעolA\ aD ƒFZ}mГ֌. \>!MDɤHفˍdXy. O@qyrDX Lo !af)1&1~ (Y~Pb0ȕ n'\ 7{mJρu$I6Ը8J(Acfd:&[wp|6 AQd"P<3e I'u[&v *Wpv8XD8TJK"#^2d_1-ݾqMBdȍ,C~Kȼ u(΋ n'U~whfRƝzć^w(G hR{ӥ8 ^ˏV[0~wKN_Ao|iՐcHk{Z:n)>?1d:'O92InDŝP71ާݭY9?i̠.҈N!}Qy;*9@?gpd8p=t`w$Xeb16 n|{Yl j 'uJ22(ufp$wH.PXp2ji&?ED/ nmJwG-Iދυ Ѣbj7ϩmCX`d k zӋr)XC6CqY:pŐ2(_GQk VZUCAK7>h%= @ jat\i n,p!0[xS00PCFo{N͚ev.SD D$; )E)/,.Yd S,EVX5Rz0}뮆tghGoh~!/hrKSo +|Elp}$s3)LXiAG؜Y:Qt:[;0Ĥ+ܶpyM%"߽ /d T0bVvD.tYp@t?UPx~R>+6:{r9S4Uڙ9Y.C #hr jdJȒ pI]b$KAV r'$AaLw-'FOqN' _uИ$^44#Q#_A}QՔF6rɰ=ݶXk^o`m%dm6!^!r'{{70/f|U94f25+֛Ǵcǟ``U z Gm-O5O7:҅pg i<_gڴCԜ.YX쨘H*f*.e%cU_ZPojY@4f~ﯞ(إ_-ǎ86ae{,8VCo\&(Л~=o8hqp:S= 4Q渇cIAG&A b\son_q@8jܕIǡްǍ?p%Lgh'FfǪyB.eOٵH%02&sh>?8Tr=؋Cv~nS6n ?l`Y փD١[!Ó۴]TA3RnϏro1Jl l!hSPtr4(6szI;ꖖn%BG4&MYsĐXc /9IFqn%8O6mVoz;:O5J3L}+dv6p٣ѵ6L^a[@vh+z7n^If^B\_K=D4-rT&{@x.~@.aiZ@$.( @\p,) YQ~IO5/rpw5*7 =qŠge-γJ-U>y,TFɳ2r[!,Zl!b8_:*5 *tD4]X%Sb(h~ ݅=C;qEcekW "教3ggr.BzQ4S78챙K74ţeDGT|.^Ѩ+0)5cO{G!D@Ƚ TEhSDBkY y//,r` I6(2j"q͠o|JO:#s>4eg`t s""? ;{L(JJJHS<ɟ٦a>U(0xX {pQh<.?.A٬A\T\ZP rёMFPHӳetaۦ+#:ӯ!N+~pu컴d-h%SX܁+7Edǂ^bwV ā+5`6&$El^&`z\GR?0ZT:n#v>F~WCDńN2Y$yhIٝ_oDEhExw. AfP^O5aA\uFwJ Xo\ҡ!x]i806CH2g]仱cڞ| YK7ec`GDZ`:Lf' ~on쟡r^ Ķ8B 8|rօx$ J<q+s/Sc9?\Q~s\+*K0;ćH6؂{#nv9)cIMLKҕ*y 7gbp4:R$9\0 opI㽜.OyErm>| co.uMXiϣTѷfdL~FK:;Q1%j?j1^a{"iСHPX}oPJq#΃"GF(`}ң&A=eʀk ӶI0G— g ~;쵔&= &?cI[*.e! )RX+ҌLYd(!B(&g(>&4:0?٧*@w.j|xU+TmO|M17̖ l+S4x?B[vXp唾lBS;y>neUjv K,<ފ@<b]3V2y_GC7T*q;9{`ߤQ`F>c. j\~Oz4%RoGq?ܓa2U``q.ݮj$y9gO3zÔ$v+jvv%5L=d/}#UK 1AcF}8x+uL} >כ{5Z?,!X(Bh.9=HA|Ś3 [W]UB!8V&_HtΆ{s曀82TF v@|!9qqF!6ev?eV13N.bF4^93>ѿCL;zE`Aq ; 64\.r$N~ {an3`U2{̮dSNNռ߽07I#7@F~Or` XS\ C" }ʔW_^_!hڷ,2΍c(hlcƘh޸%VR҄zAAwh6Tn-۳e.N:g1Y>oɞCf}9=&ҐC~'XXgՎhQ(l!ٖH+Zkxw.W"3GX8POW ޣ}q!c̒tQ",'~dt{x."0'GGA|Jzww$g D&dųMlz4֬6Eȹs`F}]F614e@ SH01Ph ~бEtvvi:U PLI_]2ٯ^!wWEwYN|YM\6>q3 {%ZƝ̎|"PK? 
jE qU0=6y(O3r)JT0ȗ_b39Qo{GQJ ^29,R]jdGR `-$:s.}۝"J!z-ZkeV:k}oߴMtښ\BkrRܹ53 *v.Ŏ pc-oI\`YM!_Z5U+OIbU joE ͉<)QTBA7Sě@(kh /uah1/H8u]x$ż, pJB,-A~v:`O[*qfkҩd~BY0"C)4{Se",8FH8ox89fS%sL#Ae tp|>rK>Udjj%·>E0r eHh}v`A8s|+2~\N$r%DI3gs A}нpþI<4//r1Q uBWJuh:O/X\7!EJF=ef]B,/늋 Y%e]=$Fؤ͘"{;c i& ]?%@DDߴa1 >;)e`$D[R 'Yuy^tXkQ4qe.DP[X}aN6 ./eǤ,}?q߁͒f?H \eÞ[8\/hH9  ԰ N QmU?D_TYr5^E0be"gүG(3d e_3* vId7 IW#F.ﱷatcR]G-?sGS-M*@qi b[u{3̔`Wn?<6t_ګk |9NHMDzs+;;W!β8oYHH"l򵐳 )ǯsO(ruG SQ^jv^Ϡf)@дCoh<, =TA S]Hуɸ|c>VLs>s3,&9~t -x@+=ڲNk~H]T_DE"}_PEM>^vK_(ksb_-q_Cm],;\Wq6 [-˄_}[U9yyWy!L.i&\߄9VJCMZn#@R. 䜞{cs[4 M_{J KL UPf!5~?85ΩzRf@+^kAR$+4q&@|C{ϐWpJYtvdfn.a]i~Uq'c $p/ ~ gbknrXj~7JQY#k0Xos }mM`"e\Etr6z3fmQCYKԄ])f;VTF,m[pu:![Do6K Ynjz])Re9(}{";~יI 1gF է;b+}H]a!W{LPFs8Q:mWK2O-17ܹ[jImm;"Og?޽<sϻxٝߒ-Μ2!6«6BK_ ժ">Ot`І) G{%BԎ!n[; ɪ2^?X7(QfܡdѺJov[9IO#k) Q:%6r,<̭OE8D0FNjhbWWˆ%ۨhvtt*QKQ0:Dl_+| $bO$l۬cF/a]VO,ύ黴gw֜XH${gV|OQG9%J;8w;T~`ֻ%Ώk81*F]%U68RH5S=e*]pZ~Az] 7Hͦa عF(ICp3˩$g﹡oD~zQ>8b/P1&&mвS^ k4be֩YŶ3MOd#Yfj C_!nӼ^x#UaeYH>|YCGiNқ5;KW . sIHzLb9t[מ G KŤ(mg$]SAw08@kl̦d_Lv뭐Bn5fx42'/0iOZȵdhm_\5> 62BdJu+:uω]*dZ(_;9,Y1Ԙu }JA78,Y T 9"H{u`Ie l'0+<R,#=xCG.WH0nЈ:q#qZۄ/%e˔6,,# F2Ew7(&7MJ rk_Tͱ3CV$<8#Q1Y3E hR%1 ڨ-d1NJXfXPY4ɑ.VK,qkؗDё"xUSxٚt˼QjcBX8,/H#28Z)M-@f(&bfsM_05pȭHҦ (zyNӬ؃l S^c98l}jޛҁFEtL01\XDo|A9]ĂZYv$ys}GT kūi`B`P 58ķl $<rr3 'L;6аr [)tzugeA8ڝM)%SxMoW}UʾKbղ;T7MdX'Sf4$¯`DYN8wXv1j ۧV7I!ͳ>ϛ(DWo&[ۏį,<E[M VảgQiyJ.F! zNm.ϞԴӮR|Z(I+ȷc28d#(9f4ܛ!p<亚"~3 8pG,ǞTF:2yo*<ϯn(kyqB,)KXq)UخO e2n^ Pp`GaΚMVKbG7<`o0{үÙqaݦp7e`@ֻNhN-EA7 ~-7G+Nz;eW@F/VS5f=شO ".UJetB&b!RƎ[FV[jQK'S`ID  5Œ٧a ߸_rQ^g! mp=TOzk2o' qBuqQI۟;4҉"Q}ޯs4k_US E"A>H;H0^V'F>nd[+pK-Y8.7+?9PAcEb/|;1??y Rm)NQVPݱ_5̪ ZV&gTt*}6C%P}kn@"z;?Cˀo`Չɂ'>LWhQȾr< mZw3,{+d(<ә-E)۵*~tf/(+ G8X'Iw(å'5/a20Q$(e­Bt>UO0Jǣ:syiR%e]\k:8#MS9Q3 6Kⱼ2"©75.5C웅AZ`Uʁ.j0jTg"ݥf!IWwK"Xc}MS<'U:;Ll5XQk5)Ƨ%yAe5epۥk%ѮeP[4|[&nXK R8>ǫJEPUi \gqAFQ>Ll9d^.΂/BeR5dd{ߙLjl I6~s,Ķ{MzrO ~w5`/0 k(eD%$B,K6Šb`zEqe ±ȈzSA!+;ƀ%J%>z?2ֿ:"6dycgLO(BR{%;Ŵy>母WKfHvݩUvln|>+2>!b#-jMMyD@-f.wPW )%ÔC溶\s*9MkGiYq]fQB f@mTdC~ 2j]77tW<æP+[]?V:*|xQd&q.ThxRbڠ>dqpXL;[bBf\)EI>oHTj<ASWFfp c U&%_Te5)|}#xTPٞ7/%ow "q--H 2ˎȢ@`>,ȴZs;J!?t{aH6ZKaWqve@\>^E4Ѿ^C&$Kϩǜ@ S񺳴? 2k>bEwR{~W(גLwCh Y\;xc8h9w9+})Gf G&L#%Vȶ4i۹B}vE7 yXUY`sI+ J`Y1n$wYѢf=Y5,ϥz3_~xd4f>d WU^X&9wE~4PG{Wd~+&»Z"/>]8G= cj̀͟ MGM&L`3" 穄h.lK.$ J%r?4ӳQEy/>wI7oCO)ˑ>;{XƭjP'>z*R+币j;#R2g`Ӧ!y[r^d{岈m}ZmI1TNWSn-!Q!W0p{j+d\x= BLp:vaTՋd=\?>sb`ؚ!}Ff㐥,6]:fna4 Pе-NVCAAHV%Op9< _ӯ\d;Xsp*,>[4m-"SMxU\.$] `aWF|2d"3eH8h3 eڌ.5gNmY@\&M1V'~掘(%THh?QĚV_@XwVћ4췽nV|@|\|)'n?1;BZSo>rx!E~_ @^A;+ظ{2} Z92A PFNu `j2f. fx+dM,%A!ۤ"ƪť.ѡi׾7C !%+9H|:D5nVSA3s﹌m >7Nվv2 qTa&A53.@% nIǒgaP)5[Zzr~MTbJ[-C'×T،|Z%߇4!HIB1Avm+| 5^>氅4Y! /'kٟ6]lyGtXlaF*?6#*ľʟ|?҅7QeH`z' f3 Xqc~n lfCףu'g΃ -Hlܻ">/nCg΃%'ꈀ( F,Y"NƺK5Fr4j3Fsc(ܬqLհ6xB G|x?qZ'>2{>H&.q6@r7_+L0ʵ6|{9n!F&`HmU!}ԯ" K\ʏ"3|ct>dgVCGFbC!n/H7Xց\7y-׫%˒*RSs>'9԰3_%ש; y:6+F~M ӹPSf|\H/#/xHZwzR>tʍNyqysڥL@qe^[w_ǭrdUlq = 1 tU-_׋y3'&6Q"r?PC (ASI`$z%ᮁiuu02泩m{\P>0&hYJ!{=tN!?NMOY(s}V9c鸸({, |byVu/4`lY#P>3Mcӡۃ[ Ь{Jl6:ՙӜkYnM  z0aYVR8Ê2qN9 ;Mo3' bSڞR/GcҔoIUV/ҩ7'h/%6R&3sAxIbj˲?VQ_akј^nf9oab$NO4ٲ>,bH'0#LrETy JdX+PPJPUkB4*%E3PX?P*Jbr7gYJ *-AKC˩fem` kbk9q6PJ~*k{:$`?mc)~(xjQ)[5Uo = c%l,B=q52-bI}ޗ Pcݚ7kjGQ6ox^_ nNK$S%Ź~.gU6J+'V)ڙEGъNI焚@-^~E; 3Jd" ۜbqEz2,*9+P5uJfxT3"z 9>VdZ35)JgQ> OێVswt&֪?@t4*C^Ì,yX ]jU6WK9s ̘"^ / k+(lE@ڕFimY uM >pr^IRlÃ>dusɳ42%߃Nckk70XTxK)< A ޿Id.{b8i&ڬT°ѧFLZmW)'mpaQw+X' y[ש"~YսrC5q҃oxMwĐȝ+u-cv҂ĝn'υVi7ewƉT3>}Ii.Te<-NT+7_W1X m"Edx +g>ǧnKp #Mo4. 
2vGF M>Tv'iaN 2WVjU9՞V83&53`ZC _6_Lj2H!^ "7(N!xWnjGZjvxA0(+6b]X=aiD",O&5nTbڿ7L|Y|aF\M S;3@ WM?J b)WEu"#q){"ġVyCy^!xm]_=8oRHxpG &IlSjA뛰Dы@;I#GN \kApB\ҩIZ1ͤ#dKq$Y|I'Hjh:5VaI%䫻!Ξe϶gAɪ'y.XLH3:9URHȏ%=$1yu Y;In]ؓ탾WK]">v۠M[vƺD m̈Ko7UFL85F *m̘[9ݨni~ iWcxNoF  5"bIS#z(D:T Ģ3$6AܒX~\Vi~={{}uG>.'֘͑m3 w _B [ °5$s,]@z|hS8~7 >8>l=oI xVN4]kE~=7 |B\剾h\P#%[sa}qY7bRmB+xQXnhFuk&~'IRo{A w Ŋbr}Ov[_Щ[ h|5+*/tV\$ӰcbQmv$$foG/ 55, .J_x02;oBGg79Kn] ј54꿫Qmd/-#|4?^\24߿Y}`bq3E-?Нw=o vh < kȯE{ YL{I;<"HNWobɡ~A|a i>xXA`O$dǢT`v_}J35Iã9'R+Y<4H}a`8µ,l'**Pa5 5MWO:qGK{೾p +>oO!GٗF$̣)AǠ[}`}2&-)EiNy 8X&?PخU X*A'097l(@听xqj7p ZbBwjd*ܭ[QVfh#H|;@`ͧv8=],_YF t Brl~r _"#$;1 vmDS‚#đ a[qD# !~ۓҤr^< L 3[*qӗeC4}nNt{28o_L5fEz>9T1V7hpQߧPJlu0Pچht6G$h1׉7UɻV%4d(;ᴤFM]5+7X3&i s1tV 6d{}mQn$I4 jL,/> ♑-Hxb jm0)|iנ1G# MS؈y󻔘m+!S} |FK/B]!W% 61 be {`p]=tAB#k4Aڻnc>}V 7bRgò+~^lKE[6; U?3&rKwRE8YjvփA'd/-^h3hx&Me N6.\2.8]T.)ymO7]pꙶ]Q  4VasL۪S?̚ͅv >EdӬUk;]/d#(v]H[lksͻU_ 7I\P>rK}|g8G\G^5U`y8eE!J9~S2 ->ICN"pŘنY$eud(O4f"R+Qr:~H UK5d*iCJ74eˮR9"-dWzB bD97jr@ ޲$ӵTbeC})/jәf yHӞK/>7Z]'|)  \HH}]'<)eѲ55u%L'*{x/`e-r Ec&4hmլ{zM4 p B%c'v&ϭTP$C7 ''-]L2̺QE19LJV=C5[]iqK72ే90 w}s%Dko  (s7G#&d=t=<冼l6kOƐ,ɡ`TzbA'[ۜ6y@*t|HO ݣC I/_:d쐢tΪPBpWʨC'ӁɎ_xdS?ˤ$ꌀޗ^0CRtvluw3an$ݗq^QWá*5obT:Rtуd\ƋFnJN:Ԧv{ǥX#[T 9n\h\`5;{3b*qp NF\"ІE+ɀ@>K6] 2Ht&`:~Rk-#ٱXzn)q[N"4 xqyRE*HTBmMi*УH8cxkh֞K]U!QF`}iji&>q |ygۀsT&A,Feչ*/M?X=ɐ4_n>q]d^WOkVsgfrIV&%3V2-7{sEzT_;hGHr],JavW2qY]%׫\s,#K&1~W@9E8"g:7(EImz!-1zn=\0 [&7B9 Šܾ=3+0%!x 7]i ܔ}7ti=PЇCAn(>no6ѩytGW)w>a{ 9;@@VVb|5a`RѥHq!*q$VcE"%'Hї-?5 (6HFܰp@j$O z lvw(qem6 ˭ e;:?0%Ymn)[ IoVzA ?<'%:Ufly9Osu.,܈zަ8C0f'e# msg"ejmP0 #{a[k<;t~*ѓp ?X8V>lOUo1TK`ub`sϚk\h1 w/'~+Uj MZO딟_&q|]֓VSol nO^Ԫ= $/VN-;x;?%3J0:&z2'{ү'%q74}rLat]׬O8Sb LKH>Qjj烹m?>&K+NOڈ.U[\R$p8=01ʱ ; swC[Ȝ9/VI׆s*¿͉yiF/ćwq# DyAs@o%Ł芔g'b|m$*?v:GԎOE<;7D=$2&{hK 8|2\)2PJPe$Fr'Ol##! vzTxL WPے[t0qJmhyY1KVkXjEg$*E+aia 3xM32sA>8/wNk/GL*42ejF c)4ڙ8L d'd%IםQ}r_Hz/)]{.ϡQXŲP { Bzu"q[[F篈)b<‘3*F٣߲}SOf\"=`ļzܼ#e}2H$ Mo.S8-;=UWP)èq(h~\=.`+fc;BoAU5 8чU/UX<ݽ|7tt0 sTbDsk՟eo E8eͮBi a.5<O_D>[,^k߬"si} 3 U3Q R lϸoT|Kf?F_*!xo4K|֚t3}Tq /eʡBimGŌbPnâc8%qm%/Qm oH@DZWdً)#^zY궸Z^3c_ˏ ir39#Xqnc)%y8-?bF;\Q})\(s܅l(dGe#%!6n@e'pG|?moE^YC8_A?1i7nӸC/eC#>^z|.#JZqIݼ& oLkW:<&jXё`wϓh* Nr:+! :883@CYZt    -o00eo44o55J66c@!B((%32@ @(R==\`` @+$((66Y&,1(C<`|hh_8_PaP@@c ؏o(Jo7E@$" P;\|